Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@
It's now available at `pyld.identifier_issuer`.
- **BREAKING**: The classes `URDNA2015` and `URGNA2012` were moved to `canon.py`.
They are now available at `pyld.canon`.
- `jsonld.expand()` now accepts an `on_property_dropped` parameter: a handler
that is called for every ignored JSON property.
- **BREAKING**: In cases where there is no document base (for instance, when
using a string as input), 'http://example.org/base/' is used as the base IRI
when `@base` is absent or explicitly set to `null`.

## 2.0.4 - 2024-02-16

Expand Down
17 changes: 17 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,23 @@ If Requests_ is not available, the loader is set to aiohttp_. The fallback
document loader is a dummy document loader that raises an exception on every
invocation.

Handling ignored properties during JSON-LD expansion
----------------------------------------------------

If a property in a JSON-LD document does not map to an absolute IRI then it is
ignored. You can customize this behaviour by passing a handler to the
`on_property_dropped` parameter of `jsonld.expand()`.

For example, you can introduce a strict mode by raising a ValueError on every
dropped property:

.. code-block:: python

    def raise_this(value):
        raise ValueError(value)

    jsonld.expand(doc, None, on_property_dropped=raise_this)

Commercial Support
------------------

Expand Down
65 changes: 52 additions & 13 deletions lib/pyld/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import warnings
import uuid

from typing import Optional, Callable, Any
from pyld.canon import URDNA2015, URGNA2012, UnknownFormatError
from pyld.nquads import ParserError, parse_nquads, to_nquad, to_nquads
from pyld.identifier_issuer import IdentifierIssuer
Expand Down Expand Up @@ -104,6 +105,9 @@
# JSON-LD link header rel
LINK_HEADER_REL = JSON_LD_NS + 'context'

# Default base IRI if none is provided through input or options
DEFAULT_BASE_IRI = 'http://example.org/base/'

# Restraints
MAX_CONTEXT_URLS = 10

Expand All @@ -116,6 +120,13 @@
# Initial contexts, defined on first access
INITIAL_CONTEXTS = {}

# Handler to call if a property was dropped during expansion
OnPropertyDropped = Callable[[Optional[str]], Any]

def noop(*args, **kwargs):
    """
    Do-nothing callback.

    Accepts any positional and keyword arguments, ignores all of them and
    returns None. Used as the default `on_property_dropped` handler so that
    dropped properties are silently ignored unless a handler is supplied.

    :return: always None.
    """
    return


def compact(input_, ctx, options=None):
"""
Performs JSON-LD compaction.
Expand All @@ -141,7 +152,7 @@ def compact(input_, ctx, options=None):
return JsonLdProcessor().compact(input_, ctx, options)


def expand(input_, options=None):
def expand(input_, options=None, on_property_dropped: OnPropertyDropped = noop):
"""
Performs JSON-LD expansion.

Expand All @@ -156,10 +167,13 @@ def expand(input_, options=None):
defaults to 'json-ld-1.1'.
[documentLoader(url, options)] the document loader
(default: _default_document_loader).
:param [on_property_dropped]: handler called on every ignored property.

:return: the expanded JSON-LD output.
"""
return JsonLdProcessor().expand(input_, options)
return JsonLdProcessor(
on_property_dropped=on_property_dropped
).expand(input_, options)


def flatten(input_, ctx=None, options=None):
Expand Down Expand Up @@ -442,17 +456,18 @@ def unregister_rdf_parser(content_type):
del _rdf_parsers[content_type]


class JsonLdProcessor(object):
class JsonLdProcessor:
"""
A JSON-LD processor.
"""

def __init__(self):
def __init__(self, on_property_dropped: OnPropertyDropped = noop):
"""
Initialize the JSON-LD processor.
"""
# processor-specific RDF parsers
self.rdf_parsers = None
self.on_property_dropped = on_property_dropped

def compact(self, input_, ctx, options):
"""
Expand Down Expand Up @@ -801,11 +816,11 @@ def frame(self, input_, frame, options):
if frame is not None:
ctx = frame.get('@context', {})
if remote_frame['contextUrl'] is not None:
if ctx is not None:
ctx = remote_frame['contextUrl']
else:
if ctx:
ctx = JsonLdProcessor.arrayify(ctx)
ctx.append(remote_frame['contextUrl'])
else:
ctx = remote_frame['contextUrl']
frame['@context'] = ctx

# process context
Expand Down Expand Up @@ -2076,6 +2091,7 @@ def _expand_object(
not (
_is_absolute_iri(expanded_property) or
_is_keyword(expanded_property))):
self.on_property_dropped(expanded_property)
continue

if _is_keyword(expanded_property):
Expand Down Expand Up @@ -3266,18 +3282,31 @@ def _object_to_rdf(self, item, issuer, triples, rdfDirection):
datatype = item.get('@type')

# convert to XSD datatypes as appropriate
if item.get('@type') == '@json':
if datatype == '@json':
object['value'] = canonicalize(value).decode('UTF-8')
object['datatype'] = RDF_JSON_LITERAL
elif _is_bool(value):
object['value'] = 'true' if value else 'false'
object['datatype'] = datatype or XSD_BOOLEAN
elif _is_double(value) or datatype == XSD_DOUBLE:
elif _is_double(value):
# canonical double representation
object['value'] = re.sub(
r'(\d)0*E\+?0*(\d)', r'\1E\2',
('%1.15E' % value))
object['value'] = _canonicalize_double(value)
object['datatype'] = datatype or XSD_DOUBLE
return object
elif datatype == XSD_DOUBLE:
# Since the previous branch did not activate, we know that `value` is not a float number.
try:
float_value = float(value)
except (ValueError, TypeError):
# If `value` is not convertible to float, we will return it as-is.
object['value'] = value
object['datatype'] = XSD_DOUBLE
return object
else:
# We have a float, and canonicalization may proceed.
object['value'] = _canonicalize_double(float_value)
object['datatype'] = XSD_DOUBLE
return object
elif _is_integer(value):
object['value'] = str(value)
object['datatype'] = datatype or XSD_INTEGER
Expand Down Expand Up @@ -5115,11 +5144,14 @@ def _expand_iri(

# resolve against base
rval = value
if base and '@base' in active_ctx:
if '@base' in active_ctx:
# The None case preserves rval as potentially relative
if active_ctx['@base'] is not None:
resolved_base = active_ctx['@base'] if _is_absolute_iri(active_ctx['@base']) else resolve(active_ctx['@base'], base)
rval = resolve(rval, resolved_base)
# fallback to document base if the base is absent or set to null
elif base == '':
rval = resolve(rval, DEFAULT_BASE_IRI)
elif base:
rval = resolve(rval, base)

Expand Down Expand Up @@ -5382,6 +5414,13 @@ def _is_double(v):
return not isinstance(v, Integral) and isinstance(v, Real)


def _canonicalize_double(value: float) -> str:
"""Convert a float value to canonical lexical form of `xsd:double`."""
return re.sub(
r'(\d)0*E\+?0*(\d)', r'\1E\2',
('%1.15E' % value))


def _is_numeric(v):
"""
Returns True if the given value is numeric.
Expand Down
Loading
Loading