diff --git a/.gitmodules b/.gitmodules index eb15380..f962f62 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "tests/cts"] path = tests/cts url = git@github.com:jsonpath-standard/jsonpath-compliance-test-suite.git +[submodule "tests/nts"] + path = tests/nts + url = git@github.com:jg-rp/jsonpath-compliance-normalized-paths.git diff --git a/CHANGELOG.md b/CHANGELOG.md index b2c4ae7..cd40af8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # Python JSONPath Change Log +## Version 1.3.0 (unreleased) + +**Fixes** + +- Fixed `jsonpath.JSONPathMatch.path`. It is now a "normalized path" following section 2.7 of RFC 9535. +- Fixed normalized slice indexes. We were failing to normalize some indexes given a negative step. + +**Other changes** + +- `jsonpath.match.NodeList` is now re-exported as `jsonpath.NodeList`. +- Added `jsonpath.NodeList.paths()`, which returns a list of normalized paths, one for each node in the list. +- Serialization of compiled JSONPath queries (instances of `jsonpath.JSONPath`) has changed. String literals inside filter selectors are now serialized using the canonical format, as described in section 2.7 of RFC 9535, and parentheses in filter selectors are kept to a minimum. 
+ ## Version 1.2.2 **Fixes** diff --git a/jsonpath/__init__.py b/jsonpath/__init__.py index 3d112e8..2a34e01 100644 --- a/jsonpath/__init__.py +++ b/jsonpath/__init__.py @@ -24,6 +24,7 @@ from .fluent_api import Query from .lex import Lexer from .match import JSONPathMatch +from .match import NodeList from .parse import Parser from .patch import JSONPatch from .path import CompoundJSONPath @@ -58,6 +59,7 @@ "JSONPointerResolutionError", "JSONPointerTypeError", "Lexer", + "NodeList", "match", "Parser", "Projection", diff --git a/jsonpath/filter.py b/jsonpath/filter.py index a2b5989..0f33cff 100644 --- a/jsonpath/filter.py +++ b/jsonpath/filter.py @@ -3,7 +3,6 @@ from __future__ import annotations import copy -import json import re from abc import ABC from abc import abstractmethod @@ -25,6 +24,7 @@ from .match import NodeList from .selectors import Filter as FilterSelector from .selectors import ListSelector +from .serialize import canonical_string if TYPE_CHECKING: from .path import JSONPath @@ -208,7 +208,7 @@ class StringLiteral(Literal[str]): __slots__ = () def __str__(self) -> str: - return json.dumps(self.value) + return canonical_string(self.value) class IntegerLiteral(Literal[int]): @@ -375,6 +375,12 @@ def set_children(self, children: List[FilterExpression]) -> None: self.right = children[1] +PRECEDENCE_LOWEST = 1 +PRECEDENCE_LOGICAL_OR = 3 +PRECEDENCE_LOGICAL_AND = 4 +PRECEDENCE_PREFIX = 7 + + class BooleanExpression(FilterExpression): """An expression that always evaluates to `True` or `False`.""" @@ -408,13 +414,40 @@ def cacheable_nodes(self) -> bool: ) def __str__(self) -> str: - return str(self.expression) + return self._canonical_string(self.expression, PRECEDENCE_LOWEST) def __eq__(self, other: object) -> bool: return ( isinstance(other, BooleanExpression) and self.expression == other.expression ) + def _canonical_string( + self, expression: FilterExpression, parent_precedence: int + ) -> str: + if isinstance(expression, InfixExpression): + if 
expression.operator == "&&": + left = self._canonical_string(expression.left, PRECEDENCE_LOGICAL_AND) + right = self._canonical_string(expression.right, PRECEDENCE_LOGICAL_AND) + expr = f"{left} && {right}" + return ( + f"({expr})" if parent_precedence >= PRECEDENCE_LOGICAL_AND else expr + ) + + if expression.operator == "||": + left = self._canonical_string(expression.left, PRECEDENCE_LOGICAL_OR) + right = self._canonical_string(expression.right, PRECEDENCE_LOGICAL_OR) + expr = f"{left} || {right}" + return ( + f"({expr})" if parent_precedence >= PRECEDENCE_LOGICAL_OR else expr + ) + + if isinstance(expression, PrefixExpression): + operand = self._canonical_string(expression.right, PRECEDENCE_PREFIX) + expr = f"!{operand}" + return f"({expr})" if parent_precedence > PRECEDENCE_PREFIX else expr + + return str(expression) + def evaluate(self, context: FilterContext) -> bool: return context.env.is_truthy(self.expression.evaluate(context)) diff --git a/jsonpath/match.py b/jsonpath/match.py index bfeba98..dea2fee 100644 --- a/jsonpath/match.py +++ b/jsonpath/match.py @@ -1,4 +1,5 @@ """The JSONPath match object, as returned from `JSONPath.finditer()`.""" + from __future__ import annotations from typing import Any @@ -104,6 +105,10 @@ def values_or_singular(self) -> object: return self[0].obj return [match.obj for match in self] + def paths(self) -> List[str]: + """Return a normalized path for each node in this node list.""" + return [match.path for match in self] + def empty(self) -> bool: """Return `True` if this node list is empty.""" return not bool(self) diff --git a/jsonpath/selectors.py b/jsonpath/selectors.py index 318225c..44007e9 100644 --- a/jsonpath/selectors.py +++ b/jsonpath/selectors.py @@ -1,4 +1,5 @@ """JSONPath segments and selectors, as returned from `Parser.parse`.""" + from __future__ import annotations from abc import ABC @@ -17,6 +18,7 @@ from .exceptions import JSONPathIndexError from .exceptions import JSONPathTypeError +from .serialize import 
canonical_string if TYPE_CHECKING: from .env import JSONPathEnvironment @@ -75,7 +77,11 @@ def __init__( self.shorthand = shorthand def __str__(self) -> str: - return f"['{self.name}']" if self.shorthand else f"'{self.name}'" + return ( + f"[{canonical_string(self.name)}]" + if self.shorthand + else f"{canonical_string(self.name)}" + ) def __eq__(self, __value: object) -> bool: return ( @@ -98,7 +104,7 @@ def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: obj=self.env.getitem(match.obj, self.name), parent=match, parts=match.parts + (self.name,), - path=match.path + f"['{self.name}']", + path=match.path + f"[{canonical_string(self.name)}]", root=match.root, ) match.add_child(_match) @@ -117,7 +123,7 @@ async def resolve_async( obj=await self.env.getitem_async(match.obj, self.name), parent=match, parts=match.parts + (self.name,), - path=match.path + f"['{self.name}']", + path=match.path + f"[{canonical_string(self.name)}]", root=match.root, ) match.add_child(_match) @@ -321,20 +327,15 @@ def _check_range(self, *indices: Optional[int]) -> None: ): raise JSONPathIndexError("index out of range", token=self.token) - def _normalized_index(self, obj: Sequence[object], index: int) -> int: - if index < 0 and len(obj) >= abs(index): - return len(obj) + index - return index - def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: for match in matches: if not isinstance(match.obj, Sequence) or self.slice.step == 0: continue - idx = self.slice.start or 0 - step = self.slice.step or 1 - for obj in self.env.getitem(match.obj, self.slice): - norm_index = self._normalized_index(match.obj, idx) + for norm_index, obj in zip( # noqa: B905 + range(*self.slice.indices(len(match.obj))), + self.env.getitem(match.obj, self.slice), + ): _match = self.env.match_class( filter_context=match.filter_context(), obj=obj, @@ -345,7 +346,6 @@ def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: ) match.add_child(_match) 
yield _match - idx += step async def resolve_async( self, matches: AsyncIterable[JSONPathMatch] @@ -354,10 +354,10 @@ async def resolve_async( if not isinstance(match.obj, Sequence) or self.slice.step == 0: continue - idx = self.slice.start or 0 - step = self.slice.step or 1 - for obj in await self.env.getitem_async(match.obj, self.slice): - norm_index = self._normalized_index(match.obj, idx) + for norm_index, obj in zip( # noqa: B905 + range(*self.slice.indices(len(match.obj))), + await self.env.getitem_async(match.obj, self.slice), + ): _match = self.env.match_class( filter_context=match.filter_context(), obj=obj, @@ -368,7 +368,6 @@ async def resolve_async( ) match.add_child(_match) yield _match - idx += step class WildSelector(JSONPathSelector): @@ -402,7 +401,7 @@ def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]: obj=val, parent=match, parts=match.parts + (key,), - path=match.path + f"['{key}']", + path=match.path + f"[{canonical_string(key)}]", root=match.root, ) match.add_child(_match) @@ -431,7 +430,7 @@ async def resolve_async( obj=val, parent=match, parts=match.parts + (key,), - path=match.path + f"['{key}']", + path=match.path + f"[{canonical_string(key)}]", root=match.root, ) match.add_child(_match) @@ -479,7 +478,7 @@ def _expand(self, match: JSONPathMatch) -> Iterable[JSONPathMatch]: obj=val, parent=match, parts=match.parts + (key,), - path=match.path + f"['{key}']", + path=match.path + f"[{canonical_string(key)}]", root=match.root, ) match.add_child(_match) @@ -633,7 +632,7 @@ def resolve( # noqa: PLR0912 obj=val, parent=match, parts=match.parts + (key,), - path=match.path + f"['{key}']", + path=match.path + f"[{canonical_string(key)}]", root=match.root, ) match.add_child(_match) @@ -701,7 +700,7 @@ async def resolve_async( # noqa: PLR0912 obj=val, parent=match, parts=match.parts + (key,), - path=match.path + f"['{key}']", + path=match.path + f"[{canonical_string(key)}]", root=match.root, ) match.add_child(_match) diff 
--git a/jsonpath/serialize.py b/jsonpath/serialize.py new file mode 100644 index 0000000..bbb39cd --- /dev/null +++ b/jsonpath/serialize.py @@ -0,0 +1,13 @@ +"""Helper functions for serializing compiled JSONPath queries.""" + +import json + + +def canonical_string(value: str) -> str: + """Return _value_ as a canonically formatted string literal.""" + single_quoted = ( + json.dumps(value, ensure_ascii=False)[1:-1] + .replace('\\"', '"') + .replace("'", "\\'") + ) + return f"'{single_quoted}'" diff --git a/tests/cts b/tests/cts index 0bd4474..b1e176a 160000 --- a/tests/cts +++ b/tests/cts @@ -1 +1 @@ -Subproject commit 0bd4474b24e93e1e169ae88e42284499ce21d4a2 +Subproject commit b1e176abf1cba66ce0c07b294c60f1bc10f58a4b diff --git a/tests/nts b/tests/nts new file mode 160000 index 0000000..c9288b3 --- /dev/null +++ b/tests/nts @@ -0,0 +1 @@ +Subproject commit c9288b33aae7440fa1d8ee8cc0a150a47f4d5c96 diff --git a/tests/test_compliance.py b/tests/test_compliance.py index 613f4a2..38592cb 100644 --- a/tests/test_compliance.py +++ b/tests/test_compliance.py @@ -28,6 +28,8 @@ class Case: document: Union[Mapping[str, Any], Sequence[Any], None] = None result: Any = None results: Optional[List[Any]] = None + result_paths: Optional[List[str]] = None + results_paths: Optional[List[List[str]]] = None invalid_selector: Optional[bool] = None tags: List[str] = field(default_factory=list) @@ -105,12 +107,16 @@ def test_compliance(case: Case) -> None: pytest.skip(reason=SKIP[case.name]) assert case.document is not None - rv = jsonpath.findall(case.selector, case.document) + nodes = jsonpath.NodeList(jsonpath.finditer(case.selector, case.document)) if case.results is not None: - assert rv in case.results + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths else: - assert rv == case.result + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths 
@pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name")) @@ -118,14 +124,21 @@ def test_compliance_async(case: Case) -> None: if case.name in SKIP: pytest.skip(reason=SKIP[case.name]) - async def coro() -> List[object]: + async def coro() -> jsonpath.NodeList: assert case.document is not None - return await jsonpath.findall_async(case.selector, case.document) + it = await jsonpath.finditer_async(case.selector, case.document) + return jsonpath.NodeList([node async for node in it]) + + nodes = asyncio.run(coro()) if case.results is not None: - assert asyncio.run(coro()) in case.results + assert case.results_paths is not None + assert nodes.values() in case.results + assert nodes.paths() in case.results_paths else: - assert asyncio.run(coro()) == case.result + assert case.result_paths is not None + assert nodes.values() == case.result + assert nodes.paths() == case.result_paths @pytest.mark.parametrize("case", invalid_cases(), ids=operator.attrgetter("name")) diff --git a/tests/test_nts.py b/tests/test_nts.py new file mode 100644 index 0000000..4f7d6ca --- /dev/null +++ b/tests/test_nts.py @@ -0,0 +1,51 @@ +"""Test Python JSONPath against the Normalized Path Test Suite.""" + +import json +import operator +from dataclasses import dataclass +from typing import Any +from typing import List + +import pytest + +import jsonpath + + +@dataclass +class NormalizedCase: + name: str + query: str + document: Any + paths: List[str] + + +def normalized_cases() -> List[NormalizedCase]: + with open("tests/nts/normalized_paths.json", encoding="utf8") as fd: + data = json.load(fd) + return [NormalizedCase(**case) for case in data["tests"]] + + +@pytest.mark.parametrize("case", normalized_cases(), ids=operator.attrgetter("name")) +def test_nts_normalized_paths(case: NormalizedCase) -> None: + nodes = jsonpath.NodeList(jsonpath.finditer(case.query, case.document)) + paths = nodes.paths() + assert paths == case.paths + + +@dataclass +class CanonicalCase: + name: 
str + query: str + canonical: str + + +def canonical_cases() -> List[CanonicalCase]: + with open("tests/nts/canonical_paths.json", encoding="utf8") as fd: + data = json.load(fd) + return [CanonicalCase(**case) for case in data["tests"]] + + +@pytest.mark.parametrize("case", canonical_cases(), ids=operator.attrgetter("name")) +def test_nts_canonical_paths(case: CanonicalCase) -> None: + query = jsonpath.compile(case.query) + assert str(query) == case.canonical diff --git a/tests/test_parse.py b/tests/test_parse.py index 46e4169..96949a1 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -108,7 +108,7 @@ class Case: Case( description="filter with list membership test", path="$.some[?(@.thing in ['foo', 'bar', 42])]", - want="$['some'][?@['thing'] in [\"foo\", \"bar\", 42]]", + want="$['some'][?@['thing'] in ['foo', 'bar', 42]]", ), Case( description="filter with boolean literals", @@ -143,7 +143,7 @@ class Case: Case( description="filter with string literal", path="$.some[?(@.thing == 'foo')]", - want="$['some'][?@['thing'] == \"foo\"]", + want="$['some'][?@['thing'] == 'foo']", ), Case( description="filter with integer literal", @@ -158,12 +158,12 @@ class Case: Case( description="filter with logical not", path="$.some[?(@.thing > 1 and not $.other)]", - want="$['some'][?(@['thing'] > 1 && !$['other'])]", + want="$['some'][?@['thing'] > 1 && !$['other']]", ), Case( description="filter with grouped expression", path="$.some[?(@.thing > 1 and ($.foo or $.bar))]", - want="$['some'][?(@['thing'] > 1 && ($['foo'] || $['bar']))]", + want="$['some'][?@['thing'] > 1 && ($['foo'] || $['bar'])]", ), Case( description="keys selector", @@ -178,22 +178,22 @@ class Case: Case( description="comparison to single quoted string literal with escape", path="$[?@.foo == 'ba\\'r']", - want="$[?@['foo'] == \"ba'r\"]", + want="$[?@['foo'] == 'ba\\'r']", ), Case( description="comparison to double quoted string literal with escape", path='$[?@.foo == "ba\\"r"]', - 
want='$[?@[\'foo\'] == "ba\\"r"]', + want="$[?@['foo'] == 'ba\"r']", ), Case( description="not binds more tightly than or", path="$[?!@.a || !@.b]", - want="$[?(!@['a'] || !@['b'])]", + want="$[?!@['a'] || !@['b']]", ), Case( description="not binds more tightly than and", path="$[?!@.a && !@.b]", - want="$[?(!@['a'] && !@['b'])]", + want="$[?!@['a'] && !@['b']]", ), Case( description="control precedence with parens", @@ -213,7 +213,7 @@ class Case: Case( description="match function", path=r"$[?match(@, '\\d')]", - want='$[?match(@, "\\\\d")]', + want="$[?match(@, '\\\\d')]", ), ]