Skip to content

Fix normalized paths and serialization of compiled JSONPath queries. #78

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "tests/cts"]
path = tests/cts
url = git@github.com:jsonpath-standard/jsonpath-compliance-test-suite.git
[submodule "tests/nts"]
path = tests/nts
url = git@github.com:jg-rp/jsonpath-compliance-normalized-paths.git
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
# Python JSONPath Change Log

## Version 1.3.0 (unreleased)

**Fixes**

- Fixed `jsonpath.JSONPathMatch.path`. It is now a "normalized path" following section 2.7 of RFC 9525.
- Fixed normalized slice indexes. We were failing to normalize some indexes given a negative step.

**Other changes**

- `jsonpath.match.NodeList` is now re-exported as `jsonpath.NodeList`.
- Added `jsonpath.NodeList.paths()`, which returns a list of normalized paths, one for each node in the list.
- Serialization of compiled JSONPath queries (instances of `jsonpath.JSONPath`) has changed. String literals inside filter selectors are now serialized using the canonical format, as described in section 2.7 of RFC 9525, and parentheses in filter selectors are kept to a minimum.

## Version 1.2.2

**Fixes**
Expand Down
2 changes: 2 additions & 0 deletions jsonpath/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from .fluent_api import Query
from .lex import Lexer
from .match import JSONPathMatch
from .match import NodeList
from .parse import Parser
from .patch import JSONPatch
from .path import CompoundJSONPath
Expand Down Expand Up @@ -58,6 +59,7 @@
"JSONPointerResolutionError",
"JSONPointerTypeError",
"Lexer",
"NodeList",
"match",
"Parser",
"Projection",
Expand Down
39 changes: 36 additions & 3 deletions jsonpath/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from __future__ import annotations

import copy
import json
import re
from abc import ABC
from abc import abstractmethod
Expand All @@ -25,6 +24,7 @@
from .match import NodeList
from .selectors import Filter as FilterSelector
from .selectors import ListSelector
from .serialize import canonical_string

if TYPE_CHECKING:
from .path import JSONPath
Expand Down Expand Up @@ -208,7 +208,7 @@ class StringLiteral(Literal[str]):
__slots__ = ()

def __str__(self) -> str:
return json.dumps(self.value)
return canonical_string(self.value)


class IntegerLiteral(Literal[int]):
Expand Down Expand Up @@ -375,6 +375,12 @@ def set_children(self, children: List[FilterExpression]) -> None:
self.right = children[1]


# Operator precedence levels used by `BooleanExpression._canonical_string`
# to decide where parentheses are needed when serializing a filter
# expression. A larger number binds more tightly: `&&` outranks `||`, and
# the prefix operator `!` outranks both.
PRECEDENCE_LOWEST = 1
PRECEDENCE_LOGICAL_OR = 3
PRECEDENCE_LOGICAL_AND = 4
PRECEDENCE_PREFIX = 7


class BooleanExpression(FilterExpression):
"""An expression that always evaluates to `True` or `False`."""

Expand Down Expand Up @@ -408,13 +414,40 @@ def cacheable_nodes(self) -> bool:
)

def __str__(self) -> str:
return str(self.expression)
return self._canonical_string(self.expression, PRECEDENCE_LOWEST)

def __eq__(self, other: object) -> bool:
return (
isinstance(other, BooleanExpression) and self.expression == other.expression
)

def _canonical_string(
    self, expression: FilterExpression, parent_precedence: int
) -> str:
    """Serialize _expression_, adding parentheses only where needed.

    Logical `&&` and `||` expressions and `!` prefix expressions are
    rendered recursively. Any other expression is rendered with `str()`.

    Arguments:
        expression: The filter expression to serialize.
        parent_precedence: The precedence of the enclosing operator. A
            logical expression is wrapped in parentheses when its own
            precedence is not higher than this value.
    """
    if isinstance(expression, InfixExpression):
        # Pick the operator symbol and its precedence. An empty symbol
        # means "not a logical operator".
        if expression.operator == "&&":
            symbol, own_precedence = "&&", PRECEDENCE_LOGICAL_AND
        elif expression.operator == "||":
            symbol, own_precedence = "||", PRECEDENCE_LOGICAL_OR
        else:
            symbol, own_precedence = "", 0

        if symbol:
            lhs = self._canonical_string(expression.left, own_precedence)
            rhs = self._canonical_string(expression.right, own_precedence)
            joined = f"{lhs} {symbol} {rhs}"
            if parent_precedence >= own_precedence:
                return f"({joined})"
            return joined

    if isinstance(expression, PrefixExpression):
        negated = "!" + self._canonical_string(
            expression.right, PRECEDENCE_PREFIX
        )
        if parent_precedence > PRECEDENCE_PREFIX:
            return f"({negated})"
        return negated

    # Everything else knows how to serialize itself.
    return str(expression)

def evaluate(self, context: FilterContext) -> bool:
return context.env.is_truthy(self.expression.evaluate(context))

Expand Down
5 changes: 5 additions & 0 deletions jsonpath/match.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""The JSONPath match object, as returned from `JSONPath.finditer()`."""

from __future__ import annotations

from typing import Any
Expand Down Expand Up @@ -104,6 +105,10 @@ def values_or_singular(self) -> object:
return self[0].obj
return [match.obj for match in self]

def paths(self) -> List[str]:
    """Return the `path` of every node in this list, in list order."""
    return [node.path for node in self]

def empty(self) -> bool:
    """Return `True` when this node list contains no nodes."""
    return not self
Expand Down
45 changes: 22 additions & 23 deletions jsonpath/selectors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""JSONPath segments and selectors, as returned from `Parser.parse`."""

from __future__ import annotations

from abc import ABC
Expand All @@ -17,6 +18,7 @@

from .exceptions import JSONPathIndexError
from .exceptions import JSONPathTypeError
from .serialize import canonical_string

if TYPE_CHECKING:
from .env import JSONPathEnvironment
Expand Down Expand Up @@ -75,7 +77,11 @@ def __init__(
self.shorthand = shorthand

def __str__(self) -> str:
return f"['{self.name}']" if self.shorthand else f"'{self.name}'"
return (
f"[{canonical_string(self.name)}]"
if self.shorthand
else f"{canonical_string(self.name)}"
)

def __eq__(self, __value: object) -> bool:
return (
Expand All @@ -98,7 +104,7 @@ def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]:
obj=self.env.getitem(match.obj, self.name),
parent=match,
parts=match.parts + (self.name,),
path=match.path + f"['{self.name}']",
path=match.path + f"[{canonical_string(self.name)}]",
root=match.root,
)
match.add_child(_match)
Expand All @@ -117,7 +123,7 @@ async def resolve_async(
obj=await self.env.getitem_async(match.obj, self.name),
parent=match,
parts=match.parts + (self.name,),
path=match.path + f"['{self.name}']",
path=match.path + f"[{canonical_string(self.name)}]",
root=match.root,
)
match.add_child(_match)
Expand Down Expand Up @@ -321,20 +327,15 @@ def _check_range(self, *indices: Optional[int]) -> None:
):
raise JSONPathIndexError("index out of range", token=self.token)

def _normalized_index(self, obj: Sequence[object], index: int) -> int:
if index < 0 and len(obj) >= abs(index):
return len(obj) + index
return index

def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]:
for match in matches:
if not isinstance(match.obj, Sequence) or self.slice.step == 0:
continue

idx = self.slice.start or 0
step = self.slice.step or 1
for obj in self.env.getitem(match.obj, self.slice):
norm_index = self._normalized_index(match.obj, idx)
for norm_index, obj in zip( # noqa: B905
range(*self.slice.indices(len(match.obj))),
self.env.getitem(match.obj, self.slice),
):
_match = self.env.match_class(
filter_context=match.filter_context(),
obj=obj,
Expand All @@ -345,7 +346,6 @@ def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]:
)
match.add_child(_match)
yield _match
idx += step

async def resolve_async(
self, matches: AsyncIterable[JSONPathMatch]
Expand All @@ -354,10 +354,10 @@ async def resolve_async(
if not isinstance(match.obj, Sequence) or self.slice.step == 0:
continue

idx = self.slice.start or 0
step = self.slice.step or 1
for obj in await self.env.getitem_async(match.obj, self.slice):
norm_index = self._normalized_index(match.obj, idx)
for norm_index, obj in zip( # noqa: B905
range(*self.slice.indices(len(match.obj))),
await self.env.getitem_async(match.obj, self.slice),
):
_match = self.env.match_class(
filter_context=match.filter_context(),
obj=obj,
Expand All @@ -368,7 +368,6 @@ async def resolve_async(
)
match.add_child(_match)
yield _match
idx += step


class WildSelector(JSONPathSelector):
Expand Down Expand Up @@ -402,7 +401,7 @@ def resolve(self, matches: Iterable[JSONPathMatch]) -> Iterable[JSONPathMatch]:
obj=val,
parent=match,
parts=match.parts + (key,),
path=match.path + f"['{key}']",
path=match.path + f"[{canonical_string(key)}]",
root=match.root,
)
match.add_child(_match)
Expand Down Expand Up @@ -431,7 +430,7 @@ async def resolve_async(
obj=val,
parent=match,
parts=match.parts + (key,),
path=match.path + f"['{key}']",
path=match.path + f"[{canonical_string(key)}]",
root=match.root,
)
match.add_child(_match)
Expand Down Expand Up @@ -479,7 +478,7 @@ def _expand(self, match: JSONPathMatch) -> Iterable[JSONPathMatch]:
obj=val,
parent=match,
parts=match.parts + (key,),
path=match.path + f"['{key}']",
path=match.path + f"[{canonical_string(key)}]",
root=match.root,
)
match.add_child(_match)
Expand Down Expand Up @@ -633,7 +632,7 @@ def resolve( # noqa: PLR0912
obj=val,
parent=match,
parts=match.parts + (key,),
path=match.path + f"['{key}']",
path=match.path + f"[{canonical_string(key)}]",
root=match.root,
)
match.add_child(_match)
Expand Down Expand Up @@ -701,7 +700,7 @@ async def resolve_async( # noqa: PLR0912
obj=val,
parent=match,
parts=match.parts + (key,),
path=match.path + f"['{key}']",
path=match.path + f"[{canonical_string(key)}]",
root=match.root,
)
match.add_child(_match)
Expand Down
13 changes: 13 additions & 0 deletions jsonpath/serialize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Helper functions for serializing compiled JSONPath queries."""

import json


def canonical_string(value: str) -> str:
    """Return _value_ formatted as a single-quoted, canonical string literal.

    The JSON encoding of _value_ supplies the escape sequences. Its
    surrounding double quotes are dropped, escaped double quotes are
    unescaped, and single quotes are escaped, before the result is wrapped
    in single quotes.
    """
    # JSON-encode without forcing ASCII, then strip the enclosing `"` pair.
    escaped = json.dumps(value, ensure_ascii=False)[1:-1]
    # Double quotes need no escaping inside a single-quoted literal.
    escaped = escaped.replace('\\"', '"')
    # Single quotes do.
    escaped = escaped.replace("'", "\\'")
    return "'" + escaped + "'"
1 change: 1 addition & 0 deletions tests/nts
Submodule nts added at c9288b
27 changes: 20 additions & 7 deletions tests/test_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class Case:
document: Union[Mapping[str, Any], Sequence[Any], None] = None
result: Any = None
results: Optional[List[Any]] = None
result_paths: Optional[List[str]] = None
results_paths: Optional[List[List[str]]] = None
invalid_selector: Optional[bool] = None
tags: List[str] = field(default_factory=list)

Expand Down Expand Up @@ -105,27 +107,38 @@ def test_compliance(case: Case) -> None:
pytest.skip(reason=SKIP[case.name])

assert case.document is not None
rv = jsonpath.findall(case.selector, case.document)
nodes = jsonpath.NodeList(jsonpath.finditer(case.selector, case.document))

if case.results is not None:
assert rv in case.results
assert case.results_paths is not None
assert nodes.values() in case.results
assert nodes.paths() in case.results_paths
else:
assert rv == case.result
assert case.result_paths is not None
assert nodes.values() == case.result
assert nodes.paths() == case.result_paths


@pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name"))
def test_compliance_async(case: Case) -> None:
if case.name in SKIP:
pytest.skip(reason=SKIP[case.name])

async def coro() -> List[object]:
async def coro() -> jsonpath.NodeList:
assert case.document is not None
return await jsonpath.findall_async(case.selector, case.document)
it = await jsonpath.finditer_async(case.selector, case.document)
return jsonpath.NodeList([node async for node in it])

nodes = asyncio.run(coro())

if case.results is not None:
assert asyncio.run(coro()) in case.results
assert case.results_paths is not None
assert nodes.values() in case.results
assert nodes.paths() in case.results_paths
else:
assert asyncio.run(coro()) == case.result
assert case.result_paths is not None
assert nodes.values() == case.result
assert nodes.paths() == case.result_paths


@pytest.mark.parametrize("case", invalid_cases(), ids=operator.attrgetter("name"))
Expand Down
Loading