Skip to content

Commit 65d0ba8

Browse files
committed
feat: control UTF-16 decoding in JSONPath strings, closes #25
1 parent f06786d commit 65d0ba8

File tree

5 files changed

+43
-12
lines changed

5 files changed

+43
-12
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
- Added `JSONPointer.exists()`, a method that returns `True` if the pointer can be resolved against some data, or `False` otherwise.
2121
- Added the `RelativeJSONPointer` class for building new `JSONPointer` instances from Relative JSON Pointer syntax.
2222
- Added support for a non-standard index/property pointer using `#<property or index>`. This is to support Relative JSON Pointer's use of hash (`#`) when building `JSONPointer` instances from relative JSON Pointers.
23+
- Added the `unicode_escape` argument to `JSONPathEnvironment`. When `True` (the default), UTF-16 escape sequences found in JSONPath string literals will be decoded.
2324

2425
## Version 0.8.1
2526

jsonpath/cli.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,9 @@ def handle_path_command(args: argparse.Namespace) -> None: # noqa: PLR0912
239239
path = args.query_file.read().strip()
240240

241241
try:
242-
path = jsonpath.compile(path)
242+
path = jsonpath.JSONPathEnvironment(
243+
unicode_escape=not args.no_unicode_escape
244+
).compile(path)
243245
except JSONPathSyntaxError as err:
244246
if args.debug:
245247
raise

jsonpath/env.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,10 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
7070
## Class attributes
7171
7272
Arguments:
73-
filter_caching (bool): If `True`, filter expressions will be cached where
74-
possible.
73+
filter_caching (bool): If `True`, filter expressions will be cached
74+
where possible.
75+
unicode_escape: If `True`, decode UTF-16 escape sequences found in
76+
JSONPath string literals.
7577
7678
Attributes:
7779
filter_context_token (str): The pattern used to select extra filter context
@@ -113,8 +115,18 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
113115
parser_class: Type[Parser] = Parser
114116
match_class: Type[JSONPathMatch] = JSONPathMatch
115117

116-
def __init__(self, *, filter_caching: bool = True) -> None:
117-
self.filter_caching = filter_caching
118+
def __init__(
119+
self,
120+
*,
121+
filter_caching: bool = True,
122+
unicode_escape: bool = True,
123+
) -> None:
124+
self.filter_caching: bool = filter_caching
125+
"""Enable or disable filter expression caching."""
126+
127+
self.unicode_escape: bool = unicode_escape
128+
"""Enable or disable decoding of UTF-16 escape sequences found in
129+
JSONPath string literals."""
118130

119131
self.lexer: Lexer = self.lexer_class(env=self)
120132
"""The lexer bound to this environment."""

jsonpath/parse.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ def parse_slice(self, stream: TokenStream) -> SliceSelector:
347347
step=step,
348348
)
349349

350-
def parse_selector_list(self, stream: TokenStream) -> ListSelector:
350+
def parse_selector_list(self, stream: TokenStream) -> ListSelector: # noqa: PLR0912
351351
"""Parse a comma separated list JSONPath selectors from a stream of tokens."""
352352
tok = stream.next_token()
353353
list_items: List[
@@ -391,13 +391,16 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector:
391391
token=stream.current,
392392
)
393393

394-
name = (
395-
codecs.decode(
396-
stream.current.value.replace("\\/", "/"), "unicode-escape"
394+
if self.env.unicode_escape:
395+
name = (
396+
codecs.decode(
397+
stream.current.value.replace("\\/", "/"), "unicode-escape"
398+
)
399+
.encode("utf-16", "surrogatepass")
400+
.decode("utf-16")
397401
)
398-
.encode("utf-16", "surrogatepass")
399-
.decode("utf-16")
400-
)
402+
else:
403+
name = stream.current.value
401404

402405
list_items.append(
403406
PropertySelector(

tests/test_env.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,3 +145,16 @@ def test_no_match_compound_path(env: JSONPathEnvironment) -> None:
145145
"""Test that we get `None` if there are no matches in a compound path."""
146146
match = env.match("$.other | $.foo", {"some": 1, "thing": 2})
147147
assert match is None
148+
149+
150+
def test_no_unicode_escape() -> None:
151+
"""Test that we can disable decoding of UTF-16 escape sequences."""
152+
document = {"𝄞": "A"}
153+
selector = '$["\\uD834\\uDD1E"]'
154+
155+
env = JSONPathEnvironment(unicode_escape=True)
156+
assert env.findall(selector, document) == ["A"]
157+
158+
env = JSONPathEnvironment(unicode_escape=False)
159+
assert env.findall(selector, document) == []
160+
assert env.findall(selector, {"\\uD834\\uDD1E": "B"}) == ["B"]

0 commit comments

Comments
 (0)