Skip to content

Commit 8bc5dba

Browse files
authored
Count JSON tokens (#1296)
1 parent c65b70b commit 8bc5dba

File tree

7 files changed

+332
-9
lines changed

7 files changed

+332
-9
lines changed

release-notes/VERSION-2.x

+3
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ a pure JSON library.
3535
#1277: Add back Java 22 optimisation in FastDoubleParser
3636
#1305: Make helper methods of `WriterBasedJsonGenerator` non-final to allow overriding
3737
(contributed by @zhangOranges)
38+
#1310: Add new `StreamReadConstraints` (`maxTokenCount`) to limit maximum number
39+
of Tokens allowed per document
40+
(implemented by @pjfanning)
3841

3942
2.17.2 (not yet released)
4043

src/main/java/com/fasterxml/jackson/core/JsonParser.java

+12
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,18 @@ public JsonLocation currentTokenLocation() {
781781
return getTokenLocation();
782782
}
783783

784+
/**
785+
* Get an approximate count of the number of tokens that have been read.
786+
* This count is likely to be updated only if {@link StreamReadConstraints.Builder#maxTokenCount(long)}
787+
* has been used to set a limit on the number of tokens that can be read.
788+
*
789+
* @return the number of tokens that have been read (-1 if the count is not available)
790+
* @since 2.18
791+
*/
792+
public long currentTokenCount() {
793+
return -1L;
794+
}
795+
784796
/**
785797
* Deprecated alias for {@link #currentLocation()} (removed from Jackson 3.0).
786798
*

src/main/java/com/fasterxml/jackson/core/StreamReadConstraints.java

+107-5
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ public class StreamReadConstraints
4343
*/
4444
public static final long DEFAULT_MAX_DOC_LEN = -1L;
4545

46+
/**
47+
* Default setting for maximum token count:
48+
* see {@link Builder#maxTokenCount} for details.
49+
*/
50+
public static final long DEFAULT_MAX_TOKEN_COUNT = -1L;
51+
4652
/**
4753
* @since 2.16
4854
*/
@@ -74,6 +80,7 @@ public class StreamReadConstraints
7480

7581
protected final int _maxNestingDepth;
7682
protected final long _maxDocLen;
83+
protected final long _maxTokenCount;
7784

7885
protected final int _maxNumLen;
7986
protected final int _maxStringLen;
@@ -112,6 +119,7 @@ public static void overrideDefaultStreamReadConstraints(final StreamReadConstrai
112119

113120
public static final class Builder {
114121
private long maxDocLen;
122+
private long maxTokenCount;
115123
private int maxNestingDepth;
116124
private int maxNumLen;
117125
private int maxStringLen;
@@ -156,6 +164,31 @@ public Builder maxDocumentLength(long maxDocLen) {
156164
return this;
157165
}
158166

167+
/**
168+
* Sets the maximum allowed token count (for positive values over 0) or
169+
* indicates that any count is acceptable ({@code 0} or negative number).
170+
*
171+
* <p>
172+
* A token is a single unit of input, such as a number, a string, an object
173+
* start or end, or an array start or end.
174+
* </p>
175+
*
176+
* @param maxTokenCount the maximum allowed token count if positive number above 0; otherwise
177+
* ({@code 0} or negative number) means "unlimited".
178+
*
179+
* @return this builder
180+
*
181+
* @since 2.18
182+
*/
183+
public Builder maxTokenCount(long maxTokenCount) {
184+
// Negative values and 0 mean "unlimited", mark with -1L
185+
if (maxTokenCount <= 0L) {
186+
maxTokenCount = -1L;
187+
}
188+
this.maxTokenCount = maxTokenCount;
189+
return this;
190+
}
191+
159192
/**
160193
* Sets the maximum number length (in chars or bytes, depending on input context).
161194
* The default is 1000.
@@ -220,14 +253,15 @@ public Builder maxNameLength(final int maxNameLen) {
220253
}
221254

222255
Builder() {
223-
this(DEFAULT_MAX_DEPTH, DEFAULT_MAX_DOC_LEN,
256+
this(DEFAULT_MAX_DEPTH, DEFAULT_MAX_DOC_LEN, DEFAULT_MAX_TOKEN_COUNT,
224257
DEFAULT_MAX_NUM_LEN, DEFAULT_MAX_STRING_LEN, DEFAULT_MAX_NAME_LEN);
225258
}
226259

227-
Builder(final int maxNestingDepth, final long maxDocLen,
260+
Builder(final int maxNestingDepth, final long maxDocLen, final long maxTokenCount,
228261
final int maxNumLen, final int maxStringLen, final int maxNameLen) {
229262
this.maxNestingDepth = maxNestingDepth;
230263
this.maxDocLen = maxDocLen;
264+
this.maxTokenCount = maxTokenCount;
231265
this.maxNumLen = maxNumLen;
232266
this.maxStringLen = maxStringLen;
233267
this.maxNameLen = maxNameLen;
@@ -236,14 +270,15 @@ public Builder maxNameLength(final int maxNameLen) {
236270
Builder(StreamReadConstraints src) {
237271
maxNestingDepth = src._maxNestingDepth;
238272
maxDocLen = src._maxDocLen;
273+
maxTokenCount = src._maxTokenCount;
239274
maxNumLen = src._maxNumLen;
240275
maxStringLen = src._maxStringLen;
241276
maxNameLen = src._maxNameLen;
242277
}
243278

244279
public StreamReadConstraints build() {
245280
return new StreamReadConstraints(maxNestingDepth, maxDocLen,
246-
maxNumLen, maxStringLen, maxNameLen);
281+
maxNumLen, maxStringLen, maxNameLen, maxTokenCount);
247282
}
248283
}
249284

@@ -257,7 +292,7 @@ public StreamReadConstraints build() {
257292
protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
258293
final int maxNumLen, final int maxStringLen) {
259294
this(maxNestingDepth, maxDocLen,
260-
maxNumLen, maxStringLen, DEFAULT_MAX_NAME_LEN);
295+
maxNumLen, maxStringLen, DEFAULT_MAX_NAME_LEN, DEFAULT_MAX_TOKEN_COUNT);
261296
}
262297

263298
/**
@@ -269,13 +304,30 @@ protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
269304
*
270305
* @since 2.16
271306
*/
307+
@Deprecated // since 2.18
308+
protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
309+
final int maxNumLen, final int maxStringLen, final int maxNameLen) {
310+
this(maxNestingDepth, maxDocLen, maxNumLen, maxStringLen, maxNameLen, DEFAULT_MAX_TOKEN_COUNT);
311+
}
312+
313+
/**
314+
* @param maxNestingDepth Maximum input document nesting to allow
315+
* @param maxDocLen Maximum input document length to allow
316+
* @param maxNumLen Maximum number representation length to allow
317+
* @param maxStringLen Maximum String value length to allow
318+
* @param maxNameLen Maximum Object property name length to allow
319+
* @param maxTokenCount Maximum number of tokens to allow
320+
*
321+
* @since 2.18
322+
*/
272323
protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
273-
final int maxNumLen, final int maxStringLen, final int maxNameLen) {
324+
final int maxNumLen, final int maxStringLen, final int maxNameLen, final long maxTokenCount) {
274325
_maxNestingDepth = maxNestingDepth;
275326
_maxDocLen = maxDocLen;
276327
_maxNumLen = maxNumLen;
277328
_maxStringLen = maxStringLen;
278329
_maxNameLen = maxNameLen;
330+
_maxTokenCount = maxTokenCount;
279331
}
280332

281333
public static Builder builder() {
@@ -337,6 +389,31 @@ public boolean hasMaxDocumentLength() {
337389
return _maxDocLen > 0L;
338390
}
339391

392+
/**
393+
* Accessor for maximum token count.
394+
* see {@link Builder#maxTokenCount(long)} for details.
395+
*
396+
* @return Maximum allowed token count
397+
* @since 2.18
398+
*/
399+
public long getMaxTokenCount() {
400+
return _maxTokenCount;
401+
}
402+
403+
/**
404+
* Convenience method, basically same as:
405+
*<pre>
406+
* getMaxTokenCount() &gt; 0L
407+
*</pre>
408+
*
409+
* @return {@code true} if this constraints instance has a limit for maximum
410+
* token count to enforce; {@code false} otherwise.
411+
* @since 2.18
412+
*/
413+
public boolean hasMaxTokenCount() {
414+
return _maxTokenCount > 0L;
415+
}
416+
340417
/**
341418
* Accessor for maximum length of numbers to decode.
342419
* see {@link Builder#maxNumberLength(int)} for details.
@@ -419,6 +496,31 @@ public void validateDocumentLength(long len) throws StreamConstraintsException
419496
}
420497
}
421498

499+
/**
500+
* Convenience method that can be used to verify that the
501+
* token count does not exceed the maximum specified by this
502+
* constraints object (if any): if it does, a
503+
* {@link StreamConstraintsException}
504+
* is thrown.
505+
*
506+
* @param count Current token count for processed document content
507+
*
508+
* @throws StreamConstraintsException If token count exceeds maximum
509+
*
510+
* @since 2.18
511+
*/
512+
public void validateTokenCount(long count) throws StreamConstraintsException
513+
{
514+
// for performance reasons, it is assumed that users check hasMaxTokenCount()
515+
// before calling this method - if hasMaxTokenCount() is false the stored limit is not positive, so any positive count would be rejected
516+
if (count > _maxTokenCount) {
517+
throw _constructException(
518+
"Token count (%d) exceeds the maximum allowed (%d, from %s)",
519+
count, _maxTokenCount,
520+
_constrainRef("getMaxTokenCount"));
521+
}
522+
}
523+
422524
/*
423525
/**********************************************************************
424526
/* Convenience methods for validation, token lengths

src/main/java/com/fasterxml/jackson/core/base/ParserMinimalBase.java

+25-4
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,20 @@ public abstract class ParserMinimalBase extends JsonParser
159159
*/
160160
protected JsonToken _currToken;
161161

162+
/**
163+
* Current count of tokens, if tracked (see {@link #_trackMaxTokenCount})
164+
*
165+
* @since 2.18
166+
*/
167+
protected long _tokenCount;
168+
169+
/**
170+
* Whether or not to track the token count, due to a {@link StreamReadConstraints} with {@code maxTokenCount > 0}.
171+
*
172+
* @since 2.18
173+
*/
174+
protected final boolean _trackMaxTokenCount;
175+
162176
/**
163177
* Last cleared token, if any: that is, value that was in
164178
* effect when {@link #clearCurrentToken} was called.
@@ -175,6 +189,7 @@ public abstract class ParserMinimalBase extends JsonParser
175189
protected ParserMinimalBase() {
176190
super();
177191
_streamReadConstraints = StreamReadConstraints.defaults();
192+
_trackMaxTokenCount = _streamReadConstraints.hasMaxTokenCount();
178193
}
179194

180195
@Deprecated // since 2.18
@@ -186,12 +201,14 @@ protected ParserMinimalBase(int features) {
186201
protected ParserMinimalBase(StreamReadConstraints src) {
187202
super();
188203
_streamReadConstraints = (src == null) ? StreamReadConstraints.defaults() : src;
204+
_trackMaxTokenCount = _streamReadConstraints.hasMaxTokenCount();
189205
}
190206

191207
// @since 2.18
192208
protected ParserMinimalBase(int features, StreamReadConstraints src) {
193209
super(features);
194210
_streamReadConstraints = (src == null) ? StreamReadConstraints.defaults() : src;
211+
_trackMaxTokenCount = _streamReadConstraints.hasMaxTokenCount();
195212
}
196213

197214
// NOTE: had base impl in 2.3 and before; but shouldn't
@@ -311,9 +328,6 @@ public JsonParser skipChildren() throws IOException
311328
*/
312329
protected abstract void _handleEOF() throws JsonParseException;
313330

314-
//public JsonToken getCurrentToken()
315-
//public boolean hasCurrentToken()
316-
317331
@Deprecated // since 2.17 -- still need to implement
318332
@Override
319333
public abstract String getCurrentName() throws IOException;
@@ -327,6 +341,11 @@ public JsonParser skipChildren() throws IOException
327341

328342
// public abstract JsonLocation getCurrentLocation();
329343

344+
@Override // since 2.18
345+
public long currentTokenCount() {
346+
return _tokenCount;
347+
}
348+
330349
/*
331350
/**********************************************************
332351
/* Public API, token state overrides
@@ -827,9 +846,11 @@ protected final void _wrapError(String msg, Throwable t) throws JsonParseExcepti
827846

828847
protected final JsonToken _updateToken(final JsonToken token) throws StreamConstraintsException {
829848
_currToken = token;
849+
if (_trackMaxTokenCount) {
850+
_streamReadConstraints.validateTokenCount(++_tokenCount);
851+
}
830852
return token;
831853
}
832-
833854
protected final JsonToken _updateTokenToNull() {
834855
return (_currToken = null);
835856
}

src/main/java/com/fasterxml/jackson/core/util/JsonParserDelegate.java

+3
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,9 @@ public boolean requiresCustomCodec() {
155155
@Override public JsonLocation currentLocation() { return delegate.currentLocation(); }
156156
@Override public JsonLocation currentTokenLocation() { return delegate.currentTokenLocation(); }
157157

158+
@Override // since 2.18
159+
public long currentTokenCount() { return delegate.currentTokenCount(); }
160+
158161
@Override
159162
@Deprecated
160163
public JsonToken getCurrentToken() { return delegate.getCurrentToken(); }

src/test/java/com/fasterxml/jackson/core/constraints/LargeDocReadTest.java

+17
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import com.fasterxml.jackson.core.exc.StreamConstraintsException;
1010
import com.fasterxml.jackson.core.testsupport.AsyncReaderWrapper;
1111

12+
import static org.junit.jupiter.api.Assertions.assertEquals;
1213
import static org.junit.jupiter.api.Assertions.fail;
1314

1415
// [core#1047]: Add max-name-length constraints
@@ -20,6 +21,10 @@ class LargeDocReadTest extends AsyncTestBase
2021
.streamReadConstraints(StreamReadConstraints.builder().maxDocumentLength(10_000L).build())
2122
.build();
2223

24+
private final JsonFactory JSON_F_MAX_TOKENS_1K = JsonFactory.builder()
25+
.streamReadConstraints(StreamReadConstraints.builder().maxTokenCount(1_000L).build())
26+
.build();
27+
2328
// Test name that is below default max name
2429
@Test
2530
void largeNameBytes() throws Exception {
@@ -83,6 +88,18 @@ void largeNameWithSmallLimitAsync() throws Exception
8388
}
8489
}
8590

91+
@Test
92+
void tokenLimitBytes() throws Exception {
93+
final String doc = generateJSON(StreamReadConstraints.defaults().getMaxNameLength() - 100);
94+
try (JsonParser p = createParserUsingStream(JSON_F_MAX_TOKENS_1K, doc, "UTF-8")) {
95+
consumeTokens(p);
96+
fail("expected StreamConstraintsException");
97+
} catch (StreamConstraintsException e) {
98+
assertEquals("Token count (1001) exceeds the maximum allowed (1000, from `StreamReadConstraints.getMaxTokenCount()`)",
99+
e.getMessage());
100+
}
101+
}
102+
86103
private void consumeTokens(JsonParser p) throws IOException {
87104
while (p.nextToken() != null) {
88105
;

0 commit comments

Comments
 (0)