Skip to content

Commit 5f1fa6e

Browse files
committed
count tokens
more changes; don't increment token count for NOT_AVAILABLE; handle case when token is set to null; use _updateTokenToNull; null check not needed now; add test; Update TokenCountTest.java; Update StreamReadConstraints.java; add validation; try to reduce overhead for when maxTokenCount is not needed; Update StreamReadConstraints.java; Update TokenCountTest.java
1 parent 21c7a03 commit 5f1fa6e

File tree

9 files changed

+375
-141
lines changed

9 files changed

+375
-141
lines changed

src/main/java/com/fasterxml/jackson/core/JsonParser.java

+12
Original file line numberDiff line numberDiff line change
@@ -2546,6 +2546,18 @@ public <T extends TreeNode> T readValueAsTree() throws IOException {
25462546
return (T) _codec().readTree(this);
25472547
}
25482548

2549+
/**
2550+
* Get an approximate count of the number of tokens that have been read.
2551+
* This count is likely to be updated only if {@link StreamReadConstraints.Builder#maxTokenCount(long)}
2552+
* has been used to set a limit on the number of tokens that can be read.
2553+
*
2554+
* @return the number of tokens that have been read (-1 if the count is not available)
2555+
* @since 2.18
2556+
*/
2557+
public long getTokenCount() {
2558+
return -1L;
2559+
}
2560+
25492561
protected ObjectCodec _codec() {
25502562
ObjectCodec c = getCodec();
25512563
if (c == null) {

src/main/java/com/fasterxml/jackson/core/StreamReadConstraints.java

+107-5
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ public class StreamReadConstraints
4343
*/
4444
public static final long DEFAULT_MAX_DOC_LEN = -1L;
4545

46+
/**
47+
* Default setting for maximum token count:
48+
* see {@link Builder#maxTokenCount} for details.
49+
*/
50+
public static final long DEFAULT_MAX_TOKEN_COUNT = -1L;
51+
4652
/**
4753
* @since 2.16
4854
*/
@@ -74,6 +80,7 @@ public class StreamReadConstraints
7480

7581
protected final int _maxNestingDepth;
7682
protected final long _maxDocLen;
83+
protected final long _maxTokenCount;
7784

7885
protected final int _maxNumLen;
7986
protected final int _maxStringLen;
@@ -112,6 +119,7 @@ public static void overrideDefaultStreamReadConstraints(final StreamReadConstrai
112119

113120
public static final class Builder {
114121
private long maxDocLen;
122+
private long maxTokenCount;
115123
private int maxNestingDepth;
116124
private int maxNumLen;
117125
private int maxStringLen;
@@ -156,6 +164,31 @@ public Builder maxDocumentLength(long maxDocLen) {
156164
return this;
157165
}
158166

167+
/**
168+
* Sets the maximum allowed token count (for positive values over 0) or
169+
* indicates that any count is acceptable ({@code 0} or negative number).
170+
*
171+
* <p>
172+
* A token is a single unit of input, such as a number, a string, an object
173+
* start or end, or an array start or end.
174+
* </p>
175+
*
176+
* @param maxTokenCount the maximum allowed token count if positive number above 0; otherwise
177+
* ({@code 0} or negative number) means "unlimited".
178+
*
179+
* @return this builder
180+
*
181+
* @since 2.18
182+
*/
183+
public Builder maxTokenCount(long maxTokenCount) {
184+
// Negative values and 0 mean "unlimited", mark with -1L
185+
if (maxTokenCount <= 0L) {
186+
maxTokenCount = -1L;
187+
}
188+
this.maxTokenCount = maxTokenCount;
189+
return this;
190+
}
191+
159192
/**
160193
* Sets the maximum number length (in chars or bytes, depending on input context).
161194
* The default is 1000.
@@ -220,14 +253,15 @@ public Builder maxNameLength(final int maxNameLen) {
220253
}
221254

222255
Builder() {
223-
this(DEFAULT_MAX_DEPTH, DEFAULT_MAX_DOC_LEN,
256+
this(DEFAULT_MAX_DEPTH, DEFAULT_MAX_DOC_LEN, DEFAULT_MAX_TOKEN_COUNT,
224257
DEFAULT_MAX_NUM_LEN, DEFAULT_MAX_STRING_LEN, DEFAULT_MAX_NAME_LEN);
225258
}
226259

227-
Builder(final int maxNestingDepth, final long maxDocLen,
260+
Builder(final int maxNestingDepth, final long maxDocLen, final long maxTokenCount,
228261
final int maxNumLen, final int maxStringLen, final int maxNameLen) {
229262
this.maxNestingDepth = maxNestingDepth;
230263
this.maxDocLen = maxDocLen;
264+
this.maxTokenCount = maxTokenCount;
231265
this.maxNumLen = maxNumLen;
232266
this.maxStringLen = maxStringLen;
233267
this.maxNameLen = maxNameLen;
@@ -236,14 +270,15 @@ public Builder maxNameLength(final int maxNameLen) {
236270
Builder(StreamReadConstraints src) {
237271
maxNestingDepth = src._maxNestingDepth;
238272
maxDocLen = src._maxDocLen;
273+
maxTokenCount = src._maxTokenCount;
239274
maxNumLen = src._maxNumLen;
240275
maxStringLen = src._maxStringLen;
241276
maxNameLen = src._maxNameLen;
242277
}
243278

244279
public StreamReadConstraints build() {
245280
return new StreamReadConstraints(maxNestingDepth, maxDocLen,
246-
maxNumLen, maxStringLen, maxNameLen);
281+
maxNumLen, maxStringLen, maxNameLen, maxTokenCount);
247282
}
248283
}
249284

@@ -257,7 +292,7 @@ public StreamReadConstraints build() {
257292
protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
258293
final int maxNumLen, final int maxStringLen) {
259294
this(maxNestingDepth, maxDocLen,
260-
maxNumLen, maxStringLen, DEFAULT_MAX_NAME_LEN);
295+
maxNumLen, maxStringLen, DEFAULT_MAX_NAME_LEN, DEFAULT_MAX_TOKEN_COUNT);
261296
}
262297

263298
/**
@@ -269,13 +304,30 @@ protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
269304
*
270305
* @since 2.16
271306
*/
307+
@Deprecated // since 2.18
308+
protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
309+
final int maxNumLen, final int maxStringLen, final int maxNameLen) {
310+
this(maxNestingDepth, maxDocLen, maxNumLen, maxStringLen, maxNameLen, DEFAULT_MAX_TOKEN_COUNT);
311+
}
312+
313+
/**
314+
* @param maxNestingDepth Maximum input document nesting to allow
315+
* @param maxDocLen Maximum input document length to allow
316+
* @param maxNumLen Maximum number representation length to allow
317+
* @param maxStringLen Maximum String value length to allow
318+
* @param maxNameLen Maximum Object property name length to allow
319+
* @param maxTokenCount Maximum number of tokens to allow
320+
*
321+
* @since 2.18
322+
*/
272323
protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
273-
final int maxNumLen, final int maxStringLen, final int maxNameLen) {
324+
final int maxNumLen, final int maxStringLen, final int maxNameLen, final long maxTokenCount) {
274325
_maxNestingDepth = maxNestingDepth;
275326
_maxDocLen = maxDocLen;
276327
_maxNumLen = maxNumLen;
277328
_maxStringLen = maxStringLen;
278329
_maxNameLen = maxNameLen;
330+
_maxTokenCount = maxTokenCount;
279331
}
280332

281333
public static Builder builder() {
@@ -337,6 +389,31 @@ public boolean hasMaxDocumentLength() {
337389
return _maxDocLen > 0L;
338390
}
339391

392+
/**
393+
* Accessor for maximum token count.
394+
* See {@link Builder#maxTokenCount(long)} for details.
395+
*
396+
* @return Maximum allowed token count
397+
* @since 2.18
398+
*/
399+
public long getMaxTokenCount() {
400+
return _maxTokenCount;
401+
}
402+
403+
/**
404+
* Convenience method, basically same as:
405+
*<pre>
406+
* getMaxTokenCount() &gt; 0L
407+
*</pre>
408+
*
409+
* @return {@code true} if this constraints instance has a limit for maximum
410+
* token count to enforce; {@code false} otherwise.
411+
* @since 2.18
412+
*/
413+
public boolean hasMaxTokenCount() {
414+
return _maxTokenCount > 0L;
415+
}
416+
340417
/**
341418
* Accessor for maximum length of numbers to decode.
342419
* see {@link Builder#maxNumberLength(int)} for details.
@@ -419,6 +496,31 @@ public void validateDocumentLength(long len) throws StreamConstraintsException
419496
}
420497
}
421498

499+
/**
500+
* Convenience method that can be used to verify that the
501+
* token count does not exceed the maximum specified by this
502+
* constraints object (if any): if it does, a
503+
* {@link StreamConstraintsException}
504+
* is thrown.
505+
*
506+
* @param count Current token count for processed document content
507+
*
508+
* @throws StreamConstraintsException If token count exceeds maximum
509+
*
510+
* @since 2.18
511+
*/
512+
public void validateTokenCount(long count) throws StreamConstraintsException
513+
{
514+
// for performance reasons, it is assumed that users check hasMaxTokenCount()
515+
// before calling this method - this method will not work properly if hasMaxTokenCount() is false
516+
if (count > _maxTokenCount) {
517+
throw _constructException(
518+
"Token count (%d) exceeds the maximum allowed (%d, from %s)",
519+
count, _maxTokenCount,
520+
_constrainRef("getMaxTokenCount"));
521+
}
522+
}
523+
422524
/*
423525
/**********************************************************************
424526
/* Convenience methods for validation, token lengths

src/main/java/com/fasterxml/jackson/core/base/ParserMinimalBase.java

+29
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import com.fasterxml.jackson.core.*;
99
import com.fasterxml.jackson.core.exc.InputCoercionException;
10+
import com.fasterxml.jackson.core.exc.StreamConstraintsException;
1011
import com.fasterxml.jackson.core.io.JsonEOFException;
1112
import com.fasterxml.jackson.core.io.NumberInput;
1213
import com.fasterxml.jackson.core.util.ByteArrayBuilder;
@@ -147,6 +148,13 @@ public abstract class ParserMinimalBase extends JsonParser
147148
*/
148149
protected JsonToken _currToken;
149150

151+
/**
152+
* Current count of tokens; only incremented when a maximum token count is configured.
153+
*
154+
* @since 2.18
155+
*/
156+
protected long _tokenCount;
157+
150158
/**
151159
* Last cleared token, if any: that is, value that was in
152160
* effect when {@link #clearCurrentToken} was called.
@@ -502,6 +510,11 @@ public String getValueAsString(String defaultValue) throws IOException {
502510
return getText();
503511
}
504512

513+
@Override
514+
public long getTokenCount() {
515+
return _tokenCount;
516+
}
517+
505518
/*
506519
/**********************************************************
507520
/* Base64 decoding
@@ -782,6 +795,22 @@ protected final void _wrapError(String msg, Throwable t) throws JsonParseExcepti
782795
throw _constructReadException(msg, t);
783796
}
784797

798+
protected final JsonToken _updateToken(final JsonToken token) throws StreamConstraintsException {
799+
_currToken = token;
800+
if (streamReadConstraints().hasMaxTokenCount() && token != JsonToken.NOT_AVAILABLE) {
801+
streamReadConstraints().validateTokenCount(++_tokenCount);
802+
}
803+
return token;
804+
}
805+
806+
protected final JsonToken _updateTokenToNull() {
807+
return (_currToken = null);
808+
}
809+
810+
protected final JsonToken _updateTokenToNA() {
811+
return (_currToken = JsonToken.NOT_AVAILABLE);
812+
}
813+
785814
@Deprecated // since 2.11
786815
protected static byte[] _asciiBytes(String str) {
787816
byte[] b = new byte[str.length()];

0 commit comments

Comments
 (0)