@@ -2350,18 +2350,14 @@ protected void _finishToken() throws JacksonException
23502350 }
23512351 return ;
23522352 }
2353- // 29-Jan-2021, tatu: as per [dataformats-binary#238] must keep in mind that
2354- // the longest individual unit is 4 bytes (surrogate pair) so we
2355- // actually need len+3 bytes to avoid bounds checks
23562353 // 18-Jan-2024, tatu: For malicious input / Fuzzers, need to worry about overflow
23572354 // like Integer.MAX_VALUE
2358- final int needed = Math .max (len , len + 3 );
23592355 final int available = _inputEnd - _inputPtr ;
23602356
2361- if ((available >= needed )
2357+ if ((available >= len )
23622358 // if not, could we read? NOTE: we do not require it, just attempt to read
2363- || ((_inputBuffer .length >= needed )
2364- && _tryToLoadToHaveAtLeast (needed ))) {
2359+ || ((_inputBuffer .length >= len )
2360+ && _tryToLoadToHaveAtLeast (len ))) {
23652361 _finishShortText (len );
23662362 return ;
23672363 }
@@ -2392,22 +2388,18 @@ protected String _finishTextToken(int ch) throws JacksonException
23922388 _finishChunkedText ();
23932389 return _textBuffer .contentsAsString ();
23942390 }
2395- // 29-Jan-2021, tatu: as per [dataformats-binary#238] must keep in mind that
2396- // the longest individual unit is 4 bytes (surrogate pair) so we
2397- // actually need len+3 bytes to avoid bounds checks
23982391
23992392 // 19-Mar-2021, tatu: [dataformats-binary#259] shows the case where length
24002393 // we get is Integer.MAX_VALUE, leading to overflow. Could change values
24012394 // to longs but simpler to truncate "needed" (will never pass following test
24022395 // due to inputBuffer never being even close to that big).
24032396
2404- final int needed = Math .max (len + 3 , len );
24052397 final int available = _inputEnd - _inputPtr ;
24062398
2407- if ((available >= needed )
2399+ if ((available >= len )
24082400 // if not, could we read? NOTE: we do not require it, just attempt to read
2409- || ((_inputBuffer .length >= needed )
2410- && _tryToLoadToHaveAtLeast (needed ))) {
2401+ || ((_inputBuffer .length >= len )
2402+ && _tryToLoadToHaveAtLeast (len ))) {
24112403 return _finishShortText (len );
24122404 }
24132405 // If not enough space, need handling similar to chunked
@@ -2435,7 +2427,7 @@ private final String _finishShortText(int len) throws JacksonException
24352427 final byte [] inputBuf = _inputBuffer ;
24362428
24372429 // Let's actually do a tight loop for ASCII first:
2438- final int end = inPtr + len ;
2430+ final int end = _inputPtr ;
24392431
24402432 int i ;
24412433 while ((i = inputBuf [inPtr ]) >= 0 ) {
@@ -2452,44 +2444,50 @@ private final String _finishShortText(int len) throws JacksonException
24522444 final int [] codes = UTF8_UNIT_CODES ;
24532445 do {
24542446 i = inputBuf [inPtr ++] & 0xFF ;
2455- switch (codes [i ]) {
2456- case 0 :
2457- break ;
2458- case 1 :
2459- {
2460- final int c2 = inputBuf [inPtr ++];
2461- if ((c2 & 0xC0 ) != 0x080 ) {
2462- _reportInvalidOther (c2 & 0xFF , inPtr );
2463- }
2464- i = ((i & 0x1F ) << 6 ) | (c2 & 0x3F );
2447+ int code = codes [i ];
2448+ if (code != 0 ) {
2449+ // 05-Jul-2021, tatu: As per [dataformats-binary#289] need to
2450+ // be careful wrt end-of-buffer truncated codepoints
2451+ if ((inPtr + code ) > end ) {
2452+ final int firstCharOffset = len - (end - inPtr ) - 1 ;
2453+ _reportTruncatedUTF8InString (len , firstCharOffset , i , code );
24652454 }
2466- break ;
2467- case 2 :
2468- {
2469- final int c2 = inputBuf [inPtr ++];
2470- if ((c2 & 0xC0 ) != 0x080 ) {
2471- _reportInvalidOther (c2 & 0xFF , inPtr );
2455+
2456+ switch (code ) {
2457+ case 1 : {
2458+ final int c2 = inputBuf [inPtr ++];
2459+ if ((c2 & 0xC0 ) != 0x080 ) {
2460+ _reportInvalidOther (c2 & 0xFF , inPtr );
2461+ }
2462+ i = ((i & 0x1F ) << 6 ) | (c2 & 0x3F );
24722463 }
2473- final int c3 = inputBuf [inPtr ++];
2474- if ((c3 & 0xC0 ) != 0x080 ) {
2475- _reportInvalidOther (c3 & 0xFF , inPtr );
2464+ break ;
2465+ case 2 : {
2466+ final int c2 = inputBuf [inPtr ++];
2467+ if ((c2 & 0xC0 ) != 0x080 ) {
2468+ _reportInvalidOther (c2 & 0xFF , inPtr );
2469+ }
2470+ final int c3 = inputBuf [inPtr ++];
2471+ if ((c3 & 0xC0 ) != 0x080 ) {
2472+ _reportInvalidOther (c3 & 0xFF , inPtr );
2473+ }
2474+ i = ((i & 0x0F ) << 12 ) | ((c2 & 0x3F ) << 6 ) | (c3 & 0x3F );
24762475 }
2477- i = ((i & 0x0F ) << 12 ) | ((c2 & 0x3F ) << 6 ) | (c3 & 0x3F );
2476+ break ;
2477+ case 3 :
2478+ // 30-Jan-2021, tatu: TODO - validate these too?
2479+ i = ((i & 0x07 ) << 18 )
2480+ | ((inputBuf [inPtr ++] & 0x3F ) << 12 )
2481+ | ((inputBuf [inPtr ++] & 0x3F ) << 6 )
2482+ | (inputBuf [inPtr ++] & 0x3F );
2483+ // note: this is the codepoint value; need to split, too
2484+ i -= 0x10000 ;
2485+ outBuf [outPtr ++] = (char ) (0xD800 | (i >> 10 ));
2486+ i = 0xDC00 | (i & 0x3FF );
2487+ break ;
2488+ default : // invalid
2489+ _reportInvalidInitial (i );
24782490 }
2479- break ;
2480- case 3 :
2481- // 30-Jan-2021, tatu: TODO - validate these too?
2482- i = ((i & 0x07 ) << 18 )
2483- | ((inputBuf [inPtr ++] & 0x3F ) << 12 )
2484- | ((inputBuf [inPtr ++] & 0x3F ) << 6 )
2485- | (inputBuf [inPtr ++] & 0x3F );
2486- // note: this is the codepoint value; need to split, too
2487- i -= 0x10000 ;
2488- outBuf [outPtr ++] = (char ) (0xD800 | (i >> 10 ));
2489- i = 0xDC00 | (i & 0x3FF );
2490- break ;
2491- default : // invalid
2492- _reportInvalidInitial (i );
24932491 }
24942492 outBuf [outPtr ++] = (char ) i ;
24952493 } while (inPtr < end );
@@ -3919,18 +3917,16 @@ protected void _reportIncompleteBinaryRead(int expLen, int actLen) throws Stream
39193917 expLen , actLen ), _currToken );
39203918 }
39213919
3922- // @since 2.13
3923- /*
3920+ // @since 2.18.1
39243921 private String _reportTruncatedUTF8InString (int strLenBytes , int truncatedCharOffset ,
39253922 int firstUTFByteValue , int bytesExpected )
39263923 throws JacksonException
39273924 {
39283925 throw _constructReadException (String .format (
3929- "Truncated UTF-8 character in Chunked Unicode String value (%d bytes): "
3926+ "Truncated UTF-8 character in Unicode String value (%d bytes): "
39303927+"byte 0x%02X at offset #%d indicated %d more bytes needed" ,
39313928strLenBytes , firstUTFByteValue , truncatedCharOffset , bytesExpected ));
39323929 }
3933- */
39343930
39353931 // @since 2.13
39363932 private String _reportTruncatedUTF8InName (int strLenBytes , int truncatedCharOffset ,
0 commit comments