@@ -89,7 +89,7 @@ private Feature(boolean defaultState) {
89
89
* I/O context for this reader. It handles buffer allocation
90
90
* for the reader.
91
91
*/
92
- final protected IOContext _ioContext ;
92
+ protected final IOContext _ioContext ;
93
93
94
94
/**
95
95
* Flag that indicates whether parser is closed or not. Gets
@@ -2157,17 +2157,21 @@ protected String _finishTextToken(int ch) throws IOException
2157
2157
_finishChunkedText ();
2158
2158
return _textBuffer .contentsAsString ();
2159
2159
}
2160
- if (len > (_inputEnd - _inputPtr )) {
2161
- // or if not, could we read?
2162
- if (len >= _inputBuffer .length ) {
2163
- // If not enough space, need handling similar to chunked
2164
- _finishLongText (len );
2165
- return _textBuffer .contentsAsString ();
2166
- }
2167
- _loadToHaveAtLeast (len );
2160
+ // 29-Jan-2021, tatu: as per [dataformats-binary#238] must keep in mind that
2161
+ // the longest individual unit is 4 bytes (surrogate pair) so we
2162
+ // actually need len+3 bytes to avoid bounds checks
2163
+ final int needed = len + 3 ;
2164
+ final int available = _inputEnd - _inputPtr ;
2165
+
2166
+ if ((available >= needed )
2167
+ // if not, could we read? NOTE: we do not require it, just attempt to read
2168
+ || ((_inputBuffer .length >= needed )
2169
+ && _tryToLoadToHaveAtLeast (needed ))) {
2170
+ return _finishShortText (len );
2168
2171
}
2169
- // offline for better optimization
2170
- return _finishShortText (len );
2172
+ // If not enough space, need handling similar to chunked
2173
+ _finishLongText (len );
2174
+ return _textBuffer .contentsAsString ();
2171
2175
}
2172
2176
2173
2177
private final String _finishShortText (int len ) throws IOException
@@ -2240,7 +2244,7 @@ private final void _finishLongText(int len) throws IOException
2240
2244
continue ;
2241
2245
}
2242
2246
if ((len -= code ) < 0 ) { // may need to improve error here but...
2243
- throw _constructError ("Malformed UTF-8 character at end of long (non-chunked) text segment" );
2247
+ throw _constructError ("Malformed UTF-8 character at the end of a (non-chunked) text segment" );
2244
2248
}
2245
2249
2246
2250
switch (code ) {
@@ -2260,13 +2264,13 @@ private final void _finishLongText(int len) throws IOException
2260
2264
break ;
2261
2265
case 3 : // 4-byte UTF
2262
2266
c = _decodeUTF8_4 (c );
2263
- // Let's add first part right away:
2264
- outBuf [outPtr ++] = (char ) (0xD800 | (c >> 10 ));
2265
2267
if (outPtr >= outBuf .length ) {
2266
2268
outBuf = _textBuffer .finishCurrentSegment ();
2267
2269
outPtr = 0 ;
2268
2270
outEnd = outBuf .length ;
2269
2271
}
2272
+ // Let's add first part right away:
2273
+ outBuf [outPtr ++] = (char ) (0xD800 | (c >> 10 ));
2270
2274
c = 0xDC00 | (c & 0x3FF );
2271
2275
// And let the other char output down below
2272
2276
break ;
@@ -3367,6 +3371,37 @@ protected final void _loadToHaveAtLeast(int minAvailable) throws IOException
3367
3371
}
3368
3372
}
3369
3373
3374
+ // Best-effort variant of loading: tries to get at least `minAvailable` bytes into `_inputBuffer`; returns true on success, false if there is no input stream or input ends first. @since 2.12.2
3375
+ protected final boolean _tryToLoadToHaveAtLeast (int minAvailable ) throws IOException
3376
+ {
3377
+ // No input stream, no loading (either we are closed, or have non-stream input source)
3378
+ if (_inputStream == null ) {
3379
+ return false ;
3380
+ }
3381
+ // Need to move remaining data in front? Unread bytes are copied to offset 0 so reads below append after them. NOTE(review): if _inputPtr == 0 with unread bytes, the else branch resets _inputEnd to 0 -- confirm callers never reach that state
3382
+ int amount = _inputEnd - _inputPtr ;
3383
+ if (amount > 0 && _inputPtr > 0 ) {
3384
+ //_currInputRowStart -= _inputPtr;
3385
+ System .arraycopy (_inputBuffer , _inputPtr , _inputBuffer , 0 , amount );
3386
+ _inputEnd = amount ;
3387
+ } else {
3388
+ _inputEnd = 0 ;
3389
+ }
3390
+ // Needs to be done here, as per [dataformats-binary#178]: add the consumed offset to the processed count before the read pointer is reset to 0
3391
+ _currInputProcessed += _inputPtr ;
3392
+ _inputPtr = 0 ;
3393
+ while (_inputEnd < minAvailable ) {
3394
+ int count = _inputStream .read (_inputBuffer , _inputEnd , _inputBuffer .length - _inputEnd );
3395
+ if (count < 1 ) {
3396
+ // End of input; not ideal but we'll accept it here: close the input and report failure via the return value
3397
+ _closeInput ();
3398
+ return false ;
3399
+ }
3400
+ _inputEnd += count ;
3401
+ }
3402
+ return true ;
3403
+ }
3404
+
3370
3405
protected ByteArrayBuilder _getByteArrayBuilder () {
3371
3406
if (_byteArrayBuilder == null ) {
3372
3407
_byteArrayBuilder = new ByteArrayBuilder ();
0 commit comments