Skip to content

Commit ab74693

Browse files
committed
Fix #236
1 parent f1f9343 commit ab74693

File tree

4 files changed

+103
-20
lines changed

4 files changed

+103
-20
lines changed

cbor/src/main/java/com/fasterxml/jackson/dataformat/cbor/CBORParser.java

+49-14
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ private Feature(boolean defaultState) {
8989
* I/O context for this reader. It handles buffer allocation
9090
* for the reader.
9191
*/
92-
final protected IOContext _ioContext;
92+
protected final IOContext _ioContext;
9393

9494
/**
9595
* Flag that indicates whether parser is closed or not. Gets
@@ -2157,17 +2157,21 @@ protected String _finishTextToken(int ch) throws IOException
21572157
_finishChunkedText();
21582158
return _textBuffer.contentsAsString();
21592159
}
2160-
if (len > (_inputEnd - _inputPtr)) {
2161-
// or if not, could we read?
2162-
if (len >= _inputBuffer.length) {
2163-
// If not enough space, need handling similar to chunked
2164-
_finishLongText(len);
2165-
return _textBuffer.contentsAsString();
2166-
}
2167-
_loadToHaveAtLeast(len);
2160+
// 29-Jan-2021, tatu: as per [dataformats-binary#238] must keep in mind that
2161+
// the longest individual unit is 4 bytes (surrogate pair) so we
2162+
// actually need len+3 bytes to avoid bounds checks
2163+
final int needed = len + 3;
2164+
final int available = _inputEnd - _inputPtr;
2165+
2166+
if ((available >= needed)
2167+
// if not, could we read? NOTE: we do not require it, just attempt to read
2168+
|| ((_inputBuffer.length >= needed)
2169+
&& _tryToLoadToHaveAtLeast(needed))) {
2170+
return _finishShortText(len);
21682171
}
2169-
// offline for better optimization
2170-
return _finishShortText(len);
2172+
// If not enough space, need handling similar to chunked
2173+
_finishLongText(len);
2174+
return _textBuffer.contentsAsString();
21712175
}
21722176

21732177
private final String _finishShortText(int len) throws IOException
@@ -2240,7 +2244,7 @@ private final void _finishLongText(int len) throws IOException
22402244
continue;
22412245
}
22422246
if ((len -= code) < 0) { // may need to improve error here but...
2243-
throw _constructError("Malformed UTF-8 character at end of long (non-chunked) text segment");
2247+
throw _constructError("Malformed UTF-8 character at the end of a (non-chunked) text segment");
22442248
}
22452249

22462250
switch (code) {
@@ -2260,13 +2264,13 @@ private final void _finishLongText(int len) throws IOException
22602264
break;
22612265
case 3: // 4-byte UTF
22622266
c = _decodeUTF8_4(c);
2263-
// Let's add first part right away:
2264-
outBuf[outPtr++] = (char) (0xD800 | (c >> 10));
22652267
if (outPtr >= outBuf.length) {
22662268
outBuf = _textBuffer.finishCurrentSegment();
22672269
outPtr = 0;
22682270
outEnd = outBuf.length;
22692271
}
2272+
// Let's add first part right away:
2273+
outBuf[outPtr++] = (char) (0xD800 | (c >> 10));
22702274
c = 0xDC00 | (c & 0x3FF);
22712275
// And let the other char output down below
22722276
break;
@@ -3367,6 +3371,37 @@ protected final void _loadToHaveAtLeast(int minAvailable) throws IOException
33673371
}
33683372
}
33693373

3374+
// @since 2.12.2
3375+
protected final boolean _tryToLoadToHaveAtLeast(int minAvailable) throws IOException
3376+
{
3377+
// No input stream, no leading (either we are closed, or have non-stream input source)
3378+
if (_inputStream == null) {
3379+
return false;
3380+
}
3381+
// Need to move remaining data in front?
3382+
int amount = _inputEnd - _inputPtr;
3383+
if (amount > 0 && _inputPtr > 0) {
3384+
//_currInputRowStart -= _inputPtr;
3385+
System.arraycopy(_inputBuffer, _inputPtr, _inputBuffer, 0, amount);
3386+
_inputEnd = amount;
3387+
} else {
3388+
_inputEnd = 0;
3389+
}
3390+
// Needs to be done here, as per [dataformats-binary#178]
3391+
_currInputProcessed += _inputPtr;
3392+
_inputPtr = 0;
3393+
while (_inputEnd < minAvailable) {
3394+
int count = _inputStream.read(_inputBuffer, _inputEnd, _inputBuffer.length - _inputEnd);
3395+
if (count < 1) {
3396+
// End of input; not ideal but we'll accept it here
3397+
_closeInput();
3398+
return false;
3399+
}
3400+
_inputEnd += count;
3401+
}
3402+
return true;
3403+
}
3404+
33703405
protected ByteArrayBuilder _getByteArrayBuilder() {
33713406
if (_byteArrayBuilder == null) {
33723407
_byteArrayBuilder = new ByteArrayBuilder();
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,57 @@
11
package com.fasterxml.jackson.dataformat.cbor.failing;
22

3-
import com.fasterxml.jackson.core.JsonToken;
3+
import com.fasterxml.jackson.core.*;
4+
import com.fasterxml.jackson.core.exc.StreamReadException;
45
import com.fasterxml.jackson.dataformat.cbor.CBORParser;
56
import com.fasterxml.jackson.dataformat.cbor.CBORTestBase;
67

78
public class ParseInvalidUTF8String236Test extends CBORTestBase
89
{
9-
// [dataformats-binary#236]: Ends with the first byte of alleged 2-byte
10-
// UTF-8 character; parser trying to access second byte beyond end.
11-
public void testArrayIssue236() throws Exception
10+
// [dataformats-binary#236]: Original version; broken UTF-8 all around.
11+
// but gets hit by end-of-input only (since content not validated)
12+
public void testShortString236Original() throws Exception
1213
{
1314
final byte[] input = {0x66, (byte) 0xef, 0x7d, 0x7d, 0xa, 0x2d, (byte) 0xda};
1415
try (CBORParser p = cborParser(input)) {
1516
assertToken(JsonToken.VALUE_STRING, p.nextToken());
16-
assertEquals("foobar", p.getText());
17-
assertNull(p.nextToken());
17+
try {
18+
String str = p.getText();
19+
fail("Should have failed, did not, String = '"+str+"'");
20+
} catch (StreamReadException e) {
21+
verifyException(e, "Invalid UTF-8 middle byte 0x7d");
22+
}
23+
}
24+
}
25+
26+
// Variant where the length would be valid, but the last byte is partial UTF-8
27+
// code point
28+
public void testShortString236EndsWithPartialUTF8() throws Exception
29+
{
30+
final byte[] input = {0x63, 0x41, 0x2d, (byte) 0xda};
31+
try (CBORParser p = cborParser(input)) {
32+
assertToken(JsonToken.VALUE_STRING, p.nextToken());
33+
try {
34+
String str = p.getText();
35+
fail("Should have failed, did not, String = '"+str+"'");
36+
} catch (StreamReadException e) {
37+
verifyException(e, "Malformed UTF-8 character at the end of");
38+
}
39+
}
40+
}
41+
42+
// Variant where the length itself exceeds buffer
43+
public void testShortString236TruncatedString() throws Exception
44+
{
45+
// String with length of 6 bytes claimed; only 5 provided
46+
final byte[] input = {0x63, 0x41, 0x2d };
47+
try (CBORParser p = cborParser(input)) {
48+
assertToken(JsonToken.VALUE_STRING, p.nextToken());
49+
try {
50+
String str = p.getText();
51+
fail("Should have failed, did not, String = '"+str+"'");
52+
} catch (StreamReadException e) {
53+
verifyException(e, "Unexpected end-of-input in VALUE_STRING");
54+
}
1855
}
1956
}
2057
}

release-notes/CREDITS-2.x

+6
Original file line numberDiff line numberDiff line change
@@ -157,3 +157,9 @@ Josh Barr (jobarr-amzn@github)
157157

158158
* Contributed #232: (ion) Allow disabling native type ids in IonMapper
159159
(2.12.1)
160+
161+
Fabian Meumertzheim (fmeum@github)
162+
163+
* Reported #236: `ArrayIndexOutOfBoundsException` in `CBORParser` for invalid UTF-8 String
164+
(2.12.2)
165+

release-notes/VERSION-2.x

+5
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ Project: jackson-datatypes-binaryModules:
99
=== Releases ===
1010
------------------------------------------------------------------------
1111

12+
2.12.2 (not yet released)
13+
14+
#236: `ArrayIndexOutOfBoundsException` in `CBORParser` for invalid UTF-8 String
15+
(reported by Fabian M)
16+
1217
2.12.1 (08-Jan-2021)
1318

1419
#232: (ion) Allow disabling native type ids in IonMapper

0 commit comments

Comments
 (0)