Skip to content

Commit cb33f0f

Browse files
committed
Backport #533 fix in 2.10 branch
1 parent a1ab6e3 commit cb33f0f

File tree

6 files changed

+145
-14
lines changed

6 files changed

+145
-14
lines changed

release-notes/CREDITS-2.x

+5
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,8 @@ David Nault (dnault@github)
172172
* Reported #531: Non-blocking parser reports incorrect locations when fed with
173173
non-zero offset
174174
(2.10.0)
175+
176+
Fabien Renaud (fabienrenaud@github)
177+
* Reported, contributed fix for #533: UTF-8 BOM not accounted for in
178+
`JsonLocation.getByteOffset()`
179+
(2.10.0)

release-notes/VERSION-2.x

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ JSON library.
3131
#508: Add new exception type `InputCoercionException` to be used for failed coercions
3232
like overflow for `int`
3333
#527: Add simple module-info for JDK9+, using Moditect
34+
#533: UTF-8 BOM not accounted for in JsonLocation.getByteOffset()
35+
(contributed by Fabien R)
3436

3537
2.9.9 (16-May-2019)
3638

src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,9 @@ public JsonParser constructParser(int parserFeatures, ObjectCodec codec,
243243
ByteQuadsCanonicalizer rootByteSymbols, CharsToNameCanonicalizer rootCharSymbols,
244244
int factoryFeatures) throws IOException
245245
{
246+
int prevInputPtr = _inputPtr;
246247
JsonEncoding enc = detectEncoding();
248+
int bytesProcessed = _inputPtr - prevInputPtr;
247249

248250
if (enc == JsonEncoding.UTF8) {
249251
/* and without canonicalization, byte-based approach is not performant; just use std UTF-8 reader
@@ -252,7 +254,7 @@ public JsonParser constructParser(int parserFeatures, ObjectCodec codec,
252254
if (JsonFactory.Feature.CANONICALIZE_FIELD_NAMES.enabledIn(factoryFeatures)) {
253255
ByteQuadsCanonicalizer can = rootByteSymbols.makeChild(factoryFeatures);
254256
return new UTF8StreamJsonParser(_context, parserFeatures, _in, codec, can,
255-
_inputBuffer, _inputPtr, _inputEnd, _bufferRecyclable);
257+
_inputBuffer, _inputPtr, _inputEnd, bytesProcessed, _bufferRecyclable);
256258
}
257259
}
258260
return new ReaderBasedJsonParser(_context, parserFeatures, constructReader(), codec,

src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java

+15-2
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,23 @@ public class UTF8StreamJsonParser
137137
/**********************************************************
138138
*/
139139

140+
/**
141+
* @deprecated Since 2.10
142+
*/
143+
@Deprecated
140144
public UTF8StreamJsonParser(IOContext ctxt, int features, InputStream in,
141145
ObjectCodec codec, ByteQuadsCanonicalizer sym,
142146
byte[] inputBuffer, int start, int end,
143147
boolean bufferRecyclable)
148+
{
149+
this(ctxt, features, in, codec, sym,
150+
inputBuffer, start, end, 0, bufferRecyclable);
151+
}
152+
153+
public UTF8StreamJsonParser(IOContext ctxt, int features, InputStream in,
154+
ObjectCodec codec, ByteQuadsCanonicalizer sym,
155+
byte[] inputBuffer, int start, int end, int bytesPreProcessed,
156+
boolean bufferRecyclable)
144157
{
145158
super(ctxt, features);
146159
_inputStream = in;
@@ -149,9 +162,9 @@ public UTF8StreamJsonParser(IOContext ctxt, int features, InputStream in,
149162
_inputBuffer = inputBuffer;
150163
_inputPtr = start;
151164
_inputEnd = end;
152-
_currInputRowStart = start;
165+
_currInputRowStart = start - bytesPreProcessed;
153166
// If we have offset, need to omit that from byte offset, so:
154-
_currInputProcessed = -start;
167+
_currInputProcessed = -start + bytesPreProcessed;
155168
_bufferRecyclable = bufferRecyclable;
156169
}
157170

src/test/java/com/fasterxml/jackson/core/json/LocationOffsetsTest.java

+118-3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ public void testSimpleInitialOffsets() throws Exception
2323
assertEquals(0L, loc.getCharOffset());
2424
assertEquals(1, loc.getLineNr());
2525
assertEquals(1, loc.getColumnNr());
26-
26+
2727
loc = p.getCurrentLocation();
2828
assertEquals(-1L, loc.getByteOffset());
2929
assertEquals(1L, loc.getCharOffset());
@@ -42,7 +42,7 @@ public void testSimpleInitialOffsets() throws Exception
4242
assertEquals(-1L, loc.getCharOffset());
4343
assertEquals(1, loc.getLineNr());
4444
assertEquals(1, loc.getColumnNr());
45-
45+
4646
loc = p.getCurrentLocation();
4747
assertEquals(1L, loc.getByteOffset());
4848
assertEquals(-1L, loc.getCharOffset());
@@ -69,7 +69,7 @@ public void testOffsetWithInputOffset() throws Exception
6969
assertEquals(-1L, loc.getCharOffset());
7070
assertEquals(1, loc.getLineNr());
7171
assertEquals(1, loc.getColumnNr());
72-
72+
7373
loc = p.getCurrentLocation();
7474
assertEquals(1L, loc.getByteOffset());
7575
assertEquals(-1L, loc.getCharOffset());
@@ -78,4 +78,119 @@ public void testOffsetWithInputOffset() throws Exception
7878

7979
p.close();
8080
}
81+
82+
public void testOffsetWithoutInputOffset() throws Exception
83+
{
84+
JsonLocation loc;
85+
JsonParser p;
86+
// 3 spaces before, 2 after, just for padding
87+
byte[] b = " { } ".getBytes("UTF-8");
88+
89+
// and then peel them off
90+
p = JSON_F.createParser(b);
91+
assertToken(JsonToken.START_OBJECT, p.nextToken());
92+
93+
loc = p.getTokenLocation();
94+
assertEquals(3L, loc.getByteOffset());
95+
assertEquals(-1L, loc.getCharOffset());
96+
assertEquals(1, loc.getLineNr());
97+
assertEquals(4, loc.getColumnNr());
98+
99+
loc = p.getCurrentLocation();
100+
assertEquals(4L, loc.getByteOffset());
101+
assertEquals(-1L, loc.getCharOffset());
102+
assertEquals(1, loc.getLineNr());
103+
assertEquals(5, loc.getColumnNr());
104+
105+
p.close();
106+
}
107+
108+
// for [core#533]
109+
public void testUtf8Bom() throws Exception
110+
{
111+
JsonLocation loc;
112+
JsonParser p;
113+
114+
byte[] b = withUtf8Bom("{ }".getBytes());
115+
116+
// and then peel them off
117+
p = JSON_F.createParser(b);
118+
assertToken(JsonToken.START_OBJECT, p.nextToken());
119+
120+
loc = p.getTokenLocation();
121+
assertEquals(3L, loc.getByteOffset());
122+
assertEquals(-1L, loc.getCharOffset());
123+
assertEquals(1, loc.getLineNr());
124+
assertEquals(4, loc.getColumnNr());
125+
126+
loc = p.getCurrentLocation();
127+
assertEquals(4L, loc.getByteOffset());
128+
assertEquals(-1L, loc.getCharOffset());
129+
assertEquals(1, loc.getLineNr());
130+
assertEquals(5, loc.getColumnNr());
131+
132+
p.close();
133+
}
134+
135+
public void testUtf8BomWithPadding() throws Exception
136+
{
137+
JsonLocation loc;
138+
JsonParser p;
139+
140+
byte[] b = withUtf8Bom(" { }".getBytes());
141+
142+
// and then peel them off
143+
p = JSON_F.createParser(b);
144+
assertToken(JsonToken.START_OBJECT, p.nextToken());
145+
146+
loc = p.getTokenLocation();
147+
assertEquals(6L, loc.getByteOffset());
148+
assertEquals(-1L, loc.getCharOffset());
149+
assertEquals(1, loc.getLineNr());
150+
assertEquals(7, loc.getColumnNr());
151+
152+
loc = p.getCurrentLocation();
153+
assertEquals(7L, loc.getByteOffset());
154+
assertEquals(-1L, loc.getCharOffset());
155+
assertEquals(1, loc.getLineNr());
156+
assertEquals(8, loc.getColumnNr());
157+
158+
p.close();
159+
}
160+
161+
public void testUtf8BomWithInputOffset() throws Exception
162+
{
163+
JsonLocation loc;
164+
JsonParser p;
165+
166+
byte[] b = withUtf8Bom(" { }".getBytes());
167+
168+
// and then peel them off
169+
p = JSON_F.createParser(b);
170+
assertToken(JsonToken.START_OBJECT, p.nextToken());
171+
172+
loc = p.getTokenLocation();
173+
assertEquals(6L, loc.getByteOffset());
174+
assertEquals(-1L, loc.getCharOffset());
175+
assertEquals(1, loc.getLineNr());
176+
assertEquals(7, loc.getColumnNr());
177+
178+
loc = p.getCurrentLocation();
179+
assertEquals(7L, loc.getByteOffset());
180+
assertEquals(-1L, loc.getCharOffset());
181+
assertEquals(1, loc.getLineNr());
182+
assertEquals(8, loc.getColumnNr());
183+
184+
p.close();
185+
}
186+
187+
private byte[] withUtf8Bom(byte[] bytes) {
188+
byte[] arr = new byte[bytes.length + 3];
189+
// write UTF-8 BOM
190+
arr[0] = (byte) 0xEF;
191+
arr[1] = (byte) 0xBB;
192+
arr[2] = (byte) 0xBF;
193+
System.arraycopy(bytes, 0, arr, 3, bytes.length);
194+
return arr;
195+
}
81196
}

src/test/java/com/fasterxml/jackson/core/read/JsonParserTest.java

+2-8
Original file line numberDiff line numberDiff line change
@@ -446,15 +446,9 @@ public void testUtf8BOMHandling() throws Exception
446446

447447
JsonParser p = JSON_FACTORY.createParser(input);
448448
assertEquals(JsonToken.START_ARRAY, p.nextToken());
449-
// should also have skipped first 3 bytes of BOM; but do we have offset available?
450-
/* 08-Oct-2013, tatu: Alas, due to [core#111], we have to omit BOM in calculations
451-
* as we do not know what the offset is due to -- may need to revisit, if this
452-
* discrepancy becomes an issue. For now it just means that BOM is considered
453-
* "out of stream" (not part of input).
454-
*/
449+
455450
JsonLocation loc = p.getTokenLocation();
456-
// so if BOM was considered in-stream (part of input), this should expect 3:
457-
assertEquals(0, loc.getByteOffset());
451+
assertEquals(3, loc.getByteOffset());
458452
assertEquals(-1, loc.getCharOffset());
459453
assertEquals(JsonToken.VALUE_NUMBER_INT, p.nextToken());
460454
assertEquals(JsonToken.END_ARRAY, p.nextToken());

0 commit comments

Comments
 (0)