Skip to content

Add failing tests for #1173: off-by-one discrepancy in the location of reported parser errors #1175

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 23, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,326 @@
package com.fasterxml.jackson.failing.read;

import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.async.ByteArrayFeeder;

/**
* Tests that the {@link JsonLocation} attached to a {@link JsonProcessingException} thrown for invalid JSON points
* to the correct character.
*/
public class LocationOfError1173Test extends BaseTest
{
private static final JsonFactory JSON_F = new JsonFactory();

/** Represents the different parser backends */
public enum ParserVariant
{
// Parser created from the UTF-8 encoded bytes of the input.
BYTE_ARRAY(
(String input) -> JSON_F.createParser(input.getBytes(StandardCharsets.UTF_8)),
true, // supports byte offsets in reported location
false, // supports character offsets in reported location
true // supports column numbers in reported location
),
// Parser created from the input's char array.
CHAR_ARRAY(
(String input) -> JSON_F.createParser(input.toCharArray()),
false, // byte offsets are not checked for this variant
true, // character offsets are checked
true // column numbers are checked
),
// Parser created from a DataInput wrapping the UTF-8 encoded bytes of the input.
// All three flags are false, so only the line number is checked for this variant.
DATA_INPUT(
(String input) -> JSON_F.createParser((DataInput) new DataInputStream(new ByteArrayInputStream(
input.getBytes(StandardCharsets.UTF_8)
))),
false, // byte offsets are not checked
false, // character offsets are not checked
false // column numbers are not checked
),
ASYNC(
(String input) -> {
JsonParser parser = JSON_F.createNonBlockingByteArrayParser();
ByteArrayFeeder feeder = (ByteArrayFeeder) parser.getNonBlockingInputFeeder();
assertTrue(feeder.needMoreInput());

byte[] inputBytes = input.getBytes(StandardCharsets.UTF_8);
feeder.feedInput(inputBytes, 0, inputBytes.length);
feeder.endOfInput();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this works the way you'd expect... I think endOfInput() should not be called until all input is consumed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or maybe I misremember this part, as test appears to work fine :)


return parser;
},
true,
false,
true
);

/**
 * Associates a parser factory with the location details the test verifies for that backend.
 *
 * @param parserGenerator factory used by {@link #createParser(String)} to build the parser
 * @param supportsByteOffset whether the reported byte offset is asserted for this variant
 * @param supportsCharOffset whether the reported character offset is asserted for this variant
 * @param supportsColumnNr whether the reported column number is asserted for this variant
 */
ParserVariant(
    ParserGenerator parserGenerator,
    boolean supportsByteOffset,
    boolean supportsCharOffset,
    boolean supportsColumnNr
) {
    // Capability flags first, then the factory; the assignments are independent of each other.
    this.supportsColumnNr = supportsColumnNr;
    this.supportsCharOffset = supportsCharOffset;
    this.supportsByteOffset = supportsByteOffset;
    this._parserGenerator = parserGenerator;
}

/**
 * Builds a parser of this variant over the given text by delegating to the variant's generator.
 *
 * @param input document to hand to the parser
 * @return the parser produced by the generator
 * @throws IOException if the generator fails to create the parser
 */
public JsonParser createParser(String input) throws IOException
{
    final JsonParser created = _parserGenerator.createParser(input);
    return created;
}

// Factory that createParser(String) delegates to.
private final ParserGenerator _parserGenerator;
// When true, the test compares the expected byte offset with JsonLocation.getByteOffset().
public final boolean supportsByteOffset;
// When true, the test compares the expected character offset with JsonLocation.getCharOffset().
public final boolean supportsCharOffset;
// When true, the test compares the expected column number with JsonLocation.getColumnNr().
public final boolean supportsColumnNr;
}

/** Collection of differing invalid JSON input cases to test */
private static final List<InvalidJson> INVALID_JSON_CASES = Arrays.asList(
// Expected location: the opening quote of "json" (0-based offset 11), where a colon was required.
new InvalidJson(
"Object property missing colon",
"{\"invalid\" \"json\"}",
11, // byte offset
11, // char offset
1, // line number
12 // column number
),
// Expected location: the comma following the key (offset 10).
new InvalidJson(
"Comma after key in object property",
"{\"invalid\", \"json\"}",
10,
10,
1,
11
),
// Expected location: the opening quote of "key2" (offset 17).
new InvalidJson(
"Missing comma between object properties",
"{\"key1\":\"value1\" \"key2\":\"value2\"}",
17,
17,
1,
18
),
// Expected location: the first digit of the key (offset 1).
new InvalidJson(
"Number as a property key",
"{1234: 5678}",
1,
1,
1,
2
),
// Expected location: the first character of the literal (offset 1).
new InvalidJson(
"false literal as property key",
"{false: true}",
1,
1,
1,
2
),
// Expected location: the first character of the literal (offset 1).
new InvalidJson(
"true literal as property key",
"{true: false}",
1,
1,
1,
2
),
// Expected location: the first character of the literal (offset 1).
new InvalidJson(
"null literal as property key",
"{null: \"content\"}",
1,
1,
1,
2
),
// Expected location: the opening quote of "commas" (offset 6).
new InvalidJson(
"Missing comma between list elements",
"[\"no\" \"commas\"]",
6,
6,
1,
7
),
// Expected location: the colon after the first element (offset 8).
new InvalidJson(
"Property key/value delimiter in list",
"[\"still\":\"invalid\"]",
8,
8,
1,
9
),
// Expected location: the end of input, one past the opening brace (offset 1).
new InvalidJson(
"Unexpected EOF",
"{",
1,
1,
1,
2
),
// Expected location: the close marker itself (offset 0).
new InvalidJson(
"Close marker without matching open marker",
"}",
0,
0,
1,
1
),
// Expected location: the mismatched closing bracket (offset 15).
new InvalidJson(
"Mismatched open/close tokens",
"{\"open\":\"close\"]",
15,
15,
1,
16
),
// Expected location: the first character of the unquoted key (offset 1).
new InvalidJson(
"Bare strings in JSON",
"{missing: quotes}",
1,
1,
1,
2
),
// Expected location: the last character of FALSE (offset 24), not its first.
new InvalidJson(
"Incorrect case for false literal",
"{\"isThisValidJson\": FALSE}",
24,
24,
1,
25
),
// Expected location: the last character of TRUE (offset 41), not its first.
new InvalidJson(
"Incorrect case for true literal",
"{\"shouldYouAvoidWritingJsonLikeThis\": TRUE}",
41,
41,
1,
42
),
// Expected location: the last character of NULL (offset 20), not its first.
new InvalidJson(
"Incorrect case for null literal",
"{\"licensePlate\": NULL}",
20,
20,
1,
21
),
new InvalidJson(
"Invalid JSON with raw unicode character",
// javac will parse the unicode control sequence, it will be passed to the parser as a raw unicode character
"{\"validJson\":\"\u274c\",\"right\", \"here\"}",
26,
24,
1,
25
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Parser backends aren't consistent about the column in this case, because the exact definition of "column" seems to depend on whether the parser is operating on a byte array or a character array. I'm open to suggestions about what to do with this case.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Column should be exactly the same for byte-backed and char-backed input for ASCII characters (for multi-byte characters there is indeed a difference). But I think few tests use characters outside the ASCII range.

),
new InvalidJson(
"Error in middle of line for multiline input",
// missing comma delimiter between properties two and three
// Each property line is indented by two spaces; the expected offsets and column below depend on
// that indentation, so the literal must keep it.
"{\n  \"one\": 1,\n  \"two\": 2\n  \"three\": 3\n}",
27, // byte offset: opening quote of "three"
27, // char offset: same as the byte offset, the input is pure ASCII
4, // line number
3 // column number: two spaces of indentation precede the quote
),
// Expected location: the second of the two consecutive commas, the last character of line 2.
new InvalidJson(
"Error at end of line for multiline input",
// double commas between keys
"{\n\"key1\":\"value1\",,\n\"key2\":\"value2\"\n}",
18, // byte offset
18, // char offset
2, // line number
17 // column number (line 2 starts at offset 2)
)
);

/**
 * Parses one invalid document with one parser backend and checks the location carried by the
 * resulting {@link JsonProcessingException}.
 * <p>
 * The line number is always compared; the column number, byte offset and character offset are
 * compared only when the variant's corresponding {@code supports...} flag is set.
 *
 * @param variant parser backend used to read the input
 * @param invalidJson input document together with its expected error location
 * @throws IOException if creating or closing the parser fails
 */
@ParameterizedTest
@MethodSource("_generateTestData")
public void testParserBackendWithInvalidJson(ParserVariant variant, InvalidJson invalidJson) throws IOException
{
    try (JsonParser parser = variant.createParser(invalidJson.input))
    {
        final JsonProcessingException failure = Assertions.assertThrows(
            JsonProcessingException.class,
            () -> {
                // Keep pulling tokens until the parser reports the end of input;
                // the expected exception is thrown somewhere along the way.
                while (true) {
                    if (parser.nextToken() == null) {
                        return;
                    }
                }
            }
        );

        final JsonLocation reported = failure.getLocation();

        // Checked for every variant.
        assertEquals(invalidJson.lineNr, reported.getLineNr());

        // The remaining components are checked only where the variant opts in.
        if (variant.supportsColumnNr) {
            assertEquals(invalidJson.columnNr, reported.getColumnNr());
        }
        if (variant.supportsByteOffset) {
            assertEquals(invalidJson.byteOffset, reported.getByteOffset());
        }
        if (variant.supportsCharOffset) {
            assertEquals(invalidJson.charOffset, reported.getCharOffset());
        }
    }
}

/**
 * Supplies the arguments for the parameterized test: every {@link ParserVariant} paired with
 * every entry of {@code INVALID_JSON_CASES}, grouped by variant in declaration order.
 *
 * @return stream of (variant, invalid input) argument pairs
 */
private static Stream<Arguments> _generateTestData()
{
    return Stream.of(ParserVariant.values()).flatMap(
        backend -> INVALID_JSON_CASES.stream().map(badInput -> Arguments.of(backend, badInput))
    );
}

/**
 * Factory that turns an input document into a {@link JsonParser}. Declared locally so that
 * implementations written as lambdas may throw {@link IOException}.
 */
@FunctionalInterface
public interface ParserGenerator
{
/**
 * @param input document the parser should read
 * @return a parser over {@code input}
 * @throws IOException if the parser cannot be created
 */
JsonParser createParser(String input) throws IOException;
}

/**
 * One invalid input document together with the location at which a parser is expected to
 * report the error. {@link #toString()} returns the case name.
 */
public static class InvalidJson
{
    /** Case name; returned by {@link #toString()}. */
    private final String _name;
    /** The invalid document handed to the parser. */
    public final String input;
    /** Expected byte offset of the error. */
    public final int byteOffset;
    /** Expected character offset of the error. */
    public final int charOffset;
    /** Expected line number of the error. */
    public final int lineNr;
    /** Expected column number of the error. */
    public final int columnNr;

    /**
     * @param name case name returned by {@link #toString()}
     * @param input the invalid document
     * @param byteOffset expected byte offset of the error
     * @param charOffset expected character offset of the error
     * @param lineNr expected line number of the error
     * @param columnNr expected column number of the error
     */
    InvalidJson(String name, String input, int byteOffset, int charOffset, int lineNr, int columnNr)
    {
        this._name = name;
        this.input = input;
        this.lineNr = lineNr;
        this.columnNr = columnNr;
        this.byteOffset = byteOffset;
        this.charOffset = charOffset;
    }

    /** Returns the case name given at construction. */
    @Override
    public String toString()
    {
        return this._name;
    }
}
}