Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up syntax error reporting #3278

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,26 @@

package org.opensearch.sql.common.antlr;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.misc.IntervalSet;

/**
* Syntax analysis error listener that handles any syntax error by throwing exception with useful
* information.
*/
public class SyntaxAnalysisErrorListener extends BaseErrorListener {
// Show up to this many characters before the offending token in the query.
private static final int CONTEXT_TRUNCATION_THRESHOLD = 20;
// Avoid presenting too many alternatives when many are available.
private static final int SUGGESTION_TRUNCATION_THRESHOLD = 5;

@Override
public void syntaxError(
Expand All @@ -35,8 +42,7 @@ public void syntaxError(
throw new SyntaxCheckException(
String.format(
Locale.ROOT,
"Failed to parse query due to offending symbol [%s] "
+ "at: '%s' <--- HERE... More details: %s",
"[%s] is not a valid term at this part of the query: '%s' <-- HERE. %s",
getOffendingText(offendingToken),
truncateQueryAtOffendingToken(query, offendingToken),
getDetails(recognizer, msg, e)));
Expand All @@ -47,21 +53,47 @@ private String getOffendingText(Token offendingToken) {
}

private String truncateQueryAtOffendingToken(String query, Token offendingToken) {
return query.substring(0, offendingToken.getStopIndex() + 1);
int contextStartIndex = offendingToken.getStartIndex() - CONTEXT_TRUNCATION_THRESHOLD;
if (contextStartIndex < 3) { // The ellipses won't save us anything below the first 4 characters
return query.substring(0, offendingToken.getStopIndex() + 1);
}
return "..." + query.substring(contextStartIndex, offendingToken.getStopIndex() + 1);
Swiddis marked this conversation as resolved.
Show resolved Hide resolved
}

private List<String> topSuggestions(Recognizer<?, ?> recognizer, IntervalSet continuations) {
Vocabulary vocab = recognizer.getVocabulary();
List<String> tokenNames = new ArrayList<>(SUGGESTION_TRUNCATION_THRESHOLD);
for (int tokenType :
continuations
.toList()
.subList(0, Math.min(continuations.size(), SUGGESTION_TRUNCATION_THRESHOLD))) {
tokenNames.add(vocab.getDisplayName(tokenType));
}
return tokenNames;
}

/**
* As official JavaDoc says, e=null means parser was able to recover from the error. In other
* words, "msg" argument includes the information we want.
*/
private String getDetails(Recognizer<?, ?> recognizer, String msg, RecognitionException e) {
String details;
if (e == null) {
details = msg;
private String getDetails(Recognizer<?, ?> recognizer, String msg, RecognitionException ex) {
if (ex == null) {
// According to the ANTLR docs, ex == null means the parser was able to recover from the
// error.
// In such cases, `msg` includes the raw error information we care about.
return msg;
}

IntervalSet possibleContinuations = ex.getExpectedTokens();
List<String> suggestions = topSuggestions(recognizer, possibleContinuations);

StringBuilder details = new StringBuilder("Expecting ");
if (possibleContinuations.size() > SUGGESTION_TRUNCATION_THRESHOLD) {
details
.append("one of ")
.append(suggestions.size())
.append(" possible tokens. Some examples: ")
.append(String.join(", ", suggestions))
.append(", ...");
} else {
IntervalSet followSet = e.getExpectedTokens();
details = "Expecting tokens in " + followSet.toString(recognizer.getVocabulary());
details.append("tokens: ").append(String.join(", ", suggestions));
}
return details;
return details.toString();
}
}
16 changes: 8 additions & 8 deletions docs/user/dql/troubleshooting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,22 @@ Query:

.. code-block:: JSON

POST /_plugins/_sql
POST /_plugins/_ppl
{
"query" : "SELECT * FROM sample:data"
"query" : "SOURCE = test_index | where a > 0)"
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This SQL error no longer outputs the same error message (new parsing engine?). I couldn't hit the ANTLR exception with a new SQL query, so I updated it to a PPL one.

}

Result:

.. code-block:: JSON

{
"reason": "Invalid SQL query",
"details": "Failed to parse query due to offending symbol [:] at: 'SELECT * FROM xxx WHERE xxx:' <--- HERE...
More details: Expecting tokens in {<EOF>, 'AND', 'BETWEEN', 'GROUP', 'HAVING', 'IN', 'IS', 'LIKE', 'LIMIT',
'NOT', 'OR', 'ORDER', 'REGEXP', '*', '/', '%', '+', '-', 'DIV', 'MOD', '=', '>', '<', '!',
'|', '&', '^', '.', DOT_ID}",
"type": "SyntaxAnalysisException"
"error": {
"reason": "Invalid Query",
"details": "[)] is not a valid term at this part of the query: '..._index | where a > 0)' <-- HERE. extraneous input ')' expecting <EOF>",
"type": "SyntaxCheckException"
},
"status": 400
}

**Workaround**
Expand Down
Loading
Loading