opensearch-project · Swiddis · Jan 29, 2025 · Jan 29, 2025 · Jan 29, 2025 · Jan 29, 2025
@@ -5,19 +5,26 @@
 
 package org.opensearch.sql.common.antlr;
 
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Locale;
 import org.antlr.v4.runtime.BaseErrorListener;
 import org.antlr.v4.runtime.CommonTokenStream;
 import org.antlr.v4.runtime.RecognitionException;
 import org.antlr.v4.runtime.Recognizer;
 import org.antlr.v4.runtime.Token;
+import org.antlr.v4.runtime.Vocabulary;
 import org.antlr.v4.runtime.misc.IntervalSet;
 
 /**
  * Syntax analysis error listener that handles any syntax error by throwing exception with useful
  * information.
  */
 public class SyntaxAnalysisErrorListener extends BaseErrorListener {
+  // Show up to this many characters before the offending token in the query.
+  private static final int CONTEXT_TRUNCATION_THRESHOLD = 20;
+  // Avoid presenting too many alternatives when many are available.
+  private static final int SUGGESTION_TRUNCATION_THRESHOLD = 5;
 
   @Override
   public void syntaxError(
@@ -35,8 +42,7 @@ public void syntaxError(
     throw new SyntaxCheckException(
         String.format(
             Locale.ROOT,
-            "Failed to parse query due to offending symbol [%s] "
-                + "at: '%s' <--- HERE... More details: %s",
+            "[%s] is not a valid term at this part of the query: '%s' <-- HERE. %s",
             getOffendingText(offendingToken),
             truncateQueryAtOffendingToken(query, offendingToken),
             getDetails(recognizer, msg, e)));
@@ -47,21 +53,47 @@ private String getOffendingText(Token offendingToken) {
   }
 
   private String truncateQueryAtOffendingToken(String query, Token offendingToken) {
-    return query.substring(0, offendingToken.getStopIndex() + 1);
+    int contextStartIndex = offendingToken.getStartIndex() - CONTEXT_TRUNCATION_THRESHOLD;
+    if (contextStartIndex < 3) { // The ellipses won't save us anything below the first 4 characters
+      return query.substring(0, offendingToken.getStopIndex() + 1);
+    }
+    return "..." + query.substring(contextStartIndex, offendingToken.getStopIndex() + 1);
+  }
+
+  private List<String> topSuggestions(Recognizer<?, ?> recognizer, IntervalSet continuations) {
+    Vocabulary vocab = recognizer.getVocabulary();
+    List<String> tokenNames = new ArrayList<>(SUGGESTION_TRUNCATION_THRESHOLD);
+    for (int tokenType :
+        continuations
+            .toList()
+            .subList(0, Math.min(continuations.size(), SUGGESTION_TRUNCATION_THRESHOLD))) {
+      tokenNames.add(vocab.getDisplayName(tokenType));
+    }
+    return tokenNames;
   }
 
-  /**
-   * As official JavaDoc says, e=null means parser was able to recover from the error. In other
-   * words, "msg" argument includes the information we want.
-   */
-  private String getDetails(Recognizer<?, ?> recognizer, String msg, RecognitionException e) {
-    String details;
-    if (e == null) {
-      details = msg;
+  private String getDetails(Recognizer<?, ?> recognizer, String msg, RecognitionException ex) {
+    if (ex == null) {
+      // According to the ANTLR docs, ex == null means the parser was able to recover from the
+      // error.
+      // In such cases, `msg` includes the raw error information we care about.
+      return msg;
+    }
+
+    IntervalSet possibleContinuations = ex.getExpectedTokens();
+    List<String> suggestions = topSuggestions(recognizer, possibleContinuations);
+
+    StringBuilder details = new StringBuilder("Expecting ");
+    if (possibleContinuations.size() > SUGGESTION_TRUNCATION_THRESHOLD) {
+      details
+          .append("one of ")
+          .append(suggestions.size())
+          .append(" possible tokens. Some examples: ")
+          .append(String.join(", ", suggestions))
+          .append(", ...");
     } else {
-      IntervalSet followSet = e.getExpectedTokens();
-      details = "Expecting tokens in " + followSet.toString(recognizer.getVocabulary());
+      details.append("tokens: ").append(String.join(", ", suggestions));
     }
-    return details;
+    return details.toString();
   }
 }
@@ -25,22 +25,22 @@ Query:
 
 .. code-block:: JSON
 
-	POST /_plugins/_sql
+	POST /_plugins/_ppl
 	{
-	  "query" : "SELECT * FROM sample:data"
+	  "query" : "SOURCE = test_index | where a > 0)"
 	}
 
 Result:
 
 .. code-block:: JSON
 
     {
-      "reason": "Invalid SQL query",
-      "details": "Failed to parse query due to offending symbol [:] at: 'SELECT * FROM xxx WHERE xxx:' <--- HERE...
-        More details: Expecting tokens in {<EOF>, 'AND', 'BETWEEN', 'GROUP', 'HAVING', 'IN', 'IS', 'LIKE', 'LIMIT',
-        'NOT', 'OR', 'ORDER', 'REGEXP', '*', '/', '%', '+', '-', 'DIV', 'MOD', '=', '>', '<', '!',
-        '|', '&', '^', '.', DOT_ID}",
-      "type": "SyntaxAnalysisException"
+      "error": {
+        "reason": "Invalid Query",
+        "details": "[)] is not a valid term at this part of the query: '..._index | where a > 0)' <-- HERE. extraneous input ')' expecting <EOF>",
+        "type": "SyntaxCheckException"
+      },
+      "status": 400
     }
 
 **Workaround**