Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,23 @@
/**
* ScalarFunction 'search' - simplified architecture similar to MultiMatch.
* Handles DSL parsing and generates SearchPredicate during translation.
* <p>
* Supports 1-3 parameters:
* - search(dsl_string): Traditional usage
* - search(dsl_string, default_field): Simplified syntax with default field
* - search(dsl_string, default_field, default_operator): Full control over expansion
*/
public class Search extends ScalarFunction
implements ExplicitlyCastableSignature, AlwaysNotNullable {

public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(BooleanType.INSTANCE).varArgs(StringType.INSTANCE)
// Original signature: search(dsl_string)
FunctionSignature.ret(BooleanType.INSTANCE).args(StringType.INSTANCE),
// With default field: search(dsl_string, default_field)
FunctionSignature.ret(BooleanType.INSTANCE).args(StringType.INSTANCE, StringType.INSTANCE),
// With default field and operator: search(dsl_string, default_field, default_operator)
FunctionSignature.ret(BooleanType.INSTANCE).args(StringType.INSTANCE, StringType.INSTANCE,
StringType.INSTANCE)
);

public Search(Expression... varArgs) {
Expand All @@ -51,7 +62,8 @@ private Search(ScalarFunctionParams functionParams) {

@Override
public Search withChildren(List<Expression> children) {
Preconditions.checkArgument(children.size() >= 1);
Preconditions.checkArgument(children.size() >= 1 && children.size() <= 3,
"search() requires 1-3 arguments");
return new Search(getFunctionParams(children));
}

Expand All @@ -76,13 +88,41 @@ public String getDslString() {
return dslArg.toString();
}

/**
 * Returns the default field supplied as the optional second argument of search().
 *
 * @return {@code null} when fewer than two arguments are present; the literal's string value
 *         when the argument is a {@code StringLikeLiteral}; otherwise the argument's
 *         {@code toString()} form
 */
public String getDefaultField() {
    final int fieldIndex = 1;
    if (children().size() <= fieldIndex) {
        // Argument was not supplied.
        return null;
    }
    Expression second = child(fieldIndex);
    return second instanceof StringLikeLiteral
            ? ((StringLikeLiteral) second).getStringValue()
            : second.toString();
}

/**
 * Returns the default operator supplied as the optional third argument of search().
 *
 * @return {@code null} when fewer than three arguments are present; the literal's string value
 *         when the argument is a {@code StringLikeLiteral}; otherwise the argument's
 *         {@code toString()} form
 */
public String getDefaultOperator() {
    if (children().size() >= 3) {
        Expression third = child(2);
        if (third instanceof StringLikeLiteral) {
            StringLikeLiteral literal = (StringLikeLiteral) third;
            return literal.getStringValue();
        }
        return third.toString();
    }
    // Argument was not supplied.
    return null;
}

/**
* Get parsed DSL plan - deferred to translation phase
* This will be handled by SearchPredicate during ExpressionTranslator.visitSearch()
*/
public SearchDslParser.QsPlan getQsPlan() {
// Lazy evaluation will be handled in SearchPredicate
return SearchDslParser.parseDsl(getDslString());
return SearchDslParser.parseDsl(getDslString(), getDefaultField(), getDefaultOperator());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,32 @@ public class SearchDslParser {
* Parse DSL string and return intermediate representation
*/
public static QsPlan parseDsl(String dsl) {
// Single-argument form: delegates to the three-argument overload, passing null for both
// the default field and the default operator.
return parseDsl(dsl, null, null);
}

/**
* Parse DSL string with default field and operator support
*
* @param dsl DSL query string
* @param defaultField Default field name when DSL doesn't specify field (optional)
* @param defaultOperator Default operator ("and" or "or") for multi-term queries (optional, defaults to "or")
* @return Parsed QsPlan
*/
public static QsPlan parseDsl(String dsl, String defaultField, String defaultOperator) {
if (dsl == null || dsl.trim().isEmpty()) {
return new QsPlan(new QsNode(QsClauseType.TERM, "error", "empty_dsl"), new ArrayList<>());
}

// Expand simplified DSL if default field is provided
String expandedDsl = dsl;
if (defaultField != null && !defaultField.trim().isEmpty()) {
expandedDsl = expandSimplifiedDsl(dsl.trim(), defaultField.trim(),
normalizeDefaultOperator(defaultOperator));
}

try {
// Create ANTLR lexer and parser
SearchLexer lexer = new SearchLexer(new ANTLRInputStream(dsl));
SearchLexer lexer = new SearchLexer(new ANTLRInputStream(expandedDsl));
CommonTokenStream tokens = new CommonTokenStream(lexer);
SearchParser parser = new SearchParser(tokens);

Expand Down Expand Up @@ -107,11 +126,271 @@ public void syntaxError(org.antlr.v4.runtime.Recognizer<?, ?> recognizer,
return new QsPlan(root, bindings);

} catch (Exception e) {
LOG.error("Failed to parse search DSL: '{}'", dsl, e);
LOG.error("Failed to parse search DSL: '{}' (expanded: '{}')", dsl, expandedDsl, e);
throw new RuntimeException("Invalid search DSL syntax: " + dsl + ". Error: " + e.getMessage(), e);
}
}

/**
 * Maps a user-supplied default operator onto the canonical lowercase form.
 *
 * @param operator raw operator text; may be {@code null}, blank, or padded with whitespace
 * @return {@code "and"} or {@code "or"}; {@code "or"} when the input is null or blank
 * @throws IllegalArgumentException if the trimmed, lower-cased value is neither "and" nor "or"
 */
private static String normalizeDefaultOperator(String operator) {
    // Fold the null case into the blank case so a single switch covers every outcome.
    String candidate = (operator == null) ? "" : operator.trim().toLowerCase();
    switch (candidate) {
        case "":
            // Nothing specified: fall back to OR.
            return "or";
        case "and":
        case "or":
            return candidate;
        default:
            throw new IllegalArgumentException("Invalid default operator: " + operator
                    + ". Must be 'and' or 'or'");
    }
}
Comment on lines +137 to +147
Copy link

Copilot AI Oct 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The null/empty check and the trimming logic are duplicated. Consider handling null and trimming in one place: `String trimmed = (operator == null) ? "" : operator.trim(); if (trimmed.isEmpty()) return "or";` This reduces redundancy and makes the intent clearer.

Copilot uses AI. Check for mistakes.

/**
 * Rewrites a field-less ("simplified") DSL string into the full DSL by attaching the default
 * field.
 * <p>
 * Rules, applied in order:
 * <ol>
 *   <li>DSL that already has a field reference is returned unchanged.</li>
 *   <li>DSL starting with EXACT(/ANY(/ALL(/IN( is prefixed with {@code field:}.</li>
 *   <li>DSL with explicit AND/OR/NOT gets the field prefix added around the operators.</li>
 *   <li>Zero or one token: {@code field:} plus the DSL or the single token.</li>
 *   <li>Several tokens with a wildcard: one {@code field:token} per token, joined with
 *       " AND " or " OR ".</li>
 *   <li>Several tokens without wildcards: {@code field:ALL(dsl)} or {@code field:ANY(dsl)}.</li>
 * </ol>
 * Examples:
 * - "foo bar" + field="tags" + operator="and" → "tags:ALL(foo bar)"
 * - "foo* bar*" + field="tags" + operator="and" → "tags:foo* AND tags:bar*"
 * - "foo OR bar" + field="tags" → "tags:foo OR tags:bar"
 * - "EXACT(foo bar)" + field="tags" → "tags:EXACT(foo bar)"
 *
 * @param dsl Simple DSL string
 * @param defaultField Default field name
 * @param defaultOperator "and" selects AND/ALL; any other value selects OR/ANY
 * @return Expanded full DSL
 */
private static String expandSimplifiedDsl(String dsl, String defaultField, String defaultOperator) {
    final String prefix = defaultField + ":";

    if (containsFieldReference(dsl)) {
        return dsl;
    }
    if (startsWithFunction(dsl)) {
        return prefix + dsl;
    }
    if (containsExplicitOperators(dsl)) {
        return addFieldPrefixToOperatorExpression(dsl, defaultField);
    }

    List<String> tokens = tokenizeDsl(dsl);
    if (tokens.isEmpty()) {
        return prefix + dsl;
    }
    if (tokens.size() == 1) {
        return prefix + tokens.get(0);
    }

    final boolean conjunctive = "and".equals(defaultOperator);

    boolean anyWildcard = false;
    for (String token : tokens) {
        if (containsWildcard(token)) {
            anyWildcard = true;
            break;
        }
    }

    if (!anyWildcard) {
        // Plain multi-term query: wrap the original text in ALL(...) or ANY(...).
        return prefix + (conjunctive ? "ALL" : "ANY") + "(" + dsl + ")";
    }

    // Wildcard terms are emitted as separate field queries joined by the boolean operator.
    final String separator = conjunctive ? " AND " : " OR ";
    StringBuilder joined = new StringBuilder();
    for (String token : tokens) {
        if (joined.length() > 0) {
            joined.append(separator);
        }
        joined.append(prefix).append(token);
    }
    return joined.toString();
}

/**
 * Reports whether the DSL contains a colon outside double-quoted and slash-delimited sections.
 * <p>
 * A double quote toggles the quoted state unless it is directly preceded by a backslash; a
 * slash outside quotes toggles the regex state.
 *
 * @param dsl DSL text to scan
 * @return true as soon as a colon is found outside both states, false otherwise
 */
private static boolean containsFieldReference(String dsl) {
    boolean quoted = false;
    boolean regex = false;
    final int length = dsl.length();
    for (int pos = 0; pos < length; pos++) {
        switch (dsl.charAt(pos)) {
            case '"':
                if (pos == 0 || dsl.charAt(pos - 1) != '\\') {
                    quoted = !quoted;
                }
                break;
            case '/':
                if (!quoted) {
                    regex = !regex;
                }
                break;
            case ':':
                if (!quoted && !regex) {
                    return true;
                }
                break;
            default:
                break;
        }
    }
    return false;
}

/**
 * Reports whether the DSL begins with one of the function keywords EXACT, ANY, ALL or IN
 * immediately followed by an opening parenthesis. The comparison is done on the upper-cased
 * input.
 *
 * @param dsl DSL text to inspect
 * @return true if the upper-cased DSL starts with "EXACT(", "ANY(", "ALL(" or "IN("
 */
private static boolean startsWithFunction(String dsl) {
    final String head = dsl.toUpperCase();
    final String[] keywords = {"EXACT(", "ANY(", "ALL(", "IN("};
    for (String keyword : keywords) {
        if (head.startsWith(keyword)) {
            return true;
        }
    }
    return false;
}

/**
* Check if DSL contains explicit boolean operators (AND/OR/NOT).
* <p>
* The input is upper-cased first, so the keywords match regardless of case. AND and OR must
* have whitespace on both sides; NOT must either start the string and be followed by
* whitespace, or have whitespace on both sides.
* <p>
* NOTE(review): the patterns run on the raw text with no quote handling, so a keyword inside
* a quoted phrase also matches. {@code String.matches} uses {@code .} without DOTALL, so the
* text around the keyword presumably cannot span a line break - confirm whether multi-line
* DSL is expected.
*
* @param dsl DSL text to inspect
* @return true if any of the three patterns matches the whole upper-cased string
*/
private static boolean containsExplicitOperators(String dsl) {
// Look for standalone AND/OR/NOT keywords (not part of field names)
String upper = dsl.toUpperCase();
// matches() must cover the entire string, hence the leading and trailing ".*".
return upper.matches(".*\\s+(AND|OR)\\s+.*")
|| upper.matches("^NOT\\s+.*")
|| upper.matches(".*\\s+NOT\\s+.*");
Comment on lines +242 to +244
Copy link

Copilot AI Oct 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Multiple regex matches on the same string can be inefficient. Consider using a single compiled Pattern or combining these patterns into one regex: Pattern.matches(\"^NOT\\s+.*|.*\\s+(AND|OR|NOT)\\s+.*\", upper) to improve performance for frequently called code.

Copilot uses AI. Check for mistakes.
}

/**
* Add field prefix to expressions with explicit operators
* Example: "foo AND bar" → "field:foo AND field:bar"
* <p>
* The DSL is scanned one character at a time. Non-whitespace characters accumulate in a
* pending term; when an AND/OR/NOT operator is recognised the pending term is emitted as
* {@code defaultField:term}, followed by the operator in its original case. Output pieces
* are separated by single spaces.
* <p>
* NOTE(review): the operator check runs at every non-whitespace position, not only at the
* start of a word, so a word that ends in "and", "or" or "not" and is followed by a space or
* tab is split (e.g. "for AND x" sees an OR inside "for"). Whitespace is skipped and never
* copied into the pending term, so multi-word operands are concatenated ("foo bar AND baz"
* yields "field:foobar AND field:baz"). Quotes and parentheses get no special treatment.
* Confirm the intended behaviour against the grammar before relying on these cases.
*
* @param dsl DSL text that contains explicit boolean operators
* @param defaultField field name to put in front of every operand
* @return the rewritten DSL, trimmed
*/
private static String addFieldPrefixToOperatorExpression(String dsl, String defaultField) {
StringBuilder result = new StringBuilder();
// Characters of the operand currently being read; flushed when an operator or the end is hit.
StringBuilder currentTerm = new StringBuilder();
int i = 0;

while (i < dsl.length()) {
// Skip whitespace
while (i < dsl.length() && Character.isWhitespace(dsl.charAt(i))) {
i++;
}
if (i >= dsl.length()) {
break;
}

// Try to match operators
String remaining = dsl.substring(i);
String upperRemaining = remaining.toUpperCase();

// An operator counts only when followed by a space or a tab, or when it is all that remains.
// NOTE(review): other whitespace after the keyword (e.g. a newline) is not accepted here.
if (upperRemaining.startsWith("AND ") || upperRemaining.startsWith("AND\t")
|| (upperRemaining.equals("AND") && i + 3 >= dsl.length())) {
Comment on lines +269 to +270
Copy link

Copilot AI Oct 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The operator matching logic for AND, OR, and NOT (lines 269-317) contains duplicated patterns. Consider extracting a helper method matchOperator(String remaining, String operator, int length) to reduce code duplication and improve maintainability.

Copilot uses AI. Check for mistakes.
// Found AND operator
// Flush the pending operand first, then emit the operator itself.
if (currentTerm.length() > 0) {
if (result.length() > 0) {
result.append(" ");
}
result.append(defaultField).append(":").append(currentTerm.toString().trim());
currentTerm.setLength(0);
}
if (result.length() > 0) {
result.append(" ");
}
result.append(dsl.substring(i, i + 3)); // Preserve original case
i += 3;
continue;
} else if (upperRemaining.startsWith("OR ") || upperRemaining.startsWith("OR\t")
|| (upperRemaining.equals("OR") && i + 2 >= dsl.length())) {
// Found OR operator
// Same handling as AND, with a two-character keyword.
if (currentTerm.length() > 0) {
if (result.length() > 0) {
result.append(" ");
}
result.append(defaultField).append(":").append(currentTerm.toString().trim());
currentTerm.setLength(0);
}
if (result.length() > 0) {
result.append(" ");
}
result.append(dsl.substring(i, i + 2)); // Preserve original case
i += 2;
continue;
} else if (upperRemaining.startsWith("NOT ") || upperRemaining.startsWith("NOT\t")
|| (upperRemaining.equals("NOT") && i + 3 >= dsl.length())) {
// Found NOT operator
// Same handling as AND; NOT is copied through like the binary operators.
if (currentTerm.length() > 0) {
if (result.length() > 0) {
result.append(" ");
}
result.append(defaultField).append(":").append(currentTerm.toString().trim());
currentTerm.setLength(0);
}
if (result.length() > 0) {
result.append(" ");
}
result.append(dsl.substring(i, i + 3)); // Preserve original case
i += 3;
continue;
}

// Not an operator, accumulate term
currentTerm.append(dsl.charAt(i));
i++;
}

// Add last term
if (currentTerm.length() > 0) {
if (result.length() > 0) {
result.append(" ");
}
result.append(defaultField).append(":").append(currentTerm.toString().trim());
}

return result.toString().trim();
}

/**
 * Splits the DSL into whitespace-separated terms, keeping double-quoted sections and
 * parenthesised groups together as part of a single term.
 * <p>
 * A double quote toggles the quoted state unless it is directly preceded by a backslash.
 * Parentheses outside quotes adjust a depth counter; whitespace stops splitting terms from
 * an opening parenthesis until a closing parenthesis brings the counter back to zero.
 *
 * @param dsl DSL text to split
 * @return the terms in order of appearance; empty when the DSL has no non-whitespace content
 */
private static List<String> tokenizeDsl(String dsl) {
    List<String> tokens = new ArrayList<>();
    StringBuilder buffer = new StringBuilder();
    boolean quoted = false;
    boolean grouped = false;
    int depth = 0;

    final int length = dsl.length();
    for (int pos = 0; pos < length; pos++) {
        char ch = dsl.charAt(pos);
        boolean unescapedQuote = ch == '"' && (pos == 0 || dsl.charAt(pos - 1) != '\\');

        if (unescapedQuote) {
            quoted = !quoted;
        } else if (!quoted && ch == '(') {
            depth++;
            grouped = true;
        } else if (!quoted && ch == ')') {
            depth--;
            // The group closes only when the counter returns to exactly zero.
            grouped = grouped && depth != 0;
        } else if (!quoted && !grouped && Character.isWhitespace(ch)) {
            // Separator: finish the pending term and drop the whitespace itself.
            if (buffer.length() > 0) {
                tokens.add(buffer.toString());
                buffer.setLength(0);
            }
            continue;
        }
        buffer.append(ch);
    }

    if (buffer.length() > 0) {
        tokens.add(buffer.toString());
    }
    return tokens;
}

/**
 * Reports whether a term contains a wildcard character ({@code *} or {@code ?}).
 * Terms that both start and end with a double quote, or both start and end with a slash,
 * are never treated as wildcard terms.
 *
 * @param term single term to inspect
 * @return true if the term is neither quote- nor slash-delimited and contains * or ?
 */
private static boolean containsWildcard(String term) {
    final boolean quotedPhrase = term.startsWith("\"") && term.endsWith("\"");
    final boolean regexLiteral = term.startsWith("/") && term.endsWith("/");
    return !quotedPhrase
            && !regexLiteral
            && (term.indexOf('*') >= 0 || term.indexOf('?') >= 0);
}

/**
* Clause types supported
*/
Expand Down
Loading
Loading