Skip to content

Commit c64c86a

Browse files
committed
refactor the implementation
Signed-off-by: Kai Huang <[email protected]>
1 parent 83ff62f commit c64c86a

File tree

8 files changed

+160
-649
lines changed

8 files changed

+160
-649
lines changed

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 142 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,6 @@
142142
import org.opensearch.sql.calcite.utils.JoinAndLookupUtils;
143143
import org.opensearch.sql.calcite.utils.PlanUtils;
144144
import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils;
145-
import org.opensearch.sql.calcite.utils.WildcardReplaceUtils;
146145
import org.opensearch.sql.calcite.utils.WildcardUtils;
147146
import org.opensearch.sql.common.patterns.PatternUtils;
148147
import org.opensearch.sql.common.utils.StringUtils;
@@ -2504,11 +2503,35 @@ public RelNode visitReplace(Replace node, CalcitePlanContext context) {
25042503
String replacementStr = pair.getReplacement().getValue().toString();
25052504

25062505
if (patternStr.contains("*")) {
2507-
WildcardReplaceUtils.validateWildcardSymmetry(patternStr, replacementStr);
2506+
// Wildcard pattern: convert to regex at planning time
2507+
validateWildcardSymmetry(patternStr, replacementStr);
25082508

2509+
// Convert wildcard pattern to regex pattern (e.g., "*ada" → "^(.*?)ada$")
2510+
String regexPattern = convertWildcardPatternToRegex(patternStr);
2511+
// Convert wildcard replacement to regex replacement (e.g., "*_*" → "$1_$2")
2512+
String regexReplacement = convertWildcardReplacementToRegex(replacementStr);
2513+
2514+
// Create regex pattern and replacement literals
2515+
RexNode regexPatternNode =
2516+
context.rexBuilder.makeLiteral(
2517+
regexPattern,
2518+
context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR),
2519+
true);
2520+
RexNode regexReplacementNode =
2521+
context.rexBuilder.makeLiteral(
2522+
regexReplacement,
2523+
context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR),
2524+
true);
2525+
2526+
// Use Calcite's REGEXP_REPLACE operator
25092527
fieldRef =
2510-
buildWildcardReplaceExpression(fieldRef, patternNode, replacementNode, context);
2528+
context.rexBuilder.makeCall(
2529+
org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_3,
2530+
fieldRef,
2531+
regexPatternNode,
2532+
regexReplacementNode);
25112533
} else {
2534+
// Literal pattern: use standard REPLACE
25122535
fieldRef =
25132536
context.relBuilder.call(
25142537
SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode);
@@ -2526,16 +2549,124 @@ public RelNode visitReplace(Replace node, CalcitePlanContext context) {
25262549
return context.relBuilder.peek();
25272550
}
25282551

2529-
/** Build a RexNode for wildcard-based replacement. */
2530-
private RexNode buildWildcardReplaceExpression(
2531-
RexNode fieldRex, RexNode patternNode, RexNode replacementNode, CalcitePlanContext context) {
2532-
return context.rexBuilder.makeCall(
2533-
org.opensearch.sql.expression.function.PPLBuiltinOperators.WILDCARD_REPLACE,
2534-
fieldRex,
2535-
patternNode,
2536-
replacementNode);
2552+
// ============ Wildcard-to-Regex Conversion Utilities ============
2553+
2554+
/**
2555+
* Convert a wildcard pattern to a regex pattern string. Example: "*ada" → "^(.*?)ada$" Example:
2556+
* "* - *" → "^(.*?) - (.*?)$" Handles escape sequences: \* (literal asterisk), \\ (literal
2557+
* backslash)
2558+
*/
2559+
private static String convertWildcardPatternToRegex(String wildcardPattern) {
2560+
String[] parts = splitWildcards(wildcardPattern);
2561+
StringBuilder regexBuilder = new StringBuilder("^");
2562+
2563+
for (int i = 0; i < parts.length; i++) {
2564+
regexBuilder.append(java.util.regex.Pattern.quote(parts[i]));
2565+
if (i < parts.length - 1) {
2566+
regexBuilder.append("(.*?)"); // Non-greedy capture group for wildcard
2567+
}
2568+
}
2569+
regexBuilder.append("$");
2570+
2571+
return regexBuilder.toString();
2572+
}
2573+
2574+
/**
2575+
* Convert a wildcard replacement to a regex replacement string. Example: "*_*" → "$1_$2" Example:
2576+
* "SELLER" → "SELLER" (no wildcards) Handles escape sequences: \* (literal asterisk), \\ (literal
2577+
* backslash)
2578+
*/
2579+
private static String convertWildcardReplacementToRegex(String wildcardReplacement) {
2580+
if (!wildcardReplacement.contains("*")) {
2581+
return wildcardReplacement; // No wildcards = literal replacement
2582+
}
2583+
2584+
StringBuilder result = new StringBuilder();
2585+
int captureIndex = 1; // Regex capture groups start at $1
2586+
boolean escaped = false;
2587+
2588+
for (char c : wildcardReplacement.toCharArray()) {
2589+
if (escaped) {
2590+
// Handle escape sequences: \* or \\
2591+
result.append(c);
2592+
escaped = false;
2593+
} else if (c == '\\') {
2594+
escaped = true;
2595+
} else if (c == '*') {
2596+
// Replace wildcard with $1, $2, etc.
2597+
result.append('$').append(captureIndex++);
2598+
} else {
2599+
result.append(c);
2600+
}
2601+
}
2602+
2603+
return result.toString();
2604+
}
2605+
2606+
/**
2607+
* Split pattern on unescaped wildcards, handling escape sequences. Supports: \* (literal
2608+
* asterisk), \\ (literal backslash)
2609+
*/
2610+
private static String[] splitWildcards(String pattern) {
2611+
List<String> parts = new ArrayList<>();
2612+
StringBuilder current = new StringBuilder();
2613+
boolean escaped = false;
2614+
2615+
for (char c : pattern.toCharArray()) {
2616+
if (escaped) {
2617+
current.append(c);
2618+
escaped = false;
2619+
} else if (c == '\\') {
2620+
escaped = true;
2621+
} else if (c == '*') {
2622+
parts.add(current.toString());
2623+
current = new StringBuilder();
2624+
} else {
2625+
current.append(c);
2626+
}
2627+
}
2628+
2629+
if (escaped) {
2630+
throw new IllegalArgumentException(
2631+
"Invalid escape sequence: pattern ends with unescaped backslash");
2632+
}
2633+
2634+
parts.add(current.toString());
2635+
return parts.toArray(new String[0]);
25372636
}
25382637

2638+
/** Count the number of unescaped wildcards in a string. */
2639+
private static int countWildcards(String str) {
2640+
int count = 0;
2641+
boolean escaped = false;
2642+
for (char c : str.toCharArray()) {
2643+
if (escaped) {
2644+
escaped = false;
2645+
} else if (c == '\\') {
2646+
escaped = true;
2647+
} else if (c == '*') {
2648+
count++;
2649+
}
2650+
}
2651+
return count;
2652+
}
2653+
2654+
/** Validate wildcard symmetry between pattern and replacement. */
2655+
private static void validateWildcardSymmetry(String pattern, String replacement) {
2656+
int patternWildcards = countWildcards(pattern);
2657+
int replacementWildcards = countWildcards(replacement);
2658+
2659+
if (replacementWildcards != 0 && replacementWildcards != patternWildcards) {
2660+
throw new IllegalArgumentException(
2661+
String.format(
2662+
"Error in 'replace' command: Wildcard count mismatch - pattern has %d wildcard(s), "
2663+
+ "replacement has %d. Replacement must have same number of wildcards or none.",
2664+
patternWildcards, replacementWildcards));
2665+
}
2666+
}
2667+
2668+
// ============ End Wildcard Utilities ============
2669+
25392670
private void buildParseRelNode(Parse node, CalcitePlanContext context) {
25402671
RexNode sourceField = rexVisitor.analyze(node.getSourceField(), context);
25412672
ParseMethod parseMethod = node.getParseMethod();

core/src/main/java/org/opensearch/sql/calcite/utils/WildcardReplaceUtils.java

Lines changed: 0 additions & 204 deletions
This file was deleted.

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,8 +333,7 @@ public enum BuiltinFunctionName {
333333
INTERNAL_REGEXP_REPLACE_3(FunctionName.of("regexp_replace_3"), true),
334334
INTERNAL_REGEXP_REPLACE_PG_4(FunctionName.of("regexp_replace_pg_4"), true),
335335
INTERNAL_REGEXP_REPLACE_5(FunctionName.of("regexp_replace_5"), true),
336-
INTERNAL_TRANSLATE3(FunctionName.of("translate3"), true),
337-
INTERNAL_WILDCARD_REPLACE(FunctionName.of("wildcard_replace"), true);
336+
INTERNAL_TRANSLATE3(FunctionName.of("translate3"), true);
338337

339338
private final FunctionName name;
340339
private boolean isInternal;

0 commit comments

Comments
 (0)