Skip to content

Commit 8aa0ffc

Browse files
authored
Support wildcard for replace command (#4698) (#4741)
(cherry picked from commit f97adb0) Signed-off-by: Kai Huang <[email protected]>
1 parent 40c5944 commit 8aa0ffc

File tree

9 files changed

+733
-25
lines changed

9 files changed

+733
-25
lines changed

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2872,9 +2872,39 @@ public RelNode visitReplace(Replace node, CalcitePlanContext context) {
28722872
for (ReplacePair pair : node.getReplacePairs()) {
28732873
RexNode patternNode = rexVisitor.analyze(pair.getPattern(), context);
28742874
RexNode replacementNode = rexVisitor.analyze(pair.getReplacement(), context);
2875-
fieldRef =
2876-
context.relBuilder.call(
2877-
SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode);
2875+
2876+
String patternStr = pair.getPattern().getValue().toString();
2877+
String replacementStr = pair.getReplacement().getValue().toString();
2878+
2879+
if (patternStr.contains("*")) {
2880+
WildcardUtils.validateWildcardSymmetry(patternStr, replacementStr);
2881+
2882+
String regexPattern = WildcardUtils.convertWildcardPatternToRegex(patternStr);
2883+
String regexReplacement =
2884+
WildcardUtils.convertWildcardReplacementToRegex(replacementStr);
2885+
2886+
RexNode regexPatternNode =
2887+
context.rexBuilder.makeLiteral(
2888+
regexPattern,
2889+
context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR),
2890+
true);
2891+
RexNode regexReplacementNode =
2892+
context.rexBuilder.makeLiteral(
2893+
regexReplacement,
2894+
context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR),
2895+
true);
2896+
2897+
fieldRef =
2898+
context.rexBuilder.makeCall(
2899+
org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_3,
2900+
fieldRef,
2901+
regexPatternNode,
2902+
regexReplacementNode);
2903+
} else {
2904+
fieldRef =
2905+
context.relBuilder.call(
2906+
SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode);
2907+
}
28782908
}
28792909

28802910
projectList.add(fieldRef);

core/src/main/java/org/opensearch/sql/calcite/utils/WildcardUtils.java

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
package org.opensearch.sql.calcite.utils;
77

8+
import java.util.ArrayList;
89
import java.util.List;
910
import java.util.stream.Collectors;
1011

@@ -92,4 +93,141 @@ private static boolean matchesCompiledPattern(String[] parts, String fieldName)
9293
public static boolean containsWildcard(String str) {
9394
return str != null && str.contains(WILDCARD);
9495
}
96+
97+
/**
98+
* Converts a wildcard pattern to a regex pattern.
99+
*
100+
* <p>Example: "*ada" → "^(.*?)ada$"
101+
*
102+
* @param wildcardPattern wildcard pattern with '*' and escape sequences (\*, \\)
103+
* @return regex pattern with capture groups
104+
*/
105+
public static String convertWildcardPatternToRegex(String wildcardPattern) {
106+
String[] parts = splitWildcards(wildcardPattern);
107+
StringBuilder regexBuilder = new StringBuilder("^");
108+
109+
for (int i = 0; i < parts.length; i++) {
110+
regexBuilder.append(java.util.regex.Pattern.quote(parts[i]));
111+
if (i < parts.length - 1) {
112+
regexBuilder.append("(.*?)"); // Non-greedy capture group for wildcard
113+
}
114+
}
115+
regexBuilder.append("$");
116+
117+
return regexBuilder.toString();
118+
}
119+
120+
/**
121+
* Converts a wildcard replacement string to a regex replacement string.
122+
*
123+
* <p>Example: "*_*" → "$1_$2"
124+
*
125+
* @param wildcardReplacement replacement string with '*' and escape sequences (\*, \\)
126+
* @return regex replacement string with capture group references
127+
*/
128+
public static String convertWildcardReplacementToRegex(String wildcardReplacement) {
129+
if (!wildcardReplacement.contains("*")) {
130+
return wildcardReplacement; // No wildcards = literal replacement
131+
}
132+
133+
StringBuilder result = new StringBuilder();
134+
int captureIndex = 1; // Regex capture groups start at $1
135+
boolean escaped = false;
136+
137+
for (char c : wildcardReplacement.toCharArray()) {
138+
if (escaped) {
139+
// Handle escape sequences: \* or \\
140+
result.append(c);
141+
escaped = false;
142+
} else if (c == '\\') {
143+
escaped = true;
144+
} else if (c == '*') {
145+
// Replace wildcard with $1, $2, etc.
146+
result.append('$').append(captureIndex++);
147+
} else {
148+
result.append(c);
149+
}
150+
}
151+
152+
return result.toString();
153+
}
154+
155+
/**
156+
* Splits a wildcard pattern into parts separated by unescaped wildcards.
157+
*
158+
* <p>Example: "a*b*c" → ["a", "b", "c"]
159+
*
160+
* @param pattern wildcard pattern with escape sequences
161+
* @return array of pattern parts
162+
*/
163+
private static String[] splitWildcards(String pattern) {
164+
List<String> parts = new ArrayList<>();
165+
StringBuilder current = new StringBuilder();
166+
boolean escaped = false;
167+
168+
for (char c : pattern.toCharArray()) {
169+
if (escaped) {
170+
current.append(c);
171+
escaped = false;
172+
} else if (c == '\\') {
173+
escaped = true;
174+
} else if (c == '*') {
175+
parts.add(current.toString());
176+
current = new StringBuilder();
177+
} else {
178+
current.append(c);
179+
}
180+
}
181+
182+
if (escaped) {
183+
throw new IllegalArgumentException(
184+
"Invalid escape sequence: pattern ends with unescaped backslash");
185+
}
186+
187+
parts.add(current.toString());
188+
return parts.toArray(new String[0]);
189+
}
190+
191+
/**
192+
* Counts the number of unescaped wildcards in a string.
193+
*
194+
* @param str string to count wildcards in
195+
* @return number of unescaped wildcards
196+
*/
197+
private static int countWildcards(String str) {
198+
int count = 0;
199+
boolean escaped = false;
200+
for (char c : str.toCharArray()) {
201+
if (escaped) {
202+
escaped = false;
203+
} else if (c == '\\') {
204+
escaped = true;
205+
} else if (c == '*') {
206+
count++;
207+
}
208+
}
209+
return count;
210+
}
211+
212+
/**
213+
* Validates that wildcard count is symmetric between pattern and replacement.
214+
*
215+
* <p>Replacement must have either the same number of wildcards as the pattern, or zero wildcards.
216+
*
217+
* @param pattern wildcard pattern
218+
* @param replacement wildcard replacement
219+
* @throws IllegalArgumentException if wildcard counts are mismatched
220+
*/
221+
public static void validateWildcardSymmetry(String pattern, String replacement) {
222+
int patternWildcards = countWildcards(pattern);
223+
int replacementWildcards = countWildcards(replacement);
224+
225+
if (replacementWildcards != 0 && replacementWildcards != patternWildcards) {
226+
throw new IllegalArgumentException(
227+
String.format(
228+
"Error in 'replace' command: Wildcard count mismatch - pattern has %d wildcard(s), "
229+
+ "replacement has %d. Replacement must have same number of wildcards or none.",
230+
patternWildcards, replacementWildcards));
231+
}
232+
}
95233
}

0 commit comments

Comments
 (0)