diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
index 573a51de2a7..09ad5d4009a 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
@@ -2854,9 +2854,39 @@ public RelNode visitReplace(Replace node, CalcitePlanContext context) {
for (ReplacePair pair : node.getReplacePairs()) {
RexNode patternNode = rexVisitor.analyze(pair.getPattern(), context);
RexNode replacementNode = rexVisitor.analyze(pair.getReplacement(), context);
- fieldRef =
- context.relBuilder.call(
- SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode);
+
+ String patternStr = pair.getPattern().getValue().toString();
+ String replacementStr = pair.getReplacement().getValue().toString();
+
+ if (patternStr.contains("*")) {
+ WildcardUtils.validateWildcardSymmetry(patternStr, replacementStr);
+
+ String regexPattern = WildcardUtils.convertWildcardPatternToRegex(patternStr);
+ String regexReplacement =
+ WildcardUtils.convertWildcardReplacementToRegex(replacementStr);
+
+ RexNode regexPatternNode =
+ context.rexBuilder.makeLiteral(
+ regexPattern,
+ context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR),
+ true);
+ RexNode regexReplacementNode =
+ context.rexBuilder.makeLiteral(
+ regexReplacement,
+ context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR),
+ true);
+
+ fieldRef =
+ context.rexBuilder.makeCall(
+ org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_3,
+ fieldRef,
+ regexPatternNode,
+ regexReplacementNode);
+ } else {
+ fieldRef =
+ context.relBuilder.call(
+ SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode);
+ }
}
projectList.add(fieldRef);
diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/WildcardUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/WildcardUtils.java
index 09552e97109..8558a5292b7 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/utils/WildcardUtils.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/utils/WildcardUtils.java
@@ -5,6 +5,7 @@
package org.opensearch.sql.calcite.utils;
+import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
@@ -92,4 +93,141 @@ private static boolean matchesCompiledPattern(String[] parts, String fieldName)
public static boolean containsWildcard(String str) {
return str != null && str.contains(WILDCARD);
}
+
+  /**
+   * Translates a wildcard pattern into an anchored regular expression.
+   *
+   * <p>The pattern is split on unescaped {@code *}. Each literal piece is quoted with {@link
+   * java.util.regex.Pattern#quote(String)}, the pieces are joined with the non-greedy capture group
+   * {@code (.*?)}, and the result is wrapped in {@code ^...$}.
+   *
+   * <p>Example: {@code "*ada"} → {@code "^\Q\E(.*?)\Qada\E$"}
+   *
+   * @param wildcardPattern wildcard pattern with '*' and escape sequences (\*, \\)
+   * @return regex pattern with one capture group per unescaped wildcard
+   * @throws IllegalArgumentException if the pattern ends with a backslash that escapes nothing
+   */
+  public static String convertWildcardPatternToRegex(String wildcardPattern) {
+    // The joiner's prefix/suffix supply the anchors; its delimiter supplies one capture group per
+    // wildcard that separated two literal pieces.
+    java.util.StringJoiner regex = new java.util.StringJoiner("(.*?)", "^", "$");
+    for (String literalPiece : splitWildcards(wildcardPattern)) {
+      regex.add(java.util.regex.Pattern.quote(literalPiece));
+    }
+    return regex.toString();
+  }
+
+ /**
+ * Converts a wildcard replacement string to a regex replacement string.
+ *
+ *
Example: "*_*" → "$1_$2"
+ *
+ * @param wildcardReplacement replacement string with '*' and escape sequences (\*, \\)
+ * @return regex replacement string with capture group references
+ */
+ public static String convertWildcardReplacementToRegex(String wildcardReplacement) {
+ if (!wildcardReplacement.contains("*")) {
+ return wildcardReplacement; // No wildcards = literal replacement
+ }
+
+ StringBuilder result = new StringBuilder();
+ int captureIndex = 1; // Regex capture groups start at $1
+ boolean escaped = false;
+
+ for (char c : wildcardReplacement.toCharArray()) {
+ if (escaped) {
+ // Handle escape sequences: \* or \\
+ result.append(c);
+ escaped = false;
+ } else if (c == '\\') {
+ escaped = true;
+ } else if (c == '*') {
+ // Replace wildcard with $1, $2, etc.
+ result.append('$').append(captureIndex++);
+ } else {
+ result.append(c);
+ }
+ }
+
+ return result.toString();
+ }
+
+  /**
+   * Splits a wildcard pattern into parts separated by unescaped wildcards.
+   *
+   * <p>A backslash makes the next character literal and is itself dropped from the output, so
+   * {@code \*} contributes a plain asterisk to the current part instead of splitting.
+   *
+   * <p>Example: "a*b*c" → ["a", "b", "c"]
+   *
+   * @param pattern wildcard pattern with escape sequences
+   * @return array of pattern parts; always one more element than there are unescaped wildcards
+   * @throws IllegalArgumentException if the pattern ends with a backslash that escapes nothing
+   */
+  private static String[] splitWildcards(String pattern) {
+    List<String> parts = new ArrayList<>();
+    StringBuilder current = new StringBuilder();
+    boolean escaped = false;
+
+    for (char c : pattern.toCharArray()) {
+      if (escaped) {
+        // Previous character was a backslash: take this one verbatim, whatever it is.
+        current.append(c);
+        escaped = false;
+      } else if (c == '\\') {
+        escaped = true;
+      } else if (c == '*') {
+        // Unescaped wildcard closes the current literal part (which may be empty).
+        parts.add(current.toString());
+        current = new StringBuilder();
+      } else {
+        current.append(c);
+      }
+    }
+
+    if (escaped) {
+      throw new IllegalArgumentException(
+          "Invalid escape sequence: pattern ends with unescaped backslash");
+    }
+
+    // Trailing literal after the last wildcard (empty when the pattern ends with '*').
+    parts.add(current.toString());
+    return parts.toArray(new String[0]);
+  }
+
+  /**
+   * Returns how many {@code *} characters in the string are not preceded by an escaping backslash.
+   *
+   * @param str string to count wildcards in
+   * @return number of unescaped wildcards
+   */
+  private static int countWildcards(String str) {
+    int wildcards = 0;
+    for (int pos = 0; pos < str.length(); pos++) {
+      char ch = str.charAt(pos);
+      if (ch == '\\') {
+        // A backslash consumes the character after it, so step over that character as well.
+        pos++;
+      } else if (ch == '*') {
+        wildcards++;
+      }
+    }
+    return wildcards;
+  }
+
+ /**
+ * Validates that wildcard count is symmetric between pattern and replacement.
+ *
+ * Replacement must have either the same number of wildcards as the pattern, or zero wildcards.
+ *
+ * @param pattern wildcard pattern
+ * @param replacement wildcard replacement
+ * @throws IllegalArgumentException if wildcard counts are mismatched
+ */
+ public static void validateWildcardSymmetry(String pattern, String replacement) {
+ int patternWildcards = countWildcards(pattern);
+ int replacementWildcards = countWildcards(replacement);
+
+ if (replacementWildcards != 0 && replacementWildcards != patternWildcards) {
+ throw new IllegalArgumentException(
+ String.format(
+ "Error in 'replace' command: Wildcard count mismatch - pattern has %d wildcard(s), "
+ + "replacement has %d. Replacement must have same number of wildcards or none.",
+ patternWildcards, replacementWildcards));
+ }
+ }
}
diff --git a/core/src/test/java/org/opensearch/sql/calcite/utils/WildcardUtilsTest.java b/core/src/test/java/org/opensearch/sql/calcite/utils/WildcardUtilsTest.java
index 53cc1d5163c..2e41de018a5 100644
--- a/core/src/test/java/org/opensearch/sql/calcite/utils/WildcardUtilsTest.java
+++ b/core/src/test/java/org/opensearch/sql/calcite/utils/WildcardUtilsTest.java
@@ -5,6 +5,11 @@
package org.opensearch.sql.calcite.utils;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
import com.google.common.collect.ImmutableList;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
@@ -74,6 +79,32 @@ void testMatchesWildcardPattern() {
testPattern("*a*e", "city", false);
}
+  @Test
+  void testMatchesWildcardPatternEdgeCases() {
+    // Rows are {pattern, fieldName}. A null on either side, or an empty string on one side only,
+    // must not match.
+    String[][] rejected = {
+      {null, "field"},
+      {"pattern", null},
+      {null, null},
+      {"", "field"},
+      {"field", ""},
+    };
+    for (String[] pair : rejected) {
+      assertFalse(WildcardUtils.matchesWildcardPattern(pair[0], pair[1]));
+    }
+
+    // Empty-vs-empty, a lone wildcard, consecutive wildcards, and wildcards at both ends all match.
+    String[][] accepted = {
+      {"", ""},
+      {"*", "anything"},
+      {"*", ""},
+      {"**", "field"},
+      {"a**b", "ab"},
+      {"a**b", "axxxb"},
+      {"*field*", "myfield123"},
+      {"*field*", "field"},
+    };
+    for (String[] pair : accepted) {
+      assertTrue(WildcardUtils.matchesWildcardPattern(pair[0], pair[1]));
+    }
+  }
+
+
@Test
void testExpandWildcardPattern() {
// Test exact match
@@ -97,6 +128,20 @@ void testExpandWildcardPattern() {
testExpansion("XYZ*", ImmutableList.of());
}
+ @Test
+ void testExpandWildcardPatternEdgeCases() {
+ // Test null handling
+ assertEquals(List.of(), WildcardUtils.expandWildcardPattern(null, availableFields));
+ assertEquals(List.of(), WildcardUtils.expandWildcardPattern("pattern", null));
+ assertEquals(List.of(), WildcardUtils.expandWildcardPattern(null, null));
+
+ // Test empty list
+ assertEquals(List.of(), WildcardUtils.expandWildcardPattern("*", List.of()));
+
+ // Test single wildcard matches all
+ assertEquals(availableFields, WildcardUtils.expandWildcardPattern("*", availableFields));
+ }
+
@Test
void testContainsWildcard() {
// Test with wildcard
@@ -108,4 +153,142 @@ void testContainsWildcard() {
testContainsWildcard("field", false);
testContainsWildcard("", false);
}
+
+  @Test
+  void testContainsWildcardEdgeCases() {
+    // null is reported as "no wildcard" rather than throwing
+    assertFalse(WildcardUtils.containsWildcard(null));
+
+    // More than one wildcard, adjacent or separated
+    for (String candidate : new String[] {"**", "a*b*c"}) {
+      assertTrue(WildcardUtils.containsWildcard(candidate));
+    }
+  }
+
+  @Test
+  void testConvertWildcardPatternToRegex() {
+    // Rows are {wildcard pattern, expected regex}
+    String[][] cases = {
+      // Basic patterns
+      {"ada", "^\\Qada\\E$"},
+      {"*ada", "^\\Q\\E(.*?)\\Qada\\E$"},
+      {"ada*", "^\\Qada\\E(.*?)\\Q\\E$"},
+      {"*ada*", "^\\Q\\E(.*?)\\Qada\\E(.*?)\\Q\\E$"},
+      // Multiple wildcards
+      {"a*b*c", "^\\Qa\\E(.*?)\\Qb\\E(.*?)\\Qc\\E$"},
+      // Regex metacharacters in the literal text stay quoted
+      {"a.b*c+d", "^\\Qa.b\\E(.*?)\\Qc+d\\E$"},
+      // Single wildcard
+      {"*", "^\\Q\\E(.*?)\\Q\\E$"},
+      // Empty pattern
+      {"", "^\\Q\\E$"},
+    };
+    for (String[] row : cases) {
+      assertEquals(row[1], WildcardUtils.convertWildcardPatternToRegex(row[0]));
+    }
+
+    // A trailing backslash escapes nothing and must be rejected
+    String failure =
+        assertThrows(
+                IllegalArgumentException.class,
+                () -> WildcardUtils.convertWildcardPatternToRegex("pattern\\"))
+            .getMessage();
+    assertTrue(failure.contains("Invalid escape sequence"));
+  }
+
+  @Test
+  void testConvertWildcardReplacementToRegex() {
+    // Rows are {wildcard replacement, expected regex replacement}
+    String[][] cases = {
+      // No wildcards: the text passes through as a literal replacement
+      {"ada", "ada"},
+      {"test_value", "test_value"},
+      // Single wildcard
+      {"*", "$1"},
+      // Wildcards surrounded by text
+      {"*_*", "$1_$2"},
+      {"prefix_*", "prefix_$1"},
+      {"*_suffix", "$1_suffix"},
+      // Group numbers keep counting up
+      {"*_*_*", "$1_$2_$3"},
+      // Empty string
+      {"", ""},
+    };
+    for (String[] row : cases) {
+      assertEquals(row[1], WildcardUtils.convertWildcardReplacementToRegex(row[0]));
+    }
+  }
+
+  @Test
+  void testConvertWildcardReplacementToRegexWithEscapes() {
+    // Rows are {wildcard replacement, expected regex replacement}
+    String[][] cases = {
+      // An escaped wildcard is emitted as a literal asterisk
+      {"\\*", "*"}, // \* -> *
+      {"*_\\*", "$1_*"},
+      {"\\*_*", "*_$1"},
+      // Escaped backslash with no wildcard present: returned unchanged
+      {"\\\\", "\\\\"},
+      // Escaped and unescaped wildcards side by side
+      {"*_\\*_*", "$1_*_$2"},
+      // NOTE(review): the expected value puts a single backslash directly in front of "$2".
+      // Confirm the engine's regex replacement syntax does not read that backslash as escaping
+      // the '$', which would suppress the second group reference.
+      {"*\\\\*", "$1\\$2"}, // \\ -> \
+    };
+    for (String[] row : cases) {
+      assertEquals(row[1], WildcardUtils.convertWildcardReplacementToRegex(row[0]));
+    }
+  }
+
+  @Test
+  void testValidateWildcardSymmetry() {
+    // Rows are {pattern, replacement}; none of them may throw.
+    String[][] accepted = {
+      // Same number of wildcards on both sides
+      {"*", "*"},
+      {"*ada*", "*_*"},
+      {"a*b*c", "x*y*z"},
+      // Replacement without wildcards (literal replacement)
+      {"*", "literal"},
+      {"*ada*", "replacement"},
+      {"a*b*c", "xyz"},
+      // Pattern without wildcards
+      {"ada", "replacement"},
+    };
+    for (String[] pair : accepted) {
+      WildcardUtils.validateWildcardSymmetry(pair[0], pair[1]);
+    }
+  }
+
+  @Test
+  void testValidateWildcardSymmetryFailure() {
+    // Replacement has more wildcards than the pattern
+    String oneVersusTwo =
+        assertThrows(
+                IllegalArgumentException.class,
+                () -> WildcardUtils.validateWildcardSymmetry("*", "**"))
+            .getMessage();
+    assertTrue(oneVersusTwo.contains("Wildcard count mismatch"));
+    assertTrue(oneVersusTwo.contains("pattern has 1 wildcard(s)"));
+    assertTrue(oneVersusTwo.contains("replacement has 2"));
+
+    // Replacement has fewer wildcards than the pattern, but not zero
+    String threeVersusTwo =
+        assertThrows(
+                IllegalArgumentException.class,
+                () -> WildcardUtils.validateWildcardSymmetry("*a*b*", "*_*"))
+            .getMessage();
+    assertTrue(threeVersusTwo.contains("pattern has 3 wildcard(s)"));
+    assertTrue(threeVersusTwo.contains("replacement has 2"));
+
+    // Pattern has no wildcard while the replacement does
+    String zeroVersusOne =
+        assertThrows(
+                IllegalArgumentException.class,
+                () -> WildcardUtils.validateWildcardSymmetry("ada", "*"))
+            .getMessage();
+    assertTrue(zeroVersusOne.contains("pattern has 0 wildcard(s)"));
+    assertTrue(zeroVersusOne.contains("replacement has 1"));
+  }
+
+  @Test
+  void testValidateWildcardSymmetryWithEscapes() {
+    // Escaped wildcards are not counted
+    WildcardUtils.validateWildcardSymmetry("\\*", "literal"); // 0 wildcards in pattern
+    WildcardUtils.validateWildcardSymmetry("*\\*", "*"); // 1 wildcard in both
+
+    // Pattern has 1 wildcard; in the replacement \**\* only the middle asterisk is unescaped
+    WildcardUtils.validateWildcardSymmetry("*", "\\**\\*"); // 1 wildcard in both
+
+    // Two wildcards in the pattern against one unescaped wildcard in each replacement: rejected
+    for (String replacement : new String[] {"*\\*", "\\**\\*"}) {
+      assertThrows(
+          IllegalArgumentException.class,
+          () -> WildcardUtils.validateWildcardSymmetry("*a*", replacement)); // 2 vs 1
+    }
+  }
}
diff --git a/docs/user/ppl/cmd/replace.rst b/docs/user/ppl/cmd/replace.rst
index bcb0d57e677..0098124344d 100644
--- a/docs/user/ppl/cmd/replace.rst
+++ b/docs/user/ppl/cmd/replace.rst
@@ -11,7 +11,7 @@ replace
Description
============
-Using ``replace`` command to replace text in one or more fields in the search result.
+Use the ``replace`` command to replace text in one or more fields. It supports literal string replacement and wildcard patterns using ``*``.
Note: This command is only available when Calcite engine is enabled.
@@ -21,13 +21,6 @@ Syntax
replace '<pattern>' WITH '<replacement>' [, '<pattern>' WITH '<replacement>']... IN <field-name>[, <field-name>]...
-Parameters
-==========
-* **pattern**: mandatory. The text pattern you want to replace. Currently supports only plain text literals (no wildcards or regular expressions).
-* **replacement**: mandatory. The text you want to replace with.
-* **field-name**: mandatory. One or more field names where the replacement should occur.
-
-
Examples
========
@@ -120,8 +113,158 @@ PPL query::
+-----------------+-------+--------+-----+--------+
+Example 6: Wildcard suffix match
+---------------------------------
+
+Replace values that end with a specific pattern. The wildcard ``*`` matches any prefix.
+
+PPL query::
+
+ os> source=accounts | replace "*IL" WITH "Illinois" IN state | fields state;
+ fetched rows / total rows = 4/4
+ +----------+
+ | state |
+ |----------|
+ | Illinois |
+ | TN |
+ | VA |
+ | MD |
+ +----------+
+
+
+Example 7: Wildcard prefix match
+---------------------------------
+
+Replace values that start with a specific pattern. The wildcard ``*`` matches any suffix.
+
+PPL query::
+
+ os> source=accounts | replace "IL*" WITH "Illinois" IN state | fields state;
+ fetched rows / total rows = 4/4
+ +----------+
+ | state |
+ |----------|
+ | Illinois |
+ | TN |
+ | VA |
+ | MD |
+ +----------+
+
+
+Example 8: Wildcard capture and substitution
+---------------------------------------------
+
+Use wildcards in both pattern and replacement to capture and reuse matched portions. The number of wildcards must match in pattern and replacement.
+
+PPL query::
+
+ os> source=accounts | replace "* Lane" WITH "Lane *" IN address | fields address;
+ fetched rows / total rows = 4/4
+ +----------------------+
+ | address |
+ |----------------------|
+ | Lane 880 Holmes |
+ | 671 Bristol Street |
+ | 789 Madison Street |
+ | 467 Hutchinson Court |
+ +----------------------+
+
+
+Example 9: Multiple wildcards for pattern transformation
+---------------------------------------------------------
+
+Use multiple wildcards to transform patterns. Each wildcard in the replacement substitutes the corresponding captured value.
+
+PPL query::
+
+ os> source=accounts | replace "* *" WITH "*_*" IN address | fields address;
+ fetched rows / total rows = 4/4
+ +----------------------+
+ | address |
+ |----------------------|
+ | 880_Holmes Lane |
+ | 671_Bristol Street |
+ | 789_Madison Street |
+ | 467_Hutchinson Court |
+ +----------------------+
+
+
+Example 10: Wildcard with zero wildcards in replacement
+--------------------------------------------------------
+
+When replacement has zero wildcards, all matching values are replaced with the literal replacement string.
+
+PPL query::
+
+ os> source=accounts | replace "*IL*" WITH "Illinois" IN state | fields state;
+ fetched rows / total rows = 4/4
+ +----------+
+ | state |
+ |----------|
+ | Illinois |
+ | TN |
+ | VA |
+ | MD |
+ +----------+
+
+
+Example 11: Matching literal asterisks
+---------------------------------------
+
+Use ``\*`` to match literal asterisk characters (``\*`` = literal asterisk, ``\\`` = literal backslash).
+
+PPL query::
+
+ os> source=accounts | eval note = 'price: *sale*' | replace 'price: \*sale\*' WITH 'DISCOUNTED' IN note | fields note;
+ fetched rows / total rows = 4/4
+ +------------+
+ | note |
+ |------------|
+ | DISCOUNTED |
+ | DISCOUNTED |
+ | DISCOUNTED |
+ | DISCOUNTED |
+ +------------+
+
+Example 12: Wildcard with no replacement wildcards
+----------------------------------------------------
+
+Use wildcards in pattern but none in replacement to create a fixed output.
+
+PPL query::
+
+ os> source=accounts | eval test = 'prefix-value-suffix' | replace 'prefix-*-suffix' WITH 'MATCHED' IN test | fields test;
+ fetched rows / total rows = 4/4
+ +---------+
+ | test |
+ |---------|
+ | MATCHED |
+ | MATCHED |
+ | MATCHED |
+ | MATCHED |
+ +---------+
+
+Example 13: Escaped asterisks with wildcards
+---------------------------------------------
+
+Combine escaped asterisks (literal) with wildcards for complex patterns.
+
+PPL query::
+
+ os> source=accounts | eval label = 'file123.txt' | replace 'file*.*' WITH '\**.*' IN label | fields label;
+ fetched rows / total rows = 4/4
+ +----------+
+ | label |
+ |----------|
+ | *123.txt |
+ | *123.txt |
+ | *123.txt |
+ | *123.txt |
+ +----------+
+
+
Limitations
===========
-* Only supports plain text literals for pattern matching. Wildcards and regular expressions are not supported.
-* Pattern and replacement values must be string literals.
-* The replace command modifies the specified fields in-place.
\ No newline at end of file
+* Wildcards: ``*`` matches zero or more characters (case-sensitive)
+* Replacement wildcards must match pattern wildcard count, or be zero
+* Escape sequences: ``\*`` (literal asterisk), ``\\`` (literal backslash)
\ No newline at end of file
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
index 77f3a45cc07..6da047e0c20 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
@@ -1335,6 +1335,17 @@ public void testReplaceCommandExplain() throws IOException {
TEST_INDEX_ACCOUNT)));
}
+  @Test
+  public void testReplaceCommandWildcardExplain() throws IOException {
+    // Explain output for a wildcard replace must match the stored plan, ignoring node ids
+    String query =
+        String.format(
+            "source=%s | replace '*L' WITH 'STATE_IL' IN state | fields state",
+            TEST_INDEX_ACCOUNT);
+    String expectedPlan = loadExpectedPlan("explain_replace_wildcard.yaml");
+    assertYamlEqualsIgnoreId(expectedPlan, explainQueryYaml(query));
+  }
+
@Test
public void testExplainRareCommandUseNull() throws IOException {
String expected = loadExpectedPlan("explain_rare_usenull_false.yaml");
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java
index 9d6304c363b..44cc4a3aaf0 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java
@@ -288,4 +288,118 @@ public void testMultiplePairsSequentialApplication() throws IOException {
rows("John", "Ontario Province"),
rows("Jane", "Quebec"));
}
+
+  @Test
+  public void testWildcardReplace_suffixMatch() throws IOException {
+    // A leading wildcard matches on the end of the value: "*ada" turns "Canada" into "CA"
+    String query =
+        String.format(
+            "source = %s | replace '*ada' WITH 'CA' IN country | fields name, country",
+            TEST_INDEX_STATE_COUNTRY);
+    JSONObject response = executeQuery(query);
+
+    verifySchema(response, schema("name", "string"), schema("country", "string"));
+    verifyDataRows(
+        response,
+        rows("Jake", "USA"),
+        rows("Hello", "USA"),
+        rows("John", "CA"),
+        rows("Jane", "CA"));
+  }
+
+  @Test
+  public void testWildcardReplace_prefixMatch() throws IOException {
+    // A trailing wildcard matches on the start of the value: "US*" turns "USA" into
+    // "United States"
+    String query =
+        String.format(
+            "source = %s | replace 'US*' WITH 'United States' IN country | fields name,"
+                + " country",
+            TEST_INDEX_STATE_COUNTRY);
+    JSONObject response = executeQuery(query);
+
+    verifySchema(response, schema("name", "string"), schema("country", "string"));
+    verifyDataRows(
+        response,
+        rows("Jake", "United States"),
+        rows("Hello", "United States"),
+        rows("John", "Canada"),
+        rows("Jane", "Canada"));
+  }
+
+  @Test
+  public void testWildcardReplace_multipleWildcards() throws IOException {
+    // "* *" captures the text on either side of a space; "*_*" writes both captures back joined
+    // by an underscore. Values without a space are left untouched.
+    String query =
+        String.format(
+            "source = %s | replace '* *' WITH '*_*' IN state | fields name, state",
+            TEST_INDEX_STATE_COUNTRY);
+    JSONObject response = executeQuery(query);
+
+    verifySchema(response, schema("name", "string"), schema("state", "string"));
+    verifyDataRows(
+        response,
+        rows("Jake", "California"),
+        rows("Hello", "New_York"),
+        rows("John", "Ontario"),
+        rows("Jane", "Quebec"));
+  }
+
+  @Test
+  public void testWildcardReplace_symmetryMismatch_shouldFail() {
+    // Two wildcards in the pattern but a single one in the replacement must be rejected
+    final String query =
+        String.format("source = %s | replace '* *' WITH '*' IN state", TEST_INDEX_STATE_COUNTRY);
+    Throwable failure =
+        assertThrowsWithReplace(IllegalArgumentException.class, () -> executeQuery(query));
+    verifyErrorMessageContains(failure, "Wildcard count mismatch");
+  }
+
+  @Test
+  public void testEscapeSequence_literalAsterisk() throws IOException {
+    // The \* escape matches asterisks that are really present in the data
+    String query =
+        String.format(
+            "source = %s | eval note = 'price: *sale*' | replace 'price: \\\\*sale\\\\*' WITH"
+                + " 'DISCOUNTED' IN note | fields note | head 1",
+            TEST_INDEX_STATE_COUNTRY);
+    JSONObject response = executeQuery(query);
+
+    verifySchema(response, schema("note", "string"));
+    // The whole value "price: *sale*" is matched literally and replaced
+    verifyDataRows(response, rows("DISCOUNTED"));
+  }
+
+  @Test
+  public void testEscapeSequence_mixedEscapeAndWildcard() throws IOException {
+    // An escaped (literal) asterisk and real wildcards used together in one replacement
+    String query =
+        String.format(
+            "source = %s | eval label = 'file123.txt' | replace 'file*.*' WITH"
+                + " '\\\\**.*' IN label | fields label | head 1",
+            TEST_INDEX_STATE_COUNTRY);
+    JSONObject response = executeQuery(query);
+
+    verifySchema(response, schema("label", "string"));
+    // "file*.*" captures "123" and "txt"; the replacement "\**.*" emits a literal '*' followed by
+    // the two captures separated by '.', giving "*123.txt"
+    verifyDataRows(response, rows("*123.txt"));
+  }
+
+  @Test
+  public void testEscapeSequence_noMatchLiteral() throws IOException {
+    // An escaped asterisk must not behave like a wildcard
+    String query =
+        String.format(
+            "source = %s | eval test = 'fooXbar' | replace 'foo\\\\*bar' WITH 'matched' IN test"
+                + " | fields test | head 1",
+            TEST_INDEX_STATE_COUNTRY);
+    JSONObject response = executeQuery(query);
+
+    verifySchema(response, schema("test", "string"));
+    // "foo\*bar" only matches the literal text "foo*bar", so "fooXbar" comes back unchanged
+    verifyDataRows(response, rows("fooXbar"));
+  }
}
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_wildcard.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_wildcard.yaml
new file mode 100644
index 00000000000..0407849a472
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_wildcard.yaml
@@ -0,0 +1,8 @@
+calcite:
+ logical: |
+ LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
+ LogicalProject(state=[REGEXP_REPLACE($7, '^\Q\E(.*?)\QL\E$':VARCHAR, 'STATE_IL':VARCHAR)])
+ CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+ physical: |
+ EnumerableCalc(expr#0=[{inputs}], expr#1=['^\Q\E(.*?)\QL\E$':VARCHAR], expr#2=['STATE_IL':VARCHAR], expr#3=[REGEXP_REPLACE($t0, $t1, $t2)], $f0=[$t3])
+ CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_wildcard.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_wildcard.yaml
new file mode 100644
index 00000000000..194f680adf2
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_wildcard.yaml
@@ -0,0 +1,9 @@
+calcite:
+ logical: |
+ LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
+ LogicalProject(state=[REGEXP_REPLACE($7, '^\Q\E(.*?)\QL\E$':VARCHAR, 'STATE_IL':VARCHAR)])
+ CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+ physical: |
+ EnumerableLimit(fetch=[10000])
+ EnumerableCalc(expr#0..16=[{inputs}], expr#17=['^\Q\E(.*?)\QL\E$':VARCHAR], expr#18=['STATE_IL':VARCHAR], expr#19=[REGEXP_REPLACE($t7, $t17, $t18)], state=[$t19])
+ CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
\ No newline at end of file
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java
index abde8b3a5bb..5f6f2beb76d 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java
@@ -325,4 +325,76 @@ public void testReplaceWithMultiplePairsTrailingCommaShouldFail() {
String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\", IN JOB";
getRelNode(ppl);
}
+
+  @Test
+  public void testWildcardReplace_prefixWildcard() {
+    // Leading wildcard (suffix match) - e.g., "*MAN" matches "SALESMAN" → "SELLER".
+    // The wildcard pattern is converted to a regex at planning time.
+    final String expectedPlan =
+        "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REGEXP_REPLACE($2,"
+            + " '^\\Q\\E(.*?)\\QMAN\\E$':VARCHAR, 'SELLER':VARCHAR)], MGR=[$3], HIREDATE=[$4],"
+            + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n"
+            + " LogicalTableScan(table=[[scott, EMP]])\n";
+
+    RelNode plan = getRelNode("source=EMP | replace \"*MAN\" WITH \"SELLER\" IN JOB");
+
+    verifyLogical(plan, expectedPlan);
+  }
+
+  @Test
+  public void testWildcardReplace_multipleWildcards() {
+    // Two wildcards on each side: the captures are written back through the regex replacement
+    // "$1_$2" that the wildcard replacement "*_*" is converted to.
+    final String expectedPlan =
+        "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REGEXP_REPLACE($2, '^\\Q\\E(.*?)\\Q -"
+            + " \\E(.*?)\\Q\\E$':VARCHAR, '$1_$2':VARCHAR)], MGR=[$3], HIREDATE=[$4], SAL=[$5],"
+            + " COMM=[$6], DEPTNO=[$7])\n"
+            + " LogicalTableScan(table=[[scott, EMP]])\n";
+
+    RelNode plan = getRelNode("source=EMP | replace \"* - *\" WITH \"*_*\" IN JOB");
+
+    verifyLogical(plan, expectedPlan);
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testWildcardReplace_symmetryMismatch_shouldFail() {
+    // Two wildcards in the pattern, one in the replacement: planning must throw
+    getRelNode("source=EMP | replace \"* - *\" WITH \"*\" IN JOB");
+  }
+
+  @Test
+  public void testWildcardReplace_symmetryValid_zeroInReplacement() {
+    // Two wildcards in the pattern and none in the replacement is allowed: the literal "FIXED"
+    // is used as the replacement text as-is.
+    final String expectedPlan =
+        "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REGEXP_REPLACE($2, '^\\Q\\E(.*?)\\Q -"
+            + " \\E(.*?)\\Q\\E$':VARCHAR, 'FIXED':VARCHAR)], MGR=[$3], HIREDATE=[$4], SAL=[$5],"
+            + " COMM=[$6], DEPTNO=[$7])\n"
+            + " LogicalTableScan(table=[[scott, EMP]])\n";
+
+    RelNode plan = getRelNode("source=EMP | replace \"* - *\" WITH \"FIXED\" IN JOB");
+
+    verifyLogical(plan, expectedPlan);
+  }
+
+  @Test
+  public void testWildcardAndLiteralReplace_mixedPairs() {
+    // Two pairs on one field: the wildcard pair plans as REGEXP_REPLACE and the literal pair as
+    // REPLACE wrapped around it.
+    final String expectedPlan =
+        "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REPLACE(REGEXP_REPLACE($2,"
+            + " '^\\Q\\E(.*?)\\QCLERK\\E$':VARCHAR, 'EMPLOYEE':VARCHAR), 'MANAGER':VARCHAR,"
+            + " 'SUPERVISOR':VARCHAR)], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6],"
+            + " DEPTNO=[$7])\n"
+            + " LogicalTableScan(table=[[scott, EMP]])\n";
+
+    RelNode plan =
+        getRelNode(
+            "source=EMP | replace \"*CLERK\" WITH \"EMPLOYEE\", \"MANAGER\" WITH \"SUPERVISOR\" IN JOB");
+
+    verifyLogical(plan, expectedPlan);
+  }
}