Skip to content

Add support for RLIKE (LIST) with pushdown #129929

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/129929.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 129929
summary: Add support for RLIKE (LIST) with pushdown
area: ES|QL
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ ROW message = "foo ( bar"
```


```esql
ROW message = "foobar"
| WHERE message RLIKE ("foo.*", "bar.")
```


To reduce the overhead of escaping, we suggest using triple-quoted strings `"""`

```esql
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,30 @@
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;

import java.io.IOException;
import java.util.Objects;

public class RLikePattern extends AbstractStringPattern {
public class RLikePattern extends AbstractStringPattern implements Writeable {

private final String regexpPattern;

public RLikePattern(String regexpPattern) {
this.regexpPattern = regexpPattern;
}

public RLikePattern(StreamInput in) throws IOException {
this(in.readString());
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(regexpPattern);
}

@Override
public Automaton createAutomaton(boolean ignoreCase) {
int matchFlags = ignoreCase ? RegExp.CASE_INSENSITIVE : 0;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.esql.core.expression.predicate.regex;

import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;

import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

/**
 * A list of {@link RLikePattern}s exposed as a single string pattern.
 * The automaton and the Java regex produced by this class are built as the
 * union/alternation of the individual patterns in the list.
 */
public class RLikePatternList extends AbstractStringPattern implements Writeable {

    private final List<RLikePattern> patternList;

    /**
     * @param patternList the individual patterns; the list reference is stored as-is (no copy is made)
     */
    public RLikePatternList(List<RLikePattern> patternList) {
        this.patternList = patternList;
    }

    /**
     * Reads a pattern list from the stream, deserializing each element with
     * {@link RLikePattern#RLikePattern(StreamInput)}.
     */
    public RLikePatternList(StreamInput in) throws IOException {
        this(in.readCollectionAsList(RLikePattern::new));
    }

    /**
     * Writes the patterns as a collection, delegating to each element's own {@code writeTo}.
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeCollection(patternList, (o, pattern) -> pattern.writeTo(o));
    }

    /**
     * @return the list that was passed to the constructor (the same instance, not a copy)
     */
    public List<RLikePattern> patternList() {
        return patternList;
    }

    /**
     * Creates an automaton that matches any of the patterns in the list.
     * We create a single automaton that is the union of all individual automatons to improve performance.
     * The union is determinized using {@link Operations#DEFAULT_DETERMINIZE_WORK_LIMIT} before being returned.
     *
     * @param ignoreCase forwarded unchanged to each pattern's {@code createAutomaton}
     */
    @Override
    public Automaton createAutomaton(boolean ignoreCase) {
        List<Automaton> automatonList = patternList.stream().map(x -> x.createAutomaton(ignoreCase)).toList();
        Automaton result = Operations.union(automatonList);
        return Operations.determinize(result, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
    }

    /**
     * Returns a Java regex that matches any of the patterns in the list.
     * The individual Java regexes are joined with the '|' operator to create a single regex.
     * For an empty list the result is the empty string.
     */
    @Override
    public String asJavaRegex() {
        return patternList.stream().map(RLikePattern::asJavaRegex).collect(Collectors.joining("|"));
    }

    @Override
    public int hashCode() {
        return Objects.hash(patternList);
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        RLikePatternList other = (RLikePatternList) obj;
        // Objects.equals keeps equals() as null-tolerant as hashCode(), which goes through Objects.hash.
        return Objects.equals(patternList, other.patternList);
    }

    /**
     * Returns a display string for the list; this is not a regex alternation.
     * <ul>
     *   <li>empty list: the empty string</li>
     *   <li>single element: that element's own {@code pattern()}, without quotes or parentheses</li>
     *   <li>otherwise: each pattern wrapped in double quotes, separated by {@code ", "} and enclosed in
     *       parentheses, e.g. {@code ("foo.*", "bar.")}</li>
     * </ul>
     */
    @Override
    public String pattern() {
        if (patternList.isEmpty()) {
            return "";
        }
        if (patternList.size() == 1) {
            return patternList.get(0).pattern();
        }
        return "(\"" + patternList.stream().map(RLikePattern::pattern).collect(Collectors.joining("\", \"")) + "\")";
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@
/**
* Query that matches documents based on a Lucene Automaton.
*/
public class AutomatonQuery extends Query {
public class EsqlAutomatonQuery extends Query {

private final String field;
private final Automaton automaton;
private final String automatonDescription;

public AutomatonQuery(Source source, String field, Automaton automaton, String automatonDescription) {
public EsqlAutomatonQuery(Source source, String field, Automaton automaton, String automatonDescription) {
super(source);
this.field = field;
this.automaton = automaton;
Expand Down Expand Up @@ -53,7 +53,7 @@ public boolean equals(Object obj) {
return false;
}

AutomatonQuery other = (AutomatonQuery) obj;
EsqlAutomatonQuery other = (EsqlAutomatonQuery) obj;
return Objects.equals(field, other.field)
&& Objects.equals(automaton, other.automaton)
&& Objects.equals(automatonDescription, other.automatonDescription);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,42 @@ public void testLikeList() throws IOException {
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
}

/**
 * Runs a single-pattern {@code RLIKE} filter through {@code testPushQuery}, passing the
 * Lucene query string and the data-node compute signature expected for the current field {@code type}.
 */
public void testRLike() throws IOException {
    final String fieldValue = "v".repeat(between(1, 256));
    final String query = """
        FROM test
        | WHERE test rlike "%value.*"
        """;

    // Expected Lucene query string per field type.
    final String expectedLucene = switch (type) {
        case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
        case AUTO, CONSTANT_KEYWORD, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> "*:*";
        case KEYWORD -> "test:/%value.*/";
    };

    // Expected compute signature per field type.
    final ComputeSignature expectedSignature = switch (type) {
        case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE;
        case KEYWORD, CONSTANT_KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
    };

    testPushQuery(fieldValue, query, List.of(expectedLucene), expectedSignature, true);
}

/**
 * Runs a two-pattern {@code RLIKE (...)} list filter through {@code testPushQuery}, passing the
 * Lucene query string and the data-node compute signature expected for the current field {@code type}.
 */
public void testRLikeList() throws IOException {
    final String fieldValue = "v".repeat(between(1, 256));
    final String query = """
        FROM test
        | WHERE test rlike ("%value.*", "abc.*")
        """;

    // Expected Lucene query string per field type.
    final String expectedLucene = switch (type) {
        case KEYWORD -> "test:RLIKE(\"%value.*\", \"abc.*\"), caseInsensitive=false";
        case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
        case AUTO, CONSTANT_KEYWORD, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> "*:*";
    };

    // Expected compute signature per field type.
    final ComputeSignature expectedSignature = switch (type) {
        case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE;
        case KEYWORD, CONSTANT_KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
    };

    testPushQuery(fieldValue, query, List.of(expectedLucene), expectedSignature, true);
}

enum ComputeSignature {
FILTER_IN_COMPUTE(
matchesList().item("LuceneSourceOperator")
Expand Down
Loading
Loading