Skip to content

Commit e5ebdc9

Browse files
committed
Merge remote-tracking branch 'origin/main' into issues/399
Signed-off-by: Yuanchun Shen <[email protected]>
2 parents dabb710 + 0c1ec27 commit e5ebdc9

File tree

274 files changed

+6479
-1317
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

274 files changed

+6479
-1317
lines changed

core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,14 @@ private Timechart timechart(UnresolvedExpression newAggregateFunction) {
123123
return this.toBuilder().aggregateFunction(newAggregateFunction).build();
124124
}
125125

126-
/** TODO: extend to support additional per_* functions */
127126
@RequiredArgsConstructor
128127
static class PerFunction {
129-
private static final Map<String, Integer> UNIT_SECONDS = Map.of("per_second", 1);
128+
private static final Map<String, Integer> UNIT_SECONDS =
129+
Map.of(
130+
"per_second", 1,
131+
"per_minute", 60,
132+
"per_hour", 3600,
133+
"per_day", 86400);
130134
private final String aggName;
131135
private final UnresolvedExpression aggArg;
132136
private final int seconds;

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -284,10 +284,13 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
284284
"Rex pattern must contain at least one named capture group");
285285
}
286286

287+
// TODO: Once JDK 20+ is supported, consider using Pattern.namedGroups() API for more efficient
288+
// named group handling instead of manual parsing in RegexCommonUtils
289+
287290
List<RexNode> newFields = new ArrayList<>();
288291
List<String> newFieldNames = new ArrayList<>();
289292

290-
for (int i = 0; i < namedGroups.size(); i++) {
293+
for (String groupName : namedGroups) {
291294
RexNode extractCall;
292295
if (node.getMaxMatch().isPresent() && node.getMaxMatch().get() > 1) {
293296
extractCall =
@@ -296,7 +299,7 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
296299
BuiltinFunctionName.REX_EXTRACT_MULTI,
297300
fieldRex,
298301
context.rexBuilder.makeLiteral(patternStr),
299-
context.relBuilder.literal(i + 1),
302+
context.rexBuilder.makeLiteral(groupName),
300303
context.relBuilder.literal(node.getMaxMatch().get()));
301304
} else {
302305
extractCall =
@@ -305,10 +308,10 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
305308
BuiltinFunctionName.REX_EXTRACT,
306309
fieldRex,
307310
context.rexBuilder.makeLiteral(patternStr),
308-
context.relBuilder.literal(i + 1));
311+
context.rexBuilder.makeLiteral(groupName));
309312
}
310313
newFields.add(extractCall);
311-
newFieldNames.add(namedGroups.get(i));
314+
newFieldNames.add(groupName);
312315
}
313316

314317
if (node.getOffsetField().isPresent()) {
@@ -852,7 +855,7 @@ private void projectPlusOverriding(
852855
List<String> originalFieldNames = context.relBuilder.peek().getRowType().getFieldNames();
853856
List<RexNode> toOverrideList =
854857
originalFieldNames.stream()
855-
.filter(newNames::contains)
858+
.filter(originalName -> shouldOverrideField(originalName, newNames))
856859
.map(a -> (RexNode) context.relBuilder.field(a))
857860
.toList();
858861
// 1. add the new fields, For example "age0, country0"
@@ -872,6 +875,17 @@ private void projectPlusOverriding(
872875
context.relBuilder.rename(expectedRenameFields);
873876
}
874877

878+
private boolean shouldOverrideField(String originalName, List<String> newNames) {
879+
return newNames.stream()
880+
.anyMatch(
881+
newName ->
882+
// Match exact field names (e.g., "age" == "age") for flat fields
883+
newName.equals(originalName)
884+
// OR match nested paths (e.g., "resource.attributes..." starts with
885+
// "resource.")
886+
|| newName.startsWith(originalName + "."));
887+
}
888+
875889
private List<List<RexInputRef>> extractInputRefList(List<RelBuilder.AggCall> aggCalls) {
876890
return aggCalls.stream()
877891
.map(RelBuilder.AggCall::over)
@@ -1803,18 +1817,16 @@ public RelNode visitMultisearch(Multisearch node, CalcitePlanContext context) {
18031817
}
18041818

18051819
/**
1806-
* Finds the timestamp field for multisearch ordering.
1820+
* Finds the @timestamp field for multisearch ordering. Only @timestamp field is used for
1821+
* timestamp interleaving. Other timestamp-like fields are ignored.
18071822
*
1808-
* @param rowType The row type to search for timestamp fields
1809-
* @return The name of the timestamp field, or null if not found
1823+
* @param rowType The row type to search for @timestamp field
1824+
* @return "@timestamp" if the field exists, or null if not found
18101825
*/
18111826
private String findTimestampField(RelDataType rowType) {
1812-
String[] candidates = {"@timestamp", "_time", "timestamp", "time"};
1813-
for (String fieldName : candidates) {
1814-
RelDataTypeField field = rowType.getField(fieldName, false, false);
1815-
if (field != null) {
1816-
return fieldName;
1817-
}
1827+
RelDataTypeField field = rowType.getField("@timestamp", false, false);
1828+
if (field != null) {
1829+
return "@timestamp";
18181830
}
18191831
return null;
18201832
}

core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java

Lines changed: 26 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,27 @@
77

88
import java.util.ArrayList;
99
import java.util.HashMap;
10-
import java.util.HashSet;
1110
import java.util.List;
1211
import java.util.Map;
13-
import java.util.Set;
1412
import java.util.stream.Collectors;
1513
import org.apache.calcite.rel.RelNode;
1614
import org.apache.calcite.rel.type.RelDataType;
1715
import org.apache.calcite.rel.type.RelDataTypeField;
1816
import org.apache.calcite.rex.RexNode;
19-
import org.apache.calcite.sql.validate.SqlValidatorUtil;
2017

2118
/**
22-
* Utility class for unifying schemas across multiple RelNodes with type conflict resolution. Uses
23-
* the same strategy as append command - renames conflicting fields to avoid type conflicts.
19+
* Utility class for unifying schemas across multiple RelNodes. Throws an exception when type
20+
* conflicts are detected.
2421
*/
2522
public class SchemaUnifier {
2623

2724
/**
28-
* Builds a unified schema for multiple nodes with type conflict resolution.
25+
* Builds a unified schema for multiple nodes. Throws an exception if type conflicts are detected.
2926
*
3027
* @param nodes List of RelNodes to unify schemas for
3128
* @param context Calcite plan context
3229
* @return List of projected RelNodes with unified schema
30+
* @throws IllegalArgumentException if type conflicts are detected
3331
*/
3432
public static List<RelNode> buildUnifiedSchemaWithConflictResolution(
3533
List<RelNode> nodes, CalcitePlanContext context) {
@@ -41,7 +39,7 @@ public static List<RelNode> buildUnifiedSchemaWithConflictResolution(
4139
return nodes;
4240
}
4341

44-
// Step 1: Build the unified schema by processing all nodes
42+
// Step 1: Build the unified schema by processing all nodes (throws on conflict)
4543
List<SchemaField> unifiedSchema = buildUnifiedSchema(nodes);
4644

4745
// Step 2: Create projections for each node to align with unified schema
@@ -55,47 +53,37 @@ public static List<RelNode> buildUnifiedSchemaWithConflictResolution(
5553
projectedNodes.add(projectedNode);
5654
}
5755

58-
// Step 3: Unify names to handle type conflicts (this creates age0, age1, etc.)
59-
List<String> uniqueNames =
60-
SqlValidatorUtil.uniquify(fieldNames, SqlValidatorUtil.EXPR_SUGGESTER, true);
61-
62-
// Step 4: Re-project with unique names if needed
63-
if (!uniqueNames.equals(fieldNames)) {
64-
List<RelNode> renamedNodes = new ArrayList<>();
65-
for (RelNode node : projectedNodes) {
66-
RelNode renamedNode =
67-
context.relBuilder.push(node).project(context.relBuilder.fields(), uniqueNames).build();
68-
renamedNodes.add(renamedNode);
69-
}
70-
return renamedNodes;
71-
}
72-
7356
return projectedNodes;
7457
}
7558

7659
/**
77-
* Builds a unified schema by merging fields from all nodes. Fields with the same name but
78-
* different types are added as separate entries (which will be renamed during uniquification).
60+
* Builds a unified schema by merging fields from all nodes. Throws an exception if fields with
61+
* the same name have different types.
7962
*
8063
* @param nodes List of RelNodes to merge schemas from
81-
* @return List of SchemaField representing the unified schema (may contain duplicate names)
64+
* @return List of SchemaField representing the unified schema
65+
* @throws IllegalArgumentException if type conflicts are detected
8266
*/
8367
private static List<SchemaField> buildUnifiedSchema(List<RelNode> nodes) {
8468
List<SchemaField> schema = new ArrayList<>();
85-
Map<String, Set<RelDataType>> seenFields = new HashMap<>();
69+
Map<String, RelDataType> seenFields = new HashMap<>();
8670

8771
for (RelNode node : nodes) {
8872
for (RelDataTypeField field : node.getRowType().getFieldList()) {
8973
String fieldName = field.getName();
9074
RelDataType fieldType = field.getType();
9175

92-
// Track which (name, type) combinations we've seen
93-
Set<RelDataType> typesForName = seenFields.computeIfAbsent(fieldName, k -> new HashSet<>());
94-
95-
if (!typesForName.contains(fieldType)) {
96-
// New field or same name with different type - add to schema
76+
RelDataType existingType = seenFields.get(fieldName);
77+
if (existingType == null) {
78+
// New field - add to schema
9779
schema.add(new SchemaField(fieldName, fieldType));
98-
typesForName.add(fieldType);
80+
seenFields.put(fieldName, fieldType);
81+
} else if (!areTypesCompatible(existingType, fieldType)) {
82+
// Same field name but different type - throw exception
83+
throw new IllegalArgumentException(
84+
String.format(
85+
"Unable to process column '%s' due to incompatible types: '%s' and '%s'",
86+
fieldName, existingType.getSqlTypeName(), fieldType.getSqlTypeName()));
9987
}
10088
// If we've seen this exact (name, type) combination, skip it
10189
}
@@ -104,6 +92,10 @@ private static List<SchemaField> buildUnifiedSchema(List<RelNode> nodes) {
10492
return schema;
10593
}
10694

95+
private static boolean areTypesCompatible(RelDataType type1, RelDataType type2) {
96+
return type1.getSqlTypeName() != null && type1.getSqlTypeName().equals(type2.getSqlTypeName());
97+
}
98+
10799
/**
108100
* Builds a projection for a node to align with the unified schema. For each field in the unified
109101
* schema: - If the node has a matching field with the same type, use it - Otherwise, project NULL
@@ -125,8 +117,8 @@ private static List<RexNode> buildProjectionForNode(
125117
RelDataType expectedType = schemaField.getType();
126118
RelDataTypeField nodeField = nodeFieldMap.get(fieldName);
127119

128-
if (nodeField != null && nodeField.getType().equals(expectedType)) {
129-
// Field exists with matching type - use it
120+
if (nodeField != null && areTypesCompatible(nodeField.getType(), expectedType)) {
121+
// Field exists with compatible type - use it
130122
projection.add(context.rexBuilder.makeInputRef(node, nodeField.getIndex()));
131123
} else {
132124
// Field missing or type mismatch - project NULL

core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,22 @@
1414
import com.google.common.collect.ImmutableList;
1515
import java.lang.reflect.Method;
1616
import java.util.ArrayList;
17+
import java.util.HashSet;
1718
import java.util.List;
1819
import java.util.Objects;
20+
import java.util.Set;
1921
import java.util.function.Predicate;
2022
import java.util.stream.Collectors;
2123
import javax.annotation.Nullable;
2224
import org.apache.calcite.plan.RelOptTable;
2325
import org.apache.calcite.rel.RelHomogeneousShuttle;
2426
import org.apache.calcite.rel.RelNode;
2527
import org.apache.calcite.rel.RelShuttle;
28+
import org.apache.calcite.rel.core.Project;
29+
import org.apache.calcite.rel.core.Sort;
2630
import org.apache.calcite.rel.core.TableScan;
2731
import org.apache.calcite.rel.logical.LogicalProject;
32+
import org.apache.calcite.rel.logical.LogicalSort;
2833
import org.apache.calcite.rel.type.RelDataType;
2934
import org.apache.calcite.rex.RexCall;
3035
import org.apache.calcite.rex.RexCorrelVariable;
@@ -38,6 +43,7 @@
3843
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
3944
import org.apache.calcite.sql.type.SqlTypeName;
4045
import org.apache.calcite.tools.RelBuilder;
46+
import org.apache.calcite.util.Pair;
4147
import org.apache.calcite.util.Util;
4248
import org.opensearch.sql.ast.AbstractNodeVisitor;
4349
import org.opensearch.sql.ast.Node;
@@ -474,13 +480,51 @@ public Void visitInputRef(RexInputRef inputRef) {
474480
return selectedColumns;
475481
}
476482

483+
// `RelDecorrelator` may generate a Project with duplicated fields, e.g. Project($0,$0).
484+
// There will be problem if pushing down the pattern like `Aggregate(AGG($0),{1})-Project($0,$0)`,
485+
// as it will lead to field-name conflict.
486+
// We should wait and rely on `AggregateProjectMergeRule` to mitigate it by having this constraint
487+
// Nevertheless, that rule cannot handle all cases if there is RexCall in the Project,
488+
// e.g. Project($0, $0, +($0,1)). We cannot push down the Aggregate for this corner case.
489+
// TODO: Simplify the Project where there is RexCall by adding a new rule.
490+
static boolean distinctProjectList(LogicalProject project) {
491+
// Change to Set<Pair<RexNode, String>> to resolve
492+
// https://github.com/opensearch-project/sql/issues/4347
493+
Set<Pair<RexNode, String>> rexSet = new HashSet<>();
494+
return project.getNamedProjects().stream().allMatch(rexSet::add);
495+
}
496+
497+
static boolean containsRexOver(LogicalProject project) {
498+
return project.getProjects().stream().anyMatch(RexOver::containsOver);
499+
}
500+
501+
/**
502+
* The LogicalSort is a LIMIT that should be pushed down when its fetch field is not null and its
503+
* collation is empty. For example: <code>sort name | head 5</code> should not be pushed down
504+
* because it has a field collation.
505+
*
506+
* @param sort The LogicalSort to check.
507+
* @return True if the LogicalSort is a LIMIT, false otherwise.
508+
*/
509+
static boolean isLogicalSortLimit(LogicalSort sort) {
510+
return sort.fetch != null;
511+
}
512+
513+
static boolean projectContainsExpr(Project project) {
514+
return project.getProjects().stream().anyMatch(p -> p instanceof RexCall);
515+
}
516+
517+
static boolean sortByFieldsOnly(Sort sort) {
518+
return !sort.getCollation().getFieldCollations().isEmpty() && sort.fetch == null;
519+
}
520+
477521
/**
478522
* Get a string representation of the argument types expressed in ExprType for error messages.
479523
*
480524
* @param argTypes the list of argument types as {@link RelDataType}
481525
* @return a string in the format [type1,type2,...] representing the argument types
482526
*/
483-
public static String getActualSignature(List<RelDataType> argTypes) {
527+
static String getActualSignature(List<RelDataType> argTypes) {
484528
return "["
485529
+ argTypes.stream()
486530
.map(OpenSearchTypeFactory::convertRelDataTypeToExprType)

core/src/main/java/org/opensearch/sql/data/utils/ComparableLinkedHashMap.java

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,29 @@ private int compareRecursive(
4242
if (!thisHasNext) return -1;
4343
if (!otherHasNext) return 1;
4444

45-
V thisValue = thisIterator.next().getValue();
46-
V otherValue = otherIterator.next().getValue();
47-
int comparison = compareValues(thisValue, otherValue);
45+
Map.Entry<K, V> thisEntry = thisIterator.next();
46+
Map.Entry<K, V> otherEntry = otherIterator.next();
47+
K thisKey = thisEntry.getKey();
48+
K otherKey = otherEntry.getKey();
49+
V thisValue = thisEntry.getValue();
50+
V otherValue = otherEntry.getValue();
51+
int comparison = compareKV(thisKey, otherKey, thisValue, otherValue);
4852
if (comparison != 0) return comparison;
4953
return compareRecursive(thisIterator, otherIterator);
5054
}
5155

5256
@SuppressWarnings("unchecked")
53-
private int compareValues(V value1, V value2) {
57+
private int compareKV(K key1, K key2, V value1, V value2) {
58+
int keyCompare;
59+
if (key1 instanceof Comparable) {
60+
keyCompare = ((Comparable<K>) key1).compareTo(key2);
61+
} else {
62+
keyCompare = key1.toString().compareTo(key2.toString());
63+
}
64+
if (keyCompare != 0) {
65+
return keyCompare;
66+
}
67+
5468
if (value1 == null && value2 == null) return 0;
5569
if (value1 == null) return -1;
5670
if (value2 == null) return 1;

0 commit comments

Comments
 (0)