Skip to content

Commit 434df46

Browse files
committed
feat: substrait builder extra api
Signed-off-by: MBWhite <whitemat@uk.ibm.com>
1 parent af52a69 commit 434df46

File tree

4 files changed

+474
-13
lines changed

4 files changed

+474
-13
lines changed

.bob/notes/pending-notes.txt

Whitespace-only changes.

build-logic/.kotlin/sessions/kotlin-compiler-17666991750707048222.salive

Whitespace-only changes.

core/src/main/java/io/substrait/dsl/SubstraitBuilder.java

Lines changed: 209 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import io.substrait.expression.Expression.SwitchClause;
1313
import io.substrait.expression.FieldReference;
1414
import io.substrait.expression.FunctionArg;
15+
import io.substrait.expression.FunctionOption;
1516
import io.substrait.expression.WindowBound;
1617
import io.substrait.extension.DefaultExtensionCatalog;
1718
import io.substrait.extension.SimpleExtension;
@@ -39,6 +40,7 @@
3940
import io.substrait.type.NamedStruct;
4041
import io.substrait.type.Type;
4142
import io.substrait.type.TypeCreator;
43+
import java.util.ArrayList;
4244
import java.util.Arrays;
4345
import java.util.Collections;
4446
import java.util.LinkedList;
@@ -73,6 +75,17 @@ public class SubstraitBuilder {
7375

7476
private final SimpleExtension.ExtensionCollection extensions;
7577

78+
/**
79+
* Constructs a new SubstraitBuilder with the default extension collection.
80+
*
81+
* <p>The builder is initialized with {@link DefaultExtensionCatalog#DEFAULT_COLLECTION}, which
82+
* includes standard Substrait functions for strings, arithmetic, comparison, datetime, and other
83+
* operations.
84+
*/
85+
public SubstraitBuilder() {
86+
this(DefaultExtensionCatalog.DEFAULT_COLLECTION);
87+
}
88+
7689
/**
7790
* Constructs a new SubstraitBuilder with the specified extension collection.
7891
*
@@ -83,6 +96,34 @@ public SubstraitBuilder(SimpleExtension.ExtensionCollection extensions) {
8396
this.extensions = extensions;
8497
}
8598

99+
/**
100+
* Gets the extension collection used by this builder.
101+
*
102+
* <p>This is the collection supplied at construction time; when the no-argument constructor is
103+
* used, it is {@link DefaultExtensionCatalog#DEFAULT_COLLECTION}.
104+
*
105+
* @return the ExtensionCollection this builder uses to look up function declarations
106+
*/
107+
public SimpleExtension.ExtensionCollection getExtensions() {
108+
return extensions;
109+
}
110+
111+
/**
112+
* Creates a Plan.Root, which is the top-level container for a Substrait query plan.
113+
*
114+
* <p>The Plan.Root wraps a relational expression tree and associates output column names with the
115+
* plan. This is the final step in building a complete Substrait plan that can be serialized and
116+
* executed by a Substrait consumer.
117+
*
118+
* @param input the root relational expression of the query plan
119+
* @param names the ordered list of output column names corresponding to the input relation's
120+
* output schema
121+
* @return a Plan.Root containing the query plan and output column names
122+
*/
123+
public Plan.Root root(final Rel input, final List<String> names) {
124+
return Plan.Root.builder().input(input).names(names).build();
125+
}
126+
86127
// Relations
87128

88129
/**
@@ -142,13 +183,32 @@ public Aggregate aggregate(
142183
return aggregate(groupingsFn, measuresFn, Optional.of(remap), input);
143184
}
144185

145-
private Aggregate aggregate(
146-
Function<Rel, List<Aggregate.Grouping>> groupingsFn,
147-
Function<Rel, List<Aggregate.Measure>> measuresFn,
148-
Optional<Rel.Remap> remap,
149-
Rel input) {
150-
List<Aggregate.Grouping> groupings = groupingsFn.apply(input);
151-
List<Aggregate.Measure> measures = measuresFn.apply(input);
186+
/**
187+
* Creates an aggregate relation that groups and aggregates data from an input relation.
188+
*
189+
* <p>This method constructs a Substrait aggregate operation by applying grouping and measure
190+
* functions to the input relation. The grouping function defines how rows are grouped together,
191+
* while the measure function defines the aggregate computations (e.g., SUM, COUNT, AVG) to
192+
* perform on each group.
193+
*
194+
* <p>The optional remap parameter allows reordering or filtering of output columns, which is
195+
* useful for controlling the final schema of the aggregate result.
196+
*
197+
* @param groupingsFn a function that takes the input relation and returns a list of grouping
198+
* expressions defining how to partition the data
199+
* @param measuresFn a function that takes the input relation and returns a list of aggregate
200+
* measures to compute for each group
201+
* @param remap an optional remapping specification to reorder or filter output columns
202+
* @param input the input relation to aggregate
203+
* @return an Aggregate relation representing the grouping and aggregation operation
204+
*/
205+
public Aggregate aggregate(
206+
final Function<Rel, List<Aggregate.Grouping>> groupingsFn,
207+
final Function<Rel, List<Aggregate.Measure>> measuresFn,
208+
final Optional<Rel.Remap> remap,
209+
final Rel input) {
210+
final List<Aggregate.Grouping> groupings = groupingsFn.apply(input);
211+
final List<Aggregate.Measure> measures = measuresFn.apply(input);
152212
return Aggregate.builder()
153213
.groupings(groupings)
154214
.measures(measures)
@@ -853,24 +913,64 @@ public Expression.BoolLiteral bool(boolean v) {
853913
return Expression.BoolLiteral.builder().value(v).build();
854914
}
855915

916+
/**
917+
* Create i8 literal.
918+
*
919+
* @param value value to create
920+
* @return i8 instance
921+
*/
922+
public Expression.I8Literal i8(final int value) {
923+
return Expression.I8Literal.builder().value(value).build();
924+
}
925+
926+
/**
927+
* Create i16 literal.
928+
*
929+
* @param value value to create
930+
* @return i16 instance
931+
*/
932+
public Expression.I16Literal i16(final int value) {
933+
return Expression.I16Literal.builder().value(value).build();
934+
}
935+
856936
/**
857937
* Creates a 32-bit integer literal expression.
858938
*
859-
* @param v the integer value
939+
* @param value the integer value
860940
* @return a new {@link Expression.I32Literal}
861941
*/
862-
public Expression.I32Literal i32(int v) {
863-
return Expression.I32Literal.builder().value(v).build();
942+
public Expression.I32Literal i32(final int value) {
943+
return Expression.I32Literal.builder().value(value).build();
944+
}
945+
946+
/**
947+
* Create i64 literal.
948+
*
949+
* @param value value to create
950+
* @return i64 instance
951+
*/
952+
public Expression.I64Literal i64(final long value) {
953+
return Expression.I64Literal.builder().value(value).build();
954+
}
955+
956+
/**
957+
* Creates a 32-bit floating point literal expression.
958+
*
959+
* @param value the float value
960+
* @return a new {@link Expression.FP32Literal}
961+
*/
962+
public Expression.FP32Literal fp32(final float value) {
963+
return Expression.FP32Literal.builder().value(value).build();
864964
}
865965

866966
/**
867967
* Creates a 64-bit floating point literal expression.
868968
*
869-
* @param v the double value
969+
* @param value the double value
870970
* @return a new {@link Expression.FP64Literal}
871971
*/
872-
public Expression.FP64Literal fp64(double v) {
873-
return Expression.FP64Literal.builder().value(v).build();
972+
public Expression.FP64Literal fp64(final double value) {
973+
return Expression.FP64Literal.builder().value(value).build();
874974
}
875975

876976
/**
@@ -1439,6 +1539,79 @@ public Expression.ScalarFunctionInvocation or(Expression... args) {
14391539
return scalarFn(DefaultExtensionCatalog.FUNCTIONS_BOOLEAN, "or:bool", outputType, args);
14401540
}
14411541

1542+
/**
1543+
* Creates a logical NOT expression that negates a boolean expression.
1544+
*
1545+
* <p>This is a convenience method that wraps the boolean NOT function from the Substrait standard
1546+
* library. The result is nullable to handle NULL input values according to three-valued logic.
1547+
*
1548+
* @param expression the boolean expression to negate
1549+
* @return a scalar function invocation representing the logical NOT of the input expression
1550+
*/
1551+
public Expression not(final Expression expression) {
1552+
return this.scalarFn(
1553+
DefaultExtensionCatalog.FUNCTIONS_BOOLEAN,
1554+
"not:bool",
1555+
TypeCreator.NULLABLE.BOOLEAN,
1556+
expression);
1557+
}
1558+
1559+
/**
1560+
* Creates a null-check expression that tests whether an expression is null.
1561+
*
1562+
* <p>This is a convenience method that wraps the is_null function from the Substrait comparison
1563+
* function library. The function evaluates the input expression and returns true if it is null,
1564+
* false otherwise. This is commonly used in conditional logic and filtering operations.
1565+
*
1566+
* <p>The return type is always a required (non-nullable) boolean, as the null check itself always
1567+
* produces a definite true/false result.
1568+
*
1569+
* @param expression the expression to test for null
1570+
* @return a scalar function invocation that returns true if the expression is null, false
1571+
* otherwise
1572+
*/
1573+
public Expression isNull(final Expression expression) {
1574+
1575+
final List<Expression> args = new ArrayList<>();
1576+
args.add(expression);
1577+
1578+
return this.scalarFn(
1579+
DefaultExtensionCatalog.FUNCTIONS_COMPARISON,
1580+
"is_null:any",
1581+
TypeCreator.REQUIRED.BOOLEAN,
1582+
args,
1583+
new ArrayList<FunctionOption>());
1584+
}
1585+
1586+
/**
1587+
* Creates a scalar function invocation with function options.
1588+
*
1589+
* <p>Unlike the overloads that take no options, this variant passes function options through to
1590+
* the invocation; options control function behavior (e.g., rounding modes, overflow handling).
1591+
*
1592+
* @param urn the extension URN (e.g., {@link DefaultExtensionCatalog#FUNCTIONS_STRING})
1593+
* @param key the function signature (e.g., "substring:str_i32_i32")
1594+
* @param returnType the return type of the function
1595+
* @param args the function arguments
1596+
* @param optionsList the function options controlling behavior
1597+
* @return a scalar function invocation expression
1598+
*/
1599+
public Expression scalarFn(
1600+
final String urn,
1601+
final String key,
1602+
final Type returnType,
1603+
final List<? extends FunctionArg> args,
1604+
final List<FunctionOption> optionsList) {
1605+
final SimpleExtension.ScalarFunctionVariant declaration =
1606+
extensions.getScalarFunction(SimpleExtension.FunctionAnchor.of(urn, key));
1607+
return Expression.ScalarFunctionInvocation.builder()
1608+
.declaration(declaration)
1609+
.options(optionsList)
1610+
.outputType(returnType)
1611+
.arguments(args)
1612+
.build();
1613+
}
1614+
14421615
/**
14431616
* Creates a scalar function invocation with specified arguments.
14441617
*
@@ -1459,6 +1632,29 @@ public Expression.ScalarFunctionInvocation scalarFn(
14591632
.build();
14601633
}
14611634

1635+
/**
1636+
* Creates a scalar function invocation without function options.
1637+
*
1638+
* @param urn the extension URN (e.g., {@link DefaultExtensionCatalog#FUNCTIONS_STRING})
1639+
* @param key the function signature (e.g., "substring:str_i32_i32")
1640+
* @param returnType the return type of the function
1641+
* @param args the function arguments
1642+
* @return a scalar function invocation expression
1643+
*/
1644+
public Expression scalarFn(
1645+
final String urn,
1646+
final String key,
1647+
final Type returnType,
1648+
final List<? extends FunctionArg> args) {
1649+
final SimpleExtension.ScalarFunctionVariant declaration =
1650+
extensions.getScalarFunction(SimpleExtension.FunctionAnchor.of(urn, key));
1651+
return Expression.ScalarFunctionInvocation.builder()
1652+
.declaration(declaration)
1653+
.outputType(returnType)
1654+
.arguments(args)
1655+
.build();
1656+
}
1657+
14621658
/**
14631659
* Creates a window function invocation with specified arguments and window bounds.
14641660
*

0 commit comments

Comments
 (0)