1212import io .substrait .expression .Expression .SwitchClause ;
1313import io .substrait .expression .FieldReference ;
1414import io .substrait .expression .FunctionArg ;
15+ import io .substrait .expression .FunctionOption ;
1516import io .substrait .expression .WindowBound ;
1617import io .substrait .extension .DefaultExtensionCatalog ;
1718import io .substrait .extension .SimpleExtension ;
3940import io .substrait .type .NamedStruct ;
4041import io .substrait .type .Type ;
4142import io .substrait .type .TypeCreator ;
43+ import java .util .ArrayList ;
4244import java .util .Arrays ;
4345import java .util .Collections ;
4446import java .util .LinkedList ;
@@ -73,6 +75,17 @@ public class SubstraitBuilder {
7375
7476 private final SimpleExtension .ExtensionCollection extensions ;
7577
78+ /**
79+ * Constructs a new SubstraitBuilder with the default extension collection.
80+ *
81+ * <p>The builder is initialized with {@link DefaultExtensionCatalog#DEFAULT_COLLECTION}, which
82+ * includes standard Substrait functions for strings, arithmetic, comparison, datetime, and other
83+ * operations.
84+ */
85+ public SubstraitBuilder () {
86+ this (DefaultExtensionCatalog .DEFAULT_COLLECTION );
87+ }
88+
7689 /**
7790 * Constructs a new SubstraitBuilder with the specified extension collection.
7891 *
@@ -83,6 +96,34 @@ public SubstraitBuilder(SimpleExtension.ExtensionCollection extensions) {
8396 this .extensions = extensions ;
8497 }
8598
99+ /**
100+ * Gets the extension collection used by this builder.
101+ *
102+ * <p>This is the collection supplied at construction time; it is {@link
103+ * DefaultExtensionCatalog#DEFAULT_COLLECTION} only when the no-argument constructor was used.
104+ *
105+ * @return the ExtensionCollection this builder resolves function declarations against
106+ */
107+ public SimpleExtension .ExtensionCollection getExtensions () {
108+ return extensions ;
109+ }
110+
111+ /**
112+ * Creates a Plan.Root, which is the top-level container for a Substrait query plan.
113+ *
114+ * <p>The Plan.Root wraps a relational expression tree and associates output column names with the
115+ * plan. This is the final step in building a complete Substrait plan that can be serialized and
116+ * executed by a Substrait consumer.
117+ *
118+ * @param input the root relational expression of the query plan
119+ * @param names the ordered list of output column names corresponding to the input relation's
120+ * output schema
121+ * @return a Plan.Root containing the query plan and output column names
122+ */
123+ public Plan .Root root (final Rel input , final List <String > names ) {
124+ return Plan .Root .builder ().input (input ).names (names ).build ();
125+ }
126+
86127 // Relations
87128
88129 /**
@@ -142,13 +183,32 @@ public Aggregate aggregate(
142183 return aggregate (groupingsFn , measuresFn , Optional .of (remap ), input );
143184 }
144185
145- private Aggregate aggregate (
146- Function <Rel , List <Aggregate .Grouping >> groupingsFn ,
147- Function <Rel , List <Aggregate .Measure >> measuresFn ,
148- Optional <Rel .Remap > remap ,
149- Rel input ) {
150- List <Aggregate .Grouping > groupings = groupingsFn .apply (input );
151- List <Aggregate .Measure > measures = measuresFn .apply (input );
186+ /**
187+ * Creates an aggregate relation that groups and aggregates data from an input relation.
188+ *
189+ * <p>This method constructs a Substrait aggregate operation by applying grouping and measure
190+ * functions to the input relation. The grouping function defines how rows are grouped together,
191+ * while the measure function defines the aggregate computations (e.g., SUM, COUNT, AVG) to
192+ * perform on each group.
193+ *
194+ * <p>The optional remap parameter allows reordering or filtering of output columns, which is
195+ * useful for controlling the final schema of the aggregate result.
196+ *
197+ * @param groupingsFn a function that takes the input relation and returns a list of grouping
198+ * expressions defining how to partition the data
199+ * @param measuresFn a function that takes the input relation and returns a list of aggregate
200+ * measures to compute for each group
201+ * @param remap an optional remapping specification to reorder or filter output columns
202+ * @param input the input relation to aggregate
203+ * @return an Aggregate relation representing the grouping and aggregation operation
204+ */
205+ public Aggregate aggregate (
206+ final Function <Rel , List <Aggregate .Grouping >> groupingsFn ,
207+ final Function <Rel , List <Aggregate .Measure >> measuresFn ,
208+ final Optional <Rel .Remap > remap ,
209+ final Rel input ) {
210+ final List <Aggregate .Grouping > groupings = groupingsFn .apply (input );
211+ final List <Aggregate .Measure > measures = measuresFn .apply (input );
152212 return Aggregate .builder ()
153213 .groupings (groupings )
154214 .measures (measures )
@@ -853,24 +913,64 @@ public Expression.BoolLiteral bool(boolean v) {
853913 return Expression .BoolLiteral .builder ().value (v ).build ();
854914 }
855915
916+ /**
917+ * Creates an 8-bit integer literal expression.
918+ *
919+ * @param value the integer value
920+ * @return a new {@link Expression.I8Literal}
921+ */
922+ public Expression .I8Literal i8 (final int value ) {
923+ return Expression .I8Literal .builder ().value (value ).build ();
924+ }
925+
926+ /**
927+ * Creates a 16-bit integer literal expression.
928+ *
929+ * @param value the integer value
930+ * @return a new {@link Expression.I16Literal}
931+ */
932+ public Expression .I16Literal i16 (final int value ) {
933+ return Expression .I16Literal .builder ().value (value ).build ();
934+ }
935+
856936 /**
857937 * Creates a 32-bit integer literal expression.
858938 *
859- * @param v the integer value
939+ * @param value the integer value
860940 * @return a new {@link Expression.I32Literal}
861941 */
862- public Expression .I32Literal i32 (int v ) {
863- return Expression .I32Literal .builder ().value (v ).build ();
942+ public Expression .I32Literal i32 (final int value ) {
943+ return Expression .I32Literal .builder ().value (value ).build ();
944+ }
945+
946+ /**
947+ * Creates a 64-bit integer literal expression.
948+ *
949+ * @param value the long value
950+ * @return a new {@link Expression.I64Literal}
951+ */
952+ public Expression .I64Literal i64 (final long value ) {
953+ return Expression .I64Literal .builder ().value (value ).build ();
954+ }
955+
956+ /**
957+ * Creates a 32-bit floating point literal expression.
958+ *
959+ * @param value the float value
960+ * @return a new {@link Expression.FP32Literal}
961+ */
962+ public Expression .FP32Literal fp32 (final float value ) {
963+ return Expression .FP32Literal .builder ().value (value ).build ();
864964 }
865965
866966 /**
867967 * Creates a 64-bit floating point literal expression.
868968 *
869- * @param v the double value
969+ * @param value the double value
870970 * @return a new {@link Expression.FP64Literal}
871971 */
872- public Expression .FP64Literal fp64 (double v ) {
873- return Expression .FP64Literal .builder ().value (v ).build ();
972+ public Expression .FP64Literal fp64 (final double value ) {
973+ return Expression .FP64Literal .builder ().value (value ).build ();
874974 }
875975
876976 /**
@@ -1439,6 +1539,79 @@ public Expression.ScalarFunctionInvocation or(Expression... args) {
14391539 return scalarFn (DefaultExtensionCatalog .FUNCTIONS_BOOLEAN , "or:bool" , outputType , args );
14401540 }
14411541
1542+ /**
1543+ * Creates a logical NOT expression that negates a boolean expression.
1544+ *
1545+ * <p>This is a convenience method that wraps the boolean NOT function from the Substrait standard
1546+ * library. The result is nullable to handle NULL input values according to three-valued logic.
1547+ *
1548+ * @param expression the boolean expression to negate
1549+ * @return a scalar function invocation representing the logical NOT of the input expression
1550+ */
1551+ public Expression not (final Expression expression ) {
1552+ return this .scalarFn (
1553+ DefaultExtensionCatalog .FUNCTIONS_BOOLEAN ,
1554+ "not:bool" ,
1555+ TypeCreator .NULLABLE .BOOLEAN ,
1556+ expression );
1557+ }
1558+
1559+ /**
1560+ * Creates a null-check expression that tests whether an expression is null.
1561+ *
1562+ * <p>This is a convenience method that wraps the is_null function from the Substrait comparison
1563+ * function library. The function evaluates the input expression and returns true if it is null,
1564+ * false otherwise. This is commonly used in conditional logic and filtering operations.
1565+ *
1566+ * <p>The return type is always a required (non-nullable) boolean, as the null check itself always
1567+ * produces a definite true/false result.
1568+ *
1569+ * @param expression the expression to test for null
1570+ * @return a scalar function invocation that returns true if the expression is null, false
1571+ * otherwise
1572+ */
1573+ public Expression isNull (final Expression expression ) {
1574+ // Wrap the single operand in a list so the list-based scalarFn overload can be used.
1575+ final List <Expression > args = new ArrayList <>();
1576+ args .add (expression );
1577+
1578+ return this .scalarFn (
1579+ DefaultExtensionCatalog .FUNCTIONS_COMPARISON ,
1580+ "is_null:any" ,
1581+ TypeCreator .REQUIRED .BOOLEAN ,
1582+ args ,
1583+ new ArrayList <FunctionOption >());
1584+ }
1585+
1586+ /**
1587+ * Creates a scalar function invocation with function options.
1588+ *
1589+ * <p>The function declaration is looked up in this builder's extension collection by URN and key,
1590+ * and the supplied options are attached to the resulting invocation.
1591+ *
1592+ * @param urn the extension URN (e.g., {@link DefaultExtensionCatalog#FUNCTIONS_STRING})
1593+ * @param key the function signature (e.g., "substring:str_i32_i32")
1594+ * @param returnType the return type of the function
1595+ * @param args the function arguments
1596+ * @param optionsList the function options controlling behavior
1597+ * @return a scalar function invocation expression
1598+ */
1599+ public Expression scalarFn (
1600+ final String urn ,
1601+ final String key ,
1602+ final Type returnType ,
1603+ final List <? extends FunctionArg > args ,
1604+ final List <FunctionOption > optionsList ) {
1605+ final SimpleExtension .ScalarFunctionVariant declaration =
1606+ extensions .getScalarFunction (SimpleExtension .FunctionAnchor .of (urn , key ));
1607+ return Expression .ScalarFunctionInvocation .builder ()
1608+ .declaration (declaration )
1609+ .options (optionsList )
1610+ .outputType (returnType )
1611+ .arguments (args )
1612+ .build ();
1613+ }
1614+
14421615 /**
14431616 * Creates a scalar function invocation with specified arguments.
14441617 *
@@ -1459,6 +1632,29 @@ public Expression.ScalarFunctionInvocation scalarFn(
14591632 .build ();
14601633 }
14611634
1635+ /**
1636+ * Creates a scalar function invocation from a list of arguments, without function options.
1637+ *
1638+ * @param urn the extension URN (e.g., {@link DefaultExtensionCatalog#FUNCTIONS_STRING})
1639+ * @param key the function signature (e.g., "substring:str_i32_i32")
1640+ * @param returnType the return type of the function
1641+ * @param args the function arguments
1642+ * @return a scalar function invocation expression
1643+ */
1644+ public Expression scalarFn (
1645+ final String urn ,
1646+ final String key ,
1647+ final Type returnType ,
1648+ final List <? extends FunctionArg > args ) {
1649+ final SimpleExtension .ScalarFunctionVariant declaration =
1650+ extensions .getScalarFunction (SimpleExtension .FunctionAnchor .of (urn , key ));
1651+ return Expression .ScalarFunctionInvocation .builder ()
1652+ .declaration (declaration )
1653+ .outputType (returnType )
1654+ .arguments (args )
1655+ .build ();
1656+ }
1657+
14621658 /**
14631659 * Creates a window function invocation with specified arguments and window bounds.
14641660 *
0 commit comments