22
33import com .google .common .collect .ArrayListMultimap ;
44import com .google .common .collect .ImmutableList ;
5- import com .google .common .collect .ImmutableMap ;
5+ import com .google .common .collect .ImmutableListMultimap ;
6+ import com .google .common .collect .ListMultimap ;
67import com .google .common .collect .Multimap ;
78import com .google .common .collect .Multimaps ;
89import com .google .common .collect .Streams ;
4546import org .slf4j .Logger ;
4647import org .slf4j .LoggerFactory ;
4748
49+ /**
50+ * Abstract base class for converting between Calcite {@link SqlOperator}s and Substrait function
51+ * invocations.
52+ *
53+ * <p>This class handles bidirectional conversion:
54+ *
55+ * <ul>
56+ * <li><b>Calcite → Substrait:</b> Subclasses implement {@code convert()} methods to convert
57+ * Calcite calls to Substrait function invocations
58+ * <li><b>Substrait → Calcite:</b> {@link #getSqlOperatorFromSubstraitFunc} converts Substrait
59+ * function keys to Calcite {@link SqlOperator}s
60+ * </ul>
61+ *
62+ * <p>When multiple functions with the same name and signature are passed into the constructor, a
63+ * <b>last-wins precedence strategy</b> is used for resolution. The last function in the input list
64+ * takes precedence during Calcite to Substrait conversion.
65+ *
66+ * @param <F> the function type (ScalarFunctionVariant, AggregateFunctionVariant, etc.)
67+ * @param <T> the return type for Calcite→Substrait conversion
68+ * @param <C> the call type being converted
69+ */
4870public abstract class FunctionConverter <
4971 F extends SimpleExtension .Function , T , C extends FunctionConverter .GenericCall > {
5072
@@ -57,10 +79,32 @@ public abstract class FunctionConverter<
5779
5880 protected final Multimap <String , SqlOperator > substraitFuncKeyToSqlOperatorMap ;
5981
/**
 * Creates a FunctionConverter with the given functions, no additional Calcite operator
 * signatures, and the default type converter.
 *
 * <p>This constructor delegates to the four-argument constructor, passing an empty list of
 * additional signatures and {@link TypeConverter#DEFAULT}.
 *
 * <p>If there are multiple functions provided with the same name and signature (e.g., from
 * different extension URNs), the last one in the list will be given precedence during Calcite to
 * Substrait conversion.
 *
 * @param functions the list of function variants to register
 * @param typeFactory the Calcite type factory
 */
public FunctionConverter(List<F> functions, RelDataTypeFactory typeFactory) {
  // Use the generic factory method rather than the raw Collections.EMPTY_LIST constant: it
  // yields the same immutable empty list without an unchecked conversion to
  // List<FunctionMappings.Sig>.
  this(
      functions,
      Collections.<FunctionMappings.Sig>emptyList(),
      typeFactory,
      TypeConverter.DEFAULT);
}
6395
96+ /**
97+ * Creates a FunctionConverter with the given functions and additional signatures.
98+ *
99+ * <p>If there are multiple functions provided with the same name and signature (e.g., from
100+ * different extension URNs), the last one in the list will be given precedence during Calcite to
101+ * Substrait conversion.
102+ *
103+ * @param functions the list of function variants to register
104+ * @param additionalSignatures additional Calcite operator signatures to map
105+ * @param typeFactory the Calcite type factory
106+ * @param typeConverter the type converter to use
107+ */
64108 public FunctionConverter (
65109 List <F > functions ,
66110 List <FunctionMappings .Sig > additionalSignatures ,
@@ -75,9 +119,9 @@ public FunctionConverter(
75119 this .typeFactory = typeFactory ;
76120 this .substraitFuncKeyToSqlOperatorMap = ArrayListMultimap .create ();
77121
78- ArrayListMultimap <String , F > alm = ArrayListMultimap .<String , F >create ();
122+ ArrayListMultimap <String , F > nameToFn = ArrayListMultimap .<String , F >create ();
79123 for (F f : functions ) {
80- alm .put (f .name ().toLowerCase (Locale .ROOT ), f );
124+ nameToFn .put (f .name ().toLowerCase (Locale .ROOT ), f );
81125 }
82126
83127 Multimap <String , FunctionMappings .Sig > calciteOperators =
@@ -87,21 +131,21 @@ public FunctionConverter(
87131 FunctionMappings .Sig ::name , Function .identity (), ArrayListMultimap ::create ));
88132 IdentityHashMap <SqlOperator , FunctionFinder > matcherMap =
89133 new IdentityHashMap <SqlOperator , FunctionFinder >();
90- for (String key : alm .keySet ()) {
134+ for (String key : nameToFn .keySet ()) {
91135 Collection <Sig > sigs = calciteOperators .get (key );
92136 if (sigs .isEmpty ()) {
93137 LOGGER .atDebug ().log ("No binding for function: {}" , key );
94138 }
95139
96140 for (Sig sig : sigs ) {
97- List <F > implList = alm .get (key );
141+ List <F > implList = nameToFn .get (key );
98142 if (!implList .isEmpty ()) {
99143 matcherMap .put (sig .operator (), new FunctionFinder (key , sig .operator (), implList ));
100144 }
101145 }
102146 }
103147
104- for (Entry <String , F > entry : alm .entries ()) {
148+ for (Entry <String , F > entry : nameToFn .entries ()) {
105149 String key = entry .getKey ();
106150 F func = entry .getValue ();
107151 for (FunctionMappings .Sig sig : calciteOperators .get (key )) {
@@ -112,6 +156,17 @@ public FunctionConverter(
112156 this .signatures = matcherMap ;
113157 }
114158
159+ /**
160+ * Converts a Substrait function to a Calcite {@link SqlOperator} (Substrait → Calcite direction).
161+ *
162+ * <p>Given a Substrait function key (e.g., "concat:str_str") and output type, this method finds
163+ * the corresponding Calcite {@link SqlOperator}. When multiple operators match, the output type
164+ * is used to disambiguate.
165+ *
166+ * @param key the Substrait function key (function name with type signature)
167+ * @param outputType the expected output type
168+ * @return the matching {@link SqlOperator}, or empty if no match found
169+ */
115170 public Optional <SqlOperator > getSqlOperatorFromSubstraitFunc (String key , Type outputType ) {
116171 Map <SqlOperator , TypeBasedResolver > resolver = getTypeBasedResolver ();
117172 Collection <SqlOperator > operators = substraitFuncKeyToSqlOperatorMap .get (key );
@@ -155,7 +210,7 @@ protected class FunctionFinder {
155210 private final String substraitName ;
156211 private final SqlOperator operator ;
157212 private final List <F > functions ;
158- private final Map <String , F > directMap ;
213+ private final ListMultimap <String , F > directMap ;
159214 private final Optional <SingularArgumentMatcher <F >> singularInputType ;
160215 private final Util .IntRange argRange ;
161216
@@ -168,7 +223,7 @@ public FunctionFinder(String substraitName, SqlOperator operator, List<F> functi
168223 functions .stream ().mapToInt (t -> t .getRange ().getStartInclusive ()).min ().getAsInt (),
169224 functions .stream ().mapToInt (t -> t .getRange ().getEndExclusive ()).max ().getAsInt ());
170225 this .singularInputType = getSingularInputType (functions );
171- ImmutableMap .Builder <String , F > directMap = ImmutableMap .builder ();
226+ ImmutableListMultimap .Builder <String , F > directMap = ImmutableListMultimap .builder ();
172227 for (F func : functions ) {
173228 String key = func .key ();
174229 directMap .put (key , func );
@@ -342,13 +397,29 @@ private Stream<String> matchKeys(List<RexNode> rexOperands, List<String> opTypes
342397 }
343398 }
344399
400+ /**
401+ * Converts a Calcite call to a Substrait function invocation (Calcite → Substrait direction).
402+ *
403+ * <p>This method tries to find a matching Substrait function for the given Calcite call using
404+ * direct signature matching, type coercion, and least-restrictive type resolution.
405+ *
406+ * <p>If multiple registered function extensions have the same name and signature, the last one
407+ * in the list passed into the constructor will be matched.
408+ *
409+ * @param call the Calcite call to match
410+ * @param topLevelConverter function to convert RexNode operands to Substrait Expressions
411+ * @return the matched Substrait function binding, or empty if no match found
412+ */
345413 public Optional <T > attemptMatch (C call , Function <RexNode , Expression > topLevelConverter ) {
346414
/*
 * Here a RexLiteral with an Enum value is mapped to a String literal.
 * There is not enough context here to construct a Substrait EnumArg.
 * Once a FunctionVariant is resolved we can map the String literal
 * to an EnumArg.
 *
 * Note that if multiple registered function extensions can match a particular call,
 * the last one added to the extension collection will be matched.
 */
353424 List <RexNode > operandsList = call .getOperands ().collect (Collectors .toList ());
354425 List <Expression > operands =
@@ -369,7 +440,13 @@ public Optional<T> attemptMatch(C call, Function<RexNode, Expression> topLevelCo
369440 .findFirst ();
370441
371442 if (directMatchKey .isPresent ()) {
372- F variant = directMap .get (directMatchKey .get ());
443+ List <F > variants = directMap .get (directMatchKey .get ());
444+ if (variants .isEmpty ()) {
445+
446+ return Optional .empty ();
447+ }
448+
449+ F variant = variants .get (variants .size () - 1 );
373450 variant .validateOutputType (operands , outputType );
374451 List <FunctionArg > funcArgs =
375452 IntStream .range (0 , operandsList .size ())
0 commit comments