Skip to content

Commit 6dcb66c

Browse files
ulysses-you authored and IvanK-db committed
[SPARK-49200][SQL] Fix null type non-codegen ordering exception
### What changes were proposed in this pull request?

Spark marks `NullType` as orderable, and we return 0 when generating the comparison code for `NullType`.

```
object OrderUtils {
  def isOrderable(dataType: DataType): Boolean = dataType match {
    case NullType => true
```

This PR makes `NullType` ordering work for the non-codegen path to avoid an exception.

### Why are the changes needed?

Fix exception:

```sql
set spark.sql.codegen.factoryMode=NO_CODEGEN;
set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.EliminateSorts;
select * from range(10) order by array(null);
```

```
org.apache.spark.SparkIllegalArgumentException: Type PhysicalNullType does not support ordered operations.
  at org.apache.spark.sql.errors.QueryExecutionErrors$.orderedOperationUnsupportedByDataTypeError(QueryExecutionErrors.scala:352)
  at org.apache.spark.sql.catalyst.types.PhysicalNullType.ordering(PhysicalDataType.scala:246)
  at org.apache.spark.sql.catalyst.types.PhysicalNullType.ordering(PhysicalDataType.scala:243)
  at org.apache.spark.sql.catalyst.types.PhysicalArrayType$$anon$1.<init>(PhysicalDataType.scala:283)
  at org.apache.spark.sql.catalyst.types.PhysicalArrayType.interpretedOrdering$lzycompute(PhysicalDataType.scala:281)
  at org.apache.spark.sql.catalyst.types.PhysicalArrayType.interpretedOrdering(PhysicalDataType.scala:281)
  at org.apache.spark.sql.catalyst.types.PhysicalArrayType.ordering(PhysicalDataType.scala:277)
  at org.apache.spark.sql.catalyst.expressions.InterpretedOrdering.compare(ordering.scala:67)
  at org.apache.spark.sql.catalyst.expressions.InterpretedOrdering.compare(ordering.scala:40)
  at org.apache.spark.sql.execution.UnsafeExternalRowSorter$RowComparator.compare(UnsafeExternalRowSorter.java:254)
  at org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter$SortComparator.compare(UnsafeInMemorySorter.java:70)
  at org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter$SortComparator.compare(UnsafeInMemorySorter.java:44)
```

### Does this PR introduce _any_ user-facing change?

yes, bug fix

### How was this patch tested?

add test

### Was this patch authored or co-authored using generative AI tooling?

no

Closes apache#47707 from ulysses-you/null-ordering.

Authored-by: ulysses-you <[email protected]>
Signed-off-by: youxiduo <[email protected]>
1 parent 5dfb1cd commit 6dcb66c

File tree

2 files changed

+13
-3
lines changed

2 files changed

+13
-3
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/PhysicalDataType.scala

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -242,8 +242,7 @@ case class PhysicalMapType(keyType: DataType, valueType: DataType, valueContains
242242

243243
class PhysicalNullType() extends PhysicalDataType with PhysicalPrimitiveType {
244244
override private[sql] def ordering =
245-
throw QueryExecutionErrors.orderedOperationUnsupportedByDataTypeError(
246-
"PhysicalNullType")
245+
implicitly[Ordering[Unit]].asInstanceOf[Ordering[Any]]
247246
override private[sql] type InternalType = Any
248247
@transient private[sql] lazy val tag = typeTag[InternalType]
249248
}

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ import org.apache.commons.io.FileUtils
3131
import org.apache.spark.{AccumulatorSuite, SPARK_DOC_ROOT, SparkArithmeticException, SparkDateTimeException, SparkException, SparkNumberFormatException, SparkRuntimeException}
3232
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
3333
import org.apache.spark.sql.catalyst.ExtendedAnalysisException
34-
import org.apache.spark.sql.catalyst.expressions.{GenericRow, Hex}
34+
import org.apache.spark.sql.catalyst.expressions.{CodegenObjectFactoryMode, GenericRow, Hex}
3535
import org.apache.spark.sql.catalyst.expressions.Cast._
3636
import org.apache.spark.sql.catalyst.expressions.aggregate.{Complete, Partial}
3737
import org.apache.spark.sql.catalyst.optimizer.{ConvertToLocalRelation, NestedColumnAliasingSuite}
@@ -1430,6 +1430,17 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
14301430
}
14311431
}
14321432

1433+
test("SPARK-49200: Fix null type non-codegen ordering exception") {
1434+
withSQLConf(
1435+
SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString,
1436+
SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
1437+
"org.apache.spark.sql.catalyst.optimizer.EliminateSorts") {
1438+
checkAnswer(
1439+
sql("SELECT * FROM range(3) ORDER BY array(null)"),
1440+
Seq(Row(0), Row(1), Row(2)))
1441+
}
1442+
}
1443+
14331444
test("SPARK-8837: use keyword in column name") {
14341445
withTempView("t") {
14351446
val df = Seq(1 -> "a").toDF("count", "sort")

0 commit comments

Comments
 (0)