@@ -486,15 +486,19 @@ fn check_alignment(
486
486
#[ cfg( test) ]
487
487
mod tests {
488
488
use super :: * ;
489
+ use crate :: datasource:: listing:: PartitionedFile ;
490
+ use crate :: datasource:: object_store:: ObjectStoreUrl ;
489
491
use crate :: physical_plan:: displayable;
492
+ use crate :: physical_plan:: file_format:: { FileScanConfig , ParquetExec } ;
490
493
use crate :: physical_plan:: filter:: FilterExec ;
491
494
use crate :: physical_plan:: memory:: MemoryExec ;
492
495
use crate :: physical_plan:: sorts:: sort_preserving_merge:: SortPreservingMergeExec ;
496
+ use crate :: physical_plan:: union:: UnionExec ;
493
497
use crate :: physical_plan:: windows:: create_window_expr;
494
498
use crate :: prelude:: SessionContext ;
495
499
use arrow:: compute:: SortOptions ;
496
500
use arrow:: datatypes:: { DataType , Field , Schema , SchemaRef } ;
497
- use datafusion_common:: Result ;
501
+ use datafusion_common:: { Result , Statistics } ;
498
502
use datafusion_expr:: { AggregateFunction , WindowFrame , WindowFunction } ;
499
503
use datafusion_physical_expr:: expressions:: { col, NotExpr } ;
500
504
use datafusion_physical_expr:: PhysicalSortExpr ;
@@ -813,6 +817,33 @@ mod tests {
813
817
Ok ( ( ) )
814
818
}
815
819
820
+ #[ tokio:: test]
821
+ async fn test_union_inputs_sorted ( ) -> Result < ( ) > {
822
+ let schema = create_test_schema ( ) ?;
823
+
824
+ let source1 = parquet_exec ( & schema) ;
825
+ let sort_exprs = vec ! [ sort_expr( "nullable_col" , & schema) ] ;
826
+ let sort = sort_exec ( sort_exprs. clone ( ) , source1) ;
827
+
828
+ let source2 = parquet_exec_sorted ( & schema, sort_exprs. clone ( ) ) ;
829
+
830
+ let union = union_exec ( vec ! [ source2, sort] ) ;
831
+ let physical_plan = sort_preserving_merge_exec ( sort_exprs, union) ;
832
+
833
+ // one input to the union is already sorted, one is not.
834
+ let expected_input = vec ! [
835
+ "SortPreservingMergeExec: [nullable_col@0 ASC]" ,
836
+ " UnionExec" ,
837
+ " ParquetExec: limit=None, partitions={1 group: [[x]]}, output_ordering=[nullable_col@0 ASC], projection=[nullable_col, non_nullable_col]" ,
838
+ " SortExec: [nullable_col@0 ASC]" ,
839
+ " ParquetExec: limit=None, partitions={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]" ,
840
+ ] ;
841
+ // should not add a sort at the output of the union, input plan should not be changed
842
+ let expected_optimized = expected_input. clone ( ) ;
843
+ assert_optimized ! ( expected_input, expected_optimized, physical_plan) ;
844
+ Ok ( ( ) )
845
+ }
846
+
816
847
/// make PhysicalSortExpr with default options
817
848
fn sort_expr ( name : & str , schema : & Schema ) -> PhysicalSortExpr {
818
849
sort_expr_options ( name, schema, SortOptions :: default ( ) )
@@ -856,4 +887,51 @@ mod tests {
856
887
) -> Arc < dyn ExecutionPlan > {
857
888
Arc :: new ( FilterExec :: try_new ( predicate, input) . unwrap ( ) )
858
889
}
890
+
891
+ /// Create a non sorted parquet exec
892
+ fn parquet_exec ( schema : & SchemaRef ) -> Arc < ParquetExec > {
893
+ Arc :: new ( ParquetExec :: new (
894
+ FileScanConfig {
895
+ object_store_url : ObjectStoreUrl :: parse ( "test:///" ) . unwrap ( ) ,
896
+ file_schema : schema. clone ( ) ,
897
+ file_groups : vec ! [ vec![ PartitionedFile :: new( "x" . to_string( ) , 100 ) ] ] ,
898
+ statistics : Statistics :: default ( ) ,
899
+ projection : None ,
900
+ limit : None ,
901
+ table_partition_cols : vec ! [ ] ,
902
+ output_ordering : None ,
903
+ infinite_source : false ,
904
+ } ,
905
+ None ,
906
+ None ,
907
+ ) )
908
+ }
909
+
910
+ // Created a sorted parquet exec
911
+ fn parquet_exec_sorted (
912
+ schema : & SchemaRef ,
913
+ sort_exprs : impl IntoIterator < Item = PhysicalSortExpr > ,
914
+ ) -> Arc < ParquetExec > {
915
+ let sort_exprs = sort_exprs. into_iter ( ) . collect ( ) ;
916
+
917
+ Arc :: new ( ParquetExec :: new (
918
+ FileScanConfig {
919
+ object_store_url : ObjectStoreUrl :: parse ( "test:///" ) . unwrap ( ) ,
920
+ file_schema : schema. clone ( ) ,
921
+ file_groups : vec ! [ vec![ PartitionedFile :: new( "x" . to_string( ) , 100 ) ] ] ,
922
+ statistics : Statistics :: default ( ) ,
923
+ projection : None ,
924
+ limit : None ,
925
+ table_partition_cols : vec ! [ ] ,
926
+ output_ordering : Some ( sort_exprs) ,
927
+ infinite_source : false ,
928
+ } ,
929
+ None ,
930
+ None ,
931
+ ) )
932
+ }
933
+
934
+ fn union_exec ( input : Vec < Arc < dyn ExecutionPlan > > ) -> Arc < dyn ExecutionPlan > {
935
+ Arc :: new ( UnionExec :: new ( input) )
936
+ }
859
937
}
0 commit comments