
Introduce selection vector repartitioning #15423


Open — goldmedal wants to merge 37 commits into main

Commits (37)
b97cd24
introduce selection vector repartitioning
goldmedal Mar 25, 2025
c26db85
finish the phsyical plan side
goldmedal Mar 26, 2025
65a3031
add config
goldmedal Mar 26, 2025
271658f
support for proto
goldmedal Mar 26, 2025
2e49946
add sqllogictests
goldmedal Mar 26, 2025
6cbe760
fix fmt and clippy
goldmedal Mar 26, 2025
25c3e2c
rename column and address comment
goldmedal Mar 26, 2025
cb2606f
fix config test
goldmedal Mar 27, 2025
7e147e1
remove hash join test
goldmedal Mar 27, 2025
29fa0e6
fix typo
goldmedal Mar 28, 2025
575af63
add back the join test
goldmedal Mar 28, 2025
a789e5c
add HashPartitionMode
goldmedal Mar 29, 2025
13d9bb8
update sqllogictests
goldmedal Mar 29, 2025
316998d
fix fmt
goldmedal Mar 29, 2025
adc00d8
add todo comment
goldmedal Mar 29, 2025
35e5402
fix typo
goldmedal Mar 29, 2025
b489378
address review comment
goldmedal Mar 29, 2025
0f9f398
fix compile and ehance doc
goldmedal Mar 29, 2025
2b93d80
rename config
goldmedal Mar 29, 2025
bad2e15
fix fmt
goldmedal Mar 29, 2025
bc70653
fix test
goldmedal Mar 29, 2025
764cce7
fix sqllogictests
goldmedal Mar 29, 2025
c01119c
fix fmt
goldmedal Apr 13, 2025
057660f
optimze repartition
goldmedal Apr 19, 2025
40caf0c
fix fmt
goldmedal Apr 20, 2025
84ebdc9
fix clippy and fmt
goldmedal Apr 20, 2025
ebdab1f
use scalar instead array
goldmedal Apr 21, 2025
401cb9c
fix clippy
goldmedal Apr 29, 2025
8d7d038
fix fmt
goldmedal Apr 29, 2025
e3c7021
rename selection_vector to selection_bitmap
goldmedal May 4, 2025
c7e4ac4
fix sqllogictest
goldmedal May 4, 2025
5c46c6b
add sanity check for filtered batch
goldmedal May 7, 2025
be34338
fix doc
goldmedal May 7, 2025
d11f0cc
fix sanity check and add test
goldmedal May 7, 2025
4c83644
fix clippy
goldmedal May 8, 2025
f76eba1
fix test and fmt
goldmedal May 10, 2025
0a0055d
update config
goldmedal May 10, 2025
4 changes: 4 additions & 0 deletions datafusion/common/src/config.rs
@@ -700,6 +700,10 @@ config_namespace! {
/// HashJoin can work more efficiently than SortMergeJoin but consumes more memory
pub prefer_hash_join: bool, default = true

/// When set to true, the physical plan optimizer will prefer HashSelectionBitmapPartitioning for HashAggregate
/// over HashPartitioning. HashSelectionBitmapPartitioning can work without data copying.
pub prefer_hash_selection_bitmap_partitioning_agg: bool, default = false

/// The maximum estimated size in bytes for one input side of a HashJoin
/// will be collected into a single partition
pub hash_join_single_partition_threshold: usize, default = 1024 * 1024
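The doc comment in the config diff above claims that HashSelectionBitmapPartitioning "can work without data copying". The sketch below is a self-contained toy illustration of that difference — the names (`partition_of`, `hash_repartition`, `bitmap_repartition`) and `Vec`-based "batches" are ours, not DataFusion's: classic hash repartitioning copies each row into a per-partition buffer, while the selection-bitmap variant leaves the batch in place and materializes only one boolean mask per partition.

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Toy stand-in for hashing the partition keys.
fn partition_of(key: i64, n: usize) -> usize {
    let mut h = DefaultHasher::new();
    key.hash(&mut h);
    (h.finish() as usize) % n
}

// Classic Partitioning::Hash: each row is copied into its partition's buffer.
fn hash_repartition(keys: &[i64], n: usize) -> Vec<Vec<i64>> {
    let mut parts = vec![Vec::new(); n];
    for &k in keys {
        parts[partition_of(k, n)].push(k); // row data is copied here
    }
    parts
}

// Selection-bitmap variant: the batch stays shared; each partition gets only
// a boolean mask marking the rows that belong to it.
fn bitmap_repartition(keys: &[i64], n: usize) -> Vec<Vec<bool>> {
    let mut masks = vec![vec![false; keys.len()]; n];
    for (i, &k) in keys.iter().enumerate() {
        masks[partition_of(k, n)][i] = true; // only a bit is written
    }
    masks
}

fn main() {
    let keys: Vec<i64> = (0..8).collect();
    // Both schemes assign every row to exactly one partition.
    let copied = hash_repartition(&keys, 4);
    assert_eq!(copied.iter().map(|p| p.len()).sum::<usize>(), keys.len());
    let masks = bitmap_repartition(&keys, 4);
    assert_eq!(masks.iter().flatten().filter(|&&b| b).count(), keys.len());
}
```

In DataFusion the shared data would be an Arrow `RecordBatch`; plain `Vec`s are used here only to keep the sketch dependency-free. The evident trade-off is that every downstream partition still sees the full batch and must apply its mask.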
23 changes: 20 additions & 3 deletions datafusion/core/src/physical_planner.rs
@@ -82,7 +82,7 @@ use datafusion_expr::{
};
use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr};
use datafusion_physical_expr::expressions::{Column, Literal};
use datafusion_physical_expr::LexOrdering;
use datafusion_physical_expr::{HashPartitionMode, LexOrdering};
use datafusion_physical_optimizer::PhysicalOptimizerRule;
use datafusion_physical_plan::execution_plan::InvariantLevel;
use datafusion_physical_plan::placeholder_row::PlaceholderRowExec;
@@ -741,8 +741,17 @@ impl DefaultPhysicalPlanner {
let updated_aggregates = initial_aggr.aggr_expr().to_vec();

let next_partition_mode = if can_repartition {
let mode = if session_state
.config_options()
.optimizer
.prefer_hash_selection_bitmap_partitioning_agg
{
HashPartitionMode::SelectionBitmap
} else {
HashPartitionMode::HashPartitioned
};
// construct a second aggregation with 'AggregateMode::FinalPartitioned'
AggregateMode::FinalPartitioned
AggregateMode::FinalPartitioned(mode)
} else {
// construct a second aggregation, keeping the final column name equal to the
// first aggregation and the expressions corresponding to the respective aggregate
@@ -804,7 +813,15 @@
)
})
.collect::<Result<Vec<_>>>()?;
Partitioning::Hash(runtime_expr, *n)
if session_state
.config_options()
.optimizer
.prefer_hash_selection_bitmap_partitioning_agg
{
Partitioning::HashSelectionBitmap(runtime_expr, *n)
} else {
Partitioning::Hash(runtime_expr, *n)
}
}
LogicalPartitioning::DistributeBy(_) => {
return not_impl_err!(
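The planner branch in the diff above can be condensed into a self-contained sketch. The enum shapes mirror what the diff shows (`HashPartitionMode` as a payload of `AggregateMode::FinalPartitioned`), but the definitions here are simplified stand-ins, not DataFusion's actual types, and the non-repartitioned fallback to `Final` is an assumption based on the truncated else branch:

```rust
#[derive(Debug, PartialEq, Clone, Copy)]
enum HashPartitionMode {
    HashPartitioned,
    SelectionBitmap,
}

#[derive(Debug, PartialEq)]
enum AggregateMode {
    #[allow(dead_code)]
    Partial,
    Final,
    FinalPartitioned(HashPartitionMode),
}

// Mirrors the planner branch: only when repartitioning is possible does the
// new config flag choose between the two hash partition modes.
fn next_partition_mode(can_repartition: bool, prefer_bitmap: bool) -> AggregateMode {
    if can_repartition {
        let mode = if prefer_bitmap {
            HashPartitionMode::SelectionBitmap
        } else {
            HashPartitionMode::HashPartitioned
        };
        AggregateMode::FinalPartitioned(mode)
    } else {
        AggregateMode::Final
    }
}

fn main() {
    // With the flag on, the final aggregate is partitioned by selection bitmap.
    assert_eq!(
        next_partition_mode(true, true),
        AggregateMode::FinalPartitioned(HashPartitionMode::SelectionBitmap)
    );
    // Flag off preserves today's behavior, as the updated plan snapshots
    // (mode=FinalPartitioned(HashPartitioned)) below show.
    assert_eq!(
        next_partition_mode(true, false),
        AggregateMode::FinalPartitioned(HashPartitionMode::HashPartitioned)
    );
    // No repartitioning: the flag is irrelevant.
    assert_eq!(next_partition_mode(false, true), AggregateMode::Final);
}
```

The same flag drives the second hunk: under `LogicalPartitioning::Hash` the planner emits `Partitioning::HashSelectionBitmap(runtime_expr, *n)` instead of `Partitioning::Hash(runtime_expr, *n)` when the option is set.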
46 changes: 23 additions & 23 deletions datafusion/core/tests/dataframe/mod.rs
@@ -2514,7 +2514,7 @@ async fn test_count_wildcard_on_sort() -> Result<()> {

assert_snapshot!(
pretty_format_batches(&sql_results).unwrap(),
@r###"
@r"
+---------------+------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+------------------------------------------------------------------------------------------------------------+
@@ -2527,37 +2527,37 @@ async fn test_count_wildcard_on_sort() -> Result<()> {
| | SortPreservingMergeExec: [count(Int64(1))@2 ASC NULLS LAST] |
| | SortExec: expr=[count(Int64(1))@2 ASC NULLS LAST], preserve_partitioning=[true] |
| | ProjectionExec: expr=[b@0 as b, count(Int64(1))@1 as count(*), count(Int64(1))@1 as count(Int64(1))] |
| | AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[count(Int64(1))] |
| | AggregateExec: mode=FinalPartitioned(HashPartitioned), gby=[b@0 as b], aggr=[count(Int64(1))] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=4 |
| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
| | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(Int64(1))] |
| | DataSourceExec: partitions=1, partition_sizes=[1] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------+
"###
"
);

assert_snapshot!(
pretty_format_batches(&df_results).unwrap(),
@r###"
+---------------+--------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------+
| logical_plan | Sort: count(*) ASC NULLS LAST |
| | Aggregate: groupBy=[[t1.b]], aggr=[[count(Int64(1)) AS count(*)]] |
| | TableScan: t1 projection=[b] |
| physical_plan | SortPreservingMergeExec: [count(*)@1 ASC NULLS LAST] |
| | SortExec: expr=[count(*)@1 ASC NULLS LAST], preserve_partitioning=[true] |
| | AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[count(*)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=4 |
| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
| | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(*)] |
| | DataSourceExec: partitions=1, partition_sizes=[1] |
| | |
+---------------+--------------------------------------------------------------------------------+
"###
@r"
+---------------+--------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------+
| logical_plan | Sort: count(*) ASC NULLS LAST |
| | Aggregate: groupBy=[[t1.b]], aggr=[[count(Int64(1)) AS count(*)]] |
| | TableScan: t1 projection=[b] |
| physical_plan | SortPreservingMergeExec: [count(*)@1 ASC NULLS LAST] |
| | SortExec: expr=[count(*)@1 ASC NULLS LAST], preserve_partitioning=[true] |
| | AggregateExec: mode=FinalPartitioned(HashPartitioned), gby=[b@0 as b], aggr=[count(*)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=4 |
| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
| | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(*)] |
| | DataSourceExec: partitions=1, partition_sizes=[1] |
| | |
+---------------+--------------------------------------------------------------------------------------------+
"
);
Ok(())
}
@@ -2870,7 +2870,7 @@ async fn test_count_wildcard_on_where_scalar_subquery() -> Result<()> {
| | HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@1)], projection=[a@0, b@1, count(*)@2, __always_true@4] |
| | DataSourceExec: partitions=1, partition_sizes=[1] |
| | ProjectionExec: expr=[count(Int64(1))@1 as count(*), a@0 as a, true as __always_true] |
| | AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(Int64(1))] |
| | AggregateExec: mode=FinalPartitioned(HashPartitioned), gby=[a@0 as a], aggr=[count(Int64(1))] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 |
| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
@@ -2927,7 +2927,7 @@ async fn test_count_wildcard_on_where_scalar_subquery() -> Result<()> {
| | HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@1)], projection=[a@0, b@1, count(*)@2, __always_true@4] |
| | DataSourceExec: partitions=1, partition_sizes=[1] |
| | ProjectionExec: expr=[count(*)@1 as count(*), a@0 as a, true as __always_true] |
| | AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(*)] |
| | AggregateExec: mode=FinalPartitioned(HashPartitioned), gby=[a@0 as a], aggr=[count(*)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 |
| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |