apache · aditanase · May 14, 2025 · alamb · Aug 10, 2025 · aditanase
diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs
@@ -978,7 +978,11 @@ impl OptimizerRule for PushDownFilter {
                 let group_expr_columns = agg
                     .group_expr
                     .iter()
-                    .map(|e| Ok(Column::from_qualified_name(e.schema_name().to_string())))
+                    .map(|e| {
+                        Ok(Column::from_qualified_name_ignore_case(
+                            e.schema_name().to_string(),
+                        ))
+                    })
                     .collect::<Result<HashSet<_>>>()?;
 
                 let predicates = split_conjunction_owned(filter.predicate);
@@ -4160,4 +4164,55 @@ mod tests {
         "
         )
     }
+
+    /// Create a test table scan with columns `a`, `A`, `B`, `C` for case sensitivity testing
+    fn test_table_scan_with_uppercase_columns() -> Result<LogicalPlan> {
+        let schema = Schema::new(vec![
+            Field::new("a", DataType::UInt32, false),
+            Field::new("A", DataType::UInt32, false),
+            Field::new("B", DataType::UInt32, false),
+            Field::new("C", DataType::UInt32, false),
+        ]);
+        table_scan(Some("test"), &schema, None)?.build()
+    }
+
+    #[test]
+    fn filter_agg_case_insensitive() -> Result<()> {
+        let table_scan = test_table_scan_with_uppercase_columns()?;
+        let plan = LogicalPlanBuilder::from(table_scan)
+            .aggregate(
+                vec![col(r#""A""#)],
+                vec![sum(col(r#""B""#)).alias("total_salary")],
+            )?
+            .filter(col(r#""A""#).gt(lit(10i64)))?
+            .build()?;
+
+        assert_optimized_plan_equal!(
+            plan,
+            @r"
+        Aggregate: groupBy=[[test.A]], aggr=[[sum(test.B) AS total_salary]]
+          TableScan: test, full_filters=[test.A > Int64(10)]
+        "
+        )
+    }
+
+    #[test]
+    fn filter_agg_mix_case_insensitive() -> Result<()> {
+        let table_scan = test_table_scan_with_uppercase_columns()?;
+        let plan = LogicalPlanBuilder::from(table_scan)
+            .aggregate(
+                vec![col("a")],
+                vec![sum(col(r#""B""#)).alias("total_salary")],
+            )?
+            .filter(col("a").gt(lit(10i64)))?
+            .build()?;
+
+        assert_optimized_plan_equal!(
+            plan,
+            @r"
+        Aggregate: groupBy=[[test.a]], aggr=[[sum(test.B) AS total_salary]]
+          TableScan: test, full_filters=[test.a > Int64(10)]
+        "
+        )
+    }
 }
diff --git a/datafusion/sqllogictest/test_files/push_down_filter.slt b/datafusion/sqllogictest/test_files/push_down_filter.slt
@@ -288,3 +288,49 @@ physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/
 
 statement ok
 drop table t;
+
+statement ok
+create table test_uppercase_cols (a int, "A" int, "B" int, "C" int);
+
+# Test that a filter on a quoted uppercase group-by column is pushed down through the aggregate
+query TT
+explain
+select "A", total_salary
+from (
+    select "A", sum("B") as total_salary from test_uppercase_cols group by "A"
+)
+where "A" > 10;
+----
+physical_plan
+01)ProjectionExec: expr=[A@0 as A, sum(test_uppercase_cols.B)@1 as total_salary]
+02)--AggregateExec: mode=FinalPartitioned, gby=[A@0 as A], aggr=[sum(test_uppercase_cols.B)]
+03)----CoalesceBatchesExec: target_batch_size=8192
+04)------RepartitionExec: partitioning=Hash([A@0], 4), input_partitions=4
+05)--------AggregateExec: mode=Partial, gby=[A@0 as A], aggr=[sum(test_uppercase_cols.B)]
+06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+07)------------CoalesceBatchesExec: target_batch_size=8192
+08)--------------FilterExec: A@0 > 10
+09)----------------DataSourceExec: partitions=1, partition_sizes=[0]
+
+# Test filter push down through the aggregate when the table mixes lowercase and uppercase column names
+query TT
+explain
+select a, total_salary
+from (
+    select a, sum("B") as total_salary from test_uppercase_cols group by a
+)
+where a > 10;
+----
+physical_plan
+01)ProjectionExec: expr=[a@0 as a, sum(test_uppercase_cols.B)@1 as total_salary]
+02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[sum(test_uppercase_cols.B)]
+03)----CoalesceBatchesExec: target_batch_size=8192
+04)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4
+05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[sum(test_uppercase_cols.B)]
+06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+07)------------CoalesceBatchesExec: target_batch_size=8192
+08)--------------FilterExec: a@0 > 10
+09)----------------DataSourceExec: partitions=1, partition_sizes=[0]
+
+statement ok
+drop table test_uppercase_cols;