Skip to content

Commit 0d9f845

Browse files
authored
fix: Limits are not applied correctly (#14418)
* fix: Limits are not applied correctly * Add easy fix * Add fix * Add slt testing * Address comments
1 parent ea788c7 commit 0d9f845

File tree

2 files changed

+113
-0
lines changed

2 files changed

+113
-0
lines changed

datafusion/physical-optimizer/src/limit_pushdown.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,15 @@ pub fn pushdown_limit_helper(
146146
global_state.skip = skip;
147147
global_state.fetch = fetch;
148148

149+
if limit_exec.input().as_any().is::<CoalescePartitionsExec>() {
150+
// If the child is a `CoalescePartitionsExec`, we should not remove the limit:
151+
// pushing it down through the `CoalescePartitionsExec` to each partition does not guarantee the limit.
152+
// TODO: there may be a better solution once `CoalescePartitionsExec` supports `with_fetch`, so the limit can be applied inside it.
153+
// Follow-up issue: https://github.com/apache/datafusion/issues/14446
154+
global_state.satisfied = true;
155+
return Ok((Transformed::no(pushdown_plan), global_state));
156+
}
157+
149158
// Now the global state has the most recent information, we can remove
150159
// the `LimitExec` plan. We will decide later if we should add it again
151160
// or not.

datafusion/sqllogictest/test_files/limit.slt

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -772,3 +772,107 @@ physical_plan
772772

773773
statement ok
774774
drop table testSubQueryLimit;
775+
776+
777+
# Test push down limit with more than one partition
778+
statement ok
779+
set datafusion.explain.logical_plan_only = false;
780+
781+
# Set up 3 partitions
782+
statement ok
783+
set datafusion.execution.target_partitions = 3;
784+
785+
# automatically partition all files over 1 byte
786+
statement ok
787+
set datafusion.optimizer.repartition_file_min_size = 1;
788+
789+
# Create a table as a data source
790+
statement ok
791+
CREATE TABLE src_table (
792+
part_key INT,
793+
value INT
794+
) AS VALUES(1, 0), (1, 1), (1, 100), (2, 0), (2, 2), (2, 2), (2, 100), (3, 4), (3, 5), (3, 6);
795+
796+
797+
# Setup 3 files, i.e., as many as there are partitions:
798+
799+
# File 1:
800+
query I
801+
COPY (SELECT * FROM src_table where part_key = 1)
802+
TO 'test_files/scratch/parquet/test_limit_with_partitions/part-0.parquet'
803+
STORED AS PARQUET;
804+
----
805+
3
806+
807+
# File 2:
808+
query I
809+
COPY (SELECT * FROM src_table where part_key = 2)
810+
TO 'test_files/scratch/parquet/test_limit_with_partitions/part-1.parquet'
811+
STORED AS PARQUET;
812+
----
813+
4
814+
815+
# File 3:
816+
query I
817+
COPY (SELECT * FROM src_table where part_key = 3)
818+
TO 'test_files/scratch/parquet/test_limit_with_partitions/part-2.parquet'
819+
STORED AS PARQUET;
820+
----
821+
3
822+
823+
statement ok
824+
CREATE EXTERNAL TABLE test_limit_with_partitions
825+
(
826+
part_key INT,
827+
value INT
828+
)
829+
STORED AS PARQUET
830+
LOCATION 'test_files/scratch/parquet/test_limit_with_partitions/';
831+
832+
query TT
833+
explain
834+
with selection as (
835+
select *
836+
from test_limit_with_partitions
837+
limit 1
838+
)
839+
select 1 as foo
840+
from selection
841+
order by part_key
842+
limit 1000;
843+
----
844+
logical_plan
845+
01)Projection: foo
846+
02)--Sort: selection.part_key ASC NULLS LAST, fetch=1000
847+
03)----Projection: Int64(1) AS foo, selection.part_key
848+
04)------SubqueryAlias: selection
849+
05)--------Limit: skip=0, fetch=1
850+
06)----------TableScan: test_limit_with_partitions projection=[part_key], fetch=1
851+
physical_plan
852+
01)ProjectionExec: expr=[foo@0 as foo]
853+
02)--SortExec: TopK(fetch=1000), expr=[part_key@1 ASC NULLS LAST], preserve_partitioning=[false]
854+
03)----ProjectionExec: expr=[1 as foo, part_key@0 as part_key]
855+
04)------GlobalLimitExec: skip=0, fetch=1
856+
05)--------CoalescePartitionsExec
857+
06)----------ParquetExec: file_groups={3 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-0.parquet:0..794], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-1.parquet:0..794], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/test_limit_with_partitions/part-2.parquet:0..794]]}, projection=[part_key], limit=1
858+
859+
query I
860+
with selection as (
861+
select *
862+
from test_limit_with_partitions
863+
limit 1
864+
)
865+
select 1 as foo
866+
from selection
867+
order by part_key
868+
limit 1000;
869+
----
870+
1
871+
872+
# Tear down test_limit_with_partitions table:
873+
statement ok
874+
DROP TABLE test_limit_with_partitions;
875+
876+
# Tear down src_table table:
877+
statement ok
878+
DROP TABLE src_table;

0 commit comments

Comments
 (0)