Skip to content

Commit b11ca8b

Browse files
committed
rebase all execution and preceding recursive cte work
add config flag for recursive ctes update docs from script update slt test for doc change restore testing pin add sql -> logical plan support * impl cte as work table * move SharedState to continuance * impl WorkTableState wip: readying pr to implement only logical plan fix sql integration test wip: add sql test for logical plan wip: format test assertion wip: remove unnecessary with qualifier method some docs more docs Add comments to `RecursiveQuery` Update datafusion-cli Cargo.lock Fix clippy better errors and comments add sql -> logical plan support * impl cte as work table * move SharedState to continuance * impl WorkTableState wip: readying pr to implement only logical plan fix sql integration test wip: add sql test for logical plan wip: format test assertion wip: remove unnecessary with qualifier method some docs more docs impl execution support add sql -> logical plan support * impl cte as work table * move SharedState to continuance * impl WorkTableState wip: readying pr to implement only logical plan partway through porting over isidentical's work Continuing implementation with fixes and improvements Lint fixes ensure that repartitions are not added immediately after RecursiveExec in the physical-plan add trivial sqllogictest more recursive tests remove test that asserts recursive cte should fail additional cte test wip: remove tokio from physical plan dev deps format cargo tomls fix issue where CTE could not be referenced more than 1 time wip: fixes after rebase but tpcds_physical_q54 keeps overflowing its stack Impl NamedRelation as CteWorkTable * impl cte as work table * move SharedState to continuance * impl WorkTableState * upd * assign work table state * upd * upd fix min repro but still broken on larger test case set config in sql logic tests clean up cte slt tests fixes fix option add group by test case and more test case files wip add window function recursive cte example simplify stream impl for recursive query stream add explain to trivial test case 
move setting of recursive ctes to slt file and add test to ensure multiple record batches are produced each iteration remove tokio dep and remove mut lint, comments and remove tokio stream update submodule pin to feat branch that contains csvs update submodule pin to feat branch that contains csvs
1 parent ae0f401 commit b11ca8b

File tree

9 files changed

+1173
-22
lines changed

9 files changed

+1173
-22
lines changed

datafusion/core/src/datasource/cte_worktable.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,14 @@ use std::sync::Arc;
2222

2323
use arrow::datatypes::SchemaRef;
2424
use async_trait::async_trait;
25-
use datafusion_common::not_impl_err;
25+
use datafusion_physical_plan::work_table::WorkTableExec;
2626

2727
use crate::{
2828
error::Result,
2929
logical_expr::{Expr, LogicalPlan, TableProviderFilterPushDown},
3030
physical_plan::ExecutionPlan,
3131
};
3232

33-
use datafusion_common::DataFusionError;
34-
3533
use crate::datasource::{TableProvider, TableType};
3634
use crate::execution::context::SessionState;
3735

@@ -84,7 +82,11 @@ impl TableProvider for CteWorkTable {
8482
_filters: &[Expr],
8583
_limit: Option<usize>,
8684
) -> Result<Arc<dyn ExecutionPlan>> {
87-
not_impl_err!("scan not implemented for CteWorkTable yet")
85+
// TODO: pushdown filters and limits
86+
Ok(Arc::new(WorkTableExec::new(
87+
self.name.clone(),
88+
self.table_schema.clone(),
89+
)))
8890
}
8991

9092
fn supports_filter_pushdown(

datafusion/core/src/physical_optimizer/projection_pushdown.rs

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -159,13 +159,14 @@ fn try_swapping_with_csv(
159159
projection: &ProjectionExec,
160160
csv: &CsvExec,
161161
) -> Option<Arc<dyn ExecutionPlan>> {
162+
// info!("csv exec: {}", csv);
162163
// If there is any non-column or alias-carrier expression, Projection should not be removed.
163164
// This process can be moved into CsvExec, but it would be an overlap of their responsibility.
164165
all_alias_free_columns(projection.expr()).then(|| {
165166
let mut file_scan = csv.base_config().clone();
166167
let new_projections =
167168
new_projections_for_columns(projection, &file_scan.projection);
168-
file_scan.projection = Some(new_projections);
169+
file_scan.projection = new_projections;
169170

170171
Arc::new(CsvExec::new(
171172
file_scan,
@@ -194,7 +195,7 @@ fn try_swapping_with_memory(
194195
MemoryExec::try_new(
195196
memory.partitions(),
196197
memory.original_schema(),
197-
Some(new_projections),
198+
new_projections,
198199
)
199200
.map(|e| Arc::new(e) as _)
200201
})
@@ -238,7 +239,7 @@ fn try_swapping_with_streaming_table(
238239
StreamingTableExec::try_new(
239240
streaming_table.partition_schema().clone(),
240241
streaming_table.partitions().clone(),
241-
Some(&new_projections),
242+
new_projections.as_ref(),
242243
lex_orderings,
243244
streaming_table.is_infinite(),
244245
)
@@ -834,16 +835,22 @@ fn all_alias_free_columns(exprs: &[(Arc<dyn PhysicalExpr>, String)]) -> bool {
834835
fn new_projections_for_columns(
835836
projection: &ProjectionExec,
836837
source: &Option<Vec<usize>>,
837-
) -> Vec<usize> {
838-
projection
839-
.expr()
840-
.iter()
841-
.filter_map(|(expr, _)| {
842-
expr.as_any()
843-
.downcast_ref::<Column>()
844-
.and_then(|expr| source.as_ref().map(|proj| proj[expr.index()]))
845-
})
846-
.collect()
838+
) -> Option<Vec<usize>> {
839+
if source.is_none() {
840+
return None;
841+
}
842+
843+
Some(
844+
projection
845+
.expr()
846+
.iter()
847+
.filter_map(|(expr, _)| {
848+
expr.as_any()
849+
.downcast_ref::<Column>()
850+
.and_then(|expr| source.as_ref().map(|proj| proj[expr.index()]))
851+
})
852+
.collect(),
853+
)
847854
}
848855

849856
/// The function operates in two modes:

datafusion/core/src/physical_planner.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ use crate::physical_plan::joins::{
5858
use crate::physical_plan::limit::{GlobalLimitExec, LocalLimitExec};
5959
use crate::physical_plan::memory::MemoryExec;
6060
use crate::physical_plan::projection::ProjectionExec;
61+
use crate::physical_plan::recursive_query::RecursiveQueryExec;
6162
use crate::physical_plan::repartition::RepartitionExec;
6263
use crate::physical_plan::sorts::sort::SortExec;
6364
use crate::physical_plan::union::UnionExec;
@@ -896,7 +897,7 @@ impl DefaultPhysicalPlanner {
896897
let filter = FilterExec::try_new(runtime_expr, physical_input)?;
897898
Ok(Arc::new(filter.with_default_selectivity(selectivity)?))
898899
}
899-
LogicalPlan::Union(Union { inputs, .. }) => {
900+
LogicalPlan::Union(Union { inputs, schema: _ }) => {
900901
let physical_plans = self.create_initial_plan_multi(inputs.iter().map(|lp| lp.as_ref()), session_state).await?;
901902

902903
Ok(Arc::new(UnionExec::new(physical_plans)))
@@ -1290,8 +1291,10 @@ impl DefaultPhysicalPlanner {
12901291
Ok(plan)
12911292
}
12921293
}
1293-
LogicalPlan::RecursiveQuery(RecursiveQuery { name: _, static_term: _, recursive_term: _, is_distinct: _,.. }) => {
1294-
not_impl_err!("Physical counterpart of RecursiveQuery is not implemented yet")
1294+
LogicalPlan::RecursiveQuery(RecursiveQuery { name, static_term, recursive_term, is_distinct,.. }) => {
1295+
let static_term = self.create_initial_plan(static_term, session_state).await?;
1296+
let recursive_term = self.create_initial_plan(recursive_term, session_state).await?;
1297+
Ok(Arc::new(RecursiveQueryExec::try_new(name.clone(), static_term, recursive_term, *is_distinct)?))
12951298
}
12961299
};
12971300
exec_plan

datafusion/physical-plan/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ pub mod metrics;
6161
mod ordering;
6262
pub mod placeholder_row;
6363
pub mod projection;
64+
pub mod recursive_query;
6465
pub mod repartition;
6566
pub mod sorts;
6667
pub mod stream;
@@ -71,6 +72,7 @@ pub mod union;
7172
pub mod unnest;
7273
pub mod values;
7374
pub mod windows;
75+
pub mod work_table;
7476

7577
pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay};
7678
pub use crate::metrics::Metric;

0 commit comments

Comments
 (0)