Skip to content

Fix Correlated Subquery With Depth Larger Than One #16060

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion benchmarks/src/cancellation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use datafusion::execution::TaskContext;
use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
use datafusion::physical_plan::ExecutionPlan;
use datafusion::prelude::*;
use datafusion::sql::planner::PlannerContext;
use datafusion_common::instant::Instant;
use futures::TryStreamExt;
use object_store::ObjectStore;
Expand Down Expand Up @@ -185,7 +186,10 @@ async fn datafusion(store: Arc<dyn ObjectStore>) -> Result<()> {
.await?;

println!("Creating logical plan...");
let logical_plan = ctx.state().create_logical_plan(query).await?;
let logical_plan = ctx
.state()
.create_logical_plan(query, &mut PlannerContext::new())
.await?;

println!("Creating physical plan...");
let physical_plan = Arc::new(CoalescePartitionsExec::new(
Expand Down
20 changes: 16 additions & 4 deletions datafusion-cli/src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use crate::{
object_storage::get_object_store,
print_options::{MaxRows, PrintOptions},
};
use datafusion::sql::planner::PlannerContext;
use futures::StreamExt;
use std::collections::HashMap;
use std::fs::File;
Expand Down Expand Up @@ -231,7 +232,8 @@ pub(super) async fn exec_and_print(
let adjusted =
AdjustedPrintOptions::new(print_options.clone()).with_statement(&statement);

let plan = create_plan(ctx, statement).await?;
let mut planner_context = PlannerContext::new();
let plan = create_plan(ctx, statement, &mut planner_context).await?;
let adjusted = adjusted.with_plan(&plan);

let df = ctx.execute_logical_plan(plan).await?;
Expand Down Expand Up @@ -348,8 +350,12 @@ fn config_file_type_from_str(ext: &str) -> Option<ConfigFileType> {
async fn create_plan(
ctx: &dyn CliSessionContext,
statement: Statement,
planner_context: &mut PlannerContext,
) -> Result<LogicalPlan, DataFusionError> {
let mut plan = ctx.session_state().statement_to_plan(statement).await?;
let mut plan = ctx
.session_state()
.statement_to_plan(statement, planner_context)
.await?;

// Note that cmd is a mutable reference so that create_external_table function can remove all
// datafusion-cli specific options before passing through to datafusion. Otherwise, datafusion
Expand Down Expand Up @@ -453,7 +459,10 @@ mod tests {

async fn create_external_table_test(location: &str, sql: &str) -> Result<()> {
let ctx = SessionContext::new();
let plan = ctx.state().create_logical_plan(sql).await?;
let plan = ctx
.state()
.create_logical_plan(sql, &mut PlannerContext::new())
.await?;

if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &plan {
let format = config_file_type_from_str(&cmd.file_type);
Expand All @@ -479,7 +488,10 @@ mod tests {
let ctx = SessionContext::new();
// AWS CONFIG register.

let plan = ctx.state().create_logical_plan(sql).await?;
let plan = ctx
.state()
.create_logical_plan(sql, &mut PlannerContext::new())
.await?;

if let LogicalPlan::Copy(cmd) = &plan {
let format = config_file_type_from_str(&cmd.file_type.get_ext());
Expand Down
25 changes: 20 additions & 5 deletions datafusion-cli/src/object_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,10 @@ mod tests {
);

let ctx = SessionContext::new();
let mut plan = ctx.state().create_logical_plan(&sql).await?;
let mut plan = ctx
.state()
.create_logical_plan(&sql, &mut PlannerContext::new())
.await?;

if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
ctx.register_table_options_extension_from_scheme(scheme);
Expand Down Expand Up @@ -538,7 +541,10 @@ mod tests {
);

let ctx = SessionContext::new();
let mut plan = ctx.state().create_logical_plan(&sql).await?;
let mut plan = ctx
.state()
.create_logical_plan(&sql, &mut PlannerContext::new())
.await?;

if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
ctx.register_table_options_extension_from_scheme(scheme);
Expand All @@ -564,7 +570,10 @@ mod tests {
) LOCATION '{location}'"
);

let mut plan = ctx.state().create_logical_plan(&sql).await?;
let mut plan = ctx
.state()
.create_logical_plan(&sql, &mut PlannerContext::new())
.await?;

if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
ctx.register_table_options_extension_from_scheme(scheme);
Expand Down Expand Up @@ -592,7 +601,10 @@ mod tests {
let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}', 'aws.oss.endpoint' '{endpoint}') LOCATION '{location}'");

let ctx = SessionContext::new();
let mut plan = ctx.state().create_logical_plan(&sql).await?;
let mut plan = ctx
.state()
.create_logical_plan(&sql, &mut PlannerContext::new())
.await?;

if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
ctx.register_table_options_extension_from_scheme(scheme);
Expand Down Expand Up @@ -629,7 +641,10 @@ mod tests {
let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('gcp.service_account_path' '{service_account_path}', 'gcp.service_account_key' '{service_account_key}', 'gcp.application_credentials_path' '{application_credentials_path}') LOCATION '{location}'");

let ctx = SessionContext::new();
let mut plan = ctx.state().create_logical_plan(&sql).await?;
let mut plan = ctx
.state()
.create_logical_plan(&sql, &mut PlannerContext::new())
.await?;

if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
ctx.register_table_options_extension_from_scheme(scheme);
Expand Down
6 changes: 5 additions & 1 deletion datafusion/core/src/execution/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ use datafusion_session::SessionStore;

use async_trait::async_trait;
use chrono::{DateTime, Utc};
use datafusion_sql::planner::PlannerContext;
use object_store::ObjectStore;
use parking_lot::RwLock;
use url::Url;
Expand Down Expand Up @@ -620,7 +621,10 @@ impl SessionContext {
sql: &str,
options: SQLOptions,
) -> Result<DataFrame> {
let plan = self.state().create_logical_plan(sql).await?;
let plan = self
.state()
.create_logical_plan(sql, &mut PlannerContext::new())
.await?;
options.verify_plan(&plan)?;

self.execute_logical_plan(plan).await
Expand Down
6 changes: 4 additions & 2 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,7 @@ impl SessionState {
pub async fn statement_to_plan(
&self,
statement: Statement,
planner_context: &mut PlannerContext,
) -> datafusion_common::Result<LogicalPlan> {
let references = self.resolve_table_references(&statement)?;

Expand All @@ -482,7 +483,7 @@ impl SessionState {
}

let query = SqlToRel::new_with_options(&provider, self.get_parser_options());
query.statement_to_plan(statement)
query.statement_to_plan(statement, planner_context)
}

fn get_parser_options(&self) -> ParserOptions {
Expand Down Expand Up @@ -514,10 +515,11 @@ impl SessionState {
pub async fn create_logical_plan(
&self,
sql: &str,
planner_context: &mut PlannerContext,
) -> datafusion_common::Result<LogicalPlan> {
let dialect = self.config.options().sql_parser.dialect.as_str();
let statement = self.sql_to_statement(sql, dialect)?;
let plan = self.statement_to_plan(statement).await?;
let plan = self.statement_to_plan(statement, planner_context).await?;
Ok(plan)
}

Expand Down
10 changes: 8 additions & 2 deletions datafusion/core/tests/sql/sql_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,9 @@ async fn empty_statement_returns_error() {
let state = ctx.state();

// Give it an empty string which contains no statements
let plan_res = state.create_logical_plan("").await;
let plan_res = state
.create_logical_plan("", &mut PlannerContext::new())
.await;
assert_eq!(
plan_res.unwrap_err().strip_backtrace(),
"Error during planning: No SQL statements were provided in the query string"
Expand All @@ -180,6 +182,7 @@ async fn multiple_statements_returns_error() {
let plan_res = state
.create_logical_plan(
"INSERT INTO test (x) VALUES (1); INSERT INTO test (x) VALUES (2)",
&mut PlannerContext::new(),
)
.await;
assert_eq!(
Expand All @@ -199,7 +202,10 @@ async fn ddl_can_not_be_planned_by_session_state() {

// can not create a logical plan for catalog DDL
let sql = "DROP TABLE test";
let plan = state.create_logical_plan(sql).await.unwrap();
let plan = state
.create_logical_plan(sql, &mut PlannerContext::new())
.await
.unwrap();
let physical_plan = state.create_physical_plan(&plan).await;
assert_eq!(
physical_plan.unwrap_err().strip_backtrace(),
Expand Down
5 changes: 4 additions & 1 deletion datafusion/proto/tests/cases/roundtrip_logical_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,10 @@ async fn roundtrip_custom_listing_tables() -> Result<()> {
LOCATION '../core/tests/data/window_2.csv'
OPTIONS ('format.has_header' 'true')";

let plan = ctx.state().create_logical_plan(query).await?;
let plan = ctx
.state()
.create_logical_plan(query, &mut PlannerContext::new())
.await?;

let bytes = logical_plan_to_bytes(&plan)?;
let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?;
Expand Down
77 changes: 39 additions & 38 deletions datafusion/sql/src/expr/identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,48 +165,49 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
not_impl_err!("compound identifier: {ids:?}")
} else {
// Check the outer_query_schema and try to find a match
if let Some(outer) = planner_context.outer_query_schema() {
let search_result = search_dfschema(&ids, outer);
match search_result {
// Found matching field with spare identifier(s) for nested field(s) in structure
Some((field, qualifier, nested_names))
if !nested_names.is_empty() =>
{
// TODO: remove once nested identifiers are supported for OuterReferenceColumn
not_impl_err!(
"Nested identifiers are not yet supported for OuterReferenceColumn {}",
Column::from((qualifier, field)).quoted_flat_name()
)
// TODO: record the depth of the matching outer schema somewhere (e.g. alongside OuterReferenceColumn)
for (_depth, schema) in
planner_context.iter_outer_query_schemas_rev()
{
if let Some(outer) = schema {
let search_result = search_dfschema(&ids, outer);
match search_result {
// Found matching field with spare identifier(s) for nested field(s) in structure
Some((field, qualifier, nested_names))
if !nested_names.is_empty() =>
{
// TODO: remove once nested identifiers are supported for OuterReferenceColumn
return not_impl_err!("Nested identifiers are not yet supported for OuterReferenceColumn {}", Column::from((qualifier, field)).quoted_flat_name());
}
// Found matching field with no spare identifier(s)
Some((field, qualifier, _nested_names)) => {
// Found an exact match on a qualified name in the outer plan schema, so this is an outer reference column
return Ok(Expr::OuterReferenceColumn(
field.data_type().clone(),
Column::from((qualifier, field)),
));
}
// Found no matching field in this outer schema; keep searching the remaining ones
None => continue,
}
// Found matching field with no spare identifier(s)
Some((field, qualifier, _nested_names)) => {
// Found an exact match on a qualified name in the outer plan schema, so this is an outer reference column
Ok(Expr::OuterReferenceColumn(
field.data_type().clone(),
Column::from((qualifier, field)),
))
}
// Found no matching field, will return a default
None => {
let s = &ids[0..ids.len()];
// safe unwrap as s can never be empty or exceed the bounds
let (relation, column_name) =
form_identifier(s).unwrap();
Ok(Expr::Column(Column::new(relation, column_name)))
}
}
} else {
let s = &ids[0..ids.len()];
// Safe unwrap as s can never be empty or exceed the bounds
let (relation, column_name) = form_identifier(s).unwrap();
let mut column = Column::new(relation, column_name);
if self.options.collect_spans {
if let Some(span) = ids_span {
column.spans_mut().add_span(span);
} else {
// Only depth=0 outer_query_schema can reach here.
let s = &ids[0..ids.len()];
// Safe unwrap as s can never be empty or exceed the bounds
let (relation, column_name) = form_identifier(s).unwrap();
let mut column = Column::new(relation, column_name);
if self.options.collect_spans {
if let Some(span) = ids_span {
column.spans_mut().add_span(span);
}
}
return Ok(Expr::Column(column));
}
Ok(Expr::Column(column))
}
let s = &ids[0..ids.len()];
// safe unwrap as s can never be empty or exceed the bounds
let (relation, column_name) = form_identifier(s).unwrap();
return Ok(Expr::Column(Column::new(relation, column_name)));
}
}
}
Expand Down
26 changes: 17 additions & 9 deletions datafusion/sql/src/expr/subquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,15 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
input_schema: &DFSchema,
planner_context: &mut PlannerContext,
) -> Result<Expr> {
let old_outer_query_schema =
planner_context.set_outer_query_schema(Some(input_schema.clone().into()));
// TODO
planner_context.push_outer_query_schema(Some(input_schema.clone().into()));
planner_context.increase_depth();

let sub_plan = self.query_to_plan(subquery, planner_context)?;
let outer_ref_columns = sub_plan.all_out_ref_exprs();
planner_context.set_outer_query_schema(old_outer_query_schema);

planner_context.decrease_depth();

Ok(Expr::Exists(Exists {
subquery: Subquery {
subquery: Arc::new(sub_plan),
Expand All @@ -54,8 +58,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
input_schema: &DFSchema,
planner_context: &mut PlannerContext,
) -> Result<Expr> {
let old_outer_query_schema =
planner_context.set_outer_query_schema(Some(input_schema.clone().into()));
// TODO
planner_context.push_outer_query_schema(Some(input_schema.clone().into()));
planner_context.increase_depth();

let mut spans = Spans::new();
if let SetExpr::Select(select) = subquery.body.as_ref() {
Expand All @@ -70,7 +75,6 @@ impl<S: ContextProvider> SqlToRel<'_, S> {

let sub_plan = self.query_to_plan(subquery, planner_context)?;
let outer_ref_columns = sub_plan.all_out_ref_exprs();
planner_context.set_outer_query_schema(old_outer_query_schema);

self.validate_single_column(
&sub_plan,
Expand All @@ -81,6 +85,8 @@ impl<S: ContextProvider> SqlToRel<'_, S> {

let expr_obj = self.sql_to_expr(expr, input_schema, planner_context)?;

planner_context.decrease_depth();

Ok(Expr::InSubquery(InSubquery::new(
Box::new(expr_obj),
Subquery {
Expand All @@ -98,8 +104,8 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
input_schema: &DFSchema,
planner_context: &mut PlannerContext,
) -> Result<Expr> {
let old_outer_query_schema =
planner_context.set_outer_query_schema(Some(input_schema.clone().into()));
planner_context.push_outer_query_schema(Some(input_schema.clone().into()));
planner_context.increase_depth();
let mut spans = Spans::new();
if let SetExpr::Select(select) = subquery.body.as_ref() {
for item in &select.projection {
Expand All @@ -112,7 +118,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
}
let sub_plan = self.query_to_plan(subquery, planner_context)?;
let outer_ref_columns = sub_plan.all_out_ref_exprs();
planner_context.set_outer_query_schema(old_outer_query_schema);
dbg!(&outer_ref_columns);

self.validate_single_column(
&sub_plan,
Expand All @@ -121,6 +127,8 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
"Select only one column in the subquery",
)?;

planner_context.decrease_depth();

Ok(Expr::ScalarSubquery(Subquery {
subquery: Arc::new(sub_plan),
outer_ref_columns,
Expand Down
Loading
Loading