-
Notifications
You must be signed in to change notification settings - Fork 1.7k
feat: ClassicJoin
for PWMJ
#17482
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
jonathanc-n
wants to merge
30
commits into
apache:main
Choose a base branch
from
jonathanc-n:classic-join-+-physical-planner
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+3,000
−109
Open
feat: ClassicJoin
for PWMJ
#17482
Changes from all commits
Commits
Show all changes
30 commits
Select commit
Hold shift + click to select a range
d87d24d
POC: `ClassicJoin` for PWMJ
jonathanc-n eb80117
Merge branch 'main' into classic-join-+-physical-planner
jonathanc-n f343f71
fmt
jonathanc-n 248ae49
clippy + fix test
jonathanc-n 1020e65
fix tests
jonathanc-n 29c0ff0
fmt
jonathanc-n 59486ab
Merge branch 'main' into classic-join-+-physical-planner
jonathanc-n cb94a20
clean up slt tests
jonathanc-n 13db5b5
Merge branch 'classic-join-+-physical-planner' of https://github.com/…
jonathanc-n 2675ad8
Merge branch 'main' into classic-join-+-physical-planner
jonathanc-n 770e1a8
Merge branch 'main' into classic-join-+-physical-planner
jonathanc-n 0039a2f
Merge branch 'main' into classic-join-+-physical-planner
jonathanc-n 18ee4cb
fixes
jonathanc-n e3d8606
remove swap
jonathanc-n 9b74c9d
Merge branch 'main' into classic-join-+-physical-planner
jonathanc-n 0834b98
change varialbe names
jonathanc-n 1202962
add flag
jonathanc-n f18c1b7
Merge branch 'main' into classic-join-+-physical-planner
jonathanc-n fa18c41
Merge branch 'classic-join-+-physical-planner' of https://github.com/…
jonathanc-n 502c088
remove duplicate function
jonathanc-n 55a4a1d
changes
jonathanc-n c5e59ed
Merge branch 'main' into classic-join-+-physical-planner
jonathanc-n 10526fe
update configs
jonathanc-n e9593c8
Merge branch 'classic-join-+-physical-planner' of https://github.com/…
jonathanc-n 14d945c
Update datafusion/physical-plan/src/joins/piecewise_merge_join/exec.rs
jonathanc-n 2bf7940
fix proposed changes
jonathanc-n 2473f27
Merge branch 'classic-join-+-physical-planner' of https://github.com/…
jonathanc-n 499ccd3
fix
jonathanc-n c981947
fix null handling
jonathanc-n 2946585
fmt
jonathanc-n File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -77,10 +77,11 @@ use datafusion_expr::expr::{ | |
}; | ||
use datafusion_expr::expr_rewriter::unnormalize_cols; | ||
use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; | ||
use datafusion_expr::utils::split_conjunction; | ||
use datafusion_expr::{ | ||
Analyze, DescribeTable, DmlStatement, Explain, ExplainFormat, Extension, FetchType, | ||
Filter, JoinType, RecursiveQuery, SkipType, StringifiedPlan, WindowFrame, | ||
WindowFrameBound, WriteOp, | ||
Analyze, BinaryExpr, DescribeTable, DmlStatement, Explain, ExplainFormat, Extension, | ||
FetchType, Filter, JoinType, Operator, RecursiveQuery, SkipType, StringifiedPlan, | ||
WindowFrame, WindowFrameBound, WriteOp, | ||
}; | ||
use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; | ||
use datafusion_physical_expr::expressions::Literal; | ||
|
@@ -90,6 +91,7 @@ use datafusion_physical_expr::{ | |
use datafusion_physical_optimizer::PhysicalOptimizerRule; | ||
use datafusion_physical_plan::empty::EmptyExec; | ||
use datafusion_physical_plan::execution_plan::InvariantLevel; | ||
use datafusion_physical_plan::joins::PiecewiseMergeJoinExec; | ||
use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; | ||
use datafusion_physical_plan::recursive_query::RecursiveQueryExec; | ||
use datafusion_physical_plan::unnest::ListUnnest; | ||
|
@@ -1131,8 +1133,42 @@ impl DefaultPhysicalPlanner { | |
}) | ||
.collect::<Result<join_utils::JoinOn>>()?; | ||
|
||
// TODO: `num_range_filters` can be used later on for ASOF joins (`num_range_filters > 1`) | ||
let mut num_range_filters = 0; | ||
let mut range_filters: Vec<Expr> = Vec::new(); | ||
let mut total_filters = 0; | ||
|
||
let join_filter = match filter { | ||
Some(expr) => { | ||
let split_expr = split_conjunction(expr); | ||
for expr in split_expr.iter() { | ||
match *expr { | ||
Expr::BinaryExpr(BinaryExpr { | ||
left: _, | ||
right: _, | ||
op, | ||
}) => { | ||
if matches!( | ||
op, | ||
Operator::Lt | ||
| Operator::LtEq | ||
| Operator::Gt | ||
| Operator::GtEq | ||
) { | ||
range_filters.push((**expr).clone()); | ||
num_range_filters += 1; | ||
} | ||
total_filters += 1; | ||
} | ||
// TODO: Want to deal with `Expr::Between` for IEJoins, it counts as two range predicates | ||
// which is why it is not dealt with in PWMJ | ||
// Expr::Between(_) => {}, | ||
_ => { | ||
total_filters += 1; | ||
} | ||
} | ||
} | ||
|
||
// Extract the columns referenced by the filter expression and save them in a HashSet | ||
let cols = expr.column_refs(); | ||
|
||
|
@@ -1188,6 +1224,7 @@ impl DefaultPhysicalPlanner { | |
)?; | ||
let filter_schema = | ||
Schema::new_with_metadata(filter_fields, metadata); | ||
|
||
let filter_expr = create_physical_expr( | ||
expr, | ||
&filter_df_schema, | ||
|
@@ -1210,10 +1247,103 @@ impl DefaultPhysicalPlanner { | |
let prefer_hash_join = | ||
session_state.config_options().optimizer.prefer_hash_join; | ||
|
||
// TODO: Allow PWMJ to deal with residual equijoin conditions | ||
let join: Arc<dyn ExecutionPlan> = if join_on.is_empty() { | ||
if join_filter.is_none() && matches!(join_type, JoinType::Inner) { | ||
// cross join if there are no join conditions and no join filter is set | ||
Arc::new(CrossJoinExec::new(physical_left, physical_right)) | ||
} else if num_range_filters == 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would like to refactor this in another pull request, just a refactor but it should be quite simple to do. Just wanted to get this version in first. |
||
&& total_filters == 1 | ||
&& !matches!( | ||
join_type, | ||
JoinType::LeftSemi | ||
| JoinType::RightSemi | ||
| JoinType::LeftAnti | ||
| JoinType::RightAnti | ||
| JoinType::LeftMark | ||
| JoinType::RightMark | ||
) | ||
&& session_state | ||
.config_options() | ||
.optimizer | ||
.enable_piecewise_merge_join | ||
{ | ||
let Expr::BinaryExpr(be) = &range_filters[0] else { | ||
return plan_err!( | ||
"Unsupported expression for PWMJ: Expected `Expr::BinaryExpr`" | ||
); | ||
}; | ||
|
||
let mut op = be.op; | ||
if !matches!( | ||
op, | ||
Operator::Lt | Operator::LtEq | Operator::Gt | Operator::GtEq | ||
) { | ||
return plan_err!( | ||
"Unsupported operator for PWMJ: {:?}. Expected one of <, <=, >, >=", | ||
op | ||
); | ||
} | ||
|
||
fn reverse_ineq(op: Operator) -> Operator { | ||
match op { | ||
Operator::Lt => Operator::Gt, | ||
Operator::LtEq => Operator::GtEq, | ||
Operator::Gt => Operator::Lt, | ||
Operator::GtEq => Operator::LtEq, | ||
_ => op, | ||
} | ||
} | ||
|
||
// Classifies `e` by which join input its column references resolve against.
//
// Returns `"left"` when every referenced column resolves in `left_df_schema`
// but not all of them resolve in `right_df_schema`, and `"right"` for the
// mirrored case. Every other outcome is reported as a planning error
// instead of panicking, because both remaining combinations are reachable
// from ordinary user queries:
//   * `(true, true)`   - the expression references no columns at all (for
//     example a literal; `all` over an empty set is vacuously true), or its
//     columns resolve in both schemas;
//   * `(false, false)` - the expression mixes columns from both inputs, or
//     references a column that neither schema contains.
let side_of = |e: &Expr| -> Result<&'static str> {
    let cols = e.column_refs();
    let in_left = cols
        .iter()
        .all(|c| left_df_schema.index_of_column(c).is_ok());
    let in_right = cols
        .iter()
        .all(|c| right_df_schema.index_of_column(c).is_ok());
    match (in_left, in_right) {
        (true, false) => Ok("left"),
        (false, true) => Ok("right"),
        _ => plan_err!(
            "Unsupported expression for PWMJ: `{}` must reference columns from exactly one join input",
            e
        ),
    }
};
|
||
let mut lhs_logical = &be.left; | ||
let mut rhs_logical = &be.right; | ||
|
||
let left_side = side_of(lhs_logical)?; | ||
let right_side = side_of(rhs_logical)?; | ||
if left_side == "right" && right_side == "left" { | ||
std::mem::swap(&mut lhs_logical, &mut rhs_logical); | ||
op = reverse_ineq(op); | ||
} else if !(left_side == "left" && right_side == "right") { | ||
return plan_err!( | ||
"Unsupported operator for PWMJ: {:?}. Expected one of <, <=, >, >=", | ||
op | ||
); | ||
} | ||
|
||
let on_left = create_physical_expr( | ||
lhs_logical, | ||
left_df_schema, | ||
session_state.execution_props(), | ||
)?; | ||
let on_right = create_physical_expr( | ||
rhs_logical, | ||
right_df_schema, | ||
session_state.execution_props(), | ||
)?; | ||
|
||
Arc::new(PiecewiseMergeJoinExec::try_new( | ||
physical_left, | ||
physical_right, | ||
(on_left, on_right), | ||
op, | ||
*join_type, | ||
session_state.config().target_partitions(), | ||
)?) | ||
} else { | ||
// there is no equal join condition, use the nested loop join | ||
Arc::new(NestedLoopJoinExec::try_new( | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.