15
15
// specific language governing permissions and limitations
16
16
// under the License.
17
17
18
- //! Eliminate common sub-expression.
18
+ //! [`CommonSubexprEliminate`] to avoid redundant computation of common sub-expressions
19
19
20
20
use std:: collections:: hash_map:: Entry ;
21
21
use std:: collections:: { BTreeSet , HashMap } ;
@@ -126,7 +126,7 @@ type Identifier = String;
126
126
/// same value
127
127
///
128
128
/// Currently only common sub-expressions within a single `LogicalPlan` are
129
- /// be eliminated.
129
+ /// eliminated.
130
130
///
131
131
/// # Example
132
132
///
@@ -148,6 +148,12 @@ type Identifier = String;
148
148
pub struct CommonSubexprEliminate { }
149
149
150
150
impl CommonSubexprEliminate {
151
+ /// Rewrites `exprs_list` with common sub-expressions replaced with a new
152
+ /// column.
153
+ ///
154
+ /// `affected_id` is updated with any sub expressions that were replaced.
155
+ ///
156
+ /// Returns the rewritten expressions
151
157
fn rewrite_exprs_list (
152
158
& self ,
153
159
exprs_list : & [ & [ Expr ] ] ,
@@ -166,6 +172,14 @@ impl CommonSubexprEliminate {
166
172
. collect :: < Result < Vec < _ > > > ( )
167
173
}
168
174
175
+ /// Rewrites the expression in `exprs_list` with common sub-expressions
176
+ /// replaced with a new colum and adds a ProjectionExec on top of `input`
177
+ /// which computes any replaced common sub-expressions.
178
+ ///
179
+ /// Returns a tuple of:
180
+ /// 1. The rewritten expressions
181
+ /// 2. A `LogicalPlan::Projection` with input of `input` that computes any
182
+ /// common sub-expressions that were used
169
183
fn rewrite_expr (
170
184
& self ,
171
185
exprs_list : & [ & [ Expr ] ] ,
@@ -458,7 +472,16 @@ fn pop_expr(new_expr: &mut Vec<Vec<Expr>>) -> Result<Vec<Expr>> {
458
472
. ok_or_else ( || DataFusionError :: Internal ( "Failed to pop expression" . to_string ( ) ) )
459
473
}
460
474
461
- /// Build the "intermediate" projection plan that evaluates the extracted common expressions.
475
+ /// Build the "intermediate" projection plan that evaluates the extracted common
476
+ /// expressions.
477
+ ///
478
+ /// # Arguments
479
+ /// input: the input plan
480
+ ///
481
+ /// affected_id: which common subexpressions were used (and thus are added to
482
+ /// intermediate projection)
483
+ ///
484
+ /// expr_set: the set of common subexpressions
462
485
fn build_common_expr_project_plan (
463
486
input : LogicalPlan ,
464
487
affected_id : BTreeSet < Identifier > ,
@@ -493,10 +516,11 @@ fn build_common_expr_project_plan(
493
516
) ?) )
494
517
}
495
518
496
- /// Build the projection plan to eliminate unexpected columns produced by
519
+ /// Build the projection plan to eliminate unnecessary columns produced by
497
520
/// the "intermediate" projection plan built in [build_common_expr_project_plan].
498
521
///
499
- /// This is for those plans who don't keep its own output schema like `Filter` or `Sort`.
522
+ /// This is required to keep the schema the same for plans that pass the input
523
+ /// on to the output, such as `Filter` or `Sort`.
500
524
fn build_recover_project_plan (
501
525
schema : & DFSchema ,
502
526
input : LogicalPlan ,
@@ -570,7 +594,7 @@ impl ExprMask {
570
594
}
571
595
}
572
596
573
- /// Go through an expression tree and generate identifier .
597
+ /// Go through an expression tree and generate identifiers for each subexpression .
574
598
///
575
599
/// An identifier contains information of the expression itself and its sub-expression.
576
600
/// This visitor implementation use a stack `visit_stack` to track traversal, which
@@ -679,9 +703,10 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> {
679
703
}
680
704
}
681
705
682
- /// Rewrite expression by replacing detected common sub-expression with
683
- /// the corresponding temporary column name. That column contains the
684
- /// evaluate result of replaced expression.
706
+ /// Rewrite expression by common sub-expression with a corresponding temporary
707
+ /// column name that will compute the subexpression.
708
+ ///
709
+ /// `affected_id` is updated with any sub expressions that were replaced
685
710
struct CommonSubexprRewriter < ' a > {
686
711
expr_set : & ' a ExprSet ,
687
712
/// Which identifier is replaced.
@@ -726,6 +751,8 @@ impl TreeNodeRewriter for CommonSubexprRewriter<'_> {
726
751
}
727
752
}
728
753
754
+ /// Replace common sub-expression in `expr` with the corresponding temporary
755
+ /// column name, updating `affected_id` with any replaced expressions
729
756
fn replace_common_expr (
730
757
expr : Expr ,
731
758
expr_set : & ExprSet ,
0 commit comments