Skip to content

Commit da555fb

Browse files
authored
Minor: Add documentation to CommonSubexprEliminate (#9959)
1 parent a29f2be commit da555fb

File tree

1 file changed

+36
-9
lines changed

1 file changed

+36
-9
lines changed

datafusion/optimizer/src/common_subexpr_eliminate.rs

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Eliminate common sub-expression.
18+
//! [`CommonSubexprEliminate`] to avoid redundant computation of common sub-expressions
1919
2020
use std::collections::hash_map::Entry;
2121
use std::collections::{BTreeSet, HashMap};
@@ -126,7 +126,7 @@ type Identifier = String;
126126
/// same value
127127
///
128128
/// Currently only common sub-expressions within a single `LogicalPlan` are
129-
/// be eliminated.
129+
/// eliminated.
130130
///
131131
/// # Example
132132
///
@@ -148,6 +148,12 @@ type Identifier = String;
148148
pub struct CommonSubexprEliminate {}
149149

150150
impl CommonSubexprEliminate {
151+
/// Rewrites `exprs_list` with common sub-expressions replaced with a new
152+
/// column.
153+
///
154+
/// `affected_id` is updated with any sub expressions that were replaced.
155+
///
156+
/// Returns the rewritten expressions
151157
fn rewrite_exprs_list(
152158
&self,
153159
exprs_list: &[&[Expr]],
@@ -166,6 +172,14 @@ impl CommonSubexprEliminate {
166172
.collect::<Result<Vec<_>>>()
167173
}
168174

175+
/// Rewrites the expression in `exprs_list` with common sub-expressions
176+
/// replaced with a new colum and adds a ProjectionExec on top of `input`
177+
/// which computes any replaced common sub-expressions.
178+
///
179+
/// Returns a tuple of:
180+
/// 1. The rewritten expressions
181+
/// 2. A `LogicalPlan::Projection` with input of `input` that computes any
182+
/// common sub-expressions that were used
169183
fn rewrite_expr(
170184
&self,
171185
exprs_list: &[&[Expr]],
@@ -458,7 +472,16 @@ fn pop_expr(new_expr: &mut Vec<Vec<Expr>>) -> Result<Vec<Expr>> {
458472
.ok_or_else(|| DataFusionError::Internal("Failed to pop expression".to_string()))
459473
}
460474

461-
/// Build the "intermediate" projection plan that evaluates the extracted common expressions.
475+
/// Build the "intermediate" projection plan that evaluates the extracted common
476+
/// expressions.
477+
///
478+
/// # Arguments
479+
/// input: the input plan
480+
///
481+
/// affected_id: which common subexpressions were used (and thus are added to
482+
/// intermediate projection)
483+
///
484+
/// expr_set: the set of common subexpressions
462485
fn build_common_expr_project_plan(
463486
input: LogicalPlan,
464487
affected_id: BTreeSet<Identifier>,
@@ -493,10 +516,11 @@ fn build_common_expr_project_plan(
493516
)?))
494517
}
495518

496-
/// Build the projection plan to eliminate unexpected columns produced by
519+
/// Build the projection plan to eliminate unnecessary columns produced by
497520
/// the "intermediate" projection plan built in [build_common_expr_project_plan].
498521
///
499-
/// This is for those plans who don't keep its own output schema like `Filter` or `Sort`.
522+
/// This is required to keep the schema the same for plans that pass the input
523+
/// on to the output, such as `Filter` or `Sort`.
500524
fn build_recover_project_plan(
501525
schema: &DFSchema,
502526
input: LogicalPlan,
@@ -570,7 +594,7 @@ impl ExprMask {
570594
}
571595
}
572596

573-
/// Go through an expression tree and generate identifier.
597+
/// Go through an expression tree and generate identifiers for each subexpression.
574598
///
575599
/// An identifier contains information of the expression itself and its sub-expression.
576600
/// This visitor implementation use a stack `visit_stack` to track traversal, which
@@ -679,9 +703,10 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> {
679703
}
680704
}
681705

682-
/// Rewrite expression by replacing detected common sub-expression with
683-
/// the corresponding temporary column name. That column contains the
684-
/// evaluate result of replaced expression.
706+
/// Rewrite expression by common sub-expression with a corresponding temporary
707+
/// column name that will compute the subexpression.
708+
///
709+
/// `affected_id` is updated with any sub expressions that were replaced
685710
struct CommonSubexprRewriter<'a> {
686711
expr_set: &'a ExprSet,
687712
/// Which identifier is replaced.
@@ -726,6 +751,8 @@ impl TreeNodeRewriter for CommonSubexprRewriter<'_> {
726751
}
727752
}
728753

754+
/// Replace common sub-expression in `expr` with the corresponding temporary
755+
/// column name, updating `affected_id` with any replaced expressions
729756
fn replace_common_expr(
730757
expr: Expr,
731758
expr_set: &ExprSet,

0 commit comments

Comments
 (0)