Skip to content

Commit 6504d2a

Browse files
authored
enable explain for ballista (#2163)
* explain
* fmt
1 parent 0da1f37 commit 6504d2a

File tree

9 files changed (+225 / -58 lines changed)

ballista/rust/core/proto/ballista.proto

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ message AnalyzeNode {
183183
bool verbose = 2;
184184
}
185185

186-
message ExplainNode{
186+
message ExplainNode {
187187
LogicalPlanNode input = 1;
188188
bool verbose = 2;
189189
}
@@ -269,6 +269,7 @@ message PhysicalPlanNode {
269269
AvroScanExecNode avro_scan = 20;
270270
PhysicalExtensionNode extension = 21;
271271
UnionExecNode union = 22;
272+
ExplainExecNode explain = 23;
272273
}
273274
}
274275

@@ -461,6 +462,12 @@ message UnionExecNode {
461462
repeated PhysicalPlanNode inputs = 1;
462463
}
463464

465+
message ExplainExecNode {
466+
datafusion.Schema schema = 1;
467+
repeated datafusion.StringifiedPlan stringified_plans = 2;
468+
bool verbose = 3;
469+
}
470+
464471
message CrossJoinExecNode {
465472
PhysicalPlanNode left = 1;
466473
PhysicalPlanNode right = 2;

ballista/rust/core/proto/datafusion.proto

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,3 +515,27 @@ message ArrowType{
515515
// }
516516
//}
517517
message EmptyMessage{}
518+
519+
message OptimizedLogicalPlanType {
520+
string optimizer_name = 1;
521+
}
522+
523+
message OptimizedPhysicalPlanType {
524+
string optimizer_name = 1;
525+
}
526+
527+
message PlanType {
528+
oneof plan_type_enum {
529+
EmptyMessage InitialLogicalPlan = 1;
530+
OptimizedLogicalPlanType OptimizedLogicalPlan = 2;
531+
EmptyMessage FinalLogicalPlan = 3;
532+
EmptyMessage InitialPhysicalPlan = 4;
533+
OptimizedPhysicalPlanType OptimizedPhysicalPlan = 5;
534+
EmptyMessage FinalPhysicalPlan = 6;
535+
}
536+
}
537+
538+
message StringifiedPlan {
539+
PlanType plan_type = 1;
540+
string plan = 2;
541+
}

ballista/rust/core/src/serde/physical_plan/mod.rs

Lines changed: 52 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,24 +15,12 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::error::BallistaError;
19-
use crate::execution_plans::{
20-
ShuffleReaderExec, ShuffleWriterExec, UnresolvedShuffleExec,
21-
};
22-
use crate::serde::physical_plan::from_proto::{
23-
parse_physical_expr, parse_protobuf_hash_partitioning,
24-
};
25-
use crate::serde::protobuf::physical_expr_node::ExprType;
26-
use crate::serde::protobuf::physical_plan_node::PhysicalPlanType;
27-
use crate::serde::protobuf::repartition_exec_node::PartitionMethod;
18+
use std::convert::TryInto;
19+
use std::sync::Arc;
20+
21+
use prost::bytes::BufMut;
22+
use prost::Message;
2823

29-
use crate::serde::protobuf::{PhysicalExtensionNode, PhysicalPlanNode};
30-
use crate::serde::scheduler::PartitionLocation;
31-
use crate::serde::{
32-
byte_to_string, proto_error, protobuf, str_to_byte, AsExecutionPlan,
33-
PhysicalExtensionCodec,
34-
};
35-
use crate::{convert_required, into_physical_plan, into_required};
3624
use datafusion::arrow::compute::SortOptions;
3725
use datafusion::arrow::datatypes::SchemaRef;
3826
use datafusion::datafusion_data_access::object_store::local::LocalFileSystem;
@@ -45,6 +33,7 @@ use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec;
4533
use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
4634
use datafusion::physical_plan::cross_join::CrossJoinExec;
4735
use datafusion::physical_plan::empty::EmptyExec;
36+
use datafusion::physical_plan::explain::ExplainExec;
4837
use datafusion::physical_plan::expressions::{Column, PhysicalSortExpr};
4938
use datafusion::physical_plan::file_format::{
5039
AvroExec, CsvExec, FileScanConfig, ParquetExec,
@@ -62,10 +51,24 @@ use datafusion::physical_plan::{
6251
AggregateExpr, ExecutionPlan, Partitioning, PhysicalExpr, WindowExpr,
6352
};
6453
use datafusion_proto::from_proto::parse_expr;
65-
use prost::bytes::BufMut;
66-
use prost::Message;
67-
use std::convert::TryInto;
68-
use std::sync::Arc;
54+
55+
use crate::error::BallistaError;
56+
use crate::execution_plans::{
57+
ShuffleReaderExec, ShuffleWriterExec, UnresolvedShuffleExec,
58+
};
59+
use crate::serde::physical_plan::from_proto::{
60+
parse_physical_expr, parse_protobuf_hash_partitioning,
61+
};
62+
use crate::serde::protobuf::physical_expr_node::ExprType;
63+
use crate::serde::protobuf::physical_plan_node::PhysicalPlanType;
64+
use crate::serde::protobuf::repartition_exec_node::PartitionMethod;
65+
use crate::serde::protobuf::{PhysicalExtensionNode, PhysicalPlanNode};
66+
use crate::serde::scheduler::PartitionLocation;
67+
use crate::serde::{
68+
byte_to_string, proto_error, protobuf, str_to_byte, AsExecutionPlan,
69+
PhysicalExtensionCodec,
70+
};
71+
use crate::{convert_required, into_physical_plan, into_required};
6972

7073
pub mod from_proto;
7174
pub mod to_proto;
@@ -103,6 +106,15 @@ impl AsExecutionPlan for PhysicalPlanNode {
103106
))
104107
})?;
105108
match plan {
109+
PhysicalPlanType::Explain(explain) => Ok(Arc::new(ExplainExec::new(
110+
Arc::new(explain.schema.as_ref().unwrap().try_into()?),
111+
explain
112+
.stringified_plans
113+
.iter()
114+
.map(|plan| plan.into())
115+
.collect(),
116+
explain.verbose,
117+
))),
106118
PhysicalPlanType::Projection(projection) => {
107119
let input: Arc<dyn ExecutionPlan> = into_physical_plan!(
108120
projection.input,
@@ -587,7 +599,21 @@ impl AsExecutionPlan for PhysicalPlanNode {
587599
let plan_clone = plan.clone();
588600
let plan = plan.as_any();
589601

590-
if let Some(exec) = plan.downcast_ref::<ProjectionExec>() {
602+
if let Some(exec) = plan.downcast_ref::<ExplainExec>() {
603+
Ok(protobuf::PhysicalPlanNode {
604+
physical_plan_type: Some(PhysicalPlanType::Explain(
605+
protobuf::ExplainExecNode {
606+
schema: Some(exec.schema().as_ref().into()),
607+
stringified_plans: exec
608+
.stringified_plans()
609+
.iter()
610+
.map(|plan| plan.into())
611+
.collect(),
612+
verbose: exec.verbose(),
613+
},
614+
)),
615+
})
616+
} else if let Some(exec) = plan.downcast_ref::<ProjectionExec>() {
591617
let input = protobuf::PhysicalPlanNode::try_from_physical_plan(
592618
exec.input().to_owned(),
593619
extension_codec,
@@ -1038,7 +1064,6 @@ mod roundtrip_tests {
10381064
use std::ops::Deref;
10391065
use std::sync::Arc;
10401066

1041-
use crate::serde::{AsExecutionPlan, BallistaCodec};
10421067
use datafusion::arrow::array::ArrayRef;
10431068
use datafusion::execution::context::ExecutionProps;
10441069
use datafusion::logical_plan::create_udf;
@@ -1071,10 +1096,12 @@ mod roundtrip_tests {
10711096
scalar::ScalarValue,
10721097
};
10731098

1074-
use super::super::super::error::Result;
1075-
use super::super::protobuf;
10761099
use crate::execution_plans::ShuffleWriterExec;
10771100
use crate::serde::protobuf::{LogicalPlanNode, PhysicalPlanNode};
1101+
use crate::serde::{AsExecutionPlan, BallistaCodec};
1102+
1103+
use super::super::super::error::Result;
1104+
use super::super::protobuf;
10781105

10791106
fn roundtrip_test(exec_plan: Arc<dyn ExecutionPlan>) -> Result<()> {
10801107
let ctx = SessionContext::new();

datafusion/core/src/optimizer/limit_push_down.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@ impl LimitPushDown {
3939
}
4040

4141
fn limit_push_down(
42-
optimizer: &LimitPushDown,
42+
_optimizer: &LimitPushDown,
4343
upper_limit: Option<usize>,
4444
plan: &LogicalPlan,
45-
execution_props: &ExecutionProps,
45+
_execution_props: &ExecutionProps,
4646
) -> Result<LogicalPlan> {
4747
match (plan, upper_limit) {
4848
(LogicalPlan::Limit(Limit { n, input }), upper_limit) => {
@@ -51,10 +51,10 @@ fn limit_push_down(
5151
n: smallest,
5252
// push down limit to plan (minimum of upper limit and current limit)
5353
input: Arc::new(limit_push_down(
54-
optimizer,
54+
_optimizer,
5555
Some(smallest),
5656
input.as_ref(),
57-
execution_props,
57+
_execution_props,
5858
)?),
5959
}))
6060
}
@@ -91,10 +91,10 @@ fn limit_push_down(
9191
Ok(LogicalPlan::Projection(Projection {
9292
expr: expr.clone(),
9393
input: Arc::new(limit_push_down(
94-
optimizer,
94+
_optimizer,
9595
upper_limit,
9696
input.as_ref(),
97-
execution_props,
97+
_execution_props,
9898
)?),
9999
schema: schema.clone(),
100100
alias: alias.clone(),
@@ -115,10 +115,10 @@ fn limit_push_down(
115115
Ok(LogicalPlan::Limit(Limit {
116116
n: upper_limit,
117117
input: Arc::new(limit_push_down(
118-
optimizer,
118+
_optimizer,
119119
Some(upper_limit),
120120
x,
121-
execution_props,
121+
_execution_props,
122122
)?),
123123
}))
124124
})
@@ -138,7 +138,7 @@ fn limit_push_down(
138138
let inputs = plan.inputs();
139139
let new_inputs = inputs
140140
.iter()
141-
.map(|plan| limit_push_down(optimizer, None, plan, execution_props))
141+
.map(|plan| limit_push_down(_optimizer, None, plan, _execution_props))
142142
.collect::<Result<Vec<_>>>()?;
143143

144144
utils::from_plan(plan, &expr, &new_inputs)

datafusion/core/src/optimizer/projection_push_down.rs

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,11 @@ fn get_projected_schema(
126126

127127
/// Recursively traverses the logical plan, removing expressions that are not needed.
128128
fn optimize_plan(
129-
optimizer: &ProjectionPushDown,
129+
_optimizer: &ProjectionPushDown,
130130
plan: &LogicalPlan,
131131
required_columns: &HashSet<Column>, // set of columns required up to this step
132132
has_projection: bool,
133-
execution_props: &ExecutionProps,
133+
_execution_props: &ExecutionProps,
134134
) -> Result<LogicalPlan> {
135135
let mut new_required_columns = required_columns.clone();
136136
match plan {
@@ -165,11 +165,11 @@ fn optimize_plan(
165165
})?;
166166

167167
let new_input = optimize_plan(
168-
optimizer,
168+
_optimizer,
169169
input,
170170
&new_required_columns,
171171
true,
172-
execution_props,
172+
_execution_props,
173173
)?;
174174

175175
let new_required_columns_optimized = new_input
@@ -211,19 +211,19 @@ fn optimize_plan(
211211
}
212212

213213
let optimized_left = Arc::new(optimize_plan(
214-
optimizer,
214+
_optimizer,
215215
left,
216216
&new_required_columns,
217217
true,
218-
execution_props,
218+
_execution_props,
219219
)?);
220220

221221
let optimized_right = Arc::new(optimize_plan(
222-
optimizer,
222+
_optimizer,
223223
right,
224224
&new_required_columns,
225225
true,
226-
execution_props,
226+
_execution_props,
227227
)?);
228228

229229
let schema = build_join_schema(
@@ -272,11 +272,11 @@ fn optimize_plan(
272272
)?;
273273

274274
LogicalPlanBuilder::from(optimize_plan(
275-
optimizer,
275+
_optimizer,
276276
input,
277277
&new_required_columns,
278278
true,
279-
execution_props,
279+
_execution_props,
280280
)?)
281281
.window(new_window_expr)?
282282
.build()
@@ -324,11 +324,11 @@ fn optimize_plan(
324324
group_expr: group_expr.clone(),
325325
aggr_expr: new_aggr_expr,
326326
input: Arc::new(optimize_plan(
327-
optimizer,
327+
_optimizer,
328328
input,
329329
&new_required_columns,
330330
true,
331-
execution_props,
331+
_execution_props,
332332
)?),
333333
schema: DFSchemaRef::new(new_schema),
334334
}))
@@ -373,11 +373,11 @@ fn optimize_plan(
373373

374374
Ok(LogicalPlan::Analyze(Analyze {
375375
input: Arc::new(optimize_plan(
376-
optimizer,
376+
_optimizer,
377377
&a.input,
378378
&required_columns,
379379
false,
380-
execution_props,
380+
_execution_props,
381381
)?),
382382
verbose: a.verbose,
383383
schema: a.schema.clone(),
@@ -409,11 +409,11 @@ fn optimize_plan(
409409
new_required_columns.insert(f.qualified_column());
410410
});
411411
optimize_plan(
412-
optimizer,
412+
_optimizer,
413413
input_plan,
414414
&new_required_columns,
415415
has_projection,
416-
execution_props,
416+
_execution_props,
417417
)
418418
})
419419
.collect::<Result<Vec<_>>>()?;
@@ -457,11 +457,11 @@ fn optimize_plan(
457457
.iter()
458458
.map(|input_plan| {
459459
optimize_plan(
460-
optimizer,
460+
_optimizer,
461461
input_plan,
462462
&new_required_columns,
463463
has_projection,
464-
execution_props,
464+
_execution_props,
465465
)
466466
})
467467
.collect::<Result<Vec<_>>>()?;

datafusion/core/src/physical_plan/explain.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ impl ExplainExec {
6666
pub fn stringified_plans(&self) -> &[StringifiedPlan] {
6767
&self.stringified_plans
6868
}
69+
70+
/// Returns the value of the `verbose` flag.
71+
pub fn verbose(&self) -> bool {
72+
self.verbose
73+
}
6974
}
7075

7176
#[async_trait]

0 commit comments

Comments
 (0)