-
Notifications
You must be signed in to change notification settings - Fork 1.5k
feat: ORDER BY ALL #15772
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: ORDER BY ALL #15772
Changes from 1 commit
96638f8
91fc9f6
a7262a3
b2d48a2
e0f3189
86526fd
a65a9a7
e9a5038
6fc1122
a9f758a
02cd396
3f9839d
cd1bcaf
a328983
590be4d
646dc8c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,7 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; | |
use datafusion_common::{ | ||
not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema, Result, | ||
}; | ||
use datafusion_expr::expr::Sort; | ||
use datafusion_expr::expr::{OrderByExprs, Sort}; | ||
use datafusion_expr::{Expr, SortExpr}; | ||
use sqlparser::ast::{ | ||
Expr as SQLExpr, OrderByExpr, OrderByOptions, Value, ValueWithSpan, | ||
|
@@ -41,16 +41,12 @@ impl<S: ContextProvider> SqlToRel<'_, S> { | |
/// If false, interpret numeric literals as constant values. | ||
pub(crate) fn order_by_to_sort_expr( | ||
&self, | ||
exprs: Vec<OrderByExpr>, | ||
order_by_exprs: OrderByExprs, | ||
input_schema: &DFSchema, | ||
planner_context: &mut PlannerContext, | ||
literal_to_column: bool, | ||
additional_schema: Option<&DFSchema>, | ||
) -> Result<Vec<SortExpr>> { | ||
if exprs.is_empty() { | ||
return Ok(vec![]); | ||
} | ||
|
||
let mut combined_schema; | ||
let order_by_schema = match additional_schema { | ||
Some(schema) => { | ||
|
@@ -61,56 +57,78 @@ impl<S: ContextProvider> SqlToRel<'_, S> { | |
None => input_schema, | ||
}; | ||
|
||
let mut expr_vec = vec![]; | ||
for e in exprs { | ||
let OrderByExpr { | ||
expr, | ||
options: OrderByOptions { asc, nulls_first }, | ||
with_fill, | ||
} = e; | ||
if order_by_exprs.is_empty() { | ||
return Ok(vec![]); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why put the early return here? IMO, the earlier we do the check, the more useless computation we can avoid. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are right. I moved it by accident. |
||
|
||
if let Some(with_fill) = with_fill { | ||
return not_impl_err!("ORDER BY WITH FILL is not supported: {with_fill}"); | ||
} | ||
let mut sort_expr_vec = vec![]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I usually incline to give it a pre-allocation |
||
|
||
let expr = match expr { | ||
SQLExpr::Value(ValueWithSpan { | ||
value: Value::Number(v, _), | ||
span: _, | ||
}) if literal_to_column => { | ||
let field_index = v | ||
.parse::<usize>() | ||
.map_err(|err| plan_datafusion_err!("{}", err))?; | ||
let make_sort_expr = | ||
|expr: Expr, asc: Option<bool>, nulls_first: Option<bool>| { | ||
let asc = asc.unwrap_or(true); | ||
// When asc is true, by default nulls last to be consistent with postgres | ||
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html | ||
let nulls_first = nulls_first.unwrap_or(!asc); | ||
Sort::new(expr, asc, nulls_first) | ||
}; | ||
|
||
if field_index == 0 { | ||
return plan_err!( | ||
"Order by index starts at 1 for column indexes" | ||
); | ||
} else if input_schema.fields().len() < field_index { | ||
return plan_err!( | ||
"Order by column out of bounds, specified: {}, max: {}", | ||
field_index, | ||
input_schema.fields().len() | ||
match order_by_exprs { | ||
OrderByExprs::OrderByExprVec(expressions) => { | ||
for e in expressions { | ||
let OrderByExpr { | ||
expr, | ||
options: OrderByOptions { asc, nulls_first }, | ||
with_fill, | ||
} = e; | ||
|
||
if let Some(with_fill) = with_fill { | ||
return not_impl_err!( | ||
"ORDER BY WITH FILL is not supported: {with_fill}" | ||
); | ||
} | ||
|
||
Expr::Column(Column::from( | ||
input_schema.qualified_field(field_index - 1), | ||
)) | ||
let expr = match expr { | ||
SQLExpr::Value(ValueWithSpan { | ||
value: Value::Number(v, _), | ||
span: _, | ||
}) if literal_to_column => { | ||
let field_index = v | ||
.parse::<usize>() | ||
.map_err(|err| plan_datafusion_err!("{}", err))?; | ||
|
||
if field_index == 0 { | ||
return plan_err!( | ||
"Order by index starts at 1 for column indexes" | ||
); | ||
} else if input_schema.fields().len() < field_index { | ||
return plan_err!( | ||
"Order by column out of bounds, specified: {}, max: {}", | ||
field_index, | ||
input_schema.fields().len() | ||
); | ||
} | ||
|
||
Expr::Column(Column::from( | ||
input_schema.qualified_field(field_index - 1), | ||
)) | ||
} | ||
e => self.sql_expr_to_logical_expr( | ||
e, | ||
order_by_schema, | ||
planner_context, | ||
)?, | ||
}; | ||
sort_expr_vec.push(make_sort_expr(expr, asc, nulls_first)); | ||
} | ||
e => { | ||
self.sql_expr_to_logical_expr(e, order_by_schema, planner_context)? | ||
} | ||
OrderByExprs::All { exprs, options } => { | ||
let OrderByOptions { asc, nulls_first } = options; | ||
for expr in exprs { | ||
sort_expr_vec.push(make_sort_expr(expr, asc, nulls_first)); | ||
} | ||
}; | ||
let asc = asc.unwrap_or(true); | ||
expr_vec.push(Sort::new( | ||
expr, | ||
asc, | ||
// When asc is true, by default nulls last to be consistent with postgres | ||
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html | ||
nulls_first.unwrap_or(!asc), | ||
)) | ||
} | ||
Ok(expr_vec) | ||
} | ||
}; | ||
|
||
Ok(sort_expr_vec) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,14 +21,14 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; | |
|
||
use crate::stack::StackGuard; | ||
use datafusion_common::{not_impl_err, Constraints, DFSchema, Result}; | ||
use datafusion_expr::expr::Sort; | ||
use datafusion_expr::select_expr::SelectExpr; | ||
use datafusion_expr::expr::{OrderByExprs, Sort}; | ||
|
||
use datafusion_expr::{ | ||
CreateMemoryTable, DdlStatement, Distinct, LogicalPlan, LogicalPlanBuilder, | ||
CreateMemoryTable, DdlStatement, Distinct, Expr, LogicalPlan, LogicalPlanBuilder, | ||
}; | ||
use sqlparser::ast::{ | ||
Expr as SQLExpr, Offset as SQLOffset, OrderBy, OrderByExpr, OrderByKind, Query, | ||
SelectInto, SetExpr, | ||
Expr as SQLExpr, Offset as SQLOffset, OrderBy, OrderByKind, Query, SelectInto, | ||
SetExpr, | ||
}; | ||
|
||
impl<S: ContextProvider> SqlToRel<'_, S> { | ||
|
@@ -151,24 +151,46 @@ impl<S: ContextProvider> SqlToRel<'_, S> { | |
} | ||
|
||
/// Returns the order by expressions from the query. | ||
fn to_order_by_exprs(order_by: Option<OrderBy>) -> Result<Vec<OrderByExpr>> { | ||
fn to_order_by_exprs(order_by: Option<OrderBy>) -> Result<OrderByExprs> { | ||
to_order_by_exprs_with_select(order_by, None) | ||
} | ||
|
||
/// Returns the order by expressions from the query with the select expressions. | ||
pub(crate) fn to_order_by_exprs_with_select( | ||
order_by: Option<OrderBy>, | ||
_select_exprs: Option<&Vec<SelectExpr>>, // TODO: ORDER BY ALL | ||
) -> Result<Vec<OrderByExpr>> { | ||
select_exprs: Option<&Vec<Expr>>, | ||
) -> Result<OrderByExprs> { | ||
let Some(OrderBy { kind, interpolate }) = order_by else { | ||
// If no order by, return an empty array. | ||
return Ok(vec![]); | ||
return Ok(OrderByExprs::OrderByExprVec(vec![])); | ||
}; | ||
if let Some(_interpolate) = interpolate { | ||
return not_impl_err!("ORDER BY INTERPOLATE is not supported"); | ||
} | ||
match kind { | ||
OrderByKind::All(_) => not_impl_err!("ORDER BY ALL is not supported"), | ||
OrderByKind::Expressions(order_by_exprs) => Ok(order_by_exprs), | ||
OrderByKind::All(order_by_options) => { | ||
let Some(exprs) = select_exprs else { | ||
return Ok(OrderByExprs::All { | ||
exprs: vec![], | ||
options: order_by_options, | ||
}); | ||
}; | ||
|
||
let order_by_epxrs = exprs | ||
.iter() | ||
.filter_map(|select_expr| match select_expr { | ||
Expr::Column(_) => Some(select_expr.clone()), | ||
_ => None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you @PokIsemaine, the PR seems good, but I have one question: Are we going to exclude select expressions which are not column? Is this the same behavior of duckDB? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently, it is directly filtered out. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if the user gives something like |
||
}) | ||
.collect::<Vec<_>>(); | ||
|
||
Ok(OrderByExprs::All { | ||
exprs: order_by_epxrs, | ||
options: order_by_options, | ||
}) | ||
} | ||
OrderByKind::Expressions(order_by_exprs) => { | ||
Ok(OrderByExprs::OrderByExprVec(order_by_exprs)) | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure if we need the Enum.
Maybe it's enough to wrap
Vec<OrderByExpr>
as a struct and add an extra flag to indicate theALL
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This requires converting a
datafusion_expr::Expr
to aSQLExpr
, but I'm not sure if there's a good way to do that.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of this new enum, can we utilize https://github.com/apache/datafusion-sqlparser-rs/blob/3ec80e187d163c4f90c5bfc7c04ef71a2705a631/src/ast/query.rs#L2222 ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this enum seems to only be used in the sql planner, so I don't think it is needed in
datafusion-expr
-- maybe we can just move this into the datafusion-sql crate and make itpub(crate)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, I’ll try to move it this weekend