Skip to content

feat: ORDER BY ALL #15772

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
May 6, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ use datafusion_common::{
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
use sqlparser::ast::{
display_comma_separated, ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem,
NullTreatment, RenameSelectItem, ReplaceSelectElement,
NullTreatment, OrderByExpr, OrderByOptions, RenameSelectItem, ReplaceSelectElement,
};

/// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
Expand Down Expand Up @@ -701,6 +701,24 @@ impl TryCast {
}
}

/// `ORDER BY` expressions handed to the SQL planner.
///
/// An `ORDER BY` clause is either an explicit list of parsed items or the
/// `ORDER BY ALL` form, which carries one set of options for every expression.
#[derive(Debug, Clone)]
pub enum OrderByExprs {
    /// An explicit list of `ORDER BY` items, exactly as produced by `sqlparser`.
    OrderByExprVec(Vec<OrderByExpr>),
    /// `ORDER BY ALL`.
    All {
        /// The logical expressions to sort by.
        exprs: Vec<Expr>,
        /// The single set of options (`asc` / `nulls_first`) that is applied
        /// to every entry of `exprs`.
        options: OrderByOptions,
    },
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if we need the Enum.

Maybe it's enough to wrap `Vec<OrderByExpr>` in a struct and add an extra flag to indicate `ALL`:

pub struct OrderByExprs {
    exprs: Vec<OrderByExpr>,
    all: bool
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This requires converting a datafusion_expr::Expr to a SQLExpr, but I'm not sure if there's a good way to do that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this enum seems to only be used in the sql planner, so I don't think it is needed in datafusion-expr -- maybe we can just move this into the datafusion-sql crate and make it pub(crate)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I’ll try to move it this weekend


impl OrderByExprs {
pub fn is_empty(&self) -> bool {
match self {
OrderByExprs::OrderByExprVec(exprs) => exprs.is_empty(),
OrderByExprs::All { exprs, .. } => exprs.is_empty(),
}
}
}

/// SORT expression
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Sort {
Expand Down
6 changes: 3 additions & 3 deletions datafusion/sql/src/expr/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use datafusion_common::{
internal_datafusion_err, internal_err, not_impl_err, plan_datafusion_err, plan_err,
DFSchema, Dependency, Diagnostic, Result, Span,
};
use datafusion_expr::expr::{ScalarFunction, Unnest, WildcardOptions};
use datafusion_expr::expr::{OrderByExprs, ScalarFunction, Unnest, WildcardOptions};
use datafusion_expr::planner::{PlannerResult, RawAggregateExpr, RawWindowExpr};
use datafusion_expr::{
expr, Expr, ExprFunctionExt, ExprSchemable, WindowFrame, WindowFunctionDefinition,
Expand Down Expand Up @@ -276,7 +276,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
.map(|e| self.sql_expr_to_logical_expr(e, schema, planner_context))
.collect::<Result<Vec<_>>>()?;
let mut order_by = self.order_by_to_sort_expr(
window.order_by,
OrderByExprs::OrderByExprVec(window.order_by),
schema,
planner_context,
// Numeric literals in window function ORDER BY are treated as constants
Expand Down Expand Up @@ -357,7 +357,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
// User defined aggregate functions (UDAF) have precedence in case it has the same name as a scalar built-in function
if let Some(fm) = self.context_provider.get_aggregate_meta(&name) {
let order_by = self.order_by_to_sort_expr(
order_by,
OrderByExprs::OrderByExprVec(order_by),
schema,
planner_context,
true,
Expand Down
116 changes: 67 additions & 49 deletions datafusion/sql/src/expr/order_by.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
use datafusion_common::{
not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema, Result,
};
use datafusion_expr::expr::Sort;
use datafusion_expr::expr::{OrderByExprs, Sort};
use datafusion_expr::{Expr, SortExpr};
use sqlparser::ast::{
Expr as SQLExpr, OrderByExpr, OrderByOptions, Value, ValueWithSpan,
Expand All @@ -41,16 +41,12 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
/// If false, interpret numeric literals as constant values.
pub(crate) fn order_by_to_sort_expr(
&self,
exprs: Vec<OrderByExpr>,
order_by_exprs: OrderByExprs,
input_schema: &DFSchema,
planner_context: &mut PlannerContext,
literal_to_column: bool,
additional_schema: Option<&DFSchema>,
) -> Result<Vec<SortExpr>> {
if exprs.is_empty() {
return Ok(vec![]);
}

let mut combined_schema;
let order_by_schema = match additional_schema {
Some(schema) => {
Expand All @@ -61,56 +57,78 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
None => input_schema,
};

let mut expr_vec = vec![];
for e in exprs {
let OrderByExpr {
expr,
options: OrderByOptions { asc, nulls_first },
with_fill,
} = e;
if order_by_exprs.is_empty() {
return Ok(vec![]);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why put the early return here? IMO, the earlier we do the check, the more useless computation we can avoid.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right. I moved it by accident.


if let Some(with_fill) = with_fill {
return not_impl_err!("ORDER BY WITH FILL is not supported: {with_fill}");
}
let mut sort_expr_vec = vec![];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm usually inclined to pre-allocate it.


let expr = match expr {
SQLExpr::Value(ValueWithSpan {
value: Value::Number(v, _),
span: _,
}) if literal_to_column => {
let field_index = v
.parse::<usize>()
.map_err(|err| plan_datafusion_err!("{}", err))?;
let make_sort_expr =
|expr: Expr, asc: Option<bool>, nulls_first: Option<bool>| {
let asc = asc.unwrap_or(true);
// When asc is true, by default nulls last to be consistent with postgres
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html
let nulls_first = nulls_first.unwrap_or(!asc);
Sort::new(expr, asc, nulls_first)
};

if field_index == 0 {
return plan_err!(
"Order by index starts at 1 for column indexes"
);
} else if input_schema.fields().len() < field_index {
return plan_err!(
"Order by column out of bounds, specified: {}, max: {}",
field_index,
input_schema.fields().len()
match order_by_exprs {
OrderByExprs::OrderByExprVec(expressions) => {
for e in expressions {
let OrderByExpr {
expr,
options: OrderByOptions { asc, nulls_first },
with_fill,
} = e;

if let Some(with_fill) = with_fill {
return not_impl_err!(
"ORDER BY WITH FILL is not supported: {with_fill}"
);
}

Expr::Column(Column::from(
input_schema.qualified_field(field_index - 1),
))
let expr = match expr {
SQLExpr::Value(ValueWithSpan {
value: Value::Number(v, _),
span: _,
}) if literal_to_column => {
let field_index = v
.parse::<usize>()
.map_err(|err| plan_datafusion_err!("{}", err))?;

if field_index == 0 {
return plan_err!(
"Order by index starts at 1 for column indexes"
);
} else if input_schema.fields().len() < field_index {
return plan_err!(
"Order by column out of bounds, specified: {}, max: {}",
field_index,
input_schema.fields().len()
);
}

Expr::Column(Column::from(
input_schema.qualified_field(field_index - 1),
))
}
e => self.sql_expr_to_logical_expr(
e,
order_by_schema,
planner_context,
)?,
};
sort_expr_vec.push(make_sort_expr(expr, asc, nulls_first));
}
e => {
self.sql_expr_to_logical_expr(e, order_by_schema, planner_context)?
}
OrderByExprs::All { exprs, options } => {
let OrderByOptions { asc, nulls_first } = options;
for expr in exprs {
sort_expr_vec.push(make_sort_expr(expr, asc, nulls_first));
}
};
let asc = asc.unwrap_or(true);
expr_vec.push(Sort::new(
expr,
asc,
// When asc is true, by default nulls last to be consistent with postgres
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html
nulls_first.unwrap_or(!asc),
))
}
Ok(expr_vec)
}
};

Ok(sort_expr_vec)
}
}
44 changes: 33 additions & 11 deletions datafusion/sql/src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel};

use crate::stack::StackGuard;
use datafusion_common::{not_impl_err, Constraints, DFSchema, Result};
use datafusion_expr::expr::Sort;
use datafusion_expr::select_expr::SelectExpr;
use datafusion_expr::expr::{OrderByExprs, Sort};

use datafusion_expr::{
CreateMemoryTable, DdlStatement, Distinct, LogicalPlan, LogicalPlanBuilder,
CreateMemoryTable, DdlStatement, Distinct, Expr, LogicalPlan, LogicalPlanBuilder,
};
use sqlparser::ast::{
Expr as SQLExpr, Offset as SQLOffset, OrderBy, OrderByExpr, OrderByKind, Query,
SelectInto, SetExpr,
Expr as SQLExpr, Offset as SQLOffset, OrderBy, OrderByKind, Query, SelectInto,
SetExpr,
};

impl<S: ContextProvider> SqlToRel<'_, S> {
Expand Down Expand Up @@ -151,24 +151,46 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
}

/// Returns the order by expressions from the query.
fn to_order_by_exprs(order_by: Option<OrderBy>) -> Result<Vec<OrderByExpr>> {
fn to_order_by_exprs(order_by: Option<OrderBy>) -> Result<OrderByExprs> {
to_order_by_exprs_with_select(order_by, None)
}

/// Returns the order by expressions from the query with the select expressions.
pub(crate) fn to_order_by_exprs_with_select(
order_by: Option<OrderBy>,
_select_exprs: Option<&Vec<SelectExpr>>, // TODO: ORDER BY ALL
) -> Result<Vec<OrderByExpr>> {
select_exprs: Option<&Vec<Expr>>,
) -> Result<OrderByExprs> {
let Some(OrderBy { kind, interpolate }) = order_by else {
// If no order by, return an empty array.
return Ok(vec![]);
return Ok(OrderByExprs::OrderByExprVec(vec![]));
};
if let Some(_interpolate) = interpolate {
return not_impl_err!("ORDER BY INTERPOLATE is not supported");
}
match kind {
OrderByKind::All(_) => not_impl_err!("ORDER BY ALL is not supported"),
OrderByKind::Expressions(order_by_exprs) => Ok(order_by_exprs),
OrderByKind::All(order_by_options) => {
let Some(exprs) = select_exprs else {
return Ok(OrderByExprs::All {
exprs: vec![],
options: order_by_options,
});
};

let order_by_epxrs = exprs
.iter()
.filter_map(|select_expr| match select_expr {
Expr::Column(_) => Some(select_expr.clone()),
_ => None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you @PokIsemaine, the PR looks good, but I have one question: are we going to exclude select expressions that are not columns? Is this the same behavior as DuckDB?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, it is directly filtered out. DuckDB is capable of handling other types of expressions. Perhaps I can add a TODO comment and use not_impl_err! to notify the user, then gradually add support for other types of SELECT expressions in future updates.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the user writes something like `select foo as bar from ... order by all`, it doesn't order anything — am I wrong? If so, we should return a `not_impl_err!`.

})
.collect::<Vec<_>>();

Ok(OrderByExprs::All {
exprs: order_by_epxrs,
options: order_by_options,
})
}
OrderByKind::Expressions(order_by_exprs) => {
Ok(OrderByExprs::OrderByExprVec(order_by_exprs))
}
}
}
6 changes: 3 additions & 3 deletions datafusion/sql/src/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,13 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
planner_context,
)?;

let order_by =
to_order_by_exprs_with_select(query_order_by, Some(&select_exprs))?;

// Having and group by clause may reference aliases defined in select projection
let projected_plan = self.project(base_plan.clone(), select_exprs)?;
let select_exprs = projected_plan.expressions();

let order_by =
to_order_by_exprs_with_select(query_order_by, Some(&select_exprs))?;

// Place the fields of the base plan at the front so that when there are references
// with the same name, the fields of the base plan will be searched first.
// See https://github.com/apache/datafusion/issues/9162
Expand Down
12 changes: 9 additions & 3 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use datafusion_common::{
ToDFSchema,
};
use datafusion_expr::dml::{CopyTo, InsertOp};
use datafusion_expr::expr::OrderByExprs;
use datafusion_expr::expr_rewriter::normalize_col_with_schemas_and_ambiguity_check;
use datafusion_expr::logical_plan::builder::project;
use datafusion_expr::logical_plan::DdlStatement;
Expand Down Expand Up @@ -1240,7 +1241,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
.to_dfschema_ref()?;
let using: Option<String> = using.as_ref().map(ident_to_string);
let columns = self.order_by_to_sort_expr(
columns,
OrderByExprs::OrderByExprVec(columns),
&table_schema,
planner_context,
false,
Expand Down Expand Up @@ -1423,8 +1424,13 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
let mut all_results = vec![];
for expr in order_exprs {
// Convert each OrderByExpr to a SortExpr:
let expr_vec =
self.order_by_to_sort_expr(expr, schema, planner_context, true, None)?;
let expr_vec = self.order_by_to_sort_expr(
OrderByExprs::OrderByExprVec(expr),
schema,
planner_context,
true,
None,
)?;
// Verify that columns of all SortExprs exist in the schema:
for sort in expr_vec.iter() {
for column in sort.expr.column_refs().iter() {
Expand Down
39 changes: 39 additions & 0 deletions datafusion/sqllogictest/test_files/order.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1380,3 +1380,42 @@ physical_plan

statement ok
drop table table_with_ordered_not_null;

# ORDER BY ALL
statement ok
set datafusion.sql_parser.dialect = 'DuckDB';

statement ok
CREATE OR REPLACE TABLE addresses AS
SELECT '123 Quack Blvd' AS address, 'DuckTown' AS city, '11111' AS zip
UNION ALL
SELECT '111 Duck Duck Goose Ln', 'DuckTown', '11111'
UNION ALL
SELECT '111 Duck Duck Goose Ln', 'Duck Town', '11111'
UNION ALL
SELECT '111 Duck Duck Goose Ln', 'Duck Town', '11111-0001';


query TTT
SELECT * FROM addresses ORDER BY ALL;
----
111 Duck Duck Goose Ln Duck Town 11111
111 Duck Duck Goose Ln Duck Town 11111-0001
111 Duck Duck Goose Ln DuckTown 11111
123 Quack Blvd DuckTown 11111

query TTT
SELECT * FROM addresses ORDER BY ALL DESC;
----
123 Quack Blvd DuckTown 11111
111 Duck Duck Goose Ln DuckTown 11111
111 Duck Duck Goose Ln Duck Town 11111-0001
111 Duck Duck Goose Ln Duck Town 11111

query TT
SELECT address, zip FROM addresses ORDER BY ALL;
----
111 Duck Duck Goose Ln 11111
111 Duck Duck Goose Ln 11111
111 Duck Duck Goose Ln 11111-0001
123 Quack Blvd 11111