Skip to content

Commit 474fb34

Browse files
committed
[task #8987]add_to_date_function
Signed-off-by: tangruilin <[email protected]>
1 parent d6ab343 commit 474fb34

File tree

11 files changed

+134
-2
lines changed

11 files changed

+134
-2
lines changed

datafusion/common/src/scalar.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ use arrow::{
5252
},
5353
};
5454
use arrow_array::cast::as_list_array;
55+
use arrow_array::types::Date32Type;
5556

5657
/// A dynamically typed, nullable single value, (the single-valued counter-part
5758
/// to arrow's [`Array`])
@@ -3229,6 +3230,12 @@ impl ScalarType<i64> for TimestampNanosecondType {
32293230
}
32303231
}
32313232

3233+
impl ScalarType<i32> for Date32Type {
3234+
fn scalar(r: Option<i32>) -> ScalarValue {
3235+
ScalarValue::Date32(r)
3236+
}
3237+
}
3238+
32323239
#[cfg(test)]
32333240
mod tests {
32343241
use std::cmp::Ordering;

datafusion/expr/src/built_in_function.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,8 @@ pub enum BuiltinScalarFunction {
289289
ToTimestampSeconds,
290290
/// from_unixtime
291291
FromUnixtime,
292+
/// to_date
293+
ToDate,
292294
///now
293295
Now,
294296
///current_date
@@ -490,6 +492,7 @@ impl BuiltinScalarFunction {
490492
BuiltinScalarFunction::RegexpMatch => Volatility::Immutable,
491493
BuiltinScalarFunction::Struct => Volatility::Immutable,
492494
BuiltinScalarFunction::FromUnixtime => Volatility::Immutable,
495+
BuiltinScalarFunction::ToDate => Volatility::Immutable,
493496
BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable,
494497
BuiltinScalarFunction::OverLay => Volatility::Immutable,
495498
BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
@@ -829,6 +832,7 @@ impl BuiltinScalarFunction {
829832
BuiltinScalarFunction::ToTimestampMicros => Ok(Timestamp(Microsecond, None)),
830833
BuiltinScalarFunction::ToTimestampSeconds => Ok(Timestamp(Second, None)),
831834
BuiltinScalarFunction::FromUnixtime => Ok(Timestamp(Second, None)),
835+
BuiltinScalarFunction::ToDate => Ok(Date32),
832836
BuiltinScalarFunction::Now => {
833837
Ok(Timestamp(Nanosecond, Some("+00:00".into())))
834838
}
@@ -1076,6 +1080,7 @@ impl BuiltinScalarFunction {
10761080
BuiltinScalarFunction::FromUnixtime => {
10771081
Signature::uniform(1, vec![Int64], self.volatility())
10781082
}
1083+
BuiltinScalarFunction::ToDate => Signature::variadic_any(self.volatility()),
10791084
BuiltinScalarFunction::Digest => Signature::one_of(
10801085
vec![
10811086
Exact(vec![Utf8, Utf8]),
@@ -1532,6 +1537,7 @@ impl BuiltinScalarFunction {
15321537
BuiltinScalarFunction::ToTimestampSeconds => &["to_timestamp_seconds"],
15331538
BuiltinScalarFunction::ToTimestampNanos => &["to_timestamp_nanos"],
15341539
BuiltinScalarFunction::FromUnixtime => &["from_unixtime"],
1540+
BuiltinScalarFunction::ToDate => &["to_date"],
15351541

15361542
// hashing functions
15371543
BuiltinScalarFunction::Digest => &["digest"],

datafusion/expr/src/expr_fn.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,11 @@ nary_scalar_expr!(
887887
scalar_expr!(DatePart, date_part, part date, "extracts a subfield from the date");
888888
scalar_expr!(DateTrunc, date_trunc, part date, "truncates the date to a specified level of precision");
889889
scalar_expr!(DateBin, date_bin, stride source origin, "coerces an arbitrary timestamp to the start of the nearest specified interval");
890+
nary_scalar_expr!(
891+
ToDate,
892+
to_date,
893+
"converts a string and optional formats to a `Date32`"
894+
);
890895
nary_scalar_expr!(
891896
ToTimestamp,
892897
to_timestamp,

datafusion/physical-expr/src/datetime_expressions.rs

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ use arrow::{
3535
};
3636
use arrow_array::temporal_conversions::NANOSECONDS;
3737
use arrow_array::timezone::Tz;
38-
use arrow_array::types::ArrowTimestampType;
38+
use arrow_array::types::{ArrowTimestampType, Date32Type};
3939
use arrow_array::GenericStringArray;
4040
use chrono::prelude::*;
4141
use chrono::LocalResult::Single;
@@ -391,6 +391,42 @@ fn string_to_timestamp_nanos_shim(s: &str) -> Result<i64> {
391391
string_to_timestamp_nanos(s).map_err(|e| e.into())
392392
}
393393

394+
fn to_date_impl(args: &[ColumnarValue], name: &str) -> Result<ColumnarValue> {
395+
match args.len() {
396+
1 => handle::<Date32Type, _, Date32Type>(
397+
args,
398+
|s| {
399+
string_to_timestamp_nanos_shim(s)
400+
.map(|n| n / (1_000_000 * 24 * 60 * 60 * 1_000))
401+
.and_then(|v| {
402+
v.try_into().map_err(|_| {
403+
DataFusionError::NotImplemented("()".to_string())
404+
})
405+
})
406+
},
407+
name,
408+
),
409+
n if n >= 2 => handle_multiple::<Date32Type, _, Date32Type, _>(
410+
args,
411+
|s, format| {
412+
string_to_timestamp_nanos_formatted(s, format)
413+
.map(|n| {
414+
println!("{n}");
415+
n / (1_000_000 * 24 * 60 * 60 * 1_000)
416+
})
417+
.and_then(|v| {
418+
v.try_into().map_err(|_| {
419+
DataFusionError::NotImplemented("()".to_string())
420+
})
421+
})
422+
},
423+
|n| n,
424+
name,
425+
),
426+
_ => internal_err!("Unsupported 0 argument count for function {name}"),
427+
}
428+
}
429+
394430
fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
395431
args: &[ColumnarValue],
396432
name: &str,
@@ -418,6 +454,11 @@ fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
418454
}
419455
}
420456

457+
/// to_date SQL function
458+
pub fn to_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {
459+
to_date_impl(args, "to_date")
460+
}
461+
421462
/// to_timestamp SQL function
422463
///
423464
/// Note: `to_timestamp` returns `Timestamp(Nanosecond)` though its arguments are interpreted as **seconds**.
@@ -1210,6 +1251,40 @@ fn validate_to_timestamp_data_types(
12101251
None
12111252
}
12121253

1254+
// TODO: 实现这个函数
1255+
/// to_date SQL function implementation
1256+
pub fn to_date_invoke(args: &[ColumnarValue]) -> Result<ColumnarValue> {
1257+
if args.is_empty() {
1258+
return internal_err!(
1259+
"to_date function requires 1 or more arguments, got {}",
1260+
args.len()
1261+
);
1262+
}
1263+
1264+
// validate that any args after the first one are Utf8
1265+
if args.len() > 1 {
1266+
if let Some(value) = validate_to_timestamp_data_types(args, "to_date") {
1267+
return value;
1268+
}
1269+
}
1270+
1271+
match args[0].data_type() {
1272+
DataType::Int32 | DataType::Int64 => {
1273+
cast_column(&args[0], &DataType::Date32, None)
1274+
}
1275+
DataType::Null | DataType::Float64 => {
1276+
cast_column(&args[0], &DataType::Date32, None)
1277+
}
1278+
DataType::Date32 | DataType::Date64 => {
1279+
cast_column(&args[0], &DataType::Date32, None)
1280+
}
1281+
DataType::Utf8 => to_date(args),
1282+
other => {
1283+
internal_err!("Unsupported data type {:?} for function to_date", other)
1284+
}
1285+
}
1286+
}
1287+
12131288
/// to_timestamp() SQL function implementation
12141289
pub fn to_timestamp_invoke(args: &[ColumnarValue]) -> Result<ColumnarValue> {
12151290
if args.is_empty() {

datafusion/physical-expr/src/functions.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,7 @@ pub fn create_physical_fun(
592592
BuiltinScalarFunction::FromUnixtime => {
593593
Arc::new(datetime_expressions::from_unixtime_invoke)
594594
}
595+
BuiltinScalarFunction::ToDate => Arc::new(datetime_expressions::to_date_invoke),
595596
BuiltinScalarFunction::InitCap => Arc::new(|args| match args[0].data_type() {
596597
DataType::Utf8 => {
597598
make_scalar_function_inner(string_expressions::initcap::<i32>)(args)

datafusion/proto/proto/datafusion.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,7 @@ enum ScalarFunction {
671671
ArrayResize = 130;
672672
EndsWith = 131;
673673
InStr = 132;
674+
ToDate = 133;
674675
}
675676

676677
message ScalarFunctionNode {

datafusion/proto/src/generated/pbjson.rs

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/generated/prost.rs

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/logical_plan/from_proto.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ use datafusion_common::{
4444
Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference,
4545
Result, ScalarValue,
4646
};
47-
use datafusion_expr::expr::{Alias, Placeholder};
4847
use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by};
4948
use datafusion_expr::{
5049
abs, acos, acosh, array, array_append, array_concat, array_dims, array_distinct,
@@ -72,6 +71,10 @@ use datafusion_expr::{
7271
JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound,
7372
WindowFrameUnits,
7473
};
74+
use datafusion_expr::{
75+
expr::{Alias, Placeholder},
76+
to_date,
77+
};
7578

7679
#[derive(Debug)]
7780
pub enum Error {
@@ -572,6 +575,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
572575
ScalarFunction::Levenshtein => Self::Levenshtein,
573576
ScalarFunction::SubstrIndex => Self::SubstrIndex,
574577
ScalarFunction::FindInSet => Self::FindInSet,
578+
ScalarFunction::ToDate => Self::ToDate,
575579
}
576580
}
577581
}
@@ -1813,6 +1817,16 @@ pub fn parse_expr(
18131817
ScalarFunction::StructFun => {
18141818
Ok(struct_fun(parse_expr(&args[0], registry)?))
18151819
}
1820+
ScalarFunction::ToDate => {
1821+
let args: Vec<_> = args
1822+
.iter()
1823+
.map(|expr| parse_expr(expr, registry))
1824+
.collect::<std::result::Result<_, _>>()?;
1825+
Ok(Expr::ScalarFunction(expr::ScalarFunction::new(
1826+
BuiltinScalarFunction::ToDate,
1827+
args,
1828+
)))
1829+
}
18161830
}
18171831
}
18181832
ExprType::ScalarUdfExpr(protobuf::ScalarUdfExprNode { fun_name, args }) => {

datafusion/proto/src/logical_plan/to_proto.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1566,6 +1566,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
15661566
BuiltinScalarFunction::Levenshtein => Self::Levenshtein,
15671567
BuiltinScalarFunction::SubstrIndex => Self::SubstrIndex,
15681568
BuiltinScalarFunction::FindInSet => Self::FindInSet,
1569+
BuiltinScalarFunction::ToDate => Self::ToDate,
15691570
};
15701571

15711572
Ok(scalar_function)

datafusion/sqllogictest/test_files/dates.slt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,19 @@ query ?
107107
SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
108108
----
109109
730 days 0 hours 0 mins 0.000000000 secs
110+
111+
# to_date_test
112+
statement ok
113+
create table to_date_t1(ts bigint) as VALUES
114+
(1235865600000),
115+
(1235865660000),
116+
(1238544000000);
117+
118+
119+
# query_cast_timestamp_millis
120+
query P
121+
SELECT to_date(ts / 1000) FROM to_date_t1 LIMIT 3
122+
----
123+
2009-03-01T00:00:00
124+
2009-03-01T00:01:00
125+
2009-04-01T00:00:00

0 commit comments

Comments
 (0)