Skip to content

Commit 55c50db

Browse files
committed
[task #8987]add_to_date_function
Signed-off-by: tangruilin <[email protected]>
1 parent d6d35f7 commit 55c50db

File tree

11 files changed

+137
-0
lines changed

11 files changed

+137
-0
lines changed

datafusion/common/src/scalar.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ use arrow::{
5252
},
5353
};
5454
use arrow_array::cast::as_list_array;
55+
use arrow_array::types::Date32Type;
5556

5657
/// A dynamically typed, nullable single value, (the single-valued counter-part
5758
/// to arrow's [`Array`])
@@ -3239,6 +3240,12 @@ impl ScalarType<i64> for TimestampNanosecondType {
32393240
}
32403241
}
32413242

3243+
impl ScalarType<i32> for Date32Type {
3244+
fn scalar(r: Option<i32>) -> ScalarValue {
3245+
ScalarValue::Date32(r)
3246+
}
3247+
}
3248+
32423249
#[cfg(test)]
32433250
mod tests {
32443251
use std::cmp::Ordering;

datafusion/expr/src/built_in_function.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,8 @@ pub enum BuiltinScalarFunction {
285285
ToTimestampSeconds,
286286
/// from_unixtime
287287
FromUnixtime,
288+
/// to_date
289+
ToDate,
288290
///now
289291
Now,
290292
///current_date
@@ -487,6 +489,7 @@ impl BuiltinScalarFunction {
487489
BuiltinScalarFunction::RegexpMatch => Volatility::Immutable,
488490
BuiltinScalarFunction::Struct => Volatility::Immutable,
489491
BuiltinScalarFunction::FromUnixtime => Volatility::Immutable,
492+
BuiltinScalarFunction::ToDate => Volatility::Immutable,
490493
BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable,
491494
BuiltinScalarFunction::OverLay => Volatility::Immutable,
492495
BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
@@ -802,6 +805,7 @@ impl BuiltinScalarFunction {
802805
BuiltinScalarFunction::ToTimestampMicros => Ok(Timestamp(Microsecond, None)),
803806
BuiltinScalarFunction::ToTimestampSeconds => Ok(Timestamp(Second, None)),
804807
BuiltinScalarFunction::FromUnixtime => Ok(Timestamp(Second, None)),
808+
BuiltinScalarFunction::ToDate => Ok(Date32),
805809
BuiltinScalarFunction::Now => {
806810
Ok(Timestamp(Nanosecond, Some("+00:00".into())))
807811
}
@@ -1050,6 +1054,7 @@ impl BuiltinScalarFunction {
10501054
BuiltinScalarFunction::FromUnixtime => {
10511055
Signature::uniform(1, vec![Int64], self.volatility())
10521056
}
1057+
BuiltinScalarFunction::ToDate => Signature::variadic_any(self.volatility()),
10531058
BuiltinScalarFunction::Digest => Signature::one_of(
10541059
vec![
10551060
Exact(vec![Utf8, Utf8]),
@@ -1494,6 +1499,7 @@ impl BuiltinScalarFunction {
14941499
BuiltinScalarFunction::ToTimestampSeconds => &["to_timestamp_seconds"],
14951500
BuiltinScalarFunction::ToTimestampNanos => &["to_timestamp_nanos"],
14961501
BuiltinScalarFunction::FromUnixtime => &["from_unixtime"],
1502+
BuiltinScalarFunction::ToDate => &["to_date"],
14971503

14981504
// hashing functions
14991505
BuiltinScalarFunction::Digest => &["digest"],

datafusion/expr/src/expr_fn.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -885,6 +885,11 @@ nary_scalar_expr!(
885885
scalar_expr!(DatePart, date_part, part date, "extracts a subfield from the date");
886886
scalar_expr!(DateTrunc, date_trunc, part date, "truncates the date to a specified level of precision");
887887
scalar_expr!(DateBin, date_bin, stride source origin, "coerces an arbitrary timestamp to the start of the nearest specified interval");
888+
nary_scalar_expr!(
889+
ToDate,
890+
to_date,
891+
"converts a string and optional formats to a `Date32`"
892+
);
888893
nary_scalar_expr!(
889894
ToTimestamp,
890895
to_timestamp,

datafusion/physical-expr/src/datetime_expressions.rs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,42 @@ fn string_to_timestamp_nanos_shim(s: &str) -> Result<i64> {
396396
string_to_timestamp_nanos(s).map_err(|e| e.into())
397397
}
398398

399+
fn to_date_impl(args: &[ColumnarValue], name: &str) -> Result<ColumnarValue> {
400+
match args.len() {
401+
1 => handle::<Date32Type, _, Date32Type>(
402+
args,
403+
|s| {
404+
string_to_timestamp_nanos_shim(s)
405+
.map(|n| n / (1_000_000 * 24 * 60 * 60 * 1_000))
406+
.and_then(|v| {
407+
v.try_into().map_err(|_| {
408+
DataFusionError::NotImplemented("()".to_string())
409+
})
410+
})
411+
},
412+
name,
413+
),
414+
n if n >= 2 => handle_multiple::<Date32Type, _, Date32Type, _>(
415+
args,
416+
|s, format| {
417+
string_to_timestamp_nanos_formatted(s, format)
418+
.map(|n| {
419+
println!("{n}");
420+
n / (1_000_000 * 24 * 60 * 60 * 1_000)
421+
})
422+
.and_then(|v| {
423+
v.try_into().map_err(|_| {
424+
DataFusionError::NotImplemented("()".to_string())
425+
})
426+
})
427+
},
428+
|n| n,
429+
name,
430+
),
431+
_ => internal_err!("Unsupported 0 argument count for function {name}"),
432+
}
433+
}
434+
399435
fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
400436
args: &[ColumnarValue],
401437
name: &str,
@@ -423,6 +459,11 @@ fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
423459
}
424460
}
425461

462+
/// to_date SQL function
463+
pub fn to_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {
464+
to_date_impl(args, "to_date")
465+
}
466+
426467
/// to_timestamp SQL function
427468
///
428469
/// Note: `to_timestamp` returns `Timestamp(Nanosecond)` though its arguments are interpreted as **seconds**.
@@ -1308,6 +1349,40 @@ fn validate_to_timestamp_data_types(
13081349
None
13091350
}
13101351

1352+
// TODO: 实现这个函数
1353+
/// to_date SQL function implementation
1354+
pub fn to_date_invoke(args: &[ColumnarValue]) -> Result<ColumnarValue> {
1355+
if args.is_empty() {
1356+
return internal_err!(
1357+
"to_date function requires 1 or more arguments, got {}",
1358+
args.len()
1359+
);
1360+
}
1361+
1362+
// validate that any args after the first one are Utf8
1363+
if args.len() > 1 {
1364+
if let Some(value) = validate_to_timestamp_data_types(args, "to_date") {
1365+
return value;
1366+
}
1367+
}
1368+
1369+
match args[0].data_type() {
1370+
DataType::Int32 | DataType::Int64 => {
1371+
cast_column(&args[0], &DataType::Date32, None)
1372+
}
1373+
DataType::Null | DataType::Float64 => {
1374+
cast_column(&args[0], &DataType::Date32, None)
1375+
}
1376+
DataType::Date32 | DataType::Date64 => {
1377+
cast_column(&args[0], &DataType::Date32, None)
1378+
}
1379+
DataType::Utf8 => to_date(args),
1380+
other => {
1381+
internal_err!("Unsupported data type {:?} for function to_date", other)
1382+
}
1383+
}
1384+
}
1385+
13111386
/// to_timestamp() SQL function implementation
13121387
pub fn to_timestamp_invoke(args: &[ColumnarValue]) -> Result<ColumnarValue> {
13131388
if args.is_empty() {

datafusion/physical-expr/src/functions.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,7 @@ pub fn create_physical_fun(
574574
BuiltinScalarFunction::FromUnixtime => {
575575
Arc::new(datetime_expressions::from_unixtime_invoke)
576576
}
577+
BuiltinScalarFunction::ToDate => Arc::new(datetime_expressions::to_date_invoke),
577578
BuiltinScalarFunction::InitCap => Arc::new(|args| match args[0].data_type() {
578579
DataType::Utf8 => {
579580
make_scalar_function_inner(string_expressions::initcap::<i32>)(args)

datafusion/proto/proto/datafusion.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,7 @@ enum ScalarFunction {
670670
EndsWith = 131;
671671
InStr = 132;
672672
MakeDate = 133;
673+
ToDate = 134;
673674
}
674675

675676
message ScalarFunctionNode {

datafusion/proto/src/generated/pbjson.rs

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/generated/prost.rs

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/logical_plan/from_proto.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
571571
ScalarFunction::Levenshtein => Self::Levenshtein,
572572
ScalarFunction::SubstrIndex => Self::SubstrIndex,
573573
ScalarFunction::FindInSet => Self::FindInSet,
574+
ScalarFunction::ToDate => Self::ToDate,
574575
}
575576
}
576577
}
@@ -1819,6 +1820,16 @@ pub fn parse_expr(
18191820
ScalarFunction::StructFun => {
18201821
Ok(struct_fun(parse_expr(&args[0], registry)?))
18211822
}
1823+
ScalarFunction::ToDate => {
1824+
let args: Vec<_> = args
1825+
.iter()
1826+
.map(|expr| parse_expr(expr, registry))
1827+
.collect::<std::result::Result<_, _>>()?;
1828+
Ok(Expr::ScalarFunction(expr::ScalarFunction::new(
1829+
BuiltinScalarFunction::ToDate,
1830+
args,
1831+
)))
1832+
}
18221833
}
18231834
}
18241835
ExprType::ScalarUdfExpr(protobuf::ScalarUdfExprNode { fun_name, args }) => {

datafusion/proto/src/logical_plan/to_proto.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1568,6 +1568,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
15681568
BuiltinScalarFunction::Levenshtein => Self::Levenshtein,
15691569
BuiltinScalarFunction::SubstrIndex => Self::SubstrIndex,
15701570
BuiltinScalarFunction::FindInSet => Self::FindInSet,
1571+
BuiltinScalarFunction::ToDate => Self::ToDate,
15711572
};
15721573

15731574
Ok(scalar_function)

datafusion/sqllogictest/test_files/dates.slt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,27 @@ query ?
107107
SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
108108
----
109109
730 days 0 hours 0 mins 0.000000000 secs
110+
111+
# to_date_test
112+
statement ok
113+
create table to_date_t1(ts bigint) as VALUES
114+
(1235865600000),
115+
(1235865660000),
116+
(1238544000000);
117+
118+
119+
# query_cast_timestamp_millis
120+
query D
121+
SELECT to_date(ts / 100000000) FROM to_date_t1 LIMIT 3
122+
----
123+
2003-11-02
124+
2003-11-02
125+
2003-11-29
126+
127+
query D
128+
SELECT to_date('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z');
129+
----
130+
2023-01-13
131+
132+
statement error DataFusion error: Internal error: to_date function unsupported data type at index 1: List
133+
SELECT to_date('2022-08-03T14:38:50+05:30', make_array('%s', '%q', '%d-%m-%Y %H:%M:%S%#z', '%+'));

0 commit comments

Comments
 (0)