Skip to content

Commit dc8d355

Browse files
committed
[task #8987]add_to_date_function
Signed-off-by: tangruilin <[email protected]>
1 parent d6d35f7 commit dc8d355

File tree

11 files changed

+132
-2
lines changed

11 files changed

+132
-2
lines changed

datafusion/common/src/scalar.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ use arrow::{
5252
},
5353
};
5454
use arrow_array::cast::as_list_array;
55+
use arrow_array::types::Date32Type;
5556

5657
/// A dynamically typed, nullable single value, (the single-valued counter-part
5758
/// to arrow's [`Array`])
@@ -3239,6 +3240,12 @@ impl ScalarType<i64> for TimestampNanosecondType {
32393240
}
32403241
}
32413242

3243+
impl ScalarType<i32> for Date32Type {
3244+
fn scalar(r: Option<i32>) -> ScalarValue {
3245+
ScalarValue::Date32(r)
3246+
}
3247+
}
3248+
32423249
#[cfg(test)]
32433250
mod tests {
32443251
use std::cmp::Ordering;

datafusion/expr/src/built_in_function.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,8 @@ pub enum BuiltinScalarFunction {
285285
ToTimestampSeconds,
286286
/// from_unixtime
287287
FromUnixtime,
288+
/// to_date
289+
ToDate,
288290
///now
289291
Now,
290292
///current_date
@@ -487,6 +489,7 @@ impl BuiltinScalarFunction {
487489
BuiltinScalarFunction::RegexpMatch => Volatility::Immutable,
488490
BuiltinScalarFunction::Struct => Volatility::Immutable,
489491
BuiltinScalarFunction::FromUnixtime => Volatility::Immutable,
492+
BuiltinScalarFunction::ToDate => Volatility::Immutable,
490493
BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable,
491494
BuiltinScalarFunction::OverLay => Volatility::Immutable,
492495
BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
@@ -802,6 +805,7 @@ impl BuiltinScalarFunction {
802805
BuiltinScalarFunction::ToTimestampMicros => Ok(Timestamp(Microsecond, None)),
803806
BuiltinScalarFunction::ToTimestampSeconds => Ok(Timestamp(Second, None)),
804807
BuiltinScalarFunction::FromUnixtime => Ok(Timestamp(Second, None)),
808+
BuiltinScalarFunction::ToDate => Ok(Date32),
805809
BuiltinScalarFunction::Now => {
806810
Ok(Timestamp(Nanosecond, Some("+00:00".into())))
807811
}
@@ -1050,6 +1054,7 @@ impl BuiltinScalarFunction {
10501054
BuiltinScalarFunction::FromUnixtime => {
10511055
Signature::uniform(1, vec![Int64], self.volatility())
10521056
}
1057+
BuiltinScalarFunction::ToDate => Signature::variadic_any(self.volatility()),
10531058
BuiltinScalarFunction::Digest => Signature::one_of(
10541059
vec![
10551060
Exact(vec![Utf8, Utf8]),
@@ -1494,6 +1499,7 @@ impl BuiltinScalarFunction {
14941499
BuiltinScalarFunction::ToTimestampSeconds => &["to_timestamp_seconds"],
14951500
BuiltinScalarFunction::ToTimestampNanos => &["to_timestamp_nanos"],
14961501
BuiltinScalarFunction::FromUnixtime => &["from_unixtime"],
1502+
BuiltinScalarFunction::ToDate => &["to_date"],
14971503

14981504
// hashing functions
14991505
BuiltinScalarFunction::Digest => &["digest"],

datafusion/expr/src/expr_fn.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -885,6 +885,11 @@ nary_scalar_expr!(
885885
scalar_expr!(DatePart, date_part, part date, "extracts a subfield from the date");
886886
scalar_expr!(DateTrunc, date_trunc, part date, "truncates the date to a specified level of precision");
887887
scalar_expr!(DateBin, date_bin, stride source origin, "coerces an arbitrary timestamp to the start of the nearest specified interval");
888+
nary_scalar_expr!(
889+
ToDate,
890+
to_date,
891+
"converts a string and optional formats to a `Date32`"
892+
);
888893
nary_scalar_expr!(
889894
ToTimestamp,
890895
to_timestamp,

datafusion/physical-expr/src/datetime_expressions.rs

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ use datafusion_common::cast::{
5252
as_timestamp_nanosecond_array, as_timestamp_second_array,
5353
};
5454
use datafusion_common::{
55-
exec_err, internal_err, not_impl_err, DataFusionError, Result, ScalarType,
56-
ScalarValue,
55+
exec_err, internal_datafusion_err, internal_err, not_impl_err, DataFusionError,
56+
Result, ScalarType, ScalarValue,
5757
};
5858
use datafusion_expr::ColumnarValue;
5959

@@ -396,6 +396,39 @@ fn string_to_timestamp_nanos_shim(s: &str) -> Result<i64> {
396396
string_to_timestamp_nanos(s).map_err(|e| e.into())
397397
}
398398

399+
fn to_date_impl(args: &[ColumnarValue], name: &str) -> Result<ColumnarValue> {
400+
match args.len() {
401+
1 => handle::<Date32Type, _, Date32Type>(
402+
args,
403+
|s| {
404+
string_to_timestamp_nanos_shim(s)
405+
.map(|n| n / (1_000_000 * 24 * 60 * 60 * 1_000))
406+
.and_then(|v| {
407+
v.try_into().map_err(|_| {
408+
internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
409+
})
410+
})
411+
},
412+
name,
413+
),
414+
n if n >= 2 => handle_multiple::<Date32Type, _, Date32Type, _>(
415+
args,
416+
|s, format| {
417+
string_to_timestamp_nanos_formatted(s, format)
418+
.map(|n| n / (1_000_000 * 24 * 60 * 60 * 1_000))
419+
.and_then(|v| {
420+
v.try_into().map_err(|_| {
421+
internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
422+
})
423+
})
424+
},
425+
|n| n,
426+
name,
427+
),
428+
_ => internal_err!("Unsupported 0 argument count for function {name}"),
429+
}
430+
}
431+
399432
fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
400433
args: &[ColumnarValue],
401434
name: &str,
@@ -423,6 +456,11 @@ fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
423456
}
424457
}
425458

459+
/// to_date SQL function
460+
pub fn to_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {
461+
to_date_impl(args, "to_date")
462+
}
463+
426464
/// to_timestamp SQL function
427465
///
428466
/// Note: `to_timestamp` returns `Timestamp(Nanosecond)` though its arguments are interpreted as **seconds**.
@@ -1308,6 +1346,36 @@ fn validate_to_timestamp_data_types(
13081346
None
13091347
}
13101348

1349+
/// to_date SQL function implementation
1350+
pub fn to_date_invoke(args: &[ColumnarValue]) -> Result<ColumnarValue> {
1351+
if args.is_empty() {
1352+
return exec_err!(
1353+
"to_date function requires 1 or more arguments, got {}",
1354+
args.len()
1355+
);
1356+
}
1357+
1358+
// validate that any args after the first one are Utf8
1359+
if args.len() > 1 {
1360+
if let Some(value) = validate_to_timestamp_data_types(args, "to_date") {
1361+
return value;
1362+
}
1363+
}
1364+
1365+
match args[0].data_type() {
1366+
DataType::Int32
1367+
| DataType::Int64
1368+
| DataType::Null
1369+
| DataType::Float64
1370+
| DataType::Date32
1371+
| DataType::Date64 => cast_column(&args[0], &DataType::Date32, None),
1372+
DataType::Utf8 => to_date(args),
1373+
other => {
1374+
internal_err!("Unsupported data type {:?} for function to_date", other)
1375+
}
1376+
}
1377+
}
1378+
13111379
/// to_timestamp() SQL function implementation
13121380
pub fn to_timestamp_invoke(args: &[ColumnarValue]) -> Result<ColumnarValue> {
13131381
if args.is_empty() {

datafusion/physical-expr/src/functions.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,7 @@ pub fn create_physical_fun(
574574
BuiltinScalarFunction::FromUnixtime => {
575575
Arc::new(datetime_expressions::from_unixtime_invoke)
576576
}
577+
BuiltinScalarFunction::ToDate => Arc::new(datetime_expressions::to_date_invoke),
577578
BuiltinScalarFunction::InitCap => Arc::new(|args| match args[0].data_type() {
578579
DataType::Utf8 => {
579580
make_scalar_function_inner(string_expressions::initcap::<i32>)(args)

datafusion/proto/proto/datafusion.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,7 @@ enum ScalarFunction {
670670
EndsWith = 131;
671671
InStr = 132;
672672
MakeDate = 133;
673+
ToDate = 134;
673674
}
674675

675676
message ScalarFunctionNode {

datafusion/proto/src/generated/pbjson.rs

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/generated/prost.rs

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/logical_plan/from_proto.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
571571
ScalarFunction::Levenshtein => Self::Levenshtein,
572572
ScalarFunction::SubstrIndex => Self::SubstrIndex,
573573
ScalarFunction::FindInSet => Self::FindInSet,
574+
ScalarFunction::ToDate => Self::ToDate,
574575
}
575576
}
576577
}
@@ -1819,6 +1820,16 @@ pub fn parse_expr(
18191820
ScalarFunction::StructFun => {
18201821
Ok(struct_fun(parse_expr(&args[0], registry)?))
18211822
}
1823+
ScalarFunction::ToDate => {
1824+
let args: Vec<_> = args
1825+
.iter()
1826+
.map(|expr| parse_expr(expr, registry))
1827+
.collect::<std::result::Result<_, _>>()?;
1828+
Ok(Expr::ScalarFunction(expr::ScalarFunction::new(
1829+
BuiltinScalarFunction::ToDate,
1830+
args,
1831+
)))
1832+
}
18221833
}
18231834
}
18241835
ExprType::ScalarUdfExpr(protobuf::ScalarUdfExprNode { fun_name, args }) => {

datafusion/proto/src/logical_plan/to_proto.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1568,6 +1568,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
15681568
BuiltinScalarFunction::Levenshtein => Self::Levenshtein,
15691569
BuiltinScalarFunction::SubstrIndex => Self::SubstrIndex,
15701570
BuiltinScalarFunction::FindInSet => Self::FindInSet,
1571+
BuiltinScalarFunction::ToDate => Self::ToDate,
15711572
};
15721573

15731574
Ok(scalar_function)

datafusion/sqllogictest/test_files/dates.slt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,27 @@ query ?
107107
SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
108108
----
109109
730 days 0 hours 0 mins 0.000000000 secs
110+
111+
# to_date_test
112+
statement ok
113+
create table to_date_t1(ts bigint) as VALUES
114+
(1235865600000),
115+
(1235865660000),
116+
(1238544000000);
117+
118+
119+
# to_date on an integer: the value is interpreted as days since the Unix epoch
120+
query D
121+
SELECT to_date(ts / 100000000) FROM to_date_t1 LIMIT 3
122+
----
123+
2003-11-02
124+
2003-11-02
125+
2003-11-29
126+
127+
query D
128+
SELECT to_date('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z');
129+
----
130+
2023-01-13
131+
132+
statement error DataFusion error: Internal error: to_date function unsupported data type at index 1: List
133+
SELECT to_date('2022-08-03T14:38:50+05:30', make_array('%s', '%q', '%d-%m-%Y %H:%M:%S%#z', '%+'));

0 commit comments

Comments
 (0)