Skip to content

Commit 0eade48

Browse files
committed
[task #8987]add_to_date_function
Signed-off-by: tangruilin <[email protected]>
1 parent 15f59d9 commit 0eade48

File tree

11 files changed

+132
-2
lines changed

11 files changed

+132
-2
lines changed

datafusion/common/src/scalar.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ use arrow::{
5252
},
5353
};
5454
use arrow_array::cast::as_list_array;
55+
use arrow_array::types::Date32Type;
5556

5657
/// A dynamically typed, nullable single value, (the single-valued counter-part
5758
/// to arrow's [`Array`])
@@ -3239,6 +3240,12 @@ impl ScalarType<i64> for TimestampNanosecondType {
32393240
}
32403241
}
32413242

3243+
impl ScalarType<i32> for Date32Type {
3244+
fn scalar(r: Option<i32>) -> ScalarValue {
3245+
ScalarValue::Date32(r)
3246+
}
3247+
}
3248+
32423249
#[cfg(test)]
32433250
mod tests {
32443251
use std::cmp::Ordering;

datafusion/expr/src/built_in_function.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,8 @@ pub enum BuiltinScalarFunction {
287287
ToTimestampSeconds,
288288
/// from_unixtime
289289
FromUnixtime,
290+
/// to_date
291+
ToDate,
290292
///now
291293
Now,
292294
///current_date
@@ -490,6 +492,7 @@ impl BuiltinScalarFunction {
490492
BuiltinScalarFunction::RegexpMatch => Volatility::Immutable,
491493
BuiltinScalarFunction::Struct => Volatility::Immutable,
492494
BuiltinScalarFunction::FromUnixtime => Volatility::Immutable,
495+
BuiltinScalarFunction::ToDate => Volatility::Immutable,
493496
BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable,
494497
BuiltinScalarFunction::OverLay => Volatility::Immutable,
495498
BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
@@ -806,6 +809,7 @@ impl BuiltinScalarFunction {
806809
BuiltinScalarFunction::ToTimestampMicros => Ok(Timestamp(Microsecond, None)),
807810
BuiltinScalarFunction::ToTimestampSeconds => Ok(Timestamp(Second, None)),
808811
BuiltinScalarFunction::FromUnixtime => Ok(Timestamp(Second, None)),
812+
BuiltinScalarFunction::ToDate => Ok(Date32),
809813
BuiltinScalarFunction::Now => {
810814
Ok(Timestamp(Nanosecond, Some("+00:00".into())))
811815
}
@@ -1055,6 +1059,7 @@ impl BuiltinScalarFunction {
10551059
BuiltinScalarFunction::FromUnixtime => {
10561060
Signature::uniform(1, vec![Int64], self.volatility())
10571061
}
1062+
BuiltinScalarFunction::ToDate => Signature::variadic_any(self.volatility()),
10581063
BuiltinScalarFunction::Digest => Signature::one_of(
10591064
vec![
10601065
Exact(vec![Utf8, Utf8]),
@@ -1499,6 +1504,7 @@ impl BuiltinScalarFunction {
14991504
BuiltinScalarFunction::ToTimestampSeconds => &["to_timestamp_seconds"],
15001505
BuiltinScalarFunction::ToTimestampNanos => &["to_timestamp_nanos"],
15011506
BuiltinScalarFunction::FromUnixtime => &["from_unixtime"],
1507+
BuiltinScalarFunction::ToDate => &["to_date"],
15021508

15031509
// hashing functions
15041510
BuiltinScalarFunction::Digest => &["digest"],

datafusion/expr/src/expr_fn.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -891,6 +891,11 @@ nary_scalar_expr!(
891891
scalar_expr!(DatePart, date_part, part date, "extracts a subfield from the date");
892892
scalar_expr!(DateTrunc, date_trunc, part date, "truncates the date to a specified level of precision");
893893
scalar_expr!(DateBin, date_bin, stride source origin, "coerces an arbitrary timestamp to the start of the nearest specified interval");
894+
nary_scalar_expr!(
895+
ToDate,
896+
to_date,
897+
"converts string to date according to the given format"
898+
);
894899
nary_scalar_expr!(
895900
ToTimestamp,
896901
to_timestamp,

datafusion/physical-expr/src/datetime_expressions.rs

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ use datafusion_common::cast::{
5353
as_timestamp_nanosecond_array, as_timestamp_second_array,
5454
};
5555
use datafusion_common::{
56-
exec_err, internal_err, not_impl_err, DataFusionError, Result, ScalarType,
57-
ScalarValue,
56+
exec_err, internal_datafusion_err, internal_err, not_impl_err, DataFusionError,
57+
Result, ScalarType, ScalarValue,
5858
};
5959
use datafusion_expr::ColumnarValue;
6060

@@ -397,6 +397,39 @@ fn string_to_timestamp_nanos_shim(s: &str) -> Result<i64> {
397397
string_to_timestamp_nanos(s).map_err(|e| e.into())
398398
}
399399

400+
fn to_date_impl(args: &[ColumnarValue], name: &str) -> Result<ColumnarValue> {
401+
match args.len() {
402+
1 => handle::<Date32Type, _, Date32Type>(
403+
args,
404+
|s| {
405+
string_to_timestamp_nanos_shim(s)
406+
.map(|n| n / (1_000_000 * 24 * 60 * 60 * 1_000))
407+
.and_then(|v| {
408+
v.try_into().map_err(|_| {
409+
internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
410+
})
411+
})
412+
},
413+
name,
414+
),
415+
n if n >= 2 => handle_multiple::<Date32Type, _, Date32Type, _>(
416+
args,
417+
|s, format| {
418+
string_to_timestamp_nanos_formatted(s, format)
419+
.map(|n| n / (1_000_000 * 24 * 60 * 60 * 1_000))
420+
.and_then(|v| {
421+
v.try_into().map_err(|_| {
422+
internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
423+
})
424+
})
425+
},
426+
|n| n,
427+
name,
428+
),
429+
_ => internal_err!("Unsupported 0 argument count for function {name}"),
430+
}
431+
}
432+
400433
fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
401434
args: &[ColumnarValue],
402435
name: &str,
@@ -424,6 +457,11 @@ fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
424457
}
425458
}
426459

460+
/// to_date SQL function
461+
pub fn to_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {
462+
to_date_impl(args, "to_date")
463+
}
464+
427465
/// to_timestamp SQL function
428466
///
429467
/// Note: `to_timestamp` returns `Timestamp(Nanosecond)` though its arguments are interpreted as **seconds**.
@@ -1343,6 +1381,36 @@ fn validate_to_timestamp_data_types(
13431381
None
13441382
}
13451383

1384+
/// to_date SQL function implementation
1385+
pub fn to_date_invoke(args: &[ColumnarValue]) -> Result<ColumnarValue> {
1386+
if args.is_empty() {
1387+
return exec_err!(
1388+
"to_date function requires 1 or more arguments, got {}",
1389+
args.len()
1390+
);
1391+
}
1392+
1393+
// validate that any args after the first one are Utf8
1394+
if args.len() > 1 {
1395+
if let Some(value) = validate_to_timestamp_data_types(args, "to_date") {
1396+
return value;
1397+
}
1398+
}
1399+
1400+
match args[0].data_type() {
1401+
DataType::Int32
1402+
| DataType::Int64
1403+
| DataType::Null
1404+
| DataType::Float64
1405+
| DataType::Date32
1406+
| DataType::Date64 => cast_column(&args[0], &DataType::Date32, None),
1407+
DataType::Utf8 => to_date(args),
1408+
other => {
1409+
internal_err!("Unsupported data type {:?} for function to_date", other)
1410+
}
1411+
}
1412+
}
1413+
13461414
/// to_timestamp() SQL function implementation
13471415
pub fn to_timestamp_invoke(args: &[ColumnarValue]) -> Result<ColumnarValue> {
13481416
if args.is_empty() {

datafusion/physical-expr/src/functions.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,7 @@ pub fn create_physical_fun(
577577
BuiltinScalarFunction::FromUnixtime => {
578578
Arc::new(datetime_expressions::from_unixtime_invoke)
579579
}
580+
BuiltinScalarFunction::ToDate => Arc::new(datetime_expressions::to_date_invoke),
580581
BuiltinScalarFunction::InitCap => Arc::new(|args| match args[0].data_type() {
581582
DataType::Utf8 => {
582583
make_scalar_function_inner(string_expressions::initcap::<i32>)(args)

datafusion/proto/proto/datafusion.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,7 @@ enum ScalarFunction {
671671
InStr = 132;
672672
MakeDate = 133;
673673
ArrayReverse = 134;
674+
ToDate = 135;
674675
}
675676

676677
message ScalarFunctionNode {

datafusion/proto/src/generated/pbjson.rs

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/generated/prost.rs

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/logical_plan/from_proto.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
575575
ScalarFunction::Levenshtein => Self::Levenshtein,
576576
ScalarFunction::SubstrIndex => Self::SubstrIndex,
577577
ScalarFunction::FindInSet => Self::FindInSet,
578+
ScalarFunction::ToDate => Self::ToDate,
578579
}
579580
}
580581
}
@@ -1826,6 +1827,16 @@ pub fn parse_expr(
18261827
ScalarFunction::StructFun => {
18271828
Ok(struct_fun(parse_expr(&args[0], registry)?))
18281829
}
1830+
ScalarFunction::ToDate => {
1831+
let args: Vec<_> = args
1832+
.iter()
1833+
.map(|expr| parse_expr(expr, registry))
1834+
.collect::<std::result::Result<_, _>>()?;
1835+
Ok(Expr::ScalarFunction(expr::ScalarFunction::new(
1836+
BuiltinScalarFunction::ToDate,
1837+
args,
1838+
)))
1839+
}
18291840
}
18301841
}
18311842
ExprType::ScalarUdfExpr(protobuf::ScalarUdfExprNode { fun_name, args }) => {

datafusion/proto/src/logical_plan/to_proto.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,6 +1569,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
15691569
BuiltinScalarFunction::Levenshtein => Self::Levenshtein,
15701570
BuiltinScalarFunction::SubstrIndex => Self::SubstrIndex,
15711571
BuiltinScalarFunction::FindInSet => Self::FindInSet,
1572+
BuiltinScalarFunction::ToDate => Self::ToDate,
15721573
};
15731574

15741575
Ok(scalar_function)

datafusion/sqllogictest/test_files/dates.slt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,27 @@ query ?
107107
SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
108108
----
109109
730 days 0 hours 0 mins 0.000000000 secs
110+
111+
# to_date_test
112+
statement ok
113+
create table to_date_t1(ts bigint) as VALUES
114+
(1235865600000),
115+
(1235865660000),
116+
(1238544000000);
117+
118+
119+
# query_cast_timestamp_millis
120+
query D
121+
SELECT to_date(ts / 100000000) FROM to_date_t1 LIMIT 3
122+
----
123+
2003-11-02
124+
2003-11-02
125+
2003-11-29
126+
127+
query D
128+
SELECT to_date('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z');
129+
----
130+
2023-01-13
131+
132+
statement error DataFusion error: Internal error: to_date function unsupported data type at index 1: List
133+
SELECT to_date('2022-08-03T14:38:50+05:30', make_array('%s', '%q', '%d-%m-%Y %H:%M:%S%#z', '%+'));

0 commit comments

Comments
 (0)