Skip to content

Commit 3e30f77

Browse files
authored
[datafusion-spark] Add Spark-compatible char expression (#15994)
* Add Spark-compatible char expression * Add slt test
1 parent 64fb51f commit 3e30f77

File tree

3 files changed

+138
-1
lines changed

3 files changed

+138
-1
lines changed
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use std::{any::Any, sync::Arc};
19+
20+
use arrow::{
21+
array::{ArrayRef, StringArray},
22+
datatypes::{
23+
DataType,
24+
DataType::{Int64, Utf8},
25+
},
26+
};
27+
28+
use datafusion_common::{cast::as_int64_array, exec_err, Result, ScalarValue};
29+
use datafusion_expr::{
30+
ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
31+
};
32+
33+
/// Spark-compatible `char` expression
34+
/// <https://spark.apache.org/docs/latest/api/sql/index.html#char>
35+
#[derive(Debug)]
36+
pub struct SparkChar {
37+
signature: Signature,
38+
}
39+
40+
impl Default for SparkChar {
41+
fn default() -> Self {
42+
Self::new()
43+
}
44+
}
45+
46+
impl SparkChar {
47+
pub fn new() -> Self {
48+
Self {
49+
signature: Signature::uniform(1, vec![Int64], Volatility::Immutable),
50+
}
51+
}
52+
}
53+
54+
impl ScalarUDFImpl for SparkChar {
55+
fn as_any(&self) -> &dyn Any {
56+
self
57+
}
58+
59+
fn name(&self) -> &str {
60+
"char"
61+
}
62+
63+
fn signature(&self) -> &Signature {
64+
&self.signature
65+
}
66+
67+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
68+
Ok(Utf8)
69+
}
70+
71+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
72+
spark_chr(&args.args)
73+
}
74+
}
75+
76+
/// Returns the ASCII character having the binary equivalent to the input expression.
77+
/// E.g., chr(65) = 'A'.
78+
/// Compatible with Apache Spark's Chr function
79+
fn spark_chr(args: &[ColumnarValue]) -> Result<ColumnarValue> {
80+
let array = args[0].clone();
81+
match array {
82+
ColumnarValue::Array(array) => {
83+
let array = chr(&[array])?;
84+
Ok(ColumnarValue::Array(array))
85+
}
86+
ColumnarValue::Scalar(ScalarValue::Int64(Some(value))) => {
87+
if value < 0 {
88+
Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
89+
"".to_string(),
90+
))))
91+
} else {
92+
match core::char::from_u32((value % 256) as u32) {
93+
Some(ch) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
94+
ch.to_string(),
95+
)))),
96+
None => {
97+
exec_err!("requested character was incompatible for encoding.")
98+
}
99+
}
100+
}
101+
}
102+
_ => exec_err!("The argument must be an Int64 array or scalar."),
103+
}
104+
}
105+
106+
fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
107+
let integer_array = as_int64_array(&args[0])?;
108+
109+
// first map is the iterator, second is for the `Option<_>`
110+
let result = integer_array
111+
.iter()
112+
.map(|integer: Option<i64>| {
113+
integer
114+
.map(|integer| {
115+
if integer < 0 {
116+
return Ok("".to_string()); // Return empty string for negative integers
117+
}
118+
match core::char::from_u32((integer % 256) as u32) {
119+
Some(ch) => Ok(ch.to_string()),
120+
None => {
121+
exec_err!("requested character not compatible for encoding.")
122+
}
123+
}
124+
})
125+
.transpose()
126+
})
127+
.collect::<Result<StringArray>>()?;
128+
129+
Ok(Arc::new(result) as ArrayRef)
130+
}

datafusion/spark/src/function/string/mod.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@
1616
// under the License.
1717

1818
pub mod ascii;
19+
pub mod char;
1920

2021
use datafusion_expr::ScalarUDF;
2122
use datafusion_functions::make_udf_function;
2223
use std::sync::Arc;
2324

2425
make_udf_function!(ascii::SparkAscii, ascii);
26+
make_udf_function!(char::SparkChar, char);
2527

2628
pub mod expr_fn {
2729
use datafusion_functions::export_functions;
@@ -31,8 +33,13 @@ pub mod expr_fn {
3133
"Returns the ASCII code point of the first character of string.",
3234
arg1
3335
));
36+
export_functions!((
37+
char,
38+
"Returns the ASCII character having the binary equivalent to col. If col is larger than 256 the result is equivalent to char(col % 256).",
39+
arg1
40+
));
3441
}
3542

3643
pub fn functions() -> Vec<Arc<ScalarUDF>> {
37-
vec![ascii()]
44+
vec![ascii(), char()]
3845
}
Binary file not shown.

0 commit comments

Comments
 (0)