Skip to content

Commit aa86236

Browse files
committed
fix: encode function should work with strings and binary
closes #14055
1 parent 17446ad commit aa86236

File tree

2 files changed

+39 additions, -8 deletions (lines changed)

datafusion/functions/src/encoding/inner.rs

Lines changed: 18 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,20 @@ impl ScalarUDFImpl for EncodeFunc {
8787
}
8888

8989
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
90-
Ok(arg_types[0].to_owned())
90+
use DataType::*;
91+
92+
Ok(match arg_types[0] {
93+
Utf8 => Utf8,
94+
LargeUtf8 => LargeUtf8,
95+
Binary => Utf8,
96+
LargeBinary => LargeUtf8,
97+
Null => Null,
98+
_ => {
99+
return plan_err!(
100+
"The encode function can only accept Utf8 or Binary or Null."
101+
);
102+
}
103+
})
91104
}
92105

93106
fn invoke_batch(
@@ -112,12 +125,12 @@ impl ScalarUDFImpl for EncodeFunc {
112125
}
113126

114127
match arg_types[0] {
115-
DataType::Utf8 | DataType::Utf8View | DataType::Binary | DataType::Null => {
128+
DataType::Utf8 | DataType::Utf8View | DataType::Null => {
116129
Ok(vec![DataType::Utf8; 2])
117130
}
118-
DataType::LargeUtf8 | DataType::LargeBinary => {
119-
Ok(vec![DataType::LargeUtf8, DataType::Utf8])
120-
}
131+
DataType::LargeUtf8 => Ok(vec![DataType::LargeUtf8, DataType::Utf8]),
132+
DataType::Binary => Ok(vec![DataType::Binary, DataType::Utf8]),
133+
DataType::LargeBinary => Ok(vec![DataType::LargeBinary, DataType::Utf8]),
121134
_ => plan_err!(
122135
"1st argument should be Utf8 or Binary or Null, got {:?}",
123136
arg_types[0]

datafusion/sqllogictest/test_files/encoding.slt

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,9 @@ CREATE TABLE test(
2323
hex_field TEXT
2424
) as VALUES
2525
(0, 'abc', encode('abc', 'base64'), encode('abc', 'hex')),
26-
(1, 'qweqwe', encode('qweqwe', 'base64'), encode('qweqwe', 'hex')),
27-
(2, NULL, NULL, NULL)
26+
(1, 'qweqwe', encode('qweqwe', 'base64'), encode('qweqwe', 'hex')),
27+
(2, NULL, NULL, NULL),
28+
(3, X'8f50d3f60eae370ddbf85c86219c55108a350165', encode('8f50d3f60eae370ddbf85c86219c55108a350165', 'base64'), encode('8f50d3f60eae370ddbf85c86219c55108a350165', 'hex'))
2829
;
2930

3031
# errors
@@ -43,34 +44,51 @@ select decode(hex_field, 'non_encoding') from test;
4344
query error
4445
select to_hex(hex_field) from test;
4546

47+
query error
48+
select arrow_cast(decode(X'8f50d3f60eae370ddbf85c86219c55108a350165', 'base64'), 'Utf8');
49+
4650
# Arrays tests
4751
query T
4852
SELECT encode(bin_field, 'hex') FROM test ORDER BY num;
4953
----
5054
616263
5155
717765717765
5256
NULL
57+
8f50d3f60eae370ddbf85c86219c55108a350165
5358

5459
query T
5560
SELECT arrow_cast(decode(base64_field, 'base64'), 'Utf8') FROM test ORDER BY num;
5661
----
5762
abc
5863
qweqwe
5964
NULL
65+
8f50d3f60eae370ddbf85c86219c55108a350165
6066

6167
query T
6268
SELECT arrow_cast(decode(hex_field, 'hex'), 'Utf8') FROM test ORDER BY num;
6369
----
6470
abc
6571
qweqwe
6672
NULL
73+
8f50d3f60eae370ddbf85c86219c55108a350165
6774

6875
query T
6976
select to_hex(num) from test ORDER BY num;
7077
----
7178
0
7279
1
7380
2
81+
3
82+
83+
query T
84+
select encode(bin_field, 'base64') FROM test WHERE num = 3;
85+
----
86+
j1DT9g6uNw3b+FyGIZxVEIo1AWU
87+
88+
query B
89+
select decode(encode(bin_field, 'base64'), 'base64') = X'8f50d3f60eae370ddbf85c86219c55108a350165' FROM test WHERE num = 3;
90+
----
91+
true
7492

7593
# test for Utf8View support for encode
7694
statement ok
@@ -101,4 +119,4 @@ FROM test_utf8view;
101119
Andrew QW5kcmV3 416e64726577 X WA 58
102120
Xiangpeng WGlhbmdwZW5n 5869616e6770656e67 Xiangpeng WGlhbmdwZW5n 5869616e6770656e67
103121
Raphael UmFwaGFlbA 5261706861656c R Ug 52
104-
NULL NULL NULL R Ug 52
122+
NULL NULL NULL R Ug 52

0 commit comments

Comments
 (0)