Skip to content

Commit d3ef6c8

Browse files
parkma99 and alamb authored
feat: make_array support empty arguments (#6593)
* feat: make_array support empty arguments
* fix fmt error
* fix error
* array_append support empty array
* array_prepend support empty make_array
* array_concat support empty make_array
* fix clippy
* update
* fix
* rename `array_make` --> `make_array`

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent 708044c commit d3ef6c8

File tree

4 files changed

+166
-34
lines changed

4 files changed

+166
-34
lines changed

datafusion/core/tests/sqllogictests/test_files/array.slt

Lines changed: 99 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,49 @@ select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5, 6)
4949
----
5050
[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]]
5151

52-
# array_append scalar function
52+
# array scalar function #6
53+
query ? rowsort
54+
select make_array()
55+
----
56+
[]
57+
58+
# array scalar function #7
59+
query ?? rowsort
60+
select make_array(make_array()), make_array(make_array(make_array()))
61+
----
62+
[[]] [[[]]]
63+
64+
# array_append scalar function #1
65+
query ? rowsort
66+
select array_append(make_array(), 4);
67+
----
68+
[4]
69+
70+
# array_append scalar function #2
71+
query ?? rowsort
72+
select array_append(make_array(), make_array()), array_append(make_array(), make_array(4));
73+
----
74+
[[]] [[4]]
75+
76+
# array_append scalar function #3
5377
query ??? rowsort
5478
select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o');
5579
----
5680
[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
5781

58-
# array_prepend scalar function
82+
# array_prepend scalar function #1
83+
query ? rowsort
84+
select array_prepend(4, make_array());
85+
----
86+
[4]
87+
88+
# array_prepend scalar function #2
89+
query ?? rowsort
90+
select array_prepend(make_array(), make_array()), array_prepend(make_array(4), make_array());
91+
----
92+
[[]] [[4]]
93+
94+
# array_prepend scalar function #3
5995
query ??? rowsort
6096
select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0, 3.0, 4.0)), array_prepend('h', make_array('e', 'l', 'l', 'o'));
6197
----
@@ -73,6 +109,12 @@ select array_fill(1, make_array(1, 1, 1)), array_fill(2, make_array(2, 2, 2, 2,
73109
----
74110
[[[1]]] [[[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]], [[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]]]
75111

112+
# array_fill scalar function #3
113+
query ?
114+
select array_fill(1, make_array())
115+
----
116+
[]
117+
76118
# array_concat scalar function #1
77119
query ?? rowsort
78120
select array_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_concat(make_array([1], [2]), make_array([3], [4]));
@@ -97,6 +139,18 @@ select array_concat(make_array([[1]]), make_array([[2]]));
97139
----
98140
[[[1]], [[2]]]
99141

142+
# array_concat scalar function #5
143+
query ? rowsort
144+
select array_concat(make_array(2, 3), make_array());
145+
----
146+
[2, 3]
147+
148+
# array_concat scalar function #6
149+
query ? rowsort
150+
select array_concat(make_array(), make_array(2, 3));
151+
----
152+
[2, 3]
153+
100154
# array_position scalar function #1
101155
query III
102156
select array_position(['h', 'e', 'l', 'l', 'o'], 'l'), array_position([1, 2, 3, 4, 5], 5), array_position([1, 1, 1], 1);
@@ -133,6 +187,12 @@ select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5, 6]]
133187
----
134188
11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3
135189

190+
# array_to_string scalar function #3
191+
query ?
192+
select array_to_string(make_array(), ',')
193+
----
194+
(empty)
195+
136196
# cardinality scalar function
137197
query III
138198
select cardinality(make_array(1, 2, 3, 4, 5)), cardinality([1, 3, 5]), cardinality(make_array('h', 'e', 'l', 'l', 'o'));
@@ -145,7 +205,13 @@ select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_fill(3
145205
----
146206
6 18
147207

148-
# trim_array scalar function
208+
# cardinality scalar function #3
209+
query II
210+
select cardinality(make_array()), cardinality(make_array(make_array()))
211+
----
212+
0 0
213+
214+
# trim_array scalar function #1
149215
query ???
150216
select trim_array(make_array(1, 2, 3, 4, 5), 2), trim_array(['h', 'e', 'l', 'l', 'o'], 3), trim_array([1.0, 2.0, 3.0], 2);
151217
----
@@ -157,6 +223,18 @@ select trim_array([[1, 2], [3, 4], [5, 6]], 2), trim_array(array_fill(4, [3, 4,
157223
----
158224
[[1, 2]] [[[4, 4], [4, 4], [4, 4], [4, 4]]]
159225

226+
# trim_array scalar function #3
227+
query ?
228+
select array_concat(trim_array(make_array(1, 2, 3), 3), make_array(4, 5), make_array());
229+
----
230+
[4, 5]
231+
232+
# trim_array scalar function #4
233+
query ??
234+
select trim_array(make_array(), 0), trim_array(make_array(), 1)
235+
----
236+
[] []
237+
160238
# array_length scalar function
161239
query III rowsort
162240
select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2, 3)), array_length(make_array([1, 2], [3, 4], [5, 6]));
@@ -181,6 +259,12 @@ select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3, [3,
181259
----
182260
3 2 5 NULL
183261

262+
# array_length scalar function #5
263+
query III rowsort
264+
select array_length(make_array()), array_length(make_array(), 1), array_length(make_array(), 2)
265+
----
266+
0 0 NULL
267+
184268
# array_dims scalar function
185269
query III rowsort
186270
select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])), array_dims(make_array([[[[1], [2]]]]));
@@ -193,6 +277,12 @@ select array_dims(array_fill(2, [1, 2, 3])), array_dims(array_fill(3, [2, 5, 4])
193277
----
194278
[1, 2, 3] [2, 5, 4]
195279

280+
# array_dims scalar function #3
281+
query II rowsort
282+
select array_dims(make_array()), array_dims(make_array(make_array()))
283+
----
284+
[0] [1, 0]
285+
196286
# array_ndims scalar function
197287
query III rowsort
198288
select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3, 4])), array_ndims(make_array([[[[1], [2]]]]));
@@ -204,3 +294,9 @@ query II rowsort
204294
select array_ndims(array_fill(1, [1, 2, 3])), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]);
205295
----
206296
3 21
297+
298+
# array_ndims scalar function #3
299+
query II rowsort
300+
select array_ndims(make_array()), array_ndims(make_array(make_array()))
301+
----
302+
1 2

datafusion/expr/src/built_in_function.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ impl BuiltinScalarFunction {
277277
| BuiltinScalarFunction::CurrentDate
278278
| BuiltinScalarFunction::CurrentTime
279279
| BuiltinScalarFunction::Uuid
280+
| BuiltinScalarFunction::MakeArray
280281
)
281282
}
282283
/// Returns the [Volatility] of the builtin function.
@@ -510,11 +511,14 @@ impl BuiltinScalarFunction {
510511
))),
511512
},
512513
BuiltinScalarFunction::Cardinality => Ok(UInt64),
513-
BuiltinScalarFunction::MakeArray => Ok(List(Arc::new(Field::new(
514-
"item",
515-
input_expr_types[0].clone(),
516-
true,
517-
)))),
514+
BuiltinScalarFunction::MakeArray => match input_expr_types.len() {
515+
0 => Ok(List(Arc::new(Field::new("item", Null, true)))),
516+
_ => Ok(List(Arc::new(Field::new(
517+
"item",
518+
input_expr_types[0].clone(),
519+
true,
520+
)))),
521+
},
518522
BuiltinScalarFunction::TrimArray => match &input_expr_types[0] {
519523
List(field) => Ok(List(Arc::new(Field::new(
520524
"item",

datafusion/physical-expr/src/array_expressions.rs

Lines changed: 57 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,15 @@ pub fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
150150
Ok(ColumnarValue::Array(array_array(arrays.as_slice())?))
151151
}
152152

153+
/// `make_array` SQL function
154+
pub fn make_array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
155+
match values[0].data_type() {
156+
DataType::Null => Ok(datafusion_expr::ColumnarValue::Scalar(
157+
ScalarValue::new_list(Some(vec![]), DataType::Null),
158+
)),
159+
_ => array(values),
160+
}
161+
}
153162
macro_rules! downcast_arg {
154163
($ARG:expr, $ARRAY_TYPE:ident) => {{
155164
$ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
@@ -217,6 +226,7 @@ pub fn array_append(args: &[ColumnarValue]) -> Result<ColumnarValue> {
217226
(DataType::UInt16, DataType::UInt16) => append!(arr, element, UInt16Array),
218227
(DataType::UInt32, DataType::UInt32) => append!(arr, element, UInt32Array),
219228
(DataType::UInt64, DataType::UInt64) => append!(arr, element, UInt64Array),
229+
(DataType::Null, _) => return array(&args[1..]),
220230
(array_data_type, element_data_type) => {
221231
return Err(DataFusionError::NotImplemented(format!(
222232
"Array_append is not implemented for types '{array_data_type:?}' and '{element_data_type:?}'."
@@ -290,6 +300,7 @@ pub fn array_prepend(args: &[ColumnarValue]) -> Result<ColumnarValue> {
290300
(DataType::UInt16, DataType::UInt16) => prepend!(arr, element, UInt16Array),
291301
(DataType::UInt32, DataType::UInt32) => prepend!(arr, element, UInt32Array),
292302
(DataType::UInt64, DataType::UInt64) => prepend!(arr, element, UInt64Array),
303+
(DataType::Null, _) => return array(&args[..1]),
293304
(array_data_type, element_data_type) => {
294305
return Err(DataFusionError::NotImplemented(format!(
295306
"Array_prepend is not implemented for types '{array_data_type:?}' and '{element_data_type:?}'."
@@ -318,30 +329,36 @@ pub fn array_concat(args: &[ColumnarValue]) -> Result<ColumnarValue> {
318329
.collect();
319330
let data_type = arrays[0].data_type();
320331
match data_type {
321-
DataType::List(..) => {
322-
let list_arrays =
323-
downcast_vec!(arrays, ListArray).collect::<Result<Vec<&ListArray>>>()?;
324-
let len: usize = list_arrays.iter().map(|a| a.values().len()).sum();
325-
let capacity = Capacities::Array(list_arrays.iter().map(|a| a.len()).sum());
326-
let array_data: Vec<_> =
327-
list_arrays.iter().map(|a| a.to_data()).collect::<Vec<_>>();
328-
let array_data = array_data.iter().collect();
329-
let mut mutable =
330-
MutableArrayData::with_capacities(array_data, false, capacity);
331-
332-
for (i, a) in list_arrays.iter().enumerate() {
333-
mutable.extend(i, 0, a.len())
334-
}
332+
DataType::List(field) => match field.data_type() {
333+
DataType::Null => array_concat(&args[1..]),
334+
_ => {
335+
let list_arrays = downcast_vec!(arrays, ListArray)
336+
.collect::<Result<Vec<&ListArray>>>()?;
337+
let len: usize = list_arrays.iter().map(|a| a.values().len()).sum();
338+
let capacity =
339+
Capacities::Array(list_arrays.iter().map(|a| a.len()).sum());
340+
let array_data: Vec<_> =
341+
list_arrays.iter().map(|a| a.to_data()).collect::<Vec<_>>();
342+
let array_data = array_data.iter().collect();
343+
let mut mutable =
344+
MutableArrayData::with_capacities(array_data, false, capacity);
345+
346+
for (i, a) in list_arrays.iter().enumerate() {
347+
mutable.extend(i, 0, a.len())
348+
}
335349

336-
let builder = mutable.into_builder();
337-
let list = builder
338-
.len(1)
339-
.buffers(vec![Buffer::from_slice_ref([0, len as i32])])
340-
.build()
341-
.unwrap();
350+
let builder = mutable.into_builder();
351+
let list = builder
352+
.len(1)
353+
.buffers(vec![Buffer::from_slice_ref([0, len as i32])])
354+
.build()
355+
.unwrap();
342356

343-
return Ok(ColumnarValue::Array(Arc::new(make_array(list))));
344-
}
357+
return Ok(ColumnarValue::Array(Arc::new(arrow::array::make_array(
358+
list,
359+
))));
360+
}
361+
},
345362
_ => Err(DataFusionError::NotImplemented(format!(
346363
"Array is not type '{data_type:?}'."
347364
))),
@@ -410,6 +427,11 @@ pub fn array_fill(args: &[ColumnarValue]) -> Result<ColumnarValue> {
410427
DataType::UInt16 => fill!(array_values, element, UInt16Array),
411428
DataType::UInt32 => fill!(array_values, element, UInt32Array),
412429
DataType::UInt64 => fill!(array_values, element, UInt64Array),
430+
DataType::Null => {
431+
return Ok(datafusion_expr::ColumnarValue::Scalar(
432+
ScalarValue::new_list(Some(vec![]), DataType::Null),
433+
))
434+
}
413435
data_type => {
414436
return Err(DataFusionError::Internal(format!(
415437
"Array_fill is not implemented for type '{data_type:?}'."
@@ -823,6 +845,7 @@ pub fn array_to_string(args: &[ColumnarValue]) -> Result<ColumnarValue> {
823845
DataType::UInt16 => to_string!(arg, arr, &delimeter, UInt16Array),
824846
DataType::UInt32 => to_string!(arg, arr, &delimeter, UInt32Array),
825847
DataType::UInt64 => to_string!(arg, arr, &delimeter, UInt64Array),
848+
DataType::Null => Ok(arg),
826849
data_type => Err(DataFusionError::NotImplemented(format!(
827850
"Array is not implemented for type '{data_type:?}'."
828851
))),
@@ -831,8 +854,13 @@ pub fn array_to_string(args: &[ColumnarValue]) -> Result<ColumnarValue> {
831854

832855
let mut arg = String::from("");
833856
let mut res = compute_array_to_string(&mut arg, arr, delimeter.clone())?.clone();
834-
res.truncate(res.len() - delimeter.len());
835-
Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(res))))
857+
match res.as_str() {
858+
"" => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(res)))),
859+
_ => {
860+
res.truncate(res.len() - delimeter.len());
861+
Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(res))))
862+
}
863+
}
836864
}
837865

838866
/// Trim_array SQL function
@@ -871,8 +899,12 @@ pub fn trim_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
871899

872900
let list_array = downcast_arg!(arr, ListArray);
873901
let values = list_array.value(0);
902+
if values.len() <= n {
903+
return Ok(datafusion_expr::ColumnarValue::Scalar(
904+
ScalarValue::new_list(Some(vec![]), DataType::Null),
905+
));
906+
}
874907
let res = values.slice(0, values.len() - n);
875-
876908
let mut scalars = vec![];
877909
for i in 0..res.len() {
878910
scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&res, i)?));

datafusion/physical-expr/src/functions.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ pub fn create_physical_fun(
404404
Arc::new(array_expressions::array_to_string)
405405
}
406406
BuiltinScalarFunction::Cardinality => Arc::new(array_expressions::cardinality),
407-
BuiltinScalarFunction::MakeArray => Arc::new(array_expressions::array),
407+
BuiltinScalarFunction::MakeArray => Arc::new(array_expressions::make_array),
408408
BuiltinScalarFunction::TrimArray => Arc::new(array_expressions::trim_array),
409409

410410
// string functions

0 commit comments

Comments
 (0)