Skip to content

Commit b2e8848

Browse files
authored
feat: support largelist in array_to_string (#8729)
* support LargeList in array_to_string
* reduce code duplication
1 parent 98f02ff commit b2e8848

File tree

2 files changed

+128
-27
lines changed

2 files changed

+128
-27
lines changed

datafusion/physical-expr/src/array_expressions.rs

Lines changed: 67 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2022,8 +2022,21 @@ pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
20222022
) -> Result<&mut String> {
20232023
match arr.data_type() {
20242024
DataType::List(..) => {
2025-
let list_array = downcast_arg!(arr, ListArray);
2025+
let list_array = as_list_array(&arr)?;
2026+
for i in 0..list_array.len() {
2027+
compute_array_to_string(
2028+
arg,
2029+
list_array.value(i),
2030+
delimiter.clone(),
2031+
null_string.clone(),
2032+
with_null_string,
2033+
)?;
2034+
}
20262035

2036+
Ok(arg)
2037+
}
2038+
DataType::LargeList(..) => {
2039+
let list_array = as_large_list_array(&arr)?;
20272040
for i in 0..list_array.len() {
20282041
compute_array_to_string(
20292042
arg,
@@ -2055,35 +2068,61 @@ pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
20552068
}
20562069
}
20572070

2058-
let mut arg = String::from("");
2059-
let mut res: Vec<Option<String>> = Vec::new();
2060-
2061-
match arr.data_type() {
2062-
DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) => {
2063-
let list_array = arr.as_list::<i32>();
2064-
for (arr, &delimiter) in list_array.iter().zip(delimiters.iter()) {
2065-
if let (Some(arr), Some(delimiter)) = (arr, delimiter) {
2066-
arg = String::from("");
2067-
let s = compute_array_to_string(
2068-
&mut arg,
2069-
arr,
2070-
delimiter.to_string(),
2071-
null_string.clone(),
2072-
with_null_string,
2073-
)?
2074-
.clone();
2075-
2076-
if let Some(s) = s.strip_suffix(delimiter) {
2077-
res.push(Some(s.to_string()));
2078-
} else {
2079-
res.push(Some(s));
2080-
}
2071+
fn generate_string_array<O: OffsetSizeTrait>(
2072+
list_arr: &GenericListArray<O>,
2073+
delimiters: Vec<Option<&str>>,
2074+
null_string: String,
2075+
with_null_string: bool,
2076+
) -> Result<StringArray> {
2077+
let mut res: Vec<Option<String>> = Vec::new();
2078+
for (arr, &delimiter) in list_arr.iter().zip(delimiters.iter()) {
2079+
if let (Some(arr), Some(delimiter)) = (arr, delimiter) {
2080+
let mut arg = String::from("");
2081+
let s = compute_array_to_string(
2082+
&mut arg,
2083+
arr,
2084+
delimiter.to_string(),
2085+
null_string.clone(),
2086+
with_null_string,
2087+
)?
2088+
.clone();
2089+
2090+
if let Some(s) = s.strip_suffix(delimiter) {
2091+
res.push(Some(s.to_string()));
20812092
} else {
2082-
res.push(None);
2093+
res.push(Some(s));
20832094
}
2095+
} else {
2096+
res.push(None);
20842097
}
20852098
}
2099+
2100+
Ok(StringArray::from(res))
2101+
}
2102+
2103+
let arr_type = arr.data_type();
2104+
let string_arr = match arr_type {
2105+
DataType::List(_) | DataType::FixedSizeList(_, _) => {
2106+
let list_array = as_list_array(&arr)?;
2107+
generate_string_array::<i32>(
2108+
list_array,
2109+
delimiters,
2110+
null_string,
2111+
with_null_string,
2112+
)?
2113+
}
2114+
DataType::LargeList(_) => {
2115+
let list_array = as_large_list_array(&arr)?;
2116+
generate_string_array::<i64>(
2117+
list_array,
2118+
delimiters,
2119+
null_string,
2120+
with_null_string,
2121+
)?
2122+
}
20862123
_ => {
2124+
let mut arg = String::from("");
2125+
let mut res: Vec<Option<String>> = Vec::new();
20872126
// delimiter length is 1
20882127
assert_eq!(delimiters.len(), 1);
20892128
let delimiter = delimiters[0].unwrap();
@@ -2102,10 +2141,11 @@ pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
21022141
} else {
21032142
res.push(Some(s));
21042143
}
2144+
StringArray::from(res)
21052145
}
2106-
}
2146+
};
21072147

2108-
Ok(Arc::new(StringArray::from(res)))
2148+
Ok(Arc::new(string_arr))
21092149
}
21102150

21112151
/// Cardinality SQL function

datafusion/sqllogictest/test_files/array.slt

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3238,30 +3238,55 @@ select list_to_string(['h', 'e', 'l', 'l', 'o'], ','), list_to_string([1, 2, 3,
32383238
----
32393239
h,e,l,l,o 1-2-3-4-5 1|2|3
32403240

3241+
query TTT
3242+
select list_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), list_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|');
3243+
----
3244+
h,e,l,l,o 1-2-3-4-5 1|2|3
3245+
32413246
# array_join scalar function #5 (function alias `array_to_string`)
32423247
query TTT
32433248
select array_join(['h', 'e', 'l', 'l', 'o'], ','), array_join([1, 2, 3, 4, 5], '-'), array_join([1.0, 2.0, 3.0], '|');
32443249
----
32453250
h,e,l,l,o 1-2-3-4-5 1|2|3
32463251

3252+
query TTT
3253+
select array_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|');
3254+
----
3255+
h,e,l,l,o 1-2-3-4-5 1|2|3
3256+
32473257
# list_join scalar function #6 (function alias `array_to_string`)
32483258
query TTT
32493259
select list_join(['h', 'e', 'l', 'l', 'o'], ','), list_join([1, 2, 3, 4, 5], '-'), list_join([1.0, 2.0, 3.0], '|');
32503260
----
32513261
h,e,l,l,o 1-2-3-4-5 1|2|3
32523262

3263+
query TTT
3264+
select list_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), list_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|');
3265+
----
3266+
h,e,l,l,o 1-2-3-4-5 1|2|3
3267+
32533268
# array_to_string scalar function with nulls #1
32543269
query TTT
32553270
select array_to_string(make_array('h', NULL, 'l', NULL, 'o'), ','), array_to_string(make_array(1, NULL, 3, NULL, 5), '-'), array_to_string(make_array(NULL, 2.0, 3.0), '|');
32563271
----
32573272
h,l,o 1-3-5 2|3
32583273

3274+
query TTT
3275+
select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|');
3276+
----
3277+
h,e,l,l,o 1-2-3-4-5 1|2|3
3278+
32593279
# array_to_string scalar function with nulls #2
32603280
query TTT
32613281
select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), array_to_string(make_array(1.0, NULL, 3.0), '|', '0');
32623282
----
32633283
h,-,-,-,o nil-2-nil-4-5 1|0|3
32643284

3285+
query TTT
3286+
select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'LargeList(Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'LargeList(Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'LargeList(Float64)'), '|', '0');
3287+
----
3288+
h,-,-,-,o nil-2-nil-4-5 1|0|3
3289+
32653290
# array_to_string with columns #1
32663291

32673292
# For reference
@@ -3288,6 +3313,18 @@ NULL
32883313
51^52^54^55^56^57^58^59^60
32893314
NULL
32903315

3316+
query T
3317+
select array_to_string(column1, column4) from large_arrays_values;
3318+
----
3319+
2,3,4,5,6,7,8,9,10
3320+
11.12.13.14.15.16.17.18.20
3321+
21-22-23-25-26-27-28-29-30
3322+
31ok32ok33ok34ok35ok37ok38ok39ok40
3323+
NULL
3324+
41$42$43$44$45$46$47$48$49$50
3325+
51^52^54^55^56^57^58^59^60
3326+
NULL
3327+
32913328
query TT
32923329
select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from arrays_values;
32933330
----
@@ -3300,6 +3337,18 @@ NULL 1/2/3
33003337
51_52_54_55_56_57_58_59_60 1/2/3
33013338
61_62_63_64_65_66_67_68_69_70 1/2/3
33023339

3340+
query TT
3341+
select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from large_arrays_values;
3342+
----
3343+
2_3_4_5_6_7_8_9_10 1/2/3
3344+
11_12_13_14_15_16_17_18_20 1/2/3
3345+
21_22_23_25_26_27_28_29_30 1/2/3
3346+
31_32_33_34_35_37_38_39_40 1/2/3
3347+
NULL 1/2/3
3348+
41_42_43_44_45_46_47_48_49_50 1/2/3
3349+
51_52_54_55_56_57_58_59_60 1/2/3
3350+
61_62_63_64_65_66_67_68_69_70 1/2/3
3351+
33033352
query TT
33043353
select array_to_string(column1, '_', '*'), array_to_string(make_array(make_array(1,2,3)), '.') from arrays_values;
33053354
----
@@ -3312,6 +3361,18 @@ NULL 1.2.3
33123361
51_52_*_54_55_56_57_58_59_60 1.2.3
33133362
61_62_63_64_65_66_67_68_69_70 1.2.3
33143363

3364+
query TT
3365+
select array_to_string(column1, '_', '*'), array_to_string(make_array(make_array(1,2,3)), '.') from large_arrays_values;
3366+
----
3367+
*_2_3_4_5_6_7_8_9_10 1.2.3
3368+
11_12_13_14_15_16_17_18_*_20 1.2.3
3369+
21_22_23_*_25_26_27_28_29_30 1.2.3
3370+
31_32_33_34_35_*_37_38_39_40 1.2.3
3371+
NULL 1.2.3
3372+
41_42_43_44_45_46_47_48_49_50 1.2.3
3373+
51_52_*_54_55_56_57_58_59_60 1.2.3
3374+
61_62_63_64_65_66_67_68_69_70 1.2.3
3375+
33153376
## cardinality
33163377

33173378
# cardinality scalar function

0 commit comments

Comments
 (0)