Skip to content

Commit f5138fc

Browse files
authored
feat: include the mismatched data types in the concat error message (#7166)
* feat: include the mismatched data types in the concat error message * improve the test, fix it, and list the type names in order of appearance * simplify * limit the error message to at most 10 unique data types and list them in the same order as the arrays for easier debugging * add tests for not printing all the data types
1 parent 46d1612 commit f5138fc

File tree

1 file changed

+110
-6
lines changed

1 file changed

+110
-6
lines changed

arrow-select/src/concat.rs

Lines changed: 110 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ use arrow_array::*;
3737
use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, OffsetBuffer};
3838
use arrow_data::transform::{Capacities, MutableArrayData};
3939
use arrow_schema::{ArrowError, DataType, FieldRef, SchemaRef};
40-
use std::sync::Arc;
40+
use std::{collections::HashSet, sync::Arc};
4141

4242
fn binary_capacity<T: ByteArrayType>(arrays: &[&dyn Array]) -> Capacities {
4343
let mut item_capacity = 0;
@@ -223,9 +223,35 @@ pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
223223

224224
let d = arrays[0].data_type();
225225
if arrays.iter().skip(1).any(|array| array.data_type() != d) {
226-
return Err(ArrowError::InvalidArgumentError(
227-
"It is not possible to concatenate arrays of different data types.".to_string(),
228-
));
226+
// Create error message with up to 10 unique data types in the order they appear
227+
let error_message = {
228+
// Track at most 10 unique data types to print, plus 1 extra slot to detect that there are more
229+
let mut unique_data_types = HashSet::with_capacity(11);
230+
231+
let mut error_message =
232+
format!("It is not possible to concatenate arrays of different data types ({d}");
233+
unique_data_types.insert(d);
234+
235+
for array in arrays {
236+
let is_unique = unique_data_types.insert(array.data_type());
237+
238+
if unique_data_types.len() == 11 {
239+
error_message.push_str(", ...");
240+
break;
241+
}
242+
243+
if is_unique {
244+
error_message.push_str(", ");
245+
error_message.push_str(&array.data_type().to_string());
246+
}
247+
}
248+
249+
error_message.push_str(").");
250+
251+
error_message
252+
};
253+
254+
return Err(ArrowError::InvalidArgumentError(error_message));
229255
}
230256

231257
match d {
@@ -340,9 +366,87 @@ mod tests {
340366
fn test_concat_incompatible_datatypes() {
341367
let re = concat(&[
342368
&PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
369+
// Two string arrays, to make sure each unique type is mentioned only once
343370
&StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
371+
&StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
372+
// Another type to make sure we are showing all the incompatible types
373+
&PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
344374
]);
345-
assert!(re.is_err());
375+
376+
assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32).");
377+
}
378+
379+
/// Passes arrays with exactly 10 unique data types (plus duplicates) to `concat` and checks
/// that the error message lists all 10, in order of first appearance, with no trailing `...`.
#[test]
380+
fn test_concat_10_incompatible_datatypes_should_include_all_of_them() {
381+
let re = concat(&[
382+
&PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
383+
// Two string arrays, to make sure each unique type is mentioned only once
384+
&StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
385+
&StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
386+
// Another type, to make sure all the incompatible types are shown
387+
&PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
388+
&PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
389+
&PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
390+
&PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
391+
&PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
392+
&PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
393+
// Non-unique: UInt16 already appeared above, so it must not be listed twice
394+
&PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
395+
&PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
396+
&PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
397+
]);
398+
399+
assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32).");
400+
}
401+
402+
/// Passes arrays with 11 unique data types to `concat` and checks that the error message
/// lists only the first 10 (in order of first appearance) followed by `...`.
#[test]
403+
fn test_concat_11_incompatible_datatypes_should_only_include_10() {
404+
let re = concat(&[
405+
&PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
406+
// Two string arrays, to make sure each unique type is mentioned only once
407+
&StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
408+
&StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
409+
// Another type, to make sure all the incompatible types are shown
410+
&PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
411+
&PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
412+
&PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
413+
&PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
414+
&PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
415+
&PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
416+
// Non-unique: UInt16 already appeared above, so it must not be listed twice
417+
&PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
418+
&PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
419+
&PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
420+
&PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0), None]),
421+
]);
422+
423+
assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...).");
424+
}
425+
426+
/// Passes arrays with 13 unique data types to `concat` and checks that the error message is
/// the same as in the 11-type case: the first 10 unique types followed by a single `...`.
#[test]
427+
fn test_concat_13_incompatible_datatypes_should_not_include_all_of_them() {
428+
let re = concat(&[
429+
&PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
430+
// Two string arrays, to make sure each unique type is mentioned only once
431+
&StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
432+
&StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
433+
// Another type, to make sure all the incompatible types are shown
434+
&PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
435+
&PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
436+
&PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
437+
&PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
438+
&PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
439+
&PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
440+
// Non-unique: UInt16 already appeared above, so it must not be listed twice
441+
&PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
442+
&PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
443+
&PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
444+
&PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0), None]),
445+
&PrimitiveArray::<Float16Type>::new_null(3),
446+
&BooleanArray::from(vec![Some(true), Some(false), None]),
447+
]);
448+
449+
assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...).");
346450
}
347451

348452
#[test]
@@ -924,7 +1028,7 @@ mod tests {
9241028
.unwrap();
9251029

9261030
let error = concat_batches(&schema1, [&batch1, &batch2]).unwrap_err();
927-
assert_eq!(error.to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types.");
1031+
assert_eq!(error.to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int32, Utf8).");
9281032
}
9291033

9301034
#[test]

0 commit comments

Comments
 (0)