diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index 379d8fe180fa..40c311bb857c 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -125,7 +125,15 @@ pub struct FixedSizeListArray { } impl FixedSizeListArray { - /// Create a new [`FixedSizeListArray`] with `size` element size, panicking on failure + /// Create a new [`FixedSizeListArray`] with `size` element size, panicking on failure. + /// + /// Note that if `size == 0` and `nulls` is `None` (a degenerate, non-nullable + /// `FixedSizeListArray`), this function will set the length of the array to 0. + /// + /// If you would like to have a degenerate, non-nullable `FixedSizeListArray` with arbitrary + /// length, use the [`try_new_with_length()`] constructor. + /// + /// [`try_new_with_length()`]: Self::try_new_with_length /// /// # Panics /// @@ -134,12 +142,20 @@ impl FixedSizeListArray { Self::try_new(field, size, values, nulls).unwrap() } - /// Create a new [`FixedSizeListArray`] from the provided parts, returning an error on failure + /// Create a new [`FixedSizeListArray`] from the provided parts, returning an error on failure. + /// + /// Note that if `size == 0` and `nulls` is `None` (a degenerate, non-nullable + /// `FixedSizeListArray`), this function will set the length of the array to 0. + /// + /// If you would like to have a degenerate, non-nullable `FixedSizeListArray` with arbitrary + /// length, use the [`try_new_with_length()`] constructor. + /// + /// [`try_new_with_length()`]: Self::try_new_with_length /// /// # Errors /// /// * `size < 0` - /// * `values.len() / size != nulls.len()` + /// * `values.len() != nulls.len() * size` if `nulls` is `Some` /// * `values.data_type() != field.data_type()` /// * `!field.is_nullable() && !nulls.expand(size).contains(values.logical_nulls())` pub fn try_new( @@ -152,23 +168,88 @@ impl FixedSizeListArray { ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {size}")) })?; - let len = match s { - 0 => nulls.as_ref().map(|x| x.len()).unwrap_or_default(), - _ => { - let len = values.len() / s.max(1); - if let Some(n) = nulls.as_ref() { - if n.len() != len { - return Err(ArrowError::InvalidArgumentError(format!( - "Incorrect length of null buffer for FixedSizeListArray, expected {} got {}", - len, - n.len(), - ))); - } + let len = if s == 0 { + // Note that for degenerate and non-nullable `FixedSizeList`s, we will set the length to + // 0 (`_or_default`). + nulls.as_ref().map(|x| x.len()).unwrap_or_default() + } else { + if values.len() % s != 0 { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect length of values buffer for FixedSizeListArray, \ + expected a multiple of {s} got {}", + values.len(), + ))); + } + + let len = values.len() / s; + + // Check that the null buffer length is correct (if it exists). + if let Some(null_buffer) = &nulls { + if s * null_buffer.len() != values.len() { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect length of values buffer for FixedSizeListArray, expected {} got {}", + s * null_buffer.len(), + values.len(), + ))); } - len } + + len }; + Self::try_new_with_length(field, size, values, nulls, len) + } + + /// Create a new [`FixedSizeListArray`] from the provided parts, returning an error on failure. + /// + /// This method exists to allow the construction of arbitrary length degenerate (`size == 0`) + /// and non-nullable `FixedSizeListArray`s. If you want a nullable `FixedSizeListArray`, then + /// you can use [`try_new()`] instead. + /// + /// [`try_new()`]: Self::try_new + /// + /// # Errors + /// + /// * `size < 0` + /// * `nulls.len() != len` if `nulls` is `Some` + /// * `values.len() != len * size` + /// * `values.data_type() != field.data_type()` + /// * `!field.is_nullable() && !nulls.expand(size).contains(values.logical_nulls())` + pub fn try_new_with_length( + field: FieldRef, + size: i32, + values: ArrayRef, + nulls: Option, + len: usize, + ) -> Result { + let s = size.to_usize().ok_or_else(|| { + ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {size}")) + })?; + + if let Some(null_buffer) = &nulls + && null_buffer.len() != len + { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid null buffer for FixedSizeListArray, expected {len} found {}", + null_buffer.len() + ))); + } + + if s == 0 && !values.is_empty() { + return Err(ArrowError::InvalidArgumentError(format!( + "An degenerate FixedSizeListArray should have no underlying values, found {} values", + values.len() + ))); + } + + if values.len() != len * s { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect length of values buffer for FixedSizeListArray, expected {} got {}", + len * s, + values.len(), + ))); + } + if field.data_type() != values.data_type() { return Err(ArrowError::InvalidArgumentError(format!( "FixedSizeListArray expected data type {} got {} for {:?}", @@ -673,8 +754,23 @@ mod tests { let list = FixedSizeListArray::new(field.clone(), 2, values.clone(), Some(nulls)); assert_eq!(list.len(), 3); - let list = FixedSizeListArray::new(field.clone(), 4, values.clone(), None); - assert_eq!(list.len(), 1); + let list = FixedSizeListArray::new(field.clone(), 3, values.clone(), None); + assert_eq!(list.len(), 2); + + let err = FixedSizeListArray::try_new(field.clone(), 4, values.clone(), None).unwrap_err(); + assert_eq!( + err.to_string(), + "Invalid argument error: Incorrect length of values buffer for FixedSizeListArray, \ + expected a multiple of 4 got 6", + ); + + let err = + FixedSizeListArray::try_new_with_length(field.clone(), 4, values.clone(), None, 1) + .unwrap_err(); + assert_eq!( + err.to_string(), + "Invalid argument error: Incorrect length of values buffer for FixedSizeListArray, expected 4 got 6" + ); let err = FixedSizeListArray::try_new(field.clone(), -1, values.clone(), None).unwrap_err(); assert_eq!( @@ -682,14 +778,11 @@ mod tests { "Invalid argument error: Size cannot be negative, got -1" ); - let list = FixedSizeListArray::new(field.clone(), 0, values.clone(), None); - assert_eq!(list.len(), 0); - let nulls = NullBuffer::new_null(2); let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err(); assert_eq!( err.to_string(), - "Invalid argument error: Incorrect length of null buffer for FixedSizeListArray, expected 3 got 2" + "Invalid argument error: Incorrect length of values buffer for FixedSizeListArray, expected 4 got 6" ); let field = Arc::new(Field::new_list_field(DataType::Int32, false)); @@ -712,11 +805,35 @@ mod tests { } #[test] - fn empty_fixed_size_list() { + fn degenerate_fixed_size_list() { let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let nulls = NullBuffer::new_null(2); let values = new_empty_array(&DataType::Int32); - let list = FixedSizeListArray::new(field.clone(), 0, values, Some(nulls)); + let list = FixedSizeListArray::new(field.clone(), 0, values.clone(), Some(nulls.clone())); assert_eq!(list.len(), 2); + + // Test invalid null buffer length. + let err = FixedSizeListArray::try_new_with_length( + field.clone(), + 0, + values.clone(), + Some(nulls), + 5, + ) + .unwrap_err(); + assert_eq!( + err.to_string(), + "Invalid argument error: Invalid null buffer for FixedSizeListArray, expected 5 found 2" + ); + + // Test non-empty values for degenerate list. + let non_empty_values = Arc::new(Int32Array::from(vec![1, 2, 3])); + let err = + FixedSizeListArray::try_new_with_length(field.clone(), 0, non_empty_values, None, 3) + .unwrap_err(); + assert_eq!( + err.to_string(), + "Invalid argument error: An degenerate FixedSizeListArray should have no underlying values, found 3 values" + ); } }