Skip to content

Commit 604bc90

Browse files
committed
feat: list view basic construction and validation
1 parent cd33319 commit 604bc90

File tree

2 files changed

+155
-3
lines changed

2 files changed

+155
-3
lines changed

arrow-data/src/data.rs

Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,43 @@ impl ArrayData {
929929
Ok(())
930930
}
931931

932+
/// Does a cheap sanity check that the `self.len` values in `buffer` are valid
933+
/// offsets and sizes (of type T) into some other buffer of `values_length` bytes long
934+
fn validate_offsets_and_sizes<T: ArrowNativeType + num::Num + std::fmt::Display>(
935+
&self,
936+
values_length: usize,
937+
) -> Result<(), ArrowError> {
938+
let offsets: &[T] = self.typed_buffer(0, self.len)?;
939+
let sizes: &[T] = self.typed_buffer(1, self.len)?;
940+
for i in 0..values_length {
941+
let size = sizes[i].to_usize().ok_or_else(|| {
942+
ArrowError::InvalidArgumentError(format!(
943+
"Error converting size[{}] ({}) to usize for {}",
944+
i, sizes[i], self.data_type
945+
))
946+
})?;
947+
let offset = offsets[i].to_usize().ok_or_else(|| {
948+
ArrowError::InvalidArgumentError(format!(
949+
"Error converting offset[{}] ({}) to usize for {}",
950+
i, offsets[i], self.data_type
951+
))
952+
})?;
953+
if offset > values_length {
954+
return Err(ArrowError::InvalidArgumentError(format!(
955+
"Size {} at index {} is offset {} is out of bounds for {}",
956+
size, i, offset, self.data_type
957+
)));
958+
}
959+
if size > values_length - offset {
960+
return Err(ArrowError::InvalidArgumentError(format!(
961+
"Size {} at index {} is larger than the remaining values for {}",
962+
size, i, self.data_type
963+
)));
964+
}
965+
}
966+
Ok(())
967+
}
968+
932969
/// Validates the layout of `child_data` ArrayData structures
933970
fn validate_child_data(&self) -> Result<(), ArrowError> {
934971
match &self.data_type {
@@ -942,6 +979,16 @@ impl ArrayData {
942979
self.validate_offsets::<i64>(values_data.len)?;
943980
Ok(())
944981
}
982+
DataType::ListView(field) => {
983+
let values_data = self.get_single_valid_child_data(field.data_type())?;
984+
self.validate_offsets_and_sizes::<i32>(values_data.len)?;
985+
Ok(())
986+
}
987+
DataType::LargeListView(field) => {
988+
let values_data = self.get_single_valid_child_data(field.data_type())?;
989+
self.validate_offsets_and_sizes::<i64>(values_data.len)?;
990+
Ok(())
991+
}
945992
DataType::FixedSizeList(field, list_size) => {
946993
let values_data = self.get_single_valid_child_data(field.data_type())?;
947994

@@ -1546,9 +1593,8 @@ pub fn layout(data_type: &DataType) -> DataTypeLayout {
15461593
DataType::BinaryView | DataType::Utf8View => DataTypeLayout::new_view(),
15471594
DataType::FixedSizeList(_, _) => DataTypeLayout::new_nullable_empty(), // all in child data
15481595
DataType::List(_) => DataTypeLayout::new_fixed_width::<i32>(),
1549-
DataType::ListView(_) | DataType::LargeListView(_) => {
1550-
unimplemented!("ListView/LargeListView not implemented")
1551-
}
1596+
DataType::ListView(_) => DataTypeLayout::new_list_view::<i32>(),
1597+
DataType::LargeListView(_) => DataTypeLayout::new_list_view::<i64>(),
15521598
DataType::LargeList(_) => DataTypeLayout::new_fixed_width::<i64>(),
15531599
DataType::Map(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
15541600
DataType::Struct(_) => DataTypeLayout::new_nullable_empty(), // all in child data,
@@ -1661,6 +1707,24 @@ impl DataTypeLayout {
16611707
variadic: true,
16621708
}
16631709
}
1710+
1711+
/// Describes a list view type
1712+
pub fn new_list_view<T>() -> Self {
1713+
Self {
1714+
buffers: vec![
1715+
BufferSpec::FixedWidth {
1716+
byte_width: mem::size_of::<T>(),
1717+
alignment: mem::align_of::<T>(),
1718+
},
1719+
BufferSpec::FixedWidth {
1720+
byte_width: mem::size_of::<T>(),
1721+
alignment: mem::align_of::<T>(),
1722+
},
1723+
],
1724+
can_contain_null_mask: true,
1725+
variadic: true,
1726+
}
1727+
}
16641728
}
16651729

16661730
/// Layout specification for a single data type buffer

arrow/tests/array_validation.rs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,94 @@ fn test_validate_offsets_last_too_large() {
342342
.unwrap();
343343
}
344344

345+
/// Test that the list of type `data_type` generates correct offset and size out of bounds errors
346+
fn check_list_view_offsets_sizes<T: ArrowNativeType>(
347+
data_type: DataType,
348+
offsets: Vec<T>,
349+
sizes: Vec<T>,
350+
) {
351+
let values: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
352+
let offsets_buffer = Buffer::from_slice_ref(offsets);
353+
let sizes_buffer = Buffer::from_slice_ref(sizes);
354+
ArrayData::try_new(
355+
data_type,
356+
4,
357+
None,
358+
0,
359+
vec![offsets_buffer, sizes_buffer],
360+
vec![values.into_data()],
361+
)
362+
.unwrap();
363+
}
364+
365+
#[test]
#[should_panic(expected = "Size 3 at index 3 is larger than the remaining values for ListView")]
fn test_validate_list_view_offsets_sizes() {
    // The last entry (offset 2, size 3) reaches past the four child values.
    let list_type = DataType::ListView(Arc::new(Field::new("f", DataType::Int32, true)));
    let (offsets, sizes) = (vec![0, 1, 1, 2], vec![1, 1, 1, 3]);
    check_list_view_offsets_sizes::<i32>(list_type, offsets, sizes);
}
375+
376+
#[test]
#[should_panic(
    expected = "Size 3 at index 3 is larger than the remaining values for LargeListView"
)]
fn test_validate_large_list_view_offsets_sizes() {
    // The last entry (offset 2, size 3) reaches past the four child values.
    let list_type = DataType::LargeListView(Arc::new(Field::new("f", DataType::Int32, true)));
    let (offsets, sizes) = (vec![0, 1, 1, 2], vec![1, 1, 1, 3]);
    check_list_view_offsets_sizes::<i64>(list_type, offsets, sizes);
}
388+
389+
#[test]
#[should_panic(expected = "Error converting offset[1] (-1) to usize for ListView")]
fn test_validate_list_view_negative_offsets() {
    // Entry 1 carries a negative offset, which cannot be converted to usize.
    let list_type = DataType::ListView(Arc::new(Field::new("f", DataType::Int32, true)));
    let (offsets, sizes) = (vec![0, -1, 1, 2], vec![1, 1, 1, 3]);
    check_list_view_offsets_sizes::<i32>(list_type, offsets, sizes);
}
399+
400+
#[test]
#[should_panic(expected = "Error converting size[2] (-1) to usize for ListView")]
fn test_validate_list_view_negative_sizes() {
    // Entry 2 carries a negative size, which cannot be converted to usize.
    let list_type = DataType::ListView(Arc::new(Field::new("f", DataType::Int32, true)));
    let (offsets, sizes) = (vec![0, 1, 1, 2], vec![1, 1, -1, 3]);
    check_list_view_offsets_sizes::<i32>(list_type, offsets, sizes);
}
410+
411+
#[test]
#[should_panic(expected = "Error converting offset[1] (-1) to usize for LargeListView")]
fn test_validate_large_list_view_negative_offsets() {
    // Entry 1 carries a negative offset, which cannot be converted to usize.
    let list_type = DataType::LargeListView(Arc::new(Field::new("f", DataType::Int32, true)));
    let (offsets, sizes) = (vec![0, -1, 1, 2], vec![1, 1, 1, 3]);
    check_list_view_offsets_sizes::<i64>(list_type, offsets, sizes);
}
421+
422+
#[test]
#[should_panic(expected = "Error converting size[2] (-1) to usize for LargeListView")]
fn test_validate_large_list_view_negative_sizes() {
    // Entry 2 carries a negative size, which cannot be converted to usize.
    let list_type = DataType::LargeListView(Arc::new(Field::new("f", DataType::Int32, true)));
    let (offsets, sizes) = (vec![0, 1, 1, 2], vec![1, 1, -1, 3]);
    check_list_view_offsets_sizes::<i64>(list_type, offsets, sizes);
}
432+
345433
#[test]
346434
#[should_panic(
347435
expected = "Values length 4 is less than the length (2) multiplied by the value size (2) for FixedSizeList"

0 commit comments

Comments
 (0)