diff --git a/arrow-buffer/src/builder/boolean.rs b/arrow-buffer/src/builder/boolean.rs index 41a75ef3e2c1..e7696711ff56 100644 --- a/arrow-buffer/src/builder/boolean.rs +++ b/arrow-buffer/src/builder/boolean.rs @@ -16,7 +16,7 @@ // under the License. use crate::bit_util::apply_bitwise_binary_op; -use crate::{BooleanBuffer, Buffer, MutableBuffer, bit_util}; +use crate::{BooleanBuffer, Buffer, MutableBuffer, NullBuffer, bit_util}; use std::ops::Range; /// Builder for [`BooleanBuffer`] @@ -275,6 +275,14 @@ impl From for BooleanBuffer { } } +impl From for NullBuffer { + #[inline] + fn from(builder: BooleanBufferBuilder) -> Self { + let boolean_buffer = BooleanBuffer::from(builder); + NullBuffer::new(boolean_buffer) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/parquet/src/arrow/array_reader/struct_array.rs b/parquet/src/arrow/array_reader/struct_array.rs index b4a6a375334f..3c5a5f836bbe 100644 --- a/parquet/src/arrow/array_reader/struct_array.rs +++ b/parquet/src/arrow/array_reader/struct_array.rs @@ -18,8 +18,8 @@ use crate::arrow::array_reader::ArrayReader; use crate::errors::{ParquetError, Result}; use arrow_array::{Array, ArrayRef, StructArray, builder::BooleanBufferBuilder}; -use arrow_data::{ArrayData, ArrayDataBuilder}; -use arrow_schema::DataType as ArrowType; +use arrow_buffer::NullBuffer; +use arrow_schema::{DataType as ArrowType, DataType}; use std::any::Any; use std::sync::Arc; @@ -124,16 +124,15 @@ impl ArrayReader for StructArrayReader { return Err(general_err!("Not all children array length are the same!")); } - // Now we can build array data - let mut array_data_builder = ArrayDataBuilder::new(self.data_type.clone()) - .len(children_array_len) - .child_data( - children_array - .into_iter() - .map(|x| x.into_data()) - .collect::>(), - ); + let DataType::Struct(fields) = &self.data_type else { + return Err(general_err!( + "Internal: StructArrayReader must have struct data type, got {:?}", + self.data_type + )); + }; + let fields = fields.clone(); // cloning Fields is cheap (Arc internally) + let mut nulls = None; if self.nullable { // calculate struct def level data @@ -168,12 +167,19 @@ impl ArrayReader for StructArrayReader { if bitmap_builder.len() != children_array_len { return Err(general_err!("Failed to decode level data for struct array")); } - - array_data_builder = array_data_builder.null_bit_buffer(Some(bitmap_builder.into())); + nulls = Some(NullBuffer::from(bitmap_builder)); } - let array_data = unsafe { array_data_builder.build_unchecked() }; - Ok(Arc::new(StructArray::from(array_data))) + // Safety: checked above that all children array data have same + // length and correct type + unsafe { + Ok(Arc::new(StructArray::new_unchecked_with_length( + fields, + children_array, + nulls, + children_array_len, + ))) + } } fn skip_records(&mut self, num_records: usize) -> Result {