Skip to content

Commit 75ea40b

Browse files
committed
[Parquet] perf: Create StructArrays directly rather than use ArrayData
1 parent 96637fc commit 75ea40b

1 file changed

Lines changed: 21 additions & 15 deletions

File tree

parquet/src/arrow/array_reader/struct_array.rs

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
use crate::arrow::array_reader::ArrayReader;
1919
use crate::errors::{ParquetError, Result};
2020
use arrow_array::{Array, ArrayRef, StructArray, builder::BooleanBufferBuilder};
21-
use arrow_data::{ArrayData, ArrayDataBuilder};
22-
use arrow_schema::DataType as ArrowType;
21+
use arrow_buffer::NullBuffer;
22+
use arrow_schema::{DataType as ArrowType, DataType};
2323
use std::any::Any;
2424
use std::sync::Arc;
2525

@@ -124,16 +124,15 @@ impl ArrayReader for StructArrayReader {
124124
return Err(general_err!("Not all children array length are the same!"));
125125
}
126126

127-
// Now we can build array data
128-
let mut array_data_builder = ArrayDataBuilder::new(self.data_type.clone())
129-
.len(children_array_len)
130-
.child_data(
131-
children_array
132-
.into_iter()
133-
.map(|x| x.into_data())
134-
.collect::<Vec<ArrayData>>(),
135-
);
127+
let DataType::Struct(fields) = &self.data_type else {
128+
return Err(general_err!(
129+
"Internal: StructArrayReader must have struct data type, got {:?}",
130+
self.data_type
131+
));
132+
};
133+
let fields = fields.clone(); // cloning Fields is cheap (Arc internally)
136134

135+
let mut nulls = None;
137136
if self.nullable {
138137
// calculate struct def level data
139138

@@ -168,12 +167,19 @@ impl ArrayReader for StructArrayReader {
168167
if bitmap_builder.len() != children_array_len {
169168
return Err(general_err!("Failed to decode level data for struct array"));
170169
}
171-
172-
array_data_builder = array_data_builder.null_bit_buffer(Some(bitmap_builder.into()));
170+
nulls = Some(NullBuffer::new(bitmap_builder.finish()));
173171
}
174172

175-
let array_data = unsafe { array_data_builder.build_unchecked() };
176-
Ok(Arc::new(StructArray::from(array_data)))
173+
// Safety: checked above that all children array data have same
174+
// length and correct type
175+
unsafe {
176+
Ok(Arc::new(StructArray::new_unchecked_with_length(
177+
fields,
178+
children_array,
179+
nulls,
180+
children_array_len,
181+
)))
182+
}
177183
}
178184

179185
fn skip_records(&mut self, num_records: usize) -> Result<usize> {

0 commit comments

Comments
 (0)