From 4f9fee306e9f0632be6a10328c78d1995248778e Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Wed, 26 Mar 2025 15:39:25 -0400
Subject: [PATCH] Minor: reserve output space in ByteViewArrayDecoderDictionary

---
 parquet/src/arrow/array_reader/byte_view_array.rs | 12 +++++++++---
 parquet/src/arrow/buffer/view_buffer.rs           |  5 +++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs
index 6d6bbdc7b804..0896e90ee446 100644
--- a/parquet/src/arrow/array_reader/byte_view_array.rs
+++ b/parquet/src/arrow/array_reader/byte_view_array.rs
@@ -432,11 +432,13 @@ impl ByteViewArrayDecoderDictionary {
         }
     }
 
-    /// Reads the next indexes from self.decoder
-    /// the indexes are assumed to be indexes into `dict`
+    /// Reads the next `len` indexes from self.decoder
+    ///
+    /// The indexes are assumed to be indexes into `dict`
     /// the output values are written to output
     ///
-    /// Assumptions / Optimization
+    /// # Assumptions / Optimization
+    ///
     /// This function checks if dict.buffers() are the last buffers in `output`, and if so
     /// reuses the dictionary page buffers directly without copying data
     fn read(&mut self, output: &mut ViewBuffer, dict: &ViewBuffer, len: usize) -> Result {
@@ -458,6 +460,10 @@ impl ByteViewArrayDecoderDictionary {
             }
         };
 
+        // we are going to append `len` views to the output buffer so reserve
+        // the space for them to avoid reallocations
+        output.reserve_views(len);
+
         if need_to_create_new_buffer {
             for b in dict.buffers.iter() {
                 output.buffers.push(b.clone());
diff --git a/parquet/src/arrow/buffer/view_buffer.rs b/parquet/src/arrow/buffer/view_buffer.rs
index fd7d6c213f04..1d09adf8be08 100644
--- a/parquet/src/arrow/buffer/view_buffer.rs
+++ b/parquet/src/arrow/buffer/view_buffer.rs
@@ -37,6 +37,11 @@ impl ViewBuffer {
         self.views.is_empty()
     }
 
+    /// Reserve capacity for `additional` views
+    pub fn reserve_views(&mut self, additional: usize) {
+        self.views.reserve(additional);
+    }
+
     pub fn append_block(&mut self, block: Buffer) -> u32 {
         let block_id = self.buffers.len() as u32;
         self.buffers.push(block);