Skip to content

Commit 5760049

Browse files
authored
Improve performance of interleave_primitive (-15% to -45%) / interleave_bytes (-10% to -25%) (#7420)
* Improve interleave_primitive
* Fmt
* Improve interleave_bytes
* Improve null handling
* Fmt
* Just use Vec (no change in perf)
1 parent cee5124 commit 5760049

File tree

1 file changed

+17
-19
lines changed

1 file changed

+17
-19
lines changed

arrow-select/src/interleave.rs

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder
2222
use arrow_array::cast::AsArray;
2323
use arrow_array::types::*;
2424
use arrow_array::*;
25-
use arrow_buffer::{ArrowNativeType, MutableBuffer, NullBuffer, NullBufferBuilder, OffsetBuffer};
25+
use arrow_buffer::{ArrowNativeType, BooleanBuffer, MutableBuffer, NullBuffer, OffsetBuffer};
2626
use arrow_data::transform::MutableArrayData;
2727
use arrow_data::ByteView;
2828
use arrow_schema::{ArrowError, DataType};
@@ -132,12 +132,11 @@ impl<'a, T: Array + 'static> Interleave<'a, T> {
132132

133133
let nulls = match has_nulls {
134134
true => {
135-
let mut builder = NullBufferBuilder::new(indices.len());
136-
for (a, b) in indices {
137-
let v = arrays[*a].is_valid(*b);
138-
builder.append(v)
139-
}
140-
builder.finish()
135+
let nulls = BooleanBuffer::collect_bool(indices.len(), |i| {
136+
let (a, b) = indices[i];
137+
arrays[a].is_valid(b)
138+
});
139+
Some(nulls.into())
141140
}
142141
false => None,
143142
};
@@ -153,11 +152,10 @@ fn interleave_primitive<T: ArrowPrimitiveType>(
153152
) -> Result<ArrayRef, ArrowError> {
154153
let interleaved = Interleave::<'_, PrimitiveArray<T>>::new(values, indices);
155154

156-
let mut values = Vec::with_capacity(indices.len());
157-
for (a, b) in indices {
158-
let v = interleaved.arrays[*a].value(*b);
159-
values.push(v)
160-
}
155+
let values = indices
156+
.iter()
157+
.map(|(a, b)| interleaved.arrays[*a].value(*b))
158+
.collect::<Vec<_>>();
161159

162160
let array = PrimitiveArray::<T>::new(values.into(), interleaved.nulls);
163161
Ok(Arc::new(array.with_data_type(data_type.clone())))
@@ -170,23 +168,23 @@ fn interleave_bytes<T: ByteArrayType>(
170168
let interleaved = Interleave::<'_, GenericByteArray<T>>::new(values, indices);
171169

172170
let mut capacity = 0;
173-
let mut offsets = BufferBuilder::<T::Offset>::new(indices.len() + 1);
174-
offsets.append(T::Offset::from_usize(0).unwrap());
175-
for (a, b) in indices {
171+
let mut offsets = Vec::with_capacity(indices.len() + 1);
172+
offsets.push(T::Offset::from_usize(0).unwrap());
173+
offsets.extend(indices.iter().map(|(a, b)| {
176174
let o = interleaved.arrays[*a].value_offsets();
177175
let element_len = o[*b + 1].as_usize() - o[*b].as_usize();
178176
capacity += element_len;
179-
offsets.append(T::Offset::from_usize(capacity).expect("overflow"));
180-
}
177+
T::Offset::from_usize(capacity).expect("overflow")
178+
}));
181179

182-
let mut values = MutableBuffer::new(capacity);
180+
let mut values = Vec::with_capacity(capacity);
183181
for (a, b) in indices {
184182
values.extend_from_slice(interleaved.arrays[*a].value(*b).as_ref());
185183
}
186184

187185
// Safety: safe by construction
188186
let array = unsafe {
189-
let offsets = OffsetBuffer::new_unchecked(offsets.finish().into());
187+
let offsets = OffsetBuffer::new_unchecked(offsets.into());
190188
GenericByteArray::<T>::new_unchecked(offsets, values.into(), interleaved.nulls)
191189
};
192190
Ok(Arc::new(array))

0 commit comments

Comments
 (0)