Skip to content

Commit 93e4eb2

Browse files
authored
Faster GenericByteView construction (#6102)
* add benchmark to track performance * fast byte view construction * make doc happy * fix clippy * update comments
1 parent 8aa91e5 commit 93e4eb2

File tree

1 file changed

+41
-15
lines changed

1 file changed

+41
-15
lines changed

arrow-array/src/builder/generic_bytes_view_builder.rs

Lines changed: 41 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -420,23 +420,49 @@ pub type StringViewBuilder = GenericByteViewBuilder<StringViewType>;
420420
/// [`GenericByteViewBuilder::append_null`] as normal.
421421
pub type BinaryViewBuilder = GenericByteViewBuilder<BinaryViewType>;
422422

423+
/// Creates a view from a fixed length input (the compiler can generate
424+
/// specialized code for this)
425+
fn make_inlined_view<const LEN: usize>(data: &[u8]) -> u128 {
426+
let mut view_buffer = [0; 16];
427+
view_buffer[0..4].copy_from_slice(&(LEN as u32).to_le_bytes());
428+
view_buffer[4..4 + LEN].copy_from_slice(&data[..LEN]);
429+
u128::from_le_bytes(view_buffer)
430+
}
431+
423432
/// Create a view based on the given data, block id and offset
424-
#[inline(always)]
433+
/// Note that the code below was carefully checked against its generated x86_64 assembly: <https://godbolt.org/z/685YPsd5G>
434+
/// The goal is to avoid calling into `ptr::copy_nonoverlapping`, which results in a function call (i.e., not inlined),
435+
/// which slows down things.
436+
#[inline(never)]
425437
pub fn make_view(data: &[u8], block_id: u32, offset: u32) -> u128 {
426-
let len = data.len() as u32;
427-
if len <= 12 {
428-
let mut view_buffer = [0; 16];
429-
view_buffer[0..4].copy_from_slice(&len.to_le_bytes());
430-
view_buffer[4..4 + data.len()].copy_from_slice(data);
431-
u128::from_le_bytes(view_buffer)
432-
} else {
433-
let view = ByteView {
434-
length: len,
435-
prefix: u32::from_le_bytes(data[0..4].try_into().unwrap()),
436-
buffer_index: block_id,
437-
offset,
438-
};
439-
view.into()
438+
let len = data.len();
439+
440+
// Generate specialized code for each potential small string length
441+
// to improve performance
442+
match len {
443+
0 => make_inlined_view::<0>(data),
444+
1 => make_inlined_view::<1>(data),
445+
2 => make_inlined_view::<2>(data),
446+
3 => make_inlined_view::<3>(data),
447+
4 => make_inlined_view::<4>(data),
448+
5 => make_inlined_view::<5>(data),
449+
6 => make_inlined_view::<6>(data),
450+
7 => make_inlined_view::<7>(data),
451+
8 => make_inlined_view::<8>(data),
452+
9 => make_inlined_view::<9>(data),
453+
10 => make_inlined_view::<10>(data),
454+
11 => make_inlined_view::<11>(data),
455+
12 => make_inlined_view::<12>(data),
456+
// When the string is longer than 12 bytes it can't be inlined, so we create a ByteView instead.
457+
_ => {
458+
let view = ByteView {
459+
length: len as u32,
460+
prefix: u32::from_le_bytes(data[0..4].try_into().unwrap()),
461+
buffer_index: block_id,
462+
offset,
463+
};
464+
view.as_u128()
465+
}
440466
}
441467
}
442468

0 commit comments

Comments
 (0)