-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Speed up binary kernels (30% faster `and` and `or`), add `BooleanBuffer::from_bitwise_binary_op`
#9090
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+169
−20
Merged
Changes from 3 commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
4f93fae
Add more tests for nullif
alamb 9a8879d
Revert changes to `bitwise_unary_op_helper`, `nullif`
alamb 577dfa8
Speed up binary kernels, add `BooleanBuffer::from_bitwise_binary_op`
alamb 7fe57a7
Merge branch 'main' into alamb/boolean_kernel
alamb a22ad8d
Fix comment
alamb a110e2c
Merge remote-tracking branch 'apache/main' into alamb/boolean_kernel
alamb File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -165,12 +165,14 @@ impl BooleanBuffer { | |
| /// * `op` must only apply bitwise operations | ||
| /// on the relevant bits; the input `u64` may contain irrelevant bits | ||
| /// and may be processed differently on different endian architectures. | ||
| /// * `op` may be called with input bits outside the requested range | ||
| /// * The output always has zero offset | ||
| /// | ||
| /// # See Also | ||
| /// - [`BooleanBuffer::from_bitwise_binary_op`] to create a new buffer from a binary operation | ||
| /// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for in-place unary bitwise operations | ||
| /// | ||
| /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of an input [`Buffer`] | ||
| /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of a byte slice | ||
| /// ``` | ||
| /// # use arrow_buffer::BooleanBuffer; | ||
| /// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits | ||
|
|
@@ -220,9 +222,8 @@ impl BooleanBuffer { | |
| result.truncate(chunks.num_bytes()); | ||
| } | ||
|
|
||
| let buffer = Buffer::from(result); | ||
| BooleanBuffer { | ||
| buffer, | ||
| buffer: Buffer::from(result), | ||
| bit_offset: 0, | ||
| bit_len: len_in_bits, | ||
| } | ||
|
|
@@ -253,6 +254,112 @@ impl BooleanBuffer { | |
| Some(BooleanBuffer::new(buffer, 0, len_in_bits)) | ||
| } | ||
|
|
||
| /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op` to | ||
| /// the relevant bits from two input buffers. | ||
| /// | ||
| /// This function is faster than applying the operation bit by bit as | ||
| /// it processes input buffers in chunks of 64 bits (8 bytes) at a time. | ||
| /// | ||
| /// # Notes: | ||
| /// See the notes on [`Self::from_bitwise_unary_op`]. | ||
| /// | ||
| /// # See Also | ||
| /// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a single input buffer. | ||
| /// - [`apply_bitwise_binary_op`](bit_util::apply_bitwise_binary_op) for in-place binary bitwise operations | ||
| /// | ||
| /// # Example: Create new [`BooleanBuffer`] from bitwise `AND` of two [`Buffer`]s | ||
| /// ``` | ||
| /// # use arrow_buffer::{Buffer, BooleanBuffer}; | ||
| /// let left = Buffer::from(vec![0b11001100u8, 0b10111010u8]); // 2 bytes = 16 bits | ||
| /// let right = Buffer::from(vec![0b10101010u8, 0b11011100u8, 0b11110000u8]); // 3 bytes = 24 bits | ||
| /// // AND of the first 12 bits | ||
| /// let result = BooleanBuffer::from_bitwise_binary_op( | ||
| /// &left, 0, &right, 0, 12, |a, b| a & b | ||
| /// ); | ||
| /// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]); | ||
| /// ``` | ||
| /// | ||
| /// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte slices | ||
| /// ``` | ||
| /// # use arrow_buffer::BooleanBuffer; | ||
| /// let left = [0b11001100u8, 0b10111010u8]; | ||
| /// let right = [0b10101010u8, 0b11011100u8]; | ||
| /// // OR of bits 4..16 from left and bits 0..12 from right | ||
| /// let result = BooleanBuffer::from_bitwise_binary_op( | ||
| /// &left, 4, &right, 0, 12, |a, b| a | b | ||
| /// ); | ||
| /// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]); | ||
| /// ``` | ||
| pub fn from_bitwise_binary_op<F>( | ||
| left: impl AsRef<[u8]>, | ||
| left_offset_in_bits: usize, | ||
| right: impl AsRef<[u8]>, | ||
| right_offset_in_bits: usize, | ||
| len_in_bits: usize, | ||
| mut op: F, | ||
| ) -> Self | ||
| where | ||
| F: FnMut(u64, u64) -> u64, | ||
| { | ||
| let left = left.as_ref(); | ||
| let right = right.as_ref(); | ||
| // try fast path for aligned input | ||
| // If the underlying buffers are aligned to u64 we can apply the operation directly on the u64 slices | ||
| // to improve performance. | ||
| if left_offset_in_bits & 0x7 == 0 && right_offset_in_bits & 0x7 == 0 { | ||
| // align to byte boundary | ||
| let left = &left[left_offset_in_bits / 8..]; | ||
| let right = &right[right_offset_in_bits / 8..]; | ||
|
|
||
| unsafe { | ||
| let (left_prefix, left_u64s, left_suffix) = left.align_to::<u64>(); | ||
| let (right_prefix, right_u64s, right_suffix) = right.align_to::<u64>(); | ||
| // if there is no prefix or suffix, both buffers are aligned and | ||
| // we can do the operation directly on u64s. | ||
| // TODO: consider `slice::as_chunks` and `u64::from_le_bytes` when MSRV reaches 1.88. | ||
| // https://github.com/apache/arrow-rs/pull/9022#discussion_r2639949361 | ||
| if left_prefix.is_empty() | ||
| && right_prefix.is_empty() | ||
| && left_suffix.is_empty() | ||
| && right_suffix.is_empty() | ||
| { | ||
| let result_u64s = left_u64s | ||
| .iter() | ||
| .zip(right_u64s.iter()) | ||
| .map(|(l, r)| op(*l, *r)) | ||
| .collect::<Vec<u64>>(); | ||
| return BooleanBuffer { | ||
| buffer: Buffer::from(result_u64s), | ||
| bit_offset: 0, | ||
| bit_len: len_in_bits, | ||
| }; | ||
| } | ||
| } | ||
| } | ||
| let left_chunks = BitChunks::new(left, left_offset_in_bits, len_in_bits); | ||
| let right_chunks = BitChunks::new(right, right_offset_in_bits, len_in_bits); | ||
|
|
||
| let chunks = left_chunks | ||
| .iter() | ||
| .zip(right_chunks.iter()) | ||
| .map(|(left, right)| op(left, right)); | ||
| // Soundness: `chunks` is derived from `BitChunks` iterators, each of which | ||
|
||
| // correctly reports its upper bound | ||
| let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) }; | ||
|
|
||
| let remainder_bytes = bit_util::ceil(left_chunks.remainder_len(), 8); | ||
| let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits()); | ||
| // we are counting bits starting from the least significant bit, so `to_le_bytes` should be correct | ||
| let rem = &rem.to_le_bytes()[0..remainder_bytes]; | ||
| buffer.extend_from_slice(rem); | ||
|
|
||
| BooleanBuffer { | ||
| buffer: Buffer::from(buffer), | ||
| bit_offset: 0, | ||
| bit_len: len_in_bits, | ||
| } | ||
| } | ||
|
|
||
| /// Returns the number of set bits in this buffer | ||
| pub fn count_set_bits(&self) -> usize { | ||
| self.buffer | ||
|
|
@@ -655,4 +762,42 @@ mod tests { | |
| assert_eq!(result, expected); | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_from_bitwise_binary_op() { | ||
| // pick random boolean inputs | ||
| let input_bools_left = (0..1024) | ||
| .map(|_| rand::random::<bool>()) | ||
| .collect::<Vec<bool>>(); | ||
| let input_bools_right = (0..1024) | ||
| .map(|_| rand::random::<bool>()) | ||
| .collect::<Vec<bool>>(); | ||
| let input_buffer_left = BooleanBuffer::from(&input_bools_left[..]); | ||
| let input_buffer_right = BooleanBuffer::from(&input_bools_right[..]); | ||
|
|
||
| for left_offset in 0..200 { | ||
| for right_offset in [0, 4, 5, 17, 33, 24, 45, 64, 65, 100, 200] { | ||
| for len_offset in [0, 1, 44, 100, 256, 300, 512] { | ||
| let len = 1024 - len_offset - left_offset.max(right_offset); // ensure we don't go out of bounds | ||
| // compute with AND | ||
| let result = BooleanBuffer::from_bitwise_binary_op( | ||
| input_buffer_left.values(), | ||
| left_offset, | ||
| input_buffer_right.values(), | ||
| right_offset, | ||
| len, | ||
| |a, b| a & b, | ||
| ); | ||
| // compute directly from bools | ||
| let expected = input_bools_left[left_offset..] | ||
| .iter() | ||
| .zip(&input_bools_right[right_offset..]) | ||
| .take(len) | ||
| .map(|(a, b)| *a & *b) | ||
| .collect::<BooleanBuffer>(); | ||
| assert_eq!(result, expected); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is the new API