apache · alamb · Dec 20, 2025 · Dec 20, 2025 · Dec 20, 2025 · Dec 20, 2025
diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs
@@ -23,7 +23,7 @@
 //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
 
 use arrow_array::*;
-use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_quaternary_op_helper};
+use arrow_buffer::buffer::bitwise_quaternary_op_helper;
 use arrow_buffer::{BooleanBuffer, NullBuffer, buffer_bin_and_not};
 use arrow_schema::ArrowError;
 
@@ -74,7 +74,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
             // The final null bit is set only if:
             // 1. left null bit is set, or
             // 2. right data bit is false (because null AND false = false).
-            Some(bitwise_bin_op_helper(
+            Some(BooleanBuffer::from_bitwise_binary_op(
                 left_null_buffer.buffer(),
                 left_null_buffer.offset(),
                 right_values.inner(),
@@ -85,7 +85,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
         }
         (None, Some(right_null_buffer)) => {
             // Same as above
-            Some(bitwise_bin_op_helper(
+            Some(BooleanBuffer::from_bitwise_binary_op(
                 right_null_buffer.buffer(),
                 right_null_buffer.offset(),
                 left_values.inner(),
@@ -100,7 +100,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
             // d is right data bits.
             // The final null bits are:
             // (a | (c & !d)) & (c | (a & !b))
-            Some(bitwise_quaternary_op_helper(
+            let buffer = bitwise_quaternary_op_helper(
                 [
                     left_null_buffer.buffer(),
                     left_values.inner(),
@@ -115,10 +115,11 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
                 ],
                 left.len(),
                 |a, b, c, d| (a | (c & !d)) & (c | (a & !b)),
-            ))
+            );
+            Some(BooleanBuffer::new(buffer, 0, left.len()))
         }
     };
-    let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, left.len())));
+    let nulls = buffer.map(NullBuffer::new);
     Ok(BooleanArray::new(left_values & right_values, nulls))
 }
 
@@ -169,7 +170,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
             // The final null bit is set only if:
             // 1. left null bit is set, or
             // 2. right data bit is true (because null OR true = true).
-            Some(bitwise_bin_op_helper(
+            Some(BooleanBuffer::from_bitwise_binary_op(
                 left_nulls.buffer(),
                 left_nulls.offset(),
                 right_values.inner(),
@@ -180,7 +181,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
         }
         (None, Some(right_nulls)) => {
             // Same as above
-            Some(bitwise_bin_op_helper(
+            Some(BooleanBuffer::from_bitwise_binary_op(
                 right_nulls.buffer(),
                 right_nulls.offset(),
                 left_values.inner(),
@@ -195,7 +196,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
             // d is right data bits.
             // The final null bits are:
             // (a | (c & d)) & (c | (a & b))
-            Some(bitwise_quaternary_op_helper(
+            let buffer = bitwise_quaternary_op_helper(
                 [
                     left_nulls.buffer(),
                     left_values.inner(),
@@ -210,11 +211,12 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
                 ],
                 left.len(),
                 |a, b, c, d| (a | (c & d)) & (c | (a & b)),
-            ))
+            );
+            Some(BooleanBuffer::new(buffer, 0, left.len()))
         }
     };
 
-    let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, left.len())));
+    let nulls = buffer.map(NullBuffer::new);
     Ok(BooleanArray::new(left_values | right_values, nulls))
 }
 

diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs
@@ -19,7 +19,7 @@ use crate::bit_chunk_iterator::BitChunks;
 use crate::bit_iterator::{BitIndexIterator, BitIndexU32Iterator, BitIterator, BitSliceIterator};
 use crate::{
     BooleanBufferBuilder, Buffer, MutableBuffer, bit_util, buffer_bin_and, buffer_bin_or,
-    buffer_bin_xor, buffer_unary_not,
+    buffer_bin_xor, buffer_unary_not, util,
 };
 
 use std::ops::{BitAnd, BitOr, BitXor, Not};
@@ -127,9 +127,10 @@ impl BooleanBuffer {
     /// * The output always has zero offset
     ///
     /// # See Also
+    /// - [`BooleanBuffer::from_bitwise_binary_op`] to create a new buffer from a binary operation
     /// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for in-place unary bitwise operations
     ///
-    /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of an input [`Buffer`]
+    /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of a byte slice
     /// ```
     /// # use arrow_buffer::BooleanBuffer;
     /// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits
@@ -179,9 +180,8 @@ impl BooleanBuffer {
             result.truncate(chunks.num_bytes());
         }
 
-        let buffer = Buffer::from(result);
         BooleanBuffer {
-            buffer,
+            buffer: Buffer::from(result),
             offset: 0,
             len: len_in_bits,
         }
@@ -212,6 +212,126 @@ impl BooleanBuffer {
         Some(BooleanBuffer::new(buffer, 0, len_in_bits))
     }
 
+    /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op` to
+    /// `len_in_bits` bits read from each of two inputs at the given bit offsets.
+    ///
+    /// This function is faster than applying the operation bit by bit because
+    /// it processes the inputs in chunks of 64 bits (8 bytes) at a time.
+    ///
+    /// # Notes:
+    /// See the notes on [`Self::from_bitwise_unary_op`], which also apply here.
+    ///
+    /// # See Also
+    /// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a single input buffer.
+    /// - [`apply_bitwise_binary_op`](bit_util::apply_bitwise_binary_op) for in-place binary bitwise operations
+    ///
+    /// # Example: Create new [`BooleanBuffer`] from bitwise `AND` of two [`Buffer`]s
+    /// ```
+    /// # use arrow_buffer::{Buffer, BooleanBuffer};
+    /// let left = Buffer::from(vec![0b11001100u8, 0b10111010u8]); // 2 bytes = 16 bits
+    /// let right = Buffer::from(vec![0b10101010u8, 0b11011100u8, 0b11110000u8]); // 3 bytes = 24 bits
+    /// // AND of the first 12 bits of each input
+    /// let result = BooleanBuffer::from_bitwise_binary_op(
+    ///   &left, 0, &right, 0, 12, |a, b| a & b
+    /// );
+    /// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]);
+    /// ```
+    ///
+    /// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte slices
+    /// ```
+    /// # use arrow_buffer::BooleanBuffer;
+    /// let left = [0b11001100u8, 0b10111010u8];
+    /// let right = [0b10101010u8, 0b11011100u8];
+    /// // OR of bits 4..16 from left with bits 0..12 from right
+    /// let result = BooleanBuffer::from_bitwise_binary_op(
+    ///  &left, 4, &right, 0, 12, |a, b| a | b
+    /// );
+    /// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]);
+    /// ```
+    pub fn from_bitwise_binary_op<F>(
+        left: impl AsRef<[u8]>,
+        left_offset_in_bits: usize,
+        right: impl AsRef<[u8]>,
+        right_offset_in_bits: usize,
+        len_in_bits: usize,
+        mut op: F,
+    ) -> Self
+    where
+        F: FnMut(u64, u64) -> u64,
+    {
+        // try the fast path when both bit offsets fall on a byte boundary
+        if left_offset_in_bits & 0x7 == 0 && right_offset_in_bits & 0x7 == 0 {
+            if let Some(result) = Self::try_from_aligned_bitwise_binary_op(
+                &left.as_ref()[left_offset_in_bits / 8..], // skip whole bytes before the offset
+                &right.as_ref()[right_offset_in_bits / 8..],
+                len_in_bits,
+                &mut op,
+            ) {
+                return result;
+            }
+        }
+        let left_chunks = BitChunks::new(left.as_ref(), left_offset_in_bits, len_in_bits);
+        let right_chunks = BitChunks::new(right.as_ref(), right_offset_in_bits, len_in_bits);
+
+        let chunks = left_chunks
+            .iter()
+            .zip(right_chunks.iter())
+            .map(|(left, right)| op(left, right));
+        // SAFETY: `chunks` zips and maps two `BitChunks` iterators, which (per the
+        // original author's note) correctly report their upper bound
+        let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };
+
+        let remainder_bytes = util::bit_util::ceil(left_chunks.remainder_len(), 8);
+        let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
+        // bits are counted starting from the least significant bit, so `to_le_bytes` gives the bytes in order
+        let rem = &rem.to_le_bytes()[0..remainder_bytes];
+        buffer.extend_from_slice(rem);
+
+        BooleanBuffer {
+            buffer: Buffer::from(buffer),
+            offset: 0,
+            len: len_in_bits,
+        }
+    }
+
+    /// Like [`Self::from_bitwise_binary_op`] but optimized for the case where
+    /// both inputs can be reinterpreted in place as slices of `u64` words.
+    ///
+    /// Returns `None` if either input is not fully u64 aligned (start or length)
+    fn try_from_aligned_bitwise_binary_op<F>(
+        left: &[u8],
+        right: &[u8],
+        len_in_bits: usize,
+        op: &mut F,
+    ) -> Option<Self>
+    where
+        F: FnMut(u64, u64) -> u64,
+    {
+        // SAFETY: every bit pattern is a valid `u64`; `align_to` handles alignment
+        // NOTE(review): words are read in native byte order, fine for bitwise `op` — confirm
+        let (left_prefix, left_u64s, left_suffix) = unsafe { left.align_to::<u64>() };
+        let (right_prefix, right_u64s, right_suffix) = unsafe { right.align_to::<u64>() };
+        if !(left_prefix.is_empty()
+            && right_prefix.is_empty()
+            && left_suffix.is_empty()
+            && right_suffix.is_empty())
+        {
+            // Couldn't make this case any faster than the default path;
+            // handling non-empty prefixes/suffixes here is possible future work.
+            return None;
+        }
+        // the buffers are word (64 bit) aligned, so use optimized Vec code.
+        // The inputs may extend far past `len_in_bits` (e.g. a short slice of a
+        // long buffer), so only process the words that hold requested bits.
+        let result_u64s = left_u64s
+            .iter()
+            .zip(right_u64s.iter())
+            .take(len_in_bits.div_ceil(64))
+            .map(|(l, r)| op(*l, *r))
+            .collect::<Vec<u64>>();
+        Some(BooleanBuffer::new(Buffer::from(result_u64s), 0, len_in_bits))
+    }
+
     /// Returns the number of set bits in this buffer
     pub fn count_set_bits(&self) -> usize {
         self.buffer.count_set_bits_offset(self.offset, self.len)
@@ -591,4 +711,42 @@ mod tests {
             assert_eq!(result, expected);
         }
     }
+
+    #[test]
+    fn test_from_bitwise_binary_op() {
+        // build two independent random 1024-bit inputs
+        let input_bools_left = (0..1024)
+            .map(|_| rand::random::<bool>())
+            .collect::<Vec<bool>>();
+        let input_bools_right = (0..1024)
+            .map(|_| rand::random::<bool>())
+            .collect::<Vec<bool>>();
+        let input_buffer_left = BooleanBuffer::from(&input_bools_left[..]);
+        let input_buffer_right = BooleanBuffer::from(&input_bools_right[..]);
+
+        for left_offset in 0..200 {
+            for right_offset in [0, 4, 5, 17, 33, 24, 45, 64, 65, 100, 200] {
+                for len_offset in [0, 1, 44, 100, 256, 300, 512] {
+                    let len = 1024 - len_offset - left_offset.max(right_offset); // keep both reads within the 1024 input bits
+                    // compute the AND with the function under test
+                    let result = BooleanBuffer::from_bitwise_binary_op(
+                        input_buffer_left.values(),
+                        left_offset,
+                        input_buffer_right.values(),
+                        right_offset,
+                        len,
+                        |a, b| a & b,
+                    );
+                    // compute the expected AND bit by bit from the source bools
+                    let expected = input_bools_left[left_offset..]
+                        .iter()
+                        .zip(&input_bools_right[right_offset..])
+                        .take(len)
+                        .map(|(a, b)| *a & *b)
+                        .collect::<BooleanBuffer>();
+                    assert_eq!(result, expected);
+                }
+            }
+        }
+    }
 }