Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 0 additions & 18 deletions arrow-arith/src/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -809,15 +809,6 @@ where

/// Returns the minimum value in the array, according to the natural order.
/// For floating point arrays any NaN values are considered to be greater than any other non-null value
///
/// # Example
/// ```rust
/// # use arrow_array::Int32Array;
/// # use arrow_arith::aggregate::min;
/// let array = Int32Array::from(vec![8, 2, 4]);
/// let result = min(&array);
/// assert_eq!(result, Some(2));
/// ```
pub fn min<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
where
T::Native: PartialOrd,
Expand All @@ -827,15 +818,6 @@ where

/// Returns the maximum value in the array, according to the natural order.
/// For floating point arrays any NaN values are considered to be greater than any other non-null value
///
/// # Example
/// ```rust
/// # use arrow_array::Int32Array;
/// # use arrow_arith::aggregate::max;
/// let array = Int32Array::from(vec![4, 8, 2]);
/// let result = max(&array);
/// assert_eq!(result, Some(8));
/// ```
pub fn max<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
where
T::Native: PartialOrd,
Expand Down
24 changes: 13 additions & 11 deletions arrow-arith/src/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.

use arrow_array::*;
use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_quaternary_op_helper};
use arrow_buffer::buffer::bitwise_quaternary_op_helper;
use arrow_buffer::{BooleanBuffer, NullBuffer, buffer_bin_and_not};
use arrow_schema::ArrowError;

Expand Down Expand Up @@ -74,7 +74,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
// The final null bit is set only if:
// 1. left null bit is set, or
// 2. right data bit is false (because null AND false = false).
Some(bitwise_bin_op_helper(
Some(BooleanBuffer::from_bitwise_binary_op(
left_null_buffer.buffer(),
left_null_buffer.offset(),
right_values.inner(),
Expand All @@ -85,7 +85,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
}
(None, Some(right_null_buffer)) => {
// Same as above
Some(bitwise_bin_op_helper(
Some(BooleanBuffer::from_bitwise_binary_op(
right_null_buffer.buffer(),
right_null_buffer.offset(),
left_values.inner(),
Expand All @@ -100,7 +100,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
// d is right data bits.
// The final null bits are:
// (a | (c & !d)) & (c | (a & !b))
Some(bitwise_quaternary_op_helper(
let buffer = bitwise_quaternary_op_helper(
[
left_null_buffer.buffer(),
left_values.inner(),
Expand All @@ -115,10 +115,11 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
],
left.len(),
|a, b, c, d| (a | (c & !d)) & (c | (a & !b)),
))
);
Some(BooleanBuffer::new(buffer, 0, left.len()))
}
};
let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, left.len())));
let nulls = buffer.map(NullBuffer::new);
Ok(BooleanArray::new(left_values & right_values, nulls))
}

Expand Down Expand Up @@ -169,7 +170,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
// The final null bit is set only if:
// 1. left null bit is set, or
// 2. right data bit is true (because null OR true = true).
Some(bitwise_bin_op_helper(
Some(BooleanBuffer::from_bitwise_binary_op(
left_nulls.buffer(),
left_nulls.offset(),
right_values.inner(),
Expand All @@ -180,7 +181,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
}
(None, Some(right_nulls)) => {
// Same as above
Some(bitwise_bin_op_helper(
Some(BooleanBuffer::from_bitwise_binary_op(
right_nulls.buffer(),
right_nulls.offset(),
left_values.inner(),
Expand All @@ -195,7 +196,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
// d is right data bits.
// The final null bits are:
// (a | (c & d)) & (c | (a & b))
Some(bitwise_quaternary_op_helper(
let buffer = bitwise_quaternary_op_helper(
[
left_nulls.buffer(),
left_values.inner(),
Expand All @@ -210,11 +211,12 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
],
left.len(),
|a, b, c, d| (a | (c & d)) & (c | (a & b)),
))
);
Some(BooleanBuffer::new(buffer, 0, left.len()))
}
};

let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, left.len())));
let nulls = buffer.map(NullBuffer::new);
Ok(BooleanArray::new(left_values | right_values, nulls))
}

Expand Down
145 changes: 142 additions & 3 deletions arrow-buffer/src/buffer/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,10 @@ impl BooleanBuffer {
/// * The output always has zero offset
///
/// # See Also
/// - [`BooleanBuffer::from_bitwise_binary_op`] to create a new buffer from a binary operation
/// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for in-place unary bitwise operations
///
/// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of an input [`Buffer`]
/// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of a byte slice
/// ```
/// # use arrow_buffer::BooleanBuffer;
/// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits
Expand Down Expand Up @@ -220,9 +221,8 @@ impl BooleanBuffer {
result.truncate(chunks.num_bytes());
}

let buffer = Buffer::from(result);
BooleanBuffer {
buffer,
buffer: Buffer::from(result),
bit_offset: 0,
bit_len: len_in_bits,
}
Expand Down Expand Up @@ -253,6 +253,107 @@ impl BooleanBuffer {
Some(BooleanBuffer::new(buffer, 0, len_in_bits))
}

/// Create a new [`BooleanBuffer`] by applying the bitwise operation `op` to
/// the relevant bits from two input buffers.
///
/// The bits `left_offset_in_bits..left_offset_in_bits + len_in_bits` of `left`
/// are combined with the bits
/// `right_offset_in_bits..right_offset_in_bits + len_in_bits` of `right`.
/// `op` receives 64 bits from each input at a time (least significant bit
/// first) and returns the 64 combined bits.
///
/// This function is faster than applying the operation bit by bit as
/// it processes input buffers in chunks of 64 bits (8 bytes) at a time
///
/// # Notes:
/// See notes on [Self::from_bitwise_unary_op]
///
/// # See Also
/// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a single input buffer.
/// - [`apply_bitwise_binary_op`](bit_util::apply_bitwise_binary_op) for in-place binary bitwise operations
///
/// # Example: Create new [`BooleanBuffer`] from bitwise `AND` of two [`Buffer`]s
/// ```
/// # use arrow_buffer::{Buffer, BooleanBuffer};
/// let left = Buffer::from(vec![0b11001100u8, 0b10111010u8]); // 2 bytes = 16 bits
/// let right = Buffer::from(vec![0b10101010u8, 0b11011100u8, 0b11110000u8]); // 3 bytes = 24 bits
/// // AND of the first 12 bits
/// let result = BooleanBuffer::from_bitwise_binary_op(
///   &left, 0, &right, 0, 12, |a, b| a & b
/// );
/// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]);
/// ```
///
/// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte slices
/// ```
/// # use arrow_buffer::BooleanBuffer;
/// let left = [0b11001100u8, 0b10111010u8];
/// let right = [0b10101010u8, 0b11011100u8];
/// // OR of bits 4..16 from left and bits 0..12 from right
/// let result = BooleanBuffer::from_bitwise_binary_op(
///  &left, 4, &right, 0, 12, |a, b| a | b
/// );
/// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]);
/// ```
pub fn from_bitwise_binary_op<F>(
    left: impl AsRef<[u8]>,
    left_offset_in_bits: usize,
    right: impl AsRef<[u8]>,
    right_offset_in_bits: usize,
    len_in_bits: usize,
    mut op: F,
) -> Self
where
    F: FnMut(u64, u64) -> u64,
{
    let left = left.as_ref();
    let right = right.as_ref();

    // Fast path: when both offsets fall on a byte boundary, the requested bits
    // of each input form a plain byte window. If both windows can be viewed
    // entirely as `u64` words, apply `op` word by word without `BitChunks`.
    if left_offset_in_bits & 0x7 == 0 && right_offset_in_bits & 0x7 == 0 {
        let num_bytes = bit_util::ceil(len_in_bits, 8);
        let left_start = left_offset_in_bits / 8;
        let right_start = right_offset_in_bits / 8;
        // Restrict both inputs to exactly the bytes that hold the requested
        // bits, so the offsets and the length are honoured. If either window
        // is out of bounds, fall through to the generic path below and let it
        // deal with the invalid range.
        let windows = left
            .get(left_start..left_start + num_bytes)
            .zip(right.get(right_start..right_start + num_bytes));
        if let Some((left_bytes, right_bytes)) = windows {
            // SAFETY: every bit pattern is a valid `u64`, so viewing the
            // aligned middle of an initialized byte slice as `u64`s is sound.
            let (left_prefix, left_u64s, left_suffix) =
                unsafe { left_bytes.align_to::<u64>() };
            // SAFETY: same reasoning as for `left_bytes` above.
            let (right_prefix, right_u64s, right_suffix) =
                unsafe { right_bytes.align_to::<u64>() };
            // Only take the fast path when both windows are made up purely of
            // whole, aligned words.
            // TODO also handle non empty suffixes by processing them separately
            if left_prefix.is_empty()
                && right_prefix.is_empty()
                && left_suffix.is_empty()
                && right_suffix.is_empty()
            {
                // Bits are numbered from the least significant bit of the first
                // byte, i.e. the words are little-endian. The conversions are
                // no-ops on little-endian targets and keep `op` seeing the same
                // values as the `BitChunks` path everywhere else.
                let result_u64s = left_u64s
                    .iter()
                    .zip(right_u64s)
                    .map(|(l, r)| op(u64::from_le(*l), u64::from_le(*r)).to_le())
                    .collect::<Vec<u64>>();
                return BooleanBuffer {
                    buffer: Buffer::from(result_u64s),
                    bit_offset: 0,
                    bit_len: len_in_bits,
                };
            }
        }
    }

    // Generic path: `BitChunks` yields the requested bits as 64-bit chunks
    // regardless of bit offset or memory alignment.
    let left_chunks = BitChunks::new(left, left_offset_in_bits, len_in_bits);
    let right_chunks = BitChunks::new(right, right_offset_in_bits, len_in_bits);

    let chunks = left_chunks
        .iter()
        .zip(right_chunks.iter())
        .map(|(left, right)| op(left, right));
    // SAFETY: the iterator is a `zip` of two `BitChunks` iterators built with
    // the same length; it is relied upon to report its upper bound correctly.
    let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };

    // Handle the trailing bits (fewer than 64) that do not fill a whole chunk.
    let remainder_bytes = bit_util::ceil(left_chunks.remainder_len(), 8);
    let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
    // Bits are counted starting from the least significant bit, so the
    // little-endian byte representation keeps them in the right order.
    let rem = &rem.to_le_bytes()[0..remainder_bytes];
    buffer.extend_from_slice(rem);

    BooleanBuffer {
        buffer: Buffer::from(buffer),
        bit_offset: 0,
        bit_len: len_in_bits,
    }
}

/// Returns the number of set bits in this buffer
pub fn count_set_bits(&self) -> usize {
self.buffer
Expand Down Expand Up @@ -655,4 +756,42 @@ mod tests {
assert_eq!(result, expected);
}
}

/// Checks [`BooleanBuffer::from_bitwise_binary_op`] against a bit-by-bit
/// reference for many combinations of left offset, right offset and length,
/// using randomly generated inputs and bitwise `AND` as the operation.
#[test]
fn test_from_bitwise_binary_op() {
    const TOTAL_BITS: usize = 1024;

    // two independent random bit vectors of the same size
    let lhs_bools: Vec<bool> = std::iter::repeat_with(rand::random::<bool>)
        .take(TOTAL_BITS)
        .collect();
    let rhs_bools: Vec<bool> = std::iter::repeat_with(rand::random::<bool>)
        .take(TOTAL_BITS)
        .collect();
    let lhs_buffer = BooleanBuffer::from(lhs_bools.as_slice());
    let rhs_buffer = BooleanBuffer::from(rhs_bools.as_slice());

    let right_offsets = [0, 4, 5, 17, 33, 24, 45, 64, 65, 100, 200];
    let len_reductions = [0, 1, 44, 100, 256, 300, 512];

    for left_offset in 0..200 {
        for right_offset in right_offsets {
            // the larger offset limits how many bits remain in both inputs
            let available = TOTAL_BITS - left_offset.max(right_offset);
            for reduction in len_reductions {
                let len = available - reduction;

                // result of the chunked implementation under test
                let actual = BooleanBuffer::from_bitwise_binary_op(
                    lhs_buffer.values(),
                    left_offset,
                    rhs_buffer.values(),
                    right_offset,
                    len,
                    |a, b| a & b,
                );

                // reference result computed one bool at a time
                let lhs_window = &lhs_bools[left_offset..][..len];
                let rhs_window = &rhs_bools[right_offset..][..len];
                let expected: BooleanBuffer = lhs_window
                    .iter()
                    .zip(rhs_window)
                    .map(|(l, r)| *l && *r)
                    .collect();

                assert_eq!(actual, expected);
            }
        }
    }
}
}
Loading
Loading