-
Notifications
You must be signed in to change notification settings - Fork 939
Implement specialized min/max for GenericBinaryView
(StringView
and BinaryView
)
#6089
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,9 @@ use arrow_buffer::{ArrowNativeType, NullBuffer}; | |
use arrow_data::bit_iterator::try_for_each_valid_idx; | ||
use arrow_schema::*; | ||
use std::borrow::BorrowMut; | ||
use std::cmp::{self, Ordering}; | ||
use std::ops::{BitAnd, BitOr, BitXor}; | ||
use types::ByteViewType; | ||
|
||
/// An accumulator for primitive numeric values. | ||
trait NumericAccumulator<T: ArrowNativeTypeOp>: Copy + Default { | ||
|
@@ -425,14 +427,55 @@ where | |
} | ||
} | ||
|
||
/// Helper to compute min/max of [`GenericByteViewArray<T>`]. | ||
/// The specialized min/max leverages the inlined values to compare the byte views. | ||
/// `swap_cond` is the condition to swap current min/max with the new value. | ||
/// For example, `Ordering::Greater` for max and `Ordering::Less` for min. | ||
fn min_max_view_helper<T: ByteViewType>( | ||
array: &GenericByteViewArray<T>, | ||
swap_cond: cmp::Ordering, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only other thing I can think of doing here is potentially making this a const I actually tried doing so XiangpengHao#2 and it does not seem to make any performance difference |
||
) -> Option<&T::Native> { | ||
let null_count = array.null_count(); | ||
if null_count == array.len() { | ||
None | ||
} else if null_count == 0 { | ||
let target_idx = (0..array.len()).reduce(|acc, item| { | ||
// SAFETY: array's length is correct so item is within bounds | ||
let cmp = unsafe { GenericByteViewArray::compare_unchecked(array, item, array, acc) }; | ||
if cmp == swap_cond { | ||
item | ||
} else { | ||
acc | ||
} | ||
}); | ||
// SAFETY: idx came from valid range `0..array.len()` | ||
unsafe { target_idx.map(|idx| array.value_unchecked(idx)) } | ||
} else { | ||
let nulls = array.nulls().unwrap(); | ||
XiangpengHao marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
let target_idx = nulls.valid_indices().reduce(|acc_idx, idx| { | ||
let cmp = | ||
unsafe { GenericByteViewArray::compare_unchecked(array, idx, array, acc_idx) }; | ||
if cmp == swap_cond { | ||
idx | ||
} else { | ||
acc_idx | ||
} | ||
}); | ||
|
||
// SAFETY: idx came from valid range `0..array.len()` | ||
unsafe { target_idx.map(|idx| array.value_unchecked(idx)) } | ||
} | ||
} | ||
|
||
/// Returns the maximum value in the binary array, according to the natural order. | ||
pub fn max_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> { | ||
min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b) | ||
} | ||
|
||
/// Returns the maximum value in the binary view array, according to the natural order. | ||
pub fn max_binary_view(array: &BinaryViewArray) -> Option<&[u8]> { | ||
min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b) | ||
min_max_view_helper(array, Ordering::Greater) | ||
} | ||
|
||
/// Returns the minimum value in the binary array, according to the natural order. | ||
|
@@ -442,7 +485,7 @@ pub fn min_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<& | |
|
||
/// Returns the minimum value in the binary view array, according to the natural order. | ||
pub fn min_binary_view(array: &BinaryViewArray) -> Option<&[u8]> { | ||
min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b) | ||
min_max_view_helper(array, Ordering::Less) | ||
} | ||
|
||
/// Returns the maximum value in the string array, according to the natural order. | ||
|
@@ -452,7 +495,7 @@ pub fn max_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<& | |
|
||
/// Returns the maximum value in the string view array, according to the natural order. | ||
pub fn max_string_view(array: &StringViewArray) -> Option<&str> { | ||
min_max_helper::<&str, _, _>(array, |a, b| *a < *b) | ||
min_max_view_helper(array, Ordering::Greater) | ||
} | ||
|
||
/// Returns the minimum value in the string array, according to the natural order. | ||
|
@@ -462,7 +505,7 @@ pub fn min_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<& | |
|
||
/// Returns the minimum value in the string view array, according to the natural order. | ||
pub fn min_string_view(array: &StringViewArray) -> Option<&str> { | ||
min_max_helper::<&str, _, _>(array, |a, b| *a > *b) | ||
min_max_view_helper(array, Ordering::Less) | ||
} | ||
|
||
/// Returns the sum of values in the array. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -579,13 +579,13 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> { | |
return false; | ||
} | ||
|
||
unsafe { compare_byte_view_unchecked(l.0, l.1, r.0, r.1).is_eq() } | ||
unsafe { GenericByteViewArray::compare_unchecked(l.0, l.1, r.0, r.1).is_eq() } | ||
} | ||
|
||
fn is_lt(l: Self::Item, r: Self::Item) -> bool { | ||
// # Safety | ||
// The index is within bounds as it is checked in value() | ||
unsafe { compare_byte_view_unchecked(l.0, l.1, r.0, r.1).is_lt() } | ||
unsafe { GenericByteViewArray::compare_unchecked(l.0, l.1, r.0, r.1).is_lt() } | ||
} | ||
|
||
fn len(&self) -> usize { | ||
|
@@ -626,7 +626,7 @@ pub fn compare_byte_view<T: ByteViewType>( | |
) -> std::cmp::Ordering { | ||
assert!(left_idx < left.len()); | ||
assert!(right_idx < right.len()); | ||
unsafe { compare_byte_view_unchecked(left, left_idx, right, right_idx) } | ||
unsafe { GenericByteViewArray::compare_unchecked(left, left_idx, right, right_idx) } | ||
} | ||
|
||
/// Comparing two [`GenericByteViewArray`] at index `left_idx` and `right_idx` | ||
|
@@ -656,6 +656,7 @@ pub fn compare_byte_view<T: ByteViewType>( | |
/// | ||
/// # Safety | ||
/// The left/right_idx must within range of each array | ||
#[deprecated(note = "Use `GenericByteViewArray::compare_unchecked` instead")] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is fine. Thank you for the diligence. |
||
pub unsafe fn compare_byte_view_unchecked<T: ByteViewType>( | ||
left: &GenericByteViewArray<T>, | ||
left_idx: usize, | ||
|
Uh oh!
There was an error while loading. Please reload this page.