Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::{get_offsets, print_long_array};
use crate::array::{get_offsets_from_buffer, print_long_array};
use crate::builder::GenericByteBuilder;
use crate::iterator::ArrayIter;
use crate::types::ByteArrayType;
Expand Down Expand Up @@ -542,30 +542,34 @@ impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray<T> {

impl<T: ByteArrayType> From<ArrayData> for GenericByteArray<T> {
fn from(data: ArrayData) -> Self {
let (data_type, len, nulls, offset, mut buffers, _child_data) = data.into_parts();
assert_eq!(
data.data_type(),
&Self::DATA_TYPE,
data_type,
Self::DATA_TYPE,
"{}{}Array expects DataType::{}",
T::Offset::PREFIX,
T::PREFIX,
Self::DATA_TYPE
);
assert_eq!(
data.buffers().len(),
buffers.len(),
2,
"{}{}Array data should contain 2 buffers only (offsets and values)",
T::Offset::PREFIX,
T::PREFIX,
);
// buffers are offset then value, so pop in reverse
let value_data = buffers.pop().expect("checked above");
let offset_buffer = buffers.pop().expect("checked above");

// SAFETY:
// ArrayData is valid, and verified type above
let value_offsets = unsafe { get_offsets(&data) };
let value_data = data.buffers()[1].clone();
let value_offsets = unsafe { get_offsets_from_buffer(offset_buffer, offset, len) };
Self {
value_offsets,
value_data,
data_type: T::DATA_TYPE,
nulls: data.nulls().cloned(),
data_type,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also saves calling Drop on DataType and a clone of Nulls -- probably not measurable but faster 🤷

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sometimes a lot of Drop can be surprisingly slow

nulls,
}
}
}
Expand Down
23 changes: 22 additions & 1 deletion arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
mod binary_array;

use crate::types::*;
use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, ScalarBuffer};
use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ScalarBuffer};
use arrow_data::ArrayData;
use arrow_schema::{DataType, IntervalUnit, TimeUnit};
use std::any::Any;
Expand Down Expand Up @@ -939,6 +939,27 @@ unsafe fn get_offsets<O: ArrowNativeType>(data: &ArrayData) -> OffsetBuffer<O> {
}
}

/// Helper function that creates an [`OffsetBuffer`] from a buffer and array offset/ length
///
/// # Safety
///
/// - buffer must contain valid arrow offsets ( [`OffsetBuffer`] ) for the
/// given length and offset.
unsafe fn get_offsets_from_buffer<O: ArrowNativeType>(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a version of get_offsets immediately above here that takes the Buffers rather than ArrayData. Once I have ported all the arrays, I will remove get_offsets

buffer: Buffer,
offset: usize,
len: usize,
) -> OffsetBuffer<O> {
if len == 0 && buffer.is_empty() {
return OffsetBuffer::new_empty();
}

let scalar_buffer = ScalarBuffer::new(buffer, offset, len + 1);
// Safety:
// Arguments were valid
unsafe { OffsetBuffer::new_unchecked(scalar_buffer) }
}

/// Helper function for printing potentially long arrays.
fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
where
Expand Down
Loading