Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -967,15 +967,16 @@ impl<'a, T: ByteViewType + ?Sized> IntoIterator for &'a GenericByteViewArray<T>
}

impl<T: ByteViewType + ?Sized> From<ArrayData> for GenericByteViewArray<T> {
fn from(value: ArrayData) -> Self {
let views = value.buffers()[0].clone();
Copy link
Contributor Author

@alamb alamb Jan 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cloning the buffers is relatively cheap (they are `Arc`ed internally), so avoiding this just makes the code easier to follow; I don't think it will yield any significant performance savings.

let views = ScalarBuffer::new(views, value.offset(), value.len());
let buffers = value.buffers()[1..].to_vec().into();
fn from(data: ArrayData) -> Self {
let (_data_type, len, nulls, offset, mut buffers, _child_data) = data.into_parts();
let views = buffers.remove(0); // need to maintain order of remaining buffers
let buffers = Arc::from(buffers);
let views = ScalarBuffer::new(views, offset, len);
Self {
data_type: T::DATA_TYPE,
views,
buffers,
nulls: value.nulls().cloned(),
nulls,
phantom: Default::default(),
}
}
Expand Down
19 changes: 10 additions & 9 deletions arrow-array/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,25 +347,26 @@ impl StructArray {

impl From<ArrayData> for StructArray {
fn from(data: ArrayData) -> Self {
let parent_offset = data.offset();
let parent_len = data.len();
let (data_type, len, nulls, offset, _buffers, child_data) = data.into_parts();

let fields = data
.child_data()
.iter()
let parent_offset = offset;
let parent_len = len;

let fields = child_data
.into_iter()
.map(|cd| {
if parent_offset != 0 || parent_len != cd.len() {
make_array(cd.slice(parent_offset, parent_len))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can probably avoid an additional allocation for sliced arrays by making a version of slice() that consumes self -- like sliced() perhaps 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

} else {
make_array(cd.clone())
make_array(cd)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`cd.clone()` clones the `ArrayData` (and allocates a new `Vec`) for each child, recursively, which is unnecessary.

}
})
.collect();

Self {
len: data.len(),
data_type: data.data_type().clone(),
nulls: data.nulls().cloned(),
len,
data_type,
nulls,
fields,
}
}
Expand Down
30 changes: 30 additions & 0 deletions arrow-data/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,9 @@ impl ArrayData {
///
/// Note: This is a low level API and most users of the arrow crate should create
/// arrays using the builders found in [arrow_array](https://docs.rs/arrow-array)
/// or [`ArrayDataBuilder`].
///
/// See also [`Self::into_parts`] to recover the fields
pub fn try_new(
data_type: DataType,
len: usize,
Expand Down Expand Up @@ -351,6 +354,33 @@ impl ArrayData {
Ok(new_self)
}

/// Return the constituent parts of this ArrayData
///
/// This is the inverse of [`ArrayData::try_new`].
///
/// Returns `(data_type, len, nulls, offset, buffers, child_data)`
pub fn into_parts(
self,
) -> (
DataType,
usize,
Option<NullBuffer>,
usize,
Vec<Buffer>,
Vec<ArrayData>,
) {
let Self {
data_type,
len,
nulls,
offset,
buffers,
child_data,
} = self;

(data_type, len, nulls, offset, buffers, child_data)
}

/// Returns a builder to construct a [`ArrayData`] instance of the same [`DataType`]
#[inline]
pub const fn builder(data_type: DataType) -> ArrayDataBuilder {
Expand Down
Loading