Skip to content

Commit f76b30a

Browse files
committed
don't copy
Signed-off-by: Onur Satici <[email protected]>
1 parent d46b101 commit f76b30a

File tree

4 files changed

+17
-31
lines changed

4 files changed

+17
-31
lines changed

vortex-python/python/vortex/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@
7575
Array,
7676
PyArray,
7777
_unpickle_array, # pyright: ignore[reportPrivateUsage]
78-
_unpickle_array_p5, # pyright: ignore[reportPrivateUsage]
7978
array,
8079
)
8180
from .file import VortexFile, open
@@ -164,7 +163,6 @@
164163
"ArrayParts",
165164
# Pickle
166165
"_unpickle_array",
167-
"_unpickle_array_p5",
168166
# File
169167
"VortexFile",
170168
"open",

vortex-python/python/vortex/_lib/serde.pyi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
from collections.abc import Sequence
45
from typing import final
56

67
import pyarrow as pa
@@ -30,5 +31,5 @@ class ArrayContext:
3031

3132
def decode_ipc_array(array_bytes: bytes, dtype_bytes: bytes) -> Array: ...
3233
def decode_ipc_array_buffers(
33-
array_buffers: list[bytes | memoryview], dtype_buffers: list[bytes | memoryview]
34+
array_buffers: Sequence[bytes | memoryview], dtype_buffers: Sequence[bytes | memoryview]
3435
) -> Array: ...

vortex-python/python/vortex/arrays.py

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from __future__ import annotations
44

55
import abc
6-
from collections.abc import Callable
6+
from collections.abc import Callable, Sequence
77
from typing import TYPE_CHECKING, Any
88

99
import pyarrow
@@ -14,7 +14,6 @@
1414
from vortex._lib.serde import ( # pyright: ignore[reportMissingModuleSource]
1515
ArrayContext,
1616
ArrayParts,
17-
decode_ipc_array,
1817
decode_ipc_array_buffers,
1918
)
2019

@@ -473,19 +472,13 @@ def decode(cls, parts: ArrayParts, ctx: ArrayContext, dtype: DType, len: int) ->
473472
"""
474473

475474

476-
def _unpickle_array(array_bytes: bytes, dtype_bytes: bytes) -> Array: # pyright: ignore[reportUnusedFunction]
477-
"""Unpickle a Vortex array from IPC-encoded bytes.
475+
def _unpickle_array(array_buffers: Sequence[bytes | memoryview], dtype_buffers: Sequence[bytes | memoryview]) -> Array: # pyright: ignore[reportUnusedFunction]
476+
"""Unpickle a Vortex array from IPC-encoded buffer lists.
478477
479-
This is an internal function used by the pickle module.
480-
"""
481-
return decode_ipc_array(array_bytes, dtype_bytes)
482-
483-
484-
def _unpickle_array_p5(array_buffers: list[bytes | memoryview], dtype_buffers: list[bytes | memoryview]) -> Array: # pyright: ignore[reportUnusedFunction]
485-
"""Unpickle a Vortex array from out-of-band PickleBuffers.
478+
This is an internal function used by the pickle module for both protocol 4 and 5.
486479
487-
This is an internal function used by the pickle module. When
488-
pickle protocol 5 is supported, this methods will be called on unpickle,
489-
saving one extra copy operation for array buffers.
480+
For protocol 4, receives list[bytes] from __reduce__.
481+
For protocol 5, receives list[PickleBuffer/memoryview] from __reduce_ex__.
482+
Both use decode_ipc_array_buffers which concatenates the buffers during deserialization.
490483
"""
491484
return decode_ipc_array_buffers(array_buffers, dtype_buffers)

vortex-python/src/arrays/mod.rs

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -670,29 +670,23 @@ impl PyArray {
670670
let mut encoder = MessageEncoder::default();
671671
let buffers = encoder.encode(EncoderMessage::Array(&*array));
672672

673-
// concat all buffers
674-
let mut serialized = Vec::new();
675-
for buf in buffers.iter() {
676-
serialized.extend_from_slice(buf);
677-
}
673+
// Return buffers as a list instead of concatenating
674+
let array_buffers: Vec<Vec<u8>> = buffers.iter().map(|b| b.to_vec()).collect();
678675

679676
let dtype_buffers = encoder.encode(EncoderMessage::DType(array.dtype()));
680-
let mut dtype_bytes = Vec::new();
681-
for buf in dtype_buffers.iter() {
682-
dtype_bytes.extend_from_slice(buf);
683-
}
677+
let dtype_buffers: Vec<Vec<u8>> = dtype_buffers.iter().map(|b| b.to_vec()).collect();
684678

685679
let vortex_module = PyModule::import(py, "vortex")?;
686680
let unpickle_fn = vortex_module.getattr("_unpickle_array")?;
687681

688-
let args = (serialized, dtype_bytes).into_pyobject(py)?;
682+
let args = (array_buffers, dtype_buffers).into_pyobject(py)?;
689683
Ok((unpickle_fn, args.into_any()))
690684
}
691685

692-
/// Support for Python's pickle protocol with protocol version awareness.
686+
/// Support for Python's pickle protocol for protocol >= 5
693687
///
694-
/// When protocol >= 5, this uses PickleBuffer for out-of-band buffer transfer,
695-
/// which avoids copying large data buffers.
688+
/// uses PickleBuffer for out-of-band buffer transfer,
689+
/// which potentially avoids copying large data buffers.
696690
fn __reduce_ex__<'py>(
697691
slf: &'py Bound<'py, Self>,
698692
protocol: i32,
@@ -729,7 +723,7 @@ impl PyArray {
729723
}
730724

731725
let vortex_module = PyModule::import(py, "vortex")?;
732-
let unpickle_fn = vortex_module.getattr("_unpickle_array_p5")?;
726+
let unpickle_fn = vortex_module.getattr("_unpickle_array")?;
733727

734728
let args = (pickle_buffers, dtype_pickle_buffers).into_pyobject(py)?;
735729
Ok((unpickle_fn, args.into_any()))

0 commit comments

Comments
 (0)