7 changes: 6 additions & 1 deletion README.md
@@ -6,13 +6,18 @@ Unlike [binsparse-python](https://github.com/ivirshup/binsparse-python), the dif

 This does make reading specific parts (e.g. the coordinates) in a single request a bit harder, but having a single logical array map to an on-disk zarr array does have its advantages.

+Useful links:
+
+- zarr-python PR: https://github.com/zarr-developers/zarr-python/pull/3529
+- sparse indexing adapter: https://github.com/keewis/sparse-indexing-adapter
+
 ## Installation

 `zarr-sparse` currently requires a special version of zarr. To install it, use:

 ```sh
 pip install \
-    "zarr @ git+https://github.com/keewis/zarr-python.git@zarr-sparse-patch" \
+    "zarr @ git+https://github.com/keewis/zarr-python.git@array-registry" \
     "zarr-sparse @ git+https://github.com/keewis/zarr-sparse.git@main"
 ```

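A minimal sanity check for the installation step above (an illustrative sketch, not part of the diff): it only assumes that the module name `zarr_sparse` matches the package paths shown in this PR and that the patched `zarr` build exposes `__version__` as usual.

```python
# Illustrative post-install check, not part of the repository:
# confirm the patched zarr build and zarr-sparse import side by side.
import zarr
import zarr_sparse

print("zarr version:", zarr.__version__)  # build installed from the git branch above
print("zarr_sparse loaded from:", zarr_sparse.__file__)
```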
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -27,13 +27,15 @@ test = [
 [dependency-groups]
 dev = [
     "black>=25.1.0",
+    "dask>=2025.10.0",
     "hatch>=1.14.1",
     "hypothesis>=6.138.0",
     "ipdb>=0.13.13",
     "ipython>=9.2.0",
     "pyinstrument>=5.1.1",
     "pytest>=8.3.5",
     "pytest-xdist>=3.6.1",
+    "xarray>=2025.10.1",
 ]

 [tool.hatch]
16 changes: 14 additions & 2 deletions zarr_sparse/buffer.py
@@ -10,7 +10,7 @@
 from zarr.registry import register_ndbuffer

 from zarr_sparse.chunk_grid import ChunkGrid
-from zarr_sparse.combine import combine_nd
+from zarr_sparse.combine import combine_nd, first_value
 from zarr_sparse.slices import slice_size
 from zarr_sparse.utils import as_decorator

@@ -21,6 +21,16 @@

 def sparse_equal(a, b, equal_nan: bool) -> bool:
     equal_nan = equal_nan if a.dtype.kind not in ("U", "S", "T", "O", "V") else False
+    if isinstance(a, ChunkGrid):
+        if len(a.data) == 1:
+            a = next(iter(a.data.values()))
+        else:
+            raise RuntimeError("comparing multi-chunk grid")
+    if isinstance(b, ChunkGrid):
+        if len(b.data) == 1:
+            b = next(iter(b.data.values()))
+        else:
+            raise RuntimeError("comparing multi-chunk grid")

     if b.ndim == 0:
         if not np.array_equal(
@@ -104,7 +114,9 @@ def __getitem__(self, key: Any) -> Self:

     def __setitem__(self, key: Any, value: Any) -> None:
         if isinstance(value, NDBuffer):
-            value = value._data
+            if len(value._data.data) != 1:
+                raise RuntimeError("setting a non-one-sized buffer is not allowed")
+            value = first_value(value._data.data)

         slice_sizes = tuple(
             slice_size(slice_, size) for slice_, size in zip(key, self._data.shape)
6 changes: 5 additions & 1 deletion zarr_sparse/codec/codec.py
@@ -9,11 +9,12 @@
 from zarr.codecs import BytesCodec, ZstdCodec
 from zarr.core.array_spec import ArrayConfig, ArraySpec
 from zarr.core.buffer import Buffer, NDBuffer
+from zarr.core.buffer.cpu import Buffer as CPUBuffer
 from zarr.core.common import JSON, parse_named_configuration
 from zarr.core.dtype.npy.int import Int64
 from zarr.registry import get_pipeline_class, register_codec

-from zarr_sparse.buffer import sparse_buffer_prototype
+from zarr_sparse.buffer import SparseNDBuffer, sparse_buffer_prototype
 from zarr_sparse.codec import metadata
 from zarr_sparse.combine import first_value
 from zarr_sparse.comparison import compare_fill_value
@@ -104,6 +105,9 @@ async def decode_metadata_table(table_data: Buffer) -> dict[str, Any]:


 class SparseArrayCodec(ArrayBytesCodec):
+    codec_input = SparseNDBuffer
+    codec_output = CPUBuffer
+
     def __init__(self):
         self.array_codecs = (BytesCodec(), ZstdCodec())
         self.table_codecs = (BytesCodec(),)