From 19900e1824463a6152a41a7ba9fbea9f52207b1c Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 24 Jun 2025 19:45:42 -0400 Subject: [PATCH 1/2] Create read only copy if needed when opening a store path (#3156) * Create read only copy if needed when opening a store path * Add ValueError to Raises section * Update expected warning * Update src/zarr/storage/_common.py Co-authored-by: Davis Bennett * Use ANY_ACCESS_MODE * Update src/zarr/storage/_common.py Co-authored-by: David Stansby * Update src/zarr/storage/_common.py Co-authored-by: David Stansby * Update changes * Try using get_args on definition * Revert "Try using get_args on definition" This reverts commit 7ad760fba89e77a3ec5ad9649ada3a99799feeb6. * Add test * Remove warning * Apply suggestion for try; except shortening Co-authored-by: Tom Nicholas * Improve code coverage --------- Co-authored-by: Davis Bennett Co-authored-by: David Stansby Co-authored-by: Tom Nicholas (cherry picked from commit 5731c6c88ef0c5a3cb7f9701a7fbcee2814febf3) --- changes/3068.bugfix.rst | 1 - changes/3156.bugfix.rst | 1 + src/zarr/core/common.py | 2 + src/zarr/storage/_common.py | 80 ++++++++++++++++++++++++----------- tests/test_api.py | 2 +- tests/test_common.py | 25 ++++++++--- tests/test_store/test_core.py | 17 ++++++-- 7 files changed, 92 insertions(+), 36 deletions(-) delete mode 100644 changes/3068.bugfix.rst create mode 100644 changes/3156.bugfix.rst diff --git a/changes/3068.bugfix.rst b/changes/3068.bugfix.rst deleted file mode 100644 index 9ada322c13..0000000000 --- a/changes/3068.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Trying to open an array with ``mode='r'`` when the store is not read-only now raises an error. diff --git a/changes/3156.bugfix.rst b/changes/3156.bugfix.rst new file mode 100644 index 0000000000..64218b6707 --- /dev/null +++ b/changes/3156.bugfix.rst @@ -0,0 +1 @@ +Trying to open a StorePath/Array with ``mode='r'`` when the store is not read-only creates a read-only copy of the store. diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index be37dc5109..c19756b867 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -10,6 +10,7 @@ from typing import ( TYPE_CHECKING, Any, + Final, Literal, TypeVar, cast, @@ -40,6 +41,7 @@ JSON = str | int | float | Mapping[str, "JSON"] | Sequence["JSON"] | None MemoryOrder = Literal["C", "F"] AccessModeLiteral = Literal["r", "r+", "a", "w", "w-"] +ANY_ACCESS_MODE: Final = "r", "r+", "a", "w", "w-" DimensionNames = Iterable[str | None] | None diff --git a/src/zarr/storage/_common.py b/src/zarr/storage/_common.py index f264728cf2..e25fa28424 100644 --- a/src/zarr/storage/_common.py +++ b/src/zarr/storage/_common.py @@ -7,7 +7,14 @@ from zarr.abc.store import ByteRequest, Store from zarr.core.buffer import Buffer, default_buffer_prototype -from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON, AccessModeLiteral, ZarrFormat +from zarr.core.common import ( + ANY_ACCESS_MODE, + ZARR_JSON, + ZARRAY_JSON, + ZGROUP_JSON, + AccessModeLiteral, + ZarrFormat, +) from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError from zarr.storage._local import LocalStore from zarr.storage._memory import MemoryStore @@ -54,59 +61,82 @@ def __init__(self, store: Store, path: str = "") -> None: def read_only(self) -> bool: return self.store.read_only + @classmethod + async def _create_open_instance(cls, store: Store, path: str) -> Self: + """Helper to create and return a StorePath instance.""" + await store._ensure_open() + return cls(store, path) + @classmethod async def open(cls, store: Store, path: str, mode: AccessModeLiteral | None = None) -> Self: """ Open StorePath based on the provided mode. - * If the mode is 'w-' and the StorePath contains keys, raise a FileExistsError. - * If the mode is 'w', delete all keys nested within the StorePath - * If the mode is 'a', 'r', or 'r+', do nothing + * If the mode is None, return an opened version of the store with no changes. + * If the mode is 'r+', 'w-', 'w', or 'a' and the store is read-only, raise a ValueError. + * If the mode is 'r' and the store is not read-only, return a copy of the store with read_only set to True. + * If the mode is 'w-' and the store is not read-only and the StorePath contains keys, raise a FileExistsError. + * If the mode is 'w' and the store is not read-only, delete all keys nested within the StorePath. Parameters ---------- mode : AccessModeLiteral The mode to use when initializing the store path. + The accepted values are: + + - ``'r'``: read only (must exist) + - ``'r+'``: read/write (must exist) + - ``'a'``: read/write (create if doesn't exist) + - ``'w'``: read/write (overwrite if exists) + - ``'w-'``: read/write (create if doesn't exist). + Raises ------ FileExistsError If the mode is 'w-' and the store path already exists. ValueError If the mode is not "r" and the store is read-only, or - if the mode is "r" and the store is not read-only. """ - await store._ensure_open() - self = cls(store, path) - # fastpath if mode is None if mode is None: - return self + return await cls._create_open_instance(store, path) - if store.read_only and mode != "r": - raise ValueError(f"Store is read-only but mode is '{mode}'") - if not store.read_only and mode == "r": - raise ValueError(f"Store is not read-only but mode is '{mode}'") + if mode not in ANY_ACCESS_MODE: + raise ValueError(f"Invalid mode: {mode}, expected one of {ANY_ACCESS_MODE}") + if store.read_only: + # Don't allow write operations on a read-only store + if mode != "r": + raise ValueError( + f"Store is read-only but mode is {mode!r}. Create a writable store or use 'r' mode." + ) + self = await cls._create_open_instance(store, path) + elif mode == "r": + # Create read-only copy for read mode on writable store + try: + read_only_store = store.with_read_only(True) + except NotImplementedError as e: + raise ValueError( + "Store is not read-only but mode is 'r'. Unable to create a read-only copy of the store. " + "Please use a read-only store or a storage class that implements .with_read_only()." + ) from e + self = await cls._create_open_instance(read_only_store, path) + else: + # writable store and writable mode + self = await cls._create_open_instance(store, path) + + # Handle mode-specific operations match mode: case "w-": if not await self.is_empty(): - msg = ( - f"{self} is not empty, but `mode` is set to 'w-'." - "Either remove the existing objects in storage," - "or set `mode` to a value that handles pre-existing objects" - "in storage, like `a` or `w`." + raise FileExistsError( + f"Cannot create '{path}' with mode 'w-' because it already contains data. " + f"Use mode 'w' to overwrite or 'a' to append." ) - raise FileExistsError(msg) case "w": await self.delete_dir() - case "a" | "r" | "r+": - # No init action - pass - case _: - raise ValueError(f"Invalid mode: {mode}") - return self async def get( diff --git a/tests/test_api.py b/tests/test_api.py index 2a95d7b97c..e6cb612a82 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1318,7 +1318,7 @@ def test_no_overwrite_open(tmp_path: Path, open_func: Callable, mode: str) -> No existing_fpath = add_empty_file(tmp_path) assert existing_fpath.exists() - with contextlib.suppress(FileExistsError, FileNotFoundError, ValueError): + with contextlib.suppress(FileExistsError, FileNotFoundError, UserWarning): open_func(store=store, mode=mode) if mode == "w": assert not existing_fpath.exists() diff --git a/tests/test_common.py b/tests/test_common.py index c28723d1a8..0944c3375a 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -1,23 +1,36 @@ from __future__ import annotations -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Iterable - from typing import Any, Literal +from typing import TYPE_CHECKING, get_args import numpy as np import pytest -from zarr.core.common import parse_name, parse_shapelike, product +from zarr.core.common import ( + ANY_ACCESS_MODE, + AccessModeLiteral, + parse_name, + parse_shapelike, + product, +) from zarr.core.config import parse_indexing_order +if TYPE_CHECKING: + from collections.abc import Iterable + from typing import Any, Literal + @pytest.mark.parametrize("data", [(0, 0, 0, 0), (1, 3, 4, 5, 6), (2, 4)]) def test_product(data: tuple[int, ...]) -> None: assert product(data) == np.prod(data) +def test_access_modes() -> None: + """ + Test that the access modes type and variable for run-time checking are equivalent. + """ + assert set(ANY_ACCESS_MODE) == set(get_args(AccessModeLiteral)) + + # todo: test def test_concurrent_map() -> None: ... diff --git a/tests/test_store/test_core.py b/tests/test_store/test_core.py index e9c9319ad3..a3850de90f 100644 --- a/tests/test_store/test_core.py +++ b/tests/test_store/test_core.py @@ -7,7 +7,7 @@ import zarr from zarr import Group from zarr.core.common import AccessModeLiteral, ZarrFormat -from zarr.storage import FsspecStore, LocalStore, MemoryStore, StoreLike, StorePath +from zarr.storage import FsspecStore, LocalStore, MemoryStore, StoreLike, StorePath, ZipStore from zarr.storage._common import contains_array, contains_group, make_store_path from zarr.storage._utils import ( _join_paths, @@ -263,8 +263,19 @@ def test_relativize_path_invalid() -> None: _relativize_path(path="a/b/c", prefix="b") -def test_invalid_open_mode() -> None: +def test_different_open_mode(tmp_path: LEGACY_PATH) -> None: + # Test with a store that implements .with_read_only() store = MemoryStore() zarr.create((100,), store=store, zarr_format=2, path="a") - with pytest.raises(ValueError, match="Store is not read-only but mode is 'r'"): + arr = zarr.open_array(store=store, path="a", zarr_format=2, mode="r") + assert arr.store.read_only + + # Test with a store that doesn't implement .with_read_only() + zarr_path = tmp_path / "foo.zarr" + store = ZipStore(zarr_path, mode="w") + zarr.create((100,), store=store, zarr_format=2, path="a") + with pytest.raises( + ValueError, + match="Store is not read-only but mode is 'r'. Unable to create a read-only copy of the store. Please use a read-only store or a storage class that implements .with_read_only().", + ): zarr.open_array(store=store, path="a", zarr_format=2, mode="r") From 4c48e709574fe97265c994520f5992d96661dcf1 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 30 Jun 2025 11:58:20 +0200 Subject: [PATCH 2/2] release notes --- changes/2774.feature.rst | 1 - changes/2921.bugfix.rst | 1 - changes/3021.feature.rst | 1 - changes/3066.feature.rst | 1 - changes/3081.feature.rst | 1 - changes/3082.feature.rst | 1 - changes/3100.bugfix.rst | 3 --- changes/3103.bugfix.rst | 7 ------- changes/3127.bugfix.rst | 2 -- changes/3128.bugfix.rst | 1 - changes/3130.feature.rst | 1 - changes/3138.feature.rst | 1 - changes/3140.bugfix.rst | 8 -------- changes/3156.bugfix.rst | 1 - docs/release-notes.rst | 43 ++++++++++++++++++++++++++++++++++++++++ 15 files changed, 43 insertions(+), 30 deletions(-) delete mode 100644 changes/2774.feature.rst delete mode 100644 changes/2921.bugfix.rst delete mode 100644 changes/3021.feature.rst delete mode 100644 changes/3066.feature.rst delete mode 100644 changes/3081.feature.rst delete mode 100644 changes/3082.feature.rst delete mode 100644 changes/3100.bugfix.rst delete mode 100644 changes/3103.bugfix.rst delete mode 100644 changes/3127.bugfix.rst delete mode 100644 changes/3128.bugfix.rst delete mode 100644 changes/3130.feature.rst delete mode 100644 changes/3138.feature.rst delete mode 100644 changes/3140.bugfix.rst delete mode 100644 changes/3156.bugfix.rst diff --git a/changes/2774.feature.rst b/changes/2774.feature.rst deleted file mode 100644 index 4df83f54ec..0000000000 --- a/changes/2774.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Add `zarr.storage.FsspecStore.from_mapper()` so that `zarr.open()` supports stores of type `fsspec.mapping.FSMap`. \ No newline at end of file diff --git a/changes/2921.bugfix.rst b/changes/2921.bugfix.rst deleted file mode 100644 index 65db48654f..0000000000 --- a/changes/2921.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Ignore stale child metadata when reconsolidating metadata. diff --git a/changes/3021.feature.rst b/changes/3021.feature.rst deleted file mode 100644 index 8805797ce3..0000000000 --- a/changes/3021.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Implemented ``move`` for ``LocalStore`` and ``ZipStore``. This allows users to move the store to a different root path. \ No newline at end of file diff --git a/changes/3066.feature.rst b/changes/3066.feature.rst deleted file mode 100644 index 89d5ddb1c6..0000000000 --- a/changes/3066.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Added `~zarr.errors.GroupNotFoundError`, which is raised when attempting to open a group that does not exist. diff --git a/changes/3081.feature.rst b/changes/3081.feature.rst deleted file mode 100644 index 8cf83ea7c2..0000000000 --- a/changes/3081.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Adds ``fill_value`` to the list of attributes displayed in the output of the ``AsyncArray.info()`` method. \ No newline at end of file diff --git a/changes/3082.feature.rst b/changes/3082.feature.rst deleted file mode 100644 index e990d1f3a0..0000000000 --- a/changes/3082.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Use :py:func:`numpy.zeros` instead of :py:func:`np.full` for a performance speedup when creating a `zarr.core.buffer.NDBuffer` with `fill_value=0`. \ No newline at end of file diff --git a/changes/3100.bugfix.rst b/changes/3100.bugfix.rst deleted file mode 100644 index 11f06628c0..0000000000 --- a/changes/3100.bugfix.rst +++ /dev/null @@ -1,3 +0,0 @@ -For Zarr format 2, allow fixed-length string arrays to be created without automatically inserting a -``Vlen-UT8`` codec in the array of filters. Fixed-length string arrays do not need this codec. This -change fixes a regression where fixed-length string arrays created with Zarr Python 3 could not be read with Zarr Python 2.18. \ No newline at end of file diff --git a/changes/3103.bugfix.rst b/changes/3103.bugfix.rst deleted file mode 100644 index 93aecce908..0000000000 --- a/changes/3103.bugfix.rst +++ /dev/null @@ -1,7 +0,0 @@ -When creating arrays without explicitly specifying a chunk size using `zarr.create` and other -array creation routines, the chunk size will now set automatically instead of defaulting to the data shape. -For large arrays this will result in smaller default chunk sizes. -To retain previous behaviour, explicitly set the chunk shape to the data shape. - -This fix matches the existing chunking behaviour of -`zarr.save_array` and `zarr.api.asynchronous.AsyncArray.create`. diff --git a/changes/3127.bugfix.rst b/changes/3127.bugfix.rst deleted file mode 100644 index 35d7f5d329..0000000000 --- a/changes/3127.bugfix.rst +++ /dev/null @@ -1,2 +0,0 @@ -When `zarr.save` has an argument `path=some/path/` and multiple arrays in `args`, the path resulted in `some/path/some/path` due to using the `path` -argument twice while building the array path. This is now fixed. \ No newline at end of file diff --git a/changes/3128.bugfix.rst b/changes/3128.bugfix.rst deleted file mode 100644 index b93416070e..0000000000 --- a/changes/3128.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Fix `zarr.open` default for argument `mode` when `store` is `read_only` \ No newline at end of file diff --git a/changes/3130.feature.rst b/changes/3130.feature.rst deleted file mode 100644 index 7a64582f06..0000000000 --- a/changes/3130.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Port more stateful testing actions from `Icechunk `_. diff --git a/changes/3138.feature.rst b/changes/3138.feature.rst deleted file mode 100644 index ecd339bf9c..0000000000 --- a/changes/3138.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Adds a `with_read_only` convenience method to the `Store` abstract base class (raises `NotImplementedError`) and implementations to the `MemoryStore`, `ObjectStore`, `LocalStore`, and `FsspecStore` classes. \ No newline at end of file diff --git a/changes/3140.bugfix.rst b/changes/3140.bugfix.rst deleted file mode 100644 index 6ef83c90a5..0000000000 --- a/changes/3140.bugfix.rst +++ /dev/null @@ -1,8 +0,0 @@ -Suppress `FileNotFoundError` when deleting non-existent keys in the `obstore` adapter. - -When writing empty chunks (i.e. chunks where all values are equal to the array's fill value) to a zarr array, zarr -will delete those chunks from the underlying store. For zarr arrays backed by the `obstore` adapter, this will potentially -raise a `FileNotFoundError` if the chunk doesn't already exist. -Since whether or not a delete of a non-existing object raises an error depends on the behavior of the underlying store, -suppressing the error in all cases results in consistent behavior across stores, and is also what `zarr` seems to expect -from the store. diff --git a/changes/3156.bugfix.rst b/changes/3156.bugfix.rst deleted file mode 100644 index 64218b6707..0000000000 --- a/changes/3156.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Trying to open a StorePath/Array with ``mode='r'`` when the store is not read-only creates a read-only copy of the store. diff --git a/docs/release-notes.rst b/docs/release-notes.rst index a89046dd6d..51b73920f4 100644 --- a/docs/release-notes.rst +++ b/docs/release-notes.rst @@ -3,6 +3,49 @@ Release notes .. towncrier release notes start +3.0.9 (2025-06-30) +------------------ + +Features +~~~~~~~~ + +- Add `zarr.storage.FsspecStore.from_mapper()` so that `zarr.open()` supports stores of type `fsspec.mapping.FSMap`. (:issue:`2774`) +- Implemented ``move`` for ``LocalStore`` and ``ZipStore``. This allows users to move the store to a different root path. (:issue:`3021`) +- Added `~zarr.errors.GroupNotFoundError`, which is raised when attempting to open a group that does not exist. (:issue:`3066`) +- Adds ``fill_value`` to the list of attributes displayed in the output of the ``AsyncArray.info()`` method. (:issue:`3081`) +- Use :py:func:`numpy.zeros` instead of :py:func:`np.full` for a performance speedup when creating a `zarr.core.buffer.NDBuffer` with `fill_value=0`. (:issue:`3082`) +- Port more stateful testing actions from `Icechunk `_. (:issue:`3130`) +- Adds a `with_read_only` convenience method to the `Store` abstract base class (raises `NotImplementedError`) and implementations to the `MemoryStore`, `ObjectStore`, `LocalStore`, and `FsspecStore` classes. (:issue:`3138`) + + +Bugfixes +~~~~~~~~ + +- Ignore stale child metadata when reconsolidating metadata. (:issue:`2921`) +- For Zarr format 2, allow fixed-length string arrays to be created without automatically inserting a + ``Vlen-UT8`` codec in the array of filters. Fixed-length string arrays do not need this codec. This + change fixes a regression where fixed-length string arrays created with Zarr Python 3 could not be read with Zarr Python 2.18. (:issue:`3100`) +- When creating arrays without explicitly specifying a chunk size using `zarr.create` and other + array creation routines, the chunk size will now set automatically instead of defaulting to the data shape. + For large arrays this will result in smaller default chunk sizes. + To retain previous behaviour, explicitly set the chunk shape to the data shape. + + This fix matches the existing chunking behaviour of + `zarr.save_array` and `zarr.api.asynchronous.AsyncArray.create`. (:issue:`3103`) +- When `zarr.save` has an argument `path=some/path/` and multiple arrays in `args`, the path resulted in `some/path/some/path` due to using the `path` + argument twice while building the array path. This is now fixed. (:issue:`3127`) +- Fix `zarr.open` default for argument `mode` when `store` is `read_only` (:issue:`3128`) +- Suppress `FileNotFoundError` when deleting non-existent keys in the `obstore` adapter. + + When writing empty chunks (i.e. chunks where all values are equal to the array's fill value) to a zarr array, zarr + will delete those chunks from the underlying store. For zarr arrays backed by the `obstore` adapter, this will potentially + raise a `FileNotFoundError` if the chunk doesn't already exist. + Since whether or not a delete of a non-existing object raises an error depends on the behavior of the underlying store, + suppressing the error in all cases results in consistent behavior across stores, and is also what `zarr` seems to expect + from the store. (:issue:`3140`) +- Trying to open a StorePath/Array with ``mode='r'`` when the store is not read-only creates a read-only copy of the store. (:issue:`3156`) + + 3.0.8 (2025-05-19) ------------------