Skip to content

Commit 249b7e8

Browse files
authored
Merge pull request #73 from bcdev/forman-72-replace_option
Add configuration setting `force_new`
2 parents 2b56f1f + 9741c43 commit 249b7e8

File tree

11 files changed

+136
-17
lines changed

11 files changed

+136
-17
lines changed

CHANGES.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
## Version 0.5.2 (in development)
22

3-
3+
* Added configuration setting `force_new`, which forces creation of a new
4+
target dataset. An existing target dataset (and its lock) will be
5+
permanently deleted before appending of slice datasets begins. [#72]
46

57
## Version 0.5.1 (2024-02-23)
68

docs/cli.md

+4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ Options:
2020
the previous ones.
2121
-t, --target TARGET Target Zarr dataset path or URI. Overrides the
2222
'target_dir' configuration field.
23+
--force-new Force creation of a new target dataset. An existing
24+
target dataset (and its lock) will be permanently
25+
deleted before appending of slice datasets begins.
26+
WARNING: the deletion cannot be rolled back.
2327
--dry-run Run the tool without creating, changing, or deleting
2428
any files.
2529
--traceback Show Python traceback on error.

docs/config.md

+6
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,12 @@ The URI or local path of the directory that will be used to temporarily store ro
240240
Type _object_.
241241
Options for the filesystem given by the protocol of `temp_dir`.
242242

243+
## `force_new`
244+
245+
Type _boolean_.
246+
Force creation of a new target dataset. An existing target dataset (and its lock) will be permanently deleted before appending of slice datasets begins. WARNING: the deletion cannot be rolled back.
247+
Defaults to `false`.
248+
243249
## `disable_rollback`
244250

245251
Type _boolean_.

tests/config/test_schema.py

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def test_get_config_schema(self):
2121
"disable_rollback",
2222
"dry_run",
2323
"excluded_variables",
24+
"force_new",
2425
"fixed_dims",
2526
"included_variables",
2627
"logging",

tests/test_api.py

+34
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from zappend.api import FileObj
1414
from zappend.api import SliceSource
1515
from zappend.api import zappend
16+
from zappend.fsutil.transaction import Transaction
1617
from .helpers import clear_memory_fs
1718
from .helpers import make_test_dataset
1819

@@ -108,6 +109,39 @@ def test_some_slices_local_output_to_non_existing_dir(self):
108109
for slice_dir in slices:
109110
shutil.rmtree(slice_dir, ignore_errors=True)
110111

112+
def test_some_slices_local_output_to_existing_dir_force_new(self):
113+
target_dir = "memory://target.zarr"
114+
slices = [
115+
"memory://slice-0.zarr",
116+
"memory://slice-1.zarr",
117+
"memory://slice-2.zarr",
118+
"memory://slice-3.zarr",
119+
]
120+
for uri in slices:
121+
make_test_dataset(uri=uri)
122+
123+
# Expect nothing else to happen, even though force_new=True.
124+
zappend(slices[:1], target_dir=target_dir, force_new=True)
125+
target_ds = xr.open_zarr(target_dir)
126+
self.assertEqual({"time": 3, "y": 50, "x": 100}, target_ds.sizes)
127+
128+
# Expect deletion of existing target_dir
129+
zappend(slices[1:], target_dir=target_dir, force_new=True)
130+
target_ds = xr.open_zarr(target_dir)
131+
self.assertEqual({"time": 9, "y": 50, "x": 100}, target_ds.sizes)
132+
133+
# Expect no changes, even if force_new=True, because dry_run=True
134+
zappend(slices, target_dir=target_dir, force_new=True, dry_run=True)
135+
target_ds = xr.open_zarr(target_dir)
136+
self.assertEqual({"time": 9, "y": 50, "x": 100}, target_ds.sizes)
137+
138+
# Expect the lock file to be deleted too
139+
lock_file = Transaction.get_lock_file(FileObj(target_dir))
140+
lock_file.write("")
141+
self.assertEqual(True, lock_file.exists())
142+
zappend(slices, target_dir=target_dir, force_new=True)
143+
self.assertEqual(False, lock_file.exists())
144+
111145
def test_some_slices_with_class_slice_source(self):
112146
target_dir = "memory://target.zarr"
113147
slices = [make_test_dataset(index=3 * i) for i in range(3)]

tests/test_context.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
import unittest
66

7-
import pytest
87
import numpy as np
8+
import pytest
99
import xarray as xr
1010

1111
from zappend.context import Context
@@ -35,6 +35,12 @@ def test_with_existing_target(self):
3535
self.assertEqual(target_dir, ctx.target_dir.uri)
3636
self.assertIsInstance(ctx.target_metadata, DatasetMetadata)
3737

38+
def test_force_new(self):
39+
ctx = Context({"target_dir": "memory://target.zarr"})
40+
self.assertEqual(False, ctx.force_new)
41+
ctx = Context({"target_dir": "memory://target.zarr", "force_new": True})
42+
self.assertEqual(True, ctx.force_new)
43+
3844
def test_append_dim(self):
3945
ctx = Context({"target_dir": "memory://target.zarr"})
4046
self.assertEqual("time", ctx.append_dim)

zappend/cli.py

+28-7
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,35 @@
1313
"-c",
1414
metavar="CONFIG",
1515
multiple=True,
16-
help="Configuration JSON or YAML file."
17-
" If multiple are passed, subsequent configurations"
18-
" are incremental to the previous ones.",
16+
help=(
17+
"Configuration JSON or YAML file."
18+
" If multiple are passed, subsequent configurations"
19+
" are incremental to the previous ones."
20+
),
1921
)
2022
@click.option(
2123
"--target",
2224
"-t",
2325
metavar="TARGET",
24-
help="Target Zarr dataset path or URI."
25-
" Overrides the 'target_dir' configuration field.",
26+
help=(
27+
"Target Zarr dataset path or URI."
28+
" Overrides the 'target_dir' configuration field."
29+
),
30+
)
31+
@click.option(
32+
"--force-new",
33+
is_flag=True,
34+
help=(
35+
"Force creation of a new target dataset."
36+
" An existing target dataset (and its lock) will be"
37+
" permanently deleted before appending of slice datasets"
38+
" begins. WARNING: the deletion cannot be rolled back."
39+
),
2640
)
2741
@click.option(
2842
"--dry-run",
2943
is_flag=True,
30-
help="Run the tool without creating, changing," " or deleting any files.",
44+
help="Run the tool without creating, changing, or deleting any files.",
3145
)
3246
@click.option(
3347
"--traceback",
@@ -49,6 +63,7 @@ def zappend(
4963
slices: tuple[str, ...],
5064
config: tuple[str, ...],
5165
target: str | None,
66+
force_new: bool,
5267
dry_run: bool,
5368
traceback: bool,
5469
version: bool,
@@ -78,7 +93,13 @@ def zappend(
7893

7994
# noinspection PyBroadException
8095
try:
81-
zappend(slices, config=config, target_dir=target, dry_run=dry_run)
96+
zappend(
97+
slices,
98+
config=config,
99+
target_dir=target,
100+
force_new=force_new,
101+
dry_run=dry_run,
102+
)
82103
except BaseException as e:
83104
if traceback:
84105
import traceback as tb

zappend/config/schema.py

+10
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,16 @@
674674
"type": "object",
675675
"additionalProperties": True,
676676
},
677+
force_new={
678+
"description": (
679+
"Force creation of a new target dataset. "
680+
" An existing target dataset (and its lock) will be"
681+
" permanently deleted before appending of slice datasets"
682+
" begins. WARNING: the deletion cannot be rolled back."
683+
),
684+
"type": "boolean",
685+
"default": False,
686+
},
677687
disable_rollback={
678688
"description": (
679689
"Disable rolling back dataset changes on failure."

zappend/context.py

+5
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,11 @@ def persist_mem_slices(self) -> bool:
189189
"""Whether to persist in-memory slice datasets."""
190190
return self._config.get("persist_mem_slices", False)
191191

192+
@property
193+
def force_new(self) -> bool:
194+
"""If set, an existing target dataset will be deleted."""
195+
return self._config.get("force_new", False)
196+
192197
@property
193198
def disable_rollback(self) -> bool:
194199
"""Whether to disable transaction rollbacks."""

zappend/fsutil/transaction.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -85,15 +85,18 @@ def __init__(
8585
):
8686
transaction_id = f"zappend-{uuid.uuid4()}"
8787
rollback_dir = temp_dir / transaction_id
88-
lock_file = target_dir.parent / (target_dir.filename + LOCK_EXT)
8988
self._id = transaction_id
9089
self._rollback_dir = rollback_dir
9190
self._rollback_file = rollback_dir / ROLLBACK_FILE
9291
self._target_dir = target_dir
93-
self._lock_file = lock_file
92+
self._lock_file = self.get_lock_file(target_dir)
9493
self._disable_rollback = disable_rollback
9594
self._entered_ctx = False
9695

96+
@classmethod
97+
def get_lock_file(cls, file_obj: FileObj) -> FileObj:
98+
return file_obj.parent / (file_obj.filename + LOCK_EXT)
99+
97100
@property
98101
def target_dir(self) -> FileObj:
99102
"""Target directory that is subject to this transaction."""

zappend/processor.py

+33-6
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,27 @@
22
# Permissions are hereby granted under the terms of the MIT License:
33
# https://opensource.org/licenses/MIT.
44

5-
from typing import Iterable, Any
65
import collections.abc
6+
from typing import Iterable, Any
77

88
import numpy as np
99
import xarray as xr
1010
import zarr.attrs
1111
import zarr.convenience
1212

1313
from .config import ConfigLike
14-
from .config import exclude_from_config
15-
from .config import normalize_config
16-
from .config import validate_config
1714
from .config import eval_dyn_config_attrs
15+
from .config import exclude_from_config
1816
from .config import get_dyn_config_attrs_env
1917
from .config import has_dyn_config_attrs
18+
from .config import normalize_config
19+
from .config import validate_config
2020
from .context import Context
21-
from .fsutil.transaction import Transaction
21+
from .fsutil import FileObj
2222
from .fsutil.transaction import RollbackCallback
23-
from .log import logger
23+
from .fsutil.transaction import Transaction
2424
from .log import configure_logging
25+
from .log import logger
2526
from .profiler import Profiler
2627
from .rollbackstore import RollbackStore
2728
from .slice import SliceObj
@@ -53,6 +54,12 @@ def __init__(self, config: ConfigLike = None, **kwargs):
5354
configure_logging(config.get("logging"))
5455
self._profiler = Profiler(config.get("profiling"))
5556
self._config = config
57+
if config.get("force_new"):
58+
logger.warning(
59+
f"Setting 'force_new' is enabled. This will"
60+
f" permanently delete existing targets (no rollback)."
61+
)
62+
delete_target_permanently(config)
5663

5764
def process_slices(self, slices: Iterable[SliceObj]):
5865
"""Process the given `slices`.
@@ -119,6 +126,26 @@ def process_slice(self, slice_obj: SliceObj, slice_index: int = 0):
119126
update_target_from_slice(ctx, slice_dataset, rollback_callback)
120127

121128

129+
def delete_target_permanently(config: dict[str, Any]):
130+
# TODO: I'm not happy with the config being a dict here, because it
131+
# implies and hence duplicates definition of default values.
132+
# Make Processor constructor turn config dict into config object,
133+
# Pass config object to Context and publish via ctx.config property.
134+
dry_run = config.get("dry_run", False)
135+
target_uri = config.get("target_dir")
136+
target_storage_options = config.get("target_storage_options")
137+
target_dir = FileObj(target_uri, storage_options=target_storage_options)
138+
if target_dir.exists():
139+
logger.warning(f"Permanently deleting {target_dir}")
140+
if not dry_run:
141+
target_dir.delete(recursive=True)
142+
target_lock = Transaction.get_lock_file(target_dir)
143+
if target_lock.exists():
144+
logger.warning(f"Permanently deleting {target_lock}")
145+
if not dry_run:
146+
target_lock.delete()
147+
148+
122149
def create_target_from_slice(
123150
ctx: Context, slice_ds: xr.Dataset, rollback_cb: RollbackCallback
124151
):

0 commit comments

Comments
 (0)