-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(api): add missing denormalized rules in refactor export_study_fla… #1676
Changes from all commits
961929d
a31feec
73ebae0
ed3f6bf
71cfa52
aa12b42
f65effa
998756f
91aaddc
8652528
2ba8eef
9c5699d
abd7490
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,26 @@ | ||
import functools | ||
import logging | ||
import os | ||
import shutil | ||
import tempfile | ||
import time | ||
import zipfile | ||
from abc import ABC | ||
from pathlib import Path | ||
from typing import IO, List, Optional, Union | ||
from typing import IO, List, Optional, Union, Sequence | ||
from uuid import uuid4 | ||
|
||
from antarest.core.config import Config | ||
from antarest.core.exceptions import BadOutputError, StudyOutputNotFoundError | ||
from antarest.core.interfaces.cache import CacheConstants, ICache | ||
from antarest.core.model import JSON | ||
from antarest.core.utils.utils import StopWatch, assert_this, extract_zip, unzip, zip_dir | ||
from antarest.core.utils.utils import StopWatch, extract_zip, unzip, zip_dir | ||
from antarest.study.common.studystorage import IStudyStorageService, T | ||
from antarest.study.common.utils import get_study_information | ||
from antarest.study.model import ( | ||
PatchOutputs, | ||
PatchStudy, | ||
RawStudy, | ||
StudyAdditionalData, | ||
StudyMetadataDTO, | ||
StudyMetadataPatchDTO, | ||
|
@@ -32,6 +37,61 @@ | |
logger = logging.getLogger(__name__) | ||
|
||
|
||
# noinspection PyUnusedLocal | ||
def _ignore_patterns(path_study: Path, directory: str, contents: Sequence[str]) -> Sequence[str]: | ||
return ["output"] if Path(directory) == path_study else [] | ||
|
||
|
||
def export_study_flat( | ||
path_study: Path, | ||
dest: Path, | ||
outputs: bool = True, | ||
output_list_filter: Optional[List[str]] = None, | ||
output_src_path: Optional[Path] = None, | ||
) -> None: | ||
""" | ||
Export study to destination | ||
|
||
Args: | ||
path_study: Study source path | ||
dest: Destination path. | ||
outputs: List of outputs to keep. | ||
output_list_filter: List of outputs to keep (None indicate all outputs). | ||
output_src_path: list of source outputs path. | ||
""" | ||
start_time = time.time() | ||
output_src_path = output_src_path or path_study / "output" | ||
output_dest_path = dest / "output" | ||
|
||
ignore_patterns = functools.partial(_ignore_patterns, path_study) | ||
# noinspection PyTypeChecker | ||
shutil.copytree(src=path_study, dst=dest, ignore=ignore_patterns) | ||
if outputs and output_src_path.is_dir(): | ||
if output_dest_path.exists(): | ||
shutil.rmtree(output_dest_path) | ||
if output_list_filter is not None: | ||
os.mkdir(output_dest_path) | ||
for output in output_list_filter: | ||
zip_path = output_src_path / f"{output}.zip" | ||
if zip_path.exists(): | ||
with zipfile.ZipFile(zip_path) as zf: | ||
zf.extractall(output_dest_path / output) | ||
else: | ||
shutil.copytree( | ||
src=output_src_path / output, | ||
dst=output_dest_path / output, | ||
) | ||
else: | ||
shutil.copytree( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ma remarque concernant la duplication des sorties et aussi valable ici. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Je te propose l'implémentation suivante : import functools
import shutil
import typing as t
import zipfile
from pathlib import Path
# noinspection PyUnusedLocal
def _ignore_patterns(path_study: Path, directory: str, contents: t.Sequence[str]) -> t.Sequence[str]:
return ["output"] if Path(directory) == path_study else []
def export_study_flat(
path_study: Path,
dest: Path,
outputs: bool = True,
output_list_filter: t.Sequence[str] = (),
) -> None:
"""
Export study to destination
Args:
path_study: Study source path
dest: Destination path.
outputs: List of outputs to keep.
output_list_filter: List of outputs to keep (None indicate all outputs).
"""
output_src_path = path_study / "output"
output_dest_path = dest / "output"
ignore_patterns = functools.partial(_ignore_patterns, path_study)
# noinspection PyTypeChecker
shutil.copytree(src=path_study, dst=dest, ignore=ignore_patterns)
if outputs and output_src_path.exists():
if not output_list_filter:
# Retrieve all directories or ZIP files without duplicates
output_list_filter = list(
{
# fmt: off
f.with_suffix("").name
for f in output_src_path.iterdir()
if f.is_dir() or f.suffix.lower() == ".zip"
# fmt: on
}
)
# Copy each folder or uncompress each ZIP file to the destination dir.
shutil.rmtree(output_dest_path, ignore_errors=True)
output_dest_path.mkdir()
for output in output_list_filter:
zip_path = output_src_path / f"{output}.zip"
if zip_path.exists():
with zipfile.ZipFile(zip_path) as zf:
zf.extractall(output_dest_path / output)
else:
shutil.copytree(
src=output_src_path / output,
dst=output_dest_path / output,
) |
||
src=output_src_path, | ||
dst=output_dest_path, | ||
) | ||
|
||
stop_time = time.time() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bof ! on peut supprimer. |
||
duration = "{:.3f}".format(stop_time - start_time) | ||
logger.info(f"Study {path_study} exported (flat mode) in {duration}s") | ||
|
||
|
||
class AbstractStorageService(IStudyStorageService[T], ABC): | ||
def __init__( | ||
self, | ||
|
@@ -230,7 +290,23 @@ def export_study(self, metadata: T, target: Path, outputs: bool = True) -> Path: | |
with tempfile.TemporaryDirectory(dir=self.config.storage.tmp_dir) as tmpdir: | ||
logger.info(f"Exporting study {metadata.id} to temporary path {tmpdir}") | ||
tmp_study_path = Path(tmpdir) / "tmp_copy" | ||
self.export_study_flat(metadata, tmp_study_path, outputs) | ||
if isinstance(metadata, RawStudy): | ||
export_study_flat( | ||
path_study=path_study, | ||
dest=tmp_study_path, | ||
outputs=outputs, | ||
) | ||
else: | ||
snapshot_path = path_study / "snapshot" | ||
output_src_path = path_study / "output" | ||
export_study_flat( | ||
path_study=snapshot_path, | ||
dest=tmp_study_path, | ||
outputs=outputs, | ||
output_src_path=output_src_path, | ||
) | ||
study = self.study_factory.create_from_fs(tmp_study_path, "", use_cache=False) | ||
study.tree.denormalize() | ||
stopwatch = StopWatch() | ||
zip_dir(tmp_study_path, target) | ||
stopwatch.log_elapsed(lambda x: logger.info(f"Study {path_study} exported (zipped mode) in {x}s")) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,14 +16,13 @@ | |
from antarest.core.requests import RequestParameters | ||
from antarest.core.utils.utils import extract_zip | ||
from antarest.study.model import DEFAULT_WORKSPACE_NAME, Patch, RawStudy, Study, StudyAdditionalData | ||
from antarest.study.storage.abstract_storage_service import AbstractStorageService | ||
from antarest.study.storage.abstract_storage_service import AbstractStorageService, export_study_flat | ||
from antarest.study.storage.patch_service import PatchService | ||
from antarest.study.storage.rawstudy.model.filesystem.config.model import FileStudyTreeConfig, FileStudyTreeConfigDTO | ||
from antarest.study.storage.rawstudy.model.filesystem.factory import FileStudy, StudyFactory | ||
from antarest.study.storage.rawstudy.model.filesystem.lazy_node import LazyNode | ||
from antarest.study.storage.utils import ( | ||
create_new_empty_study, | ||
export_study_flat, | ||
fix_study_root, | ||
get_default_workspace_path, | ||
is_managed, | ||
|
@@ -325,28 +324,21 @@ def import_study(self, metadata: RawStudy, stream: IO[bytes]) -> Study: | |
|
||
def export_study_flat( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Attention : la signature de cette fonction publique a changé. Cela entraîne une rupture de compatibilité. Il faut donc adapter le code appelant (je crois que c'est fait), mais aussi les tests unitaires. |
||
self, | ||
metadata: RawStudy, | ||
path_study: Path, | ||
dst_path: Path, | ||
outputs: bool = True, | ||
output_src_path: Optional[Path] = None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pour moi, ce paramètre est inutile car le répertoire de sortie est et restera toujours "output'. |
||
output_list_filter: Optional[List[str]] = None, | ||
denormalize: bool = True, | ||
) -> None: | ||
path_study = Path(metadata.path) | ||
|
||
if metadata.archived: | ||
self.unarchive(metadata) | ||
try: | ||
export_study_flat( | ||
path_study, | ||
dst_path, | ||
self.study_factory, | ||
outputs, | ||
output_list_filter, | ||
denormalize, | ||
) | ||
finally: | ||
if metadata.archived: | ||
shutil.rmtree(metadata.path) | ||
export_study_flat( | ||
path_study=path_study, | ||
dest=dst_path, | ||
outputs=outputs, | ||
output_list_filter=output_list_filter, | ||
output_src_path=output_src_path, | ||
) | ||
study = self.study_factory.create_from_fs(dst_path, "", use_cache=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. La "dénormalisation" doit rester optionnelle, il te faut donc conserver le paramètre |
||
study.tree.denormalize() | ||
|
||
def check_errors( | ||
self, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Le défaut de cet algorithme réside dans la possibilité de dupliquer des données. En effet, lorsqu'une sortie au format ZIP est décompressée, il n'y a aucune garantie que le fichier ZIP d'origine soit supprimé à la fin de la décompression. Autrement dit, si l'on décompresse le fichier "20230802-1628eco.zip" (par exemple, via une tâche planifiée), il est possible de se retrouver avec à la fois le répertoire "20230802-1628eco" et le fichier "20230802-1628eco.zip" d'origine.