Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(api): add missing denormalized rules in refactor export_study_flat #1676

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 0 additions & 21 deletions antarest/study/common/studystorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,27 +230,6 @@ def export_output(self, metadata: T, output_id: str, target: Path) -> None:
"""
raise NotImplementedError()

@abstractmethod
def export_study_flat(
    self,
    metadata: T,
    dst_path: Path,
    outputs: bool = True,
    output_list_filter: Optional[List[str]] = None,
    denormalize: bool = True,
) -> None:
    """
    Export the study to a destination directory in "flat" (unzipped) layout.

    Args:
        metadata: The study to export.
        dst_path: Destination path of the export.
        outputs: Whether to export the study's outputs at all.
        output_list_filter: List of output names to keep (None indicates all outputs).
        denormalize: Denormalize the study (replace matrix links by real matrices).

    Raises:
        NotImplementedError: Always; concrete storage services must override this.
    """
    raise NotImplementedError()

@abstractmethod
def get_synthesis(self, metadata: T, params: Optional[RequestParameters] = None) -> FileStudyTreeConfigDTO:
"""
Expand Down
47 changes: 42 additions & 5 deletions antarest/study/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import logging
import os
import shutil
from datetime import datetime, timedelta
from http import HTTPStatus
from pathlib import Path
Expand Down Expand Up @@ -87,6 +88,7 @@
StudySimResultDTO,
)
from antarest.study.repository import StudyMetadataRepository
from antarest.study.storage.abstract_storage_service import export_study_flat
from antarest.study.storage.rawstudy.model.filesystem.config.model import FileStudyTreeConfigDTO
from antarest.study.storage.rawstudy.model.filesystem.folder_node import ChildNotFoundError
from antarest.study.storage.rawstudy.model.filesystem.ini_file_node import IniFileNode
Expand Down Expand Up @@ -918,7 +920,18 @@ def export_study(
def export_task(notifier: TaskUpdateNotifier) -> TaskResult:
try:
target_study = self.get_study(uuid)
self.storage_service.get_storage(target_study).export_study(target_study, export_path, outputs)
storage = self.storage_service.get_storage(target_study)
if isinstance(target_study, RawStudy):
storage = cast(RawStudyService, storage)
if target_study.archived:
storage.unarchive(target_study)
try:
storage.export_study(target_study, export_path, outputs)
finally:
if target_study.archived:
shutil.rmtree(target_study.path)
else:
storage.export_study(target_study, export_path, outputs)
self.file_transfer_manager.set_ready(export_id)
return TaskResult(success=True, message=f"Study {uuid} successfully exported")
except Exception as e:
Expand Down Expand Up @@ -1020,10 +1033,34 @@ def export_study_flat(
study = self.get_study(uuid)
assert_permission(params.user, study, StudyPermissionType.READ)
self._assert_study_unarchived(study)

return self.storage_service.get_storage(study).export_study_flat(
study, dest, len(output_list or []) > 0, output_list
)
path_study = Path(study.path)
storage = self.storage_service.get_storage(study)
if isinstance(study, RawStudy):
storage = cast(RawStudyService, storage)
if study.archived:
storage.unarchive(study)
try:
return storage.export_study_flat(
path_study=path_study,
dst_path=dest,
outputs=len(output_list or []) > 0,
output_list_filter=output_list,
output_src_path=None,
)
finally:
if study.archived:
shutil.rmtree(study.path)
else:
snapshot_path = path_study / "snapshot"
output_src_path = path_study / "output"
storage = cast(VariantStudyService, storage)
return storage.export_study_flat(
path_study=snapshot_path,
dst_path=dest,
outputs=len(output_list or []) > 0,
output_list_filter=output_list,
output_src_path=output_src_path,
)

def delete_study(self, uuid: str, children: bool, params: RequestParameters) -> None:
"""
Expand Down
82 changes: 79 additions & 3 deletions antarest/study/storage/abstract_storage_service.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
import functools
import logging
import os
import shutil
import tempfile
import time
import zipfile
from abc import ABC
from pathlib import Path
from typing import IO, List, Optional, Union
from typing import IO, List, Optional, Union, Sequence
from uuid import uuid4

from antarest.core.config import Config
from antarest.core.exceptions import BadOutputError, StudyOutputNotFoundError
from antarest.core.interfaces.cache import CacheConstants, ICache
from antarest.core.model import JSON
from antarest.core.utils.utils import StopWatch, assert_this, extract_zip, unzip, zip_dir
from antarest.core.utils.utils import StopWatch, extract_zip, unzip, zip_dir
from antarest.study.common.studystorage import IStudyStorageService, T
from antarest.study.common.utils import get_study_information
from antarest.study.model import (
PatchOutputs,
PatchStudy,
RawStudy,
StudyAdditionalData,
StudyMetadataDTO,
StudyMetadataPatchDTO,
Expand All @@ -32,6 +37,61 @@
logger = logging.getLogger(__name__)


# noinspection PyUnusedLocal
def _ignore_patterns(path_study: Path, directory: str, contents: Sequence[str]) -> Sequence[str]:
return ["output"] if Path(directory) == path_study else []


def export_study_flat(
    path_study: Path,
    dest: Path,
    outputs: bool = True,
    output_list_filter: Optional[List[str]] = None,
    output_src_path: Optional[Path] = None,
) -> None:
    """
    Export a study to a destination directory in "flat" (unzipped) layout.

    Args:
        path_study: Study source path.
        dest: Destination path (must not exist yet; created by ``copytree``).
        outputs: Whether to export the study's outputs at all.
        output_list_filter: Names of the outputs to keep (``None`` means all outputs).
        output_src_path: Source directory of the outputs
            (defaults to ``<path_study>/output``).
    """
    start_time = time.time()
    output_src_path = output_src_path or path_study / "output"
    output_dest_path = dest / "output"

    # noinspection PyUnusedLocal
    def ignore_outputs(directory: str, contents: Sequence[str]) -> Sequence[str]:
        # Skip the top-level "output" folder: outputs are copied selectively below.
        return ["output"] if Path(directory) == path_study else []

    # noinspection PyTypeChecker
    shutil.copytree(src=path_study, dst=dest, ignore=ignore_outputs)

    if outputs and output_src_path.is_dir():
        if output_list_filter is None:
            # Collect every output — directory or ZIP file — WITHOUT duplicates.
            # Fixes data duplication: when both "xyz" and "xyz.zip" exist in the
            # source, the name is listed once and the ZIP version wins below,
            # instead of blindly copying both forms to the destination.
            output_list_filter = list(
                {
                    f.with_suffix("").name
                    for f in output_src_path.iterdir()
                    if f.is_dir() or f.suffix.lower() == ".zip"
                }
            )
        # Copy each folder or uncompress each ZIP file to the destination dir.
        shutil.rmtree(output_dest_path, ignore_errors=True)
        output_dest_path.mkdir()
        for output in output_list_filter:
            zip_path = output_src_path / f"{output}.zip"
            if zip_path.exists():
                # ZIP outputs are extracted, never copied as-is.
                with zipfile.ZipFile(zip_path) as zf:
                    zf.extractall(output_dest_path / output)
            else:
                shutil.copytree(
                    src=output_src_path / output,
                    dst=output_dest_path / output,
                )

    duration = "{:.3f}".format(time.time() - start_time)
    logging.getLogger(__name__).info(f"Study {path_study} exported (flat mode) in {duration}s")


class AbstractStorageService(IStudyStorageService[T], ABC):
def __init__(
self,
Expand Down Expand Up @@ -230,7 +290,23 @@ def export_study(self, metadata: T, target: Path, outputs: bool = True) -> Path:
with tempfile.TemporaryDirectory(dir=self.config.storage.tmp_dir) as tmpdir:
logger.info(f"Exporting study {metadata.id} to temporary path {tmpdir}")
tmp_study_path = Path(tmpdir) / "tmp_copy"
self.export_study_flat(metadata, tmp_study_path, outputs)
if isinstance(metadata, RawStudy):
export_study_flat(
path_study=path_study,
dest=tmp_study_path,
outputs=outputs,
)
else:
snapshot_path = path_study / "snapshot"
output_src_path = path_study / "output"
export_study_flat(
path_study=snapshot_path,
dest=tmp_study_path,
outputs=outputs,
output_src_path=output_src_path,
)
study = self.study_factory.create_from_fs(tmp_study_path, "", use_cache=False)
study.tree.denormalize()
stopwatch = StopWatch()
zip_dir(tmp_study_path, target)
stopwatch.log_elapsed(lambda x: logger.info(f"Study {path_study} exported (zipped mode) in {x}s"))
Expand Down
32 changes: 12 additions & 20 deletions antarest/study/storage/rawstudy/raw_study_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,13 @@
from antarest.core.requests import RequestParameters
from antarest.core.utils.utils import extract_zip
from antarest.study.model import DEFAULT_WORKSPACE_NAME, Patch, RawStudy, Study, StudyAdditionalData
from antarest.study.storage.abstract_storage_service import AbstractStorageService
from antarest.study.storage.abstract_storage_service import AbstractStorageService, export_study_flat
from antarest.study.storage.patch_service import PatchService
from antarest.study.storage.rawstudy.model.filesystem.config.model import FileStudyTreeConfig, FileStudyTreeConfigDTO
from antarest.study.storage.rawstudy.model.filesystem.factory import FileStudy, StudyFactory
from antarest.study.storage.rawstudy.model.filesystem.lazy_node import LazyNode
from antarest.study.storage.utils import (
create_new_empty_study,
export_study_flat,
fix_study_root,
get_default_workspace_path,
is_managed,
Expand Down Expand Up @@ -325,28 +324,21 @@ def import_study(self, metadata: RawStudy, stream: IO[bytes]) -> Study:

def export_study_flat(
    self,
    metadata: RawStudy,
    path_study: Path,
    dst_path: Path,
    outputs: bool = True,
    output_src_path: Optional[Path] = None,
    output_list_filter: Optional[List[str]] = None,
    denormalize: bool = True,
) -> None:
    """
    Export the study located at ``path_study`` to ``dst_path`` in flat (unzipped) layout.

    Args:
        metadata: Study metadata (kept for interface compatibility; files are
            read from ``path_study``, not from ``metadata.path``).
        path_study: Study source path.
        dst_path: Destination path.
        outputs: Whether to export the study's outputs at all.
        output_src_path: Source directory of the outputs (``None`` means
            ``<path_study>/output``).
        output_list_filter: List of output names to keep (``None`` indicates all outputs).
        denormalize: Denormalize the exported study (replace matrix links by
            real matrices).
    """
    export_study_flat(
        path_study=path_study,
        dest=dst_path,
        outputs=outputs,
        output_list_filter=output_list_filter,
        output_src_path=output_src_path,
    )
    # Fix: honor the `denormalize` flag — the parameter was accepted but
    # ignored, so denormalization always ran regardless of the caller's choice.
    if denormalize:
        study = self.study_factory.create_from_fs(dst_path, "", use_cache=False)
        study.tree.denormalize()

def check_errors(
self,
Expand Down
47 changes: 0 additions & 47 deletions antarest/study/storage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,50 +326,3 @@ def get_start_date(
first_week_size=first_week_size,
level=level,
)


def export_study_flat(
    path_study: Path,
    dest: Path,
    study_factory: StudyFactory,
    outputs: bool = True,
    output_list_filter: Optional[List[str]] = None,
    denormalize: bool = True,
    output_src_path: Optional[Path] = None,
) -> None:
    """Copy a study tree to ``dest``, optionally with its outputs, then denormalize it.

    Args:
        path_study: Study source path.
        dest: Destination path.
        study_factory: Factory used to re-read the exported study from disk.
        outputs: Whether to export the study's outputs at all.
        output_list_filter: Output names to keep (``None`` means all outputs).
        denormalize: Denormalize the exported study (replace matrix links by
            real matrices).
        output_src_path: Source directory of the outputs
            (defaults to ``<path_study>/output``).
    """
    checkpoint = time.time()

    src_outputs = output_src_path if output_src_path else path_study / "output"
    dst_outputs = dest / "output"

    def _skip_outputs(directory, contents):
        # Exclude the top-level "output" folder from the bulk copy.
        return ["output"] if str(directory) == str(path_study) else []

    shutil.copytree(src=path_study, dst=dest, ignore=_skip_outputs)

    if outputs and src_outputs.exists():
        if output_list_filter is None:
            # All directories or ZIP files, de-duplicated by base name.
            names = {entry.with_suffix("").name for entry in src_outputs.iterdir() if entry.is_dir() or entry.suffix == ".zip"}
            output_list_filter = list(names)
        # Copy each folder, or uncompress each ZIP file, into the destination dir.
        shutil.rmtree(dst_outputs, ignore_errors=True)
        dst_outputs.mkdir()
        for name in output_list_filter:
            archive = src_outputs / f"{name}.zip"
            if archive.exists():
                with ZipFile(archive) as zf:
                    zf.extractall(dst_outputs / name)
            else:
                shutil.copytree(src=src_outputs / name, dst=dst_outputs / name)

    exported_at = time.time()
    logger.info(f"Study {path_study} exported (flat mode) in {exported_at - checkpoint:.3f}s")

    study = study_factory.create_from_fs(dest, "", use_cache=False)
    if denormalize:
        study.tree.denormalize()
    # NOTE: the "denormalized" message is logged even when denormalize=False
    # (matches historical behavior).
    logger.info(f"Study {path_study} denormalized in {time.time() - exported_at:.3f}s")
Loading