diff --git a/antarest/core/utils/utils.py b/antarest/core/utils/utils.py index 1e337f402c..5b730b1552 100644 --- a/antarest/core/utils/utils.py +++ b/antarest/core/utils/utils.py @@ -52,7 +52,7 @@ def __init__(self, message: str = "Unsupported archive format") -> None: super().__init__(message) -def extract_zip(stream: t.BinaryIO, target_dir: Path) -> None: +def extract_archive(stream: t.BinaryIO, target_dir: Path) -> None: """ Extract a ZIP archive to a given destination. @@ -182,11 +182,12 @@ def zip_dir(dir_path: Path, zip_path: Path, remove_source_dir: bool = False) -> def seven_zip_dir(dir_path: Path, seven_zip_path: Path, remove_source_dir: bool = False) -> None: + len_dir_path = len(str(dir_path)) with SevenZipFile(seven_zip_path, "w") as szf: for root, _, files in os.walk(dir_path): for file in files: file_path = os.path.join(root, file) - szf.write(file_path, arcname=os.path.relpath(file_path, dir_path)) + szf.write(file_path, arcname=file_path[len_dir_path:]) if remove_source_dir: shutil.rmtree(dir_path) diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index 7877ae6f7f..4394e22bdc 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -14,7 +14,7 @@ from antarest.core.exceptions import BadOutputError, StudyOutputNotFoundError from antarest.core.interfaces.cache import CacheConstants, ICache from antarest.core.model import JSON, PublicMode -from antarest.core.utils.utils import StopWatch, extract_zip, seven_zip_dir, unzip, zip_dir +from antarest.core.utils.utils import StopWatch, extract_archive, seven_zip_dir, unzip, zip_dir from antarest.login.model import GroupDTO from antarest.study.common.default_values import AreasQueryFile, LinksQueryFile from antarest.study.common.studystorage import IStudyStorageService, T @@ -482,7 +482,7 @@ def import_output( path_output = Path(str(path_output) + ".zip") shutil.copyfile(output, 
path_output) else: - extract_zip(output, path_output) + extract_archive(output, path_output) stopwatch.log_elapsed(lambda t: logger.info(f"Copied output for {study_id} in {t}s")) fix_study_root(path_output) diff --git a/antarest/study/storage/rawstudy/model/filesystem/config/files.py b/antarest/study/storage/rawstudy/model/filesystem/config/files.py index cafc901644..a58e6a31aa 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/config/files.py +++ b/antarest/study/storage/rawstudy/model/filesystem/config/files.py @@ -8,6 +8,8 @@ from enum import Enum from pathlib import Path +from py7zr import SevenZipFile + from antarest.core.model import JSON from antarest.study.storage.rawstudy.ini_reader import IniReader from antarest.study.storage.rawstudy.model.filesystem.config.binding_constraint import ( @@ -47,6 +49,64 @@ class FileType(Enum): MULTI_INI = "multi_ini" +def extract_lines_from_archive(root: Path, posix_path: str) -> t.List[str]: + """ + Extract text lines from various types of files. + + Args: + root: 7zip or ZIP file containing the study. + posix_path: Relative path to the file to extract. + + Returns: + list of lines + """ + if root.suffix.lower() == ".zip": + with zipfile.ZipFile(root) as zf: + try: + with zf.open(posix_path) as f: + text = f.read().decode("utf-8") + return text.splitlines(keepends=False) + except KeyError: + # File not found in the ZIP archive + return [] + elif root.suffix.lower() == ".7z": + raise NotImplementedError("7z archive not supported yet") + else: + raise ValueError(f"Unsupported file type: {root}") + + +def extract_data_from_archive( + root: Path, + posix_path: str, + reader: IniReader, +) -> t.Dict[str, t.Any]: + """ + Extract and process data from various types of files. + + Args: + root: 7zip or ZIP file containing the study. + posix_path: Relative path to the file to extract. + reader: IniReader object to use for processing the file. 
+ + Returns: + The content of the file, processed according to its type: + - SIMPLE_INI or MULTI_INI: dictionary of keys/values + """ + if root.suffix.lower() == ".zip": + with zipfile.ZipFile(root) as zf: + try: + with zf.open(posix_path) as f: + buffer = io.StringIO(f.read().decode("utf-8")) + return reader.read(buffer) + except KeyError: + # File not found in the ZIP archive + return {} + elif root.suffix.lower() == ".7z": + raise NotImplementedError("7z archive not supported yet") + else: + raise ValueError(f"Unsupported file type: {root}") + + def build(study_path: Path, study_id: str, output_path: t.Optional[Path] = None) -> "FileStudyTreeConfig": """ Extracts data from the filesystem to build a study config. @@ -60,10 +120,10 @@ def build(study_path: Path, study_id: str, output_path: t.Optional[Path] = None) Returns: An instance of `FileStudyTreeConfig` filled with the study data. """ - is_zip_file = study_path.suffix.lower() == ".zip" + is_archive = study_path.suffix.lower() in {".zip", ".7z"} # Study directory to use if the study is compressed - study_dir = study_path.with_suffix("") if is_zip_file else study_path + study_dir = study_path.with_suffix("") if is_archive else study_path (sns, asi, enr_modelling) = _parse_parameters(study_path) outputs_dir: Path = output_path or study_path / "output" @@ -80,52 +140,10 @@ def build(study_path: Path, study_id: str, output_path: t.Optional[Path] = None) store_new_set=sns, archive_input_series=asi, enr_modelling=enr_modelling, - zip_path=study_path if is_zip_file else None, + zip_path=study_path if is_archive else None, ) -def _extract_text_from_zip(root: Path, posix_path: str) -> t.Sequence[str]: - """ - Extracts text from a file inside a ZIP archive and returns it as a list of lines. - - Args: - root: The path to the ZIP archive. - posix_path: The relative path to the file inside the ZIP archive. - - Returns: - A list of lines in the file. If the file is not found, an empty list is returned. 
- """ - with zipfile.ZipFile(root) as zf: - try: - with zf.open(posix_path) as f: - text = f.read().decode("utf-8") - return text.splitlines(keepends=False) - except KeyError: - return [] - - -def _extract_ini_from_zip(root: Path, posix_path: str, multi_ini_keys: t.Sequence[str] = ()) -> t.Mapping[str, t.Any]: - """ - Extracts data from an INI file inside a ZIP archive and returns it as a dictionary. - - Args: - root: The path to the ZIP archive. - posix_path: The relative path to the file inside the ZIP archive. - multi_ini_keys: List of keys to use for multi INI files. - - Returns: - A dictionary of keys/values in the INI file. If the file is not found, an empty dictionary is returned. - """ - reader = IniReader(multi_ini_keys) - with zipfile.ZipFile(root) as zf: - try: - with zf.open(posix_path) as f: - buffer = io.StringIO(f.read().decode("utf-8")) - return reader.read(buffer) - except KeyError: - return {} - - def _extract_data_from_file( root: Path, inside_root_path: Path, @@ -147,13 +165,13 @@ def _extract_data_from_file( - SIMPLE_INI or MULTI_INI: dictionary of keys/values """ - is_zip_file: bool = root.suffix.lower() == ".zip" + is_archive: bool = root.suffix.lower() in {".zip", ".7z"} posix_path: str = inside_root_path.as_posix() if file_type == FileType.TXT: # Parse the file as a list of lines, return an empty list if missing. - if is_zip_file: - return _extract_text_from_zip(root, posix_path) + if is_archive: + return extract_lines_from_archive(root, posix_path) else: output_data_path = root / inside_root_path try: @@ -163,12 +181,12 @@ def _extract_data_from_file( elif file_type in {FileType.MULTI_INI, FileType.SIMPLE_INI}: # Parse the file as a dictionary of keys/values, return an empty dictionary if missing. 
- if is_zip_file: - return _extract_ini_from_zip(root, posix_path, multi_ini_keys=multi_ini_keys) + reader = IniReader(multi_ini_keys) + if is_archive: + return extract_data_from_archive(root, posix_path, reader) else: output_data_path = root / inside_root_path try: - reader = IniReader(multi_ini_keys) return reader.read(output_data_path) except FileNotFoundError: return {} diff --git a/antarest/study/storage/rawstudy/raw_study_service.py b/antarest/study/storage/rawstudy/raw_study_service.py index 659fb1b1c1..a88f82a3a8 100644 --- a/antarest/study/storage/rawstudy/raw_study_service.py +++ b/antarest/study/storage/rawstudy/raw_study_service.py @@ -6,14 +6,13 @@ from pathlib import Path from threading import Thread from uuid import uuid4 -from zipfile import ZipFile from antarest.core.config import Config from antarest.core.exceptions import StudyDeletionNotAllowed from antarest.core.interfaces.cache import ICache from antarest.core.model import PublicMode from antarest.core.requests import RequestParameters -from antarest.core.utils.utils import extract_zip +from antarest.core.utils.utils import extract_archive from antarest.study.model import DEFAULT_WORKSPACE_NAME, Patch, RawStudy, Study, StudyAdditionalData from antarest.study.storage.abstract_storage_service import AbstractStorageService from antarest.study.storage.patch_service import PatchService @@ -61,14 +60,17 @@ def __init__( ) self.cleanup_thread.start() - def update_from_raw_meta(self, metadata: RawStudy, fallback_on_default: t.Optional[bool] = False) -> None: + def update_from_raw_meta( + self, metadata: RawStudy, fallback_on_default: t.Optional[bool] = False, study_path: t.Optional[Path] = None + ) -> None: """ Update metadata from study raw metadata Args: metadata: study fallback_on_default: use default values in case of failure + study_path: optional study path """ - path = self.get_study_path(metadata) + path = study_path if study_path is not None else self.get_study_path(metadata) study = 
self.study_factory.create_from_fs(path, study_id="") try: raw_meta = study.tree.get(["study", "antares"]) @@ -307,19 +309,22 @@ def import_study(self, metadata: RawStudy, stream: t.BinaryIO) -> Study: Raises: BadArchiveContent: If the archive is corrupted or in an unknown format. """ - path_study = Path(metadata.path) - path_study.mkdir() + study_path = Path(metadata.path) + study_path.mkdir() try: - extract_zip(stream, path_study) - fix_study_root(path_study) - self.update_from_raw_meta(metadata) + extract_archive(stream, study_path) + fix_study_root(study_path) + self.update_from_raw_meta( + metadata, + study_path=study_path, + ) except Exception: - shutil.rmtree(path_study) + shutil.rmtree(study_path) raise - metadata.path = str(path_study) + metadata.path = str(study_path) return metadata def export_study_flat( diff --git a/antarest/study/storage/utils.py b/antarest/study/storage/utils.py index 5dc97b081b..fefcf25529 100644 --- a/antarest/study/storage/utils.py +++ b/antarest/study/storage/utils.py @@ -65,7 +65,7 @@ def fix_study_root(study_path: Path) -> None: study_path: the study initial root path """ # TODO: what if it is a zipped output ? - if study_path.suffix == ".zip": + if study_path.suffix in {".zip", ".7z"}: return None if not study_path.is_dir(): diff --git a/tests/core/utils/test_extract_zip.py b/tests/core/utils/test_extract_zip.py index 11c3c11ff3..8f1944462c 100644 --- a/tests/core/utils/test_extract_zip.py +++ b/tests/core/utils/test_extract_zip.py @@ -5,7 +5,7 @@ import py7zr import pytest -from antarest.core.utils.utils import BadArchiveContent, extract_zip +from antarest.core.utils.utils import BadArchiveContent, extract_archive class TestExtractZip: @@ -21,7 +21,7 @@ def test_extract_zip__with_zip(self, tmp_path: Path): # Then, call the function with open(zip_path, mode="rb") as stream: - extract_zip(stream, tmp_path) + extract_archive(stream, tmp_path) # Finally, check the result assert (tmp_path / "test.txt").read_text() == "Hello world!" 
@@ -34,7 +34,7 @@ def test_extract_zip__with_7z(self, tmp_path: Path): # Then, call the function with open(zip_path, mode="rb") as stream: - extract_zip(stream, tmp_path) + extract_archive(stream, tmp_path) # Finally, check the result assert (tmp_path / "test.txt").read_text() == "Hello world!" @@ -43,22 +43,22 @@ def test_extract_zip__empty_file(self): stream = io.BytesIO(b"") with pytest.raises(BadArchiveContent): - extract_zip(stream, Path("dummy/path")) + extract_archive(stream, Path("dummy/path")) def test_extract_zip__corrupted_zip(self): stream = io.BytesIO(b"PK\x03\x04 BLURP") with pytest.raises(BadArchiveContent): - extract_zip(stream, Path("dummy/path")) + extract_archive(stream, Path("dummy/path")) def test_extract_zip__corrupted_7z(self): stream = io.BytesIO(b"7z BLURP") with pytest.raises(BadArchiveContent): - extract_zip(stream, Path("dummy/path")) + extract_archive(stream, Path("dummy/path")) def test_extract_zip__unknown_format(self): stream = io.BytesIO(b"ZORRO") with pytest.raises(BadArchiveContent): - extract_zip(stream, Path("dummy/path")) + extract_archive(stream, Path("dummy/path")) diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py index 940b4c785a..a15a824684 100644 --- a/tests/integration/test_integration.py +++ b/tests/integration/test_integration.py @@ -1967,7 +1967,7 @@ def test_archive(client: TestClient, admin_access_token: str, study_id: str, tmp res = client.get(f"/v1/studies/{study_id}", headers=admin_headers) assert res.json()["archived"] - assert (tmp_path / "archive_dir" / f"{study_id}.zip").exists() + assert (tmp_path / "archive_dir" / f"{study_id}.7z").exists() res = client.put(f"/v1/studies/{study_id}/unarchive", headers=admin_headers) @@ -1977,12 +1977,12 @@ def test_archive(client: TestClient, admin_access_token: str, study_id: str, tmp f"/v1/tasks/{task_id}", headers=admin_headers, ).json()["status"] - == 3 + == 3, ) res = client.get(f"/v1/studies/{study_id}", headers=admin_headers) 
assert not res.json()["archived"] - assert not (tmp_path / "archive_dir" / f"{study_id}.zip").exists() + assert not (tmp_path / "archive_dir" / f"{study_id}.7z").exists() def test_maintenance(client: TestClient, admin_access_token: str, study_id: str) -> None: diff --git a/tests/storage/business/test_export.py b/tests/storage/business/test_export.py index 759bc99ed3..a1515e34c6 100644 --- a/tests/storage/business/test_export.py +++ b/tests/storage/business/test_export.py @@ -4,6 +4,7 @@ import pytest from checksumdir import dirhash +from py7zr import SevenZipFile from antarest.core.config import Config, StorageConfig from antarest.study.model import DEFAULT_WORKSPACE_NAME, RawStudy @@ -36,7 +37,13 @@ def test_export_file(tmp_path: Path): @pytest.mark.unit_test -@pytest.mark.parametrize("outputs", [True, False]) +@pytest.mark.parametrize( + "outputs", + [ + True, + False, + ], +) def test_export_file(tmp_path: Path, outputs: bool): root = tmp_path / "folder" root.mkdir() @@ -46,7 +53,7 @@ def test_export_file(tmp_path: Path, outputs: bool): (root / "output/results1").mkdir(parents=True) (root / "output/results1/file.txt").write_text("42") - export_path = tmp_path / "study.zip" + export_path = tmp_path / "study.7z" study_factory = Mock() study_service = RawStudyService( @@ -62,10 +69,11 @@ def test_export_file(tmp_path: Path, outputs: bool): study_factory.create_from_fs.return_value = study_tree study_service.export_study(study, export_path, outputs=outputs) - with ZipFile(export_path) as zipf: - assert "file.txt" in zipf.namelist() - assert "test/file.txt" in zipf.namelist() - assert ("output/results1/file.txt" in zipf.namelist()) == outputs + with SevenZipFile(export_path) as szf: + szf_files = set(szf.getnames()) + assert "file.txt" in szf_files + assert "test/file.txt" in szf_files + assert ("output/results1/file.txt" in szf_files) == outputs @pytest.mark.unit_test diff --git a/tests/study/storage/test_abstract_storage_service.py 
b/tests/study/storage/test_abstract_storage_service.py index 09d82ea083..317bb97458 100644 --- a/tests/study/storage/test_abstract_storage_service.py +++ b/tests/study/storage/test_abstract_storage_service.py @@ -1,9 +1,10 @@ import datetime -import zipfile from pathlib import Path from typing import List, Optional, Sequence from unittest.mock import Mock, call +from py7zr import SevenZipFile + from antarest.core.config import Config, StorageConfig from antarest.core.interfaces.cache import ICache from antarest.core.model import PublicMode @@ -137,15 +138,15 @@ def test_export_study(self, tmp_path: Path) -> None: ## Check the `export_study` function service.export_study_flat = Mock(return_value=None) - target_path = tmp_path / "export.zip" + target_path = tmp_path / "export.7z" actual = service.export_study(metadata, target_path, outputs=True) assert actual == target_path ## Check the call to export_study_flat assert service.export_study_flat.mock_calls == [call(metadata, TmpCopy(tmp_path), True)] - ## Check that the ZIP file exist and is valid - with zipfile.ZipFile(target_path) as zf: - # Actually, there is nothing is the ZIP file, + ## Check that the 7zip file exists and is valid + with SevenZipFile(target_path) as szf: + # Actually, there is nothing in the .7z file, # because the Study files doesn't really exist. - assert not zf.namelist() + assert not szf.getnames()