diff --git a/antarest/core/exceptions.py b/antarest/core/exceptions.py index f26a16c59b..f0818d559c 100644 --- a/antarest/core/exceptions.py +++ b/antarest/core/exceptions.py @@ -655,7 +655,7 @@ def __init__(self, message: str) -> None: super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message) -class CannotScanInternalWorkspace(HTTPException): +class CannotAccessInternalWorkspace(HTTPException): def __init__(self) -> None: super().__init__( HTTPStatus.BAD_REQUEST, @@ -668,6 +668,15 @@ def __init__(self, message: str) -> None: super().__init__(HTTPStatus.NOT_FOUND, message) +class WorkspaceNotFound(HTTPException): + """ + This will be raised when we try to load a workspace that does not exist + """ + + def __init__(self, message: str) -> None: + super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message) + + class BadArchiveContent(Exception): """ Exception raised when the archive file is corrupted (or unknown). @@ -675,3 +684,12 @@ class BadArchiveContent(Exception): def __init__(self, message: str = "Unsupported archive format") -> None: super().__init__(message) + + +class FolderNotFoundInWorkspace(HTTPException): + """ + This will be raised when we try to load a folder that does not exist + """ + + def __init__(self, message: str) -> None: + super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message) diff --git a/antarest/service_creator.py b/antarest/service_creator.py index 3bee50cc45..b133d29b9c 100644 --- a/antarest/service_creator.py +++ b/antarest/service_creator.py @@ -44,7 +44,9 @@ from antarest.study.main import build_study_service from antarest.study.service import StudyService from antarest.study.storage.auto_archive_service import AutoArchiveService +from antarest.study.storage.explorer_service import Explorer from antarest.study.storage.rawstudy.watcher import Watcher +from antarest.study.web.explorer_blueprint import create_explorer_routes from antarest.study.web.watcher_blueprint import create_watcher_routes from antarest.worker.archive_worker 
import ArchiveWorker from antarest.worker.worker import AbstractWorker @@ -187,6 +189,14 @@ def create_watcher( return watcher +def create_explorer(config: Config, app_ctxt: t.Optional[AppBuildContext]) -> t.Any: + explorer = Explorer(config=config) + if app_ctxt: + app_ctxt.api_root.include_router(create_explorer_routes(config=config, explorer=explorer)) + + return explorer + + def create_matrix_gc( config: Config, app_ctxt: t.Optional[AppBuildContext], @@ -249,6 +259,9 @@ def create_services( watcher = create_watcher(config=config, app_ctxt=app_ctxt, study_service=study_service) services["watcher"] = watcher + explorer_service = create_explorer(config=config, app_ctxt=app_ctxt) + services["explorer"] = explorer_service + if config.server.services and Module.MATRIX_GC.value in config.server.services or create_all: matrix_garbage_collector = create_matrix_gc( config=config, diff --git a/antarest/study/model.py b/antarest/study/model.py index aec84fcc7d..aa24c26b09 100644 --- a/antarest/study/model.py +++ b/antarest/study/model.py @@ -319,6 +319,25 @@ class StudyFolder: groups: t.List[Group] +class NonStudyFolder(AntaresBaseModel): + """ + DTO used by the explorer to list directories that aren't study directories. This is useful for the front-end, + so the user can navigate in the hierarchy. + """ + + path: Path + workspace: str + name: str + + +class WorkspaceMetadata(AntaresBaseModel): + """ + DTO used by the explorer to list all workspaces + """ + + name: str + + class PatchStudy(AntaresBaseModel): scenario: t.Optional[str] = None doc: t.Optional[str] = None diff --git a/antarest/study/service.py b/antarest/study/service.py index 5bd0cd4d88..d3cc605dab 100644 --- a/antarest/study/service.py +++ b/antarest/study/service.py @@ -858,13 +858,17 @@ def remove_duplicates(self) -> None: if ids: # Check if ids is not empty self.repository.delete(*ids) - def sync_studies_on_disk(self, folders: t.List[StudyFolder], directory: t.Optional[Path] = None) -> None: + def
sync_studies_on_disk( + self, folders: t.List[StudyFolder], directory: t.Optional[Path] = None, recursive: bool = True + ) -> None: """ Used by watcher to send list of studies present on filesystem. Args: folders: list of studies currently present on folder directory: directory of studies that will be watched + recursive: if False, the delta will apply only to the studies in "directory", otherwise + it will apply to all studies having a path that descend from "directory". Returns: @@ -873,11 +877,15 @@ def sync_studies_on_disk(self, folders: t.List[StudyFolder], directory: t.Option clean_up_missing_studies_threshold = now - timedelta(days=MAX_MISSING_STUDY_TIMEOUT) all_studies = self.repository.get_all_raw() if directory: - all_studies = [raw_study for raw_study in all_studies if directory in Path(raw_study.path).parents] + if recursive: + all_studies = [raw_study for raw_study in all_studies if directory in Path(raw_study.path).parents] + else: + all_studies = [raw_study for raw_study in all_studies if directory == Path(raw_study.path).parent] studies_by_path = {study.path: study for study in all_studies} # delete orphan studies on database paths = [str(f.path) for f in folders] + for study in all_studies: if ( isinstance(study, RawStudy) @@ -900,7 +908,7 @@ def sync_studies_on_disk(self, folders: t.List[StudyFolder], directory: t.Option permissions=PermissionInfo.from_study(study), ) ) - elif study.missing < clean_up_missing_studies_threshold: + if study.missing < clean_up_missing_studies_threshold: logger.info( "Study %s at %s is not present in disk and will be deleted", study.id, diff --git a/antarest/study/storage/explorer_service.py b/antarest/study/storage/explorer_service.py new file mode 100644 index 0000000000..5610f3e5f8 --- /dev/null +++ b/antarest/study/storage/explorer_service.py @@ -0,0 +1,60 @@ +# Copyright (c) 2024, RTE (https://www.rte-france.com) +# +# See AUTHORS.txt +# +# This Source Code Form is subject to the terms of the Mozilla Public +# 
License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# SPDX-License-Identifier: MPL-2.0 +# +# This file is part of the Antares project. + +import logging +from typing import List + +from antarest.core.config import Config +from antarest.study.model import DEFAULT_WORKSPACE_NAME, NonStudyFolder, WorkspaceMetadata +from antarest.study.storage.utils import ( + get_folder_from_workspace, + get_workspace_from_config, + is_study_folder, + should_ignore_folder_for_scan, +) + +logger = logging.getLogger(__name__) + + +class Explorer: + def __init__(self, config: Config): + self.config = config + + def list_dir( + self, + workspace_name: str, + workspace_directory_path: str, + ) -> List[NonStudyFolder]: + """ + return a list of all directories under workspace_directory_path, that aren't studies. + """ + workspace = get_workspace_from_config(self.config, workspace_name, default_allowed=False) + directory_path = get_folder_from_workspace(workspace, workspace_directory_path) + directories = [] + for child in directory_path.iterdir(): + if child.is_dir() and not is_study_folder(child) and not should_ignore_folder_for_scan(child): + # we don't want to expose the full absolute path on the server + child_rel_path = child.relative_to(workspace.path) + directories.append(NonStudyFolder(path=child_rel_path, workspace=workspace_name, name=child.name)) + return directories + + def list_workspaces( + self, + ) -> List[WorkspaceMetadata]: + """ + Return the list of all configured workspace name, except the default one. 
+ """ + return [ + WorkspaceMetadata(name=workspace_name) + for workspace_name in self.config.storage.workspaces.keys() + if workspace_name != DEFAULT_WORKSPACE_NAME + ] diff --git a/antarest/study/storage/rawstudy/watcher.py b/antarest/study/storage/rawstudy/watcher.py index d00a7f7204..3166bbe40f 100644 --- a/antarest/study/storage/rawstudy/watcher.py +++ b/antarest/study/storage/rawstudy/watcher.py @@ -14,17 +14,13 @@ import re import tempfile from html import escape -from http import HTTPStatus -from http.client import HTTPException from pathlib import Path from time import sleep, time from typing import List, Optional -from antares.study.version.upgrade_app import is_temporary_upgrade_dir from filelock import FileLock from antarest.core.config import Config -from antarest.core.exceptions import CannotScanInternalWorkspace from antarest.core.interfaces.service import IService from antarest.core.requests import RequestParameters from antarest.core.tasks.model import TaskResult, TaskType @@ -34,7 +30,11 @@ from antarest.login.model import Group from antarest.study.model import DEFAULT_WORKSPACE_NAME, StudyFolder from antarest.study.service import StudyService -from antarest.study.storage.variantstudy.model.command.generate_thermal_cluster_timeseries import is_ts_gen_tmp_dir +from antarest.study.storage.utils import ( + get_folder_from_workspace, + get_workspace_from_config, + should_ignore_folder_for_scan, +) logger = logging.getLogger(__name__) @@ -49,11 +49,6 @@ def __call__(self, duration: float) -> None: logger.info(f"Workspace {self.workspace_name} scanned in {duration}s") -class WorkspaceNotFound(HTTPException): - def __init__(self, message: str) -> None: - super().__init__(HTTPStatus.BAD_REQUEST, message) - - class Watcher(IService): """ Files Watcher to listen raw studies changes and trigger a database update. 
@@ -128,28 +123,26 @@ def _rec_scan( groups: List[Group], filter_in: List[str], filter_out: List[str], + max_depth: Optional[int] = None, ) -> List[StudyFolder]: try: - if (path / "AW_NO_SCAN").exists(): - logger.info(f"No scan directive file found. Will skip further scan of folder {path}") - return [] - - if is_temporary_upgrade_dir(path): - logger.info(f"Upgrade temporary folder found. Will skip further scan of folder {path}") - return [] - - if is_ts_gen_tmp_dir(path): - logger.info(f"TS generation temporary folder found. Will skip further scan of folder {path}") + if should_ignore_folder_for_scan(path): return [] if (path / "study.antares").exists(): logger.debug(f"Study {path.name} found in {workspace}") return [StudyFolder(path, workspace, groups)] + if max_depth is not None and max_depth <= 0: + logger.info(f"Scan was configured to not go any deeper, max _depth : {max_depth}") + return [] + else: folders: List[StudyFolder] = list() if path.is_dir(): for child in path.iterdir(): + # every sibling gets the same remaining depth: never decrement max_depth itself inside the loop + child_max_depth = None if max_depth is None else max_depth - 1 try: if ( (child.is_dir()) @@ -157,11 +150,7 @@ def _rec_scan( and not any([re.search(regex, child.name) for regex in filter_out]) ): folders = folders + self._rec_scan( - child, - workspace, - groups, - filter_in, - filter_out, + child, workspace, groups, filter_in, filter_out, child_max_depth ) except Exception as e: logger.error(f"Failed to scan dir {child}", exc_info=e) @@ -173,6 +162,7 @@ def _rec_scan( def oneshot_scan( self, params: RequestParameters, + recursive: bool, workspace: Optional[str] = None, path: Optional[str] = None, ) -> str: @@ -183,11 +173,12 @@ def oneshot_scan( params: user parameters workspace: workspace to scan path: relative path to folder to scan + recursive: if true, scan recursively all subfolders otherwise only the first level """ # noinspection PyUnusedLocal def scan_task(notifier: ITaskNotifier) -> TaskResult: - self.scan(workspace, path) + self.scan(recursive, workspace, path) return
TaskResult(success=True, message="Scan completed") return self.task_service.add_task( @@ -202,34 +193,32 @@ def scan_task(notifier: ITaskNotifier) -> TaskResult: def scan( self, + recursive: bool = True, workspace_name: Optional[str] = None, workspace_directory_path: Optional[str] = None, ) -> None: """ Scan recursively list of studies present on disk. Send updated list to study service. + + Args: + recursive: if true, scan recursively all subfolders otherwise only the first level Returns: """ stopwatch = StopWatch() studies: List[StudyFolder] = list() directory_path: Optional[Path] = None + + # max depth when we call _rec_scan + max_depth = None if recursive else 1 + if workspace_directory_path is not None and workspace_name: - if workspace_name == DEFAULT_WORKSPACE_NAME: - raise CannotScanInternalWorkspace - try: - workspace = self.config.storage.workspaces[workspace_name] - except KeyError: - logger.error(f"Workspace {workspace_name} not found") - raise WorkspaceNotFound(f"Workspace {workspace_name} not found") + workspace = get_workspace_from_config(self.config, workspace_name) + directory_path = get_folder_from_workspace(workspace, workspace_directory_path) groups = [Group(id=escape(g), name=escape(g)) for g in workspace.groups] - directory_path = workspace.path / workspace_directory_path studies = self._rec_scan( - directory_path, - workspace_name, - groups, - workspace.filter_in, - workspace.filter_out, + directory_path, workspace_name, groups, workspace.filter_in, workspace.filter_out, max_depth=max_depth ) elif workspace_directory_path is None and workspace_name is None: for name, workspace in self.config.storage.workspaces.items(): @@ -237,11 +226,7 @@ def scan( path = Path(workspace.path) groups = [Group(id=escape(g), name=escape(g)) for g in workspace.groups] studies = studies + self._rec_scan( - path, - name, - groups, - workspace.filter_in, - workspace.filter_out, + path, name, groups, workspace.filter_in, workspace.filter_out, max_depth=max_depth ) 
stopwatch.log_elapsed(_LogScanDuration(name)) else: @@ -250,7 +235,7 @@ def scan( logger.info(f"Waiting for FileLock to synchronize {directory_path or 'all studies'}") with FileLock(Watcher.SCAN_LOCK): logger.info(f"FileLock acquired to synchronize for {directory_path or 'all studies'}") - self.study_service.sync_studies_on_disk(studies, directory_path) + self.study_service.sync_studies_on_disk(studies, directory_path, recursive) stopwatch.log_elapsed( lambda x: logger.info(f"{directory_path or 'All studies'} synchronized in {x}s"), since_start=True, diff --git a/antarest/study/storage/utils.py b/antarest/study/storage/utils.py index 85ba04b630..ac4ba9acfb 100644 --- a/antarest/study/storage/utils.py +++ b/antarest/study/storage/utils.py @@ -24,8 +24,16 @@ from zipfile import ZipFile from antares.study.version import StudyVersion - -from antarest.core.exceptions import StudyValidationError, UnsupportedStudyVersion +from antares.study.version.upgrade_app import is_temporary_upgrade_dir + +from antarest.core.config import Config, WorkspaceConfig +from antarest.core.exceptions import ( + CannotAccessInternalWorkspace, + FolderNotFoundInWorkspace, + StudyValidationError, + UnsupportedStudyVersion, + WorkspaceNotFound, +) from antarest.core.interfaces.cache import CacheConstants, ICache from antarest.core.jwt import JWTUser from antarest.core.model import PermissionInfo, StudyPermissionType @@ -50,6 +58,10 @@ logger = logging.getLogger(__name__) +TS_GEN_PREFIX = "~" +TS_GEN_SUFFIX = ".thermal_timeseries_gen.tmp" + + # noinspection SpellCheckingInspection def update_antares_info(metadata: Study, study_tree: FileStudyTree, *, update_author: bool) -> None: """ @@ -390,3 +402,76 @@ def ignore_outputs(directory: str, _: t.Sequence[str]) -> t.Sequence[str]: study.tree.denormalize() duration = "{:.3f}".format(time.time() - stop_time) logger.info(f"Study '{study_dir}' denormalized in {duration}s") + + +def is_folder_safe(workspace: WorkspaceConfig, folder: str) -> bool: + """ + 
Check if the provided folder path is safe, to prevent path traversal attacks. + + Args: + workspace: The workspace configuration. + folder: The folder path, resolved against the workspace path. + + Returns: + `True` if the folder path is safe, `False` otherwise. + """ + requested_path = workspace.path / folder + requested_path = requested_path.resolve() + safe_dir = workspace.path.resolve() + # check whether the resolved requested path is the workspace directory itself or located inside it + return requested_path.is_relative_to(safe_dir) + + +def is_study_folder(path: Path) -> bool: + return path.is_dir() and (path / "study.antares").exists() + + +def is_aw_no_scan(path: Path) -> bool: + return (path / "AW_NO_SCAN").exists() + + +def get_workspace_from_config(config: Config, workspace_name: str, default_allowed: bool = False) -> WorkspaceConfig: + if not default_allowed and workspace_name == DEFAULT_WORKSPACE_NAME: + raise CannotAccessInternalWorkspace() + try: + return config.storage.workspaces[workspace_name] + except KeyError: + logger.error(f"Workspace {workspace_name} not found") + raise WorkspaceNotFound(f"Workspace {workspace_name} not found") + + +def get_folder_from_workspace(workspace: WorkspaceConfig, folder: str) -> Path: + if not is_folder_safe(workspace, folder): + raise FolderNotFoundInWorkspace(f"Invalid path for folder: {folder} in workspace {workspace}") + folder_path = workspace.path / folder + if not folder_path.is_dir(): + raise FolderNotFoundInWorkspace(f"Provided path is not dir: {folder} in workspace {workspace}") + return folder_path + + +def is_ts_gen_tmp_dir(path: Path) -> bool: + """ + Check if a path is a temporary directory used for thermal timeseries generation + Args: + path: the path to check + + Returns: + True if the path is a temporary directory used for thermal timeseries generation + """ + return path.name.startswith(TS_GEN_PREFIX) and "".join(path.suffixes[-2:]) == TS_GEN_SUFFIX and path.is_dir() + + +def should_ignore_folder_for_scan(path: Path) -> bool: + if is_aw_no_scan(path): + logger.info(f"No
scan directive file found. Will skip further scan of folder {path}") + return True + + if is_temporary_upgrade_dir(path): + logger.info(f"Upgrade temporary folder found. Will skip further scan of folder {path}") + return True + + if is_ts_gen_tmp_dir(path): + logger.info(f"TS generation temporary folder found. Will skip further scan of folder {path}") + return True + + return False diff --git a/antarest/study/storage/variantstudy/model/command/generate_thermal_cluster_timeseries.py b/antarest/study/storage/variantstudy/model/command/generate_thermal_cluster_timeseries.py index 1ff3dffc05..cb7c758b1a 100644 --- a/antarest/study/storage/variantstudy/model/command/generate_thermal_cluster_timeseries.py +++ b/antarest/study/storage/variantstudy/model/command/generate_thermal_cluster_timeseries.py @@ -26,6 +26,7 @@ from antarest.study.storage.rawstudy.model.filesystem.config.thermal import LocalTSGenerationBehavior from antarest.study.storage.rawstudy.model.filesystem.factory import FileStudy from antarest.study.storage.rawstudy.model.filesystem.matrix.matrix import dump_dataframe +from antarest.study.storage.utils import TS_GEN_PREFIX, TS_GEN_SUFFIX from antarest.study.storage.variantstudy.model.command.common import CommandName, CommandOutput from antarest.study.storage.variantstudy.model.command.icommand import ICommand, OutputTuple from antarest.study.storage.variantstudy.model.command_listener.command_listener import ICommandListener @@ -37,20 +38,6 @@ MODULATION_CAPACITY_COLUMN = 2 FO_RATE_COLUMN = 2 PO_RATE_COLUMN = 3 -TS_GEN_PREFIX = "~" -TS_GEN_SUFFIX = ".thermal_timeseries_gen.tmp" - - -def is_ts_gen_tmp_dir(path: Path) -> bool: - """ - Check if a path is a temporary directory used for thermal timeseries generation - Args: - path: the path to check - - Returns: - True if the path is a temporary directory used for thermal timeseries generation - """ - return path.name.startswith(TS_GEN_PREFIX) and "".join(path.suffixes[-2:]) == TS_GEN_SUFFIX and path.is_dir() 
class GenerateThermalClusterTimeSeries(ICommand): diff --git a/antarest/study/web/explorer_blueprint.py b/antarest/study/web/explorer_blueprint.py new file mode 100644 index 0000000000..0981ba5214 --- /dev/null +++ b/antarest/study/web/explorer_blueprint.py @@ -0,0 +1,83 @@ +# Copyright (c) 2024, RTE (https://www.rte-france.com) +# +# See AUTHORS.txt +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# SPDX-License-Identifier: MPL-2.0 +# +# This file is part of the Antares project. + +import logging +from typing import List + +from fastapi import APIRouter, Depends + +from antarest.core.config import Config +from antarest.core.jwt import JWTUser +from antarest.login.auth import Auth +from antarest.study.model import NonStudyFolder, WorkspaceMetadata +from antarest.study.storage.explorer_service import Explorer + +logger = logging.getLogger(__name__) + + +def create_explorer_routes(config: Config, explorer: Explorer) -> APIRouter: + """ + Endpoint implementation for explorer management + Args: + explorer: explorer service facade to handle request + config: main server configuration + + Returns: + + """ + bp = APIRouter(prefix="/v1/private") + auth = Auth(config) + + @bp.get( + "/explorer/{workspace}/_list_dir", + summary="For a given directory, list sub directories that aren't studies", + response_model=List[NonStudyFolder], + ) + def list_dir( + workspace: str, + path: str, + current_user: JWTUser = Depends(auth.get_current_user), + ) -> List[NonStudyFolder]: + """ + Endpoint to list sub directories of a given directory + Args: + path: path to the directory to scan + current_user: user that perform the request + + Returns: + List of sub directories + + """ + logger.info(f"Listing directory {path} in workspace {workspace}") + return explorer.list_dir(workspace, path) + + @bp.get( + "/explorer/_list_workspaces", 
+ summary="List all workspaces", + response_model=List[WorkspaceMetadata], + ) + def list_workspaces( + current_user: JWTUser = Depends(auth.get_current_user), + ) -> List[WorkspaceMetadata]: + """ + Endpoint to list workspaces + Args: + current_user: user that perform the request + + Returns: + List of workspace + + """ + logger.info("Listing workspaces") + return explorer.list_workspaces() + + return bp diff --git a/antarest/study/web/watcher_blueprint.py b/antarest/study/web/watcher_blueprint.py index a98ccfe0c5..cbfd3a8cb6 100644 --- a/antarest/study/web/watcher_blueprint.py +++ b/antarest/study/web/watcher_blueprint.py @@ -13,7 +13,7 @@ import logging from http import HTTPStatus from http.client import HTTPException -from typing import Any, List +from typing import List from fastapi import APIRouter, Depends @@ -56,8 +56,9 @@ def create_watcher_routes( ) def scan_dir( path: str, + recursive: bool = True, current_user: JWTUser = Depends(auth.get_current_user), - ) -> Any: + ) -> str: params = RequestParameters(user=current_user) if path: # The front actually sends / @@ -82,6 +83,6 @@ def scan_dir( ) relative_path = None workspace = None - return watcher.oneshot_scan(params=params, workspace=workspace, path=relative_path) + return watcher.oneshot_scan(params=params, recursive=recursive, workspace=workspace, path=relative_path) return bp diff --git a/tests/integration/explorer_blueprint/test_explorer.py b/tests/integration/explorer_blueprint/test_explorer.py new file mode 100644 index 0000000000..dbb6f83ebc --- /dev/null +++ b/tests/integration/explorer_blueprint/test_explorer.py @@ -0,0 +1,130 @@ +# Copyright (c) 2024, RTE (https://www.rte-france.com) +# +# See AUTHORS.txt +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# SPDX-License-Identifier: MPL-2.0 +# +# This file is part of the Antares project. 
+from pathlib import Path + +import pytest +from starlette.testclient import TestClient + +from antarest.study.model import NonStudyFolder, WorkspaceMetadata + +BAD_REQUEST_STATUS_CODE = 400 +# Status code for directory listing with invalid parameters +INVALID_PARAMS_STATUS_CODE = 422 + + +@pytest.fixture +def study_tree(tmp_path: Path) -> Path: + """ + Create this hierarchy + + tmp_path + └── ext_workspace + └── folder + ├── studyC + │ └── study.antares + ├── trash + │ └── trash + ├── another_folder + │ ├── AW_NO_SCAN + │ └── study.antares + """ + ext_workspace = tmp_path / "ext_workspace" + c = ext_workspace / "folder/studyC" + c.mkdir(parents=True) + (c / "study.antares").touch() + + d = ext_workspace / "folder/trash" + d.mkdir(parents=True) + (d / "trash").touch() + + f = ext_workspace / "folder/another_folder" + f.mkdir(parents=True) + (f / "AW_NO_SCAN").touch() + (f / "study.antares").touch() + + return tmp_path + + +def test_explorer(client: TestClient, admin_access_token: str, study_tree: Path): + # Don't be confused here by the workspace name is "ext" being different from its folder name "ext_workspace" + # that's just how it's configured in the "client" fixture + workspace = "ext" + + res = client.get( + f"/v1/private/explorer/{workspace}/_list_dir?path=folder/", + headers={"Authorization": f"Bearer {admin_access_token}"}, + ) + res.raise_for_status() + directories_res = res.json() + directories_res = [NonStudyFolder(**d) for d in directories_res] + directorires_expected = [ + NonStudyFolder( + path=Path("folder/trash"), + workspace="ext", + name="trash", + ) + ] + assert directories_res == directorires_expected + + # request an path where there're no folders + res = client.get( + f"/v1/private/explorer/{workspace}/_list_dir?path=folder/trash", + headers={"Authorization": f"Bearer {admin_access_token}"}, + ) + res.raise_for_status() + directories_res = res.json() + assert len(directories_res) == 0 + + # request a path that isn't a folder + res = 
client.get( + f"/v1/private/explorer/{workspace}/_list_dir?path=folder/trash/trash", + headers={"Authorization": f"Bearer {admin_access_token}"}, + ) + assert res.status_code == INVALID_PARAMS_STATUS_CODE, res.json() + + # try a path traversal attack + res = client.get( + f"/v1/private/explorer/{workspace}/_list_dir?path=folder/../../", + headers={"Authorization": f"Bearer {admin_access_token}"}, + ) + assert res.status_code == INVALID_PARAMS_STATUS_CODE, res.json() + + # try to access the default workspace, which must be rejected + workspace = "default" + res = client.get( + f"/v1/private/explorer/{workspace}/_list_dir?path=folder", + headers={"Authorization": f"Bearer {admin_access_token}"}, + ) + assert res.status_code == BAD_REQUEST_STATUS_CODE + + # request a workspace that doesn't exist + workspace = "ext2" + res = client.get( + f"/v1/private/explorer/{workspace}/_list_dir?path=folder", + headers={"Authorization": f"Bearer {admin_access_token}"}, + ) + assert res.status_code == INVALID_PARAMS_STATUS_CODE + + # get list of workspaces + + res = client.get( + "/v1/private/explorer/_list_workspaces", + headers={"Authorization": f"Bearer {admin_access_token}"}, + ) + expected = [ + WorkspaceMetadata( + name="ext", + ) + ] + res = res.json() + res = [WorkspaceMetadata(**e) for e in res] + assert res == expected diff --git a/tests/storage/business/test_explorer_service.py b/tests/storage/business/test_explorer_service.py new file mode 100644 index 0000000000..883e79cfca --- /dev/null +++ b/tests/storage/business/test_explorer_service.py @@ -0,0 +1,119 @@ +# Copyright (c) 2024, RTE (https://www.rte-france.com) +# +# See AUTHORS.txt +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# SPDX-License-Identifier: MPL-2.0 +# +# This file is part of the Antares project.
+ +from pathlib import Path + +import pytest + +from antarest.core.config import Config, StorageConfig, WorkspaceConfig +from antarest.study.model import DEFAULT_WORKSPACE_NAME, NonStudyFolder, WorkspaceMetadata +from antarest.study.storage.explorer_service import Explorer + + +def build_config(root: Path) -> Config: + return Config( + storage=StorageConfig( + workspaces={ + DEFAULT_WORKSPACE_NAME: WorkspaceConfig(path=root / DEFAULT_WORKSPACE_NAME, groups=["toto"]), + "diese": WorkspaceConfig( + path=root / "diese", + groups=["tata"], + filter_out=["to_skip.*"], + ), + "test": WorkspaceConfig( + path=root / "test", + groups=["toto"], + filter_out=["to_skip.*"], + ), + } + ) + ) + + +@pytest.fixture +def config_scenario_a(tmp_path: Path) -> Config: + default = tmp_path / "default" + default.mkdir() + a = default / "studyA" + a.mkdir() + (a / "study.antares").touch() + + diese = tmp_path / "diese" + diese.mkdir() + c = diese / "folder/studyC" + c.mkdir(parents=True) + (c / "study.antares").touch() + + d = diese / "folder/subfolder1" + d.mkdir(parents=True) + (d / "trash").touch() + + d = diese / "folder/subfolder2" + d.mkdir(parents=True) + (d / "trash").touch() + + d = diese / "folder/subfolder3" + d.mkdir(parents=True) + (d / "trash").touch() + + e = diese / "folder/to_skip_folder" + e.mkdir(parents=True) + (e / "study.antares").touch() + + f = diese / "folder/another_folder" + f.mkdir(parents=True) + (f / "AW_NO_SCAN").touch() + (f / "study.antares").touch() + + config = build_config(tmp_path) + + return config + + +@pytest.mark.unit_test +def test_list_dir_empty_string(config_scenario_a: Config): + explorer = Explorer(config_scenario_a) + result = explorer.list_dir("diese", "") + + assert len(result) == 1 + workspace_path = config_scenario_a.get_workspace_path(workspace="diese") + assert result[0] == NonStudyFolder(path=Path("folder"), workspace="diese", name="folder") + + +@pytest.mark.unit_test +def test_list_dir_several_subfolders(config_scenario_a: Config): 
+ explorer = Explorer(config_scenario_a) + result = explorer.list_dir("diese", "folder") + + assert len(result) == 3 + workspace_path = config_scenario_a.get_workspace_path(workspace="diese") + folder_path = Path("folder") + assert NonStudyFolder(path=(folder_path / "subfolder1"), workspace="diese", name="subfolder1") in result + assert NonStudyFolder(path=(folder_path / "subfolder2"), workspace="diese", name="subfolder2") in result + assert NonStudyFolder(path=(folder_path / "subfolder3"), workspace="diese", name="subfolder3") in result + + +@pytest.mark.unit_test +def test_list_dir_in_empty_folder(config_scenario_a: Config): + explorer = Explorer(config_scenario_a) + result = explorer.list_dir("diese", "folder/subfolder1") + + assert len(result) == 0 + + +@pytest.mark.unit_test +def test_list_workspaces(tmp_path: Path): + config = build_config(tmp_path) + explorer = Explorer(config) + + result = explorer.list_workspaces() + assert result == [WorkspaceMetadata(name="diese"), WorkspaceMetadata(name="test")] diff --git a/tests/storage/business/test_watcher.py b/tests/storage/business/test_watcher.py index 34d28db877..daebd1e0cf 100644 --- a/tests/storage/business/test_watcher.py +++ b/tests/storage/business/test_watcher.py @@ -12,19 +12,31 @@ import logging import os import typing as t +from datetime import datetime, timedelta from multiprocessing import Pool from pathlib import Path +from unittest import mock from unittest.mock import Mock import pytest from sqlalchemy import create_engine +from sqlalchemy.orm import Session from antarest.core.config import Config, StorageConfig, WorkspaceConfig -from antarest.core.exceptions import CannotScanInternalWorkspace +from antarest.core.exceptions import CannotAccessInternalWorkspace +from antarest.core.interfaces.cache import ICache +from antarest.core.model import PublicMode from antarest.core.persistence import Base +from antarest.core.tasks.service import ITaskService from antarest.core.utils.fastapi_sqlalchemy import 
DBSessionMiddleware -from antarest.study.model import DEFAULT_WORKSPACE_NAME +from antarest.login.model import GroupDTO +from antarest.login.service import LoginService +from antarest.study.model import DEFAULT_WORKSPACE_NAME, OwnerInfo, Study, StudyMetadataDTO +from antarest.study.repository import StudyMetadataRepository +from antarest.study.service import StudyService +from antarest.study.storage.rawstudy.raw_study_service import RawStudyService from antarest.study.storage.rawstudy.watcher import Watcher +from antarest.study.storage.variantstudy.variant_study_service import VariantStudyService from tests.storage.conftest import SimpleSyncTaskService @@ -57,18 +69,76 @@ def clean_files() -> None: os.remove(lock) -@pytest.mark.unit_test -def test_scan(tmp_path: Path): - engine = create_engine("sqlite:///:memory:", echo=False) - Base.metadata.create_all(engine) - # noinspection SpellCheckingInspection - DBSessionMiddleware( - None, - custom_engine=engine, - session_args={"autocommit": False, "autoflush": False}, +def build_study_service( + raw_study_service: RawStudyService, + repository: StudyMetadataRepository, + config: Config, + user_service: LoginService = Mock(spec=LoginService), + cache_service: ICache = Mock(spec=ICache), + variant_study_service: VariantStudyService = Mock(spec=VariantStudyService), + task_service: ITaskService = Mock(spec=ITaskService), +) -> StudyService: + return StudyService( + raw_study_service=raw_study_service, + variant_study_service=variant_study_service, + user_service=user_service, + repository=repository, + event_bus=Mock(), + task_service=task_service, + file_transfer_manager=Mock(), + cache_service=cache_service, + config=config, ) - clean_files() + +def study_to_dto(study: Study) -> StudyMetadataDTO: + return StudyMetadataDTO( + id=study.id, + name=study.name, + version=study.version, + created=str(study.created_at), + updated=str(study.updated_at), + workspace=DEFAULT_WORKSPACE_NAME, + managed=True, + type=study.type, + 
archived=study.archived if study.archived is not None else False, + owner=( + OwnerInfo(id=study.owner.id, name=study.owner.name) + if study.owner is not None + else OwnerInfo(name="Unknown") + ), + groups=[GroupDTO(id=group.id, name=group.name) for group in study.groups], + public_mode=study.public_mode or PublicMode.NONE, + horizon=study.additional_data.horizon, + scenario=None, + status=None, + doc=None, + folder=None, + ) + + +@pytest.fixture +def study_tree(tmp_path: Path) -> Path: + """ + Create this hierarchy + + tmp_path + ├── default + │ └── studyA + │ └── study.antares + └── diese + └── folder + ├── studyC + │ └── study.antares + ├── trash + │ └── trash + ├── another_folder + │ ├── AW_NO_SCAN + │ └── study.antares + └── subfolder + └── studyG + └── study.antares + """ default = tmp_path / "default" default.mkdir() @@ -86,24 +156,27 @@ def test_scan(tmp_path: Path): d.mkdir(parents=True) (d / "trash").touch() - e = diese / "folder/to_skip_folder" - e.mkdir(parents=True) - (e / "study.antares").touch() - f = diese / "folder/another_folder" f.mkdir(parents=True) (f / "AW_NO_SCAN").touch() (f / "study.antares").touch() + return tmp_path + + +@pytest.mark.unit_test +def test_scan(study_tree: Path): + clean_files() + service = Mock() - watcher = Watcher(build_config(tmp_path), service, task_service=SimpleSyncTaskService()) + watcher = Watcher(build_config(study_tree), service, task_service=SimpleSyncTaskService()) watcher.scan() assert service.sync_studies_on_disk.call_count == 1 call = service.sync_studies_on_disk.call_args_list[0] assert len(call.args[0]) == 1 - assert call.args[0][0].path == c + assert call.args[0][0].path == study_tree / "diese/folder/studyC" assert call.args[0][0].workspace == "diese" groups = call.args[0][0].groups assert len(groups) == 1 @@ -112,6 +185,54 @@ def test_scan(tmp_path: Path): assert call.args[1] is None +@pytest.mark.unit_test +def test_scan_recursive_false(study_tree: Path, db_session: Session): + def count_studies(): + 
return db_session.query(Study).count() + + clean_files() + + g = study_tree / "diese/folder/subfolder/studyG" + g.mkdir(parents=True) + (g / "study.antares").touch() + + raw_study_service = Mock(spec=RawStudyService) + raw_study_service.get_study_information.side_effect = study_to_dto + repository = StudyMetadataRepository(session=db_session, cache_service=Mock(spec=ICache)) + repository.delete = Mock() + config = build_config(study_tree) + service = build_study_service(raw_study_service, repository, config) + watcher = Watcher(config, service, task_service=SimpleSyncTaskService()) + + # at the beginning, no study in the database + assert count_studies() == 0 + + # only studyC should be scanned, as the recursive flag is set to False + watcher.scan(recursive=False, workspace_name="diese", workspace_directory_path="folder") + assert count_studies() == 1 + + # Now studyG should be scanned, as we scan folder/subfolder which contains it + watcher.scan(recursive=False, workspace_name="diese", workspace_directory_path="folder/subfolder") + assert count_studies() == 2 + + # Even if we deleted studyG, the scan should not delete it, as we are not scanning the folder containing it + os.remove(g / "study.antares") + watcher.scan(recursive=False, workspace_name="diese", workspace_directory_path="folder") + assert count_studies() == 2 + assert repository.delete.call_count == 0 + + # Now we scan the folder containing studyG, it should be marked for deletion but not deleted yet + watcher.scan(recursive=False, workspace_name="diese", workspace_directory_path="folder/subfolder") + assert repository.delete.call_count == 0 + + # We simulate three days went by, now a delete should be triggered + in_3_days = datetime.utcnow() + timedelta(days=3) + with mock.patch("antarest.study.service.datetime") as mock_datetime: + mock_datetime.utcnow.return_value = in_3_days + watcher.scan(recursive=False, workspace_name="diese", workspace_directory_path="folder/subfolder") + assert
repository.delete.call_count == 1 + + @pytest.mark.unit_test def test_partial_scan(tmp_path: Path, caplog: t.Any): engine = create_engine("sqlite:///:memory:", echo=False) @@ -152,10 +273,10 @@ def test_partial_scan(tmp_path: Path, caplog: t.Any): service = Mock() watcher = Watcher(build_config(tmp_path), service, task_service=SimpleSyncTaskService()) - with pytest.raises(CannotScanInternalWorkspace): + with pytest.raises(CannotAccessInternalWorkspace): watcher.scan(workspace_name="default", workspace_directory_path=default) - with caplog.at_level(level=logging.INFO, logger="antarest.study.storage.rawstudy.watcher"): + with caplog.at_level(level=logging.INFO, logger="antarest.study.storage.utils"): # scan the `default` directory watcher.scan(workspace_name="test", workspace_directory_path=default) diff --git a/tests/study/storage/test_utils.py b/tests/study/storage/test_utils.py new file mode 100644 index 0000000000..6f5b907a4a --- /dev/null +++ b/tests/study/storage/test_utils.py @@ -0,0 +1,52 @@ +# Copyright (c) 2024, RTE (https://www.rte-france.com) +# +# See AUTHORS.txt +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# SPDX-License-Identifier: MPL-2.0 +# +# This file is part of the Antares project. 
from pathlib import Path

import pytest

from antarest.core.config import WorkspaceConfig
from antarest.study.storage.utils import is_folder_safe


@pytest.fixture
def workspace_config(tmp_path: Path) -> WorkspaceConfig:
    # Workspace configuration rooted at pytest's per-test temporary directory
    config = WorkspaceConfig(path=tmp_path)
    return config


def test_is_folder_safe_within_workspace(workspace_config: WorkspaceConfig):
    # Test case: a plain relative folder name stays inside the workspace
    outcome = is_folder_safe(workspace_config, "project")
    assert outcome is True


def test_is_folder_safe_outside_workspace(workspace_config: WorkspaceConfig):
    # Test case: a parent-directory hop points outside the workspace
    outcome = is_folder_safe(workspace_config, "../outside")
    assert outcome is False


def test_is_folder_safe_home_directory(workspace_config: WorkspaceConfig):
    # Test case: absolute path written with a "~" (home-directory style) component
    outcome = is_folder_safe(workspace_config, "/~/project")
    assert outcome is False


def test_is_folder_safe_traversal_attack(workspace_config: WorkspaceConfig):
    # Test case: multi-level traversal attempt towards a system file
    outcome = is_folder_safe(workspace_config, "../../etc/passwd")
    assert outcome is False


def test_is_folder_safe_nested_folder(workspace_config: WorkspaceConfig):
    # Test case: nested relative folder still inside the workspace
    outcome = is_folder_safe(workspace_config, "project/subfolder")
    assert outcome is True