diff --git a/antarest/core/exceptions.py b/antarest/core/exceptions.py index 7001d832ca..89555f3e48 100644 --- a/antarest/core/exceptions.py +++ b/antarest/core/exceptions.py @@ -545,6 +545,11 @@ def __init__(self, message: str) -> None: super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message) +class MatrixImportFailed(HTTPException): + def __init__(self, message: str) -> None: + super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message) + + class ConstraintTermNotFound(HTTPException): """ Exception raised when a constraint term is not found. diff --git a/antarest/study/service.py b/antarest/study/service.py index e0e1e1a9c2..31bb80fa1f 100644 --- a/antarest/study/service.py +++ b/antarest/study/service.py @@ -13,7 +13,6 @@ import base64 import collections import contextlib -import csv import http import io import logging @@ -25,6 +24,7 @@ from uuid import uuid4 import numpy as np +import numpy.typing as npt import pandas as pd from antares.study.version import StudyVersion from fastapi import HTTPException, UploadFile @@ -39,6 +39,7 @@ CommandApplicationError, FolderCreationNotAllowed, IncorrectPathError, + MatrixImportFailed, NotAManagedStudyException, OutputAlreadyArchived, OutputAlreadyUnarchived, @@ -197,6 +198,20 @@ def get_disk_usage(path: t.Union[str, Path]) -> int: return total_size +def _imports_matrix_from_bytes(data: bytes) -> npt.NDArray[np.float64]: + """Tries to convert bytes to a numpy array when importing a matrix""" + str_data = data.decode("utf-8") + if not str_data: + return np.zeros(shape=(0, 0)) + for delimiter in [",", ";", "\t"]: + with contextlib.suppress(Exception): + df = pd.read_csv(io.BytesIO(data), delimiter=delimiter, header=None).replace(",", ".", regex=True) + df = df.dropna(axis=1, how="all") # We want to remove columns full of NaN at the import + matrix = df.to_numpy(dtype=np.float64) + return matrix + raise MatrixImportFailed("Could not parse the given matrix") + + def _get_path_inside_user_folder( path: str, exception_class: 
t.Type[t.Union[FolderCreationNotAllowed, ResourceDeletionNotAllowed]] ) -> str: @@ -1591,19 +1606,7 @@ def _create_edit_study_command( elif isinstance(tree_node, InputSeriesMatrix): if isinstance(data, bytes): # noinspection PyTypeChecker - str_data = data.decode("utf-8") - if not str_data: - matrix = np.zeros(shape=(0, 0)) - else: - size_to_check = min(len(str_data), 64) # sniff a chunk only to speed up the code - try: - delimiter = csv.Sniffer().sniff(str_data[:size_to_check], delimiters=r"[,;\t]").delimiter - except csv.Error: - # Can happen with data with only one column. In this case, we don't care about the delimiter. - delimiter = "\t" - df = pd.read_csv(io.BytesIO(data), delimiter=delimiter, header=None).replace(",", ".", regex=True) - df = df.dropna(axis=1, how="all") # We want to remove columns full of NaN at the import - matrix = df.to_numpy(dtype=np.float64) + matrix = _imports_matrix_from_bytes(data) matrix = matrix.reshape((1, 0)) if matrix.size == 0 else matrix return ReplaceMatrix( target=url, matrix=matrix.tolist(), command_context=context, study_version=study_version diff --git a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py index 8cbd7429eb..d49cd53639 100644 --- a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py +++ b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py @@ -219,8 +219,9 @@ def test_get_study_data(self, client: TestClient, user_access_token: str, intern b"1;1;1;1;1\r1;1;1;1;1", b"0,000000;0,000000;0,000000;0,000000\n0,000000;0,000000;0,000000;0,000000", b"1;2;3;;;\n4;5;6;;;\n", + b"1;1;1;1\r\n1;1;1;1\r\n1;1;1;1\r\n1;1;1;1\r\n1;1;1;1\r\n1;1;1;1\r\n1;1;1;1\r\n1;1;1;1\r\n", ], - ["\t", "\t", ",", "\t", ";", ";", ";", ";"], + ["\t", "\t", ",", "\t", ";", ";", ";", ";", ";"], ): res = client.put(raw_url, params={"path": matrix_path}, files={"file": io.BytesIO(content)}) assert res.status_code == 204, res.json() @@ -281,13 +282,6 @@
def test_get_study_data(self, client: TestClient, user_access_token: str, intern assert res.status_code == 200, res.json() assert res.json() == {"index": [], "columns": [], "data": []} - # Some files can be corrupted - user_folder_dir = study_dir.joinpath("user/bad") - for file_path in user_folder_dir.glob("*.*"): - rel_path = file_path.relative_to(study_dir) - res = client.get(raw_url, params={"path": f"/{rel_path.as_posix()}", "depth": 1}) - assert res.status_code == http.HTTPStatus.UNPROCESSABLE_ENTITY - # We can access to the configuration the classic way, # for instance, we can get the list of areas: res = client.get(raw_url, params={"path": "/input/areas/list", "depth": 1}) @@ -305,6 +299,23 @@ def test_get_study_data(self, client: TestClient, user_access_token: str, intern res = client.get(raw_url, params={"path": path, "depth": depth}) assert res.status_code == 200, f"Error for path={path} and depth={depth}" + # ============================= + # ERRORS + # ============================= + + # Some files can be corrupted + user_folder_dir = study_dir.joinpath("user/bad") + for file_path in user_folder_dir.glob("*.*"): + rel_path = file_path.relative_to(study_dir) + res = client.get(raw_url, params={"path": f"/{rel_path.as_posix()}", "depth": 1}) + assert res.status_code == http.HTTPStatus.UNPROCESSABLE_ENTITY + + # Imports a wrongly formatted matrix + res = client.put(raw_url, params={"path": matrix_path}, files={"file": io.BytesIO(b"BLABLABLA")}) + assert res.status_code == 422 + assert res.json()["exception"] == "MatrixImportFailed" + assert res.json()["description"] == "Could not parse the given matrix" + @pytest.mark.parametrize("study_type", ["raw", "variant"]) def test_delete_raw( self, client: TestClient, user_access_token: str, internal_study_id: str, study_type: str