Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(matrix): allow odd matrix format when importing #2305

Merged
merged 2 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions antarest/core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,11 @@ def __init__(self, message: str) -> None:
super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message)


class MatrixImportFailed(HTTPException):
    """
    Exception raised when an uploaded matrix cannot be parsed
    (answered with HTTP 422 UNPROCESSABLE_ENTITY).
    """

    def __init__(self, message: str) -> None:
        super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message)


class ConstraintTermNotFound(HTTPException):
"""
Exception raised when a constraint term is not found.
Expand Down
31 changes: 17 additions & 14 deletions antarest/study/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import base64
import collections
import contextlib
import csv
import http
import io
import logging
Expand All @@ -25,6 +24,7 @@
from uuid import uuid4

import numpy as np
import numpy.typing as npt
import pandas as pd
from antares.study.version import StudyVersion
from fastapi import HTTPException, UploadFile
Expand All @@ -39,6 +39,7 @@
CommandApplicationError,
FolderCreationNotAllowed,
IncorrectPathError,
MatrixImportFailed,
NotAManagedStudyException,
OutputAlreadyArchived,
OutputAlreadyUnarchived,
Expand Down Expand Up @@ -197,6 +198,20 @@ def get_disk_usage(path: t.Union[str, Path]) -> int:
return total_size


def _imports_matrix_from_bytes(data: bytes) -> npt.NDArray[np.float64]:
"""Tries to convert bytes to a numpy array when importing a matrix"""
str_data = data.decode("utf-8")
if not str_data:
return np.zeros(shape=(0, 0))
for delimiter in [",", ";", "\t"]:
with contextlib.suppress(Exception):
df = pd.read_csv(io.BytesIO(data), delimiter=delimiter, header=None).replace(",", ".", regex=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a way to load the matrix only once ? Maybe we can do something to "detect" the delimiter to prevent three openings

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realised that it is exactly what was done before ...

Copy link
Contributor Author

@MartinBelthle MartinBelthle Jan 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep indeed. And it's because it wasn't reliable that I had to do this for loop

df = df.dropna(axis=1, how="all") # We want to remove columns full of NaN at the import
matrix = df.to_numpy(dtype=np.float64)
return matrix
raise MatrixImportFailed("Could not parse the given matrix")


def _get_path_inside_user_folder(
path: str, exception_class: t.Type[t.Union[FolderCreationNotAllowed, ResourceDeletionNotAllowed]]
) -> str:
Expand Down Expand Up @@ -1591,19 +1606,7 @@ def _create_edit_study_command(
elif isinstance(tree_node, InputSeriesMatrix):
if isinstance(data, bytes):
# noinspection PyTypeChecker
str_data = data.decode("utf-8")
if not str_data:
matrix = np.zeros(shape=(0, 0))
else:
size_to_check = min(len(str_data), 64) # sniff a chunk only to speed up the code
try:
delimiter = csv.Sniffer().sniff(str_data[:size_to_check], delimiters=r"[,;\t]").delimiter
except csv.Error:
# Can happen with data with only one column. In this case, we don't care about the delimiter.
delimiter = "\t"
df = pd.read_csv(io.BytesIO(data), delimiter=delimiter, header=None).replace(",", ".", regex=True)
df = df.dropna(axis=1, how="all") # We want to remove columns full of NaN at the import
matrix = df.to_numpy(dtype=np.float64)
matrix = _imports_matrix_from_bytes(data)
matrix = matrix.reshape((1, 0)) if matrix.size == 0 else matrix
return ReplaceMatrix(
target=url, matrix=matrix.tolist(), command_context=context, study_version=study_version
Expand Down
27 changes: 19 additions & 8 deletions tests/integration/raw_studies_blueprint/test_fetch_raw_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,9 @@ def test_get_study_data(self, client: TestClient, user_access_token: str, intern
b"1;1;1;1;1\r1;1;1;1;1",
b"0,000000;0,000000;0,000000;0,000000\n0,000000;0,000000;0,000000;0,000000",
b"1;2;3;;;\n4;5;6;;;\n",
b"1;1;1;1\r\n1;1;1;1\r\n1;1;1;1\r\n1;1;1;1\r\n1;1;1;1\r\n1;1;1;1\r\n1;1;1;1\r\n1;1;1;1\r\n",
],
["\t", "\t", ",", "\t", ";", ";", ";", ";"],
["\t", "\t", ",", "\t", ";", ";", ";", ";", ";"],
):
res = client.put(raw_url, params={"path": matrix_path}, files={"file": io.BytesIO(content)})
assert res.status_code == 204, res.json()
Expand Down Expand Up @@ -281,13 +282,6 @@ def test_get_study_data(self, client: TestClient, user_access_token: str, intern
assert res.status_code == 200, res.json()
assert res.json() == {"index": [], "columns": [], "data": []}

# Some files can be corrupted
user_folder_dir = study_dir.joinpath("user/bad")
for file_path in user_folder_dir.glob("*.*"):
rel_path = file_path.relative_to(study_dir)
res = client.get(raw_url, params={"path": f"/{rel_path.as_posix()}", "depth": 1})
assert res.status_code == http.HTTPStatus.UNPROCESSABLE_ENTITY

# We can access to the configuration the classic way,
# for instance, we can get the list of areas:
res = client.get(raw_url, params={"path": "/input/areas/list", "depth": 1})
Expand All @@ -305,6 +299,23 @@ def test_get_study_data(self, client: TestClient, user_access_token: str, intern
res = client.get(raw_url, params={"path": path, "depth": depth})
assert res.status_code == 200, f"Error for path={path} and depth={depth}"

# =============================
# ERRORS
# =============================

# Some files can be corrupted
user_folder_dir = study_dir.joinpath("user/bad")
for file_path in user_folder_dir.glob("*.*"):
rel_path = file_path.relative_to(study_dir)
res = client.get(raw_url, params={"path": f"/{rel_path.as_posix()}", "depth": 1})
assert res.status_code == http.HTTPStatus.UNPROCESSABLE_ENTITY

# Imports a wrongly formatted matrix
res = client.put(raw_url, params={"path": matrix_path}, files={"file": io.BytesIO(b"BLABLABLA")})
assert res.status_code == 422
assert res.json()["exception"] == "MatrixImportFailed"
assert res.json()["description"] == "Could not parse the given matrix"

@pytest.mark.parametrize("study_type", ["raw", "variant"])
def test_delete_raw(
self, client: TestClient, user_access_token: str, internal_study_id: str, study_type: str
Expand Down
Loading