From 8fdae447310a13a26d14d5c6b70933f8786ceee5 Mon Sep 17 00:00:00 2001 From: Laurent LAPORTE Date: Thu, 18 Apr 2024 18:09:07 +0200 Subject: [PATCH 1/9] feat(launcher): add new API endpoint `/v1/launcher/time-limit` and update `LauncherDialog` --- antarest/core/config.py | 25 +++++- antarest/launcher/web.py | 84 ++++++++++--------- .../launcher_blueprint/test_launcher_local.py | 54 ++++++++++++ webapp/public/locales/en/main.json | 1 + webapp/public/locales/fr/main.json | 1 + .../components/App/Studies/LauncherDialog.tsx | 36 ++++++-- webapp/src/services/api/study.ts | 16 +++- 7 files changed, 166 insertions(+), 51 deletions(-) diff --git a/antarest/core/config.py b/antarest/core/config.py index 2ba7a72745..f93301613a 100644 --- a/antarest/core/config.py +++ b/antarest/core/config.py @@ -308,7 +308,7 @@ def _autodetect_nb_cores(cls) -> Dict[str, int]: class InvalidConfigurationError(Exception): """ - Exception raised when an attempt is made to retrieve the number of cores + Exception raised when an attempt is made to retrieve a property of a launcher that doesn't exist in the configuration. """ @@ -371,6 +371,29 @@ def get_nb_cores(self, launcher: str) -> "NbCoresConfig": raise InvalidConfigurationError(launcher) return launcher_config.nb_cores + def get_time_limit(self, launcher: str) -> int: + """ + Retrieve the time limit for a job of the given launcher: "local" or "slurm". + If "default" is specified, retrieve the configuration of the default launcher. + + Args: + launcher: type of launcher "local", "slurm" or "default". + + Returns: + Time limit for a job of the given launcher. + + Raises: + InvalidConfigurationError: Exception raised when an attempt is made to retrieve + a property of a launcher that doesn't exist in the configuration. 
+ """ + config_map = {"local": self.local, "slurm": self.slurm} + config_map["default"] = config_map[self.default] + launcher_config = config_map.get(launcher) + if launcher_config is None: + raise InvalidConfigurationError(launcher) + # The default time limit is not available for the local launcher + return getattr(launcher_config, "default_time_limit", 3600) + @dataclass(frozen=True) class LoggingConfig: diff --git a/antarest/launcher/web.py b/antarest/launcher/web.py index 14eb39aee2..e4b7e844b7 100644 --- a/antarest/launcher/web.py +++ b/antarest/launcher/web.py @@ -41,6 +41,25 @@ def __init__(self, solver: str) -> None: ) +LauncherQuery = Query( + "default", + examples={ + "Default launcher": { + "description": "Default solver (auto-detected)", + "value": "default", + }, + "SLURM launcher": { + "description": "SLURM solver configuration", + "value": "slurm", + }, + "Local launcher": { + "description": "Local solver configuration", + "value": "local", + }, + }, +) + + def create_launcher_api(service: LauncherService, config: Config) -> APIRouter: bp = APIRouter(prefix="/v1/launcher") @@ -214,25 +233,7 @@ def get_load() -> LauncherLoadDTO: summary="Get list of supported solver versions", response_model=List[str], ) - def get_solver_versions( - solver: str = Query( - "default", - examples={ - "Default solver": { - "description": "Get the solver versions of the default configuration", - "value": "default", - }, - "SLURM solver": { - "description": "Get the solver versions of the SLURM server if available", - "value": "slurm", - }, - "Local solver": { - "description": "Get the solver versions of the Local server if available", - "value": "local", - }, - }, - ), - ) -> List[str]: + def get_solver_versions(solver: str = LauncherQuery) -> List[str]: """ Get list of supported solver versions defined in the configuration. 
@@ -251,25 +252,7 @@ def get_solver_versions( summary="Retrieving Min, Default, and Max Core Count", response_model=Dict[str, int], ) - def get_nb_cores( - launcher: str = Query( - "default", - examples={ - "Default launcher": { - "description": "Min, Default, and Max Core Count", - "value": "default", - }, - "SLURM launcher": { - "description": "Min, Default, and Max Core Count", - "value": "slurm", - }, - "Local launcher": { - "description": "Min, Default, and Max Core Count", - "value": "local", - }, - }, - ), - ) -> Dict[str, int]: + def get_nb_cores(launcher: str = LauncherQuery) -> Dict[str, int]: """ Retrieve the numer of cores of the launcher. @@ -288,4 +271,29 @@ def get_nb_cores( except InvalidConfigurationError: raise UnknownSolverConfig(launcher) + # noinspection SpellCheckingInspection + @bp.get( + "/time-limit", + tags=[APITag.launcher], + summary="Retrieve the time limit for a job (in seconds)", + ) + def get_time_limit(launcher: str = LauncherQuery) -> int: + """ + Retrieve the time limit for a job (in seconds) of the given launcher: "local" or "slurm". + + If a job exceeds this time limit, SLURM kills the job and it is considered failed. + + Args: + - `launcher`: name of the configuration to read: "slurm" or "local". + If "default" is specified, retrieve the configuration of the default launcher. 
+ + Returns: + - time limit in seconds + """ + logger.info(f"Fetching the time limit for the '{launcher}' configuration") + try: + return service.config.launcher.get_time_limit(launcher) + except InvalidConfigurationError: + raise UnknownSolverConfig(launcher) + return bp diff --git a/tests/integration/launcher_blueprint/test_launcher_local.py b/tests/integration/launcher_blueprint/test_launcher_local.py index 7244fba8ee..7b52cf4261 100644 --- a/tests/integration/launcher_blueprint/test_launcher_local.py +++ b/tests/integration/launcher_blueprint/test_launcher_local.py @@ -68,3 +68,57 @@ def test_get_launcher_nb_cores( "description": "Unknown solver configuration: 'unknown'", "exception": "UnknownSolverConfig", } + + def test_get_time_limit( + self, + client: TestClient, + user_access_token: str, + ) -> None: + nb_cores_expected = 3600 + res = client.get( + "/v1/launcher/time-limit", + headers={"Authorization": f"Bearer {user_access_token}"}, + ) + res.raise_for_status() + actual = res.json() + assert actual == nb_cores_expected + + res = client.get( + "/v1/launcher/time-limit?launcher=default", + headers={"Authorization": f"Bearer {user_access_token}"}, + ) + res.raise_for_status() + actual = res.json() + assert actual == nb_cores_expected + + res = client.get( + "/v1/launcher/time-limit?launcher=local", + headers={"Authorization": f"Bearer {user_access_token}"}, + ) + res.raise_for_status() + actual = res.json() + assert actual == nb_cores_expected + + # Check that the endpoint raises an exception when the "slurm" launcher is requested. + res = client.get( + "/v1/launcher/time-limit?launcher=slurm", + headers={"Authorization": f"Bearer {user_access_token}"}, + ) + assert res.status_code == http.HTTPStatus.UNPROCESSABLE_ENTITY, res.json() + actual = res.json() + assert actual == { + "description": "Unknown solver configuration: 'slurm'", + "exception": "UnknownSolverConfig", + } + + # Check that the endpoint raises an exception when an unknown launcher is requested. 
+ res = client.get( + "/v1/launcher/time-limit?launcher=unknown", + headers={"Authorization": f"Bearer {user_access_token}"}, + ) + assert res.status_code == http.HTTPStatus.UNPROCESSABLE_ENTITY, res.json() + actual = res.json() + assert actual == { + "description": "Unknown solver configuration: 'unknown'", + "exception": "UnknownSolverConfig", + } diff --git a/webapp/public/locales/en/main.json b/webapp/public/locales/en/main.json index ccfac647f2..bc124b50bf 100644 --- a/webapp/public/locales/en/main.json +++ b/webapp/public/locales/en/main.json @@ -564,6 +564,7 @@ "study.error.listOutputs": "Failed to retrieve output list", "study.error.launcherVersions": "Failed to retrieve launcher versions", "study.error.launcherCores": "Failed to retrieve launcher number of cores", + "study.error.launcherTimeLimit": "Failed to retrieve launcher time limit", "study.error.fetchComments": "Failed to fetch comments", "study.error.commentsNotSaved": "Comments not saved", "study.error.studyIdCopy": "Failed to copy study ID", diff --git a/webapp/public/locales/fr/main.json b/webapp/public/locales/fr/main.json index ef12be54ec..a0504ae745 100644 --- a/webapp/public/locales/fr/main.json +++ b/webapp/public/locales/fr/main.json @@ -564,6 +564,7 @@ "study.error.listOutputs": "Échec de la récupération des sorties", "study.error.launcherVersions": "Échec lors de la récupération des versions du launcher", "study.error.launcherCores": "Échec lors de la récupération du nombre de cœurs du launcher", + "study.error.launcherTimeLimit": "Échec lors de la récupération de la limite de temps du launcher", "study.error.fetchComments": "Échec lors de la récupération des commentaires", "study.error.commentsNotSaved": "Erreur lors de l'enregistrement des commentaires", "study.error.studyIdCopy": "Erreur lors de la copie de l'identifiant de l'étude", diff --git a/webapp/src/components/App/Studies/LauncherDialog.tsx b/webapp/src/components/App/Studies/LauncherDialog.tsx index 7e610e2f88..13a485ce11 
100644 --- a/webapp/src/components/App/Studies/LauncherDialog.tsx +++ b/webapp/src/components/App/Studies/LauncherDialog.tsx @@ -18,12 +18,13 @@ import { useSnackbar } from "notistack"; import { useMountedState } from "react-use"; import { shallowEqual } from "react-redux"; import { + LaunchOptions, StudyMetadata, StudyOutput, - LaunchOptions, } from "../../../common/types"; import { getLauncherCores, + getLauncherTimeLimit, getLauncherVersions, getStudyOutputs, launchStudy, @@ -49,7 +50,7 @@ interface Props { onClose: () => void; } -function LauncherDialog(props: Props) { +function LauncherDialog(props: Readonly) { const { studyIds, open, onClose } = props; const [t] = useTranslation(); const { enqueueSnackbar } = useSnackbar(); @@ -57,7 +58,7 @@ function LauncherDialog(props: Props) { const [options, setOptions] = useState({ nb_cpu: DEFAULT_NB_CPU, auto_unzip: true, - time_limit: DEFAULT_TIME_LIMIT, + time_limit: undefined, }); const [solverVersion, setSolverVersion] = useState(); const [isLaunching, setIsLaunching] = useState(false); @@ -83,6 +84,18 @@ function LauncherDialog(props: Props) { }, ); + const { data: launcherTimeLimit } = usePromiseWithSnackbarError( + async () => { + return await getLauncherTimeLimit(); + }, + { + errorMessage: t("study.error.launcherTimeLimit"), + }, + ); + + const minSeconds = 3600; + const maxSeconds = launcherTimeLimit ?? DEFAULT_TIME_LIMIT; + const { data: outputList } = usePromiseWithSnackbarError( () => Promise.all(studyIds.map((sid) => getStudyOutputs(sid))), { errorMessage: t("study.error.listOutputs"), deps: [studyIds] }, @@ -182,7 +195,7 @@ function LauncherDialog(props: Props) { */ const parseHoursToSeconds = (hourString: string): number => { const seconds = moment.duration(hourString, "hours").asSeconds(); - return seconds > 0 ? 
seconds : DEFAULT_TIME_LIMIT; + return Math.max(minSeconds, Math.min(seconds, maxSeconds)); }; //////////////////////////////////////////////////////////////// @@ -273,7 +286,11 @@ function LauncherDialog(props: Props) { type="number" variant="filled" // Convert from seconds to hours the displayed value - value={(options.time_limit ?? DEFAULT_TIME_LIMIT) / 3600} + value={ + options.time_limit === undefined + ? maxSeconds / 3600 + : options.time_limit / 3600 + } onChange={(e) => handleChange("time_limit", parseHoursToSeconds(e.target.value)) } @@ -281,8 +298,9 @@ function LauncherDialog(props: Props) { shrink: true, }} inputProps={{ - min: 1, - max: 240, + min: minSeconds / 3600, + max: maxSeconds / 3600, + step: 1, }} sx={{ minWidth: "125px", @@ -449,7 +467,7 @@ function LauncherDialog(props: Props) { } /> } - label={t("launcher.xpansion.sensitivityMode") as string} + label={t("launcher.xpansion.sensitivityMode")} /> handleObjectChange("xpansion", { output_id: data, diff --git a/webapp/src/services/api/study.ts b/webapp/src/services/api/study.ts index 9ee256d299..27ba032431 100644 --- a/webapp/src/services/api/study.ts +++ b/webapp/src/services/api/study.ts @@ -206,7 +206,7 @@ export const exportStudy = async ( export const getExportUrl = (sid: string, skipOutputs = false): string => `${ - getConfig().downloadHostUrl || + getConfig().downloadHostUrl ?? getConfig().baseUrl + getConfig().restEndpoint }/v1/studies/${sid}/export?no_output=${skipOutputs}`; @@ -226,7 +226,7 @@ export const importStudy = async ( if (onProgress) { options.onUploadProgress = (progressEvent): void => { const percentCompleted = Math.round( - (progressEvent.loaded * 100) / (progressEvent.total || 1), + (progressEvent.loaded * 100) / (progressEvent.total ?? 
1), ); onProgress(percentCompleted); }; @@ -253,7 +253,7 @@ export const importFile = async ( if (onProgress) { options.onUploadProgress = (progressEvent): void => { const percentCompleted = Math.round( - (progressEvent.loaded * 100) / (progressEvent.total || 1), + (progressEvent.loaded * 100) / (progressEvent.total ?? 1), ); onProgress(percentCompleted); }; @@ -304,6 +304,16 @@ export const getLauncherCores = async (): Promise> => { return res.data; }; +/** + * Time limit for SLURM jobs (in seconds). + * If a job exceeds this time limit, SLURM kills the job and it is considered failed. + * Often used value: 172800 (48 hours) + */ +export const getLauncherTimeLimit = async (): Promise => { + const res = await client.get("/v1/launcher/time-limit"); + return res.data; +}; + export const getLauncherMetrics = async (): Promise => { const res = await client.get("/v1/launcher/load"); return res.data; From b63a111ac638ae4e2d7ae6a11a7507f153c2306a Mon Sep 17 00:00:00 2001 From: hatim dinia Date: Tue, 21 May 2024 13:09:16 +0200 Subject: [PATCH 2/9] refactor(ui-launcher): revamp `LauncherDialog` styles --- .../components/App/Studies/LauncherDialog.tsx | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/webapp/src/components/App/Studies/LauncherDialog.tsx b/webapp/src/components/App/Studies/LauncherDialog.tsx index 13a485ce11..4c3f928613 100644 --- a/webapp/src/components/App/Studies/LauncherDialog.tsx +++ b/webapp/src/components/App/Studies/LauncherDialog.tsx @@ -207,9 +207,8 @@ function LauncherDialog(props: Readonly) { title={t("study.runStudy")} open={open} onClose={onClose} - contentProps={{ - sx: { width: "600px", height: "500px", p: 0, overflow: "hidden" }, - }} + maxWidth="md" + PaperProps={{ sx: { width: 700 } }} actions={ <>