Skip to content

Commit

Permalink
feat(launcher): add new API endpoint /v1/launcher/time-limit and up…
Browse files Browse the repository at this point in the history
…date `LauncherDialog`
  • Loading branch information
laurent-laporte-pro committed Apr 18, 2024
1 parent b42fee2 commit e15b830
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 51 deletions.
25 changes: 24 additions & 1 deletion antarest/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def _autodetect_nb_cores(cls) -> Dict[str, int]:

class InvalidConfigurationError(Exception):
"""
Exception raised when an attempt is made to retrieve the number of cores
Exception raised when an attempt is made to retrieve a property
of a launcher that doesn't exist in the configuration.
"""

Expand Down Expand Up @@ -371,6 +371,29 @@ def get_nb_cores(self, launcher: str) -> "NbCoresConfig":
raise InvalidConfigurationError(launcher)
return launcher_config.nb_cores

def get_time_limit(self, launcher: str) -> int:
    """
    Retrieve the time limit for a job of the given launcher: "local" or "slurm".

    If "default" is specified, retrieve the configuration of the default launcher.

    Args:
        launcher: type of launcher "local", "slurm" or "default".

    Returns:
        Time limit for a job of the given launcher. When the launcher
        configuration has no `default_time_limit` attribute, 3600 is returned.

    Raises:
        InvalidConfigurationError: Exception raised when an attempt is made to retrieve
            a property of a launcher that doesn't exist in the configuration.
    """
    config_map = {"local": self.local, "slurm": self.slurm}
    # Resolve the "default" alias with `.get()`: an unexpected value of
    # `self.default` must not break lookups of the explicit launchers with
    # a bare `KeyError`; it only makes the "default" alias itself invalid.
    config_map["default"] = config_map.get(self.default)
    launcher_config = config_map.get(launcher)
    if launcher_config is None:
        raise InvalidConfigurationError(launcher)
    # The default time limit is not available for the local launcher
    return getattr(launcher_config, "default_time_limit", 3600)


@dataclass(frozen=True)
class LoggingConfig:
Expand Down
84 changes: 46 additions & 38 deletions antarest/launcher/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,25 @@ def __init__(self, solver: str) -> None:
)


# Named examples attached to the `launcher` query parameter.
_LAUNCHER_EXAMPLES = {
    "Default launcher": {
        "description": "Default solver (auto-detected)",
        "value": "default",
    },
    "SLURM launcher": {
        "description": "SLURM solver configuration",
        "value": "slurm",
    },
    "Local launcher": {
        "description": "Local solver configuration",
        "value": "local",
    },
}

# Query-parameter declaration (default value: "default") reused by the
# launcher endpoints that accept a launcher/solver name.
LauncherQuery = Query("default", examples=_LAUNCHER_EXAMPLES)


def create_launcher_api(service: LauncherService, config: Config) -> APIRouter:
bp = APIRouter(prefix="/v1/launcher")

Expand Down Expand Up @@ -214,25 +233,7 @@ def get_load() -> LauncherLoadDTO:
summary="Get list of supported solver versions",
response_model=List[str],
)
def get_solver_versions(
solver: str = Query(
"default",
examples={
"Default solver": {
"description": "Get the solver versions of the default configuration",
"value": "default",
},
"SLURM solver": {
"description": "Get the solver versions of the SLURM server if available",
"value": "slurm",
},
"Local solver": {
"description": "Get the solver versions of the Local server if available",
"value": "local",
},
},
),
) -> List[str]:
def get_solver_versions(solver: str = LauncherQuery) -> List[str]:
"""
Get list of supported solver versions defined in the configuration.
Expand All @@ -251,25 +252,7 @@ def get_solver_versions(
summary="Retrieving Min, Default, and Max Core Count",
response_model=Dict[str, int],
)
def get_nb_cores(
launcher: str = Query(
"default",
examples={
"Default launcher": {
"description": "Min, Default, and Max Core Count",
"value": "default",
},
"SLURM launcher": {
"description": "Min, Default, and Max Core Count",
"value": "slurm",
},
"Local launcher": {
"description": "Min, Default, and Max Core Count",
"value": "local",
},
},
),
) -> Dict[str, int]:
def get_nb_cores(launcher: str = LauncherQuery) -> Dict[str, int]:
"""
Retrieve the number of cores of the launcher.
Expand All @@ -288,4 +271,29 @@ def get_nb_cores(
except InvalidConfigurationError:
raise UnknownSolverConfig(launcher)

# noinspection SpellCheckingInspection
@bp.get(
    "/time-limit",
    tags=[APITag.launcher],
    summary="Retrieve the time limit for a job (in seconds)",
    response_model=int,
)
def get_time_limit(launcher: str = LauncherQuery) -> int:
    """
    Retrieve the time limit for a job (in seconds) of the given launcher: "local" or "slurm".

    If a job exceeds this time limit, SLURM kills the job and it is considered failed.

    Args:
    - `launcher`: name of the configuration to read: "slurm" or "local".
      If "default" is specified, retrieve the configuration of the default launcher.

    Returns:
    - time limit in seconds
    """
    logger.info(f"Fetching the time limit for the '{launcher}' configuration")
    try:
        return service.config.launcher.get_time_limit(launcher)
    except InvalidConfigurationError as exc:
        # Keep the original error as the cause so that it remains visible
        # in tracebacks and logs.
        raise UnknownSolverConfig(launcher) from exc

return bp
54 changes: 54 additions & 0 deletions tests/integration/launcher_blueprint/test_launcher_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,57 @@ def test_get_launcher_nb_cores(
"description": "Unknown solver configuration: 'unknown'",
"exception": "UnknownSolverConfig",
}

def test_get_time_limit(
    self,
    client: TestClient,
    user_access_token: str,
) -> None:
    """
    Check the `/v1/launcher/time-limit` endpoint for the supported launcher
    names, and the error payload returned for the rejected ones.
    """
    auth_headers = {"Authorization": f"Bearer {user_access_token}"}
    endpoint = "/v1/launcher/time-limit"
    expected_time_limit = 3600

    # Omitting the launcher, asking for "default" and asking for "local"
    # are all expected to return the same time limit.
    accepted_urls = (
        endpoint,
        f"{endpoint}?launcher=default",
        f"{endpoint}?launcher=local",
    )
    for url in accepted_urls:
        res = client.get(url, headers=auth_headers)
        res.raise_for_status()
        assert res.json() == expected_time_limit

    # Check that the endpoint raises an exception when the "slurm" launcher
    # is requested, and when an unknown launcher is requested.
    for rejected_launcher in ("slurm", "unknown"):
        res = client.get(f"{endpoint}?launcher={rejected_launcher}", headers=auth_headers)
        assert res.status_code == http.HTTPStatus.UNPROCESSABLE_ENTITY, res.json()
        assert res.json() == {
            "description": f"Unknown solver configuration: '{rejected_launcher}'",
            "exception": "UnknownSolverConfig",
        }
1 change: 1 addition & 0 deletions webapp/public/locales/en/main.json
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@
"study.error.listOutputs": "Failed to retrieve output list",
"study.error.launcherVersions": "Failed to retrieve launcher versions",
"study.error.launcherCores": "Failed to retrieve launcher number of cores",
"study.error.launcherTimeLimit": "Failed to retrieve launcher time limit",
"study.error.fetchComments": "Failed to fetch comments",
"study.error.commentsNotSaved": "Comments not saved",
"study.error.studyIdCopy": "Failed to copy study ID",
Expand Down
1 change: 1 addition & 0 deletions webapp/public/locales/fr/main.json
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@
"study.error.listOutputs": "Échec de la récupération des sorties",
"study.error.launcherVersions": "Échec lors de la récupération des versions du launcher",
"study.error.launcherCores": "Échec lors de la récupération du nombre de cœurs du launcher",
"study.error.launcherTimeLimit": "Échec lors de la récupération de la limite de temps du launcher",
"study.error.fetchComments": "Échec lors de la récupération des commentaires",
"study.error.commentsNotSaved": "Erreur lors de l'enregistrement des commentaires",
"study.error.studyIdCopy": "Erreur lors de la copie de l'identifiant de l'étude",
Expand Down
36 changes: 27 additions & 9 deletions webapp/src/components/App/Studies/LauncherDialog.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@ import { useSnackbar } from "notistack";
import { useMountedState } from "react-use";
import { shallowEqual } from "react-redux";
import {
LaunchOptions,
StudyMetadata,
StudyOutput,
LaunchOptions,
} from "../../../common/types";
import {
getLauncherCores,
getLauncherTimeLimit,
getLauncherVersions,
getStudyOutputs,
launchStudy,
Expand All @@ -49,15 +50,15 @@ interface Props {
onClose: () => void;
}

function LauncherDialog(props: Props) {
function LauncherDialog(props: Readonly<Props>) {
const { studyIds, open, onClose } = props;
const [t] = useTranslation();
const { enqueueSnackbar } = useSnackbar();
const enqueueErrorSnackbar = useEnqueueErrorSnackbar();
const [options, setOptions] = useState<LaunchOptions>({
nb_cpu: DEFAULT_NB_CPU,
auto_unzip: true,
time_limit: DEFAULT_TIME_LIMIT,
time_limit: undefined,
});
const [solverVersion, setSolverVersion] = useState<string>();
const [isLaunching, setIsLaunching] = useState(false);
Expand All @@ -83,6 +84,18 @@ function LauncherDialog(props: Props) {
},
);

const { data: launcherTimeLimit } = usePromiseWithSnackbarError(
async () => {
return await getLauncherTimeLimit();
},
{
errorMessage: t("study.error.launcherTimeLimit"),
},
);

const minSeconds = 3600;
const maxSeconds = launcherTimeLimit ?? DEFAULT_TIME_LIMIT;

const { data: outputList } = usePromiseWithSnackbarError(
() => Promise.all(studyIds.map((sid) => getStudyOutputs(sid))),
{ errorMessage: t("study.error.listOutputs"), deps: [studyIds] },
Expand Down Expand Up @@ -182,7 +195,7 @@ function LauncherDialog(props: Props) {
*/
const parseHoursToSeconds = (hourString: string): number => {
const seconds = moment.duration(hourString, "hours").asSeconds();
return seconds > 0 ? seconds : DEFAULT_TIME_LIMIT;
return Math.max(minSeconds, Math.min(seconds, maxSeconds));
};

////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -273,16 +286,21 @@ function LauncherDialog(props: Props) {
type="number"
variant="filled"
// Convert from seconds to hours the displayed value
value={(options.time_limit ?? DEFAULT_TIME_LIMIT) / 3600}
value={
options.time_limit === undefined
? maxSeconds / 3600
: options.time_limit / 3600
}
onChange={(e) =>
handleChange("time_limit", parseHoursToSeconds(e.target.value))
}
InputLabelProps={{
shrink: true,
}}
inputProps={{
min: 1,
max: 240,
min: minSeconds / 3600,
max: maxSeconds / 3600,
step: 1,
}}
sx={{
minWidth: "125px",
Expand Down Expand Up @@ -449,7 +467,7 @@ function LauncherDialog(props: Props) {
}
/>
}
label={t("launcher.xpansion.sensitivityMode") as string}
label={t("launcher.xpansion.sensitivityMode")}
/>
<SelectSingle
name={t("studies.selectOutput")}
Expand All @@ -458,7 +476,7 @@ function LauncherDialog(props: Props) {
name: o.name,
}))}
disabled={!!options.xpansion_r_version || !options.xpansion}
data={options.xpansion?.output_id || ""}
data={options.xpansion?.output_id ?? ""}
setValue={(data: string) =>
handleObjectChange("xpansion", {
output_id: data,
Expand Down
16 changes: 13 additions & 3 deletions webapp/src/services/api/study.ts
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ export const exportStudy = async (

export const getExportUrl = (sid: string, skipOutputs = false): string =>
`${
getConfig().downloadHostUrl ||
getConfig().downloadHostUrl ??
getConfig().baseUrl + getConfig().restEndpoint
}/v1/studies/${sid}/export?no_output=${skipOutputs}`;

Expand All @@ -226,7 +226,7 @@ export const importStudy = async (
if (onProgress) {
options.onUploadProgress = (progressEvent): void => {
const percentCompleted = Math.round(
(progressEvent.loaded * 100) / (progressEvent.total || 1),
(progressEvent.loaded * 100) / (progressEvent.total ?? 1),
);
onProgress(percentCompleted);
};
Expand All @@ -253,7 +253,7 @@ export const importFile = async (
if (onProgress) {
options.onUploadProgress = (progressEvent): void => {
const percentCompleted = Math.round(
(progressEvent.loaded * 100) / (progressEvent.total || 1),
(progressEvent.loaded * 100) / (progressEvent.total ?? 1),
);
onProgress(percentCompleted);
};
Expand Down Expand Up @@ -304,6 +304,16 @@ export const getLauncherCores = async (): Promise<Record<string, number>> => {
return res.data;
};

/**
 * Fetch the time limit (in seconds) for launcher jobs.
 *
 * A job that exceeds this time limit is killed by SLURM and is considered
 * failed. An often used value is 172800 (48 hours).
 *
 * No `launcher` query parameter is sent with the request.
 *
 * @returns The time limit, in seconds, returned by `/v1/launcher/time-limit`.
 */
export const getLauncherTimeLimit = async (): Promise<number> => {
  const { data } = await client.get("/v1/launcher/time-limit");
  return data;
};

export const getLauncherMetrics = async (): Promise<LauncherMetrics> => {
const res = await client.get("/v1/launcher/load");
return res.data;
Expand Down

0 comments on commit e15b830

Please sign in to comment.