Skip to content

Commit 6f75427

Browse files
Improve Scalability
1 parent e8e7874 commit 6f75427

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

src/common/schedulers/slurm_commands.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,7 @@ def resume_powering_down_nodes():
355355
# TODO: This function was added due to Slurm ticket 12915. The bug is not reproducible and the ticket was then
356356
# closed. This operation may now be useless: we need to check this.
357357
log.info("Resuming powering down nodes.")
358+
log.info("new helllllllllllllllllll")
358359
powering_down_nodes = _get_slurm_nodes(states="powering_down")
359360
update_nodes(nodes=powering_down_nodes, state="resume", raise_on_error=False)
360361

@@ -374,7 +375,7 @@ def _get_all_partition_nodes(partition_name, command_timeout=DEFAULT_GET_INFO_CO
374375

375376

376377
def _get_slurm_nodes(states=None, partition_name=None, command_timeout=DEFAULT_GET_INFO_COMMAND_TIMEOUT):
377-
sinfo_command = f"{SINFO} -h -N -o %N"
378+
sinfo_command = f"{SINFO} -h -o %N"
378379
partition_name = partition_name or ",".join(PartitionNodelistMapping.instance().get_partitions())
379380
validate_subprocess_argument(partition_name)
380381
sinfo_command += f" -p {partition_name}"
@@ -383,7 +384,11 @@ def _get_slurm_nodes(states=None, partition_name=None, command_timeout=DEFAULT_G
383384
sinfo_command += f" -t {states}"
384385
# Every node is printed on a separate line
385386
# It's safe to use the function affected by B604 since the command is fully built in this code
386-
return check_command_output(sinfo_command, timeout=command_timeout, shell=True).splitlines() # nosec B604
387+
sinfo_output = check_command_output(sinfo_command, timeout=command_timeout, shell=True).splitlines()
388+
nodes=[]
389+
for line in sinfo_output:
390+
nodes.extend(check_command_output(f"{SCONTROL} show hostnames {line}", timeout=command_timeout, shell=True).splitlines())
391+
return nodes # nosec B604
387392

388393

389394
def _parse_nodes_info(slurm_node_info: str) -> List[SlurmNode]:

0 commit comments

Comments
 (0)