From 9fec7a72beba9c9b36ed0d356159db671bc5e723 Mon Sep 17 00:00:00 2001 From: adnanzai Date: Thu, 3 Oct 2024 11:25:46 -0400 Subject: [PATCH 1/5] slurm wrapper for archivetar --- bin/.archivetar | 21 +++++++++++++++++++++ bin/archivetar | 39 +++++++++++++++++++++------------------ 2 files changed, 42 insertions(+), 18 deletions(-) create mode 100755 bin/.archivetar mode change 100755 => 100644 bin/archivetar diff --git a/bin/.archivetar b/bin/.archivetar new file mode 100755 index 0000000..26b661a --- /dev/null +++ b/bin/.archivetar @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 + +# Brock Palen +# brockp@umich.edu +# 7/2020 +# +# prep a directory for placement in dataden +# process: +# 1. run mpiFileUtils / dwalk (deafault sort in name / path order) all files < minsize +# 2. Take resulting list build tar lists by summing size until > tarsize (before compression) +# 3. Tar each list: OR --dryrun create list with est size +# a. Create Index file of contents +# b. Optionally compress -z / -j with gzip/pigz bzip/lbzip2 if installed +# c. Optionally purge +# 4. (?) Kick out optimized untar script (pigz / lbzip2) + +import sys + +import archivetar + +archivetar.main(sys.argv) diff --git a/bin/archivetar b/bin/archivetar old mode 100755 new mode 100644 index 26b661a..10fdf62 --- a/bin/archivetar +++ b/bin/archivetar @@ -1,21 +1,24 @@ -#!/usr/bin/env python3 - -# Brock Palen -# brockp@umich.edu -# 7/2020 -# -# prep a directory for placement in dataden -# process: -# 1. run mpiFileUtils / dwalk (deafault sort in name / path order) all files < minsize -# 2. Take resulting list build tar lists by summing size until > tarsize (before compression) -# 3. Tar each list: OR --dryrun create list with est size -# a. Create Index file of contents -# b. Optionally compress -z / -j with gzip/pigz bzip/lbzip2 if installed -# c. Optionally purge -# 4. (?) 
Kick out optimized untar script (pigz / lbzip2) - +import os +import subprocess import sys -import archivetar +def main(): + # Check for help options and run locally + if len(sys.argv) > 1 and (sys.argv[1] == "-h" or sys.argv[1] == "--help"): + print("\033[34m==>\033[32m Running locally for help option\033[0m") + subprocess.run([".archivetar", "-h"]) + sys.exit(0) + + # Check if running inside a SLURM job + slurm_job_id = os.getenv("SLURM_JOB_ID") + if slurm_job_id: + # Run inside SLURM + subprocess.run([".archivetar"] + sys.argv[1:]) + else: + print("\033[34m==>\033[35m Running archivetar within SLURM\033[0m") + # Run Python script from within SLURM + cmd = f"srun --partition=archive --ntasks-per-node=8 --mem=40G --job-name=archivetar_{os.getenv('USER')} --pty bash -c '.archivetar {' '.join(sys.argv[1:])}'" + subprocess.run(cmd, shell=True) -archivetar.main(sys.argv) +if __name__ == "__main__": + main() \ No newline at end of file From 6871cc57fb8dc520e50a041aecac621a8a191889 Mon Sep 17 00:00:00 2001 From: adnanzai Date: Thu, 3 Oct 2024 12:48:33 -0400 Subject: [PATCH 2/5] improved logic --- bin/archivetar | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/bin/archivetar b/bin/archivetar index 10fdf62..2ac97f0 100644 --- a/bin/archivetar +++ b/bin/archivetar @@ -1,24 +1,49 @@ import os import subprocess import sys +import shutil def main(): + ''' + To disable slurm execution entirely, unset env-variable AT_SLURM_OFFLOAD. + To enable slurm execution, set env-variable AT_SLURM_OFFLOAD=1 + + ARCHIVETAR_TASKS, ARCHIVETAR_MEM, and ARCHIVETAR_PAR control the cores, memory and partition requirements + of the SLURM job srun executes. 
+ ''' # Check for help options and run locally if len(sys.argv) > 1 and (sys.argv[1] == "-h" or sys.argv[1] == "--help"): print("\033[34m==>\033[32m Running locally for help option\033[0m") subprocess.run([".archivetar", "-h"]) sys.exit(0) + # Handle the case where no arguments are provided (just "archivetar") + if len(sys.argv) == 1: + print("\033[34m==>\033[32m Running archivetar with no arguments\033[0m") + subprocess.run([".archivetar"]) + sys.exit(0) + # Check if running inside a SLURM job slurm_job_id = os.getenv("SLURM_JOB_ID") + at_slurm_offload = os.getenv("AT_SLURM_OFFLOAD") + if slurm_job_id: - # Run inside SLURM + # Run locally inside SLURM without executing another SLURM job. subprocess.run([".archivetar"] + sys.argv[1:]) + elif at_slurm_offload and shutil.which("srun"): + print("\033[34m==>\033[35m Running archivetar within SLURM\033[0m") + # Get environment variables or use default values + tasks = os.getenv("ARCHIVETAR_TASKS", "8") # Default to 8 tasks if ARCHIVETAR_TASKS is not set + mem = os.getenv("ARCHIVETAR_MEM", "40G") # Default to 40G if ARCHIVETAR_MEM is not set + partition = os.getenv("ARCHIVETAR_PAR", "archive") # Default to archive if ARCHIVETAR_PAR is not set + + # Run Python script from within SLURM + cmd = f"srun --partition={partition} --ntasks-per-node={tasks} --mem={mem} --job-name=archivetar_{os.getenv('USER')} --pty bash -c '.archivetar {' '.join(sys.argv[1:])}'" + subprocess.run(cmd, shell=True) else: - print("\033[34m==>\033[35m Running archivetar within SLURM\033[0m") - # Run Python script from within SLURM - cmd = f"srun --partition=archive --ntasks-per-node=8 --mem=40G --job-name=archivetar_{os.getenv('USER')} --pty bash -c '.archivetar {' '.join(sys.argv[1:])}'" - subprocess.run(cmd, shell=True) + # Run locally without SLURM + subprocess.run([".archivetar"] + sys.argv[1:]) + if __name__ == "__main__": main() \ No newline at end of file From 7fa05aa9f05ea69ba4b3f4815df3fa099972c5e2 Mon Sep 17 00:00:00 2001 From: adnanzai 
Date: Thu, 3 Oct 2024 12:55:48 -0400 Subject: [PATCH 3/5] executable --- bin/archivetar | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 bin/archivetar diff --git a/bin/archivetar b/bin/archivetar old mode 100644 new mode 100755 From dc47e27cdba636eb82a36900bfedd5155dc3c797 Mon Sep 17 00:00:00 2001 From: adnanzai Date: Thu, 3 Oct 2024 14:50:12 -0400 Subject: [PATCH 4/5] error code carry forward, and mpi task testing --- bin/archivetar | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/bin/archivetar b/bin/archivetar index 2ac97f0..2b9797c 100755 --- a/bin/archivetar +++ b/bin/archivetar @@ -29,7 +29,8 @@ def main(): if slurm_job_id: # Run locally inside SLURM without executing another SLURM job. - subprocess.run([".archivetar"] + sys.argv[1:]) + result = subprocess.run([".archivetar"] + sys.argv[1:]) + sys.exit(result.returncode) elif at_slurm_offload and shutil.which("srun"): print("\033[34m==>\033[35m Running archivetar within SLURM\033[0m") # Get environment variables or use default values @@ -38,11 +39,13 @@ def main(): partition = os.getenv("ARCHIVETAR_PAR", "archive") # Default to archive if ARCHIVETAR_PAR is not set # Run Python script from within SLURM - cmd = f"srun --partition={partition} --ntasks-per-node={tasks} --mem={mem} --job-name=archivetar_{os.getenv('USER')} --pty bash -c '.archivetar {' '.join(sys.argv[1:])}'" - subprocess.run(cmd, shell=True) + cmd = f"srun --partition={partition} --cpu-bind=no --ntasks=1 --cpus-per-task={tasks} --mem={mem} --job-name=archivetar_{os.getenv('USER')} --pty bash -c '.archivetar {' '.join(sys.argv[1:])}'" + result = subprocess.run(cmd, shell=True) + sys.exit(result.returncode) else: # Run locally without SLURM - subprocess.run([".archivetar"] + sys.argv[1:]) + result = subprocess.run([".archivetar"] + sys.argv[1:]) + sys.exit(result.returncode) if __name__ == "__main__": From 4e0a75040ab02ae597f5662f8eae97e4baa2edcd Mon Sep 17 00:00:00 2001 From: adnanzai 
Date: Thu, 3 Oct 2024 15:26:00 -0400 Subject: [PATCH 5/5] changed env var naming consistency, and time --- bin/archivetar | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/archivetar b/bin/archivetar index 2b9797c..d379ab6 100755 --- a/bin/archivetar +++ b/bin/archivetar @@ -34,12 +34,12 @@ def main(): elif at_slurm_offload and shutil.which("srun"): print("\033[34m==>\033[35m Running archivetar within SLURM\033[0m") # Get environment variables or use default values - tasks = os.getenv("ARCHIVETAR_TASKS", "8") # Default to 8 tasks if ARCHIVETAR_TASKS is not set - mem = os.getenv("ARCHIVETAR_MEM", "40G") # Default to 40G if ARCHIVETAR_MEM is not set - partition = os.getenv("ARCHIVETAR_PAR", "archive") # Default to archive if ARCHIVETAR_PAR is not set + tasks = os.getenv("AT_TASKS", "8") # Default to 8 tasks if AT_TASKS is not set + mem = os.getenv("AT_MEM", "40G") # Default to 40G if AT_MEM is not set + partition = os.getenv("AT_PAR", "archive") # Default to archive if AT_PAR is not set # Run Python script from within SLURM - cmd = f"srun --partition={partition} --cpu-bind=no --ntasks=1 --cpus-per-task={tasks} --mem={mem} --job-name=archivetar_{os.getenv('USER')} --pty bash -c '.archivetar {' '.join(sys.argv[1:])}'" + cmd = f"srun --partition={partition} --cpu-bind=no --ntasks=1 --cpus-per-task={tasks} --mem={mem} --job-name=archivetar_{os.getenv('USER')} --time=14-00:00:00 --pty bash -c '.archivetar {' '.join(sys.argv[1:])}'" result = subprocess.run(cmd, shell=True) sys.exit(result.returncode) else: