Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CI Performance Test Bugfix #323

Merged
merged 17 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions .gitlab/jobs-mpi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,31 +27,29 @@ toss_gcc_mvapich2_test:

toss_gcc_mvapich2_cleanup:
extends: [.toss_resource2, .gcc_mvapich2, .cleanup_dir]
needs: [toss_gcc_mvapich2_test]

toss_gcc_mvapich2_perf:
extends: [.toss_resource2, .gcc_mvapich2, .run_perf, .merge_pr_rule]
extends: [.toss_shell1, .gcc_mvapich2, .run_perf, .merge_pr_rule]
needs: [toss_gcc_mvapich2_build]



toss_clang_mvapich2_tpls:
extends: [.toss_resource1, .clang_mvapich2, .tpls]
extends: [.toss_resource3, .clang_mvapich2, .tpls]

toss_clang_mvapich2_build:
extends: [.toss_resource1, .clang_mvapich2, .build_and_test]
extends: [.toss_resource3, .clang_mvapich2, .build_and_test]
needs: [toss_clang_mvapich2_tpls]

toss_clang_mvapich2_test:
extends: [.toss_resource2, .clang_mvapich2, .run_ats]
extends: [.toss_resource3, .clang_mvapich2, .run_ats]
needs: [toss_clang_mvapich2_build]

toss_clang_mvapich2_cleanup:
extends: [.toss_resource2, .clang_mvapich2, .cleanup_dir]
needs: [toss_clang_mvapich2_test]
extends: [.toss_resource3, .clang_mvapich2, .cleanup_dir]

toss_clang_mvapich2_perf:
extends: [.toss_resource2, .clang_mvapich2, .run_perf, .merge_pr_rule]
extends: [.toss_shell1, .clang_mvapich2, .run_perf, .merge_pr_rule]
needs: [toss_clang_mvapich2_build]


Expand All @@ -74,10 +72,9 @@ blueos_gcc_spectrum_test:

blueos_gcc_spectrum_cleanup:
extends: [.blueos_resource1, .gcc_spectrum, .cleanup_dir]
needs: [blueos_gcc_spectrum_test]

blueos_gcc_spectrum_perf:
extends: [.blueos_resource1, .gcc_spectrum, .run_perf, .merge_pr_rule]
extends: [.blueos_shell1, .gcc_spectrum, .run_perf, .merge_pr_rule]
needs: [blueos_gcc_spectrum_build]


Expand All @@ -95,8 +92,7 @@ blueos_cuda_11_gcc_spectrum_test:

blueos_cuda_11_gcc_spectrum_cleanup:
extends: [.blueos_resource2, .cuda_11_gcc_spectrum, .cleanup_dir]
needs: [blueos_cuda_11_gcc_spectrum_test]

blueos_cuda_11_gcc_spectrum_perf:
extends: [.blueos_resource2, .cuda_11_gcc_spectrum, .run_perf, .merge_pr_rule]
extends: [.blueos_shell2, .cuda_11_gcc_spectrum, .run_perf, .merge_pr_rule]
needs: [blueos_cuda_11_gcc_spectrum_build]
30 changes: 30 additions & 0 deletions .gitlab/machines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@
timeout: 120 minutes
extends: [.on_toss_4_x86]

.ruby_shell:
tags:
- ruby
- shell
variables:
ENVIRONMENT: "INTERACTIVE"
extends: [.on_toss_4_x86]

.on_lassen:
tags:
- lassen
Expand All @@ -27,6 +35,15 @@
timeout: 150 minutes
extends: [.on_blueos_3_ppc64]

.lassen_shell:
tags:
- lassen
- shell
variables:
ENVIRONMENT: "INTERACTIVE"
HOSTNAME: 'lassen'
extends: [.on_blueos_3_ppc64]

# ------------------------------------------------------------------------------

.toss_resource_general:
Expand All @@ -35,6 +52,15 @@
.blueos_resource_general:
extends: [.on_lassen]

.toss_shell1:
resource_group: toss2
extends: [.ruby_shell]

.blueos_shell1:
extends: [.lassen_shell]

.blueos_shell2:
extends: [.lassen_shell]
# ------------------------------------------------------------------------------
#
.toss_resource1:
Expand All @@ -45,6 +71,10 @@
resource_group: toss2
extends: [.toss_resource_general]

.toss_resource3:
resource_group: toss3
extends: [.toss_resource_general]

.blueos_resource1:
#resource_group: blueos1
extends: [.blueos_resource_general]
Expand Down
6 changes: 4 additions & 2 deletions .gitlab/scripts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,18 @@
stage: run_perf
variables:
GIT_STRATEGY: none
FF_ENABLE_BASH_EXIT_CODE_CHECK: 1
FF_USE_NEW_BASH_EVAL_STRATEGY: 1
script:
- CI_BUILD_DIR=$(cat ci-dir.txt)
- cd $CI_BUILD_DIR && cat job-name.txt

- ./build_gitlab/install/spheral-ats --ciRun --numNodes 2 --logs perf_logs ./build_gitlab/install/$PERF_ATS_FILE
- ./build_gitlab/install/spheral-ats --ciRun --numNodes 2 --logs perf_logs ./build_gitlab/install/$PERF_ATS_FILE || exit_code=$?
- exit $exit_code
artifacts:
when: always
paths:
- ci-dir.txt
- perf_logs/

.cleanup_dir:
stage: cleanup
Expand Down
4 changes: 2 additions & 2 deletions scripts/devtools/performance_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,11 +271,11 @@ def get_caliper_files(file_path):
if (main_diff > ref_thresh):
cur_status = "FAILED"
if args.display:
display(cprof.statsframe.tree(metric1, metric1+"_mean"))
display(ctest.statsframe.tree(metric1, metric1+"_mean"))
elif (main_diff < -ref_thresh):
cur_status = "PASSED"
if args.display:
display(cprof.statsframe.tree(metric1, metric1+"_mean"))
display(ctest.statsframe.tree(metric1, metric1+"_mean"))
else:
cur_status = "PASSED"
test_status.update({test_name: (cur_status, cur_main, ref_main, ref_thresh)})
Expand Down
42 changes: 24 additions & 18 deletions scripts/spheral_ats.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,14 @@ def run_and_report(run_command, ci_output, num_runs):
def install_ats_args():
install_args = []
if (SpheralConfigs.build_type() == "Debug"):
install_args.append('--level 99')
install_args.append("--level 99")
if (mpi.is_fake_mpi()):
install_args.append('--filter="np<2"')
install_args.append("--filter='np<2'")
comp_configs = SpheralConfigs.component_configs()
test_comps = ["FSISPH", "GSPH", "SVPH"]
for ts in test_comps:
if ts not in comp_configs:
install_args.append(f'--filter="not {ts.lower()}"')
install_args.append(f"--filter='not {ts.lower()}'")
return install_args

#---------------------------------------------------------------------------
Expand All @@ -101,6 +101,7 @@ def main():
test_log_name = "test-logs"
toss_machine_names = ["rzgenie", "rzwhippet", "rzhound", "ruby"]
blueos_machine_names = ["rzansel", "lassen"]
ci_launch_flags = {"ruby": "--res=ci", "lassen": "-q pci"}
temp_uname = os.uname()
hostname = temp_uname[1]
sys_type = os.getenv("SYS_TYPE")
Expand Down Expand Up @@ -154,19 +155,21 @@ def main():
mac_args = []
if any(x in hostname for x in toss_machine_names):
numNodes = numNodes if numNodes else 2
timeLimit = timeLimit if timeLimit else 120
mac_args = [f"--numNodes {numNodes}"]
timeLimit = timeLimit if timeLimit else 120
inAllocVars = ["SLURM_JOB_NUM_NODES", "SLURM_NNODES"]
launch_cmd = f"salloc --exclusive -N {numNodes} -t {timeLimit} "
if (options.ciRun):
launch_cmd += "-p pdebug "
elif any(x in hostname for x in blueos_machine_names):
blueOS = True
numNodes = numNodes if numNodes else 1
timeLimit = timeLimit if timeLimit else 60
inAllocVars = ["LSB_MAX_NUM_PROCESSORS"]
numNodes = numNodes if numNodes else 2
mac_args = ["--smpi_off", f"--numNodes {numNodes}"]
launch_cmd = f"bsub -nnodes {numNodes} -Is -XF -W {timeLimit} -core_isolation 2 "
inAllocVars = ["LSB_MAX_NUM_PROCESSORS"]
timeLimit = timeLimit if timeLimit else 120
launch_cmd = f"bsub -nnodes {numNodes} -Is -XF -core_isolation 2 -alloc_flags atsdisable -W {timeLimit} "
if (options.ciRun):
for i, j in ci_launch_flags.items():
if (i in hostname):
launch_cmd += j + " "
ats_args.extend(mac_args)

#---------------------------------------------------------------------------
Expand All @@ -180,13 +183,13 @@ def main():
else:
log_name_indx = unknown_options.index("--logs") + 1
log_name = unknown_options[log_name_indx]
ats_args.append('--glue="independent=True"')
ats_args.append('--continueFreq=15')
ats_args.append("--continueFreq=15")
# Pass flag to tell tests this is a CI run
ats_args.append('--glue="cirun=True"')
ats_args.append("--glue='cirun=True'")
if (options.threads):
ats_args.append(f'--glue="threads={options.threads}"')
ats_args.append(f'''--glue="benchmark_dir='{benchmark_dir}'"''')
ats_args.append(f"--glue='threads={options.threads}'")
ats_args.append(f"""--glue='benchmark_dir="{benchmark_dir}"'""")
ats_args.append("--glue='independent=True'")
ats_args = " ".join(str(x) for x in ats_args)
other_args = " ".join(str(x) for x in unknown_options)
cmd = f"{ats_exe} -e {spheral_exe} {ats_args} {other_args}"
Expand All @@ -197,9 +200,12 @@ def main():
run_command = cmd
else:
if blueOS:
# Launches using bsub have issues with '<' being in the command,
# so the entire run statement must be in quotes
run_command = f"{launch_cmd} '{cmd}'"
# Launching with bsub requires quoting the whole command.
# This causes issues for the glue='benchmark_dir... argument
# unless we escape the embedded double-quote characters
run_command = f'{launch_cmd} "{cmd}"'
run_command = run_command.replace('="', '=\\"')
run_command = run_command.replace('"\'', '\\"\'')
else:
run_command = f"{launch_cmd}{cmd}"
print(f"\nRunning: {run_command}\n")
Expand Down
8 changes: 4 additions & 4 deletions tests/performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
# Retrieve the host name and remove any numbers
temp_uname = os.uname()
hostname = "".join([i for i in temp_uname[1] if not i.isdigit()])
mac_procs = {"rzhound": 112, "rzwhippet": 112, "ruby": 112,
mac_procs = {"rzhound": 112, "rzwhippet": 112, "ruby": 56,
"rzadams": 84, "rzvernal": 64, "tioga": 64,
"rzansel": 40, "lassen": 40, "rzgenie": 36}
# Find out how many nodes our allocation has grabbed
Expand Down Expand Up @@ -164,9 +164,9 @@ def spheral_setup_test(test_file, test_name, inps, ncores, threads=1, **kwargs):
f"--rlength {rlen} --zlength {zlen} --nr {nr} --nz {nz}"

# Test variations
test_inp = {"CRK": "--crksph True --densityUpdate SumVoronoiCellDensity",
"FSI": "--fsisph True",
"SOLIDSPH": "--fsisph False --crksph False"}
test_inp = {"CRK": "--hydroType CRKSPH --densityUpdate SumVoronoiCellDensity",
"FSI": "--hydroType FSISPH",
"SOLIDSPH": "--hydroType SPH"}
for tname, tinp in test_inp.items():
inps = f"{gen_inps} {tinp}"
spheral_setup_test(test_path, test_name+tname, inps, num_cores, num_threads)
Expand Down
Loading