55 # push:
66 schedule :
77 - cron : ' 0 20 * * FRI' # 8:00 PM every Friday
8-
8+ env :
9+ BRANCH_NAME : ${{ github.head_ref || github.ref_name }}
910jobs :
1011 Tracer-Weekly :
12+ timeout-minutes : 720
1113 if : ${{ github.repository == 'accel-sim/accel-sim-framework' || github.event_name == 'workflow_dispatch' }}
1214 runs-on : tgrogers-gpu01
1315 defaults :
3840 git -C ./gpu-app-collection/ submodule update --init -- ./src/cuda/cuda-samples
3941 source ./gpu-app-collection/src/setup_environment
4042 ln -s /home/tgrogers-raid/a/common/data_dirs ./gpu-app-collection/
43+ make -j8 -C ./gpu-app-collection/src rodinia_2.0-ft
4144 make -j8 -C ./gpu-app-collection/src rodinia-3.1
4245 make -j8 -C ./gpu-app-collection/src GPU_Microbenchmark
4346 # make -j8 -C ./gpu-app-collection/src Deepbench_nvidia
@@ -49,30 +52,22 @@ jobs:
4952 source ./env-setup/12.8_env_setup.sh
5053 source ./gpu-app-collection/src/setup_environment
5154 rm -rf ./hw_run/
55+ ./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft,rodinia-3.1,GPU_Microbenchmark -D 7
5256 rm -rf /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces
5357 mkdir -p /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces
54- ln -s /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces ./hw_run
55- ./util/tracer_nvbit/run_hw_trace.py -B rodinia-3.1,GPU_Microbenchmark -D 7
56- # ./util/tracer_nvbit/run_hw_trace.py -B rodinia-3.1,GPU_Microbenchmark,parboil,polybench,cutlass_5_trace,Deepbench_nvidia_tencore,Deepbench_nvidia_normal -D 7
58+ mv ./hw_run /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run
59+ # ./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft,rodinia-3.1,GPU_Microbenchmark,parboil,polybench,cutlass_5_trace,Deepbench_nvidia_tencore,Deepbench_nvidia_normal -D 7
5760 - name : generate-spinlock-traces-spinlock_handling
5861 run : |
5962 source ./env-setup/12.8_env_setup.sh
6063 source ./gpu-app-collection/src/setup_environment
6164 rm -rf ./hw_run/
6265 ./util/tracer_nvbit/run_hw_trace.py -B Spinlock -D 7 --spinlock_handling fast_forward
63- mv ./hw_run ./hw_run_fast_forward
66+ mv ./hw_run /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run_fast_forward
6467 ./util/tracer_nvbit/run_hw_trace.py -B Spinlock -D 7 --spinlock_handling none
65- mv ./hw_run ./hw_run_none
66- - name : test-new-traces-spinlock_handling
67- # Test only fast-forwarded traces as the none one takes too long to run (~2-3 hr)
68- run : |
69- source ./env-setup/12.8_env_setup.sh
70- source ./gpu-simulator/setup_environment.sh
71- ./util/job_launching/run_simulations.py -B Spinlock -C QV100-SASS -T ./hw_run_fast_forward/traces/device-7/ -N spinlock-microbenchmark-$$-fast_forward
72- ./util/job_launching/monitor_func_test.py -I -v -s spinlock-stats-per-app.csv -N spinlock-microbenchmark-$$-fast_forward
73- # ./util/job_launching/run_simulations.py -B Spinlock -C QV100-SASS -T ./hw_run_none/traces/device-7/ -N spinlock-microbenchmark-$$-none
74- # ./util/job_launching/monitor_func_test.py -I -v -s spinlock-stats-per-app.csv -N spinlock-microbenchmark-$$-none
68+ mv ./hw_run /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run_none
7569 SASS-Weekly :
70+ timeout-minutes : 720
7671 needs : [Tracer-Weekly]
7772 if : ${{ github.repository == 'accel-sim/accel-sim-framework' || github.event_name == 'workflow_dispatch' }}
7873 runs-on : tgrogers-raid
@@ -93,14 +88,69 @@ jobs:
9388 run : |
9489 source ./env-setup/12.8_env_setup.sh
9590 rm -rf ./gpu-simulator/gpgpu-sim
91+
92+ # Clone gpgpu-sim with fork-aware branch selection
93+ echo "Cloning gpgpu-sim with fork-aware branch selection..."
94+ git clone --quiet git@github.com:accel-sim/gpgpu-sim_distribution.git ./gpu-simulator/gpgpu-sim
95+
96+ current_owner=$(echo ${{ github.repository }} | cut -d'/' -f1)
97+ current_branch=$BRANCH_NAME
98+ current_repo=$(echo $GITHUB_REPOSITORY | cut -d'/' -f2)
99+
100+ gpgpusim_repo=$(echo $current_repo | sed 's/accel-sim-framework/gpgpu-sim_distribution/')
101+
102+ echo "Attempting to checkout branch '$BRANCH_NAME' from '$current_owner/$gpgpusim_repo'"
103+
104+ # First, try to add the fork owner's repository as a remote and check if the branch exists
105+ if git -C ./gpu-simulator/gpgpu-sim/ remote add fork-owner git@github.com:$current_owner/$gpgpusim_repo.git 2>/dev/null; then
106+ # Check if the branch exists in the fork owner's repository
107+ if git -C ./gpu-simulator/gpgpu-sim/ ls-remote fork-owner | grep -q "refs/heads/$BRANCH_NAME"; then
108+ echo "Found branch '$BRANCH_NAME' in '$current_owner/$gpgpusim_repo' repository, checking it out"
109+ git -C ./gpu-simulator/gpgpu-sim/ fetch fork-owner
110+ git -C ./gpu-simulator/gpgpu-sim/ checkout -B $BRANCH_NAME fork-owner/$BRANCH_NAME
111+ else
112+ echo "Branch '$BRANCH_NAME' not found in '$current_owner/$gpgpusim_repo' repository, falling back to accel-sim dev branch"
113+ git -C ./gpu-simulator/gpgpu-sim/ checkout -B dev origin/dev
114+ fi
115+ # Remove the temporary remote
116+ git -C ./gpu-simulator/gpgpu-sim/ remote remove fork-owner
117+ else
118+ echo "Could not add '$current_owner/$gpgpusim_repo' remote, falling back to upstream dev branch"
119+ git -C ./gpu-simulator/gpgpu-sim/ checkout -B dev origin/dev
120+ fi
121+
96122 source ./gpu-simulator/setup_environment.sh
97123 make clean -C gpu-simulator
98124 make -j -C gpu-simulator
99125 - name : run SASS
100126 run : |
101127 source ./env-setup/12.8_env_setup.sh
102128 source ./gpu-simulator/setup_environment.sh
103- ln -s /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces ./hw_run
104- # ./util/job_launching/run_simulations.py -B rodinia-3.1,GPU_Microbenchmark,sdk-4.2-scaled,parboil,polybench,cutlass_5_trace,Deepbench_nvidia_tencore,Deepbench_nvidia_normal -C QV100-SASS-5B_INSN -T ./hw_run/traces/device-7/12.8 -N weekly-$$ -M 70G
105- ./util/job_launching/run_simulations.py -B rodinia-3.1,GPU_Microbenchmark -C QV100-SASS-5B_INSN -T ./hw_run/traces/device-7/12.8 -N weekly-$$ -M 70G
129+ ln -s /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run ./hw_run
130+ ./util/job_launching/run_simulations.py -B rodinia_2.0-ft,rodinia-3.1,GPU_Microbenchmark -C QV100-SASS -T ./hw_run/traces/device-7/12.8 -N weekly-$$ -M 70G
106131 ./util/job_launching/monitor_func_test.py -T 12 -S 1800 -I -v -s weekly-stats-per-app.csv -N weekly-$$
132+ - name : test-new-traces-spinlock_handling
133+ # Test only fast-forwarded traces as the none one takes too long to run (~2-3 hr)
134+ run : |
135+ source ./env-setup/12.8_env_setup.sh
136+ source ./gpu-simulator/setup_environment.sh
137+ ./util/job_launching/run_simulations.py -B Spinlock -C QV100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run_fast_forward/traces/device-7/ -N spinlock-microbenchmark-$$-fast_forward
138+ ./util/job_launching/monitor_func_test.py -I -v -s spinlock-stats-per-app.csv -N spinlock-microbenchmark-$$-fast_forward
139+ # ./util/job_launching/run_simulations.py -B Spinlock -C QV100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run_none/traces/device-7/ -N spinlock-microbenchmark-$$-none
140+ # ./util/job_launching/monitor_func_test.py -I -v -s spinlock-stats-per-app.csv -N spinlock-microbenchmark-$$-none
141+ failures :
142+ if : failure()
143+ env :
144+ ACTION_URL : ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
145+ REPORT_URL : " "
146+ runs-on : tgrogers-raid
147+ needs : [Tracer-Weekly, SASS-Weekly]
148+ steps :
149+ - uses : actions/checkout@v4
150+ - name : Notify Failure
151+ run : |
152+ # Setup envs
153+ git clone --quiet --branch cluster-ubuntu git@github.com:purdue-aalp/env-setup.git
154+ source ./env-setup/common/common_inc.sh
155+ export BRANCH_NAME="Weekly Tests"
156+ python3 .github/scripts/send_ci_email.py -t failure
0 commit comments