1616name : Precompiled images
1717
1818on :
19- schedule :
20- - cron : ' 00 09 * * *'
19+ pull_request :  # pull-request trigger, limited to the activity types and branches listed below
20+ types :
21+ - opened
22+ - synchronize
23+ branches :
24+ - test-holodeck
25+ push :  # push trigger, limited to the branch listed below
26+ branches :
27+ - test-holodeck
2128
2229jobs :
2330 set-driver-version-matrix :
@@ -39,17 +46,20 @@ jobs:
3946 echo "driver_branch=$driver_branch_json" >> $GITHUB_OUTPUT
4047
4148 # get kernel flavors
42- KERNEL_FLAVORS=("aws" "azure" "generic" "nvidia" "oracle")
49+ # KERNEL_FLAVORS=("aws" "azure" "generic" "nvidia" "oracle")
50+ KERNEL_FLAVORS=("nvidia")
4351 kernel_flavors_json=$(printf '%s\n' "${KERNEL_FLAVORS[@]}" | jq -R . | jq -cs .)
4452 echo "kernel_flavors=$kernel_flavors_json" >> $GITHUB_OUTPUT
4553
4654 # get ubuntu distributions
47- DIST=("ubuntu22.04" "ubuntu24.04")
55+ # DIST=("ubuntu22.04" "ubuntu24.04")
56+ DIST=("ubuntu22.04")
4857 dist_json=$(printf '%s\n' "${DIST[@]}" | jq -R . | jq -cs .)
4958 echo "dist=$dist_json" >> $GITHUB_OUTPUT
5059
5160 # LTS_KERNEL setup
52- LTS_KERNEL=("5.15" "6.8")
61+ # LTS_KERNEL=("5.15" "6.8")
62+ LTS_KERNEL=("6.8")
5363 lts_kernel_json=$(printf '%s\n' "${LTS_KERNEL[@]}" | jq -R . | jq -cs .)
5464 echo "lts_kernel=$lts_kernel_json" >> $GITHUB_OUTPUT
5565
@@ -303,8 +313,12 @@ jobs:
303313 DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]'))
304314 echo "DRIVER_BRANCHES=${DRIVER_BRANCHES[*]}" >> $GITHUB_ENV
305315
316+ - name : Set kernel version in holodeck_${{ env.DIST }}.yaml
317+ run : | # read KERNEL_VERSION and DIST from the step environment instead of templating them into the script text
318+ yq e '.spec.kernel.version = strenv(KERNEL_VERSION)' -i "tests/holodeck_${DIST}.yaml"
319+
306320 - name : Set up Holodeck
307- uses : NVIDIA/holodeck@v0.2.6
321+ uses : NVIDIA/holodeck@main
308322 env :
309323 AWS_SECRET_ACCESS_KEY : ${{ secrets.AWS_SECRET_ACCESS_KEY }}
310324 AWS_ACCESS_KEY_ID : ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -329,27 +343,6 @@ jobs:
329343 sudo apt-get update
330344 sudo apt-get install -y gh
331345
332- - name : Upgrade the kernel for Precompiled e2e test
333- env :
334- UPGRADE_KERNEL_SCRIPT : " ./tests/scripts/upgrade-kernel.sh"
335- run : |
336- status=0
337- ./tests/ci-remote-exec.sh "${UPGRADE_KERNEL_SCRIPT}" "${KERNEL_VERSION}" || status=$?
338- # On the target system, all scripts/test-case exit with code 1 for error handling.
339- # However, since reboot-related disconnections break the SSH connection
340- # and can cause the entire job to exit, we should ignore all errors except
341- # exit code 1. During a reboot, exit code 1 will not be thrown, so handling
342- # other errors as code 1 will ensure proper management of reboot scenarios
343- if [ $status -eq 1 ]; then
344- echo "Kernel version $KERNEL_VERSION upgrade failed"
345- exit 1
346- fi
347- ./tests/scripts/remote_retry.sh || status=$?
348- if [ $status -ne 0 ]; then
349- echo "Failed to connect to remote instance"
350- exit $status
351- fi
352-
353346 - name : Precompiled e2e test gpu driver validation
354347 env :
355348 TEST_CASE : " ./tests/cases/nvidia-driver.sh"
@@ -376,6 +369,11 @@ jobs:
376369 # add escape character for space
377370 TEST_CASE_ARGS=$(printf '%q ' "$TEST_CASE_ARGS")
378371 IMAGE_PATH="./tests/driver-images-${DRIVER_VERSION}-${KERNEL_VERSION}-${DIST}.tar"
372+ # ./tests/scripts/remote_retry.sh || status=$?
373+ # if [ $status -ne 0 ]; then
374+ # echo "Failed to connect to remote instance"
375+ # exit $status
376+ # fi
379377 ./tests/ci-run-e2e.sh "${TEST_CASE}" "${TEST_CASE_ARGS}" ${IMAGE_PATH} || status=$?
380378 if [ $status -eq 1 ]; then
381379 echo "e2e validation failed for driver version $DRIVER_VERSION with status $status"
0 commit comments