Skip to content

Commit bd132c8

Browse files
committed
Test (not for review): holodeck kernel version change
Signed-off-by: shiva kumar <shivaku@nvidia.com>
1 parent 2d3f008 commit bd132c8

File tree

8 files changed

+41
-104
lines changed

8 files changed

+41
-104
lines changed

.github/workflows/image.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@ on:
2121
- opened
2222
- synchronize
2323
branches:
24-
- main
25-
- release-*
24+
- main-no
25+
- release-no
2626
push:
2727
branches:
28-
- main
29-
- release-*
28+
- main-no
29+
- release-no
3030

3131
jobs:
3232
image:

.github/workflows/precompiled.yaml

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,15 @@
1616
name: Precompiled images
1717

1818
on:
19-
schedule:
20-
- cron: '00 09 * * *'
19+
pull_request:
20+
types:
21+
- opened
22+
- synchronize
23+
branches:
24+
- test-holodeck
25+
push:
26+
branches:
27+
- test-holodeck
2128

2229
jobs:
2330
set-driver-version-matrix:
@@ -39,17 +46,20 @@ jobs:
3946
echo "driver_branch=$driver_branch_json" >> $GITHUB_OUTPUT
4047
4148
# get kernel flavors
42-
KERNEL_FLAVORS=("aws" "azure" "generic" "nvidia" "oracle")
49+
# KERNEL_FLAVORS=("aws" "azure" "generic" "nvidia" "oracle")
50+
KERNEL_FLAVORS=("nvidia")
4351
kernel_flavors_json=$(printf '%s\n' "${KERNEL_FLAVORS[@]}" | jq -R . | jq -cs .)
4452
echo "kernel_flavors=$kernel_flavors_json" >> $GITHUB_OUTPUT
4553
4654
# get ubuntu distributions
47-
DIST=("ubuntu22.04" "ubuntu24.04")
55+
# DIST=("ubuntu22.04" "ubuntu24.04")
56+
DIST=("ubuntu22.04")
4857
dist_json=$(printf '%s\n' "${DIST[@]}" | jq -R . | jq -cs .)
4958
echo "dist=$dist_json" >> $GITHUB_OUTPUT
5059
5160
# LTS_KERNEL setup
52-
LTS_KERNEL=("5.15" "6.8")
61+
# LTS_KERNEL=("5.15" "6.8")
62+
LTS_KERNEL=("6.8")
5363
lts_kernel_json=$(printf '%s\n' "${LTS_KERNEL[@]}" | jq -R . | jq -cs .)
5464
echo "lts_kernel=$lts_kernel_json" >> $GITHUB_OUTPUT
5565
@@ -303,8 +313,12 @@ jobs:
303313
DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]'))
304314
echo "DRIVER_BRANCHES=${DRIVER_BRANCHES[*]}" >> $GITHUB_ENV
305315
316+
- name: Set kernel version in holodeck_${{ env.DIST }}.yaml
317+
run: |
318+
yq e '.spec.kernel.version = "${{ env.KERNEL_VERSION }}"' -i tests/holodeck_${{ env.DIST }}.yaml
319+
306320
- name: Set up Holodeck
307-
uses: NVIDIA/holodeck@v0.2.6
321+
uses: NVIDIA/holodeck@main
308322
env:
309323
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
310324
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -329,27 +343,6 @@ jobs:
329343
sudo apt-get update
330344
sudo apt-get install -y gh
331345
332-
- name: Upgrade the kernel for Precompiled e2e test
333-
env:
334-
UPGRADE_KERNEL_SCRIPT: "./tests/scripts/upgrade-kernel.sh"
335-
run: |
336-
status=0
337-
./tests/ci-remote-exec.sh "${UPGRADE_KERNEL_SCRIPT}" "${KERNEL_VERSION}" || status=$?
338-
# On the target system, all scripts and test cases exit with code 1 on error.
339-
# However, since reboot-related disconnections break the SSH connection
340-
# and can cause the entire job to exit, we should ignore all errors except
341-
# exit code 1. During a reboot, exit code 1 will not be returned, so treating
342-
# only exit code 1 as a failure ensures reboot scenarios are handled properly
343-
if [ $status -eq 1 ]; then
344-
echo "Kernel version $KERNEL_VERSION upgrade failed"
345-
exit 1
346-
fi
347-
./tests/scripts/remote_retry.sh || status=$?
348-
if [ $status -ne 0 ]; then
349-
echo "Failed to connect to remote instance"
350-
exit $status
351-
fi
352-
353346
- name: Precompiled e2e test gpu driver validation
354347
env:
355348
TEST_CASE: "./tests/cases/nvidia-driver.sh"
@@ -376,6 +369,11 @@ jobs:
376369
# add escape character for space
377370
TEST_CASE_ARGS=$(printf '%q ' "$TEST_CASE_ARGS")
378371
IMAGE_PATH="./tests/driver-images-${DRIVER_VERSION}-${KERNEL_VERSION}-${DIST}.tar"
372+
# ./tests/scripts/remote_retry.sh || status=$?
373+
# if [ $status -ne 0 ]; then
374+
# echo "Failed to connect to remote instance"
375+
# exit $status
376+
# fi
379377
./tests/ci-run-e2e.sh "${TEST_CASE}" "${TEST_CASE_ARGS}" ${IMAGE_PATH} || status=$?
380378
if [ $status -eq 1 ]; then
381379
echo "e2e validation failed for driver version $DRIVER_VERSION with status $status"

tests/holodeck_ubuntu22.04.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,11 @@ spec:
2727
containerRuntime:
2828
install: true
2929
name: containerd
30+
version: 1.6.27
3031
kubernetes:
3132
install: true
3233
installer: kubeadm
33-
version: v1.28.5
34-
crictlVersion: v1.28.0
34+
version: v1.29.3
35+
crictlVersion: v1.29.0
36+
kernel:
37+
version: 5.15.0-139-generic

tests/holodeck_ubuntu24.04.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,7 @@ spec:
3131
kubernetes:
3232
install: true
3333
installer: kubeadm
34-
version: v1.30.0
35-
crictlVersion: v1.30.0
34+
version: v1.28.5
35+
crictlVersion: v1.28.0
36+
kernel:
37+
version: 6.8.0-59-generic

tests/scripts/.definitions.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ CASES_DIR="$( cd "${TEST_DIR}/cases" && pwd )"
1616

1717
: ${HELM_NVIDIA_REPO:="https://helm.ngc.nvidia.com/nvidia"}
1818

19-
: ${DAEMON_POD_STATUS_TIME_OUT:="15m"}
19+
: ${DAEMON_POD_STATUS_TIME_OUT:="30m"}
2020
: ${POD_STATUS_TIME_OUT:="2m"}
2121

2222
: ${LOG_DIR:="/tmp/logs"}

tests/scripts/findkernelversion.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,5 @@ if [[ $status -eq 0 ]]; then
4040
else
4141
export should_continue=true
4242
fi
43+
# TODO(shiva): remove this temporary override, which forces should_continue=true
44+
export should_continue=true

tests/scripts/kernel-upgrade-helper.sh

Lines changed: 0 additions & 54 deletions
This file was deleted.

tests/scripts/upgrade-kernel.sh

Lines changed: 0 additions & 14 deletions
This file was deleted.

0 commit comments

Comments
 (0)