diff --git a/rhel8/Dockerfile b/rhel8/Dockerfile index a2864f6f..420fae60 100644 --- a/rhel8/Dockerfile +++ b/rhel8/Dockerfile @@ -17,9 +17,10 @@ ENV PATH /usr/local/go/bin:$PATH WORKDIR /work RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \ - cd driver/vgpu/src && \ - go build -o vgpu-util && \ - mv vgpu-util /work + go build -C driver/vgpu/src -o vgpu-util && \ + mv driver/vgpu/src/vgpu-util /work && \ + go build -C driver/gpu-driver-util -o gpu-driver-util && \ + mv driver/gpu-driver-util/gpu-driver-util /work FROM nvcr.io/nvidia/cuda:12.6.3-base-ubi8 @@ -36,6 +37,8 @@ ENV DRIVER_VERSION=$DRIVER_VERSION # Arg to indicate if driver type is either of passthrough/baremetal or vgpu ARG DRIVER_TYPE=passthrough ENV DRIVER_TYPE=$DRIVER_TYPE +ARG DRIVER_BRANCH=550 +ENV DRIVER_BRANCH=$DRIVER_BRANCH ARG VGPU_LICENSE_SERVER_TYPE=NLS ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE # Enable vGPU version compability check by default @@ -84,6 +87,7 @@ COPY ocp_dtk_entrypoint /usr/local/bin COPY common.sh /usr/local/bin COPY --from=build /work/vgpu-util /usr/local/bin +COPY --from=build /work/gpu-driver-util /usr/local/bin WORKDIR /drivers diff --git a/rhel8/nvidia-driver b/rhel8/nvidia-driver index ebb9e45c..53f53809 100755 --- a/rhel8/nvidia-driver +++ b/rhel8/nvidia-driver @@ -6,6 +6,7 @@ set -eu RUN_DIR=/run/nvidia PID_FILE=${RUN_DIR}/${0##*/}.pid DRIVER_VERSION=${DRIVER_VERSION:?"Missing DRIVER_VERSION env"} +DRIVER_BRANCH=${DRIVER_BRANCH:?"Missing DRIVER_BRANCH env"} KERNEL_UPDATE_HOOK=/run/kernel/postinst.d/update-nvidia-driver NUM_VGPU_DEVICES=0 NVIDIA_MODULE_PARAMS=() @@ -17,9 +18,7 @@ USE_HOST_MOFED="${USE_HOST_MOFED:-false}" DNF_RELEASEVER=${DNF_RELEASEVER:-""} RHEL_VERSION=${RHEL_VERSION:-""} RHEL_MAJOR_VERSION=8 - -OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false} -[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel +KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto} DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64} echo "DRIVER_ARCH is $DRIVER_ARCH" @@ -577,6 +576,24 @@ _start_vgpu_topology_daemon() { nvidia-topologyd } +_resolve_kernel_type() { + if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then + KERNEL_TYPE=kernel + elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then + KERNEL_TYPE=kernel-open + elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then + KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}") + if [ $? -ne 0 ]; then + echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..." + tail -n 3 /var/log/gpu-driver-util.log + return 1 + fi + else + echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}" + return 1 + fi +} + _prepare() { if [ "${DRIVER_TYPE}" = "vgpu" ]; then _find_vgpu_driver_version || exit 1 @@ -797,5 +814,6 @@ if [ $# -ne 0 ]; then fi _resolve_rhel_version || exit 1 +_resolve_kernel_type || exit 1 $command diff --git a/rhel8/ocp_dtk_entrypoint b/rhel8/ocp_dtk_entrypoint index 458ecd57..d1d0e984 100755 --- a/rhel8/ocp_dtk_entrypoint +++ b/rhel8/ocp_dtk_entrypoint @@ -25,6 +25,7 @@ nv-ctr-run-with-dtk() { /usr/local/bin/nvidia-driver \ /usr/local/bin/common.sh \ /usr/local/bin/extract-vmlinux \ + /usr/local/bin/gpu-driver-util \ /usr/local/bin/vgpu-util \ /drivers \ /licenses \ @@ -136,6 +137,7 @@ dtk-build-driver() { "$DRIVER_TOOLKIT_SHARED_DIR/nvidia-driver" \ "$DRIVER_TOOLKIT_SHARED_DIR/common.sh" \ "$DRIVER_TOOLKIT_SHARED_DIR/extract-vmlinux" \ + "$DRIVER_TOOLKIT_SHARED_DIR/gpu-driver-util" \ "$DRIVER_TOOLKIT_SHARED_DIR/vgpu-util" \ "${DRIVER_TOOLKIT_SHARED_DIR}/bin" diff --git a/rhel9/Dockerfile b/rhel9/Dockerfile index e6be84ea..3604323f 100644 --- a/rhel9/Dockerfile +++ b/rhel9/Dockerfile @@ -17,9 +17,10 @@ ENV PATH /usr/local/go/bin:$PATH WORKDIR /work RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \ - cd driver/vgpu/src && \ - go build -o vgpu-util && \ - mv vgpu-util /work + go build -C driver/vgpu/src -o vgpu-util && \ + mv driver/vgpu/src/vgpu-util /work && \ + go build -C driver/gpu-driver-util -o gpu-driver-util && \ + mv driver/gpu-driver-util/gpu-driver-util /work FROM nvcr.io/nvidia/cuda:12.6.3-base-ubi9 @@ -36,6 +37,8 @@ ENV DRIVER_VERSION=$DRIVER_VERSION # Arg to indicate if driver type is either of passthrough/baremetal or vgpu ARG DRIVER_TYPE=passthrough ENV DRIVER_TYPE=$DRIVER_TYPE +ARG DRIVER_BRANCH=550 +ENV DRIVER_BRANCH=$DRIVER_BRANCH ARG VGPU_LICENSE_SERVER_TYPE=NLS ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE # Enable vGPU version compability check by default @@ -78,6 +81,7 @@ COPY ocp_dtk_entrypoint /usr/local/bin COPY common.sh /usr/local/bin COPY --from=build /work/vgpu-util /usr/local/bin +COPY --from=build /work/gpu-driver-util /usr/local/bin WORKDIR /drivers diff --git a/rhel9/nvidia-driver b/rhel9/nvidia-driver index c1f7a7a5..f1797cc6 100755 --- a/rhel9/nvidia-driver +++ b/rhel9/nvidia-driver @@ -6,6 +6,7 @@ set -eu RUN_DIR=/run/nvidia PID_FILE=${RUN_DIR}/${0##*/}.pid DRIVER_VERSION=${DRIVER_VERSION:?"Missing DRIVER_VERSION env"} +DRIVER_BRANCH=${DRIVER_BRANCH:?"Missing DRIVER_BRANCH env"} KERNEL_UPDATE_HOOK=/run/kernel/postinst.d/update-nvidia-driver NUM_VGPU_DEVICES=0 NVIDIA_MODULE_PARAMS=() @@ -17,9 +18,7 @@ USE_HOST_MOFED="${USE_HOST_MOFED:-false}" DNF_RELEASEVER=${DNF_RELEASEVER:-""} RHEL_VERSION=${RHEL_VERSION:-""} RHEL_MAJOR_VERSION=9 - -OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false} -[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel +KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto} DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64} echo "DRIVER_ARCH is $DRIVER_ARCH" @@ -571,6 +570,24 @@ _find_vgpu_driver_version() { return 0 } +_resolve_kernel_type() { + if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then + KERNEL_TYPE=kernel + elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then + KERNEL_TYPE=kernel-open + elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then + KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}") + if [ $? -ne 0 ]; then + echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..." + tail -n 3 /var/log/gpu-driver-util.log + return 1 + fi + else + echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}" + return 1 + fi +} + _start_vgpu_topology_daemon() { type nvidia-topologyd > /dev/null 2>&1 || return 0 echo "Starting nvidia-topologyd.." @@ -797,5 +814,6 @@ if [ $# -ne 0 ]; then fi _resolve_rhel_version || exit 1 +_resolve_kernel_type || exit 1 $command diff --git a/rhel9/ocp_dtk_entrypoint b/rhel9/ocp_dtk_entrypoint index 458ecd57..d1d0e984 100755 --- a/rhel9/ocp_dtk_entrypoint +++ b/rhel9/ocp_dtk_entrypoint @@ -25,6 +25,7 @@ nv-ctr-run-with-dtk() { /usr/local/bin/nvidia-driver \ /usr/local/bin/common.sh \ /usr/local/bin/extract-vmlinux \ + /usr/local/bin/gpu-driver-util \ /usr/local/bin/vgpu-util \ /drivers \ /licenses \ @@ -136,6 +137,7 @@ dtk-build-driver() { "$DRIVER_TOOLKIT_SHARED_DIR/nvidia-driver" \ "$DRIVER_TOOLKIT_SHARED_DIR/common.sh" \ "$DRIVER_TOOLKIT_SHARED_DIR/extract-vmlinux" \ + "$DRIVER_TOOLKIT_SHARED_DIR/gpu-driver-util" \ "$DRIVER_TOOLKIT_SHARED_DIR/vgpu-util" \ "${DRIVER_TOOLKIT_SHARED_DIR}/bin" diff --git a/ubuntu20.04/Dockerfile b/ubuntu20.04/Dockerfile index 4e7d859f..2a2e9700 100644 --- a/ubuntu20.04/Dockerfile +++ b/ubuntu20.04/Dockerfile @@ -28,9 +28,10 @@ ENV PATH /usr/local/go/bin:$PATH WORKDIR /work RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \ - cd driver/vgpu/src && \ - go build -o vgpu-util && \ - mv vgpu-util /work + go build -C driver/vgpu/src -o vgpu-util && \ + mv driver/vgpu/src/vgpu-util /work && \ + go build -C driver/gpu-driver-util -o gpu-driver-util && \ + mv driver/gpu-driver-util/gpu-driver-util /work FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu20.04 @@ -72,6 +73,7 @@ RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \ COPY nvidia-driver /usr/local/bin COPY --from=build /work/vgpu-util /usr/local/bin +COPY --from=build /work/gpu-driver-util /usr/local/bin ADD drivers drivers/ diff --git a/ubuntu20.04/nvidia-driver b/ubuntu20.04/nvidia-driver index 838283e0..c7f1715a 100755 --- a/ubuntu20.04/nvidia-driver +++ b/ubuntu20.04/nvidia-driver @@ -16,8 +16,7 @@ NVIDIA_MODESET_MODULE_PARAMS=() NVIDIA_PEERMEM_MODULE_PARAMS=() TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"} -OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false} -[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel +KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto} export DEBIAN_FRONTEND=noninteractive @@ -477,6 +476,24 @@ _shutdown() { return 1 } +_resolve_kernel_type() { + if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then + KERNEL_TYPE=kernel + elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then + KERNEL_TYPE=kernel-open + elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then + KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}") + if [ $? -ne 0 ]; then + echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..." + tail -n 3 /var/log/gpu-driver-util.log + return 1 + fi + else + echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}" + return 1 + fi +} + _find_vgpu_driver_version() { local count="" local version="" @@ -520,6 +537,8 @@ init() { _find_vgpu_driver_version || exit 1 fi + _resolve_kernel_type || exit 1 + # Install the userspace components and copy the kernel module sources. sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \ cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \ @@ -592,6 +611,8 @@ update() { fi exec 3>&- + _resolve_kernel_type || exit 1 + # vgpu driver version is choosen dynamically during runtime, so pre-compile modules for # only non-vgpu driver types if [ "${DRIVER_TYPE}" != "vgpu" ]; then diff --git a/ubuntu22.04/Dockerfile b/ubuntu22.04/Dockerfile index bc401e55..a3bf873e 100644 --- a/ubuntu22.04/Dockerfile +++ b/ubuntu22.04/Dockerfile @@ -28,9 +28,10 @@ ENV PATH /usr/local/go/bin:$PATH WORKDIR /work RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \ - cd driver/vgpu/src && \ - go build -o vgpu-util && \ - mv vgpu-util /work + go build -C driver/vgpu/src -o vgpu-util && \ + mv driver/vgpu/src/vgpu-util /work && \ + go build -C driver/gpu-driver-util -o gpu-driver-util && \ + mv driver/gpu-driver-util/gpu-driver-util /work FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu22.04 @@ -72,6 +73,7 @@ RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \ COPY nvidia-driver /usr/local/bin COPY --from=build /work/vgpu-util /usr/local/bin +COPY --from=build /work/gpu-driver-util /usr/local/bin ADD drivers drivers/ diff --git a/ubuntu22.04/nvidia-driver b/ubuntu22.04/nvidia-driver index aedeeea2..ba2f1a82 100755 --- a/ubuntu22.04/nvidia-driver +++ b/ubuntu22.04/nvidia-driver @@ -15,9 +15,7 @@ NVIDIA_UVM_MODULE_PARAMS=() NVIDIA_MODESET_MODULE_PARAMS=() NVIDIA_PEERMEM_MODULE_PARAMS=() TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"} - -OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false} -[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel +KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto} export DEBIAN_FRONTEND=noninteractive @@ -481,6 +479,24 @@ _shutdown() { return 1 } +_resolve_kernel_type() { + if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then + KERNEL_TYPE=kernel + elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then + KERNEL_TYPE=kernel-open + elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then + KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}") + if [ $? -ne 0 ]; then + echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..." + tail -n 3 /var/log/gpu-driver-util.log + return 1 + fi + else + echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}" + return 1 + fi +} + _find_vgpu_driver_version() { local count="" local version="" @@ -524,6 +540,8 @@ init() { _find_vgpu_driver_version || exit 1 fi + _resolve_kernel_type || exit 1 + # Install the userspace components and copy the kernel module sources. sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \ cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \ @@ -596,6 +614,8 @@ update() { fi exec 3>&- + _resolve_kernel_type || exit 1 + # vgpu driver version is choosen dynamically during runtime, so pre-compile modules for # only non-vgpu driver types if [ "${DRIVER_TYPE}" != "vgpu" ]; then diff --git a/ubuntu22.04/precompiled/Dockerfile b/ubuntu22.04/precompiled/Dockerfile index c59b80e8..9f72b14d 100644 --- a/ubuntu22.04/precompiled/Dockerfile +++ b/ubuntu22.04/precompiled/Dockerfile @@ -2,6 +2,8 @@ FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04 ENV DEBIAN_FRONTEND=noninteractive +ARG TARGETARCH +ARG GOLANG_VERSION ARG DRIVER_BRANCH=535 ENV DRIVER_BRANCH=$DRIVER_BRANCH ARG DRIVER_VERSION=535.216.03 @@ -12,6 +14,8 @@ ENV KERNEL_VERSION=$KERNEL_VERSION ENV NVIDIA_VISIBLE_DEVICES=void +SHELL ["/bin/bash", "-c"] + RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections # Fetch GPG keys for CUDA repo @@ -26,6 +30,7 @@ RUN dpkg --add-architecture i386 && \ curl \ kmod \ file \ + git \ libelf-dev \ libglvnd-dev \ pkg-config && \ @@ -41,6 +46,18 @@ RUN echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy main universe RUN curl -fsSL -o /usr/local/bin/donkey https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey && \ chmod +x /usr/local/bin/donkey +# download appropriate binary based on the target architecture for multi-arch builds +RUN OS_ARCH=${TARGETARCH/x86_64/amd64} && OS_ARCH=${OS_ARCH/aarch64/arm64} && \ + curl https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${OS_ARCH}.tar.gz \ + | tar -C /usr/local -xz + +ENV PATH=/usr/local/go/bin:$PATH + +RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \ + go build -C driver/gpu-driver-util -o gpu-driver-util && \ + mv driver/gpu-driver-util/gpu-driver-util /usr/local/bin && \ + rm -rf driver + # Install / upgrade packages here that are required to resolve CVEs ARG CVE_UPDATES RUN if [ -n "${CVE_UPDATES}" ]; then \ diff --git a/ubuntu22.04/precompiled/nvidia-driver b/ubuntu22.04/precompiled/nvidia-driver index 1eb5a1ea..69632c7c 100755 --- a/ubuntu22.04/precompiled/nvidia-driver +++ b/ubuntu22.04/precompiled/nvidia-driver @@ -4,7 +4,7 @@ set -eu KERNEL_VERSION=$(uname -r) -OPEN_KERNEL_MODULES_ENABLED="${OPEN_KERNEL_MODULES_ENABLED:-false}" +KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto} RUN_DIR=/run/nvidia PID_FILE=${RUN_DIR}/${0##*/}.pid DRIVER_BRANCH=${DRIVER_BRANCH:?"Missing driver version"} @@ -96,6 +96,24 @@ _get_module_params() { fi } +_resolve_kernel_type() { + if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then + KERNEL_TYPE=kernel + elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then + KERNEL_TYPE=kernel-open + elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then + KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}") + if [ $? -ne 0 ]; then + echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..." + tail -n 3 /var/log/gpu-driver-util.log + return 1 + fi + else + echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}" + return 1 + fi +} + # Load the kernel modules and start persistenced. _load_driver() { echo "Parsing kernel module parameters..." @@ -245,7 +263,7 @@ _install_driver() { xserver-xorg-video-nvidia-${DRIVER_BRANCH}-server # Now install the precompiled kernel module packages signed by Canonical - if [ "$OPEN_KERNEL_MODULES_ENABLED" = true ]; then + if [ "${KERNEL_TYPE}" == "kernel-open" ]; then echo "Installing Open NVIDIA driver kernel modules..." apt-get install --no-install-recommends -y \ linux-signatures-nvidia-${KERNEL_VERSION} \ @@ -293,6 +311,7 @@ init() { _unload_driver || exit 1 _unmount_rootfs + _resolve_kernel_type || exit 1 _install_driver _load_driver || exit 1 _mount_rootfs diff --git a/ubuntu24.04/precompiled/Dockerfile b/ubuntu24.04/precompiled/Dockerfile index a8b77ba3..af5622ee 100644 --- a/ubuntu24.04/precompiled/Dockerfile +++ b/ubuntu24.04/precompiled/Dockerfile @@ -2,6 +2,8 @@ FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu24.04 ENV DEBIAN_FRONTEND=noninteractive +ARG TARGETARCH +ARG GOLANG_VERSION ARG DRIVER_BRANCH=550 ENV DRIVER_BRANCH=$DRIVER_BRANCH ARG DRIVER_VERSION=550.90.12 @@ -12,6 +14,8 @@ ENV KERNEL_VERSION=$KERNEL_VERSION ENV NVIDIA_VISIBLE_DEVICES=void +SHELL ["/bin/bash", "-c"] + RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections RUN dpkg --add-architecture i386 && \ @@ -20,6 +24,7 @@ RUN dpkg --add-architecture i386 && \ build-essential \ ca-certificates \ curl \ + git \ gpg \ kmod \ file \ @@ -36,6 +41,18 @@ RUN apt-key del 3bf863cc && \ RUN usermod -o -u 0 -g 0 _apt +# download appropriate binary based on the target architecture for multi-arch builds +RUN OS_ARCH=${TARGETARCH/x86_64/amd64} && OS_ARCH=${OS_ARCH/aarch64/arm64} && \ + curl https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${OS_ARCH}.tar.gz \ + | tar -C /usr/local -xz + +ENV PATH=/usr/local/go/bin:$PATH + +RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \ + go build -C driver/gpu-driver-util -o gpu-driver-util && \ + mv driver/gpu-driver-util/gpu-driver-util /usr/local/bin && \ + rm -rf driver + # Install / upgrade packages here that are required to resolve CVEs ARG CVE_UPDATES RUN if [ -n "${CVE_UPDATES}" ]; then \ diff --git a/ubuntu24.04/precompiled/nvidia-driver b/ubuntu24.04/precompiled/nvidia-driver index f207e2cc..e0baffb2 100755 --- a/ubuntu24.04/precompiled/nvidia-driver +++ b/ubuntu24.04/precompiled/nvidia-driver @@ -4,7 +4,7 @@ set -eu KERNEL_VERSION=$(uname -r) -OPEN_KERNEL_MODULES_ENABLED="${OPEN_KERNEL_MODULES_ENABLED:-false}" +KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto} RUN_DIR=/run/nvidia PID_FILE=${RUN_DIR}/${0##*/}.pid DRIVER_BRANCH=${DRIVER_BRANCH:?"Missing driver version"} @@ -96,6 +96,24 @@ _get_module_params() { fi } +_resolve_kernel_type() { + if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then + KERNEL_TYPE=kernel + elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then + KERNEL_TYPE=kernel-open + elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then + KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}") + if [ $? -ne 0 ]; then + echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..." + tail -n 3 /var/log/gpu-driver-util.log + return 1 + fi + else + echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}" + return 1 + fi +} + # Load the kernel modules and start persistenced. _load_driver() { echo "Parsing kernel module parameters..." @@ -245,7 +263,7 @@ _install_driver() { xserver-xorg-video-nvidia-${DRIVER_BRANCH}-server # Now install the precompiled kernel module packages signed by Canonical - if [ "$OPEN_KERNEL_MODULES_ENABLED" = true ]; then + if [ "${KERNEL_TYPE}" == "kernel-open" ]; then echo "Installing Open NVIDIA driver kernel modules..." apt-get install --no-install-recommends -y \ linux-signatures-nvidia-${KERNEL_VERSION} \ @@ -293,6 +311,7 @@ init() { _unload_driver || exit 1 _unmount_rootfs + _resolve_kernel_type || exit 1 _install_driver _load_driver || exit 1 _mount_rootfs