Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion controllers/object_controls.go
Original file line number Diff line number Diff line change
Expand Up @@ -3553,6 +3553,30 @@ func transformDriverContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy
}
}

// Mount /lib/modules for precompiled drivers on SUSE distributions.
// Those containers need access to host /lib/modules at runtime.
osID := getOSName(n.gpuNodeOSTag)
if config.Driver.UsePrecompiledDrivers() && (osID == "sles" || osID == "sl-micro") {
n.logger.Info("Mounting /lib/modules into the driver container")
libModulesVolMount := corev1.VolumeMount{
Name: "lib-modules",
MountPath: "/run/host/lib/modules",
ReadOnly: true,
}
driverContainer.VolumeMounts = append(driverContainer.VolumeMounts, libModulesVolMount)

libModulesVol := corev1.Volume{
Name: "lib-modules",
VolumeSource: corev1.VolumeSource{
HostPath: &corev1.HostPathVolumeSource{
Path: "/lib/modules",
Type: ptr.To(corev1.HostPathDirectory),
},
},
}
podSpec.Volumes = append(podSpec.Volumes, libModulesVol)
}

// no further repo configuration required when using pre-compiled drivers, return here.
if config.Driver.UsePrecompiledDrivers() {
return nil
Expand Down Expand Up @@ -3594,7 +3618,7 @@ func transformDriverContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy
}
}

osID := getOSName(n.gpuNodeOSTag)
osID = getOSName(n.gpuNodeOSTag)
// set up subscription entitlements for RHEL (using K8s with a non-CRIO runtime), SLES and SL-Micro
if (osID == "rhel" && n.openshift == "" && n.runtime != gpuv1.CRIO) || osID == "sles" || osID == "sl-micro" {
n.logger.Info("Mounting subscriptions into the driver container", "OS", osID)
Expand Down
94 changes: 93 additions & 1 deletion controllers/object_controls_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,11 @@ func testDaemonsetCommon(t *testing.T, cp *gpuv1.ClusterPolicy, component string

if component == "Driver" && cp.Spec.Driver.UsePrecompiledDrivers() {
// for pre-compiled drivers, container image is kernel specific
require.Contains(t, mainCtr.Image, "-generic-ubuntu22.04", "unexpected Image")
suffix := "-generic-ubuntu22.04"
if clusterPolicyController.gpuNodeOSTag != "" {
suffix = fmt.Sprintf("-%s", clusterPolicyController.gpuNodeOSTag)
}
require.Contains(t, mainCtr.Image, suffix, "unexpected Image")
} else {
require.Equal(t, mainCtrImage, mainCtr.Image, "unexpected Image")
}
Expand Down Expand Up @@ -1883,3 +1887,91 @@ func TestMIGManager(t *testing.T) {
})
}
}

// TestDriverPrecompiledLibModulesUbuntu tests that /lib/modules is NOT mounted
// for precompiled drivers on Ubuntu.
//
// The test does not set clusterPolicyController.gpuNodeOSTag itself; it uses
// whatever value the shared controller currently holds.
// NOTE(review): presumably that is empty or an Ubuntu tag at this point — confirm.
func TestDriverPrecompiledLibModulesUbuntu(t *testing.T) {
	cp := getDriverTestInput("precompiled")
	output := getDriverTestOutput("precompiled")

	ds, err := testDaemonsetCommon(t, cp, "Driver", output["numDaemonsets"].(int))
	if err != nil {
		t.Fatalf("error in testDaemonsetCommon(): %v", err)
	}
	require.NotNil(t, ds)

	// Register the state cleanup before running the assertions below. A failing
	// require.* call stops the test immediately, which would otherwise skip the
	// cleanup and leave the shared controller state behind for later tests.
	t.Cleanup(func() {
		if rmErr := removeState(&clusterPolicyController, clusterPolicyController.idx-1); rmErr != nil {
			t.Errorf("error removing state: %v", rmErr)
			return
		}
		clusterPolicyController.idx--
	})

	// Check that /lib/modules volume and mount are NOT present
	for _, vol := range ds.Spec.Template.Spec.Volumes {
		require.NotEqual(t, "lib-modules", vol.Name, "lib-modules volume should not be present for ubuntu")
	}

	driverContainer := findContainerByName(ds.Spec.Template.Spec.Containers, "nvidia-driver-ctr")
	require.NotNil(t, driverContainer)

	for _, mount := range driverContainer.VolumeMounts {
		require.NotEqual(t, "lib-modules", mount.Name, "lib-modules volume mount should not be present for ubuntu")
	}
}

// TestDriverPrecompiledLibModulesSuse tests that /lib/modules is mounted for
// precompiled drivers on SLES and SL-Micro.
//
// Each OS tag runs as its own subtest. The subtest temporarily overrides
// clusterPolicyController.gpuNodeOSTag and checks that the resulting driver
// DaemonSet has a "lib-modules" hostPath volume for /lib/modules and a
// read-only mount of it at /run/host/lib/modules in the driver container.
func TestDriverPrecompiledLibModulesSuse(t *testing.T) {
	osTags := []string{"sles16.0", "sl-micro6.1"}

	for _, osTag := range osTags {
		t.Run(osTag, func(t *testing.T) {
			// Save original OS tag and restore after test. Registered first so
			// that it runs last (cleanups run in last-in, first-out order),
			// i.e. after the state removal registered below.
			originalOSTag := clusterPolicyController.gpuNodeOSTag
			t.Cleanup(func() {
				clusterPolicyController.gpuNodeOSTag = originalOSTag
			})

			clusterPolicyController.gpuNodeOSTag = osTag

			cp := getDriverTestInput("precompiled")
			output := getDriverTestOutput("precompiled")

			ds, err := testDaemonsetCommon(t, cp, "Driver", output["numDaemonsets"].(int))
			if err != nil {
				t.Fatalf("error in testDaemonsetCommon(): %v", err)
			}
			require.NotNil(t, ds)

			// Register the state cleanup before running the assertions below.
			// A failing require.* call stops the subtest immediately, which
			// would otherwise skip the cleanup and leak controller state into
			// the next subtest or test.
			t.Cleanup(func() {
				if rmErr := removeState(&clusterPolicyController, clusterPolicyController.idx-1); rmErr != nil {
					t.Errorf("error removing state: %v", rmErr)
					return
				}
				clusterPolicyController.idx--
			})

			// Check for /lib/modules volume and mount
			foundVolume := false
			for _, vol := range ds.Spec.Template.Spec.Volumes {
				if vol.Name == "lib-modules" {
					foundVolume = true
					require.NotNil(t, vol.HostPath)
					require.Equal(t, "/lib/modules", vol.HostPath.Path)
				}
			}
			require.Truef(t, foundVolume, "lib-modules volume not found for precompiled drivers on %s", osTag)

			foundMount := false
			driverContainer := findContainerByName(ds.Spec.Template.Spec.Containers, "nvidia-driver-ctr")
			require.NotNil(t, driverContainer)

			for _, mount := range driverContainer.VolumeMounts {
				if mount.Name == "lib-modules" {
					foundMount = true
					require.Equal(t, "/run/host/lib/modules", mount.MountPath)
					require.True(t, mount.ReadOnly)
				}
			}
			require.Truef(t, foundMount, "lib-modules volume mount not found for precompiled drivers on %s", osTag)
		})
	}
}
23 changes: 23 additions & 0 deletions internal/state/driver_volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,29 @@ func (s *stateDriver) getDriverAdditionalConfigs(ctx context.Context, cr *v1alph
additionalCfgs.Volumes = append(additionalCfgs.Volumes, subscriptionVol)
}
}

// Mount /lib/modules for precompiled drivers on SUSE distributions.
// Those containers need access to host /lib/modules at runtime.
if cr.Spec.UsePrecompiledDrivers() && (pool.osRelease == "sles" || pool.osRelease == "sl-micro") {
logger.Info("Mounting /lib/modules into the driver container")
libModulesVolMount := corev1.VolumeMount{
Name: "lib-modules",
MountPath: "/run/host/lib/modules",
ReadOnly: true,
}
additionalCfgs.VolumeMounts = append(additionalCfgs.VolumeMounts, libModulesVolMount)

libModulesVol := corev1.Volume{
Name: "lib-modules",
VolumeSource: corev1.VolumeSource{
HostPath: &corev1.HostPathVolumeSource{
Path: "/lib/modules",
Type: ptr.To(corev1.HostPathDirectory),
},
},
}
additionalCfgs.Volumes = append(additionalCfgs.Volumes, libModulesVol)
}
}

// mount any custom kernel module configuration parameters at /drivers
Expand Down
Loading