From 3d9ce1cc8d3f284990fbceb9388df90dd701fd85 Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Sat, 18 Jan 2025 21:09:07 +0000 Subject: [PATCH] Rename and copy cmds / helm charts to split GPU and IMEX drivers For now they are 100% clones of each other, but subsequent commits will fine tune them to either the GPU driver or the IMEX driver. Signed-off-by: Kevin Klues --- README.md | 5 +- .../allocatable.go | 0 .../cdi.go | 0 .../cdioptions.go | 0 .../checkpoint.go | 0 .../device_state.go | 0 .../deviceinfo.go | 0 .../driver.go | 0 .../main.go | 4 +- .../nvlib.go | 0 .../prepared.go | 0 .../root.go | 0 .../sharing.go | 0 .../types.go | 0 .../controller.go | 0 .../deployment.go | 0 .../deploymentpods.go | 0 .../deviceclass.go | 0 .../imexchannels.go | 0 .../indexers.go | 0 .../main.go | 4 +- .../mnenv.go | 0 .../resourceclaim.go | 0 .../types.go | 0 cmd/nvidia-dra-imex-plugin/allocatable.go | 108 +++ cmd/nvidia-dra-imex-plugin/cdi.go | 298 ++++++++ cmd/nvidia-dra-imex-plugin/cdioptions.go | 81 +++ cmd/nvidia-dra-imex-plugin/checkpoint.go | 53 ++ cmd/nvidia-dra-imex-plugin/device_state.go | 558 +++++++++++++++ cmd/nvidia-dra-imex-plugin/deviceinfo.go | 223 ++++++ cmd/nvidia-dra-imex-plugin/driver.go | 168 +++++ cmd/nvidia-dra-imex-plugin/main.go | 206 ++++++ cmd/nvidia-dra-imex-plugin/nvlib.go | 669 ++++++++++++++++++ cmd/nvidia-dra-imex-plugin/prepared.go | 205 ++++++ cmd/nvidia-dra-imex-plugin/root.go | 109 +++ cmd/nvidia-dra-imex-plugin/sharing.go | 442 ++++++++++++ cmd/nvidia-dra-imex-plugin/types.go | 30 + ...ra-driver.sh => install-dra-gpu-driver.sh} | 2 +- demo/clusters/gke/install-dra-imex-driver.sh | 42 ++ demo/clusters/kind/install-dra-gpu-driver.sh | 39 + ...a-driver.sh => install-dra-imex-driver.sh} | 4 +- deployments/container/Dockerfile.ubi8 | 7 +- deployments/container/Dockerfile.ubuntu | 7 +- .../.helmignore | 0 .../Chart.yaml | 2 +- .../gpu.nvidia.com_multinodeenvironments.yaml | 58 ++ .../templates/_helpers.tpl | 0 .../templates/clusterrole.yaml | 0 .../templates/clusterrolebinding.yaml | 0 .../templates/controller.yaml | 4 +- .../templates/deviceclass-gpu.yaml | 0 .../templates/deviceclass-mig.yaml | 0 .../templates/kubeletplugin.yaml | 4 +- .../openshiftprivilegedrolebinging.yaml | 0 .../templates/serviceaccount.yaml | 0 .../templates/validatingadmissionpolicy.yaml | 0 .../validatingadmissionpolicybinding.yaml | 0 .../templates/validation.yaml | 0 .../values.yaml | 0 .../helm/k8s-dra-imex-driver/.helmignore | 23 + .../helm/k8s-dra-imex-driver/Chart.yaml | 24 + .../gpu.nvidia.com_multinodeenvironments.yaml | 58 ++ .../templates/_helpers.tpl | 129 ++++ .../templates/clusterrole.yaml | 31 + .../templates/clusterrolebinding.yaml | 14 + .../templates/controller.yaml | 81 +++ .../templates/deviceclass-gpu.yaml | 11 + .../templates/deviceclass-mig.yaml | 11 + .../templates/kubeletplugin.yaml | 130 ++++ .../openshiftprivilegedrolebinging.yaml | 17 + .../templates/serviceaccount.yaml | 13 + .../templates/validatingadmissionpolicy.yaml | 33 + .../validatingadmissionpolicybinding.yaml | 8 + .../templates/validation.yaml | 63 ++ .../helm/k8s-dra-imex-driver/values.yaml | 121 ++++ 75 files changed, 4078 insertions(+), 21 deletions(-) rename cmd/{nvidia-dra-plugin => nvidia-dra-gpu-plugin}/allocatable.go (100%) rename cmd/{nvidia-dra-plugin => nvidia-dra-gpu-plugin}/cdi.go (100%) rename cmd/{nvidia-dra-plugin => nvidia-dra-gpu-plugin}/cdioptions.go (100%) rename cmd/{nvidia-dra-plugin => nvidia-dra-gpu-plugin}/checkpoint.go (100%) rename cmd/{nvidia-dra-plugin => 
nvidia-dra-gpu-plugin}/device_state.go (100%) rename cmd/{nvidia-dra-plugin => nvidia-dra-gpu-plugin}/deviceinfo.go (100%) rename cmd/{nvidia-dra-plugin => nvidia-dra-gpu-plugin}/driver.go (100%) rename cmd/{nvidia-dra-plugin => nvidia-dra-gpu-plugin}/main.go (97%) rename cmd/{nvidia-dra-plugin => nvidia-dra-gpu-plugin}/nvlib.go (100%) rename cmd/{nvidia-dra-plugin => nvidia-dra-gpu-plugin}/prepared.go (100%) rename cmd/{nvidia-dra-plugin => nvidia-dra-gpu-plugin}/root.go (100%) rename cmd/{nvidia-dra-plugin => nvidia-dra-gpu-plugin}/sharing.go (100%) rename cmd/{nvidia-dra-plugin => nvidia-dra-gpu-plugin}/types.go (100%) rename cmd/{nvidia-dra-controller => nvidia-dra-imex-controller}/controller.go (100%) rename cmd/{nvidia-dra-controller => nvidia-dra-imex-controller}/deployment.go (100%) rename cmd/{nvidia-dra-controller => nvidia-dra-imex-controller}/deploymentpods.go (100%) rename cmd/{nvidia-dra-controller => nvidia-dra-imex-controller}/deviceclass.go (100%) rename cmd/{nvidia-dra-controller => nvidia-dra-imex-controller}/imexchannels.go (100%) rename cmd/{nvidia-dra-controller => nvidia-dra-imex-controller}/indexers.go (100%) rename cmd/{nvidia-dra-controller => nvidia-dra-imex-controller}/main.go (97%) rename cmd/{nvidia-dra-controller => nvidia-dra-imex-controller}/mnenv.go (100%) rename cmd/{nvidia-dra-controller => nvidia-dra-imex-controller}/resourceclaim.go (100%) rename cmd/{nvidia-dra-controller => nvidia-dra-imex-controller}/types.go (100%) create mode 100644 cmd/nvidia-dra-imex-plugin/allocatable.go create mode 100644 cmd/nvidia-dra-imex-plugin/cdi.go create mode 100644 cmd/nvidia-dra-imex-plugin/cdioptions.go create mode 100644 cmd/nvidia-dra-imex-plugin/checkpoint.go create mode 100644 cmd/nvidia-dra-imex-plugin/device_state.go create mode 100644 cmd/nvidia-dra-imex-plugin/deviceinfo.go create mode 100644 cmd/nvidia-dra-imex-plugin/driver.go create mode 100644 cmd/nvidia-dra-imex-plugin/main.go create mode 100644 cmd/nvidia-dra-imex-plugin/nvlib.go create mode 100644 cmd/nvidia-dra-imex-plugin/prepared.go create mode 100644 cmd/nvidia-dra-imex-plugin/root.go create mode 100644 cmd/nvidia-dra-imex-plugin/sharing.go create mode 100644 cmd/nvidia-dra-imex-plugin/types.go rename demo/clusters/gke/{install-dra-driver.sh => install-dra-gpu-driver.sh} (96%) create mode 100755 demo/clusters/gke/install-dra-imex-driver.sh create mode 100755 demo/clusters/kind/install-dra-gpu-driver.sh rename demo/clusters/kind/{install-dra-driver.sh => install-dra-imex-driver.sh} (93%) rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/.helmignore (100%) rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/Chart.yaml (97%) create mode 100644 deployments/helm/k8s-dra-gpu-driver/crds/gpu.nvidia.com_multinodeenvironments.yaml rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/templates/_helpers.tpl (100%) rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/templates/clusterrole.yaml (100%) rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/templates/clusterrolebinding.yaml (100%) rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/templates/controller.yaml (95%) rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/templates/deviceclass-gpu.yaml (100%) rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/templates/deviceclass-mig.yaml (100%) rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/templates/kubeletplugin.yaml (97%) rename deployments/helm/{k8s-dra-driver => 
k8s-dra-gpu-driver}/templates/openshiftprivilegedrolebinging.yaml (100%) rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/templates/serviceaccount.yaml (100%) rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/templates/validatingadmissionpolicy.yaml (100%) rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/templates/validatingadmissionpolicybinding.yaml (100%) rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/templates/validation.yaml (100%) rename deployments/helm/{k8s-dra-driver => k8s-dra-gpu-driver}/values.yaml (100%) create mode 100644 deployments/helm/k8s-dra-imex-driver/.helmignore create mode 100644 deployments/helm/k8s-dra-imex-driver/Chart.yaml create mode 100644 deployments/helm/k8s-dra-imex-driver/crds/gpu.nvidia.com_multinodeenvironments.yaml create mode 100644 deployments/helm/k8s-dra-imex-driver/templates/_helpers.tpl create mode 100644 deployments/helm/k8s-dra-imex-driver/templates/clusterrole.yaml create mode 100644 deployments/helm/k8s-dra-imex-driver/templates/clusterrolebinding.yaml create mode 100644 deployments/helm/k8s-dra-imex-driver/templates/controller.yaml create mode 100644 deployments/helm/k8s-dra-imex-driver/templates/deviceclass-gpu.yaml create mode 100644 deployments/helm/k8s-dra-imex-driver/templates/deviceclass-mig.yaml create mode 100644 deployments/helm/k8s-dra-imex-driver/templates/kubeletplugin.yaml create mode 100644 deployments/helm/k8s-dra-imex-driver/templates/openshiftprivilegedrolebinging.yaml create mode 100644 deployments/helm/k8s-dra-imex-driver/templates/serviceaccount.yaml create mode 100644 deployments/helm/k8s-dra-imex-driver/templates/validatingadmissionpolicy.yaml create mode 100644 deployments/helm/k8s-dra-imex-driver/templates/validatingadmissionpolicybinding.yaml create mode 100644 deployments/helm/k8s-dra-imex-driver/templates/validation.yaml create mode 100644 deployments/helm/k8s-dra-imex-driver/values.yaml diff --git a/README.md b/README.md index 499559966..7cc3f1fef 100644 --- a/README.md +++ b/README.md @@ -84,9 +84,8 @@ This should show two pods running in the `nvidia` namespace: kubectl get pods -n nvidia ``` ``` -NAME READY STATUS RESTARTS AGE -nvidia-dra-driver-k8s-dra-driver-controller-844fcb94b-ktbkc 1/1 Running 0 69s -nvidia-dra-driver-k8s-dra-driver-kubelet-plugin-5vfp9 1/1 Running 0 69s +NAME READY STATUS RESTARTS AGE +nvidia-dra-driver-k8s-dra-driver-gpu-kubelet-plugin-5vfp9 1/1 Running 0 69s ``` ### Run the examples by following the steps in the demo script diff --git a/cmd/nvidia-dra-plugin/allocatable.go b/cmd/nvidia-dra-gpu-plugin/allocatable.go similarity index 100% rename from cmd/nvidia-dra-plugin/allocatable.go rename to cmd/nvidia-dra-gpu-plugin/allocatable.go diff --git a/cmd/nvidia-dra-plugin/cdi.go b/cmd/nvidia-dra-gpu-plugin/cdi.go similarity index 100% rename from cmd/nvidia-dra-plugin/cdi.go rename to cmd/nvidia-dra-gpu-plugin/cdi.go diff --git a/cmd/nvidia-dra-plugin/cdioptions.go b/cmd/nvidia-dra-gpu-plugin/cdioptions.go similarity index 100% rename from cmd/nvidia-dra-plugin/cdioptions.go rename to cmd/nvidia-dra-gpu-plugin/cdioptions.go diff --git a/cmd/nvidia-dra-plugin/checkpoint.go b/cmd/nvidia-dra-gpu-plugin/checkpoint.go similarity index 100% rename from cmd/nvidia-dra-plugin/checkpoint.go rename to cmd/nvidia-dra-gpu-plugin/checkpoint.go diff --git a/cmd/nvidia-dra-plugin/device_state.go b/cmd/nvidia-dra-gpu-plugin/device_state.go similarity index 100% rename from cmd/nvidia-dra-plugin/device_state.go rename to 
cmd/nvidia-dra-gpu-plugin/device_state.go diff --git a/cmd/nvidia-dra-plugin/deviceinfo.go b/cmd/nvidia-dra-gpu-plugin/deviceinfo.go similarity index 100% rename from cmd/nvidia-dra-plugin/deviceinfo.go rename to cmd/nvidia-dra-gpu-plugin/deviceinfo.go diff --git a/cmd/nvidia-dra-plugin/driver.go b/cmd/nvidia-dra-gpu-plugin/driver.go similarity index 100% rename from cmd/nvidia-dra-plugin/driver.go rename to cmd/nvidia-dra-gpu-plugin/driver.go diff --git a/cmd/nvidia-dra-plugin/main.go b/cmd/nvidia-dra-gpu-plugin/main.go similarity index 97% rename from cmd/nvidia-dra-plugin/main.go rename to cmd/nvidia-dra-gpu-plugin/main.go index ab5bd5856..69f15859d 100644 --- a/cmd/nvidia-dra-plugin/main.go +++ b/cmd/nvidia-dra-gpu-plugin/main.go @@ -125,8 +125,8 @@ func newApp() *cli.App { cliFlags = append(cliFlags, flags.loggingConfig.Flags()...) app := &cli.App{ - Name: "nvidia-dra-plugin", - Usage: "nvidia-dra-plugin implements a DRA driver plugin for NVIDIA GPUs.", + Name: "nvidia-dra-gpu-plugin", + Usage: "nvidia-dra-gpu-plugin implements a DRA driver plugin for NVIDIA GPUs.", ArgsUsage: " ", HideHelpCommand: true, Flags: cliFlags, diff --git a/cmd/nvidia-dra-plugin/nvlib.go b/cmd/nvidia-dra-gpu-plugin/nvlib.go similarity index 100% rename from cmd/nvidia-dra-plugin/nvlib.go rename to cmd/nvidia-dra-gpu-plugin/nvlib.go diff --git a/cmd/nvidia-dra-plugin/prepared.go b/cmd/nvidia-dra-gpu-plugin/prepared.go similarity index 100% rename from cmd/nvidia-dra-plugin/prepared.go rename to cmd/nvidia-dra-gpu-plugin/prepared.go diff --git a/cmd/nvidia-dra-plugin/root.go b/cmd/nvidia-dra-gpu-plugin/root.go similarity index 100% rename from cmd/nvidia-dra-plugin/root.go rename to cmd/nvidia-dra-gpu-plugin/root.go diff --git a/cmd/nvidia-dra-plugin/sharing.go b/cmd/nvidia-dra-gpu-plugin/sharing.go similarity index 100% rename from cmd/nvidia-dra-plugin/sharing.go rename to cmd/nvidia-dra-gpu-plugin/sharing.go diff --git a/cmd/nvidia-dra-plugin/types.go b/cmd/nvidia-dra-gpu-plugin/types.go similarity index 100% rename from cmd/nvidia-dra-plugin/types.go rename to cmd/nvidia-dra-gpu-plugin/types.go diff --git a/cmd/nvidia-dra-controller/controller.go b/cmd/nvidia-dra-imex-controller/controller.go similarity index 100% rename from cmd/nvidia-dra-controller/controller.go rename to cmd/nvidia-dra-imex-controller/controller.go diff --git a/cmd/nvidia-dra-controller/deployment.go b/cmd/nvidia-dra-imex-controller/deployment.go similarity index 100% rename from cmd/nvidia-dra-controller/deployment.go rename to cmd/nvidia-dra-imex-controller/deployment.go diff --git a/cmd/nvidia-dra-controller/deploymentpods.go b/cmd/nvidia-dra-imex-controller/deploymentpods.go similarity index 100% rename from cmd/nvidia-dra-controller/deploymentpods.go rename to cmd/nvidia-dra-imex-controller/deploymentpods.go diff --git a/cmd/nvidia-dra-controller/deviceclass.go b/cmd/nvidia-dra-imex-controller/deviceclass.go similarity index 100% rename from cmd/nvidia-dra-controller/deviceclass.go rename to cmd/nvidia-dra-imex-controller/deviceclass.go diff --git a/cmd/nvidia-dra-controller/imexchannels.go b/cmd/nvidia-dra-imex-controller/imexchannels.go similarity index 100% rename from cmd/nvidia-dra-controller/imexchannels.go rename to cmd/nvidia-dra-imex-controller/imexchannels.go diff --git a/cmd/nvidia-dra-controller/indexers.go b/cmd/nvidia-dra-imex-controller/indexers.go similarity index 100% rename from cmd/nvidia-dra-controller/indexers.go rename to cmd/nvidia-dra-imex-controller/indexers.go diff --git 
a/cmd/nvidia-dra-controller/main.go b/cmd/nvidia-dra-imex-controller/main.go similarity index 97% rename from cmd/nvidia-dra-controller/main.go rename to cmd/nvidia-dra-imex-controller/main.go index ce4600538..5a9c2aeab 100644 --- a/cmd/nvidia-dra-controller/main.go +++ b/cmd/nvidia-dra-imex-controller/main.go @@ -128,8 +128,8 @@ func newApp() *cli.App { cliFlags = append(cliFlags, flags.loggingConfig.Flags()...) app := &cli.App{ - Name: "nvidia-dra-controller", - Usage: "nvidia-dra-controller implements a DRA driver controller for NVIDIA GPUs.", + Name: "nvidia-dra-imex-controller", + Usage: "nvidia-dra-imex-controller implements a DRA driver controller for NVIDIA IMEX domains.", ArgsUsage: " ", HideHelpCommand: true, Flags: cliFlags, diff --git a/cmd/nvidia-dra-controller/mnenv.go b/cmd/nvidia-dra-imex-controller/mnenv.go similarity index 100% rename from cmd/nvidia-dra-controller/mnenv.go rename to cmd/nvidia-dra-imex-controller/mnenv.go diff --git a/cmd/nvidia-dra-controller/resourceclaim.go b/cmd/nvidia-dra-imex-controller/resourceclaim.go similarity index 100% rename from cmd/nvidia-dra-controller/resourceclaim.go rename to cmd/nvidia-dra-imex-controller/resourceclaim.go diff --git a/cmd/nvidia-dra-controller/types.go b/cmd/nvidia-dra-imex-controller/types.go similarity index 100% rename from cmd/nvidia-dra-controller/types.go rename to cmd/nvidia-dra-imex-controller/types.go diff --git a/cmd/nvidia-dra-imex-plugin/allocatable.go b/cmd/nvidia-dra-imex-plugin/allocatable.go new file mode 100644 index 000000000..9b8680ad9 --- /dev/null +++ b/cmd/nvidia-dra-imex-plugin/allocatable.go @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "slices" + + resourceapi "k8s.io/api/resource/v1beta1" +) + +type AllocatableDevices map[string]*AllocatableDevice + +type AllocatableDevice struct { + Gpu *GpuInfo + Mig *MigDeviceInfo + ImexChannel *ImexChannelInfo +} + +func (d AllocatableDevice) Type() string { + if d.Gpu != nil { + return GpuDeviceType + } + if d.Mig != nil { + return MigDeviceType + } + if d.ImexChannel != nil { + return ImexChannelType + } + return UnknownDeviceType +} + +func (d *AllocatableDevice) CanonicalName() string { + switch d.Type() { + case GpuDeviceType: + return d.Gpu.CanonicalName() + case MigDeviceType: + return d.Mig.CanonicalName() + case ImexChannelType: + return d.ImexChannel.CanonicalName() + } + panic("unexpected type for AllocatableDevice") +} + +func (d *AllocatableDevice) CanonicalIndex() string { + switch d.Type() { + case GpuDeviceType: + return d.Gpu.CanonicalIndex() + case MigDeviceType: + return d.Mig.CanonicalIndex() + case ImexChannelType: + return d.ImexChannel.CanonicalIndex() + } + panic("unexpected type for AllocatableDevice") +} + +func (d *AllocatableDevice) GetDevice() resourceapi.Device { + switch d.Type() { + case GpuDeviceType: + return d.Gpu.GetDevice() + case MigDeviceType: + return d.Mig.GetDevice() + case ImexChannelType: + return d.ImexChannel.GetDevice() + } + panic("unexpected type for AllocatableDevice") +} + +func (d AllocatableDevices) GpuUUIDs() []string { + var uuids []string + for _, device := range d { + if device.Type() == GpuDeviceType { + uuids = append(uuids, device.Gpu.UUID) + } + } + slices.Sort(uuids) + return uuids +} + +func (d AllocatableDevices) MigDeviceUUIDs() []string { + var uuids []string + for _, device := range d { + if device.Type() == MigDeviceType { + uuids = append(uuids, device.Mig.UUID) + } + } + slices.Sort(uuids) + return uuids +} + +func (d AllocatableDevices) UUIDs() []string { + uuids := append(d.GpuUUIDs(), d.MigDeviceUUIDs()...) + slices.Sort(uuids) + return uuids +} diff --git a/cmd/nvidia-dra-imex-plugin/cdi.go b/cmd/nvidia-dra-imex-plugin/cdi.go new file mode 100644 index 000000000..548f624dd --- /dev/null +++ b/cmd/nvidia-dra-imex-plugin/cdi.go @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "fmt" + "io" + "path/filepath" + + "github.com/sirupsen/logrus" + + nvdevice "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" + "github.com/NVIDIA/go-nvml/pkg/nvml" + "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi" + "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec" + transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root" + "k8s.io/klog/v2" + cdiapi "tags.cncf.io/container-device-interface/pkg/cdi" + cdiparser "tags.cncf.io/container-device-interface/pkg/parser" + cdispec "tags.cncf.io/container-device-interface/specs-go" +) + +const ( + cdiVendor = "k8s." 
+ DriverName + + cdiDeviceClass = "device" + cdiDeviceKind = cdiVendor + "/" + cdiDeviceClass + cdiClaimClass = "claim" + cdiClaimKind = cdiVendor + "/" + cdiClaimClass + + cdiBaseSpecIdentifier = "base" + + defaultCDIRoot = "/var/run/cdi" +) + +type CDIHandler struct { + logger *logrus.Logger + nvml nvml.Interface + nvdevice nvdevice.Interface + nvcdiDevice nvcdi.Interface + nvcdiClaim nvcdi.Interface + cache *cdiapi.Cache + driverRoot string + devRoot string + targetDriverRoot string + nvidiaCTKPath string + + cdiRoot string + vendor string + deviceClass string + claimClass string +} + +func NewCDIHandler(opts ...cdiOption) (*CDIHandler, error) { + h := &CDIHandler{} + for _, opt := range opts { + opt(h) + } + + if h.logger == nil { + h.logger = logrus.New() + h.logger.SetOutput(io.Discard) + } + if h.nvml == nil { + h.nvml = nvml.New() + } + if h.cdiRoot == "" { + h.cdiRoot = defaultCDIRoot + } + if h.nvdevice == nil { + h.nvdevice = nvdevice.New(h.nvml) + } + if h.vendor == "" { + h.vendor = cdiVendor + } + if h.deviceClass == "" { + h.deviceClass = cdiDeviceClass + } + if h.claimClass == "" { + h.claimClass = cdiClaimClass + } + if h.nvcdiDevice == nil { + nvcdilib, err := nvcdi.New( + nvcdi.WithDeviceLib(h.nvdevice), + nvcdi.WithDriverRoot(h.driverRoot), + nvcdi.WithDevRoot(h.devRoot), + nvcdi.WithLogger(h.logger), + nvcdi.WithNvmlLib(h.nvml), + nvcdi.WithMode("nvml"), + nvcdi.WithVendor(h.vendor), + nvcdi.WithClass(h.deviceClass), + nvcdi.WithNVIDIACDIHookPath(h.nvidiaCTKPath), + ) + if err != nil { + return nil, fmt.Errorf("unable to create CDI library for devices: %w", err) + } + h.nvcdiDevice = nvcdilib + } + if h.nvcdiClaim == nil { + nvcdilib, err := nvcdi.New( + nvcdi.WithDeviceLib(h.nvdevice), + nvcdi.WithDriverRoot(h.driverRoot), + nvcdi.WithDevRoot(h.devRoot), + nvcdi.WithLogger(h.logger), + nvcdi.WithNvmlLib(h.nvml), + nvcdi.WithMode("nvml"), + nvcdi.WithVendor(h.vendor), + nvcdi.WithClass(h.claimClass), + nvcdi.WithNVIDIACDIHookPath(h.nvidiaCTKPath), + ) + if err != nil { + return nil, fmt.Errorf("unable to create CDI library for claims: %w", err) + } + h.nvcdiClaim = nvcdilib + } + if h.cache == nil { + cache, err := cdiapi.NewCache( + cdiapi.WithSpecDirs(h.cdiRoot), + ) + if err != nil { + return nil, fmt.Errorf("unable to create a new CDI cache: %w", err) + } + h.cache = cache + } + + return h, nil +} + +func (cdi *CDIHandler) GetImexChannelContainerEdits(info *ImexChannelInfo) *cdiapi.ContainerEdits { + channelPath := fmt.Sprintf("/dev/nvidia-caps-imex-channels/channel%d", info.Channel) + + return &cdiapi.ContainerEdits{ + ContainerEdits: &cdispec.ContainerEdits{ + DeviceNodes: []*cdispec.DeviceNode{ + { + Path: channelPath, + HostPath: filepath.Join(cdi.devRoot, channelPath), + }, + }, + }, + } +} + +func (cdi *CDIHandler) CreateStandardDeviceSpecFile(allocatable AllocatableDevices) error { + // Initialize NVML in order to get the device edits. + if r := cdi.nvml.Init(); r != nvml.SUCCESS { + return fmt.Errorf("failed to initialize NVML: %v", r) + } + defer func() { + if r := cdi.nvml.Shutdown(); r != nvml.SUCCESS { + klog.Warningf("failed to shutdown NVML: %v", r) + } + }() + + // Generate the set of common edits. + commonEdits, err := cdi.nvcdiDevice.GetCommonEdits() + if err != nil { + return fmt.Errorf("failed to get common CDI spec edits: %w", err) + } + + // Make sure that NVIDIA_VISIBLE_DEVICES is set to void to avoid the + // nvidia-container-runtime honoring it in addition to the underlying + // runtime honoring CDI. 
+ commonEdits.ContainerEdits.Env = append( + commonEdits.ContainerEdits.Env, + "NVIDIA_VISIBLE_DEVICES=void") + + // Generate device specs for all full GPUs and MIG devices. + var deviceSpecs []cdispec.Device + for _, device := range allocatable { + if device.Type() == ImexChannelType { + continue + } + dspecs, err := cdi.nvcdiDevice.GetDeviceSpecsByID(device.CanonicalIndex()) + if err != nil { + return fmt.Errorf("unable to get device spec for %s: %w", device.CanonicalName(), err) + } + dspecs[0].Name = device.CanonicalName() + deviceSpecs = append(deviceSpecs, dspecs[0]) + } + + // Generate base spec from commonEdits and deviceEdits. + spec, err := spec.New( + spec.WithVendor(cdiVendor), + spec.WithClass(cdiDeviceClass), + spec.WithDeviceSpecs(deviceSpecs), + spec.WithEdits(*commonEdits.ContainerEdits), + ) + if err != nil { + return fmt.Errorf("failed to create CDI spec: %w", err) + } + + // Transform the spec to make it aware that it is running inside a container. + err = transformroot.New( + transformroot.WithRoot(cdi.driverRoot), + transformroot.WithTargetRoot(cdi.targetDriverRoot), + transformroot.WithRelativeTo("host"), + ).Transform(spec.Raw()) + if err != nil { + return fmt.Errorf("failed to transform driver root in CDI spec: %w", err) + } + + // Update the spec to include only the minimum version necessary. + minVersion, err := cdiapi.MinimumRequiredVersion(spec.Raw()) + if err != nil { + return fmt.Errorf("failed to get minimum required CDI spec version: %v", err) + } + spec.Raw().Version = minVersion + + // Write the spec out to disk. + specName := cdiapi.GenerateTransientSpecName(cdiVendor, cdiDeviceClass, cdiBaseSpecIdentifier) + return cdi.cache.WriteSpec(spec.Raw(), specName) +} + +func (cdi *CDIHandler) CreateClaimSpecFile(claimUID string, preparedDevices PreparedDevices) error { + // Generate claim-specific specs for each device. + var deviceSpecs []cdispec.Device + for _, group := range preparedDevices { + // If there are no edits passed back as part of the device config state, skip it + if group.ConfigState.containerEdits == nil { + continue + } + + // Apply any edits passed back as part of the device config state to all devices + for _, device := range group.Devices { + deviceSpec := cdispec.Device{ + Name: fmt.Sprintf("%s-%s", claimUID, device.CanonicalName()), + ContainerEdits: *group.ConfigState.containerEdits.ContainerEdits, + } + + deviceSpecs = append(deviceSpecs, deviceSpec) + } + } + + // Generate the claim-specific device spec for this driver. + spec, err := spec.New( + spec.WithVendor(cdiVendor), + spec.WithClass(cdiClaimClass), + spec.WithDeviceSpecs(deviceSpecs), + ) + if err != nil { + return fmt.Errorf("failed to create CDI spec: %w", err) + } + + // Transform the spec to make it aware that it is running inside a container. + err = transformroot.New( + transformroot.WithRoot(cdi.driverRoot), + transformroot.WithTargetRoot(cdi.targetDriverRoot), + transformroot.WithRelativeTo("host"), + ).Transform(spec.Raw()) + if err != nil { + return fmt.Errorf("failed to transform driver root in CDI spec: %w", err) + } + + // Update the spec to include only the minimum version necessary. + minVersion, err := cdiapi.MinimumRequiredVersion(spec.Raw()) + if err != nil { + return fmt.Errorf("failed to get minimum required CDI spec version: %v", err) + } + spec.Raw().Version = minVersion + + // Write the spec out to disk.
+ specName := cdiapi.GenerateTransientSpecName(cdiVendor, cdiClaimClass, claimUID) + return cdi.cache.WriteSpec(spec.Raw(), specName) +} + +func (cdi *CDIHandler) DeleteClaimSpecFile(claimUID string) error { + specName := cdiapi.GenerateTransientSpecName(cdiVendor, cdiClaimClass, claimUID) + return cdi.cache.RemoveSpec(specName) +} + +func (cdi *CDIHandler) GetStandardDevice(device *AllocatableDevice) string { + if device.Type() == ImexChannelType { + return "" + } + return cdiparser.QualifiedName(cdiVendor, cdiDeviceClass, device.CanonicalName()) +} + +func (cdi *CDIHandler) GetClaimDevice(claimUID string, device *AllocatableDevice, containerEdits *cdiapi.ContainerEdits) string { + if containerEdits == nil { + return "" + } + return cdiparser.QualifiedName(cdiVendor, cdiClaimClass, fmt.Sprintf("%s-%s", claimUID, device.CanonicalName())) +} diff --git a/cmd/nvidia-dra-imex-plugin/cdioptions.go b/cmd/nvidia-dra-imex-plugin/cdioptions.go new file mode 100644 index 000000000..4e7b99168 --- /dev/null +++ b/cmd/nvidia-dra-imex-plugin/cdioptions.go @@ -0,0 +1,81 @@ +/* +# Copyright 2023 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/ + +package main + +import ( + nvdevice "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" + "github.com/NVIDIA/go-nvml/pkg/nvml" +) + +// cdiOption represents a functional option for constructing a CDI handler. +type cdiOption func(*CDIHandler) + +// WithDriverRoot provides a cdiOption to set the driver root used by the 'cdi' interface. +func WithDriverRoot(root string) cdiOption { + return func(c *CDIHandler) { + c.driverRoot = root + } +} + +// WithDevRoot provides a cdiOption to set the device root used by the 'cdi' interface. +func WithDevRoot(root string) cdiOption { + return func(c *CDIHandler) { + c.devRoot = root + } +} + +// WithTargetDriverRoot provides a cdiOption to set the target driver root used by the 'cdi' interface. +func WithTargetDriverRoot(root string) cdiOption { + return func(c *CDIHandler) { + c.targetDriverRoot = root + } +} + +// WithCDIRoot provides a cdiOption to set the CDI root used by the 'cdi' interface. +func WithCDIRoot(cdiRoot string) cdiOption { + return func(c *CDIHandler) { + c.cdiRoot = cdiRoot + } +} + +// WithNvidiaCTKPath provides a cdiOption to set the nvidia-ctk path used by the 'cdi' interface. +func WithNvidiaCTKPath(path string) cdiOption { + return func(c *CDIHandler) { + c.nvidiaCTKPath = path + } +} + +// WithNvml provides a cdiOption to set the NVML library used by the 'cdi' interface. +func WithNvml(nvml nvml.Interface) cdiOption { + return func(c *CDIHandler) { + c.nvml = nvml + } +} + +// WithDeviceLib provides a cdiOption to set the device enumeration and query library. +func WithDeviceLib(nvdevice nvdevice.Interface) cdiOption { + return func(c *CDIHandler) { + c.nvdevice = nvdevice + } +} + +// WithVendor provides a cdiOption to set the vendor used by the 'cdi' interface.
+func WithVendor(vendor string) cdiOption { + return func(c *CDIHandler) { + c.vendor = vendor + } +} diff --git a/cmd/nvidia-dra-imex-plugin/checkpoint.go b/cmd/nvidia-dra-imex-plugin/checkpoint.go new file mode 100644 index 000000000..7311e7602 --- /dev/null +++ b/cmd/nvidia-dra-imex-plugin/checkpoint.go @@ -0,0 +1,53 @@ +package main + +import ( + "encoding/json" + + "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" +) + +type Checkpoint struct { + Checksum checksum.Checksum `json:"checksum"` + V1 *CheckpointV1 `json:"v1,omitempty"` +} + +type CheckpointV1 struct { + PreparedClaims PreparedClaims `json:"preparedClaims,omitempty"` +} + +func newCheckpoint() *Checkpoint { + pc := &Checkpoint{ + Checksum: 0, + V1: &CheckpointV1{ + PreparedClaims: make(PreparedClaims), + }, + } + return pc +} + +func (cp *Checkpoint) MarshalCheckpoint() ([]byte, error) { + cp.Checksum = 0 + out, err := json.Marshal(*cp) + if err != nil { + return nil, err + } + cp.Checksum = checksum.New(out) + return json.Marshal(*cp) +} + +func (cp *Checkpoint) UnmarshalCheckpoint(data []byte) error { + return json.Unmarshal(data, cp) +} + +func (cp *Checkpoint) VerifyChecksum() error { + ck := cp.Checksum + cp.Checksum = 0 + defer func() { + cp.Checksum = ck + }() + out, err := json.Marshal(*cp) + if err != nil { + return err + } + return ck.Verify(out) +} diff --git a/cmd/nvidia-dra-imex-plugin/device_state.go b/cmd/nvidia-dra-imex-plugin/device_state.go new file mode 100644 index 000000000..22879d839 --- /dev/null +++ b/cmd/nvidia-dra-imex-plugin/device_state.go @@ -0,0 +1,558 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "context" + "fmt" + "slices" + "sync" + + resourceapi "k8s.io/api/resource/v1beta1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/klog/v2" + drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1beta1" + "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" + cdiapi "tags.cncf.io/container-device-interface/pkg/cdi" + + configapi "github.com/NVIDIA/k8s-dra-driver/api/nvidia.com/resource/gpu/v1alpha1" +) + +type OpaqueDeviceConfig struct { + Requests []string + Config runtime.Object +} + +type DeviceConfigState struct { + MpsControlDaemonID string `json:"mpsControlDaemonID"` + containerEdits *cdiapi.ContainerEdits +} + +type DeviceState struct { + sync.Mutex + cdi *CDIHandler + tsManager *TimeSlicingManager + mpsManager *MpsManager + allocatable AllocatableDevices + config *Config + + nvdevlib *deviceLib + checkpointManager checkpointmanager.CheckpointManager +} + +func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) { + containerDriverRoot := root(config.flags.containerDriverRoot) + nvdevlib, err := newDeviceLib(containerDriverRoot) + if err != nil { + return nil, fmt.Errorf("failed to create device library: %w", err) + } + + allocatable, err := nvdevlib.enumerateAllPossibleDevices(config) + if err != nil { + return nil, fmt.Errorf("error enumerating all possible devices: %w", err) + } + + devRoot := containerDriverRoot.getDevRoot() + klog.Infof("using devRoot=%v", devRoot) + + hostDriverRoot := config.flags.hostDriverRoot + cdi, err := NewCDIHandler( + WithNvml(nvdevlib.nvmllib), + WithDeviceLib(nvdevlib), + WithDriverRoot(string(containerDriverRoot)), + WithDevRoot(devRoot), + WithTargetDriverRoot(hostDriverRoot), + WithNvidiaCTKPath(config.flags.nvidiaCTKPath), + WithCDIRoot(config.flags.cdiRoot), + WithVendor(cdiVendor), + ) + if err != nil { + return nil, fmt.Errorf("unable to create CDI handler: %w", err) + } + + tsManager := NewTimeSlicingManager(nvdevlib) + mpsManager := NewMpsManager(config, nvdevlib, MpsRoot, hostDriverRoot, MpsControlDaemonTemplatePath) + + if err := cdi.CreateStandardDeviceSpecFile(allocatable); err != nil { + return nil, fmt.Errorf("unable to create base CDI spec file: %v", err) + } + + checkpointManager, err := checkpointmanager.NewCheckpointManager(DriverPluginPath) + if err != nil { + return nil, fmt.Errorf("unable to create checkpoint manager: %v", err) + } + + state := &DeviceState{ + cdi: cdi, + tsManager: tsManager, + mpsManager: mpsManager, + allocatable: allocatable, + config: config, + nvdevlib: nvdevlib, + checkpointManager: checkpointManager, + } + + checkpoints, err := state.checkpointManager.ListCheckpoints() + if err != nil { + return nil, fmt.Errorf("unable to list checkpoints: %v", err) + } + + for _, c := range checkpoints { + if c == DriverPluginCheckpointFile { + return state, nil + } + } + + checkpoint := newCheckpoint() + if err := state.checkpointManager.CreateCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { + return nil, fmt.Errorf("unable to sync to checkpoint: %v", err) + } + + return state, nil +} + +func (s *DeviceState) Prepare(ctx context.Context, claim *resourceapi.ResourceClaim) ([]*drapbv1.Device, error) { + s.Lock() + defer s.Unlock() + + claimUID := string(claim.UID) + + checkpoint := newCheckpoint() + if err := s.checkpointManager.GetCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { + return nil, fmt.Errorf("unable to sync from checkpoint: %v", err) + } + preparedClaims := checkpoint.V1.PreparedClaims + + if preparedClaims[claimUID] != nil { + return 
preparedClaims[claimUID].GetDevices(), nil + } + + preparedDevices, err := s.prepareDevices(ctx, claim) + if err != nil { + return nil, fmt.Errorf("prepare devices failed: %w", err) + } + + if err := s.cdi.CreateClaimSpecFile(claimUID, preparedDevices); err != nil { + return nil, fmt.Errorf("unable to create CDI spec file for claim: %w", err) + } + + preparedClaims[claimUID] = preparedDevices + if err := s.checkpointManager.CreateCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { + return nil, fmt.Errorf("unable to sync to checkpoint: %v", err) + } + + return preparedClaims[claimUID].GetDevices(), nil +} + +func (s *DeviceState) Unprepare(ctx context.Context, claimUID string) error { + s.Lock() + defer s.Unlock() + + checkpoint := newCheckpoint() + if err := s.checkpointManager.GetCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { + return fmt.Errorf("unable to sync from checkpoint: %v", err) + } + preparedClaims := checkpoint.V1.PreparedClaims + + if preparedClaims[claimUID] == nil { + return nil + } + + if err := s.unprepareDevices(ctx, claimUID, preparedClaims[claimUID]); err != nil { + return fmt.Errorf("unprepare devices failed: %w", err) + } + + err := s.cdi.DeleteClaimSpecFile(claimUID) + if err != nil { + return fmt.Errorf("unable to delete CDI spec file for claim: %w", err) + } + + delete(preparedClaims, claimUID) + if err := s.checkpointManager.CreateCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { + return fmt.Errorf("unable to sync to checkpoint: %v", err) + } + + return nil +} + +func (s *DeviceState) prepareDevices(ctx context.Context, claim *resourceapi.ResourceClaim) (PreparedDevices, error) { + if claim.Status.Allocation == nil { + return nil, fmt.Errorf("claim not yet allocated") + } + + // Retrieve the full set of device configs for the driver. + configs, err := GetOpaqueDeviceConfigs( + configapi.Decoder, + DriverName, + claim.Status.Allocation.Devices.Config, + ) + if err != nil { + return nil, fmt.Errorf("error getting opaque device configs: %v", err) + } + + // Add the default GPU, MIG device, and IMEX channel configs to the front of the config + // list with the lowest precedence. This guarantees there will be at least + // one of each config in the list with len(Requests) == 0 for the lookup below. + configs = slices.Insert(configs, 0, &OpaqueDeviceConfig{ + Requests: []string{}, + Config: configapi.DefaultGpuConfig(), + }) + configs = slices.Insert(configs, 0, &OpaqueDeviceConfig{ + Requests: []string{}, + Config: configapi.DefaultMigDeviceConfig(), + }) + configs = slices.Insert(configs, 0, &OpaqueDeviceConfig{ + Requests: []string{}, + Config: configapi.DefaultImexChannelConfig(), + }) + + // Look through the configs and figure out which one will be applied to + // each device allocation result based on their order of precedence and type.
+ configResultsMap := make(map[runtime.Object][]*resourceapi.DeviceRequestAllocationResult) + for _, result := range claim.Status.Allocation.Devices.Results { + device, exists := s.allocatable[result.Device] + if !exists { + return nil, fmt.Errorf("requested device is not allocatable: %v", result.Device) + } + for _, c := range slices.Backward(configs) { + if slices.Contains(c.Requests, result.Request) { + if _, ok := c.Config.(*configapi.GpuConfig); ok && device.Type() != GpuDeviceType { + return nil, fmt.Errorf("cannot apply GPU config to request: %v", result.Request) + } + if _, ok := c.Config.(*configapi.MigDeviceConfig); ok && device.Type() != MigDeviceType { + return nil, fmt.Errorf("cannot apply MIG device config to request: %v", result.Request) + } + if _, ok := c.Config.(*configapi.ImexChannelConfig); ok && device.Type() != ImexChannelType { + return nil, fmt.Errorf("cannot apply IMEX channel config to request: %v", result.Request) + } + configResultsMap[c.Config] = append(configResultsMap[c.Config], &result) + break + } + if len(c.Requests) == 0 { + if _, ok := c.Config.(*configapi.GpuConfig); ok && device.Type() != GpuDeviceType { + continue + } + if _, ok := c.Config.(*configapi.MigDeviceConfig); ok && device.Type() != MigDeviceType { + continue + } + if _, ok := c.Config.(*configapi.ImexChannelConfig); ok && device.Type() != ImexChannelType { + continue + } + configResultsMap[c.Config] = append(configResultsMap[c.Config], &result) + break + } + } + } + + // Normalize, validate, and apply all configs associated with devices that + // need to be prepared. Track device group configs generated from applying the + // config to the set of device allocation results. + preparedDeviceGroupConfigState := make(map[runtime.Object]*DeviceConfigState) + for c, results := range configResultsMap { + // Cast the opaque config to a configapi.Interface type + var config configapi.Interface + switch castConfig := c.(type) { + case *configapi.GpuConfig: + config = castConfig + case *configapi.MigDeviceConfig: + config = castConfig + case *configapi.ImexChannelConfig: + config = castConfig + default: + return nil, fmt.Errorf("runtime object is not a recognized configuration") + } + + // Normalize the config to set any implied defaults. + if err := config.Normalize(); err != nil { + return nil, fmt.Errorf("error normalizing config: %w", err) + } + + // Validate the config to ensure its integrity. + if err := config.Validate(); err != nil { + return nil, fmt.Errorf("error validating config: %w", err) + } + + // Apply the config to the list of results associated with it. + configState, err := s.applyConfig(ctx, config, claim, results) + if err != nil { + return nil, fmt.Errorf("error applying config: %w", err) + } + + // Capture the prepared device group config in the map. + preparedDeviceGroupConfigState[c] = configState + } + + // Walk through each config and its associated device allocation results + // and construct the list of prepared devices to return.
+ var preparedDevices PreparedDevices + for c, results := range configResultsMap { + preparedDeviceGroup := PreparedDeviceGroup{ + ConfigState: *preparedDeviceGroupConfigState[c], + } + + for _, result := range results { + cdiDevices := []string{} + if d := s.cdi.GetStandardDevice(s.allocatable[result.Device]); d != "" { + cdiDevices = append(cdiDevices, d) + } + if d := s.cdi.GetClaimDevice(string(claim.UID), s.allocatable[result.Device], preparedDeviceGroupConfigState[c].containerEdits); d != "" { + cdiDevices = append(cdiDevices, d) + } + + device := &drapbv1.Device{ + RequestNames: []string{result.Request}, + PoolName: result.Pool, + DeviceName: result.Device, + CDIDeviceIDs: cdiDevices, + } + + var preparedDevice PreparedDevice + switch s.allocatable[result.Device].Type() { + case GpuDeviceType: + preparedDevice.Gpu = &PreparedGpu{ + Info: s.allocatable[result.Device].Gpu, + Device: device, + } + case MigDeviceType: + preparedDevice.Mig = &PreparedMigDevice{ + Info: s.allocatable[result.Device].Mig, + Device: device, + } + case ImexChannelType: + preparedDevice.ImexChannel = &PreparedImexChannel{ + Info: s.allocatable[result.Device].ImexChannel, + Device: device, + } + } + + preparedDeviceGroup.Devices = append(preparedDeviceGroup.Devices, preparedDevice) + } + + preparedDevices = append(preparedDevices, &preparedDeviceGroup) + } + return preparedDevices, nil +} + +func (s *DeviceState) unprepareDevices(ctx context.Context, claimUID string, devices PreparedDevices) error { + for _, group := range devices { + // Stop any MPS control daemons started for each group of prepared devices. + mpsControlDaemon := s.mpsManager.NewMpsControlDaemon(claimUID, group) + if err := mpsControlDaemon.Stop(ctx); err != nil { + return fmt.Errorf("error stopping MPS control daemon: %w", err) + } + + // Go back to default time-slicing for all full GPUs. + tsc := configapi.DefaultGpuConfig().Sharing.TimeSlicingConfig + if err := s.tsManager.SetTimeSlice(group.Devices.Gpus(), tsc); err != nil { + return fmt.Errorf("error setting timeslice for devices: %w", err) + } + } + return nil +} + +func (s *DeviceState) applyConfig(ctx context.Context, config configapi.Interface, claim *resourceapi.ResourceClaim, results []*resourceapi.DeviceRequestAllocationResult) (*DeviceConfigState, error) { + switch castConfig := config.(type) { + case *configapi.GpuConfig: + return s.applySharingConfig(ctx, castConfig.Sharing, claim, results) + case *configapi.MigDeviceConfig: + return s.applySharingConfig(ctx, castConfig.Sharing, claim, results) + case *configapi.ImexChannelConfig: + return s.applyImexChannelConfig(ctx, castConfig, claim, results) + default: + return nil, fmt.Errorf("unknown config type: %T", castConfig) + } +} + +func (s *DeviceState) applySharingConfig(ctx context.Context, config configapi.Sharing, claim *resourceapi.ResourceClaim, results []*resourceapi.DeviceRequestAllocationResult) (*DeviceConfigState, error) { + // Get the list of claim requests this config is being applied over. + var requests []string + for _, r := range results { + requests = append(requests, r.Request) + } + + // Get the list of allocatable devices this config is being applied over. + allocatableDevices := make(AllocatableDevices) + for _, r := range results { + allocatableDevices[r.Device] = s.allocatable[r.Device] + } + + // Declare a device group state object to populate. + var configState DeviceConfigState + + // Apply time-slicing settings (if available). 
+ if config.IsTimeSlicing() { + tsc, err := config.GetTimeSlicingConfig() + if err != nil { + return nil, fmt.Errorf("error getting timeslice config for requests '%v' in claim '%v': %w", requests, claim.UID, err) + } + if tsc != nil { + err = s.tsManager.SetTimeSlice(allocatableDevices, tsc) + if err != nil { + return nil, fmt.Errorf("error setting timeslice config for requests '%v' in claim '%v': %w", requests, claim.UID, err) + } + } + } + + // Apply MPS settings. + if config.IsMps() { + mpsc, err := config.GetMpsConfig() + if err != nil { + return nil, fmt.Errorf("error getting MPS configuration: %w", err) + } + mpsControlDaemon := s.mpsManager.NewMpsControlDaemon(string(claim.UID), allocatableDevices) + if err := mpsControlDaemon.Start(ctx, mpsc); err != nil { + return nil, fmt.Errorf("error starting MPS control daemon: %w", err) + } + if err := mpsControlDaemon.AssertReady(ctx); err != nil { + return nil, fmt.Errorf("MPS control daemon is not yet ready: %w", err) + } + configState.MpsControlDaemonID = mpsControlDaemon.GetID() + configState.containerEdits = mpsControlDaemon.GetCDIContainerEdits() + } + + return &configState, nil +} + +func (s *DeviceState) applyImexChannelConfig(ctx context.Context, config *configapi.ImexChannelConfig, claim *resourceapi.ResourceClaim, results []*resourceapi.DeviceRequestAllocationResult) (*DeviceConfigState, error) { + // Declare a device group state object to populate. + var configState DeviceConfigState + + // Create any necessary IMEX channels and gather their CDI container edits. + for _, r := range results { + imexChannel := s.allocatable[r.Device].ImexChannel + if err := s.nvdevlib.createImexChannelDevice(imexChannel.Channel); err != nil { + return nil, fmt.Errorf("error creating IMEX channel device: %w", err) + } + configState.containerEdits = configState.containerEdits.Append(s.cdi.GetImexChannelContainerEdits(imexChannel)) + } + + return &configState, nil +} + +// GetOpaqueDeviceConfigs returns an ordered list of the configs contained in possibleConfigs for this driver. +// +// Configs can either come from the resource claim itself or from the device +// class associated with the request. Configs coming directly from the resource +// claim take precedence over configs coming from the device class. Moreover, +// configs found later in the list of configs attached to its source take +// precedence over configs found earlier in the list for that source. +// +// All of the configs relevant to the driver from the list of possibleConfigs +// will be returned in order of precedence (from lowest to highest). If no +// configs are found, nil is returned. +func GetOpaqueDeviceConfigs( + decoder runtime.Decoder, + driverName string, + possibleConfigs []resourceapi.DeviceAllocationConfiguration, +) ([]*OpaqueDeviceConfig, error) { + // Collect all configs in order of reverse precedence. + var classConfigs []resourceapi.DeviceAllocationConfiguration + var claimConfigs []resourceapi.DeviceAllocationConfiguration + var candidateConfigs []resourceapi.DeviceAllocationConfiguration + for _, config := range possibleConfigs { + switch config.Source { + case resourceapi.AllocationConfigSourceClass: + classConfigs = append(classConfigs, config) + case resourceapi.AllocationConfigSourceClaim: + claimConfigs = append(claimConfigs, config) + default: + return nil, fmt.Errorf("invalid config source: %v", config.Source) + } + } + candidateConfigs = append(candidateConfigs, classConfigs...) + candidateConfigs = append(candidateConfigs, claimConfigs...) 
+ + // Decode all configs that are relevant for the driver. + var resultConfigs []*OpaqueDeviceConfig + for _, config := range candidateConfigs { + // If this is nil, the driver doesn't support some future API extension + // and needs to be updated. + if config.DeviceConfiguration.Opaque == nil { + return nil, fmt.Errorf("only opaque parameters are supported by this driver") + } + + // Configs for different drivers may have been specified because a + // single request can be satisfied by different drivers. This is not + // an error -- drivers must skip over other drivers' configs in order + // to support this. + if config.DeviceConfiguration.Opaque.Driver != driverName { + continue + } + + decodedConfig, err := runtime.Decode(decoder, config.DeviceConfiguration.Opaque.Parameters.Raw) + if err != nil { + return nil, fmt.Errorf("error decoding config parameters: %w", err) + } + + resultConfig := &OpaqueDeviceConfig{ + Requests: config.Requests, + Config: decodedConfig, + } + + resultConfigs = append(resultConfigs, resultConfig) + } + + return resultConfigs, nil +} + +// TODO: Dynamic MIG is not yet supported with structured parameters. +// Refactor this to allow for the allocation of statically partitioned MIG +// devices. +// +// func (s *DeviceState) prepareMigDevices(claimUID string, allocated *nascrd.AllocatedMigDevices) (*PreparedMigDevices, error) { +// prepared := &PreparedMigDevices{} +// +// for _, device := range allocated.Devices { +// if _, exists := s.allocatable[device.ParentUUID]; !exists { +// return nil, fmt.Errorf("allocated GPU does not exist: %v", device.ParentUUID) +// } +// +// parent := s.allocatable[device.ParentUUID] +// +// if !parent.migEnabled { +// return nil, fmt.Errorf("cannot prepare a GPU with MIG mode disabled: %v", device.ParentUUID) +// } +// +// if _, exists := parent.migProfiles[device.Profile]; !exists { +// return nil, fmt.Errorf("MIG profile %v does not exist on GPU: %v", device.Profile, device.ParentUUID) +// } +// +// placement := nvml.GpuInstancePlacement{ +// Start: uint32(device.Placement.Start), +// Size: uint32(device.Placement.Size), +// } +// +// migInfo, err := s.nvdevlib.createMigDevice(parent.GpuInfo, parent.migProfiles[device.Profile].profile, &placement) +// if err != nil { +// return nil, fmt.Errorf("error creating MIG device: %w", err) +// } +// +// prepared.Devices = append(prepared.Devices, migInfo) +// } +// +// return prepared, nil +// } +// +// func (s *DeviceState) unprepareMigDevices(claimUID string, devices *PreparedDevices) error { +// for _, device := range devices.Mig.Devices { +// err := s.nvdevlib.deleteMigDevice(device) +// if err != nil { +// return fmt.Errorf("error deleting MIG device for %v: %w", device.uuid, err) +// } +// } +// return nil +//} diff --git a/cmd/nvidia-dra-imex-plugin/deviceinfo.go b/cmd/nvidia-dra-imex-plugin/deviceinfo.go new file mode 100644 index 000000000..cd21669fc --- /dev/null +++ b/cmd/nvidia-dra-imex-plugin/deviceinfo.go @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "fmt" + + "github.com/Masterminds/semver" + nvdev "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" + "github.com/NVIDIA/go-nvml/pkg/nvml" + resourceapi "k8s.io/api/resource/v1beta1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/utils/ptr" +) + +type GpuInfo struct { + UUID string `json:"uuid"` + index int + minor int + migEnabled bool + memoryBytes uint64 + productName string + brand string + architecture string + cudaComputeCapability string + driverVersion string + cudaDriverVersion string + migProfiles []*MigProfileInfo +} + +type MigDeviceInfo struct { + UUID string `json:"uuid"` + index int + profile string + parent *GpuInfo + placement *MigDevicePlacement + giProfileInfo *nvml.GpuInstanceProfileInfo + giInfo *nvml.GpuInstanceInfo + ciProfileInfo *nvml.ComputeInstanceProfileInfo + ciInfo *nvml.ComputeInstanceInfo +} + +type MigProfileInfo struct { + profile nvdev.MigProfile + placements []*MigDevicePlacement +} + +type MigDevicePlacement struct { + nvml.GpuInstancePlacement +} + +type ImexChannelInfo struct { + Channel int `json:"channel"` +} + +func (p MigProfileInfo) String() string { + return p.profile.String() +} + +func (d *GpuInfo) CanonicalName() string { + return fmt.Sprintf("gpu-%d", d.index) +} + +func (d *MigDeviceInfo) CanonicalName() string { + return fmt.Sprintf("gpu-%d-mig-%d-%d-%d", d.parent.index, d.giInfo.ProfileId, d.placement.Start, d.placement.Size) +} + +func (d *ImexChannelInfo) CanonicalName() string { + return fmt.Sprintf("imex-channel-%d", d.Channel) +} + +func (d *GpuInfo) CanonicalIndex() string { + return fmt.Sprintf("%d", d.index) +} + +func (d *MigDeviceInfo) CanonicalIndex() string { + return fmt.Sprintf("%d:%d", d.parent.index, d.index) +} + +func (d *ImexChannelInfo) CanonicalIndex() string { + return fmt.Sprintf("%d", d.Channel) +} + +func (d *GpuInfo) GetDevice() resourceapi.Device { + device := resourceapi.Device{ + Name: d.CanonicalName(), + Basic: &resourceapi.BasicDevice{ + Attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{ + "type": { + StringValue: ptr.To(GpuDeviceType), + }, + "uuid": { + StringValue: &d.UUID, + }, + "minor": { + IntValue: ptr.To(int64(d.minor)), + }, + "index": { + IntValue: ptr.To(int64(d.index)), + }, + "productName": { + StringValue: &d.productName, + }, + "brand": { + StringValue: &d.brand, + }, + "architecture": { + StringValue: &d.architecture, + }, + "cudaComputeCapability": { + VersionValue: ptr.To(semver.MustParse(d.cudaComputeCapability).String()), + }, + "driverVersion": { + VersionValue: ptr.To(semver.MustParse(d.driverVersion).String()), + }, + "cudaDriverVersion": { + VersionValue: ptr.To(semver.MustParse(d.cudaDriverVersion).String()), + }, + }, + Capacity: map[resourceapi.QualifiedName]resourceapi.DeviceCapacity{ + "memory": { + Value: *resource.NewQuantity(int64(d.memoryBytes), resource.BinarySI), + }, + }, + }, + } + return device +} + +func (d *MigDeviceInfo) GetDevice() resourceapi.Device { + device := resourceapi.Device{ + Name: d.CanonicalName(), + Basic: &resourceapi.BasicDevice{ + Attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{ + "type": { + StringValue: ptr.To(MigDeviceType), + }, + "uuid": { + StringValue: &d.UUID, + }, + "parentUUID": { + StringValue: &d.parent.UUID, + }, + "index": { + IntValue: ptr.To(int64(d.index)), + }, + "parentIndex": { + IntValue: ptr.To(int64(d.parent.index)), + }, + "profile": { + 
StringValue: &d.profile, + }, + "productName": { + StringValue: &d.parent.productName, + }, + "brand": { + StringValue: &d.parent.brand, + }, + "architecture": { + StringValue: &d.parent.architecture, + }, + "cudaComputeCapability": { + VersionValue: ptr.To(semver.MustParse(d.parent.cudaComputeCapability).String()), + }, + "driverVersion": { + VersionValue: ptr.To(semver.MustParse(d.parent.driverVersion).String()), + }, + "cudaDriverVersion": { + VersionValue: ptr.To(semver.MustParse(d.parent.cudaDriverVersion).String()), + }, + }, + Capacity: map[resourceapi.QualifiedName]resourceapi.DeviceCapacity{ + "multiprocessors": { + Value: *resource.NewQuantity(int64(d.giProfileInfo.MultiprocessorCount), resource.BinarySI), + }, + "copyEngines": {Value: *resource.NewQuantity(int64(d.giProfileInfo.CopyEngineCount), resource.BinarySI)}, + "decoders": {Value: *resource.NewQuantity(int64(d.giProfileInfo.DecoderCount), resource.BinarySI)}, + "encoders": {Value: *resource.NewQuantity(int64(d.giProfileInfo.EncoderCount), resource.BinarySI)}, + "jpegEngines": {Value: *resource.NewQuantity(int64(d.giProfileInfo.JpegCount), resource.BinarySI)}, + "ofaEngines": {Value: *resource.NewQuantity(int64(d.giProfileInfo.OfaCount), resource.BinarySI)}, + "memory": {Value: *resource.NewQuantity(int64(d.giProfileInfo.MemorySizeMB*1024*1024), resource.BinarySI)}, + }, + }, + } + for i := d.placement.Start; i < d.placement.Start+d.placement.Size; i++ { + capacity := resourceapi.QualifiedName(fmt.Sprintf("memorySlice%d", i)) + device.Basic.Capacity[capacity] = resourceapi.DeviceCapacity{ + Value: *resource.NewQuantity(1, resource.BinarySI), + } + } + return device +} + +func (d *ImexChannelInfo) GetDevice() resourceapi.Device { + device := resourceapi.Device{ + Name: d.CanonicalName(), + Basic: &resourceapi.BasicDevice{ + Attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{ + "type": { + StringValue: ptr.To(ImexChannelType), + }, + "channel": { + IntValue: ptr.To(int64(d.Channel)), + }, + }, + }, + } + return device +} diff --git a/cmd/nvidia-dra-imex-plugin/driver.go b/cmd/nvidia-dra-imex-plugin/driver.go new file mode 100644 index 000000000..0a93a8f5e --- /dev/null +++ b/cmd/nvidia-dra-imex-plugin/driver.go @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package main
+
+import (
+	"context"
+	"fmt"
+	"sync"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	coreclientset "k8s.io/client-go/kubernetes"
+	"k8s.io/dynamic-resource-allocation/kubeletplugin"
+	"k8s.io/klog/v2"
+	drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1beta1"
+)
+
+var _ drapbv1.DRAPluginServer = &driver{}
+
+type driver struct {
+	sync.Mutex
+	client coreclientset.Interface
+	plugin kubeletplugin.DRAPlugin
+	state  *DeviceState
+}
+
+func NewDriver(ctx context.Context, config *Config) (*driver, error) {
+	driver := &driver{
+		client: config.clientsets.Core,
+	}
+
+	state, err := NewDeviceState(ctx, config)
+	if err != nil {
+		return nil, err
+	}
+	driver.state = state
+
+	plugin, err := kubeletplugin.Start(
+		ctx,
+		[]any{driver},
+		kubeletplugin.KubeClient(driver.client),
+		kubeletplugin.NodeName(config.flags.nodeName),
+		kubeletplugin.DriverName(DriverName),
+		kubeletplugin.RegistrarSocketPath(PluginRegistrationPath),
+		kubeletplugin.PluginSocketPath(DriverPluginSocketPath),
+		kubeletplugin.KubeletPluginSocketPath(DriverPluginSocketPath))
+	if err != nil {
+		return nil, err
+	}
+	driver.plugin = plugin
+
+	// If we are not responsible for advertising GPUs or MIG devices, we are done.
+	if !(config.flags.deviceClasses.Has(GpuDeviceType) || config.flags.deviceClasses.Has(MigDeviceType)) {
+		return driver, nil
+	}
+
+	// Otherwise, enumerate the set of GPU and MIG devices and publish them.
+	var resources kubeletplugin.Resources
+	for _, device := range state.allocatable {
+		// Explicitly exclude IMEX channels from being advertised here. They
+		// are instead advertised as a network resource from the control plane.
+		if device.Type() == ImexChannelType {
+			continue
+		}
+		resources.Devices = append(resources.Devices, device.GetDevice())
+	}
+
+	if err := plugin.PublishResources(ctx, resources); err != nil {
+		return nil, err
+	}
+
+	return driver, nil
+}
+
+func (d *driver) Shutdown() error {
+	if d == nil {
+		return nil
+	}
+	d.plugin.Stop()
+	return nil
+}
+
+func (d *driver) NodePrepareResources(ctx context.Context, req *drapbv1.NodePrepareResourcesRequest) (*drapbv1.NodePrepareResourcesResponse, error) {
+	klog.Infof("NodePrepareResource is called: number of claims: %d", len(req.Claims))
+	preparedResources := &drapbv1.NodePrepareResourcesResponse{Claims: map[string]*drapbv1.NodePrepareResourceResponse{}}
+
+	for _, claim := range req.Claims {
+		preparedResources.Claims[claim.UID] = d.nodePrepareResource(ctx, claim)
+	}
+
+	return preparedResources, nil
+}
+
+func (d *driver) NodeUnprepareResources(ctx context.Context, req *drapbv1.NodeUnprepareResourcesRequest) (*drapbv1.NodeUnprepareResourcesResponse, error) {
+	klog.Infof("NodeUnprepareResource is called: number of claims: %d", len(req.Claims))
+	unpreparedResources := &drapbv1.NodeUnprepareResourcesResponse{Claims: map[string]*drapbv1.NodeUnprepareResourceResponse{}}
+
+	for _, claim := range req.Claims {
+		unpreparedResources.Claims[claim.UID] = d.nodeUnprepareResource(ctx, claim)
+	}
+
+	return unpreparedResources, nil
+}
+
+func (d *driver) nodePrepareResource(ctx context.Context, claim *drapbv1.Claim) *drapbv1.NodePrepareResourceResponse {
+	d.Lock()
+	defer d.Unlock()
+
+	resourceClaim, err := d.client.ResourceV1beta1().ResourceClaims(claim.Namespace).Get(
+		ctx,
+		claim.Name,
+		metav1.GetOptions{})
+	if err != nil {
+		return &drapbv1.NodePrepareResourceResponse{
+			Error: fmt.Sprintf("failed to fetch ResourceClaim %s in namespace %s: %v", claim.Name, claim.Namespace, err),
+		}
+	}
+
+	prepared, err := d.state.Prepare(ctx,
resourceClaim) + if err != nil { + return &drapbv1.NodePrepareResourceResponse{ + Error: fmt.Sprintf("error preparing devices for claim %v: %v", claim.UID, err), + } + } + + klog.Infof("Returning newly prepared devices for claim '%v': %v", claim.UID, prepared) + return &drapbv1.NodePrepareResourceResponse{Devices: prepared} +} + +func (d *driver) nodeUnprepareResource(ctx context.Context, claim *drapbv1.Claim) *drapbv1.NodeUnprepareResourceResponse { + d.Lock() + defer d.Unlock() + + if err := d.state.Unprepare(ctx, claim.UID); err != nil { + return &drapbv1.NodeUnprepareResourceResponse{ + Error: fmt.Sprintf("error unpreparing devices for claim %v: %v", claim.UID, err), + } + } + + return &drapbv1.NodeUnprepareResourceResponse{} +} + +// TODO: implement loop to remove CDI files from the CDI path for claimUIDs +// that have been removed from the AllocatedClaims map. +// func (d *driver) cleanupCDIFiles(wg *sync.WaitGroup) chan error { +// errors := make(chan error) +// return errors +// } +// +// TODO: implement loop to remove mpsControlDaemon folders from the mps +// path for claimUIDs that have been removed from the AllocatedClaims map. +// func (d *driver) cleanupMpsControlDaemonArtifacts(wg *sync.WaitGroup) chan error { +// errors := make(chan error) +// return errors +// } diff --git a/cmd/nvidia-dra-imex-plugin/main.go b/cmd/nvidia-dra-imex-plugin/main.go new file mode 100644 index 000000000..b6ff16d3b --- /dev/null +++ b/cmd/nvidia-dra-imex-plugin/main.go @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2022-2023 NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
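One detail worth calling out in the NodePrepareResources/NodeUnprepareResources handlers above: per-claim failures are reported through the Error field of each per-claim response rather than failing the whole gRPC call, so the kubelet can retry claims independently. A minimal sketch of a mixed result, with made-up claim UIDs:

resp := &drapbv1.NodePrepareResourcesResponse{
	Claims: map[string]*drapbv1.NodePrepareResourceResponse{
		"uid-a": {}, // prepared successfully (Devices omitted for brevity)
		"uid-b": {Error: "error preparing devices for claim uid-b: ..."},
	},
}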
+ */ + +package main + +import ( + "context" + "fmt" + "os" + "os/signal" + "syscall" + + "github.com/urfave/cli/v2" + + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + + "github.com/NVIDIA/k8s-dra-driver/internal/info" + "github.com/NVIDIA/k8s-dra-driver/pkg/flags" +) + +const ( + DriverName = "gpu.nvidia.com" + + PluginRegistrationPath = "/var/lib/kubelet/plugins_registry/" + DriverName + ".sock" + DriverPluginPath = "/var/lib/kubelet/plugins/" + DriverName + DriverPluginSocketPath = DriverPluginPath + "/plugin.sock" + DriverPluginCheckpointFile = "checkpoint.json" +) + +type Flags struct { + kubeClientConfig flags.KubeClientConfig + loggingConfig *flags.LoggingConfig + + nodeName string + namespace string + cdiRoot string + containerDriverRoot string + hostDriverRoot string + nvidiaCTKPath string + deviceClasses sets.Set[string] +} + +type Config struct { + flags *Flags + clientsets flags.ClientSets +} + +func main() { + if err := newApp().Run(os.Args); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func newApp() *cli.App { + flags := &Flags{ + loggingConfig: flags.NewLoggingConfig(), + } + cliFlags := []cli.Flag{ + &cli.StringFlag{ + Name: "node-name", + Usage: "The name of the node to be worked on.", + Required: true, + Destination: &flags.nodeName, + EnvVars: []string{"NODE_NAME"}, + }, + &cli.StringFlag{ + Name: "namespace", + Usage: "The namespace used for the custom resources.", + Value: "default", + Destination: &flags.namespace, + EnvVars: []string{"NAMESPACE"}, + }, + &cli.StringFlag{ + Name: "cdi-root", + Usage: "Absolute path to the directory where CDI files will be generated.", + Value: "/etc/cdi", + Destination: &flags.cdiRoot, + EnvVars: []string{"CDI_ROOT"}, + }, + &cli.StringFlag{ + Name: "nvidia-driver-root", + Aliases: []string{"host_driver-root"}, + Value: "/", + Usage: "the root path for the NVIDIA driver installation on the host (typical values are '/' or '/run/nvidia/driver')", + Destination: &flags.hostDriverRoot, + EnvVars: []string{"NVIDIA_DRIVER_ROOT", "HOST_DRIVER_ROOT"}, + }, + &cli.StringFlag{ + Name: "container-driver-root", + Value: "/driver-root", + Usage: "the path where the NVIDIA driver root is mounted in the container; used for generating CDI specifications", + Destination: &flags.containerDriverRoot, + EnvVars: []string{"CONTAINER_DRIVER_ROOT"}, + }, + &cli.StringFlag{ + Name: "nvidia-ctk-path", + Value: "/usr/bin/nvidia-ctk", + Usage: "the path to use for the nvidia-ctk in the generated CDI specification. Note that this represents the path on the host.", + Destination: &flags.nvidiaCTKPath, + EnvVars: []string{"NVIDIA_CTK_PATH"}, + }, + &cli.StringSliceFlag{ + Name: "device-classes", + Usage: "The supported set of DRA device classes", + Value: cli.NewStringSlice(GpuDeviceType, MigDeviceType, ImexChannelType), + EnvVars: []string{"DEVICE_CLASSES"}, + }, + } + cliFlags = append(cliFlags, flags.kubeClientConfig.Flags()...) + cliFlags = append(cliFlags, flags.loggingConfig.Flags()...) + + app := &cli.App{ + Name: "nvidia-dra-imex-plugin", + Usage: "nvidia-dra-imex-plugin implements a DRA driver plugin for NVIDIA IMEX daemons.", + ArgsUsage: " ", + HideHelpCommand: true, + Flags: cliFlags, + Before: func(c *cli.Context) error { + if c.Args().Len() > 0 { + return fmt.Errorf("arguments not supported: %v", c.Args().Slice()) + } + return flags.loggingConfig.Apply() + }, + Action: func(c *cli.Context) error { + ctx := c.Context + flags.deviceClasses = sets.New[string](c.StringSlice("device-classes")...) 
+ + clientSets, err := flags.kubeClientConfig.NewClientSets() + if err != nil { + return fmt.Errorf("create client: %w", err) + } + + config := &Config{ + flags: flags, + clientsets: clientSets, + } + + return StartPlugin(ctx, config) + }, + Version: info.GetVersionString(), + } + + // We remove the -v alias for the version flag so as to not conflict with the -v flag used for klog. + f, ok := cli.VersionFlag.(*cli.BoolFlag) + if ok { + f.Aliases = nil + } + + return app +} + +func StartPlugin(ctx context.Context, config *Config) error { + err := os.MkdirAll(DriverPluginPath, 0750) + if err != nil { + return err + } + + info, err := os.Stat(config.flags.cdiRoot) + switch { + case err != nil && os.IsNotExist(err): + err := os.MkdirAll(config.flags.cdiRoot, 0750) + if err != nil { + return err + } + case err != nil: + return err + case !info.IsDir(): + return fmt.Errorf("path for cdi file generation is not a directory: '%v'", config.flags.cdiRoot) + } + + sigs := make(chan os.Signal, 1) + signal.Notify(sigs, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) + + var driver *driver + ctx, cancel := context.WithCancel(ctx) + defer func() { + cancel() + if err := driver.Shutdown(); err != nil { + klog.Errorf("Unable to cleanly shutdown driver: %v", err) + } + }() + + driver, err = NewDriver(ctx, config) + if err != nil { + return fmt.Errorf("error creating driver: %w", err) + } + + <-sigs + + return nil +} diff --git a/cmd/nvidia-dra-imex-plugin/nvlib.go b/cmd/nvidia-dra-imex-plugin/nvlib.go new file mode 100644 index 000000000..421e7a505 --- /dev/null +++ b/cmd/nvidia-dra-imex-plugin/nvlib.go @@ -0,0 +1,669 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "bufio" + "fmt" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + + "golang.org/x/sys/unix" + "k8s.io/klog/v2" + + nvdev "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" + "github.com/NVIDIA/go-nvml/pkg/nvml" +) + +const ( + procDevicesPath = "/proc/devices" + nvidiaCapsImexChannelsDeviceName = "nvidia-caps-imex-channels" +) + +type deviceLib struct { + nvdev.Interface + nvmllib nvml.Interface + driverLibraryPath string + devRoot string + nvidiaSMIPath string +} + +func newDeviceLib(driverRoot root) (*deviceLib, error) { + driverLibraryPath, err := driverRoot.getDriverLibraryPath() + if err != nil { + return nil, fmt.Errorf("failed to locate driver libraries: %w", err) + } + + nvidiaSMIPath, err := driverRoot.getNvidiaSMIPath() + if err != nil { + return nil, fmt.Errorf("failed to locate nvidia-smi: %w", err) + } + + // We construct an NVML library specifying the path to libnvidia-ml.so.1 + // explicitly so that we don't have to rely on the library path. 
+ nvmllib := nvml.New( + nvml.WithLibraryPath(driverLibraryPath), + ) + d := deviceLib{ + Interface: nvdev.New(nvmllib), + nvmllib: nvmllib, + driverLibraryPath: driverLibraryPath, + devRoot: driverRoot.getDevRoot(), + nvidiaSMIPath: nvidiaSMIPath, + } + return &d, nil +} + +// prependPathListEnvvar prepends a specified list of strings to a specified envvar and returns its value. +func prependPathListEnvvar(envvar string, prepend ...string) string { + if len(prepend) == 0 { + return os.Getenv(envvar) + } + current := filepath.SplitList(os.Getenv(envvar)) + return strings.Join(append(prepend, current...), string(filepath.ListSeparator)) +} + +// setOrOverrideEnvvar adds or updates an envar to the list of specified envvars and returns it. +func setOrOverrideEnvvar(envvars []string, key, value string) []string { + var updated []string + for _, envvar := range envvars { + pair := strings.SplitN(envvar, "=", 2) + if pair[0] == key { + continue + } + updated = append(updated, envvar) + } + return append(updated, fmt.Sprintf("%s=%s", key, value)) +} + +func (l deviceLib) Init() error { + ret := l.nvmllib.Init() + if ret != nvml.SUCCESS { + return fmt.Errorf("error initializing NVML: %v", ret) + } + return nil +} + +func (l deviceLib) alwaysShutdown() { + ret := l.nvmllib.Shutdown() + if ret != nvml.SUCCESS { + klog.Warningf("error shutting down NVML: %v", ret) + } +} + +func (l deviceLib) enumerateAllPossibleDevices(config *Config) (AllocatableDevices, error) { + alldevices := make(AllocatableDevices) + deviceClasses := config.flags.deviceClasses + + if deviceClasses.Has(GpuDeviceType) || deviceClasses.Has(MigDeviceType) { + gms, err := l.enumerateGpusAndMigDevices(config) + if err != nil { + return nil, fmt.Errorf("error enumerating GPUs and MIG devices: %w", err) + } + for k, v := range gms { + alldevices[k] = v + } + } + + if deviceClasses.Has(ImexChannelType) { + imex, err := l.enumerateImexChannels(config) + if err != nil { + return nil, fmt.Errorf("error enumerating IMEX devices: %w", err) + } + for k, v := range imex { + alldevices[k] = v + } + } + + return alldevices, nil +} + +func (l deviceLib) enumerateGpusAndMigDevices(config *Config) (AllocatableDevices, error) { + if err := l.Init(); err != nil { + return nil, err + } + defer l.alwaysShutdown() + + devices := make(AllocatableDevices) + deviceClasses := config.flags.deviceClasses + err := l.VisitDevices(func(i int, d nvdev.Device) error { + gpuInfo, err := l.getGpuInfo(i, d) + if err != nil { + return fmt.Errorf("error getting info for GPU %d: %w", i, err) + } + + if deviceClasses.Has(GpuDeviceType) && !gpuInfo.migEnabled { + deviceInfo := &AllocatableDevice{ + Gpu: gpuInfo, + } + devices[gpuInfo.CanonicalName()] = deviceInfo + } + + if deviceClasses.Has(MigDeviceType) { + migs, err := l.getMigDevices(gpuInfo) + if err != nil { + return fmt.Errorf("error getting MIG devices for GPU %d: %w", i, err) + } + + for _, migDeviceInfo := range migs { + deviceInfo := &AllocatableDevice{ + Mig: migDeviceInfo, + } + devices[migDeviceInfo.CanonicalName()] = deviceInfo + } + } + + return nil + }) + if err != nil { + return nil, fmt.Errorf("error visiting devices: %w", err) + } + + return devices, nil +} + +func (l deviceLib) enumerateImexChannels(config *Config) (AllocatableDevices, error) { + devices := make(AllocatableDevices) + + imexChannelCount, err := l.getImexChannelCount() + if err != nil { + return nil, fmt.Errorf("error getting IMEX channel count: %w", err) + } + for i := 0; i < imexChannelCount; i++ { + imexChannelInfo := 
&ImexChannelInfo{
+			Channel: i,
+		}
+		deviceInfo := &AllocatableDevice{
+			ImexChannel: imexChannelInfo,
+		}
+		devices[imexChannelInfo.CanonicalName()] = deviceInfo
+	}
+
+	return devices, nil
+}
+
+func (l deviceLib) getGpuInfo(index int, device nvdev.Device) (*GpuInfo, error) {
+	minor, ret := device.GetMinorNumber()
+	if ret != nvml.SUCCESS {
+		return nil, fmt.Errorf("error getting minor number for device %d: %v", index, ret)
+	}
+	uuid, ret := device.GetUUID()
+	if ret != nvml.SUCCESS {
+		return nil, fmt.Errorf("error getting UUID for device %d: %v", index, ret)
+	}
+	migEnabled, err := device.IsMigEnabled()
+	if err != nil {
+		return nil, fmt.Errorf("error checking if MIG mode enabled for device %d: %w", index, err)
+	}
+	memory, ret := device.GetMemoryInfo()
+	if ret != nvml.SUCCESS {
+		return nil, fmt.Errorf("error getting memory info for device %d: %v", index, ret)
+	}
+	productName, ret := device.GetName()
+	if ret != nvml.SUCCESS {
+		return nil, fmt.Errorf("error getting product name for device %d: %v", index, ret)
+	}
+	architecture, err := device.GetArchitectureAsString()
+	if err != nil {
+		return nil, fmt.Errorf("error getting architecture for device %d: %w", index, err)
+	}
+	brand, err := device.GetBrandAsString()
+	if err != nil {
+		return nil, fmt.Errorf("error getting brand for device %d: %w", index, err)
+	}
+	cudaComputeCapability, err := device.GetCudaComputeCapabilityAsString()
+	if err != nil {
+		return nil, fmt.Errorf("error getting CUDA compute capability for device %d: %w", index, err)
+	}
+	driverVersion, ret := l.nvmllib.SystemGetDriverVersion()
+	if ret != nvml.SUCCESS {
+		return nil, fmt.Errorf("error getting driver version: %v", ret)
+	}
+	cudaDriverVersion, ret := l.nvmllib.SystemGetCudaDriverVersion()
+	if ret != nvml.SUCCESS {
+		return nil, fmt.Errorf("error getting CUDA driver version: %v", ret)
+	}
+
+	var migProfiles []*MigProfileInfo
+	for i := 0; i < nvml.GPU_INSTANCE_PROFILE_COUNT; i++ {
+		giProfileInfo, ret := device.GetGpuInstanceProfileInfo(i)
+		if ret == nvml.ERROR_NOT_SUPPORTED {
+			continue
+		}
+		if ret == nvml.ERROR_INVALID_ARGUMENT {
+			continue
+		}
+		if ret != nvml.SUCCESS {
+			return nil, fmt.Errorf("error retrieving GpuInstanceProfileInfo for profile %d on GPU %v", i, uuid)
+		}
+
+		giPossiblePlacements, ret := device.GetGpuInstancePossiblePlacements(&giProfileInfo)
+		if ret == nvml.ERROR_NOT_SUPPORTED {
+			continue
+		}
+		if ret == nvml.ERROR_INVALID_ARGUMENT {
+			continue
+		}
+		if ret != nvml.SUCCESS {
+			return nil, fmt.Errorf("error retrieving GpuInstancePossiblePlacements for profile %d on GPU %v", i, uuid)
+		}
+
+		var migDevicePlacements []*MigDevicePlacement
+		for _, p := range giPossiblePlacements {
+			mdp := &MigDevicePlacement{
+				GpuInstancePlacement: p,
+			}
+			migDevicePlacements = append(migDevicePlacements, mdp)
+		}
+
+		for j := 0; j < nvml.COMPUTE_INSTANCE_PROFILE_COUNT; j++ {
+			for k := 0; k < nvml.COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT; k++ {
+				migProfile, err := l.NewMigProfile(i, j, k, giProfileInfo.MemorySizeMB, memory.Total)
+				if err != nil {
+					return nil, fmt.Errorf("error building MIG profile from GpuInstanceProfileInfo for profile %d on GPU %v", i, uuid)
+				}
+
+				if migProfile.GetInfo().G != migProfile.GetInfo().C {
+					continue
+				}
+
+				profileInfo := &MigProfileInfo{
+					profile:    migProfile,
+					placements: migDevicePlacements,
+				}
+
+				migProfiles = append(migProfiles, profileInfo)
+			}
+		}
+	}
+
+	gpuInfo := &GpuInfo{
+		UUID:        uuid,
+		minor:       minor,
+		index:       index,
+		migEnabled:  migEnabled,
+		memoryBytes: memory.Total,
+		productName:
productName, + brand: brand, + architecture: architecture, + cudaComputeCapability: cudaComputeCapability, + driverVersion: driverVersion, + cudaDriverVersion: fmt.Sprintf("%v.%v", cudaDriverVersion/1000, (cudaDriverVersion%1000)/10), + migProfiles: migProfiles, + } + + return gpuInfo, nil +} + +func (l deviceLib) getMigDevices(gpuInfo *GpuInfo) (map[string]*MigDeviceInfo, error) { + if !gpuInfo.migEnabled { + return nil, nil + } + + if err := l.Init(); err != nil { + return nil, err + } + defer l.alwaysShutdown() + + device, ret := l.nvmllib.DeviceGetHandleByUUID(gpuInfo.UUID) + if ret != nvml.SUCCESS { + return nil, fmt.Errorf("error getting GPU device handle: %v", ret) + } + + migInfos := make(map[string]*MigDeviceInfo) + err := walkMigDevices(device, func(i int, migDevice nvml.Device) error { + giID, ret := migDevice.GetGpuInstanceId() + if ret != nvml.SUCCESS { + return fmt.Errorf("error getting GPU instance ID for MIG device: %v", ret) + } + gi, ret := device.GetGpuInstanceById(giID) + if ret != nvml.SUCCESS { + return fmt.Errorf("error getting GPU instance for '%v': %v", giID, ret) + } + giInfo, ret := gi.GetInfo() + if ret != nvml.SUCCESS { + return fmt.Errorf("error getting GPU instance info for '%v': %v", giID, ret) + } + ciID, ret := migDevice.GetComputeInstanceId() + if ret != nvml.SUCCESS { + return fmt.Errorf("error getting Compute instance ID for MIG device: %v", ret) + } + ci, ret := gi.GetComputeInstanceById(ciID) + if ret != nvml.SUCCESS { + return fmt.Errorf("error getting Compute instance for '%v': %v", ciID, ret) + } + ciInfo, ret := ci.GetInfo() + if ret != nvml.SUCCESS { + return fmt.Errorf("error getting Compute instance info for '%v': %v", ciID, ret) + } + uuid, ret := migDevice.GetUUID() + if ret != nvml.SUCCESS { + return fmt.Errorf("error getting UUID for MIG device: %v", ret) + } + + var migProfile *MigProfileInfo + var giProfileInfo *nvml.GpuInstanceProfileInfo + var ciProfileInfo *nvml.ComputeInstanceProfileInfo + for _, profile := range gpuInfo.migProfiles { + profileInfo := profile.profile.GetInfo() + gipInfo, ret := device.GetGpuInstanceProfileInfo(profileInfo.GIProfileID) + if ret != nvml.SUCCESS { + continue + } + if giInfo.ProfileId != gipInfo.Id { + continue + } + cipInfo, ret := gi.GetComputeInstanceProfileInfo(profileInfo.CIProfileID, profileInfo.CIEngProfileID) + if ret != nvml.SUCCESS { + continue + } + if ciInfo.ProfileId != cipInfo.Id { + continue + } + migProfile = profile + giProfileInfo = &gipInfo + ciProfileInfo = &cipInfo + } + if migProfile == nil { + return fmt.Errorf("error getting profile info for MIG device: %v", uuid) + } + + placement := MigDevicePlacement{ + GpuInstancePlacement: giInfo.Placement, + } + + migInfos[uuid] = &MigDeviceInfo{ + UUID: uuid, + index: i, + profile: migProfile.String(), + parent: gpuInfo, + placement: &placement, + giProfileInfo: giProfileInfo, + giInfo: &giInfo, + ciProfileInfo: ciProfileInfo, + ciInfo: &ciInfo, + } + return nil + }) + if err != nil { + return nil, fmt.Errorf("error enumerating MIG devices: %w", err) + } + + if len(migInfos) == 0 { + return nil, nil + } + + return migInfos, nil +} + +func walkMigDevices(d nvml.Device, f func(i int, d nvml.Device) error) error { + count, ret := nvml.Device(d).GetMaxMigDeviceCount() + if ret != nvml.SUCCESS { + return fmt.Errorf("error getting max MIG device count: %v", ret) + } + + for i := 0; i < count; i++ { + device, ret := d.GetMigDeviceHandleByIndex(i) + if ret == nvml.ERROR_NOT_FOUND { + continue + } + if ret == nvml.ERROR_INVALID_ARGUMENT { + continue 
+ } + if ret != nvml.SUCCESS { + return fmt.Errorf("error getting MIG device handle at index '%v': %v", i, ret) + } + err := f(i, device) + if err != nil { + return err + } + } + return nil +} + +func (l deviceLib) getImexChannelCount() (int, error) { + // TODO: Pull this value from /proc/driver/nvidia/params + return 2048, nil +} + +func (l deviceLib) getImexChannelMajor() (int, error) { + file, err := os.Open(procDevicesPath) + if err != nil { + return -1, err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + foundCharDevices := false + + for scanner.Scan() { + line := scanner.Text() + + // Ignore empty lines + if line == "" { + continue + } + + // Check for any line with text followed by a colon (header) + if strings.Contains(line, ":") { + // Stop if we've already found the character devices section and reached another section + if foundCharDevices { + break + } + // Check if we entered the character devices section + if strings.HasSuffix(line, ":") && strings.HasPrefix(line, "Character") { + foundCharDevices = true + } + // Continue to the next line, regardless + continue + } + + // If we've passed the character devices section, check for nvidiaCapsImexChannelsDeviceName + if foundCharDevices { + parts := strings.Fields(line) + if len(parts) == 2 && parts[1] == nvidiaCapsImexChannelsDeviceName { + return strconv.Atoi(parts[0]) + } + } + } + + return -1, scanner.Err() +} + +func (l deviceLib) createImexChannelDevice(channel int) error { + // Construct the properties of the device node to create. + path := fmt.Sprintf("/dev/nvidia-caps-imex-channels/channel%d", channel) + path = filepath.Join(l.devRoot, path) + mode := uint32(unix.S_IFCHR | 0666) + + // Get the IMEX channel major and build a /dev device from it + major, err := l.getImexChannelMajor() + if err != nil { + return fmt.Errorf("error getting IMEX channel major: %w", err) + } + dev := unix.Mkdev(uint32(major), uint32(channel)) + + // Recursively create any parent directories of the channel. + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return fmt.Errorf("error creating directory for IMEX channel device nodes: %w", err) + } + + // Remove the channel if it already exists. + if err := os.Remove(path); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("error removing existing IMEX channel device node: %w", err) + } + + // Create the device node using syscall.Mknod + if err := unix.Mknod(path, mode, int(dev)); err != nil { + return fmt.Errorf("mknod of IMEX channel failed: %w", err) + } + + return nil +} + +func (l deviceLib) setTimeSlice(uuids []string, timeSlice int) error { + for _, uuid := range uuids { + cmd := exec.Command( + l.nvidiaSMIPath, + "compute-policy", + "-i", uuid, + "--set-timeslice", fmt.Sprintf("%d", timeSlice)) + + // In order for nvidia-smi to run, we need update LD_PRELOAD to include the path to libnvidia-ml.so.1. + cmd.Env = setOrOverrideEnvvar(os.Environ(), "LD_PRELOAD", prependPathListEnvvar("LD_PRELOAD", l.driverLibraryPath)) + + output, err := cmd.CombinedOutput() + if err != nil { + klog.Errorf("\n%v", string(output)) + return fmt.Errorf("error running nvidia-smi: %w", err) + } + } + return nil +} + +func (l deviceLib) setComputeMode(uuids []string, mode string) error { + for _, uuid := range uuids { + cmd := exec.Command( + l.nvidiaSMIPath, + "-i", uuid, + "-c", mode) + + // In order for nvidia-smi to run, we need update LD_PRELOAD to include the path to libnvidia-ml.so.1. 
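To make the scan in getImexChannelMajor (a few functions up) concrete, this is the input shape it expects; the same logic over an in-memory sample (content abbreviated and illustrative):

package main

import (
	"bufio"
	"fmt"
	"strconv"
	"strings"
)

func main() {
	sample := `Character devices:
  1 mem
195 nvidia
234 nvidia-caps-imex-channels

Block devices:
  8 sd`

	scanner := bufio.NewScanner(strings.NewReader(sample))
	inCharSection := false
	for scanner.Scan() {
		line := scanner.Text()
		// Section headers end in ":"; only the character-device section counts.
		if strings.HasSuffix(line, ":") {
			inCharSection = strings.HasPrefix(line, "Character")
			continue
		}
		parts := strings.Fields(line)
		if inCharSection && len(parts) == 2 && parts[1] == "nvidia-caps-imex-channels" {
			major, _ := strconv.Atoi(parts[0])
			fmt.Println("IMEX channel major:", major) // 234
		}
	}
}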
+ cmd.Env = setOrOverrideEnvvar(os.Environ(), "LD_PRELOAD", prependPathListEnvvar("LD_PRELOAD", l.driverLibraryPath)) + + output, err := cmd.CombinedOutput() + if err != nil { + klog.Errorf("\n%v", string(output)) + return fmt.Errorf("error running nvidia-smi: %w", err) + } + } + return nil +} + +// TODO: Reenable dynamic MIG functionality once it is supported in Kubernetes 1.32 +// +// func (l deviceLib) createMigDevice(gpu *GpuInfo, profile nvdev.MigProfile, placement *nvml.GpuInstancePlacement) (*MigDeviceInfo, error) { +// if err := l.Init(); err != nil { +// return nil, err +// } +// defer l.alwaysShutdown() +// +// profileInfo := profile.GetInfo() +// +// device, ret := l.nvmllib.DeviceGetHandleByUUID(gpu.UUID) +// if ret != nvml.SUCCESS { +// return nil, fmt.Errorf("error getting GPU device handle: %v", ret) +// } +// +// giProfileInfo, ret := device.GetGpuInstanceProfileInfo(profileInfo.GIProfileID) +// if ret != nvml.SUCCESS { +// return nil, fmt.Errorf("error getting GPU instance profile info for '%v': %v", profile, ret) +// } +// +// gi, ret := device.CreateGpuInstanceWithPlacement(&giProfileInfo, placement) +// if ret != nvml.SUCCESS { +// return nil, fmt.Errorf("error creating GPU instance for '%v': %v", profile, ret) +// } +// +// giInfo, ret := gi.GetInfo() +// if ret != nvml.SUCCESS { +// return nil, fmt.Errorf("error getting GPU instance info for '%v': %v", profile, ret) +// } +// +// ciProfileInfo, ret := gi.GetComputeInstanceProfileInfo(profileInfo.CIProfileID, profileInfo.CIEngProfileID) +// if ret != nvml.SUCCESS { +// return nil, fmt.Errorf("error getting Compute instance profile info for '%v': %v", profile, ret) +// } +// +// ci, ret := gi.CreateComputeInstance(&ciProfileInfo) +// if ret != nvml.SUCCESS { +// return nil, fmt.Errorf("error creating Compute instance for '%v': %v", profile, ret) +// } +// +// ciInfo, ret := ci.GetInfo() +// if ret != nvml.SUCCESS { +// return nil, fmt.Errorf("error getting GPU instance info for '%v': %v", profile, ret) +// } +// +// uuid := "" +// err := walkMigDevices(device, func(i int, migDevice nvml.Device) error { +// giID, ret := migDevice.GetGpuInstanceId() +// if ret != nvml.SUCCESS { +// return fmt.Errorf("error getting GPU instance ID for MIG device: %v", ret) +// } +// ciID, ret := migDevice.GetComputeInstanceId() +// if ret != nvml.SUCCESS { +// return fmt.Errorf("error getting Compute instance ID for MIG device: %v", ret) +// } +// if giID != int(giInfo.Id) || ciID != int(ciInfo.Id) { +// return nil +// } +// uuid, ret = migDevice.GetUUID() +// if ret != nvml.SUCCESS { +// return fmt.Errorf("error getting UUID for MIG device: %v", ret) +// } +// return nil +// }) +// if err != nil { +// return nil, fmt.Errorf("error processing MIG device for GI and CI just created: %w", err) +// } +// if uuid == "" { +// return nil, fmt.Errorf("unable to find MIG device for GI and CI just created") +// } +// +// migInfo := &MigDeviceInfo{ +// UUID: uuid, +// parent: gpu, +// profile: profile, +// giInfo: &giInfo, +// ciInfo: &ciInfo, +// } +// +// return migInfo, nil +// } +// +// func (l deviceLib) deleteMigDevice(mig *MigDeviceInfo) error { +// if err := l.Init(); err != nil { +// return err +// } +// defer l.alwaysShutdown() +// +// parent, ret := l.nvmllib.DeviceGetHandleByUUID(mig.parent.UUID) +// if ret != nvml.SUCCESS { +// return fmt.Errorf("error getting device from UUID '%v': %v", mig.parent.UUID, ret) +// } +// gi, ret := parent.GetGpuInstanceById(int(mig.giInfo.Id)) +// if ret != nvml.SUCCESS { +// return fmt.Errorf("error 
getting GPU instance ID for MIG device: %v", ret) +// } +// ci, ret := gi.GetComputeInstanceById(int(mig.ciInfo.Id)) +// if ret != nvml.SUCCESS { +// return fmt.Errorf("error getting Compute instance ID for MIG device: %v", ret) +// } +// ret = ci.Destroy() +// if ret != nvml.SUCCESS { +// return fmt.Errorf("error destroying Compute Instance: %v", ret) +// } +// ret = gi.Destroy() +// if ret != nvml.SUCCESS { +// return fmt.Errorf("error destroying GPU Instance: %v", ret) +// } +// return nil +// } diff --git a/cmd/nvidia-dra-imex-plugin/prepared.go b/cmd/nvidia-dra-imex-plugin/prepared.go new file mode 100644 index 000000000..e2d01f795 --- /dev/null +++ b/cmd/nvidia-dra-imex-plugin/prepared.go @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "slices" + + drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1beta1" +) + +type PreparedDeviceList []PreparedDevice +type PreparedDevices []*PreparedDeviceGroup +type PreparedClaims map[string]PreparedDevices + +type PreparedDevice struct { + Gpu *PreparedGpu `json:"gpu"` + Mig *PreparedMigDevice `json:"mig"` + ImexChannel *PreparedImexChannel `json:"imexChannel"` +} + +type PreparedGpu struct { + Info *GpuInfo `json:"info"` + Device *drapbv1.Device `json:"device"` +} + +type PreparedMigDevice struct { + Info *MigDeviceInfo `json:"info"` + Device *drapbv1.Device `json:"device"` +} + +type PreparedImexChannel struct { + Info *ImexChannelInfo `json:"info"` + Device *drapbv1.Device `json:"device"` +} + +type PreparedDeviceGroup struct { + Devices PreparedDeviceList `json:"devices"` + ConfigState DeviceConfigState `json:"configState"` +} + +func (d PreparedDevice) Type() string { + if d.Gpu != nil { + return GpuDeviceType + } + if d.Mig != nil { + return MigDeviceType + } + if d.ImexChannel != nil { + return ImexChannelType + } + return UnknownDeviceType +} + +func (d *PreparedDevice) CanonicalName() string { + switch d.Type() { + case GpuDeviceType: + return d.Gpu.Info.CanonicalName() + case MigDeviceType: + return d.Mig.Info.CanonicalName() + case ImexChannelType: + return d.ImexChannel.Info.CanonicalName() + } + panic("unexpected type for AllocatableDevice") +} + +func (d *PreparedDevice) CanonicalIndex() string { + switch d.Type() { + case GpuDeviceType: + return d.Gpu.Info.CanonicalIndex() + case MigDeviceType: + return d.Mig.Info.CanonicalIndex() + case ImexChannelType: + return d.ImexChannel.Info.CanonicalIndex() + } + panic("unexpected type for AllocatableDevice") +} + +func (l PreparedDeviceList) Gpus() PreparedDeviceList { + var devices PreparedDeviceList + for _, device := range l { + if device.Type() == GpuDeviceType { + devices = append(devices, device) + } + } + return devices +} + +func (l PreparedDeviceList) MigDevices() PreparedDeviceList { + var devices PreparedDeviceList + for _, device := range l { + if device.Type() == MigDeviceType { + devices = append(devices, device) + } + } + return devices +} + +func (l 
PreparedDeviceList) ImexChannels() PreparedDeviceList { + var devices PreparedDeviceList + for _, device := range l { + if device.Type() == ImexChannelType { + devices = append(devices, device) + } + } + return devices +} + +func (d PreparedDevices) GetDevices() []*drapbv1.Device { + var devices []*drapbv1.Device + for _, group := range d { + devices = append(devices, group.GetDevices()...) + } + return devices +} + +func (g *PreparedDeviceGroup) GetDevices() []*drapbv1.Device { + var devices []*drapbv1.Device + for _, device := range g.Devices { + switch device.Type() { + case GpuDeviceType: + devices = append(devices, device.Gpu.Device) + case MigDeviceType: + devices = append(devices, device.Mig.Device) + case ImexChannelType: + devices = append(devices, device.ImexChannel.Device) + } + } + return devices +} + +func (l PreparedDeviceList) UUIDs() []string { + uuids := append(l.GpuUUIDs(), l.MigDeviceUUIDs()...) + slices.Sort(uuids) + return uuids +} + +func (g *PreparedDeviceGroup) UUIDs() []string { + uuids := append(g.GpuUUIDs(), g.MigDeviceUUIDs()...) + slices.Sort(uuids) + return uuids +} + +func (d PreparedDevices) UUIDs() []string { + uuids := append(d.GpuUUIDs(), d.MigDeviceUUIDs()...) + slices.Sort(uuids) + return uuids +} + +func (l PreparedDeviceList) GpuUUIDs() []string { + var uuids []string + for _, device := range l.Gpus() { + uuids = append(uuids, device.Gpu.Info.UUID) + } + slices.Sort(uuids) + return uuids +} + +func (g *PreparedDeviceGroup) GpuUUIDs() []string { + return g.Devices.Gpus().UUIDs() +} + +func (d PreparedDevices) GpuUUIDs() []string { + var uuids []string + for _, group := range d { + uuids = append(uuids, group.GpuUUIDs()...) + } + slices.Sort(uuids) + return uuids +} + +func (l PreparedDeviceList) MigDeviceUUIDs() []string { + var uuids []string + for _, device := range l.MigDevices() { + uuids = append(uuids, device.Mig.Info.UUID) + } + slices.Sort(uuids) + return uuids +} + +func (g *PreparedDeviceGroup) MigDeviceUUIDs() []string { + return g.Devices.MigDevices().UUIDs() +} + +func (d PreparedDevices) MigDeviceUUIDs() []string { + var uuids []string + for _, group := range d { + uuids = append(uuids, group.MigDeviceUUIDs()...) + } + slices.Sort(uuids) + return uuids +} diff --git a/cmd/nvidia-dra-imex-plugin/root.go b/cmd/nvidia-dra-imex-plugin/root.go new file mode 100644 index 000000000..9d93f9f29 --- /dev/null +++ b/cmd/nvidia-dra-imex-plugin/root.go @@ -0,0 +1,109 @@ +/** +# Copyright 2023 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package main + +import ( + "fmt" + "os" + "path/filepath" +) + +type root string + +// getDriverLibraryPath returns path to `libnvidia-ml.so.1` in the driver root. +// The folder for this file is also expected to be the location of other driver files. 
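The accessors on PreparedDeviceList above compose as you would expect; a small sketch, assuming it sits in the same package as those types (UUIDs are made up):

func examplePreparedDeviceList() {
	list := PreparedDeviceList{
		{Gpu: &PreparedGpu{Info: &GpuInfo{UUID: "GPU-aaa"}}},
		{ImexChannel: &PreparedImexChannel{Info: &ImexChannelInfo{Channel: 0}}},
	}
	fmt.Println(len(list.Gpus()), len(list.ImexChannels())) // 1 1
	fmt.Println(list.UUIDs()) // [GPU-aaa] -- IMEX channels carry no UUID
}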
+func (r root) getDriverLibraryPath() (string, error) { + librarySearchPaths := []string{ + "/usr/lib64", + "/usr/lib/x86_64-linux-gnu", + "/usr/lib/aarch64-linux-gnu", + "/lib64", + "/lib/x86_64-linux-gnu", + "/lib/aarch64-linux-gnu", + } + + libraryPath, err := r.findFile("libnvidia-ml.so.1", librarySearchPaths...) + if err != nil { + return "", err + } + + return libraryPath, nil +} + +// getNvidiaSMIPath returns path to the `nvidia-smi` executable in the driver root. +func (r root) getNvidiaSMIPath() (string, error) { + binarySearchPaths := []string{ + "/usr/bin", + "/usr/sbin", + "/bin", + "/sbin", + } + + binaryPath, err := r.findFile("nvidia-smi", binarySearchPaths...) + if err != nil { + return "", err + } + + return binaryPath, nil +} + +// isDevRoot checks whether the specified root is a dev root. +// A dev root is defined as a root containing a /dev folder. +func (r root) isDevRoot() bool { + stat, err := os.Stat(filepath.Join(string(r), "dev")) + if err != nil { + return false + } + return stat.IsDir() +} + +// getDevRoot returns the dev root associated with the root. +// If the root is not a dev root, this defaults to "/". +func (r root) getDevRoot() string { + if r.isDevRoot() { + return string(r) + } + return "/" +} + +// findFile searches the root for a specified file. +// A number of folders can be specified to search in addition to the root itself. +// If the file represents a symlink, this is resolved and the final path is returned. +func (r root) findFile(name string, searchIn ...string) (string, error) { + + for _, d := range append([]string{"/"}, searchIn...) { + l := filepath.Join(string(r), d, name) + candidate, err := resolveLink(l) + if err != nil { + continue + } + return candidate, nil + } + + return "", fmt.Errorf("error locating %q", name) +} + +// resolveLink finds the target of a symlink or the file itself in the +// case of a regular file. +// This is equivalent to running `readlink -f ${l}`. +func resolveLink(l string) (string, error) { + resolved, err := filepath.EvalSymlinks(l) + if err != nil { + return "", fmt.Errorf("error resolving link '%v': %v", l, err) + } + return resolved, nil +} diff --git a/cmd/nvidia-dra-imex-plugin/sharing.go b/cmd/nvidia-dra-imex-plugin/sharing.go new file mode 100644 index 000000000..8206e02b8 --- /dev/null +++ b/cmd/nvidia-dra-imex-plugin/sharing.go @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
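A quick usage sketch for the root helper above, again assuming the same package (the path is illustrative; typical driver roots are "/" or "/run/nvidia/driver" mounted into the container):

func exampleDriverRoot() {
	driverRoot := root("/driver-root")
	if lib, err := driverRoot.getDriverLibraryPath(); err == nil {
		fmt.Println("libnvidia-ml.so.1:", lib) // symlinks already resolved
	}
	// getDevRoot falls back to "/" when the driver root has no /dev tree.
	fmt.Println("dev root:", driverRoot.getDevRoot())
}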
+ */ + +package main + +import ( + "bufio" + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "os" + "os/exec" + "slices" + "strconv" + "strings" + "text/template" + "time" + + appsv1 "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/util/yaml" + "k8s.io/client-go/util/retry" + "k8s.io/klog/v2" + "k8s.io/mount-utils" + + cdiapi "tags.cncf.io/container-device-interface/pkg/cdi" + cdispec "tags.cncf.io/container-device-interface/specs-go" + + configapi "github.com/NVIDIA/k8s-dra-driver/api/nvidia.com/resource/gpu/v1alpha1" +) + +const ( + MpsRoot = DriverPluginPath + "/mps" + MpsControlDaemonTemplatePath = "/templates/mps-control-daemon.tmpl.yaml" + MpsControlDaemonNameFmt = "mps-control-daemon-%v" // Fill with ClaimUID +) + +type TimeSlicingManager struct { + nvdevlib *deviceLib +} + +type MpsManager struct { + config *Config + controlFilesRoot string + hostDriverRoot string + templatePath string + + nvdevlib *deviceLib +} + +type MpsControlDaemon struct { + id string + nodeName string + namespace string + name string + rootDir string + pipeDir string + shmDir string + logDir string + devices UUIDProvider + manager *MpsManager +} + +type MpsControlDaemonTemplateData struct { + NodeName string + MpsControlDaemonNamespace string + MpsControlDaemonName string + CUDA_VISIBLE_DEVICES string //nolint:stylecheck + DefaultActiveThreadPercentage string + DefaultPinnedDeviceMemoryLimits map[string]string + NvidiaDriverRoot string + MpsShmDirectory string + MpsPipeDirectory string + MpsLogDirectory string +} + +func NewTimeSlicingManager(deviceLib *deviceLib) *TimeSlicingManager { + return &TimeSlicingManager{ + nvdevlib: deviceLib, + } +} + +func (t *TimeSlicingManager) SetTimeSlice(devices UUIDProvider, config *configapi.TimeSlicingConfig) error { + // Ensure all devices are full devices + if !slices.Equal(devices.UUIDs(), devices.GpuUUIDs()) { + return fmt.Errorf("can only set the time-slice interval on full GPUs") + } + + // Set the compute mode of the GPU to DEFAULT. + err := t.nvdevlib.setComputeMode(devices.UUIDs(), "DEFAULT") + if err != nil { + return fmt.Errorf("error setting compute mode: %w", err) + } + + // Set the time slice based on the config provided. 
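One subtlety in the guard above: it compares the complete (sorted) UUID set against the GPU-only UUID set, so a single MIG device in the claim fails the check before the call below runs. Distilled:

package main

import (
	"fmt"
	"slices"
)

func main() {
	all := []string{"GPU-aaa", "MIG-bbb"} // made-up UUIDs
	gpusOnly := []string{"GPU-aaa"}
	fmt.Println(slices.Equal(all, gpusOnly))      // false -> error path
	fmt.Println(slices.Equal(gpusOnly, gpusOnly)) // true  -> proceed
}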
+ err = t.nvdevlib.setTimeSlice(devices.UUIDs(), config.Interval.Int()) + if err != nil { + return fmt.Errorf("error setting time slice: %w", err) + } + + return nil +} + +func NewMpsManager(config *Config, deviceLib *deviceLib, controlFilesRoot, hostDriverRoot, templatePath string) *MpsManager { + return &MpsManager{ + controlFilesRoot: controlFilesRoot, + hostDriverRoot: hostDriverRoot, + templatePath: templatePath, + config: config, + nvdevlib: deviceLib, + } +} + +func (m *MpsManager) NewMpsControlDaemon(claimUID string, devices UUIDProvider) *MpsControlDaemon { + id := m.GetMpsControlDaemonID(claimUID, devices) + + return &MpsControlDaemon{ + id: id, + nodeName: m.config.flags.nodeName, + namespace: m.config.flags.namespace, + name: fmt.Sprintf(MpsControlDaemonNameFmt, id), + rootDir: fmt.Sprintf("%s/%s", m.controlFilesRoot, id), + pipeDir: fmt.Sprintf("%s/%s/%s", m.controlFilesRoot, id, "pipe"), + shmDir: fmt.Sprintf("%s/%s/%s", m.controlFilesRoot, id, "shm"), + logDir: fmt.Sprintf("%s/%s/%s", m.controlFilesRoot, id, "log"), + devices: devices, + manager: m, + } +} + +func (m *MpsManager) GetMpsControlDaemonID(claimUID string, devices UUIDProvider) string { + combined := strings.Join(devices.UUIDs(), ",") + hash := sha256.Sum256([]byte(combined)) + return fmt.Sprintf("%s-%s", claimUID, hex.EncodeToString(hash[:])[:5]) +} + +func (m *MpsManager) IsControlDaemonStarted(ctx context.Context, id string) (bool, error) { + name := fmt.Sprintf(MpsControlDaemonNameFmt, id) + _, err := m.config.clientsets.Core.AppsV1().Deployments(m.config.flags.namespace).Get(ctx, name, metav1.GetOptions{}) + if errors.IsNotFound(err) { + return false, nil + } + if err != nil { + return false, fmt.Errorf("failed to get deployment: %w", err) + } + return true, nil +} + +func (m *MpsManager) IsControlDaemonStopped(ctx context.Context, id string) (bool, error) { + name := fmt.Sprintf(MpsControlDaemonNameFmt, id) + _, err := m.config.clientsets.Core.AppsV1().Deployments(m.config.flags.namespace).Get(ctx, name, metav1.GetOptions{}) + if errors.IsNotFound(err) { + return true, nil + } + if err != nil { + return false, fmt.Errorf("failed to get deployment: %w", err) + } + return false, nil +} + +func (m *MpsControlDaemon) GetID() string { + return m.id +} + +func (m *MpsControlDaemon) Start(ctx context.Context, config *configapi.MpsConfig) error { + isStarted, err := m.manager.IsControlDaemonStarted(ctx, m.id) + if err != nil { + return fmt.Errorf("error checking if control daemon already started: %w", err) + } + + if isStarted { + return nil + } + + klog.Infof("Starting MPS control daemon for '%v', with settings: %+v", m.id, config) + + deviceUUIDs := m.devices.UUIDs() + templateData := MpsControlDaemonTemplateData{ + NodeName: m.nodeName, + MpsControlDaemonNamespace: m.namespace, + MpsControlDaemonName: m.name, + CUDA_VISIBLE_DEVICES: strings.Join(deviceUUIDs, ","), + DefaultActiveThreadPercentage: "", + DefaultPinnedDeviceMemoryLimits: nil, + NvidiaDriverRoot: m.manager.hostDriverRoot, + MpsShmDirectory: m.shmDir, + MpsPipeDirectory: m.pipeDir, + MpsLogDirectory: m.logDir, + } + + if config != nil && config.DefaultActiveThreadPercentage != nil { + templateData.DefaultActiveThreadPercentage = fmt.Sprintf("%d", *config.DefaultActiveThreadPercentage) + } + + if config != nil { + limits, err := config.DefaultPerDevicePinnedMemoryLimit.Normalize(deviceUUIDs, config.DefaultPinnedDeviceMemoryLimit) + if err != nil { + return fmt.Errorf("error transforming DefaultPerDevicePinnedMemoryLimit into string: %w", err) + } + 
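The ID scheme in GetMpsControlDaemonID above is deterministic: the same claim UID and device set always produce the same daemon name, which keeps Start idempotent across retries. A standalone sketch (inputs are made up):

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"strings"
)

func main() {
	claimUID := "8c4f7d1e-uid" // hypothetical claim UID
	uuids := []string{"GPU-aaa", "GPU-bbb"}

	hash := sha256.Sum256([]byte(strings.Join(uuids, ",")))
	id := fmt.Sprintf("%s-%s", claimUID, hex.EncodeToString(hash[:])[:5])
	fmt.Println(id) // e.g. 8c4f7d1e-uid-3fb5e (digest shown is illustrative)
}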
templateData.DefaultPinnedDeviceMemoryLimits = limits + } + + tmpl, err := template.ParseFiles(m.manager.templatePath) + if err != nil { + return fmt.Errorf("failed to parse template file: %w", err) + } + + var deploymentYaml bytes.Buffer + if err := tmpl.Execute(&deploymentYaml, templateData); err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + var unstructuredObj unstructured.Unstructured + err = yaml.Unmarshal(deploymentYaml.Bytes(), &unstructuredObj) + if err != nil { + return fmt.Errorf("failed to unmarshal yaml: %w", err) + } + + var deployment appsv1.Deployment + err = runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredObj.UnstructuredContent(), &deployment) + if err != nil { + return fmt.Errorf("failed to convert unstructured data to typed object: %w", err) + } + + err = os.MkdirAll(m.shmDir, 0755) + if err != nil { + return fmt.Errorf("error creating directory %v: %w", m.shmDir, err) + } + + err = os.MkdirAll(m.pipeDir, 0755) + if err != nil { + return fmt.Errorf("error creating directory %v: %w", m.pipeDir, err) + } + + err = os.MkdirAll(m.logDir, 0755) + if err != nil { + return fmt.Errorf("error creating directory %v: %w", m.logDir, err) + } + + mountExecutable, err := exec.LookPath("mount") + if err != nil { + return fmt.Errorf("error finding 'mount' executable: %w", err) + } + + mounter := mount.New(mountExecutable) + sizeArg := fmt.Sprintf("size=%v", getDefaultShmSize()) + mountOptions := []string{"rw", "nosuid", "nodev", "noexec", "relatime", sizeArg} + err = mounter.Mount("shm", m.shmDir, "tmpfs", mountOptions) + if err != nil { + return fmt.Errorf("error mounting %v as tmpfs: %w", m.shmDir, err) + } + + err = m.manager.nvdevlib.setComputeMode(m.devices.GpuUUIDs(), "EXCLUSIVE_PROCESS") + if err != nil { + return fmt.Errorf("error setting compute mode: %w", err) + } + + _, err = m.manager.config.clientsets.Core.AppsV1().Deployments(m.namespace).Create(ctx, &deployment, metav1.CreateOptions{}) + if errors.IsAlreadyExists(err) { + return nil + } + if err != nil { + return fmt.Errorf("failed to create deployment: %w", err) + } + + return nil +} + +func (m *MpsControlDaemon) AssertReady(ctx context.Context) error { + backoff := wait.Backoff{ + Duration: time.Second, + Factor: 2, + Jitter: 1, + Steps: 4, + Cap: 10 * time.Second, + } + + return retry.OnError( + backoff, + func(error) bool { + return true + }, + func() error { + deployment, err := m.manager.config.clientsets.Core.AppsV1().Deployments(m.namespace).Get( + ctx, + m.name, + metav1.GetOptions{}, + ) + if err != nil { + return fmt.Errorf("failed to get deployment: %w", err) + } + + if deployment.Status.ReadyReplicas != 1 { + return fmt.Errorf("waiting for MPS control daemon to come online") + } + + selector := deployment.Spec.Selector.MatchLabels + + pods, err := m.manager.config.clientsets.Core.CoreV1().Pods(m.namespace).List( + ctx, + metav1.ListOptions{ + LabelSelector: labels.Set(selector).AsSelector().String(), + }, + ) + if err != nil { + return fmt.Errorf("error listing pods from deployment") + } + + if len(pods.Items) != 1 { + return fmt.Errorf("unexpected number of pods in deployment: %v", len(pods.Items)) + } + + if len(pods.Items[0].Status.ContainerStatuses) != 1 { + return fmt.Errorf("unexpected number of container statuses in pod") + } + + if !pods.Items[0].Status.ContainerStatuses[0].Ready { + return fmt.Errorf("control daemon not yet ready") + } + + return nil + }, + ) +} + +func (m *MpsControlDaemon) GetCDIContainerEdits() *cdiapi.ContainerEdits { + return 
&cdiapi.ContainerEdits{ + ContainerEdits: &cdispec.ContainerEdits{ + Env: []string{ + fmt.Sprintf("CUDA_MPS_PIPE_DIRECTORY=%s", "/tmp/nvidia-mps"), + }, + Mounts: []*cdispec.Mount{ + { + ContainerPath: "/dev/shm", + HostPath: m.shmDir, + Options: []string{"rw", "nosuid", "nodev", "bind"}, + }, + { + ContainerPath: "/tmp/nvidia-mps", + HostPath: m.pipeDir, + Options: []string{"rw", "nosuid", "nodev", "bind"}, + }, + }, + }, + } +} + +func (m *MpsControlDaemon) Stop(ctx context.Context) error { + _, err := os.Stat(m.rootDir) + if os.IsNotExist(err) { + return nil + } + + klog.Infof("Stopping MPS control daemon for '%v'", m.id) + + deletePolicy := metav1.DeletePropagationForeground + deleteOptions := metav1.DeleteOptions{ + PropagationPolicy: &deletePolicy, + } + + err = m.manager.config.clientsets.Core.AppsV1().Deployments(m.namespace).Delete(ctx, m.name, deleteOptions) + if err != nil && !errors.IsNotFound(err) { + return fmt.Errorf("failed to delete deployment: %w", err) + } + + mountExecutable, err := exec.LookPath("mount") + if err != nil { + return fmt.Errorf("error finding 'mount' executable: %w", err) + } + + mounter := mount.New(mountExecutable) + err = mount.CleanupMountPoint(m.shmDir, mounter, true) + if err != nil { + return fmt.Errorf("error unmounting %v: %w", m.shmDir, err) + } + + err = os.RemoveAll(m.rootDir) + if err != nil { + return fmt.Errorf("error removing directory %v: %w", m.rootDir, err) + } + + return nil +} + +// getDefaultShmSize returns the default size for the tmpfs to be created. +// This reads /proc/meminfo to get the total memory to calculate this. If this +// fails a fallback size of 65536k is used. +func getDefaultShmSize() string { + const fallbackSize = "65536k" + + meminfo, err := os.Open("/proc/meminfo") + if err != nil { + klog.ErrorS(err, "failed to open /proc/meminfo") + return fallbackSize + } + defer func() { + _ = meminfo.Close() + }() + + scanner := bufio.NewScanner(meminfo) + for scanner.Scan() { + line := scanner.Text() + if !strings.HasPrefix(line, "MemTotal:") { + continue + } + + parts := strings.SplitN(strings.TrimSpace(strings.TrimPrefix(line, "MemTotal:")), " ", 2) + memTotal, err := strconv.Atoi(parts[0]) + if err != nil { + klog.ErrorS(err, "could not convert MemTotal to an integer") + return fallbackSize + } + + var unit string + if len(parts) == 2 { + unit = string(parts[1][0]) + } + + return fmt.Sprintf("%d%s", memTotal/2, unit) + } + return fallbackSize +} diff --git a/cmd/nvidia-dra-imex-plugin/types.go b/cmd/nvidia-dra-imex-plugin/types.go new file mode 100644 index 000000000..df20c54fe --- /dev/null +++ b/cmd/nvidia-dra-imex-plugin/types.go @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
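To illustrate the arithmetic in getDefaultShmSize above: a /proc/meminfo line of `MemTotal: 16384256 kB` parses to 16384256, is halved, and keeps only the first letter of the unit, yielding "8192128k". Condensed:

package main

import (
	"fmt"
	"strconv"
	"strings"
)

func main() {
	line := "MemTotal:       16384256 kB" // illustrative /proc/meminfo line
	parts := strings.SplitN(strings.TrimSpace(strings.TrimPrefix(line, "MemTotal:")), " ", 2)
	memTotal, _ := strconv.Atoi(parts[0])
	unit := string(parts[1][0])
	fmt.Printf("%d%s\n", memTotal/2, unit) // 8192128k
}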
+ */ + +package main + +const ( + GpuDeviceType = "gpu" + MigDeviceType = "mig" + ImexChannelType = "imex" + UnknownDeviceType = "unknown" +) + +type UUIDProvider interface { + UUIDs() []string + GpuUUIDs() []string + MigDeviceUUIDs() []string +} diff --git a/demo/clusters/gke/install-dra-driver.sh b/demo/clusters/gke/install-dra-gpu-driver.sh similarity index 96% rename from demo/clusters/gke/install-dra-driver.sh rename to demo/clusters/gke/install-dra-gpu-driver.sh index 31780a3b2..cef205ed5 100755 --- a/demo/clusters/gke/install-dra-driver.sh +++ b/demo/clusters/gke/install-dra-gpu-driver.sh @@ -29,7 +29,7 @@ DRIVER_NAME=$(from_versions_mk "DRIVER_NAME") : ${IMAGE_NAME:=${DRIVER_NAME}} : ${IMAGE_TAG:=6c34f5fb-ubi8} -helm upgrade -i --create-namespace --namespace nvidia nvidia-dra-driver ${PROJECT_DIR}/deployments/helm/k8s-dra-driver \ +helm upgrade -i --create-namespace --namespace nvidia nvidia-dra-gpu-driver ${PROJECT_DIR}/deployments/helm/k8s-dra-gpu-driver \ --set image.repository=${IMAGE_REGISTRY}/${IMAGE_NAME} \ --set image.tag=${IMAGE_TAG} \ --set image.pullPolicy=Always \ diff --git a/demo/clusters/gke/install-dra-imex-driver.sh b/demo/clusters/gke/install-dra-imex-driver.sh new file mode 100755 index 000000000..63b1ea5fb --- /dev/null +++ b/demo/clusters/gke/install-dra-imex-driver.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright 2023 NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CURRENT_DIR="$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)" +PROJECT_DIR="$(cd -- "$( dirname -- "${CURRENT_DIR}/../../../.." )" &> /dev/null && pwd)" + +# We extract information from versions.mk +function from_versions_mk() { + local makevar=$1 + local value=$(grep -E "^\s*${makevar}\s+[\?:]= " ${PROJECT_DIR}/versions.mk) + echo ${value##*= } +} +DRIVER_NAME=$(from_versions_mk "DRIVER_NAME") + +: ${IMAGE_REGISTRY:=ghcr.io/nvidia} +: ${IMAGE_NAME:=${DRIVER_NAME}} +: ${IMAGE_TAG:=6c34f5fb-ubi8} + +helm upgrade -i --create-namespace --namespace nvidia nvidia-dra-imex-driver ${PROJECT_DIR}/deployments/helm/k8s-dra-imex-driver \ + --set image.repository=${IMAGE_REGISTRY}/${IMAGE_NAME} \ + --set image.tag=${IMAGE_TAG} \ + --set image.pullPolicy=Always \ + --set controller.priorityClassName="" \ + --set kubeletPlugin.priorityClassName="" \ + --set deviceClasses="{imex}" \ + --set nvidiaDriverRoot="/opt/nvidia" \ + --set kubeletPlugin.tolerations[0].key=nvidia.com/gpu \ + --set kubeletPlugin.tolerations[0].operator=Exists \ + --set kubeletPlugin.tolerations[0].effect=NoSchedule diff --git a/demo/clusters/kind/install-dra-gpu-driver.sh b/demo/clusters/kind/install-dra-gpu-driver.sh new file mode 100755 index 000000000..513993273 --- /dev/null +++ b/demo/clusters/kind/install-dra-gpu-driver.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# Copyright 2023 The Kubernetes Authors. +# Copyright 2023 NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
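UUIDProvider in types.go above is the small seam that lets the sharing managers accept prepared device groups and lists interchangeably; a toy implementation makes the contract clear (hypothetical, not part of the tree):

// staticUUIDs is a hypothetical, trivial UUIDProvider useful in tests.
type staticUUIDs struct{ gpus, migs []string }

func (s staticUUIDs) UUIDs() []string          { return append(append([]string{}, s.gpus...), s.migs...) }
func (s staticUUIDs) GpuUUIDs() []string       { return s.gpus }
func (s staticUUIDs) MigDeviceUUIDs() []string { return s.migs }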
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CURRENT_DIR="$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)" + +set -ex +set -o pipefail + +source "${CURRENT_DIR}/scripts/common.sh" + +kubectl label node -l node-role.x-k8s.io/worker --overwrite nvidia.com/gpu.present=true + +deviceClasses=${1:-"gpu,mig"} +helm upgrade -i --create-namespace --namespace nvidia nvidia-dra-gpu-driver ${PROJECT_DIR}/deployments/helm/k8s-dra-gpu-driver \ + --set deviceClasses="{${deviceClasses}}" \ + ${NVIDIA_CTK_PATH:+--set nvidiaCtkPath=${NVIDIA_CTK_PATH}} \ + ${NVIDIA_DRIVER_ROOT:+--set nvidiaDriverRoot=${NVIDIA_DRIVER_ROOT}} \ + ${MASK_NVIDIA_DRIVER_PARAMS:+--set maskNvidiaDriverParams=${MASK_NVIDIA_DRIVER_PARAMS}} \ + --wait + +set +x +printf '\033[0;32m' +echo "Driver installation complete:" +kubectl get pod -n nvidia +printf '\033[0m' diff --git a/demo/clusters/kind/install-dra-driver.sh b/demo/clusters/kind/install-dra-imex-driver.sh similarity index 93% rename from demo/clusters/kind/install-dra-driver.sh rename to demo/clusters/kind/install-dra-imex-driver.sh index ece8cdf1f..a8dc06688 100755 --- a/demo/clusters/kind/install-dra-driver.sh +++ b/demo/clusters/kind/install-dra-imex-driver.sh @@ -24,8 +24,8 @@ source "${CURRENT_DIR}/scripts/common.sh" kubectl label node -l node-role.x-k8s.io/worker --overwrite nvidia.com/gpu.present=true -deviceClasses=${1:-"gpu,mig,imex"} -helm upgrade -i --create-namespace --namespace nvidia nvidia-dra-driver ${PROJECT_DIR}/deployments/helm/k8s-dra-driver \ +deviceClasses=${1:-"imex"} +helm upgrade -i --create-namespace --namespace nvidia nvidia-dra-imex-driver ${PROJECT_DIR}/deployments/helm/k8s-dra-imex-driver \ --set deviceClasses="{${deviceClasses}}" \ ${NVIDIA_CTK_PATH:+--set nvidiaCtkPath=${NVIDIA_CTK_PATH}} \ ${NVIDIA_DRIVER_ROOT:+--set nvidiaDriverRoot=${NVIDIA_DRIVER_ROOT}} \ diff --git a/deployments/container/Dockerfile.ubi8 b/deployments/container/Dockerfile.ubi8 index c365f1239..e1113ca5f 100644 --- a/deployments/container/Dockerfile.ubi8 +++ b/deployments/container/Dockerfile.ubi8 @@ -59,9 +59,10 @@ LABEL org.opencontainers.image.description "NVIDIA GPU DRA driver for Kubernetes RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE -COPY --from=build /artifacts/nvidia-dra-controller /usr/bin/nvidia-dra-controller -COPY --from=build /artifacts/nvidia-dra-plugin /usr/bin/nvidia-dra-plugin -COPY --from=build /build/templates /templates +COPY --from=build /artifacts/nvidia-dra-imex-controller /usr/bin/nvidia-dra-imex-controller +COPY --from=build /artifacts/nvidia-dra-imex-plugin /usr/bin/nvidia-dra-imex-plugin +COPY --from=build /artifacts/nvidia-dra-gpu-plugin /usr/bin/nvidia-dra-gpu-plugin +COPY --from=build /build/templates /templates # Install / upgrade packages here that are required to resolve CVEs ARG CVE_UPDATES diff --git a/deployments/container/Dockerfile.ubuntu b/deployments/container/Dockerfile.ubuntu index ea5d58a3b..2c46fb0b5 100644 --- a/deployments/container/Dockerfile.ubuntu +++ b/deployments/container/Dockerfile.ubuntu @@ -64,9 +64,10 @@ LABEL org.opencontainers.image.description "NVIDIA GPU DRA driver for Kubernetes RUN mkdir 
/licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE -COPY --from=build /artifacts/nvidia-dra-controller /usr/bin/nvidia-dra-controller -COPY --from=build /artifacts/nvidia-dra-plugin /usr/bin/nvidia-dra-plugin -COPY --from=build /build/templates /templates +COPY --from=build /artifacts/nvidia-dra-imex-controller /usr/bin/nvidia-dra-imex-controller +COPY --from=build /artifacts/nvidia-dra-imex-plugin /usr/bin/nvidia-dra-imex-plugin +COPY --from=build /artifacts/nvidia-dra-gpu-plugin /usr/bin/nvidia-dra-gpu-plugin +COPY --from=build /build/templates /templates # Install / upgrade packages here that are required to resolve CVEs ARG CVE_UPDATES diff --git a/deployments/helm/k8s-dra-driver/.helmignore b/deployments/helm/k8s-dra-gpu-driver/.helmignore similarity index 100% rename from deployments/helm/k8s-dra-driver/.helmignore rename to deployments/helm/k8s-dra-gpu-driver/.helmignore diff --git a/deployments/helm/k8s-dra-driver/Chart.yaml b/deployments/helm/k8s-dra-gpu-driver/Chart.yaml similarity index 97% rename from deployments/helm/k8s-dra-driver/Chart.yaml rename to deployments/helm/k8s-dra-gpu-driver/Chart.yaml index ac0a4580a..df993341c 100644 --- a/deployments/helm/k8s-dra-driver/Chart.yaml +++ b/deployments/helm/k8s-dra-gpu-driver/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 -name: k8s-dra-driver +name: k8s-dra-gpu-driver description: A Helm chart for Kubernetes # A chart can be either an 'application' or a 'library' chart. diff --git a/deployments/helm/k8s-dra-gpu-driver/crds/gpu.nvidia.com_multinodeenvironments.yaml b/deployments/helm/k8s-dra-gpu-driver/crds/gpu.nvidia.com_multinodeenvironments.yaml new file mode 100644 index 000000000..b29f5490b --- /dev/null +++ b/deployments/helm/k8s-dra-gpu-driver/crds/gpu.nvidia.com_multinodeenvironments.yaml @@ -0,0 +1,58 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: multinodeenvironments.gpu.nvidia.com +spec: + group: gpu.nvidia.com + names: + kind: MultiNodeEnvironment + listKind: MultiNodeEnvironmentList + plural: multinodeenvironments + singular: multinodeenvironment + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: MultiNodeEnvironment prepares a set of nodes to run a multi-node + workload in. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: MultiNodeEnvironmentSpec provides the spec for a MultiNodeEnvironment. + properties: + deviceClassName: + type: string + numNodes: + type: integer + resourceClaimName: + type: string + required: + - numNodes + type: object + x-kubernetes-validations: + - message: Exactly one of 'resourceClaimName' or 'deviceClassName' must + be set. + rule: '(has(self.resourceClaimName) ? 
!has(self.deviceClassName) : has(self.deviceClassName))' + type: object + served: true + storage: true diff --git a/deployments/helm/k8s-dra-driver/templates/_helpers.tpl b/deployments/helm/k8s-dra-gpu-driver/templates/_helpers.tpl similarity index 100% rename from deployments/helm/k8s-dra-driver/templates/_helpers.tpl rename to deployments/helm/k8s-dra-gpu-driver/templates/_helpers.tpl diff --git a/deployments/helm/k8s-dra-driver/templates/clusterrole.yaml b/deployments/helm/k8s-dra-gpu-driver/templates/clusterrole.yaml similarity index 100% rename from deployments/helm/k8s-dra-driver/templates/clusterrole.yaml rename to deployments/helm/k8s-dra-gpu-driver/templates/clusterrole.yaml diff --git a/deployments/helm/k8s-dra-driver/templates/clusterrolebinding.yaml b/deployments/helm/k8s-dra-gpu-driver/templates/clusterrolebinding.yaml similarity index 100% rename from deployments/helm/k8s-dra-driver/templates/clusterrolebinding.yaml rename to deployments/helm/k8s-dra-gpu-driver/templates/clusterrolebinding.yaml diff --git a/deployments/helm/k8s-dra-driver/templates/controller.yaml b/deployments/helm/k8s-dra-gpu-driver/templates/controller.yaml similarity index 95% rename from deployments/helm/k8s-dra-driver/templates/controller.yaml rename to deployments/helm/k8s-dra-gpu-driver/templates/controller.yaml index 440526831..02f00ae17 100644 --- a/deployments/helm/k8s-dra-driver/templates/controller.yaml +++ b/deployments/helm/k8s-dra-gpu-driver/templates/controller.yaml @@ -18,7 +18,7 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: {{ include "k8s-dra-driver.fullname" . }}-controller + name: {{ include "k8s-dra-driver.fullname" . }}-imex-controller namespace: {{ include "k8s-dra-driver.namespace" . }} labels: {{- include "k8s-dra-driver.labels" . | nindent 4 }} @@ -52,7 +52,7 @@ spec: {{- toYaml .Values.controller.containers.controller.securityContext | nindent 10 }} image: {{ include "k8s-dra-driver.fullimage" . }} imagePullPolicy: {{ .Values.image.pullPolicy }} - command: ["nvidia-dra-controller", "-v", "6"] + command: ["nvidia-dra-imex-controller", "-v", "6"] resources: {{- toYaml .Values.controller.containers.controller.resources | nindent 10 }} env: diff --git a/deployments/helm/k8s-dra-driver/templates/deviceclass-gpu.yaml b/deployments/helm/k8s-dra-gpu-driver/templates/deviceclass-gpu.yaml similarity index 100% rename from deployments/helm/k8s-dra-driver/templates/deviceclass-gpu.yaml rename to deployments/helm/k8s-dra-gpu-driver/templates/deviceclass-gpu.yaml diff --git a/deployments/helm/k8s-dra-driver/templates/deviceclass-mig.yaml b/deployments/helm/k8s-dra-gpu-driver/templates/deviceclass-mig.yaml similarity index 100% rename from deployments/helm/k8s-dra-driver/templates/deviceclass-mig.yaml rename to deployments/helm/k8s-dra-gpu-driver/templates/deviceclass-mig.yaml diff --git a/deployments/helm/k8s-dra-driver/templates/kubeletplugin.yaml b/deployments/helm/k8s-dra-gpu-driver/templates/kubeletplugin.yaml similarity index 97% rename from deployments/helm/k8s-dra-driver/templates/kubeletplugin.yaml rename to deployments/helm/k8s-dra-gpu-driver/templates/kubeletplugin.yaml index 0b9b09b0f..c9b054461 100644 --- a/deployments/helm/k8s-dra-driver/templates/kubeletplugin.yaml +++ b/deployments/helm/k8s-dra-gpu-driver/templates/kubeletplugin.yaml @@ -16,7 +16,7 @@ apiVersion: apps/v1 kind: DaemonSet metadata: - name: {{ include "k8s-dra-driver.fullname" . }}-kubelet-plugin + name: {{ include "k8s-dra-driver.fullname" . 
}}-gpu-kubelet-plugin namespace: {{ include "k8s-dra-driver.namespace" . }} labels: {{- include "k8s-dra-driver.labels" . | nindent 4 }} @@ -65,7 +65,7 @@ spec: sed -i 's/^ModifyDeviceFiles: 1$/ModifyDeviceFiles: 0/' root/gpu-params mount --bind root/gpu-params /proc/driver/nvidia/params fi - nvidia-dra-plugin + nvidia-dra-gpu-plugin resources: {{- toYaml .Values.kubeletPlugin.containers.plugin.resources | nindent 10 }} env: diff --git a/deployments/helm/k8s-dra-driver/templates/openshiftprivilegedrolebinging.yaml b/deployments/helm/k8s-dra-gpu-driver/templates/openshiftprivilegedrolebinging.yaml similarity index 100% rename from deployments/helm/k8s-dra-driver/templates/openshiftprivilegedrolebinging.yaml rename to deployments/helm/k8s-dra-gpu-driver/templates/openshiftprivilegedrolebinging.yaml diff --git a/deployments/helm/k8s-dra-driver/templates/serviceaccount.yaml b/deployments/helm/k8s-dra-gpu-driver/templates/serviceaccount.yaml similarity index 100% rename from deployments/helm/k8s-dra-driver/templates/serviceaccount.yaml rename to deployments/helm/k8s-dra-gpu-driver/templates/serviceaccount.yaml diff --git a/deployments/helm/k8s-dra-driver/templates/validatingadmissionpolicy.yaml b/deployments/helm/k8s-dra-gpu-driver/templates/validatingadmissionpolicy.yaml similarity index 100% rename from deployments/helm/k8s-dra-driver/templates/validatingadmissionpolicy.yaml rename to deployments/helm/k8s-dra-gpu-driver/templates/validatingadmissionpolicy.yaml diff --git a/deployments/helm/k8s-dra-driver/templates/validatingadmissionpolicybinding.yaml b/deployments/helm/k8s-dra-gpu-driver/templates/validatingadmissionpolicybinding.yaml similarity index 100% rename from deployments/helm/k8s-dra-driver/templates/validatingadmissionpolicybinding.yaml rename to deployments/helm/k8s-dra-gpu-driver/templates/validatingadmissionpolicybinding.yaml diff --git a/deployments/helm/k8s-dra-driver/templates/validation.yaml b/deployments/helm/k8s-dra-gpu-driver/templates/validation.yaml similarity index 100% rename from deployments/helm/k8s-dra-driver/templates/validation.yaml rename to deployments/helm/k8s-dra-gpu-driver/templates/validation.yaml diff --git a/deployments/helm/k8s-dra-driver/values.yaml b/deployments/helm/k8s-dra-gpu-driver/values.yaml similarity index 100% rename from deployments/helm/k8s-dra-driver/values.yaml rename to deployments/helm/k8s-dra-gpu-driver/values.yaml diff --git a/deployments/helm/k8s-dra-imex-driver/.helmignore b/deployments/helm/k8s-dra-imex-driver/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/deployments/helm/k8s-dra-imex-driver/Chart.yaml b/deployments/helm/k8s-dra-imex-driver/Chart.yaml new file mode 100644 index 000000000..ad266f76c --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: k8s-dra-imex-driver +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. 
+# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "0.1.0" diff --git a/deployments/helm/k8s-dra-imex-driver/crds/gpu.nvidia.com_multinodeenvironments.yaml b/deployments/helm/k8s-dra-imex-driver/crds/gpu.nvidia.com_multinodeenvironments.yaml new file mode 100644 index 000000000..b29f5490b --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/crds/gpu.nvidia.com_multinodeenvironments.yaml @@ -0,0 +1,58 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: multinodeenvironments.gpu.nvidia.com +spec: + group: gpu.nvidia.com + names: + kind: MultiNodeEnvironment + listKind: MultiNodeEnvironmentList + plural: multinodeenvironments + singular: multinodeenvironment + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: MultiNodeEnvironment prepares a set of nodes to run a multi-node + workload in. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: MultiNodeEnvironmentSpec provides the spec for a MultiNodeEnvironment. + properties: + deviceClassName: + type: string + numNodes: + type: integer + resourceClaimName: + type: string + required: + - numNodes + type: object + x-kubernetes-validations: + - message: Exactly one of 'resourceClaimName' or 'deviceClassName' must + be set. + rule: '(has(self.resourceClaimName) ? !has(self.deviceClassName) : has(self.deviceClassName))' + type: object + served: true + storage: true diff --git a/deployments/helm/k8s-dra-imex-driver/templates/_helpers.tpl b/deployments/helm/k8s-dra-imex-driver/templates/_helpers.tpl new file mode 100644 index 000000000..7cf4ea012 --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/templates/_helpers.tpl @@ -0,0 +1,129 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "k8s-dra-driver.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. 
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "k8s-dra-driver.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Allow the release namespace to be overridden for multi-namespace deployments in combined charts +*/}} +{{- define "k8s-dra-driver.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "k8s-dra-driver.chart" -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- printf "%s-%s" $name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "k8s-dra-driver.labels" -}} +helm.sh/chart: {{ include "k8s-dra-driver.chart" . }} +{{ include "k8s-dra-driver.templateLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Template labels +*/}} +{{- define "k8s-dra-driver.templateLabels" -}} +app.kubernetes.io/name: {{ include "k8s-dra-driver.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- if .Values.selectorLabelsOverride }} +{{ toYaml .Values.selectorLabelsOverride }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "k8s-dra-driver.selectorLabels" -}} +{{- if .Values.selectorLabelsOverride -}} +{{ toYaml .Values.selectorLabelsOverride }} +{{- else -}} +{{ include "k8s-dra-driver.templateLabels" . }} +{{- end }} +{{- end }} + +{{/* +Full image name with tag +*/}} +{{- define "k8s-dra-driver.fullimage" -}} +{{- $tag := printf "v%s" .Chart.AppVersion }} +{{- .Values.image.repository -}}:{{- .Values.image.tag | default $tag -}} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "k8s-dra-driver.serviceAccountName" -}} +{{- $name := printf "%s-service-account" (include "k8s-dra-driver.fullname" .) }} +{{- if .Values.serviceAccount.create }} +{{- default $name .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Check for the existence of an element in a list +*/}} +{{- define "k8s-dra-driver.listHas" -}} + {{- $listToCheck := index . 0 }} + {{- $valueToCheck := index . 1 }} + + {{- $found := "" -}} + {{- range $listToCheck}} + {{- if eq . $valueToCheck }} + {{- $found = "true" -}} + {{- end }} + {{- end }} + {{- $found -}} +{{- end }} + +{{/* +Filter a list by a set of valid values +*/}} +{{- define "k8s-dra-driver.filterList" -}} + {{- $listToFilter := index . 0 }} + {{- $validValues := index . 1 }} + + {{- $result := list -}} + {{- range $validValues}} + {{- if include "k8s-dra-driver.listHas" (list $listToFilter .) }} + {{- $result = append $result . 
}} + {{- end }} + {{- end }} + {{- $result -}} +{{- end -}} diff --git a/deployments/helm/k8s-dra-imex-driver/templates/clusterrole.yaml b/deployments/helm/k8s-dra-imex-driver/templates/clusterrole.yaml new file mode 100644 index 000000000..37bd88977 --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/templates/clusterrole.yaml @@ -0,0 +1,31 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "k8s-dra-driver.fullname" . }}-role + namespace: {{ include "k8s-dra-driver.namespace" . }} +rules: +- apiGroups: ["gpu.nvidia.com"] + resources: ["multinodeenvironments"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["resource.k8s.io"] + resources: ["resourceclaims"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["resource.k8s.io"] + resources: ["deviceclasses"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["resource.k8s.io"] + resources: ["resourceslices"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["resource.k8s.io"] + resources: ["resourceclaims/status"] + verbs: ["update"] +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch"] diff --git a/deployments/helm/k8s-dra-imex-driver/templates/clusterrolebinding.yaml b/deployments/helm/k8s-dra-imex-driver/templates/clusterrolebinding.yaml new file mode 100644 index 000000000..5caef3459 --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/templates/clusterrolebinding.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "k8s-dra-driver.fullname" . }}-role-binding + namespace: {{ include "k8s-dra-driver.namespace" . }} +subjects: +- kind: ServiceAccount + name: {{ include "k8s-dra-driver.serviceAccountName" . }} + namespace: {{ include "k8s-dra-driver.namespace" . }} +roleRef: + kind: ClusterRole + name: {{ include "k8s-dra-driver.fullname" . }}-role + apiGroup: rbac.authorization.k8s.io diff --git a/deployments/helm/k8s-dra-imex-driver/templates/controller.yaml b/deployments/helm/k8s-dra-imex-driver/templates/controller.yaml new file mode 100644 index 000000000..02f00ae17 --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/templates/controller.yaml @@ -0,0 +1,81 @@ +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{- if (include "k8s-dra-driver.listHas" (list $.Values.deviceClasses "imex")) }} +{{- $deviceClasses := include "k8s-dra-driver.filterList" (list $.Values.deviceClasses (list "imex")) }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "k8s-dra-driver.fullname" . }}-imex-controller + namespace: {{ include "k8s-dra-driver.namespace" . 
}} + labels: + {{- include "k8s-dra-driver.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "k8s-dra-driver.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.controller.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "k8s-dra-driver.templateLabels" . | nindent 8 }} + spec: + {{- if .Values.controller.priorityClassName }} + priorityClassName: {{ .Values.controller.priorityClassName }} + {{- end }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "k8s-dra-driver.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.controller.podSecurityContext | nindent 8 }} + containers: + - name: controller + securityContext: + {{- toYaml .Values.controller.containers.controller.securityContext | nindent 10 }} + image: {{ include "k8s-dra-driver.fullimage" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["nvidia-dra-imex-controller", "-v", "6"] + resources: + {{- toYaml .Values.controller.containers.controller.resources | nindent 10 }} + env: + - name: DEVICE_CLASSES + value: {{ .Values.deviceClasses | join "," }} + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + {{- with .Values.controller.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.controller.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.controller.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/deployments/helm/k8s-dra-imex-driver/templates/deviceclass-gpu.yaml b/deployments/helm/k8s-dra-imex-driver/templates/deviceclass-gpu.yaml new file mode 100644 index 000000000..c9b5ef310 --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/templates/deviceclass-gpu.yaml @@ -0,0 +1,11 @@ +{{- if include "k8s-dra-driver.listHas" (list $.Values.deviceClasses "gpu") }} +--- +apiVersion: resource.k8s.io/v1beta1 +kind: DeviceClass +metadata: + name: gpu.nvidia.com +spec: + selectors: + - cel: + expression: "device.driver == 'gpu.nvidia.com' && device.attributes['gpu.nvidia.com'].type == 'gpu'" +{{- end }} diff --git a/deployments/helm/k8s-dra-imex-driver/templates/deviceclass-mig.yaml b/deployments/helm/k8s-dra-imex-driver/templates/deviceclass-mig.yaml new file mode 100644 index 000000000..6cc95357a --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/templates/deviceclass-mig.yaml @@ -0,0 +1,11 @@ +{{- if include "k8s-dra-driver.listHas" (list $.Values.deviceClasses "mig") }} +--- +apiVersion: resource.k8s.io/v1beta1 +kind: DeviceClass +metadata: + name: mig.nvidia.com +spec: + selectors: + - cel: + expression: "device.driver == 'gpu.nvidia.com' && device.attributes['gpu.nvidia.com'].type == 'mig'" +{{- end }} diff --git a/deployments/helm/k8s-dra-imex-driver/templates/kubeletplugin.yaml b/deployments/helm/k8s-dra-imex-driver/templates/kubeletplugin.yaml new file mode 100644 index 000000000..a4dd972c1 --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/templates/kubeletplugin.yaml @@ -0,0 +1,130 @@ +# Copyright 2023 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "k8s-dra-driver.fullname" . }}-imex-kubelet-plugin + namespace: {{ include "k8s-dra-driver.namespace" . }} + labels: + {{- include "k8s-dra-driver.labels" . | nindent 4 }} +spec: + selector: + matchLabels: + {{- include "k8s-dra-driver.selectorLabels" . | nindent 6 }} + {{- with .Values.kubeletPlugin.updateStrategy }} + updateStrategy: + {{- toYaml . | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.kubeletPlugin.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "k8s-dra-driver.templateLabels" . | nindent 8 }} + spec: + {{- if .Values.kubeletPlugin.priorityClassName }} + priorityClassName: {{ .Values.kubeletPlugin.priorityClassName }} + {{- end }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "k8s-dra-driver.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.kubeletPlugin.podSecurityContext | nindent 8 }} + containers: + - name: plugin + securityContext: + {{- toYaml .Values.kubeletPlugin.containers.plugin.securityContext | nindent 10 }} + image: {{ include "k8s-dra-driver.fullimage" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["bash", "-c"] + args: + - |- + # Conditionally mask the params file to prevent this container from + # recreating any missing GPU device nodes. This is necessary, for + # example, when running under nvkind to limit the set of GPUs governed + # by the plugin even though it has cgroup access to all of them. + if [ "${MASK_NVIDIA_DRIVER_PARAMS}" = "true" ]; then + cp /proc/driver/nvidia/params root/gpu-params + sed -i 's/^ModifyDeviceFiles: 1$/ModifyDeviceFiles: 0/' root/gpu-params + mount --bind root/gpu-params /proc/driver/nvidia/params + fi + nvidia-dra-imex-plugin + resources: + {{- toYaml .Values.kubeletPlugin.containers.plugin.resources | nindent 10 }} + env: + - name: MASK_NVIDIA_DRIVER_PARAMS + value: "{{ .Values.maskNvidiaDriverParams }}" + - name: NVIDIA_CTK_PATH + value: "{{ .Values.nvidiaCtkPath }}" + - name: NVIDIA_DRIVER_ROOT + value: "{{ .Values.nvidiaDriverRoot }}" + - name: NVIDIA_VISIBLE_DEVICES + value: void + - name: CDI_ROOT + value: /var/run/cdi + - name: NVIDIA_MIG_CONFIG_DEVICES + value: all + - name: DEVICE_CLASSES + value: {{ .Values.deviceClasses | join "," }} + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + volumeMounts: + - name: plugins-registry + mountPath: /var/lib/kubelet/plugins_registry + - name: plugins + mountPath: /var/lib/kubelet/plugins + mountPropagation: Bidirectional + - name: cdi + mountPath: /var/run/cdi + # We always mount the driver root at /driver-root in the container.
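+        # (Note: NVIDIA_DRIVER_ROOT above carries the host-side path taken from
+        # .Values.nvidiaDriverRoot; the driver-root hostPath volume below exposes
+        # that same tree read-only inside the container at /driver-root.)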
+ - name: driver-root + mountPath: /driver-root + readOnly: true + volumes: + - name: plugins-registry + hostPath: + path: /var/lib/kubelet/plugins_registry + - name: plugins + hostPath: + path: /var/lib/kubelet/plugins + - name: cdi + hostPath: + path: /var/run/cdi + - name: driver-root + hostPath: + path: {{ .Values.nvidiaDriverRoot }} + {{- with .Values.kubeletPlugin.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.kubeletPlugin.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.kubeletPlugin.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/deployments/helm/k8s-dra-imex-driver/templates/openshiftprivilegedrolebinging.yaml b/deployments/helm/k8s-dra-imex-driver/templates/openshiftprivilegedrolebinging.yaml new file mode 100644 index 000000000..a6144e32d --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/templates/openshiftprivilegedrolebinging.yaml @@ -0,0 +1,17 @@ +# Apply only when running on OpenShift to let the kubelet plugin run privileged +{{- if .Capabilities.APIVersions.Has "security.openshift.io/v1/SecurityContextConstraints" -}} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "k8s-dra-driver.fullname" . }}-openshift-privileged-role-binding + namespace: {{ include "k8s-dra-driver.namespace" . }} +subjects: +- kind: ServiceAccount + name: {{ include "k8s-dra-driver.serviceAccountName" . }} + namespace: {{ include "k8s-dra-driver.namespace" . }} +roleRef: + kind: ClusterRole + name: system:openshift:scc:privileged + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/deployments/helm/k8s-dra-imex-driver/templates/serviceaccount.yaml b/deployments/helm/k8s-dra-imex-driver/templates/serviceaccount.yaml new file mode 100644 index 000000000..487a6fa1f --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "k8s-dra-driver.serviceAccountName" . }} + namespace: {{ include "k8s-dra-driver.namespace" . }} + labels: + {{- include "k8s-dra-driver.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/deployments/helm/k8s-dra-imex-driver/templates/validatingadmissionpolicy.yaml b/deployments/helm/k8s-dra-imex-driver/templates/validatingadmissionpolicy.yaml new file mode 100644 index 000000000..82c3bab5f --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/templates/validatingadmissionpolicy.yaml @@ -0,0 +1,33 @@ +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicy +metadata: + name: resourceslices-policy-{{ include "k8s-dra-driver.fullname" . }} +spec: + failurePolicy: Fail + matchConstraints: + resourceRules: + - apiGroups: ["resource.k8s.io"] + apiVersions: ["v1beta1"] + operations: ["CREATE", "UPDATE", "DELETE"] + resources: ["resourceslices"] + matchConditions: + - name: isRestrictedUser + expression: >- + request.userInfo.username == "system:serviceaccount:{{ include "k8s-dra-driver.namespace" . }}:{{ include "k8s-dra-driver.serviceAccountName" . }}" + variables: + - name: userNodeName + expression: >- + request.userInfo.extra[?'authentication.kubernetes.io/node-name'][0].orValue('') + - name: objectNodeName + expression: >- + (request.operation == "DELETE" ?
oldObject : object).spec.?nodeName.orValue("") + - name: nodeSelectorValue + expression: >- + (request.operation == "DELETE" ? oldObject : object).spec.nodeSelector.nodeSelectorTerms[0].matchExpressions[0].values[0].orValue("") + validations: + - expression: variables.userNodeName != "" + message: >- + no node association found for user, this user must run in a pod on a node and ServiceAccountTokenPodNodeInfo must be enabled + - expression: variables.userNodeName == variables.objectNodeName || variables.nodeSelectorValue != "" + messageExpression: >- + "this user running on node '"+variables.userNodeName+"' may not modify cluster or node resourceslices" diff --git a/deployments/helm/k8s-dra-imex-driver/templates/validatingadmissionpolicybinding.yaml b/deployments/helm/k8s-dra-imex-driver/templates/validatingadmissionpolicybinding.yaml new file mode 100644 index 000000000..2a668b697 --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/templates/validatingadmissionpolicybinding.yaml @@ -0,0 +1,8 @@ +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicyBinding +metadata: + name: resourceslices-policy-{{ include "k8s-dra-driver.fullname" . }} +spec: + policyName: resourceslices-policy-{{ include "k8s-dra-driver.fullname" . }} + validationActions: [Deny] + # All ResourceSlices are matched. diff --git a/deployments/helm/k8s-dra-imex-driver/templates/validation.yaml b/deployments/helm/k8s-dra-imex-driver/templates/validation.yaml new file mode 100644 index 000000000..ce2dbe689 --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/templates/validation.yaml @@ -0,0 +1,63 @@ +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{- $validDeviceClasses := list "gpu" "mig" "imex" }} + +{{- if not (kindIs "slice" .Values.deviceClasses) }} +{{- $error := "" }} +{{- $error = printf "%s\nValue 'deviceClasses' must be a list: %v" $error .Values.deviceClasses }} +{{- $error = printf "%s\nValid device classes are: %v" $error $validDeviceClasses }} +{{- fail $error }} +{{- end }} + +{{- if eq (len .Values.deviceClasses) 0 }} +{{- $error := "" }} +{{- $error = printf "%s\nAt least one 'deviceClass' must be specified." $error }} +{{- $error = printf "%s\nValid device classes are: %v" $error $validDeviceClasses }} +{{- fail $error }} +{{- end }} + +{{- range .Values.deviceClasses }} + {{- $deviceClass := . }} + {{- $found := false }} + {{- range $validDeviceClasses }} + {{- if eq . 
$deviceClass }} + {{- $found = true }} + {{- end }} + {{- end }} + {{- if not $found }} + {{- $error := "" }} + {{- $error = printf "%s\nInvalid value in 'deviceClasses': %s" $error $deviceClass }} + {{- $error = printf "%s\nValid device classes are: %v" $error $validDeviceClasses }} + {{- fail $error }} + {{- end }} +{{- end }} + +{{- if .Values.namespace }} +{{- $error := "" }} +{{- $error = printf "%s\nValue 'namespace' set to %s" $error .Values.namespace }} +{{- $error = printf "%s\nSetting an explicit 'namespace' in values.yaml or via --set on the command line is no longer supported." $error }} +{{- $error = printf "%s\nUse --namespace (with --create-namespace as necessary) instead." $error }} +{{- $error = printf "%s\nSee: https://helm.sh/docs/helm/helm_install/#options" $error }} +{{- fail $error }} +{{- end }} + +{{- if and (eq (include "k8s-dra-driver.namespace" .) "default") ( eq .Values.namespaceOverride "") (not .Values.allowDefaultNamespace) }} +{{- $error := "" }} +{{- $error = printf "%s\nRunning in the 'default' namespace is not recommended." $error }} +{{- $error = printf "%s\nSet 'allowDefaultNamespace=true' to bypass this error." $error }} +{{- $error = printf "%s\nOtherwise, use --namespace (with --create-namespace as necessary) to run in a specific namespace." $error }} +{{- $error = printf "%s\nSee: https://helm.sh/docs/helm/helm_install/#options" $error }} +{{- fail $error }} +{{- end }} diff --git a/deployments/helm/k8s-dra-imex-driver/values.yaml b/deployments/helm/k8s-dra-imex-driver/values.yaml new file mode 100644 index 000000000..76ff38cad --- /dev/null +++ b/deployments/helm/k8s-dra-imex-driver/values.yaml @@ -0,0 +1,121 @@ +# Copyright 2023 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default values for k8s-dra-driver. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Specify the driver root on the host. +# If the NVIDIA GPU driver is managed using the NVIDIA GPU Driver Container, +# this is typically /run/nvidia/driver. +# For a driver installed directly on the host, a value of `/` is used. +nvidiaDriverRoot: / + +# Specify the path of the CTK binary (nvidia-ctk) on the host, +# as it should appear in the generated CDI specification. +# The path depends on the system that runs on the node. +nvidiaCtkPath: /usr/bin/nvidia-ctk + +nameOverride: "" +fullnameOverride: "" +namespaceOverride: "" +selectorLabelsOverride: {} + +allowDefaultNamespace: false + +deviceClasses: ["gpu", "mig", "imex"] + +# Masking of the params file is typically done to allow nvkind to +# selectively exclude certain GPUs from being visible to the +# underlying GPU driver. Unfortunately, kind doesn't let you choose +# which device nodes to inject into each worker node (they all come in +# via the --privileged flag passed to docker/podman). Because of +# this, all workers see all GPUs by default.
By masking the params +# file we can prevent a container from recreating any missing GPU +# device nodes and limit its view to only those device nodes that +# nvkind decided to allow in. +maskNvidiaDriverParams: false + +imagePullSecrets: [] +image: + repository: nvcr.io/nvidia/cloud-native/k8s-dra-driver + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + +serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +controller: + priorityClassName: "system-node-critical" + podAnnotations: {} + podSecurityContext: {} + nodeSelector: {} + tolerations: + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + affinity: {} + containers: + controller: + securityContext: {} + resources: {} + +kubeletPlugin: + priorityClassName: "system-node-critical" + updateStrategy: + type: RollingUpdate + podAnnotations: {} + podSecurityContext: {} + nodeSelector: {} + tolerations: [] + containers: + init: + securityContext: {} + resources: {} + plugin: + securityContext: + privileged: true + resources: {} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + # On discrete-GPU-based systems NFD adds the following label, where 10de is the NVIDIA PCI vendor ID + - key: feature.node.kubernetes.io/pci-10de.present + operator: In + values: + - "true" + - matchExpressions: + # On some Tegra-based systems NFD detects the CPU vendor ID as NVIDIA + - key: feature.node.kubernetes.io/cpu-model.vendor_id + operator: In + values: + - "NVIDIA" + - matchExpressions: + # We allow a GPU deployment to be forced by setting the following label to "true" + - key: "nvidia.com/gpu.present" + operator: In + values: + - "true"
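
For reference, installing the new IMEX chart by hand (outside the demo scripts above) might look like the following sketch. It mirrors demo/clusters/kind/install-dra-imex-driver.sh, with assumed values: the "nvidia" namespace, only the "imex" device class, and a driver root of /run/nvidia/driver as used with the driver container; adjust these for your cluster.

    # Install only the IMEX device class from a local checkout of the chart.
    helm upgrade -i --create-namespace --namespace nvidia \
      nvidia-dra-imex-driver deployments/helm/k8s-dra-imex-driver \
      --set deviceClasses="{imex}" \
      --set nvidiaDriverRoot=/run/nvidia/driver \
      --wait

    # Confirm the controller Deployment and kubelet-plugin DaemonSet came up.
    kubectl get pod -n nvidia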