From 8bc451fa43fb32ff73e52bbf5b24bb8a17d056b4 Mon Sep 17 00:00:00 2001 From: Christopher Desiniotis <cdesiniotis@nvidia.com> Date: Thu, 25 Aug 2022 13:53:03 -0700 Subject: [PATCH] Update container deployment to use the nvidia-vgpu-dm cli Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com> --- deployments/container/Dockerfile.ubi8 | 9 +- main.go => deployments/container/main.go | 70 ++++-- pkg/devicemanager/device-manager.go | 305 ----------------------- 3 files changed, 62 insertions(+), 322 deletions(-) rename main.go => deployments/container/main.go (90%) delete mode 100644 pkg/devicemanager/device-manager.go diff --git a/deployments/container/Dockerfile.ubi8 b/deployments/container/Dockerfile.ubi8 index 93c49996..b275a282 100644 --- a/deployments/container/Dockerfile.ubi8 +++ b/deployments/container/Dockerfile.ubi8 @@ -21,13 +21,16 @@ FROM golang:${GOLANG_VERSION} AS build WORKDIR /build COPY . . -RUN GOOS=linux go build -o /artifacts/vgpu-device-manager . +RUN GOOS=linux go build -o /artifacts/nvidia-vgpu-dm ./cmd/nvidia-vgpu-dm +RUN GOOS=linux go build -o /artifacts/k8s-nvidia-vgpu-dm ./deployments/container/ + FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST} ENV NVIDIA_VISIBLE_DEVICES=void -COPY --from=build /artifacts/vgpu-device-manager /usr/bin/vgpu-device-manager +COPY --from=build /artifacts/nvidia-vgpu-dm /usr/bin/nvidia-vgpu-dm +COPY --from=build /artifacts/k8s-nvidia-vgpu-dm /usr/bin/k8s-nvidia-vgpu-dm LABEL version="${VERSION}" LABEL release="N/A" @@ -45,4 +48,4 @@ RUN if [ -n "${CVE_UPDATES}" ]; then \ rm -rf /var/cache/yum/*; \ fi -ENTRYPOINT ["vgpu-device-manager"] +ENTRYPOINT ["k8s-nvidia-vgpu-dm"] diff --git a/main.go b/deployments/container/main.go similarity index 90% rename from main.go rename to deployments/container/main.go index 7fc05c15..143ed0a1 100644 --- a/main.go +++ b/deployments/container/main.go @@ -24,9 +24,9 @@ import ( "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/clientcmd" "os" + "os/exec" "context" - dm "gitlab.com/nvidia/cloud-native/vgpu-device-manager/pkg/devicemanager" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -37,6 +37,7 @@ import ( ) const ( + cliName = "nvidia-vgpu-dm" resourceNodes = "nodes" vGPUConfigLabel = "nvidia.com/vgpu.config" vGPUConfigStateLabel = "nvidia.com/vgpu.config.state" @@ -179,11 +180,6 @@ func start(c *cli.Context) error { return fmt.Errorf("error building kubernetes clientset from config: %s", err) } - m, err := dm.NewVGPUDeviceManager(configFileFlag) - if err != nil { - return fmt.Errorf("error creating new VGPUDeviceManager: %v", err) - } - vGPUConfig := NewSyncableVGPUConfig() stop := continuouslySyncVGPUConfigChanges(clientset, vGPUConfig) @@ -204,7 +200,7 @@ func start(c *cli.Context) error { } log.Infof("Updating to vGPU config: %s", selectedConfig) - err = updateConfig(clientset, m, selectedConfig) + err = updateConfig(clientset, selectedConfig) if err != nil { log.Errorf("ERROR: %v", err) } else { @@ -216,7 +212,7 @@ func start(c *cli.Context) error { log.Infof("Waiting for change to '%s' label", vGPUConfigLabel) value := vGPUConfig.Get() log.Infof("Updating to vGPU config: %s", value) - err = updateConfig(clientset, m, value) + err = updateConfig(clientset, value) if err != nil { log.Errorf("ERROR: %v", err) continue @@ -254,17 +250,24 @@ func continuouslySyncVGPUConfigChanges(clientset *kubernetes.Clientset, vGPUConf return stop } -func updateConfig(clientset *kubernetes.Clientset, m *dm.VGPUDeviceManager, selectedConfig string) error { +func updateConfig(clientset *kubernetes.Clientset, selectedConfig string) error { defer setVGPUConfigStateLabel(clientset) vGPUConfigState = "failed" log.Info("Asserting that the requested configuration is present in the configuration file") - ok := m.AssertValidConfig(selectedConfig) - if !ok { - return fmt.Errorf("%s is not a valid config", selectedConfig) + err := assertValidConfig(selectedConfig) + if err != nil { + return fmt.Errorf("Unable to validate the selected vGPU configuration") + } + + log.Info("Checking if the selected vGPU device configuration is currently applied or not") + err = assertConfig(selectedConfig) + if err == nil { + vGPUConfigState = "success" + return nil } - err := getNodeStateLabels(clientset) + err = getNodeStateLabels(clientset) if err != nil { return fmt.Errorf("unable to get node state labels: %v", err) } @@ -281,7 +284,8 @@ func updateConfig(clientset *kubernetes.Clientset, m *dm.VGPUDeviceManager, sele return fmt.Errorf("unable to shutdown gpu operands: %v", err) } - err = m.ApplyConfig(selectedConfig) + log.Info("Applying the selected vGPU device configuration to the node") + err = applyConfig(selectedConfig) if err != nil { return fmt.Errorf("unable to apply config '%s': %v", selectedConfig, err) } @@ -296,6 +300,44 @@ func updateConfig(clientset *kubernetes.Clientset, m *dm.VGPUDeviceManager, sele return nil } +func assertValidConfig(config string) error { + args := []string{ + "assert", + "--valid-config", + "-f", configFileFlag, + "-c", config, + } + cmd := exec.Command(cliName, args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} + +func assertConfig(config string) error { + args := []string{ + "assert", + "-f", configFileFlag, + "-c", config, + } + cmd := exec.Command(cliName, args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} + +func applyConfig(config string) error { + args := []string{ + "-d", + "apply", + "-f", configFileFlag, + "-c", config, + } + cmd := exec.Command(cliName, args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} + func getNodeStateLabels(clientset *kubernetes.Clientset) error { node, err := clientset.CoreV1().Nodes().Get(context.TODO(), nodeNameFlag, metav1.GetOptions{}) if err != nil { diff --git a/pkg/devicemanager/device-manager.go b/pkg/devicemanager/device-manager.go deleted file mode 100644 index 704c924a..00000000 --- a/pkg/devicemanager/device-manager.go +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package devicemanager - -import ( - "container/ring" - "fmt" - "github.com/google/uuid" - log "github.com/sirupsen/logrus" - "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvmdev" - "gitlab.com/nvidia/cloud-native/vgpu-device-manager/api/spec/v1" - "os" - "sigs.k8s.io/yaml" - "sync" -) - -// VGPUDeviceManager is responsible for applying a desired vGPU configuration. -// A vGPU configuration is simply a list of desired vGPU types. Given a valid -// vGPU configuration, the VGPUDeviceManager will create vGPU devices of the desired -// types on the K8s worker node. -type VGPUDeviceManager struct { - config *v1.Spec - nvmdev nvmdev.Interface - mutex sync.Mutex - parentDevices []*nvmdev.ParentDevice - availableVGPUTypesMap map[string][]string - unconfiguredParentsMap map[string]*nvmdev.ParentDevice -} - -// NewVGPUDeviceManager creates a new VGPUDeviceManager -func NewVGPUDeviceManager(configFile string) (*VGPUDeviceManager, error) { - config, err := parseConfigFile(configFile) - if err != nil { - return nil, fmt.Errorf("unable to parse config file: %v", err) - } - - return &VGPUDeviceManager{ - config: config, - nvmdev: nvmdev.New(), - parentDevices: []*nvmdev.ParentDevice{}, - availableVGPUTypesMap: make(map[string][]string), - unconfiguredParentsMap: make(map[string]*nvmdev.ParentDevice), - }, nil -} - -// AssertValidConfig asserts that the named vGPU config is present -// in the vGPU configuration file. -func (m *VGPUDeviceManager) AssertValidConfig(selectedConfig string) bool { - _, ok := m.config.VGPUConfigs[selectedConfig] - return ok -} - -// ApplyConfig applies a named vGPU config. -func (m *VGPUDeviceManager) ApplyConfig(selectedConfig string) error { - if !m.AssertValidConfig(selectedConfig) { - return fmt.Errorf("%s is not a valid config", selectedConfig) - } - - // TODO: the below is temporary until this package is re-implemented - // to properly consume the updated api spec. - desiredTypes := []string{} - for _, vgpuConfig := range m.config.VGPUConfigs[selectedConfig] { - if err := vgpuConfig.VGPUDevices.AssertValid(); err != nil { - return fmt.Errorf("config '%s' is malformed: %v", selectedConfig, err) - } - for k := range vgpuConfig.VGPUDevices { - desiredTypes = append(desiredTypes, k) - } - } - err := m.reconcileVGPUDevices(desiredTypes) - if err != nil { - return fmt.Errorf("%v", err) - } - return nil -} - -// reconcileVGPUDevices reconciles the list of desired vGPU types with the -// actual vGPU devices present on the node. No vGPU device on the node will -// will be of a type not present in the desired lsit of types. -// -// NOTE: Currently no pre-existing vGPU devices are retained on the node, and instead -// every invocation of 'reconcileVGPUDevices()' deletes all existing vGPU -// devices and create new ones based on the list of desired types. -// -// TODO: only delete existing vGPU devices if required. -func (m *VGPUDeviceManager) reconcileVGPUDevices(desiredTypes []string) error { - m.mutex.Lock() - defer m.mutex.Unlock() - - parentDevices, err := m.nvmdev.GetAllParentDevices() - log.Debugf("Number of parent devices: %d", len(parentDevices)) - if err != nil { - return fmt.Errorf("error getting all NVIDIA PCI devices: %v", err) - } - m.parentDevices = parentDevices - - log.Info("Deleting any existing vGPU devices...") - err = m.deleteAllVGPUDevices() - if err != nil { - return fmt.Errorf("error deleting existing vGPU devices: %v", err) - } - - log.Info("Discovering vGPU devices to configure...") - err = m.discoverConfigurableVGPUTypes(desiredTypes) - if err != nil { - return fmt.Errorf("error discovering configurable vGPU types on the node: %v", err) - } - - if (len(m.unconfiguredParentsMap) == 0) || (len(m.availableVGPUTypesMap) == 0) { - log.Info("Nothing to configure") - return nil - } - - log.Info("Creating desired vGPU devices...") - err = m.createDesiredVGPUDevices() - if err != nil { - return fmt.Errorf("error creating desired vGPU devices: %v", err) - } - - return nil -} - -// discoverConfigurableVGPUTypes discovers the overlap between the desired vGPU types -// from the config and the available vGPU types on the node. Based on this overlap, -// the necessary data structures are populated which are later used when creating -// vGPU devices. -func (m *VGPUDeviceManager) discoverConfigurableVGPUTypes(desiredTypes []string) error { - for _, parent := range m.parentDevices { - for _, desiredType := range desiredTypes { - available, err := parent.IsMDEVTypeAvailable(desiredType) - if err != nil { - return fmt.Errorf("failure to detect if vGPU type %s is available on device %s: %v", desiredType, parent.Address, err) - } - if available { - // availableVGPUTypesMap maps vGPU types to a list of parent devices - // that can support vGPU devices of said types. - parentsArray, exists := m.availableVGPUTypesMap[desiredType] - if !exists { - parentsArray = []string{} - } - parentsArray = append(parentsArray, parent.Address) - m.availableVGPUTypesMap[desiredType] = parentsArray - // unconfiguredParentsMap maps a parent PCI address to its - // corresponding ParentDevice struct. Parent devices present - // in the map do not have any vGPU devices created yet. - m.unconfiguredParentsMap[parent.Address] = parent - } - } - } - return nil -} - -// deleteAllVGPUDevices unconditionally deletes all vGPU devices -// present on the node. vGPU devices can only be deleted if they -// are not busy (e.g. assigned to a VM). -func (m *VGPUDeviceManager) deleteAllVGPUDevices() error { - mdevs, err := m.nvmdev.GetAllDevices() - if err != nil { - return fmt.Errorf("unable to get all mdev devices: %v", err) - } - - for _, device := range mdevs { - err := device.Delete() - if err != nil { - return fmt.Errorf("failed to delete mdev: %v", err) - } - log.WithFields(log.Fields{ - "vGPUType": device.MDEVType, - "uuid": device.UUID, - }).Info("Successfully deleted vGPU device") - } - - return nil -} - -// newVGPUTypesRing returns a new ring buffer containing vGPU types to configure. -func (m *VGPUDeviceManager) newVGPUTypesRing() *ring.Ring { - r := ring.New(len(m.availableVGPUTypesMap)) - - for vGPUType := range m.availableVGPUTypesMap { - r.Value = vGPUType - r = r.Next() - } - - return r -} - -// getNextAvailableParentDevice returns the next available parent device from a list -// of parent devices. Parent devices that are already configured (vGPU devices have -// been created) are skipped. -func (m *VGPUDeviceManager) getNextAvailableParentDevice(parents []string) (*nvmdev.ParentDevice, []string) { - for i := 0; i <= len(parents); i++ { - parent := parents[i] - if dev, exists := m.unconfiguredParentsMap[parent]; exists { - return dev, parents[i+1:] - } - } - return nil, parents -} - -// createDesiredVGPUDevices iterates over a vGPU type ring buffer and creates vGPU devices. -// The vGPU type ring buffer is initialized with a list of vGPU types -- the types form the -// overlap between the desired types and those that are available on the node. The algorithm -// continues until there are no more available parent devices or there are no more available -// vGPU types to create from the desired list. -// -// Example: -// Given: Node has 3, A10 GPUs -// Input: Desired list of vGPU types - [A10-4C, A10-8C] -// Result: -// - 6, A10-4C devices get created on the first GPU -// - 3, A10-8C devices get created on the second GPU -// - 6, A10-4C devices get created on the third GPU -func (m *VGPUDeviceManager) createDesiredVGPUDevices() error { - r := m.newVGPUTypesRing() - - if r.Len() == 0 { - log.Warn("No available vGPU types to create") - return nil - } - - for { - vGPUType := r.Value.(string) - if parents, ok := m.availableVGPUTypesMap[vGPUType]; ok { - if len(parents) == 0 { - log.Debugf("No available parent devices for vGPU type: %s\n", vGPUType) - delete(m.availableVGPUTypesMap, vGPUType) - } - parentDevice, parents := m.getNextAvailableParentDevice(parents) - availableInstances, err := parentDevice.GetAvailableMDEVInstances(vGPUType) - if err != nil { - return fmt.Errorf("unable to check if %s is available on device %s: %v", vGPUType, parentDevice.Address, err) - } - if availableInstances > 0 { - log.Infof("Creating %d instance(s) of vGPU type %s on device %s", availableInstances, vGPUType, parentDevice.Address) - for i := 0; i < availableInstances; i++ { - uuid := uuid.New().String() - err := parentDevice.CreateMDEVDevice(vGPUType, uuid) - if err != nil { - return fmt.Errorf("unable to create %s device on parent device %s: %v", vGPUType, parentDevice.Address, err) - } - log.WithFields(log.Fields{ - "vGPUType": vGPUType, - "pciAddress": parentDevice.Address, - "uuid": uuid, - }).Info("Successfully created vGPU device") - } - delete(m.unconfiguredParentsMap, parentDevice.Address) - } - - if len(parents) > 0 { - m.availableVGPUTypesMap[vGPUType] = parents - } - if len(parents) == 0 { - delete(m.availableVGPUTypesMap, vGPUType) - } - } - r = r.Next() - - if (len(m.unconfiguredParentsMap) == 0) || (len(m.availableVGPUTypesMap) == 0) { - break - } - } - return nil -} - -func parseConfigFile(configFile string) (*v1.Spec, error) { - var err error - var configYaml []byte - configYaml, err = os.ReadFile(configFile) - if err != nil { - return nil, fmt.Errorf("read error: %v", err) - } - - var spec v1.Spec - err = yaml.Unmarshal(configYaml, &spec) - if err != nil { - return nil, fmt.Errorf("unmarshal error: %v", err) - } - - return &spec, nil -} - -func stringInSlice(slice []string, str string) bool { - for _, value := range slice { - if value == str { - return true - } - } - return false -}