From 8bc451fa43fb32ff73e52bbf5b24bb8a17d056b4 Mon Sep 17 00:00:00 2001
From: Christopher Desiniotis <cdesiniotis@nvidia.com>
Date: Thu, 25 Aug 2022 13:53:03 -0700
Subject: [PATCH] Update container deployment to use the nvidia-vgpu-dm cli

Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
---
 deployments/container/Dockerfile.ubi8    |   9 +-
 main.go => deployments/container/main.go |  70 ++++--
 pkg/devicemanager/device-manager.go      | 305 -----------------------
 3 files changed, 62 insertions(+), 322 deletions(-)
 rename main.go => deployments/container/main.go (90%)
 delete mode 100644 pkg/devicemanager/device-manager.go

diff --git a/deployments/container/Dockerfile.ubi8 b/deployments/container/Dockerfile.ubi8
index 93c49996..b275a282 100644
--- a/deployments/container/Dockerfile.ubi8
+++ b/deployments/container/Dockerfile.ubi8
@@ -21,13 +21,16 @@ FROM golang:${GOLANG_VERSION} AS build
 
 WORKDIR /build
 COPY . .
-RUN GOOS=linux go build -o /artifacts/vgpu-device-manager .
+RUN GOOS=linux go build -o /artifacts/nvidia-vgpu-dm ./cmd/nvidia-vgpu-dm
+RUN GOOS=linux go build -o /artifacts/k8s-nvidia-vgpu-dm ./deployments/container/
+
 
 FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
 
 ENV NVIDIA_VISIBLE_DEVICES=void
 
-COPY --from=build /artifacts/vgpu-device-manager /usr/bin/vgpu-device-manager
+COPY --from=build /artifacts/nvidia-vgpu-dm /usr/bin/nvidia-vgpu-dm
+COPY --from=build /artifacts/k8s-nvidia-vgpu-dm /usr/bin/k8s-nvidia-vgpu-dm
 
 LABEL version="${VERSION}"
 LABEL release="N/A"
@@ -45,4 +48,4 @@ RUN if [ -n "${CVE_UPDATES}" ]; then \
         rm -rf /var/cache/yum/*; \
     fi
 
-ENTRYPOINT ["vgpu-device-manager"]
+ENTRYPOINT ["k8s-nvidia-vgpu-dm"]
diff --git a/main.go b/deployments/container/main.go
similarity index 90%
rename from main.go
rename to deployments/container/main.go
index 7fc05c15..143ed0a1 100644
--- a/main.go
+++ b/deployments/container/main.go
@@ -24,9 +24,9 @@ import (
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/client-go/tools/clientcmd"
 	"os"
+	"os/exec"
 
 	"context"
-	dm "gitlab.com/nvidia/cloud-native/vgpu-device-manager/pkg/devicemanager"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -37,6 +37,7 @@ import (
 )
 
 const (
+	cliName              = "nvidia-vgpu-dm"
 	resourceNodes        = "nodes"
 	vGPUConfigLabel      = "nvidia.com/vgpu.config"
 	vGPUConfigStateLabel = "nvidia.com/vgpu.config.state"
@@ -179,11 +180,6 @@ func start(c *cli.Context) error {
 		return fmt.Errorf("error building kubernetes clientset from config: %s", err)
 	}
 
-	m, err := dm.NewVGPUDeviceManager(configFileFlag)
-	if err != nil {
-		return fmt.Errorf("error creating new VGPUDeviceManager: %v", err)
-	}
-
 	vGPUConfig := NewSyncableVGPUConfig()
 
 	stop := continuouslySyncVGPUConfigChanges(clientset, vGPUConfig)
@@ -204,7 +200,7 @@ func start(c *cli.Context) error {
 	}
 
 	log.Infof("Updating to vGPU config: %s", selectedConfig)
-	err = updateConfig(clientset, m, selectedConfig)
+	err = updateConfig(clientset, selectedConfig)
 	if err != nil {
 		log.Errorf("ERROR: %v", err)
 	} else {
@@ -216,7 +212,7 @@ func start(c *cli.Context) error {
 		log.Infof("Waiting for change to '%s' label", vGPUConfigLabel)
 		value := vGPUConfig.Get()
 		log.Infof("Updating to vGPU config: %s", value)
-		err = updateConfig(clientset, m, value)
+		err = updateConfig(clientset, value)
 		if err != nil {
 			log.Errorf("ERROR: %v", err)
 			continue
@@ -254,17 +250,24 @@ func continuouslySyncVGPUConfigChanges(clientset *kubernetes.Clientset, vGPUConf
 	return stop
 }
 
-func updateConfig(clientset *kubernetes.Clientset, m *dm.VGPUDeviceManager, selectedConfig string) error {
+func updateConfig(clientset *kubernetes.Clientset, selectedConfig string) error {
 	defer setVGPUConfigStateLabel(clientset)
 	vGPUConfigState = "failed"
 
 	log.Info("Asserting that the requested configuration is present in the configuration file")
-	ok := m.AssertValidConfig(selectedConfig)
-	if !ok {
-		return fmt.Errorf("%s is not a valid config", selectedConfig)
+	err := assertValidConfig(selectedConfig)
+	if err != nil {
+		return fmt.Errorf("Unable to validate the selected vGPU configuration")
+	}
+
+	log.Info("Checking if the selected vGPU device configuration is currently applied or not")
+	err = assertConfig(selectedConfig)
+	if err == nil {
+		vGPUConfigState = "success"
+		return nil
 	}
 
-	err := getNodeStateLabels(clientset)
+	err = getNodeStateLabels(clientset)
 	if err != nil {
 		return fmt.Errorf("unable to get node state labels: %v", err)
 	}
@@ -281,7 +284,8 @@ func updateConfig(clientset *kubernetes.Clientset, m *dm.VGPUDeviceManager, sele
 		return fmt.Errorf("unable to shutdown gpu operands: %v", err)
 	}
 
-	err = m.ApplyConfig(selectedConfig)
+	log.Info("Applying the selected vGPU device configuration to the node")
+	err = applyConfig(selectedConfig)
 	if err != nil {
 		return fmt.Errorf("unable to apply config '%s': %v", selectedConfig, err)
 	}
@@ -296,6 +300,44 @@ func updateConfig(clientset *kubernetes.Clientset, m *dm.VGPUDeviceManager, sele
 	return nil
 }
 
+func assertValidConfig(config string) error {
+	args := []string{
+		"assert",
+		"--valid-config",
+		"-f", configFileFlag,
+		"-c", config,
+	}
+	cmd := exec.Command(cliName, args...)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	return cmd.Run()
+}
+
+func assertConfig(config string) error {
+	args := []string{
+		"assert",
+		"-f", configFileFlag,
+		"-c", config,
+	}
+	cmd := exec.Command(cliName, args...)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	return cmd.Run()
+}
+
+func applyConfig(config string) error {
+	args := []string{
+		"-d",
+		"apply",
+		"-f", configFileFlag,
+		"-c", config,
+	}
+	cmd := exec.Command(cliName, args...)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	return cmd.Run()
+}
+
 func getNodeStateLabels(clientset *kubernetes.Clientset) error {
 	node, err := clientset.CoreV1().Nodes().Get(context.TODO(), nodeNameFlag, metav1.GetOptions{})
 	if err != nil {
diff --git a/pkg/devicemanager/device-manager.go b/pkg/devicemanager/device-manager.go
deleted file mode 100644
index 704c924a..00000000
--- a/pkg/devicemanager/device-manager.go
+++ /dev/null
@@ -1,305 +0,0 @@
-/*
- * Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package devicemanager
-
-import (
-	"container/ring"
-	"fmt"
-	"github.com/google/uuid"
-	log "github.com/sirupsen/logrus"
-	"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvmdev"
-	"gitlab.com/nvidia/cloud-native/vgpu-device-manager/api/spec/v1"
-	"os"
-	"sigs.k8s.io/yaml"
-	"sync"
-)
-
-// VGPUDeviceManager is responsible for applying a desired vGPU configuration.
-// A vGPU configuration is simply a list of desired vGPU types. Given a valid
-// vGPU configuration, the VGPUDeviceManager will create vGPU devices of the desired
-// types on the K8s worker node.
-type VGPUDeviceManager struct {
-	config                 *v1.Spec
-	nvmdev                 nvmdev.Interface
-	mutex                  sync.Mutex
-	parentDevices          []*nvmdev.ParentDevice
-	availableVGPUTypesMap  map[string][]string
-	unconfiguredParentsMap map[string]*nvmdev.ParentDevice
-}
-
-// NewVGPUDeviceManager creates a new VGPUDeviceManager
-func NewVGPUDeviceManager(configFile string) (*VGPUDeviceManager, error) {
-	config, err := parseConfigFile(configFile)
-	if err != nil {
-		return nil, fmt.Errorf("unable to parse config file: %v", err)
-	}
-
-	return &VGPUDeviceManager{
-		config:                 config,
-		nvmdev:                 nvmdev.New(),
-		parentDevices:          []*nvmdev.ParentDevice{},
-		availableVGPUTypesMap:  make(map[string][]string),
-		unconfiguredParentsMap: make(map[string]*nvmdev.ParentDevice),
-	}, nil
-}
-
-// AssertValidConfig asserts that the named vGPU config is present
-// in the vGPU configuration file.
-func (m *VGPUDeviceManager) AssertValidConfig(selectedConfig string) bool {
-	_, ok := m.config.VGPUConfigs[selectedConfig]
-	return ok
-}
-
-// ApplyConfig applies a named vGPU config.
-func (m *VGPUDeviceManager) ApplyConfig(selectedConfig string) error {
-	if !m.AssertValidConfig(selectedConfig) {
-		return fmt.Errorf("%s is not a valid config", selectedConfig)
-	}
-
-	// TODO: the below is temporary until this package is re-implemented
-	// to properly consume the updated api spec.
-	desiredTypes := []string{}
-	for _, vgpuConfig := range m.config.VGPUConfigs[selectedConfig] {
-		if err := vgpuConfig.VGPUDevices.AssertValid(); err != nil {
-			return fmt.Errorf("config '%s' is malformed: %v", selectedConfig, err)
-		}
-		for k := range vgpuConfig.VGPUDevices {
-			desiredTypes = append(desiredTypes, k)
-		}
-	}
-	err := m.reconcileVGPUDevices(desiredTypes)
-	if err != nil {
-		return fmt.Errorf("%v", err)
-	}
-	return nil
-}
-
-// reconcileVGPUDevices reconciles the list of desired vGPU types with the
-// actual vGPU devices present on the node. No vGPU device on the node will
-// will be of a type not present in the desired lsit of types.
-//
-// NOTE: Currently no pre-existing vGPU devices are retained on the node, and instead
-// every invocation of 'reconcileVGPUDevices()' deletes all existing vGPU
-// devices and create new ones based on the list of desired types.
-//
-// TODO: only delete existing vGPU devices if required.
-func (m *VGPUDeviceManager) reconcileVGPUDevices(desiredTypes []string) error {
-	m.mutex.Lock()
-	defer m.mutex.Unlock()
-
-	parentDevices, err := m.nvmdev.GetAllParentDevices()
-	log.Debugf("Number of parent devices: %d", len(parentDevices))
-	if err != nil {
-		return fmt.Errorf("error getting all NVIDIA PCI devices: %v", err)
-	}
-	m.parentDevices = parentDevices
-
-	log.Info("Deleting any existing vGPU devices...")
-	err = m.deleteAllVGPUDevices()
-	if err != nil {
-		return fmt.Errorf("error deleting existing vGPU devices: %v", err)
-	}
-
-	log.Info("Discovering vGPU devices to configure...")
-	err = m.discoverConfigurableVGPUTypes(desiredTypes)
-	if err != nil {
-		return fmt.Errorf("error discovering configurable vGPU types on the node: %v", err)
-	}
-
-	if (len(m.unconfiguredParentsMap) == 0) || (len(m.availableVGPUTypesMap) == 0) {
-		log.Info("Nothing to configure")
-		return nil
-	}
-
-	log.Info("Creating desired vGPU devices...")
-	err = m.createDesiredVGPUDevices()
-	if err != nil {
-		return fmt.Errorf("error creating desired vGPU devices: %v", err)
-	}
-
-	return nil
-}
-
-// discoverConfigurableVGPUTypes discovers the overlap between the desired vGPU types
-// from the config and the available vGPU types on the node. Based on this overlap,
-// the necessary data structures are populated which are later used when creating
-// vGPU devices.
-func (m *VGPUDeviceManager) discoverConfigurableVGPUTypes(desiredTypes []string) error {
-	for _, parent := range m.parentDevices {
-		for _, desiredType := range desiredTypes {
-			available, err := parent.IsMDEVTypeAvailable(desiredType)
-			if err != nil {
-				return fmt.Errorf("failure to detect if vGPU type %s is available on device %s: %v", desiredType, parent.Address, err)
-			}
-			if available {
-				// availableVGPUTypesMap maps vGPU types to a list of parent devices
-				// that can support vGPU devices of said types.
-				parentsArray, exists := m.availableVGPUTypesMap[desiredType]
-				if !exists {
-					parentsArray = []string{}
-				}
-				parentsArray = append(parentsArray, parent.Address)
-				m.availableVGPUTypesMap[desiredType] = parentsArray
-				// unconfiguredParentsMap maps a parent PCI address to its
-				// corresponding ParentDevice struct. Parent devices present
-				// in the map do not have any vGPU devices created yet.
-				m.unconfiguredParentsMap[parent.Address] = parent
-			}
-		}
-	}
-	return nil
-}
-
-// deleteAllVGPUDevices unconditionally deletes all vGPU devices
-// present on the node. vGPU devices can only be deleted if they
-// are not busy (e.g. assigned to a VM).
-func (m *VGPUDeviceManager) deleteAllVGPUDevices() error {
-	mdevs, err := m.nvmdev.GetAllDevices()
-	if err != nil {
-		return fmt.Errorf("unable to get all mdev devices: %v", err)
-	}
-
-	for _, device := range mdevs {
-		err := device.Delete()
-		if err != nil {
-			return fmt.Errorf("failed to delete mdev: %v", err)
-		}
-		log.WithFields(log.Fields{
-			"vGPUType": device.MDEVType,
-			"uuid":     device.UUID,
-		}).Info("Successfully deleted vGPU device")
-	}
-
-	return nil
-}
-
-// newVGPUTypesRing returns a new ring buffer containing vGPU types to configure.
-func (m *VGPUDeviceManager) newVGPUTypesRing() *ring.Ring {
-	r := ring.New(len(m.availableVGPUTypesMap))
-
-	for vGPUType := range m.availableVGPUTypesMap {
-		r.Value = vGPUType
-		r = r.Next()
-	}
-
-	return r
-}
-
-// getNextAvailableParentDevice returns the next available parent device from a list
-// of parent devices. Parent devices that are already configured (vGPU devices have
-// been created) are skipped.
-func (m *VGPUDeviceManager) getNextAvailableParentDevice(parents []string) (*nvmdev.ParentDevice, []string) {
-	for i := 0; i <= len(parents); i++ {
-		parent := parents[i]
-		if dev, exists := m.unconfiguredParentsMap[parent]; exists {
-			return dev, parents[i+1:]
-		}
-	}
-	return nil, parents
-}
-
-// createDesiredVGPUDevices iterates over a vGPU type ring buffer and creates vGPU devices.
-// The vGPU type ring buffer is initialized with a list of vGPU types -- the types form the
-// overlap between the desired types and those that are available on the node. The algorithm
-// continues until there are no more available parent devices or there are no more available
-// vGPU types to create from the desired list.
-//
-// Example:
-//      Given: Node has 3, A10 GPUs
-//      Input: Desired list of vGPU types - [A10-4C, A10-8C]
-//      Result:
-//          - 6, A10-4C devices get created on the first GPU
-//          - 3, A10-8C devices get created on the second GPU
-//          - 6, A10-4C devices get created on the third GPU
-func (m *VGPUDeviceManager) createDesiredVGPUDevices() error {
-	r := m.newVGPUTypesRing()
-
-	if r.Len() == 0 {
-		log.Warn("No available vGPU types to create")
-		return nil
-	}
-
-	for {
-		vGPUType := r.Value.(string)
-		if parents, ok := m.availableVGPUTypesMap[vGPUType]; ok {
-			if len(parents) == 0 {
-				log.Debugf("No available parent devices for vGPU type: %s\n", vGPUType)
-				delete(m.availableVGPUTypesMap, vGPUType)
-			}
-			parentDevice, parents := m.getNextAvailableParentDevice(parents)
-			availableInstances, err := parentDevice.GetAvailableMDEVInstances(vGPUType)
-			if err != nil {
-				return fmt.Errorf("unable to check if %s is available on device %s: %v", vGPUType, parentDevice.Address, err)
-			}
-			if availableInstances > 0 {
-				log.Infof("Creating %d instance(s) of vGPU type %s on device %s", availableInstances, vGPUType, parentDevice.Address)
-				for i := 0; i < availableInstances; i++ {
-					uuid := uuid.New().String()
-					err := parentDevice.CreateMDEVDevice(vGPUType, uuid)
-					if err != nil {
-						return fmt.Errorf("unable to create %s device on parent device %s: %v", vGPUType, parentDevice.Address, err)
-					}
-					log.WithFields(log.Fields{
-						"vGPUType":   vGPUType,
-						"pciAddress": parentDevice.Address,
-						"uuid":       uuid,
-					}).Info("Successfully created vGPU device")
-				}
-				delete(m.unconfiguredParentsMap, parentDevice.Address)
-			}
-
-			if len(parents) > 0 {
-				m.availableVGPUTypesMap[vGPUType] = parents
-			}
-			if len(parents) == 0 {
-				delete(m.availableVGPUTypesMap, vGPUType)
-			}
-		}
-		r = r.Next()
-
-		if (len(m.unconfiguredParentsMap) == 0) || (len(m.availableVGPUTypesMap) == 0) {
-			break
-		}
-	}
-	return nil
-}
-
-func parseConfigFile(configFile string) (*v1.Spec, error) {
-	var err error
-	var configYaml []byte
-	configYaml, err = os.ReadFile(configFile)
-	if err != nil {
-		return nil, fmt.Errorf("read error: %v", err)
-	}
-
-	var spec v1.Spec
-	err = yaml.Unmarshal(configYaml, &spec)
-	if err != nil {
-		return nil, fmt.Errorf("unmarshal error: %v", err)
-	}
-
-	return &spec, nil
-}
-
-func stringInSlice(slice []string, str string) bool {
-	for _, value := range slice {
-		if value == str {
-			return true
-		}
-	}
-	return false
-}