Skip to content

Commit

Permalink
Move to GetComputeDomainFunc instead of ComputeDomainExistsFunc
Browse files Browse the repository at this point in the history
Signed-off-by: Kevin Klues <[email protected]>
  • Loading branch information
klueska committed Jan 23, 2025
1 parent fa8bce2 commit dd113e8
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 63 deletions.
25 changes: 16 additions & 9 deletions cmd/nvidia-dra-imex-controller/computedomain.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
nvlisters "github.com/NVIDIA/k8s-dra-driver/pkg/nvidia.com/listers/resource/v1beta1"
)

// ComputeDomainExistsFunc reports whether a ComputeDomain with the given UID
// exists.
// NOTE(review): this diff is rendered without +/- markers; per the commit
// title this declaration is the one being replaced by GetComputeDomainFunc.
type ComputeDomainExistsFunc func(uid string) (bool, error)
// GetComputeDomainFunc looks up a ComputeDomain by its UID.
// ComputeDomainManager.Get is handed to the sub-managers as this callback.
// Callers in this diff treat a nil *nvapi.ComputeDomain with a nil error as
// "no such ComputeDomain", so implementations should return (nil, nil) rather
// than an error when nothing matches.
type GetComputeDomainFunc func(uid string) (*nvapi.ComputeDomain, error)

const (
informerResyncPeriod = 10 * time.Minute
Expand Down Expand Up @@ -65,9 +65,9 @@ func NewComputeDomainManager(config *ManagerConfig) *ComputeDomainManager {
informer: informer,
lister: lister,
}
m.deploymentManager = NewDeploymentManager(config, m.Exists)
m.deviceClassManager = NewDeviceClassManager(config, m.Exists)
m.resourceClaimManager = NewResourceClaimManager(config, m.Exists)
m.deploymentManager = NewDeploymentManager(config, m.Get)
m.deviceClassManager = NewDeviceClassManager(config, m.Get)
m.resourceClaimManager = NewResourceClaimManager(config, m.Get)

return m
}
Expand Down Expand Up @@ -144,16 +144,23 @@ func (m *ComputeDomainManager) Stop() error {
return nil
}

// NOTE(review): this hunk is rendered without +/- markers, so the pre-change
// Exists lines and the post-change Get lines appear interleaved. Per the
// commit title, Exists (and its bool returns) is the version being replaced.
//
// Exists checks if a ComputeDomain with a specific UID exists.
func (m *ComputeDomainManager) Exists(uid string) (bool, error) {
// Get gets a ComputeDomain with a specific UID.
//
// The lookup is served from the informer's indexer via its "uid" index.
// Possible results:
//   - (nil, err) if the index lookup fails, if more than one object is indexed
//     under uid, or if the indexed object is not a *nvapi.ComputeDomain;
//   - (nil, nil) if nothing is indexed under uid, so callers must check the
//     returned pointer for nil and not rely on the error alone;
//   - (cd, nil) when exactly one ComputeDomain matches.
func (m *ComputeDomainManager) Get(uid string) (*nvapi.ComputeDomain, error) {
cds, err := m.informer.GetIndexer().ByIndex("uid", uid)
if err != nil {
return false, fmt.Errorf("error retrieving MultiNodeInformer by UID: %w", err)
return nil, fmt.Errorf("error retrieving ComputeDomain by UID: %w", err)
}
// An empty result is reported as "not found" without an error.
if len(cds) == 0 {
return false, nil
return nil, nil
}
return true, nil
// More than one hit for a single UID is treated as an error.
if len(cds) != 1 {
return nil, fmt.Errorf("multiple ComputeDomains with the same UID")
}
// The indexer returns untyped objects; use the checked form of the type
// assertion so an unexpected type yields an error instead of a panic.
cd, ok := cds[0].(*nvapi.ComputeDomain)
if !ok {
return nil, fmt.Errorf("failed to cast to ComputeDomain")
}
return cd, nil
}

func (m *ComputeDomainManager) onComputeDomainAdd(ctx context.Context, obj any) error {
Expand Down
30 changes: 15 additions & 15 deletions cmd/nvidia-dra-imex-controller/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ type DeploymentTemplateData struct {
type DeploymentManager struct {
sync.Mutex

config *ManagerConfig
waitGroup sync.WaitGroup
cancelContext context.CancelFunc
computeDomainExists ComputeDomainExistsFunc
config *ManagerConfig
waitGroup sync.WaitGroup
cancelContext context.CancelFunc
getComputeDomain GetComputeDomainFunc

factory informers.SharedInformerFactory
informer cache.SharedIndexInformer
Expand All @@ -69,7 +69,7 @@ type DeploymentManager struct {
podManagers map[string]*DeploymentPodManager
}

func NewDeploymentManager(config *ManagerConfig, cdExists ComputeDomainExistsFunc) *DeploymentManager {
func NewDeploymentManager(config *ManagerConfig, getComputeDomain GetComputeDomainFunc) *DeploymentManager {
labelSelector := &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Expand All @@ -92,15 +92,15 @@ func NewDeploymentManager(config *ManagerConfig, cdExists ComputeDomainExistsFun
lister := factory.Apps().V1().Deployments().Lister()

m := &DeploymentManager{
config: config,
computeDomainExists: cdExists,
factory: factory,
informer: informer,
lister: lister,
podManagers: make(map[string]*DeploymentPodManager),
config: config,
getComputeDomain: getComputeDomain,
factory: factory,
informer: informer,
lister: lister,
podManagers: make(map[string]*DeploymentPodManager),
}
m.imexChannelManager = NewImexChannelManager(config)
m.resourceClaimTemplateManager = NewResourceClaimTemplateManager(config, cdExists)
m.resourceClaimTemplateManager = NewResourceClaimTemplateManager(config, getComputeDomain)

return m
}
Expand Down Expand Up @@ -283,11 +283,11 @@ func (m *DeploymentManager) onAddOrUpdate(ctx context.Context, obj any) error {

klog.Infof("Processing added or updated Deployment: %s/%s", d.Namespace, d.Name)

exists, err := m.computeDomainExists(d.Labels[computeDomainLabelKey])
cd, err := m.getComputeDomain(d.Labels[computeDomainLabelKey])
if err != nil {
return fmt.Errorf("error checking if owner exists: %w", err)
return fmt.Errorf("error getting ComputeDomain: %w", err)
}
if !exists {
if cd == nil {
if err := m.Delete(ctx, d.Labels[computeDomainLabelKey]); err != nil {
return fmt.Errorf("error deleting Deployment '%s/%s': %w", d.Namespace, d.Name, err)
}
Expand Down
26 changes: 13 additions & 13 deletions cmd/nvidia-dra-imex-controller/deviceclass.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,17 @@ import (
)

// DeviceClassManager holds the informer factory, informer, and lister for
// DeviceClass objects, together with the callback it uses to look up the
// ComputeDomain a DeviceClass is labelled with.
// NOTE(review): this diff is rendered without +/- markers, so the first four
// fields (ending in computeDomainExists) are presumably the pre-change lines
// and the next four (ending in getComputeDomain) their replacements.
type DeviceClassManager struct {
config *ManagerConfig
waitGroup sync.WaitGroup
cancelContext context.CancelFunc
computeDomainExists ComputeDomainExistsFunc
config *ManagerConfig
waitGroup sync.WaitGroup
cancelContext context.CancelFunc
// getComputeDomain is called from onAddOrUpdate with the value of the
// DeviceClass's computeDomainLabelKey label; a nil result leads to Delete.
getComputeDomain GetComputeDomainFunc

factory informers.SharedInformerFactory
informer cache.SharedIndexInformer
lister resourcelisters.DeviceClassLister
}

func NewDeviceClassManager(config *ManagerConfig, cdExists ComputeDomainExistsFunc) *DeviceClassManager {
func NewDeviceClassManager(config *ManagerConfig, getComputeDomain GetComputeDomainFunc) *DeviceClassManager {
labelSelector := &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Expand All @@ -65,11 +65,11 @@ func NewDeviceClassManager(config *ManagerConfig, cdExists ComputeDomainExistsFu
lister := factory.Resource().V1beta1().DeviceClasses().Lister()

m := &DeviceClassManager{
config: config,
computeDomainExists: cdExists,
factory: factory,
informer: informer,
lister: lister,
config: config,
getComputeDomain: getComputeDomain,
factory: factory,
informer: informer,
lister: lister,
}

return m
Expand Down Expand Up @@ -236,11 +236,11 @@ func (m *DeviceClassManager) onAddOrUpdate(ctx context.Context, obj any) error {

klog.Infof("Processing added or updated DeviceClass: %s", dc.Name)

exists, err := m.computeDomainExists(dc.Labels[computeDomainLabelKey])
cd, err := m.getComputeDomain(dc.Labels[computeDomainLabelKey])
if err != nil {
return fmt.Errorf("error checking if owner exists: %w", err)
return fmt.Errorf("error getting ComputeDomain: %w", err)
}
if !exists {
if cd == nil {
if err := m.Delete(ctx, dc.Labels[computeDomainLabelKey]); err != nil {
return fmt.Errorf("error deleting DeviceClass '%s': %w", dc.Name, err)
}
Expand Down
26 changes: 13 additions & 13 deletions cmd/nvidia-dra-imex-controller/resourceclaim.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,17 @@ import (
)

// ResourceClaimManager holds the informer factory, informer, and lister for
// ResourceClaim objects, together with the callback it uses to look up the
// ComputeDomain a ResourceClaim is labelled with.
// NOTE(review): this diff is rendered without +/- markers, so the first four
// fields (ending in computeDomainExists) are presumably the pre-change lines
// and the next four (ending in getComputeDomain) their replacements.
type ResourceClaimManager struct {
config *ManagerConfig
waitGroup sync.WaitGroup
cancelContext context.CancelFunc
computeDomainExists ComputeDomainExistsFunc
config *ManagerConfig
waitGroup sync.WaitGroup
cancelContext context.CancelFunc
// getComputeDomain is called from onAddOrUpdate with the value of the
// ResourceClaim's computeDomainLabelKey label; a nil result leads to Delete.
getComputeDomain GetComputeDomainFunc

factory informers.SharedInformerFactory
informer cache.SharedIndexInformer
lister resourcelisters.ResourceClaimLister
}

func NewResourceClaimManager(config *ManagerConfig, cdExists ComputeDomainExistsFunc) *ResourceClaimManager {
func NewResourceClaimManager(config *ManagerConfig, getComputeDomain GetComputeDomainFunc) *ResourceClaimManager {
labelSelector := &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Expand All @@ -65,11 +65,11 @@ func NewResourceClaimManager(config *ManagerConfig, cdExists ComputeDomainExists
lister := factory.Resource().V1beta1().ResourceClaims().Lister()

m := &ResourceClaimManager{
config: config,
computeDomainExists: cdExists,
factory: factory,
informer: informer,
lister: lister,
config: config,
getComputeDomain: getComputeDomain,
factory: factory,
informer: informer,
lister: lister,
}

return m
Expand Down Expand Up @@ -220,11 +220,11 @@ func (m *ResourceClaimManager) onAddOrUpdate(ctx context.Context, obj any) error

klog.Infof("Processing added or updated ResourceClaim: %s/%s", rc.Namespace, rc.Name)

exists, err := m.computeDomainExists(rc.Labels[computeDomainLabelKey])
cd, err := m.getComputeDomain(rc.Labels[computeDomainLabelKey])
if err != nil {
return fmt.Errorf("error checking if owner exists: %w", err)
return fmt.Errorf("error getting ComputeDomain: %w", err)
}
if !exists {
if cd == nil {
if err := m.Delete(ctx, rc.Labels[computeDomainLabelKey]); err != nil {
return fmt.Errorf("error deleting ResourceClaim '%s/%s': %w", rc.Namespace, rc.Name, err)
}
Expand Down
26 changes: 13 additions & 13 deletions cmd/nvidia-dra-imex-controller/resourceclaimtemplate.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,17 @@ type ResourceClaimTemplateTemplateData struct {
}

// ResourceClaimTemplateManager holds the informer factory, informer, and
// lister for ResourceClaimTemplate objects, together with the callback it uses
// to look up the ComputeDomain a ResourceClaimTemplate is labelled with.
// NOTE(review): this diff is rendered without +/- markers, so the first four
// fields (ending in computeDomainExists) are presumably the pre-change lines
// and the next four (ending in getComputeDomain) their replacements.
type ResourceClaimTemplateManager struct {
config *ManagerConfig
waitGroup sync.WaitGroup
cancelContext context.CancelFunc
computeDomainExists ComputeDomainExistsFunc
config *ManagerConfig
waitGroup sync.WaitGroup
cancelContext context.CancelFunc
// getComputeDomain is called from onAddOrUpdate with the value of the
// ResourceClaimTemplate's computeDomainLabelKey label; a nil result leads
// to Delete.
getComputeDomain GetComputeDomainFunc

factory informers.SharedInformerFactory
informer cache.SharedIndexInformer
lister resourcelisters.ResourceClaimTemplateLister
}

func NewResourceClaimTemplateManager(config *ManagerConfig, cdExists ComputeDomainExistsFunc) *ResourceClaimTemplateManager {
func NewResourceClaimTemplateManager(config *ManagerConfig, getComputeDomain GetComputeDomainFunc) *ResourceClaimTemplateManager {
labelSelector := &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Expand All @@ -87,11 +87,11 @@ func NewResourceClaimTemplateManager(config *ManagerConfig, cdExists ComputeDoma
lister := factory.Resource().V1beta1().ResourceClaimTemplates().Lister()

m := &ResourceClaimTemplateManager{
config: config,
computeDomainExists: cdExists,
factory: factory,
informer: informer,
lister: lister,
config: config,
getComputeDomain: getComputeDomain,
factory: factory,
informer: informer,
lister: lister,
}

return m
Expand Down Expand Up @@ -261,11 +261,11 @@ func (m *ResourceClaimTemplateManager) onAddOrUpdate(ctx context.Context, obj an

klog.Infof("Processing added or updated ResourceClaimTemplate: %s/%s", rct.Namespace, rct.Name)

exists, err := m.computeDomainExists(rct.Labels[computeDomainLabelKey])
cd, err := m.getComputeDomain(rct.Labels[computeDomainLabelKey])
if err != nil {
return fmt.Errorf("error checking if owner exists: %w", err)
return fmt.Errorf("error getting ComputeDomain: %w", err)
}
if !exists {
if cd == nil {
if err := m.Delete(ctx, rct.Labels[computeDomainLabelKey]); err != nil {
return fmt.Errorf("error deleting ResourceClaimTemplate '%s/%s': %w", rct.Namespace, rct.Name, err)
}
Expand Down

0 comments on commit dd113e8

Please sign in to comment.