Skip to content

Commit 03708e7

Browse files
committed
Remove reliance on nvidia.com/gpu.clique label
Signed-off-by: Kevin Klues <[email protected]>
1 parent 132ae6d commit 03708e7

File tree

4 files changed

+71
-275
lines changed

4 files changed

+71
-275
lines changed

cmd/compute-domain-controller/daemonset.go

-68
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,6 @@ type DaemonSetManager struct {
6363

6464
resourceClaimTemplateManager *DaemonSetResourceClaimTemplateManager
6565
cleanupManager *CleanupManager[*appsv1.DaemonSet]
66-
podManagers map[string]*DaemonSetPodManager
6766
}
6867

6968
func NewDaemonSetManager(config *ManagerConfig, getComputeDomain GetComputeDomainFunc) *DaemonSetManager {
@@ -92,7 +91,6 @@ func NewDaemonSetManager(config *ManagerConfig, getComputeDomain GetComputeDomai
9291
getComputeDomain: getComputeDomain,
9392
factory: factory,
9493
informer: informer,
95-
podManagers: make(map[string]*DaemonSetPodManager),
9694
}
9795
m.resourceClaimTemplateManager = NewDaemonSetResourceClaimTemplateManager(config, getComputeDomain)
9896
m.cleanupManager = NewCleanupManager[*appsv1.DaemonSet](informer, getComputeDomain, m.cleanup)
@@ -150,9 +148,6 @@ func (m *DaemonSetManager) Start(ctx context.Context) (rerr error) {
150148
}
151149

152150
func (m *DaemonSetManager) Stop() error {
153-
if err := m.removeAllPodManagers(); err != nil {
154-
return fmt.Errorf("error removing all Pod managers: %w", err)
155-
}
156151
if err := m.resourceClaimTemplateManager.Stop(); err != nil {
157152
return fmt.Errorf("error stopping ResourceClaimTemplate manager: %w", err)
158153
}
@@ -230,16 +225,11 @@ func (m *DaemonSetManager) Delete(ctx context.Context, cdUID string) error {
230225
}
231226

232227
d := ds[0]
233-
key := d.Spec.Selector.MatchLabels[computeDomainLabelKey]
234228

235229
if err := m.resourceClaimTemplateManager.Delete(ctx, cdUID); err != nil {
236230
return fmt.Errorf("error deleting ResourceClaimTemplate: %w", err)
237231
}
238232

239-
if err := m.removePodManager(key); err != nil {
240-
return fmt.Errorf("error removing Pod manager: %w", err)
241-
}
242-
243233
if d.GetDeletionTimestamp() != nil {
244234
return nil
245235
}
@@ -335,10 +325,6 @@ func (m *DaemonSetManager) onAddOrUpdate(ctx context.Context, obj any) error {
335325
return nil
336326
}
337327

338-
if err := m.addPodManager(ctx, d.Spec.Selector, cd.Spec.NumNodes); err != nil {
339-
return fmt.Errorf("error adding Pod manager '%s/%s': %w", d.Namespace, d.Name, err)
340-
}
341-
342328
if int(d.Status.NumberReady) != cd.Spec.NumNodes {
343329
return nil
344330
}
@@ -352,60 +338,6 @@ func (m *DaemonSetManager) onAddOrUpdate(ctx context.Context, obj any) error {
352338
return nil
353339
}
354340

355-
func (m *DaemonSetManager) addPodManager(ctx context.Context, labelSelector *metav1.LabelSelector, numPods int) error {
356-
key := labelSelector.MatchLabels[computeDomainLabelKey]
357-
358-
if _, exists := m.podManagers[key]; exists {
359-
return nil
360-
}
361-
362-
podManager := NewDaemonSetPodManager(m.config, labelSelector, numPods, m.getComputeDomain)
363-
364-
if err := podManager.Start(ctx); err != nil {
365-
return fmt.Errorf("error creating Pod manager: %w", err)
366-
}
367-
368-
m.Lock()
369-
m.podManagers[key] = podManager
370-
m.Unlock()
371-
372-
return nil
373-
}
374-
375-
func (m *DaemonSetManager) removePodManager(key string) error {
376-
if _, exists := m.podManagers[key]; !exists {
377-
return nil
378-
}
379-
380-
m.Lock()
381-
podManager := m.podManagers[key]
382-
m.Unlock()
383-
384-
if err := podManager.Stop(); err != nil {
385-
return fmt.Errorf("error stopping Pod manager: %w", err)
386-
}
387-
388-
m.Lock()
389-
delete(m.podManagers, key)
390-
m.Unlock()
391-
392-
return nil
393-
}
394-
395-
func (m *DaemonSetManager) removeAllPodManagers() error {
396-
m.Lock()
397-
for key, pm := range m.podManagers {
398-
m.Unlock()
399-
if err := pm.Stop(); err != nil {
400-
return fmt.Errorf("error stopping Pod manager: %w", err)
401-
}
402-
m.Lock()
403-
delete(m.podManagers, key)
404-
}
405-
m.Unlock()
406-
return nil
407-
}
408-
409341
func (m *DaemonSetManager) cleanup(ctx context.Context, cdUID string) error {
410342
if err := m.Delete(ctx, cdUID); err != nil {
411343
return fmt.Errorf("error deleting DaemonSet: %w", err)

cmd/compute-domain-controller/daemonsetpods.go

-203
This file was deleted.

0 commit comments

Comments
 (0)