Skip to content

Commit

Permalink
perf: Unregister the topology domain when failing NodeClaim creation (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis authored Nov 17, 2024
1 parent 965ff61 commit f0abf70
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 0 deletions.
6 changes: 6 additions & 0 deletions pkg/controllers/provisioning/scheduling/nodeclaim.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ type NodeClaim struct {
topology *Topology
hostPortUsage *scheduling.HostPortUsage
daemonResources v1.ResourceList
hostname string
}

var nodeID int64
Expand All @@ -59,6 +60,7 @@ func NewNodeClaim(nodeClaimTemplate *NodeClaimTemplate, topology *Topology, daem
hostPortUsage: scheduling.NewHostPortUsage(),
topology: topology,
daemonResources: daemonResources,
hostname: hostname,
}
}

Expand Down Expand Up @@ -119,6 +121,10 @@ func (n *NodeClaim) Add(pod *v1.Pod) error {
return nil
}

// Destroy unregisters this NodeClaim's hostname from its topology under the v1.LabelHostname key.
// NOTE(review): presumably this undoes a hostname registration made when the NodeClaim was constructed —
// the constructor body is not fully visible here, confirm against NewNodeClaim.
func (n *NodeClaim) Destroy() {
n.topology.Unregister(v1.LabelHostname, n.hostname)
}

// FinalizeScheduling is called once all scheduling has completed and allows the node to perform any cleanup
// necessary before its requirements are used for instance launching
func (n *NodeClaim) FinalizeScheduling() {
Expand Down
1 change: 1 addition & 0 deletions pkg/controllers/provisioning/scheduling/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ func (s *Scheduler) add(ctx context.Context, pod *corev1.Pod) error {
}
nodeClaim := NewNodeClaim(nodeClaimTemplate, s.topology, s.daemonOverhead[nodeClaimTemplate], instanceTypes)
if err := nodeClaim.Add(pod); err != nil {
nodeClaim.Destroy() // Ensure we cleanup any changes that we made while mocking out a NodeClaim
errs = multierr.Append(errs, fmt.Errorf("incompatible with nodepool %q, daemonset overhead=%s, %w",
nodeClaimTemplate.NodePoolName,
resources.String(s.daemonOverhead[nodeClaimTemplate]),
Expand Down
14 changes: 14 additions & 0 deletions pkg/controllers/provisioning/scheduling/topology.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,20 @@ func (t *Topology) Register(topologyKey string, domain string) {
}
}

// Unregister removes domain from every tracked topology group whose Key equals topologyKey. The regular
// topology groups are processed first, followed by the inverse topology groups; groups with a different
// key are left untouched.
func (t *Topology) Unregister(topologyKey string, domain string) {
	for _, group := range t.topologies {
		if group.Key != topologyKey {
			continue
		}
		group.Unregister(domain)
	}
	for _, inverse := range t.inverseTopologies {
		if inverse.Key != topologyKey {
			continue
		}
		inverse.Unregister(domain)
	}
}

// updateInverseAffinities is used to identify pods with anti-affinity terms so we can track those topologies. We
// have to look at every pod in the cluster as there is no way to query for a pod with anti-affinity terms.
func (t *Topology) updateInverseAffinities(ctx context.Context) error {
Expand Down
8 changes: 8 additions & 0 deletions pkg/controllers/provisioning/scheduling/topologygroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,14 @@ func (t *TopologyGroup) Register(domains ...string) {
}
}

// Unregister makes the topology group forget each of the given domains: the domain's entry is deleted
// from t.domains and the domain is then deleted from t.emptyDomains.
func (t *TopologyGroup) Unregister(domains ...string) {
	for i := range domains {
		name := domains[i]
		delete(t.domains, name)
		t.emptyDomains.Delete(name)
	}
}

// AddOwner records key in t.owners. The stored value is an empty struct, so t.owners is used as a set
// keyed by UID; adding a key that is already present leaves the set unchanged.
func (t *TopologyGroup) AddOwner(key types.UID) {
t.owners[key] = struct{}{}
}
Expand Down

0 comments on commit f0abf70

Please sign in to comment.