Skip to content

Commit

Permalink
chore: Remove Eventual Disruption cloud provider interface (#1832)
Browse files Browse the repository at this point in the history
  • Loading branch information
engedaam authored Nov 22, 2024
1 parent 9d472b7 commit 2ab4871
Show file tree
Hide file tree
Showing 15 changed files with 33 additions and 688 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ coverage.html
*.test
*.cpuprofile
*.heapprofile
go.work
go.work.sum

# Common in OSs and IDEs
.idea
Expand Down
5 changes: 1 addition & 4 deletions hack/kwok/requirements.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,4 @@ yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.req

# NodePool Validation:
yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [
{"message": "label domain \"karpenter.kwok.sh\" is restricted", "rule": "self in [\"karpenter.kwok.sh/instance-cpu\", \"karpenter.kwok.sh/instance-memory\", \"karpenter.kwok.sh/instance-family\", \"karpenter.kwok.sh/instance-size\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.kwok.sh\")"}]' -i kwok/charts/crds/karpenter.sh_nodepools.yaml

# Add ExampleReason in KwoK CloudProvider
yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.disruption.properties.budgets.items.properties.reasons.items.enum += [ "ExampleReason" ]' -i kwok/charts/crds/karpenter.sh_nodepools.yaml
{"message": "label domain \"karpenter.kwok.sh\" is restricted", "rule": "self in [\"karpenter.kwok.sh/instance-cpu\", \"karpenter.kwok.sh/instance-memory\", \"karpenter.kwok.sh/instance-family\", \"karpenter.kwok.sh/instance-size\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.kwok.sh\")"}]' -i kwok/charts/crds/karpenter.sh_nodepools.yaml
4 changes: 0 additions & 4 deletions kwok/apis/v1alpha1/kwoknodeclass.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ package v1alpha1

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
)

// KWOKNodeClass is the Schema for the KWOKNodeClass API
Expand All @@ -40,5 +38,3 @@ type KWOKNodeClassList struct {
metav1.ListMeta `json:"metadata,omitempty"`
Items []KWOKNodeClass `json:"items"`
}

const DisruptionReasonExampleReason v1.DisruptionReason = "ExampleReason"
7 changes: 2 additions & 5 deletions kwok/charts/crds/karpenter.sh_nodepools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,16 +114,13 @@ spec:
description: |-
Reasons is a list of disruption methods that this budget applies to. If Reasons is not set, this budget applies to all methods.
Otherwise, this will apply to each reason defined.
allowed reasons are Underutilized, Empty, and Drifted and additional CloudProvider-specific reasons.
allowed reasons are Underutilized, Empty, and Drifted.
items:
description: |-
DisruptionReason defines valid reasons for disruption budgets.
CloudProviders will need to append to the list of enums when implementing cloud provider disruption reasons
description: DisruptionReason defines valid reasons for disruption budgets.
enum:
- Underutilized
- Empty
- Drifted
- ExampleReason
type: string
type: array
schedule:
Expand Down
4 changes: 0 additions & 4 deletions kwok/cloudprovider/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,6 @@ func (c CloudProvider) Create(ctx context.Context, nodeClaim *v1.NodeClaim) (*v1
return c.toNodeClaim(node)
}

func (c CloudProvider) DisruptionReasons() []v1.DisruptionReason {
return []v1.DisruptionReason{v1alpha1.DisruptionReasonExampleReason}
}

func (c CloudProvider) Delete(ctx context.Context, nodeClaim *v1.NodeClaim) error {
if err := c.kubeClient.Delete(ctx, nodeClaim); err != nil {
if errors.IsNotFound(err) {
Expand Down
6 changes: 2 additions & 4 deletions pkg/apis/crds/karpenter.sh_nodepools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,9 @@ spec:
description: |-
Reasons is a list of disruption methods that this budget applies to. If Reasons is not set, this budget applies to all methods.
Otherwise, this will apply to each reason defined.
allowed reasons are Underutilized, Empty, and Drifted and additional CloudProvider-specific reasons.
allowed reasons are Underutilized, Empty, and Drifted.
items:
description: |-
DisruptionReason defines valid reasons for disruption budgets.
CloudProviders will need to append to the list of enums when implementing cloud provider disruption reasons
description: DisruptionReason defines valid reasons for disruption budgets.
enum:
- Underutilized
- Empty
Expand Down
3 changes: 1 addition & 2 deletions pkg/apis/v1/nodepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ type Disruption struct {
type Budget struct {
// Reasons is a list of disruption methods that this budget applies to. If Reasons is not set, this budget applies to all methods.
// Otherwise, this will apply to each reason defined.
// allowed reasons are Underutilized, Empty, and Drifted and additional CloudProvider-specific reasons.
// allowed reasons are Underutilized, Empty, and Drifted.
// +optional
Reasons []DisruptionReason `json:"reasons,omitempty"`
// Nodes dictates the maximum number of NodeClaims owned by this NodePool
Expand Down Expand Up @@ -129,7 +129,6 @@ const (
)

// DisruptionReason defines valid reasons for disruption budgets.
// CloudProviders will need to append to the list of enums when implementing cloud provider disruption reasons
// +kubebuilder:validation:Enum={Underutilized,Empty,Drifted}
type DisruptionReason string

Expand Down
8 changes: 3 additions & 5 deletions pkg/apis/v1/nodepool_budgets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ var _ = Describe("Budgets", func() {
DisruptionReasonUnderutilized,
DisruptionReasonDrifted,
DisruptionReasonEmpty,
"CloudProviderDisruptionReason",
},
Nodes: "0",
Schedule: lo.ToPtr("@weekly"),
Expand All @@ -93,12 +92,11 @@ var _ = Describe("Budgets", func() {
},
},
}
allKnownDisruptionReasons = append([]DisruptionReason{
allKnownDisruptionReasons = []DisruptionReason{
DisruptionReasonEmpty,
DisruptionReasonUnderutilized,
DisruptionReasonDrifted},
DisruptionReason("CloudProviderDisruptionReason"),
)
DisruptionReasonDrifted,
}
})

Context("GetAllowedDisruptionsByReason", func() {
Expand Down
4 changes: 0 additions & 4 deletions pkg/cloudprovider/fake/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,10 +242,6 @@ func (c *CloudProvider) GetInstanceTypes(_ context.Context, np *v1.NodePool) ([]
}, nil
}

func (c *CloudProvider) DisruptionReasons() []v1.DisruptionReason {
return []v1.DisruptionReason{"CloudProviderDisruptionReason"}
}

func (c *CloudProvider) Delete(_ context.Context, nc *v1.NodeClaim) error {
c.mu.Lock()
defer c.mu.Unlock()
Expand Down
3 changes: 0 additions & 3 deletions pkg/cloudprovider/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,6 @@ type CloudProvider interface {
// availability, the GetInstanceTypes method should always return all instance types,
// even those with no offerings available.
GetInstanceTypes(context.Context, *v1.NodePool) ([]*InstanceType, error)
// DisruptionReasons is for CloudProviders to hook into the Disruption Controller.
// Reasons will show up as StatusConditions on the NodeClaim.
DisruptionReasons() []v1.DisruptionReason
// IsDrifted returns whether a NodeClaim has drifted from the provisioning requirements
// it is tied to.
IsDrifted(context.Context, *v1.NodeClaim) (DriftReason, error)
Expand Down
28 changes: 10 additions & 18 deletions pkg/controllers/disruption/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,6 @@ func NewController(clk clock.Clock, kubeClient client.Client, provisioner *provi
) *Controller {
c := MakeConsolidation(clk, cluster, kubeClient, provisioner, cp, recorder, queue)

// Generate eventually disruptable reason based on a combination of drift and cloudprovider disruption reason
eventualDisruptionMethods := []Method{}

for _, reason := range append(cp.DisruptionReasons(), v1.DisruptionReasonDrifted) {
eventualDisruptionMethods = append(eventualDisruptionMethods, NewEventualDisruption(kubeClient, cluster, provisioner, recorder, reason))
}

return &Controller{
queue: queue,
clock: clk,
Expand All @@ -87,17 +80,16 @@ func NewController(clk clock.Clock, kubeClient client.Client, provisioner *provi
recorder: recorder,
cloudProvider: cp,
lastRun: map[string]time.Time{},
methods: append(
// Terminate any NodeClaims that have need to be eventually disrupted from provisioning specifications, allowing the pods to reschedule.
eventualDisruptionMethods,
[]Method{
// Delete any empty NodeClaims as there is zero cost in terms of disruption.
NewEmptiness(c),
// Attempt to identify multiple NodeClaims that we can consolidate simultaneously to reduce pod churn
NewMultiNodeConsolidation(c),
// And finally fall back our single NodeClaim consolidation to further reduce cluster cost.
NewSingleNodeConsolidation(c),
}...),
methods: []Method{
// Terminate any NodeClaims that have drifted from provisioning specifications, allowing the pods to reschedule.
NewDrift(kubeClient, cluster, provisioner, recorder),
// Delete any empty NodeClaims as there is zero cost in terms of disruption.
NewEmptiness(c),
// Attempt to identify multiple NodeClaims that we can consolidate simultaneously to reduce pod churn
NewMultiNodeConsolidation(c),
// And finally fall back our single NodeClaim consolidation to further reduce cluster cost.
NewSingleNodeConsolidation(c),
},
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,31 +32,29 @@ import (
)

// Drift is a subreconciler that deletes drifted candidates.
type EventualDisruption struct {
type Drift struct {
kubeClient client.Client
cluster *state.Cluster
provisioner *provisioning.Provisioner
recorder events.Recorder
reason v1.DisruptionReason
}

func NewEventualDisruption(kubeClient client.Client, cluster *state.Cluster, provisioner *provisioning.Provisioner, recorder events.Recorder, reason v1.DisruptionReason) *EventualDisruption {
return &EventualDisruption{
func NewDrift(kubeClient client.Client, cluster *state.Cluster, provisioner *provisioning.Provisioner, recorder events.Recorder) *Drift {
return &Drift{
kubeClient: kubeClient,
cluster: cluster,
provisioner: provisioner,
recorder: recorder,
reason: reason,
}
}

// ShouldDisrupt is a predicate used to filter candidates
func (d *EventualDisruption) ShouldDisrupt(ctx context.Context, c *Candidate) bool {
func (d *Drift) ShouldDisrupt(ctx context.Context, c *Candidate) bool {
return c.NodeClaim.StatusConditions().Get(string(d.Reason())).IsTrue()
}

// ComputeCommand generates a disruption command given candidates
func (d *EventualDisruption) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]int, candidates ...*Candidate) (Command, scheduling.Results, error) {
func (d *Drift) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[string]int, candidates ...*Candidate) (Command, scheduling.Results, error) {
sort.Slice(candidates, func(i int, j int) bool {
return candidates[i].NodeClaim.StatusConditions().Get(string(d.Reason())).LastTransitionTime.Time.Before(
candidates[j].NodeClaim.StatusConditions().Get(string(d.Reason())).LastTransitionTime.Time)
Expand Down Expand Up @@ -114,14 +112,14 @@ func (d *EventualDisruption) ComputeCommand(ctx context.Context, disruptionBudge
return Command{}, scheduling.Results{}, nil
}

func (d *EventualDisruption) Reason() v1.DisruptionReason {
return d.reason
func (d *Drift) Reason() v1.DisruptionReason {
return v1.DisruptionReasonDrifted
}

func (d *EventualDisruption) Class() string {
func (d *Drift) Class() string {
return EventualDisruptionClass
}

func (d *EventualDisruption) ConsolidationType() string {
func (d *Drift) ConsolidationType() string {
return ""
}
Loading

0 comments on commit 2ab4871

Please sign in to comment.