feat: only operate on cloudprovider managed resources (#1818)
jmdeal authored Nov 24, 2024
1 parent af17c94 commit 330b98b
Showing 79 changed files with 1,643 additions and 761 deletions.
9 changes: 9 additions & 0 deletions kwok/charts/crds/karpenter.sh_nodeclaims.yaml
@@ -88,12 +88,21 @@ spec:
description: API version of the referent
pattern: ^[^/]*$
type: string
x-kubernetes-validations:
- message: group may not be empty
rule: self != ''
kind:
description: 'Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"'
type: string
x-kubernetes-validations:
- message: kind may not be empty
rule: self != ''
name:
description: 'Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names'
type: string
x-kubernetes-validations:
- message: name may not be empty
rule: self != ''
required:
- group
- kind
14 changes: 14 additions & 0 deletions kwok/charts/crds/karpenter.sh_nodepools.yaml
@@ -229,17 +229,31 @@ spec:
description: API version of the referent
pattern: ^[^/]*$
type: string
x-kubernetes-validations:
- message: group may not be empty
rule: self != ''
kind:
description: 'Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"'
type: string
x-kubernetes-validations:
- message: kind may not be empty
rule: self != ''
name:
description: 'Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names'
type: string
x-kubernetes-validations:
- message: name may not be empty
rule: self != ''
required:
- group
- kind
- name
type: object
x-kubernetes-validations:
- message: nodeClassRef.group is immutable
rule: self.group == oldSelf.group
- message: nodeClassRef.kind is immutable
rule: self.kind == oldSelf.kind
requirements:
description: Requirements are layered with GetLabels and applied to every node.
items:
9 changes: 9 additions & 0 deletions pkg/apis/crds/karpenter.sh_nodeclaims.yaml
@@ -88,12 +88,21 @@ spec:
description: API version of the referent
pattern: ^[^/]*$
type: string
x-kubernetes-validations:
- message: group may not be empty
rule: self != ''
kind:
description: 'Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"'
type: string
x-kubernetes-validations:
- message: kind may not be empty
rule: self != ''
name:
description: 'Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names'
type: string
x-kubernetes-validations:
- message: name may not be empty
rule: self != ''
required:
- group
- kind
14 changes: 14 additions & 0 deletions pkg/apis/crds/karpenter.sh_nodepools.yaml
@@ -229,17 +229,31 @@ spec:
description: API version of the referent
pattern: ^[^/]*$
type: string
x-kubernetes-validations:
- message: group may not be empty
rule: self != ''
kind:
description: 'Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"'
type: string
x-kubernetes-validations:
- message: kind may not be empty
rule: self != ''
name:
description: 'Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names'
type: string
x-kubernetes-validations:
- message: name may not be empty
rule: self != ''
required:
- group
- kind
- name
type: object
x-kubernetes-validations:
- message: nodeClassRef.group is immutable
rule: self.group == oldSelf.group
- message: nodeClassRef.kind is immutable
rule: self.kind == oldSelf.kind
requirements:
description: Requirements are layered with GetLabels and applied to every node.
items:
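The rules added in all four CRD files above are CEL expressions that the API server evaluates on create and update. A standalone sketch of what they compute, using the cel-go library that Kubernetes CEL validation builds on — this snippet is illustrative and not part of the diff; in a real cluster, kube-apiserver binds `self` and `oldSelf` itself:

```go
package main

import (
	"fmt"

	"github.com/google/cel-go/cel"
)

func main() {
	// Non-emptiness rule attached to group, kind, and name: `self != ''`.
	// Error handling is elided for brevity.
	env, _ := cel.NewEnv(cel.Variable("self", cel.StringType))
	ast, _ := env.Compile(`self != ''`)
	prg, _ := env.Program(ast)
	out, _, _ := prg.Eval(map[string]any{"self": ""})
	fmt.Println(out) // false -> the API server reports "group may not be empty"

	// Transition rule on the NodePool template's nodeClassRef:
	// `self.group == oldSelf.group` (oldSelf is only bound on updates).
	env, _ = cel.NewEnv(
		cel.Variable("self", cel.MapType(cel.StringType, cel.StringType)),
		cel.Variable("oldSelf", cel.MapType(cel.StringType, cel.StringType)),
	)
	ast, _ = env.Compile(`self.group == oldSelf.group`)
	prg, _ = env.Program(ast)
	out, _, _ = prg.Eval(map[string]any{
		"self":    map[string]string{"group": "karpenter.test.mutated.sh"},
		"oldSelf": map[string]string{"group": "karpenter.test.sh"},
	})
	fmt.Println(out) // false -> "nodeClassRef.group is immutable"
}
```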
5 changes: 5 additions & 0 deletions pkg/apis/v1/labels.go
@@ -21,6 +21,7 @@ import (
"strings"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/sets"

"sigs.k8s.io/karpenter/pkg/apis"
@@ -141,3 +142,7 @@ func GetLabelDomain(key string) string {
}
return ""
}

func NodeClassLabelKey(gk schema.GroupKind) string {
return fmt.Sprintf("%s/%s", gk.Group, strings.ToLower(gk.Kind))
}
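For instance, a NodeClass with group `karpenter.test.sh` and kind `TestNodeClass` yields the label key `karpenter.test.sh/testnodeclass`. A runnable sketch of the helper above (the GroupKind values are illustrative):

```go
package main

import (
	"fmt"
	"strings"

	"k8s.io/apimachinery/pkg/runtime/schema"
)

// Mirrors the helper added in pkg/apis/v1/labels.go: derive a node label key
// from a NodeClass GroupKind by joining the group with the lowercased kind.
func NodeClassLabelKey(gk schema.GroupKind) string {
	return fmt.Sprintf("%s/%s", gk.Group, strings.ToLower(gk.Kind))
}

func main() {
	gk := schema.GroupKind{Group: "karpenter.test.sh", Kind: "TestNodeClass"} // illustrative values
	fmt.Println(NodeClassLabelKey(gk))                                        // karpenter.test.sh/testnodeclass
}
```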
11 changes: 11 additions & 0 deletions pkg/apis/v1/nodeclaim.go
@@ -20,6 +20,7 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
)

// NodeClaimSpec describes the desired state of the NodeClaim
@@ -97,17 +98,27 @@ type ResourceRequirements struct {

type NodeClassReference struct {
// Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"
// +kubebuilder:validation:XValidation:rule="self != ''",message="kind may not be empty"
// +required
Kind string `json:"kind"`
// Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names
// +kubebuilder:validation:XValidation:rule="self != ''",message="name may not be empty"
// +required
Name string `json:"name"`
// API version of the referent
// +kubebuilder:validation:XValidation:rule="self != ''",message="group may not be empty"
// +kubebuilder:validation:Pattern=`^[^/]*$`
// +required
Group string `json:"group"`
}

func (ncr *NodeClassReference) GroupKind() schema.GroupKind {
return schema.GroupKind{
Group: ncr.Group,
Kind: ncr.Kind,
}
}

// +kubebuilder:object:generate=false
type Provider = runtime.RawExtension

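The new GroupKind accessor is the hook for the commit's theme: controllers can compare a resource's nodeClassRef against the set of GroupKinds the cloud provider manages and skip everything else. A minimal sketch of that predicate — the `supported` set and reference values here are illustrative, not from this diff:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/runtime/schema"
)

// Mirrors the struct and accessor added in pkg/apis/v1/nodeclaim.go.
type NodeClassReference struct {
	Kind  string
	Name  string
	Group string
}

func (ncr *NodeClassReference) GroupKind() schema.GroupKind {
	return schema.GroupKind{Group: ncr.Group, Kind: ncr.Kind}
}

// isManaged reports whether the referenced NodeClass GroupKind is one the
// cloud provider manages; unmanaged resources can then be ignored entirely.
func isManaged(ref *NodeClassReference, supported []schema.GroupKind) bool {
	for _, gk := range supported {
		if ref.GroupKind() == gk {
			return true
		}
	}
	return false
}

func main() {
	supported := []schema.GroupKind{{Group: "karpenter.test.sh", Kind: "TestNodeClass"}} // illustrative
	managed := &NodeClassReference{Group: "karpenter.test.sh", Kind: "TestNodeClass", Name: "default"}
	foreign := &NodeClassReference{Group: "example.sh", Kind: "OtherNodeClass", Name: "default"}
	fmt.Println(isManaged(managed, supported), isManaged(foreign, supported)) // true false
}
```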
5 changes: 3 additions & 2 deletions pkg/apis/v1/nodeclaim_validation_cel_test.go
@@ -46,8 +46,9 @@ var _ = Describe("Validation", func() {
ObjectMeta: metav1.ObjectMeta{Name: strings.ToLower(randomdata.SillyName())},
Spec: NodeClaimSpec{
NodeClassRef: &NodeClassReference{
Kind: "NodeClaim",
Name: "default",
Group: "karpenter.test.sh",
Kind: "TestNodeClaim",
Name: "default",
},
Requirements: []NodeSelectorRequirementWithMinValues{
{
20 changes: 2 additions & 18 deletions pkg/apis/v1/nodepool.go
@@ -19,7 +19,6 @@ package v1
import (
"fmt"
"math"
"sort"
"strconv"

"github.com/mitchellh/hashstructure/v2"
@@ -181,6 +180,8 @@ type NodeClaimTemplateSpec struct {
// +required
Requirements []NodeSelectorRequirementWithMinValues `json:"requirements" hash:"ignore"`
// NodeClassRef is a reference to an object that defines provider specific configuration
// +kubebuilder:validation:XValidation:rule="self.group == oldSelf.group",message="nodeClassRef.group is immutable"
// +kubebuilder:validation:XValidation:rule="self.kind == oldSelf.kind",message="nodeClassRef.kind is immutable"
// +required
NodeClassRef *NodeClassReference `json:"nodeClassRef"`
// TerminationGracePeriod is the maximum duration the controller will wait before forcefully deleting the pods on a node, measured from when deletion is first initiated.
@@ -289,23 +290,6 @@ type NodePoolList struct {
Items []NodePool `json:"items"`
}

// OrderByWeight orders the NodePools in the NodePoolList by their priority weight in-place.
// This priority evaluates the following things in precedence order:
// 1. NodePools that have a larger weight are ordered first
// 2. If two NodePools have the same weight, then the NodePool with the name later in the alphabet will come first
func (nl *NodePoolList) OrderByWeight() {
sort.Slice(nl.Items, func(a, b int) bool {
weightA := lo.FromPtr(nl.Items[a].Spec.Weight)
weightB := lo.FromPtr(nl.Items[b].Spec.Weight)

if weightA == weightB {
// Order NodePools by name for a consistent ordering when sorting equal weight
return nl.Items[a].Name > nl.Items[b].Name
}
return weightA > weightB
})
}

// MustGetAllowedDisruptions calls GetAllowedDisruptionsByReason if the error is not nil. This reduces the
// amount of state that the disruption controller must reconcile, while allowing the GetAllowedDisruptionsByReason()
// to bubble up any errors in validation.
5 changes: 3 additions & 2 deletions pkg/apis/v1/nodepool_default_test.go
@@ -42,8 +42,9 @@ var _ = Describe("CEL/Default", func() {
Template: NodeClaimTemplate{
Spec: NodeClaimTemplateSpec{
NodeClassRef: &NodeClassReference{
Kind: "NodeClaim",
Name: "default",
Group: "karpenter.test.sh",
Kind: "TestNodeClaim",
Name: "default",
},
Requirements: []NodeSelectorRequirementWithMinValues{
{
29 changes: 27 additions & 2 deletions pkg/apis/v1/nodepool_validation_cel_test.go
@@ -46,8 +46,9 @@ var _ = Describe("CEL/Validation", func() {
Template: NodeClaimTemplate{
Spec: NodeClaimTemplateSpec{
NodeClassRef: &NodeClassReference{
Kind: "NodeClaim",
Name: "default",
Group: "karpenter.test.sh",
Kind: "TestNodeClass",
Name: "default",
},
Requirements: []NodeSelectorRequirementWithMinValues{
{
@@ -619,4 +620,28 @@
Expect(env.Client.Create(ctx, nodePool)).ToNot(Succeed())
})
})
Context("NodeClassRef", func() {
It("should fail to mutate group", func() {
Expect(env.Client.Create(ctx, nodePool)).To(Succeed())
nodePool.Spec.Template.Spec.NodeClassRef.Group = "karpenter.test.mutated.sh"
Expect(env.Client.Create(ctx, nodePool)).ToNot(Succeed())
})
It("should fail to mutate kind", func() {
Expect(env.Client.Create(ctx, nodePool)).To(Succeed())
nodePool.Spec.Template.Spec.NodeClassRef.Group = "TestNodeClass2"
Expect(env.Client.Create(ctx, nodePool)).ToNot(Succeed())
})
It("should fail if group is unset", func() {
nodePool.Spec.Template.Spec.NodeClassRef.Group = ""
Expect(env.Client.Create(ctx, nodePool)).ToNot(Succeed())
})
It("should fail if kind is unset", func() {
nodePool.Spec.Template.Spec.NodeClassRef.Kind = ""
Expect(env.Client.Create(ctx, nodePool)).ToNot(Succeed())
})
It("should fail if name is unset", func() {
nodePool.Spec.Template.Spec.NodeClassRef.Name = ""
Expect(env.Client.Create(ctx, nodePool)).ToNot(Succeed())
})
})
})
50 changes: 0 additions & 50 deletions pkg/apis/v1/suite_test.go
@@ -18,15 +18,12 @@ package v1_test

import (
"context"
"math/rand"
"testing"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/samber/lo"

"sigs.k8s.io/karpenter/pkg/apis"
v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
"sigs.k8s.io/karpenter/pkg/test"
. "sigs.k8s.io/karpenter/pkg/test/expectations"
"sigs.k8s.io/karpenter/pkg/test/v1alpha1"
@@ -53,50 +50,3 @@ var _ = AfterEach(func() {
var _ = AfterSuite(func() {
Expect(env.Stop()).To(Succeed(), "Failed to stop environment")
})

var _ = Describe("OrderByWeight", func() {
It("should order the NodePools by weight", func() {
// Generate 10 NodePools that have random weights, some might have the same weights
var nodePools []v1.NodePool
for i := 0; i < 10; i++ {
np := test.NodePool(v1.NodePool{
Spec: v1.NodePoolSpec{
Weight: lo.ToPtr[int32](int32(rand.Intn(100) + 1)), //nolint:gosec
},
})
nodePools = append(nodePools, *np)
}

nodePools = lo.Shuffle(nodePools)
nodePoolList := v1.NodePoolList{Items: nodePools}
nodePoolList.OrderByWeight()

lastWeight := 101 // This is above the allowed weight values
for _, np := range nodePoolList.Items {
Expect(lo.FromPtr(np.Spec.Weight)).To(BeNumerically("<=", lastWeight))
lastWeight = int(lo.FromPtr(np.Spec.Weight))
}
})
It("should order the NodePools by name when the weights are the same", func() {
// Generate 10 NodePools with the same weight
var nodePools []v1.NodePool
for i := 0; i < 10; i++ {
np := test.NodePool(v1.NodePool{
Spec: v1.NodePoolSpec{
Weight: lo.ToPtr[int32](10),
},
})
nodePools = append(nodePools, *np)
}

nodePools = lo.Shuffle(nodePools)
nodePoolList := v1.NodePoolList{Items: nodePools}
nodePoolList.OrderByWeight()

lastName := "zzzzzzzzzzzzzzzzzzzzzzzz" // large string value
for _, np := range nodePoolList.Items {
Expect(np.Name < lastName).To(BeTrue())
lastName = np.Name
}
})
})
4 changes: 2 additions & 2 deletions pkg/cloudprovider/fake/cloudprovider.go
@@ -165,8 +165,8 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *v1.NodeClaim) (*v
}

func (c *CloudProvider) Get(_ context.Context, id string) (*v1.NodeClaim, error) {
c.mu.RLock()
defer c.mu.RUnlock()
c.mu.Lock()
defer c.mu.Unlock()

if c.NextGetErr != nil {
tempError := c.NextGetErr
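The RLock-to-Lock change is because Get is not read-only: it captures and, following the one-shot pattern visible above, resets NextGetErr, and two concurrent readers holding RLock could race on that write. A minimal standalone sketch of the pattern, with illustrative names mirroring the fake:

```go
package main

import "sync"

// fakeProvider mimics the test double's pattern: Get consumes a one-shot
// injected error, so it writes state and needs the exclusive lock.
type fakeProvider struct {
	mu         sync.RWMutex
	nextGetErr error
}

func (f *fakeProvider) Get(id string) error {
	f.mu.Lock() // RLock would be unsafe: nextGetErr is written below
	defer f.mu.Unlock()
	if f.nextGetErr != nil {
		err := f.nextGetErr
		f.nextGetErr = nil // consume the one-shot error
		return err
	}
	return nil
}

func main() {
	f := &fakeProvider{}
	_ = f.Get("node-id") // illustrative call
}
```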
21 changes: 10 additions & 11 deletions pkg/controllers/controllers.go
@@ -62,8 +62,7 @@ func NewControllers(
recorder events.Recorder,
cloudProvider cloudprovider.CloudProvider,
) []controller.Controller {

cluster := state.NewCluster(clock, kubeClient)
cluster := state.NewCluster(clock, kubeClient, cloudProvider)
p := provisioning.NewProvisioner(kubeClient, recorder, cloudProvider, cluster, clock)
evictionQueue := terminator.NewQueue(kubeClient, recorder)
disruptionQueue := orchestration.NewQueue(kubeClient, recorder, cluster, clock, p)
Expand All @@ -73,22 +72,22 @@ func NewControllers(
disruption.NewController(clock, kubeClient, p, cloudProvider, recorder, cluster, disruptionQueue),
provisioning.NewPodController(kubeClient, p),
provisioning.NewNodeController(kubeClient, p),
nodepoolhash.NewController(kubeClient),
expiration.NewController(clock, kubeClient),
nodepoolhash.NewController(kubeClient, cloudProvider),
expiration.NewController(clock, kubeClient, cloudProvider),
informer.NewDaemonSetController(kubeClient, cluster),
informer.NewNodeController(kubeClient, cluster),
informer.NewPodController(kubeClient, cluster),
informer.NewNodePoolController(kubeClient, cluster),
informer.NewNodeClaimController(kubeClient, cluster),
informer.NewNodePoolController(kubeClient, cloudProvider, cluster),
informer.NewNodeClaimController(kubeClient, cloudProvider, cluster),
termination.NewController(clock, kubeClient, cloudProvider, terminator.NewTerminator(clock, kubeClient, evictionQueue, recorder), recorder),
metricspod.NewController(kubeClient, cluster),
metricsnodepool.NewController(kubeClient),
metricsnodepool.NewController(kubeClient, cloudProvider),
metricsnode.NewController(cluster),
nodepoolreadiness.NewController(kubeClient, cloudProvider),
nodepoolcounter.NewController(kubeClient, cluster),
nodepoolvalidation.NewController(kubeClient),
podevents.NewController(clock, kubeClient),
nodeclaimconsistency.NewController(clock, kubeClient, recorder),
nodepoolcounter.NewController(kubeClient, cloudProvider, cluster),
nodepoolvalidation.NewController(kubeClient, cloudProvider),
podevents.NewController(clock, kubeClient, cloudProvider),
nodeclaimconsistency.NewController(clock, kubeClient, cloudProvider, recorder),
nodeclaimlifecycle.NewController(clock, kubeClient, cloudProvider, recorder),
nodeclaimgarbagecollection.NewController(clock, kubeClient, cloudProvider),
nodeclaimdisruption.NewController(clock, kubeClient, cloudProvider),
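The common thread in these constructor changes is dependency plumbing: each controller now receives the CloudProvider so its watches and reconciles can be scoped to managed resources. A hedged sketch of the shape of one such controller — the type and constructor here are illustrative stand-ins, not code from this diff:

```go
package controllers

import (
	"sigs.k8s.io/controller-runtime/pkg/client"

	"sigs.k8s.io/karpenter/pkg/cloudprovider"
)

// Controller is an illustrative stand-in for the controllers above whose
// constructors gained a cloudprovider.CloudProvider argument.
type Controller struct {
	kubeClient    client.Client
	cloudProvider cloudprovider.CloudProvider
}

// NewController keeps the provider alongside the client so reconcile and
// list paths can filter to NodeClass GroupKinds the provider manages.
func NewController(kubeClient client.Client, cloudProvider cloudprovider.CloudProvider) *Controller {
	return &Controller{kubeClient: kubeClient, cloudProvider: cloudProvider}
}
```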