From 34ec3a8a17db86c7cb6fc7bb3301e670fee6a4e0 Mon Sep 17 00:00:00 2001
From: Jonathan Innis
Date: Wed, 8 Jan 2025 12:13:02 -0800
Subject: [PATCH] Remove usage of 'state node' in eventing, logging, and errors

---
 pkg/controllers/disruption/consolidation.go |  4 +-
 .../disruption/consolidation_test.go        |  2 +-
 pkg/controllers/disruption/drift.go         |  4 +-
 pkg/controllers/disruption/events/events.go | 12 ++---
 pkg/controllers/disruption/suite_test.go    | 44 +++++++++----------
 pkg/controllers/disruption/types.go         | 10 +++--
 pkg/controllers/state/statenode.go          | 14 +++---
 pkg/utils/pretty/pretty.go                  |  5 +++
 8 files changed, 53 insertions(+), 42 deletions(-)

diff --git a/pkg/controllers/disruption/consolidation.go b/pkg/controllers/disruption/consolidation.go
index 69ec01cad5..79180ca6b2 100644
--- a/pkg/controllers/disruption/consolidation.go
+++ b/pkg/controllers/disruption/consolidation.go
@@ -28,6 +28,8 @@ import (
     "k8s.io/utils/clock"
     "sigs.k8s.io/controller-runtime/pkg/client"
 
+    "sigs.k8s.io/karpenter/pkg/utils/pretty"
+
     v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
     "sigs.k8s.io/karpenter/pkg/cloudprovider"
     disruptionevents "sigs.k8s.io/karpenter/pkg/controllers/disruption/events"
@@ -144,7 +146,7 @@ func (c *consolidation) computeConsolidation(ctx context.Context, candidates ...
     if !results.AllNonPendingPodsScheduled() {
         // This method is used by multi-node consolidation as well, so we'll only report in the single node case
         if len(candidates) == 1 {
-            c.recorder.Publish(disruptionevents.Unconsolidatable(candidates[0].Node, candidates[0].NodeClaim, results.NonPendingPodSchedulingErrors())...)
+            c.recorder.Publish(disruptionevents.Unconsolidatable(candidates[0].Node, candidates[0].NodeClaim, pretty.Sentence(results.NonPendingPodSchedulingErrors()))...)
         }
         return Command{}, pscheduling.Results{}, nil
     }
diff --git a/pkg/controllers/disruption/consolidation_test.go b/pkg/controllers/disruption/consolidation_test.go
index 07a7795f55..d69554f1db 100644
--- a/pkg/controllers/disruption/consolidation_test.go
+++ b/pkg/controllers/disruption/consolidation_test.go
@@ -2748,7 +2748,7 @@ var _ = Describe("Consolidation", func() {
             // Expect Unconsolidatable events to be fired
             evts := recorder.Events()
             _, ok := lo.Find(evts, func(e events.Event) bool {
-                return strings.Contains(e.Message, "not all pods would schedule")
+                return strings.Contains(e.Message, "Not all pods would schedule")
             })
             Expect(ok).To(BeTrue())
             _, ok = lo.Find(evts, func(e events.Event) bool {
diff --git a/pkg/controllers/disruption/drift.go b/pkg/controllers/disruption/drift.go
index 06b951ce8a..72e6d317cb 100644
--- a/pkg/controllers/disruption/drift.go
+++ b/pkg/controllers/disruption/drift.go
@@ -23,6 +23,8 @@ import (
 
     "sigs.k8s.io/controller-runtime/pkg/client"
 
+    "sigs.k8s.io/karpenter/pkg/utils/pretty"
+
     v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
     disruptionevents "sigs.k8s.io/karpenter/pkg/controllers/disruption/events"
     "sigs.k8s.io/karpenter/pkg/controllers/provisioning"
@@ -100,7 +102,7 @@ func (d *Drift) ComputeCommand(ctx context.Context, disruptionBudgetMapping map[
         }
         // Emit an event that we couldn't reschedule the pods on the node.
         if !results.AllNonPendingPodsScheduled() {
-            d.recorder.Publish(disruptionevents.Blocked(candidate.Node, candidate.NodeClaim, results.NonPendingPodSchedulingErrors())...)
+            d.recorder.Publish(disruptionevents.Blocked(candidate.Node, candidate.NodeClaim, pretty.Sentence(results.NonPendingPodSchedulingErrors()))...)
             continue
         }
 
diff --git a/pkg/controllers/disruption/events/events.go b/pkg/controllers/disruption/events/events.go
index dea770d5dd..e40eec8b4a 100644
--- a/pkg/controllers/disruption/events/events.go
+++ b/pkg/controllers/disruption/events/events.go
@@ -69,13 +69,13 @@ func Terminating(node *corev1.Node, nodeClaim *v1.NodeClaim, reason string) []ev
 
 // Unconsolidatable is an event that informs the user that a NodeClaim/Node combination cannot be consolidated
 // due to the state of the NodeClaim/Node or due to some state of the pods that are scheduled to the NodeClaim/Node
-func Unconsolidatable(node *corev1.Node, nodeClaim *v1.NodeClaim, reason string) []events.Event {
+func Unconsolidatable(node *corev1.Node, nodeClaim *v1.NodeClaim, msg string) []events.Event {
     return []events.Event{
         {
             InvolvedObject: node,
             Type:           corev1.EventTypeNormal,
             Reason:         "Unconsolidatable",
-            Message:        reason,
+            Message:        msg,
             DedupeValues:   []string{string(node.UID)},
             DedupeTimeout:  time.Minute * 15,
         },
@@ -83,7 +83,7 @@ func Unconsolidatable(node *corev1.Node, nodeClaim *v1.NodeClaim, reason string)
             InvolvedObject: nodeClaim,
             Type:           corev1.EventTypeNormal,
             Reason:         "Unconsolidatable",
-            Message:        reason,
+            Message:        msg,
             DedupeValues:   []string{string(nodeClaim.UID)},
             DedupeTimeout:  time.Minute * 15,
         },
@@ -92,13 +92,13 @@ func Unconsolidatable(node *corev1.Node, nodeClaim *v1.NodeClaim, reason string)
 
 // Blocked is an event that informs the user that a NodeClaim/Node combination is blocked on deprovisioning
 // due to the state of the NodeClaim/Node or due to some state of the pods that are scheduled to the NodeClaim/Node
-func Blocked(node *corev1.Node, nodeClaim *v1.NodeClaim, reason string) (evs []events.Event) {
+func Blocked(node *corev1.Node, nodeClaim *v1.NodeClaim, msg string) (evs []events.Event) {
     if node != nil {
         evs = append(evs, events.Event{
             InvolvedObject: node,
             Type:           corev1.EventTypeNormal,
             Reason:         "DisruptionBlocked",
-            Message:        fmt.Sprintf("Cannot disrupt Node: %s", reason),
+            Message:        msg,
             DedupeValues:   []string{string(node.UID)},
         })
     }
@@ -107,7 +107,7 @@ func Blocked(node *corev1.Node, nodeClaim *v1.NodeClaim, reason string) (evs []e
             InvolvedObject: nodeClaim,
             Type:           corev1.EventTypeNormal,
             Reason:         "DisruptionBlocked",
-            Message:        fmt.Sprintf("Cannot disrupt NodeClaim: %s", reason),
+            Message:        msg,
             DedupeValues:   []string{string(nodeClaim.UID)},
         })
     }
diff --git a/pkg/controllers/disruption/suite_test.go b/pkg/controllers/disruption/suite_test.go
index 88245eb349..9ce8559286 100644
--- a/pkg/controllers/disruption/suite_test.go
+++ b/pkg/controllers/disruption/suite_test.go
@@ -876,7 +876,7 @@ var _ = Describe("Candidate Filtering", func() {
             _, err := disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
             Expect(err.Error()).To(Equal(fmt.Sprintf(`pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod))))
-            Expect(recorder.DetectedEvent(fmt.Sprintf(`Cannot disrupt Node: pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod)))).To(BeTrue())
+            Expect(recorder.DetectedEvent(fmt.Sprintf(`Pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod)))).To(BeTrue())
         })
         It("should not consider candidates that have do-not-disrupt mirror pods scheduled", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -914,7 +914,7 @@ var _ = Describe("Candidate Filtering", func() {
             _, err := disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
             Expect(err.Error()).To(Equal(fmt.Sprintf(`pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod))))
-            Expect(recorder.DetectedEvent(fmt.Sprintf(`Cannot disrupt Node: pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod)))).To(BeTrue())
+            Expect(recorder.DetectedEvent(fmt.Sprintf(`Pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod)))).To(BeTrue())
         })
         It("should not consider candidates that have do-not-disrupt daemonset pods scheduled", func() {
             daemonSet := test.DaemonSet()
@@ -953,7 +953,7 @@ var _ = Describe("Candidate Filtering", func() {
             _, err := disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
             Expect(err.Error()).To(Equal(fmt.Sprintf(`pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod))))
-            Expect(recorder.DetectedEvent(fmt.Sprintf(`Cannot disrupt Node: pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod)))).To(BeTrue())
+            Expect(recorder.DetectedEvent(fmt.Sprintf(`Pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod)))).To(BeTrue())
         })
         It("should consider candidates that have do-not-disrupt pods scheduled with a terminationGracePeriod set for eventual disruption", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1043,7 +1043,7 @@ var _ = Describe("Candidate Filtering", func() {
             _, err := disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
             Expect(err.Error()).To(Equal(fmt.Sprintf(`pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod))))
-            Expect(recorder.DetectedEvent(fmt.Sprintf(`Cannot disrupt Node: pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod)))).To(BeTrue())
+            Expect(recorder.DetectedEvent(fmt.Sprintf(`Pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod)))).To(BeTrue())
         })
         It("should not consider candidates that have PDB-blocked pods scheduled with a terminationGracePeriod set for graceful disruption", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1079,7 +1079,7 @@ var _ = Describe("Candidate Filtering", func() {
             _, err = disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
             Expect(err.Error()).To(Equal(fmt.Sprintf(`pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget))))
-            Expect(recorder.DetectedEvent(fmt.Sprintf(`Cannot disrupt Node: pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget)))).To(BeTrue())
+            Expect(recorder.DetectedEvent(fmt.Sprintf(`Pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget)))).To(BeTrue())
         })
         It("should not consider candidates that have do-not-disrupt pods scheduled without a terminationGracePeriod set for eventual disruption", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1107,7 +1107,7 @@ var _ = Describe("Candidate Filtering", func() {
             _, err := disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.EventualDisruptionClass)
             Expect(err).To(HaveOccurred())
             Expect(err.Error()).To(Equal(fmt.Sprintf(`pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod))))
-            Expect(recorder.DetectedEvent(fmt.Sprintf(`Cannot disrupt Node: pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod)))).To(BeTrue())
+            Expect(recorder.DetectedEvent(fmt.Sprintf(`Pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod)))).To(BeTrue())
         })
         It("should not consider candidates that have PDB-blocked pods scheduled without a terminationGracePeriod set for eventual disruption", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1142,7 +1142,7 @@ var _ = Describe("Candidate Filtering", func() {
             _, err = disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.EventualDisruptionClass)
             Expect(err).To(HaveOccurred())
             Expect(err.Error()).To(Equal(fmt.Sprintf(`pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget))))
-            Expect(recorder.DetectedEvent(fmt.Sprintf(`Cannot disrupt Node: pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget)))).To(BeTrue())
+            Expect(recorder.DetectedEvent(fmt.Sprintf(`Pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget)))).To(BeTrue())
         })
         It("should consider candidates that have do-not-disrupt terminating pods", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1233,7 +1233,7 @@ var _ = Describe("Candidate Filtering", func() {
             _, err := disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
             Expect(err.Error()).To(Equal(`disruption is blocked through the "karpenter.sh/do-not-disrupt" annotation`))
-            Expect(recorder.DetectedEvent(`Cannot disrupt Node: disruption is blocked through the "karpenter.sh/do-not-disrupt" annotation`)).To(BeTrue())
+            Expect(recorder.DetectedEvent(`Disruption is blocked through the "karpenter.sh/do-not-disrupt" annotation`)).To(BeTrue())
         })
         It("should not consider candidates that have fully blocking PDBs", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1269,7 +1269,7 @@ var _ = Describe("Candidate Filtering", func() {
             _, err = disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
             Expect(err.Error()).To(Equal(fmt.Sprintf(`pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget))))
-            Expect(recorder.DetectedEvent(fmt.Sprintf(`Cannot disrupt Node: pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget)))).To(BeTrue())
+            Expect(recorder.DetectedEvent(fmt.Sprintf(`Pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget)))).To(BeTrue())
         })
         It("should not consider candidates that have fully blocking PDBs on daemonset pods", func() {
             daemonSet := test.DaemonSet()
@@ -1316,7 +1316,7 @@ var _ = Describe("Candidate Filtering", func() {
             _, err = disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
             Expect(err.Error()).To(Equal(fmt.Sprintf(`pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget))))
-            Expect(recorder.DetectedEvent(fmt.Sprintf(`Cannot disrupt Node: pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget)))).To(BeTrue())
+            Expect(recorder.DetectedEvent(fmt.Sprintf(`Pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget)))).To(BeTrue())
         })
         It("should consider candidates that have fully blocking PDBs on mirror pods", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1393,7 +1393,7 @@ var _ = Describe("Candidate Filtering", func() {
             _, err = disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
             Expect(err.Error()).To(Equal(fmt.Sprintf(`pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod))))
-            Expect(recorder.DetectedEvent(fmt.Sprintf(`Cannot disrupt Node: pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod)))).To(BeTrue())
+            Expect(recorder.DetectedEvent(fmt.Sprintf(`Pod %q has "karpenter.sh/do-not-disrupt" annotation`, client.ObjectKeyFromObject(pod)))).To(BeTrue())
         })
         It("should not consider candidates that have fully blocking PDBs without a terminationGracePeriod set for graceful disruption", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1427,7 +1427,7 @@ var _ = Describe("Candidate Filtering", func() {
             _, err = disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
             Expect(err.Error()).To(Equal(fmt.Sprintf(`pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget))))
-            Expect(recorder.DetectedEvent(fmt.Sprintf(`Cannot disrupt Node: pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget)))).To(BeTrue())
+            Expect(recorder.DetectedEvent(fmt.Sprintf(`Pdb %q prevents pod evictions`, client.ObjectKeyFromObject(budget)))).To(BeTrue())
         })
         It("should consider candidates that have fully blocking PDBs on terminal pods", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1528,7 +1528,7 @@ var _ = Describe("Candidate Filtering", func() {
             Expect(cluster.Nodes()).To(HaveLen(1))
             _, err := disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
-            Expect(err.Error()).To(Equal("node is not managed by karpenter"))
+            Expect(err.Error()).To(Equal("node isn't managed by karpenter"))
         })
         It("should not consider candidate that has just a NodeClaim representation", func() {
             nodeClaim, _ := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1567,8 +1567,8 @@ var _ = Describe("Candidate Filtering", func() {
             Expect(cluster.Nodes()).To(HaveLen(1))
             _, err := disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
-            Expect(err.Error()).To(Equal("state node is nominated for a pending pod"))
-            Expect(recorder.DetectedEvent("Cannot disrupt Node: state node is nominated for a pending pod")).To(BeTrue())
+            Expect(err.Error()).To(Equal("node is nominated for a pending pod"))
+            Expect(recorder.DetectedEvent("Node is nominated for a pending pod")).To(BeTrue())
         })
         It("should not consider candidates that are deleting", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1590,7 +1590,7 @@ var _ = Describe("Candidate Filtering", func() {
             Expect(cluster.Nodes()).To(HaveLen(1))
             _, err := disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
-            Expect(err.Error()).To(Equal("state node is marked for deletion"))
+            Expect(err.Error()).To(Equal("node is deleting or marked for deletion"))
         })
         It("should not consider candidates that are MarkedForDeletion", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1611,7 +1611,7 @@ var _ = Describe("Candidate Filtering", func() {
             Expect(cluster.Nodes()).To(HaveLen(1))
             _, err := disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
-            Expect(err.Error()).To(Equal("state node is marked for deletion"))
+            Expect(err.Error()).To(Equal("node is deleting or marked for deletion"))
         })
         It("should not consider candidates that aren't yet initialized", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1631,7 +1631,7 @@ var _ = Describe("Candidate Filtering", func() {
             Expect(cluster.Nodes()).To(HaveLen(1))
             _, err := disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
-            Expect(err.Error()).To(Equal("state node isn't initialized"))
+            Expect(err.Error()).To(Equal("node isn't initialized"))
         })
         It("should not consider candidates that are not owned by a NodePool (no karpenter.sh/nodepool label)", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1649,8 +1649,8 @@ var _ = Describe("Candidate Filtering", func() {
             Expect(cluster.Nodes()).To(HaveLen(1))
             _, err := disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
-            Expect(err.Error()).To(Equal(`state node doesn't have required label "karpenter.sh/nodepool"`))
-            Expect(recorder.DetectedEvent(`Cannot disrupt Node: state node doesn't have required label "karpenter.sh/nodepool"`)).To(BeTrue())
+            Expect(err.Error()).To(Equal(`node doesn't have required label "karpenter.sh/nodepool"`))
+            Expect(recorder.DetectedEvent(`Node doesn't have required label "karpenter.sh/nodepool"`)).To(BeTrue())
         })
         It("should not consider candidates that are have a non-existent NodePool", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
@@ -1674,8 +1674,8 @@ var _ = Describe("Candidate Filtering", func() {
             Expect(cluster.Nodes()).To(HaveLen(1))
             _, err := disruption.NewCandidate(ctx, env.Client, recorder, fakeClock, cluster.Nodes()[0], pdbLimits, nodePoolMap, nodePoolInstanceTypeMap, queue, disruption.GracefulDisruptionClass)
             Expect(err).To(HaveOccurred())
-            Expect(err.Error()).To(Equal(fmt.Sprintf("nodepool %q can't be resolved for state node", nodePool.Name)))
-            Expect(recorder.DetectedEvent(fmt.Sprintf("Cannot disrupt Node: NodePool %q not found", nodePool.Name))).To(BeTrue())
+            Expect(err.Error()).To(Equal(fmt.Sprintf("nodepool %q not found", nodePool.Name)))
+            Expect(recorder.DetectedEvent(fmt.Sprintf("NodePool %q not found", nodePool.Name))).To(BeTrue())
         })
         It("should consider candidates that do not have the karpenter.sh/capacity-type label", func() {
             nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{
diff --git a/pkg/controllers/disruption/types.go b/pkg/controllers/disruption/types.go
index 5b7bdf666b..b958f777c8 100644
--- a/pkg/controllers/disruption/types.go
+++ b/pkg/controllers/disruption/types.go
@@ -26,6 +26,8 @@ import (
     "k8s.io/utils/clock"
     "sigs.k8s.io/controller-runtime/pkg/client"
 
+    "sigs.k8s.io/karpenter/pkg/utils/pretty"
+
     v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
     "sigs.k8s.io/karpenter/pkg/cloudprovider"
     disruptionevents "sigs.k8s.io/karpenter/pkg/controllers/disruption/events"
@@ -70,10 +72,10 @@ func NewCandidate(ctx context.Context, kubeClient client.Client, recorder events
     nodePoolMap map[string]*v1.NodePool, nodePoolToInstanceTypesMap map[string]map[string]*cloudprovider.InstanceType, queue *orchestration.Queue, disruptionClass string) (*Candidate, error) {
     var err error
     var pods []*corev1.Pod
-    if err = node.ValidateNodeDisruptable(ctx, kubeClient); err != nil {
+    if err = node.ValidateNodeDisruptable(); err != nil {
         // Only emit an event if the NodeClaim is not nil, ensuring that we only emit events for Karpenter-managed nodes
         if node.NodeClaim != nil {
-            recorder.Publish(disruptionevents.Blocked(node.Node, node.NodeClaim, err.Error())...)
+            recorder.Publish(disruptionevents.Blocked(node.Node, node.NodeClaim, pretty.Sentence(err.Error()))...)
         }
         return nil, err
     }
@@ -88,7 +90,7 @@ func NewCandidate(ctx context.Context, kubeClient client.Client, recorder events
     // skip any candidates where we can't determine the nodePool
     if nodePool == nil || instanceTypeMap == nil {
         recorder.Publish(disruptionevents.Blocked(node.Node, node.NodeClaim, fmt.Sprintf("NodePool %q not found", nodePoolName))...)
-        return nil, fmt.Errorf("nodepool %q can't be resolved for state node", nodePoolName)
+        return nil, fmt.Errorf("nodepool %q not found", nodePoolName)
     }
     // We only care if instanceType in non-empty consolidation to do price-comparison.
     instanceType := instanceTypeMap[node.Labels()[corev1.LabelInstanceTypeStable]]
@@ -98,7 +100,7 @@ func NewCandidate(ctx context.Context, kubeClient client.Client, recorder events
         // failure creating the candidate.
         eventualDisruptionCandidate := node.NodeClaim.Spec.TerminationGracePeriod != nil && disruptionClass == EventualDisruptionClass
         if lo.Ternary(eventualDisruptionCandidate, state.IgnorePodBlockEvictionError(err), err) != nil {
-            recorder.Publish(disruptionevents.Blocked(node.Node, node.NodeClaim, err.Error())...)
+            recorder.Publish(disruptionevents.Blocked(node.Node, node.NodeClaim, pretty.Sentence(err.Error()))...)
             return nil, err
         }
     }
diff --git a/pkg/controllers/state/statenode.go b/pkg/controllers/state/statenode.go
index 9b082d3b3f..ec1f4b084e 100644
--- a/pkg/controllers/state/statenode.go
+++ b/pkg/controllers/state/statenode.go
@@ -180,29 +180,29 @@ func (in *StateNode) Pods(ctx context.Context, kubeClient client.Client) ([]*cor
 // ValidateNodeDisruptable takes in a recorder to emit events on the nodeclaims when the state node is not a candidate
 //
 //nolint:gocyclo
-func (in *StateNode) ValidateNodeDisruptable(ctx context.Context, kubeClient client.Client) error {
+func (in *StateNode) ValidateNodeDisruptable() error {
     if in.NodeClaim == nil {
-        return fmt.Errorf("node is not managed by karpenter")
+        return fmt.Errorf("node isn't managed by karpenter")
     }
     if in.Node == nil {
         return fmt.Errorf("nodeclaim does not have an associated node")
     }
     if !in.Initialized() {
-        return fmt.Errorf("state node isn't initialized")
+        return fmt.Errorf("node isn't initialized")
     }
     if in.MarkedForDeletion() {
-        return fmt.Errorf("state node is marked for deletion")
+        return fmt.Errorf("node is deleting or marked for deletion")
     }
     // skip the node if it is nominated by a recent provisioning pass to be the target of a pending pod.
     if in.Nominated() {
-        return fmt.Errorf("state node is nominated for a pending pod")
+        return fmt.Errorf("node is nominated for a pending pod")
     }
     if in.Annotations()[v1.DoNotDisruptAnnotationKey] == "true" {
         return fmt.Errorf("disruption is blocked through the %q annotation", v1.DoNotDisruptAnnotationKey)
     }
     // check whether the node has the NodePool label
     if _, ok := in.Labels()[v1.NodePoolLabelKey]; !ok {
-        return fmt.Errorf("state node doesn't have required label %q", v1.NodePoolLabelKey)
+        return fmt.Errorf("node doesn't have required label %q", v1.NodePoolLabelKey)
     }
     return nil
 }
@@ -215,7 +215,7 @@ func (in *StateNode) ValidateNodeDisruptable(ctx context.Context, kubeClient cli
 func (in *StateNode) ValidatePodsDisruptable(ctx context.Context, kubeClient client.Client, pdbs pdb.Limits) ([]*corev1.Pod, error) {
     pods, err := in.Pods(ctx, kubeClient)
     if err != nil {
-        return nil, fmt.Errorf("getting pods from state node, %w", err)
+        return nil, fmt.Errorf("getting pods from node, %w", err)
     }
     for _, po := range pods {
         // We only consider pods that are actively running for "karpenter.sh/do-not-disrupt"
diff --git a/pkg/utils/pretty/pretty.go b/pkg/utils/pretty/pretty.go
index 51cc3ba609..009be00366 100644
--- a/pkg/utils/pretty/pretty.go
+++ b/pkg/utils/pretty/pretty.go
@@ -22,6 +22,7 @@ import (
     "fmt"
     "regexp"
     "strings"
+    "unicode"
 
     "golang.org/x/exp/constraints"
     "golang.org/x/exp/slices"
@@ -95,3 +96,7 @@ func ToSnakeCase(str string) string {
     snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
     return strings.ToLower(snake)
 }
+
+func Sentence(str string) string {
+    return string(unicode.ToUpper(rune(str[0]))) + str[1:]
+}
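
A minimal sketch (not part of the patch) of how the new pretty.Sentence helper is used at the publish sites above: validation errors stay lower-case so they compose with error wrapping, and only the event message is capitalized. It assumes the message is non-empty and starts with a single-byte (ASCII) character, since Sentence upper-cases str[0].

package main

import (
    "fmt"

    "sigs.k8s.io/karpenter/pkg/utils/pretty"
)

func main() {
    // Error strings remain lower-case for wrapping and error comparisons...
    err := fmt.Errorf("node is nominated for a pending pod")

    // ...and are capitalized only where they become event messages, printing:
    //   Node is nominated for a pending pod
    fmt.Println(pretty.Sentence(err.Error()))
}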