Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Avoid multiple logs messages for the node health controller #1888

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 24 additions & 4 deletions pkg/controllers/node/health/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"github.com/samber/lo"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"

Expand Down Expand Up @@ -120,10 +121,18 @@ func (c *Controller) Reconcile(ctx context.Context, node *corev1.Node) (reconcil
if err := c.annotateTerminationGracePeriod(ctx, nodeClaim); err != nil {
return reconcile.Result{}, client.IgnoreNotFound(err)
}

return c.deleteNodeClaim(ctx, nodeClaim, node, unhealthyNodeCondition)
}

// deleteNodeClaim removes the NodeClaim from the api-server
func (c *Controller) deleteNodeClaim(ctx context.Context, nodeClaim *v1.NodeClaim, node *corev1.Node, unhealthyNodeCondition *corev1.NodeCondition) (reconcile.Result, error) {
if !nodeClaim.DeletionTimestamp.IsZero() {
return reconcile.Result{}, nil
}
if err := c.kubeClient.Delete(ctx, nodeClaim); err != nil {
return reconcile.Result{}, client.IgnoreNotFound(err)
}

// The deletion timestamp has successfully been set for the Node, update relevant metrics.
log.FromContext(ctx).V(1).Info("deleting unhealthy node")
metrics.NodeClaimsDisruptedTotal.Inc(map[string]string{
Expand Down Expand Up @@ -155,11 +164,22 @@ func (c *Controller) findUnhealthyConditions(node *corev1.Node) (nc *corev1.Node
}

func (c *Controller) annotateTerminationGracePeriod(ctx context.Context, nodeClaim *v1.NodeClaim) error {
if expirationTimeString, exists := nodeClaim.ObjectMeta.Annotations[v1.NodeClaimTerminationTimestampAnnotationKey]; exists {
expirationTime, err := time.Parse(time.RFC3339, expirationTimeString)
engedaam marked this conversation as resolved.
Show resolved Hide resolved
if err == nil && expirationTime.Before(c.clock.Now()) {
return nil
}
}

stored := nodeClaim.DeepCopy()
nodeClaim.ObjectMeta.Annotations = lo.Assign(nodeClaim.ObjectMeta.Annotations, map[string]string{v1.NodeClaimTerminationTimestampAnnotationKey: c.clock.Now().Format(time.RFC3339)})
terminationTime := c.clock.Now().Format(time.RFC3339)
nodeClaim.ObjectMeta.Annotations = lo.Assign(nodeClaim.ObjectMeta.Annotations, map[string]string{v1.NodeClaimTerminationTimestampAnnotationKey: terminationTime})

if err := c.kubeClient.Patch(ctx, nodeClaim, client.MergeFrom(stored)); err != nil {
return err
if !equality.Semantic.DeepEqual(stored, nodeClaim) {
if err := c.kubeClient.Patch(ctx, nodeClaim, client.MergeFrom(stored)); err != nil {
return err
}
log.FromContext(ctx).WithValues(v1.NodeClaimTerminationTimestampAnnotationKey, terminationTime).Info("annotated nodeclaim")
}

return nil
Expand Down
20 changes: 19 additions & 1 deletion pkg/controllers/node/health/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/samber/lo"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clock "k8s.io/utils/clock/testing"
Expand Down Expand Up @@ -171,7 +172,7 @@ var _ = Describe("Node Health", func() {
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.NodeClaimTerminationTimestampAnnotationKey, fakeClock.Now().Format(time.RFC3339)))
})
It("should not respect termination grace period if set on the nodepool", func() {
nodeClaim.Spec.TerminationGracePeriod = &metav1.Duration{Duration: time.Minute}
nodeClaim.ObjectMeta.Annotations = lo.Assign(nodeClaim.ObjectMeta.Annotations, map[string]string{v1.NodeClaimTerminationTimestampAnnotationKey: fakeClock.Now().Add(120 * time.Minute).Format(time.RFC3339)})
node.Status.Conditions = append(node.Status.Conditions, corev1.NodeCondition{
Type: "BadNode",
Status: corev1.ConditionFalse,
Expand All @@ -186,6 +187,23 @@ var _ = Describe("Node Health", func() {
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.NodeClaimTerminationTimestampAnnotationKey, fakeClock.Now().Format(time.RFC3339)))
})
It("should not update termination grace period if set before the current time", func() {
terminationTime := fakeClock.Now().Add(-3 * time.Minute).Format(time.RFC3339)
nodeClaim.ObjectMeta.Annotations = lo.Assign(nodeClaim.ObjectMeta.Annotations, map[string]string{v1.NodeClaimTerminationTimestampAnnotationKey: terminationTime})
node.Status.Conditions = append(node.Status.Conditions, corev1.NodeCondition{
Type: "BadNode",
Status: corev1.ConditionFalse,
// We expect the last transition for HealthyNode condition to wait 30 minutes
LastTransitionTime: metav1.Time{Time: time.Now()},
})
fakeClock.Step(60 * time.Minute)
ExpectApplied(ctx, env.Client, nodePool, nodeClaim, node)
// Determine to delete unhealthy nodes
ExpectObjectReconciled(ctx, env.Client, healthController, node)

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.NodeClaimTerminationTimestampAnnotationKey, terminationTime))
})
It("should return the requeue interval for the condition closest to its terminationDuration", func() {
cloudProvider.RepairPolicy = []cloudprovider.RepairPolicy{
{
Expand Down
Loading