From 8239300b69af3137e5994558257375c678560756 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Tue, 8 Apr 2025 22:45:05 -0700 Subject: [PATCH 01/45] [Warm Replicas] Implement warm replica support for controllers. --- pkg/config/controller.go | 4 + pkg/controller/controller.go | 25 +-- pkg/controller/controller_test.go | 73 +++++++++ pkg/internal/controller/controller.go | 25 +++ pkg/internal/controller/controller_test.go | 121 ++++++++++++++ pkg/manager/internal.go | 9 ++ pkg/manager/manager.go | 6 + pkg/manager/manager_test.go | 69 ++++++++ pkg/manager/runnable_group.go | 72 ++++++--- pkg/manager/runnable_group_test.go | 173 +++++++++++++++++++++ 10 files changed, 550 insertions(+), 27 deletions(-) diff --git a/pkg/config/controller.go b/pkg/config/controller.go index a5655593ef..60b805e4e2 100644 --- a/pkg/config/controller.go +++ b/pkg/config/controller.go @@ -60,6 +60,10 @@ type Controller struct { // Defaults to true, which means the controller will use leader election. NeedLeaderElection *bool + // NeedWarmUp indicates whether the controller needs to use warm up. + // Defaults to false, which means the controller will not use warm up. + NeedWarmUp *bool + // UsePriorityQueue configures the controllers queue to use the controller-runtime provided // priority queue. // diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 9de959b48f..1ce9ddbf23 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -93,6 +93,12 @@ type TypedOptions[request comparable] struct { // // Note: This flag is disabled by default until a future version. It's currently in beta. UsePriorityQueue *bool + + // ShouldWarmupWithoutLeadership specifies whether the controller should start its sources + // when the manager is not the leader. + // Defaults to false, which means that the controller will wait for leader election to start + // before starting sources. + ShouldWarmupWithoutLeadership *bool } // DefaultFromConfig defaults the config from a config.Controller @@ -244,15 +250,16 @@ func NewTypedUnmanaged[request comparable](name string, options TypedOptions[req // Create controller with dependencies set return &controller.Controller[request]{ - Do: options.Reconciler, - RateLimiter: options.RateLimiter, - NewQueue: options.NewQueue, - MaxConcurrentReconciles: options.MaxConcurrentReconciles, - CacheSyncTimeout: options.CacheSyncTimeout, - Name: name, - LogConstructor: options.LogConstructor, - RecoverPanic: options.RecoverPanic, - LeaderElected: options.NeedLeaderElection, + Do: options.Reconciler, + RateLimiter: options.RateLimiter, + NewQueue: options.NewQueue, + MaxConcurrentReconciles: options.MaxConcurrentReconciles, + CacheSyncTimeout: options.CacheSyncTimeout, + Name: name, + LogConstructor: options.LogConstructor, + RecoverPanic: options.RecoverPanic, + LeaderElected: options.NeedLeaderElection, + ShouldWarmupWithoutLeadership: options.ShouldWarmupWithoutLeadership, }, nil } diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index 1c5b11d709..48736b07b6 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -474,5 +474,78 @@ var _ = Describe("controller.Controller", func() { _, ok = q.(priorityqueue.PriorityQueue[reconcile.Request]) Expect(ok).To(BeFalse()) }) + + It("should set ShouldWarmupWithoutLeadership correctly", func() { + m, err := manager.New(cfg, manager.Options{}) + Expect(err).NotTo(HaveOccurred()) + + // Test with ShouldWarmupWithoutLeadership set to true + ctrlWithWarmup, err := controller.New("warmup-enabled-ctrl", m, controller.Options{ + Reconciler: reconcile.Func(nil), + ShouldWarmupWithoutLeadership: ptr.To(true), + }) + Expect(err).NotTo(HaveOccurred()) + + internalCtrlWithWarmup, ok := ctrlWithWarmup.(*internalcontroller.Controller[reconcile.Request]) + Expect(ok).To(BeTrue()) + Expect(internalCtrlWithWarmup.ShouldWarmupWithoutLeadership).NotTo(BeNil()) + Expect(*internalCtrlWithWarmup.ShouldWarmupWithoutLeadership).To(BeTrue()) + + // Test with ShouldWarmupWithoutLeadership set to false + ctrlWithoutWarmup, err := controller.New("warmup-disabled-ctrl", m, controller.Options{ + Reconciler: reconcile.Func(nil), + ShouldWarmupWithoutLeadership: ptr.To(false), + }) + Expect(err).NotTo(HaveOccurred()) + + internalCtrlWithoutWarmup, ok := ctrlWithoutWarmup.(*internalcontroller.Controller[reconcile.Request]) + Expect(ok).To(BeTrue()) + Expect(internalCtrlWithoutWarmup.ShouldWarmupWithoutLeadership).NotTo(BeNil()) + Expect(*internalCtrlWithoutWarmup.ShouldWarmupWithoutLeadership).To(BeFalse()) + + // Test with ShouldWarmupWithoutLeadership not set (should default to nil) + ctrlWithDefaultWarmup, err := controller.New("warmup-default-ctrl", m, controller.Options{ + Reconciler: reconcile.Func(nil), + }) + Expect(err).NotTo(HaveOccurred()) + + internalCtrlWithDefaultWarmup, ok := ctrlWithDefaultWarmup.(*internalcontroller.Controller[reconcile.Request]) + Expect(ok).To(BeTrue()) + Expect(internalCtrlWithDefaultWarmup.ShouldWarmupWithoutLeadership).To(BeNil()) + }) + + It("should inherit ShouldWarmupWithoutLeadership from manager config", func() { + // Test with manager default setting ShouldWarmupWithoutLeadership to true + managerWithWarmup, err := manager.New(cfg, manager.Options{ + Controller: config.Controller{ + NeedWarmUp: ptr.To(true), + }, + }) + Expect(err).NotTo(HaveOccurred()) + ctrlInheritingWarmup, err := controller.New("inherit-warmup-enabled", managerWithWarmup, controller.Options{ + Reconciler: reconcile.Func(nil), + }) + Expect(err).NotTo(HaveOccurred()) + + internalCtrlInheritingWarmup, ok := ctrlInheritingWarmup.(*internalcontroller.Controller[reconcile.Request]) + Expect(ok).To(BeTrue()) + // Note: This test will fail until the DefaultFromConfig method is updated to set + // ShouldWarmupWithoutLeadership from config.Controller.NeedWarmUp + // This test demonstrates that the feature needs to be completed + Expect(internalCtrlInheritingWarmup.ShouldWarmupWithoutLeadership).NotTo(BeNil()) + Expect(*internalCtrlInheritingWarmup.ShouldWarmupWithoutLeadership).To(BeTrue()) + + // Test that explicit controller setting overrides manager setting + ctrlOverridingWarmup, err := controller.New("override-warmup-disabled", managerWithWarmup, controller.Options{ + Reconciler: reconcile.Func(nil), + ShouldWarmupWithoutLeadership: ptr.To(false), + }) + Expect(err).NotTo(HaveOccurred()) + + internalCtrlOverridingWarmup, ok := ctrlOverridingWarmup.(*internalcontroller.Controller[reconcile.Request]) + Expect(ok).To(BeTrue()) + Expect(internalCtrlOverridingWarmup.ShouldWarmupWithoutLeadership).NotTo(BeNil()) + Expect(*internalCtrlOverridingWarmup.ShouldWarmupWithoutLeadership).To(BeFalse()) + }) }) }) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 9fa7ec71e1..35949e0928 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -83,6 +83,9 @@ type Controller[request comparable] struct { // startWatches maintains a list of sources, handlers, and predicates to start when the controller is started. startWatches []source.TypedSource[request] + // didStartEventSources is used to indicate whether the event sources have been started. + didStartEventSources atomic.Bool + // LogConstructor is used to construct a logger to then log messages to users during reconciliation, // or for example when a watch is started. // Note: LogConstructor has to be able to handle nil requests as we are also using it @@ -95,6 +98,12 @@ type Controller[request comparable] struct { // LeaderElected indicates whether the controller is leader elected or always running. LeaderElected *bool + + // ShouldWarmupWithoutLeadership specifies whether the controller should start its sources + // when the manager is not the leader. + // Defaults to false, which means that the controller will wait for leader election to start + // before starting sources. + ShouldWarmupWithoutLeadership *bool } // Reconcile implements reconcile.Reconciler. @@ -144,6 +153,15 @@ func (c *Controller[request]) NeedLeaderElection() bool { return *c.LeaderElected } +// Warmup implements the manager.WarmupRunnable interface. +func (c *Controller[request]) Warmup(ctx context.Context) error { + if c.ShouldWarmupWithoutLeadership == nil || !*c.ShouldWarmupWithoutLeadership { + return nil + } + + return c.startEventSources(ctx) +} + // Start implements controller.Controller. func (c *Controller[request]) Start(ctx context.Context) error { // use an IIFE to get proper lock handling @@ -221,6 +239,13 @@ func (c *Controller[request]) Start(ctx context.Context) error { // startEventSources launches all the sources registered with this controller and waits // for them to sync. It returns an error if any of the sources fail to start or sync. func (c *Controller[request]) startEventSources(ctx context.Context) error { + // CAS returns false if value is already true, so early exit since another goroutine must have + // called startEventSources previously + if !c.didStartEventSources.CompareAndSwap(false, true) { + c.LogConstructor(nil).Info("Skipping starting event sources since it was already started") + return nil + } + errGroup := &errgroup.Group{} for _, watch := range c.startWatches { log := c.LogConstructor(nil) diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 3fde5da9c8..b7c3989fd2 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -1014,6 +1014,127 @@ var _ = Describe("controller", func() { }) }) }) + + Describe("Warmup", func() { + It("should start event sources when ShouldWarmupWithoutLeadership is true", func() { + // Setup + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Create a mock source that we can verify was started + sourceStarted := false + mockSource := source.Func(func(ctx context.Context, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + sourceStarted = true + return nil + }) + + ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} + ctrl.ShouldWarmupWithoutLeadership = ptr.To(true) + + // Act + err := ctrl.Warmup(ctx) + + // Assert + Expect(err).NotTo(HaveOccurred()) + Expect(sourceStarted).To(BeTrue(), "Event source should have been started") + Expect(ctrl.didStartEventSources.Load()).To(BeTrue(), "didStartEventSources flag should be set") + }) + + It("should not start event sources when ShouldWarmupWithoutLeadership is false", func() { + // Setup + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Create a mock source that should not be started + sourceStarted := false + mockSource := source.Func(func(ctx context.Context, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + sourceStarted = true + return nil + }) + + ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} + ctrl.ShouldWarmupWithoutLeadership = ptr.To(false) + + // Act + err := ctrl.Warmup(ctx) + + // Assert + Expect(err).NotTo(HaveOccurred()) + Expect(sourceStarted).To(BeFalse(), "Event source should not have been started") + Expect(ctrl.didStartEventSources.Load()).To(BeFalse(), "didStartEventSources flag should not be set") + }) + + It("should not start event sources when ShouldWarmupWithoutLeadership is nil", func() { + // Setup + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Create a mock source that should not be started + sourceStarted := false + mockSource := source.Func(func(ctx context.Context, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + sourceStarted = true + return nil + }) + + ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} + ctrl.ShouldWarmupWithoutLeadership = nil + + // Act + err := ctrl.Warmup(ctx) + + // Assert + Expect(err).NotTo(HaveOccurred()) + Expect(sourceStarted).To(BeFalse(), "Event source should not have been started") + Expect(ctrl.didStartEventSources.Load()).To(BeFalse(), "didStartEventSources flag should not be set") + }) + + It("should not start event sources twice when called multiple times", func() { + // Setup + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Create a mock source that counts how many times it's started + startCount := 0 + mockSource := source.Func(func(ctx context.Context, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + startCount++ + return nil + }) + + ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} + ctrl.ShouldWarmupWithoutLeadership = ptr.To(true) + + // Act + err1 := ctrl.Warmup(ctx) + err2 := ctrl.Warmup(ctx) + + // Assert + Expect(err1).NotTo(HaveOccurred()) + Expect(err2).NotTo(HaveOccurred()) + Expect(startCount).To(Equal(1), "Event source should have been started only once") + Expect(ctrl.didStartEventSources.Load()).To(BeTrue(), "didStartEventSources flag should be set") + }) + + It("should propagate errors from event sources", func() { + // Setup + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Create a mock source that returns an error + expectedErr := errors.New("test error") + mockSource := source.Func(func(ctx context.Context, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + return expectedErr + }) + + ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} + ctrl.ShouldWarmupWithoutLeadership = ptr.To(true) + + // Act + err := ctrl.Warmup(ctx) + + // Assert + Expect(err).To(MatchError(expectedErr)) + }) + }) }) var _ = Describe("ReconcileIDFromContext function", func() { diff --git a/pkg/manager/internal.go b/pkg/manager/internal.go index e5204a7506..261bccac9d 100644 --- a/pkg/manager/internal.go +++ b/pkg/manager/internal.go @@ -439,6 +439,11 @@ func (cm *controllerManager) Start(ctx context.Context) (err error) { return fmt.Errorf("failed to start other runnables: %w", err) } + // Start and wait for sources to start. + if err := cm.runnables.Warmup.Start(cm.internalCtx); err != nil { + return fmt.Errorf("failed to start warmup runnables: %w", err) + } + // Start the leader election and all required runnables. { ctx, cancel := context.WithCancel(context.Background()) @@ -544,6 +549,10 @@ func (cm *controllerManager) engageStopProcedure(stopComplete <-chan struct{}) e cm.runnables.LeaderElection.startOnce.Do(func() {}) cm.runnables.LeaderElection.StopAndWait(cm.shutdownCtx) + // Stop the warmup runnables + cm.logger.Info("Stopping and waiting for warmup runnables") + cm.runnables.Warmup.StopAndWait(cm.shutdownCtx) + // Stop the caches before the leader election runnables, this is an important // step to make sure that we don't race with the reconcilers by receiving more events // from the API servers and enqueueing them. diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index c3ae317b04..013321366c 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -314,6 +314,12 @@ type LeaderElectionRunnable interface { NeedLeaderElection() bool } +// WarmupRunnable knows if a Runnable should be a warmup runnable. +type WarmupRunnable interface { + // Warmup returns true if the Runnable should be run as warmup. + Warmup(context.Context) error +} + // New returns a new Manager for creating Controllers. // Note that if ContentType in the given config is not set, "application/vnd.kubernetes.protobuf" // will be used for all built-in resources of Kubernetes, and "application/json" is for other types diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 247a33f9dc..822a3b6200 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -1928,6 +1928,75 @@ var _ = Describe("manger.Manager", func() { Expect(err).NotTo(HaveOccurred()) Expect(m.GetAPIReader()).NotTo(BeNil()) }) + + It("should run warmup runnables before leader election is won", func() { + By("Creating channels to track execution order") + warmupCalled := make(chan struct{}) + leaderElectionRunnableCalled := make(chan struct{}) + + By("Creating a manager with leader election enabled") + m, err := New(cfg, Options{ + LeaderElection: true, + LeaderElectionNamespace: "default", + LeaderElectionID: "test-leader-election-warmup", + newResourceLock: fakeleaderelection.NewResourceLock, + HealthProbeBindAddress: "0", + Metrics: metricsserver.Options{BindAddress: "0"}, + PprofBindAddress: "0", + }) + Expect(err).NotTo(HaveOccurred()) + + // Override onStoppedLeading to prevent os.Exit + cm := m.(*controllerManager) + cm.onStoppedLeading = func() {} + + By("Creating a runnable that implements WarmupRunnable interface") + // Create a warmup runnable + warmupRunnable := WarmupRunnableFunc{ + RunFunc: func(ctx context.Context) error { + // This is the main runnable that will be executed after leader election + <-ctx.Done() + return nil + }, + WarmupFunc: func(ctx context.Context) error { + // This should be called during startup before leader election + close(warmupCalled) + return nil + }, + } + Expect(m.Add(warmupRunnable)).To(Succeed()) + + By("Creating a runnable that requires leader election") + leaderElectionRunnable := LeaderElectionRunnableFunc{ + RunFunc: func(ctx context.Context) error { + // This will only be called after leader election is won + close(leaderElectionRunnableCalled) + <-ctx.Done() + return nil + }, + NeedLeaderElectionFunc: func() bool { + return true + }, + } + Expect(m.Add(leaderElectionRunnable)).To(Succeed()) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + go func() { + defer GinkgoRecover() + Expect(m.Start(ctx)).NotTo(HaveOccurred()) + }() + + By("Waiting for the warmup runnable to be called") + <-warmupCalled + + By("Waiting for leader election to be won") + <-m.Elected() + + By("Verifying the leader election runnable is called after election") + <-leaderElectionRunnableCalled + }) }) type runnableError struct { diff --git a/pkg/manager/runnable_group.go b/pkg/manager/runnable_group.go index db5cda7c88..fd700173d4 100644 --- a/pkg/manager/runnable_group.go +++ b/pkg/manager/runnable_group.go @@ -32,6 +32,7 @@ type runnables struct { Webhooks *runnableGroup Caches *runnableGroup LeaderElection *runnableGroup + Warmup *runnableGroup Others *runnableGroup } @@ -42,6 +43,7 @@ func newRunnables(baseContext BaseContextFunc, errChan chan error) *runnables { Webhooks: newRunnableGroup(baseContext, errChan), Caches: newRunnableGroup(baseContext, errChan), LeaderElection: newRunnableGroup(baseContext, errChan), + Warmup: newRunnableGroup(baseContext, errChan), Others: newRunnableGroup(baseContext, errChan), } } @@ -65,8 +67,20 @@ func (r *runnables) Add(fn Runnable) error { }) case webhook.Server: return r.Webhooks.Add(fn, nil) - case LeaderElectionRunnable: - if !runnable.NeedLeaderElection() { + case WarmupRunnable, LeaderElectionRunnable: + if warmupRunnable, ok := fn.(WarmupRunnable); ok { + if err := r.Warmup.Add(RunnableFunc(warmupRunnable.Warmup), nil); err != nil { + return err + } + } + + leaderElectionRunnable, ok := fn.(LeaderElectionRunnable) + if !ok { + // If the runnable is not a LeaderElectionRunnable, add it to the leader election group for backwards compatibility + return r.LeaderElection.Add(fn, nil) + } + + if !leaderElectionRunnable.NeedLeaderElection() { return r.Others.Add(fn, nil) } return r.LeaderElection.Add(fn, nil) @@ -208,23 +222,50 @@ func (r *runnableGroup) reconcile() { // Start the runnable. go func(rn *readyRunnable) { + // If we return, the runnable ended cleanly + // or returned an error to the channel. + // + // We should always decrement the WaitGroup here. + defer r.wg.Done() + + // Track the ready check in the same WaitGroup to prevent goroutine leaks + done := make(chan struct{}) + + // Launch the ready check but make sure it doesn't outlive this goroutine go func() { + defer close(done) if rn.Check(r.ctx) { if rn.signalReady { - r.startReadyCh <- rn + // Use non-blocking send to avoid leaking this goroutine if the channel is never read + select { + case r.startReadyCh <- rn: + // Successfully sent + case <-r.ctx.Done(): + // Context canceled, exit without blocking + } } } }() - // If we return, the runnable ended cleanly - // or returned an error to the channel. - // - // We should always decrement the WaitGroup here. - defer r.wg.Done() - // Start the runnable. - if err := rn.Start(r.ctx); err != nil { - r.errChan <- err + err := rn.Start(r.ctx) + + // Now that the runnable is done, clean up the ready check goroutine if still running + select { + case <-done: + // Ready check already completed, nothing to do + case <-r.ctx.Done(): + // Context was canceled, ready check should exit soon + } + + // Send any error from the runnable + if err != nil { + select { + case r.errChan <- err: + // Error sent successfully + default: + // Channel full or closed, can't send the error + } } }(runnable) } @@ -283,18 +324,13 @@ func (r *runnableGroup) Add(rn Runnable, ready runnableCheck) error { // StopAndWait waits for all the runnables to finish before returning. func (r *runnableGroup) StopAndWait(ctx context.Context) { r.stopOnce.Do(func() { - // Close the reconciler channel once we're done. - defer func() { - r.stop.Lock() - close(r.ch) - r.stop.Unlock() - }() - _ = r.Start(ctx) r.stop.Lock() // Store the stopped variable so we don't accept any new // runnables for the time being. r.stopped = true + // Close the channel to signal the reconcile goroutine to exit + close(r.ch) r.stop.Unlock() // Cancel the internal channel. diff --git a/pkg/manager/runnable_group_test.go b/pkg/manager/runnable_group_test.go index f2f4119ba6..35373aa8f8 100644 --- a/pkg/manager/runnable_group_test.go +++ b/pkg/manager/runnable_group_test.go @@ -53,6 +53,130 @@ var _ = Describe("runnables", func() { Expect(r.Add(runnable)).To(Succeed()) Expect(r.LeaderElection.startQueue).To(HaveLen(1)) }) + + It("should add WarmupRunnable to the Warmup and LeaderElection group", func() { + warmupRunnable := WarmupRunnableFunc{ + RunFunc: func(c context.Context) error { + <-c.Done() + return nil + }, + WarmupFunc: func(c context.Context) error { + return nil + }, + } + + r := newRunnables(defaultBaseContext, errCh) + Expect(r.Add(warmupRunnable)).To(Succeed()) + Expect(r.Warmup.startQueue).To(HaveLen(1)) + Expect(r.LeaderElection.startQueue).To(HaveLen(1)) + }) + + It("should add WarmupRunnable that doesn't needs leader election to warmup group only", func() { + warmupRunnable := CombinedRunnable{ + RunFunc: func(c context.Context) error { + <-c.Done() + return nil + }, + WarmupFunc: func(c context.Context) error { + return nil + }, + NeedLeaderElectionFunc: func() bool { + return false + }, + } + + r := newRunnables(defaultBaseContext, errCh) + Expect(r.Add(warmupRunnable)).To(Succeed()) + + Expect(r.Warmup.startQueue).To(HaveLen(1)) + Expect(r.LeaderElection.startQueue).To(BeEmpty()) + }) + + It("should add WarmupRunnable that needs leader election to Warmup and LeaderElection group, not Others", func() { + warmupRunnable := CombinedRunnable{ + RunFunc: func(c context.Context) error { + <-c.Done() + return nil + }, + WarmupFunc: func(c context.Context) error { + return nil + }, + NeedLeaderElectionFunc: func() bool { + return true + }, + } + + r := newRunnables(defaultBaseContext, errCh) + Expect(r.Add(warmupRunnable)).To(Succeed()) + + Expect(r.Warmup.startQueue).To(HaveLen(1)) + Expect(r.LeaderElection.startQueue).To(HaveLen(1)) + Expect(r.Others.startQueue).To(BeEmpty()) + }) + + It("should execute the Warmup function when Warmup group is started", func() { + warmupExecuted := false + + warmupRunnable := WarmupRunnableFunc{ + RunFunc: func(c context.Context) error { + <-c.Done() + return nil + }, + WarmupFunc: func(c context.Context) error { + warmupExecuted = true + return nil + }, + } + + r := newRunnables(defaultBaseContext, errCh) + Expect(r.Add(warmupRunnable)).To(Succeed()) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Start the Warmup group + Expect(r.Warmup.Start(ctx)).To(Succeed()) + + // Verify warmup function was called + Expect(warmupExecuted).To(BeTrue()) + }) + + It("should propagate errors from Warmup function to error channel", func() { + expectedErr := fmt.Errorf("expected warmup error") + + warmupRunnable := WarmupRunnableFunc{ + RunFunc: func(c context.Context) error { + <-c.Done() + return nil + }, + WarmupFunc: func(c context.Context) error { + return expectedErr + }, + } + + testErrChan := make(chan error, 1) + r := newRunnables(defaultBaseContext, testErrChan) + Expect(r.Add(warmupRunnable)).To(Succeed()) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Start the Warmup group in a goroutine + go func() { + Expect(r.Warmup.Start(ctx)).To(Succeed()) + }() + + // Error from Warmup should be sent to error channel + var receivedErr error + Eventually(func() error { + select { + case receivedErr = <-testErrChan: + return receivedErr + default: + return nil + } + }).Should(Equal(expectedErr)) + }) }) var _ = Describe("runnableGroup", func() { @@ -224,3 +348,52 @@ var _ = Describe("runnableGroup", func() { } }) }) + +// LeaderElectionRunnableFunc is a helper struct that implements LeaderElectionRunnable +// for testing purposes. +type LeaderElectionRunnableFunc struct { + RunFunc func(context.Context) error + NeedLeaderElectionFunc func() bool +} + +func (r LeaderElectionRunnableFunc) Start(ctx context.Context) error { + return r.RunFunc(ctx) +} + +func (r LeaderElectionRunnableFunc) NeedLeaderElection() bool { + return r.NeedLeaderElectionFunc() +} + +// WarmupRunnableFunc is a helper struct that implements WarmupRunnable +// for testing purposes. +type WarmupRunnableFunc struct { + RunFunc func(context.Context) error + WarmupFunc func(context.Context) error +} + +func (r WarmupRunnableFunc) Start(ctx context.Context) error { + return r.RunFunc(ctx) +} + +func (r WarmupRunnableFunc) Warmup(ctx context.Context) error { + return r.WarmupFunc(ctx) +} + +// CombinedRunnable implements both WarmupRunnable and LeaderElectionRunnable +type CombinedRunnable struct { + RunFunc func(context.Context) error + WarmupFunc func(context.Context) error + NeedLeaderElectionFunc func() bool +} + +func (r CombinedRunnable) Start(ctx context.Context) error { + return r.RunFunc(ctx) +} + +func (r CombinedRunnable) Warmup(ctx context.Context) error { + return r.WarmupFunc(ctx) +} + +func (r CombinedRunnable) NeedLeaderElection() bool { + return r.NeedLeaderElectionFunc() +} From 73fc8fa13fb92480c4c0e2a36127eb58b94c742b Mon Sep 17 00:00:00 2001 From: godwinpang Date: Sun, 13 Apr 2025 22:23:13 -0700 Subject: [PATCH 02/45] Remove irrelevant runnable_group.go code. --- pkg/manager/runnable_group.go | 56 +++++++++++------------------------ 1 file changed, 17 insertions(+), 39 deletions(-) diff --git a/pkg/manager/runnable_group.go b/pkg/manager/runnable_group.go index fd700173d4..21b349529e 100644 --- a/pkg/manager/runnable_group.go +++ b/pkg/manager/runnable_group.go @@ -222,51 +222,24 @@ func (r *runnableGroup) reconcile() { // Start the runnable. go func(rn *readyRunnable) { - // If we return, the runnable ended cleanly - // or returned an error to the channel. - // - // We should always decrement the WaitGroup here. - defer r.wg.Done() - - // Track the ready check in the same WaitGroup to prevent goroutine leaks - done := make(chan struct{}) - - // Launch the ready check but make sure it doesn't outlive this goroutine go func() { - defer close(done) if rn.Check(r.ctx) { if rn.signalReady { - // Use non-blocking send to avoid leaking this goroutine if the channel is never read - select { - case r.startReadyCh <- rn: - // Successfully sent - case <-r.ctx.Done(): - // Context canceled, exit without blocking - } + r.startReadyCh <- rn } } }() - // Start the runnable. - err := rn.Start(r.ctx) - - // Now that the runnable is done, clean up the ready check goroutine if still running - select { - case <-done: - // Ready check already completed, nothing to do - case <-r.ctx.Done(): - // Context was canceled, ready check should exit soon - } + // If we return, the runnable ended cleanly + // or returned an error to the channel. + // + // We should always decrement the WaitGroup here. + defer r.wg.Done() - // Send any error from the runnable - if err != nil { - select { - case r.errChan <- err: - // Error sent successfully - default: - // Channel full or closed, can't send the error - } - } + // Start the runnable. + if err := rn.Start(r.ctx); err != nil { + r.errChan <- err + } }(runnable) } } @@ -324,13 +297,18 @@ func (r *runnableGroup) Add(rn Runnable, ready runnableCheck) error { // StopAndWait waits for all the runnables to finish before returning. func (r *runnableGroup) StopAndWait(ctx context.Context) { r.stopOnce.Do(func() { + // Close the reconciler channel once we're done. + defer func() { + r.stop.Lock() + close(r.ch) + r.stop.Unlock() + }() + _ = r.Start(ctx) r.stop.Lock() // Store the stopped variable so we don't accept any new // runnables for the time being. r.stopped = true - // Close the channel to signal the reconcile goroutine to exit - close(r.ch) r.stop.Unlock() // Cancel the internal channel. From be1b1c2b3bed3ba8ceff20f57c020e6a2fe8f457 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Sun, 13 Apr 2025 22:24:18 -0700 Subject: [PATCH 03/45] Rename ShouldWarmup. --- pkg/config/controller.go | 4 +-- pkg/controller/controller.go | 10 +++++--- pkg/controller/controller_test.go | 29 ++++++++++------------ pkg/internal/controller/controller.go | 6 ++--- pkg/internal/controller/controller_test.go | 21 +++++++++------- 5 files changed, 37 insertions(+), 33 deletions(-) diff --git a/pkg/config/controller.go b/pkg/config/controller.go index 60b805e4e2..60c010025a 100644 --- a/pkg/config/controller.go +++ b/pkg/config/controller.go @@ -60,9 +60,9 @@ type Controller struct { // Defaults to true, which means the controller will use leader election. NeedLeaderElection *bool - // NeedWarmUp indicates whether the controller needs to use warm up. + // EnableWarmup indicates whether the controller needs to use warm up. // Defaults to false, which means the controller will not use warm up. - NeedWarmUp *bool + NeedWarmup *bool // UsePriorityQueue configures the controllers queue to use the controller-runtime provided // priority queue. diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 1ce9ddbf23..2220d04c57 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -94,11 +94,11 @@ type TypedOptions[request comparable] struct { // Note: This flag is disabled by default until a future version. It's currently in beta. UsePriorityQueue *bool - // ShouldWarmupWithoutLeadership specifies whether the controller should start its sources + // NeedWarmup specifies whether the controller should start its sources // when the manager is not the leader. // Defaults to false, which means that the controller will wait for leader election to start // before starting sources. - ShouldWarmupWithoutLeadership *bool + NeedWarmup *bool } // DefaultFromConfig defaults the config from a config.Controller @@ -130,6 +130,10 @@ func (options *TypedOptions[request]) DefaultFromConfig(config config.Controller if options.NeedLeaderElection == nil { options.NeedLeaderElection = config.NeedLeaderElection } + + if options.NeedWarmup == nil { + options.NeedWarmup = config.NeedWarmup + } } // Controller implements an API. A Controller manages a work queue fed reconcile.Requests @@ -259,7 +263,7 @@ func NewTypedUnmanaged[request comparable](name string, options TypedOptions[req LogConstructor: options.LogConstructor, RecoverPanic: options.RecoverPanic, LeaderElected: options.NeedLeaderElection, - ShouldWarmupWithoutLeadership: options.ShouldWarmupWithoutLeadership, + NeedWarmup: options.NeedWarmup, }, nil } diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index 48736b07b6..13a2c4046e 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -482,26 +482,26 @@ var _ = Describe("controller.Controller", func() { // Test with ShouldWarmupWithoutLeadership set to true ctrlWithWarmup, err := controller.New("warmup-enabled-ctrl", m, controller.Options{ Reconciler: reconcile.Func(nil), - ShouldWarmupWithoutLeadership: ptr.To(true), + NeedWarmup: ptr.To(true), }) Expect(err).NotTo(HaveOccurred()) internalCtrlWithWarmup, ok := ctrlWithWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - Expect(internalCtrlWithWarmup.ShouldWarmupWithoutLeadership).NotTo(BeNil()) - Expect(*internalCtrlWithWarmup.ShouldWarmupWithoutLeadership).To(BeTrue()) + Expect(internalCtrlWithWarmup.NeedWarmup).NotTo(BeNil()) + Expect(*internalCtrlWithWarmup.NeedWarmup).To(BeTrue()) // Test with ShouldWarmupWithoutLeadership set to false ctrlWithoutWarmup, err := controller.New("warmup-disabled-ctrl", m, controller.Options{ Reconciler: reconcile.Func(nil), - ShouldWarmupWithoutLeadership: ptr.To(false), + NeedWarmup: ptr.To(false), }) Expect(err).NotTo(HaveOccurred()) internalCtrlWithoutWarmup, ok := ctrlWithoutWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - Expect(internalCtrlWithoutWarmup.ShouldWarmupWithoutLeadership).NotTo(BeNil()) - Expect(*internalCtrlWithoutWarmup.ShouldWarmupWithoutLeadership).To(BeFalse()) + Expect(internalCtrlWithoutWarmup.NeedWarmup).NotTo(BeNil()) + Expect(*internalCtrlWithoutWarmup.NeedWarmup).To(BeFalse()) // Test with ShouldWarmupWithoutLeadership not set (should default to nil) ctrlWithDefaultWarmup, err := controller.New("warmup-default-ctrl", m, controller.Options{ @@ -511,14 +511,14 @@ var _ = Describe("controller.Controller", func() { internalCtrlWithDefaultWarmup, ok := ctrlWithDefaultWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - Expect(internalCtrlWithDefaultWarmup.ShouldWarmupWithoutLeadership).To(BeNil()) + Expect(internalCtrlWithDefaultWarmup.NeedWarmup).To(BeNil()) }) It("should inherit ShouldWarmupWithoutLeadership from manager config", func() { // Test with manager default setting ShouldWarmupWithoutLeadership to true managerWithWarmup, err := manager.New(cfg, manager.Options{ Controller: config.Controller{ - NeedWarmUp: ptr.To(true), + NeedWarmup: ptr.To(true), }, }) Expect(err).NotTo(HaveOccurred()) @@ -529,23 +529,20 @@ var _ = Describe("controller.Controller", func() { internalCtrlInheritingWarmup, ok := ctrlInheritingWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - // Note: This test will fail until the DefaultFromConfig method is updated to set - // ShouldWarmupWithoutLeadership from config.Controller.NeedWarmUp - // This test demonstrates that the feature needs to be completed - Expect(internalCtrlInheritingWarmup.ShouldWarmupWithoutLeadership).NotTo(BeNil()) - Expect(*internalCtrlInheritingWarmup.ShouldWarmupWithoutLeadership).To(BeTrue()) + Expect(internalCtrlInheritingWarmup.NeedWarmup).NotTo(BeNil()) + Expect(*internalCtrlInheritingWarmup.NeedWarmup).To(BeTrue()) // Test that explicit controller setting overrides manager setting ctrlOverridingWarmup, err := controller.New("override-warmup-disabled", managerWithWarmup, controller.Options{ Reconciler: reconcile.Func(nil), - ShouldWarmupWithoutLeadership: ptr.To(false), + NeedWarmup: ptr.To(false), }) Expect(err).NotTo(HaveOccurred()) internalCtrlOverridingWarmup, ok := ctrlOverridingWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - Expect(internalCtrlOverridingWarmup.ShouldWarmupWithoutLeadership).NotTo(BeNil()) - Expect(*internalCtrlOverridingWarmup.ShouldWarmupWithoutLeadership).To(BeFalse()) + Expect(internalCtrlOverridingWarmup.NeedWarmup).NotTo(BeNil()) + Expect(*internalCtrlOverridingWarmup.NeedWarmup).To(BeFalse()) }) }) }) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 35949e0928..6a632c3825 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -99,11 +99,11 @@ type Controller[request comparable] struct { // LeaderElected indicates whether the controller is leader elected or always running. LeaderElected *bool - // ShouldWarmupWithoutLeadership specifies whether the controller should start its sources + // NeedWarmup specifies whether the controller should start its sources // when the manager is not the leader. // Defaults to false, which means that the controller will wait for leader election to start // before starting sources. - ShouldWarmupWithoutLeadership *bool + NeedWarmup *bool } // Reconcile implements reconcile.Reconciler. @@ -155,7 +155,7 @@ func (c *Controller[request]) NeedLeaderElection() bool { // Warmup implements the manager.WarmupRunnable interface. func (c *Controller[request]) Warmup(ctx context.Context) error { - if c.ShouldWarmupWithoutLeadership == nil || !*c.ShouldWarmupWithoutLeadership { + if c.NeedWarmup == nil || !*c.NeedWarmup { return nil } diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index b7c3989fd2..cb15a60d7b 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -403,7 +403,7 @@ var _ = Describe("controller", func() { return expectedErr }) - // // Set a sufficiently long timeout to avoid timeouts interfering with the error being returned + // Set a sufficiently long timeout to avoid timeouts interfering with the error being returned ctrl.CacheSyncTimeout = 5 * time.Second ctrl.startWatches = []source.TypedSource[reconcile.Request]{src} err := ctrl.startEventSources(ctx) @@ -1016,7 +1016,7 @@ var _ = Describe("controller", func() { }) Describe("Warmup", func() { - It("should start event sources when ShouldWarmupWithoutLeadership is true", func() { + It("should start event sources when NeedWarmup is true", func() { // Setup ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -1028,8 +1028,9 @@ var _ = Describe("controller", func() { return nil }) + ctrl.CacheSyncTimeout = time.Second ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} - ctrl.ShouldWarmupWithoutLeadership = ptr.To(true) + ctrl.NeedWarmup = ptr.To(true) // Act err := ctrl.Warmup(ctx) @@ -1040,7 +1041,7 @@ var _ = Describe("controller", func() { Expect(ctrl.didStartEventSources.Load()).To(BeTrue(), "didStartEventSources flag should be set") }) - It("should not start event sources when ShouldWarmupWithoutLeadership is false", func() { + It("should not start event sources when NeedWarmup is false", func() { // Setup ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -1053,7 +1054,7 @@ var _ = Describe("controller", func() { }) ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} - ctrl.ShouldWarmupWithoutLeadership = ptr.To(false) + ctrl.NeedWarmup = ptr.To(false) // Act err := ctrl.Warmup(ctx) @@ -1064,7 +1065,7 @@ var _ = Describe("controller", func() { Expect(ctrl.didStartEventSources.Load()).To(BeFalse(), "didStartEventSources flag should not be set") }) - It("should not start event sources when ShouldWarmupWithoutLeadership is nil", func() { + It("should not start event sources when NeedWarmup is nil", func() { // Setup ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -1077,7 +1078,7 @@ var _ = Describe("controller", func() { }) ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} - ctrl.ShouldWarmupWithoutLeadership = nil + ctrl.NeedWarmup = nil // Act err := ctrl.Warmup(ctx) @@ -1100,8 +1101,9 @@ var _ = Describe("controller", func() { return nil }) + ctrl.CacheSyncTimeout = time.Second ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} - ctrl.ShouldWarmupWithoutLeadership = ptr.To(true) + ctrl.NeedWarmup = ptr.To(true) // Act err1 := ctrl.Warmup(ctx) @@ -1125,8 +1127,9 @@ var _ = Describe("controller", func() { return expectedErr }) + ctrl.CacheSyncTimeout = time.Second ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} - ctrl.ShouldWarmupWithoutLeadership = ptr.To(true) + ctrl.NeedWarmup = ptr.To(true) // Act err := ctrl.Warmup(ctx) From c9b99eb13d7f7d9b49a354460e1fe888280c1635 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Sun, 13 Apr 2025 23:27:50 -0700 Subject: [PATCH 04/45] fmt --- pkg/controller/controller.go | 26 +++++++++++++------------- pkg/controller/controller_test.go | 6 +++--- pkg/manager/runnable_group.go | 10 +++++----- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 2220d04c57..f1d8c0e428 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -131,9 +131,9 @@ func (options *TypedOptions[request]) DefaultFromConfig(config config.Controller options.NeedLeaderElection = config.NeedLeaderElection } - if options.NeedWarmup == nil { - options.NeedWarmup = config.NeedWarmup - } + if options.NeedWarmup == nil { + options.NeedWarmup = config.NeedWarmup + } } // Controller implements an API. A Controller manages a work queue fed reconcile.Requests @@ -254,16 +254,16 @@ func NewTypedUnmanaged[request comparable](name string, options TypedOptions[req // Create controller with dependencies set return &controller.Controller[request]{ - Do: options.Reconciler, - RateLimiter: options.RateLimiter, - NewQueue: options.NewQueue, - MaxConcurrentReconciles: options.MaxConcurrentReconciles, - CacheSyncTimeout: options.CacheSyncTimeout, - Name: name, - LogConstructor: options.LogConstructor, - RecoverPanic: options.RecoverPanic, - LeaderElected: options.NeedLeaderElection, - NeedWarmup: options.NeedWarmup, + Do: options.Reconciler, + RateLimiter: options.RateLimiter, + NewQueue: options.NewQueue, + MaxConcurrentReconciles: options.MaxConcurrentReconciles, + CacheSyncTimeout: options.CacheSyncTimeout, + Name: name, + LogConstructor: options.LogConstructor, + RecoverPanic: options.RecoverPanic, + LeaderElected: options.NeedLeaderElection, + NeedWarmup: options.NeedWarmup, }, nil } diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index 13a2c4046e..53d923fc77 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -481,7 +481,7 @@ var _ = Describe("controller.Controller", func() { // Test with ShouldWarmupWithoutLeadership set to true ctrlWithWarmup, err := controller.New("warmup-enabled-ctrl", m, controller.Options{ - Reconciler: reconcile.Func(nil), + Reconciler: reconcile.Func(nil), NeedWarmup: ptr.To(true), }) Expect(err).NotTo(HaveOccurred()) @@ -493,7 +493,7 @@ var _ = Describe("controller.Controller", func() { // Test with ShouldWarmupWithoutLeadership set to false ctrlWithoutWarmup, err := controller.New("warmup-disabled-ctrl", m, controller.Options{ - Reconciler: reconcile.Func(nil), + Reconciler: reconcile.Func(nil), NeedWarmup: ptr.To(false), }) Expect(err).NotTo(HaveOccurred()) @@ -534,7 +534,7 @@ var _ = Describe("controller.Controller", func() { // Test that explicit controller setting overrides manager setting ctrlOverridingWarmup, err := controller.New("override-warmup-disabled", managerWithWarmup, controller.Options{ - Reconciler: reconcile.Func(nil), + Reconciler: reconcile.Func(nil), NeedWarmup: ptr.To(false), }) Expect(err).NotTo(HaveOccurred()) diff --git a/pkg/manager/runnable_group.go b/pkg/manager/runnable_group.go index 21b349529e..3d0d825fbe 100644 --- a/pkg/manager/runnable_group.go +++ b/pkg/manager/runnable_group.go @@ -230,16 +230,16 @@ func (r *runnableGroup) reconcile() { } }() - // If we return, the runnable ended cleanly + // If we return, the runnable ended cleanly // or returned an error to the channel. // // We should always decrement the WaitGroup here. defer r.wg.Done() // Start the runnable. - if err := rn.Start(r.ctx); err != nil { - r.errChan <- err - } + if err := rn.Start(r.ctx); err != nil { + r.errChan <- err + } }(runnable) } } @@ -297,7 +297,7 @@ func (r *runnableGroup) Add(rn Runnable, ready runnableCheck) error { // StopAndWait waits for all the runnables to finish before returning. func (r *runnableGroup) StopAndWait(ctx context.Context) { r.stopOnce.Do(func() { - // Close the reconciler channel once we're done. + // Close the reconciler channel once we're done. defer func() { r.stop.Lock() close(r.ch) From e7a2bbfa72d31c033557118c52441261d1ae5b01 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Sun, 13 Apr 2025 23:51:24 -0700 Subject: [PATCH 05/45] Change to atomic.Bool to avoid race in test. --- pkg/manager/runnable_group_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/manager/runnable_group_test.go b/pkg/manager/runnable_group_test.go index 35373aa8f8..a211e5119f 100644 --- a/pkg/manager/runnable_group_test.go +++ b/pkg/manager/runnable_group_test.go @@ -115,7 +115,7 @@ var _ = Describe("runnables", func() { }) It("should execute the Warmup function when Warmup group is started", func() { - warmupExecuted := false + var warmupExecuted atomic.Bool warmupRunnable := WarmupRunnableFunc{ RunFunc: func(c context.Context) error { @@ -123,7 +123,7 @@ var _ = Describe("runnables", func() { return nil }, WarmupFunc: func(c context.Context) error { - warmupExecuted = true + warmupExecuted.Store(true) return nil }, } @@ -138,7 +138,7 @@ var _ = Describe("runnables", func() { Expect(r.Warmup.Start(ctx)).To(Succeed()) // Verify warmup function was called - Expect(warmupExecuted).To(BeTrue()) + Expect(warmupExecuted.Load()).To(BeTrue()) }) It("should propagate errors from Warmup function to error channel", func() { From 854987c5595311758cc04f48f62fc9ea83a2e505 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Tue, 29 Apr 2025 12:36:00 -0700 Subject: [PATCH 06/45] Address comments. --- pkg/config/controller.go | 2 +- pkg/controller/controller.go | 9 +++++---- pkg/manager/manager.go | 6 ++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pkg/config/controller.go b/pkg/config/controller.go index 60c010025a..3a01d0b218 100644 --- a/pkg/config/controller.go +++ b/pkg/config/controller.go @@ -60,7 +60,7 @@ type Controller struct { // Defaults to true, which means the controller will use leader election. NeedLeaderElection *bool - // EnableWarmup indicates whether the controller needs to use warm up. + // NeedWarmup indicates whether the controller needs to use warm up. // Defaults to false, which means the controller will not use warm up. NeedWarmup *bool diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index f1d8c0e428..efac3cc7fb 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -94,10 +94,11 @@ type TypedOptions[request comparable] struct { // Note: This flag is disabled by default until a future version. It's currently in beta. UsePriorityQueue *bool - // NeedWarmup specifies whether the controller should start its sources - // when the manager is not the leader. - // Defaults to false, which means that the controller will wait for leader election to start - // before starting sources. + // NeedWarmup specifies whether the controller should start its sources when the manager is not + // the leader. When set to true, the controller will not restart its sources when/if it + // transitions to be leader. + // + // Defaults to false. NeedWarmup *bool } diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 013321366c..5eca2c6568 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -314,9 +314,11 @@ type LeaderElectionRunnable interface { NeedLeaderElection() bool } -// WarmupRunnable knows if a Runnable should be a warmup runnable. +// WarmupRunnable knows if a Runnable should be a warmup runnable. A warmup runnable is a runnable +// that should be run when the manager is started, but before the leader election is acquired. +// The expectation is that it will block startup until the warmup runnables have completed. type WarmupRunnable interface { - // Warmup returns true if the Runnable should be run as warmup. + // Warmup is called when the manager is started, but before the leader election is acquired. Warmup(context.Context) error } From 072ad4b84bddbea74ce9ada668f687e0b4b23cab Mon Sep 17 00:00:00 2001 From: godwinpang Date: Thu, 1 May 2025 22:48:14 -0700 Subject: [PATCH 07/45] Add ready check to block controller startup until warmup is complete. --- pkg/internal/controller/controller.go | 45 ++++++- pkg/internal/controller/controller_test.go | 136 +++++++-------------- pkg/manager/manager.go | 7 +- pkg/manager/runnable_group.go | 7 +- 4 files changed, 99 insertions(+), 96 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 6a632c3825..7ca43729fa 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -29,7 +29,9 @@ import ( "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/uuid" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/workqueue" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/controller/priorityqueue" ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/internal/controller/metrics" @@ -38,6 +40,11 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" ) +const ( + // syncedPollPeriod is the period to poll for cache sync + syncedPollPeriod = 100 * time.Millisecond +) + // Controller implements controller.Controller. type Controller[request comparable] struct { // Name is used to uniquely identify a Controller in tracing, logging and monitoring. Name is required. @@ -86,6 +93,13 @@ type Controller[request comparable] struct { // didStartEventSources is used to indicate whether the event sources have been started. didStartEventSources atomic.Bool + // didEventSourcesFinishSync is used to indicate whether the event sources have finished + // successfully. It stores a *bool where + // - nil: not finished syncing + // - true: finished syncing without error + // - false: finished syncing with error + didEventSourcesFinishSync atomic.Value + // LogConstructor is used to construct a logger to then log messages to users during reconciliation, // or for example when a watch is started. // Note: LogConstructor has to be able to handle nil requests as we are also using it @@ -156,12 +170,37 @@ func (c *Controller[request]) NeedLeaderElection() bool { // Warmup implements the manager.WarmupRunnable interface. func (c *Controller[request]) Warmup(ctx context.Context) error { if c.NeedWarmup == nil || !*c.NeedWarmup { + c.didEventSourcesFinishSync.Store(ptr.To(true)) return nil } return c.startEventSources(ctx) } +// DidFinishWarmup implements the manager.WarmupRunnable interface. +func (c *Controller[request]) DidFinishWarmup(ctx context.Context) bool { + err := wait.PollUntilContextCancel(ctx, syncedPollPeriod, true, func(ctx context.Context) (bool, error) { + didFinishSync, ok := c.didEventSourcesFinishSync.Load().(*bool) + if !ok { + return false, errors.New("unexpected error: didEventSourcesFinishSync is not a bool pointer") + } + + if didFinishSync == nil { + // event sources not finished syncing + return false, nil + } + + if !*didFinishSync { + // event sources finished syncing with an error + return true, errors.New("event sources did not finish syncing succesfully") + } + + return true, nil + }) + + return err == nil +} + // Start implements controller.Controller. func (c *Controller[request]) Start(ctx context.Context) error { // use an IIFE to get proper lock handling @@ -299,7 +338,11 @@ func (c *Controller[request]) startEventSources(ctx context.Context) error { } }) } - return errGroup.Wait() + err := errGroup.Wait() + + c.didEventSourcesFinishSync.Store(ptr.To(err == nil)) + + return err } // processNextWorkItem will read a single work item off the workqueue and diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index cb15a60d7b..59f165b7c9 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -1016,126 +1016,78 @@ var _ = Describe("controller", func() { }) Describe("Warmup", func() { - It("should start event sources when NeedWarmup is true", func() { - // Setup - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create a mock source that we can verify was started - sourceStarted := false - mockSource := source.Func(func(ctx context.Context, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) error { - sourceStarted = true - return nil - }) - - ctrl.CacheSyncTimeout = time.Second - ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} + JustBeforeEach(func() { ctrl.NeedWarmup = ptr.To(true) - - // Act - err := ctrl.Warmup(ctx) - - // Assert - Expect(err).NotTo(HaveOccurred()) - Expect(sourceStarted).To(BeTrue(), "Event source should have been started") - Expect(ctrl.didStartEventSources.Load()).To(BeTrue(), "didStartEventSources flag should be set") }) - It("should not start event sources when NeedWarmup is false", func() { - // Setup + It("should track warmup status correctly with successful sync", func() { + // Setup controller with sources that complete successfully ctx, cancel := context.WithCancel(context.Background()) defer cancel() - // Create a mock source that should not be started - sourceStarted := false - mockSource := source.Func(func(ctx context.Context, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) error { - sourceStarted = true - return nil - }) - - ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} - ctrl.NeedWarmup = ptr.To(false) + ctrl.CacheSyncTimeout = time.Second + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + return nil + }), + } - // Act err := ctrl.Warmup(ctx) - - // Assert Expect(err).NotTo(HaveOccurred()) - Expect(sourceStarted).To(BeFalse(), "Event source should not have been started") - Expect(ctrl.didStartEventSources.Load()).To(BeFalse(), "didStartEventSources flag should not be set") + + // Verify DidFinishWarmup returns true for successful sync + result := ctrl.DidFinishWarmup(ctx) + Expect(result).To(BeTrue()) }) - It("should not start event sources when NeedWarmup is nil", func() { - // Setup + It("should track warmup status correctly with unsuccessful sync", func() { + // Setup controller with sources that complete with error ctx, cancel := context.WithCancel(context.Background()) defer cancel() - // Create a mock source that should not be started - sourceStarted := false - mockSource := source.Func(func(ctx context.Context, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) error { - sourceStarted = true - return nil - }) - - ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} - ctrl.NeedWarmup = nil + ctrl.CacheSyncTimeout = time.Second + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + return errors.New("sync error") + }), + } - // Act err := ctrl.Warmup(ctx) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("sync error")) - // Assert - Expect(err).NotTo(HaveOccurred()) - Expect(sourceStarted).To(BeFalse(), "Event source should not have been started") - Expect(ctrl.didStartEventSources.Load()).To(BeFalse(), "didStartEventSources flag should not be set") + // Verify DidFinishWarmup returns false for unsuccessful sync + result := ctrl.DidFinishWarmup(ctx) + Expect(result).To(BeFalse()) }) + }) - It("should not start event sources twice when called multiple times", func() { - // Setup - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create a mock source that counts how many times it's started - startCount := 0 - mockSource := source.Func(func(ctx context.Context, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) error { - startCount++ - return nil - }) - - ctrl.CacheSyncTimeout = time.Second - ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} - ctrl.NeedWarmup = ptr.To(true) - - // Act - err1 := ctrl.Warmup(ctx) - err2 := ctrl.Warmup(ctx) - - // Assert - Expect(err1).NotTo(HaveOccurred()) - Expect(err2).NotTo(HaveOccurred()) - Expect(startCount).To(Equal(1), "Event source should have been started only once") - Expect(ctrl.didStartEventSources.Load()).To(BeTrue(), "didStartEventSources flag should be set") + Describe("Warmup without warmup enabled", func() { + JustBeforeEach(func() { + ctrl.NeedWarmup = ptr.To(false) }) - It("should propagate errors from event sources", func() { - // Setup + It("should not start sources if warmup is disabled.", func() { + // Setup controller with sources that complete successfully ctx, cancel := context.WithCancel(context.Background()) defer cancel() - // Create a mock source that returns an error - expectedErr := errors.New("test error") - mockSource := source.Func(func(ctx context.Context, queue workqueue.TypedRateLimitingInterface[reconcile.Request]) error { - return expectedErr - }) - ctrl.CacheSyncTimeout = time.Second - ctrl.startWatches = []source.TypedSource[reconcile.Request]{mockSource} - ctrl.NeedWarmup = ptr.To(true) + isSourceStarted := false + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + isSourceStarted = true + return nil + }), + } - // Act err := ctrl.Warmup(ctx) + Expect(err).NotTo(HaveOccurred()) + + result := ctrl.DidFinishWarmup(ctx) + Expect(result).To(BeTrue()) - // Assert - Expect(err).To(MatchError(expectedErr)) + Expect(isSourceStarted).To(BeFalse()) }) }) }) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 5eca2c6568..3e36376bdf 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -316,10 +316,13 @@ type LeaderElectionRunnable interface { // WarmupRunnable knows if a Runnable should be a warmup runnable. A warmup runnable is a runnable // that should be run when the manager is started, but before the leader election is acquired. -// The expectation is that it will block startup until the warmup runnables have completed. type WarmupRunnable interface { - // Warmup is called when the manager is started, but before the leader election is acquired. + // Warmup is the implementation of the warmup runnable. Warmup(context.Context) error + + // WaitForWarmupComplete is a blocking function that waits for the warmup to be completed. It + // returns false if it could not successfully finish warmup. + WaitForWarmupComplete(context.Context) bool } // New returns a new Manager for creating Controllers. diff --git a/pkg/manager/runnable_group.go b/pkg/manager/runnable_group.go index 3d0d825fbe..0898c28053 100644 --- a/pkg/manager/runnable_group.go +++ b/pkg/manager/runnable_group.go @@ -69,7 +69,12 @@ func (r *runnables) Add(fn Runnable) error { return r.Webhooks.Add(fn, nil) case WarmupRunnable, LeaderElectionRunnable: if warmupRunnable, ok := fn.(WarmupRunnable); ok { - if err := r.Warmup.Add(RunnableFunc(warmupRunnable.Warmup), nil); err != nil { + if err := r.Warmup.Add( + RunnableFunc(warmupRunnable.Warmup), + func(ctx context.Context) bool { + return warmupRunnable.WaitForWarmupComplete(ctx) + }, + ); err != nil { return err } } From 43118a3c146f9ff4a523cc5652d09d9d41ac27e2 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Thu, 1 May 2025 22:50:42 -0700 Subject: [PATCH 08/45] Keep test helper structs private. --- pkg/internal/controller/controller.go | 2 +- pkg/manager/manager_test.go | 4 +-- pkg/manager/runnable_group_test.go | 48 +++++++++++++++++---------- 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 7ca43729fa..4c4c2b587c 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -192,7 +192,7 @@ func (c *Controller[request]) DidFinishWarmup(ctx context.Context) bool { if !*didFinishSync { // event sources finished syncing with an error - return true, errors.New("event sources did not finish syncing succesfully") + return true, errors.New("event sources did not finish syncing successfully") } return true, nil diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 822a3b6200..5a31eca586 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -1952,7 +1952,7 @@ var _ = Describe("manger.Manager", func() { By("Creating a runnable that implements WarmupRunnable interface") // Create a warmup runnable - warmupRunnable := WarmupRunnableFunc{ + warmupRunnable := warmupRunnableFunc{ RunFunc: func(ctx context.Context) error { // This is the main runnable that will be executed after leader election <-ctx.Done() @@ -1967,7 +1967,7 @@ var _ = Describe("manger.Manager", func() { Expect(m.Add(warmupRunnable)).To(Succeed()) By("Creating a runnable that requires leader election") - leaderElectionRunnable := LeaderElectionRunnableFunc{ + leaderElectionRunnable := leaderElectionRunnableFunc{ RunFunc: func(ctx context.Context) error { // This will only be called after leader election is won close(leaderElectionRunnableCalled) diff --git a/pkg/manager/runnable_group_test.go b/pkg/manager/runnable_group_test.go index a211e5119f..d7295bd831 100644 --- a/pkg/manager/runnable_group_test.go +++ b/pkg/manager/runnable_group_test.go @@ -55,7 +55,7 @@ var _ = Describe("runnables", func() { }) It("should add WarmupRunnable to the Warmup and LeaderElection group", func() { - warmupRunnable := WarmupRunnableFunc{ + warmupRunnable := warmupRunnableFunc{ RunFunc: func(c context.Context) error { <-c.Done() return nil @@ -72,7 +72,7 @@ var _ = Describe("runnables", func() { }) It("should add WarmupRunnable that doesn't needs leader election to warmup group only", func() { - warmupRunnable := CombinedRunnable{ + warmupRunnable := combinedRunnable{ RunFunc: func(c context.Context) error { <-c.Done() return nil @@ -93,7 +93,7 @@ var _ = Describe("runnables", func() { }) It("should add WarmupRunnable that needs leader election to Warmup and LeaderElection group, not Others", func() { - warmupRunnable := CombinedRunnable{ + warmupRunnable := combinedRunnable{ RunFunc: func(c context.Context) error { <-c.Done() return nil @@ -117,7 +117,7 @@ var _ = Describe("runnables", func() { It("should execute the Warmup function when Warmup group is started", func() { var warmupExecuted atomic.Bool - warmupRunnable := WarmupRunnableFunc{ + warmupRunnable := warmupRunnableFunc{ RunFunc: func(c context.Context) error { <-c.Done() return nil @@ -144,7 +144,7 @@ var _ = Describe("runnables", func() { It("should propagate errors from Warmup function to error channel", func() { expectedErr := fmt.Errorf("expected warmup error") - warmupRunnable := WarmupRunnableFunc{ + warmupRunnable := warmupRunnableFunc{ RunFunc: func(c context.Context) error { <-c.Done() return nil @@ -349,51 +349,63 @@ var _ = Describe("runnableGroup", func() { }) }) -// LeaderElectionRunnableFunc is a helper struct that implements LeaderElectionRunnable +var _ LeaderElectionRunnable = &leaderElectionRunnableFunc{} + +// leaderElectionRunnableFunc is a helper struct that implements LeaderElectionRunnable // for testing purposes. -type LeaderElectionRunnableFunc struct { +type leaderElectionRunnableFunc struct { RunFunc func(context.Context) error NeedLeaderElectionFunc func() bool } -func (r LeaderElectionRunnableFunc) Start(ctx context.Context) error { +func (r leaderElectionRunnableFunc) Start(ctx context.Context) error { return r.RunFunc(ctx) } -func (r LeaderElectionRunnableFunc) NeedLeaderElection() bool { +func (r leaderElectionRunnableFunc) NeedLeaderElection() bool { return r.NeedLeaderElectionFunc() } -// WarmupRunnableFunc is a helper struct that implements WarmupRunnable +var _ WarmupRunnable = &warmupRunnableFunc{} + +// warmupRunnableFunc is a helper struct that implements WarmupRunnable // for testing purposes. -type WarmupRunnableFunc struct { +type warmupRunnableFunc struct { RunFunc func(context.Context) error WarmupFunc func(context.Context) error } -func (r WarmupRunnableFunc) Start(ctx context.Context) error { +func (r warmupRunnableFunc) Start(ctx context.Context) error { return r.RunFunc(ctx) } -func (r WarmupRunnableFunc) Warmup(ctx context.Context) error { +func (r warmupRunnableFunc) Warmup(ctx context.Context) error { return r.WarmupFunc(ctx) } -// CombinedRunnable implements both WarmupRunnable and LeaderElectionRunnable -type CombinedRunnable struct { +func (r warmupRunnableFunc) WaitForWarmupComplete(ctx context.Context) bool { + return true +} + +// combinedRunnable implements both WarmupRunnable and LeaderElectionRunnable +type combinedRunnable struct { RunFunc func(context.Context) error WarmupFunc func(context.Context) error NeedLeaderElectionFunc func() bool } -func (r CombinedRunnable) Start(ctx context.Context) error { +func (r combinedRunnable) Start(ctx context.Context) error { return r.RunFunc(ctx) } -func (r CombinedRunnable) Warmup(ctx context.Context) error { +func (r combinedRunnable) Warmup(ctx context.Context) error { return r.WarmupFunc(ctx) } -func (r CombinedRunnable) NeedLeaderElection() bool { +func (r combinedRunnable) WaitForWarmupComplete(ctx context.Context) bool { + return true +} + +func (r combinedRunnable) NeedLeaderElection() bool { return r.NeedLeaderElectionFunc() } From b67bc65b3211f567b769f12c17e05f78c3214ac4 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Mon, 12 May 2025 01:05:28 -0700 Subject: [PATCH 09/45] Address comments. --- pkg/config/controller.go | 10 ++++++++-- pkg/controller/controller.go | 8 +++++--- pkg/internal/controller/controller.go | 3 --- pkg/internal/controller/controller_test.go | 23 ++++++++++++++-------- pkg/manager/manager.go | 2 ++ 5 files changed, 30 insertions(+), 16 deletions(-) diff --git a/pkg/config/controller.go b/pkg/config/controller.go index 3a01d0b218..f83906a2c6 100644 --- a/pkg/config/controller.go +++ b/pkg/config/controller.go @@ -60,8 +60,14 @@ type Controller struct { // Defaults to true, which means the controller will use leader election. NeedLeaderElection *bool - // NeedWarmup indicates whether the controller needs to use warm up. - // Defaults to false, which means the controller will not use warm up. + // NeedWarmup specifies whether the controller should start its sources when the manager is not + // the leader. This is useful for cases where sources take a long time to start, as it allows + // for the controller to warm up its caches even before it is elected as the leader. This + // improves leadership failover time, as the caches will be prepopulated before the controller + // transitions to be leader. + // + // When set to true, the controller will start its sources without transitioning to be leader. + // Defaults to false. NeedWarmup *bool // UsePriorityQueue configures the controllers queue to use the controller-runtime provided diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index efac3cc7fb..559858b592 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -1,5 +1,4 @@ -/* -Copyright 2018 The Kubernetes Authors. +/* Copyright 2018 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -95,9 +94,12 @@ type TypedOptions[request comparable] struct { UsePriorityQueue *bool // NeedWarmup specifies whether the controller should start its sources when the manager is not - // the leader. When set to true, the controller will not restart its sources when/if it + // the leader. This is useful for cases where sources take a long time to start, as it allows + // for the controller to warm up its caches even before it is elected as the leader. This + // improves leadership failover time, as the caches will be prepopulated before the controller // transitions to be leader. // + // When set to true, the controller will start its sources without transitioning to be leader. // Defaults to false. NeedWarmup *bool } diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 4c4c2b587c..a17b2e1eae 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -170,10 +170,8 @@ func (c *Controller[request]) NeedLeaderElection() bool { // Warmup implements the manager.WarmupRunnable interface. func (c *Controller[request]) Warmup(ctx context.Context) error { if c.NeedWarmup == nil || !*c.NeedWarmup { - c.didEventSourcesFinishSync.Store(ptr.To(true)) return nil } - return c.startEventSources(ctx) } @@ -291,7 +289,6 @@ func (c *Controller[request]) startEventSources(ctx context.Context) error { _, ok := watch.(interface { String() string }) - if !ok { log = log.WithValues("source", fmt.Sprintf("%T", watch)) } else { diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 59f165b7c9..dba90cd71f 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -21,6 +21,7 @@ import ( "errors" "fmt" "sync" + "sync/atomic" "time" "github.com/go-logr/logr" @@ -1062,32 +1063,38 @@ var _ = Describe("controller", func() { }) }) - Describe("Warmup without warmup enabled", func() { + Describe("Warmup with warmup disabled", func() { JustBeforeEach(func() { ctrl.NeedWarmup = ptr.To(false) }) - It("should not start sources if warmup is disabled.", func() { + It("should not start sources when Warmup is called if warmup is disabled but start it when Start is called.", func() { // Setup controller with sources that complete successfully ctx, cancel := context.WithCancel(context.Background()) defer cancel() ctrl.CacheSyncTimeout = time.Second - isSourceStarted := false + var isSourceStarted atomic.Bool + isSourceStarted.Store(false) ctrl.startWatches = []source.TypedSource[reconcile.Request]{ source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { - isSourceStarted = true + isSourceStarted.Store(true) return nil }), } + By("Calling Warmup when NeedWarmup is false") err := ctrl.Warmup(ctx) Expect(err).NotTo(HaveOccurred()) + Expect(isSourceStarted.Load()).To(BeFalse()) - result := ctrl.DidFinishWarmup(ctx) - Expect(result).To(BeTrue()) - - Expect(isSourceStarted).To(BeFalse()) + By("Calling Start when NeedWarmup is false") + // Now call Start + go func() { + defer GinkgoRecover() + Expect(ctrl.Start(ctx)).To(Succeed()) + }() + Eventually(func() bool { return isSourceStarted.Load() }).Should(BeTrue()) }) }) }) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 3e36376bdf..e8b1ca104a 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -316,6 +316,8 @@ type LeaderElectionRunnable interface { // WarmupRunnable knows if a Runnable should be a warmup runnable. A warmup runnable is a runnable // that should be run when the manager is started, but before the leader election is acquired. +// Note: Implementing this interface is only useful when LeaderElection is enabled, as the behavior +// without leaderelection is to run LeaderElectionRunnables immediately. type WarmupRunnable interface { // Warmup is the implementation of the warmup runnable. Warmup(context.Context) error From fc7c8c5cb2ff25bcd38fc75258fabaf887ea8a83 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Mon, 12 May 2025 02:17:11 -0700 Subject: [PATCH 10/45] Fix lint. --- pkg/internal/controller/controller_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index dba90cd71f..e4c106a16c 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -1094,7 +1094,7 @@ var _ = Describe("controller", func() { defer GinkgoRecover() Expect(ctrl.Start(ctx)).To(Succeed()) }() - Eventually(func() bool { return isSourceStarted.Load() }).Should(BeTrue()) + Eventually(isSourceStarted.Load).Should(BeTrue()) }) }) }) From 6bb4616ba3e651c98015a5382549982074f563ac Mon Sep 17 00:00:00 2001 From: godwinpang Date: Tue, 13 May 2025 16:04:17 -0700 Subject: [PATCH 11/45] Address naming + comments from sbueringer. --- pkg/config/controller.go | 2 +- pkg/controller/controller.go | 3 +- pkg/controller/controller_test.go | 12 +++---- pkg/internal/controller/controller.go | 15 ++++---- pkg/internal/controller/controller_test.go | 8 ++--- pkg/manager/internal.go | 2 +- pkg/manager/manager.go | 8 ++--- pkg/manager/manager_test.go | 4 +-- pkg/manager/runnable_group.go | 4 +-- pkg/manager/runnable_group_test.go | 40 +++++++++++----------- 10 files changed, 50 insertions(+), 48 deletions(-) diff --git a/pkg/config/controller.go b/pkg/config/controller.go index f83906a2c6..4b155ead9b 100644 --- a/pkg/config/controller.go +++ b/pkg/config/controller.go @@ -66,7 +66,7 @@ type Controller struct { // improves leadership failover time, as the caches will be prepopulated before the controller // transitions to be leader. // - // When set to true, the controller will start its sources without transitioning to be leader. + // When set to true, the controller will start its sources without waiting to become leader. // Defaults to false. NeedWarmup *bool diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 559858b592..93a952c763 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -1,4 +1,5 @@ -/* Copyright 2018 The Kubernetes Authors. +/* +Copyright 2018 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index 53d923fc77..1c5c1368e5 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -475,11 +475,11 @@ var _ = Describe("controller.Controller", func() { Expect(ok).To(BeFalse()) }) - It("should set ShouldWarmupWithoutLeadership correctly", func() { + It("should set NeedWarmup correctly", func() { m, err := manager.New(cfg, manager.Options{}) Expect(err).NotTo(HaveOccurred()) - // Test with ShouldWarmupWithoutLeadership set to true + // Test with NeedWarmup set to true ctrlWithWarmup, err := controller.New("warmup-enabled-ctrl", m, controller.Options{ Reconciler: reconcile.Func(nil), NeedWarmup: ptr.To(true), @@ -491,7 +491,7 @@ var _ = Describe("controller.Controller", func() { Expect(internalCtrlWithWarmup.NeedWarmup).NotTo(BeNil()) Expect(*internalCtrlWithWarmup.NeedWarmup).To(BeTrue()) - // Test with ShouldWarmupWithoutLeadership set to false + // Test with NeedWarmup set to false ctrlWithoutWarmup, err := controller.New("warmup-disabled-ctrl", m, controller.Options{ Reconciler: reconcile.Func(nil), NeedWarmup: ptr.To(false), @@ -503,7 +503,7 @@ var _ = Describe("controller.Controller", func() { Expect(internalCtrlWithoutWarmup.NeedWarmup).NotTo(BeNil()) Expect(*internalCtrlWithoutWarmup.NeedWarmup).To(BeFalse()) - // Test with ShouldWarmupWithoutLeadership not set (should default to nil) + // Test with NeedWarmup not set (should default to nil) ctrlWithDefaultWarmup, err := controller.New("warmup-default-ctrl", m, controller.Options{ Reconciler: reconcile.Func(nil), }) @@ -514,8 +514,8 @@ var _ = Describe("controller.Controller", func() { Expect(internalCtrlWithDefaultWarmup.NeedWarmup).To(BeNil()) }) - It("should inherit ShouldWarmupWithoutLeadership from manager config", func() { - // Test with manager default setting ShouldWarmupWithoutLeadership to true + It("should inherit NeedWarmup from manager config", func() { + // Test with manager default setting NeedWarmup to true managerWithWarmup, err := manager.New(cfg, manager.Options{ Controller: config.Controller{ NeedWarmup: ptr.To(true), diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index a17b2e1eae..a25a08d898 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -93,12 +93,12 @@ type Controller[request comparable] struct { // didStartEventSources is used to indicate whether the event sources have been started. didStartEventSources atomic.Bool - // didEventSourcesFinishSync is used to indicate whether the event sources have finished + // didEventSourcesFinishSyncSuccessfully is used to indicate whether the event sources have finished // successfully. It stores a *bool where // - nil: not finished syncing // - true: finished syncing without error // - false: finished syncing with error - didEventSourcesFinishSync atomic.Value + didEventSourcesFinishSyncSuccessfully atomic.Value // LogConstructor is used to construct a logger to then log messages to users during reconciliation, // or for example when a watch is started. @@ -175,10 +175,11 @@ func (c *Controller[request]) Warmup(ctx context.Context) error { return c.startEventSources(ctx) } -// DidFinishWarmup implements the manager.WarmupRunnable interface. -func (c *Controller[request]) DidFinishWarmup(ctx context.Context) bool { +// WaitForWarmupComplete returns true if warmup has completed without error, and false if there was +// an error during warmup. +func (c *Controller[request]) WaitForWarmupComplete(ctx context.Context) bool { err := wait.PollUntilContextCancel(ctx, syncedPollPeriod, true, func(ctx context.Context) (bool, error) { - didFinishSync, ok := c.didEventSourcesFinishSync.Load().(*bool) + didFinishSync, ok := c.didEventSourcesFinishSyncSuccessfully.Load().(*bool) if !ok { return false, errors.New("unexpected error: didEventSourcesFinishSync is not a bool pointer") } @@ -279,7 +280,7 @@ func (c *Controller[request]) startEventSources(ctx context.Context) error { // CAS returns false if value is already true, so early exit since another goroutine must have // called startEventSources previously if !c.didStartEventSources.CompareAndSwap(false, true) { - c.LogConstructor(nil).Info("Skipping starting event sources since it was already started") + c.LogConstructor(nil).Info("Skipping starting event sources since they were previously started") return nil } @@ -337,7 +338,7 @@ func (c *Controller[request]) startEventSources(ctx context.Context) error { } err := errGroup.Wait() - c.didEventSourcesFinishSync.Store(ptr.To(err == nil)) + c.didEventSourcesFinishSyncSuccessfully.Store(ptr.To(err == nil)) return err } diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index e4c106a16c..36a4a37dcb 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -1036,8 +1036,8 @@ var _ = Describe("controller", func() { err := ctrl.Warmup(ctx) Expect(err).NotTo(HaveOccurred()) - // Verify DidFinishWarmup returns true for successful sync - result := ctrl.DidFinishWarmup(ctx) + // Verify WaitForWarmupComplete returns true for successful sync + result := ctrl.WaitForWarmupComplete(ctx) Expect(result).To(BeTrue()) }) @@ -1057,8 +1057,8 @@ var _ = Describe("controller", func() { Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("sync error")) - // Verify DidFinishWarmup returns false for unsuccessful sync - result := ctrl.DidFinishWarmup(ctx) + // Verify WaitForWarmupComplete returns false for unsuccessful sync + result := ctrl.WaitForWarmupComplete(ctx) Expect(result).To(BeFalse()) }) }) diff --git a/pkg/manager/internal.go b/pkg/manager/internal.go index 261bccac9d..54355600a4 100644 --- a/pkg/manager/internal.go +++ b/pkg/manager/internal.go @@ -439,7 +439,7 @@ func (cm *controllerManager) Start(ctx context.Context) (err error) { return fmt.Errorf("failed to start other runnables: %w", err) } - // Start and wait for sources to start. + // Start WarmupRunnables and wait for warmup to complete. if err := cm.runnables.Warmup.Start(cm.internalCtx); err != nil { return fmt.Errorf("failed to start warmup runnables: %w", err) } diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index e8b1ca104a..0740b22f52 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -314,11 +314,11 @@ type LeaderElectionRunnable interface { NeedLeaderElection() bool } -// WarmupRunnable knows if a Runnable should be a warmup runnable. A warmup runnable is a runnable +// warmupRunnable knows if a Runnable requires warmup. A warmup runnable is a runnable // that should be run when the manager is started, but before the leader election is acquired. -// Note: Implementing this interface is only useful when LeaderElection is enabled, as the behavior -// without leaderelection is to run LeaderElectionRunnables immediately. -type WarmupRunnable interface { +// Note: Implementing this interface is only useful when LeaderElection can be enabled, as the +// behavior when leaderelection is not enabled is to run LeaderElectionRunnables immediately. +type warmupRunnable interface { // Warmup is the implementation of the warmup runnable. Warmup(context.Context) error diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 5a31eca586..a0e77e62da 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -1953,7 +1953,7 @@ var _ = Describe("manger.Manager", func() { By("Creating a runnable that implements WarmupRunnable interface") // Create a warmup runnable warmupRunnable := warmupRunnableFunc{ - RunFunc: func(ctx context.Context) error { + StartFunc: func(ctx context.Context) error { // This is the main runnable that will be executed after leader election <-ctx.Done() return nil @@ -1968,7 +1968,7 @@ var _ = Describe("manger.Manager", func() { By("Creating a runnable that requires leader election") leaderElectionRunnable := leaderElectionRunnableFunc{ - RunFunc: func(ctx context.Context) error { + StartFunc: func(ctx context.Context) error { // This will only be called after leader election is won close(leaderElectionRunnableCalled) <-ctx.Done() diff --git a/pkg/manager/runnable_group.go b/pkg/manager/runnable_group.go index 0898c28053..2ef5817a7d 100644 --- a/pkg/manager/runnable_group.go +++ b/pkg/manager/runnable_group.go @@ -67,8 +67,8 @@ func (r *runnables) Add(fn Runnable) error { }) case webhook.Server: return r.Webhooks.Add(fn, nil) - case WarmupRunnable, LeaderElectionRunnable: - if warmupRunnable, ok := fn.(WarmupRunnable); ok { + case warmupRunnable, LeaderElectionRunnable: + if warmupRunnable, ok := fn.(warmupRunnable); ok { if err := r.Warmup.Add( RunnableFunc(warmupRunnable.Warmup), func(ctx context.Context) bool { diff --git a/pkg/manager/runnable_group_test.go b/pkg/manager/runnable_group_test.go index d7295bd831..4b00d98542 100644 --- a/pkg/manager/runnable_group_test.go +++ b/pkg/manager/runnable_group_test.go @@ -56,7 +56,7 @@ var _ = Describe("runnables", func() { It("should add WarmupRunnable to the Warmup and LeaderElection group", func() { warmupRunnable := warmupRunnableFunc{ - RunFunc: func(c context.Context) error { + StartFunc: func(c context.Context) error { <-c.Done() return nil }, @@ -72,8 +72,8 @@ var _ = Describe("runnables", func() { }) It("should add WarmupRunnable that doesn't needs leader election to warmup group only", func() { - warmupRunnable := combinedRunnable{ - RunFunc: func(c context.Context) error { + warmupRunnable := leaderElectionAndWarmupRunnable{ + StartFunc: func(c context.Context) error { <-c.Done() return nil }, @@ -93,8 +93,8 @@ var _ = Describe("runnables", func() { }) It("should add WarmupRunnable that needs leader election to Warmup and LeaderElection group, not Others", func() { - warmupRunnable := combinedRunnable{ - RunFunc: func(c context.Context) error { + warmupRunnable := leaderElectionAndWarmupRunnable{ + StartFunc: func(c context.Context) error { <-c.Done() return nil }, @@ -118,7 +118,7 @@ var _ = Describe("runnables", func() { var warmupExecuted atomic.Bool warmupRunnable := warmupRunnableFunc{ - RunFunc: func(c context.Context) error { + StartFunc: func(c context.Context) error { <-c.Done() return nil }, @@ -145,7 +145,7 @@ var _ = Describe("runnables", func() { expectedErr := fmt.Errorf("expected warmup error") warmupRunnable := warmupRunnableFunc{ - RunFunc: func(c context.Context) error { + StartFunc: func(c context.Context) error { <-c.Done() return nil }, @@ -354,29 +354,29 @@ var _ LeaderElectionRunnable = &leaderElectionRunnableFunc{} // leaderElectionRunnableFunc is a helper struct that implements LeaderElectionRunnable // for testing purposes. type leaderElectionRunnableFunc struct { - RunFunc func(context.Context) error + StartFunc func(context.Context) error NeedLeaderElectionFunc func() bool } func (r leaderElectionRunnableFunc) Start(ctx context.Context) error { - return r.RunFunc(ctx) + return r.StartFunc(ctx) } func (r leaderElectionRunnableFunc) NeedLeaderElection() bool { return r.NeedLeaderElectionFunc() } -var _ WarmupRunnable = &warmupRunnableFunc{} +var _ warmupRunnable = &warmupRunnableFunc{} // warmupRunnableFunc is a helper struct that implements WarmupRunnable // for testing purposes. type warmupRunnableFunc struct { - RunFunc func(context.Context) error + StartFunc func(context.Context) error WarmupFunc func(context.Context) error } func (r warmupRunnableFunc) Start(ctx context.Context) error { - return r.RunFunc(ctx) + return r.StartFunc(ctx) } func (r warmupRunnableFunc) Warmup(ctx context.Context) error { @@ -387,25 +387,25 @@ func (r warmupRunnableFunc) WaitForWarmupComplete(ctx context.Context) bool { return true } -// combinedRunnable implements both WarmupRunnable and LeaderElectionRunnable -type combinedRunnable struct { - RunFunc func(context.Context) error +// leaderElectionAndWarmupRunnable implements both WarmupRunnable and LeaderElectionRunnable +type leaderElectionAndWarmupRunnable struct { + StartFunc func(context.Context) error WarmupFunc func(context.Context) error NeedLeaderElectionFunc func() bool } -func (r combinedRunnable) Start(ctx context.Context) error { - return r.RunFunc(ctx) +func (r leaderElectionAndWarmupRunnable) Start(ctx context.Context) error { + return r.StartFunc(ctx) } -func (r combinedRunnable) Warmup(ctx context.Context) error { +func (r leaderElectionAndWarmupRunnable) Warmup(ctx context.Context) error { return r.WarmupFunc(ctx) } -func (r combinedRunnable) WaitForWarmupComplete(ctx context.Context) bool { +func (r leaderElectionAndWarmupRunnable) WaitForWarmupComplete(ctx context.Context) bool { return true } -func (r combinedRunnable) NeedLeaderElection() bool { +func (r leaderElectionAndWarmupRunnable) NeedLeaderElection() bool { return r.NeedLeaderElectionFunc() } From ccc7485d5c97717cdf4131727b96897f4995a0e6 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Tue, 13 May 2025 16:14:43 -0700 Subject: [PATCH 12/45] Refactor tests to use HaveValue. --- pkg/controller/controller_test.go | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index 1c5c1368e5..391b6c2b04 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -488,8 +488,7 @@ var _ = Describe("controller.Controller", func() { internalCtrlWithWarmup, ok := ctrlWithWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - Expect(internalCtrlWithWarmup.NeedWarmup).NotTo(BeNil()) - Expect(*internalCtrlWithWarmup.NeedWarmup).To(BeTrue()) + Expect(internalCtrlWithWarmup.NeedWarmup).To(HaveValue(BeTrue())) // Test with NeedWarmup set to false ctrlWithoutWarmup, err := controller.New("warmup-disabled-ctrl", m, controller.Options{ @@ -500,8 +499,7 @@ var _ = Describe("controller.Controller", func() { internalCtrlWithoutWarmup, ok := ctrlWithoutWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - Expect(internalCtrlWithoutWarmup.NeedWarmup).NotTo(BeNil()) - Expect(*internalCtrlWithoutWarmup.NeedWarmup).To(BeFalse()) + Expect(internalCtrlWithoutWarmup.NeedWarmup).To(HaveValue(BeFalse())) // Test with NeedWarmup not set (should default to nil) ctrlWithDefaultWarmup, err := controller.New("warmup-default-ctrl", m, controller.Options{ @@ -529,8 +527,7 @@ var _ = Describe("controller.Controller", func() { internalCtrlInheritingWarmup, ok := ctrlInheritingWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - Expect(internalCtrlInheritingWarmup.NeedWarmup).NotTo(BeNil()) - Expect(*internalCtrlInheritingWarmup.NeedWarmup).To(BeTrue()) + Expect(internalCtrlInheritingWarmup.NeedWarmup).To(HaveValue(BeTrue())) // Test that explicit controller setting overrides manager setting ctrlOverridingWarmup, err := controller.New("override-warmup-disabled", managerWithWarmup, controller.Options{ @@ -541,8 +538,7 @@ var _ = Describe("controller.Controller", func() { internalCtrlOverridingWarmup, ok := ctrlOverridingWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - Expect(internalCtrlOverridingWarmup.NeedWarmup).NotTo(BeNil()) - Expect(*internalCtrlOverridingWarmup.NeedWarmup).To(BeFalse()) + Expect(internalCtrlOverridingWarmup.NeedWarmup).To(HaveValue(BeFalse())) }) }) }) From 54f4fe313a89d88a55598d196f325dc825855337 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Tue, 13 May 2025 23:23:37 -0700 Subject: [PATCH 13/45] Document + add UT for WaitForWarmupComplete behavior on ctx cancellation. --- pkg/internal/controller/controller.go | 39 +++++++++++----------- pkg/internal/controller/controller_test.go | 28 ++++++++++++++++ 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index a25a08d898..1465e3cf0e 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -29,7 +29,6 @@ import ( "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/uuid" - "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/workqueue" "k8s.io/utils/ptr" @@ -176,28 +175,28 @@ func (c *Controller[request]) Warmup(ctx context.Context) error { } // WaitForWarmupComplete returns true if warmup has completed without error, and false if there was -// an error during warmup. +// an error during warmup. If context is cancelled, it returns true. func (c *Controller[request]) WaitForWarmupComplete(ctx context.Context) bool { - err := wait.PollUntilContextCancel(ctx, syncedPollPeriod, true, func(ctx context.Context) (bool, error) { - didFinishSync, ok := c.didEventSourcesFinishSyncSuccessfully.Load().(*bool) - if !ok { - return false, errors.New("unexpected error: didEventSourcesFinishSync is not a bool pointer") - } - - if didFinishSync == nil { - // event sources not finished syncing - return false, nil - } + ticker := time.NewTicker(syncedPollPeriod) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return true + case <-ticker.C: + didFinishSync, ok := c.didEventSourcesFinishSyncSuccessfully.Load().(*bool) + if !ok { + return false + } - if !*didFinishSync { - // event sources finished syncing with an error - return true, errors.New("event sources did not finish syncing successfully") + if didFinishSync != nil && *didFinishSync { + // event sources finished syncing successfully + return true + } + return false } - - return true, nil - }) - - return err == nil + } } // Start implements controller.Controller. diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 36a4a37dcb..737604e166 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -1061,6 +1061,34 @@ var _ = Describe("controller", func() { result := ctrl.WaitForWarmupComplete(ctx) Expect(result).To(BeFalse()) }) + + It("should return true if context is cancelled", func() { + // Setup controller with sources that complete with error + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ctrl.CacheSyncTimeout = time.Second + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + <-ctx.Done() + return nil + }), + } + + resultChan := make(chan bool) + + // Invoked in goroutines because the Warmup / WaitForWarmupComplete will block forever. + go func() { + err := ctrl.Warmup(ctx) + Expect(err).NotTo(HaveOccurred()) + }() + go func() { + resultChan <- ctrl.WaitForWarmupComplete(ctx) + }() + + cancel() + Expect(<-resultChan).To(BeTrue()) + }) }) Describe("Warmup with warmup disabled", func() { From 667bb0318710617e96f827b5da631c9c774fab23 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Wed, 14 May 2025 02:01:39 -0700 Subject: [PATCH 14/45] Add unit test that exercises controller warmup integration with manager. --- pkg/internal/controller/controller.go | 3 + pkg/internal/controller/controller_test.go | 73 ++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 1465e3cf0e..2a8d6d4dca 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -177,6 +177,9 @@ func (c *Controller[request]) Warmup(ctx context.Context) error { // WaitForWarmupComplete returns true if warmup has completed without error, and false if there was // an error during warmup. If context is cancelled, it returns true. func (c *Controller[request]) WaitForWarmupComplete(ctx context.Context) bool { + if c.NeedWarmup == nil || !*c.NeedWarmup { + return true + } ticker := time.NewTicker(syncedPollPeriod) defer ticker.Stop() diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 737604e166..9c6187907c 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -40,10 +40,12 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllertest" "sigs.k8s.io/controller-runtime/pkg/controller/priorityqueue" + "sigs.k8s.io/controller-runtime/pkg/envtest" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/internal/controller/metrics" "sigs.k8s.io/controller-runtime/pkg/internal/log" + "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" ) @@ -1089,6 +1091,77 @@ var _ = Describe("controller", func() { cancel() Expect(<-resultChan).To(BeTrue()) }) + + It("should be called before leader election runnables if warmup is enabled", func() { + // This unit test exists to ensure that a warmup enabled controller will actually be + // called in the warmup phase before the leader election runnables are started. It + // catches regressions in the controller that would not implement warmupRunnable from + // pkg/manager. + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + hasCtrlWatchStarted, hasNonWarmupCtrlWatchStarted := atomic.Bool{}, atomic.Bool{} + + // ctrl watch will block from finishing until the channel is produced to + ctrlWatchBlockingChan := make(chan struct{}) + + ctrl.CacheSyncTimeout = time.Second + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + hasCtrlWatchStarted.Store(true) + <-ctrlWatchBlockingChan + return nil + }), + } + + nonWarmupCtrl := &Controller[reconcile.Request]{ + MaxConcurrentReconciles: 1, + Do: fakeReconcile, + NewQueue: func(string, workqueue.TypedRateLimiter[reconcile.Request]) workqueue.TypedRateLimitingInterface[reconcile.Request] { + return queue + }, + LogConstructor: func(_ *reconcile.Request) logr.Logger { + return log.RuntimeLog.WithName("controller").WithName("test") + }, + CacheSyncTimeout: time.Second, + NeedWarmup: ptr.To(false), + LeaderElected: ptr.To(true), + startWatches: []source.TypedSource[reconcile.Request]{ + source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + hasNonWarmupCtrlWatchStarted.Store(true) + return nil + }), + }, + } + + By("Creating a manager") + testenv = &envtest.Environment{} + cfg, err := testenv.Start() + Expect(err).NotTo(HaveOccurred()) + m, err := manager.New(cfg, manager.Options{ + LeaderElection: false, + }) + Expect(err).NotTo(HaveOccurred()) + + By("Adding warmup and non-warmup controllers to the manager") + Expect(m.Add(ctrl)).To(Succeed()) + Expect(m.Add(nonWarmupCtrl)).To(Succeed()) + + By("Starting the manager") + go func() { + defer GinkgoRecover() + Expect(m.Start(ctx)).To(Succeed()) + }() + + By("Waiting for the warmup controller to start") + Eventually(hasCtrlWatchStarted.Load).Should(BeTrue()) + Expect(hasNonWarmupCtrlWatchStarted.Load()).To(BeFalse()) + + By("Unblocking the warmup controller source start") + close(ctrlWatchBlockingChan) + Eventually(hasNonWarmupCtrlWatchStarted.Load).Should(BeTrue()) + }) }) Describe("Warmup with warmup disabled", func() { From 66e3be4ddf5ccdf92e15be82f86fae92cc06ea37 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Wed, 14 May 2025 02:36:26 -0700 Subject: [PATCH 15/45] Add UT that verifies WaitForWarmupComplete blocking / non-blocking behavior. --- pkg/internal/controller/controller_test.go | 54 +++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 9c6187907c..361f6c898e 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -1064,7 +1064,7 @@ var _ = Describe("controller", func() { Expect(result).To(BeFalse()) }) - It("should return true if context is cancelled", func() { + It("should return true if context is cancelled while waiting for source to start", func() { // Setup controller with sources that complete with error ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -1198,6 +1198,58 @@ var _ = Describe("controller", func() { Eventually(isSourceStarted.Load).Should(BeTrue()) }) }) + + Describe("WaitForWarmupComplete", func() { + It("should short circuit without blocking if warmup is disabled", func() { + ctrl.NeedWarmup = ptr.To(false) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Call WaitForWarmupComplete and expect it to return immediately + result := ctrl.WaitForWarmupComplete(ctx) + Expect(result).To(BeTrue()) + }) + + It("should block until warmup is complete if warmup is enabled", func() { + ctrl.NeedWarmup = ptr.To(true) + // Setup controller with sources that complete successfully + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Close the channel to signal watch completion + shouldWatchCompleteChan := make(chan struct{}) + + ctrl.CacheSyncTimeout = time.Second + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + <-shouldWatchCompleteChan + return nil + }), + } + + By("Starting a blocking warmup") + + go func() { + defer GinkgoRecover() + Expect(ctrl.Warmup(ctx)).To(Succeed()) + }() + + // didWaitForWarmupCompleteReturn is true when the call to WaitForWarmupComplete returns + didWaitForWarmupCompleteReturn := atomic.Bool{} + go func() { + // Verify WaitForWarmupComplete returns true for successful sync + Expect(ctrl.WaitForWarmupComplete(ctx)).To(BeTrue()) + didWaitForWarmupCompleteReturn.Store(true) + }() + Consistently(didWaitForWarmupCompleteReturn.Load).Should(BeFalse()) + + By("Unblocking the watch to simulate initial sync completion") + close(shouldWatchCompleteChan) + Eventually(didWaitForWarmupCompleteReturn.Load).Should(BeTrue()) + }) + + }) }) var _ = Describe("ReconcileIDFromContext function", func() { From d9cc96bb8c5157f03966c9ea5f99520b5a016326 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Wed, 14 May 2025 02:41:22 -0700 Subject: [PATCH 16/45] Verify r.Others.startQueue in runnables test cases. --- pkg/manager/runnable_group_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/manager/runnable_group_test.go b/pkg/manager/runnable_group_test.go index 4b00d98542..1674ccb073 100644 --- a/pkg/manager/runnable_group_test.go +++ b/pkg/manager/runnable_group_test.go @@ -27,6 +27,7 @@ var _ = Describe("runnables", func() { r := newRunnables(defaultBaseContext, errCh) Expect(r.Add(server)).To(Succeed()) Expect(r.HTTPServers.startQueue).To(HaveLen(1)) + Expect(r.Others.startQueue).To(BeEmpty()) }) It("should add caches to the appropriate group", func() { @@ -34,6 +35,7 @@ var _ = Describe("runnables", func() { r := newRunnables(defaultBaseContext, errCh) Expect(r.Add(cache)).To(Succeed()) Expect(r.Caches.startQueue).To(HaveLen(1)) + Expect(r.Others.startQueue).To(BeEmpty()) }) It("should add webhooks to the appropriate group", func() { @@ -41,6 +43,7 @@ var _ = Describe("runnables", func() { r := newRunnables(defaultBaseContext, errCh) Expect(r.Add(webhook)).To(Succeed()) Expect(r.Webhooks.startQueue).To(HaveLen(1)) + Expect(r.Others.startQueue).To(BeEmpty()) }) It("should add any runnable to the leader election group", func() { @@ -52,6 +55,7 @@ var _ = Describe("runnables", func() { r := newRunnables(defaultBaseContext, errCh) Expect(r.Add(runnable)).To(Succeed()) Expect(r.LeaderElection.startQueue).To(HaveLen(1)) + Expect(r.Others.startQueue).To(BeEmpty()) }) It("should add WarmupRunnable to the Warmup and LeaderElection group", func() { @@ -69,6 +73,7 @@ var _ = Describe("runnables", func() { Expect(r.Add(warmupRunnable)).To(Succeed()) Expect(r.Warmup.startQueue).To(HaveLen(1)) Expect(r.LeaderElection.startQueue).To(HaveLen(1)) + Expect(r.Others.startQueue).To(BeEmpty()) }) It("should add WarmupRunnable that doesn't needs leader election to warmup group only", func() { @@ -90,6 +95,7 @@ var _ = Describe("runnables", func() { Expect(r.Warmup.startQueue).To(HaveLen(1)) Expect(r.LeaderElection.startQueue).To(BeEmpty()) + Expect(r.Others.startQueue).To(HaveLen(1)) }) It("should add WarmupRunnable that needs leader election to Warmup and LeaderElection group, not Others", func() { From 65a04d5318161c428ae96181e6777ee24087a770 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Wed, 14 May 2025 03:13:15 -0700 Subject: [PATCH 17/45] Fix UT to verify runnable ordering. --- pkg/manager/manager_test.go | 28 ++++++++++++++++------------ pkg/manager/runnable_group_test.go | 21 +++++++++++++++------ 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index a0e77e62da..5041610631 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -1931,8 +1931,10 @@ var _ = Describe("manger.Manager", func() { It("should run warmup runnables before leader election is won", func() { By("Creating channels to track execution order") - warmupCalled := make(chan struct{}) - leaderElectionRunnableCalled := make(chan struct{}) + warmupCalled := atomic.Bool{} + leaderElectionRunnableStarted := atomic.Bool{} + // warmupRunnable's WarmupFunc will block until this channel is closed + warmupRunnableWarmupBlockingChan := make(chan struct{}) By("Creating a manager with leader election enabled") m, err := New(cfg, Options{ @@ -1946,31 +1948,29 @@ var _ = Describe("manger.Manager", func() { }) Expect(err).NotTo(HaveOccurred()) - // Override onStoppedLeading to prevent os.Exit - cm := m.(*controllerManager) - cm.onStoppedLeading = func() {} - By("Creating a runnable that implements WarmupRunnable interface") // Create a warmup runnable warmupRunnable := warmupRunnableFunc{ StartFunc: func(ctx context.Context) error { // This is the main runnable that will be executed after leader election + // Block forever <-ctx.Done() return nil }, WarmupFunc: func(ctx context.Context) error { // This should be called during startup before leader election - close(warmupCalled) + warmupCalled.Store(true) + <-warmupRunnableWarmupBlockingChan return nil }, + didWarmupFinish: make(chan bool), } Expect(m.Add(warmupRunnable)).To(Succeed()) By("Creating a runnable that requires leader election") leaderElectionRunnable := leaderElectionRunnableFunc{ StartFunc: func(ctx context.Context) error { - // This will only be called after leader election is won - close(leaderElectionRunnableCalled) + leaderElectionRunnableStarted.Store(true) <-ctx.Done() return nil }, @@ -1985,17 +1985,21 @@ var _ = Describe("manger.Manager", func() { go func() { defer GinkgoRecover() - Expect(m.Start(ctx)).NotTo(HaveOccurred()) + Expect(m.Start(ctx)).To(Succeed()) }() By("Waiting for the warmup runnable to be called") - <-warmupCalled + Eventually(warmupCalled.Load).Should(BeTrue()) + Expect(leaderElectionRunnableStarted.Load()).To(BeFalse()) + + By("Closing the channel to unblock the warmup runnable") + close(warmupRunnableWarmupBlockingChan) By("Waiting for leader election to be won") <-m.Elected() By("Verifying the leader election runnable is called after election") - <-leaderElectionRunnableCalled + Eventually(leaderElectionRunnableStarted.Load).Should(BeTrue()) }) }) diff --git a/pkg/manager/runnable_group_test.go b/pkg/manager/runnable_group_test.go index 1674ccb073..45e151263b 100644 --- a/pkg/manager/runnable_group_test.go +++ b/pkg/manager/runnable_group_test.go @@ -377,8 +377,9 @@ var _ warmupRunnable = &warmupRunnableFunc{} // warmupRunnableFunc is a helper struct that implements WarmupRunnable // for testing purposes. type warmupRunnableFunc struct { - StartFunc func(context.Context) error - WarmupFunc func(context.Context) error + StartFunc func(context.Context) error + WarmupFunc func(context.Context) error + didWarmupFinish chan bool } func (r warmupRunnableFunc) Start(ctx context.Context) error { @@ -386,18 +387,24 @@ func (r warmupRunnableFunc) Start(ctx context.Context) error { } func (r warmupRunnableFunc) Warmup(ctx context.Context) error { - return r.WarmupFunc(ctx) + err := r.WarmupFunc(ctx) + r.didWarmupFinish <- (err == nil) + return err } func (r warmupRunnableFunc) WaitForWarmupComplete(ctx context.Context) bool { - return true + return <-r.didWarmupFinish } +var _ LeaderElectionRunnable = &leaderElectionAndWarmupRunnable{} +var _ warmupRunnable = &leaderElectionAndWarmupRunnable{} + // leaderElectionAndWarmupRunnable implements both WarmupRunnable and LeaderElectionRunnable type leaderElectionAndWarmupRunnable struct { StartFunc func(context.Context) error WarmupFunc func(context.Context) error NeedLeaderElectionFunc func() bool + didWarmupFinish chan bool } func (r leaderElectionAndWarmupRunnable) Start(ctx context.Context) error { @@ -405,11 +412,13 @@ func (r leaderElectionAndWarmupRunnable) Start(ctx context.Context) error { } func (r leaderElectionAndWarmupRunnable) Warmup(ctx context.Context) error { - return r.WarmupFunc(ctx) + err := r.WarmupFunc(ctx) + r.didWarmupFinish <- (err == nil) + return err } func (r leaderElectionAndWarmupRunnable) WaitForWarmupComplete(ctx context.Context) bool { - return true + return <-r.didWarmupFinish } func (r leaderElectionAndWarmupRunnable) NeedLeaderElection() bool { From c201bfafc90bb7330bf47d308db3765fef5499c6 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Thu, 15 May 2025 02:03:39 -0700 Subject: [PATCH 18/45] Fix UT for WaitForWarmupComplete blocking. --- pkg/internal/controller/controller.go | 14 ++++++++++++-- pkg/internal/controller/controller_test.go | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 2a8d6d4dca..351d39d177 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -188,12 +188,22 @@ func (c *Controller[request]) WaitForWarmupComplete(ctx context.Context) bool { case <-ctx.Done(): return true case <-ticker.C: - didFinishSync, ok := c.didEventSourcesFinishSyncSuccessfully.Load().(*bool) + didFinishSync := c.didEventSourcesFinishSyncSuccessfully.Load() + if didFinishSync == nil { + // event source still syncing + continue + } + + // This *bool assertion is done after checking for nil because type asserting a nil + // interface as a *bool will return false, which is not what we want since nil should be + // treated as not finished syncing. + didFinishSyncPtr, ok := didFinishSync.(*bool) if !ok { + // programming error, should never happen return false } - if didFinishSync != nil && *didFinishSync { + if didFinishSyncPtr != nil && *didFinishSyncPtr { // event sources finished syncing successfully return true } diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 361f6c898e..759d19882c 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -1229,7 +1229,6 @@ var _ = Describe("controller", func() { } By("Starting a blocking warmup") - go func() { defer GinkgoRecover() Expect(ctrl.Warmup(ctx)).To(Succeed()) @@ -1238,6 +1237,7 @@ var _ = Describe("controller", func() { // didWaitForWarmupCompleteReturn is true when the call to WaitForWarmupComplete returns didWaitForWarmupCompleteReturn := atomic.Bool{} go func() { + defer GinkgoRecover() // Verify WaitForWarmupComplete returns true for successful sync Expect(ctrl.WaitForWarmupComplete(ctx)).To(BeTrue()) didWaitForWarmupCompleteReturn.Store(true) From 5a13db428cc1553b2a00ea5313787196b7dbfd0a Mon Sep 17 00:00:00 2001 From: godwinpang Date: Thu, 15 May 2025 02:52:27 -0700 Subject: [PATCH 19/45] Document !NeedLeaderElection+NeedWarmup behavior --- pkg/config/controller.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/config/controller.go b/pkg/config/controller.go index 4b155ead9b..fa9d1a3135 100644 --- a/pkg/config/controller.go +++ b/pkg/config/controller.go @@ -66,7 +66,10 @@ type Controller struct { // improves leadership failover time, as the caches will be prepopulated before the controller // transitions to be leader. // - // When set to true, the controller will start its sources without waiting to become leader. + // Setting NeedWarmup to true and NeedLeaderElection to true means the controller will start its + // sources without waiting to become leader. + // Setting NeedWarmup to true and NeedLeaderElection is false is a no-op as controllers without + // leader election do not wait on leader election to start their sources. // Defaults to false. NeedWarmup *bool From 4879527d05b52649ca98e4a9b21370b05fb318a0 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Thu, 15 May 2025 22:29:54 -0700 Subject: [PATCH 20/45] Fix test race. --- pkg/internal/controller/controller_test.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 759d19882c..7b230f1860 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -462,6 +462,7 @@ var _ = Describe("controller", func() { // Start the sources in a goroutine startErrCh := make(chan error) go func() { + defer GinkgoRecover() startErrCh <- ctrl.startEventSources(sourceCtx) }() @@ -1079,17 +1080,26 @@ var _ = Describe("controller", func() { resultChan := make(chan bool) + // Wait for the goroutines to finish before returning to avoid racing with the + // assignment in BeforeEach block. + var wg sync.WaitGroup + // Invoked in goroutines because the Warmup / WaitForWarmupComplete will block forever. + wg.Add(2) go func() { - err := ctrl.Warmup(ctx) - Expect(err).NotTo(HaveOccurred()) + defer GinkgoRecover() + defer wg.Done() + Expect(ctrl.Warmup(ctx)).To(Succeed()) }() go func() { + defer GinkgoRecover() + defer wg.Done() resultChan <- ctrl.WaitForWarmupComplete(ctx) }() cancel() Expect(<-resultChan).To(BeTrue()) + wg.Wait() }) It("should be called before leader election runnables if warmup is enabled", func() { From 57acc771846ffc12d9aabba874ae0e3ff8277acb Mon Sep 17 00:00:00 2001 From: godwinpang Date: Fri, 16 May 2025 00:56:12 -0700 Subject: [PATCH 21/45] Cleanup test wrapper runnables. --- pkg/manager/manager_test.go | 19 ++-- pkg/manager/runnable_group_test.go | 142 +++++++++++++---------------- 2 files changed, 73 insertions(+), 88 deletions(-) diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 5041610631..0e0586b8e7 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -1950,34 +1950,31 @@ var _ = Describe("manger.Manager", func() { By("Creating a runnable that implements WarmupRunnable interface") // Create a warmup runnable - warmupRunnable := warmupRunnableFunc{ - StartFunc: func(ctx context.Context) error { + warmupRunnable := newWarmupRunnableFunc( + func(ctx context.Context) error { // This is the main runnable that will be executed after leader election // Block forever <-ctx.Done() return nil }, - WarmupFunc: func(ctx context.Context) error { + func(ctx context.Context) error { // This should be called during startup before leader election warmupCalled.Store(true) <-warmupRunnableWarmupBlockingChan return nil }, - didWarmupFinish: make(chan bool), - } + ) Expect(m.Add(warmupRunnable)).To(Succeed()) By("Creating a runnable that requires leader election") - leaderElectionRunnable := leaderElectionRunnableFunc{ - StartFunc: func(ctx context.Context) error { + + leaderElectionRunnable := RunnableFunc( + func(ctx context.Context) error { leaderElectionRunnableStarted.Store(true) <-ctx.Done() return nil }, - NeedLeaderElectionFunc: func() bool { - return true - }, - } + ) Expect(m.Add(leaderElectionRunnable)).To(Succeed()) ctx, cancel := context.WithCancel(context.Background()) diff --git a/pkg/manager/runnable_group_test.go b/pkg/manager/runnable_group_test.go index 45e151263b..b230b2baf0 100644 --- a/pkg/manager/runnable_group_test.go +++ b/pkg/manager/runnable_group_test.go @@ -59,15 +59,13 @@ var _ = Describe("runnables", func() { }) It("should add WarmupRunnable to the Warmup and LeaderElection group", func() { - warmupRunnable := warmupRunnableFunc{ - StartFunc: func(c context.Context) error { + warmupRunnable := newWarmupRunnableFunc( + func(c context.Context) error { <-c.Done() return nil }, - WarmupFunc: func(c context.Context) error { - return nil - }, - } + func(c context.Context) error { return nil }, + ) r := newRunnables(defaultBaseContext, errCh) Expect(r.Add(warmupRunnable)).To(Succeed()) @@ -77,18 +75,14 @@ var _ = Describe("runnables", func() { }) It("should add WarmupRunnable that doesn't needs leader election to warmup group only", func() { - warmupRunnable := leaderElectionAndWarmupRunnable{ - StartFunc: func(c context.Context) error { + warmupRunnable := newLeaderElectionAndWarmupRunnable( + func(c context.Context) error { <-c.Done() return nil }, - WarmupFunc: func(c context.Context) error { - return nil - }, - NeedLeaderElectionFunc: func() bool { - return false - }, - } + func(c context.Context) error { return nil }, + false, + ) r := newRunnables(defaultBaseContext, errCh) Expect(r.Add(warmupRunnable)).To(Succeed()) @@ -99,18 +93,14 @@ var _ = Describe("runnables", func() { }) It("should add WarmupRunnable that needs leader election to Warmup and LeaderElection group, not Others", func() { - warmupRunnable := leaderElectionAndWarmupRunnable{ - StartFunc: func(c context.Context) error { + warmupRunnable := newLeaderElectionAndWarmupRunnable( + func(c context.Context) error { <-c.Done() return nil }, - WarmupFunc: func(c context.Context) error { - return nil - }, - NeedLeaderElectionFunc: func() bool { - return true - }, - } + func(c context.Context) error { return nil }, + true, + ) r := newRunnables(defaultBaseContext, errCh) Expect(r.Add(warmupRunnable)).To(Succeed()) @@ -123,16 +113,16 @@ var _ = Describe("runnables", func() { It("should execute the Warmup function when Warmup group is started", func() { var warmupExecuted atomic.Bool - warmupRunnable := warmupRunnableFunc{ - StartFunc: func(c context.Context) error { + warmupRunnable := newWarmupRunnableFunc( + func(c context.Context) error { <-c.Done() return nil }, - WarmupFunc: func(c context.Context) error { + func(c context.Context) error { warmupExecuted.Store(true) return nil }, - } + ) r := newRunnables(defaultBaseContext, errCh) Expect(r.Add(warmupRunnable)).To(Succeed()) @@ -150,15 +140,13 @@ var _ = Describe("runnables", func() { It("should propagate errors from Warmup function to error channel", func() { expectedErr := fmt.Errorf("expected warmup error") - warmupRunnable := warmupRunnableFunc{ - StartFunc: func(c context.Context) error { + warmupRunnable := newWarmupRunnableFunc( + func(c context.Context) error { <-c.Done() return nil }, - WarmupFunc: func(c context.Context) error { - return expectedErr - }, - } + func(c context.Context) error { return expectedErr }, + ) testErrChan := make(chan error, 1) r := newRunnables(defaultBaseContext, testErrChan) @@ -355,45 +343,46 @@ var _ = Describe("runnableGroup", func() { }) }) -var _ LeaderElectionRunnable = &leaderElectionRunnableFunc{} - -// leaderElectionRunnableFunc is a helper struct that implements LeaderElectionRunnable -// for testing purposes. -type leaderElectionRunnableFunc struct { - StartFunc func(context.Context) error - NeedLeaderElectionFunc func() bool -} - -func (r leaderElectionRunnableFunc) Start(ctx context.Context) error { - return r.StartFunc(ctx) -} +var _ warmupRunnable = &warmupRunnableFunc{} -func (r leaderElectionRunnableFunc) NeedLeaderElection() bool { - return r.NeedLeaderElectionFunc() +func newWarmupRunnableFunc( + startFunc func(context.Context) error, + warmupFunc func(context.Context) error, +) *warmupRunnableFunc { + return &warmupRunnableFunc{ + startFunc: startFunc, + warmupFunc: warmupFunc, + didWarmupFinishChan: make(chan struct{}), + } } -var _ warmupRunnable = &warmupRunnableFunc{} - // warmupRunnableFunc is a helper struct that implements WarmupRunnable // for testing purposes. type warmupRunnableFunc struct { - StartFunc func(context.Context) error - WarmupFunc func(context.Context) error - didWarmupFinish chan bool + startFunc func(context.Context) error + warmupFunc func(context.Context) error + + // didWarmupFinishChan is closed when warmup is finished + didWarmupFinishChan chan struct{} + + // didWarmupFinishSuccessfully is set to true if warmup was successful + didWarmupFinishSuccessfully atomic.Bool } -func (r warmupRunnableFunc) Start(ctx context.Context) error { - return r.StartFunc(ctx) +func (r *warmupRunnableFunc) Start(ctx context.Context) error { + return r.startFunc(ctx) } -func (r warmupRunnableFunc) Warmup(ctx context.Context) error { - err := r.WarmupFunc(ctx) - r.didWarmupFinish <- (err == nil) +func (r *warmupRunnableFunc) Warmup(ctx context.Context) error { + err := r.warmupFunc(ctx) + r.didWarmupFinishSuccessfully.Store(err == nil) + close(r.didWarmupFinishChan) return err } -func (r warmupRunnableFunc) WaitForWarmupComplete(ctx context.Context) bool { - return <-r.didWarmupFinish +func (r *warmupRunnableFunc) WaitForWarmupComplete(ctx context.Context) bool { + <-r.didWarmupFinishChan + return r.didWarmupFinishSuccessfully.Load() } var _ LeaderElectionRunnable = &leaderElectionAndWarmupRunnable{} @@ -401,26 +390,25 @@ var _ warmupRunnable = &leaderElectionAndWarmupRunnable{} // leaderElectionAndWarmupRunnable implements both WarmupRunnable and LeaderElectionRunnable type leaderElectionAndWarmupRunnable struct { - StartFunc func(context.Context) error - WarmupFunc func(context.Context) error - NeedLeaderElectionFunc func() bool - didWarmupFinish chan bool -} - -func (r leaderElectionAndWarmupRunnable) Start(ctx context.Context) error { - return r.StartFunc(ctx) -} - -func (r leaderElectionAndWarmupRunnable) Warmup(ctx context.Context) error { - err := r.WarmupFunc(ctx) - r.didWarmupFinish <- (err == nil) - return err + *warmupRunnableFunc + needLeaderElection bool } -func (r leaderElectionAndWarmupRunnable) WaitForWarmupComplete(ctx context.Context) bool { - return <-r.didWarmupFinish +func newLeaderElectionAndWarmupRunnable( + startFunc func(context.Context) error, + warmupFunc func(context.Context) error, + needLeaderElection bool, +) *leaderElectionAndWarmupRunnable { + return &leaderElectionAndWarmupRunnable{ + warmupRunnableFunc: &warmupRunnableFunc{ + startFunc: startFunc, + warmupFunc: warmupFunc, + didWarmupFinishChan: make(chan struct{}), + }, + needLeaderElection: needLeaderElection, + } } func (r leaderElectionAndWarmupRunnable) NeedLeaderElection() bool { - return r.NeedLeaderElectionFunc() + return r.needLeaderElection } From 1987b54b92f4cf24cd8d8e554c0b569d28d54a02 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Fri, 16 May 2025 01:44:43 -0700 Subject: [PATCH 22/45] Make didStartEventSources run once with sync.Once + UT. --- pkg/internal/controller/controller.go | 123 ++++++++++----------- pkg/internal/controller/controller_test.go | 30 +++++ 2 files changed, 91 insertions(+), 62 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 351d39d177..ffbdf761ba 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -89,8 +89,8 @@ type Controller[request comparable] struct { // startWatches maintains a list of sources, handlers, and predicates to start when the controller is started. startWatches []source.TypedSource[request] - // didStartEventSources is used to indicate whether the event sources have been started. - didStartEventSources atomic.Bool + // didStartEventSourcesOnce is used to ensure that the event sources are only started once. + didStartEventSourcesOnce sync.Once // didEventSourcesFinishSyncSuccessfully is used to indicate whether the event sources have finished // successfully. It stores a *bool where @@ -289,70 +289,69 @@ func (c *Controller[request]) Start(ctx context.Context) error { // startEventSources launches all the sources registered with this controller and waits // for them to sync. It returns an error if any of the sources fail to start or sync. func (c *Controller[request]) startEventSources(ctx context.Context) error { - // CAS returns false if value is already true, so early exit since another goroutine must have - // called startEventSources previously - if !c.didStartEventSources.CompareAndSwap(false, true) { - c.LogConstructor(nil).Info("Skipping starting event sources since they were previously started") - return nil - } - - errGroup := &errgroup.Group{} - for _, watch := range c.startWatches { - log := c.LogConstructor(nil) - _, ok := watch.(interface { - String() string - }) - if !ok { - log = log.WithValues("source", fmt.Sprintf("%T", watch)) - } else { - log = log.WithValues("source", fmt.Sprintf("%s", watch)) - } - didStartSyncingSource := &atomic.Bool{} - errGroup.Go(func() error { - // Use a timeout for starting and syncing the source to avoid silently - // blocking startup indefinitely if it doesn't come up. - sourceStartCtx, cancel := context.WithTimeout(ctx, c.CacheSyncTimeout) - defer cancel() - - sourceStartErrChan := make(chan error, 1) // Buffer chan to not leak goroutine if we time out - go func() { - defer close(sourceStartErrChan) - log.Info("Starting EventSource") - if err := watch.Start(ctx, c.Queue); err != nil { - sourceStartErrChan <- err - return - } - syncingSource, ok := watch.(source.TypedSyncingSource[request]) - if !ok { - return - } - didStartSyncingSource.Store(true) - if err := syncingSource.WaitForSync(sourceStartCtx); err != nil { - err := fmt.Errorf("failed to wait for %s caches to sync %v: %w", c.Name, syncingSource, err) - log.Error(err, "Could not wait for Cache to sync") - sourceStartErrChan <- err + var retErr error + + c.didStartEventSourcesOnce.Do(func() { + errGroup := &errgroup.Group{} + for _, watch := range c.startWatches { + log := c.LogConstructor(nil) + _, ok := watch.(interface { + String() string + }) + if !ok { + log = log.WithValues("source", fmt.Sprintf("%T", watch)) + } else { + log = log.WithValues("source", fmt.Sprintf("%s", watch)) + } + didStartSyncingSource := &atomic.Bool{} + errGroup.Go(func() error { + // Use a timeout for starting and syncing the source to avoid silently + // blocking startup indefinitely if it doesn't come up. + sourceStartCtx, cancel := context.WithTimeout(ctx, c.CacheSyncTimeout) + defer cancel() + + sourceStartErrChan := make(chan error, 1) // Buffer chan to not leak goroutine if we time out + go func() { + defer close(sourceStartErrChan) + log.Info("Starting EventSource") + if err := watch.Start(ctx, c.Queue); err != nil { + sourceStartErrChan <- err + return + } + syncingSource, ok := watch.(source.TypedSyncingSource[request]) + if !ok { + return + } + didStartSyncingSource.Store(true) + if err := syncingSource.WaitForSync(sourceStartCtx); err != nil { + err := fmt.Errorf("failed to wait for %s caches to sync %v: %w", c.Name, syncingSource, err) + log.Error(err, "Could not wait for Cache to sync") + sourceStartErrChan <- err + } + }() + + select { + case err := <-sourceStartErrChan: + return err + case <-sourceStartCtx.Done(): + if didStartSyncingSource.Load() { // We are racing with WaitForSync, wait for it to let it tell us what happened + return <-sourceStartErrChan + } + if ctx.Err() != nil { // Don't return an error if the root context got cancelled + return nil + } + return fmt.Errorf("timed out waiting for source %s to Start. Please ensure that its Start() method is non-blocking", watch) } - }() + }) + } + err := errGroup.Wait() - select { - case err := <-sourceStartErrChan: - return err - case <-sourceStartCtx.Done(): - if didStartSyncingSource.Load() { // We are racing with WaitForSync, wait for it to let it tell us what happened - return <-sourceStartErrChan - } - if ctx.Err() != nil { // Don't return an error if the root context got cancelled - return nil - } - return fmt.Errorf("timed out waiting for source %s to Start. Please ensure that its Start() method is non-blocking", watch) - } - }) - } - err := errGroup.Wait() + c.didEventSourcesFinishSyncSuccessfully.Store(ptr.To(err == nil)) - c.didEventSourcesFinishSyncSuccessfully.Store(ptr.To(err == nil)) + retErr = err + }) - return err + return retErr } // processNextWorkItem will read a single work item off the workqueue and diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 7b230f1860..e4a9495640 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -502,6 +502,36 @@ var _ = Describe("controller", func() { Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("timed out waiting for source")) }) + + It("should only start sources once when called multiple times", func() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ctrl.CacheSyncTimeout = 1 * time.Millisecond + + var startCount atomic.Int32 + src := source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + startCount.Add(1) + return nil + }) + + ctrl.startWatches = []source.TypedSource[reconcile.Request]{src} + + By("Calling startEventSources multiple times in parallel") + var wg sync.WaitGroup + for i := 1; i <= 5; i++ { + wg.Add(1) + go func() { + defer wg.Done() + err := ctrl.startEventSources(ctx) + // All calls should return the same nil error + Expect(err).NotTo(HaveOccurred()) + }() + } + + wg.Wait() + Expect(startCount.Load()).To(Equal(int32(1)), "Source should only be started once even when called multiple times") + }) }) Describe("Processing queue items from a Controller", func() { From a49f3a4613bc6064f6a93911135e81abad259dae Mon Sep 17 00:00:00 2001 From: godwinpang Date: Fri, 16 May 2025 02:09:10 -0700 Subject: [PATCH 23/45] Rewrite Warmup to avoid polling. --- pkg/internal/controller/controller.go | 79 +++++++++++---------------- 1 file changed, 32 insertions(+), 47 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index ffbdf761ba..2feb24247f 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -30,7 +30,6 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/client-go/util/workqueue" - "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/controller/priorityqueue" ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/internal/controller/metrics" @@ -39,11 +38,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" ) -const ( - // syncedPollPeriod is the period to poll for cache sync - syncedPollPeriod = 100 * time.Millisecond -) - // Controller implements controller.Controller. type Controller[request comparable] struct { // Name is used to uniquely identify a Controller in tracing, logging and monitoring. Name is required. @@ -92,12 +86,18 @@ type Controller[request comparable] struct { // didStartEventSourcesOnce is used to ensure that the event sources are only started once. didStartEventSourcesOnce sync.Once - // didEventSourcesFinishSyncSuccessfully is used to indicate whether the event sources have finished - // successfully. It stores a *bool where - // - nil: not finished syncing - // - true: finished syncing without error - // - false: finished syncing with error - didEventSourcesFinishSyncSuccessfully atomic.Value + // ensureDidWarmupFinishChanInitializedOnce is used to ensure that the didWarmupFinishChan is + // initialized to a non-nil channel. + ensureDidWarmupFinishChanInitializedOnce sync.Once + + // didWarmupFinish is closed when startEventSources returns. It is used to + // signal to WaitForWarmupComplete that the event sources have finished syncing. + didWarmupFinishChan chan struct{} + + // didWarmupFinishSuccessfully is used to indicate whether the event sources have finished + // successfully. If true, the event sources have finished syncing without error. If false, the + // event sources have finished syncing but with error. + didWarmupFinishSuccessfully atomic.Bool // LogConstructor is used to construct a logger to then log messages to users during reconciliation, // or for example when a watch is started. @@ -171,7 +171,13 @@ func (c *Controller[request]) Warmup(ctx context.Context) error { if c.NeedWarmup == nil || !*c.NeedWarmup { return nil } - return c.startEventSources(ctx) + + c.ensureDidWarmupFinishChanInitialized() + err := c.startEventSources(ctx) + c.didWarmupFinishSuccessfully.Store(err == nil) + close(c.didWarmupFinishChan) + + return err } // WaitForWarmupComplete returns true if warmup has completed without error, and false if there was @@ -180,36 +186,10 @@ func (c *Controller[request]) WaitForWarmupComplete(ctx context.Context) bool { if c.NeedWarmup == nil || !*c.NeedWarmup { return true } - ticker := time.NewTicker(syncedPollPeriod) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return true - case <-ticker.C: - didFinishSync := c.didEventSourcesFinishSyncSuccessfully.Load() - if didFinishSync == nil { - // event source still syncing - continue - } - // This *bool assertion is done after checking for nil because type asserting a nil - // interface as a *bool will return false, which is not what we want since nil should be - // treated as not finished syncing. - didFinishSyncPtr, ok := didFinishSync.(*bool) - if !ok { - // programming error, should never happen - return false - } - - if didFinishSyncPtr != nil && *didFinishSyncPtr { - // event sources finished syncing successfully - return true - } - return false - } - } + c.ensureDidWarmupFinishChanInitialized() + <-c.didWarmupFinishChan + return c.didWarmupFinishSuccessfully.Load() } // Start implements controller.Controller. @@ -344,11 +324,7 @@ func (c *Controller[request]) startEventSources(ctx context.Context) error { } }) } - err := errGroup.Wait() - - c.didEventSourcesFinishSyncSuccessfully.Store(ptr.To(err == nil)) - - retErr = err + retErr = errGroup.Wait() }) return retErr @@ -460,6 +436,15 @@ func (c *Controller[request]) updateMetrics(reconcileTime time.Duration) { ctrlmetrics.ReconcileTime.WithLabelValues(c.Name).Observe(reconcileTime.Seconds()) } +// ensureDidWarmupFinishChanInitialized ensures that the didWarmupFinishChan is initialized. This is needed +// because controller can directly be created from other packages like controller.Controller, and +// there is no way for the caller to pass in the chan. +func (c *Controller[request]) ensureDidWarmupFinishChanInitialized() { + c.ensureDidWarmupFinishChanInitializedOnce.Do(func() { + c.didWarmupFinishChan = make(chan struct{}) + }) +} + // ReconcileIDFromContext gets the reconcileID from the current context. func ReconcileIDFromContext(ctx context.Context) types.UID { r, ok := ctx.Value(reconcileIDKey{}).(types.UID) From 89f5479fbb8b5c43bc98489dd0d3788d5f0a1450 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Fri, 16 May 2025 02:14:19 -0700 Subject: [PATCH 24/45] Rename NeedWarmup to EnableWarmup. --- pkg/config/controller.go | 8 ++--- pkg/controller/controller.go | 10 +++--- pkg/controller/controller_test.go | 36 +++++++++++----------- pkg/internal/controller/controller.go | 8 ++--- pkg/internal/controller/controller_test.go | 14 ++++----- 5 files changed, 38 insertions(+), 38 deletions(-) diff --git a/pkg/config/controller.go b/pkg/config/controller.go index fa9d1a3135..a825bed076 100644 --- a/pkg/config/controller.go +++ b/pkg/config/controller.go @@ -60,18 +60,18 @@ type Controller struct { // Defaults to true, which means the controller will use leader election. NeedLeaderElection *bool - // NeedWarmup specifies whether the controller should start its sources when the manager is not + // EnableWarmup specifies whether the controller should start its sources when the manager is not // the leader. This is useful for cases where sources take a long time to start, as it allows // for the controller to warm up its caches even before it is elected as the leader. This // improves leadership failover time, as the caches will be prepopulated before the controller // transitions to be leader. // - // Setting NeedWarmup to true and NeedLeaderElection to true means the controller will start its + // Setting EnableWarmup to true and NeedLeaderElection to true means the controller will start its // sources without waiting to become leader. - // Setting NeedWarmup to true and NeedLeaderElection is false is a no-op as controllers without + // Setting EnableWarmup to true and NeedLeaderElection is false is a no-op as controllers without // leader election do not wait on leader election to start their sources. // Defaults to false. - NeedWarmup *bool + EnableWarmup *bool // UsePriorityQueue configures the controllers queue to use the controller-runtime provided // priority queue. diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 93a952c763..1d6bf5fe8a 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -94,7 +94,7 @@ type TypedOptions[request comparable] struct { // Note: This flag is disabled by default until a future version. It's currently in beta. UsePriorityQueue *bool - // NeedWarmup specifies whether the controller should start its sources when the manager is not + // EnableWarmup specifies whether the controller should start its sources when the manager is not // the leader. This is useful for cases where sources take a long time to start, as it allows // for the controller to warm up its caches even before it is elected as the leader. This // improves leadership failover time, as the caches will be prepopulated before the controller @@ -102,7 +102,7 @@ type TypedOptions[request comparable] struct { // // When set to true, the controller will start its sources without transitioning to be leader. // Defaults to false. - NeedWarmup *bool + EnableWarmup *bool } // DefaultFromConfig defaults the config from a config.Controller @@ -135,8 +135,8 @@ func (options *TypedOptions[request]) DefaultFromConfig(config config.Controller options.NeedLeaderElection = config.NeedLeaderElection } - if options.NeedWarmup == nil { - options.NeedWarmup = config.NeedWarmup + if options.EnableWarmup == nil { + options.EnableWarmup = config.EnableWarmup } } @@ -267,7 +267,7 @@ func NewTypedUnmanaged[request comparable](name string, options TypedOptions[req LogConstructor: options.LogConstructor, RecoverPanic: options.RecoverPanic, LeaderElected: options.NeedLeaderElection, - NeedWarmup: options.NeedWarmup, + EnableWarmup: options.EnableWarmup, }, nil } diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index 391b6c2b04..244a9f17a4 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -475,33 +475,33 @@ var _ = Describe("controller.Controller", func() { Expect(ok).To(BeFalse()) }) - It("should set NeedWarmup correctly", func() { + It("should set EnableWarmup correctly", func() { m, err := manager.New(cfg, manager.Options{}) Expect(err).NotTo(HaveOccurred()) - // Test with NeedWarmup set to true + // Test with EnableWarmup set to true ctrlWithWarmup, err := controller.New("warmup-enabled-ctrl", m, controller.Options{ - Reconciler: reconcile.Func(nil), - NeedWarmup: ptr.To(true), + Reconciler: reconcile.Func(nil), + EnableWarmup: ptr.To(true), }) Expect(err).NotTo(HaveOccurred()) internalCtrlWithWarmup, ok := ctrlWithWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - Expect(internalCtrlWithWarmup.NeedWarmup).To(HaveValue(BeTrue())) + Expect(internalCtrlWithWarmup.EnableWarmup).To(HaveValue(BeTrue())) - // Test with NeedWarmup set to false + // Test with EnableWarmup set to false ctrlWithoutWarmup, err := controller.New("warmup-disabled-ctrl", m, controller.Options{ - Reconciler: reconcile.Func(nil), - NeedWarmup: ptr.To(false), + Reconciler: reconcile.Func(nil), + EnableWarmup: ptr.To(false), }) Expect(err).NotTo(HaveOccurred()) internalCtrlWithoutWarmup, ok := ctrlWithoutWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - Expect(internalCtrlWithoutWarmup.NeedWarmup).To(HaveValue(BeFalse())) + Expect(internalCtrlWithoutWarmup.EnableWarmup).To(HaveValue(BeFalse())) - // Test with NeedWarmup not set (should default to nil) + // Test with EnableWarmup not set (should default to nil) ctrlWithDefaultWarmup, err := controller.New("warmup-default-ctrl", m, controller.Options{ Reconciler: reconcile.Func(nil), }) @@ -509,14 +509,14 @@ var _ = Describe("controller.Controller", func() { internalCtrlWithDefaultWarmup, ok := ctrlWithDefaultWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - Expect(internalCtrlWithDefaultWarmup.NeedWarmup).To(BeNil()) + Expect(internalCtrlWithDefaultWarmup.EnableWarmup).To(BeNil()) }) - It("should inherit NeedWarmup from manager config", func() { - // Test with manager default setting NeedWarmup to true + It("should inherit EnableWarmup from manager config", func() { + // Test with manager default setting EnableWarmup to true managerWithWarmup, err := manager.New(cfg, manager.Options{ Controller: config.Controller{ - NeedWarmup: ptr.To(true), + EnableWarmup: ptr.To(true), }, }) Expect(err).NotTo(HaveOccurred()) @@ -527,18 +527,18 @@ var _ = Describe("controller.Controller", func() { internalCtrlInheritingWarmup, ok := ctrlInheritingWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - Expect(internalCtrlInheritingWarmup.NeedWarmup).To(HaveValue(BeTrue())) + Expect(internalCtrlInheritingWarmup.EnableWarmup).To(HaveValue(BeTrue())) // Test that explicit controller setting overrides manager setting ctrlOverridingWarmup, err := controller.New("override-warmup-disabled", managerWithWarmup, controller.Options{ - Reconciler: reconcile.Func(nil), - NeedWarmup: ptr.To(false), + Reconciler: reconcile.Func(nil), + EnableWarmup: ptr.To(false), }) Expect(err).NotTo(HaveOccurred()) internalCtrlOverridingWarmup, ok := ctrlOverridingWarmup.(*internalcontroller.Controller[reconcile.Request]) Expect(ok).To(BeTrue()) - Expect(internalCtrlOverridingWarmup.NeedWarmup).To(HaveValue(BeFalse())) + Expect(internalCtrlOverridingWarmup.EnableWarmup).To(HaveValue(BeFalse())) }) }) }) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 2feb24247f..9ca4f1152a 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -112,11 +112,11 @@ type Controller[request comparable] struct { // LeaderElected indicates whether the controller is leader elected or always running. LeaderElected *bool - // NeedWarmup specifies whether the controller should start its sources + // EnableWarmup specifies whether the controller should start its sources // when the manager is not the leader. // Defaults to false, which means that the controller will wait for leader election to start // before starting sources. - NeedWarmup *bool + EnableWarmup *bool } // Reconcile implements reconcile.Reconciler. @@ -168,7 +168,7 @@ func (c *Controller[request]) NeedLeaderElection() bool { // Warmup implements the manager.WarmupRunnable interface. func (c *Controller[request]) Warmup(ctx context.Context) error { - if c.NeedWarmup == nil || !*c.NeedWarmup { + if c.EnableWarmup == nil || !*c.EnableWarmup { return nil } @@ -183,7 +183,7 @@ func (c *Controller[request]) Warmup(ctx context.Context) error { // WaitForWarmupComplete returns true if warmup has completed without error, and false if there was // an error during warmup. If context is cancelled, it returns true. func (c *Controller[request]) WaitForWarmupComplete(ctx context.Context) bool { - if c.NeedWarmup == nil || !*c.NeedWarmup { + if c.EnableWarmup == nil || !*c.EnableWarmup { return true } diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index e4a9495640..dfb34efd36 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -1051,7 +1051,7 @@ var _ = Describe("controller", func() { Describe("Warmup", func() { JustBeforeEach(func() { - ctrl.NeedWarmup = ptr.To(true) + ctrl.EnableWarmup = ptr.To(true) }) It("should track warmup status correctly with successful sync", func() { @@ -1165,7 +1165,7 @@ var _ = Describe("controller", func() { return log.RuntimeLog.WithName("controller").WithName("test") }, CacheSyncTimeout: time.Second, - NeedWarmup: ptr.To(false), + EnableWarmup: ptr.To(false), LeaderElected: ptr.To(true), startWatches: []source.TypedSource[reconcile.Request]{ source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { @@ -1206,7 +1206,7 @@ var _ = Describe("controller", func() { Describe("Warmup with warmup disabled", func() { JustBeforeEach(func() { - ctrl.NeedWarmup = ptr.To(false) + ctrl.EnableWarmup = ptr.To(false) }) It("should not start sources when Warmup is called if warmup is disabled but start it when Start is called.", func() { @@ -1224,12 +1224,12 @@ var _ = Describe("controller", func() { }), } - By("Calling Warmup when NeedWarmup is false") + By("Calling Warmup when EnableWarmup is false") err := ctrl.Warmup(ctx) Expect(err).NotTo(HaveOccurred()) Expect(isSourceStarted.Load()).To(BeFalse()) - By("Calling Start when NeedWarmup is false") + By("Calling Start when EnableWarmup is false") // Now call Start go func() { defer GinkgoRecover() @@ -1241,7 +1241,7 @@ var _ = Describe("controller", func() { Describe("WaitForWarmupComplete", func() { It("should short circuit without blocking if warmup is disabled", func() { - ctrl.NeedWarmup = ptr.To(false) + ctrl.EnableWarmup = ptr.To(false) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -1252,7 +1252,7 @@ var _ = Describe("controller", func() { }) It("should block until warmup is complete if warmup is enabled", func() { - ctrl.NeedWarmup = ptr.To(true) + ctrl.EnableWarmup = ptr.To(true) // Setup controller with sources that complete successfully ctx, cancel := context.WithCancel(context.Background()) defer cancel() From 9d5ddfb9a425c87b325884b89b44f847ec96e7c5 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Fri, 16 May 2025 02:21:18 -0700 Subject: [PATCH 25/45] Clarify comment on Warmup. --- pkg/manager/manager.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 0740b22f52..331a13656a 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -315,11 +315,11 @@ type LeaderElectionRunnable interface { } // warmupRunnable knows if a Runnable requires warmup. A warmup runnable is a runnable -// that should be run when the manager is started, but before the leader election is acquired. +// that should be run when the manager is started but before it becomes leader. // Note: Implementing this interface is only useful when LeaderElection can be enabled, as the // behavior when leaderelection is not enabled is to run LeaderElectionRunnables immediately. type warmupRunnable interface { - // Warmup is the implementation of the warmup runnable. + // Warmup will be called when the manager is started but before it becomes leader. Warmup(context.Context) error // WaitForWarmupComplete is a blocking function that waits for the warmup to be completed. It From 66f64f0c86c0bc20499395e75b163e93dcab718f Mon Sep 17 00:00:00 2001 From: godwinpang Date: Fri, 16 May 2025 03:46:55 -0700 Subject: [PATCH 26/45] Move reset watches critical section inside of startEventSources. --- pkg/internal/controller/controller.go | 12 ++++++------ pkg/internal/controller/controller_test.go | 19 ++++++++++++++++++- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 9ca4f1152a..da6195acb2 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -233,12 +233,6 @@ func (c *Controller[request]) Start(ctx context.Context) error { c.LogConstructor(nil).Info("Starting Controller") - // All the watches have been started, we can reset the local slice. - // - // We should never hold watches more than necessary, each watch source can hold a backing cache, - // which won't be garbage collected if we hold a reference to it. - c.startWatches = nil - // Launch workers to process resources c.LogConstructor(nil).Info("Starting workers", "worker count", c.MaxConcurrentReconciles) wg.Add(c.MaxConcurrentReconciles) @@ -325,6 +319,12 @@ func (c *Controller[request]) startEventSources(ctx context.Context) error { }) } retErr = errGroup.Wait() + + // All the watches have been started, we can reset the local slice. + // + // We should never hold watches more than necessary, each watch source can hold a backing cache, + // which won't be garbage collected if we hold a reference to it. + c.startWatches = nil }) return retErr diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index dfb34efd36..cac76591fa 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -503,7 +503,7 @@ var _ = Describe("controller", func() { Expect(err.Error()).To(ContainSubstring("timed out waiting for source")) }) - It("should only start sources once when called multiple times", func() { + It("should only start sources once when called multiple times concurrently", func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -532,6 +532,23 @@ var _ = Describe("controller", func() { wg.Wait() Expect(startCount.Load()).To(Equal(int32(1)), "Source should only be started once even when called multiple times") }) + + It("should reset c.startWatches to nil after returning", func() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ctrl.CacheSyncTimeout = 1 * time.Millisecond + + src := source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + return nil + }) + + ctrl.startWatches = []source.TypedSource[reconcile.Request]{src} + + err := ctrl.startEventSources(ctx) + Expect(err).NotTo(HaveOccurred()) + Expect(ctrl.startWatches).To(BeNil(), "startWatches should be reset to nil after returning") + }) }) Describe("Processing queue items from a Controller", func() { From 0563114dd8b90be73addf555a249ec4ba5d3ced0 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Fri, 16 May 2025 03:47:52 -0700 Subject: [PATCH 27/45] Add test to assert startEventSources blocking behavior. --- pkg/internal/controller/controller_test.go | 38 ++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index cac76591fa..6116ab8cdc 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -533,6 +533,44 @@ var _ = Describe("controller", func() { Expect(startCount.Load()).To(Equal(int32(1)), "Source should only be started once even when called multiple times") }) + It("should block subsequent calls from returning until the first call to startEventSources has returned", func() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + ctrl.CacheSyncTimeout = 5 * time.Second + + // finishSourceChan is closed to unblock startEventSources from returning + finishSourceChan := make(chan struct{}) + + src := source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + <-finishSourceChan + return nil + }) + ctrl.startWatches = []source.TypedSource[reconcile.Request]{src} + + By("Calling startEventSources asynchronously") + go func() { + defer GinkgoRecover() + Expect(ctrl.startEventSources(ctx)).To(Succeed()) + }() + + By("Calling startEventSources again") + var didSubsequentCallComplete atomic.Bool + go func() { + defer GinkgoRecover() + Expect(ctrl.startEventSources(ctx)).To(Succeed()) + didSubsequentCallComplete.Store(true) + }() + + // Assert that second call to startEventSources is blocked while source has not finished + Consistently(didSubsequentCallComplete.Load).Should(BeFalse()) + + By("Finishing source start + sync") + finishSourceChan <- struct{}{} + + // Assert that second call to startEventSources is now complete + Eventually(didSubsequentCallComplete.Load).Should(BeTrue(), "startEventSources should complete after source is started and synced") + }) + It("should reset c.startWatches to nil after returning", func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() From aa20ef533b13a583ff3977c2506c360d56181462 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Fri, 16 May 2025 04:04:53 -0700 Subject: [PATCH 28/45] Make Start threadsafe with Warmup + UT. --- pkg/internal/controller/controller.go | 5 ++++ pkg/internal/controller/controller_test.go | 35 +++++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index da6195acb2..15917df7f8 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -172,6 +172,11 @@ func (c *Controller[request]) Warmup(ctx context.Context) error { return nil } + // Hold the lock to avoid concurrent access to c.startWatches with Start() when calling + // startEventSources + c.mu.Lock() + defer c.mu.Unlock() + c.ensureDidWarmupFinishChanInitialized() err := c.startEventSources(ctx) c.didWarmupFinishSuccessfully.Store(err == nil) diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 6116ab8cdc..ef14b8a10d 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -533,7 +533,7 @@ var _ = Describe("controller", func() { Expect(startCount.Load()).To(Equal(int32(1)), "Source should only be started once even when called multiple times") }) - It("should block subsequent calls from returning until the first call to startEventSources has returned", func() { + It("should block subsequent calls from returning until the first call to startEventSources has returned", func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() ctrl.CacheSyncTimeout = 5 * time.Second @@ -1257,6 +1257,39 @@ var _ = Describe("controller", func() { close(ctrlWatchBlockingChan) Eventually(hasNonWarmupCtrlWatchStarted.Load).Should(BeTrue()) }) + + It("should not race with Start and only start sources once", func() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ctrl.CacheSyncTimeout = time.Second + + var watchStartedCount atomic.Int32 + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + watchStartedCount.Add(1) + return nil + }), + } + + By("calling Warmup and Start concurrently") + go func() { + defer GinkgoRecover() + Expect(ctrl.Start(ctx)).To(Succeed()) + }() + + blockOnWarmupChan := make(chan struct{}) + go func() { + defer GinkgoRecover() + Expect(ctrl.Warmup(ctx)).To(Succeed()) + close(blockOnWarmupChan) + }() + + <-blockOnWarmupChan + + Expect(watchStartedCount.Load()).To(Equal(int32(1)), "source should only be started once") + Expect(ctrl.startWatches).To(BeNil(), "startWatches should be reset to nil after they are started") + }) }) Describe("Warmup with warmup disabled", func() { From c9a2973386a965942de85ad22360a99223709ff4 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Mon, 19 May 2025 01:06:12 -0700 Subject: [PATCH 29/45] Change warmup to use buffered error channel and add New method. --- pkg/controller/controller.go | 4 +- pkg/internal/controller/controller.go | 101 +++++++++++++++------ pkg/internal/controller/controller_test.go | 22 ++--- 3 files changed, 87 insertions(+), 40 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 1d6bf5fe8a..b56d59ab2d 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -257,7 +257,7 @@ func NewTypedUnmanaged[request comparable](name string, options TypedOptions[req } // Create controller with dependencies set - return &controller.Controller[request]{ + return controller.New[request](controller.ControllerOptions[request]{ Do: options.Reconciler, RateLimiter: options.RateLimiter, NewQueue: options.NewQueue, @@ -268,7 +268,7 @@ func NewTypedUnmanaged[request comparable](name string, options TypedOptions[req RecoverPanic: options.RecoverPanic, LeaderElected: options.NeedLeaderElection, EnableWarmup: options.EnableWarmup, - }, nil + }), nil } // ReconcileIDFromContext gets the reconcileID from the current context. diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 15917df7f8..b983790e00 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -38,7 +38,54 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" ) +type ControllerOptions[request comparable] struct { + // Reconciler is a function that can be called at any time with the Name / Namespace of an object and + // ensures that the state of the system matches the state specified in the object. + // Defaults to the DefaultReconcileFunc. + Do reconcile.TypedReconciler[request] + + // RateLimiter is used to limit how frequently requests may be queued into the work queue. + RateLimiter workqueue.TypedRateLimiter[request] + + // NewQueue constructs the queue for this controller once the controller is ready to start. + // This is a func because the standard Kubernetes work queues start themselves immediately, which + // leads to goroutine leaks if something calls controller.New repeatedly. + NewQueue func(controllerName string, rateLimiter workqueue.TypedRateLimiter[request]) workqueue.TypedRateLimitingInterface[request] + + // MaxConcurrentReconciles is the maximum number of concurrent Reconciles which can be run. Defaults to 1. + MaxConcurrentReconciles int + + // CacheSyncTimeout refers to the time limit set on waiting for cache to sync + // Defaults to 2 minutes if not set. + CacheSyncTimeout time.Duration + + // Name is used to uniquely identify a Controller in tracing, logging and monitoring. Name is required. + Name string + + // LogConstructor is used to construct a logger to then log messages to users during reconciliation, + // or for example when a watch is started. + // Note: LogConstructor has to be able to handle nil requests as we are also using it + // outside the context of a reconciliation. + LogConstructor func(request *request) logr.Logger + + // RecoverPanic indicates whether the panic caused by reconcile should be recovered. + // Defaults to true. + RecoverPanic *bool + + // LeaderElected indicates whether the controller is leader elected or always running. + LeaderElected *bool + + // EnableWarmup specifies whether the controller should start its sources + // when the manager is not the leader. + // Defaults to false, which means that the controller will wait for leader election to start + // before starting sources. + EnableWarmup *bool +} + // Controller implements controller.Controller. +// WARNING: If directly instantiating a Controller vs. using the New method, ensure that the +// warmupResultChan is instantiated as a buffered channel of size 1. Otherwise, the controller will +// panic on having Warmup called. type Controller[request comparable] struct { // Name is used to uniquely identify a Controller in tracing, logging and monitoring. Name is required. Name string @@ -86,18 +133,9 @@ type Controller[request comparable] struct { // didStartEventSourcesOnce is used to ensure that the event sources are only started once. didStartEventSourcesOnce sync.Once - // ensureDidWarmupFinishChanInitializedOnce is used to ensure that the didWarmupFinishChan is - // initialized to a non-nil channel. - ensureDidWarmupFinishChanInitializedOnce sync.Once - - // didWarmupFinish is closed when startEventSources returns. It is used to - // signal to WaitForWarmupComplete that the event sources have finished syncing. - didWarmupFinishChan chan struct{} - - // didWarmupFinishSuccessfully is used to indicate whether the event sources have finished - // successfully. If true, the event sources have finished syncing without error. If false, the - // event sources have finished syncing but with error. - didWarmupFinishSuccessfully atomic.Bool + // warmupResultChan receives the result (nil / non-nil error) of the warmup method. It is + // consumed by the WaitForWarmupComplete method that the warmup has finished. + warmupResultChan chan error // LogConstructor is used to construct a logger to then log messages to users during reconciliation, // or for example when a watch is started. @@ -119,6 +157,22 @@ type Controller[request comparable] struct { EnableWarmup *bool } +func New[request comparable](options ControllerOptions[request]) *Controller[request] { + return &Controller[request]{ + Do: options.Do, + RateLimiter: options.RateLimiter, + NewQueue: options.NewQueue, + MaxConcurrentReconciles: options.MaxConcurrentReconciles, + CacheSyncTimeout: options.CacheSyncTimeout, + Name: options.Name, + LogConstructor: options.LogConstructor, + RecoverPanic: options.RecoverPanic, + LeaderElected: options.LeaderElected, + EnableWarmup: options.EnableWarmup, + warmupResultChan: make(chan error, 1), + } +} + // Reconcile implements reconcile.Reconciler. func (c *Controller[request]) Reconcile(ctx context.Context, req request) (_ reconcile.Result, err error) { defer func() { @@ -177,10 +231,8 @@ func (c *Controller[request]) Warmup(ctx context.Context) error { c.mu.Lock() defer c.mu.Unlock() - c.ensureDidWarmupFinishChanInitialized() err := c.startEventSources(ctx) - c.didWarmupFinishSuccessfully.Store(err == nil) - close(c.didWarmupFinishChan) + c.warmupResultChan <- err return err } @@ -192,9 +244,13 @@ func (c *Controller[request]) WaitForWarmupComplete(ctx context.Context) bool { return true } - c.ensureDidWarmupFinishChanInitialized() - <-c.didWarmupFinishChan - return c.didWarmupFinishSuccessfully.Load() + warmupError, ok := <-c.warmupResultChan + if !ok { + // channel closed unexpectedly + return false + } + + return warmupError == nil } // Start implements controller.Controller. @@ -441,15 +497,6 @@ func (c *Controller[request]) updateMetrics(reconcileTime time.Duration) { ctrlmetrics.ReconcileTime.WithLabelValues(c.Name).Observe(reconcileTime.Seconds()) } -// ensureDidWarmupFinishChanInitialized ensures that the didWarmupFinishChan is initialized. This is needed -// because controller can directly be created from other packages like controller.Controller, and -// there is no way for the caller to pass in the chan. -func (c *Controller[request]) ensureDidWarmupFinishChanInitialized() { - c.ensureDidWarmupFinishChanInitializedOnce.Do(func() { - c.didWarmupFinishChan = make(chan struct{}) - }) -} - // ReconcileIDFromContext gets the reconcileID from the current context. func ReconcileIDFromContext(ctx context.Context) types.UID { r, ok := ctx.Value(reconcileIDKey{}).(types.UID) diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index ef14b8a10d..cb524dbec8 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -72,7 +72,7 @@ var _ = Describe("controller", func() { queue = &controllertest.Queue{ TypedInterface: workqueue.NewTyped[reconcile.Request](), } - ctrl = &Controller[reconcile.Request]{ + ctrl = New[reconcile.Request](ControllerOptions[reconcile.Request]{ MaxConcurrentReconciles: 1, Do: fakeReconcile, NewQueue: func(string, workqueue.TypedRateLimiter[reconcile.Request]) workqueue.TypedRateLimitingInterface[reconcile.Request] { @@ -81,7 +81,7 @@ var _ = Describe("controller", func() { LogConstructor: func(_ *reconcile.Request) logr.Logger { return log.RuntimeLog.WithName("controller").WithName("test") }, - } + }) }) Describe("Reconciler", func() { @@ -353,14 +353,14 @@ var _ = Describe("controller", func() { TypedRateLimitingInterface: &controllertest.TypedQueue[TestRequest]{ TypedInterface: workqueue.NewTyped[TestRequest](), }} - ctrl := &Controller[TestRequest]{ + ctrl := New[TestRequest](ControllerOptions[TestRequest]{ NewQueue: func(string, workqueue.TypedRateLimiter[TestRequest]) workqueue.TypedRateLimitingInterface[TestRequest] { return queue }, LogConstructor: func(*TestRequest) logr.Logger { return log.RuntimeLog.WithName("controller").WithName("test") }, - } + }) ctrl.CacheSyncTimeout = time.Second src := &bisignallingSource[TestRequest]{ startCall: make(chan workqueue.TypedRateLimitingInterface[TestRequest]), @@ -1210,7 +1210,7 @@ var _ = Describe("controller", func() { }), } - nonWarmupCtrl := &Controller[reconcile.Request]{ + nonWarmupCtrl := New[reconcile.Request](ControllerOptions[reconcile.Request]{ MaxConcurrentReconciles: 1, Do: fakeReconcile, NewQueue: func(string, workqueue.TypedRateLimiter[reconcile.Request]) workqueue.TypedRateLimitingInterface[reconcile.Request] { @@ -1222,12 +1222,12 @@ var _ = Describe("controller", func() { CacheSyncTimeout: time.Second, EnableWarmup: ptr.To(false), LeaderElected: ptr.To(true), - startWatches: []source.TypedSource[reconcile.Request]{ - source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { - hasNonWarmupCtrlWatchStarted.Store(true) - return nil - }), - }, + }) + nonWarmupCtrl.startWatches = []source.TypedSource[reconcile.Request]{ + source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + hasNonWarmupCtrlWatchStarted.Store(true) + return nil + }), } By("Creating a manager") From 79a7b95e6a01adbb51e07a0e9712fad91026df21 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Tue, 20 May 2025 11:31:33 -0700 Subject: [PATCH 30/45] Fail in warmup directly and rely on sync.Once for warmup thread-safety without WaitForWarmupComplete. --- pkg/internal/controller/controller.go | 34 +------- pkg/internal/controller/controller_test.go | 98 +++------------------- pkg/manager/manager.go | 4 - pkg/manager/manager_test.go | 32 +++---- pkg/manager/runnable_group.go | 7 +- pkg/manager/runnable_group_test.go | 26 ++---- 6 files changed, 29 insertions(+), 172 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index b983790e00..846aa67538 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -83,9 +83,6 @@ type ControllerOptions[request comparable] struct { } // Controller implements controller.Controller. -// WARNING: If directly instantiating a Controller vs. using the New method, ensure that the -// warmupResultChan is instantiated as a buffered channel of size 1. Otherwise, the controller will -// panic on having Warmup called. type Controller[request comparable] struct { // Name is used to uniquely identify a Controller in tracing, logging and monitoring. Name is required. Name string @@ -133,10 +130,6 @@ type Controller[request comparable] struct { // didStartEventSourcesOnce is used to ensure that the event sources are only started once. didStartEventSourcesOnce sync.Once - // warmupResultChan receives the result (nil / non-nil error) of the warmup method. It is - // consumed by the WaitForWarmupComplete method that the warmup has finished. - warmupResultChan chan error - // LogConstructor is used to construct a logger to then log messages to users during reconciliation, // or for example when a watch is started. // Note: LogConstructor has to be able to handle nil requests as we are also using it @@ -169,7 +162,6 @@ func New[request comparable](options ControllerOptions[request]) *Controller[req RecoverPanic: options.RecoverPanic, LeaderElected: options.LeaderElected, EnableWarmup: options.EnableWarmup, - warmupResultChan: make(chan error, 1), } } @@ -226,31 +218,7 @@ func (c *Controller[request]) Warmup(ctx context.Context) error { return nil } - // Hold the lock to avoid concurrent access to c.startWatches with Start() when calling - // startEventSources - c.mu.Lock() - defer c.mu.Unlock() - - err := c.startEventSources(ctx) - c.warmupResultChan <- err - - return err -} - -// WaitForWarmupComplete returns true if warmup has completed without error, and false if there was -// an error during warmup. If context is cancelled, it returns true. -func (c *Controller[request]) WaitForWarmupComplete(ctx context.Context) bool { - if c.EnableWarmup == nil || !*c.EnableWarmup { - return true - } - - warmupError, ok := <-c.warmupResultChan - if !ok { - // channel closed unexpectedly - return false - } - - return warmupError == nil + return c.startEventSources(ctx) } // Start implements controller.Controller. diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index cb524dbec8..625cd2bb5a 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -1121,12 +1121,7 @@ var _ = Describe("controller", func() { }), } - err := ctrl.Warmup(ctx) - Expect(err).NotTo(HaveOccurred()) - - // Verify WaitForWarmupComplete returns true for successful sync - result := ctrl.WaitForWarmupComplete(ctx) - Expect(result).To(BeTrue()) + Expect(ctrl.Warmup(ctx)).To(Succeed()) }) It("should track warmup status correctly with unsuccessful sync", func() { @@ -1144,10 +1139,6 @@ var _ = Describe("controller", func() { err := ctrl.Warmup(ctx) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("sync error")) - - // Verify WaitForWarmupComplete returns false for unsuccessful sync - result := ctrl.WaitForWarmupComplete(ctx) - Expect(result).To(BeFalse()) }) It("should return true if context is cancelled while waiting for source to start", func() { @@ -1163,27 +1154,19 @@ var _ = Describe("controller", func() { }), } - resultChan := make(chan bool) - // Wait for the goroutines to finish before returning to avoid racing with the // assignment in BeforeEach block. var wg sync.WaitGroup - // Invoked in goroutines because the Warmup / WaitForWarmupComplete will block forever. - wg.Add(2) + // Invoked in a goroutine because Warmup will block + wg.Add(1) go func() { defer GinkgoRecover() defer wg.Done() Expect(ctrl.Warmup(ctx)).To(Succeed()) }() - go func() { - defer GinkgoRecover() - defer wg.Done() - resultChan <- ctrl.WaitForWarmupComplete(ctx) - }() cancel() - Expect(<-resultChan).To(BeTrue()) wg.Wait() }) @@ -1196,16 +1179,15 @@ var _ = Describe("controller", func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - hasCtrlWatchStarted, hasNonWarmupCtrlWatchStarted := atomic.Bool{}, atomic.Bool{} - - // ctrl watch will block from finishing until the channel is produced to - ctrlWatchBlockingChan := make(chan struct{}) + By("Creating a channel to track execution order") + runnableExecutionOrderChan := make(chan string, 2) + const nonWarmupRunnableName = "nonWarmupRunnable" + const warmupRunnableName = "warmupRunnable" ctrl.CacheSyncTimeout = time.Second ctrl.startWatches = []source.TypedSource[reconcile.Request]{ source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { - hasCtrlWatchStarted.Store(true) - <-ctrlWatchBlockingChan + runnableExecutionOrderChan <- warmupRunnableName return nil }), } @@ -1225,7 +1207,7 @@ var _ = Describe("controller", func() { }) nonWarmupCtrl.startWatches = []source.TypedSource[reconcile.Request]{ source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { - hasNonWarmupCtrlWatchStarted.Store(true) + runnableExecutionOrderChan <- nonWarmupRunnableName return nil }), } @@ -1249,13 +1231,9 @@ var _ = Describe("controller", func() { Expect(m.Start(ctx)).To(Succeed()) }() - By("Waiting for the warmup controller to start") - Eventually(hasCtrlWatchStarted.Load).Should(BeTrue()) - Expect(hasNonWarmupCtrlWatchStarted.Load()).To(BeFalse()) - - By("Unblocking the warmup controller source start") - close(ctrlWatchBlockingChan) - Eventually(hasNonWarmupCtrlWatchStarted.Load).Should(BeTrue()) + <-m.Elected() + Expect(<-runnableExecutionOrderChan).To(Equal(warmupRunnableName)) + Expect(<-runnableExecutionOrderChan).To(Equal(nonWarmupRunnableName)) }) It("should not race with Start and only start sources once", func() { @@ -1326,58 +1304,6 @@ var _ = Describe("controller", func() { Eventually(isSourceStarted.Load).Should(BeTrue()) }) }) - - Describe("WaitForWarmupComplete", func() { - It("should short circuit without blocking if warmup is disabled", func() { - ctrl.EnableWarmup = ptr.To(false) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Call WaitForWarmupComplete and expect it to return immediately - result := ctrl.WaitForWarmupComplete(ctx) - Expect(result).To(BeTrue()) - }) - - It("should block until warmup is complete if warmup is enabled", func() { - ctrl.EnableWarmup = ptr.To(true) - // Setup controller with sources that complete successfully - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Close the channel to signal watch completion - shouldWatchCompleteChan := make(chan struct{}) - - ctrl.CacheSyncTimeout = time.Second - ctrl.startWatches = []source.TypedSource[reconcile.Request]{ - source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { - <-shouldWatchCompleteChan - return nil - }), - } - - By("Starting a blocking warmup") - go func() { - defer GinkgoRecover() - Expect(ctrl.Warmup(ctx)).To(Succeed()) - }() - - // didWaitForWarmupCompleteReturn is true when the call to WaitForWarmupComplete returns - didWaitForWarmupCompleteReturn := atomic.Bool{} - go func() { - defer GinkgoRecover() - // Verify WaitForWarmupComplete returns true for successful sync - Expect(ctrl.WaitForWarmupComplete(ctx)).To(BeTrue()) - didWaitForWarmupCompleteReturn.Store(true) - }() - Consistently(didWaitForWarmupCompleteReturn.Load).Should(BeFalse()) - - By("Unblocking the watch to simulate initial sync completion") - close(shouldWatchCompleteChan) - Eventually(didWaitForWarmupCompleteReturn.Load).Should(BeTrue()) - }) - - }) }) var _ = Describe("ReconcileIDFromContext function", func() { diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 331a13656a..a11ee07367 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -321,10 +321,6 @@ type LeaderElectionRunnable interface { type warmupRunnable interface { // Warmup will be called when the manager is started but before it becomes leader. Warmup(context.Context) error - - // WaitForWarmupComplete is a blocking function that waits for the warmup to be completed. It - // returns false if it could not successfully finish warmup. - WaitForWarmupComplete(context.Context) bool } // New returns a new Manager for creating Controllers. diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 0e0586b8e7..2af8ce93e4 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -1930,11 +1930,10 @@ var _ = Describe("manger.Manager", func() { }) It("should run warmup runnables before leader election is won", func() { - By("Creating channels to track execution order") - warmupCalled := atomic.Bool{} - leaderElectionRunnableStarted := atomic.Bool{} - // warmupRunnable's WarmupFunc will block until this channel is closed - warmupRunnableWarmupBlockingChan := make(chan struct{}) + By("Creating a channel to track execution order") + runnableExecutionOrderChan := make(chan string, 2) + const leaderElectionRunnableName = "leaderElectionRunnable" + const warmupRunnableName = "warmupRunnable" By("Creating a manager with leader election enabled") m, err := New(cfg, Options{ @@ -1952,25 +1951,23 @@ var _ = Describe("manger.Manager", func() { // Create a warmup runnable warmupRunnable := newWarmupRunnableFunc( func(ctx context.Context) error { - // This is the main runnable that will be executed after leader election - // Block forever + // This is the leader election runnable that will be executed after leader election + // It will block until context is done/cancelled <-ctx.Done() return nil }, func(ctx context.Context) error { // This should be called during startup before leader election - warmupCalled.Store(true) - <-warmupRunnableWarmupBlockingChan + runnableExecutionOrderChan <- warmupRunnableName return nil }, ) Expect(m.Add(warmupRunnable)).To(Succeed()) By("Creating a runnable that requires leader election") - leaderElectionRunnable := RunnableFunc( func(ctx context.Context) error { - leaderElectionRunnableStarted.Store(true) + runnableExecutionOrderChan <- leaderElectionRunnableName <-ctx.Done() return nil }, @@ -1985,18 +1982,9 @@ var _ = Describe("manger.Manager", func() { Expect(m.Start(ctx)).To(Succeed()) }() - By("Waiting for the warmup runnable to be called") - Eventually(warmupCalled.Load).Should(BeTrue()) - Expect(leaderElectionRunnableStarted.Load()).To(BeFalse()) - - By("Closing the channel to unblock the warmup runnable") - close(warmupRunnableWarmupBlockingChan) - - By("Waiting for leader election to be won") <-m.Elected() - - By("Verifying the leader election runnable is called after election") - Eventually(leaderElectionRunnableStarted.Load).Should(BeTrue()) + Expect(<-runnableExecutionOrderChan).To(Equal(warmupRunnableName)) + Expect(<-runnableExecutionOrderChan).To(Equal(leaderElectionRunnableName)) }) }) diff --git a/pkg/manager/runnable_group.go b/pkg/manager/runnable_group.go index 2ef5817a7d..082b502f5d 100644 --- a/pkg/manager/runnable_group.go +++ b/pkg/manager/runnable_group.go @@ -69,12 +69,7 @@ func (r *runnables) Add(fn Runnable) error { return r.Webhooks.Add(fn, nil) case warmupRunnable, LeaderElectionRunnable: if warmupRunnable, ok := fn.(warmupRunnable); ok { - if err := r.Warmup.Add( - RunnableFunc(warmupRunnable.Warmup), - func(ctx context.Context) bool { - return warmupRunnable.WaitForWarmupComplete(ctx) - }, - ); err != nil { + if err := r.Warmup.Add(RunnableFunc(warmupRunnable.Warmup), nil); err != nil { return err } } diff --git a/pkg/manager/runnable_group_test.go b/pkg/manager/runnable_group_test.go index b230b2baf0..52086047af 100644 --- a/pkg/manager/runnable_group_test.go +++ b/pkg/manager/runnable_group_test.go @@ -350,9 +350,8 @@ func newWarmupRunnableFunc( warmupFunc func(context.Context) error, ) *warmupRunnableFunc { return &warmupRunnableFunc{ - startFunc: startFunc, - warmupFunc: warmupFunc, - didWarmupFinishChan: make(chan struct{}), + startFunc: startFunc, + warmupFunc: warmupFunc, } } @@ -361,12 +360,6 @@ func newWarmupRunnableFunc( type warmupRunnableFunc struct { startFunc func(context.Context) error warmupFunc func(context.Context) error - - // didWarmupFinishChan is closed when warmup is finished - didWarmupFinishChan chan struct{} - - // didWarmupFinishSuccessfully is set to true if warmup was successful - didWarmupFinishSuccessfully atomic.Bool } func (r *warmupRunnableFunc) Start(ctx context.Context) error { @@ -374,15 +367,7 @@ func (r *warmupRunnableFunc) Start(ctx context.Context) error { } func (r *warmupRunnableFunc) Warmup(ctx context.Context) error { - err := r.warmupFunc(ctx) - r.didWarmupFinishSuccessfully.Store(err == nil) - close(r.didWarmupFinishChan) - return err -} - -func (r *warmupRunnableFunc) WaitForWarmupComplete(ctx context.Context) bool { - <-r.didWarmupFinishChan - return r.didWarmupFinishSuccessfully.Load() + return r.warmupFunc(ctx) } var _ LeaderElectionRunnable = &leaderElectionAndWarmupRunnable{} @@ -401,9 +386,8 @@ func newLeaderElectionAndWarmupRunnable( ) *leaderElectionAndWarmupRunnable { return &leaderElectionAndWarmupRunnable{ warmupRunnableFunc: &warmupRunnableFunc{ - startFunc: startFunc, - warmupFunc: warmupFunc, - didWarmupFinishChan: make(chan struct{}), + startFunc: startFunc, + warmupFunc: warmupFunc, }, needLeaderElection: needLeaderElection, } From c1d8ea423f982697d2698cd85cd296892a3e12f2 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Tue, 20 May 2025 11:39:41 -0700 Subject: [PATCH 31/45] Sync controller EnableWarmup comments. --- pkg/config/controller.go | 2 +- pkg/controller/controller.go | 5 ++++- pkg/internal/controller/controller.go | 15 +++++++++++---- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/pkg/config/controller.go b/pkg/config/controller.go index a825bed076..8920480319 100644 --- a/pkg/config/controller.go +++ b/pkg/config/controller.go @@ -68,7 +68,7 @@ type Controller struct { // // Setting EnableWarmup to true and NeedLeaderElection to true means the controller will start its // sources without waiting to become leader. - // Setting EnableWarmup to true and NeedLeaderElection is false is a no-op as controllers without + // Setting EnableWarmup to true and NeedLeaderElection to false is a no-op as controllers without // leader election do not wait on leader election to start their sources. // Defaults to false. EnableWarmup *bool diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index b56d59ab2d..3eb92f5b6b 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -100,7 +100,10 @@ type TypedOptions[request comparable] struct { // improves leadership failover time, as the caches will be prepopulated before the controller // transitions to be leader. // - // When set to true, the controller will start its sources without transitioning to be leader. + // Setting EnableWarmup to true and NeedLeaderElection to true means the controller will start its + // sources without waiting to become leader. + // Setting EnableWarmup to true and NeedLeaderElection to false is a no-op as controllers without + // leader election do not wait on leader election to start their sources. // Defaults to false. EnableWarmup *bool } diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 846aa67538..86ed01f7b3 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -143,10 +143,17 @@ type Controller[request comparable] struct { // LeaderElected indicates whether the controller is leader elected or always running. LeaderElected *bool - // EnableWarmup specifies whether the controller should start its sources - // when the manager is not the leader. - // Defaults to false, which means that the controller will wait for leader election to start - // before starting sources. + // EnableWarmup specifies whether the controller should start its sources when the manager is not + // the leader. This is useful for cases where sources take a long time to start, as it allows + // for the controller to warm up its caches even before it is elected as the leader. This + // improves leadership failover time, as the caches will be prepopulated before the controller + // transitions to be leader. + // + // Setting EnableWarmup to true and NeedLeaderElection to true means the controller will start its + // sources without waiting to become leader. + // Setting EnableWarmup to true and NeedLeaderElection to false is a no-op as controllers without + // leader election do not wait on leader election to start their sources. + // Defaults to false. EnableWarmup *bool } From 5df573fde37b68239e45503a12e9afeac2ec7ce4 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Wed, 21 May 2025 11:38:00 -0700 Subject: [PATCH 32/45] Rename to startEventSourcesLocked and lock with c.mu --- pkg/internal/controller/controller.go | 11 ++-- pkg/internal/controller/controller_test.go | 63 +++++++++++++++------- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 86ed01f7b3..8d4ec31061 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -225,7 +225,10 @@ func (c *Controller[request]) Warmup(ctx context.Context) error { return nil } - return c.startEventSources(ctx) + c.mu.Lock() + defer c.mu.Unlock() + + return c.startEventSourcesLocked(ctx) } // Start implements controller.Controller. @@ -263,7 +266,7 @@ func (c *Controller[request]) Start(ctx context.Context) error { // NB(directxman12): launch the sources *before* trying to wait for the // caches to sync so that they have a chance to register their intended // caches. - if err := c.startEventSources(ctx); err != nil { + if err := c.startEventSourcesLocked(ctx); err != nil { return err } @@ -296,9 +299,9 @@ func (c *Controller[request]) Start(ctx context.Context) error { return nil } -// startEventSources launches all the sources registered with this controller and waits +// startEventSourcesLocked launches all the sources registered with this controller and waits // for them to sync. It returns an error if any of the sources fail to start or sync. -func (c *Controller[request]) startEventSources(ctx context.Context) error { +func (c *Controller[request]) startEventSourcesLocked(ctx context.Context) error { var retErr error c.didStartEventSourcesOnce.Do(func() { diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 625cd2bb5a..86494aa07a 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -386,13 +386,13 @@ var _ = Describe("controller", func() { }) }) - Describe("startEventSources", func() { + Describe("startEventSourcesLocked", func() { It("should return nil when no sources are provided", func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() ctrl.startWatches = []source.TypedSource[reconcile.Request]{} - err := ctrl.startEventSources(ctx) + err := ctrl.startEventSourcesLocked(ctx) Expect(err).NotTo(HaveOccurred()) }) @@ -409,7 +409,7 @@ var _ = Describe("controller", func() { // Set a sufficiently long timeout to avoid timeouts interfering with the error being returned ctrl.CacheSyncTimeout = 5 * time.Second ctrl.startWatches = []source.TypedSource[reconcile.Request]{src} - err := ctrl.startEventSources(ctx) + err := ctrl.startEventSourcesLocked(ctx) Expect(err).To(Equal(expectedErr)) }) @@ -423,7 +423,7 @@ var _ = Describe("controller", func() { ctrl.Name = "test-controller" ctrl.CacheSyncTimeout = 5 * time.Second - err := ctrl.startEventSources(ctx) + err := ctrl.startEventSourcesLocked(ctx) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("failed to wait for test-controller caches to sync")) }) @@ -439,7 +439,7 @@ var _ = Describe("controller", func() { ctrl.Name = "test-controller" ctrl.CacheSyncTimeout = 5 * time.Second - err := ctrl.startEventSources(ctx) + err := ctrl.startEventSourcesLocked(ctx) Expect(err).NotTo(HaveOccurred()) }) @@ -463,7 +463,7 @@ var _ = Describe("controller", func() { startErrCh := make(chan error) go func() { defer GinkgoRecover() - startErrCh <- ctrl.startEventSources(sourceCtx) + startErrCh <- ctrl.startEventSourcesLocked(sourceCtx) }() // Allow source to start successfully @@ -498,7 +498,7 @@ var _ = Describe("controller", func() { ctrl.startWatches = []source.TypedSource[reconcile.Request]{blockingSrc} - err := ctrl.startEventSources(ctx) + err := ctrl.startEventSourcesLocked(ctx) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("timed out waiting for source")) }) @@ -517,13 +517,13 @@ var _ = Describe("controller", func() { ctrl.startWatches = []source.TypedSource[reconcile.Request]{src} - By("Calling startEventSources multiple times in parallel") + By("Calling startEventSourcesLocked multiple times in parallel") var wg sync.WaitGroup for i := 1; i <= 5; i++ { wg.Add(1) go func() { defer wg.Done() - err := ctrl.startEventSources(ctx) + err := ctrl.startEventSourcesLocked(ctx) // All calls should return the same nil error Expect(err).NotTo(HaveOccurred()) }() @@ -533,12 +533,12 @@ var _ = Describe("controller", func() { Expect(startCount.Load()).To(Equal(int32(1)), "Source should only be started once even when called multiple times") }) - It("should block subsequent calls from returning until the first call to startEventSources has returned", func() { + It("should block subsequent calls from returning until the first call to startEventSourcesLocked has returned", func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() ctrl.CacheSyncTimeout = 5 * time.Second - // finishSourceChan is closed to unblock startEventSources from returning + // finishSourceChan is closed to unblock startEventSourcesLocked from returning finishSourceChan := make(chan struct{}) src := source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { @@ -547,28 +547,28 @@ var _ = Describe("controller", func() { }) ctrl.startWatches = []source.TypedSource[reconcile.Request]{src} - By("Calling startEventSources asynchronously") + By("Calling startEventSourcesLocked asynchronously") go func() { defer GinkgoRecover() - Expect(ctrl.startEventSources(ctx)).To(Succeed()) + Expect(ctrl.startEventSourcesLocked(ctx)).To(Succeed()) }() - By("Calling startEventSources again") + By("Calling startEventSourcesLocked again") var didSubsequentCallComplete atomic.Bool go func() { defer GinkgoRecover() - Expect(ctrl.startEventSources(ctx)).To(Succeed()) + Expect(ctrl.startEventSourcesLocked(ctx)).To(Succeed()) didSubsequentCallComplete.Store(true) }() - // Assert that second call to startEventSources is blocked while source has not finished + // Assert that second call to startEventSourcesLocked is blocked while source has not finished Consistently(didSubsequentCallComplete.Load).Should(BeFalse()) By("Finishing source start + sync") finishSourceChan <- struct{}{} - // Assert that second call to startEventSources is now complete - Eventually(didSubsequentCallComplete.Load).Should(BeTrue(), "startEventSources should complete after source is started and synced") + // Assert that second call to startEventSourcesLocked is now complete + Eventually(didSubsequentCallComplete.Load).Should(BeTrue(), "startEventSourcesLocked should complete after source is started and synced") }) It("should reset c.startWatches to nil after returning", func() { @@ -583,7 +583,7 @@ var _ = Describe("controller", func() { ctrl.startWatches = []source.TypedSource[reconcile.Request]{src} - err := ctrl.startEventSources(ctx) + err := ctrl.startEventSourcesLocked(ctx) Expect(err).NotTo(HaveOccurred()) Expect(ctrl.startWatches).To(BeNil(), "startWatches should be reset to nil after returning") }) @@ -1236,6 +1236,31 @@ var _ = Describe("controller", func() { Expect(<-runnableExecutionOrderChan).To(Equal(nonWarmupRunnableName)) }) + It("should not cause a data race when called concurrently", func() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ctrl.CacheSyncTimeout = time.Second + + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + return nil + }), + } + + var wg sync.WaitGroup + for i := 0; i < 5; i++ { + wg.Add(1) + go func() { + defer GinkgoRecover() + defer wg.Done() + Expect(ctrl.Warmup(ctx)).To(Succeed()) + }() + } + + wg.Wait() + }) + It("should not race with Start and only start sources once", func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() From d8650df4da3becec198d63b3e35e6037fcd1b990 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Wed, 21 May 2025 12:32:41 -0700 Subject: [PATCH 33/45] Address edge case for watch added after warmup completes. --- pkg/internal/controller/controller.go | 19 ++++++++++++++---- pkg/internal/controller/controller_test.go | 23 ++++++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 8d4ec31061..e544ceff21 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -127,6 +127,11 @@ type Controller[request comparable] struct { // startWatches maintains a list of sources, handlers, and predicates to start when the controller is started. startWatches []source.TypedSource[request] + // startedEventSources is used to track if the event sources have been started. + // It ensures that we append sources to c.startWatches only until we call Start() / Warmup() + // It is true if startEventSourcesLocked has been called at least once. + startedEventSources bool + // didStartEventSourcesOnce is used to ensure that the event sources are only started once. didStartEventSourcesOnce sync.Once @@ -199,10 +204,9 @@ func (c *Controller[request]) Watch(src source.TypedSource[request]) error { c.mu.Lock() defer c.mu.Unlock() - // Controller hasn't started yet, store the watches locally and return. - // - // These watches are going to be held on the controller struct until the manager or user calls Start(...). - if !c.Started { + // Sources weren't started yet, store the watches locally and return. + // These sources are going to be held until either Warmup() or Start(...) is called. + if !c.startedEventSources { c.startWatches = append(c.startWatches, src) return nil } @@ -228,6 +232,9 @@ func (c *Controller[request]) Warmup(ctx context.Context) error { c.mu.Lock() defer c.mu.Unlock() + // Set the ctx so later calls to watch use this internal context of nil + c.ctx = ctx + return c.startEventSourcesLocked(ctx) } @@ -364,6 +371,10 @@ func (c *Controller[request]) startEventSourcesLocked(ctx context.Context) error // We should never hold watches more than necessary, each watch source can hold a backing cache, // which won't be garbage collected if we hold a reference to it. c.startWatches = nil + + // Mark event sources as started after resetting the startWatches slice to no-op a Watch() + // call after event sources have been started. + c.startedEventSources = true }) return retErr diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 86494aa07a..9c82d77df0 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -1293,6 +1293,29 @@ var _ = Describe("controller", func() { Expect(watchStartedCount.Load()).To(Equal(int32(1)), "source should only be started once") Expect(ctrl.startWatches).To(BeNil(), "startWatches should be reset to nil after they are started") }) + + It("should start sources added after Warmup is called", func() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ctrl.CacheSyncTimeout = time.Second + + Expect(ctrl.Warmup(ctx)).To(Succeed()) + + By("starting a watch after warmup is added") + var didWatchStart atomic.Bool + Expect(ctrl.Watch(source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + didWatchStart.Store(true) + return nil + }))).To(Succeed()) + + go func() { + defer GinkgoRecover() + Expect(ctrl.Start(ctx)).To(Succeed()) + }() + + Eventually(didWatchStart.Load).Should(BeTrue(), "watch should be started if it is added after Warmup") + }) }) Describe("Warmup with warmup disabled", func() { From a03f404e018d13a251805523f6dff65f62b2102b Mon Sep 17 00:00:00 2001 From: godwinpang Date: Wed, 21 May 2025 16:35:00 -0700 Subject: [PATCH 34/45] Fix test description and set leaderelection==true --- pkg/internal/controller/controller_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 9c82d77df0..ab3e2dd1e7 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -1217,7 +1217,7 @@ var _ = Describe("controller", func() { cfg, err := testenv.Start() Expect(err).NotTo(HaveOccurred()) m, err := manager.New(cfg, manager.Options{ - LeaderElection: false, + LeaderElection: true, }) Expect(err).NotTo(HaveOccurred()) @@ -1261,7 +1261,7 @@ var _ = Describe("controller", func() { wg.Wait() }) - It("should not race with Start and only start sources once", func() { + It("should not cause a data race when called concurrently with Start and only start sources once", func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() From dcf4b8b411f7c0f3356435a9a51a238df96c8bf7 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Wed, 21 May 2025 16:52:50 -0700 Subject: [PATCH 35/45] Fix lint. --- pkg/controller/controller.go | 2 +- pkg/internal/controller/controller.go | 8 +++++--- pkg/internal/controller/controller_test.go | 6 +++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 3eb92f5b6b..03e51abcc5 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -260,7 +260,7 @@ func NewTypedUnmanaged[request comparable](name string, options TypedOptions[req } // Create controller with dependencies set - return controller.New[request](controller.ControllerOptions[request]{ + return controller.New[request](controller.Options[request]{ Do: options.Reconciler, RateLimiter: options.RateLimiter, NewQueue: options.NewQueue, diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index e544ceff21..895e3eb621 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -38,7 +38,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" ) -type ControllerOptions[request comparable] struct { +// Options are the arguments for creating a new Controller. +type Options[request comparable] struct { // Reconciler is a function that can be called at any time with the Name / Namespace of an object and // ensures that the state of the system matches the state specified in the object. // Defaults to the DefaultReconcileFunc. @@ -129,7 +130,7 @@ type Controller[request comparable] struct { // startedEventSources is used to track if the event sources have been started. // It ensures that we append sources to c.startWatches only until we call Start() / Warmup() - // It is true if startEventSourcesLocked has been called at least once. + // It is true if startEventSourcesLocked has been called at least once. startedEventSources bool // didStartEventSourcesOnce is used to ensure that the event sources are only started once. @@ -162,7 +163,8 @@ type Controller[request comparable] struct { EnableWarmup *bool } -func New[request comparable](options ControllerOptions[request]) *Controller[request] { +// New returns a new Controller configured with the given options. +func New[request comparable](options Options[request]) *Controller[request] { return &Controller[request]{ Do: options.Do, RateLimiter: options.RateLimiter, diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index ab3e2dd1e7..bac4cffb27 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -72,7 +72,7 @@ var _ = Describe("controller", func() { queue = &controllertest.Queue{ TypedInterface: workqueue.NewTyped[reconcile.Request](), } - ctrl = New[reconcile.Request](ControllerOptions[reconcile.Request]{ + ctrl = New[reconcile.Request](Options[reconcile.Request]{ MaxConcurrentReconciles: 1, Do: fakeReconcile, NewQueue: func(string, workqueue.TypedRateLimiter[reconcile.Request]) workqueue.TypedRateLimitingInterface[reconcile.Request] { @@ -353,7 +353,7 @@ var _ = Describe("controller", func() { TypedRateLimitingInterface: &controllertest.TypedQueue[TestRequest]{ TypedInterface: workqueue.NewTyped[TestRequest](), }} - ctrl := New[TestRequest](ControllerOptions[TestRequest]{ + ctrl := New[TestRequest](Options[TestRequest]{ NewQueue: func(string, workqueue.TypedRateLimiter[TestRequest]) workqueue.TypedRateLimitingInterface[TestRequest] { return queue }, @@ -1192,7 +1192,7 @@ var _ = Describe("controller", func() { }), } - nonWarmupCtrl := New[reconcile.Request](ControllerOptions[reconcile.Request]{ + nonWarmupCtrl := New[reconcile.Request](Options[reconcile.Request]{ MaxConcurrentReconciles: 1, Do: fakeReconcile, NewQueue: func(string, workqueue.TypedRateLimiter[reconcile.Request]) workqueue.TypedRateLimitingInterface[reconcile.Request] { From ba51d280cee0066899346eeefbbc1436b7372254 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Wed, 21 May 2025 18:39:19 -0700 Subject: [PATCH 36/45] Change shutdown order to shutdown warmup runnables in parallel with other runnables. --- pkg/internal/controller/controller_test.go | 49 +++++++++++++++++++++- pkg/manager/internal.go | 16 +++++-- 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index bac4cffb27..feb0b60011 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -29,6 +29,7 @@ import ( . "github.com/onsi/gomega" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" + "go.uber.org/goleak" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -1217,7 +1218,9 @@ var _ = Describe("controller", func() { cfg, err := testenv.Start() Expect(err).NotTo(HaveOccurred()) m, err := manager.New(cfg, manager.Options{ - LeaderElection: true, + LeaderElection: true, + LeaderElectionID: "some-leader-election-id", + LeaderElectionNamespace: "default", }) Expect(err).NotTo(HaveOccurred()) @@ -1316,6 +1319,50 @@ var _ = Describe("controller", func() { Eventually(didWatchStart.Load).Should(BeTrue(), "watch should be started if it is added after Warmup") }) + + DescribeTable("should not leak goroutines when manager is stopped with warmup runnable", + func(leaderElection bool) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ctrl.CacheSyncTimeout = time.Second + + By("Creating a manager") + testenv = &envtest.Environment{} + cfg, err := testenv.Start() + Expect(err).NotTo(HaveOccurred()) + m, err := manager.New(cfg, manager.Options{ + LeaderElection: leaderElection, + LeaderElectionID: "some-leader-election-id", + LeaderElectionNamespace: "default", + }) + Expect(err).NotTo(HaveOccurred()) + + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + <-ctx.Done() + return nil + }), + } + Expect(m.Add(ctrl)).To(Succeed()) + + // ignore needs to go after the testenv.Start() call to ignore the apiserver + // process + currentGRs := goleak.IgnoreCurrent() + go func() { + defer GinkgoRecover() + Expect(m.Start(ctx)).To(Succeed()) + }() + + <-m.Elected() + By("stopping the manager via context") + cancel() + + Eventually(func() error { return goleak.Find(currentGRs) }).Should(Succeed()) + }, + Entry("manager with leader election enabled", true), + Entry("manager without leader election enabled", false), + ) }) Describe("Warmup with warmup disabled", func() { diff --git a/pkg/manager/internal.go b/pkg/manager/internal.go index 54355600a4..a9f91cbdd5 100644 --- a/pkg/manager/internal.go +++ b/pkg/manager/internal.go @@ -539,6 +539,18 @@ func (cm *controllerManager) engageStopProcedure(stopComplete <-chan struct{}) e }() go func() { + go func() { + // Stop the warmup runnables in a separate goroutine to avoid blocking. + // It is important to stop the warmup runnables in parallel with the other runnables + // since we cannot assume ordering of whether or not one of the warmup runnables or one + // of the other runnables is holding a lock. + // Cancelling the wrong runnable (one that is not holding the lock) will cause the + // shutdown sequence to block indefinitely as it will wait for the runnable that is + // holding the lock to finish. + cm.logger.Info("Stopping and waiting for warmup runnables") + cm.runnables.Warmup.StopAndWait(cm.shutdownCtx) + }() + // First stop the non-leader election runnables. cm.logger.Info("Stopping and waiting for non leader election runnables") cm.runnables.Others.StopAndWait(cm.shutdownCtx) @@ -549,10 +561,6 @@ func (cm *controllerManager) engageStopProcedure(stopComplete <-chan struct{}) e cm.runnables.LeaderElection.startOnce.Do(func() {}) cm.runnables.LeaderElection.StopAndWait(cm.shutdownCtx) - // Stop the warmup runnables - cm.logger.Info("Stopping and waiting for warmup runnables") - cm.runnables.Warmup.StopAndWait(cm.shutdownCtx) - // Stop the caches before the leader election runnables, this is an important // step to make sure that we don't race with the reconcilers by receiving more events // from the API servers and enqueueing them. From ea2aa0ea782e12e9c2eb13c48e642fb75ebef37a Mon Sep 17 00:00:00 2001 From: godwinpang Date: Thu, 22 May 2025 01:50:12 -0700 Subject: [PATCH 37/45] Fix test races by ensuring goroutines do not outlive their It blocks. --- pkg/internal/controller/controller.go | 2 +- pkg/internal/controller/controller_test.go | 45 +++++++++++++++------- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 895e3eb621..2dc2afdb99 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -234,7 +234,7 @@ func (c *Controller[request]) Warmup(ctx context.Context) error { c.mu.Lock() defer c.mu.Unlock() - // Set the ctx so later calls to watch use this internal context of nil + // Set the ctx so later calls to watch use this internal context c.ctx = ctx return c.startEventSourcesLocked(ctx) diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index feb0b60011..b2a67a03c4 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -549,8 +549,12 @@ var _ = Describe("controller", func() { ctrl.startWatches = []source.TypedSource[reconcile.Request]{src} By("Calling startEventSourcesLocked asynchronously") + wg := sync.WaitGroup{} go func() { defer GinkgoRecover() + defer wg.Done() + + wg.Add(1) Expect(ctrl.startEventSourcesLocked(ctx)).To(Succeed()) }() @@ -558,6 +562,9 @@ var _ = Describe("controller", func() { var didSubsequentCallComplete atomic.Bool go func() { defer GinkgoRecover() + defer wg.Done() + + wg.Add(1) Expect(ctrl.startEventSourcesLocked(ctx)).To(Succeed()) didSubsequentCallComplete.Store(true) }() @@ -570,6 +577,7 @@ var _ = Describe("controller", func() { // Assert that second call to startEventSourcesLocked is now complete Eventually(didSubsequentCallComplete.Load).Should(BeTrue(), "startEventSourcesLocked should complete after source is started and synced") + wg.Wait() }) It("should reset c.startWatches to nil after returning", func() { @@ -1155,20 +1163,18 @@ var _ = Describe("controller", func() { }), } - // Wait for the goroutines to finish before returning to avoid racing with the - // assignment in BeforeEach block. - var wg sync.WaitGroup + // channel to prevent the goroutine from outliving the It test + waitChan := make(chan struct{}) // Invoked in a goroutine because Warmup will block - wg.Add(1) go func() { defer GinkgoRecover() - defer wg.Done() + defer close(waitChan) Expect(ctrl.Warmup(ctx)).To(Succeed()) }() cancel() - wg.Wait() + <-waitChan }) It("should be called before leader election runnables if warmup is enabled", func() { @@ -1176,9 +1182,7 @@ var _ = Describe("controller", func() { // called in the warmup phase before the leader election runnables are started. It // catches regressions in the controller that would not implement warmupRunnable from // pkg/manager. - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() By("Creating a channel to track execution order") runnableExecutionOrderChan := make(chan string, 2) @@ -1229,14 +1233,19 @@ var _ = Describe("controller", func() { Expect(m.Add(nonWarmupCtrl)).To(Succeed()) By("Starting the manager") + waitChan := make(chan struct{}) go func() { defer GinkgoRecover() + defer close(waitChan) Expect(m.Start(ctx)).To(Succeed()) }() <-m.Elected() Expect(<-runnableExecutionOrderChan).To(Equal(warmupRunnableName)) Expect(<-runnableExecutionOrderChan).To(Equal(nonWarmupRunnableName)) + + cancel() + <-waitChan }) It("should not cause a data race when called concurrently", func() { @@ -1299,7 +1308,6 @@ var _ = Describe("controller", func() { It("should start sources added after Warmup is called", func() { ctx, cancel := context.WithCancel(context.Background()) - defer cancel() ctrl.CacheSyncTimeout = time.Second @@ -1312,12 +1320,17 @@ var _ = Describe("controller", func() { return nil }))).To(Succeed()) + waitChan := make(chan struct{}) go func() { defer GinkgoRecover() Expect(ctrl.Start(ctx)).To(Succeed()) + close(waitChan) }() Eventually(didWatchStart.Load).Should(BeTrue(), "watch should be started if it is added after Warmup") + + cancel() + <-waitChan }) DescribeTable("should not leak goroutines when manager is stopped with warmup runnable", @@ -1349,9 +1362,11 @@ var _ = Describe("controller", func() { // ignore needs to go after the testenv.Start() call to ignore the apiserver // process currentGRs := goleak.IgnoreCurrent() + waitChan := make(chan struct{}) go func() { defer GinkgoRecover() Expect(m.Start(ctx)).To(Succeed()) + close(waitChan) }() <-m.Elected() @@ -1359,9 +1374,10 @@ var _ = Describe("controller", func() { cancel() Eventually(func() error { return goleak.Find(currentGRs) }).Should(Succeed()) + <-waitChan }, - Entry("manager with leader election enabled", true), - Entry("manager without leader election enabled", false), + Entry("and with leader election enabled", true), + Entry("and without leader election enabled", false), ) }) @@ -1373,7 +1389,6 @@ var _ = Describe("controller", func() { It("should not start sources when Warmup is called if warmup is disabled but start it when Start is called.", func() { // Setup controller with sources that complete successfully ctx, cancel := context.WithCancel(context.Background()) - defer cancel() ctrl.CacheSyncTimeout = time.Second var isSourceStarted atomic.Bool @@ -1391,12 +1406,16 @@ var _ = Describe("controller", func() { Expect(isSourceStarted.Load()).To(BeFalse()) By("Calling Start when EnableWarmup is false") - // Now call Start + waitChan := make(chan struct{}) + go func() { defer GinkgoRecover() Expect(ctrl.Start(ctx)).To(Succeed()) + close(waitChan) }() Eventually(isSourceStarted.Load).Should(BeTrue()) + cancel() + <-waitChan }) }) }) From 730b30e19ad3f9aa16901e1fe4ac9b61d9d88926 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Thu, 22 May 2025 02:29:51 -0700 Subject: [PATCH 38/45] Block on source start on context cancel. --- pkg/internal/controller/controller.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 2dc2afdb99..03b2d78d26 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -356,7 +356,8 @@ func (c *Controller[request]) startEventSourcesLocked(ctx context.Context) error case err := <-sourceStartErrChan: return err case <-sourceStartCtx.Done(): - if didStartSyncingSource.Load() { // We are racing with WaitForSync, wait for it to let it tell us what happened + defer func() { <-sourceStartErrChan }() // Ensure that watch.Start has been called to avoid prematurely releasing lock before accessing c.Queue + if didStartSyncingSource.Load() { // We are racing with WaitForSync, wait for it to let it tell us what happened return <-sourceStartErrChan } if ctx.Err() != nil { // Don't return an error if the root context got cancelled From bca3e2a6fd78832590b724abadd0c69ed44649ee Mon Sep 17 00:00:00 2001 From: godwinpang Date: Thu, 22 May 2025 03:16:19 -0700 Subject: [PATCH 39/45] Guard access to c.Queue explicitly. --- pkg/internal/controller/controller.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 03b2d78d26..e7c2ad9a8e 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -332,11 +332,15 @@ func (c *Controller[request]) startEventSourcesLocked(ctx context.Context) error sourceStartCtx, cancel := context.WithTimeout(ctx, c.CacheSyncTimeout) defer cancel() - sourceStartErrChan := make(chan error, 1) // Buffer chan to not leak goroutine if we time out + sourceStartErrChan := make(chan error, 1) // Buffer chan to not leak goroutine if we time out + hasAccessedQueueChan := make(chan struct{}) // go func() { defer close(sourceStartErrChan) log.Info("Starting EventSource") - if err := watch.Start(ctx, c.Queue); err != nil { + + q := c.Queue + close(hasAccessedQueueChan) + if err := watch.Start(ctx, q); err != nil { sourceStartErrChan <- err return } @@ -356,8 +360,8 @@ func (c *Controller[request]) startEventSourcesLocked(ctx context.Context) error case err := <-sourceStartErrChan: return err case <-sourceStartCtx.Done(): - defer func() { <-sourceStartErrChan }() // Ensure that watch.Start has been called to avoid prematurely releasing lock before accessing c.Queue - if didStartSyncingSource.Load() { // We are racing with WaitForSync, wait for it to let it tell us what happened + defer func() { <-hasAccessedQueueChan }() // Ensure that watch.Start has been called to avoid prematurely releasing lock before accessing c.Queue + if didStartSyncingSource.Load() { // We are racing with WaitForSync, wait for it to let it tell us what happened return <-sourceStartErrChan } if ctx.Err() != nil { // Don't return an error if the root context got cancelled From 12e938c905595368c7ebfca5462fce673be73fb6 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Thu, 22 May 2025 14:49:54 -0700 Subject: [PATCH 40/45] Initialize queue in warmup with test. --- pkg/internal/controller/controller.go | 40 ++++++------- pkg/internal/controller/controller_test.go | 67 ++++++++++++++++------ 2 files changed, 68 insertions(+), 39 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index e7c2ad9a8e..30bd2e0a26 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -128,10 +128,10 @@ type Controller[request comparable] struct { // startWatches maintains a list of sources, handlers, and predicates to start when the controller is started. startWatches []source.TypedSource[request] - // startedEventSources is used to track if the event sources have been started. + // startedEventSourcesAndQueue is used to track if the event sources have been started. // It ensures that we append sources to c.startWatches only until we call Start() / Warmup() - // It is true if startEventSourcesLocked has been called at least once. - startedEventSources bool + // It is true if startEventSourcesAndQueueLocked has been called at least once. + startedEventSourcesAndQueue bool // didStartEventSourcesOnce is used to ensure that the event sources are only started once. didStartEventSourcesOnce sync.Once @@ -208,7 +208,7 @@ func (c *Controller[request]) Watch(src source.TypedSource[request]) error { // Sources weren't started yet, store the watches locally and return. // These sources are going to be held until either Warmup() or Start(...) is called. - if !c.startedEventSources { + if !c.startedEventSourcesAndQueue { c.startWatches = append(c.startWatches, src) return nil } @@ -237,7 +237,7 @@ func (c *Controller[request]) Warmup(ctx context.Context) error { // Set the ctx so later calls to watch use this internal context c.ctx = ctx - return c.startEventSourcesLocked(ctx) + return c.startEventSourcesAndQueueLocked(ctx) } // Start implements controller.Controller. @@ -254,17 +254,6 @@ func (c *Controller[request]) Start(ctx context.Context) error { // Set the internal context. c.ctx = ctx - queue := c.NewQueue(c.Name, c.RateLimiter) - if priorityQueue, isPriorityQueue := queue.(priorityqueue.PriorityQueue[request]); isPriorityQueue { - c.Queue = priorityQueue - } else { - c.Queue = &priorityQueueWrapper[request]{TypedRateLimitingInterface: queue} - } - go func() { - <-ctx.Done() - c.Queue.ShutDown() - }() - wg := &sync.WaitGroup{} err := func() error { defer c.mu.Unlock() @@ -275,7 +264,7 @@ func (c *Controller[request]) Start(ctx context.Context) error { // NB(directxman12): launch the sources *before* trying to wait for the // caches to sync so that they have a chance to register their intended // caches. - if err := c.startEventSourcesLocked(ctx); err != nil { + if err := c.startEventSourcesAndQueueLocked(ctx); err != nil { return err } @@ -308,12 +297,23 @@ func (c *Controller[request]) Start(ctx context.Context) error { return nil } -// startEventSourcesLocked launches all the sources registered with this controller and waits +// startEventSourcesAndQueueLocked launches all the sources registered with this controller and waits // for them to sync. It returns an error if any of the sources fail to start or sync. -func (c *Controller[request]) startEventSourcesLocked(ctx context.Context) error { +func (c *Controller[request]) startEventSourcesAndQueueLocked(ctx context.Context) error { var retErr error c.didStartEventSourcesOnce.Do(func() { + queue := c.NewQueue(c.Name, c.RateLimiter) + if priorityQueue, isPriorityQueue := queue.(priorityqueue.PriorityQueue[request]); isPriorityQueue { + c.Queue = priorityQueue + } else { + c.Queue = &priorityQueueWrapper[request]{TypedRateLimitingInterface: queue} + } + go func() { + <-ctx.Done() + c.Queue.ShutDown() + }() + errGroup := &errgroup.Group{} for _, watch := range c.startWatches { log := c.LogConstructor(nil) @@ -381,7 +381,7 @@ func (c *Controller[request]) startEventSourcesLocked(ctx context.Context) error // Mark event sources as started after resetting the startWatches slice to no-op a Watch() // call after event sources have been started. - c.startedEventSources = true + c.startedEventSourcesAndQueue = true }) return retErr diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index b2a67a03c4..1393d453f4 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -387,16 +387,26 @@ var _ = Describe("controller", func() { }) }) - Describe("startEventSourcesLocked", func() { + Describe("startEventSourcesAndQueueLocked", func() { It("should return nil when no sources are provided", func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() ctrl.startWatches = []source.TypedSource[reconcile.Request]{} - err := ctrl.startEventSourcesLocked(ctx) + err := ctrl.startEventSourcesAndQueueLocked(ctx) Expect(err).NotTo(HaveOccurred()) }) + It("should initialize controller queue when called", func() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ctrl.startWatches = []source.TypedSource[reconcile.Request]{} + err := ctrl.startEventSourcesAndQueueLocked(ctx) + Expect(err).NotTo(HaveOccurred()) + Expect(ctrl.Queue).NotTo(BeNil()) + }) + It("should return an error if a source fails to start", func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -410,7 +420,7 @@ var _ = Describe("controller", func() { // Set a sufficiently long timeout to avoid timeouts interfering with the error being returned ctrl.CacheSyncTimeout = 5 * time.Second ctrl.startWatches = []source.TypedSource[reconcile.Request]{src} - err := ctrl.startEventSourcesLocked(ctx) + err := ctrl.startEventSourcesAndQueueLocked(ctx) Expect(err).To(Equal(expectedErr)) }) @@ -424,7 +434,7 @@ var _ = Describe("controller", func() { ctrl.Name = "test-controller" ctrl.CacheSyncTimeout = 5 * time.Second - err := ctrl.startEventSourcesLocked(ctx) + err := ctrl.startEventSourcesAndQueueLocked(ctx) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("failed to wait for test-controller caches to sync")) }) @@ -440,7 +450,7 @@ var _ = Describe("controller", func() { ctrl.Name = "test-controller" ctrl.CacheSyncTimeout = 5 * time.Second - err := ctrl.startEventSourcesLocked(ctx) + err := ctrl.startEventSourcesAndQueueLocked(ctx) Expect(err).NotTo(HaveOccurred()) }) @@ -464,7 +474,7 @@ var _ = Describe("controller", func() { startErrCh := make(chan error) go func() { defer GinkgoRecover() - startErrCh <- ctrl.startEventSourcesLocked(sourceCtx) + startErrCh <- ctrl.startEventSourcesAndQueueLocked(sourceCtx) }() // Allow source to start successfully @@ -499,7 +509,7 @@ var _ = Describe("controller", func() { ctrl.startWatches = []source.TypedSource[reconcile.Request]{blockingSrc} - err := ctrl.startEventSourcesLocked(ctx) + err := ctrl.startEventSourcesAndQueueLocked(ctx) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("timed out waiting for source")) }) @@ -518,13 +528,13 @@ var _ = Describe("controller", func() { ctrl.startWatches = []source.TypedSource[reconcile.Request]{src} - By("Calling startEventSourcesLocked multiple times in parallel") + By("Calling startEventSourcesAndQueueLocked multiple times in parallel") var wg sync.WaitGroup for i := 1; i <= 5; i++ { wg.Add(1) go func() { defer wg.Done() - err := ctrl.startEventSourcesLocked(ctx) + err := ctrl.startEventSourcesAndQueueLocked(ctx) // All calls should return the same nil error Expect(err).NotTo(HaveOccurred()) }() @@ -534,12 +544,12 @@ var _ = Describe("controller", func() { Expect(startCount.Load()).To(Equal(int32(1)), "Source should only be started once even when called multiple times") }) - It("should block subsequent calls from returning until the first call to startEventSourcesLocked has returned", func() { + It("should block subsequent calls from returning until the first call to startEventSourcesAndQueueLocked has returned", func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() ctrl.CacheSyncTimeout = 5 * time.Second - // finishSourceChan is closed to unblock startEventSourcesLocked from returning + // finishSourceChan is closed to unblock startEventSourcesAndQueueLocked from returning finishSourceChan := make(chan struct{}) src := source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { @@ -548,35 +558,35 @@ var _ = Describe("controller", func() { }) ctrl.startWatches = []source.TypedSource[reconcile.Request]{src} - By("Calling startEventSourcesLocked asynchronously") + By("Calling startEventSourcesAndQueueLocked asynchronously") wg := sync.WaitGroup{} go func() { defer GinkgoRecover() defer wg.Done() wg.Add(1) - Expect(ctrl.startEventSourcesLocked(ctx)).To(Succeed()) + Expect(ctrl.startEventSourcesAndQueueLocked(ctx)).To(Succeed()) }() - By("Calling startEventSourcesLocked again") + By("Calling startEventSourcesAndQueueLocked again") var didSubsequentCallComplete atomic.Bool go func() { defer GinkgoRecover() defer wg.Done() wg.Add(1) - Expect(ctrl.startEventSourcesLocked(ctx)).To(Succeed()) + Expect(ctrl.startEventSourcesAndQueueLocked(ctx)).To(Succeed()) didSubsequentCallComplete.Store(true) }() - // Assert that second call to startEventSourcesLocked is blocked while source has not finished + // Assert that second call to startEventSourcesAndQueueLocked is blocked while source has not finished Consistently(didSubsequentCallComplete.Load).Should(BeFalse()) By("Finishing source start + sync") finishSourceChan <- struct{}{} - // Assert that second call to startEventSourcesLocked is now complete - Eventually(didSubsequentCallComplete.Load).Should(BeTrue(), "startEventSourcesLocked should complete after source is started and synced") + // Assert that second call to startEventSourcesAndQueueLocked is now complete + Eventually(didSubsequentCallComplete.Load).Should(BeTrue(), "startEventSourcesAndQueueLocked should complete after source is started and synced") wg.Wait() }) @@ -592,7 +602,7 @@ var _ = Describe("controller", func() { ctrl.startWatches = []source.TypedSource[reconcile.Request]{src} - err := ctrl.startEventSourcesLocked(ctx) + err := ctrl.startEventSourcesAndQueueLocked(ctx) Expect(err).NotTo(HaveOccurred()) Expect(ctrl.startWatches).To(BeNil(), "startWatches should be reset to nil after returning") }) @@ -1150,6 +1160,25 @@ var _ = Describe("controller", func() { Expect(err.Error()).To(ContainSubstring("sync error")) }) + It("should call Start on sources with the appropriate non-nil queue", func() { + ctrl.CacheSyncTimeout = 10 * time.Second + started := false + ctx, cancel := context.WithCancel(context.Background()) + src := source.Func(func(ctx context.Context, q workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + defer GinkgoRecover() + Expect(q).ToNot(BeNil()) + Expect(q).To(Equal(ctrl.Queue)) + + started = true + cancel() // Cancel the context so ctrl.Start() doesn't block forever + return nil + }) + Expect(ctrl.Watch(src)).To(Succeed()) + Expect(ctrl.Warmup(ctx)).To(Succeed()) + Expect(ctrl.Queue).ToNot(BeNil()) + Expect(started).To(BeTrue()) + }) + It("should return true if context is cancelled while waiting for source to start", func() { // Setup controller with sources that complete with error ctx, cancel := context.WithCancel(context.Background()) From de4232dfbcd0558a7f14411438c5dacb4b4699cf Mon Sep 17 00:00:00 2001 From: godwinpang Date: Thu, 22 May 2025 14:51:25 -0700 Subject: [PATCH 41/45] Fix watch comment. --- pkg/internal/controller/controller.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 30bd2e0a26..7febe07a0f 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -379,8 +379,8 @@ func (c *Controller[request]) startEventSourcesAndQueueLocked(ctx context.Contex // which won't be garbage collected if we hold a reference to it. c.startWatches = nil - // Mark event sources as started after resetting the startWatches slice to no-op a Watch() - // call after event sources have been started. + // Mark event sources as started after resetting the startWatches slice so that watches from + // a new Watch() call are immediately started. c.startedEventSourcesAndQueue = true }) From a3dc13b6adfb01f924cfbcf8c3ad345bfb2473bd Mon Sep 17 00:00:00 2001 From: godwinpang Date: Thu, 22 May 2025 15:23:01 -0700 Subject: [PATCH 42/45] Add warmup to manager and controller integration tests. --- pkg/controller/controller_integration_test.go | 83 ++++++++++++++----- .../internal/integration/manager_test.go | 21 +++-- 2 files changed, 79 insertions(+), 25 deletions(-) diff --git a/pkg/controller/controller_integration_test.go b/pkg/controller/controller_integration_test.go index c8e8a790fc..ee6a45811f 100644 --- a/pkg/controller/controller_integration_test.go +++ b/pkg/controller/controller_integration_test.go @@ -18,16 +18,21 @@ package controller_test import ( "context" + "fmt" + "strconv" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/cache" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/controller/controllertest" "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" @@ -48,30 +53,46 @@ var _ = Describe("controller", func() { Describe("controller", func() { // TODO(directxman12): write a whole suite of controller-client interaction tests - It("should reconcile", func() { + // Since all tests are run in parallel and share the same testenv, we namespace the objects + // created by using a namespace per entry, and adding a watch predicate that filters by + // namespace. + DescribeTable("should reconcile", func(enableWarmup bool) { By("Creating the Manager") cm, err := manager.New(cfg, manager.Options{}) Expect(err).NotTo(HaveOccurred()) By("Creating the Controller") - instance, err := controller.New("foo-controller", cm, controller.Options{ - Reconciler: reconcile.Func( - func(_ context.Context, request reconcile.Request) (reconcile.Result, error) { - reconciled <- request - return reconcile.Result{}, nil - }), - }) + instance, err := controller.New( + fmt.Sprintf("foo-controller-%t", enableWarmup), + cm, + controller.Options{ + Reconciler: reconcile.Func( + func(_ context.Context, request reconcile.Request) (reconcile.Result, error) { + reconciled <- request + return reconcile.Result{}, nil + }), + EnableWarmup: ptr.To(enableWarmup), + }, + ) Expect(err).NotTo(HaveOccurred()) + testNamespace := strconv.FormatBool(enableWarmup) + By("Watching Resources") err = instance.Watch( source.Kind(cm.GetCache(), &appsv1.ReplicaSet{}, handler.TypedEnqueueRequestForOwner[*appsv1.ReplicaSet](cm.GetScheme(), cm.GetRESTMapper(), &appsv1.Deployment{}), + makeNamespacePredicate[*appsv1.ReplicaSet](testNamespace), ), ) Expect(err).NotTo(HaveOccurred()) - err = instance.Watch(source.Kind(cm.GetCache(), &appsv1.Deployment{}, &handler.TypedEnqueueRequestForObject[*appsv1.Deployment]{})) + err = instance.Watch( + source.Kind(cm.GetCache(), &appsv1.Deployment{}, + &handler.TypedEnqueueRequestForObject[*appsv1.Deployment]{}, + makeNamespacePredicate[*appsv1.Deployment](testNamespace), + ), + ) Expect(err).NotTo(HaveOccurred()) err = cm.GetClient().Get(ctx, types.NamespacedName{Name: "foo"}, &corev1.Namespace{}) @@ -110,19 +131,25 @@ var _ = Describe("controller", func() { }, } expectedReconcileRequest := reconcile.Request{NamespacedName: types.NamespacedName{ - Namespace: "default", + Namespace: testNamespace, Name: "deployment-name", }} + By("Creating the test namespace") + _, err = clientset.CoreV1().Namespaces().Create(ctx, &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ Name: testNamespace }, + }, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + By("Invoking Reconciling for Create") - deployment, err = clientset.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{}) + deployment, err = clientset.AppsV1().Deployments(testNamespace).Create(ctx, deployment, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) Expect(<-reconciled).To(Equal(expectedReconcileRequest)) By("Invoking Reconciling for Update") newDeployment := deployment.DeepCopy() newDeployment.Labels = map[string]string{"foo": "bar"} - _, err = clientset.AppsV1().Deployments("default").Update(ctx, newDeployment, metav1.UpdateOptions{}) + _, err = clientset.AppsV1().Deployments(testNamespace).Update(ctx, newDeployment, metav1.UpdateOptions{}) Expect(err).NotTo(HaveOccurred()) Expect(<-reconciled).To(Equal(expectedReconcileRequest)) @@ -145,24 +172,24 @@ var _ = Describe("controller", func() { Template: deployment.Spec.Template, }, } - replicaset, err = clientset.AppsV1().ReplicaSets("default").Create(ctx, replicaset, metav1.CreateOptions{}) + replicaset, err = clientset.AppsV1().ReplicaSets(testNamespace).Create(ctx, replicaset, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) Expect(<-reconciled).To(Equal(expectedReconcileRequest)) By("Invoking Reconciling for an OwnedObject when it is updated") newReplicaset := replicaset.DeepCopy() newReplicaset.Labels = map[string]string{"foo": "bar"} - _, err = clientset.AppsV1().ReplicaSets("default").Update(ctx, newReplicaset, metav1.UpdateOptions{}) + _, err = clientset.AppsV1().ReplicaSets(testNamespace).Update(ctx, newReplicaset, metav1.UpdateOptions{}) Expect(err).NotTo(HaveOccurred()) Expect(<-reconciled).To(Equal(expectedReconcileRequest)) By("Invoking Reconciling for an OwnedObject when it is deleted") - err = clientset.AppsV1().ReplicaSets("default").Delete(ctx, replicaset.Name, metav1.DeleteOptions{}) + err = clientset.AppsV1().ReplicaSets(testNamespace).Delete(ctx, replicaset.Name, metav1.DeleteOptions{}) Expect(err).NotTo(HaveOccurred()) Expect(<-reconciled).To(Equal(expectedReconcileRequest)) By("Invoking Reconciling for Delete") - err = clientset.AppsV1().Deployments("default"). + err = clientset.AppsV1().Deployments(testNamespace). Delete(ctx, "deployment-name", metav1.DeleteOptions{}) Expect(err).NotTo(HaveOccurred()) Expect(<-reconciled).To(Equal(expectedReconcileRequest)) @@ -174,7 +201,12 @@ var _ = Describe("controller", func() { By("Invoking Reconciling for a pod when it is created when adding watcher dynamically") // Add new watcher dynamically - err = instance.Watch(source.Kind(cm.GetCache(), &corev1.Pod{}, &handler.TypedEnqueueRequestForObject[*corev1.Pod]{})) + err = instance.Watch( + source.Kind(cm.GetCache(), &corev1.Pod{}, + &handler.TypedEnqueueRequestForObject[*corev1.Pod]{}, + makeNamespacePredicate[*corev1.Pod](testNamespace), + ), + ) Expect(err).NotTo(HaveOccurred()) pod := &corev1.Pod{ @@ -194,16 +226,27 @@ var _ = Describe("controller", func() { }, } expectedReconcileRequest = reconcile.Request{NamespacedName: types.NamespacedName{ - Namespace: "default", + Namespace: testNamespace, Name: "pod-name", }} - _, err = clientset.CoreV1().Pods("default").Create(ctx, pod, metav1.CreateOptions{}) + _, err = clientset.CoreV1().Pods(testNamespace).Create(ctx, pod, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) Expect(<-reconciled).To(Equal(expectedReconcileRequest)) - }) + }, + Entry("with controller warmup enabled", true), + Entry("with controller warmup not enabled", false), + ) }) }) +// makeNamespacePredicate returns a predicate that filters out all objects not in the passed in +// namespace. +func makeNamespacePredicate[object client.Object](namespace string) predicate.TypedPredicate[object] { + return predicate.NewTypedPredicateFuncs[object](func(obj object) bool { + return obj.GetNamespace() == namespace + }) +} + func truePtr() *bool { t := true return &t diff --git a/pkg/manager/internal/integration/manager_test.go b/pkg/manager/internal/integration/manager_test.go index 346daa1e68..c83eead3c1 100644 --- a/pkg/manager/internal/integration/manager_test.go +++ b/pkg/manager/internal/integration/manager_test.go @@ -34,10 +34,12 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/utils/ptr" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/envtest" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" @@ -90,6 +92,8 @@ var ( }, }, } + + ctx = ctrl.SetupSignalHandler() ) var _ = Describe("manger.Manager Start", func() { @@ -107,9 +111,7 @@ var _ = Describe("manger.Manager Start", func() { // * Add an index on v2 Driver to ensure we start and wait for an informer during cache.Start (as part of manager.Start) // * Note: cache.Start would fail if the conversion webhook doesn't work (which in turn depends on the readiness probe) // * Note: Adding the index for v2 ensures the Driver list call during Informer sync goes through conversion. - It("should start all components without deadlock", func() { - ctx := ctrl.SetupSignalHandler() - + DescribeTable("should start all components without deadlock", func(warmupEnabled bool) { // Set up schema. Expect(clientgoscheme.AddToScheme(scheme)).To(Succeed()) Expect(apiextensionsv1.AddToScheme(scheme)).To(Succeed()) @@ -163,7 +165,13 @@ var _ = Describe("manger.Manager Start", func() { driverReconciler := &DriverReconciler{ Client: mgr.GetClient(), } - Expect(ctrl.NewControllerManagedBy(mgr).For(&crewv2.Driver{}).Complete(driverReconciler)).To(Succeed()) + Expect( + ctrl.NewControllerManagedBy(mgr). + For(&crewv2.Driver{}). + Named(fmt.Sprintf("driver_warmup_%t", warmupEnabled)). + WithOptions(controller.Options{EnableWarmup: ptr.To(warmupEnabled)}). + Complete(driverReconciler), + ).To(Succeed()) // Set up a conversion webhook. conversionWebhook := createConversionWebhook(mgr) @@ -211,7 +219,10 @@ var _ = Describe("manger.Manager Start", func() { // Shutdown the server cancel() - }) + }, + Entry("controller warmup enabled", true), + Entry("controller warmup not enabled", false), + ) }) type DriverReconciler struct { From 84d2053134373cac021ed299192a965bca6d02e5 Mon Sep 17 00:00:00 2001 From: godwinpang Date: Thu, 22 May 2025 17:59:03 -0700 Subject: [PATCH 43/45] fmt + lint. --- pkg/controller/controller_integration_test.go | 68 +++++++++---------- pkg/internal/controller/controller.go | 22 +++--- pkg/internal/controller/controller_test.go | 6 +- 3 files changed, 48 insertions(+), 48 deletions(-) diff --git a/pkg/controller/controller_integration_test.go b/pkg/controller/controller_integration_test.go index ee6a45811f..0eb869f40f 100644 --- a/pkg/controller/controller_integration_test.go +++ b/pkg/controller/controller_integration_test.go @@ -53,9 +53,9 @@ var _ = Describe("controller", func() { Describe("controller", func() { // TODO(directxman12): write a whole suite of controller-client interaction tests - // Since all tests are run in parallel and share the same testenv, we namespace the objects - // created by using a namespace per entry, and adding a watch predicate that filters by - // namespace. + // Since all tests are run in parallel and share the same testenv, we namespace the objects + // created by using a namespace per entry, and adding a watch predicate that filters by + // namespace. DescribeTable("should reconcile", func(enableWarmup bool) { By("Creating the Manager") cm, err := manager.New(cfg, manager.Options{}) @@ -63,36 +63,36 @@ var _ = Describe("controller", func() { By("Creating the Controller") instance, err := controller.New( - fmt.Sprintf("foo-controller-%t", enableWarmup), - cm, - controller.Options{ - Reconciler: reconcile.Func( - func(_ context.Context, request reconcile.Request) (reconcile.Result, error) { - reconciled <- request - return reconcile.Result{}, nil - }), - EnableWarmup: ptr.To(enableWarmup), - }, - ) + fmt.Sprintf("foo-controller-%t", enableWarmup), + cm, + controller.Options{ + Reconciler: reconcile.Func( + func(_ context.Context, request reconcile.Request) (reconcile.Result, error) { + reconciled <- request + return reconcile.Result{}, nil + }), + EnableWarmup: ptr.To(enableWarmup), + }, + ) Expect(err).NotTo(HaveOccurred()) - testNamespace := strconv.FormatBool(enableWarmup) + testNamespace := strconv.FormatBool(enableWarmup) By("Watching Resources") err = instance.Watch( source.Kind(cm.GetCache(), &appsv1.ReplicaSet{}, handler.TypedEnqueueRequestForOwner[*appsv1.ReplicaSet](cm.GetScheme(), cm.GetRESTMapper(), &appsv1.Deployment{}), - makeNamespacePredicate[*appsv1.ReplicaSet](testNamespace), + makeNamespacePredicate[*appsv1.ReplicaSet](testNamespace), ), ) Expect(err).NotTo(HaveOccurred()) err = instance.Watch( - source.Kind(cm.GetCache(), &appsv1.Deployment{}, - &handler.TypedEnqueueRequestForObject[*appsv1.Deployment]{}, - makeNamespacePredicate[*appsv1.Deployment](testNamespace), - ), - ) + source.Kind(cm.GetCache(), &appsv1.Deployment{}, + &handler.TypedEnqueueRequestForObject[*appsv1.Deployment]{}, + makeNamespacePredicate[*appsv1.Deployment](testNamespace), + ), + ) Expect(err).NotTo(HaveOccurred()) err = cm.GetClient().Get(ctx, types.NamespacedName{Name: "foo"}, &corev1.Namespace{}) @@ -135,11 +135,11 @@ var _ = Describe("controller", func() { Name: "deployment-name", }} - By("Creating the test namespace") - _, err = clientset.CoreV1().Namespaces().Create(ctx, &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ Name: testNamespace }, - }, metav1.CreateOptions{}) - Expect(err).NotTo(HaveOccurred()) + By("Creating the test namespace") + _, err = clientset.CoreV1().Namespaces().Create(ctx, &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: testNamespace}, + }, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) By("Invoking Reconciling for Create") deployment, err = clientset.AppsV1().Deployments(testNamespace).Create(ctx, deployment, metav1.CreateOptions{}) @@ -202,11 +202,11 @@ var _ = Describe("controller", func() { By("Invoking Reconciling for a pod when it is created when adding watcher dynamically") // Add new watcher dynamically err = instance.Watch( - source.Kind(cm.GetCache(), &corev1.Pod{}, - &handler.TypedEnqueueRequestForObject[*corev1.Pod]{}, - makeNamespacePredicate[*corev1.Pod](testNamespace), - ), - ) + source.Kind(cm.GetCache(), &corev1.Pod{}, + &handler.TypedEnqueueRequestForObject[*corev1.Pod]{}, + makeNamespacePredicate[*corev1.Pod](testNamespace), + ), + ) Expect(err).NotTo(HaveOccurred()) pod := &corev1.Pod{ @@ -242,9 +242,9 @@ var _ = Describe("controller", func() { // makeNamespacePredicate returns a predicate that filters out all objects not in the passed in // namespace. func makeNamespacePredicate[object client.Object](namespace string) predicate.TypedPredicate[object] { - return predicate.NewTypedPredicateFuncs[object](func(obj object) bool { - return obj.GetNamespace() == namespace - }) + return predicate.NewTypedPredicateFuncs[object](func(obj object) bool { + return obj.GetNamespace() == namespace + }) } func truePtr() *bool { diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 7febe07a0f..6475d60134 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -303,16 +303,16 @@ func (c *Controller[request]) startEventSourcesAndQueueLocked(ctx context.Contex var retErr error c.didStartEventSourcesOnce.Do(func() { - queue := c.NewQueue(c.Name, c.RateLimiter) - if priorityQueue, isPriorityQueue := queue.(priorityqueue.PriorityQueue[request]); isPriorityQueue { - c.Queue = priorityQueue - } else { - c.Queue = &priorityQueueWrapper[request]{TypedRateLimitingInterface: queue} - } - go func() { - <-ctx.Done() - c.Queue.ShutDown() - }() + queue := c.NewQueue(c.Name, c.RateLimiter) + if priorityQueue, isPriorityQueue := queue.(priorityqueue.PriorityQueue[request]); isPriorityQueue { + c.Queue = priorityQueue + } else { + c.Queue = &priorityQueueWrapper[request]{TypedRateLimitingInterface: queue} + } + go func() { + <-ctx.Done() + c.Queue.ShutDown() + }() errGroup := &errgroup.Group{} for _, watch := range c.startWatches { @@ -380,7 +380,7 @@ func (c *Controller[request]) startEventSourcesAndQueueLocked(ctx context.Contex c.startWatches = nil // Mark event sources as started after resetting the startWatches slice so that watches from - // a new Watch() call are immediately started. + // a new Watch() call are immediately started. c.startedEventSourcesAndQueue = true }) diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 1393d453f4..2c3a399d7d 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -404,7 +404,7 @@ var _ = Describe("controller", func() { ctrl.startWatches = []source.TypedSource[reconcile.Request]{} err := ctrl.startEventSourcesAndQueueLocked(ctx) Expect(err).NotTo(HaveOccurred()) - Expect(ctrl.Queue).NotTo(BeNil()) + Expect(ctrl.Queue).NotTo(BeNil()) }) It("should return an error if a source fails to start", func() { @@ -1166,7 +1166,7 @@ var _ = Describe("controller", func() { ctx, cancel := context.WithCancel(context.Background()) src := source.Func(func(ctx context.Context, q workqueue.TypedRateLimitingInterface[reconcile.Request]) error { defer GinkgoRecover() - Expect(q).ToNot(BeNil()) + Expect(q).ToNot(BeNil()) Expect(q).To(Equal(ctrl.Queue)) started = true @@ -1175,7 +1175,7 @@ var _ = Describe("controller", func() { }) Expect(ctrl.Watch(src)).To(Succeed()) Expect(ctrl.Warmup(ctx)).To(Succeed()) - Expect(ctrl.Queue).ToNot(BeNil()) + Expect(ctrl.Queue).ToNot(BeNil()) Expect(started).To(BeTrue()) }) From cefd22d6b4c15810adda16e5ef5874f453e20edf Mon Sep 17 00:00:00 2001 From: godwinpang Date: Mon, 23 Jun 2025 22:52:53 -0700 Subject: [PATCH 44/45] Add tests for Warmup for parity with Start. --- pkg/internal/controller/controller_test.go | 185 +++++++++++++++++++++ 1 file changed, 185 insertions(+) diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 2c3a399d7d..3a8cdf70f6 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -1143,6 +1143,191 @@ var _ = Describe("controller", func() { Expect(ctrl.Warmup(ctx)).To(Succeed()) }) + It("should return an error if there is an error waiting for the informers", func() { + ctrl.CacheSyncTimeout = time.Second + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + source.Kind(&informertest.FakeInformers{Synced: ptr.To(false)}, &corev1.Pod{}, &handler.TypedEnqueueRequestForObject[*corev1.Pod]{}), + } + ctrl.Name = "foo" + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + err := ctrl.Warmup(ctx) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to wait for foo caches to sync")) + }) + + It("should error when cache sync timeout occurs", func() { + c, err := cache.New(cfg, cache.Options{}) + Expect(err).NotTo(HaveOccurred()) + c = &cacheWithIndefinitelyBlockingGetInformer{c} + + ctrl.CacheSyncTimeout = time.Second + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + source.Kind(c, &appsv1.Deployment{}, &handler.TypedEnqueueRequestForObject[*appsv1.Deployment]{}), + } + ctrl.Name = "testcontroller" + + err = ctrl.Warmup(context.TODO()) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to wait for testcontroller caches to sync kind source: *v1.Deployment: timed out waiting for cache to be synced")) + }) + + It("should not error when controller Warmup context is cancelled during Sources WaitForSync", func() { + ctrl.CacheSyncTimeout = 1 * time.Second + + sourceSynced := make(chan struct{}) + c, err := cache.New(cfg, cache.Options{}) + Expect(err).NotTo(HaveOccurred()) + c = &cacheWithIndefinitelyBlockingGetInformer{c} + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + &singnallingSourceWrapper{ + SyncingSource: source.Kind[client.Object](c, &appsv1.Deployment{}, &handler.EnqueueRequestForObject{}), + cacheSyncDone: sourceSynced, + }, + } + ctrl.Name = "testcontroller" + + ctx, cancel := context.WithCancel(context.TODO()) + go func() { + defer GinkgoRecover() + err = ctrl.Warmup(ctx) + Expect(err).To(Succeed()) + }() + + cancel() + <-sourceSynced + }) + + It("should error when Warmup() is blocking forever", func() { + ctrl.CacheSyncTimeout = time.Second + + controllerDone := make(chan struct{}) + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + source.Func(func(ctx context.Context, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + <-controllerDone + return ctx.Err() + })} + + ctx, cancel := context.WithTimeout(context.TODO(), 10*time.Second) + defer cancel() + + err := ctrl.Warmup(ctx) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("Please ensure that its Start() method is non-blocking")) + + close(controllerDone) + }) + + It("should not error when cache sync timeout is of sufficiently high", func() { + ctrl.CacheSyncTimeout = 10 * time.Second + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sourceSynced := make(chan struct{}) + c := &informertest.FakeInformers{} + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ + &singnallingSourceWrapper{ + SyncingSource: source.Kind[client.Object](c, &appsv1.Deployment{}, &handler.EnqueueRequestForObject{}), + cacheSyncDone: sourceSynced, + }, + } + + go func() { + defer GinkgoRecover() + Expect(ctrl.Warmup(ctx)).To(Succeed()) + }() + + <-sourceSynced + }) + + It("should process events from source.Channel", func() { + ctrl.CacheSyncTimeout = 10 * time.Second + // channel to be closed when event is processed + processed := make(chan struct{}) + // source channel + ch := make(chan event.GenericEvent, 1) + + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + // event to be sent to the channel + p := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "bar"}, + } + evt := event.GenericEvent{ + Object: p, + } + + ins := source.Channel( + ch, + handler.Funcs{ + GenericFunc: func(ctx context.Context, evt event.GenericEvent, q workqueue.TypedRateLimitingInterface[reconcile.Request]) { + defer GinkgoRecover() + close(processed) + }, + }, + ) + + // send the event to the channel + ch <- evt + + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ins} + + go func() { + defer GinkgoRecover() + Expect(ctrl.Warmup(ctx)).To(Succeed()) + }() + <-processed + }) + + It("should error when channel source is not specified", func() { + ctrl.CacheSyncTimeout = 10 * time.Second + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ins := source.Channel[string](nil, nil) + ctrl.startWatches = []source.TypedSource[reconcile.Request]{ins} + + e := ctrl.Warmup(ctx) + Expect(e).To(HaveOccurred()) + Expect(e.Error()).To(ContainSubstring("must specify Channel.Source")) + }) + + It("should call Start on sources with the appropriate EventHandler, Queue, and Predicates", func() { + ctrl.CacheSyncTimeout = 10 * time.Second + started := false + ctx, cancel := context.WithCancel(context.Background()) + src := source.Func(func(ctx context.Context, q workqueue.TypedRateLimitingInterface[reconcile.Request]) error { + defer GinkgoRecover() + Expect(q).To(Equal(ctrl.Queue)) + + started = true + cancel() // Cancel the context so ctrl.Warmup() doesn't block forever + return nil + }) + Expect(ctrl.Watch(src)).NotTo(HaveOccurred()) + + err := ctrl.Warmup(ctx) + Expect(err).To(Succeed()) + Expect(started).To(BeTrue()) + }) + + It("should return an error if there is an error starting sources", func() { + ctrl.CacheSyncTimeout = 10 * time.Second + err := fmt.Errorf("Expected Error: could not start source") + src := source.Func(func(context.Context, + workqueue.TypedRateLimitingInterface[reconcile.Request], + ) error { + defer GinkgoRecover() + return err + }) + Expect(ctrl.Watch(src)).To(Succeed()) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + Expect(ctrl.Warmup(ctx)).To(Equal(err)) + }) + It("should track warmup status correctly with unsuccessful sync", func() { // Setup controller with sources that complete with error ctx, cancel := context.WithCancel(context.Background()) From 8c7b12ffb5ca5149222d48ceab8a6aa5a7d469fe Mon Sep 17 00:00:00 2001 From: godwinpang Date: Mon, 23 Jun 2025 23:33:03 -0700 Subject: [PATCH 45/45] golangci-lint --- pkg/internal/controller/controller_test.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 3a8cdf70f6..e819d01186 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -55,6 +55,8 @@ type TestRequest struct { Key string } +const testControllerName = "testcontroller" + var _ = Describe("controller", func() { var fakeReconcile *fakeReconciler var ctrl *Controller[reconcile.Request] @@ -176,7 +178,7 @@ var _ = Describe("controller", func() { ctrl.startWatches = []source.TypedSource[reconcile.Request]{ source.Kind(c, &appsv1.Deployment{}, &handler.TypedEnqueueRequestForObject[*appsv1.Deployment]{}), } - ctrl.Name = "testcontroller" + ctrl.Name = testControllerName err = ctrl.Start(context.TODO()) Expect(err).To(HaveOccurred()) @@ -196,7 +198,7 @@ var _ = Describe("controller", func() { cacheSyncDone: sourceSynced, }, } - ctrl.Name = "testcontroller" + ctrl.Name = testControllerName ctx, cancel := context.WithCancel(context.TODO()) go func() { @@ -1148,12 +1150,12 @@ var _ = Describe("controller", func() { ctrl.startWatches = []source.TypedSource[reconcile.Request]{ source.Kind(&informertest.FakeInformers{Synced: ptr.To(false)}, &corev1.Pod{}, &handler.TypedEnqueueRequestForObject[*corev1.Pod]{}), } - ctrl.Name = "foo" + ctrl.Name = testControllerName ctx, cancel := context.WithCancel(context.Background()) defer cancel() err := ctrl.Warmup(ctx) Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("failed to wait for foo caches to sync")) + Expect(err.Error()).To(ContainSubstring("failed to wait for testcontroller caches to sync")) }) It("should error when cache sync timeout occurs", func() { @@ -1165,7 +1167,7 @@ var _ = Describe("controller", func() { ctrl.startWatches = []source.TypedSource[reconcile.Request]{ source.Kind(c, &appsv1.Deployment{}, &handler.TypedEnqueueRequestForObject[*appsv1.Deployment]{}), } - ctrl.Name = "testcontroller" + ctrl.Name = testControllerName err = ctrl.Warmup(context.TODO()) Expect(err).To(HaveOccurred()) @@ -1185,7 +1187,7 @@ var _ = Describe("controller", func() { cacheSyncDone: sourceSynced, }, } - ctrl.Name = "testcontroller" + ctrl.Name = testControllerName ctx, cancel := context.WithCancel(context.TODO()) go func() {