fix: invalidate cluster cache after sync operations #745
@@ -74,6 +74,13 @@ func (e *gitOpsEngine) Sync(ctx context.Context,
 	namespace string,
 	opts ...sync.SyncOpt,
 ) ([]common.ResourceSyncResult, error) {
+	// Ensure cache is synced before getting managed live objects
+	// This forces a refresh if the cache was invalidated
+	err := e.cache.EnsureSynced()
+	if err != nil {
+		return nil, fmt.Errorf("error during sync: failed to ensure cache is synced: %w", err)
+	}
+
 	managedResources, err := e.cache.GetManagedLiveObjs(resources, isManaged)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get managed live objects: %w", err)

@@ -84,6 +91,15 @@ func (e *gitOpsEngine) Sync(ctx context.Context,
 		return nil, fmt.Errorf("failed to diff objects: %w", err)
 	}
 	opts = append(opts, sync.WithSkipHooks(!diffRes.Modified))
+
+	// Add cache invalidation callback to invalidate cache for modified resources after sync
+	opts = append(opts, sync.WithCacheInvalidationCallback(func(modifiedResources []kube.ResourceKey) {
+		// Only invalidate the specific resources that were modified
+		if len(modifiedResources) > 0 {
+			e.cache.InvalidateResources(modifiedResources)
+		}
+	}))
Comment on lines +96 to +101

Why would we need to invalidate the cache on every sync if the cluster cache is based on resource watches? I feel that the root problem is elsewhere. WDYT?

Yeah, it seems like this solution is reactively removing cached resources and is more of a workaround. The problem is more likely in …
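The reviewers' objection rests on the cluster cache being watch-driven. Below is a minimal, illustrative Go sketch of that idea only; the type, field, and event names are invented for illustration and are not the gitops-engine implementation. It shows why a cache fed by apiserver watch events stays current on its own, without explicit post-sync invalidation.

	package main

	import "fmt"

	// eventKind mirrors the ADDED/MODIFIED/DELETED events a Kubernetes watch delivers.
	type eventKind string

	const (
		added    eventKind = "ADDED"
		modified eventKind = "MODIFIED"
		deleted  eventKind = "DELETED"
	)

	// watchCache is a toy stand-in for a watch-driven cluster cache: every change
	// made on the cluster (including changes applied by a sync) arrives as an
	// event, and the cache updates itself, so no explicit invalidation is needed.
	type watchCache struct {
		items map[string]string
	}

	// handle applies one watch event to the cache.
	func (c *watchCache) handle(kind eventKind, key, obj string) {
		switch kind {
		case added, modified:
			c.items[key] = obj
		case deleted:
			delete(c.items, key)
		}
	}

	func main() {
		c := &watchCache{items: map[string]string{}}
		c.handle(added, "default/Deployment/app", "rev-1")
		// A sync applies a new revision; the apiserver sends MODIFIED and the
		// cache converges on its own.
		c.handle(modified, "default/Deployment/app", "rev-2")
		fmt.Println(c.items["default/Deployment/app"]) // rev-2
	}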

 	syncCtx, cleanup, err := sync.NewSyncContext(revision, result, e.config, e.config, e.kubectl, namespace, e.cache.GetOpenAPISchema(), opts...)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create sync context: %w", err)
@@ -218,6 +218,14 @@ func WithClientSideApplyMigration(enabled bool, manager string) SyncOpt {
 	}
 }
+
+// WithCacheInvalidationCallback sets a callback that will be invoked after successful sync operations
+// to invalidate the cache
+func WithCacheInvalidationCallback(callback func([]kubeutil.ResourceKey)) SyncOpt {
+	return func(ctx *syncContext) {
+		ctx.cacheInvalidationCallback = callback
+	}
+}

 // NewSyncContext creates new instance of a SyncContext
 func NewSyncContext(
 	revision string,

@@ -389,6 +397,10 @@ type syncContext struct {
 	applyOutOfSyncOnly bool
 	// stores whether the resource is modified or not
 	modificationResult map[kubeutil.ResourceKey]bool
+
+	// cacheInvalidationCallback is a callback that will be invoked after successful sync operations
+	// to invalidate the cache
+	cacheInvalidationCallback func([]kubeutil.ResourceKey)
 }

 func (sc *syncContext) setRunningPhase(tasks []*syncTask, isPendingDeletion bool) {
@@ -557,6 +569,15 @@ func (sc *syncContext) Sync() {
 		// delete all completed hooks which have appropriate delete policy
 		sc.deleteHooks(hooksPendingDeletionSuccessful)
 		sc.setOperationPhase(common.OperationSucceeded, "successfully synced (no more tasks)")
+
+		// Invalidate cache after successful sync
+		if sc.cacheInvalidationCallback != nil {
+			modifiedResources := make([]kubeutil.ResourceKey, 0, len(sc.syncRes))
+			for _, result := range sc.syncRes {
+				modifiedResources = append(modifiedResources, result.ResourceKey)
+			}
+			sc.cacheInvalidationCallback(modifiedResources)
+		}
Comment on lines +573 to +580

Same comment applies: Why would we need to invalidate the cache on every sync if the cluster cache is based on resource watches? I feel that the root problem is elsewhere. WDYT?
 		return
 	}
@@ -593,11 +614,34 @@ func (sc *syncContext) Sync() {
 		syncFailedTasks, _ := tasks.Split(func(t *syncTask) bool { return t.syncStatus == common.ResultCodeSyncFailed })
 		sc.deleteHooks(hooksPendingDeletionFailed)
 		sc.setOperationFailed(syncFailTasks, syncFailedTasks, "one or more objects failed to apply")
+
+		// Invalidate cache for successfully synced resources even if overall operation failed
+		if sc.cacheInvalidationCallback != nil {
+			var modifiedResources []kubeutil.ResourceKey
+			for _, result := range sc.syncRes {
+				// Only invalidate resources that were successfully synced
+				if result.Status == common.ResultCodeSynced {
+					modifiedResources = append(modifiedResources, result.ResourceKey)
+				}
+			}
+			if len(modifiedResources) > 0 {
+				sc.cacheInvalidationCallback(modifiedResources)
+			}
+		}
 	case successful:
 		if remainingTasks.Len() == 0 {
 			// delete all completed hooks which have appropriate delete policy
 			sc.deleteHooks(hooksPendingDeletionSuccessful)
 			sc.setOperationPhase(common.OperationSucceeded, "successfully synced (all tasks run)")
+
+			// Invalidate cache after successful sync
+			if sc.cacheInvalidationCallback != nil {
+				modifiedResources := make([]kubeutil.ResourceKey, 0, len(sc.syncRes))
+				for _, result := range sc.syncRes {
+					modifiedResources = append(modifiedResources, result.ResourceKey)
+				}
+				sc.cacheInvalidationCallback(modifiedResources)
+			}
 		} else {
 			sc.setRunningPhase(remainingTasks, false)
 		}
I believe that this function was designed to be called just when the gitops engine is being initialized. This will call the clusterCacheSync.synced function, which only validates at sync time. The ClusterCache should always be synced because it updates based on resource watches.

After looking at the argo-cd code more, it seems like GetManagedLiveObjs() actually calls c.getSyncedCluster(destCluster), which eventually calls clusterCache.EnsureSynced() anyway. So this part is definitely not needed.
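For context, here is a rough Go sketch of the call chain that comment describes. Only the function names GetManagedLiveObjs, getSyncedCluster, and EnsureSynced come from the comment; the types, signatures, and bodies below are simplified stand-ins and do not match the real argo-cd / gitops-engine code.

	package main

	import "fmt"

	// clusterCache is a simplified stand-in for the watch-backed cluster cache.
	type clusterCache struct{ synced bool }

	// EnsureSynced blocks until the cache is populated; a no-op once it is healthy.
	func (c *clusterCache) EnsureSynced() error {
		c.synced = true
		return nil
	}

	// liveStateCache is a simplified stand-in for the caller-side cache wrapper.
	type liveStateCache struct{ cache *clusterCache }

	// getSyncedCluster returns the cluster cache, ensuring it is synced first.
	func (l *liveStateCache) getSyncedCluster() (*clusterCache, error) {
		if err := l.cache.EnsureSynced(); err != nil {
			return nil, err
		}
		return l.cache, nil
	}

	// GetManagedLiveObjs reads managed live objects from an already-synced cache,
	// which is why an extra EnsureSynced call before it would be redundant.
	func (l *liveStateCache) GetManagedLiveObjs() (map[string]string, error) {
		if _, err := l.getSyncedCluster(); err != nil {
			return nil, err
		}
		return map[string]string{"default/Deployment/app": "live"}, nil
	}

	func main() {
		l := &liveStateCache{cache: &clusterCache{}}
		objs, _ := l.GetManagedLiveObjs()
		fmt.Println(len(objs), "managed live object(s); cache synced:", l.cache.synced)
	}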