Skip to content

Commit 93f74c0

Browse files
authored
Merge pull request #7481 from jackfrancis/vmss-proactive-deleting
azure: StrictCacheUpdates to disable proactive vmss cache updates
2 parents adba590 + 1e5ed18 commit 93f74c0

File tree

2 files changed

+32
-15
lines changed

2 files changed

+32
-15
lines changed

cluster-autoscaler/cloudprovider/azure/azure_config.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ type Config struct {
9494

9595
// (DEPRECATED, DO NOT USE) GetVmssSizeRefreshPeriod (seconds) defines how frequently to call GET VMSS API to fetch VMSS info per nodegroup instance
9696
GetVmssSizeRefreshPeriod int `json:"getVmssSizeRefreshPeriod,omitempty" yaml:"getVmssSizeRefreshPeriod,omitempty"`
97+
98+
// StrictCacheUpdates updates cache values only after positive validation from Azure APIs
99+
StrictCacheUpdates bool `json:"strictCacheUpdates,omitempty" yaml:"strictCacheUpdates,omitempty"`
97100
}
98101

99102
// These are only here for backward compabitility. Their equivalent exists in providerazure.Config with a different name.
@@ -122,6 +125,7 @@ func BuildAzureConfig(configReader io.Reader) (*Config, error) {
122125
cfg.CloudProviderBackoffJitter = providerazureconsts.BackoffJitterDefault
123126
cfg.VMType = providerazureconsts.VMTypeVMSS
124127
cfg.MaxDeploymentsCount = int64(defaultMaxDeploymentsCount)
128+
cfg.StrictCacheUpdates = false
125129

126130
// Config file overrides defaults
127131
if configReader != nil {
@@ -247,6 +251,9 @@ func BuildAzureConfig(configReader io.Reader) (*Config, error) {
247251
if _, err = assignBoolFromEnvIfExists(&cfg.EnableForceDelete, "AZURE_ENABLE_FORCE_DELETE"); err != nil {
248252
return nil, err
249253
}
254+
if _, err = assignBoolFromEnvIfExists(&cfg.StrictCacheUpdates, "AZURE_STRICT_CACHE_UPDATES"); err != nil {
255+
return nil, err
256+
}
250257
if _, err = assignBoolFromEnvIfExists(&cfg.EnableDynamicInstanceList, "AZURE_ENABLE_DYNAMIC_INSTANCE_LIST"); err != nil {
251258
return nil, err
252259
}

cluster-autoscaler/cloudprovider/azure/azure_scale_set.go

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -530,19 +530,22 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef, hasUnregistered
530530
return rerr.Error()
531531
}
532532

533-
// Proactively decrement scale set size so that we don't
534-
// go below minimum node count if cache data is stale
535-
// only do it for non-unregistered nodes
536-
if !hasUnregisteredNodes {
537-
scaleSet.sizeMutex.Lock()
538-
scaleSet.curSize -= int64(len(instanceIDs))
539-
scaleSet.lastSizeRefresh = time.Now()
540-
scaleSet.sizeMutex.Unlock()
541-
}
533+
if !scaleSet.manager.config.StrictCacheUpdates {
534+
// Proactively decrement scale set size so that we don't
535+
// go below minimum node count if cache data is stale
536+
// only do it for non-unregistered nodes
537+
538+
if !hasUnregisteredNodes {
539+
scaleSet.sizeMutex.Lock()
540+
scaleSet.curSize -= int64(len(instanceIDs))
541+
scaleSet.lastSizeRefresh = time.Now()
542+
scaleSet.sizeMutex.Unlock()
543+
}
542544

543-
// Proactively set the status of the instances to be deleted in cache
544-
for _, instance := range instancesToDelete {
545-
scaleSet.setInstanceStatusByProviderID(instance.Name, cloudprovider.InstanceStatus{State: cloudprovider.InstanceDeleting})
545+
// Proactively set the status of the instances to be deleted in cache
546+
for _, instance := range instancesToDelete {
547+
scaleSet.setInstanceStatusByProviderID(instance.Name, cloudprovider.InstanceStatus{State: cloudprovider.InstanceDeleting})
548+
}
546549
}
547550

548551
go scaleSet.waitForDeleteInstances(future, requiredIds)
@@ -558,11 +561,18 @@ func (scaleSet *ScaleSet) waitForDeleteInstances(future *azure.Future, requiredI
558561
isSuccess, err := isSuccessHTTPResponse(httpResponse, err)
559562
if isSuccess {
560563
klog.V(3).Infof(".WaitForDeleteInstancesResult(%v) for %s success", requiredIds.InstanceIds, scaleSet.Name)
561-
// No need to invalidateInstanceCache because instanceStates were proactively set to "deleting"
564+
if scaleSet.manager.config.StrictCacheUpdates {
565+
if err := scaleSet.manager.forceRefresh(); err != nil {
566+
klog.Errorf("forceRefresh failed with error: %v", err)
567+
}
568+
scaleSet.invalidateInstanceCache()
569+
}
562570
return
563571
}
564-
// On failure, invalidate the instanceCache - cannot have instances in deletingState
565-
scaleSet.invalidateInstanceCache()
572+
if !scaleSet.manager.config.StrictCacheUpdates {
573+
// On failure, invalidate the instanceCache - cannot have instances in deletingState
574+
scaleSet.invalidateInstanceCache()
575+
}
566576
klog.Errorf("WaitForDeleteInstancesResult(%v) for %s failed with error: %v", requiredIds.InstanceIds, scaleSet.Name, err)
567577
}
568578

0 commit comments

Comments
 (0)