-
Notifications
You must be signed in to change notification settings - Fork 4.2k
Add some race-condition protection to VPA recommender #8320
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
b2df71a
c7f0511
5aab4b6
b01e23d
fc3e97e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
/* | ||
Copyright 2025 The Kubernetes Authors. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package routines | ||
|
||
import ( | ||
"fmt" | ||
"sync" | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/assert" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/labels" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
|
||
v1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" | ||
vpa_fake "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned/fake" | ||
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/logic" | ||
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model" | ||
metrics_recommender "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/recommender" | ||
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test" | ||
) | ||
|
||
type mockPodResourceRecommender struct{} | ||
|
||
func (m *mockPodResourceRecommender) GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap) logic.RecommendedPodResources { | ||
return logic.RecommendedPodResources{} | ||
} | ||
|
||
// TestProcessUpdateVPAsConcurrency tests processVPAUpdate for race conditions when run concurrently | ||
func TestProcessUpdateVPAsConcurrency(t *testing.T) { | ||
updateWorkerCount := 10 | ||
|
||
vpaCount := 1000 | ||
vpas := make(map[model.VpaID]*model.Vpa, vpaCount) | ||
apiObjectVPAs := make([]*v1.VerticalPodAutoscaler, vpaCount) | ||
fakedClient := make([]runtime.Object, vpaCount) | ||
|
||
for i := range vpaCount { | ||
vpaName := fmt.Sprintf("test-vpa-%d", i) | ||
vpaID := model.VpaID{ | ||
Namespace: "default", | ||
VpaName: vpaName, | ||
} | ||
selector, err := labels.Parse("app=test") | ||
assert.NoError(t, err, "Failed to parse label selector") | ||
vpas[vpaID] = model.NewVpa(vpaID, selector, time.Now()) | ||
|
||
apiObjectVPAs[i] = test.VerticalPodAutoscaler(). | ||
WithName(vpaName). | ||
WithNamespace("default"). | ||
WithContainer("test-container"). | ||
Get() | ||
|
||
fakedClient[i] = apiObjectVPAs[i] | ||
} | ||
|
||
fakeClient := vpa_fake.NewSimpleClientset(fakedClient...).AutoscalingV1() | ||
r := &recommender{ | ||
clusterState: model.NewClusterState(time.Minute), | ||
vpaClient: fakeClient, | ||
podResourceRecommender: &mockPodResourceRecommender{}, | ||
recommendationPostProcessor: []RecommendationPostProcessor{}, | ||
} | ||
|
||
labelSelector, err := metav1.ParseToLabelSelector("app=test") | ||
assert.NoError(t, err, "Failed to parse label selector") | ||
parsedSelector, err := metav1.LabelSelectorAsSelector(labelSelector) | ||
assert.NoError(t, err, "Failed to convert label selector to selector") | ||
|
||
// Inject into clusterState | ||
for _, vpa := range apiObjectVPAs { | ||
err := r.clusterState.AddOrUpdateVpa(vpa, parsedSelector) | ||
assert.NoError(t, err, "Failed to add or update VPA in cluster state") | ||
} | ||
r.clusterState.SetObservedVPAs(apiObjectVPAs) | ||
|
||
// Run processVPAUpdate concurrently for all VPAs | ||
var wg sync.WaitGroup | ||
|
||
cnt := metrics_recommender.NewObjectCounter() | ||
defer cnt.Observe() | ||
|
||
// Create a channel to send VPA updates to workers | ||
vpaUpdates := make(chan *v1.VerticalPodAutoscaler, len(apiObjectVPAs)) | ||
|
||
// Start workers | ||
for range updateWorkerCount { | ||
wg.Add(1) | ||
go func() { | ||
defer wg.Done() | ||
for observedVpa := range vpaUpdates { | ||
key := model.VpaID{ | ||
Namespace: observedVpa.Namespace, | ||
VpaName: observedVpa.Name, | ||
} | ||
|
||
vpa, found := r.clusterState.VPAs()[key] | ||
if !found { | ||
return | ||
} | ||
|
||
processVPAUpdate(r, vpa, observedVpa) | ||
cnt.Add(vpa) | ||
} | ||
}() | ||
} | ||
|
||
// Send VPA updates to the workers | ||
for _, observedVpa := range apiObjectVPAs { | ||
vpaUpdates <- observedVpa | ||
} | ||
close(vpaUpdates) | ||
|
||
wg.Wait() | ||
} |
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -21,6 +21,7 @@ import ( | |||||||||||||
"fmt" | ||||||||||||||
"net/http" | ||||||||||||||
"strconv" | ||||||||||||||
"sync" | ||||||||||||||
"time" | ||||||||||||||
|
||||||||||||||
"github.com/prometheus/client_golang/prometheus" | ||||||||||||||
|
@@ -119,7 +120,8 @@ type objectCounterKey struct { | |||||||||||||
|
||||||||||||||
// ObjectCounter helps split all VPA objects into buckets.
// It may be used from multiple goroutines, so the count map is
// guarded by a mutex; copying an ObjectCounter is therefore unsafe.
type ObjectCounter struct {
	cnt   map[objectCounterKey]int
	mutex sync.Mutex // guards cnt against concurrent Add calls
}
|
||||||||||||||
// Register initializes all metrics for VPA Recommender | ||||||||||||||
|
@@ -189,7 +191,9 @@ func (oc *ObjectCounter) Add(vpa *model.Vpa) { | |||||||||||||
matchesPods: vpa.HasMatchedPods(), | ||||||||||||||
unsupportedConfig: vpa.Conditions.ConditionActive(vpa_types.ConfigUnsupported), | ||||||||||||||
} | ||||||||||||||
oc.mutex.Lock() | ||||||||||||||
oc.cnt[key]++ | ||||||||||||||
oc.mutex.Unlock() | ||||||||||||||
Comment on lines
+194
to
+196
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I looked at changing this to use It works, but my concern is that the map needs to guarantee that all keys exist in the map. This is possible by pre-filling the map with all possible combinations. However, we need to remember to update this file each time a new type is added. I.e.: autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/recommender/recommender.go Lines 41 to 46 in 9bc4220
The problem is that there doesn't seem to be a good way to dynamically generate that list in the VPA. |
||||||||||||||
} | ||||||||||||||
|
||||||||||||||
// Observe passes all the computed bucket values to metrics | ||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I happen to notice this weirdness in the
--update-worker-count
due to the back-ticks, so I fixed them while I was here. Reference: https://pkg.go.dev/flag#PrintDefaults