From a4ec949baeb7e89311b6cbd7a1248562d750f1f5 Mon Sep 17 00:00:00 2001 From: Ismail Alidzhikov <9372594+ialidzhikov@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:19:04 +0100 Subject: [PATCH 1/4] vpa-recommender: Add support for configuring global max allowed resources --- vertical-pod-autoscaler/docs/examples.md | 14 + vertical-pod-autoscaler/docs/faq.md | 2 + .../docs/known-limitations.md | 4 +- .../pkg/recommender/main.go | 24 +- .../routines/capping_post_processor.go | 21 +- .../pkg/utils/vpa/capping.go | 45 ++- .../pkg/utils/vpa/capping_test.go | 277 +++++++++++++++--- 7 files changed, 321 insertions(+), 66 deletions(-) diff --git a/vertical-pod-autoscaler/docs/examples.md b/vertical-pod-autoscaler/docs/examples.md index ba6ab4aca11f..7751873c4dfa 100644 --- a/vertical-pod-autoscaler/docs/examples.md +++ b/vertical-pod-autoscaler/docs/examples.md @@ -11,6 +11,7 @@ - [Controlling eviction behavior based on scaling direction and resource](#controlling-eviction-behavior-based-on-scaling-direction-and-resource) - [Limiting which namespaces are used](#limiting-which-namespaces-are-used) - [Setting the webhook failurePolicy](#setting-the-webhook-failurepolicy) + - [Specifying global maximum allowed resources to prevent pods from being unschedulable](#specifying-global-maximum-allowed-resources-to-prevent-pods-from-being-unschedulable) ## Keeping limit proportional to request @@ -108,3 +109,16 @@ These options cannot be used together and are mutually exclusive. It is possible to set the failurePolicy of the webhook to `Fail` by passing `--webhook-failure-policy-fail=true` to the VPA admission controller. Please use this option with caution as it may be possible to break Pod creation if there is a failure with the VPA. Using it in conjunction with `--ignored-vpa-object-namespaces=kube-system` or `--vpa-object-namespace` to reduce risk. + +### Specifying global maximum allowed resources to prevent pods from being unschedulable + +The [Known limitations dcoument](./known-limitations.md) outlines that VPA (vpa-recommender in particular) is not aware of the cluster's maximum allocatable and can recommend resources which will not fit even the largest node in the cluster. This issue occurs even when the cluster uses the [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#basics). The vpa-recommender's resource recommendation can exceed the allocatable of the largest node in the cluster. Hence, pod's will be unschedulable (in `Pending` state) and the pod wouldn't fit the cluster even if a new node is added by the Cluster Autoscaler. +It is possible to mitigate this issue by specifying the `--max-allowed-cpu` and `--max-allowed-memory` flags of the vpa-recommender. These flags represent the global maximum amount of cpu/memory that will be recommended **for a container**. If the VerticalPodAutoscaler already defines a max allowed (`.spec.resourcePolicy.containerPolicies.maxAllowed`) then it takes precedence over the global max allowed. The global max allowed is merged to the VerticalPodAutoscaler's max allowed if VerticalPodAutoscaler's max allowed is specified only for cpu or memory. If the VerticalPodAutoscaler does not specify a max allowed and a global max allowed is specified, then the global max allowed is being used. + +The recommendation for computing the `--max-allowed-cpu` and `--max-allowed-memory` values for your cluster is to use the largest node's allocatable (`.status.allocatable` field of a node) minus the DaemonSet pods resource requests minus a safety margin: +``` + = - - +``` + +> [!WARNING] +> Pay attention that `--max-allowed-cpu` and `--max-allowed-memory` are **container-level** flags. A pod enabling autoscaling for more than one container can theoretically still get unschedulable if the sum of the resource recommendations of the containers exceeds the largest Node's allocatable. In practice, it is not very likely to hit such case as usually a single container in a pod is the main one, the others are sidecars that either do not need autoscaling or do not consume high resource requests. diff --git a/vertical-pod-autoscaler/docs/faq.md b/vertical-pod-autoscaler/docs/faq.md index 80f1c8774af1..f73e2c463257 100644 --- a/vertical-pod-autoscaler/docs/faq.md +++ b/vertical-pod-autoscaler/docs/faq.md @@ -212,6 +212,8 @@ Name | Type | Description | Default `container-pod-name-label` | String | Label name to look for container pod names | "pod_name" `container-name-label` | String | Label name to look for container names | "name" `vpa-object-namespace` | String | Namespace to search for VPA objects and pod stats. Empty means all namespaces will be used. | apiv1.NamespaceAll +`max-allowed-cpu` | Quantity | Maximum amount of CPU that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed. | Empty (no max allowed cpu by default) +`max-allowed-memory` | Quantity | Maximum amount of memory that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed. | Empty (no max allowed memory by default) `memory-aggregation-interval` | Duration | The length of a single interval, for which the peak memory usage is computed. Memory usage peaks are aggregated in multiples of this interval. In other words there is one memory usage sample per interval (the maximum usage over that interval | model.DefaultMemoryAggregationInterval `memory-aggregation-interval-count` | Int64 | The number of consecutive memory-aggregation-intervals which make up the MemoryAggregationWindowLength which in turn is the period for memory usage aggregation by VPA. In other words, MemoryAggregationWindowLength = memory-aggregation-interval * memory-aggregation-interval-count. | model.DefaultMemoryAggregationIntervalCount `memory-histogram-decay-half-life` | Duration | The amount of time it takes a historical memory usage sample to lose half of its weight. In other words, a fresh usage sample is twice as 'important' as one with age equal to the half life period. | model.DefaultMemoryHistogramDecayHalfLife diff --git a/vertical-pod-autoscaler/docs/known-limitations.md b/vertical-pod-autoscaler/docs/known-limitations.md index a6e08c849016..02a35e4cd0c7 100644 --- a/vertical-pod-autoscaler/docs/known-limitations.md +++ b/vertical-pod-autoscaler/docs/known-limitations.md @@ -21,7 +21,9 @@ - VPA performance has not been tested in large clusters. - VPA recommendation might exceed available resources (e.g. Node size, available size, available quota) and cause **pods to go pending**. This can be partly - addressed by using VPA together with [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#basics). + addressed by: + * using VPA together with [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#basics) - the drawback of this approach is that pods can still get unschedulable if the recommendation exceeds the largest Node's allocatable. + * specifying the `--max-allowed-cpu` and `--max-allowed-memory` flags - the drawback of this approach is that a pod can still get unschedulable if more than one container in the pod is scaled by VPA and the sum of the container recommendations exceeds the largest Node's allocatable. - Multiple VPA resources matching the same pod have undefined behavior. - Running the vpa-recommender with leader election enabled (`--leader-elect=true`) in a GKE cluster causes contention with a lease called `vpa-recommender` held by the GKE system component of the diff --git a/vertical-pod-autoscaler/pkg/recommender/main.go b/vertical-pod-autoscaler/pkg/recommender/main.go index 8a53137f1e5c..3e836de18efe 100644 --- a/vertical-pod-autoscaler/pkg/recommender/main.go +++ b/vertical-pod-autoscaler/pkg/recommender/main.go @@ -25,6 +25,7 @@ import ( "github.com/spf13/pflag" apiv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/client-go/informers" @@ -103,6 +104,8 @@ var ( var ( // CPU as integer to benefit for CPU management Static Policy ( https://kubernetes.io/docs/tasks/administer-cluster/cpu-management-policies/#static-policy ) postProcessorCPUasInteger = flag.Bool("cpu-integer-post-processor-enabled", false, "Enable the cpu-integer recommendation post processor. The post processor will round up CPU recommendations to a whole CPU for pods which were opted in by setting an appropriate label on VPA object (experimental)") + maxAllowedCPU = resource.QuantityValue{} + maxAllowedMemory = resource.QuantityValue{} ) const ( @@ -116,6 +119,9 @@ const ( ) func main() { + flag.Var(&maxAllowedCPU, "max-allowed-cpu", "Maximum amount of CPU that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed.") + flag.Var(&maxAllowedMemory, "max-allowed-memory", "Maximum amount of memory that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed.") + commonFlags := common.InitCommonFlags() klog.InitFlags(nil) common.InitLoggingFlags() @@ -215,8 +221,16 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) { postProcessors = append(postProcessors, &routines.IntegerCPUPostProcessor{}) } + var globalMaxAllowed apiv1.ResourceList + if !maxAllowedCPU.Quantity.IsZero() { + setGlobalMaxAllowed(&globalMaxAllowed, apiv1.ResourceCPU, maxAllowedCPU.Quantity) + } + if !maxAllowedMemory.Quantity.IsZero() { + setGlobalMaxAllowed(&globalMaxAllowed, apiv1.ResourceMemory, maxAllowedMemory.Quantity) + } + // CappingPostProcessor, should always come in the last position for post-processing - postProcessors = append(postProcessors, &routines.CappingPostProcessor{}) + postProcessors = append(postProcessors, routines.NewCappingRecommendationProcessor(globalMaxAllowed)) var source input_metrics.PodMetricsLister if *useExternalMetrics { resourceMetrics := map[apiv1.ResourceName]string{} @@ -307,3 +321,11 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) { healthCheck.UpdateLastActivity() } } + +func setGlobalMaxAllowed(globalMaxAllowed *apiv1.ResourceList, key apiv1.ResourceName, value resource.Quantity) { + if *globalMaxAllowed == nil { + *globalMaxAllowed = make(map[apiv1.ResourceName]resource.Quantity, 2) + } + + (*globalMaxAllowed)[key] = value +} diff --git a/vertical-pod-autoscaler/pkg/recommender/routines/capping_post_processor.go b/vertical-pod-autoscaler/pkg/recommender/routines/capping_post_processor.go index 851a1c4c5575..87e8d5a1b375 100644 --- a/vertical-pod-autoscaler/pkg/recommender/routines/capping_post_processor.go +++ b/vertical-pod-autoscaler/pkg/recommender/routines/capping_post_processor.go @@ -19,20 +19,29 @@ package routines import ( "k8s.io/klog/v2" + apiv1 "k8s.io/api/core/v1" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" vpa_utils "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" ) -// CappingPostProcessor ensure that the policy is applied to recommendation -// it applies policy for fields: MinAllowed and MaxAllowed -type CappingPostProcessor struct{} +type cappingPostProcessor struct { + globalMaxAllowed apiv1.ResourceList +} + +var _ RecommendationPostProcessor = &cappingPostProcessor{} -var _ RecommendationPostProcessor = &CappingPostProcessor{} +// NewCappingRecommendationProcessor constructs new RecommendationPostProcessor that adjusts recommendation +// for given pod to obey VPA resources policy and a global max allowed configuration. +func NewCappingRecommendationProcessor(globalMaxAllowed apiv1.ResourceList) RecommendationPostProcessor { + return &cappingPostProcessor{ + globalMaxAllowed: globalMaxAllowed, + } +} // Process apply the capping post-processing to the recommendation. (use to be function getCappedRecommendation) -func (c CappingPostProcessor) Process(vpa *vpa_types.VerticalPodAutoscaler, recommendation *vpa_types.RecommendedPodResources) *vpa_types.RecommendedPodResources { +func (c cappingPostProcessor) Process(vpa *vpa_types.VerticalPodAutoscaler, recommendation *vpa_types.RecommendedPodResources) *vpa_types.RecommendedPodResources { // TODO: maybe rename the vpa_utils.ApplyVPAPolicy to something that mention that it is doing capping only - cappedRecommendation, err := vpa_utils.ApplyVPAPolicy(recommendation, vpa.Spec.ResourcePolicy) + cappedRecommendation, err := vpa_utils.ApplyVPAPolicy(recommendation, vpa.Spec.ResourcePolicy, c.globalMaxAllowed) if err != nil { klog.ErrorS(err, "Failed to apply policy for VPA", "vpa", klog.KObj(vpa)) return recommendation diff --git a/vertical-pod-autoscaler/pkg/utils/vpa/capping.go b/vertical-pod-autoscaler/pkg/utils/vpa/capping.go index ffb92e755202..271c8f2d6154 100644 --- a/vertical-pod-autoscaler/pkg/utils/vpa/capping.go +++ b/vertical-pod-autoscaler/pkg/utils/vpa/capping.go @@ -184,23 +184,43 @@ func applyVPAPolicy(recommendation apiv1.ResourceList, policy *vpa_types.Contain func applyVPAPolicyForContainer(containerName string, containerRecommendation *vpa_types.RecommendedContainerResources, - policy *vpa_types.PodResourcePolicy) (*vpa_types.RecommendedContainerResources, error) { + policy *vpa_types.PodResourcePolicy, + globalMaxAllowed apiv1.ResourceList) (*vpa_types.RecommendedContainerResources, error) { if containerRecommendation == nil { return nil, fmt.Errorf("no recommendation available for container name %v", containerName) } cappedRecommendations := containerRecommendation.DeepCopy() - // containerPolicy can be nil (user does not have to configure it). containerPolicy := GetContainerResourcePolicy(containerName, policy) - if containerPolicy == nil { - return cappedRecommendations, nil - } process := func(recommendation apiv1.ResourceList) { for resourceName, recommended := range recommendation { - cappedToMin, _ := maybeCapToPolicyMin(recommended, resourceName, containerPolicy) - recommendation[resourceName] = cappedToMin - cappedToMax, _ := maybeCapToPolicyMax(cappedToMin, resourceName, containerPolicy) - recommendation[resourceName] = cappedToMax + // containerPolicy can be nil (user does not have to configure it). + if containerPolicy != nil { + cappedToMin, _ := maybeCapToPolicyMin(recommended, resourceName, containerPolicy) + recommendation[resourceName] = cappedToMin + + maxAllowed := containerPolicy.MaxAllowed + if globalMaxAllowed != nil { + if maxAllowed == nil { + maxAllowed = globalMaxAllowed + } else { + // Set resources from the global maxAllowed if the VPA maxAllowed is missing them. + for resourceName, quantity := range globalMaxAllowed { + if _, ok := maxAllowed[resourceName]; !ok { + maxAllowed[resourceName] = quantity + } + } + } + } + + cappedToMax, _ := maybeCapToMax(cappedToMin, resourceName, maxAllowed) + recommendation[resourceName] = cappedToMax + } else { + if globalMaxAllowed != nil { + cappedToMax, _ := maybeCapToMax(recommended, resourceName, globalMaxAllowed) + recommendation[resourceName] = cappedToMax + } + } } } @@ -241,19 +261,16 @@ func maybeCapToMin(recommended resource.Quantity, resourceName apiv1.ResourceNam // ApplyVPAPolicy returns a recommendation, adjusted to obey policy. func ApplyVPAPolicy(podRecommendation *vpa_types.RecommendedPodResources, - policy *vpa_types.PodResourcePolicy) (*vpa_types.RecommendedPodResources, error) { + policy *vpa_types.PodResourcePolicy, globalMaxAllowed apiv1.ResourceList) (*vpa_types.RecommendedPodResources, error) { if podRecommendation == nil { return nil, nil } - if policy == nil { - return podRecommendation, nil - } updatedRecommendations := []vpa_types.RecommendedContainerResources{} for _, containerRecommendation := range podRecommendation.ContainerRecommendations { containerName := containerRecommendation.ContainerName updatedContainerResources, err := applyVPAPolicyForContainer(containerName, - &containerRecommendation, policy) + &containerRecommendation, policy, globalMaxAllowed) if err != nil { return nil, fmt.Errorf("cannot apply policy on recommendation for container name %v", containerName) } diff --git a/vertical-pod-autoscaler/pkg/utils/vpa/capping_test.go b/vertical-pod-autoscaler/pkg/utils/vpa/capping_test.go index b290cc69cac0..5eb8da249b1f 100644 --- a/vertical-pod-autoscaler/pkg/utils/vpa/capping_test.go +++ b/vertical-pod-autoscaler/pkg/utils/vpa/capping_test.go @@ -283,67 +283,256 @@ func TestApply(t *testing.T) { } } -func TestApplyVpa(t *testing.T) { - podRecommendation := vpa_types.RecommendedPodResources{ +var ( + recommendation = &vpa_types.RecommendedPodResources{ ContainerRecommendations: []vpa_types.RecommendedContainerResources{ { - ContainerName: "ctr-name", + ContainerName: "foo", Target: apiv1.ResourceList{ - apiv1.ResourceCPU: *resource.NewScaledQuantity(30, 1), - apiv1.ResourceMemory: *resource.NewScaledQuantity(5000, 1), + apiv1.ResourceCPU: resource.MustParse("42m"), + apiv1.ResourceMemory: resource.MustParse("42Mi"), }, LowerBound: apiv1.ResourceList{ - apiv1.ResourceCPU: *resource.NewScaledQuantity(20, 1), - apiv1.ResourceMemory: *resource.NewScaledQuantity(4300, 1), - }, - UncappedTarget: apiv1.ResourceList{ - apiv1.ResourceCPU: *resource.NewScaledQuantity(30, 1), - apiv1.ResourceMemory: *resource.NewScaledQuantity(5000, 1), + apiv1.ResourceCPU: resource.MustParse("31m"), + apiv1.ResourceMemory: resource.MustParse("31Mi"), }, UpperBound: apiv1.ResourceList{ - apiv1.ResourceCPU: *resource.NewScaledQuantity(50, 1), - apiv1.ResourceMemory: *resource.NewScaledQuantity(5500, 1), + apiv1.ResourceCPU: resource.MustParse("53m"), + apiv1.ResourceMemory: resource.MustParse("53Mi"), + }, + UncappedTarget: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("42m"), + apiv1.ResourceMemory: resource.MustParse("42Mi"), }, }, }, } - policy := vpa_types.PodResourcePolicy{ - ContainerPolicies: []vpa_types.ContainerResourcePolicy{ - { - ContainerName: "ctr-name", - MinAllowed: apiv1.ResourceList{ - apiv1.ResourceCPU: *resource.NewScaledQuantity(40, 1), - apiv1.ResourceMemory: *resource.NewScaledQuantity(4000, 1), +) + +func TestApplyVPAPolicy(t *testing.T) { + tests := []struct { + Name string + PodRecommendation *vpa_types.RecommendedPodResources + ResourcePolicy *vpa_types.PodResourcePolicy + GlobalMaxAllowed apiv1.ResourceList + Expected *vpa_types.RecommendedPodResources + }{ + { + Name: "recommendation is nil", + PodRecommendation: nil, + ResourcePolicy: nil, + GlobalMaxAllowed: nil, + Expected: nil, + }, + { + Name: "resource policy is nil and global max allowed is nil", + PodRecommendation: recommendation, + ResourcePolicy: nil, + GlobalMaxAllowed: nil, + Expected: recommendation.DeepCopy(), + }, + { + Name: "resource policy has min allowed and global max allowed is nil", + PodRecommendation: recommendation, + ResourcePolicy: &vpa_types.PodResourcePolicy{ + ContainerPolicies: []vpa_types.ContainerResourcePolicy{ + { + ContainerName: "foo", + MinAllowed: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("50m"), + apiv1.ResourceMemory: resource.MustParse("50Mi"), + }, + }, }, - MaxAllowed: apiv1.ResourceList{ - apiv1.ResourceCPU: *resource.NewScaledQuantity(45, 1), - apiv1.ResourceMemory: *resource.NewScaledQuantity(4500, 1), + }, + GlobalMaxAllowed: nil, + Expected: &vpa_types.RecommendedPodResources{ + ContainerRecommendations: []vpa_types.RecommendedContainerResources{ + { + ContainerName: "foo", + Target: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("50m"), + apiv1.ResourceMemory: resource.MustParse("50Mi"), + }, + LowerBound: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("50m"), + apiv1.ResourceMemory: resource.MustParse("50Mi"), + }, + UpperBound: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("53m"), + apiv1.ResourceMemory: resource.MustParse("53Mi"), + }, + UncappedTarget: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("42m"), + apiv1.ResourceMemory: resource.MustParse("42Mi"), + }, + }, + }, + }, + }, + { + Name: "resource policy has max allowed and global max allowed is nil", + PodRecommendation: recommendation, + ResourcePolicy: &vpa_types.PodResourcePolicy{ + ContainerPolicies: []vpa_types.ContainerResourcePolicy{ + { + ContainerName: "foo", + MaxAllowed: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("40m"), + apiv1.ResourceMemory: resource.MustParse("40Mi"), + }, + }, + }, + }, + GlobalMaxAllowed: nil, + Expected: &vpa_types.RecommendedPodResources{ + ContainerRecommendations: []vpa_types.RecommendedContainerResources{ + { + ContainerName: "foo", + Target: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("40m"), + apiv1.ResourceMemory: resource.MustParse("40Mi"), + }, + LowerBound: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("31m"), + apiv1.ResourceMemory: resource.MustParse("31Mi"), + }, + UpperBound: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("40m"), + apiv1.ResourceMemory: resource.MustParse("40Mi"), + }, + UncappedTarget: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("42m"), + apiv1.ResourceMemory: resource.MustParse("42Mi"), + }, + }, + }, + }, + }, + { + Name: "resource policy is nil and global max allowed is set", + PodRecommendation: recommendation, + ResourcePolicy: nil, + GlobalMaxAllowed: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("40m"), + apiv1.ResourceMemory: resource.MustParse("40Mi"), + }, + Expected: &vpa_types.RecommendedPodResources{ + ContainerRecommendations: []vpa_types.RecommendedContainerResources{ + { + ContainerName: "foo", + Target: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("40m"), + apiv1.ResourceMemory: resource.MustParse("40Mi"), + }, + LowerBound: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("31m"), + apiv1.ResourceMemory: resource.MustParse("31Mi"), + }, + UpperBound: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("40m"), + apiv1.ResourceMemory: resource.MustParse("40Mi"), + }, + UncappedTarget: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("42m"), + apiv1.ResourceMemory: resource.MustParse("42Mi"), + }, + }, + }, + }, + }, + { + Name: "resource policy has maxAllowed and global max allowed is set", + PodRecommendation: recommendation, + ResourcePolicy: &vpa_types.PodResourcePolicy{ + ContainerPolicies: []vpa_types.ContainerResourcePolicy{ + { + ContainerName: "foo", + MinAllowed: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("35m"), + apiv1.ResourceMemory: resource.MustParse("35Mi"), + }, + }, + }, + }, + GlobalMaxAllowed: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("50m"), + apiv1.ResourceMemory: resource.MustParse("50Mi"), + }, + Expected: &vpa_types.RecommendedPodResources{ + ContainerRecommendations: []vpa_types.RecommendedContainerResources{ + { + ContainerName: "foo", + Target: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("42m"), + apiv1.ResourceMemory: resource.MustParse("42Mi"), + }, + LowerBound: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("35m"), + apiv1.ResourceMemory: resource.MustParse("35Mi"), + }, + UpperBound: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("50m"), + apiv1.ResourceMemory: resource.MustParse("50Mi"), + }, + UncappedTarget: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("42m"), + apiv1.ResourceMemory: resource.MustParse("42Mi"), + }, + }, + }, + }, + }, + { + Name: "resource policy has max allowed for cpu and global max allowed is set for cpu and memory", + PodRecommendation: recommendation, + ResourcePolicy: &vpa_types.PodResourcePolicy{ + ContainerPolicies: []vpa_types.ContainerResourcePolicy{ + { + ContainerName: "foo", + MaxAllowed: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("40m"), + }, + }, + }, + }, + GlobalMaxAllowed: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("30m"), + apiv1.ResourceMemory: resource.MustParse("30Mi"), + }, + Expected: &vpa_types.RecommendedPodResources{ + ContainerRecommendations: []vpa_types.RecommendedContainerResources{ + { + ContainerName: "foo", + Target: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("40m"), + apiv1.ResourceMemory: resource.MustParse("30Mi"), + }, + LowerBound: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("31m"), + apiv1.ResourceMemory: resource.MustParse("30Mi"), + }, + UpperBound: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("40m"), + apiv1.ResourceMemory: resource.MustParse("30Mi"), + }, + UncappedTarget: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("42m"), + apiv1.ResourceMemory: resource.MustParse("42Mi"), + }, + }, }, }, }, } - res, err := ApplyVPAPolicy(&podRecommendation, &policy) - assert.Nil(t, err) - assert.Equal(t, apiv1.ResourceList{ - apiv1.ResourceCPU: *resource.NewScaledQuantity(40, 1), - apiv1.ResourceMemory: *resource.NewScaledQuantity(4500, 1), - }, res.ContainerRecommendations[0].Target) - - assert.Equal(t, apiv1.ResourceList{ - apiv1.ResourceCPU: *resource.NewScaledQuantity(30, 1), - apiv1.ResourceMemory: *resource.NewScaledQuantity(5000, 1), - }, res.ContainerRecommendations[0].UncappedTarget) - - assert.Equal(t, apiv1.ResourceList{ - apiv1.ResourceCPU: *resource.NewScaledQuantity(40, 1), - apiv1.ResourceMemory: *resource.NewScaledQuantity(4300, 1), - }, res.ContainerRecommendations[0].LowerBound) - - assert.Equal(t, apiv1.ResourceList{ - apiv1.ResourceCPU: *resource.NewScaledQuantity(45, 1), - apiv1.ResourceMemory: *resource.NewScaledQuantity(4500, 1), - }, res.ContainerRecommendations[0].UpperBound) + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + actual, err := ApplyVPAPolicy(tt.PodRecommendation, tt.ResourcePolicy, tt.GlobalMaxAllowed) + assert.Nil(t, err) + assert.Equal(t, tt.Expected, actual) + }) + } } type fakeLimitRangeCalculator struct { From dd5add63a9db84ae7fcb58ca2dbe40ebbfba39bb Mon Sep 17 00:00:00 2001 From: Ismail Alidzhikov <9372594+ialidzhikov@users.noreply.github.com> Date: Thu, 5 Dec 2024 08:42:23 +0100 Subject: [PATCH 2/4] Rename `--max-allowed-{cpu,memory}` to `--container-recommendation-max-allowed-{cpu,memory}` --- vertical-pod-autoscaler/docs/examples.md | 6 +++--- vertical-pod-autoscaler/docs/faq.md | 4 ++-- vertical-pod-autoscaler/docs/known-limitations.md | 2 +- vertical-pod-autoscaler/pkg/recommender/main.go | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/vertical-pod-autoscaler/docs/examples.md b/vertical-pod-autoscaler/docs/examples.md index 7751873c4dfa..1bc8b7e397ae 100644 --- a/vertical-pod-autoscaler/docs/examples.md +++ b/vertical-pod-autoscaler/docs/examples.md @@ -113,12 +113,12 @@ Using it in conjunction with `--ignored-vpa-object-namespaces=kube-system` or `- ### Specifying global maximum allowed resources to prevent pods from being unschedulable The [Known limitations dcoument](./known-limitations.md) outlines that VPA (vpa-recommender in particular) is not aware of the cluster's maximum allocatable and can recommend resources which will not fit even the largest node in the cluster. This issue occurs even when the cluster uses the [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#basics). The vpa-recommender's resource recommendation can exceed the allocatable of the largest node in the cluster. Hence, pod's will be unschedulable (in `Pending` state) and the pod wouldn't fit the cluster even if a new node is added by the Cluster Autoscaler. -It is possible to mitigate this issue by specifying the `--max-allowed-cpu` and `--max-allowed-memory` flags of the vpa-recommender. These flags represent the global maximum amount of cpu/memory that will be recommended **for a container**. If the VerticalPodAutoscaler already defines a max allowed (`.spec.resourcePolicy.containerPolicies.maxAllowed`) then it takes precedence over the global max allowed. The global max allowed is merged to the VerticalPodAutoscaler's max allowed if VerticalPodAutoscaler's max allowed is specified only for cpu or memory. If the VerticalPodAutoscaler does not specify a max allowed and a global max allowed is specified, then the global max allowed is being used. +It is possible to mitigate this issue by specifying the `--container-recommendation-max-allowed-cpu` and `--container-recommendation-max-allowed-memory` flags of the vpa-recommender. These flags represent the global maximum amount of cpu/memory that will be recommended **for a container**. If the VerticalPodAutoscaler already defines a max allowed (`.spec.resourcePolicy.containerPolicies.maxAllowed`) then it takes precedence over the global max allowed. The global max allowed is merged to the VerticalPodAutoscaler's max allowed if VerticalPodAutoscaler's max allowed is specified only for cpu or memory. If the VerticalPodAutoscaler does not specify a max allowed and a global max allowed is specified, then the global max allowed is being used. -The recommendation for computing the `--max-allowed-cpu` and `--max-allowed-memory` values for your cluster is to use the largest node's allocatable (`.status.allocatable` field of a node) minus the DaemonSet pods resource requests minus a safety margin: +The recommendation for computing the `--container-recommendation-max-allowed-cpu` and `--container-recommendation-max-allowed-memory` values for your cluster is to use the largest node's allocatable (`.status.allocatable` field of a node) minus the DaemonSet pods resource requests minus a safety margin: ``` = - - ``` > [!WARNING] -> Pay attention that `--max-allowed-cpu` and `--max-allowed-memory` are **container-level** flags. A pod enabling autoscaling for more than one container can theoretically still get unschedulable if the sum of the resource recommendations of the containers exceeds the largest Node's allocatable. In practice, it is not very likely to hit such case as usually a single container in a pod is the main one, the others are sidecars that either do not need autoscaling or do not consume high resource requests. +> Pay attention that `--container-recommendation-max-allowed-cpu` and `--container-recommendation-max-allowed-memory` are **container-level** flags. A pod enabling autoscaling for more than one container can theoretically still get unschedulable if the sum of the resource recommendations of the containers exceeds the largest Node's allocatable. In practice, it is not very likely to hit such case as usually a single container in a pod is the main one, the others are sidecars that either do not need autoscaling or do not consume high resource requests. diff --git a/vertical-pod-autoscaler/docs/faq.md b/vertical-pod-autoscaler/docs/faq.md index f73e2c463257..c753fbbb6e2e 100644 --- a/vertical-pod-autoscaler/docs/faq.md +++ b/vertical-pod-autoscaler/docs/faq.md @@ -212,8 +212,8 @@ Name | Type | Description | Default `container-pod-name-label` | String | Label name to look for container pod names | "pod_name" `container-name-label` | String | Label name to look for container names | "name" `vpa-object-namespace` | String | Namespace to search for VPA objects and pod stats. Empty means all namespaces will be used. | apiv1.NamespaceAll -`max-allowed-cpu` | Quantity | Maximum amount of CPU that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed. | Empty (no max allowed cpu by default) -`max-allowed-memory` | Quantity | Maximum amount of memory that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed. | Empty (no max allowed memory by default) +`container-recommendation-max-allowed-cpu` | Quantity | Maximum amount of CPU that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed. | Empty (no max allowed cpu by default) +`container-recommendation-max-allowed-memory` | Quantity | Maximum amount of memory that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed. | Empty (no max allowed memory by default) `memory-aggregation-interval` | Duration | The length of a single interval, for which the peak memory usage is computed. Memory usage peaks are aggregated in multiples of this interval. In other words there is one memory usage sample per interval (the maximum usage over that interval | model.DefaultMemoryAggregationInterval `memory-aggregation-interval-count` | Int64 | The number of consecutive memory-aggregation-intervals which make up the MemoryAggregationWindowLength which in turn is the period for memory usage aggregation by VPA. In other words, MemoryAggregationWindowLength = memory-aggregation-interval * memory-aggregation-interval-count. | model.DefaultMemoryAggregationIntervalCount `memory-histogram-decay-half-life` | Duration | The amount of time it takes a historical memory usage sample to lose half of its weight. In other words, a fresh usage sample is twice as 'important' as one with age equal to the half life period. | model.DefaultMemoryHistogramDecayHalfLife diff --git a/vertical-pod-autoscaler/docs/known-limitations.md b/vertical-pod-autoscaler/docs/known-limitations.md index 02a35e4cd0c7..f115219d9ea5 100644 --- a/vertical-pod-autoscaler/docs/known-limitations.md +++ b/vertical-pod-autoscaler/docs/known-limitations.md @@ -23,7 +23,7 @@ size, available quota) and cause **pods to go pending**. This can be partly addressed by: * using VPA together with [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#basics) - the drawback of this approach is that pods can still get unschedulable if the recommendation exceeds the largest Node's allocatable. - * specifying the `--max-allowed-cpu` and `--max-allowed-memory` flags - the drawback of this approach is that a pod can still get unschedulable if more than one container in the pod is scaled by VPA and the sum of the container recommendations exceeds the largest Node's allocatable. + * specifying the `--container-recommendation-max-allowed-cpu` and `--container-recommendation-max-allowed-memory` flags - the drawback of this approach is that a pod can still get unschedulable if more than one container in the pod is scaled by VPA and the sum of the container recommendations exceeds the largest Node's allocatable. - Multiple VPA resources matching the same pod have undefined behavior. - Running the vpa-recommender with leader election enabled (`--leader-elect=true`) in a GKE cluster causes contention with a lease called `vpa-recommender` held by the GKE system component of the diff --git a/vertical-pod-autoscaler/pkg/recommender/main.go b/vertical-pod-autoscaler/pkg/recommender/main.go index 3e836de18efe..946981bff63f 100644 --- a/vertical-pod-autoscaler/pkg/recommender/main.go +++ b/vertical-pod-autoscaler/pkg/recommender/main.go @@ -119,8 +119,8 @@ const ( ) func main() { - flag.Var(&maxAllowedCPU, "max-allowed-cpu", "Maximum amount of CPU that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed.") - flag.Var(&maxAllowedMemory, "max-allowed-memory", "Maximum amount of memory that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed.") + flag.Var(&maxAllowedCPU, "container-recommendation-max-allowed-cpu", "Maximum amount of CPU that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed.") + flag.Var(&maxAllowedMemory, "container-recommendation-max-allowed-memory", "Maximum amount of memory that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed.") commonFlags := common.InitCommonFlags() klog.InitFlags(nil) From 04e33401fe93d83320414e87f30570cdc24586de Mon Sep 17 00:00:00 2001 From: Ismail Alidzhikov <9372594+ialidzhikov@users.noreply.github.com> Date: Fri, 13 Dec 2024 14:32:26 +0200 Subject: [PATCH 3/4] Address review comment from adrianmoisey --- .../pkg/utils/vpa/capping.go | 33 +++++++++---------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/utils/vpa/capping.go b/vertical-pod-autoscaler/pkg/utils/vpa/capping.go index 271c8f2d6154..f4881ab39d4f 100644 --- a/vertical-pod-autoscaler/pkg/utils/vpa/capping.go +++ b/vertical-pod-autoscaler/pkg/utils/vpa/capping.go @@ -194,33 +194,30 @@ func applyVPAPolicyForContainer(containerName string, process := func(recommendation apiv1.ResourceList) { for resourceName, recommended := range recommendation { + var maxAllowed apiv1.ResourceList // containerPolicy can be nil (user does not have to configure it). if containerPolicy != nil { cappedToMin, _ := maybeCapToPolicyMin(recommended, resourceName, containerPolicy) recommendation[resourceName] = cappedToMin - maxAllowed := containerPolicy.MaxAllowed - if globalMaxAllowed != nil { - if maxAllowed == nil { - maxAllowed = globalMaxAllowed - } else { - // Set resources from the global maxAllowed if the VPA maxAllowed is missing them. - for resourceName, quantity := range globalMaxAllowed { - if _, ok := maxAllowed[resourceName]; !ok { - maxAllowed[resourceName] = quantity - } + maxAllowed = containerPolicy.MaxAllowed + } + + if globalMaxAllowed != nil { + if maxAllowed == nil { + maxAllowed = globalMaxAllowed + } else { + // Set resources from the global maxAllowed if the VPA maxAllowed is missing them. + for resourceName, quantity := range globalMaxAllowed { + if _, ok := maxAllowed[resourceName]; !ok { + maxAllowed[resourceName] = quantity } } } - - cappedToMax, _ := maybeCapToMax(cappedToMin, resourceName, maxAllowed) - recommendation[resourceName] = cappedToMax - } else { - if globalMaxAllowed != nil { - cappedToMax, _ := maybeCapToMax(recommended, resourceName, globalMaxAllowed) - recommendation[resourceName] = cappedToMax - } } + + cappedToMax, _ := maybeCapToMax(recommendation[resourceName], resourceName, maxAllowed) + recommendation[resourceName] = cappedToMax } } From caa47f9d644a1bec7599d6ea9dd5134ea0330d0d Mon Sep 17 00:00:00 2001 From: Ismail Alidzhikov <9372594+ialidzhikov@users.noreply.github.com> Date: Wed, 18 Dec 2024 11:40:05 +0200 Subject: [PATCH 4/4] Address review feedback from raywainman --- .../pkg/recommender/main.go | 25 ++++++----- .../pkg/utils/vpa/capping.go | 16 ++++--- .../pkg/utils/vpa/capping_test.go | 42 ++++++++++++++++++- 3 files changed, 60 insertions(+), 23 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/main.go b/vertical-pod-autoscaler/pkg/recommender/main.go index 946981bff63f..eb864c665af9 100644 --- a/vertical-pod-autoscaler/pkg/recommender/main.go +++ b/vertical-pod-autoscaler/pkg/recommender/main.go @@ -118,10 +118,12 @@ const ( defaultResyncPeriod time.Duration = 10 * time.Minute ) -func main() { +func init() { flag.Var(&maxAllowedCPU, "container-recommendation-max-allowed-cpu", "Maximum amount of CPU that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed.") flag.Var(&maxAllowedMemory, "container-recommendation-max-allowed-memory", "Maximum amount of memory that will be recommended for a container. VerticalPodAutoscaler-level maximum allowed takes precedence over the global maximum allowed.") +} +func main() { commonFlags := common.InitCommonFlags() klog.InitFlags(nil) common.InitLoggingFlags() @@ -221,14 +223,7 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) { postProcessors = append(postProcessors, &routines.IntegerCPUPostProcessor{}) } - var globalMaxAllowed apiv1.ResourceList - if !maxAllowedCPU.Quantity.IsZero() { - setGlobalMaxAllowed(&globalMaxAllowed, apiv1.ResourceCPU, maxAllowedCPU.Quantity) - } - if !maxAllowedMemory.Quantity.IsZero() { - setGlobalMaxAllowed(&globalMaxAllowed, apiv1.ResourceMemory, maxAllowedMemory.Quantity) - } - + globalMaxAllowed := initGlobalMaxAllowed() // CappingPostProcessor, should always come in the last position for post-processing postProcessors = append(postProcessors, routines.NewCappingRecommendationProcessor(globalMaxAllowed)) var source input_metrics.PodMetricsLister @@ -322,10 +317,14 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) { } } -func setGlobalMaxAllowed(globalMaxAllowed *apiv1.ResourceList, key apiv1.ResourceName, value resource.Quantity) { - if *globalMaxAllowed == nil { - *globalMaxAllowed = make(map[apiv1.ResourceName]resource.Quantity, 2) +func initGlobalMaxAllowed() apiv1.ResourceList { + result := make(apiv1.ResourceList) + if !maxAllowedCPU.Quantity.IsZero() { + result[apiv1.ResourceCPU] = maxAllowedCPU.Quantity + } + if !maxAllowedMemory.Quantity.IsZero() { + result[apiv1.ResourceMemory] = maxAllowedMemory.Quantity } - (*globalMaxAllowed)[key] = value + return result } diff --git a/vertical-pod-autoscaler/pkg/utils/vpa/capping.go b/vertical-pod-autoscaler/pkg/utils/vpa/capping.go index f4881ab39d4f..0ab6babee1e1 100644 --- a/vertical-pod-autoscaler/pkg/utils/vpa/capping.go +++ b/vertical-pod-autoscaler/pkg/utils/vpa/capping.go @@ -203,15 +203,13 @@ func applyVPAPolicyForContainer(containerName string, maxAllowed = containerPolicy.MaxAllowed } - if globalMaxAllowed != nil { - if maxAllowed == nil { - maxAllowed = globalMaxAllowed - } else { - // Set resources from the global maxAllowed if the VPA maxAllowed is missing them. - for resourceName, quantity := range globalMaxAllowed { - if _, ok := maxAllowed[resourceName]; !ok { - maxAllowed[resourceName] = quantity - } + if maxAllowed == nil { + maxAllowed = globalMaxAllowed + } else { + // Set resources from the global maxAllowed if the VPA maxAllowed is missing them. + for resourceName, quantity := range globalMaxAllowed { + if _, ok := maxAllowed[resourceName]; !ok { + maxAllowed[resourceName] = quantity } } } diff --git a/vertical-pod-autoscaler/pkg/utils/vpa/capping_test.go b/vertical-pod-autoscaler/pkg/utils/vpa/capping_test.go index 5eb8da249b1f..5be81482fca9 100644 --- a/vertical-pod-autoscaler/pkg/utils/vpa/capping_test.go +++ b/vertical-pod-autoscaler/pkg/utils/vpa/capping_test.go @@ -410,7 +410,7 @@ func TestApplyVPAPolicy(t *testing.T) { }, }, { - Name: "resource policy is nil and global max allowed is set", + Name: "resource policy is nil and global max allowed is set for cpu and memory", PodRecommendation: recommendation, ResourcePolicy: nil, GlobalMaxAllowed: apiv1.ResourceList{ @@ -483,6 +483,46 @@ func TestApplyVPAPolicy(t *testing.T) { }, }, }, + { + Name: "resource policy has max allowed for cpu and global max allowed is set for memory", + PodRecommendation: recommendation, + ResourcePolicy: &vpa_types.PodResourcePolicy{ + ContainerPolicies: []vpa_types.ContainerResourcePolicy{ + { + ContainerName: "foo", + MaxAllowed: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("40m"), + }, + }, + }, + }, + GlobalMaxAllowed: apiv1.ResourceList{ + apiv1.ResourceMemory: resource.MustParse("40Mi"), + }, + Expected: &vpa_types.RecommendedPodResources{ + ContainerRecommendations: []vpa_types.RecommendedContainerResources{ + { + ContainerName: "foo", + Target: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("40m"), + apiv1.ResourceMemory: resource.MustParse("40Mi"), + }, + LowerBound: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("31m"), + apiv1.ResourceMemory: resource.MustParse("31Mi"), + }, + UpperBound: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("40m"), + apiv1.ResourceMemory: resource.MustParse("40Mi"), + }, + UncappedTarget: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("42m"), + apiv1.ResourceMemory: resource.MustParse("42Mi"), + }, + }, + }, + }, + }, { Name: "resource policy has max allowed for cpu and global max allowed is set for cpu and memory", PodRecommendation: recommendation,