Skip to content

Commit

Permalink
[Bug] Fix the way how podautoscaler handle 0 pods. (#508)
Browse files Browse the repository at this point in the history
* Bug fix

* Make APA comfortable with 0 originalReadyPodsCount for completeness.

* Verify ActivationScale > 1 settings.
Keep readyPodsCount value intact and fix affected code.

* Bug fix

* Document the new logic in comments.

---------

Co-authored-by: Jingyuan Zhang <[email protected]>
  • Loading branch information
zhangjyr and Jingyuan Zhang authored Dec 9, 2024
1 parent bea4f87 commit 4ebf0ff
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 11 deletions.
2 changes: 1 addition & 1 deletion pkg/controller/podautoscaler/podautoscaler_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ func (r *PodAutoscalerReconciler) reconcileCustomPA(ctx context.Context, pa auto
// computeReplicasForMetrics gives
// TODO: check why it return the metrics name here?
metricDesiredReplicas, metricName, metricTimestamp, err := r.computeReplicasForMetrics(ctx, pa, scale, metricKey)
if err != nil && metricDesiredReplicas == -1 {
if err != nil {
r.setCurrentReplicasAndMetricsInStatus(&pa, currentReplicas)
if err := r.updateStatusIfNeeded(ctx, paStatusOriginal, &pa); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update the resource status")
Expand Down
5 changes: 5 additions & 0 deletions pkg/controller/podautoscaler/scaler/apa.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ func (a *ApaAutoscaler) Scale(originalReadyPodsCount int, metricKey metrics.Name
return ScaleResult{}
}

if originalReadyPodsCount == 0 {
klog.Errorf("Unexpected pod count for %s: %d", metricKey, originalReadyPodsCount)
return ScaleResult{}
}

currentUsePerPod := observedValue / float64(originalReadyPodsCount)
spec.SetCurrentUsePerPod(currentUsePerPod)

Expand Down
36 changes: 27 additions & 9 deletions pkg/controller/podautoscaler/scaler/kpa.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,10 +249,23 @@ func (k *KpaAutoscaler) Scale(originalReadyPodsCount int, metricKey metrics.Name
return ScaleResult{}
}

// Use 1 if there are zero current pods.
readyPodsCount := math.Max(1, float64(originalReadyPodsCount))
maxScaleUp := math.Ceil(spec.MaxScaleUpRate * readyPodsCount)
maxScaleDown := math.Floor(readyPodsCount / spec.MaxScaleDownRate)
// Old logic:
// readyPodsCount = min(1, originalReadyPodsCount)
// maxScaleUp = ceil(spec.MaxScaleUpRate*readyPodsCount)
// maxScaleDown = floor(readyPodsCount / spec.MaxScaleDownRate)
//
// The problems with old way was:
// 1. readyPodsCount did not reflect real pods count in "KPA Details" log.
// 2. If originalReadyPodsCount == 0 and spec.MaxScaleDownRate == 1, maxScaleDown will reset to 1,
// preventing down scale to 0.
//
// New implementation does follows:
// 1. readyPodsCount now reflects real pods count.
// 2. maxScaleUp is at lease to 1 to ensure 0 to 1 activation.
// 3. maxScaleDown will remain 0 in case spec.MaxScaleDownRate == 0
readyPodsCount := math.Max(0, float64(originalReadyPodsCount)) // A little sanitizing.
maxScaleUp := math.Max(1, math.Ceil(spec.MaxScaleUpRate*readyPodsCount)) // Keep scale up non zero
maxScaleDown := math.Floor(readyPodsCount / spec.MaxScaleDownRate) // Make scale down zero-able

dspc := math.Ceil(observedStableValue / spec.TargetValue)
dppc := math.Ceil(observedPanicValue / spec.TargetValue)
Expand All @@ -263,15 +276,17 @@ func (k *KpaAutoscaler) Scale(originalReadyPodsCount int, metricKey metrics.Name

// If ActivationScale > 1, then adjust the desired pod counts
if k.scalingContext.ActivationScale > 1 {
if k.scalingContext.ActivationScale > desiredStablePodCount {
// ActivationScale only makes sense if activated (desired > 0)
if k.scalingContext.ActivationScale > desiredStablePodCount && desiredStablePodCount > 0 {
desiredStablePodCount = k.scalingContext.ActivationScale
}
if k.scalingContext.ActivationScale > desiredPanicPodCount {
if k.scalingContext.ActivationScale > desiredPanicPodCount && desiredPanicPodCount > 0 {
desiredPanicPodCount = k.scalingContext.ActivationScale
}
}

isOverPanicThreshold := dppc/readyPodsCount >= spec.PanicThreshold
// Now readyPodsCount can be 0, use max(1, readyPodsCount) to prevent error.
isOverPanicThreshold := dppc/math.Max(1, readyPodsCount) >= spec.PanicThreshold

klog.V(4).InfoS("--- KPA Details", "readyPodsCount", readyPodsCount,
"MaxScaleUpRate", spec.MaxScaleUpRate, "MaxScaleDownRate", spec.MaxScaleDownRate,
Expand Down Expand Up @@ -313,7 +328,7 @@ func (k *KpaAutoscaler) Scale(originalReadyPodsCount int, metricKey metrics.Name
}
desiredPodCount = k.maxPanicPods
} else {
klog.InfoS("Operating in stable mode.")
klog.InfoS("Operating in stable mode.", "desiredPodCount", desiredPodCount)
}

// Delay scale down decisions, if a ScaleDownDelay was specified.
Expand All @@ -322,8 +337,9 @@ func (k *KpaAutoscaler) Scale(originalReadyPodsCount int, metricKey metrics.Name
// not the same in the case where two Scale()s happen in the same time
// interval (because the largest will be picked rather than the most recent
// in that case).
klog.V(4).InfoS("DelayWindow details", "delayWindow", k.delayWindow.String())
if k.delayWindow != nil {
klog.V(4).InfoS("DelayWindow details", "delayWindow", k.delayWindow.String())

// the actual desiredPodCount will be recorded, but return the max replicas during passed delayWindow
k.delayWindow.Record(now, float64(desiredPodCount))
delayedPodCount, err := k.delayWindow.Max()
Expand All @@ -338,6 +354,8 @@ func (k *KpaAutoscaler) Scale(originalReadyPodsCount int, metricKey metrics.Name
)
desiredPodCount = int32(delayedPodCount)
}
} else {
klog.V(4).InfoS("No DelayWindow set")
}

// Compute excess burst capacity
Expand Down
2 changes: 1 addition & 1 deletion pkg/utils/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ func GetPodListByLabelSelector(ctx context.Context, podLister client.Client, nam

func CountReadyPods(podList *v1.PodList) (int64, error) {
if podList == nil || len(podList.Items) == 0 {
return 0, fmt.Errorf("no pods provided or list is empty")
return 0, nil
}

readyPodCount := 0
Expand Down

0 comments on commit 4ebf0ff

Please sign in to comment.