Commit
Merge branch 'main' into add-model-ai
varungup90 committed Oct 21, 2024
2 parents 16267a9 + d753c88 commit abc26d7
Showing 24 changed files with 1,146 additions and 361 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/docker-build-images.yml
@@ -6,6 +6,9 @@ on:

jobs:
build:
# This prevents the job from running as other steps cover its functionality.
# We use 'if: false' to keep the file for future reference without deleting it.
if: false
runs-on: ubuntu-latest
steps:
- name: Check out code
22 changes: 20 additions & 2 deletions .github/workflows/release-build.yaml
@@ -24,16 +24,34 @@ jobs:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

# Build container images
# Log in to GitHub Container Registry
- name: Login to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

# Build container images with docker registry namespace
- name: Build Container Images
run: |
GIT_COMMIT_HASH=${{ github.ref_name }} make docker-build-all
# Push container image to container registry
# Push container image to DockerHub
- name: Push container image to container registry
run: |
GIT_COMMIT_HASH=${{ github.ref_name }} make docker-push-all
# Build container images with GitHub registry namespace
- name: Build Container Images with GitHub Container Registry prefix
run: |
GIT_COMMIT_HASH=${{ github.ref_name }} AIBRIX_CONTAINER_REGISTRY_NAMESPACE=ghcr.io/aibrix make docker-build-all
# Push container image to GitHub Container Registry
- name: Push Container Images to GitHub Container Registry
run: |
GIT_COMMIT_HASH=${{ github.ref_name }} AIBRIX_CONTAINER_REGISTRY_NAMESPACE=ghcr.io/aibrix make docker-push-all
python-wheel-release:
runs-on: ubuntu-latest
strategy:
1 change: 1 addition & 0 deletions go.mod
@@ -23,6 +23,7 @@ require (
k8s.io/code-generator v0.29.6
k8s.io/klog/v2 v2.130.1
k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b
k8s.io/metrics v0.29.6
k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0
sigs.k8s.io/controller-runtime v0.17.5
sigs.k8s.io/gateway-api v1.0.0
2 changes: 2 additions & 0 deletions go.sum
@@ -223,6 +223,8 @@ k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b h1:Q9xmGWBvOGd8UJyccgpYlLosk/JlfP3xQLNkQlHJeXw=
k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b/go.mod h1:UxDHUPsUwTOOxSU+oXURfFBcAS6JwiRXTYqYwfuGowc=
k8s.io/metrics v0.29.6 h1:kjMGPYxtCi4OO0fUar76y0CiUoeGYDNmUV0LXJIis4Q=
k8s.io/metrics v0.29.6/go.mod h1:vqGzOaYGuNSSAI7GM1+v6L5z8aAUSzui1W0eQB3wVJY=
k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 h1:jgGTlFYnhF1PM1Ax/lAlxUPE+KfCIXHaathvJg1C3ak=
k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
sigs.k8s.io/controller-runtime v0.17.5 h1:1FI9Lm7NiOOmBsgTV36/s2XrEFXnO2C4sbg/Zme72Rw=
21 changes: 20 additions & 1 deletion pkg/controller/podautoscaler/algorithm/README.md
@@ -1 +1,20 @@
placeholder
## Autoscaling Algorithms


This package provides various scaling algorithms for Pod Autoscaling, including implementations of:
- APA (Adaptive Pod Autoscaler)
- KPA (Knative Pod Autoscaler)
- HPA (Horizontal Pod Autoscaler), and more.

These algorithms are designed to dynamically compute the desired number of replicas based on current pod usage and scaling specifications,
optimizing resource usage and ensuring high availability and performance for workloads.

The `ScalingAlgorithm` interface is the common interface implemented by all scaling algorithms. It requires a single method, `ComputeTargetReplicas`, which calculates the target number of replicas from current metrics and the scaling specification.

```go
type ScalingAlgorithm interface {
ComputeTargetReplicas(currentPodCount float64, context ScalingContext) int32
}
```
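
As a minimal sketch of what an implementation of this interface can look like (a hypothetical example for illustration, not one of the algorithms shipped in this package), the following scales in direct proportion to observed use per pod versus the target value, with no tolerance band and no rate limiting:

```go
package algorithm

import (
	"math"

	"github.com/aibrix/aibrix/pkg/controller/podautoscaler/common"
)

// proportionalScalingAlgorithm is a hypothetical example implementation of
// ScalingAlgorithm: it scales replicas in direct proportion to the ratio of
// observed use per pod to the target value.
type proportionalScalingAlgorithm struct{}

var _ ScalingAlgorithm = (*proportionalScalingAlgorithm)(nil)

func (a *proportionalScalingAlgorithm) ComputeTargetReplicas(currentPodCount float64, context common.ScalingContext) int32 {
	target := context.GetTargetValue()
	if target <= 0 || currentPodCount <= 0 {
		// Nothing sensible to compute; keep the current scale.
		return int32(currentPodCount)
	}
	return int32(math.Ceil(currentPodCount * context.GetCurrentUsePerPod() / target))
}
```
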
32 changes: 32 additions & 0 deletions pkg/controller/podautoscaler/algorithm/algorithm.go
@@ -0,0 +1,32 @@
/*
Copyright 2024 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package algorithm

import "github.com/aibrix/aibrix/pkg/controller/podautoscaler/common"

type ScalingAlgorithm interface {
// ComputeTargetReplicas calculates the number of replicas needed based on current metrics
// and the provided scaling specifications.
//
// Parameters:
// currentPodCount - the current number of ready pods
// context - an interface that provides access to scaling parameters like target values and tolerances
//
// Returns:
// int32 - the calculated target number of replicas
ComputeTargetReplicas(currentPodCount float64, context common.ScalingContext) int32
}
54 changes: 54 additions & 0 deletions pkg/controller/podautoscaler/algorithm/apa.go
@@ -0,0 +1,54 @@
/*
Copyright 2024 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package algorithm

import (
"math"

"github.com/aibrix/aibrix/pkg/controller/podautoscaler/common"
)

type ApaScalingAlgorithm struct{}

var _ ScalingAlgorithm = (*ApaScalingAlgorithm)(nil)

// ComputeTargetReplicas - Apa's algorithm references and enhances the algorithm in the following paper:
// Huo, Qizheng, et al. "High Concurrency Response Strategy based on Kubernetes Horizontal Pod Autoscaler."
// Journal of Physics: Conference Series. Vol. 2451. No. 1. IOP Publishing, 2023.
func (a *ApaScalingAlgorithm) ComputeTargetReplicas(currentPodCount float64, context common.ScalingContext) int32 {
expectedUse := context.GetTargetValue()
upTolerance := context.GetUpFluctuationTolerance()
downTolerance := context.GetDownFluctuationTolerance()
currentUsePerPod := context.GetCurrentUsePerPod()

if currentUsePerPod/expectedUse > (1 + upTolerance) {
maxScaleUp := math.Ceil(context.GetMaxScaleUpRate() * currentPodCount)
expectedPods := int32(math.Ceil(currentPodCount * (currentUsePerPod / expectedUse)))
if float64(expectedPods) > maxScaleUp {
expectedPods = int32(maxScaleUp)
}
return expectedPods
} else if currentUsePerPod/expectedUse < (1 - downTolerance) {
maxScaleDown := math.Floor(currentPodCount / context.GetMaxScaleDownRate())
expectedPods := int32(math.Ceil(currentPodCount * (currentUsePerPod / expectedUse)))
if float64(expectedPods) < maxScaleDown {
expectedPods = int32(maxScaleDown)
}
return expectedPods
}
return int32(currentPodCount)
}
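
For intuition, a worked example with hypothetical numbers: 4 pods observing 0.9 use per pod against a 0.5 target and a 0.1 up tolerance give a ratio of 0.9 / 0.5 = 1.8 > 1.1, so the scale-up branch requests ceil(4 * 1.8) = 8 pods, below the cap of ceil(3 * 4) = 12 implied by a max scale-up rate of 3. The following sketch exercises this; the stub context is hand-rolled for illustration and assumes ScalingContext exposes exactly the six getters used above:

```go
package algorithm_test

import (
	"fmt"

	"github.com/aibrix/aibrix/pkg/controller/podautoscaler/algorithm"
)

// stubContext is an illustrative, hard-coded ScalingContext; real callers
// would populate these values from observed metrics and the autoscaler spec.
type stubContext struct{}

func (stubContext) GetTargetValue() float64              { return 0.5 } // target use per pod
func (stubContext) GetUpFluctuationTolerance() float64   { return 0.1 }
func (stubContext) GetDownFluctuationTolerance() float64 { return 0.1 }
func (stubContext) GetMaxScaleUpRate() float64           { return 3 }
func (stubContext) GetMaxScaleDownRate() float64         { return 2 }
func (stubContext) GetCurrentUsePerPod() float64         { return 0.9 } // observed use per pod

func ExampleApaScalingAlgorithm() {
	a := &algorithm.ApaScalingAlgorithm{}
	// 0.9/0.5 = 1.8 exceeds 1 + 0.1, so scale up: ceil(4 * 1.8) = 8, under the cap ceil(3 * 4) = 12.
	fmt.Println(a.ComputeTargetReplicas(4, stubContext{}))
	// Output: 8
}
```
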
29 changes: 29 additions & 0 deletions pkg/controller/podautoscaler/algorithm/hpa.go
@@ -0,0 +1,29 @@
/*
Copyright 2024 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package algorithm

import "github.com/aibrix/aibrix/pkg/controller/podautoscaler/common"

// HpaScalingAlgorithm can be used by any scaler that does not need a customized algorithm
type HpaScalingAlgorithm struct{}

var _ ScalingAlgorithm = (*HpaScalingAlgorithm)(nil)

func (a *HpaScalingAlgorithm) ComputeTargetReplicas(currentPodCount float64, context common.ScalingContext) int32 {
// TODO: implement me!
return int32(currentPodCount)
}
51 changes: 51 additions & 0 deletions pkg/controller/podautoscaler/algorithm/kpa.go
@@ -0,0 +1,51 @@
/*
Copyright 2024 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package algorithm

import (
"math"

"github.com/aibrix/aibrix/pkg/controller/podautoscaler/common"
)

type KpaScalingAlgorithm struct{}

var _ ScalingAlgorithm = (*KpaScalingAlgorithm)(nil)

func (a *KpaScalingAlgorithm) ComputeTargetReplicas(currentPodCount float64, context common.ScalingContext) int32 {
expectedUse := context.GetTargetValue()
upTolerance := context.GetUpFluctuationTolerance()
downTolerance := context.GetDownFluctuationTolerance()
currentUsePerPod := context.GetCurrentUsePerPod()

if currentUsePerPod/expectedUse > (1 + upTolerance) {
maxScaleUp := math.Ceil(context.GetMaxScaleUpRate() * currentPodCount)
expectedPods := int32(math.Ceil(currentPodCount * (currentUsePerPod / expectedUse)))
if float64(expectedPods) > maxScaleUp {
expectedPods = int32(maxScaleUp)
}
return expectedPods
} else if currentUsePerPod/expectedUse < (1 - downTolerance) {
maxScaleDown := math.Floor(currentPodCount / context.GetMaxScaleDownRate())
expectedPods := int32(math.Ceil(currentPodCount * (currentUsePerPod / expectedUse)))
if float64(expectedPods) < maxScaleDown {
expectedPods = int32(maxScaleDown)
}
return expectedPods
}
return int32(currentPodCount)
}
71 changes: 71 additions & 0 deletions pkg/controller/podautoscaler/common/context.go
@@ -0,0 +1,71 @@
/*
Copyright 2024 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package common

// ScalingContext defines the generalized context that holds all necessary data for scaling calculations.
type ScalingContext interface {
GetTargetValue() float64
GetUpFluctuationTolerance() float64
GetDownFluctuationTolerance() float64
GetMaxScaleUpRate() float64
GetMaxScaleDownRate() float64
GetCurrentUsePerPod() float64
}

// BaseScalingContext provides a base implementation of the ScalingContext interface.
type BaseScalingContext struct {
currentUsePerPod float64
targetValue float64
upTolerance float64
downTolerance float64
}

func (b *BaseScalingContext) SetCurrentUsePerPod(value float64) {
b.currentUsePerPod = value
}

func (b *BaseScalingContext) GetUpFluctuationTolerance() float64 {
//TODO implement me
panic("implement me")
}

func (b *BaseScalingContext) GetDownFluctuationTolerance() float64 {
//TODO implement me
panic("implement me")
}

func (b *BaseScalingContext) GetMaxScaleUpRate() float64 {
//TODO implement me
panic("implement me")
}

func (b *BaseScalingContext) GetMaxScaleDownRate() float64 {
//TODO implement me
panic("implement me")
}

func (b *BaseScalingContext) GetCurrentUsePerPod() float64 {
return b.currentUsePerPod
}

func (b *BaseScalingContext) GetTargetValue() float64 {
return b.targetValue
}

func (b *BaseScalingContext) GetScalingTolerance() (up float64, down float64) {
return b.upTolerance, b.downTolerance
}
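
Several of the getters above are still TODO stubs that panic. A hedged sketch (illustrative, not part of this commit) of how a fully field-backed ScalingContext could look, assuming the tolerance and rate values are stored as fields alongside the existing ones:

```go
package common

// sketchScalingContext is a hypothetical, fully field-backed implementation of
// ScalingContext, useful as a reference or in tests until BaseScalingContext
// fills in its remaining getters.
type sketchScalingContext struct {
	currentUsePerPod float64
	targetValue      float64
	upTolerance      float64
	downTolerance    float64
	maxScaleUpRate   float64
	maxScaleDownRate float64
}

var _ ScalingContext = (*sketchScalingContext)(nil)

func (c *sketchScalingContext) GetTargetValue() float64              { return c.targetValue }
func (c *sketchScalingContext) GetUpFluctuationTolerance() float64   { return c.upTolerance }
func (c *sketchScalingContext) GetDownFluctuationTolerance() float64 { return c.downTolerance }
func (c *sketchScalingContext) GetMaxScaleUpRate() float64           { return c.maxScaleUpRate }
func (c *sketchScalingContext) GetMaxScaleDownRate() float64         { return c.maxScaleDownRate }
func (c *sketchScalingContext) GetCurrentUsePerPod() float64         { return c.currentUsePerPod }
```
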
5 changes: 3 additions & 2 deletions pkg/controller/podautoscaler/hpa_resources.go
@@ -42,6 +42,7 @@ func getHPANameFromPa(pa *pav1.PodAutoscaler) string {
// MakeHPA creates an HPA resource from a PodAutoscaler resource.
func makeHPA(pa *pav1.PodAutoscaler) *autoscalingv2.HorizontalPodAutoscaler {
minReplicas, maxReplicas := pa.Spec.MinReplicas, pa.Spec.MaxReplicas
// TODO: add validation logic; maxReplicas has to be larger than minReplicas
if maxReplicas == 0 {
maxReplicas = math.MaxInt32 // Set default to no upper limit if not specified
}
@@ -75,14 +76,14 @@ func makeHPA(pa *pav1.PodAutoscaler) *autoscalingv2.HorizontalPodAutoscaler {

switch strings.ToLower(pa.Spec.TargetMetric) {
case pav1.CPU:
utilValue := int32(math.Ceil(targetValue))
cpu := int32(math.Ceil(targetValue))
hpa.Spec.Metrics = []autoscalingv2.MetricSpec{{
Type: autoscalingv2.ResourceMetricSourceType,
Resource: &autoscalingv2.ResourceMetricSource{
Name: corev1.ResourceCPU,
Target: autoscalingv2.MetricTarget{
Type: autoscalingv2.UtilizationMetricType,
AverageUtilization: &utilValue,
AverageUtilization: &cpu,
},
},
}}
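
The TODO above about validating maxReplicas against minReplicas could take roughly the following shape. This is a hypothetical helper, not part of the commit; it assumes both bounds are plain int32 values and that the file's package is podautoscaler, as the path suggests:

```go
package podautoscaler

import "math"

// clampMaxReplicas is a hypothetical helper sketching the TODO in makeHPA:
// default maxReplicas when it is unset and keep it no smaller than minReplicas.
func clampMaxReplicas(minReplicas, maxReplicas int32) int32 {
	if maxReplicas == 0 {
		return math.MaxInt32 // no upper limit specified
	}
	if maxReplicas < minReplicas {
		return minReplicas // keep the bounds consistent so the generated HPA spec is valid
	}
	return maxReplicas
}
```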