Merge branch 'main' into add-model-ai

vllm-project · Oct 23, 2024 · e57d7e3 · e57d7e3
2 parents abc26d7 + 5d8d843
commit e57d7e3
Show file tree

Hide file tree

Showing 15 changed files with 207 additions and 170 deletions.
diff --git a/README.md b/README.md
@@ -34,10 +34,10 @@ kubectl create -k config/default
 Install stable distribution
 ```shell
 # Install component dependencies
-kubectl create -k "github.com/aibrix/aibrix/config/dependency?ref=v0.1.0-rc.3"
+kubectl create -k "github.com/aibrix/aibrix/config/dependency?ref=v0.1.0-rc.4"
 
 # Install aibrix components
-kubectl create -k "github.com/aibrix/aibrix/config/default?ref=v0.1.0-rc.3"
+kubectl create -k "github.com/aibrix/aibrix/config/default?ref=v0.1.0-rc.4"
 ```
 
 ## Documentation

diff --git a/config/overlays/vke/default/kustomization.yaml b/config/overlays/vke/default/kustomization.yaml
@@ -12,5 +12,5 @@ resources:
 
 images:
 - name: quay.io/kuberay/operator
-  newName: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/quay.io/kuberay/operator
-  newTag: v1.2.1
+  newName: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/kuberay-operator
+  newTag: v1.2.1-patch
diff --git a/docs/source/development/release.rst b/docs/source/development/release.rst
@@ -20,9 +20,8 @@ For new minor version release like ``v0.1.0``, please checkout a new branch name
 
 .. code-block:: bash
 
+    git checkout main && git fetch origin main && git rebase origin/main
     git checkout -b release-0.1 # cut from main branch
-    git fetch origin main
-    git rebase origin/main
     git push origin release-0.1
 
 .. note::
@@ -31,6 +30,7 @@ For new minor version release like ``v0.1.0``, please checkout a new branch name
 Option 2: patch version release
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+Cut a PR to sync ``main`` branch changes to ``release-0.1``; an example PR is `Sync main branch changes to release-0.1 for rc4 release <https://github.com/aibrix/aibrix/pull/312>`_.
 For patch versions like ``v0.1.1``, please reuse the release branch ``release-0.1``, which should have been created earlier during the minor version release.
 For patch releases, we do not rebase ``main`` because doing so would introduce new features. All fixes have to be cherry-picked or submitted as PRs against ``release-0.1`` directly.
 
@@ -114,5 +114,5 @@ we need to retag the images and push to VKE Container Registry.
 
 .. code-block:: bash
 
-    ./hack/sync-images.sh v0.1.0-rc.3 aibrix-container-registry-cn-beijing.cr.volces.com
-    ./hack/sync-images.sh v0.1.0-rc.3 aibrix-container-registry-cn-shanghai.cr.volces.com
+    ./hack/sync-images.sh v0.1.0-rc.4 aibrix-container-registry-cn-beijing.cr.volces.com
+    ./hack/sync-images.sh v0.1.0-rc.4 aibrix-container-registry-cn-shanghai.cr.volces.com
diff --git a/docs/source/getting_started/installation.rst b/docs/source/getting_started/installation.rst
@@ -22,8 +22,8 @@ Stable Version
 
 .. code:: bash
 
-    kubectl apply -f https://github.com/aibrix/aibrix/releases/download/v0.1.0-rc.3/aibrix-dependency-v0.1.0-rc.3.yaml
-    kubectl apply -f https://github.com/aibrix/aibrix/releases/download/v0.1.0-rc.3/aibrix-core-v0.1.0-rc.3.yaml
+    kubectl apply -f https://github.com/aibrix/aibrix/releases/download/v0.1.0-rc.4/aibrix-dependency-v0.1.0-rc.4.yaml
+    kubectl apply -f https://github.com/aibrix/aibrix/releases/download/v0.1.0-rc.4/aibrix-core-v0.1.0-rc.4.yaml
 
 
 Nightly Version

diff --git a/docs/source/getting_started/quickstart.rst b/docs/source/getting_started/quickstart.rst
@@ -12,8 +12,8 @@ Install AIBrix
 
 .. code-block:: bash
 
-    kubectl apply -f https://github.com/aibrix/aibrix/releases/download/v0.1.0-rc.3/aibrix-dependency-v0.1.0-rc.3.yaml
-    kubectl apply -f https://github.com/aibrix/aibrix/releases/download/v0.1.0-rc.3/aibrix-core-v0.1.0-rc.3.yaml
+    kubectl apply -f https://github.com/aibrix/aibrix/releases/download/v0.1.0-rc.4/aibrix-dependency-v0.1.0-rc.4.yaml
+    kubectl apply -f https://github.com/aibrix/aibrix/releases/download/v0.1.0-rc.4/aibrix-core-v0.1.0-rc.4.yaml
 
 
 Deploy base model

diff --git a/docs/tutorial/podautoscaler/README.md b/docs/tutorial/podautoscaler/README.md
@@ -13,14 +13,9 @@ Go into the root directory:
 cd $AIBrix_HOME
 ```
 
-First, build and install the Custom Resource Definitions (CRDs) for AIBrix:
+First, please refer to the [README](../../../README.md) to build and install AIBrix.
 
-```shell
-
-make manifests && make build && make install
-```
-
-Verify the installation:
+After building the manager, you can verify the installation:
 
 ```shell
 kubectl get crds | grep podautoscalers
@@ -62,8 +57,8 @@ kubectl port-forward svc/llama2-70b 8000:8000 -n aibrix-system
 This differs from `make run`, since it may reveal RBAC problems when the manager wants to watch HPA.
 
 ```shell
-make docker-build IMG=aibrix/aibrix-controller-manager:v0.1.0-rc.3
-make deploy IMG=aibrix/aibrix-controller-manager:v0.1.0-rc.3
+make docker-build IMG=aibrix/aibrix-controller-manager:v0.1.0-rc.4
+make deploy IMG=aibrix/aibrix-controller-manager:v0.1.0-rc.4
 ```
 
 check the deployed manager logs:
@@ -384,7 +379,9 @@ Events:
 
 # Cleanup
 
-To clean up the resources:
+Please refer to the [README](../../../README.md) to uninstall AIBrix.
+
+To clean up the additional resources created in this tutorial:
 
 ```shell
 # Remove AIBrix resources

diff --git a/hack/sync-images.sh b/hack/sync-images.sh
@@ -4,11 +4,11 @@
 if [ -z "$1" ] || [ -z "$2" ]; then
     echo "Error: Missing required parameters."
     echo "Usage: $0 <version> <region>"
-    echo "Example: $0 v0.1.0-rc.3 aibrix-container-registry-cn-beijing.cr.volces.com"
+    echo "Example: $0 v0.1.0-rc.4 aibrix-container-registry-cn-beijing.cr.volces.com"
     exit 1
 fi
 
-# aibrix tag，e.g. v0.1.0-rc.3
+# aibrix tag, e.g. v0.1.0-rc.4
 # registry, e.g. aibrix-container-registry-cn-beijing.cr.volces.com
 VERSION=$1
 REGISTRY=$2

diff --git a/pkg/controller/podautoscaler/common/context.go b/pkg/controller/podautoscaler/common/context.go
@@ -28,10 +28,29 @@ type ScalingContext interface {
 
 // BaseScalingContext provides a base implementation of the ScalingContext interface.
 type BaseScalingContext struct {
+	// Maximum rate at which to scale up
+	MaxScaleUpRate float64
+	// Maximum rate at which to scale down, a value of 2.5 means the count can reduce to at most 2.5 times less than the current value in one step.
+	MaxScaleDownRate float64
+	// The metric used for scaling, i.e. CPU, Memory, QPS.
+	ScalingMetric string
+	// The value of scaling metric per pod that we target to maintain.
+	TargetValue float64
+	// The total value of scaling metric that a pod can maintain.
+	TotalValue float64
+	// The current use per pod.
 	currentUsePerPod float64
-	targetValue      float64
-	upTolerance      float64
-	downTolerance    float64
+}
+
+// NewBaseScalingContext creates a new instance of BaseScalingContext with default values.
+func NewBaseScalingContext() *BaseScalingContext {
+	return &BaseScalingContext{
+		MaxScaleUpRate:   2,     // Scale up rate of 200%, allowing rapid scaling
+		MaxScaleDownRate: 2,     // Scale down rate of 50%, for more gradual reduction
+		ScalingMetric:    "CPU", // Metric used for scaling, here set to CPU utilization
+		TargetValue:      30.0,  // Target CPU utilization set at 30%
+		TotalValue:       100.0, // Total CPU utilization capacity for pods is 100%
+	}
 }
 
 func (b *BaseScalingContext) SetCurrentUsePerPod(value float64) {
@@ -49,23 +68,21 @@ func (b *BaseScalingContext) GetDownFluctuationTolerance() float64 {
 }
 
 func (b *BaseScalingContext) GetMaxScaleUpRate() float64 {
-	//TODO implement me
-	panic("implement me")
+	return b.MaxScaleUpRate
 }
 
 func (b *BaseScalingContext) GetMaxScaleDownRate() float64 {
-	//TODO implement me
-	panic("implement me")
+	return b.MaxScaleDownRate
 }
 
 func (b *BaseScalingContext) GetCurrentUsePerPod() float64 {
 	return b.currentUsePerPod
 }
 
 func (b *BaseScalingContext) GetTargetValue() float64 {
-	return b.targetValue
+	return b.TargetValue
 }
 
 func (b *BaseScalingContext) GetScalingTolerance() (up float64, down float64) {
-	return b.upTolerance, b.downTolerance
+	return b.MaxScaleUpRate, b.MaxScaleDownRate
 }
diff --git a/pkg/controller/podautoscaler/metrics/client.go b/pkg/controller/podautoscaler/metrics/client.go
@@ -21,57 +21,20 @@ import (
 	"fmt"
 	"sync"
 
+	corev1 "k8s.io/api/core/v1"
+
 	"github.com/aibrix/aibrix/pkg/controller/podautoscaler/aggregation"
 	"k8s.io/klog/v2"
 
-	corev1 "k8s.io/api/core/v1"
-
 	"time"
 )
 
 const (
 	metricServerDefaultMetricWindow = time.Minute
 )
 
-type PodMetricClient struct {
-	fetcher MetricFetcher
-}
-
-func (c *PodMetricClient) GetPodContainerMetric(ctx context.Context, pod corev1.Pod, metricName string, metricPort int) (PodMetricsInfo, time.Time, error) {
-	_, err := c.fetcher.FetchPodMetrics(ctx, pod, metricPort, metricName)
-	currentTimestamp := time.Now()
-	if err != nil {
-		return nil, currentTimestamp, err
-	}
-
-	// TODO(jiaxin.shan): convert this raw metric to PodMetrics
-	return nil, currentTimestamp, nil
-}
-
-func (c *PodMetricClient) GetMetricsFromPods(ctx context.Context, pods []corev1.Pod, metricName string, metricPort int) ([]float64, error) {
-	metrics := make([]float64, 0, len(pods))
-	for _, pod := range pods {
-		// TODO: Let's optimize the performance for multi-metrics later.
-		metric, err := c.fetcher.FetchPodMetrics(ctx, pod, metricPort, metricName)
-		if err != nil {
-			return nil, err
-		}
-		metrics = append(metrics, metric)
-	}
-	return metrics, nil
-}
-
-func (c *PodMetricClient) UpdatePodListMetric(metricValues []float64, metricKey NamespaceNameMetric, now time.Time) error {
-	// different metrics client implementation should implement this method
-	panic("implement me")
-}
-
-func NewMetricsClient(fetcher MetricFetcher) *PodMetricClient {
-	return &PodMetricClient{fetcher: fetcher}
-}
-
 type KPAMetricsClient struct {
-	*PodMetricClient
+	fetcher MetricFetcher
 
 	// collectionsMutex protects access to both panicWindowDict and stableWindowDict,
 	// ensuring thread-safe read and write operations. It uses a read-write mutex to
@@ -97,12 +60,11 @@ var _ MetricClient = (*KPAMetricsClient)(nil)
 
 // NewKPAMetricsClient initializes and returns a KPAMetricsClient with specified durations.
 func NewKPAMetricsClient(fetcher MetricFetcher) *KPAMetricsClient {
-	podMetricClient := NewMetricsClient(fetcher)
 	client := &KPAMetricsClient{
-		PodMetricClient:  podMetricClient,
+		fetcher:          fetcher,
 		stableDuration:   60 * time.Second,
 		panicDuration:    10 * time.Second,
-		granularity:      time.Second, //TODO: check with rong, is the granularity too small?
+		granularity:      time.Second,
 		panicWindowDict:  make(map[NamespaceNameMetric]*aggregation.TimeWindow),
 		stableWindowDict: make(map[NamespaceNameMetric]*aggregation.TimeWindow),
 	}
@@ -175,9 +137,16 @@ func (c *KPAMetricsClient) StableAndPanicMetrics(
 	return stableValue, panicValue, nil
 }
 
-type APAMetricsClient struct {
-	*PodMetricClient
+func (c *KPAMetricsClient) GetPodContainerMetric(ctx context.Context, pod corev1.Pod, metricName string, metricPort int) (PodMetricsInfo, time.Time, error) {
+	return GetPodContainerMetric(ctx, c.fetcher, pod, metricName, metricPort)
+}
+
+func (c *KPAMetricsClient) GetMetricsFromPods(ctx context.Context, pods []corev1.Pod, metricName string, metricPort int) ([]float64, error) {
+	return GetMetricsFromPods(ctx, c.fetcher, pods, metricName, metricPort)
+}
 
+type APAMetricsClient struct {
+	fetcher MetricFetcher
 	// collectionsMutex protects access to both panicWindowDict and stableWindowDict,
 	// ensuring thread-safe read and write operations. It uses a read-write mutex to
 	// allow multiple concurrent reads while preventing race conditions during write
@@ -199,13 +168,11 @@ var _ MetricClient = (*APAMetricsClient)(nil)
 
 // NewAPAMetricsClient initializes and returns an APAMetricsClient with default durations.
 func NewAPAMetricsClient(fetcher MetricFetcher) *APAMetricsClient {
-	podMetricClient := NewMetricsClient(fetcher)
-
 	client := &APAMetricsClient{
-		PodMetricClient: podMetricClient,
-		duration:        60 * time.Second,
-		granularity:     time.Second,
-		windowDict:      make(map[NamespaceNameMetric]*aggregation.TimeWindow),
+		fetcher:     fetcher,
+		duration:    60 * time.Second,
+		granularity: time.Second,
+		windowDict:  make(map[NamespaceNameMetric]*aggregation.TimeWindow),
 	}
 	return client
 }
@@ -265,3 +232,11 @@ func (c *APAMetricsClient) GetMetricValue(
 
 	return metricValue, nil
 }
+
+func (c *APAMetricsClient) GetPodContainerMetric(ctx context.Context, pod corev1.Pod, metricName string, metricPort int) (PodMetricsInfo, time.Time, error) {
+	return GetPodContainerMetric(ctx, c.fetcher, pod, metricName, metricPort)
+}
+
+func (c *APAMetricsClient) GetMetricsFromPods(ctx context.Context, pods []corev1.Pod, metricName string, metricPort int) ([]float64, error) {
+	return GetMetricsFromPods(ctx, c.fetcher, pods, metricName, metricPort)
+}
diff --git a/pkg/controller/podautoscaler/metrics/utils.go b/pkg/controller/podautoscaler/metrics/utils.go
@@ -17,9 +17,13 @@ limitations under the License.
 package metrics
 
 import (
+	"context"
 	"fmt"
 	"strconv"
 	"strings"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
 )
 
 func ParseMetricFromBody(body []byte, metricName string) (float64, error) {
@@ -89,3 +93,27 @@ func GetMetricUsageRatio(metrics PodMetricsInfo, targetUsage int64) (usageRatio
 
 	return float64(currentUsage) / float64(targetUsage), currentUsage
 }
+
+func GetPodContainerMetric(ctx context.Context, fetcher MetricFetcher, pod corev1.Pod, metricName string, metricPort int) (PodMetricsInfo, time.Time, error) {
+	_, err := fetcher.FetchPodMetrics(ctx, pod, metricPort, metricName)
+	currentTimestamp := time.Now()
+	if err != nil {
+		return nil, currentTimestamp, err
+	}
+
+	// TODO(jiaxin.shan): convert this raw metric to PodMetrics
+	return nil, currentTimestamp, nil
+}
+
+func GetMetricsFromPods(ctx context.Context, fetcher MetricFetcher, pods []corev1.Pod, metricName string, metricPort int) ([]float64, error) {
+	metrics := make([]float64, 0, len(pods))
+	for _, pod := range pods {
+		// TODO: Let's optimize the performance for multi-metrics later.
+		metric, err := fetcher.FetchPodMetrics(ctx, pod, metricPort, metricName)
+		if err != nil {
+			return nil, err
+		}
+		metrics = append(metrics, metric)
+	}
+	return metrics, nil
+}