diff --git a/development/app/requirements.txt b/development/app/requirements.txt
index e529cf21..8f587987 100644
--- a/development/app/requirements.txt
+++ b/development/app/requirements.txt
@@ -8,4 +8,5 @@ ddsketch
 plotly_express
 fasteners
 transformers
-git+https://github.com/zhangjyr/vidur.git
\ No newline at end of file
+git+https://github.com/zhangjyr/vidur.git
+ray[default]
\ No newline at end of file
diff --git a/development/tutorials/distributed/fleet.yaml b/development/tutorials/distributed/fleet.yaml
index 1333c304..f08e77ad 100644
--- a/development/tutorials/distributed/fleet.yaml
+++ b/development/tutorials/distributed/fleet.yaml
@@ -3,7 +3,7 @@ kind: RayClusterFleet
 metadata:
   labels:
     app.kubernetes.io/name: aibrix
-    app.kubernetes.io/managed-by: kustomize
+    model.aibrix.ai/name: facebook-opt-13b
   name: facebook-opt-13b
 spec:
   replicas: 1
@@ -20,13 +20,17 @@ spec:
         labels:
           model.aibrix.ai/name: facebook-opt-13b
         annotations:
-          ray.io/overwrite-container-cmd: "true"
+          ray.io/overwrite-container-cmd: "true"
       spec:
-        rayVersion: '2.10.0' # should match the Ray version in the image of the containers
+        rayVersion: "2.10.0"
         headGroupSpec:
           rayStartParams:
             dashboard-host: '0.0.0.0'
+            block: 'false'
           template:
+            metadata:
+              labels:
+                model.aibrix.ai/name: facebook-opt-13b
             spec:
               containers:
               - name: ray-head
@@ -40,35 +44,35 @@ spec:
                   name: client
                 - containerPort: 8000
                   name: service
-                command: ["/bin/bash", "-lc", "--"]
-                args: ["ulimit -n 65536; echo head; $KUBERAY_GEN_RAY_START_CMD; vllm serve facebook/opt-125m --tensor-parallel-size 2 --distributed-executor-backend ray"]
+                command: ["/bin/bash", "-lc", "--"]
+                args: ["ulimit -n 65536; echo head; $KUBERAY_GEN_RAY_START_CMD; sleep 600"]
                 resources:
                   limits:
-                    cpu: "1000m"
-                    nvidia.com/gpu: 1
+                    cpu: 1000m
                   requests:
-                    cpu: "200m"
-                    nvidia.com/gpu: 1
+                    cpu: 200m
         workerGroupSpecs:
-        # the pod replicas in this group typed worker
-        - replicas: 1
+        - replicas: 2
           minReplicas: 1
           maxReplicas: 5
           groupName: small-group
          rayStartParams: {}
           template:
+            metadata:
+              labels:
+                model.aibrix.ai/name: facebook-opt-13b
             spec:
               containers:
-              - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc')
-                image: rayproject/ray:2.10.0
+              - name: ray-worker
+                image: 'rayproject/ray:2.10.0'
+                command: [ "/bin/bash", "-lc", "--" ]
+                args: [ "ulimit -n 65536; echo head; $KUBERAY_GEN_RAY_START_CMD;" ]
                 lifecycle:
                   preStop:
                     exec:
                       command: [ "/bin/sh","-c","ray stop" ]
                 resources:
                   limits:
-                    cpu: "1000m"
-                    nvidia.com/gpu: 1
+                    cpu: 1000m
                   requests:
-                    cpu: "200m"
-                    nvidia.com/gpu: 1
+                    cpu: 200m
diff --git a/pkg/controller/podautoscaler/podautoscaler_controller.go b/pkg/controller/podautoscaler/podautoscaler_controller.go
index 60cf72d0..75eeb433 100644
--- a/pkg/controller/podautoscaler/podautoscaler_controller.go
+++ b/pkg/controller/podautoscaler/podautoscaler_controller.go
@@ -22,8 +22,11 @@ import (
 	"time"
 
 	autoscalingv1alpha1 "github.com/vllm-project/aibrix/api/autoscaling/v1alpha1"
+	orchestrationv1alpha1 "github.com/vllm-project/aibrix/api/orchestration/v1alpha1"
 	"github.com/vllm-project/aibrix/pkg/config"
 	"github.com/vllm-project/aibrix/pkg/controller/podautoscaler/metrics"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/selection"
 
 	"github.com/vllm-project/aibrix/pkg/controller/podautoscaler/scaler"
 	podutil "github.com/vllm-project/aibrix/pkg/utils"
@@ -619,6 +622,16 @@ func (r *PodAutoscalerReconciler) computeReplicasForMetrics(ctx context.Context,
 		return 0, "", currentTimestamp, err
 	}
 
+	// Append the Ray head-node requirement to the label selector so only head pods are counted
+	if scale.GetAPIVersion() == orchestrationv1alpha1.GroupVersion.String() && scale.GetKind() == "RayClusterFleet" {
+		newRequirement, err := labels.NewRequirement("ray.io/node-type", selection.Equals, []string{"head"})
+		if err != nil {
+			klog.ErrorS(err, "Failed to add requirement ray.io/node-type=head to label selector")
+			return 0, "", currentTimestamp, err
+		}
+		labelsSelector = labelsSelector.Add(*newRequirement)
+	}
+
 	originalReadyPodsCount, err := scaler.GetReadyPodsCount(ctx, r.Client, pa.Namespace, labelsSelector)
 
 	if err != nil {
@@ -700,6 +713,16 @@ func (r *PodAutoscalerReconciler) updateMetricsForScale(ctx context.Context, pa
 		return err
 	}
 
+	// Append the Ray head-node requirement to the label selector so only head pods are counted
+	if scale.GetAPIVersion() == orchestrationv1alpha1.GroupVersion.String() && scale.GetKind() == "RayClusterFleet" {
+		newRequirement, err := labels.NewRequirement("ray.io/node-type", selection.Equals, []string{"head"})
+		if err != nil {
+			klog.ErrorS(err, "Failed to add requirement ray.io/node-type=head to label selector")
+			return err
+		}
+		labelsSelector = labelsSelector.Add(*newRequirement)
+	}
+
 	// Get pod list managed by scaleTargetRef
 	podList, err := podutil.GetPodListByLabelSelector(ctx, r.Client, pa.Namespace, labelsSelector)
 	if err != nil {
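
Context for the controller change: the two new guards narrow the autoscaler's label selector so that, when the scale target is a RayClusterFleet, only Ray head pods are counted, not every pod carrying the fleet's model label. The following is a minimal standalone sketch (not part of the diff) of that selector behavior, using the same k8s.io/apimachinery calls the PR adds; the headPod/workerPod label sets are illustrative, with values borrowed from fleet.yaml above.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/selection"
)

func main() {
	// The selector derived from the scale target matches every pod of the
	// fleet, head and workers alike.
	selector := labels.SelectorFromSet(labels.Set{"model.aibrix.ai/name": "facebook-opt-13b"})

	// The PR's addition: also require ray.io/node-type=head.
	req, err := labels.NewRequirement("ray.io/node-type", selection.Equals, []string{"head"})
	if err != nil {
		panic(err) // only fails for an invalid key, operator, or value
	}
	selector = selector.Add(*req)

	// Illustrative pod label sets; KubeRay labels the pods it creates with
	// ray.io/node-type ("head" or "worker").
	headPod := labels.Set{"model.aibrix.ai/name": "facebook-opt-13b", "ray.io/node-type": "head"}
	workerPod := labels.Set{"model.aibrix.ai/name": "facebook-opt-13b", "ray.io/node-type": "worker"}

	fmt.Println(selector.Matches(headPod))   // true
	fmt.Println(selector.Matches(workerPod)) // false
}

Without the extra requirement, a fleet with N workers per cluster would inflate the ready-pod count by a factor of N+1, which is why both computeReplicasForMetrics and updateMetricsForScale apply the same narrowing.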