Append ray head label selector in PodAutoscaler (#789)
It restricts the autoscaler to the engine (head) pod for multi-node inference. The Ray worker does not run an HTTP server, so it cannot expose application metrics, only resource metrics. For resource metrics, since we use tensor parallelism, GPU utilization is expected to be the same across all GPUs, so the head pod's metrics are representative.

Signed-off-by: Jiaxin Shan <[email protected]>
Jeffwan authored Mar 4, 2025
1 parent 3ad08a1 commit c73e063
Showing 3 changed files with 46 additions and 18 deletions.
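
For context, a minimal standalone Go sketch of what the appended requirement does, using the same k8s.io/apimachinery packages as the controller change below; the model label value is illustrative and the snippet is not part of the commit:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/selection"
)

func main() {
	// Selector the PodAutoscaler derives from the scale target's pods.
	selector, err := labels.Parse("model.aibrix.ai/name=facebook-opt-13b")
	if err != nil {
		panic(err)
	}

	// The requirement this commit appends for RayClusterFleet targets.
	head, err := labels.NewRequirement("ray.io/node-type", selection.Equals, []string{"head"})
	if err != nil {
		panic(err)
	}
	selector = selector.Add(*head)

	// Only the head pod matches; worker pods are ignored by the autoscaler.
	fmt.Println(selector.Matches(labels.Set{"model.aibrix.ai/name": "facebook-opt-13b", "ray.io/node-type": "head"}))   // true
	fmt.Println(selector.Matches(labels.Set{"model.aibrix.ai/name": "facebook-opt-13b", "ray.io/node-type": "worker"})) // false
}
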
3 changes: 2 additions & 1 deletion development/app/requirements.txt
@@ -8,4 +8,5 @@ ddsketch
plotly_express
fasteners
transformers
git+https://github.com/zhangjyr/vidur.git
git+https://github.com/zhangjyr/vidur.git
ray[default]
38 changes: 21 additions & 17 deletions development/tutorials/distributed/fleet.yaml
@@ -3,7 +3,7 @@ kind: RayClusterFleet
metadata:
labels:
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
model.aibrix.ai/name: facebook-opt-13b
name: facebook-opt-13b
spec:
replicas: 1
@@ -20,13 +20,17 @@ spec:
labels:
model.aibrix.ai/name: facebook-opt-13b
annotations:
ray.io/overwrite-container-cmd: "true"
ray.io/overwrite-container-cmd: "true"
spec:
rayVersion: '2.10.0' # should match the Ray version in the image of the containers
rayVersion: "2.10.0"
headGroupSpec:
rayStartParams:
dashboard-host: '0.0.0.0'
block: 'false'
template:
metadata:
labels:
model.aibrix.ai/name: facebook-opt-13b
spec:
containers:
- name: ray-head
@@ -40,35 +44,35 @@ spec:
name: client
- containerPort: 8000
name: service
command: ["/bin/bash", "-lc", "--"]
args: ["ulimit -n 65536; echo head; $KUBERAY_GEN_RAY_START_CMD"; vllm serve facebook/opt-125m --tensor-parallel-size 2 --distributed-executor-backend ray]
command: ["/bin/bash", "-lc", "--"]
args: ["ulimit -n 65536; echo head; $KUBERAY_GEN_RAY_START_CMD; sleep 600"]
resources:
limits:
cpu: "1000m"
nvidia.com/gpu: 1
cpu: 1000m
requests:
cpu: "200m"
nvidia.com/gpu: 1
cpu: 200m
workerGroupSpecs:
# the pod replicas in this group typed worker
- replicas: 1
- replicas: 2
minReplicas: 1
maxReplicas: 5
groupName: small-group
rayStartParams: {}
template:
metadata:
labels:
model.aibrix.ai/name: facebook-opt-13b
spec:
containers:
- name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc')
image: rayproject/ray:2.10.0
- name: ray-worker
image: 'rayproject/ray:2.10.0'
command: [ "/bin/bash", "-lc", "--" ]
args: [ "ulimit -n 65536; echo head; $KUBERAY_GEN_RAY_START_CMD;" ]
lifecycle:
preStop:
exec:
command: [ "/bin/sh","-c","ray stop" ]
resources:
limits:
cpu: "1000m"
nvidia.com/gpu: 1
cpu: 1000m
requests:
cpu: "200m"
nvidia.com/gpu: 1
cpu: 200m
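
Once KubeRay reconciles the fleet's clusters, the head pod carries both the model label from the head group template above and the node-type label that KubeRay sets automatically; this combination is what the PodAutoscaler's new selector matches. A sketch of the relevant head pod labels (illustrative, not part of the diff):

labels:
  model.aibrix.ai/name: facebook-opt-13b   # from the head group template above
  ray.io/node-type: head                   # set by KubeRay; matched by the new selector
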
23 changes: 23 additions & 0 deletions pkg/controller/podautoscaler/podautoscaler_controller.go
@@ -22,8 +22,11 @@ import (
"time"

autoscalingv1alpha1 "github.com/vllm-project/aibrix/api/autoscaling/v1alpha1"
orchestrationv1alpha1 "github.com/vllm-project/aibrix/api/orchestration/v1alpha1"
"github.com/vllm-project/aibrix/pkg/config"
"github.com/vllm-project/aibrix/pkg/controller/podautoscaler/metrics"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/selection"

"github.com/vllm-project/aibrix/pkg/controller/podautoscaler/scaler"
podutil "github.com/vllm-project/aibrix/pkg/utils"
@@ -619,6 +622,16 @@ func (r *PodAutoscalerReconciler) computeReplicasForMetrics(ctx context.Context,
return 0, "", currentTimestamp, err
}

// Append the Ray head node requirement to the label selector
if scale.GetAPIVersion() == orchestrationv1alpha1.GroupVersion.String() && scale.GetKind() == "RayClusterFleet" {
newRequirement, err := labels.NewRequirement("ray.io/node-type", selection.Equals, []string{"head"})
if err != nil {
klog.ErrorS(err, "Failed to add new requirements ray.io/node-type: head to label selector")
return 0, "", currentTimestamp, err
}
labelsSelector = labelsSelector.Add(*newRequirement)
}

originalReadyPodsCount, err := scaler.GetReadyPodsCount(ctx, r.Client, pa.Namespace, labelsSelector)

if err != nil {
@@ -700,6 +713,16 @@ func (r *PodAutoscalerReconciler) updateMetricsForScale(ctx context.Context, pa
return err
}

// Append the Ray head node requirement to the label selector
if scale.GetAPIVersion() == orchestrationv1alpha1.GroupVersion.String() && scale.GetKind() == "RayClusterFleet" {
newRequirement, err := labels.NewRequirement("ray.io/node-type", selection.Equals, []string{"head"})
if err != nil {
klog.ErrorS(err, "Failed to add new requirements ray.io/node-type: head to label selector")
return err
}
labelsSelector = labelsSelector.Add(*newRequirement)
}

// Get pod list managed by scaleTargetRef
podList, err := podutil.GetPodListByLabelSelector(ctx, r.Client, pa.Namespace, labelsSelector)
if err != nil {
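
Note that the same guard-and-append block is duplicated in computeReplicasForMetrics and updateMetricsForScale, so it could be factored into a small helper. A sketch, assuming scale is an *unstructured.Unstructured and reusing the file's existing imports plus k8s.io/apimachinery/pkg/apis/meta/v1/unstructured (the helper name is hypothetical, not part of this commit):

// appendRayHeadRequirement narrows the selector to the Ray head pod for
// RayClusterFleet scale targets and returns it unchanged otherwise.
func appendRayHeadRequirement(scale *unstructured.Unstructured, selector labels.Selector) (labels.Selector, error) {
	if scale.GetAPIVersion() != orchestrationv1alpha1.GroupVersion.String() || scale.GetKind() != "RayClusterFleet" {
		return selector, nil
	}
	req, err := labels.NewRequirement("ray.io/node-type", selection.Equals, []string{"head"})
	if err != nil {
		return nil, err
	}
	return selector.Add(*req), nil
}

Both call sites would then reduce to labelsSelector, err = appendRayHeadRequirement(scale, labelsSelector).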