[Misc] Improve gpu optimizer debugging on podautoscaler. (#509)
* Enable gpu optimizer debugging on scaling.

* Lint fix

* Remove unnecessary config/overlay/dev/default

---------

Co-authored-by: Jingyuan Zhang <[email protected]>
zhangjyr and Jingyuan Zhang authored Dec 10, 2024
1 parent 4ebf0ff commit 6e6016f
Showing 11 changed files with 129 additions and 19 deletions.
2 changes: 1 addition & 1 deletion config/gpu-optimizer/deployment.yaml
@@ -2,7 +2,7 @@ apiVersion: apps/v1
kind: Deployment
metadata:
  name: gpu-optimizer
-  namespace: aibrix-system
+  namespace: system
spec:
  replicas: 1
  selector:
4 changes: 2 additions & 2 deletions config/gpu-optimizer/rbac.yaml
@@ -2,7 +2,7 @@ apiVersion: v1
kind: ServiceAccount
metadata:
  name: gpu-optimizer-sa
-  namespace: aibrix-system
+  namespace: system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -20,7 +20,7 @@ metadata:
subjects:
- kind: ServiceAccount
  name: gpu-optimizer-sa
-  namespace: aibrix-system
+  namespace: system
roleRef:
  kind: ClusterRole
  name: gpu-optimizer-clusterrole
2 changes: 1 addition & 1 deletion config/gpu-optimizer/service.yaml
@@ -2,7 +2,7 @@ apiVersion: v1
kind: Service
metadata:
  name: gpu-optimizer
-  namespace: aibrix-system
+  namespace: system
spec:
  selector:
    app: gpu-optimizer
24 changes: 24 additions & 0 deletions config/overlays/dev/gpu-optimizer/kustomization.yaml
@@ -0,0 +1,24 @@
+kind: Kustomization
+
+resources:
+- ../../../default
+
+patches:
+- patch: |- # Use the '|' and '-' for inline patching
+    apiVersion: apps/v1
+    kind: Deployment
+    metadata:
+      name: gpu-optimizer
+    spec:
+      template:
+        spec:
+          containers:
+          - name: gpu-optimizer
+            command: ["python", "-m", "aibrix.gpu_optimizer.app", "--debug"]
+  target:
+    kind: Deployment
+    name: gpu-optimizer
+    namespace: system
+    version: v1
+
+apiVersion: kustomize.config.k8s.io/v1beta1
28 changes: 28 additions & 0 deletions config/overlays/dev/manager/kustomization.yaml
@@ -0,0 +1,28 @@
+kind: Kustomization
+
+resources:
+- ../../../default
+
+patches:
+- patch: |- # Use the '|' and '-' for inline patching
+    apiVersion: apps/v1
+    kind: Deployment
+    metadata:
+      name: controller-manager
+    spec:
+      template:
+        spec:
+          containers:
+          - name: manager
+            args:
+            - --leader-elect
+            - --health-probe-bind-address=:8081
+            - --metrics-bind-address=0
+            - -v=4
+  target:
+    kind: Deployment
+    name: controller-manager
+    namespace: system
+    version: v1
+
+apiVersion: kustomize.config.k8s.io/v1beta1
6 changes: 3 additions & 3 deletions development/app/Makefile
@@ -12,19 +12,19 @@ docker-build-simulator-a40:
docker-build: docker-build-mock

deploy-mock:
-	kubectl create -k config/mock
+	kubectl apply -k config/mock
	sleep 2
	kubectl port-forward svc/llama2-7b 8000:8000 1>/dev/null 2>&1 &
	kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 1>/dev/null 2>&1 &

deploy-simulator:
-	kubectl create -k config/simulator
+	kubectl apply -k config/simulator
	sleep 2
	kubectl port-forward svc/llama2-7b 8000:8000 1>/dev/null 2>&1 &
	kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 1>/dev/null 2>&1 &

deploy-heterogeneous:
-	kubectl create -k config/heterogeneous
+	kubectl apply -k config/heterogeneous
	sleep 2
	kubectl port-forward svc/llama2-7b 8000:8000 1>/dev/null 2>&1 &
	kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 1>/dev/null 2>&1 &
@@ -5,7 +5,7 @@ metadata:
  labels:
    model.aibrix.ai/name: "llama2-7b"
spec:
-  replicas: 1
+  replicas: 0
  selector:
    matchLabels:
      model.aibrix.ai/name: "llama2-7b"
@@ -6,6 +6,7 @@ metadata:
  labels:
    app.kubernetes.io/name: aibrix
    app.kubernetes.io/managed-by: kustomize
+    kpa.autoscaling.aibrix.ai/scale-down-delay: "0"
  namespace: default
spec:
  scaleTargetRef:
7 changes: 4 additions & 3 deletions python/aibrix/aibrix/gpu_optimizer/Makefile
@@ -13,6 +13,7 @@ gen-profile:

.PHONY: debug-init
debug-init:
+	kubectl -n aibrix-system port-forward svc/aibrix-gpu-optimizer 8080:8080 1>/dev/null 2>&1 &
	kubectl -n aibrix-system port-forward svc/aibrix-redis-master 6379:6379 1>/dev/null 2>&1 &

.PHONY: debug
@@ -28,10 +29,10 @@ debug-init-simulator:

.PHONY: debug-scale-simulator
debug-scale-simulator:
-	curl http://localhost:8080/scale/default/simulator-llama2-7b-a100-a100/2 \
+	curl -X PUT http://localhost:8080/scale/default/simulator-llama2-7b-a100 \
	-H "Content-Type: application/json" \
	-H "Authorization: Bearer any_key" \
-	-d '{}'
+	-d '{"replicas":"0"}'

.PHONY: debug-stop-simulator
debug-stop-simulator:
@@ -45,7 +46,7 @@ debug-update-profile:

.PHONY: debug-metrics
debug-metrics:
-	curl http://localhost:8080/metrics/aibrix-system/simulator-llama2-7b-a100
+	curl http://localhost:8080/metrics/default/simulator-llama2-7b-a100

.PHONY: debug-workload
debug-workload:
26 changes: 22 additions & 4 deletions python/aibrix/aibrix/gpu_optimizer/app.py
@@ -22,7 +22,11 @@
from starlette.responses import JSONResponse, PlainTextResponse

from aibrix.gpu_optimizer.load_monitor.load_reader import GatewayLoadReader
-from aibrix.gpu_optimizer.load_monitor.monitor import DeploymentStates, ModelMonitor
+from aibrix.gpu_optimizer.load_monitor.monitor import (
+    DeploymentStates,
+    DeploymentStates_Replicas_No_Overriden,
+    ModelMonitor,
+)
from aibrix.gpu_optimizer.load_monitor.profile_reader import RedisProfileReader
from aibrix.gpu_optimizer.load_monitor.visualizer import mount_to as mount_visulizer
from aibrix.gpu_optimizer.utils import ExcludePathsFilter
@@ -201,11 +205,23 @@ async def update_profile(request):
    )


-@app.route("/scale/{namespace}/{deployment_name}/{replicas}", methods=["POST"])
+@app.route("/scale/{namespace}/{deployment_name}", methods=["PUT"])
async def scale_deployment(request):
+    """Scale a deployment manually by overriding its replicas number. Once overridden, the optimization result is ignored. Pass -1 to reset the override.
+    Args:
+        replicas: The overriding number of replicas. Pass -1 to disable the override for all deployments of the model.
+    """
    namespace = request.path_params["namespace"]
    deployment_name = request.path_params["deployment_name"]
-    replicas = request.path_params["replicas"]
+    data = await request.json()
+    try:
+        replicas = int(data.get("replicas", DeploymentStates_Replicas_No_Overriden))
+        if replicas < DeploymentStates_Replicas_No_Overriden:
+            replicas = DeploymentStates_Replicas_No_Overriden
+    except ValueError:
+        replicas = DeploymentStates_Replicas_No_Overriden  # reset

    try:
        # Verify the deployment exists
        apps_v1 = client.AppsV1Api()
@@ -216,7 +232,9 @@ async def scale_deployment(request):
            raise Exception(f'Model "{model_name}" is not monitored.')

        # Set the scaling metrics
-        monitor.update_deployment_num_replicas(deployment_name, namespace, replicas)
+        monitor.update_deployment_num_replicas(
+            deployment_name, namespace, replicas, overriding=True
+        )

        return JSONResponse({"message": f"Scaled to {replicas}"})
    except Exception as e:
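For context, a minimal sketch of how the new endpoint could be called from Python instead of curl, mirroring the debug-scale-simulator Makefile target above. It assumes the third-party requests package, a port-forwarded optimizer on localhost:8080 (e.g. via the debug-init target), and the placeholder deployment name and bearer token used in that target.

# Sketch only: override the replicas of a monitored deployment, then clear the override.
import requests

BASE = "http://localhost:8080"  # assumes the debug-init port-forward is running
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": "Bearer any_key",  # placeholder key, as in the Makefile target
}

def scale(namespace: str, deployment: str, replicas: int) -> None:
    # PUT /scale/{namespace}/{deployment_name} with the replicas count in the JSON body.
    resp = requests.put(
        f"{BASE}/scale/{namespace}/{deployment}",
        json={"replicas": str(replicas)},
        headers=HEADERS,
    )
    print(resp.status_code, resp.json())

scale("default", "simulator-llama2-7b-a100", 0)   # pin the deployment to 0 replicas
scale("default", "simulator-llama2-7b-a100", -1)  # -1 resets the override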
46 changes: 42 additions & 4 deletions python/aibrix/aibrix/gpu_optimizer/load_monitor/monitor.py
@@ -38,13 +38,20 @@
    gpu="default", cost=1.0, tputs=[[100]], indexes=[[10], [10]]
)

+DeploymentStates_Replicas_No_Overriden = -1
+

class DeploymentStates:
    """States of a deployment with resource version."""

    def __init__(self, name: str, replicas: int = 1, min_replicas: int = 0):
        self.name = name
-        self.replicas = replicas
+
+        # _replicas stores optimized value
+        self._replicas = replicas
+        # _replicas_overriden stores replicas value for debugging
+        self._replicas_overriden = DeploymentStates_Replicas_No_Overriden
+
        """The replicas output, ignore min_replicas in the normal mode."""
        self.min_replicas = min_replicas
        """The replicas for minimum mode. Ignore in normal optimization mode."""
@@ -55,12 +62,30 @@ def __init__(self, name: str, replicas: int = 1, min_replicas: int = 0):
    def cost(self):
        return 0.0 if self.profile is None else self.profile.cost * self.replicas

+    @property
+    def replicas(self):
+        return self._replicas_overriden if self.overriden else self._replicas
+
+    @replicas.setter
+    def replicas(self, value):
+        self._replicas = value
+
+    @property
+    def overriden(self):
+        return self._replicas_overriden != DeploymentStates_Replicas_No_Overriden
+
+    def override_replicas(self, value):
+        self._replicas_overriden = value
+
    def minimize(self):
        """Set replica to minimum mode."""
        self.replicas = max(0, self.min_replicas)

    def __repr__(self):
-        return f"{self.name}: {self.replicas}(${self.cost})"
+        if self.overriden:
+            return f"overriden {self.name}: {self.replicas}(${self.cost}), should be {self._replicas}"
+        else:
+            return f"{self.name}: {self.replicas}(${self.cost})"


class ModelMonitor:
@@ -180,15 +205,28 @@ def read_deployment_num_replicas(self, deployment_name: str, namespace: str) ->
        return self.deployments[key].replicas

    def update_deployment_num_replicas(
-        self, deployment_name: str, namespace: str, replicas: int
+        self,
+        deployment_name: str,
+        namespace: str,
+        replicas: int,
+        overriding: bool = False,
    ):
        key = self._deployment_entry_point(deployment_name, namespace)
        if key not in self.deployments:
            raise Exception(
                f"Deployment {namespace}:{deployment_name} of model {self.model_name} is not monitored"
            )

-        self.deployments[key].replicas = replicas
+        if overriding:
+            # Disable all overrides once and for all if no override value is detected.
+            if replicas == DeploymentStates_Replicas_No_Overriden:
+                for states in self.deployments.values():
+                    states.override_replicas(replicas)
+                return
+
+            self.deployments[key].override_replicas(replicas)
+        else:
+            self.deployments[key].replicas = replicas

    def mark_deployments_outdated(self):
        """Save last resource version and start the validation"""
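To make the override semantics concrete, a small usage sketch of DeploymentStates as changed above; the deployment name and replica counts are illustrative only, and the import path is the one used by app.py in this commit.

# Sketch: the overridden value shadows the optimized value until it is reset.
from aibrix.gpu_optimizer.load_monitor.monitor import (
    DeploymentStates,
    DeploymentStates_Replicas_No_Overriden,
)

states = DeploymentStates("simulator-llama2-7b-a100", replicas=3)
print(states.replicas)        # 3, the optimized value stored in _replicas

states.override_replicas(1)   # manual override, as done by the /scale endpoint
print(states.overriden)       # True
print(states.replicas)        # 1, the override wins

states.replicas = 5           # the optimizer keeps writing through the setter...
print(states.replicas)        # ...but the property still returns 1

states.override_replicas(DeploymentStates_Replicas_No_Overriden)  # -1 clears the override
print(states.replicas)        # 5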
