From 6e6016f81fa5c6b8d6991e44e99128ad1321361f Mon Sep 17 00:00:00 2001 From: Jingyuan Date: Mon, 9 Dec 2024 16:36:09 -0800 Subject: [PATCH] [Misc] Improve gpu optimizer debugging on podautoscaler. (#509) * Enable gpu optimizer debugging on scaling. * Lint fix * Remove unnecessary config/overlay/dev/default --------- Co-authored-by: Jingyuan Zhang --- config/gpu-optimizer/deployment.yaml | 2 +- config/gpu-optimizer/rbac.yaml | 4 +- config/gpu-optimizer/service.yaml | 2 +- .../dev/gpu-optimizer/kustomization.yaml | 24 ++++++++++ .../overlays/dev/manager/kustomization.yaml | 28 +++++++++++ development/app/Makefile | 6 +-- .../simulator_a40/patch_deployment_a40.yaml | 2 +- .../podautoscaler/podautoscaler.yaml | 1 + python/aibrix/aibrix/gpu_optimizer/Makefile | 7 +-- python/aibrix/aibrix/gpu_optimizer/app.py | 26 +++++++++-- .../gpu_optimizer/load_monitor/monitor.py | 46 +++++++++++++++++-- 11 files changed, 129 insertions(+), 19 deletions(-) create mode 100644 config/overlays/dev/gpu-optimizer/kustomization.yaml create mode 100644 config/overlays/dev/manager/kustomization.yaml diff --git a/config/gpu-optimizer/deployment.yaml b/config/gpu-optimizer/deployment.yaml index 796912e4..07e12716 100644 --- a/config/gpu-optimizer/deployment.yaml +++ b/config/gpu-optimizer/deployment.yaml @@ -2,7 +2,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: gpu-optimizer - namespace: aibrix-system + namespace: system spec: replicas: 1 selector: diff --git a/config/gpu-optimizer/rbac.yaml b/config/gpu-optimizer/rbac.yaml index dd56e2e1..f384d0d1 100644 --- a/config/gpu-optimizer/rbac.yaml +++ b/config/gpu-optimizer/rbac.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: ServiceAccount metadata: name: gpu-optimizer-sa - namespace: aibrix-system + namespace: system --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -20,7 +20,7 @@ metadata: subjects: - kind: ServiceAccount name: gpu-optimizer-sa - namespace: aibrix-system + namespace: system roleRef: kind: ClusterRole name: gpu-optimizer-clusterrole diff --git a/config/gpu-optimizer/service.yaml b/config/gpu-optimizer/service.yaml index 6968aeed..480ecfdc 100644 --- a/config/gpu-optimizer/service.yaml +++ b/config/gpu-optimizer/service.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: Service metadata: name: gpu-optimizer - namespace: aibrix-system + namespace: system spec: selector: app: gpu-optimizer diff --git a/config/overlays/dev/gpu-optimizer/kustomization.yaml b/config/overlays/dev/gpu-optimizer/kustomization.yaml new file mode 100644 index 00000000..60ac6534 --- /dev/null +++ b/config/overlays/dev/gpu-optimizer/kustomization.yaml @@ -0,0 +1,24 @@ +kind: Kustomization + +resources: +- ../../../default + +patches: +- patch: |- # Use the '|' and '-' for inline patching + apiVersion: apps/v1 + kind: Deployment + metadata: + name: gpu-optimizer + spec: + template: + spec: + containers: + - name: gpu-optimizer + command: ["python", "-m", "aibrix.gpu_optimizer.app", "--debug"] + target: + kind: Deployment + name: gpu-optimizer + namespace: system + version: v1 + +apiVersion: kustomize.config.k8s.io/v1beta1 \ No newline at end of file diff --git a/config/overlays/dev/manager/kustomization.yaml b/config/overlays/dev/manager/kustomization.yaml new file mode 100644 index 00000000..9b589f5c --- /dev/null +++ b/config/overlays/dev/manager/kustomization.yaml @@ -0,0 +1,28 @@ +kind: Kustomization + +resources: +- ../../../default + +patches: +- patch: |- # Use the '|' and '-' for inline patching + apiVersion: apps/v1 + kind: Deployment + metadata: + name: controller-manager + spec: + template: + spec: + containers: + - name: manager + args: + - --leader-elect + - --health-probe-bind-address=:8081 + - --metrics-bind-address=0 + - -v=4 + target: + kind: Deployment + name: controller-manager + namespace: system + version: v1 + +apiVersion: kustomize.config.k8s.io/v1beta1 \ No newline at end of file diff --git a/development/app/Makefile b/development/app/Makefile index 42edae1b..e54a3d9e 100644 --- a/development/app/Makefile +++ b/development/app/Makefile @@ -12,19 +12,19 @@ docker-build-simulator-a40: docker-build: docker-build-mock deploy-mock: - kubectl create -k config/mock + kubectl apply -k config/mock sleep 2 kubectl port-forward svc/llama2-7b 8000:8000 1>/dev/null 2>&1 & kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 1>/dev/null 2>&1 & deploy-simulator: - kubectl create -k config/simulator + kubectl apply -k config/simulator sleep 2 kubectl port-forward svc/llama2-7b 8000:8000 1>/dev/null 2>&1 & kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 1>/dev/null 2>&1 & deploy-heterogeneous: - kubectl create -k config/heterogeneous + kubectl apply -k config/heterogeneous sleep 2 kubectl port-forward svc/llama2-7b 8000:8000 1>/dev/null 2>&1 & kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 1>/dev/null 2>&1 & diff --git a/development/app/config/heterogeneous/simulator_a40/patch_deployment_a40.yaml b/development/app/config/heterogeneous/simulator_a40/patch_deployment_a40.yaml index 5daa884c..adbb0931 100644 --- a/development/app/config/heterogeneous/simulator_a40/patch_deployment_a40.yaml +++ b/development/app/config/heterogeneous/simulator_a40/patch_deployment_a40.yaml @@ -5,7 +5,7 @@ metadata: labels: model.aibrix.ai/name: "llama2-7b" spec: - replicas: 1 + replicas: 0 selector: matchLabels: model.aibrix.ai/name: "llama2-7b" diff --git a/development/app/config/templates/podautoscaler/podautoscaler.yaml b/development/app/config/templates/podautoscaler/podautoscaler.yaml index a75a60c5..042523fe 100644 --- a/development/app/config/templates/podautoscaler/podautoscaler.yaml +++ b/development/app/config/templates/podautoscaler/podautoscaler.yaml @@ -6,6 +6,7 @@ metadata: labels: app.kubernetes.io/name: aibrix app.kubernetes.io/managed-by: kustomize + kpa.autoscaling.aibrix.ai/scale-down-delay: "0" namespace: default spec: scaleTargetRef: diff --git a/python/aibrix/aibrix/gpu_optimizer/Makefile b/python/aibrix/aibrix/gpu_optimizer/Makefile index cbd761cb..bec0f214 100644 --- a/python/aibrix/aibrix/gpu_optimizer/Makefile +++ b/python/aibrix/aibrix/gpu_optimizer/Makefile @@ -13,6 +13,7 @@ gen-profile: .PHONY: debug-init debug-init: + kubectl -n aibrix-system port-forward svc/aibrix-gpu-optimizer 8080:8080 1>/dev/null 2>&1 & kubectl -n aibrix-system port-forward svc/aibrix-redis-master 6379:6379 1>/dev/null 2>&1 & .PHONY: debug @@ -28,10 +29,10 @@ debug-init-simulator: .PHONY: debug-scale-simulator debug-scale-simulator: - curl http://localhost:8080/scale/default/simulator-llama2-7b-a100-a100/2 \ + curl -X PUT http://localhost:8080/scale/default/simulator-llama2-7b-a100\ -H "Content-Type: application/json" \ -H "Authorization: Bearer any_key" \ - -d '{}' + -d '{"replicas":"0"}' .PHONY: debug-stop-simulator debug-stop-simulator: @@ -45,7 +46,7 @@ debug-update-profile: .PHONY: debug-metrics debug-metrics: - curl http://localhost:8080/metrics/aibrix-system/simulator-llama2-7b-a100 + curl http://localhost:8080/metrics/default/simulator-llama2-7b-a100 .PHONY: debug-workload debug-workload: diff --git a/python/aibrix/aibrix/gpu_optimizer/app.py b/python/aibrix/aibrix/gpu_optimizer/app.py index 405abe4e..7f6b6e44 100644 --- a/python/aibrix/aibrix/gpu_optimizer/app.py +++ b/python/aibrix/aibrix/gpu_optimizer/app.py @@ -22,7 +22,11 @@ from starlette.responses import JSONResponse, PlainTextResponse from aibrix.gpu_optimizer.load_monitor.load_reader import GatewayLoadReader -from aibrix.gpu_optimizer.load_monitor.monitor import DeploymentStates, ModelMonitor +from aibrix.gpu_optimizer.load_monitor.monitor import ( + DeploymentStates, + DeploymentStates_Replicas_No_Overriden, + ModelMonitor, +) from aibrix.gpu_optimizer.load_monitor.profile_reader import RedisProfileReader from aibrix.gpu_optimizer.load_monitor.visualizer import mount_to as mount_visulizer from aibrix.gpu_optimizer.utils import ExcludePathsFilter @@ -201,11 +205,23 @@ async def update_profile(request): ) -@app.route("/scale/{namespace}/{deployment_name}/{replicas}", methods=["POST"]) +@app.route("/scale/{namespace}/{deployment_name}", methods=["PUT"]) async def scale_deployment(request): + """Scale deployment manually by overriding replicas number. Once overriden, optimization result will be ignored. Reset overriden by pass -1. + + Args: + replicas: The overriden number of replicas. Pass -1 to disable overriden for all deployments of the model. + """ namespace = request.path_params["namespace"] deployment_name = request.path_params["deployment_name"] - replicas = request.path_params["replicas"] + data = await request.json() + try: + replicas = int(data.get("replicas", DeploymentStates_Replicas_No_Overriden)) + if replicas < DeploymentStates_Replicas_No_Overriden: + replicas = DeploymentStates_Replicas_No_Overriden + except ValueError: + replicas = DeploymentStates_Replicas_No_Overriden # reset + try: # Verify the deployment exists apps_v1 = client.AppsV1Api() @@ -216,7 +232,9 @@ async def scale_deployment(request): raise Exception(f'Model "{model_name}" is not monitored.') # Set the scaling metrics - monitor.update_deployment_num_replicas(deployment_name, namespace, replicas) + monitor.update_deployment_num_replicas( + deployment_name, namespace, replicas, overriding=True + ) return JSONResponse({"message": f"Scaled to {replicas}"}) except Exception as e: diff --git a/python/aibrix/aibrix/gpu_optimizer/load_monitor/monitor.py b/python/aibrix/aibrix/gpu_optimizer/load_monitor/monitor.py index dbc295ee..af27223f 100644 --- a/python/aibrix/aibrix/gpu_optimizer/load_monitor/monitor.py +++ b/python/aibrix/aibrix/gpu_optimizer/load_monitor/monitor.py @@ -38,13 +38,20 @@ gpu="default", cost=1.0, tputs=[[100]], indexes=[[10], [10]] ) +DeploymentStates_Replicas_No_Overriden = -1 + class DeploymentStates: """States of a deployment with resource version.""" def __init__(self, name: str, replicas: int = 1, min_replicas: int = 0): self.name = name - self.replicas = replicas + + # _replicas stores optimized value + self._replicas = replicas + # _replicas_overriden stores replicas value for debugging + self._replicas_overriden = DeploymentStates_Replicas_No_Overriden + """The replicas output, ignore min_replicas in the normal mode.""" self.min_replicas = min_replicas """The replicas for minimum mode. Ignore in normal optimization mode.""" @@ -55,12 +62,30 @@ def __init__(self, name: str, replicas: int = 1, min_replicas: int = 0): def cost(self): return 0.0 if self.profile is None else self.profile.cost * self.replicas + @property + def replicas(self): + return self._replicas_overriden if self.overriden else self._replicas + + @replicas.setter + def replicas(self, value): + self._replicas = value + + @property + def overriden(self): + return self._replicas_overriden != DeploymentStates_Replicas_No_Overriden + + def override_replicas(self, value): + self._replicas_overriden = value + def minimize(self): """Set replica to minimum mode.""" self.replicas = max(0, self.min_replicas) def __repr__(self): - return f"{self.name}: {self.replicas}(${self.cost})" + if self.overriden: + return f"overriden {self.name}: {self.replicas}(${self.cost}), should be {self._replicas}" + else: + return f"{self.name}: {self.replicas}(${self.cost})" class ModelMonitor: @@ -180,7 +205,11 @@ def read_deployment_num_replicas(self, deployment_name: str, namespace: str) -> return self.deployments[key].replicas def update_deployment_num_replicas( - self, deployment_name: str, namespace: str, replicas: int + self, + deployment_name: str, + namespace: str, + replicas: int, + overriding: bool = False, ): key = self._deployment_entry_point(deployment_name, namespace) if key not in self.deployments: @@ -188,7 +217,16 @@ def update_deployment_num_replicas( f"Deployment {namespace}:{deployment_name} of model {self.model_name} is not monitored" ) - self.deployments[key].replicas = replicas + if overriding: + # Disable all override once for all if no overriden is detected. + if replicas == DeploymentStates_Replicas_No_Overriden: + for states in self.deployments.values(): + states.override_replicas(replicas) + return + + self.deployments[key].override_replicas(replicas) + else: + self.deployments[key].replicas = replicas def mark_deployments_outdated(self): """Save last resource version and start the validation"""