Skip to content

Commit

Permalink
Fix k8s accessibility regarding namespaces. GPU optimizer now monitors al…
Browse files Browse the repository at this point in the history
…l namespaces with model label.
  • Loading branch information
Jingyuan Zhang committed Dec 5, 2024
1 parent aa77efb commit d2be10a
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 42 deletions.
2 changes: 1 addition & 1 deletion development/app/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ Alternatively, [vidur](https://github.com/microsoft/vidur) is integrated for hig

1. Build simulator base model image
```dockerfile
docker build -t aibrix/vllm-simulator:nightly --build-arg GPU_TYPE=a100 -f Dockerfile .
docker build -t aibrix/vllm-simulator:nightly --build-arg SIMULATION=a100 -f Dockerfile .
```

1.b (Optional) Load container image to docker context
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ spec:
kind: Deployment
name: simulator-llama2-7b-a40
metricsSources:
- endpoint: gpu-optimizer.aibrix-system.svc.cluster.local:8080
- endpoint: aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080
path: /metrics/default/simulator-llama2-7b-a40
metric: "vllm:deployment_replicas"
targetValue: "1"
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ spec:
kind: Deployment
name: simulator-llama2-7b-a100
metricsSources:
- endpoint: gpu-optimizer.aibrix-system.svc.cluster.local:8080
- endpoint: aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080
path: /metrics/default/simulator-llama2-7b-a100
metric: "vllm:deployment_replicas"
targetValue: "1"
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
maxReplicas: 10
targetMetric: "avg_prompt_throughput_toks_per_s" # Ignore if metricsSources is configured
metricsSources:
- endpoint: gpu-optimizer.aibrix-system.svc.cluster.local:8080
- endpoint: aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080
path: /metrics/default/simulator-llama2-7b
metric: "vllm:deployment_replicas"
targetValue: "1"
Expand Down
16 changes: 10 additions & 6 deletions python/aibrix/aibrix/gpu_optimizer/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ DATASET ?= [set your DATASET path]
deploy:
kubectl apply -f deployment.yaml
sleep 2
kubectl -n aibrix-system port-forward svc/gpu-optimizer 8080:8080 1>/dev/null 2>&1 &
kubectl -n aibrix-system port-forward svc/aibrix-gpu-optimizer 8080:8080 1>/dev/null 2>&1 &

.PHONY: clean
clean:
kubectl delete -f deployment.yaml
sleep 1
curl http://localhost:8080/metrics/aibrix-system/simulator-llama2-7b
curl http://localhost:8080/metrics/aibrix-system/simulator-llama2-7b-a100

.PHONY: benchmark
benchmark:
Expand All @@ -33,27 +33,31 @@ debug:

.PHONY: debug-init-simulator
debug-init-simulator:
curl http://localhost:8080/monitor/aibrix-system/simulator-llama2-7b \
curl http://localhost:8080/monitor/default/simulator-llama2-7b-a100 \
-H "Content-Type: application/json" \
-H "Authorization: Bearer any_key" \
-d '{}'

.PHONY: debug-scale-simulator
debug-scale-simulator:
curl http://localhost:8080/scale/aibrix-system/simulator-llama2-7b/2 \
curl http://localhost:8080/scale/default/simulator-llama2-7b-a100-a100/2 \
-H "Content-Type: application/json" \
-H "Authorization: Bearer any_key" \
-d '{}'

.PHONY: debug-stop-simulator
debug-stop-simulator:
curl -X DELETE http://localhost:8080/monitor/aibrix-system/simulator-llama2-7b \
curl -X DELETE http://localhost:8080/monitor/default/simulator-llama2-7b-a100 \
-H "Content-Type: application/json" \
-H "Authorization: Bearer any_key"

.PHONY: debug-update-profile
debug-update-profile:
curl http://localhost:8080/update_profile/llama2-7b

.PHONY: debug-metrics
debug-metrics:
curl http://localhost:8080/metrics/aibrix-system/simulator-llama2-7b
curl http://localhost:8080/metrics/aibrix-system/simulator-llama2-7b-a100

.PHONY: debug-workload
debug-workload:
Expand Down
26 changes: 19 additions & 7 deletions python/aibrix/aibrix/gpu_optimizer/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
from aibrix.gpu_optimizer.load_monitor.visualizer import mount_to as mount_visulizer
from aibrix.gpu_optimizer.utils import ExcludePathsFilter

NAMESPACE = os.getenv("NAMESPACE", "aibrix-system")
MODEL_LABEL = "model.aibrix.ai/name"
MIN_REPLICAS_LABEL = "model.aibrix.ai/min_replicas"
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
Expand Down Expand Up @@ -186,6 +185,22 @@ async def stop_deployment_optimization(request):
)


@app.route("/update_profile/{model_name}")
async def update_profile(request):
    """Reload the workload profiles of a monitored model.

    Looks up the model's monitor by the ``model_name`` path parameter and
    asks it to re-read its profiles. Returns 404 if the model is not being
    monitored, 500 if the reload fails, 200 on success.
    """
    model_name = request.path_params["model_name"]
    monitor = model_monitors.get(model_name)
    # Guard clause: unknown model — reject before attempting any reload.
    if monitor is None:
        return JSONResponse({"error": f"{model_name} not monitored"}, status_code=404)

    # load_profiles() reports success as a boolean; map failure to a 500.
    if not monitor.load_profiles():
        return JSONResponse(
            {"error": f"failed to update workload profile of {model_name}"},
            status_code=500,
        )
    return JSONResponse({"message": f"workload profile of {model_name} updated"})


@app.route("/scale/{namespace}/{deployment_name}/{replicas}", methods=["POST"])
async def scale_deployment(request):
namespace = request.path_params["namespace"]
Expand Down Expand Up @@ -249,10 +264,8 @@ def main(signal, timeout):
apps_v1 = client.AppsV1Api()

# List existing deployments
logger.info(f"Looking for deployments in {NAMESPACE} with {MODEL_LABEL}")
deployments = apps_v1.list_namespaced_deployment(
namespace=NAMESPACE, label_selector=MODEL_LABEL
)
logger.info(f"Looking for deployments with {MODEL_LABEL}")
deployments = apps_v1.list_deployment_for_all_namespaces(label_selector=MODEL_LABEL)
watch_version = deployments.metadata.resource_version
logger.debug(f"last watch version: {watch_version}")
for deployment in deployments.items:
Expand Down Expand Up @@ -284,8 +297,7 @@ def main(signal, timeout):
w = watch.Watch()
signal["watch"] = w
for event in w.stream(
apps_v1.list_namespaced_deployment,
namespace=NAMESPACE,
apps_v1.list_deployment_for_all_namespaces,
label_selector=MODEL_LABEL,
resource_version=watch_version,
timeout_seconds=timeout,
Expand Down
36 changes: 15 additions & 21 deletions python/aibrix/aibrix/gpu_optimizer/deployment.yaml
Original file line number Diff line number Diff line change
@@ -1,73 +1,67 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: pod-autoscaler
name: aibrix-gpu-optimizer-sa
namespace: aibrix-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
kind: ClusterRole
metadata:
namespace: aibrix-system
name: deployment-reader
name: gpu-optimizer-clusterrole
rules:
- apiGroups: ["apps"]
resources: ["deployments"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
kind: ClusterRoleBinding
metadata:
name: deployment-reader-binding
namespace: aibrix-system
name: aibrix-gpu-optimizer-clusterrole-binding
subjects:
- kind: ServiceAccount
name: pod-autoscaler
name: aibrix-gpu-optimizer-sa
namespace: aibrix-system
roleRef:
kind: Role
name: deployment-reader
kind: ClusterRole
name: gpu-optimizer-clusterrole
apiGroup: rbac.authorization.k8s.io
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: gpu-optimizer
name: aibrix-gpu-optimizer
namespace: aibrix-system
spec:
replicas: 1
selector:
matchLabels:
app: gpu-optimizer
app: aibrix-gpu-optimizer
template:
metadata:
labels:
app: gpu-optimizer
app: aibrix-gpu-optimizer
spec:
serviceAccountName: pod-autoscaler
serviceAccountName: aibrix-gpu-optimizer-sa
automountServiceAccountToken: true # Important!
containers:
- name: gpu-optimizer
- name: aibrix-gpu-optimizer
image: aibrix/runtime:nightly
command: ["python", "-m", "aibrix.gpu_optimizer.app", "--debug"]
ports:
- containerPort: 8080
env:
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: REDIS_HOST
value: aibrix-redis-master.aibrix-system.svc.cluster.local
---
# Debug only: Make sure pod can be visited from controller that deployed in mac.
apiVersion: v1
kind: Service
metadata:
name: gpu-optimizer
name: aibrix-gpu-optimizer
namespace: aibrix-system
spec:
selector:
app: gpu-optimizer
app: aibrix-gpu-optimizer
ports:
- protocol: TCP
port: 8080
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ def validate(self) -> bool:
if len(self.clusterers) < self.buffer_size:
self.clusterers.append(self.clusterers[current].clone())
self.frontier = len(self.clusterers) - 1
logger.debug("test")
logger.debug(
"moving buffer created: %s, buffers: %s",
self._reason,
Expand Down
16 changes: 13 additions & 3 deletions python/aibrix/aibrix/gpu_optimizer/load_monitor/monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,12 @@ def add_deployment(
profile = self._match_profile(key, deployment_name)
if profile is not None:
# No lock required here since the deployment has not been added to deployments.
self._optimizer.set_profile(profile)
try:
self._optimizer.set_profile(profile)
except Exception as e:
logger.warning(
f"Failed to set GPU profile for {key}. Optimizer will skip the GPU: {e}"
)
else:
logger.warning(
f"No GPU profile found for {key}. Optimizer will skip the GPU."
Expand Down Expand Up @@ -197,12 +202,13 @@ def clear_outdated_deployments(self) -> int:
del self.deployments[key]
return len(self.deployments)

def load_profiles(self, profile_reader: Optional[ProfileReader] = None):
def load_profiles(self, profile_reader: Optional[ProfileReader] = None) -> bool:
"""Load profiles from a file"""
try:
if profile_reader is None:
if self._profile_reader is None:
return
logger.error("Profile reader not initialized")
return False
profile_reader = self._profile_reader
else:
self._profile_reader = profile_reader
Expand All @@ -211,9 +217,13 @@ def load_profiles(self, profile_reader: Optional[ProfileReader] = None):
for profile in profiles:
if self._update_profile(profile):
logger.debug(f"Profile of {profile.gpu} updated.")

return True
except Exception as e:
logger.error(f"Failed to load profiles: {e}")

return False

def _update_profile(self, profile: GPUProfile) -> bool:
"""Update a profile, will update the formal alias copy, too."""
key = profile.gpu
Expand Down

0 comments on commit d2be10a

Please sign in to comment.