
[Misc] Consolidate app and simulator #477

Merged · 3 commits · Dec 4, 2024
Changes from 2 commits
20 changes: 15 additions & 5 deletions development/app/Dockerfile
@@ -1,9 +1,10 @@
# Use the official Python base image
FROM python:3.9-slim
FROM python:3.10-slim

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV WANDB_MODE=disabled

# Set the working directory
WORKDIR /app
@@ -12,15 +13,24 @@ WORKDIR /app
COPY requirements.txt /app/

# Install dependencies
RUN apt update && apt install -y curl jq
RUN apt update && apt install -y curl jq git

RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container
COPY . /app/
COPY ./*.py /app/

ENV MODEL_NAME=llama2-7b
ARG GPU_TYPE=disabled
Collaborator:

minor: I'd rather have two args, profiler enabled/disabled and GPU type.
If profiler == enabled, gpu_types should exist.

This makes the code more readable for people who are not familiar with this feature.

Collaborator Author:

I can add another bool switch here. Although the need for profiling is the reason for separate images, this switch will also enable/disable the simulation feature, so I will name the switch "simulation".

Collaborator Author (@zhangjyr, Dec 4, 2024):

Things get a little complicated:

  1. The vidur argparse overrides the mock app's argument parsing, so no new argument can be added with the current implementation. This means that even if I added a new ARG to the Dockerfile, the mock app would still use one argument.
  2. The Dockerfile does not support complex conditional logic. I currently use ENV to pass the ARG to CMD; any divergence between Dockerfile build ARGs and mock app arguments would be error-prone.
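For context, a common way to let an app keep its own flags while a framework owns the rest of the command line is `argparse.parse_known_args`, which peels off known flags and forwards the remainder. A minimal sketch (the flag names and `split_args` helper are hypothetical, not the PR's actual interface):

```python
import argparse

def split_args(argv):
    # The app claims only its own flags and leaves everything else untouched.
    app_parser = argparse.ArgumentParser(add_help=False)
    app_parser.add_argument("--simulation", default="disabled")
    app_args, remaining = app_parser.parse_known_args(argv)
    # `remaining` could then be handed to the simulator's own parser.
    return app_args, remaining

app_args, rest = split_args(
    ["--simulation", "enabled", "--replica_config_device", "a100"]
)
print(app_args.simulation)  # enabled
print(rest)                 # ['--replica_config_device', 'a100']
```

Whether this fits here depends on how deeply vidur's parser is wired in, which is the complication described above.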

Collaborator:

Then let's stick to one argument for now and rename it to "simulation"; that looks good to me.

Collaborator Author:

Done


# Trigger profiling
RUN if [ "$GPU_TYPE" != "disabled" ]; then \
python app.py --time_limit 1000 --replica_config_device ${GPU_TYPE}; \
fi

# Expose the port the app runs on
EXPOSE 8000

# Run the application
CMD ["python", "app.py"]
# Run the application; the environment variable is necessary to make the ARG visible at runtime
ENV GPU_TYPE=$GPU_TYPE
CMD python app.py --replica_config_device ${GPU_TYPE}
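The ARG-to-ENV copy above is needed because build args are discarded once the build finishes, while CMD runs at container start. A minimal illustration of the pattern (the `MODE` name is illustrative, not from this PR):

```dockerfile
FROM python:3.10-slim

# Build-time value, settable via: docker build --build-arg MODE=a100 .
ARG MODE=disabled

# Copy the build arg into the runtime environment; without this line,
# $MODE would expand to an empty string when the container starts.
ENV MODE=$MODE

CMD echo "mode=$MODE"
```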
62 changes: 40 additions & 22 deletions development/simulator/Makefile → development/app/Makefile
@@ -1,28 +1,55 @@
all: build

build-a100:
docker build -t aibrix/vllm-simulator:nightly -f Dockerfile .
docker-build-mock:
docker build -t aibrix/vllm-mock:nightly -f Dockerfile .

build-a40:
docker-build-simulator:
docker build -t aibrix/vllm-simulator:nightly --build-arg GPU_TYPE=a100 -f Dockerfile .

docker-build-simulator-a40:
docker build -t aibrix/vllm-simulator-a40:nightly --build-arg GPU_TYPE=a40 -f Dockerfile .

build: build-a100
docker-build: docker-build-mock

deploy-a100:
kubectl apply -f deployment-a100.yaml
deploy-mock:
kubectl create -k config/mock
sleep 2
kubectl port-forward svc/llama2-7b 8000:8000 1>/dev/null 2>&1 &
kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 1>/dev/null 2>&1 &

deploy-a40:
kubectl apply -f deployment-a40.yaml
deploy-simulator:
kubectl create -k config/simulator
sleep 2
kubectl port-forward svc/llama2-7b 8000:8000 1>/dev/null 2>&1 &
kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 1>/dev/null 2>&1 &

deploy: deploy-a100
deploy-heterogeneous:
kubectl create -k config/heterogeneous
sleep 2
kubectl -n aibrix-system port-forward svc/llama2-7b 8000:8000 1>/dev/null 2>&1 &
kubectl port-forward svc/llama2-7b 8000:8000 1>/dev/null 2>&1 &
kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 1>/dev/null 2>&1 &

deploy: deploy-mock

clean-mock:
kubectl delete -k config/mock
sleep 1
curl http://localhost:8000/metrics
curl http://localhost:8888/metrics

clean:
kubectl delete -f deployment-a100.yaml
kubectl delete -f deployment-a40.yaml
clean-simulator:
kubectl delete -k config/simulator
sleep 1
curl http://localhost:8000/metrics
curl http://localhost:8888/metrics

clean-heterogeneous:
kubectl delete -k config/heterogeneous
sleep 1
curl http://localhost:8000/metrics
curl http://localhost:8888/metrics

clean: clean-mock

test:
curl http://localhost:8000/v1/chat/completions \
@@ -56,17 +83,8 @@ test-long:
"max_tokens": 50 \
}'

init-local-gateway-call:
kubectl -n aibrix-system port-forward svc/aibrix-gateway-users 8090:8090 1>/dev/null 2>&1 &
kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-eg-903790dc 8888:80 1>/dev/null 2>&1 &
sleep 1
curl http://localhost:8090/CreateUser \
-H "Content-Type: application/json" \
-d '{"name": "your-user-name","rpm": 1000,"tpm": 100000}'

test-gateway:
curl -v http://localhost:8888/v1/chat/completions \
-H "user: your-user-name" \
-H "model: llama2-7b" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer any_key" \
32 changes: 28 additions & 4 deletions development/app/README.md
@@ -13,7 +13,7 @@
docker build -t aibrix/vllm-mock:nightly -f Dockerfile .
```

1. (Optional) Load container image to docker context
1.b (Optional) Load container image to docker context

> Note: If you are using Docker-Desktop on Mac, Kubernetes shares the local image repository with Docker.
> Therefore, the following command is not necessary. Only kind users need this step.
@@ -22,12 +22,36 @@ docker build -t aibrix/vllm-mock:nightly -f Dockerfile .
kind load docker-image aibrix/vllm-mock:nightly
```

1. Deploy mocked model image
2. Deploy mocked model image
```shell
kubectl apply -f deployment.yaml
kubectl create -k config/mock

# you can run following command to delete the deployment
kubectl delete -f deployment.yaml
kubectl delete -k config/mock
```

### Deploy the simulator app
Alternatively, [vidur](https://github.com/microsoft/vidur) is integrated for high-fidelity vLLM simulation:
1. Build simulator base model image
```shell
docker build -t aibrix/vllm-simulator:nightly --build-arg GPU_TYPE=a100 -f Dockerfile .
```

1.b (Optional) Load container image to docker context

> Note: If you are using Docker-Desktop on Mac, Kubernetes shares the local image repository with Docker.
> Therefore, the following command is not necessary. Only kind users need this step.

```shell
kind load docker-image aibrix/vllm-simulator:nightly
```

2. Deploy simulator model image
```shell
kubectl create -k config/simulator

# you can run following command to delete the deployment
kubectl delete -k config/simulator
```
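The Makefile's `test` target exercises the deployed service with curl; an equivalent Python sketch is below. It assumes the service has already been port-forwarded to `localhost:8000` (the `build_chat_request` helper is illustrative), so the commented-out call only succeeds against a running deployment:

```python
import json
import urllib.request

def build_chat_request(model="llama2-7b", prompt="Say this is a test"):
    # Same payload shape the Makefile's curl-based `test` target sends.
    body = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
    }
    return urllib.request.Request(
        "http://localhost:8000/v1/chat/completions",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
    )

# With the deployment port-forwarded, send it:
# resp = urllib.request.urlopen(build_chat_request())
# print(json.loads(resp.read()))
```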

### Test the metric invocation