Update manifests & docs about service httproute naming trick #362

Merged
2 commits merged on Nov 11, 2024
benchmarks/autoscaling/7b.yaml (2 changes: 1 addition & 1 deletion)
@@ -157,7 +157,7 @@ metadata:
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8000"
name: aibrix-deepseek-coder-7b-instruct-svc
name: deepseek-coder-7b-instruct
namespace: default
spec:
ports:
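The rename above drops the `aibrix-deepseek-coder-7b-instruct-svc` name so the Service is called exactly `deepseek-coder-7b-instruct`, the same string used as the `model.aibrix.ai/name` label value. To illustrate why that alignment matters for the "service httproute naming trick" in the PR title, here is a minimal hand-written Gateway API HTTPRoute sketch whose backendRef resolves the backend purely by Service name. The route name, the `aibrix-eg` parent gateway, and the `/v1` path match are illustrative assumptions, not objects that AIBrix is known to generate in exactly this form.

# Illustrative sketch only: a route for one model can point at the model's Service
# directly when the Service shares the model's name.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: deepseek-coder-7b-instruct        # assumption: route named after the model
  namespace: default
spec:
  parentRefs:
  - name: aibrix-eg                        # assumption: gateway this route attaches to
    namespace: aibrix-system
  rules:
  - matches:
    - path:
        type: PathPrefix
        value: /v1                         # illustrative match; real routing may differ
    backendRefs:
    - name: deepseek-coder-7b-instruct     # must equal the Service name, i.e. the model name
      port: 8000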
docs/source/getting_started/quickstart.rst (17 changes: 14 additions & 3 deletions)
@@ -27,11 +27,12 @@ Save yaml as `deployment.yaml` and run `kubectl apply -f deployment.yaml`.
kind: Deployment
metadata:
labels:
# Note: The value of the `model.aibrix.ai/name` label here must match the Service name.
model.aibrix.ai/name: llama-2-7b-hf
model.aibrix.ai/port: "8000"
adapter.model.aibrix.ai/enabled: "true"
name: llama-2-7b-hf
namespace: aibrix-system
namespace: default
spec:
replicas: 1
selector:
@@ -59,7 +60,8 @@ Save yaml as `deployment.yaml` and run `kubectl apply -f deployment.yaml`.
- --model
- meta-llama/Llama-2-7b-hf
- --served-model-name
- meta-llama/llama-2-7b-hf
# Note: The `--served-model-name` argument value must also match the Service name and the Deployment label `model.aibrix.ai/name`
- llama-2-7b-hf
- --trust-remote-code
- --enable-lora
env:
@@ -113,13 +115,14 @@ Save yaml as `service.yaml` and run `kubectl apply -f service.yaml`.
kind: Service
metadata:
labels:
# Note: The Service name must match the label value `model.aibrix.ai/name` in the Deployment
model.aibrix.ai/name: llama-2-7b-hf
prometheus-discovery: "true"
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
name: llama-2-7b-hf
namespace: aibrix-system
namespace: default
spec:
ports:
- name: serve
@@ -134,6 +137,14 @@ Save yaml as `service.yaml` and run `kubectl apply -f service.yaml`.
model.aibrix.ai/name: llama-2-7b-hf
type: ClusterIP
.. note::

Ensure that:

1. The `Service` name matches the `model.aibrix.ai/name` label value in the `Deployment`.
2. The `--served-model-name` argument value in the `Deployment` command is also consistent with the `Service` name and the `model.aibrix.ai/name` label value.


Register a user to authenticate the gateway
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

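To make the quickstart note above concrete, here is a condensed sketch of the three places that must carry the same string (`llama-2-7b-hf` in this example). It is trimmed from the full quickstart manifests; the container image, entrypoint, and port details are illustrative and abbreviated.

# Condensed, illustrative sketch of the naming alignment described in the note above.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-2-7b-hf
  namespace: default
  labels:
    model.aibrix.ai/name: llama-2-7b-hf          # (1) model name label
    model.aibrix.ai/port: "8000"
spec:
  replicas: 1
  selector:
    matchLabels:
      model.aibrix.ai/name: llama-2-7b-hf
  template:
    metadata:
      labels:
        model.aibrix.ai/name: llama-2-7b-hf
    spec:
      containers:
      - name: vllm-openai
        image: vllm/vllm-openai:latest           # illustrative image tag
        command:
        - python3
        - -m
        - vllm.entrypoints.openai.api_server
        - --model
        - meta-llama/Llama-2-7b-hf
        - --served-model-name
        - llama-2-7b-hf                          # (2) must equal the label value in (1)
        ports:
        - containerPort: 8000
---
apiVersion: v1
kind: Service
metadata:
  name: llama-2-7b-hf                            # (3) Service name must equal (1) and (2)
  namespace: default
  labels:
    model.aibrix.ai/name: llama-2-7b-hf
spec:
  selector:
    model.aibrix.ai/name: llama-2-7b-hf
  ports:
  - name: serve
    port: 8000
    targetPort: 8000
  type: ClusterIP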
docs/tutorial/runtime/runtime-hf-download.yaml (22 changes: 11 additions & 11 deletions)
@@ -2,16 +2,16 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
model.aibrix.ai/port: "8000"
adapter.model.aibrix.ai/enabled: "true"
name: aibricks-model-deepseek-coder-6.7b-instruct
name: aibrix-model-deepseek-coder-7b-instruct
namespace: default
spec:
replicas: 1
selector:
matchLabels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
strategy:
rollingUpdate:
maxSurge: 25%
@@ -20,7 +20,7 @@ spec:
template:
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
spec:
containers:
- command:
@@ -32,9 +32,9 @@ spec:
- --port
- "8000"
- --model
- /models/deepseek-coder-6.7b-instruct
- /models/deepseek-coder-7b-instruct
- --served-model-name
- deepseek-ai/deepseek-coder-6.7b-instruct
- deepseek-coder-7b-instruct
- --distributed-executor-backend
- ray
- --trust-remote-code
@@ -99,7 +99,7 @@ spec:
- --local-dir
- /models/
- --model-name
- deepseek-coder-6.7b-instruct
- deepseek-coder-7b-instruct
env:
- name: DOWNLOADER_ALLOW_FILE_SUFFIX
value: json, safetensors
@@ -126,12 +126,12 @@ apiVersion: v1
kind: Service
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
prometheus-discovery: "true"
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
name: aibricks-model-deepseek-coder-6-7b-instruct-svc
name: deepseek-coder-7b-instruct
namespace: default
spec:
ports:
@@ -140,5 +140,5 @@ spec:
protocol: TCP
targetPort: 8000
selector:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
type: ClusterIP
docs/tutorial/runtime/runtime-s3-download.yaml (22 changes: 11 additions & 11 deletions)
@@ -2,16 +2,16 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
model.aibrix.ai/port: "8000"
adapter.model.aibrix.ai/enabled: "true"
name: aibricks-model-deepseek-coder-6.7b-instruct
name: aibrix-model-deepseek-coder-7b-instruct
namespace: default
spec:
replicas: 1
selector:
matchLabels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
strategy:
rollingUpdate:
maxSurge: 25%
@@ -20,7 +20,7 @@ spec:
template:
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
spec:
containers:
- command:
@@ -32,9 +32,9 @@ spec:
- --port
- "8000"
- --model
- /models/deepseek-coder-6.7b-instruct
- /models/deepseek-coder-7b-instruct
- --served-model-name
- deepseek-ai/deepseek-coder-6.7b-instruct
- deepseek-coder-7b-instruct
- --distributed-executor-backend
- ray
- --trust-remote-code
@@ -99,7 +99,7 @@ spec:
- --local-dir
- /models/
- --model-name
- deepseek-coder-6.7b-instruct
- deepseek-coder-7b-instruct
env:
- name: DOWNLOADER_ALLOW_FILE_SUFFIX
value: json, safetensors
@@ -128,12 +128,12 @@ apiVersion: v1
kind: Service
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
prometheus-discovery: "true"
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
name: aibricks-model-deepseek-coder-6-7b-instruct-svc
name: deepseek-coder-7b-instruct
namespace: default
spec:
ports:
@@ -142,5 +142,5 @@ spec:
protocol: TCP
targetPort: 8000
selector:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
type: ClusterIP
docs/tutorial/runtime/runtime-tos-download.yaml (22 changes: 11 additions & 11 deletions)
@@ -2,16 +2,16 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
model.aibrix.ai/port: "8000"
adapter.model.aibrix.ai/enabled: "true"
name: aibricks-model-deepseek-coder-6.7b-instruct
name: aibrix-model-deepseek-coder-7b-instruct
namespace: default
spec:
replicas: 1
selector:
matchLabels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
strategy:
rollingUpdate:
maxSurge: 25%
@@ -20,7 +20,7 @@ spec:
template:
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
spec:
containers:
- command:
@@ -32,9 +32,9 @@ spec:
- --port
- "8000"
- --model
- /models/deepseek-coder-6.7b-instruct
- /models/deepseek-coder-7b-instruct
- --served-model-name
- deepseek-ai/deepseek-coder-6.7b-instruct
- deepseek-coder-7b-instruct
- --distributed-executor-backend
- ray
- --trust-remote-code
@@ -99,7 +99,7 @@ spec:
- --local-dir
- /models/
- --model-name
- deepseek-coder-6.7b-instruct
- deepseek-coder-7b-instruct
env:
- name: DOWNLOADER_ALLOW_FILE_SUFFIX
value: json, safetensors
@@ -128,12 +128,12 @@ apiVersion: v1
kind: Service
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
prometheus-discovery: "true"
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
name: aibricks-model-deepseek-coder-6-7b-instruct-svc
name: deepseek-coder-7b-instruct
namespace: default
spec:
ports:
@@ -142,5 +142,5 @@ spec:
protocol: TCP
targetPort: 8000
selector:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
type: ClusterIP