Skip to content

Commit

Permalink
Update manifests & docs about service httproute naming trick (#362)
Browse files Browse the repository at this point in the history
* Make sure service name is aligned with model.aibrix.ai/name value

This avoids the HTTPRoute backendRef name mismatch issue.

* Update docs about service httproute naming trick
  • Loading branch information
Jeffwan authored Nov 11, 2024
1 parent aa5edec commit 19a6093
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 37 deletions.
2 changes: 1 addition & 1 deletion benchmarks/autoscaling/7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ metadata:
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8000"
name: aibrix-deepseek-coder-7b-instruct-svc
name: deepseek-coder-7b-instruct
namespace: default
spec:
ports:
Expand Down
17 changes: 14 additions & 3 deletions docs/source/getting_started/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ Save yaml as `deployment.yaml` and run `kubectl apply -f deployment.yaml`.
kind: Deployment
metadata:
labels:
# Note: The value of the `model.aibrix.ai/name` label here must match the Service name.
model.aibrix.ai/name: llama-2-7b-hf
model.aibrix.ai/port: "8000"
adapter.model.aibrix.ai/enabled: true
name: llama-2-7b-hf
namespace: aibrix-system
namespace: default
spec:
replicas: 1
selector:
Expand Down Expand Up @@ -59,7 +60,8 @@ Save yaml as `deployment.yaml` and run `kubectl apply -f deployment.yaml`.
- --model
- meta-llama/Llama-2-7b-hf
- --served-model-name
- meta-llama/llama-2-7b-hf
# Note: The `--served-model-name` argument value must also match the Service name and the Deployment label `model.aibrix.ai/name`
- llama-2-7b-hf
- --trust-remote-code
- --enable-lora
env:
Expand Down Expand Up @@ -113,13 +115,14 @@ Save yaml as `service.yaml` and run `kubectl apply -f service.yaml`.
kind: Service
metadata:
labels:
# Note: The Service name must match the label value `model.aibrix.ai/name` in the Deployment
model.aibrix.ai/name: llama-2-7b-hf
prometheus-discovery: "true"
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
name: llama-2-7b-hf
namespace: aibrix-system
namespace: default
spec:
ports:
- name: serve
Expand All @@ -134,6 +137,14 @@ Save yaml as `service.yaml` and run `kubectl apply -f service.yaml`.
model.aibrix.ai/name: llama-2-7b-hf
type: ClusterIP
.. note::

Ensure that:

1. The `Service` name matches the `model.aibrix.ai/name` label value in the `Deployment`.
2. The `--served-model-name` argument value in the `Deployment` command also matches the `Service` name and the `model.aibrix.ai/name` label value.


Register a user to authenticate the gateway
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
22 changes: 11 additions & 11 deletions docs/tutorial/runtime/runtime-hf-download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
model.aibrix.ai/port: "8000"
adapter.model.aibrix.ai/enabled: "true"
name: aibricks-model-deepseek-coder-6.7b-instruct
name: aibrix-model-deepseek-coder-7b-instruct
namespace: default
spec:
replicas: 1
selector:
matchLabels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
strategy:
rollingUpdate:
maxSurge: 25%
Expand All @@ -20,7 +20,7 @@ spec:
template:
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
spec:
containers:
- command:
Expand All @@ -32,9 +32,9 @@ spec:
- --port
- "8000"
- --model
- /models/deepseek-coder-6.7b-instruct
- /models/deepseek-coder-7b-instruct
- --served-model-name
- deepseek-ai/deepseek-coder-6.7b-instruct
- deepseek-coder-7b-instruct
- --distributed-executor-backend
- ray
- --trust-remote-code
Expand Down Expand Up @@ -99,7 +99,7 @@ spec:
- --local-dir
- /models/
- --model-name
- deepseek-coder-6.7b-instruct
- deepseek-coder-7b-instruct
env:
- name: DOWNLOADER_ALLOW_FILE_SUFFIX
value: json, safetensors
Expand All @@ -126,12 +126,12 @@ apiVersion: v1
kind: Service
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
prometheus-discovery: "true"
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
name: aibricks-model-deepseek-coder-6-7b-instruct-svc
name: deepseek-coder-7b-instruct
namespace: default
spec:
ports:
Expand All @@ -140,5 +140,5 @@ spec:
protocol: TCP
targetPort: 8000
selector:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
type: ClusterIP
22 changes: 11 additions & 11 deletions docs/tutorial/runtime/runtime-s3-download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
model.aibrix.ai/port: "8000"
adapter.model.aibrix.ai/enabled: "true"
name: aibricks-model-deepseek-coder-6.7b-instruct
name: aibrix-model-deepseek-coder-7b-instruct
namespace: default
spec:
replicas: 1
selector:
matchLabels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
strategy:
rollingUpdate:
maxSurge: 25%
Expand All @@ -20,7 +20,7 @@ spec:
template:
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
spec:
containers:
- command:
Expand All @@ -32,9 +32,9 @@ spec:
- --port
- "8000"
- --model
- /models/deepseek-coder-6.7b-instruct
- /models/deepseek-coder-7b-instruct
- --served-model-name
- deepseek-ai/deepseek-coder-6.7b-instruct
- deepseek-coder-7b-instruct
- --distributed-executor-backend
- ray
- --trust-remote-code
Expand Down Expand Up @@ -99,7 +99,7 @@ spec:
- --local-dir
- /models/
- --model-name
- deepseek-coder-6.7b-instruct
- deepseek-coder-7b-instruct
env:
- name: DOWNLOADER_ALLOW_FILE_SUFFIX
value: json, safetensors
Expand Down Expand Up @@ -128,12 +128,12 @@ apiVersion: v1
kind: Service
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
prometheus-discovery: "true"
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
name: aibricks-model-deepseek-coder-6-7b-instruct-svc
name: deepseek-coder-7b-instruct
namespace: default
spec:
ports:
Expand All @@ -142,5 +142,5 @@ spec:
protocol: TCP
targetPort: 8000
selector:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
type: ClusterIP
22 changes: 11 additions & 11 deletions docs/tutorial/runtime/runtime-tos-download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
model.aibrix.ai/port: "8000"
adapter.model.aibrix.ai/enabled: "true"
name: aibricks-model-deepseek-coder-6.7b-instruct
name: aibrix-model-deepseek-coder-7b-instruct
namespace: default
spec:
replicas: 1
selector:
matchLabels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
strategy:
rollingUpdate:
maxSurge: 25%
Expand All @@ -20,7 +20,7 @@ spec:
template:
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
spec:
containers:
- command:
Expand All @@ -32,9 +32,9 @@ spec:
- --port
- "8000"
- --model
- /models/deepseek-coder-6.7b-instruct
- /models/deepseek-coder-7b-instruct
- --served-model-name
- deepseek-ai/deepseek-coder-6.7b-instruct
- deepseek-coder-7b-instruct
- --distributed-executor-backend
- ray
- --trust-remote-code
Expand Down Expand Up @@ -99,7 +99,7 @@ spec:
- --local-dir
- /models/
- --model-name
- deepseek-coder-6.7b-instruct
- deepseek-coder-7b-instruct
env:
- name: DOWNLOADER_ALLOW_FILE_SUFFIX
value: json, safetensors
Expand Down Expand Up @@ -128,12 +128,12 @@ apiVersion: v1
kind: Service
metadata:
labels:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
prometheus-discovery: "true"
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
name: aibricks-model-deepseek-coder-6-7b-instruct-svc
name: deepseek-coder-7b-instruct
namespace: default
spec:
ports:
Expand All @@ -142,5 +142,5 @@ spec:
protocol: TCP
targetPort: 8000
selector:
models.aibricks.ai: deepseek-coder-6.7b-instruct
model.aibrix.ai/name: deepseek-coder-7b-instruct
type: ClusterIP

0 comments on commit 19a6093

Please sign in to comment.