vllm-project · Jeffwan · Sep 30, 2024 · Sep 27, 2024 · Sep 30, 2024 · Sep 30, 2024
diff --git a/Makefile b/Makefile
@@ -52,7 +52,7 @@ help: ## Display this help.
 
 .PHONY: manifests
 manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
-	$(CONTROLLER_GEN) rbac:roleName=manager-role crd:maxDescLen=0,generateEmbeddedObjectMeta=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases
+	$(CONTROLLER_GEN) rbac:roleName=controller-manager-role crd:maxDescLen=0,generateEmbeddedObjectMeta=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases
 
 .PHONY: generate
 generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.

diff --git a/config/gateway/kustomization.yaml b/config/gateway/kustomization.yaml
@@ -9,7 +9,7 @@ kind: Kustomization
 images:
 - name: plugins
   newName: aibrix/plugins
-  newTag: v0.1.0-rc.2
+  newTag: nightly
 - name: users
   newName: aibrix/users
-  newTag: v0.1.0-rc.2
+  newTag: nightly
diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml
@@ -5,4 +5,4 @@ kind: Kustomization
 images:
 - name: controller
   newName: aibrix/controller-manager
-  newTag: v0.1.0-rc.2
+  newTag: nightly
diff --git a/docs/development/app/deployment.yaml b/docs/development/app/deployment.yaml
@@ -4,20 +4,20 @@ metadata:
   name: llama2-70b
   namespace: aibrix-system
   labels:
-    modeladapter.aibricks.ai/enabled: "true"
-    model.aibrix.ai: "llama2-70b"
+    model.aibrix.ai/name: "llama2-70b"
     model.aibrix.ai/port: "8000"
+    adapter.model.aibrix.ai/enabled: "true"
 spec:
   replicas: 3
   selector:
     matchLabels:
-      modeladapter.aibricks.ai/enabled: "true"
-      model.aibrix.ai: "llama2-70b"
+      adapter.model.aibrix.ai/enabled: "true"
+      model.aibrix.ai/name: "llama2-70b"
   template:
     metadata:
       labels:
-        modeladapter.aibricks.ai/enabled: "true"
-        model.aibrix.ai: "llama2-70b"
+        adapter.model.aibrix.ai/enabled: "true"
+        model.aibrix.ai/name: "llama2-70b"
     spec:
       containers:
         - name: llmengine
@@ -46,7 +46,7 @@ metadata:
   namespace: aibrix-system
 spec:
   selector:
-    model.aibrix.ai: "llama2-70b"
+    model.aibrix.ai/name: "llama2-70b"
   ports:
     - protocol: TCP
       port: 8000

diff --git a/docs/source/features/lora-dynamic-loading.rst b/docs/source/features/lora-dynamic-loading.rst
@@ -52,13 +52,13 @@ Here's one model adapter example.
       name: llama-2-7b-sql-lora-test
       namespace: aibrix-system
       labels:
-        model.aibrix.ai: "llama-2-7b-sql-lora-test"
+        model.aibrix.ai/name: "llama-2-7b-sql-lora-test"
         model.aibrix.ai/port: "8000"
     spec:
       baseModel: llama2-70b
       podSelector:
         matchLabels:
-          model.aibrix.ai: llama2-70b
+          model.aibrix.ai/name: llama2-70b
       artifactURL:  huggingface://yard1/llama-2-7b-sql-lora-test
       schedulerName: default
 

diff --git a/docs/source/getting_started/quickstart.rst b/docs/source/getting_started/quickstart.rst
@@ -27,15 +27,16 @@ Save yaml as `deployment.yaml` and run `kubectl apply -f deployment.yaml`.
     kind: Deployment
     metadata:
       labels:
-        model.aibrix.ai: llama-2-7b-hf
+        model.aibrix.ai/name: llama-2-7b-hf
         model.aibrix.ai/port: "8000"
+        adapter.model.aibrix.ai/enabled: "true"
       name: llama-2-7b-hf
       namespace: aibrix-system
     spec:
       replicas: 1
       selector:
         matchLabels:
-          model.aibrix.ai: llama-2-7b-hf
+          model.aibrix.ai/name: llama-2-7b-hf
       strategy:
         rollingUpdate:
           maxSurge: 25%
@@ -44,7 +45,7 @@ Save yaml as `deployment.yaml` and run `kubectl apply -f deployment.yaml`.
       template:
         metadata:
           labels:
-            model.aibrix.ai: llama-2-7b-hf
+            model.aibrix.ai/name: llama-2-7b-hf
         spec:
           containers:
             - command:
@@ -112,7 +113,7 @@ Save yaml as `service.yaml` and run `kubectl apply -f service.yaml`.
     kind: Service
     metadata:
       labels:
-        model.aibrix.ai: llama-2-7b-hf
+        model.aibrix.ai/name: llama-2-7b-hf
         prometheus-discovery: "true"
       annotations:
         prometheus.io/scrape: "true"
@@ -130,7 +131,7 @@ Save yaml as `service.yaml` and run `kubectl apply -f service.yaml`.
           protocol: TCP
           targetPort: 8080
       selector:
-        model.aibrix.ai: llama-2-7b-hf
+        model.aibrix.ai/name: llama-2-7b-hf
       type: ClusterIP
 
 Register a user to authenticate the gateway

diff --git a/docs/tutorial/lora/model_adapter.yaml b/docs/tutorial/lora/model_adapter.yaml
@@ -4,13 +4,13 @@ metadata:
   name: lora-1
   namespace: aibrix-system
   labels:
-    model.aibrix.ai: "lora-1"
+    model.aibrix.ai/name: "lora-1"
     model.aibrix.ai/port: "8000"
 spec:
   baseModel: llama2-70b
   podSelector:
     matchLabels:
-      model.aibrix.ai: llama2-70b
+      model.aibrix.ai/name: llama2-70b
   artifactURL: huggingface://yard1/llama-2-7b-sql-lora-test
   schedulerName: default
 # ---

diff --git a/docs/tutorial/runtime/runtime-hf-download.yaml b/docs/tutorial/runtime/runtime-hf-download.yaml
@@ -4,6 +4,7 @@ metadata:
   labels:
     models.aibricks.ai: deepseek-coder-6.7b-instruct
     models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
+    adapter.model.aibrix.ai/enabled: "true"
   name: aibricks-model-deepseek-coder-6.7b-instruct
   namespace: default
 spec:

diff --git a/docs/tutorial/runtime/runtime-s3-download.yaml b/docs/tutorial/runtime/runtime-s3-download.yaml
@@ -4,6 +4,7 @@ metadata:
   labels:
     models.aibricks.ai: deepseek-coder-6.7b-instruct
     models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
+    adapter.model.aibrix.ai/enabled: "true"
   name: aibricks-model-deepseek-coder-6.7b-instruct
   namespace: default
 spec:

diff --git a/docs/tutorial/runtime/runtime-tos-download.yaml b/docs/tutorial/runtime/runtime-tos-download.yaml
@@ -4,6 +4,7 @@ metadata:
   labels:
     models.aibricks.ai: deepseek-coder-6.7b-instruct
     models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
+    adapter.model.aibrix.ai/enabled: "true"
   name: aibricks-model-deepseek-coder-6.7b-instruct
   namespace: default
 spec:

diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go
@@ -53,7 +53,7 @@ var (
 )
 
 const (
-	modelIdentifier = "model.aibrix.ai"
+	modelIdentifier = "model.aibrix.ai/name"
 )
 
 func GetCache() (*Cache, error) {

diff --git a/pkg/controller/modeladapter/README.md b/pkg/controller/modeladapter/README.md
@@ -6,16 +6,17 @@ metadata:
   name: deepseek-33b-instruct
   namespace: default
   labels:
-    model.aibrix.ai: deepseek-33b-instruct
+    model.aibrix.ai/name: deepseek-33b-instruct
+    adapter.model.aibrix.ai/enabled: "true"
 spec:
   replicas: 1
   selector:
     matchLabels:
-      model.aibrix.ai: deepseek-33b-instruct
+      model.aibrix.ai/name: deepseek-33b-instruct
   template:
     metadata:
       labels:
-        model.aibrix.ai: deepseek-33b-instruct
+        model.aibrix.ai/name: deepseek-33b-instruct
     spec:
       containers:
       - name: deepseek-33b-instruct
@@ -59,7 +60,7 @@ spec:
   baseModel: llama2-70b
   podSelector:
     matchLabels:
-      model.aibrix.ai: llama2-70b
+      model.aibrix.ai/name: llama2-70b
   schedulerName: default-model-adapter-scheduler
 status:
   phase: Configuring
@@ -71,8 +72,8 @@ kind: Service
 metadata:
   creationTimestamp: "2024-07-14T21:42:57Z"
   labels:
-    model.aibrix.ai/base-model: llama2-70b
-    model.aibrix.ai/model-adapter: text2sql-lora-1
+    model.aibrix.ai/name: llama2-70b
+    adapter.model.aibrix.ai/name: text2sql-lora-1
   name: text2sql-lora-1
   namespace: default
   ownerReferences:
@@ -99,7 +100,7 @@ spec:
     targetPort: 8000
   publishNotReadyAddresses: true
   selector:
-    model.aibrix.ai: llama2-70b
+    model.aibrix.ai/name: llama2-70b
   sessionAffinity: None
   type: ClusterIP
 status:
@@ -152,8 +153,8 @@ metadata:
     endpoints.kubernetes.io/last-change-trigger-time: "2024-07-14T21:42:57Z"
   creationTimestamp: "2024-07-14T21:42:57Z"
   labels:
-    model.aibrix.ai/base-model: llama2-70b
-    model.aibrix.ai/model-adapter: text2sql-lora-1
+    model.aibrix.ai/name: llama2-70b
+    adapter.model.aibrix.ai/name: text2sql-lora-1
     service.kubernetes.io/headless: ""
   name: text2sql-lora-1
   namespace: default
-Original file line number
+Diff line change
@@ Expand Up / @@ -53,7 +53,7 @@ var ( @@
     )
     const (
-    	modelIdentifier = "model.aibrix.ai"
+    	modelIdentifier = "model.aibrix.ai/name"
     )
     func GetCache() (*Cache, error) {
@@ Expand Down @@