diff --git a/Makefile b/Makefile
index ed1c0eb5..0fd842c0 100644
--- a/Makefile
+++ b/Makefile
@@ -52,7 +52,7 @@ help: ## Display this help.
 
 .PHONY: manifests
 manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
-	$(CONTROLLER_GEN) rbac:roleName=manager-role crd:maxDescLen=0,generateEmbeddedObjectMeta=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases
+	$(CONTROLLER_GEN) rbac:roleName=controller-manager-role crd:maxDescLen=0,generateEmbeddedObjectMeta=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases
 
 .PHONY: generate
 generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
diff --git a/config/gateway/kustomization.yaml b/config/gateway/kustomization.yaml
index d7882c2c..4f1426cf 100644
--- a/config/gateway/kustomization.yaml
+++ b/config/gateway/kustomization.yaml
@@ -9,7 +9,7 @@ kind: Kustomization
 images:
 - name: plugins
   newName: aibrix/plugins
-  newTag: v0.1.0-rc.2
+  newTag: nightly
 - name: users
   newName: aibrix/users
-  newTag: v0.1.0-rc.2
+  newTag: nightly
diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml
index bda1c94f..b79bda07 100644
--- a/config/manager/kustomization.yaml
+++ b/config/manager/kustomization.yaml
@@ -5,4 +5,4 @@ kind: Kustomization
 images:
 - name: controller
   newName: aibrix/controller-manager
-  newTag: v0.1.0-rc.2
+  newTag: nightly
diff --git a/docs/development/app/deployment.yaml b/docs/development/app/deployment.yaml
index 550305d8..28f0280d 100644
--- a/docs/development/app/deployment.yaml
+++ b/docs/development/app/deployment.yaml
@@ -4,20 +4,20 @@ metadata:
   name: llama2-70b
   namespace: aibrix-system
   labels:
-    modeladapter.aibricks.ai/enabled: "true"
-    model.aibrix.ai: "llama2-70b"
+    model.aibrix.ai/name: "llama2-70b"
     model.aibrix.ai/port: "8000"
+    adapter.model.aibrix.ai/enabled: "true"
 spec:
   replicas: 3
   selector:
     matchLabels:
-      modeladapter.aibricks.ai/enabled: "true"
-      model.aibrix.ai: "llama2-70b"
+      adapter.model.aibrix.ai/enabled: "true"
+      model.aibrix.ai/name: "llama2-70b"
   template:
     metadata:
       labels:
-        modeladapter.aibricks.ai/enabled: "true"
-        model.aibrix.ai: "llama2-70b"
+        adapter.model.aibrix.ai/enabled: "true"
+        model.aibrix.ai/name: "llama2-70b"
     spec:
       containers:
         - name: llmengine
@@ -46,7 +46,7 @@ metadata:
   namespace: aibrix-system
 spec:
   selector:
-    model.aibrix.ai: "llama2-70b"
+    model.aibrix.ai/name: "llama2-70b"
   ports:
     - protocol: TCP
       port: 8000
diff --git a/docs/source/features/lora-dynamic-loading.rst b/docs/source/features/lora-dynamic-loading.rst
index 89b475e6..4c61de53 100644
--- a/docs/source/features/lora-dynamic-loading.rst
+++ b/docs/source/features/lora-dynamic-loading.rst
@@ -52,13 +52,13 @@ Here's one model adapter example.
       name: llama-2-7b-sql-lora-test
       namespace: aibrix-system
       labels:
-        model.aibrix.ai: "llama-2-7b-sql-lora-test"
+        model.aibrix.ai/name: "llama-2-7b-sql-lora-test"
         model.aibrix.ai/port: "8000"
     spec:
       baseModel: llama2-70b
       podSelector:
         matchLabels:
-          model.aibrix.ai: llama2-70b
+          model.aibrix.ai/name: llama2-70b
       artifactURL: huggingface://yard1/llama-2-7b-sql-lora-test
       schedulerName: default
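Up to this point the patch is a pure label-schema migration: the flat key model.aibrix.ai becomes the prefixed model.aibrix.ai/name, and pods that may host LoRA adapters opt in explicitly via adapter.model.aibrix.ai/enabled: "true". A minimal Go sketch (illustrative only, not part of the patch) of how selectors behave under the new scheme:

    package main

    import (
        "fmt"

        "k8s.io/apimachinery/pkg/labels"
    )

    func main() {
        // Labels a base-model pod carries after this patch.
        podLabels := labels.Set{
            "model.aibrix.ai/name":            "llama2-70b",
            "model.aibrix.ai/port":            "8000",
            "adapter.model.aibrix.ai/enabled": "true",
        }

        // A ModelAdapter podSelector now matches on the prefixed name key.
        selector := labels.SelectorFromSet(labels.Set{"model.aibrix.ai/name": "llama2-70b"})
        fmt.Println(selector.Matches(podLabels)) // true

        // The old, unprefixed key no longer matches anything.
        oldSelector := labels.SelectorFromSet(labels.Set{"model.aibrix.ai": "llama2-70b"})
        fmt.Println(oldSelector.Matches(podLabels)) // false
    }

Because label selectors are exact-match on keys, every manifest, selector, and constant has to move in the same commit, which is why the rename touches docs, config, and controller code together.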
diff --git a/docs/source/getting_started/quickstart.rst b/docs/source/getting_started/quickstart.rst
index 4694c4ce..fb509556 100644
--- a/docs/source/getting_started/quickstart.rst
+++ b/docs/source/getting_started/quickstart.rst
@@ -27,15 +27,16 @@ Save yaml as `deployment.yaml` and run `kubectl apply -f deployment.yaml`.
     kind: Deployment
     metadata:
       labels:
-        model.aibrix.ai: llama-2-7b-hf
+        model.aibrix.ai/name: llama-2-7b-hf
         model.aibrix.ai/port: "8000"
+        adapter.model.aibrix.ai/enabled: "true"
       name: llama-2-7b-hf
       namespace: aibrix-system
     spec:
       replicas: 1
       selector:
         matchLabels:
-          model.aibrix.ai: llama-2-7b-hf
+          model.aibrix.ai/name: llama-2-7b-hf
       strategy:
         rollingUpdate:
           maxSurge: 25%
@@ -44,7 +45,7 @@ Save yaml as `deployment.yaml` and run `kubectl apply -f deployment.yaml`.
       template:
         metadata:
           labels:
-            model.aibrix.ai: llama-2-7b-hf
+            model.aibrix.ai/name: llama-2-7b-hf
         spec:
           containers:
             - command:
@@ -112,7 +113,7 @@ Save yaml as `service.yaml` and run `kubectl apply -f service.yaml`.
     kind: Service
     metadata:
       labels:
-        model.aibrix.ai: llama-2-7b-hf
+        model.aibrix.ai/name: llama-2-7b-hf
         prometheus-discovery: "true"
       annotations:
         prometheus.io/scrape: "true"
@@ -130,7 +131,7 @@ Save yaml as `service.yaml` and run `kubectl apply -f service.yaml`.
         protocol: TCP
         targetPort: 8080
       selector:
-        model.aibrix.ai: llama-2-7b-hf
+        model.aibrix.ai/name: llama-2-7b-hf
       type: ClusterIP
 
 Register a user to authenticate the gateway
diff --git a/docs/tutorial/lora/model_adapter.yaml b/docs/tutorial/lora/model_adapter.yaml
index 3dba328e..33230e32 100644
--- a/docs/tutorial/lora/model_adapter.yaml
+++ b/docs/tutorial/lora/model_adapter.yaml
@@ -4,13 +4,13 @@ metadata:
   name: lora-1
   namespace: aibrix-system
   labels:
-    model.aibrix.ai: "lora-1"
+    model.aibrix.ai/name: "lora-1"
     model.aibrix.ai/port: "8000"
 spec:
   baseModel: llama2-70b
   podSelector:
     matchLabels:
-      model.aibrix.ai: llama2-70b
+      model.aibrix.ai/name: llama2-70b
   artifactURL: huggingface://yard1/llama-2-7b-sql-lora-test
   schedulerName: default
 # ---
diff --git a/docs/tutorial/runtime/runtime-hf-download.yaml b/docs/tutorial/runtime/runtime-hf-download.yaml
index d27a0212..5193cf6e 100644
--- a/docs/tutorial/runtime/runtime-hf-download.yaml
+++ b/docs/tutorial/runtime/runtime-hf-download.yaml
@@ -4,6 +4,7 @@ metadata:
   labels:
     models.aibricks.ai: deepseek-coder-6.7b-instruct
     models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
+    adapter.model.aibrix.ai/enabled: "true"
   name: aibricks-model-deepseek-coder-6.7b-instruct
   namespace: default
 spec:
diff --git a/docs/tutorial/runtime/runtime-s3-download.yaml b/docs/tutorial/runtime/runtime-s3-download.yaml
index 2fa984a0..b3c3d309 100644
--- a/docs/tutorial/runtime/runtime-s3-download.yaml
+++ b/docs/tutorial/runtime/runtime-s3-download.yaml
@@ -4,6 +4,7 @@ metadata:
   labels:
     models.aibricks.ai: deepseek-coder-6.7b-instruct
     models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
+    adapter.model.aibrix.ai/enabled: "true"
   name: aibricks-model-deepseek-coder-6.7b-instruct
   namespace: default
 spec:
diff --git a/docs/tutorial/runtime/runtime-tos-download.yaml b/docs/tutorial/runtime/runtime-tos-download.yaml
index f884f9e5..5a9abc90 100644
--- a/docs/tutorial/runtime/runtime-tos-download.yaml
+++ b/docs/tutorial/runtime/runtime-tos-download.yaml
@@ -4,6 +4,7 @@ metadata:
   labels:
     models.aibricks.ai: deepseek-coder-6.7b-instruct
     models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
+    adapter.model.aibrix.ai/enabled: "true"
   name: aibricks-model-deepseek-coder-6.7b-instruct
   namespace: default
 spec:
diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go
index b95d777f..7aa6e755 100644
--- a/pkg/cache/cache.go
+++ b/pkg/cache/cache.go
@@ -53,7 +53,7 @@ var (
 )
 
 const (
-	modelIdentifier = "model.aibrix.ai"
+	modelIdentifier = "model.aibrix.ai/name"
 )
 
 func GetCache() (*Cache, error) {
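The cache.go change above is the runtime counterpart of the label rename: the gateway-side cache now keys pods by the prefixed label. A standalone sketch; the modelIdentifier constant matches the patch, while modelNameForPod is a hypothetical helper, not the cache's real API:

    package main

    import (
        "fmt"

        corev1 "k8s.io/api/core/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    // Same constant value as pkg/cache/cache.go after this patch.
    const modelIdentifier = "model.aibrix.ai/name"

    // modelNameForPod resolves which model a pod serves from its labels.
    func modelNameForPod(pod *corev1.Pod) (string, bool) {
        name, ok := pod.Labels[modelIdentifier]
        return name, ok
    }

    func main() {
        pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{
            Name:   "llama2-70b-0",
            Labels: map[string]string{modelIdentifier: "llama2-70b"},
        }}
        if name, ok := modelNameForPod(pod); ok {
            fmt.Printf("pod %s serves model %s\n", pod.Name, name)
        }
    }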
diff --git a/pkg/controller/modeladapter/README.md b/pkg/controller/modeladapter/README.md
index 609d4aaf..3cee8f5b 100644
--- a/pkg/controller/modeladapter/README.md
+++ b/pkg/controller/modeladapter/README.md
@@ -6,16 +6,17 @@ metadata:
   name: deepseek-33b-instruct
   namespace: default
   labels:
-    model.aibrix.ai: deepseek-33b-instruct
+    model.aibrix.ai/name: deepseek-33b-instruct
+    adapter.model.aibrix.ai/enabled: "true"
 spec:
   replicas: 1
   selector:
     matchLabels:
-      model.aibrix.ai: deepseek-33b-instruct
+      model.aibrix.ai/name: deepseek-33b-instruct
   template:
     metadata:
       labels:
-        model.aibrix.ai: deepseek-33b-instruct
+        model.aibrix.ai/name: deepseek-33b-instruct
     spec:
       containers:
         - name: deepseek-33b-instruct
@@ -59,7 +60,7 @@ spec:
   baseModel: llama2-70b
   podSelector:
     matchLabels:
-      model.aibrix.ai: llama2-70b
+      model.aibrix.ai/name: llama2-70b
   schedulerName: default-model-adapter-scheduler
 status:
   phase: Configuring
@@ -71,8 +72,8 @@ kind: Service
 metadata:
   creationTimestamp: "2024-07-14T21:42:57Z"
   labels:
-    model.aibrix.ai/base-model: llama2-70b
-    model.aibrix.ai/model-adapter: text2sql-lora-1
+    model.aibrix.ai/name: llama2-70b
+    adapter.model.aibrix.ai/name: text2sql-lora-1
   name: text2sql-lora-1
   namespace: default
   ownerReferences:
@@ -99,7 +100,7 @@ spec:
     targetPort: 8000
   publishNotReadyAddresses: true
   selector:
-    model.aibrix.ai: llama2-70b
+    model.aibrix.ai/name: llama2-70b
   sessionAffinity: None
   type: ClusterIP
 status:
@@ -152,8 +153,8 @@ metadata:
     endpoints.kubernetes.io/last-change-trigger-time: "2024-07-14T21:42:57Z"
   creationTimestamp: "2024-07-14T21:42:57Z"
   labels:
-    model.aibrix.ai/base-model: llama2-70b
-    model.aibrix.ai/model-adapter: text2sql-lora-1
+    model.aibrix.ai/name: llama2-70b
+    adapter.model.aibrix.ai/name: text2sql-lora-1
     service.kubernetes.io/headless: ""
   name: text2sql-lora-1
   namespace: default
diff --git a/pkg/controller/modeladapter/modeladapter_controller.go b/pkg/controller/modeladapter/modeladapter_controller.go
index cac7d3b5..e7422454 100644
--- a/pkg/controller/modeladapter/modeladapter_controller.go
+++ b/pkg/controller/modeladapter/modeladapter_controller.go
@@ -29,6 +29,7 @@ import (
 	modelv1alpha1 "github.com/aibrix/aibrix/api/model/v1alpha1"
 	"github.com/aibrix/aibrix/pkg/cache"
 	"github.com/aibrix/aibrix/pkg/controller/modeladapter/scheduling"
+	"github.com/aibrix/aibrix/pkg/utils"
 	corev1 "k8s.io/api/core/v1"
 	discoveryv1 "k8s.io/api/discovery/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -43,23 +44,29 @@ import (
 	"k8s.io/client-go/tools/record"
 	"k8s.io/klog/v2"
 	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/builder"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+	"sigs.k8s.io/controller-runtime/pkg/event"
 	"sigs.k8s.io/controller-runtime/pkg/handler"
 	"sigs.k8s.io/controller-runtime/pkg/manager"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 )
 
 const (
 	//ControllerUIDLabelKey = "model-adapter-controller-uid"
-	ModelAdapterFinalizer = "modeladapter.aibrix.ai/finalizer"
+	ModelIdentifierKey                = "model.aibrix.ai/name"
+	ModelAdapterFinalizer             = "adapter.model.aibrix.ai/finalizer"
+	ModelAdapterPodTemplateLabelKey   = "adapter.model.aibrix.ai/enabled"
+	ModelAdapterPodTemplateLabelValue = "true"
 )
 
 var (
 	controllerKind                     = modelv1alpha1.GroupVersion.WithKind("ModelAdapter")
 	controllerName                     = "model-adapter-controller"
 	defaultModelAdapterSchedulerPolicy = "leastAdapters"
-	defaultRequeueDuration             = 1 * time.Second
+	defaultRequeueDuration             = 3 * time.Second
 )
 
 // Add creates a new ModelAdapter Controller and adds it to the Manager with default RBAC.
@@ -120,15 +127,63 @@ func newReconciler(mgr manager.Manager) (reconcile.Reconciler, error) {
 	return reconciler, nil
 }
 
+func podWithLabelFilter(labelKey, labelValue, modelIdKey string) predicate.Predicate {
+	hasLabelAndModelIdentifier := func(labels map[string]string, labelKey, labelValue, modelIdentifierKey string) bool {
+		if _, exists := labels[modelIdentifierKey]; !exists {
+			return false
+		}
+		return labels[labelKey] == labelValue
+	}
+
+	return predicate.Funcs{
+		CreateFunc: func(e event.CreateEvent) bool {
+			return hasLabelAndModelIdentifier(e.Object.GetLabels(), labelKey, labelValue, modelIdKey)
+		},
+		UpdateFunc: func(e event.UpdateEvent) bool {
+			return hasLabelAndModelIdentifier(e.ObjectNew.GetLabels(), labelKey, labelValue, modelIdKey)
+		},
+		DeleteFunc: func(e event.DeleteEvent) bool {
+			return hasLabelAndModelIdentifier(e.Object.GetLabels(), labelKey, labelValue, modelIdKey)
+		},
+		GenericFunc: func(e event.GenericEvent) bool {
+			return hasLabelAndModelIdentifier(e.Object.GetLabels(), labelKey, labelValue, modelIdKey)
+		},
+	}
+}
+
+func lookupLinkedModelAdapterInNamespace(c client.Client) handler.MapFunc {
+	return func(ctx context.Context, a client.Object) []reconcile.Request {
+		modelAdapterList := &modelv1alpha1.ModelAdapterList{}
+		if err := c.List(ctx, modelAdapterList, client.InNamespace(a.GetNamespace())); err != nil {
+			klog.ErrorS(err, "unable to list model adapters in namespace", "namespace", a.GetNamespace())
+			return []reconcile.Request{}
+		}
+
+		requests := make([]reconcile.Request, 0, len(modelAdapterList.Items))
+		for _, modelAdapter := range modelAdapterList.Items {
+			// Originally we considered checking modelAdapter.Status.Instances and enqueueing an adapter only when it includes the pod name.
+			// However, a pending model adapter may be waiting for a new pod to be scheduled, so we reconcile all adapters whenever new pods are added.
+			requests = append(requests, reconcile.Request{NamespacedName: types.NamespacedName{Namespace: a.GetNamespace(), Name: modelAdapter.GetName()}})
+		}
+
+		return requests
+	}
+}
+
 // add adds a new Controller to mgr with r as the reconcile.Reconciler
 func add(mgr manager.Manager, r reconcile.Reconciler) error {
 	// use the builder fashion. If we need more fine grain control later, we can switch to `controller.New()`
 	err := ctrl.NewControllerManagedBy(mgr).
 		Named(controllerName).
-		For(&modelv1alpha1.ModelAdapter{}).
+		For(&modelv1alpha1.ModelAdapter{}, builder.WithPredicates(predicate.Or(
+			predicate.GenerationChangedPredicate{},
+			predicate.LabelChangedPredicate{},
+			predicate.AnnotationChangedPredicate{},
+		))).
 		Owns(&corev1.Service{}).
 		Owns(&discoveryv1.EndpointSlice{}).
-		Watches(&corev1.Pod{}, &handler.EnqueueRequestForObject{}).
+		Watches(&corev1.Pod{}, handler.EnqueueRequestsFromMapFunc(lookupLinkedModelAdapterInNamespace(mgr.GetClient())),
+			builder.WithPredicates(podWithLabelFilter(ModelAdapterPodTemplateLabelKey, ModelAdapterPodTemplateLabelValue, ModelIdentifierKey))).
 		Complete(r)
 
 	klog.V(4).InfoS("Finished to add model-adapter-controller")
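This watch wiring is the core behavioral change of the patch: pod events reach the reconciler only when the pod carries both the model identifier label and the adapter opt-in label, and each matching event fans out to every ModelAdapter in that pod's namespace. A dependency-free restatement of the predicate's accept logic (illustrative, without controller-runtime types):

    package main

    import "fmt"

    // interestingPodEvent mirrors podWithLabelFilter: an event is processed
    // only if the pod has the model identifier label AND opts in to adapter
    // hosting via the enabled label.
    func interestingPodEvent(podLabels map[string]string) bool {
        if _, exists := podLabels["model.aibrix.ai/name"]; !exists {
            return false
        }
        return podLabels["adapter.model.aibrix.ai/enabled"] == "true"
    }

    func main() {
        fmt.Println(interestingPodEvent(map[string]string{
            "model.aibrix.ai/name":            "llama2-70b",
            "adapter.model.aibrix.ai/enabled": "true",
        })) // true: fans out to every ModelAdapter in the namespace

        fmt.Println(interestingPodEvent(map[string]string{
            "app": "unrelated-workload",
        })) // false: the event never reaches the reconciler
    }

Together with the GenerationChanged/LabelChanged/AnnotationChanged predicates on the ModelAdapter itself, this replaces the previous catch-all pod watch and avoids reconciling on status-only updates and unrelated pods.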
@@ -296,6 +351,14 @@ func (r *ModelAdapterReconciler) DoReconcile(ctx context.Context, req ctrl.Reque
 			return ctrl.Result{}, r.clearModelAdapterInstanceList(ctx, instance)
 		}
 
+		if !utils.IsPodReady(selectedPod) || utils.IsPodTerminating(selectedPod) {
+			klog.Warning(fmt.Sprintf("currently assigned pod %s/%s is not ready; clean it up and reschedule the adapter", selectedPod.Namespace, selectedPod.Name))
+			// fall through so the endpoint etc. are removed in the current loop.
+			if err = r.clearModelAdapterInstanceList(ctx, instance); err != nil {
+				return ctrl.Result{}, err
+			}
+		}
+
 		existPods = true
 	}
 }
@@ -306,25 +369,27 @@ func (r *ModelAdapterReconciler) DoReconcile(ctx context.Context, req ctrl.Reque
 		selectedPod, err = r.schedulePod(ctx, instance)
 		if err != nil {
 			klog.ErrorS(err, "Failed to schedule Pod for ModelAdapter", "modelAdapter", instance.Name)
-			return ctrl.Result{RequeueAfter: defaultRequeueDuration}, err
+			return ctrl.Result{}, err
 		}
 
+		if selectedPod != nil {
+			instance.Status.Phase = modelv1alpha1.ModelAdapterScheduling
+			instance.Status.Instances = []string{selectedPod.Name}
+			meta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{
+				Type:               string(modelv1alpha1.ModelAdapterConditionTypeSelectorMatched),
+				Status:             metav1.ConditionTrue,
+				Reason:             "Reconciling",
+				Message:            fmt.Sprintf("ModelAdapter %s has been allocated to pod %s", klog.KObj(instance), selectedPod.Name),
+				LastTransitionTime: metav1.Now(),
+			})
-		instance.Status.Phase = modelv1alpha1.ModelAdapterScheduling
-		instance.Status.Instances = []string{selectedPod.Name}
-		meta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{
-			Type:               string(modelv1alpha1.ModelAdapterConditionTypeSelectorMatched),
-			Status:             metav1.ConditionTrue,
-			Reason:             "Reconciling",
-			Message:            fmt.Sprintf("ModelAdapter %s has been allocated to pod %s", klog.KObj(instance), selectedPod.Name),
-			LastTransitionTime: metav1.Now(),
-		})
+			if err := r.Status().Update(ctx, instance); err != nil {
+				klog.InfoS("Got error when updating status", "cluster name", req.Name, "error", err, "ModelAdapter", instance)
+				return ctrl.Result{}, err
+			}
-		if err := r.Status().Update(ctx, instance); err != nil {
-			klog.InfoS("Got error when updating status", "cluster name", req.Name, "error", err, "ModelAdapter", instance)
-			return ctrl.Result{RequeueAfter: defaultRequeueDuration}, err
+			return ctrl.Result{Requeue: true}, nil
 		}
-
-		return ctrl.Result{Requeue: true}, nil
+		// a nil selectedPod means there are no valid pods; wait for new pods to arrive or for pod-related changes such as a label change.
 	}
 
 	// Step 2: Reconcile Loading
@@ -358,7 +423,6 @@ func (r *ModelAdapterReconciler) DoReconcile(ctx context.Context, req ctrl.Reque
 	// Check if need to update the status.
 	if r.inconsistentModelAdapterStatus(oldInstance.Status, instance.Status) {
 		klog.InfoS("model adapter reconcile", "Update CR status", req.Name, "status", instance.Status)
-		instance.Status.Phase = modelv1alpha1.ModelAdapterRunning
 		if err = r.updateStatus(ctx, instance); err != nil {
 			return reconcile.Result{}, fmt.Errorf("update modelAdapter status error: %v", err)
 		}
@@ -386,10 +450,13 @@ func (r *ModelAdapterReconciler) clearModelAdapterInstanceList(ctx context.Conte
 		Type:               string(modelv1alpha1.ModelAdapterConditionCleanup),
 		Status:             metav1.ConditionTrue,
 		Reason:             "Reconciling",
-		Message:            fmt.Sprintf("Pod (%s) can not be fetched for model adapter (%s), clean up the list", stalePodName, instance.Name),
+		Message:            fmt.Sprintf("Pod (%s) cannot be fetched or is invalid for model adapter (%s), clean up the list", stalePodName, instance.Name),
 		LastTransitionTime: metav1.Now(),
 	})
 
+	// removing the instances means the LoRA has no targets at this moment.
+	instance.Status.Phase = modelv1alpha1.ModelAdapterPending
+
 	if err := r.Status().Update(ctx, instance); err != nil {
 		klog.Error(err, "Failed to update modelAdapter status")
 		return err
@@ -410,21 +477,40 @@ func (r *ModelAdapterReconciler) schedulePod(ctx context.Context, instance *mode
 		return nil, err
 	}
 
-	if len(podList.Items) == 0 {
-		return nil, fmt.Errorf("no pods found matching selector")
+	// filter out terminating or not-ready pods
+	var activePods []corev1.Pod
+
+	for _, pod := range podList.Items {
+		if !utils.IsPodTerminating(&pod) && utils.IsPodReady(&pod) {
+			activePods = append(activePods, pod)
+		}
 	}
 
-	return r.scheduler.SelectPod(ctx, podList.Items)
+	if len(activePods) == 0 {
+		klog.Warning("no pods found matching selector")
+		return nil, nil
+	}
+
+	return r.scheduler.SelectPod(ctx, activePods)
 }
 
 func (r *ModelAdapterReconciler) reconcileLoading(ctx context.Context, instance *modelv1alpha1.ModelAdapter, pod *corev1.Pod) error {
+	if pod == nil {
+		return nil
+	}
+
+	// the selected pod could be terminating; in that case, just do nothing.
+	if pod.DeletionTimestamp != nil {
+		return nil
+	}
+
 	// Define the key you want to check
 	key := "DEBUG_MODE"
 	value, exists := getEnvKey(key)
 	host := fmt.Sprintf("http://%s:8000", pod.Status.PodIP)
 	if exists && value == "on" {
 		// 30080 is the nodePort of the base model service.
-		host = fmt.Sprintf("http://%s:30080", "localhost")
+		host = fmt.Sprintf("http://%s:30081", "localhost")
 	}
 
 	// Check if the model is already loaded
@@ -661,6 +747,7 @@ func (r *ModelAdapterReconciler) reconcileService(ctx context.Context, instance
 		klog.ErrorS(err, "Failed to get Service")
 		return ctrl.Result{}, err
 	}
+	// TODO: add `else` logic to compare the service's major fields and update it to the target state.
 
 	// TODO: Now, we are using the name comparison which is not enough,
 	// compare the object difference in future.
@@ -708,6 +795,25 @@ func (r *ModelAdapterReconciler) reconcileEndpointSlice(ctx context.Context, ins
 		klog.ErrorS(err, "Failed to get EndpointSlice")
 		return ctrl.Result{}, err
 	} else {
+		// Check if pod is nil; if so, clear the endpoints and set the phase to Pending
+		if pod == nil {
+			klog.InfoS("Pod is nil, clearing all endpoints and setting status to Pending")
+			found.Endpoints = []discoveryv1.Endpoint{}
+
+			if err := r.Update(ctx, found); err != nil {
+				klog.ErrorS(err, "Failed to update EndpointSlice after clearing endpoints", "EndpointSlice", found.Name)
+				return ctrl.Result{}, err
+			}
+
+			instance.Status.Phase = modelv1alpha1.ModelAdapterPending
+			if err := r.Status().Update(ctx, instance); err != nil {
+				klog.Error(err, "Failed to update modelAdapter status to Pending")
+				return ctrl.Result{}, err
+			}
+
+			return ctrl.Result{}, nil
+		}
+
 		// Existing EndpointSlice Found. Check if the Pod IP is already in the EndpointSlice
 		podIP := pod.Status.PodIP
 		alreadyExists := false
@@ -733,9 +839,44 @@ func (r *ModelAdapterReconciler) reconcileEndpointSlice(ctx context.Context, ins
 			klog.ErrorS(err, "Failed to update EndpointSlice", "EndpointSlice", found.Name)
 			return ctrl.Result{}, err
 		}
+		instance.Status.Phase = modelv1alpha1.ModelAdapterRunning
 		klog.InfoS("Successfully updated EndpointSlice", "EndpointSlice", found.Name)
 	} else {
-		klog.InfoS("Pod IP already exists in EndpointSlice", "PodIP", podIP)
+		// the pod has been deleted, so remove its IP from the endpoint list
+		if pod.DeletionTimestamp != nil {
+			var updatedEndpoints []discoveryv1.Endpoint
+			podIP := pod.Status.PodIP
+
+			for _, endpoint := range found.Endpoints {
+				shouldRemove := false
+				var newAddresses []string
+
+				for _, address := range endpoint.Addresses {
+					if address == podIP {
+						shouldRemove = true
+					} else {
+						newAddresses = append(newAddresses, address)
+					}
+				}
+
+				if !shouldRemove || len(newAddresses) > 0 {
+					endpoint.Addresses = newAddresses
+					updatedEndpoints = append(updatedEndpoints, endpoint)
+				}
+			}
+
+			found.Endpoints = updatedEndpoints
+			if err := r.Update(ctx, found); err != nil {
+				klog.ErrorS(err, "Failed to update EndpointSlice after removing PodIP", "EndpointSlice", found.Name)
+				return ctrl.Result{}, err
+			}
+
+			instance.Status.Phase = modelv1alpha1.ModelAdapterPending
+			klog.InfoS("Successfully removed Pod IP from EndpointSlice", "PodIP", podIP, "EndpointSlice", found.Name)
+		} else {
+			klog.InfoS("Pod IP already exists in EndpointSlice", "PodName", pod.Name, "PodIP", podIP)
+			instance.Status.Phase = modelv1alpha1.ModelAdapterRunning
+		}
 	}
 }
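The address-removal loop in the @@ -733 hunk above keeps an endpoint only if it either never contained the deleted pod's IP or still has other addresses left after stripping it. A condensed, runnable restatement; endpoints are reduced to plain address lists here as an illustrative simplification, not the controller's real discoveryv1 types:

    package main

    import "fmt"

    // removePodIP strips podIP from every endpoint's address list. An endpoint
    // is dropped entirely when podIP was its only address; endpoints that never
    // contained podIP are kept unchanged.
    func removePodIP(endpoints [][]string, podIP string) [][]string {
        var updated [][]string
        for _, addresses := range endpoints {
            removed := false
            var kept []string
            for _, a := range addresses {
                if a == podIP {
                    removed = true
                } else {
                    kept = append(kept, a)
                }
            }
            if !removed || len(kept) > 0 {
                updated = append(updated, kept)
            }
        }
        return updated
    }

    func main() {
        eps := [][]string{{"10.0.0.1"}, {"10.0.0.2", "10.0.0.3"}}
        fmt.Println(removePodIP(eps, "10.0.0.1")) // [[10.0.0.2 10.0.0.3]]
    }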
diff --git a/pkg/controller/modeladapter/resources.go b/pkg/controller/modeladapter/resources.go
index ec238558..23c3186c 100644
--- a/pkg/controller/modeladapter/resources.go
+++ b/pkg/controller/modeladapter/resources.go
@@ -61,8 +61,8 @@ func buildModelAdapterEndpointSlice(instance *modelv1alpha1.ModelAdapter, pod *c
 
 func buildModelAdapterService(instance *modelv1alpha1.ModelAdapter) (*corev1.Service, error) {
 	labels := map[string]string{
-		"model.aibrix.ai/base-model":    instance.Spec.BaseModel,
-		"model.aibrix.ai/model-adapter": instance.Name,
+		"model.aibrix.ai/name":         instance.Spec.BaseModel,
+		"adapter.model.aibrix.ai/name": instance.Name,
 	}
 
 	ports := []corev1.ServicePort{
diff --git a/pkg/controller/modeladapter/resources_test.go b/pkg/controller/modeladapter/resources_test.go
index 81cc47da..31a5e921 100644
--- a/pkg/controller/modeladapter/resources_test.go
+++ b/pkg/controller/modeladapter/resources_test.go
@@ -92,8 +92,8 @@ func TestBuildModelAdapterService(t *testing.T) {
 	assert.Equal(t, "test-instance", service.Name)
 	assert.Equal(t, "default", service.Namespace)
 	assert.Equal(t, map[string]string{
-		"model.aibrix.ai/base-model":    "test-model",
-		"model.aibrix.ai/model-adapter": "test-instance",
+		"model.aibrix.ai/name":         "test-model",
+		"adapter.model.aibrix.ai/name": "test-instance",
 	}, service.Labels)
 
 	// Check ports
diff --git a/pkg/controller/modeladapter/utils.go b/pkg/controller/modeladapter/utils.go
index 04b3b168..350f317d 100644
--- a/pkg/controller/modeladapter/utils.go
+++ b/pkg/controller/modeladapter/utils.go
@@ -127,3 +127,12 @@ func extractHuggingFacePath(artifactURL string) (string, error) {
 
 	return path, nil
 }
+
+func stringInSlice(slice []string, str string) bool {
+	for _, v := range slice {
+		if v == str {
+			return true
+		}
+	}
+	return false
+}
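stringInSlice is a plain membership helper; no caller appears in this diff, so the usage below is hypothetical (for instance, checking whether a pod name is already tracked in Status.Instances):

    package main

    import "fmt"

    // stringInSlice reports whether str occurs in slice, matching the helper
    // added to pkg/controller/modeladapter/utils.go above.
    func stringInSlice(slice []string, str string) bool {
        for _, v := range slice {
            if v == str {
                return true
            }
        }
        return false
    }

    func main() {
        instances := []string{"llama2-70b-0", "llama2-70b-1"}
        fmt.Println(stringInSlice(instances, "llama2-70b-1")) // true
        fmt.Println(stringInSlice(instances, "llama2-70b-9")) // false
    }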
diff --git a/pkg/controller/modelrouter/modelrouter_controller.go b/pkg/controller/modelrouter/modelrouter_controller.go
index a53dd6e0..b4b252b7 100644
--- a/pkg/controller/modelrouter/modelrouter_controller.go
+++ b/pkg/controller/modelrouter/modelrouter_controller.go
@@ -38,7 +38,7 @@ import (
 const (
 	// TODO (varun): cleanup model related identifiers and establish common consensus
 	modelHeaderIdentifier = "model"
-	modelIdentifier       = "model.aibrix.ai"
+	modelIdentifier       = "model.aibrix.ai/name"
 	modelPortIdentifier   = "model.aibrix.ai/port"
 	// TODO (varun): parameterize it or dynamically resolve it
 	aibrixEnvoyGateway = "aibrix-eg"
diff --git a/pkg/utils/pod.go b/pkg/utils/pod.go
index 3da920a1..1ad18faa 100644
--- a/pkg/utils/pod.go
+++ b/pkg/utils/pod.go
@@ -30,6 +30,11 @@ const (
 	NAMESPACE = "aibrix-system"
 )
 
+// IsPodTerminating checks whether the pod is terminating, i.e. whether its deletion timestamp is set.
+func IsPodTerminating(pod *v1.Pod) bool {
+	return pod.ObjectMeta.DeletionTimestamp != nil
+}
+
 // IsPodReady returns true if a pod is ready; false otherwise.
 func IsPodReady(pod *v1.Pod) bool {
 	return IsPodReadyConditionTrue(pod.Status)
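With these helpers, schedulePod considers only pods that are ready and not terminating. A self-contained sketch of that filter; isPodReady and isPodTerminating are re-implemented locally for illustration, while the real versions live in pkg/utils:

    package main

    import (
        "fmt"

        corev1 "k8s.io/api/core/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    // isPodTerminating: a pod is terminating once its deletion timestamp is set.
    func isPodTerminating(pod *corev1.Pod) bool { return pod.DeletionTimestamp != nil }

    // isPodReady: a pod is ready when its PodReady condition is True.
    func isPodReady(pod *corev1.Pod) bool {
        for _, c := range pod.Status.Conditions {
            if c.Type == corev1.PodReady {
                return c.Status == corev1.ConditionTrue
            }
        }
        return false
    }

    func main() {
        now := metav1.Now()
        pods := []corev1.Pod{
            {ObjectMeta: metav1.ObjectMeta{Name: "ready"},
                Status: corev1.PodStatus{Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}}}},
            {ObjectMeta: metav1.ObjectMeta{Name: "terminating", DeletionTimestamp: &now},
                Status: corev1.PodStatus{Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}}}},
            {ObjectMeta: metav1.ObjectMeta{Name: "not-ready"}},
        }

        var active []corev1.Pod
        for i := range pods {
            if !isPodTerminating(&pods[i]) && isPodReady(&pods[i]) {
                active = append(active, pods[i])
            }
        }
        fmt.Println(len(active)) // 1: only "ready" survives the filter
    }

Returning (nil, nil) instead of an error when this filter leaves nothing lets the reconciler park the adapter in Pending and rely on the pod watch, rather than error-requeueing in a tight loop.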