diff --git a/Makefile b/Makefile
index ed1c0eb5..0fd842c0 100644
--- a/Makefile
+++ b/Makefile
@@ -52,7 +52,7 @@ help: ## Display this help.
 
 .PHONY: manifests
 manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
-	$(CONTROLLER_GEN) rbac:roleName=manager-role crd:maxDescLen=0,generateEmbeddedObjectMeta=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases
+	$(CONTROLLER_GEN) rbac:roleName=controller-manager-role crd:maxDescLen=0,generateEmbeddedObjectMeta=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases
 
 .PHONY: generate
 generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
diff --git a/config/gateway/kustomization.yaml b/config/gateway/kustomization.yaml
index d7882c2c..4f1426cf 100644
--- a/config/gateway/kustomization.yaml
+++ b/config/gateway/kustomization.yaml
@@ -9,7 +9,7 @@ kind: Kustomization
 images:
 - name: plugins
   newName: aibrix/plugins
-  newTag: v0.1.0-rc.2
+  newTag: nightly
 - name: users
   newName: aibrix/users
-  newTag: v0.1.0-rc.2
+  newTag: nightly
diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml
index bda1c94f..b79bda07 100644
--- a/config/manager/kustomization.yaml
+++ b/config/manager/kustomization.yaml
@@ -5,4 +5,4 @@ kind: Kustomization
 images:
 - name: controller
   newName: aibrix/controller-manager
-  newTag: v0.1.0-rc.2
+  newTag: nightly
diff --git a/docs/development/app/deployment.yaml b/docs/development/app/deployment.yaml
index 550305d8..28f0280d 100644
--- a/docs/development/app/deployment.yaml
+++ b/docs/development/app/deployment.yaml
@@ -4,20 +4,20 @@ metadata:
   name: llama2-70b
   namespace: aibrix-system
   labels:
-    modeladapter.aibricks.ai/enabled: "true"
-    model.aibrix.ai: "llama2-70b"
+    model.aibrix.ai/name: "llama2-70b"
     model.aibrix.ai/port: "8000"
+    adapter.model.aibrix.ai/enabled: "true"
 spec:
   replicas: 3
   selector:
     matchLabels:
-      modeladapter.aibricks.ai/enabled: "true"
-      model.aibrix.ai: "llama2-70b"
+      adapter.model.aibrix.ai/enabled: "true"
+      model.aibrix.ai/name: "llama2-70b"
   template:
     metadata:
       labels:
-        modeladapter.aibricks.ai/enabled: "true"
-        model.aibrix.ai: "llama2-70b"
+        adapter.model.aibrix.ai/enabled: "true"
+        model.aibrix.ai/name: "llama2-70b"
     spec:
       containers:
         - name: llmengine
@@ -46,7 +46,7 @@ metadata:
   namespace: aibrix-system
 spec:
   selector:
-    model.aibrix.ai: "llama2-70b"
+    model.aibrix.ai/name: "llama2-70b"
   ports:
     - protocol: TCP
       port: 8000
diff --git a/docs/source/features/lora-dynamic-loading.rst b/docs/source/features/lora-dynamic-loading.rst
index 89b475e6..4c61de53 100644
--- a/docs/source/features/lora-dynamic-loading.rst
+++ b/docs/source/features/lora-dynamic-loading.rst
@@ -52,13 +52,13 @@ Here's one model adapter example.
       name: llama-2-7b-sql-lora-test
       namespace: aibrix-system
       labels:
-        model.aibrix.ai: "llama-2-7b-sql-lora-test"
+        model.aibrix.ai/name: "llama-2-7b-sql-lora-test"
         model.aibrix.ai/port: "8000"
     spec:
       baseModel: llama2-70b
       podSelector:
         matchLabels:
-          model.aibrix.ai: llama2-70b
+          model.aibrix.ai/name: llama2-70b
       artifactURL: huggingface://yard1/llama-2-7b-sql-lora-test
       schedulerName: default
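Up to this point the patch is a pure label-schema migration: the flat key model.aibrix.ai becomes the prefixed model.aibrix.ai/name, and pods that may host LoRA adapters opt in explicitly via adapter.model.aibrix.ai/enabled: "true". A minimal Go sketch (illustrative only, not part of the patch) of how selectors behave under the new scheme:

    package main

    import (
        "fmt"

        "k8s.io/apimachinery/pkg/labels"
    )

    func main() {
        // Labels a base-model pod carries after this patch.
        podLabels := labels.Set{
            "model.aibrix.ai/name":            "llama2-70b",
            "model.aibrix.ai/port":            "8000",
            "adapter.model.aibrix.ai/enabled": "true",
        }

        // A ModelAdapter podSelector now matches on the prefixed name key.
        selector := labels.SelectorFromSet(labels.Set{"model.aibrix.ai/name": "llama2-70b"})
        fmt.Println(selector.Matches(podLabels)) // true

        // The old, unprefixed key no longer matches anything.
        oldSelector := labels.SelectorFromSet(labels.Set{"model.aibrix.ai": "llama2-70b"})
        fmt.Println(oldSelector.Matches(podLabels)) // false
    }

Because label selectors are exact-match on keys, every manifest, selector, and constant has to move in the same commit, which is why the rename touches docs, config, and controller code together.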
diff --git a/docs/source/getting_started/quickstart.rst b/docs/source/getting_started/quickstart.rst
index 4694c4ce..fb509556 100644
--- a/docs/source/getting_started/quickstart.rst
+++ b/docs/source/getting_started/quickstart.rst
@@ -27,15 +27,16 @@ Save yaml as `deployment.yaml` and run `kubectl apply -f deployment.yaml`.
     kind: Deployment
     metadata:
       labels:
-        model.aibrix.ai: llama-2-7b-hf
+        model.aibrix.ai/name: llama-2-7b-hf
         model.aibrix.ai/port: "8000"
+        adapter.model.aibrix.ai/enabled: "true"
       name: llama-2-7b-hf
       namespace: aibrix-system
     spec:
       replicas: 1
       selector:
         matchLabels:
-          model.aibrix.ai: llama-2-7b-hf
+          model.aibrix.ai/name: llama-2-7b-hf
       strategy:
         rollingUpdate:
           maxSurge: 25%
@@ -44,7 +45,7 @@ Save yaml as `deployment.yaml` and run `kubectl apply -f deployment.yaml`.
       template:
         metadata:
           labels:
-            model.aibrix.ai: llama-2-7b-hf
+            model.aibrix.ai/name: llama-2-7b-hf
         spec:
           containers:
             - command:
@@ -112,7 +113,7 @@ Save yaml as `service.yaml` and run `kubectl apply -f service.yaml`.
     kind: Service
     metadata:
       labels:
-        model.aibrix.ai: llama-2-7b-hf
+        model.aibrix.ai/name: llama-2-7b-hf
         prometheus-discovery: "true"
       annotations:
         prometheus.io/scrape: "true"
@@ -130,7 +131,7 @@ Save yaml as `service.yaml` and run `kubectl apply -f service.yaml`.
         protocol: TCP
         targetPort: 8080
       selector:
-        model.aibrix.ai: llama-2-7b-hf
+        model.aibrix.ai/name: llama-2-7b-hf
       type: ClusterIP
 
 Register a user to authenticate the gateway
diff --git a/docs/tutorial/lora/model_adapter.yaml b/docs/tutorial/lora/model_adapter.yaml
index 3dba328e..33230e32 100644
--- a/docs/tutorial/lora/model_adapter.yaml
+++ b/docs/tutorial/lora/model_adapter.yaml
@@ -4,13 +4,13 @@ metadata:
   name: lora-1
   namespace: aibrix-system
   labels:
-    model.aibrix.ai: "lora-1"
+    model.aibrix.ai/name: "lora-1"
     model.aibrix.ai/port: "8000"
 spec:
   baseModel: llama2-70b
   podSelector:
     matchLabels:
-      model.aibrix.ai: llama2-70b
+      model.aibrix.ai/name: llama2-70b
   artifactURL: huggingface://yard1/llama-2-7b-sql-lora-test
   schedulerName: default
 # ---
diff --git a/docs/tutorial/runtime/runtime-hf-download.yaml b/docs/tutorial/runtime/runtime-hf-download.yaml
index d27a0212..5193cf6e 100644
--- a/docs/tutorial/runtime/runtime-hf-download.yaml
+++ b/docs/tutorial/runtime/runtime-hf-download.yaml
@@ -4,6 +4,7 @@ metadata:
   labels:
     models.aibricks.ai: deepseek-coder-6.7b-instruct
     models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
+    adapter.model.aibrix.ai/enabled: "true"
   name: aibricks-model-deepseek-coder-6.7b-instruct
   namespace: default
 spec:
diff --git a/docs/tutorial/runtime/runtime-s3-download.yaml b/docs/tutorial/runtime/runtime-s3-download.yaml
index 2fa984a0..b3c3d309 100644
--- a/docs/tutorial/runtime/runtime-s3-download.yaml
+++ b/docs/tutorial/runtime/runtime-s3-download.yaml
@@ -4,6 +4,7 @@ metadata:
   labels:
     models.aibricks.ai: deepseek-coder-6.7b-instruct
     models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
+    adapter.model.aibrix.ai/enabled: "true"
   name: aibricks-model-deepseek-coder-6.7b-instruct
   namespace: default
 spec:
diff --git a/docs/tutorial/runtime/runtime-tos-download.yaml b/docs/tutorial/runtime/runtime-tos-download.yaml
index f884f9e5..5a9abc90 100644
--- a/docs/tutorial/runtime/runtime-tos-download.yaml
+++ b/docs/tutorial/runtime/runtime-tos-download.yaml
@@ -4,6 +4,7 @@ metadata:
   labels:
     models.aibricks.ai: deepseek-coder-6.7b-instruct
     models.aibricks.com/model-name: deepseek-coder-6.7b-instruct
+    adapter.model.aibrix.ai/enabled: "true"
   name: aibricks-model-deepseek-coder-6.7b-instruct
   namespace: default
 spec:
diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go
index b95d777f..7aa6e755 100644
--- a/pkg/cache/cache.go
+++ b/pkg/cache/cache.go
@@ -53,7 +53,7 @@ var (
 )
 
 const (
-	modelIdentifier = "model.aibrix.ai"
+	modelIdentifier = "model.aibrix.ai/name"
 )
 
 func GetCache() (*Cache, error) {
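The cache.go change above is the runtime counterpart of the label rename: the gateway-side cache now keys pods by the prefixed label. A standalone sketch; the modelIdentifier constant matches the patch, while modelNameForPod is a hypothetical helper, not the cache's real API:

    package main

    import (
        "fmt"

        corev1 "k8s.io/api/core/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    // Same constant value as pkg/cache/cache.go after this patch.
    const modelIdentifier = "model.aibrix.ai/name"

    // modelNameForPod resolves which model a pod serves from its labels.
    func modelNameForPod(pod *corev1.Pod) (string, bool) {
        name, ok := pod.Labels[modelIdentifier]
        return name, ok
    }

    func main() {
        pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{
            Name:   "llama2-70b-0",
            Labels: map[string]string{modelIdentifier: "llama2-70b"},
        }}
        if name, ok := modelNameForPod(pod); ok {
            fmt.Printf("pod %s serves model %s\n", pod.Name, name)
        }
    }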
diff --git a/pkg/controller/modeladapter/README.md b/pkg/controller/modeladapter/README.md
index 609d4aaf..3cee8f5b 100644
--- a/pkg/controller/modeladapter/README.md
+++ b/pkg/controller/modeladapter/README.md
@@ -6,16 +6,17 @@ metadata:
   name: deepseek-33b-instruct
   namespace: default
   labels:
-    model.aibrix.ai: deepseek-33b-instruct
+    model.aibrix.ai/name: deepseek-33b-instruct
+    adapter.model.aibrix.ai/enabled: "true"
 spec:
   replicas: 1
   selector:
     matchLabels:
-      model.aibrix.ai: deepseek-33b-instruct
+      model.aibrix.ai/name: deepseek-33b-instruct
   template:
     metadata:
       labels:
-        model.aibrix.ai: deepseek-33b-instruct
+        model.aibrix.ai/name: deepseek-33b-instruct
     spec:
       containers:
         - name: deepseek-33b-instruct
@@ -59,7 +60,7 @@ spec:
   baseModel: llama2-70b
   podSelector:
     matchLabels:
-      model.aibrix.ai: llama2-70b
+      model.aibrix.ai/name: llama2-70b
   schedulerName: default-model-adapter-scheduler
 status:
   phase: Configuring
@@ -71,8 +72,8 @@ kind: Service
 metadata:
   creationTimestamp: "2024-07-14T21:42:57Z"
   labels:
-    model.aibrix.ai/base-model: llama2-70b
-    model.aibrix.ai/model-adapter: text2sql-lora-1
+    model.aibrix.ai/name: llama2-70b
+    adapter.model.aibrix.ai/name: text2sql-lora-1
   name: text2sql-lora-1
   namespace: default
   ownerReferences:
@@ -99,7 +100,7 @@ spec:
     targetPort: 8000
   publishNotReadyAddresses: true
   selector:
-    model.aibrix.ai: llama2-70b
+    model.aibrix.ai/name: llama2-70b
   sessionAffinity: None
   type: ClusterIP
 status:
@@ -152,8 +153,8 @@ metadata:
     endpoints.kubernetes.io/last-change-trigger-time: "2024-07-14T21:42:57Z"
   creationTimestamp: "2024-07-14T21:42:57Z"
   labels:
-    model.aibrix.ai/base-model: llama2-70b
-    model.aibrix.ai/model-adapter: text2sql-lora-1
+    model.aibrix.ai/name: llama2-70b
+    adapter.model.aibrix.ai/name: text2sql-lora-1
     service.kubernetes.io/headless: ""
   name: text2sql-lora-1
   namespace: default
diff --git a/pkg/controller/modeladapter/modeladapter_controller.go b/pkg/controller/modeladapter/modeladapter_controller.go
index cac7d3b5..e7422454 100644
--- a/pkg/controller/modeladapter/modeladapter_controller.go
+++ b/pkg/controller/modeladapter/modeladapter_controller.go
@@ -29,6 +29,7 @@ import (
 	modelv1alpha1 "github.com/aibrix/aibrix/api/model/v1alpha1"
 	"github.com/aibrix/aibrix/pkg/cache"
 	"github.com/aibrix/aibrix/pkg/controller/modeladapter/scheduling"
+	"github.com/aibrix/aibrix/pkg/utils"
 	corev1 "k8s.io/api/core/v1"
 	discoveryv1 "k8s.io/api/discovery/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -43,23 +44,29 @@ import (
 	"k8s.io/client-go/tools/record"
 	"k8s.io/klog/v2"
 	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/builder"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+	"sigs.k8s.io/controller-runtime/pkg/event"
 	"sigs.k8s.io/controller-runtime/pkg/handler"
 	"sigs.k8s.io/controller-runtime/pkg/manager"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 )
 
 const (
 	//ControllerUIDLabelKey = "model-adapter-controller-uid"
-	ModelAdapterFinalizer = "modeladapter.aibrix.ai/finalizer"
+	ModelIdentifierKey                = "model.aibrix.ai/name"
+	ModelAdapterFinalizer             = "adapter.model.aibrix.ai/finalizer"
+	ModelAdapterPodTemplateLabelKey   = "adapter.model.aibrix.ai/enabled"
+	ModelAdapterPodTemplateLabelValue = "true"
 )
 
 var (
 	controllerKind                     = modelv1alpha1.GroupVersion.WithKind("ModelAdapter")
 	controllerName                     = "model-adapter-controller"
 	defaultModelAdapterSchedulerPolicy = "leastAdapters"
-	defaultRequeueDuration             = 1 * time.Second
+	defaultRequeueDuration             = 3 * time.Second
 )
 
 // Add creates a new ModelAdapter Controller and adds it to the Manager with default RBAC.
@@ -120,15 +127,63 @@ func newReconciler(mgr manager.Manager) (reconcile.Reconciler, error) {
 	return reconciler, nil
 }
 
+func podWithLabelFilter(labelKey, labelValue, modelIdKey string) predicate.Predicate {
+	hasLabelAndModelIdentifier := func(labels map[string]string, labelKey, labelValue, modelIdentifierKey string) bool {
+		if _, exists := labels[modelIdentifierKey]; !exists {
+			return false
+		}
+		return labels[labelKey] == labelValue
+	}
+
+	return predicate.Funcs{
+		CreateFunc: func(e event.CreateEvent) bool {
+			return hasLabelAndModelIdentifier(e.Object.GetLabels(), labelKey, labelValue, modelIdKey)
+		},
+		UpdateFunc: func(e event.UpdateEvent) bool {
+			return hasLabelAndModelIdentifier(e.ObjectNew.GetLabels(), labelKey, labelValue, modelIdKey)
+		},
+		DeleteFunc: func(e event.DeleteEvent) bool {
+			return hasLabelAndModelIdentifier(e.Object.GetLabels(), labelKey, labelValue, modelIdKey)
+		},
+		GenericFunc: func(e event.GenericEvent) bool {
+			return hasLabelAndModelIdentifier(e.Object.GetLabels(), labelKey, labelValue, modelIdKey)
+		},
+	}
+}
+
+func lookupLinkedModelAdapterInNamespace(c client.Client) handler.MapFunc {
+	return func(ctx context.Context, a client.Object) []reconcile.Request {
+		modelAdapterList := &modelv1alpha1.ModelAdapterList{}
+		if err := c.List(ctx, modelAdapterList, client.InNamespace(a.GetNamespace())); err != nil {
+			klog.ErrorS(err, "unable to list model adapters in namespace", "namespace", a.GetNamespace())
+			return []reconcile.Request{}
+		}
+
+		requests := make([]reconcile.Request, 0, len(modelAdapterList.Items))
+		for _, modelAdapter := range modelAdapterList.Items {
+			// Originally we considered checking modelAdapter.Status.Instances and enqueueing an adapter only when it includes the pod name.
+			// However, a pending model adapter may be waiting for a new pod to be scheduled, so we reconcile all adapters whenever new pods are added.
+			requests = append(requests, reconcile.Request{NamespacedName: types.NamespacedName{Namespace: a.GetNamespace(), Name: modelAdapter.GetName()}})
+		}
+
+		return requests
+	}
+}
+
 // add adds a new Controller to mgr with r as the reconcile.Reconciler
 func add(mgr manager.Manager, r reconcile.Reconciler) error {
 	// use the builder fashion. If we need more fine grain control later, we can switch to `controller.New()`
 	err := ctrl.NewControllerManagedBy(mgr).
 		Named(controllerName).
-		For(&modelv1alpha1.ModelAdapter{}).
+		For(&modelv1alpha1.ModelAdapter{}, builder.WithPredicates(predicate.Or(
+			predicate.GenerationChangedPredicate{},
+			predicate.LabelChangedPredicate{},
+			predicate.AnnotationChangedPredicate{},
+		))).
 		Owns(&corev1.Service{}).
 		Owns(&discoveryv1.EndpointSlice{}).
-		Watches(&corev1.Pod{}, &handler.EnqueueRequestForObject{}).
+		Watches(&corev1.Pod{}, handler.EnqueueRequestsFromMapFunc(lookupLinkedModelAdapterInNamespace(mgr.GetClient())),
+			builder.WithPredicates(podWithLabelFilter(ModelAdapterPodTemplateLabelKey, ModelAdapterPodTemplateLabelValue, ModelIdentifierKey))).
 		Complete(r)
 
 	klog.V(4).InfoS("Finished to add model-adapter-controller")
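This watch wiring is the core behavioral change of the patch: pod events reach the reconciler only when the pod carries both the model identifier label and the adapter opt-in label, and each matching event fans out to every ModelAdapter in that pod's namespace. A dependency-free restatement of the predicate's accept logic (illustrative, without controller-runtime types):

    package main

    import "fmt"

    // interestingPodEvent mirrors podWithLabelFilter: an event is processed
    // only if the pod has the model identifier label AND opts in to adapter
    // hosting via the enabled label.
    func interestingPodEvent(podLabels map[string]string) bool {
        if _, exists := podLabels["model.aibrix.ai/name"]; !exists {
            return false
        }
        return podLabels["adapter.model.aibrix.ai/enabled"] == "true"
    }

    func main() {
        fmt.Println(interestingPodEvent(map[string]string{
            "model.aibrix.ai/name":            "llama2-70b",
            "adapter.model.aibrix.ai/enabled": "true",
        })) // true: fans out to every ModelAdapter in the namespace

        fmt.Println(interestingPodEvent(map[string]string{
            "app": "unrelated-workload",
        })) // false: the event never reaches the reconciler
    }

Together with the GenerationChanged/LabelChanged/AnnotationChanged predicates on the ModelAdapter itself, this replaces the previous catch-all pod watch and avoids reconciling on status-only updates and unrelated pods.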
@@ -296,6 +351,14 @@ func (r *ModelAdapterReconciler) DoReconcile(ctx context.Context, req ctrl.Reque
 			return ctrl.Result{}, r.clearModelAdapterInstanceList(ctx, instance)
 		}
 
+		if !utils.IsPodReady(selectedPod) || utils.IsPodTerminating(selectedPod) {
+			klog.Warning(fmt.Sprintf("currently assigned pod %s/%s is not ready; clean it up and reschedule the adapter", selectedPod.Namespace, selectedPod.Name))
+			// fall through so the endpoint etc. are removed in the current loop.
+			if err = r.clearModelAdapterInstanceList(ctx, instance); err != nil {
+				return ctrl.Result{}, err
+			}
+		}
+
 		existPods = true
 	}
 }
@@ -306,25 +369,27 @@ func (r *ModelAdapterReconciler) DoReconcile(ctx context.Context, req ctrl.Reque
 		selectedPod, err = r.schedulePod(ctx, instance)
 		if err != nil {
 			klog.ErrorS(err, "Failed to schedule Pod for ModelAdapter", "modelAdapter", instance.Name)
-			return ctrl.Result{RequeueAfter: defaultRequeueDuration}, err
+			return ctrl.Result{}, err
 		}
 
+		if selectedPod != nil {
+			instance.Status.Phase = modelv1alpha1.ModelAdapterScheduling
+			instance.Status.Instances = []string{selectedPod.Name}
+			meta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{
+				Type:               string(modelv1alpha1.ModelAdapterConditionTypeSelectorMatched),
+				Status:             metav1.ConditionTrue,
+				Reason:             "Reconciling",
+				Message:            fmt.Sprintf("ModelAdapter %s has been allocated to pod %s", klog.KObj(instance), selectedPod.Name),
+				LastTransitionTime: metav1.Now(),
+			})
-		instance.Status.Phase = modelv1alpha1.ModelAdapterScheduling
-		instance.Status.Instances = []string{selectedPod.Name}
-		meta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{
-			Type:               string(modelv1alpha1.ModelAdapterConditionTypeSelectorMatched),
-			Status:             metav1.ConditionTrue,
-			Reason:             "Reconciling",
-			Message:            fmt.Sprintf("ModelAdapter %s has been allocated to pod %s", klog.KObj(instance), selectedPod.Name),
-			LastTransitionTime: metav1.Now(),
-		})
+			if err := r.Status().Update(ctx, instance); err != nil {
+				klog.InfoS("Got error when updating status", "cluster name", req.Name, "error", err, "ModelAdapter", instance)
+				return ctrl.Result{}, err
+			}
-		if err := r.Status().Update(ctx, instance); err != nil {
-			klog.InfoS("Got error when updating status", "cluster name", req.Name, "error", err, "ModelAdapter", instance)
-			return ctrl.Result{RequeueAfter: defaultRequeueDuration}, err
+			return ctrl.Result{Requeue: true}, nil
 		}
-
-		return ctrl.Result{Requeue: true}, nil
+		// a nil selectedPod means there are no valid pods; wait for new pods to arrive or for pod-related changes such as a label change.
 	}
 
 	// Step 2: Reconcile Loading
@@ -358,7 +423,6 @@ func (r *ModelAdapterReconciler) DoReconcile(ctx context.Context, req ctrl.Reque
 	// Check if need to update the status.
 	if r.inconsistentModelAdapterStatus(oldInstance.Status, instance.Status) {
 		klog.InfoS("model adapter reconcile", "Update CR status", req.Name, "status", instance.Status)
-		instance.Status.Phase = modelv1alpha1.ModelAdapterRunning
 		if err = r.updateStatus(ctx, instance); err != nil {
 			return reconcile.Result{}, fmt.Errorf("update modelAdapter status error: %v", err)
 		}
@@ -386,10 +450,13 @@ func (r *ModelAdapterReconciler) clearModelAdapterInstanceList(ctx context.Conte
 		Type:               string(modelv1alpha1.ModelAdapterConditionCleanup),
 		Status:             metav1.ConditionTrue,
 		Reason:             "Reconciling",
-		Message:            fmt.Sprintf("Pod (%s) can not be fetched for model adapter (%s), clean up the list", stalePodName, instance.Name),
+		Message:            fmt.Sprintf("Pod (%s) cannot be fetched or is invalid for model adapter (%s), clean up the list", stalePodName, instance.Name),
 		LastTransitionTime: metav1.Now(),
 	})
 
+	// removing the instances means the LoRA has no targets at this moment.
+	instance.Status.Phase = modelv1alpha1.ModelAdapterPending
+
 	if err := r.Status().Update(ctx, instance); err != nil {
 		klog.Error(err, "Failed to update modelAdapter status")
 		return err
@@ -410,21 +477,40 @@ func (r *ModelAdapterReconciler) schedulePod(ctx context.Context, instance *mode
 		return nil, err
 	}
 
-	if len(podList.Items) == 0 {
-		return nil, fmt.Errorf("no pods found matching selector")
+	// filter out terminating or not-ready pods
+	var activePods []corev1.Pod
+
+	for _, pod := range podList.Items {
+		if !utils.IsPodTerminating(&pod) && utils.IsPodReady(&pod) {
+			activePods = append(activePods, pod)
+		}
 	}
 
-	return r.scheduler.SelectPod(ctx, podList.Items)
+	if len(activePods) == 0 {
+		klog.Warning("no pods found matching selector")
+		return nil, nil
+	}
+
+	return r.scheduler.SelectPod(ctx, activePods)
 }
 
 func (r *ModelAdapterReconciler) reconcileLoading(ctx context.Context, instance *modelv1alpha1.ModelAdapter, pod *corev1.Pod) error {
+	if pod == nil {
+		return nil
+	}
+
+	// the selected pod could be terminating; in that case, just do nothing.
+	if pod.DeletionTimestamp != nil {
+		return nil
+	}
+
 	// Define the key you want to check
 	key := "DEBUG_MODE"
 	value, exists := getEnvKey(key)
 	host := fmt.Sprintf("http://%s:8000", pod.Status.PodIP)
 	if exists && value == "on" {
 		// 30080 is the nodePort of the base model service.
-		host = fmt.Sprintf("http://%s:30080", "localhost")
+		host = fmt.Sprintf("http://%s:30081", "localhost")
 	}
 
 	// Check if the model is already loaded
@@ -661,6 +747,7 @@ func (r *ModelAdapterReconciler) reconcileService(ctx context.Context, instance
 		klog.ErrorS(err, "Failed to get Service")
 		return ctrl.Result{}, err
 	}
+	// TODO: add `else` logic to compare the service's major fields and update it to the target state.
 
 	// TODO: Now, we are using the name comparison which is not enough,
 	// compare the object difference in future.
@@ -708,6 +795,25 @@ func (r *ModelAdapterReconciler) reconcileEndpointSlice(ctx context.Context, ins
 		klog.ErrorS(err, "Failed to get EndpointSlice")
 		return ctrl.Result{}, err
 	} else {
+		// Check if pod is nil; if so, clear the endpoints and set the phase to Pending
+		if pod == nil {
+			klog.InfoS("Pod is nil, clearing all endpoints and setting status to Pending")
+			found.Endpoints = []discoveryv1.Endpoint{}
+
+			if err := r.Update(ctx, found); err != nil {
+				klog.ErrorS(err, "Failed to update EndpointSlice after clearing endpoints", "EndpointSlice", found.Name)
+				return ctrl.Result{}, err
+			}
+
+			instance.Status.Phase = modelv1alpha1.ModelAdapterPending
+			if err := r.Status().Update(ctx, instance); err != nil {
+				klog.Error(err, "Failed to update modelAdapter status to Pending")
+				return ctrl.Result{}, err
+			}
+
+			return ctrl.Result{}, nil
+		}
+
 		// Existing EndpointSlice Found. Check if the Pod IP is already in the EndpointSlice
 		podIP := pod.Status.PodIP
 		alreadyExists := false
@@ -733,9 +839,44 @@ func (r *ModelAdapterReconciler) reconcileEndpointSlice(ctx context.Context, ins
 			klog.ErrorS(err, "Failed to update EndpointSlice", "EndpointSlice", found.Name)
 			return ctrl.Result{}, err
 		}
+		instance.Status.Phase = modelv1alpha1.ModelAdapterRunning
 		klog.InfoS("Successfully updated EndpointSlice", "EndpointSlice", found.Name)
 	} else {
-		klog.InfoS("Pod IP already exists in EndpointSlice", "PodIP", podIP)
+		// the pod has been deleted, so remove its IP from the endpoint list
+		if pod.DeletionTimestamp != nil {
+			var updatedEndpoints []discoveryv1.Endpoint
+			podIP := pod.Status.PodIP
+
+			for _, endpoint := range found.Endpoints {
+				shouldRemove := false
+				var newAddresses []string
+
+				for _, address := range endpoint.Addresses {
+					if address == podIP {
+						shouldRemove = true
+					} else {
+						newAddresses = append(newAddresses, address)
+					}
+				}
+
+				if !shouldRemove || len(newAddresses) > 0 {
+					endpoint.Addresses = newAddresses
+					updatedEndpoints = append(updatedEndpoints, endpoint)
+				}
+			}
+
+			found.Endpoints = updatedEndpoints
+			if err := r.Update(ctx, found); err != nil {
+				klog.ErrorS(err, "Failed to update EndpointSlice after removing PodIP", "EndpointSlice", found.Name)
+				return ctrl.Result{}, err
+			}
+
+			instance.Status.Phase = modelv1alpha1.ModelAdapterPending
+			klog.InfoS("Successfully removed Pod IP from EndpointSlice", "PodIP", podIP, "EndpointSlice", found.Name)
+		} else {
+			klog.InfoS("Pod IP already exists in EndpointSlice", "PodName", pod.Name, "PodIP", podIP)
+			instance.Status.Phase = modelv1alpha1.ModelAdapterRunning
+		}
 	}
 }
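The address-removal loop in the @@ -733 hunk above keeps an endpoint only if it either never contained the deleted pod's IP or still has other addresses left after stripping it. A condensed, runnable restatement; endpoints are reduced to plain address lists here as an illustrative simplification, not the controller's real discoveryv1 types:

    package main

    import "fmt"

    // removePodIP strips podIP from every endpoint's address list. An endpoint
    // is dropped entirely when podIP was its only address; endpoints that never
    // contained podIP are kept unchanged.
    func removePodIP(endpoints [][]string, podIP string) [][]string {
        var updated [][]string
        for _, addresses := range endpoints {
            removed := false
            var kept []string
            for _, a := range addresses {
                if a == podIP {
                    removed = true
                } else {
                    kept = append(kept, a)
                }
            }
            if !removed || len(kept) > 0 {
                updated = append(updated, kept)
            }
        }
        return updated
    }

    func main() {
        eps := [][]string{{"10.0.0.1"}, {"10.0.0.2", "10.0.0.3"}}
        fmt.Println(removePodIP(eps, "10.0.0.1")) // [[10.0.0.2 10.0.0.3]]
    }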
diff --git a/pkg/controller/modeladapter/resources.go b/pkg/controller/modeladapter/resources.go
index ec238558..23c3186c 100644
--- a/pkg/controller/modeladapter/resources.go
+++ b/pkg/controller/modeladapter/resources.go
@@ -61,8 +61,8 @@ func buildModelAdapterEndpointSlice(instance *modelv1alpha1.ModelAdapter, pod *c
 
 func buildModelAdapterService(instance *modelv1alpha1.ModelAdapter) (*corev1.Service, error) {
 	labels := map[string]string{
-		"model.aibrix.ai/base-model":    instance.Spec.BaseModel,
-		"model.aibrix.ai/model-adapter": instance.Name,
+		"model.aibrix.ai/name":         instance.Spec.BaseModel,
+		"adapter.model.aibrix.ai/name": instance.Name,
 	}
 
 	ports := []corev1.ServicePort{
diff --git a/pkg/controller/modeladapter/resources_test.go b/pkg/controller/modeladapter/resources_test.go
index 81cc47da..31a5e921 100644
--- a/pkg/controller/modeladapter/resources_test.go
+++ b/pkg/controller/modeladapter/resources_test.go
@@ -92,8 +92,8 @@ func TestBuildModelAdapterService(t *testing.T) {
 	assert.Equal(t, "test-instance", service.Name)
 	assert.Equal(t, "default", service.Namespace)
 	assert.Equal(t, map[string]string{
-		"model.aibrix.ai/base-model":    "test-model",
-		"model.aibrix.ai/model-adapter": "test-instance",
+		"model.aibrix.ai/name":         "test-model",
+		"adapter.model.aibrix.ai/name": "test-instance",
 	}, service.Labels)
 
 	// Check ports
diff --git a/pkg/controller/modeladapter/utils.go b/pkg/controller/modeladapter/utils.go
index 04b3b168..350f317d 100644
--- a/pkg/controller/modeladapter/utils.go
+++ b/pkg/controller/modeladapter/utils.go
@@ -127,3 +127,12 @@ func extractHuggingFacePath(artifactURL string) (string, error) {
 
 	return path, nil
 }
+
+func stringInSlice(slice []string, str string) bool {
+	for _, v := range slice {
+		if v == str {
+			return true
+		}
+	}
+	return false
+}
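stringInSlice is a plain membership helper; no caller appears in this diff, so the usage below is hypothetical (for instance, checking whether a pod name is already tracked in Status.Instances):

    package main

    import "fmt"

    // stringInSlice reports whether str occurs in slice, matching the helper
    // added to pkg/controller/modeladapter/utils.go above.
    func stringInSlice(slice []string, str string) bool {
        for _, v := range slice {
            if v == str {
                return true
            }
        }
        return false
    }

    func main() {
        instances := []string{"llama2-70b-0", "llama2-70b-1"}
        fmt.Println(stringInSlice(instances, "llama2-70b-1")) // true
        fmt.Println(stringInSlice(instances, "llama2-70b-9")) // false
    }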
diff --git a/pkg/controller/modelrouter/modelrouter_controller.go b/pkg/controller/modelrouter/modelrouter_controller.go
index a53dd6e0..b4b252b7 100644
--- a/pkg/controller/modelrouter/modelrouter_controller.go
+++ b/pkg/controller/modelrouter/modelrouter_controller.go
@@ -38,7 +38,7 @@ import (
 const (
 	// TODO (varun): cleanup model related identifiers and establish common consensus
 	modelHeaderIdentifier = "model"
-	modelIdentifier       = "model.aibrix.ai"
+	modelIdentifier       = "model.aibrix.ai/name"
 	modelPortIdentifier   = "model.aibrix.ai/port"
 	// TODO (varun): parameterize it or dynamically resolve it
 	aibrixEnvoyGateway = "aibrix-eg"
diff --git a/pkg/utils/pod.go b/pkg/utils/pod.go
index 3da920a1..1ad18faa 100644
--- a/pkg/utils/pod.go
+++ b/pkg/utils/pod.go
@@ -30,6 +30,11 @@ const (
 	NAMESPACE = "aibrix-system"
 )
 
+// IsPodTerminating checks whether the pod is terminating, i.e. whether its deletion timestamp is set.
+func IsPodTerminating(pod *v1.Pod) bool {
+	return pod.ObjectMeta.DeletionTimestamp != nil
+}
+
 // IsPodReady returns true if a pod is ready; false otherwise.
 func IsPodReady(pod *v1.Pod) bool {
 	return IsPodReadyConditionTrue(pod.Status)
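With these helpers, schedulePod considers only pods that are ready and not terminating. A self-contained sketch of that filter; isPodReady and isPodTerminating are re-implemented locally for illustration, while the real versions live in pkg/utils:

    package main

    import (
        "fmt"

        corev1 "k8s.io/api/core/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    // isPodTerminating: a pod is terminating once its deletion timestamp is set.
    func isPodTerminating(pod *corev1.Pod) bool { return pod.DeletionTimestamp != nil }

    // isPodReady: a pod is ready when its PodReady condition is True.
    func isPodReady(pod *corev1.Pod) bool {
        for _, c := range pod.Status.Conditions {
            if c.Type == corev1.PodReady {
                return c.Status == corev1.ConditionTrue
            }
        }
        return false
    }

    func main() {
        now := metav1.Now()
        pods := []corev1.Pod{
            {ObjectMeta: metav1.ObjectMeta{Name: "ready"},
                Status: corev1.PodStatus{Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}}}},
            {ObjectMeta: metav1.ObjectMeta{Name: "terminating", DeletionTimestamp: &now},
                Status: corev1.PodStatus{Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}}}},
            {ObjectMeta: metav1.ObjectMeta{Name: "not-ready"}},
        }

        var active []corev1.Pod
        for i := range pods {
            if !isPodTerminating(&pods[i]) && isPodReady(&pods[i]) {
                active = append(active, pods[i])
            }
        }
        fmt.Println(len(active)) // 1: only "ready" survives the filter
    }

Returning (nil, nil) instead of an error when this filter leaves nothing lets the reconciler park the adapter in Pending and rely on the pod watch, rather than error-requeueing in a tight loop.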