From c4263437f6cc70304033eae54f99ad7fa836fb26 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 30 Sep 2024 13:30:00 -0700 Subject: [PATCH] Cache bug fix in update pod and model mapping (#259) --- Makefile | 2 +- pkg/cache/cache.go | 23 +++++++++++++++---- .../modelrouter/modelrouter_controller.go | 2 ++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 0fd842c0..1bb405e5 100644 --- a/Makefile +++ b/Makefile @@ -207,7 +207,7 @@ uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified .PHONY: deploy deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} - $(KUSTOMIZE) build config/default | $(KUBECTL) apply -f - + $(KUSTOMIZE) build config/default | $(KUBECTL) create -f - .PHONY: undeploy undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go index 7aa6e755..60a23412 100644 --- a/pkg/cache/cache.go +++ b/pkg/cache/cache.go @@ -172,13 +172,20 @@ func (c *Cache) deletePod(obj interface{}) { defer c.mu.Unlock() pod := obj.(*v1.Pod) - modelName, ok := pod.Labels[modelIdentifier] + _, ok := pod.Labels[modelIdentifier] if !ok { return } + // delete base model and associated lora models on this pod + if models, ok := c.podToModelMapping[pod.Name]; ok { + for modelName := range models { + c.deletePodAndModelMapping(pod.Name, modelName) + } + } + delete(c.podToModelMapping, pod.Name) delete(c.pods, pod.Name) - c.deletePodAndModelMapping(pod.Name, modelName) + klog.V(4).Infof("POD DELETED: %s/%s", pod.Namespace, pod.Name) c.debugInfo() } @@ -223,6 +230,7 @@ func (c *Cache) deleteModelAdapter(obj interface{}) { for _, pod := range model.Status.Instances { c.deletePodAndModelMapping(pod, model.Name) } + delete(c.modelToPodMapping, model.Name) klog.V(4).Infof("MODELADAPTER DELETED: %s/%s", model.Namespace, model.Name) c.debugInfo() @@ -257,8 +265,15 @@ func (c *Cache) addPodAndModelMapping(podName, modelName string) { } func (c *Cache) deletePodAndModelMapping(podName, modelName string) { - delete(c.podToModelMapping, podName) - delete(c.modelToPodMapping, modelName) + if models, ok := c.podToModelMapping[podName]; ok { + delete(models, modelName) + c.podToModelMapping[podName] = models + } + + if pods, ok := c.modelToPodMapping[modelName]; ok { + delete(pods, podName) + c.modelToPodMapping[modelName] = pods + } } func (c *Cache) debugInfo() { diff --git a/pkg/controller/modelrouter/modelrouter_controller.go b/pkg/controller/modelrouter/modelrouter_controller.go index b4b252b7..38b44043 100644 --- a/pkg/controller/modelrouter/modelrouter_controller.go +++ b/pkg/controller/modelrouter/modelrouter_controller.go @@ -162,6 +162,7 @@ func (m *ModelRouter) createHTTPRoute(namespace string, labels map[string]string } if err := m.Client.Create(context.Background(), &httpRoute); err != nil { klog.Errorln(err) + return } klog.Infof("httproute: %v created for model: %v", httpRoute.Name, modelName) } @@ -181,6 +182,7 @@ func (m *ModelRouter) deleteHTTPRoute(namespace string, labels map[string]string if err := m.Client.Delete(context.Background(), &httpRoute); err != nil { klog.Errorln(err) + return } klog.Infof("httproute: %v deleted for model: %v", httpRoute.Name, modelName) }