diff --git a/internal/controller/components/kserve/kserve_controller_actions.go b/internal/controller/components/kserve/kserve_controller_actions.go index 02ab041cade6..3e3666a3b25b 100644 --- a/internal/controller/components/kserve/kserve_controller_actions.go +++ b/internal/controller/components/kserve/kserve_controller_actions.go @@ -11,7 +11,6 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" k8serr "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" logf "sigs.k8s.io/controller-runtime/pkg/log" @@ -291,17 +290,9 @@ func cleanUpTemplatedResources(ctx context.Context, rr *odhtypes.ReconciliationR // CR generation hasn't changed. for _, res := range rr.Resources { if isForDependency("serverless")(&res) || isForDependency("servicemesh")(&res) { - err := rr.Client.Delete(ctx, &res, client.PropagationPolicy(metav1.DeletePropagationForeground)) - if err != nil { - if k8serr.IsNotFound(err) { - continue - } - if errors.Is(err, &meta.NoKindMatchError{}) { // when CRD is missing, - continue - } - return odherrors.NewStopErrorW(err) + if err := deleteResourceIfOwnedByKserve(ctx, rr.Client, res, logger); err != nil { + return err } - logger.Info("Deleted", "kind", res.GetKind(), "name", res.GetName(), "namespace", res.GetNamespace()) } } } @@ -311,17 +302,9 @@ func cleanUpTemplatedResources(ctx context.Context, rr *odhtypes.ReconciliationR if !authorinoInstalled { for _, res := range rr.Resources { if isForDependency("servicemesh")(&res) { - err := rr.Client.Delete(ctx, &res, client.PropagationPolicy(metav1.DeletePropagationForeground)) - if err != nil { - if k8serr.IsNotFound(err) { - continue - } - if errors.Is(err, &meta.NoKindMatchError{}) { // when CRD is missing, - continue - } - return odherrors.NewStopErrorW(err) + if err := deleteResourceIfOwnedByKserve(ctx, rr.Client, res, logger); err != nil { + return err } - logger.Info("Deleted", "kind", res.GetKind(), "name", res.GetName(), "namespace", res.GetNamespace()) } } if err := rr.RemoveResources(isForDependency("servicemesh")); err != nil { diff --git a/internal/controller/components/kserve/kserve_controller_actions_test.go b/internal/controller/components/kserve/kserve_controller_actions_test.go index a60eadba641c..b7338e1a4e23 100644 --- a/internal/controller/components/kserve/kserve_controller_actions_test.go +++ b/internal/controller/components/kserve/kserve_controller_actions_test.go @@ -9,6 +9,8 @@ import ( ofapiv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" ofapiv2 "github.com/operator-framework/api/pkg/operators/v2" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/opendatahub-io/opendatahub-operator/v2/api/common" componentApi "github.com/opendatahub-io/opendatahub-operator/v2/api/components/v1alpha1" @@ -479,3 +481,144 @@ func TestCleanUpTemplatedResources_withoutAuthorino(t *testing.T) { ), ) } + +func TestCleanUpTemplatedResources_DoesNotDeleteExternalResources(t *testing.T) { + // This test verifies that cleanUpTemplatedResources does NOT delete resources + // that lack Kserve OwnerReferences, preventing accidental deletion of user-created + // or externally-managed resources. + // + // Tests multiple code paths in cleanUpTemplatedResources: + // 1. First deletion loop (ServiceMesh.ManagementState == Removed) + // 2. Second deletion loop (!authorinoInstalled) + + testCases := []struct { + name string + authorinoInstalled bool + servingManagementState operatorv1.ManagementState + serviceMeshManagement operatorv1.ManagementState + externalResourceGVK schema.GroupVersionKind + externalResourceName string + externalResourceNs string + description string + }{ + { + name: "ServiceMesh Removed, Serving Unmanaged, with Authorino", + authorinoInstalled: true, + servingManagementState: operatorv1.Unmanaged, + serviceMeshManagement: operatorv1.Removed, + externalResourceGVK: gvk.KnativeServing, + externalResourceName: "knative-serving", + externalResourceNs: "knative-serving", + description: "User wants to use existing KnativeServing but not manage it", + }, + { + name: "ServiceMesh Removed, Serving Removed, with Authorino", + authorinoInstalled: true, + servingManagementState: operatorv1.Removed, + serviceMeshManagement: operatorv1.Removed, + externalResourceGVK: gvk.KnativeServing, + externalResourceName: "knative-serving", + externalResourceNs: "knative-serving", + description: "User wants RawDeployment mode without Serverless", + }, + { + name: "ServiceMesh Managed, No Authorino", + authorinoInstalled: false, + servingManagementState: operatorv1.Managed, + serviceMeshManagement: operatorv1.Managed, + externalResourceGVK: gvk.EnvoyFilter, + externalResourceName: "activator-host-header", + externalResourceNs: "istio-system", + description: "Tests second deletion loop when authorino is not installed", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := t.Context() + g := NewWithT(t) + + // Create an external resource WITHOUT Kserve OwnerReference. + // This simulates a resource created by the user or another controller. + externalResource := resources.GvkToUnstructured(tc.externalResourceGVK) + externalResource.SetName(tc.externalResourceName) + externalResource.SetNamespace(tc.externalResourceNs) + // Intentionally not setting Kserve OwnerReference + externalResource.SetLabels(map[string]string{ + "external-label": "true", + }) + + // Build initial fake client objects + initialObjects := []client.Object{ + &ofapiv2.OperatorCondition{ObjectMeta: metav1.ObjectMeta{ + Name: serviceMeshOperator, + }}, + &ofapiv2.OperatorCondition{ObjectMeta: metav1.ObjectMeta{ + Name: serverlessOperator, + }}, + externalResource, + } + + // Conditionally add authorino-operator subscription + if tc.authorinoInstalled { + initialObjects = append(initialObjects, &ofapiv1alpha1.Subscription{ + ObjectMeta: metav1.ObjectMeta{ + Name: "authorino-operator", + Namespace: "openshift-operators", + }, + }) + } + + cli, err := fakeclient.New( + fakeclient.WithObjects(initialObjects...), + ) + g.Expect(err).ShouldNot(HaveOccurred()) + + ks := componentApi.Kserve{} + ks.Spec.Serving.ManagementState = tc.servingManagementState + ks.Spec.Serving.Name = "knative-serving" + + dsci := dsciv1.DSCInitialization{} + dsci.Spec.ApplicationsNamespace = "opendatahub" + dsci.Spec.ServiceMesh = &infrav1.ServiceMeshSpec{ + ManagementState: tc.serviceMeshManagement, + ControlPlane: infrav1.ControlPlaneSpec{ + Namespace: "istio-system", + }, + } + + rr := types.ReconciliationRequest{ + Client: cli, + Instance: &ks, + DSCI: &dsci, + Conditions: conditions.NewManager(&ks, status.ConditionTypeReady), + } + + // Add template files and render them + err = addTemplateFiles(ctx, &rr) + g.Expect(err).ShouldNot(HaveOccurred()) + + err = template.NewAction( + template.WithDataFn(getTemplateData), + )(ctx, &rr) + g.Expect(err).ShouldNot(HaveOccurred()) + + // Verify the external resource exists before cleanup + resourceBefore := resources.GvkToUnstructured(tc.externalResourceGVK) + err = cli.Get(ctx, client.ObjectKey{Name: tc.externalResourceName, Namespace: tc.externalResourceNs}, resourceBefore) + g.Expect(err).ShouldNot(HaveOccurred(), "External resource should exist before cleanup") + + // Execute the cleanup function + err = cleanUpTemplatedResources(ctx, &rr) + g.Expect(err).ShouldNot(HaveOccurred()) + + // FIXED: The external resource should NOT be deleted because it doesn't have + // a Kserve OwnerReference. The fix checks OwnerReferences before deletion in both: + // 1. First deletion loop (ServiceMesh.ManagementState == Removed) + // 2. Second deletion loop (!authorinoInstalled) + resourceAfter := resources.GvkToUnstructured(tc.externalResourceGVK) + err = cli.Get(ctx, client.ObjectKey{Name: tc.externalResourceName, Namespace: tc.externalResourceNs}, resourceAfter) + g.Expect(err).ShouldNot(HaveOccurred(), "External resource should still exist (bug is fixed)") + }) + } +} diff --git a/internal/controller/components/kserve/kserve_support.go b/internal/controller/components/kserve/kserve_support.go index baf6c06df495..b9b1eb469da6 100644 --- a/internal/controller/components/kserve/kserve_support.go +++ b/internal/controller/components/kserve/kserve_support.go @@ -5,13 +5,16 @@ import ( "embed" "encoding/base64" "encoding/json" + "errors" "fmt" "maps" "strings" + "github.com/go-logr/logr" operatorv1 "github.com/openshift/api/operator/v1" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" + k8serr "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" @@ -24,6 +27,7 @@ import ( dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/api/dscinitialization/v1" infrav1 "github.com/opendatahub-io/opendatahub-operator/v2/api/infrastructure/v1" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" + odherrors "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/errors" odhtypes "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/types" odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" @@ -133,7 +137,7 @@ func getKnativeCertSecretName(k *componentApi.Kserve) string { func getDefaultDeploymentMode(ctx context.Context, cli client.Client, dscispec *dsciv1.DSCInitializationSpec) (string, error) { kserveConfigMap := corev1.ConfigMap{} err := cli.Get(ctx, client.ObjectKey{Name: kserveConfigMapName, Namespace: dscispec.ApplicationsNamespace}, &kserveConfigMap) - if errors.IsNotFound(err) { + if k8serr.IsNotFound(err) { return "", nil } if err != nil { @@ -269,7 +273,10 @@ func getAndRemoveOwnerReferences( current := resources.GvkToUnstructured(res.GroupVersionKind()) lookupErr := cli.Get(ctx, client.ObjectKeyFromObject(&res), current) - if errors.IsNotFound(lookupErr) { + if k8serr.IsNotFound(lookupErr) { + return nil + } + if errors.Is(lookupErr, &meta.NoKindMatchError{}) { return nil } if lookupErr != nil { @@ -302,3 +309,47 @@ func isKserveOwnerRef(or metav1.OwnerReference) bool { return or.APIVersion == componentApi.GroupVersion.String() && or.Kind == componentApi.KserveKind } + +// deleteResourceIfOwnedByKserve fetches a cluster resource and deletes it only if it has a Kserve OwnerReference. +// This prevents accidentally deleting resources created by users or other controllers. +func deleteResourceIfOwnedByKserve(ctx context.Context, cli client.Client, res unstructured.Unstructured, logger logr.Logger) error { + // Fetch the cluster resource to check ownership + key := client.ObjectKeyFromObject(&res) + clusterRes := resources.GvkToUnstructured(res.GroupVersionKind()) + + if err := cli.Get(ctx, key, clusterRes); err != nil { + if k8serr.IsNotFound(err) { + return nil // Already deleted or never existed + } + if errors.Is(err, &meta.NoKindMatchError{}) { + return nil // CRD missing + } + return odherrors.NewStopErrorW(err) + } + + // Only delete if resource has Kserve OwnerReference + hasKserveOwner := false + for _, owner := range clusterRes.GetOwnerReferences() { + if isKserveOwnerRef(owner) { + hasKserveOwner = true + break + } + } + + if !hasKserveOwner { + return nil // Skip resources not owned by Kserve controller + } + + err := cli.Delete(ctx, clusterRes, client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil { + if k8serr.IsNotFound(err) { + return nil + } + if errors.Is(err, &meta.NoKindMatchError{}) { + return nil + } + return odherrors.NewStopErrorW(err) + } + logger.Info("Deleted", "kind", clusterRes.GetKind(), "name", clusterRes.GetName(), "namespace", clusterRes.GetNamespace()) + return nil +} diff --git a/internal/controller/components/kserve/kserve_support_test.go b/internal/controller/components/kserve/kserve_support_test.go new file mode 100644 index 000000000000..3b25398c82e5 --- /dev/null +++ b/internal/controller/components/kserve/kserve_support_test.go @@ -0,0 +1,131 @@ +//nolint:testpackage +package kserve + +import ( + "context" + "testing" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/interceptor" + + componentApi "github.com/opendatahub-io/opendatahub-operator/v2/api/components/v1alpha1" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/utils/test/fakeclient" + + . "github.com/onsi/gomega" +) + +func TestGetAndRemoveOwnerReferences_Success(t *testing.T) { + ctx := t.Context() + g := NewWithT(t) + + // Create a ConfigMap with owner reference to Kserve + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-configmap", + Namespace: "test-namespace", + Labels: map[string]string{ + labels.PlatformPartOf: "kserve", + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: componentApi.GroupVersion.String(), + Kind: componentApi.KserveKind, + Name: "test-kserve", + UID: "test-uid", + }, + }, + }, + Data: map[string]string{ + "key": "value", + }, + } + + cli, err := fakeclient.New(fakeclient.WithObjects(cm)) + g.Expect(err).ShouldNot(HaveOccurred()) + + // Create an unstructured resource from the ConfigMap + res := unstructured.Unstructured{} + res.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "", + Version: "v1", + Kind: "ConfigMap", + }) + res.SetName("test-configmap") + res.SetNamespace("test-namespace") + + // Call getAndRemoveOwnerReferences + err = getAndRemoveOwnerReferences(ctx, cli, res, isKserveOwnerRef) + g.Expect(err).ShouldNot(HaveOccurred()) + + // Verify the owner reference was removed + updatedCM := &corev1.ConfigMap{} + err = cli.Get(ctx, client.ObjectKey{Name: "test-configmap", Namespace: "test-namespace"}, updatedCM) + g.Expect(err).ShouldNot(HaveOccurred()) + g.Expect(updatedCM.GetOwnerReferences()).Should(BeEmpty()) + g.Expect(updatedCM.GetLabels()).ShouldNot(HaveKey(labels.PlatformPartOf)) +} + +func TestGetAndRemoveOwnerReferences_ResourceNotFound(t *testing.T) { + ctx := t.Context() + g := NewWithT(t) + + cli, err := fakeclient.New() + g.Expect(err).ShouldNot(HaveOccurred()) + + // Create an unstructured resource that doesn't exist + res := unstructured.Unstructured{} + res.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "", + Version: "v1", + Kind: "ConfigMap", + }) + res.SetName("nonexistent-configmap") + res.SetNamespace("test-namespace") + + // Call getAndRemoveOwnerReferences - should not error for missing resources + err = getAndRemoveOwnerReferences(ctx, cli, res, isKserveOwnerRef) + g.Expect(err).ShouldNot(HaveOccurred()) +} + +func TestGetAndRemoveOwnerReferences_CRDNotInstalled(t *testing.T) { + ctx := t.Context() + g := NewWithT(t) + + // Create an interceptor that simulates a missing CRD + interceptorFuncs := interceptor.Funcs{ + Get: func(ctx context.Context, client client.WithWatch, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + // Simulate the error when trying to get a resource whose CRD doesn't exist + return &meta.NoKindMatchError{ + GroupKind: schema.GroupKind{ + Group: "maistra.io", + Kind: "ServiceMeshMember", + }, + SearchedVersions: []string{"v1"}, + } + }, + } + + cli, err := fakeclient.New(fakeclient.WithInterceptorFuncs(interceptorFuncs)) + g.Expect(err).ShouldNot(HaveOccurred()) + + // Create an unstructured resource for a CRD that doesn't exist + // (e.g., ServiceMeshMember when OSSM is not installed) + res := unstructured.Unstructured{} + res.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "maistra.io", + Version: "v1", + Kind: "ServiceMeshMember", + }) + res.SetName("default") + res.SetNamespace("knative-serving") + + // Call getAndRemoveOwnerReferences - should not error for missing CRDs + err = getAndRemoveOwnerReferences(ctx, cli, res, isKserveOwnerRef) + g.Expect(err).ShouldNot(HaveOccurred()) +}