diff --git a/go.mod b/go.mod index 82c3038b18c8..068504e7704a 100644 --- a/go.mod +++ b/go.mod @@ -19,6 +19,7 @@ require ( github.com/spf13/viper v1.20.1 github.com/stretchr/testify v1.10.0 go.uber.org/zap v1.27.0 + golang.org/x/mod v0.24.0 gomodules.xyz/jsonpatch/v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.32.4 diff --git a/go.sum b/go.sum index 00761ae8376a..413eb249fdf5 100644 --- a/go.sum +++ b/go.sum @@ -38,6 +38,8 @@ github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9L github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc= +github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= @@ -150,6 +152,7 @@ github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= @@ -167,10 +170,14 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU= +golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc= @@ -178,14 +185,19 @@ golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -198,6 +210,7 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU= golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/controller/components/kueue/kueue_config.go b/internal/controller/components/kueue/kueue_config.go index 139daff0cc92..3302dbfff800 100644 --- a/internal/controller/components/kueue/kueue_config.go +++ b/internal/controller/components/kueue/kueue_config.go @@ -6,6 +6,7 @@ import ( "slices" operatorv1 "github.com/openshift/api/operator/v1" + "golang.org/x/mod/semver" corev1 "k8s.io/api/core/v1" k8serr "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -22,18 +23,19 @@ import ( var ( frameworkMapping = map[string]string{ - "pod": "Pod", - "deployment": "Deployment", - "statefulset": "StatefulSet", - "batch/job": "BatchJob", - "ray.io/rayjob": "RayJob", - "ray.io/raycluster": "RayCluster", - "jobset.x-k8s.io/jobset": "JobSet", - "kubeflow.org/mpijob": "MPIJob", - "kubeflow.org/paddlejob": "PaddleJob", - "kubeflow.org/pytorchjob": "PyTorchJob", - "kubeflow.org/tfjob": "TFJob", - "kubeflow.org/xgboostjob": "XGBoostJob", + "pod": "Pod", + "deployment": "Deployment", + "statefulset": "StatefulSet", + "batch/job": "BatchJob", + "ray.io/rayjob": "RayJob", + "ray.io/raycluster": "RayCluster", + "jobset.x-k8s.io/jobset": "JobSet", + "kubeflow.org/mpijob": "MPIJob", + "kubeflow.org/paddlejob": "PaddleJob", + "kubeflow.org/pytorchjob": "PyTorchJob", + "kubeflow.org/tfjob": "TFJob", + "kubeflow.org/xgboostjob": "XGBoostJob", + "trainer.kubeflow.org/trainjob": "TrainJob", "leaderworkerset.x-k8s.io/leaderworkerset": "LeaderWorkerSet", } ) @@ -79,11 +81,19 @@ func createKueueCR(ctx context.Context, rr *odhtypes.ReconciliationRequest) (*un return nil, fmt.Errorf("failed to lookup kueue manager config: %w", err) } + kueueInfo, err := cluster.OperatorExists(ctx, rr.Client, kueueOperator) + if err != nil { + return nil, fmt.Errorf("failed to check if %s exists: %w", kueueOperator, err) + } + + if kueueInfo == nil { + return nil, ErrKueueOperatorNotInstalled + } // // Conversions // - integrations, err := convertIntegrations(managerConfig) + integrations, err := convertIntegrations(managerConfig, kueueInfo.Version) if err != nil { return nil, fmt.Errorf("failed to convert integrations: %w", err) } @@ -148,7 +158,7 @@ func createKueueCR(ctx context.Context, rr *odhtypes.ReconciliationRequest) (*un } // convertIntegrations converts the integrations section from ConfigMap to Kueue operator format. -func convertIntegrations(config map[string]interface{}) (map[string]interface{}, error) { +func convertIntegrations(config map[string]interface{}, kueueVersion string) (map[string]interface{}, error) { integrations := map[string]interface{}{} // @@ -169,6 +179,13 @@ func convertIntegrations(config map[string]interface{}) (map[string]interface{}, "StatefulSet", ) + // Support for TrainJob was added in Kueue 1.2.0, so if the installed version + // of kueue is equal or greater than 1.2.0, add TrainJob to the list of frameworks. + versionCmp := semver.Compare(kueueVersion, "v1.2.0") + if versionCmp >= 0 { + frameworkSet.Insert("TrainJob") + } + for _, framework := range frameworks { if converted, ok := frameworkMapping[framework]; ok { frameworkSet.Insert(converted) diff --git a/internal/controller/components/kueue/kueue_config_test.go b/internal/controller/components/kueue/kueue_config_test.go index f8acd5a54e8c..c7b738c17241 100644 --- a/internal/controller/components/kueue/kueue_config_test.go +++ b/internal/controller/components/kueue/kueue_config_test.go @@ -2,11 +2,14 @@ package kueue import ( + "fmt" "testing" + ofapiv2 "github.com/operator-framework/api/pkg/operators/v2" "gopkg.in/yaml.v3" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "sigs.k8s.io/controller-runtime/pkg/client/fake" componentApi "github.com/opendatahub-io/opendatahub-operator/v2/api/components/v1alpha1" @@ -63,6 +66,7 @@ integrations: - "kubeflow.org/pytorchjob" - "kubeflow.org/tfjob" - "kubeflow.org/xgboostjob" + - "trainer.kubeflow.org/trainjob" - "workload.codeflare.dev/appwrapper" - "leaderworkerset.x-k8s.io/leaderworkerset" manageJobsWithoutQueueName: true @@ -93,6 +97,7 @@ spec: - RayJob - StatefulSet - TFJob + - TrainJob - XGBoostJob workloadManagement: labelPolicy: None @@ -137,6 +142,7 @@ spec: - RayCluster - RayJob - StatefulSet + - TrainJob ` runKueueCRTest(t, kueueConfig, kueueCR) } @@ -163,6 +169,7 @@ spec: - RayCluster - RayJob - StatefulSet + - TrainJob ` runKueueCRTest(t, kueueConfig, kueueCR) } @@ -189,6 +196,7 @@ spec: - RayCluster - RayJob - StatefulSet + - TrainJob ` runKueueCRTest(t, kueueConfig, kueueCR) } @@ -219,6 +227,7 @@ spec: - RayCluster - RayJob - StatefulSet + - TrainJob ` runKueueCRTest(t, kueueConfig, kueueCR) } @@ -257,6 +266,7 @@ spec: - RayCluster - RayJob - StatefulSet + - TrainJob externalFrameworks: - MPIJob - RayJob @@ -287,6 +297,7 @@ spec: - RayCluster - RayJob - StatefulSet + - TrainJob ` runKueueCRTest(t, kueueConfig, kueueCR) } @@ -316,6 +327,7 @@ spec: - RayCluster - RayJob - StatefulSet + - TrainJob workloadManagement: labelPolicy: None ` @@ -348,6 +360,7 @@ spec: - RayCluster - RayJob - StatefulSet + - TrainJob gangScheduling: policy: ByWorkload byWorkload: @@ -382,6 +395,7 @@ spec: - RayCluster - RayJob - StatefulSet + - TrainJob preemption: preemptionPolicy: FairSharing fairSharing: @@ -418,6 +432,7 @@ spec: - RayCluster - RayJob - StatefulSet + - TrainJob gangScheduling: policy: ByWorkload byWorkload: @@ -457,6 +472,7 @@ spec: - RayCluster - RayJob - StatefulSet + - TrainJob ` runKueueCRTest(t, kueueConfig, kueueCR) } @@ -489,6 +505,7 @@ spec: - RayCluster - RayJob - StatefulSet + - TrainJob externalFrameworks: - MPIJob - RayJob @@ -524,6 +541,7 @@ spec: - RayCluster - RayJob - StatefulSet + - TrainJob labelKeys: - custom.label/key1 - custom.label/key2 @@ -551,6 +569,15 @@ func runKueueCRTest(t *testing.T, configMapYAML string, expectedCRYAML string) { } g.Expect(fakeClient.Create(ctx, dsci)).Should(Succeed()) + // Set an OperatorCondition for kueue-operator with the 1.2.0 version + operatorCondition := &ofapiv2.OperatorCondition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "kueue-operator.v1.2.0", + Namespace: "openshift-kueue-operator", + }, + } + g.Expect(fakeClient.Create(ctx, operatorCondition)).Should(Succeed()) + rr := &odhtypes.ReconciliationRequest{ Client: fakeClient, Instance: &componentApi.Kueue{}, @@ -633,3 +660,66 @@ invalid: yaml: content: [ g.Expect(result).Should(BeNil()) g.Expect(err.Error()).Should(ContainSubstring("failed to lookup kueue manager config")) } + +// --- Test: TrainJob framework generic test, with RHBoKv110 and RHBoKv120 ---. +func TestCreateKueueConfigurationCR_TrainJobFramework(t *testing.T) { + tests := []struct { + name string + kueueVersion string + }{ + { + name: "TestCreateKueueConfigurationCR_TrainJob_Framework_WithRHBoKv110", + kueueVersion: "v1.1.0", + }, + { + name: "TestCreateKueueConfigurationCR_TrainJob_Framework_WithRHBoKv120", + kueueVersion: "v1.2.0", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + ctx := t.Context() + + // Setup fake client + fakeClient, err := fakeclient.New() + g.Expect(err).ShouldNot(HaveOccurred()) + + // DSCI with applications namespace + dsci := &dsciv2.DSCInitialization{ + ObjectMeta: metav1.ObjectMeta{Name: "test-dsci"}, + Spec: dsciv2.DSCInitializationSpec{ApplicationsNamespace: "test-namespace"}, + } + g.Expect(fakeClient.Create(ctx, dsci)).Should(Succeed()) + + // Set an OperatorCondition for kueue-operator with the desired version + operatorCondition := &ofapiv2.OperatorCondition{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("kueue-operator.%s", tt.kueueVersion), + Namespace: "openshift-kueue-operator", + }, + } + + g.Expect(fakeClient.Create(ctx, operatorCondition)).Should(Succeed()) + + rr := &odhtypes.ReconciliationRequest{Client: fakeClient, Instance: &componentApi.Kueue{}} + + // No ConfigMap needed; defaults will be used + result, err := createKueueCR(ctx, rr) + g.Expect(err).ShouldNot(HaveOccurred()) + + // Extract frameworks from the resulting CR and check if the expected values are there + frameworks, _, err := unstructured.NestedStringSlice(result.Object, "spec", "config", "integrations", "frameworks") + g.Expect(err).ShouldNot(HaveOccurred()) + switch tt.kueueVersion { + case "v1.1.0": + g.Expect(frameworks).ShouldNot(ContainElement("TrainJob")) + case "v1.2.0": + g.Expect(frameworks).Should(ContainElement("TrainJob")) + default: + t.Skipf("Unexpected kueue version: %s", tt.kueueVersion) + } + }) + } +} diff --git a/internal/controller/components/kueue/kueue_controller_actions.go b/internal/controller/components/kueue/kueue_controller_actions.go index d72dbf70213e..1de0f8be50db 100644 --- a/internal/controller/components/kueue/kueue_controller_actions.go +++ b/internal/controller/components/kueue/kueue_controller_actions.go @@ -29,7 +29,7 @@ func checkPreConditions(ctx context.Context, rr *odhtypes.ReconciliationRequest) case operatorv1.Managed: return ErrKueueStateManagedNotSupported case operatorv1.Unmanaged: - if found, err := cluster.OperatorExists(ctx, rr.Client, kueueOperator); err != nil || !found { + if kueueInfo, err := cluster.OperatorExists(ctx, rr.Client, kueueOperator); err != nil || kueueInfo == nil { if err != nil { return odherrors.NewStopErrorW(err) } diff --git a/internal/controller/components/kueue/kueue_controller_actions_test.go b/internal/controller/components/kueue/kueue_controller_actions_test.go index 83c919f71b16..c163169e4c9a 100644 --- a/internal/controller/components/kueue/kueue_controller_actions_test.go +++ b/internal/controller/components/kueue/kueue_controller_actions_test.go @@ -2,6 +2,7 @@ package kueue import ( + "fmt" "slices" "testing" @@ -31,6 +32,8 @@ import ( . "github.com/onsi/gomega" ) +const kueueOperatorMinVersionWithTrainerSupport = "1.2.0" + func TestCheckPreConditions_Unknown_State(t *testing.T) { ctx := t.Context() g := NewWithT(t) @@ -61,9 +64,12 @@ func TestCheckPreConditions_Managed_KueueOperatorAlreadyInstalled(t *testing.T) cli, err := fakeclient.New( fakeclient.WithObjects( - &ofapiv2.OperatorCondition{ObjectMeta: metav1.ObjectMeta{ - Name: kueueOperator, - }}, + &ofapiv2.OperatorCondition{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s.%s", kueueOperator, kueueOperatorMinVersionWithTrainerSupport), + Namespace: kueueOperatorNamespace, + }, + }, ), ) g.Expect(err).ShouldNot(HaveOccurred()) @@ -547,12 +553,21 @@ func TestDefaultKueueResourcesAction(t *testing.T) { }, } + // Set an OperatorCondition for kueue-operator with the 1.2.0 version + operatorCondition := &ofapiv2.OperatorCondition{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s.%s", kueueOperator, kueueOperatorMinVersionWithTrainerSupport), + Namespace: kueueOperatorNamespace, + }, + } + runtimeObjects := []client.Object{ managedNamespace, legacyManagedNamespace, bothManagedNamespace, unmanagedNamespace, dsci, + operatorCondition, } clusterNodes := getClusterNodes(t, test.withGPU) diff --git a/internal/controller/components/trainer/trainer_controller_actions.go b/internal/controller/components/trainer/trainer_controller_actions.go index 995b16f7c984..049d43f9d357 100644 --- a/internal/controller/components/trainer/trainer_controller_actions.go +++ b/internal/controller/components/trainer/trainer_controller_actions.go @@ -27,7 +27,7 @@ import ( ) func checkPreConditions(ctx context.Context, rr *odhtypes.ReconciliationRequest) error { - if found, err := cluster.OperatorExists(ctx, rr.Client, jobSetOperator); err != nil || !found { + if jobSetInfo, err := cluster.OperatorExists(ctx, rr.Client, jobSetOperator); err != nil || jobSetInfo == nil { if err != nil { return odherrors.NewStopErrorW(err) } diff --git a/internal/controller/components/trainer/trainer_controller_actions_test.go b/internal/controller/components/trainer/trainer_controller_actions_test.go index e49169f349b8..8cf916d4b4d9 100644 --- a/internal/controller/components/trainer/trainer_controller_actions_test.go +++ b/internal/controller/components/trainer/trainer_controller_actions_test.go @@ -2,6 +2,7 @@ package trainer import ( + "fmt" "testing" ofapiv2 "github.com/operator-framework/api/pkg/operators/v2" @@ -20,6 +21,8 @@ import ( . "github.com/onsi/gomega" ) +const jobSetOperatorRndVersion = "1.1.0" + func TestCheckPreConditions_Managed_JobSetOperatorNotInstalled(t *testing.T) { ctx := t.Context() g := NewWithT(t) @@ -49,7 +52,7 @@ func TestCheckPreConditions_Managed_JobSetCRDNotInstalled(t *testing.T) { cli, err := fakeclient.New( fakeclient.WithObjects( &ofapiv2.OperatorCondition{ObjectMeta: metav1.ObjectMeta{ - Name: jobSetOperator, + Name: fmt.Sprintf("%s.%s", jobSetOperator, jobSetOperatorRndVersion), }}, ), ) @@ -88,7 +91,7 @@ func TestCheckPreConditions_Managed_JobSetCRDInstalled(t *testing.T) { }, } jobSetOperatorCondition := &ofapiv2.OperatorCondition{ObjectMeta: metav1.ObjectMeta{ - Name: jobSetOperator, + Name: fmt.Sprintf("%s.%s", jobSetOperator, jobSetOperatorRndVersion), }} cli, err := fakeclient.New( diff --git a/internal/controller/services/monitoring/monitoring_controller_support.go b/internal/controller/services/monitoring/monitoring_controller_support.go index dcccb77dd466..bd71406f3229 100644 --- a/internal/controller/services/monitoring/monitoring_controller_support.go +++ b/internal/controller/services/monitoring/monitoring_controller_support.go @@ -391,7 +391,7 @@ func checkMonitoringPreconditions(ctx context.Context, rr *odhtypes.Reconciliati // Check for opentelemetry-product operator if either metrics or traces are enabled if monitoring.Spec.Metrics != nil || monitoring.Spec.Traces != nil { - if found, err := cluster.OperatorExists(ctx, rr.Client, opentelemetryOperator); err != nil || !found { + if openTelemetryInfo, err := cluster.OperatorExists(ctx, rr.Client, opentelemetryOperator); err != nil || openTelemetryInfo == nil { if err != nil { return odherrors.NewStopErrorW(err) } @@ -401,7 +401,7 @@ func checkMonitoringPreconditions(ctx context.Context, rr *odhtypes.Reconciliati // Check for cluster-observability-operator if metrics are enabled if monitoring.Spec.Metrics != nil { - if found, err := cluster.OperatorExists(ctx, rr.Client, clusterObservabilityOperator); err != nil || !found { + if clusterObservabilityOperatorInfo, err := cluster.OperatorExists(ctx, rr.Client, clusterObservabilityOperator); err != nil || clusterObservabilityOperatorInfo == nil { if err != nil { return odherrors.NewStopErrorW(err) } @@ -411,7 +411,7 @@ func checkMonitoringPreconditions(ctx context.Context, rr *odhtypes.Reconciliati // Check for tempo-product operator if traces are enabled if monitoring.Spec.Traces != nil { - if found, err := cluster.OperatorExists(ctx, rr.Client, tempoOperator); err != nil || !found { + if tempoOperatorInfo, err := cluster.OperatorExists(ctx, rr.Client, tempoOperator); err != nil || tempoOperatorInfo == nil { if err != nil { return odherrors.NewStopErrorW(err) } diff --git a/pkg/cluster/cluster_config.go b/pkg/cluster/cluster_config.go index 12411671bab4..5ef0075f1303 100644 --- a/pkg/cluster/cluster_config.go +++ b/pkg/cluster/cluster_config.go @@ -250,8 +250,8 @@ func GetClusterServiceVersion(ctx context.Context, c client.Client, namespace st // detectSelfManaged detects if it is Self Managed Rhoai or OpenDataHub. func detectSelfManaged(ctx context.Context, cli client.Client) (common.Platform, error) { - exists, err := OperatorExists(ctx, cli, "rhods-operator") - if exists { + operatorInfo, err := OperatorExists(ctx, cli, "rhods-operator") + if operatorInfo != nil { return SelfManagedRhoai, nil } diff --git a/pkg/cluster/operator.go b/pkg/cluster/operator.go index 7314a4098546..34931bbf47ab 100644 --- a/pkg/cluster/operator.go +++ b/pkg/cluster/operator.go @@ -15,6 +15,11 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) +// OperatorInfo Struct to retrieve the information about an installed operator. +type OperatorInfo struct { + Version string +} + // GetSubscription checks if a Subscription for the operator exists in the given namespace. // if exists, return object; otherwise, return error. func GetSubscription(ctx context.Context, cli client.Client, namespace string, name string) (*v1alpha1.Subscription, error) { @@ -56,21 +61,35 @@ func DeleteExistingSubscription(ctx context.Context, cli client.Client, operator } // OperatorExists checks if an Operator with 'operatorPrefix' is installed. -// Return true if found it, false if not. -// if we need to check exact version of the operator installed, can append vX.Y.Z later. -func OperatorExists(ctx context.Context, cli client.Client, operatorPrefix string) (bool, error) { +// If the operator exists, it returns some operator information. +// If the operator does not exist, it returns a nil reference. +func OperatorExists(ctx context.Context, cli client.Client, operatorPrefix string) (*OperatorInfo, error) { opConditionList := &ofapiv2.OperatorConditionList{} err := cli.List(ctx, opConditionList) if err != nil { - return false, err + // return nil reference and the error when parsing the list + return nil, err } for _, opCondition := range opConditionList.Items { - if strings.HasPrefix(opCondition.Name, operatorPrefix) { - return true, nil + expectedPrefix := fmt.Sprintf("%s.", operatorPrefix) + if !strings.HasPrefix(opCondition.Name, expectedPrefix) { + // Skip if no OperatorCondition is found with the expected prefix + continue } + // Get the version from the operatorCondition name, trimming the prefix. + version := strings.TrimPrefix(opCondition.Name, expectedPrefix) + if version == "" { + // Return Operator info with an empty version if the version is empty. + return &OperatorInfo{Version: ""}, nil + } + // Return the OperatorInfo + if !strings.HasPrefix(version, "v") { + version = fmt.Sprintf("v%s", version) + } + return &OperatorInfo{Version: version}, nil } - - return false, nil + // return nil reference if the operator is not installed in the cluster + return nil, nil } // CustomResourceDefinitionExists checks if a CustomResourceDefinition with the given GVK exists. diff --git a/tests/e2e/test_context_test.go b/tests/e2e/test_context_test.go index 930893d64f98..22322e81c0c5 100644 --- a/tests/e2e/test_context_test.go +++ b/tests/e2e/test_context_test.go @@ -1363,7 +1363,8 @@ func (tc *TestContext) ApproveInstallPlan(plan *ofapi.InstallPlan) { // - bool: True if an operator matching the prefix is found, false otherwise. // - error: Any error encountered during the search operation. func (tc *TestContext) CheckOperatorExists(operatorNamePrefix string) (bool, error) { - return cluster.OperatorExists(tc.Context(), tc.Client(), operatorNamePrefix) + operatorInfo, err := cluster.OperatorExists(tc.Context(), tc.Client(), operatorNamePrefix) + return operatorInfo != nil, err } // EnsureWebhookBlocksResourceCreation verifies that webhook validation blocks creation of resources with invalid values.