Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ require (
github.com/spf13/viper v1.20.1
github.com/stretchr/testify v1.10.0
go.uber.org/zap v1.27.0
golang.org/x/mod v0.24.0
gomodules.xyz/jsonpatch/v2 v2.4.0
gopkg.in/yaml.v3 v3.0.1
k8s.io/api v0.32.4
Expand Down
13 changes: 13 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9L
github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc=
github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
Expand Down Expand Up @@ -150,6 +152,7 @@ github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ=
github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
Expand All @@ -167,25 +170,34 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU=
golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY=
golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc=
golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ=
golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg=
golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Expand All @@ -198,6 +210,7 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU=
golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
Expand Down
45 changes: 31 additions & 14 deletions internal/controller/components/kueue/kueue_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"slices"

operatorv1 "github.com/openshift/api/operator/v1"
"golang.org/x/mod/semver"
corev1 "k8s.io/api/core/v1"
k8serr "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
Expand All @@ -22,18 +23,19 @@ import (

var (
frameworkMapping = map[string]string{
"pod": "Pod",
"deployment": "Deployment",
"statefulset": "StatefulSet",
"batch/job": "BatchJob",
"ray.io/rayjob": "RayJob",
"ray.io/raycluster": "RayCluster",
"jobset.x-k8s.io/jobset": "JobSet",
"kubeflow.org/mpijob": "MPIJob",
"kubeflow.org/paddlejob": "PaddleJob",
"kubeflow.org/pytorchjob": "PyTorchJob",
"kubeflow.org/tfjob": "TFJob",
"kubeflow.org/xgboostjob": "XGBoostJob",
"pod": "Pod",
"deployment": "Deployment",
"statefulset": "StatefulSet",
"batch/job": "BatchJob",
"ray.io/rayjob": "RayJob",
"ray.io/raycluster": "RayCluster",
"jobset.x-k8s.io/jobset": "JobSet",
"kubeflow.org/mpijob": "MPIJob",
"kubeflow.org/paddlejob": "PaddleJob",
"kubeflow.org/pytorchjob": "PyTorchJob",
"kubeflow.org/tfjob": "TFJob",
"kubeflow.org/xgboostjob": "XGBoostJob",
"trainer.kubeflow.org/trainjob": "TrainJob",
"leaderworkerset.x-k8s.io/leaderworkerset": "LeaderWorkerSet",
}
)
Expand Down Expand Up @@ -79,11 +81,19 @@ func createKueueCR(ctx context.Context, rr *odhtypes.ReconciliationRequest) (*un
return nil, fmt.Errorf("failed to lookup kueue manager config: %w", err)
}

kueueInfo, err := cluster.OperatorExists(ctx, rr.Client, kueueOperator)
if err != nil {
return nil, fmt.Errorf("failed to check if %s exists: %w", kueueOperator, err)
}

if kueueInfo == nil {
return nil, ErrKueueOperatorNotInstalled
}
//
// Conversions
//

integrations, err := convertIntegrations(managerConfig)
integrations, err := convertIntegrations(managerConfig, kueueInfo.Version)
if err != nil {
return nil, fmt.Errorf("failed to convert integrations: %w", err)
}
Expand Down Expand Up @@ -148,7 +158,7 @@ func createKueueCR(ctx context.Context, rr *odhtypes.ReconciliationRequest) (*un
}

// convertIntegrations converts the integrations section from ConfigMap to Kueue operator format.
func convertIntegrations(config map[string]interface{}) (map[string]interface{}, error) {
func convertIntegrations(config map[string]interface{}, kueueVersion string) (map[string]interface{}, error) {
integrations := map[string]interface{}{}

//
Expand All @@ -169,6 +179,13 @@ func convertIntegrations(config map[string]interface{}) (map[string]interface{},
"StatefulSet",
)

// Support for TrainJob was added in Kueue 1.2.0, so if the installed version
// of kueue is equal or greater than 1.2.0, add TrainJob to the list of frameworks.
versionCmp := semver.Compare(kueueVersion, "v1.2.0")
if versionCmp >= 0 {
frameworkSet.Insert("TrainJob")
}

for _, framework := range frameworks {
if converted, ok := frameworkMapping[framework]; ok {
frameworkSet.Insert(converted)
Expand Down
90 changes: 90 additions & 0 deletions internal/controller/components/kueue/kueue_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
package kueue

import (
"fmt"
"testing"

ofapiv2 "github.com/operator-framework/api/pkg/operators/v2"
"gopkg.in/yaml.v3"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

componentApi "github.com/opendatahub-io/opendatahub-operator/v2/api/components/v1alpha1"
Expand Down Expand Up @@ -63,6 +66,7 @@ integrations:
- "kubeflow.org/pytorchjob"
- "kubeflow.org/tfjob"
- "kubeflow.org/xgboostjob"
- "trainer.kubeflow.org/trainjob"
- "workload.codeflare.dev/appwrapper"
- "leaderworkerset.x-k8s.io/leaderworkerset"
manageJobsWithoutQueueName: true
Expand Down Expand Up @@ -93,6 +97,7 @@ spec:
- RayJob
- StatefulSet
- TFJob
- TrainJob
- XGBoostJob
workloadManagement:
labelPolicy: None
Expand Down Expand Up @@ -137,6 +142,7 @@ spec:
- RayCluster
- RayJob
- StatefulSet
- TrainJob
`
runKueueCRTest(t, kueueConfig, kueueCR)
}
Expand All @@ -163,6 +169,7 @@ spec:
- RayCluster
- RayJob
- StatefulSet
- TrainJob
`
runKueueCRTest(t, kueueConfig, kueueCR)
}
Expand All @@ -189,6 +196,7 @@ spec:
- RayCluster
- RayJob
- StatefulSet
- TrainJob
`
runKueueCRTest(t, kueueConfig, kueueCR)
}
Expand Down Expand Up @@ -219,6 +227,7 @@ spec:
- RayCluster
- RayJob
- StatefulSet
- TrainJob
`
runKueueCRTest(t, kueueConfig, kueueCR)
}
Expand Down Expand Up @@ -257,6 +266,7 @@ spec:
- RayCluster
- RayJob
- StatefulSet
- TrainJob
externalFrameworks:
- MPIJob
- RayJob
Expand Down Expand Up @@ -287,6 +297,7 @@ spec:
- RayCluster
- RayJob
- StatefulSet
- TrainJob
`
runKueueCRTest(t, kueueConfig, kueueCR)
}
Expand Down Expand Up @@ -316,6 +327,7 @@ spec:
- RayCluster
- RayJob
- StatefulSet
- TrainJob
workloadManagement:
labelPolicy: None
`
Expand Down Expand Up @@ -348,6 +360,7 @@ spec:
- RayCluster
- RayJob
- StatefulSet
- TrainJob
gangScheduling:
policy: ByWorkload
byWorkload:
Expand Down Expand Up @@ -382,6 +395,7 @@ spec:
- RayCluster
- RayJob
- StatefulSet
- TrainJob
preemption:
preemptionPolicy: FairSharing
fairSharing:
Expand Down Expand Up @@ -418,6 +432,7 @@ spec:
- RayCluster
- RayJob
- StatefulSet
- TrainJob
gangScheduling:
policy: ByWorkload
byWorkload:
Expand Down Expand Up @@ -457,6 +472,7 @@ spec:
- RayCluster
- RayJob
- StatefulSet
- TrainJob
`
runKueueCRTest(t, kueueConfig, kueueCR)
}
Expand Down Expand Up @@ -489,6 +505,7 @@ spec:
- RayCluster
- RayJob
- StatefulSet
- TrainJob
externalFrameworks:
- MPIJob
- RayJob
Expand Down Expand Up @@ -524,6 +541,7 @@ spec:
- RayCluster
- RayJob
- StatefulSet
- TrainJob
labelKeys:
- custom.label/key1
- custom.label/key2
Expand Down Expand Up @@ -551,6 +569,15 @@ func runKueueCRTest(t *testing.T, configMapYAML string, expectedCRYAML string) {
}
g.Expect(fakeClient.Create(ctx, dsci)).Should(Succeed())

// Set an OperatorCondition for kueue-operator with the 1.2.0 version
operatorCondition := &ofapiv2.OperatorCondition{
ObjectMeta: metav1.ObjectMeta{
Name: "kueue-operator.v1.2.0",
Namespace: "openshift-kueue-operator",
},
}
g.Expect(fakeClient.Create(ctx, operatorCondition)).Should(Succeed())

rr := &odhtypes.ReconciliationRequest{
Client: fakeClient,
Instance: &componentApi.Kueue{},
Expand Down Expand Up @@ -633,3 +660,66 @@ invalid: yaml: content: [
g.Expect(result).Should(BeNil())
g.Expect(err.Error()).Should(ContainSubstring("failed to lookup kueue manager config"))
}

// --- Test: TrainJob framework generic test, with RHBoKv110 and RHBoKv120 ---.
func TestCreateKueueConfigurationCR_TrainJobFramework(t *testing.T) {
tests := []struct {
name string
kueueVersion string
}{
{
name: "TestCreateKueueConfigurationCR_TrainJob_Framework_WithRHBoKv110",
kueueVersion: "v1.1.0",
},
{
name: "TestCreateKueueConfigurationCR_TrainJob_Framework_WithRHBoKv120",
kueueVersion: "v1.2.0",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)
ctx := t.Context()

// Setup fake client
fakeClient, err := fakeclient.New()
g.Expect(err).ShouldNot(HaveOccurred())

// DSCI with applications namespace
dsci := &dsciv2.DSCInitialization{
ObjectMeta: metav1.ObjectMeta{Name: "test-dsci"},
Spec: dsciv2.DSCInitializationSpec{ApplicationsNamespace: "test-namespace"},
}
g.Expect(fakeClient.Create(ctx, dsci)).Should(Succeed())

// Set an OperatorCondition for kueue-operator with the desired version
operatorCondition := &ofapiv2.OperatorCondition{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("kueue-operator.%s", tt.kueueVersion),
Namespace: "openshift-kueue-operator",
},
}

g.Expect(fakeClient.Create(ctx, operatorCondition)).Should(Succeed())

rr := &odhtypes.ReconciliationRequest{Client: fakeClient, Instance: &componentApi.Kueue{}}

// No ConfigMap needed; defaults will be used
result, err := createKueueCR(ctx, rr)
g.Expect(err).ShouldNot(HaveOccurred())

// Extract frameworks from the resulting CR and check if the expected values are there
frameworks, _, err := unstructured.NestedStringSlice(result.Object, "spec", "config", "integrations", "frameworks")
g.Expect(err).ShouldNot(HaveOccurred())
switch tt.kueueVersion {
case "v1.1.0":
g.Expect(frameworks).ShouldNot(ContainElement("TrainJob"))
case "v1.2.0":
g.Expect(frameworks).Should(ContainElement("TrainJob"))
default:
t.Skipf("Unexpected kueue version: %s", tt.kueueVersion)
}
})
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func checkPreConditions(ctx context.Context, rr *odhtypes.ReconciliationRequest)
case operatorv1.Managed:
return ErrKueueStateManagedNotSupported
case operatorv1.Unmanaged:
if found, err := cluster.OperatorExists(ctx, rr.Client, kueueOperator); err != nil || !found {
if kueueInfo, err := cluster.OperatorExists(ctx, rr.Client, kueueOperator); err != nil || kueueInfo == nil {
if err != nil {
return odherrors.NewStopErrorW(err)
}
Expand Down
Loading