From 1250b1350e4d5a1e28ae0ac1da3183a2b915bcbd Mon Sep 17 00:00:00 2001
From: Danil-Grigorev
Date: Wed, 30 Oct 2024 10:35:41 +0100
Subject: [PATCH 1/2] cross-ns CC: Add classNamespace to topology

Signed-off-by: Danil-Grigorev
---
 api/v1beta1/cluster_types.go                  | 14 ++-
 api/v1beta1/index/cluster.go                  | 26 ++++++
 api/v1beta1/index/index.go                    |  4 +
 api/v1beta1/zz_generated.openapi.go           |  7 ++
 .../crd/bases/cluster.x-k8s.io_clusters.yaml  |  9 ++
 .../cluster-class/write-clusterclass.md       | 87 ++++++++++++++++++-
 internal/apis/core/v1alpha4/conversion.go     |  1 +
 .../core/v1alpha4/zz_generated.conversion.go  |  1 +
 .../topology/cluster/cluster_controller.go    |  6 +-
 .../cluster/cluster_controller_test.go        | 68 +++++++++++++++
 .../topology/cluster/suite_test.go            |  5 +-
 internal/webhooks/clusterclass.go             |  7 +-
 internal/webhooks/clusterclass_test.go        | 67 ++++++++++++++
 util/test/builder/builders.go                 | 15 +++-
 14 files changed, 305 insertions(+), 12 deletions(-)

diff --git a/api/v1beta1/cluster_types.go b/api/v1beta1/cluster_types.go
index bbce8f181311..25c5cf45f7c7 100644
--- a/api/v1beta1/cluster_types.go
+++ b/api/v1beta1/cluster_types.go
@@ -17,6 +17,7 @@ limitations under the License.
 package v1beta1
 
 import (
+	"cmp"
 	"fmt"
 	"net"
 	"strings"
@@ -517,6 +518,15 @@ type Topology struct {
 	// The name of the ClusterClass object to create the topology.
 	Class string `json:"class"`
 
+	// The namespace of the ClusterClass object to create the topology. If empty, the namespace of the Cluster object is used.
+	// Changing the class namespace is not supported by the rebase procedure, because a ClusterClass in a different
+	// namespace uses templates local to its own namespace, and moving a Cluster's templates to another namespace is not allowed.
+	// +optional
+	// +kubebuilder:validation:MinLength=1
+	// +kubebuilder:validation:MaxLength=253
+	// +kubebuilder:validation:Pattern="^[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\\.[a-z0-9](?:[-a-z0-9]*[a-z0-9])?)*$"
+	ClassNamespace string `json:"classNamespace,omitempty"`
+
 	// The Kubernetes version of the cluster.
 	Version string `json:"version"`
 
@@ -1045,7 +1055,9 @@ func (c *Cluster) GetClassKey() types.NamespacedName {
 	if c.Spec.Topology == nil {
 		return types.NamespacedName{}
 	}
-	return types.NamespacedName{Namespace: c.GetNamespace(), Name: c.Spec.Topology.Class}
+
+	namespace := cmp.Or(c.Spec.Topology.ClassNamespace, c.Namespace)
+	return types.NamespacedName{Namespace: namespace, Name: c.Spec.Topology.Class}
 }
 
 // GetConditions returns the set of conditions for this object.
diff --git a/api/v1beta1/index/cluster.go b/api/v1beta1/index/cluster.go
index d61d8c36f90d..09fa67cb2ec5 100644
--- a/api/v1beta1/index/cluster.go
+++ b/api/v1beta1/index/cluster.go
@@ -30,6 +30,8 @@ import (
 const (
 	// ClusterClassNameField is used by the Cluster controller to index Clusters by ClusterClass name.
 	ClusterClassNameField = "spec.topology.class"
+	// ClusterClassNamespaceField is used by the Cluster controller to index Clusters by ClusterClass namespace.
+	ClusterClassNamespaceField = "spec.topology.classNamespace"
 )
 
 // ByClusterClassName adds the cluster class name index to the
@@ -44,6 +46,18 @@ func ByClusterClassName(ctx context.Context, mgr ctrl.Manager) error {
 	return nil
 }
 
+// ByClusterClassNamespace adds the cluster class namespace index to the
+// managers cache.
+func ByClusterClassNamespace(ctx context.Context, mgr ctrl.Manager) error {
+	if err := mgr.GetCache().IndexField(ctx, &clusterv1.Cluster{},
+		ClusterClassNamespaceField,
+		ClusterByClusterClassNamespace,
+	); err != nil {
+		return errors.Wrap(err, "error setting index field")
+	}
+	return nil
+}
+
 // ClusterByClusterClassClassName contains the logic to index Clusters by ClusterClass name.
 func ClusterByClusterClassClassName(o client.Object) []string {
 	cluster, ok := o.(*clusterv1.Cluster)
@@ -55,3 +69,15 @@ func ClusterByClusterClassClassName(o client.Object) []string {
 	}
 	return nil
 }
+
+// ClusterByClusterClassNamespace contains the logic to index Clusters by ClusterClass namespace.
+func ClusterByClusterClassNamespace(o client.Object) []string {
+	cluster, ok := o.(*clusterv1.Cluster)
+	if !ok {
+		panic(fmt.Sprintf("Expected Cluster but got a %T", o))
+	}
+	if cluster.Spec.Topology != nil {
+		return []string{cluster.GetClassKey().Namespace}
+	}
+	return nil
+}
diff --git a/api/v1beta1/index/index.go b/api/v1beta1/index/index.go
index c6a17bf175bc..b5f0e7d9df55 100644
--- a/api/v1beta1/index/index.go
+++ b/api/v1beta1/index/index.go
@@ -39,6 +39,10 @@ func AddDefaultIndexes(ctx context.Context, mgr ctrl.Manager) error {
 		if err := ByClusterClassName(ctx, mgr); err != nil {
 			return err
 		}
+
+		if err := ByClusterClassNamespace(ctx, mgr); err != nil {
+			return err
+		}
 	}
 
 	if feature.Gates.Enabled(feature.MachinePool) {
diff --git a/api/v1beta1/zz_generated.openapi.go b/api/v1beta1/zz_generated.openapi.go
index 3167c70da4e7..92b169cfd0db 100644
--- a/api/v1beta1/zz_generated.openapi.go
+++ b/api/v1beta1/zz_generated.openapi.go
@@ -4461,6 +4461,13 @@ func schema_sigsk8sio_cluster_api_api_v1beta1_Topology(ref common.ReferenceCallb
 						Format:      "",
 					},
 				},
+				"classNamespace": {
+					SchemaProps: spec.SchemaProps{
+						Description: "The namespace of the ClusterClass object to create the topology. If empty, the namespace of the Cluster object is used. Changing the class namespace is not supported by the rebase procedure, because a ClusterClass in a different namespace uses templates local to its own namespace, and moving a Cluster's templates to another namespace is not allowed.",
+						Type:        []string{"string"},
+						Format:      "",
+					},
+				},
 				"version": {
 					SchemaProps: spec.SchemaProps{
 						Description: "The Kubernetes version of the cluster.",
diff --git a/config/crd/bases/cluster.x-k8s.io_clusters.yaml b/config/crd/bases/cluster.x-k8s.io_clusters.yaml
index 4ad16356145b..63a036e7b2f5 100644
--- a/config/crd/bases/cluster.x-k8s.io_clusters.yaml
+++ b/config/crd/bases/cluster.x-k8s.io_clusters.yaml
@@ -928,6 +928,15 @@ spec:
                 description: The name of the ClusterClass object to create the
                   topology.
                 type: string
+              classNamespace:
+                description: |-
+                  The namespace of the ClusterClass object to create the topology. If empty, the namespace of the Cluster object is used.
+                  Changing the class namespace is not supported by the rebase procedure, because a ClusterClass in a different
+                  namespace uses templates local to its own namespace, and moving a Cluster's templates to another namespace is not allowed.
+                maxLength: 253
+                minLength: 1
+                pattern: ^[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9](?:[-a-z0-9]*[a-z0-9])?)*$
+                type: string
               controlPlane:
                 description: controlPlane describes the cluster control plane.
                properties:
diff --git a/docs/book/src/tasks/experimental-features/cluster-class/write-clusterclass.md b/docs/book/src/tasks/experimental-features/cluster-class/write-clusterclass.md
index 1c722cfcf5f9..e7942308042e 100644
--- a/docs/book/src/tasks/experimental-features/cluster-class/write-clusterclass.md
+++ b/docs/book/src/tasks/experimental-features/cluster-class/write-clusterclass.md
@@ -15,7 +15,7 @@ flexible enough to be used in as many Clusters as possible by supporting variant
   * [Defining a custom naming strategy for MachineDeployment objects](#defining-a-custom-naming-strategy-for-machinedeployment-objects)
   * [Defining a custom naming strategy for MachinePool objects](#defining-a-custom-naming-strategy-for-machinepool-objects)
 * [Advanced features of ClusterClass with patches](#advanced-features-of-clusterclass-with-patches)
-  * [MachineDeployment variable overrides](#machinedeployment-variable-overrides)
+  * [MachineDeployment and MachinePool variable overrides](#machinedeployment-and-machinepool-variable-overrides)
   * [Builtin variables](#builtin-variables)
   * [Complex variable types](#complex-variable-types)
   * [Using variable values in JSON patches](#using-variable-values-in-json-patches)
@@ -438,11 +438,94 @@ spec:
       template: "{{ .cluster.name }}-{{ .machinePool.topologyName }}-{{ .random }}"
 ```
 
+### Defining a custom namespace for the ClusterClass object
+
+As a user, I may need to create a `Cluster` from a `ClusterClass` object that exists only in a different namespace. To uniquely identify the `ClusterClass`, a `NamespacedName` reference is constructed from the combination of:
+* `cluster.spec.topology.classNamespace` - the namespace of the `ClusterClass` object.
+* `cluster.spec.topology.class` - the name of the `ClusterClass` object.
+
+An example of a `Cluster` object with the `name/namespace` reference:
+
+```yaml
+apiVersion: cluster.x-k8s.io/v1beta1
+kind: Cluster
+metadata:
+  name: my-docker-cluster
+  namespace: default
+spec:
+  topology:
+    class: docker-clusterclass-v0.1.0
+    classNamespace: default
+    version: v1.22.4
+    controlPlane:
+      replicas: 3
+    workers:
+      machineDeployments:
+      - class: default-worker
+        name: md-0
+        replicas: 4
+        failureDomain: region
+```
+
+#### Securing cross-namespace reference to the ClusterClass
+
+It is often desirable to restrict which namespaces a `Cluster` may reference a `ClusterClass` from. This can be implemented by defining a [`ValidatingAdmissionPolicy`](https://kubernetes.io/docs/reference/access-authn-authz/validating-admission-policy/#what-is-validating-admission-policy) on the `Cluster` object.
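+
+The policy below treats the keys of a parameter `Secret` as the allow-list of namespaces a `Cluster` may reference a `ClusterClass` from; a `Cluster` that omits `classNamespace` always refers to its own namespace and is accepted. A minimal Go sketch of the check the CEL expression performs (illustrative only; the real evaluation happens inside the API server):
+
+```go
+// allowed reports whether a Cluster may reference a ClusterClass in
+// classNamespace, given the data keys of the parameter Secret.
+func allowed(classNamespace string, params map[string][]byte) bool {
+	if classNamespace == "" { // field omitted: same-namespace reference
+		return true
+	}
+	_, ok := params[classNamespace]
+	return ok
+}
+```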
+
+An example of such a policy:
+
+```yaml
+apiVersion: admissionregistration.k8s.io/v1
+kind: ValidatingAdmissionPolicy
+metadata:
+  name: "cluster-class-ref.cluster.x-k8s.io"
+spec:
+  failurePolicy: Fail
+  paramKind:
+    apiVersion: v1
+    kind: Secret
+  matchConstraints:
+    resourceRules:
+    - apiGroups:   ["cluster.x-k8s.io"]
+      apiVersions: ["v1beta1"]
+      operations:  ["CREATE", "UPDATE"]
+      resources:   ["clusters"]
+  validations:
+    - expression: "!has(object.spec.topology.classNamespace) || object.spec.topology.classNamespace in params.data"
+---
+apiVersion: admissionregistration.k8s.io/v1
+kind: ValidatingAdmissionPolicyBinding
+metadata:
+  name: "cluster-class-ref-binding.cluster.x-k8s.io"
+spec:
+  policyName: "cluster-class-ref.cluster.x-k8s.io"
+  validationActions: [Deny]
+  paramRef:
+    name: "allowed-namespaces.cluster-class-ref.cluster.x-k8s.io"
+    namespace: "default"
+    parameterNotFoundAction: Deny
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: "allowed-namespaces.cluster-class-ref.cluster.x-k8s.io"
+  namespace: "default"
+data:
+  default: ""
+```
+
 ## Advanced features of ClusterClass with patches
 
 This section will explain more advanced features of ClusterClass patches.
 
-### MachineDeployment & MachinePool variable overrides
+### MachineDeployment and MachinePool variable overrides
 
 If you want to use many variations of MachineDeployments in Clusters, you can either define
 a MachineDeployment class for every variation or you can define patches and variables to
diff --git a/internal/apis/core/v1alpha4/conversion.go b/internal/apis/core/v1alpha4/conversion.go
index cd69ca35d2ba..8e2ed513d13d 100644
--- a/internal/apis/core/v1alpha4/conversion.go
+++ b/internal/apis/core/v1alpha4/conversion.go
@@ -42,6 +42,7 @@ func (src *Cluster) ConvertTo(dstRaw conversion.Hub) error {
 	if dst.Spec.Topology == nil {
 		dst.Spec.Topology = &clusterv1.Topology{}
 	}
+	dst.Spec.Topology.ClassNamespace = restored.Spec.Topology.ClassNamespace
 	dst.Spec.Topology.Variables = restored.Spec.Topology.Variables
 	dst.Spec.Topology.ControlPlane.Variables = restored.Spec.Topology.ControlPlane.Variables
 
diff --git a/internal/apis/core/v1alpha4/zz_generated.conversion.go b/internal/apis/core/v1alpha4/zz_generated.conversion.go
index 14df0dfc9112..f5c0433a653d 100644
--- a/internal/apis/core/v1alpha4/zz_generated.conversion.go
+++ b/internal/apis/core/v1alpha4/zz_generated.conversion.go
@@ -1757,6 +1757,7 @@ func Convert_v1alpha4_Topology_To_v1beta1_Topology(in *Topology, out *v1beta1.To
 
 func autoConvert_v1beta1_Topology_To_v1alpha4_Topology(in *v1beta1.Topology, out *Topology, s conversion.Scope) error {
 	out.Class = in.Class
+	// WARNING: in.ClassNamespace requires manual conversion: does not exist in peer-type
 	out.Version = in.Version
 	out.RolloutAfter = (*metav1.Time)(unsafe.Pointer(in.RolloutAfter))
 	if err := Convert_v1beta1_ControlPlaneTopology_To_v1alpha4_ControlPlaneTopology(&in.ControlPlane, &out.ControlPlane, s); err != nil {
diff --git a/internal/controllers/topology/cluster/cluster_controller.go b/internal/controllers/topology/cluster/cluster_controller.go
index 30c140734fe0..a3eebd539d92 100644
--- a/internal/controllers/topology/cluster/cluster_controller.go
+++ b/internal/controllers/topology/cluster/cluster_controller.go
@@ -492,8 +492,10 @@ func (r *Reconciler) clusterClassToCluster(ctx context.Context, o client.Object)
 	if err := r.Client.List(
 		ctx,
 		clusterList,
-		client.MatchingFields{index.ClusterClassNameField: clusterClass.Name},
-		client.InNamespace(clusterClass.Namespace),
+		client.MatchingFields{
index.ClusterClassNameField: clusterClass.Name, + index.ClusterClassNamespaceField: clusterClass.Namespace, + }, ); err != nil { return nil } diff --git a/internal/controllers/topology/cluster/cluster_controller_test.go b/internal/controllers/topology/cluster/cluster_controller_test.go index 61fc1d8dd810..9b6b3419c76b 100644 --- a/internal/controllers/topology/cluster/cluster_controller_test.go +++ b/internal/controllers/topology/cluster/cluster_controller_test.go @@ -52,6 +52,7 @@ import ( var ( clusterName1 = "cluster1" clusterName2 = "cluster2" + clusterName3 = "cluster3" clusterClassName1 = "class1" clusterClassName2 = "class2" infrastructureMachineTemplateName1 = "inframachinetemplate1" @@ -854,6 +855,19 @@ func setupTestEnvForIntegrationTests(ns *corev1.Namespace) (func() error, error) Build()). Build() + // Cross ns referencing cluster + cluster3 := builder.Cluster(ns.Name, clusterName3). + WithTopology( + builder.ClusterTopology(). + WithClass(clusterClass.Name). + WithClassNamespace("other"). + WithMachineDeployment(machineDeploymentTopology2). + WithMachinePool(machinePoolTopology2). + WithVersion("1.21.0"). + WithControlPlaneReplicas(1). + Build()). + Build() + // Setup kubeconfig secrets for the clusters, so the ClusterCacheTracker works. cluster1Secret := kubeconfig.GenerateSecret(cluster1, kubeconfig.FromEnvTestConfig(env.Config, cluster1)) cluster2Secret := kubeconfig.GenerateSecret(cluster2, kubeconfig.FromEnvTestConfig(env.Config, cluster2)) @@ -876,6 +890,7 @@ func setupTestEnvForIntegrationTests(ns *corev1.Namespace) (func() error, error) clusterClassForRebase, cluster1, cluster2, + cluster3, cluster1Secret, cluster2Secret, } @@ -1577,3 +1592,56 @@ func TestReconciler_ValidateCluster(t *testing.T) { }) } } + +func TestClusterClassToCluster(t *testing.T) { + utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.ClusterTopology, true) + g := NewWithT(t) + + ns, err := env.CreateNamespace(ctx, "cluster-reconcile-namespace") + g.Expect(err).ToNot(HaveOccurred()) + + // Create the objects needed for the integration test: + cleanup, err := setupTestEnvForIntegrationTests(ns) + g.Expect(err).ToNot(HaveOccurred()) + + // Defer a cleanup function that deletes each of the objects created during setupTestEnvForIntegrationTests. 
+	defer func() {
+		g.Expect(cleanup()).To(Succeed())
+	}()
+
+	tests := []struct {
+		name         string
+		clusterClass *clusterv1.ClusterClass
+		expected     []reconcile.Request
+	}{
+		{
+			name:         "ClusterClass change should request reconcile for the referenced class",
+			clusterClass: builder.ClusterClass(ns.Name, clusterClassName1).Build(),
+			expected: []reconcile.Request{
+				{NamespacedName: client.ObjectKeyFromObject(builder.Cluster(ns.Name, clusterName1).Build())},
+				{NamespacedName: client.ObjectKeyFromObject(builder.Cluster(ns.Name, clusterName2).Build())},
+			},
+		},
+		{
+			name:         "ClusterClass with no matching name and namespace should not trigger reconcile",
+			clusterClass: builder.ClusterClass("other", clusterClassName2).Build(),
+			expected:     []reconcile.Request{},
+		},
+		{
+			name:         "Different ClusterClass with matching name and namespace should trigger reconcile",
+			clusterClass: builder.ClusterClass("other", clusterClassName1).Build(),
+			expected: []reconcile.Request{
+				{NamespacedName: client.ObjectKeyFromObject(builder.Cluster(ns.Name, clusterName3).Build())},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(*testing.T) {
+			r := &Reconciler{Client: env.GetClient()}
+
+			requests := r.clusterClassToCluster(ctx, tt.clusterClass)
+			g.Expect(requests).To(ConsistOf(tt.expected))
+		})
+	}
+}
diff --git a/internal/controllers/topology/cluster/suite_test.go b/internal/controllers/topology/cluster/suite_test.go
index 715f4a52bd47..32a41031689e 100644
--- a/internal/controllers/topology/cluster/suite_test.go
+++ b/internal/controllers/topology/cluster/suite_test.go
@@ -60,11 +60,14 @@ func TestMain(m *testing.M) {
 		if err := index.AddDefaultIndexes(ctx, mgr); err != nil {
 			panic(fmt.Sprintf("unable to setup index: %v", err))
 		}
-		// Set up the ClusterClassName index explicitly here. This index is ordinarily created in
+		// Set up the ClusterClassName and ClusterClassNamespace indexes explicitly here. These indexes are ordinarily created in
 		// index.AddDefaultIndexes. That doesn't happen here because the ClusterClass feature flag is not set.
		if err := index.ByClusterClassName(ctx, mgr); err != nil {
 			panic(fmt.Sprintf("unable to setup index: %v", err))
 		}
+		if err := index.ByClusterClassNamespace(ctx, mgr); err != nil {
+			panic(fmt.Sprintf("unable to setup index: %v", err))
+		}
 	}
 	setupReconcilers := func(ctx context.Context, mgr ctrl.Manager) {
 		clusterCache, err := clustercache.SetupWithManager(ctx, mgr, clustercache.Options{
diff --git a/internal/webhooks/clusterclass.go b/internal/webhooks/clusterclass.go
index 65fa7a2d69cd..ab0e842e0400 100644
--- a/internal/webhooks/clusterclass.go
+++ b/internal/webhooks/clusterclass.go
@@ -379,12 +379,15 @@ func (webhook *ClusterClass) classNamesFromMPWorkerClass(w clusterv1.WorkersClas
 func (webhook *ClusterClass) getClustersUsingClusterClass(ctx context.Context, clusterClass *clusterv1.ClusterClass) ([]clusterv1.Cluster, error) {
 	clusters := &clusterv1.ClusterList{}
 	err := webhook.Client.List(ctx, clusters,
-		client.MatchingFields{index.ClusterClassNameField: clusterClass.Name},
-		client.InNamespace(clusterClass.Namespace),
+		client.MatchingFields{
+			index.ClusterClassNameField:      clusterClass.Name,
+			index.ClusterClassNamespaceField: clusterClass.Namespace,
+		},
 	)
 	if err != nil {
 		return nil, err
 	}
+
 	return clusters.Items, nil
 }
 
diff --git a/internal/webhooks/clusterclass_test.go b/internal/webhooks/clusterclass_test.go
index 49b8928529ad..6275a271da7b 100644
--- a/internal/webhooks/clusterclass_test.go
+++ b/internal/webhooks/clusterclass_test.go
@@ -92,6 +92,7 @@ func TestClusterClassDefaultNamespaces(t *testing.T) {
 	fakeClient := fake.NewClientBuilder().
 		WithScheme(fakeScheme).
 		WithIndex(&clusterv1.Cluster{}, index.ClusterClassNameField, index.ClusterByClusterClassClassName).
+		WithIndex(&clusterv1.Cluster{}, index.ClusterClassNamespaceField, index.ClusterByClusterClassNamespace).
 		Build()
 
 	// Create the webhook and add the fakeClient as its client.
@@ -1866,6 +1867,7 @@ func TestClusterClassValidation(t *testing.T) {
 			fakeClient := fake.NewClientBuilder().
 				WithScheme(fakeScheme).
 				WithIndex(&clusterv1.Cluster{}, index.ClusterClassNameField, index.ClusterByClusterClassClassName).
+				WithIndex(&clusterv1.Cluster{}, index.ClusterClassNamespaceField, index.ClusterByClusterClassNamespace).
 				Build()
 
 			// Pin the compatibility version used in variable CEL validation to 1.29, so we don't have to continuously refactor
@@ -2513,6 +2515,7 @@ func TestClusterClassValidationWithClusterAwareChecks(t *testing.T) {
 				WithScheme(fakeScheme).
 				WithObjects(tt.clusters...).
 				WithIndex(&clusterv1.Cluster{}, index.ClusterClassNameField, index.ClusterByClusterClassClassName).
+				WithIndex(&clusterv1.Cluster{}, index.ClusterClassNamespaceField, index.ClusterByClusterClassNamespace).
 				Build()
 
 			// Create the webhook and add the fakeClient as its client.
@@ -2527,6 +2530,70 @@
 	}
 }
 
+func TestGetClustersUsingClusterClass(t *testing.T) {
+	// NOTE: The ClusterTopology feature flag is disabled by default, which prevents creating or updating ClusterClasses.
+	// The feature flag is enabled temporarily for this test.
+ utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.ClusterTopology, true) + + topology := builder.ClusterTopology().WithClass("class1") + + tests := []struct { + name string + clusterClass *clusterv1.ClusterClass + clusters []client.Object + expectErr bool + expectClusters []client.Object + }{ + { + name: "ClusterClass should return clusters referencing it", + clusterClass: builder.ClusterClass("default", "class1").Build(), + clusters: []client.Object{ + builder.Cluster("default", "cluster1").WithTopology(topology.Build()).Build(), + builder.Cluster("default", "cluster2").Build(), + builder.Cluster("other", "cluster2").WithTopology(topology.DeepCopy().WithClassNamespace("default").Build()).Build(), + builder.Cluster("other", "cluster3").WithTopology(topology.Build()).Build(), + }, + expectClusters: []client.Object{ + builder.Cluster("default", "cluster1").Build(), + builder.Cluster("other", "cluster2").Build(), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + // Sets up the fakeClient for the test case. + fakeClient := fake.NewClientBuilder(). + WithScheme(fakeScheme). + WithObjects(tt.clusters...). + WithIndex(&clusterv1.Cluster{}, index.ClusterClassNameField, index.ClusterByClusterClassClassName). + WithIndex(&clusterv1.Cluster{}, index.ClusterClassNamespaceField, index.ClusterByClusterClassNamespace). + Build() + + // Create the webhook and add the fakeClient as its client. + webhook := &ClusterClass{Client: fakeClient} + clusters, err := webhook.getClustersUsingClusterClass(ctx, tt.clusterClass) + if tt.expectErr { + g.Expect(err).To(HaveOccurred()) + return + } + g.Expect(err).ToNot(HaveOccurred()) + + clusterKeys := []client.ObjectKey{} + for _, c := range clusters { + clusterKeys = append(clusterKeys, client.ObjectKeyFromObject(&c)) + } + expectedKeys := []client.ObjectKey{} + for _, c := range tt.expectClusters { + expectedKeys = append(expectedKeys, client.ObjectKeyFromObject(c)) + } + g.Expect(clusterKeys).To(Equal(expectedKeys)) + }) + } +} + func TestValidateAutoscalerAnnotationsForClusterClass(t *testing.T) { tests := []struct { name string diff --git a/util/test/builder/builders.go b/util/test/builder/builders.go index dd2ae1dc881f..762b418fe835 100644 --- a/util/test/builder/builders.go +++ b/util/test/builder/builders.go @@ -114,7 +114,7 @@ func (c *ClusterBuilder) Build() *clusterv1.Cluster { // ClusterTopologyBuilder contains the fields needed to build a testable ClusterTopology. type ClusterTopologyBuilder struct { - class string + class, classNamespace string workers *clusterv1.WorkersTopology version string controlPlaneReplicas int32 @@ -136,6 +136,12 @@ func (c *ClusterTopologyBuilder) WithClass(class string) *ClusterTopologyBuilder return c } +// WithClassNamespace adds the passed classNamespace to the ClusterTopologyBuilder. +func (c *ClusterTopologyBuilder) WithClassNamespace(ns string) *ClusterTopologyBuilder { + c.classNamespace = ns + return c +} + // WithVersion adds the passed version to the ClusterTopologyBuilder. func (c *ClusterTopologyBuilder) WithVersion(version string) *ClusterTopologyBuilder { c.version = version @@ -181,9 +187,10 @@ func (c *ClusterTopologyBuilder) WithVariables(vars ...clusterv1.ClusterVariable // Build returns a testable cluster Topology object with any values passed to the builder. 
func (c *ClusterTopologyBuilder) Build() *clusterv1.Topology { t := &clusterv1.Topology{ - Class: c.class, - Workers: c.workers, - Version: c.version, + Class: c.class, + ClassNamespace: c.classNamespace, + Workers: c.workers, + Version: c.version, ControlPlane: clusterv1.ControlPlaneTopology{ Replicas: &c.controlPlaneReplicas, MachineHealthCheck: c.controlPlaneMHC, From b1bbe0c535e9e3f097e29ff32e7a68079942d0ca Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Fri, 10 Jan 2025 13:57:08 +0100 Subject: [PATCH 2/2] Extend scale test & improve controller-runtime dashboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stefan Büringer buringerst@vmware.com --- .../dashboards/controller-runtime.json | 36 ++- test/e2e/cluster_upgrade_runtimesdk.go | 41 +-- .../main/bases/cluster-with-topology.yaml | 6 + .../main/clusterclass-in-memory.yaml | 129 +++++----- test/e2e/scale.go | 237 +++++++++++------- test/e2e/scale_test.go | 34 ++- .../clusterctl/clusterctl_helpers.go | 13 +- test/framework/clusterctl/e2e_config.go | 15 ++ 8 files changed, 317 insertions(+), 194 deletions(-) diff --git a/hack/observability/grafana/dashboards/controller-runtime.json b/hack/observability/grafana/dashboards/controller-runtime.json index 5fb6f4bfc2df..14785ce4146e 100644 --- a/hack/observability/grafana/dashboards/controller-runtime.json +++ b/hack/observability/grafana/dashboards/controller-runtime.json @@ -352,7 +352,7 @@ "overrides": [] }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, "y": 6 @@ -360,8 +360,12 @@ "id": 13, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, @@ -459,16 +463,20 @@ "overrides": [] }, "gridPos": { - "h": 6, + "h": 9, "w": 12, "x": 0, - "y": 7 + "y": 15 }, "id": 6, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, @@ -552,16 +560,20 @@ "overrides": [] }, "gridPos": { - "h": 6, + "h": 9, "w": 12, "x": 12, - "y": 7 + "y": 15 }, "id": 7, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, @@ -5010,7 +5022,7 @@ }, "definition": "label_values(kube_pod_info{node=~\"^$Node$\"},pod)", "hide": 0, - "includeAll": false, + "includeAll": true, "label": "Pod", "multi": true, "name": "Pod", diff --git a/test/e2e/cluster_upgrade_runtimesdk.go b/test/e2e/cluster_upgrade_runtimesdk.go index cbbb04da1f8a..115f2350c574 100644 --- a/test/e2e/cluster_upgrade_runtimesdk.go +++ b/test/e2e/cluster_upgrade_runtimesdk.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "path/filepath" + "strconv" "strings" "time" @@ -161,8 +162,11 @@ func ClusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() Cl By("Deploy Test Extension ExtensionConfig") + // In this test we are defaulting all handlers to blocking because we expect the handlers to block the + // cluster lifecycle by default. Setting defaultAllHandlersToBlocking to true enforces that the test-extension + // automatically creates the ConfigMap with blocking preloaded responses. Expect(input.BootstrapClusterProxy.GetClient().Create(ctx, - extensionConfig(specName, namespace.Name, input.ExtensionServiceNamespace, input.ExtensionServiceName))). 
+ extensionConfig(specName, namespace.Name, input.ExtensionServiceNamespace, input.ExtensionServiceName, true, true))). To(Succeed(), "Failed to create the extension config") By("Creating a workload cluster; creation waits for BeforeClusterCreateHook to gate the operation") @@ -304,7 +308,7 @@ func ClusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() Cl if !input.SkipCleanup { // Delete the extensionConfig first to ensure the BeforeDeleteCluster hook doesn't block deletion. Eventually(func() error { - return input.BootstrapClusterProxy.GetClient().Delete(ctx, extensionConfig(specName, namespace.Name, input.ExtensionServiceNamespace, input.ExtensionServiceName)) + return input.BootstrapClusterProxy.GetClient().Delete(ctx, extensionConfig(specName, namespace.Name, input.ExtensionServiceNamespace, input.ExtensionServiceName, true, true)) }, 10*time.Second, 1*time.Second).Should(Succeed(), "delete extensionConfig failed") Byf("Deleting cluster %s", klog.KObj(clusterResources.Cluster)) @@ -429,8 +433,8 @@ func machineSetPreflightChecksTestHandler(ctx context.Context, c client.Client, // We make sure this cluster-wide object does not conflict with others by using a random generated // name and a NamespaceSelector selecting on the namespace of the current test. // Thus, this object is "namespaced" to the current test even though it's a cluster-wide object. -func extensionConfig(name, namespace, extensionServiceNamespace, extensionServiceName string) *runtimev1.ExtensionConfig { - return &runtimev1.ExtensionConfig{ +func extensionConfig(name, namespace, extensionServiceNamespace, extensionServiceName string, selectNamespace, defaultAllHandlersToBlocking bool) *runtimev1.ExtensionConfig { + cfg := &runtimev1.ExtensionConfig{ ObjectMeta: metav1.ObjectMeta{ // Note: We have to use a constant name here as we have to be able to reference it in the ClusterClass // when configuring external patches. @@ -448,25 +452,24 @@ func extensionConfig(name, namespace, extensionServiceNamespace, extensionServic Namespace: extensionServiceNamespace, }, }, - NamespaceSelector: &metav1.LabelSelector{ - // Note: we are limiting the test extension to be used by the namespace where the test is run. - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "kubernetes.io/metadata.name", - Operator: metav1.LabelSelectorOpIn, - Values: []string{namespace}, - }, - }, - }, Settings: map[string]string{ - // In the E2E test we are defaulting all handlers to blocking because cluster_upgrade_runtimesdk_test - // expects the handlers to block the cluster lifecycle by default. - // Setting this value to true enforces that the test-extension automatically creates the ConfigMap with - // blocking preloaded responses. - "defaultAllHandlersToBlocking": "true", + "defaultAllHandlersToBlocking": strconv.FormatBool(defaultAllHandlersToBlocking), }, }, } + if selectNamespace { + cfg.Spec.NamespaceSelector = &metav1.LabelSelector{ + // Note: we are limiting the test extension to be used by the namespace where the test is run. + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/metadata.name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{namespace}, + }, + }, + } + } + return cfg } // Check that each hook in hooks has been called at least once by checking if its actualResponseStatus is in the hook response configmap. 
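
Note on the refactored helper: `extensionConfig` is now parameterized over `selectNamespace` and `defaultAllHandlersToBlocking`, so the upgrade test and the scale test can share it. A sketch of the two call patterns, using only the function and fields introduced in this patch (the `cfg` variable is illustrative):

```go
// Upgrade test: handlers block the cluster lifecycle, and the ExtensionConfig
// is limited to the test namespace via the NamespaceSelector.
cfg := extensionConfig(specName, namespace.Name,
	input.ExtensionServiceNamespace, input.ExtensionServiceName,
	true /* selectNamespace */, true /* defaultAllHandlersToBlocking */)

// Scale test: non-blocking handlers; the selector is dropped when clusters
// are spread across namespaces, so the extension applies cluster-wide.
cfg = extensionConfig(specName, namespace.Name,
	input.ExtensionServiceNamespace, input.ExtensionServiceName,
	false, false)
_ = cfg
```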
diff --git a/test/e2e/data/infrastructure-inmemory/main/bases/cluster-with-topology.yaml b/test/e2e/data/infrastructure-inmemory/main/bases/cluster-with-topology.yaml index 61fe1299299d..4c5357ccb844 100644 --- a/test/e2e/data/infrastructure-inmemory/main/bases/cluster-with-topology.yaml +++ b/test/e2e/data/infrastructure-inmemory/main/bases/cluster-with-topology.yaml @@ -12,6 +12,7 @@ spec: serviceDomain: ${SERVICE_DOMAIN:="cluster.local"} topology: class: in-memory + classNamespace: ${NAMESPACE} version: ${KUBERNETES_VERSION} controlPlane: replicas: ${CONTROL_PLANE_MACHINE_COUNT} @@ -20,3 +21,8 @@ spec: - class: default-worker name: md-0 replicas: ${WORKER_MACHINE_COUNT} + variables: + - name: kubeadmControlPlaneMaxSurge + value: "1" + - name: imageRepository + value: "kindest" diff --git a/test/e2e/data/infrastructure-inmemory/main/clusterclass-in-memory.yaml b/test/e2e/data/infrastructure-inmemory/main/clusterclass-in-memory.yaml index 945a2cb2b891..d6c6fbd3cdd0 100644 --- a/test/e2e/data/infrastructure-inmemory/main/clusterclass-in-memory.yaml +++ b/test/e2e/data/infrastructure-inmemory/main/clusterclass-in-memory.yaml @@ -1,56 +1,3 @@ -apiVersion: cluster.x-k8s.io/v1beta1 -kind: ClusterClass -metadata: - name: in-memory -spec: - controlPlane: - metadata: - annotations: - machineInfrastructure: - ref: - apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 - kind: InMemoryMachineTemplate - name: in-memory-control-plane - ref: - apiVersion: controlplane.cluster.x-k8s.io/v1beta1 - kind: KubeadmControlPlaneTemplate - name: in-memory-control-plane - machineHealthCheck: - unhealthyConditions: - - type: Ready - status: Unknown - timeout: 300s - - type: Ready - status: "False" - timeout: 300s - infrastructure: - ref: - apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 - kind: InMemoryClusterTemplate - name: in-memory-cluster - workers: - machineDeployments: - - class: default-worker - template: - bootstrap: - ref: - apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 - kind: KubeadmConfigTemplate - name: in-memory-default-worker-bootstraptemplate - infrastructure: - ref: - apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 - kind: InMemoryMachineTemplate - name: in-memory-default-worker-machinetemplate - machineHealthCheck: - unhealthyConditions: - - type: Ready - status: Unknown - timeout: 300s - - type: Ready - status: "False" - timeout: 300s ---- apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 kind: InMemoryClusterTemplate metadata: @@ -95,19 +42,19 @@ spec: behaviour: vm: provisioning: - startupDuration: "30s" + startupDuration: "10s" startupJitter: "0.2" node: provisioning: - startupDuration: "10s" + startupDuration: "2s" startupJitter: "0.2" apiServer: provisioning: - startupDuration: "10s" + startupDuration: "2s" startupJitter: "0.2" etcd: provisioning: - startupDuration: "10s" + startupDuration: "2s" startupJitter: "0.2" --- apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 @@ -120,19 +67,19 @@ spec: behaviour: vm: provisioning: - startupDuration: "30s" + startupDuration: "10s" startupJitter: "0.2" node: provisioning: - startupDuration: "10s" + startupDuration: "2s" startupJitter: "0.2" apiServer: provisioning: - startupDuration: "10s" + startupDuration: "2s" startupJitter: "0.2" etcd: provisioning: - startupDuration: "10s" + startupDuration: "2s" startupJitter: "0.2" --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 @@ -146,4 +93,62 @@ spec: nodeRegistration: criSocket: unix:///var/run/containerd/containerd.sock kubeletExtraArgs: - eviction-hard: 
nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0%
\ No newline at end of file
+        eviction-hard: nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0%
+---
+apiVersion: cluster.x-k8s.io/v1beta1
+kind: ClusterClass
+metadata:
+  name: in-memory
+spec:
+  controlPlane:
+    metadata:
+      annotations:
+    machineInfrastructure:
+      ref:
+        apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
+        kind: InMemoryMachineTemplate
+        name: in-memory-control-plane
+    ref:
+      apiVersion: controlplane.cluster.x-k8s.io/v1beta1
+      kind: KubeadmControlPlaneTemplate
+      name: in-memory-control-plane
+    machineHealthCheck:
+      unhealthyConditions:
+        - type: Ready
+          status: Unknown
+          timeout: 300s
+        - type: Ready
+          status: "False"
+          timeout: 300s
+  infrastructure:
+    ref:
+      apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
+      kind: InMemoryClusterTemplate
+      name: in-memory-cluster
+  workers:
+    machineDeployments:
+      - class: default-worker
+        template:
+          bootstrap:
+            ref:
+              apiVersion: bootstrap.cluster.x-k8s.io/v1beta1
+              kind: KubeadmConfigTemplate
+              name: in-memory-default-worker-bootstraptemplate
+          infrastructure:
+            ref:
+              apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
+              kind: InMemoryMachineTemplate
+              name: in-memory-default-worker-machinetemplate
+        machineHealthCheck:
+          unhealthyConditions:
+            - type: Ready
+              status: Unknown
+              timeout: 300s
+            - type: Ready
+              status: "False"
+              timeout: 300s
+  patches:
+  - name: test-patch
+    external:
+      generateExtension: generate-patches.scale
+      discoverVariablesExtension: discover-variables.scale
diff --git a/test/e2e/scale.go b/test/e2e/scale.go
index 56a2256777e7..d2f80c8a6f90 100644
--- a/test/e2e/scale.go
+++ b/test/e2e/scale.go
@@ -18,6 +18,7 @@ package e2e
 
 import (
 	"bytes"
+	"cmp"
 	"context"
 	"fmt"
 	"math"
@@ -49,11 +50,14 @@ import (
 )
 
 const (
-	scaleClusterCount             = "CAPI_SCALE_CLUSTER_COUNT"
-	scaleConcurrency              = "CAPI_SCALE_CONCURRENCY"
-	scaleControlPlaneMachineCount = "CAPI_SCALE_CONTROL_PLANE_MACHINE_COUNT"
-	scaleWorkerMachineCount       = "CAPI_SCALE_WORKER_MACHINE_COUNT"
-	scaleMachineDeploymentCount   = "CAPI_SCALE_MACHINE_DEPLOYMENT_COUNT"
+	scaleClusterCount                      = "CAPI_SCALE_CLUSTER_COUNT"
+	scaleConcurrency                       = "CAPI_SCALE_CONCURRENCY"
+	scaleControlPlaneMachineCount          = "CAPI_SCALE_CONTROL_PLANE_MACHINE_COUNT"
+	scaleWorkerPerMachineDeploymentCount   = "CAPI_SCALE_WORKER_PER_MACHINE_DEPLOYMENT_COUNT"
+	scaleMachineDeploymentCount            = "CAPI_SCALE_MACHINE_DEPLOYMENT_COUNT"
+	scaleAdditionalClusterClassCount       = "CAPI_SCALE_ADDITIONAL_CLUSTER_CLASS_COUNT"
+	scaleDeployClusterInSeparateNamespaces = "CAPI_SCALE_DEPLOY_CLUSTER_IN_SEPARATE_NAMESPACES"
+	scaleUseCrossNamespaceClusterClass     = "CAPI_SCALE_USE_CROSS_NAMESPACE_CLUSTER_CLASS"
 
 	// Note: Names must consist of lower case alphanumeric characters or '-'.
 	scaleClusterNamePlaceholder = "scale-cluster-name-placeholder"
@@ -84,10 +88,6 @@ type ScaleSpecInput struct {
 	// Can be overridden by variable CAPI_SCALE_CLUSTER_COUNT.
 	ClusterCount *int64
 
-	// DeployClusterInSeparateNamespaces defines if each cluster should be deployed into its separate namespace.
-	// In this case The namespace name will be the name of the cluster.
-	DeployClusterInSeparateNamespaces bool
-
 	// Concurrency is the maximum concurrency of each of the scale operations.
 	// If unspecified it defaults to 5.
 	// Can be overridden by variable CAPI_SCALE_CONCURRENCY.
@@ -98,12 +98,12 @@ type ScaleSpecInput struct {
 	// Can be overridden by variable CAPI_SCALE_CONTROL_PLANE_MACHINE_COUNT.
 	ControlPlaneMachineCount *int64
 
-	// WorkerMachineCount defines number of worker machines per machine deployment of the workload cluster.
+	// WorkerPerMachineDeploymentCount defines number of worker machines per machine deployment of the workload cluster.
 	// If not specified, 3 will be used.
-	// Can be overridden by variable CAPI_SCALE_WORKER_MACHINE_COUNT.
+	// Can be overridden by variable CAPI_SCALE_WORKER_PER_MACHINE_DEPLOYMENT_COUNT.
 	// The resulting number of worker nodes for each of the workload clusters will
-	// be MachineDeploymentCount*WorkerMachineCount (CAPI_SCALE_MACHINE_DEPLOYMENT_COUNT*CAPI_SCALE_WORKER_MACHINE_COUNT).
-	WorkerMachineCount *int64
+	// be MachineDeploymentCount*WorkerPerMachineDeploymentCount (CAPI_SCALE_MACHINE_DEPLOYMENT_COUNT*CAPI_SCALE_WORKER_PER_MACHINE_DEPLOYMENT_COUNT).
+	WorkerPerMachineDeploymentCount *int64
 
 	// MachineDeploymentCount defines the number of MachineDeployments to be used per workload cluster.
 	// If not specified, 1 will be used.
@@ -112,9 +112,37 @@ type ScaleSpecInput struct {
 	// It uses this machine deployment to create additional copies.
 	// Names of the MachineDeployments will be overridden to "md-1", "md-2", etc.
 	// The resulting number of worker nodes for each of the workload clusters will
-	// be MachineDeploymentCount*WorkerMachineCount (CAPI_SCALE_MACHINE_DEPLOYMENT_COUNT*CAPI_SCALE_WORKER_MACHINE_COUNT).
+	// be MachineDeploymentCount*WorkerPerMachineDeploymentCount (CAPI_SCALE_MACHINE_DEPLOYMENT_COUNT*CAPI_SCALE_WORKER_PER_MACHINE_DEPLOYMENT_COUNT).
 	MachineDeploymentCount *int64
 
+	// AdditionalClusterClassCount is the number of copies of the ClusterClass that will be deployed.
+	// This can be used to test how Cluster API scales with a higher number of ClusterClasses.
+	// Can be overridden by variable CAPI_SCALE_ADDITIONAL_CLUSTER_CLASS_COUNT.
+	AdditionalClusterClassCount *int64
+
+	// ClusterClassName is the name of the ClusterClass.
+	// This is only required if AdditionalClusterClassCount is set and > 0.
+	ClusterClassName string
+
+	// DeployClusterInSeparateNamespaces defines if each cluster should be deployed into its own separate namespace.
+	// In this case, the namespace name will be the name of the cluster.
+	// Can be overridden by variable CAPI_SCALE_DEPLOY_CLUSTER_IN_SEPARATE_NAMESPACES.
+	DeployClusterInSeparateNamespaces *bool
+
+	// UseCrossNamespaceClusterClass configures Clusters that are deployed into separate Namespaces to
+	// use a single ClusterClass instead of using a ClusterClass in their namespace.
+	// Note: This can only be true when DeployClusterInSeparateNamespaces is true.
+	// Can be overridden by variable CAPI_SCALE_USE_CROSS_NAMESPACE_CLUSTER_CLASS.
+	UseCrossNamespaceClusterClass *bool
+
+	// ExtensionServiceNamespace is the namespace where the service for the Runtime Extension is located.
+	// Note: This should only be set if a Runtime Extension is used.
+	ExtensionServiceNamespace string
+
+	// ExtensionServiceName is the name of the service for the Runtime Extension.
+	// Note: This should only be set if a Runtime Extension is used.
+	ExtensionServiceName string
+
 	// Allows to inject a function to be run after test namespace is created.
 	// If not specified, this is a no-op.
PostNamespaceCreated func(managementClusterProxy framework.ClusterProxy, workloadClusterNamespace string) @@ -200,68 +228,56 @@ func ScaleSpec(ctx context.Context, inputGetter func() ScaleSpecInput) { flavor = *input.Flavor } - controlPlaneMachineCount := ptr.To[int64](1) - if input.ControlPlaneMachineCount != nil { - controlPlaneMachineCount = input.ControlPlaneMachineCount - } - // If variable is defined that will take precedence. - if input.E2EConfig.HasVariable(scaleControlPlaneMachineCount) { - controlPlaneMachineCountStr := input.E2EConfig.GetVariable(scaleControlPlaneMachineCount) - controlPlaneMachineCountInt, err := strconv.Atoi(controlPlaneMachineCountStr) - Expect(err).ToNot(HaveOccurred()) - controlPlaneMachineCount = ptr.To[int64](int64(controlPlaneMachineCountInt)) - } - - workerMachineCount := ptr.To[int64](1) - if input.WorkerMachineCount != nil { - workerMachineCount = input.WorkerMachineCount - } - // If variable is defined that will take precedence. - if input.E2EConfig.HasVariable(scaleWorkerMachineCount) { - workerMachineCountStr := input.E2EConfig.GetVariable(scaleWorkerMachineCount) - workerMachineCountInt, err := strconv.Atoi(workerMachineCountStr) - Expect(err).ToNot(HaveOccurred()) - workerMachineCount = ptr.To[int64](int64(workerMachineCountInt)) + clusterCount := *cmp.Or(variableAsInt64(input.E2EConfig.GetVariableBestEffort(scaleClusterCount)), + input.ClusterCount, ptr.To[int64](10), + ) + concurrency := *cmp.Or(variableAsInt64(input.E2EConfig.GetVariableBestEffort(scaleConcurrency)), + input.Concurrency, ptr.To[int64](5), + ) + controlPlaneMachineCount := cmp.Or(variableAsInt64(input.E2EConfig.GetVariableBestEffort(scaleControlPlaneMachineCount)), + input.ControlPlaneMachineCount, ptr.To[int64](1), + ) + machineDeploymentCount := *cmp.Or(variableAsInt64(input.E2EConfig.GetVariableBestEffort(scaleMachineDeploymentCount)), + input.MachineDeploymentCount, ptr.To[int64](1), + ) + workerPerMachineDeploymentCount := cmp.Or(variableAsInt64(input.E2EConfig.GetVariableBestEffort(scaleWorkerPerMachineDeploymentCount)), + input.WorkerPerMachineDeploymentCount, ptr.To[int64](3), + ) + additionalClusterClassCount := *cmp.Or(variableAsInt64(input.E2EConfig.GetVariableBestEffort(scaleAdditionalClusterClassCount)), + input.AdditionalClusterClassCount, ptr.To[int64](0), + ) + deployClusterInSeparateNamespaces := *cmp.Or(variableAsBool(input.E2EConfig.GetVariableBestEffort(scaleDeployClusterInSeparateNamespaces)), + input.DeployClusterInSeparateNamespaces, ptr.To[bool](false), + ) + useCrossNamespaceClusterClass := *cmp.Or(variableAsBool(input.E2EConfig.GetVariableBestEffort(scaleUseCrossNamespaceClusterClass)), + input.UseCrossNamespaceClusterClass, ptr.To[bool](false), + ) + if useCrossNamespaceClusterClass { + Expect(deployClusterInSeparateNamespaces).To(BeTrue(), "deployClusterInSeparateNamespaces must be "+ + "true if useCrossNamespaceClusterClass is true") } - machineDeploymentCount := ptr.To[int64](1) - if input.MachineDeploymentCount != nil { - machineDeploymentCount = input.MachineDeploymentCount - } - // If variable is defined that will take precedence. 
-	if input.E2EConfig.HasVariable(scaleMachineDeploymentCount) {
-		machineDeploymentCountStr := input.E2EConfig.GetVariable(scaleMachineDeploymentCount)
-		machineDeploymentCountInt, err := strconv.Atoi(machineDeploymentCountStr)
-		Expect(err).ToNot(HaveOccurred())
-		machineDeploymentCount = ptr.To[int64](int64(machineDeploymentCountInt))
-	}
-
-	clusterCount := int64(10)
-	if input.ClusterCount != nil {
-		clusterCount = *input.ClusterCount
-	}
-	// If variable is defined that will take precedence.
-	if input.E2EConfig.HasVariable(scaleClusterCount) {
-		clusterCountStr := input.E2EConfig.GetVariable(scaleClusterCount)
-		var err error
-		clusterCount, err = strconv.ParseInt(clusterCountStr, 10, 64)
-		Expect(err).NotTo(HaveOccurred(), "%q value should be integer", scaleClusterCount)
-	}
-
-	concurrency := int64(5)
-	if input.Concurrency != nil {
-		concurrency = *input.Concurrency
-	}
-	// If variable is defined that will take precedence.
-	if input.E2EConfig.HasVariable(scaleConcurrency) {
-		concurrencyStr := input.E2EConfig.GetVariable(scaleConcurrency)
-		var err error
-		concurrency, err = strconv.ParseInt(concurrencyStr, 10, 64)
-		Expect(err).NotTo(HaveOccurred(), "%q value should be integer", scaleConcurrency)
+	if input.ExtensionServiceNamespace != "" && input.ExtensionServiceName != "" {
+		// NOTE: the test extension is already deployed in the management cluster. If for any reason in future we want
+		// to make this test more self-contained, this test should be modified in order to create an additional
+		// management cluster; also the E2E test configuration should be modified by introducing something like
+		// optional:true, allowing to define which providers should not be installed by default in
+		// a management cluster.
+		By("Deploy Test Extension ExtensionConfig")
+
+		// In this test we are defaulting all handlers to non-blocking because we don't expect the handlers to block the
+		// cluster lifecycle by default. Setting defaultAllHandlersToBlocking to false enforces that the test-extension
+		// automatically creates the ConfigMap with non-blocking preloaded responses.
+		defaultAllHandlersToBlocking := false
+		// Select on the current namespace if all Clusters are deployed in the current namespace.
+		// This is necessary so in CI this test doesn't influence other tests by enabling lifecycle hooks
+		// in other test namespaces.
+		selectNamespace := !deployClusterInSeparateNamespaces
+		Expect(input.BootstrapClusterProxy.GetClient().Create(ctx,
+			extensionConfig(specName, namespace.Name, input.ExtensionServiceNamespace, input.ExtensionServiceName, selectNamespace, defaultAllHandlersToBlocking))).
+			To(Succeed(), "Failed to create the extension config")
 	}
 
-	// TODO(ykakarap): Follow-up: Add support for legacy cluster templates.
 	By("Create the ClusterClass to be used by all workload clusters")
 
 	// IMPORTANT: ConfigCluster function in the test framework is currently not concurrency safe.
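
Note on the `cmp.Or` chains above: they replace the repetitive per-variable lookup blocks removed in this hunk. `cmp.Or` returns the first non-zero (here, non-nil) argument, so an environment/config variable wins over the `ScaleSpecInput` field, which wins over the hard-coded default. A minimal standalone sketch of the same precedence pattern (names are illustrative, not from the patch):

```go
package main

import (
	"cmp"
	"fmt"
	"strconv"
)

// variableAsInt64 mirrors the helper added in scale.go: an empty string
// (variable not set) yields nil so cmp.Or falls through to the next source.
func variableAsInt64(v string) *int64 {
	if v == "" {
		return nil
	}
	i, err := strconv.ParseInt(v, 10, 64)
	if err != nil {
		panic(err)
	}
	return &i
}

func main() {
	fromEnv := ""        // e.g. the CAPI_SCALE_CLUSTER_COUNT variable
	var fromInput *int64 // e.g. input.ClusterCount
	def := int64(10)

	// First non-nil pointer wins: variable, then input field, then default.
	clusterCount := *cmp.Or(variableAsInt64(fromEnv), fromInput, &def)
	fmt.Println(clusterCount) // 10
}
```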
@@ -278,7 +294,7 @@ func ScaleSpec(ctx context.Context, inputGetter func() ScaleSpecInput) { ClusterName: scaleClusterNamePlaceholder, KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeFrom), ControlPlaneMachineCount: controlPlaneMachineCount, - WorkerMachineCount: workerMachineCount, + WorkerMachineCount: workerPerMachineDeploymentCount, }) Expect(baseWorkloadClusterTemplate).ToNot(BeNil(), "Failed to get the cluster template") @@ -291,17 +307,27 @@ func ScaleSpec(ctx context.Context, inputGetter func() ScaleSpecInput) { baseClusterClassYAML, baseClusterTemplateYAML := extractClusterClassAndClusterFromTemplate(baseWorkloadClusterTemplate) // Modify the baseClusterTemplateYAML so that it has the desired number of machine deployments. - baseClusterTemplateYAML = modifyMachineDeployments(baseClusterTemplateYAML, int(*machineDeploymentCount)) + baseClusterTemplateYAML = modifyMachineDeployments(baseClusterTemplateYAML, machineDeploymentCount) // If all clusters should be deployed in the same namespace (namespace.Name), // then deploy the ClusterClass in this namespace. - if !input.DeployClusterInSeparateNamespaces { + if !deployClusterInSeparateNamespaces || useCrossNamespaceClusterClass { if len(baseClusterClassYAML) > 0 { clusterClassYAML := bytes.Replace(baseClusterClassYAML, []byte(scaleClusterNamespacePlaceholder), []byte(namespace.Name), -1) log.Logf("Apply ClusterClass") Eventually(func() error { return input.BootstrapClusterProxy.CreateOrUpdate(ctx, clusterClassYAML) }, 1*time.Minute).Should(Succeed()) + + // Create additional unused instances of the ClusterClass + for i := range additionalClusterClassCount { + additionalName := fmt.Sprintf("%s-%d", input.ClusterClassName, i+1) + log.Logf("Apply additional ClusterClass %s/%s", namespace.Name, additionalName) + additionalClassYAML := bytes.Replace(clusterClassYAML, []byte(input.ClusterClassName), []byte(additionalName), -1) + Eventually(func() error { + return input.BootstrapClusterProxy.CreateOrUpdate(ctx, additionalClassYAML) + }, 1*time.Minute).Should(Succeed()) + } } else { log.Logf("ClusterClass already exists. 
Skipping creation.") } @@ -341,7 +367,10 @@ func ScaleSpec(ctx context.Context, inputGetter func() ScaleSpecInput) { Concurrency: concurrency, FailFast: input.FailFast, WorkerFunc: func(ctx context.Context, inputChan chan string, resultChan chan workResult, wg *sync.WaitGroup) { - createClusterWorker(ctx, input.BootstrapClusterProxy, inputChan, resultChan, wg, namespace.Name, input.DeployClusterInSeparateNamespaces, baseClusterClassYAML, baseClusterTemplateYAML, creator, input.PostScaleClusterNamespaceCreated) + createClusterWorker(ctx, input.BootstrapClusterProxy, inputChan, resultChan, wg, namespace.Name, + deployClusterInSeparateNamespaces, useCrossNamespaceClusterClass, + baseClusterClassYAML, baseClusterTemplateYAML, creator, input.PostScaleClusterNamespaceCreated, + additionalClusterClassCount, input.ClusterClassName) }, }) if err != nil { @@ -381,7 +410,7 @@ func ScaleSpec(ctx context.Context, inputGetter func() ScaleSpecInput) { Concurrency: concurrency, FailFast: input.FailFast, WorkerFunc: func(ctx context.Context, inputChan chan string, resultChan chan workResult, wg *sync.WaitGroup) { - upgradeClusterAndWaitWorker(ctx, inputChan, resultChan, wg, namespace.Name, input.DeployClusterInSeparateNamespaces, baseClusterTemplateYAML, upgrader) + upgradeClusterAndWaitWorker(ctx, inputChan, resultChan, wg, namespace.Name, deployClusterInSeparateNamespaces, baseClusterTemplateYAML, upgrader) }, }) if err != nil { @@ -405,6 +434,7 @@ func ScaleSpec(ctx context.Context, inputGetter func() ScaleSpecInput) { } if input.SkipCleanup { + By("PASSED!") return } @@ -422,7 +452,7 @@ func ScaleSpec(ctx context.Context, inputGetter func() ScaleSpecInput) { wg, input.BootstrapClusterProxy.GetClient(), namespace.Name, - input.DeployClusterInSeparateNamespaces, + deployClusterInSeparateNamespaces, input.E2EConfig.GetIntervals(specName, "wait-delete-cluster")..., ) }, @@ -589,7 +619,7 @@ func getClusterCreateFn(clusterProxy framework.ClusterProxy) clusterCreator { type PostScaleClusterNamespaceCreated func(clusterProxy framework.ClusterProxy, clusterNamespace string, clusterName string, clusterClassYAML []byte, clusterTemplateYAML []byte) ([]byte, []byte) -func createClusterWorker(ctx context.Context, clusterProxy framework.ClusterProxy, inputChan <-chan string, resultChan chan<- workResult, wg *sync.WaitGroup, defaultNamespace string, deployClusterInSeparateNamespaces bool, baseClusterClassYAML, baseClusterTemplateYAML []byte, create clusterCreator, postScaleClusterNamespaceCreated PostScaleClusterNamespaceCreated) { +func createClusterWorker(ctx context.Context, clusterProxy framework.ClusterProxy, inputChan <-chan string, resultChan chan<- workResult, wg *sync.WaitGroup, defaultNamespace string, deployClusterInSeparateNamespaces, enableCrossNamespaceClusterClass bool, baseClusterClassYAML, baseClusterTemplateYAML []byte, create clusterCreator, postScaleClusterNamespaceCreated PostScaleClusterNamespaceCreated, additionalClusterClasses int64, clusterClassName string) { defer wg.Done() for { @@ -626,7 +656,7 @@ func createClusterWorker(ctx context.Context, clusterProxy framework.ClusterProx // * Adjust namespace in ClusterClass YAML. // * Create new namespace. 
if deployClusterInSeparateNamespaces { - log.Logf("Create namespace %", namespaceName) + log.Logf("Create namespace %s", namespaceName) _ = framework.CreateNamespace(ctx, framework.CreateNamespaceInput{ Creator: clusterProxy.GetClient(), Name: namespaceName, @@ -645,16 +675,33 @@ func createClusterWorker(ctx context.Context, clusterProxy framework.ClusterProx // If every cluster should be deployed in a separate namespace: // * Deploy ClusterClass in new namespace. - if deployClusterInSeparateNamespaces { + if deployClusterInSeparateNamespaces && !enableCrossNamespaceClusterClass { log.Logf("Apply ClusterClass in namespace %s", namespaceName) clusterClassYAML := bytes.Replace(customizedClusterClassYAML, []byte(scaleClusterNamespacePlaceholder), []byte(namespaceName), -1) Eventually(func() error { return clusterProxy.CreateOrUpdate(ctx, clusterClassYAML) }, 1*time.Minute).Should(Succeed()) + + // Create additional unused instances of the ClusterClass + for i := range additionalClusterClasses { + additionalName := fmt.Sprintf("%s-%d", clusterClassName, i+1) + log.Logf("Apply additional ClusterClass %s/%s", namespaceName, additionalName) + additionalClassYAML := bytes.Replace(clusterClassYAML, []byte(clusterClassName), []byte(additionalName), -1) + Eventually(func() error { + return clusterProxy.CreateOrUpdate(ctx, additionalClassYAML) + }, 1*time.Minute).Should(Succeed()) + } } // Adjust namespace and name in Cluster YAML - clusterTemplateYAML := bytes.Replace(customizedClusterTemplateYAML, []byte(scaleClusterNamespacePlaceholder), []byte(namespaceName), -1) + clusterTemplateYAML := customizedClusterTemplateYAML + if enableCrossNamespaceClusterClass { + // Set classNamespace to the defaultNamespace where the ClusterClass is located. + clusterTemplateYAML = bytes.Replace(clusterTemplateYAML, + []byte(fmt.Sprintf("classNamespace: %s", scaleClusterNamespacePlaceholder)), + []byte(fmt.Sprintf("classNamespace: %s", defaultNamespace)), -1) + } + clusterTemplateYAML = bytes.Replace(clusterTemplateYAML, []byte(scaleClusterNamespacePlaceholder), []byte(namespaceName), -1) clusterTemplateYAML = bytes.Replace(clusterTemplateYAML, []byte(scaleClusterNamePlaceholder), []byte(clusterName), -1) // Deploy Cluster. @@ -843,7 +890,7 @@ type workResult struct { err any } -func modifyMachineDeployments(baseClusterTemplateYAML []byte, count int) []byte { +func modifyMachineDeployments(baseClusterTemplateYAML []byte, count int64) []byte { Expect(baseClusterTemplateYAML).NotTo(BeEmpty(), "Invalid argument. baseClusterTemplateYAML cannot be empty when calling modifyMachineDeployments") Expect(count).To(BeNumerically(">=", 0), "Invalid argument. count cannot be less than 0 when calling modifyMachineDeployments") @@ -863,7 +910,7 @@ func modifyMachineDeployments(baseClusterTemplateYAML []byte, count int) []byte baseMD := cluster.Spec.Topology.Workers.MachineDeployments[0] allMDs := make([]clusterv1.MachineDeploymentTopology, count) allMDDigits := 1 + int(math.Log10(float64(count))) - for i := 1; i <= count; i++ { + for i := int64(1); i <= count; i++ { md := baseMD.DeepCopy() // This ensures we always have the right number of leading zeros in our machine deployment names, e.g. // count=1000 will lead to machine deployment names like md-0001, md-0002, so on. 
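
The digit computation above (`1 + int(math.Log10(float64(count)))`) determines how many digits are needed so that all generated MachineDeployment names are zero-padded to the same width and sort consistently. A small sketch of that padding logic in isolation (`mdName` is a hypothetical helper, not part of the patch):

```go
package main

import (
	"fmt"
	"math"
)

// mdName returns a zero-padded MachineDeployment name such as "md-0001",
// using enough digits to fit the largest index (count).
func mdName(i, count int64) string {
	digits := 1 + int(math.Log10(float64(count)))
	return fmt.Sprintf("md-%0*d", digits, i)
}

func main() {
	for _, i := range []int64{1, 42, 1000} {
		fmt.Println(mdName(i, 1000)) // md-0001, md-0042, md-1000
	}
}
```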
@@ -878,3 +925,25 @@ func modifyMachineDeployments(baseClusterTemplateYAML []byte, count int64) []byte
 
 	return modifiedClusterYAML
 }
+
+func variableAsInt64(variableValue string) *int64 {
+	if variableValue == "" {
+		return nil
+	}
+
+	variableValueInt, err := strconv.ParseInt(variableValue, 10, 64)
+	Expect(err).ToNot(HaveOccurred())
+
+	return ptr.To[int64](variableValueInt)
+}
+
+func variableAsBool(variableValue string) *bool {
+	if variableValue == "" {
+		return nil
+	}
+
+	variableValueBool, err := strconv.ParseBool(variableValue)
+	Expect(err).ToNot(HaveOccurred())
+
+	return ptr.To[bool](variableValueBool)
+}
diff --git a/test/e2e/scale_test.go b/test/e2e/scale_test.go
index 541dee0ec56e..7ced2167f090 100644
--- a/test/e2e/scale_test.go
+++ b/test/e2e/scale_test.go
@@ -28,18 +28,28 @@ var _ = Describe("When testing the machinery for scale testing using in-memory p
 	// Note: This test does not support MachinePools.
 	ScaleSpec(ctx, func() ScaleSpecInput {
 		return ScaleSpecInput{
-			E2EConfig:                e2eConfig,
-			ClusterctlConfigPath:     clusterctlConfigPath,
-			InfrastructureProvider:   ptr.To("in-memory"),
-			BootstrapClusterProxy:    bootstrapClusterProxy,
-			ArtifactFolder:           artifactFolder,
-			ClusterCount:             ptr.To[int64](10),
-			Concurrency:              ptr.To[int64](5),
-			Flavor:                   ptr.To(""),
-			ControlPlaneMachineCount: ptr.To[int64](1),
-			MachineDeploymentCount:   ptr.To[int64](1),
-			WorkerMachineCount:       ptr.To[int64](3),
-			SkipCleanup:              skipCleanup,
+			E2EConfig:                         e2eConfig,
+			ClusterctlConfigPath:              clusterctlConfigPath,
+			InfrastructureProvider:            ptr.To("in-memory"),
+			BootstrapClusterProxy:             bootstrapClusterProxy,
+			ArtifactFolder:                    artifactFolder,
+			Flavor:                            ptr.To(""),
+			SkipCleanup:                       skipCleanup,
+			ClusterCount:                      ptr.To[int64](10),
+			Concurrency:                       ptr.To[int64](5),
+			ControlPlaneMachineCount:          ptr.To[int64](1),
+			MachineDeploymentCount:            ptr.To[int64](1),
+			WorkerPerMachineDeploymentCount:   ptr.To[int64](3),
+			AdditionalClusterClassCount:       ptr.To[int64](4),
+			ClusterClassName:                  "in-memory",
+			DeployClusterInSeparateNamespaces: ptr.To[bool](false),
+			UseCrossNamespaceClusterClass:     ptr.To[bool](false),
+			// The runtime extension gets deployed to the test-extension-system namespace and is exposed
+			// by the test-extension-webhook-service.
+			// The below values are used when creating the cluster-wide ExtensionConfig to refer to
+			// the actual service.
+			ExtensionServiceNamespace: "test-extension-system",
+			ExtensionServiceName:      "test-extension-webhook-service",
 		}
 	})
 })
diff --git a/test/framework/clusterctl/clusterctl_helpers.go b/test/framework/clusterctl/clusterctl_helpers.go
index 806742d5b636..6ad78a17a21f 100644
--- a/test/framework/clusterctl/clusterctl_helpers.go
+++ b/test/framework/clusterctl/clusterctl_helpers.go
@@ -17,6 +17,7 @@ limitations under the License.
 package clusterctl
 
 import (
+	"cmp"
 	"context"
 	"fmt"
 	"os"
@@ -446,11 +447,11 @@ func ApplyCustomClusterTemplateAndWait(ctx context.Context, input ApplyCustomClu
 	}, input.WaitForClusterIntervals...)
 	if result.Cluster.Spec.Topology != nil {
 		result.ClusterClass = framework.GetClusterClassByName(ctx, framework.GetClusterClassByNameInput{
 			Getter:    input.ClusterProxy.GetClient(),
-			Namespace: input.Namespace,
+			Namespace: cmp.Or(result.Cluster.Spec.Topology.ClassNamespace, result.Cluster.Namespace),
 			Name:      result.Cluster.Spec.Topology.Class,
 		})
 	}
 
 	log.Logf("Waiting for control plane of cluster %s to be initialized", klog.KRef(input.Namespace, input.ClusterName))
diff --git a/test/framework/clusterctl/e2e_config.go b/test/framework/clusterctl/e2e_config.go
index b50440a902be..94e6de9d5372 100644
--- a/test/framework/clusterctl/e2e_config.go
+++ b/test/framework/clusterctl/e2e_config.go
@@ -800,6 +800,21 @@ func (c *E2EConfig) GetVariable(varName string) string {
 	return value
 }
 
+// GetVariableBestEffort returns a variable from environment variables or from the e2e config file.
+// If the variable cannot be found, it returns an empty string and does not fail.
+func (c *E2EConfig) GetVariableBestEffort(varName string) string {
+	if value, ok := os.LookupEnv(varName); ok {
+		return value
+	}
+
+	value, ok := c.Variables[varName]
+	if ok {
+		return value
+	}
+
+	return ""
+}
+
 // GetInt64PtrVariable returns an Int64Ptr variable from the e2e config file.
 func (c *E2EConfig) GetInt64PtrVariable(varName string) *int64 {
 	wCountStr := c.GetVariable(varName)