From de1745f2c3681e17b2038b2a35d80f74574f2943 Mon Sep 17 00:00:00 2001 From: gangwgr Date: Tue, 26 May 2026 13:00:58 +0530 Subject: [PATCH 1/4] test/encryption: add InvalidImageRecoveryScenario for KMS plugin image fix Adds a test scenario that verifies the cluster can recover when a KMS encryption config is set with an invalid plugin image: 1. Enable KMS with invalid (non-existent) plugin image 2. Attempt to switch to aescbc (cluster remains stuck on KMS) 3. Operator reports degraded 4. Fix the config with valid KMS plugin image 5. Operator recovers and encryption works normally --- test/library/encryption/scenarios.go | 82 +++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 2 deletions(-) diff --git a/test/library/encryption/scenarios.go b/test/library/encryption/scenarios.go index dd1a3fcc4b..1abf781ad9 100644 --- a/test/library/encryption/scenarios.go +++ b/test/library/encryption/scenarios.go @@ -8,10 +8,10 @@ import ( "testing" "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/rand" - "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/rand" configv1 "github.com/openshift/api/config/v1" ) @@ -277,3 +277,81 @@ func TestEncryptionRotation(ctx context.Context, t testing.TB, scenario Rotation // TODO: assert conditions - operator and encryption migration controller must report status as active not progressing, and not failing for all scenarios } + +// InvalidImageRecoveryScenario tests that an invalid KMS plugin image causes +// the cluster to degrade and that correcting the image restores normal operation. +// +// Arda's scenario: +// 1. Enable KMS with invalid image +// 2. Attempt to switch to aescbc (cluster should be stuck on KMS) +// 3. See degraded +// 4. Update with KMS with valid image +// 5. See everything works +type InvalidImageRecoveryScenario struct { + BasicScenario + // InvalidImageProvider is the KMS EncryptionProvider configured with an invalid + // (non-existent or broken) KMS plugin image. Enabling this should cause degradation. + InvalidImageProvider EncryptionProvider + // ValidImageProvider is the KMS EncryptionProvider configured with the correct + // KMS plugin image. Applying this after degradation should restore the cluster. + ValidImageProvider EncryptionProvider + // WaitForDegraded should block until the operator reports a degraded condition. + WaitForDegraded func(ctx context.Context, t testing.TB) + // WaitForRecovery should block until the operator reports a healthy condition + // and encryption is fully operational. + WaitForRecovery func(ctx context.Context, t testing.TB) +} + +// TestEncryptionInvalidImageRecovery tests that: +// 1. Enabling KMS with an invalid plugin image causes degradation +// 2. The cluster remains stuck and cannot migrate to another mode (e.g. aescbc) +// 3. Fixing the KMS config with a valid image restores the cluster +func TestEncryptionInvalidImageRecovery(ctx context.Context, t testing.TB, scenario InvalidImageRecoveryScenario) { + e := NewE(t, PrintEventsOnFailure(scenario.OperatorNamespace)) + require.NotNil(t, scenario.WaitForDegraded, "WaitForDegraded must not be nil") + require.NotNil(t, scenario.WaitForRecovery, "WaitForRecovery must not be nil") + require.Equal(t, configv1.EncryptionTypeKMS, scenario.InvalidImageProvider.Type, "InvalidImageProvider must be KMS type") + require.Equal(t, configv1.EncryptionTypeKMS, scenario.ValidImageProvider.Type, "ValidImageProvider must be KMS type") + + cs := GetClients(e) + + // step 1: enable KMS with invalid image (set config but don't wait for completion) + t.Log("Setting KMS encryption with invalid plugin image") + if scenario.InvalidImageProvider.Setup != nil { + scenario.InvalidImageProvider.Setup(ctx, e) + } + apiServer, err := cs.ApiServerConfig.Get(ctx, "cluster", metav1.GetOptions{}) + require.NoError(t, err) + apiServer.Spec.Encryption = scenario.InvalidImageProvider.APIServerEncryption + _, err = cs.ApiServerConfig.Update(ctx, apiServer, metav1.UpdateOptions{}) + require.NoError(t, err) + + // step 2: attempt to switch to aescbc — cluster should remain stuck on KMS + t.Log("Attempting to switch to aescbc (cluster should remain stuck on KMS)") + apiServer, err = cs.ApiServerConfig.Get(ctx, "cluster", metav1.GetOptions{}) + require.NoError(t, err) + apiServer.Spec.Encryption = configv1.APIServerEncryption{Type: configv1.EncryptionTypeAESCBC} + _, err = cs.ApiServerConfig.Update(ctx, apiServer, metav1.UpdateOptions{}) + require.NoError(t, err) + + // step 3: wait for degraded + t.Log("Waiting for operator to report degraded status") + scenario.WaitForDegraded(ctx, e) + + // step 4: fix the config with valid KMS image + t.Log("Updating KMS encryption with valid plugin image to recover") + if scenario.ValidImageProvider.Setup != nil { + scenario.ValidImageProvider.Setup(ctx, e) + } + apiServer, err = cs.ApiServerConfig.Get(ctx, "cluster", metav1.GetOptions{}) + require.NoError(t, err) + apiServer.Spec.Encryption = scenario.ValidImageProvider.APIServerEncryption + _, err = cs.ApiServerConfig.Update(ctx, apiServer, metav1.UpdateOptions{}) + require.NoError(t, err) + + // step 5: wait for recovery — everything should work + t.Log("Waiting for operator to recover with valid KMS image") + scenario.WaitForRecovery(ctx, e) + + t.Log("Invalid image recovery test passed") +} From f307d1c384a52d7542052c72333f79651e935ba5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arda=20G=C3=BC=C3=A7l=C3=BC?= Date: Fri, 22 May 2026 09:00:38 +0300 Subject: [PATCH 2/4] Rename AppRole datakeys to role-id and secret-id --- test/library/encryption/kms/vault.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/library/encryption/kms/vault.go b/test/library/encryption/kms/vault.go index 1449313f96..5585ec5a2a 100644 --- a/test/library/encryption/kms/vault.go +++ b/test/library/encryption/kms/vault.go @@ -106,8 +106,8 @@ func ensureDefaultVaultAppRoleSecret(ctx context.Context, t testing.TB) { }, Type: corev1.SecretTypeOpaque, Data: map[string][]byte{ - "roleID": creds.Data["role-id"], - "secretID": creds.Data["secret-id"], + "role-id": creds.Data["role-id"], + "secret-id": creds.Data["secret-id"], }, } recorder := events.NewInMemoryRecorder("vault-approle-secret-setup", clock.RealClock{}) From 9e7a937b13fd6b66f000dd35c59a078c4da47926 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arda=20G=C3=BC=C3=A7l=C3=BC?= Date: Wed, 13 May 2026 12:48:39 +0300 Subject: [PATCH 3/4] In-place field update for KMS mode regardless of the convergence When a KMS sidecar revision is stuck (e.g. wrong container image), the cluster-admin needs to update APIServer.spec.encryption.kms with corrected values. Previously, all encryption controllers blocked on revision convergence, so the fix could never be applied. This change allows the key_controller to update the latest key secret's KMS plugin config in-place for non-migration fields (image, TLS, AppRole) regardless of convergence state. If there is a change in migration-triggering fields, we should wait until convergence happens. State controller carries in-place fields updates only when there is no change in the encryption-config data key. --- .../encryption/controllers/key_controller.go | 109 ++++++ .../controllers/key_controller_test.go | 341 +++++++++++++++++- .../controllers/state_controller.go | 86 +++++ .../controllers/state_controller_test.go | 156 ++++++++ .../encryption/encryptiondata/config_test.go | 27 ++ test/e2e-encryption/encryption_test.go | 77 +++- 6 files changed, 775 insertions(+), 21 deletions(-) diff --git a/pkg/operator/encryption/controllers/key_controller.go b/pkg/operator/encryption/controllers/key_controller.go index b0e1403993..0a2032eedf 100644 --- a/pkg/operator/encryption/controllers/key_controller.go +++ b/pkg/operator/encryption/controllers/key_controller.go @@ -11,6 +11,7 @@ import ( "time" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -28,6 +29,7 @@ import ( "github.com/openshift/library-go/pkg/controller/factory" "github.com/openshift/library-go/pkg/operator/encryption/crypto" + "github.com/openshift/library-go/pkg/operator/encryption/encoding" "github.com/openshift/library-go/pkg/operator/encryption/secrets" "github.com/openshift/library-go/pkg/operator/encryption/state" "github.com/openshift/library-go/pkg/operator/encryption/statemachine" @@ -171,6 +173,16 @@ func (c *keyController) checkAndCreateKeys(ctx context.Context, syncContext fact return err } + // Apply in-place KMS plugin config updates (e.g. image, TLS) to the latest key + // secret regardless of convergence. This unblocks stuck revisions and propagates + // operational fixes like CVE image updates. Changes to migration-triggering fields + // (transit key, vault address) are skipped via kmsMigrationRequired. + if currentMode == state.KMS { + if err := c.maybeUpdateKMSPluginConfigInPlace(ctx, syncContext, apiEncryptionConfiguration); err != nil { + return err + } + } + currentConfig, desiredEncryptionState, secrets, isProgressingReason, err := statemachine.GetEncryptionConfigAndState(ctx, c.deployer, c.secretClient, c.encryptionSecretSelector, encryptedGRs) if err != nil { return err @@ -262,6 +274,103 @@ func (c *keyController) validateExistingSecret(ctx context.Context, keySecret *c return nil // we made this key earlier } +// maybeUpdateKMSPluginConfigInPlace updates the latest key secret's KMS plugin +// config when only in-place-safe fields changed (image, TLS, authentication). +func (c *keyController) maybeUpdateKMSPluginConfigInPlace(ctx context.Context, syncContext factory.SyncContext, apiServerEncryption configv1.APIServerEncryption) error { + keySecrets, err := secrets.ListKeySecrets(ctx, c.secretClient, c.encryptionSecretSelector) + if err != nil { + return err + } + if len(keySecrets) == 0 { + return nil + } + // Sort by key ID descending so [0] is the newest. + // Parse the newest secret directly — fail fast if it is malformed + // instead of silently falling back to an older key. + sort.Slice(keySecrets, func(i, j int) bool { + iKeyID, _ := state.NameToKeyID(keySecrets[i].Name) + jKeyID, _ := state.NameToKeyID(keySecrets[j].Name) + return iKeyID > jKeyID + }) + // We only focus on the latest backed key. + latest, err := secrets.ToKeyState(keySecrets[0]) + if err != nil { + return fmt.Errorf("latest key secret %s is invalid: %w", keySecrets[0].Name, err) + } + + // Any mode mismatch (e.g. KMS <-> AESCBC) requires a migration, not an in-place + // update. The normal needsNewKey path handles this after convergence. + if latest.Mode != state.KMS { + return nil + } + // This should never happen under normal operation because ToKeyState enforces + // that KMS mode keys have a plugin config. This can only occur if someone + // manually edited the key secret and removed the kms-plugin-config data field. + // To mitigate, re-add the removed kms-plugin-config data to the key secret. + if !latest.HasKMSPlugin() { + return fmt.Errorf("latest KMS key %s is missing plugin config", latest.Key.Name) + } + // Skip when the plugin config is already up-to-date or when + if equality.Semantic.DeepEqual(latest.KMS.Plugin, apiServerEncryption.KMS) { + return nil + } + + migrationRequired, err := kmsMigrationRequired(latest.KMS.Plugin, apiServerEncryption.KMS) + if err != nil { + return err + } + // migration-triggering fields changed (needs a new key, not an in-place update). + if migrationRequired { + return nil + } + + keySecret, err := secrets.FromKeyState(c.instanceName, latest) + if err != nil { + return err + } + s, err := c.secretClient.Secrets(keySecret.Namespace).Get(ctx, keySecret.Name, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get key secret %s/%s: %v", keySecret.Namespace, keySecret.Name, err) + } + pluginData, err := encoding.EncodeKMSPluginConfig(apiServerEncryption.KMS) + if err != nil { + return fmt.Errorf("failed to encode KMS plugin config: %v", err) + } + s.Data["encryption.apiserver.operator.openshift.io-kms-plugin-config"] = pluginData + _, updateErr := c.secretClient.Secrets(s.Namespace).Update(ctx, s, metav1.UpdateOptions{}) + if errors.IsConflict(updateErr) { + return nil + } + if updateErr == nil { + syncContext.Recorder().Eventf("EncryptionKeyKMSPluginConfigUpdated", "Updated KMS plugin config on key secret %q in-place", s.Name) + } + return updateErr +} + +// kmsMigrationRequired reports whether the KMS config change between latest +// (stored in the key secret) and current (from the APIServer CR) involves +// migration-triggering fields that require a new encryption key. +// Returns false when only in-place-safe fields differ (image, TLS, authentication) +// or when configs are identical. +func kmsMigrationRequired(latest, current configv1.KMSPluginConfig) (bool, error) { + if equality.Semantic.DeepEqual(latest, current) { + return false, nil + } + if latest.Type != configv1.VaultKMSProvider || current.Type != configv1.VaultKMSProvider { + return false, fmt.Errorf("KMS plugin config has an invalid type: %q", latest.Type) + } + if latest.Type != current.Type { + return true, nil + } + if latest.Vault.VaultAddress != current.Vault.VaultAddress || + latest.Vault.VaultNamespace != current.Vault.VaultNamespace || + latest.Vault.TransitMount != current.Vault.TransitMount || + latest.Vault.TransitKey != current.Vault.TransitKey { + return true, nil + } + return false, nil +} + func (c *keyController) generateKeySecret(ctx context.Context, keyID uint64, currentMode state.Mode, apiServerEncryption configv1.APIServerEncryption, internalReason, externalReason string) (*corev1.Secret, error) { bs := crypto.ModeToNewKeyFunc[currentMode]() ks := state.KeyState{ diff --git a/pkg/operator/encryption/controllers/key_controller_test.go b/pkg/operator/encryption/controllers/key_controller_test.go index 9fdb0cbd0e..7820f3f741 100644 --- a/pkg/operator/encryption/controllers/key_controller_test.go +++ b/pkg/operator/encryption/controllers/key_controller_test.go @@ -337,7 +337,7 @@ func TestKeyController(t *testing.T) { {Group: "", Resource: "secrets"}, }, targetNamespace: "kms", - expectedActions: []string{"list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed", "get:secrets:openshift-config", "create:secrets:openshift-config-managed", "create:events:kms"}, + expectedActions: []string{"list:secrets:openshift-config-managed", "list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed", "get:secrets:openshift-config", "create:secrets:openshift-config-managed", "create:events:kms"}, initialObjects: []runtime.Object{ encryptiontesting.CreateDummyKubeAPIPod("kube-apiserver-1", "kms", "node-1"), encryptiontesting.CreateVaultAppRoleSecret("vault-approle-secret", "test-role-id", "test-secret-id"), @@ -417,7 +417,7 @@ func TestKeyController(t *testing.T) { }, apiServerObjects: []runtime.Object{apiServerWithKMS}, targetNamespace: "kms", - expectedActions: []string{"list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed"}, + expectedActions: []string{"list:secrets:openshift-config-managed", "list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed"}, }, { @@ -432,7 +432,7 @@ func TestKeyController(t *testing.T) { }, apiServerObjects: []runtime.Object{apiServerWithKMS}, targetNamespace: "kms", - expectedActions: []string{"list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed", "get:secrets:openshift-config", "create:secrets:openshift-config-managed", "create:events:kms"}, + expectedActions: []string{"list:secrets:openshift-config-managed", "list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed", "get:secrets:openshift-config", "create:secrets:openshift-config-managed", "create:events:kms"}, validateFunc: func(ts *testing.T, actions []clientgotesting.Action, targetNamespace string, targetGRs []schema.GroupResource) { wasSecretValidated := false for _, action := range actions { @@ -498,7 +498,7 @@ func TestKeyController(t *testing.T) { apiServerObjects: []runtime.Object{apiServerWithKMS}, targetNamespace: "kms", // Should be no-op because KMS keys don't have time-based rotation - expectedActions: []string{"list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed"}, + expectedActions: []string{"list:secrets:openshift-config-managed", "list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed"}, }, { name: "no-op when latest KMS key is not migrated yet", @@ -512,7 +512,7 @@ func TestKeyController(t *testing.T) { apiServerObjects: []runtime.Object{apiServerWithKMS}, targetNamespace: "kms", // Should be no-op because migration hasn't completed yet - expectedActions: []string{"list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed"}, + expectedActions: []string{"list:secrets:openshift-config-managed", "list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed"}, }, { @@ -527,7 +527,7 @@ func TestKeyController(t *testing.T) { }, apiServerObjects: []runtime.Object{apiServerWithKMS}, targetNamespace: "kms", - expectedActions: []string{"list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed", "get:secrets:openshift-config", "create:secrets:openshift-config-managed", "create:events:kms"}, + expectedActions: []string{"list:secrets:openshift-config-managed", "list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed", "get:secrets:openshift-config", "create:secrets:openshift-config-managed", "create:events:kms"}, validateFunc: func(ts *testing.T, actions []clientgotesting.Action, targetNamespace string, targetGRs []schema.GroupResource) { wasSecretValidated := false for _, action := range actions { @@ -591,7 +591,7 @@ func TestKeyController(t *testing.T) { {Group: "", Resource: "secrets"}, }, targetNamespace: "kms", - expectedActions: []string{"list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed", "get:secrets:openshift-config"}, + expectedActions: []string{"list:secrets:openshift-config-managed", "list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed", "get:secrets:openshift-config"}, initialObjects: []runtime.Object{ encryptiontesting.CreateDummyKubeAPIPod("kube-apiserver-1", "kms", "node-1"), }, @@ -620,7 +620,7 @@ func TestKeyController(t *testing.T) { {Group: "", Resource: "secrets"}, }, targetNamespace: "kms", - expectedActions: []string{"list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed", "get:secrets:openshift-config"}, + expectedActions: []string{"list:secrets:openshift-config-managed", "list:pods:kms", "get:secrets:kms", "list:secrets:openshift-config-managed", "get:secrets:openshift-config"}, initialObjects: []runtime.Object{ encryptiontesting.CreateDummyKubeAPIPod("kube-apiserver-1", "kms", "node-1"), &corev1.Secret{ @@ -702,6 +702,212 @@ func TestKeyController(t *testing.T) { } }, }, + + // In-place KMS plugin config update tests + { + name: "KMS in-place plugin config update during non-convergence", + targetGRs: []schema.GroupResource{ + {Group: "", Resource: "secrets"}, + }, + initialObjects: []runtime.Object{ + // No pods — triggers non-convergence + encryptiontesting.CreateEncryptionKeySecretWithKMSPluginConfig("kms", []schema.GroupResource{{Group: "", Resource: "secrets"}}, 1), + }, + apiServerObjects: []runtime.Object{func() *configv1.APIServer { + s := &configv1.APIServer{ObjectMeta: metav1.ObjectMeta{Name: "cluster"}} + updated := encryptiontesting.DefaultKMSPluginConfig + updated.Vault.KMSPluginImage = "registry.example.com/kms-plugin@sha256:1111111111111111111111111111111111111111111111111111111111111111" + s.Spec.Encryption = configv1.APIServerEncryption{Type: "KMS", KMS: updated} + return s + }()}, + targetNamespace: "kms", + expectedActions: []string{ + "list:secrets:openshift-config-managed", + "get:secrets:openshift-config-managed", + "update:secrets:openshift-config-managed", + "create:events:kms", + "list:pods:kms", + }, + validateFunc: func(ts *testing.T, actions []clientgotesting.Action, targetNamespace string, targetGRs []schema.GroupResource) { + for _, action := range actions { + if action.Matches("update", "secrets") { + updateAction := action.(clientgotesting.UpdateAction) + updatedSecret := updateAction.GetObject().(*corev1.Secret) + pluginData := updatedSecret.Data["encryption.apiserver.operator.openshift.io-kms-plugin-config"] + updated := encryptiontesting.DefaultKMSPluginConfig + updated.Vault.KMSPluginImage = "registry.example.com/kms-plugin@sha256:1111111111111111111111111111111111111111111111111111111111111111" + expectedData, _ := encoding.EncodeKMSPluginConfig(updated) + if string(pluginData) != string(expectedData) { + ts.Errorf("plugin config not updated: got %s", pluginData) + } + encData := updatedSecret.Data["encryption.apiserver.operator.openshift.io-kms-encryption-config"] + original := encryptiontesting.CreateEncryptionKeySecretWithKMSPluginConfig("kms", nil, 1) + if string(encData) != string(original.Data["encryption.apiserver.operator.openshift.io-kms-encryption-config"]) { + ts.Error("KMS encryption config should not change during in-place update") + } + return + } + } + ts.Error("expected an update action for the key secret") + }, + }, + { + name: "KMS in-place plugin config update during convergence", + targetGRs: []schema.GroupResource{ + {Group: "", Resource: "secrets"}, + }, + initialObjects: []runtime.Object{ + encryptiontesting.CreateDummyKubeAPIPod("kube-apiserver-1", "kms", "node-1"), + encryptiontesting.CreateEncryptionKeySecretWithKMSPluginConfig("kms", []schema.GroupResource{{Group: "", Resource: "secrets"}}, 1), + }, + apiServerObjects: []runtime.Object{func() *configv1.APIServer { + s := &configv1.APIServer{ObjectMeta: metav1.ObjectMeta{Name: "cluster"}} + updated := encryptiontesting.DefaultKMSPluginConfig + updated.Vault.KMSPluginImage = "registry.example.com/kms-plugin@sha256:1111111111111111111111111111111111111111111111111111111111111111" + s.Spec.Encryption = configv1.APIServerEncryption{Type: "KMS", KMS: updated} + return s + }()}, + targetNamespace: "kms", + expectedActions: []string{ + "list:secrets:openshift-config-managed", + "get:secrets:openshift-config-managed", + "update:secrets:openshift-config-managed", + "create:events:kms", + "list:pods:kms", + "get:secrets:kms", + "list:secrets:openshift-config-managed", + }, + validateFunc: func(ts *testing.T, actions []clientgotesting.Action, targetNamespace string, targetGRs []schema.GroupResource) { + for _, action := range actions { + if action.Matches("create", "secrets") { + ts.Error("should not create a new key for in-place update") + } + if action.Matches("update", "secrets") { + updateAction := action.(clientgotesting.UpdateAction) + updatedSecret := updateAction.GetObject().(*corev1.Secret) + pluginData := updatedSecret.Data["encryption.apiserver.operator.openshift.io-kms-plugin-config"] + updated := encryptiontesting.DefaultKMSPluginConfig + updated.Vault.KMSPluginImage = "registry.example.com/kms-plugin@sha256:1111111111111111111111111111111111111111111111111111111111111111" + expectedData, _ := encoding.EncodeKMSPluginConfig(updated) + if string(pluginData) != string(expectedData) { + ts.Errorf("plugin config not updated: got %s", pluginData) + } + encData := updatedSecret.Data["encryption.apiserver.operator.openshift.io-kms-encryption-config"] + original := encryptiontesting.CreateEncryptionKeySecretWithKMSPluginConfig("kms", nil, 1) + if string(encData) != string(original.Data["encryption.apiserver.operator.openshift.io-kms-encryption-config"]) { + ts.Error("KMS encryption config should not change during in-place update") + } + } + } + }, + }, + { + name: "KMS migration-triggering change skips in-place during non-convergence", + targetGRs: []schema.GroupResource{ + {Group: "", Resource: "secrets"}, + }, + initialObjects: []runtime.Object{ + encryptiontesting.CreateEncryptionKeySecretWithKMSPluginConfig("kms", []schema.GroupResource{{Group: "", Resource: "secrets"}}, 1), + }, + apiServerObjects: []runtime.Object{func() *configv1.APIServer { + s := &configv1.APIServer{ObjectMeta: metav1.ObjectMeta{Name: "cluster"}} + updated := encryptiontesting.DefaultKMSPluginConfig + updated.Vault.TransitKey = "different-transit-key" + s.Spec.Encryption = configv1.APIServerEncryption{Type: "KMS", KMS: updated} + return s + }()}, + targetNamespace: "kms", + expectedActions: []string{ + "list:secrets:openshift-config-managed", + "list:pods:kms", + }, + }, + { + name: "KMS no-op when plugin config matches during non-convergence", + targetGRs: []schema.GroupResource{ + {Group: "", Resource: "secrets"}, + }, + initialObjects: []runtime.Object{ + encryptiontesting.CreateEncryptionKeySecretWithKMSPluginConfig("kms", []schema.GroupResource{{Group: "", Resource: "secrets"}}, 1), + }, + apiServerObjects: []runtime.Object{func() *configv1.APIServer { + s := &configv1.APIServer{ObjectMeta: metav1.ObjectMeta{Name: "cluster"}} + s.Spec.Encryption = configv1.APIServerEncryption{Type: "KMS", KMS: encryptiontesting.DefaultKMSPluginConfig} + return s + }()}, + targetNamespace: "kms", + expectedActions: []string{ + "list:secrets:openshift-config-managed", + "list:pods:kms", + }, + }, + { + name: "skips in-place when latest key is not KMS", + targetGRs: []schema.GroupResource{ + {Group: "", Resource: "secrets"}, + }, + initialObjects: []runtime.Object{ + encryptiontesting.CreateEncryptionKeySecretWithKMSPluginConfig("kms", []schema.GroupResource{{Group: "", Resource: "secrets"}}, 1), + encryptiontesting.CreateEncryptionKeySecretWithRawKeyWithMode("kms", nil, 2, []byte("61def964fb967f5d7c44a2af8dab6865"), "aescbc"), + }, + apiServerObjects: []runtime.Object{func() *configv1.APIServer { + s := &configv1.APIServer{ObjectMeta: metav1.ObjectMeta{Name: "cluster"}} + updated := encryptiontesting.DefaultKMSPluginConfig + updated.Vault.KMSPluginImage = "registry.example.com/kms-plugin@sha256:1111111111111111111111111111111111111111111111111111111111111111" + s.Spec.Encryption = configv1.APIServerEncryption{Type: "KMS", KMS: updated} + return s + }()}, + targetNamespace: "kms", + expectedActions: []string{ + "list:secrets:openshift-config-managed", + "list:pods:kms", + }, + }, + { + name: "KMS in-place update only modifies latest key", + targetGRs: []schema.GroupResource{ + {Group: "", Resource: "secrets"}, + }, + initialObjects: []runtime.Object{ + encryptiontesting.CreateEncryptionKeySecretWithKMSPluginConfig("kms", []schema.GroupResource{{Group: "", Resource: "secrets"}}, 1), + encryptiontesting.CreateEncryptionKeySecretWithKMSPluginConfig("kms", nil, 2), + }, + apiServerObjects: []runtime.Object{func() *configv1.APIServer { + s := &configv1.APIServer{ObjectMeta: metav1.ObjectMeta{Name: "cluster"}} + updated := encryptiontesting.DefaultKMSPluginConfig + updated.Vault.KMSPluginImage = "registry.example.com/kms-plugin@sha256:1111111111111111111111111111111111111111111111111111111111111111" + s.Spec.Encryption = configv1.APIServerEncryption{Type: "KMS", KMS: updated} + return s + }()}, + targetNamespace: "kms", + expectedActions: []string{ + "list:secrets:openshift-config-managed", + "get:secrets:openshift-config-managed", + "update:secrets:openshift-config-managed", + "create:events:kms", + "list:pods:kms", + }, + validateFunc: func(ts *testing.T, actions []clientgotesting.Action, targetNamespace string, targetGRs []schema.GroupResource) { + for _, action := range actions { + if action.Matches("update", "secrets") { + updateAction := action.(clientgotesting.UpdateAction) + updatedSecret := updateAction.GetObject().(*corev1.Secret) + if updatedSecret.Name != "encryption-key-kms-2" { + ts.Errorf("expected key 2 to be updated, got %s", updatedSecret.Name) + } + pluginData := updatedSecret.Data["encryption.apiserver.operator.openshift.io-kms-plugin-config"] + updated := encryptiontesting.DefaultKMSPluginConfig + updated.Vault.KMSPluginImage = "registry.example.com/kms-plugin@sha256:1111111111111111111111111111111111111111111111111111111111111111" + expectedData, _ := encoding.EncodeKMSPluginConfig(updated) + if string(pluginData) != string(expectedData) { + ts.Errorf("plugin config not updated on latest key") + } + return + } + } + ts.Error("expected an update action for the latest key secret") + }, + }, } for _, scenario := range scenarios { @@ -987,3 +1193,122 @@ func TestGetCurrentModeReasonAndEncryptionConfig(t *testing.T) { }) } } + +func TestKmsMigrationRequired(t *testing.T) { + base := configv1.KMSPluginConfig{ + Type: configv1.VaultKMSProvider, + Vault: configv1.VaultKMSPluginConfig{ + KMSPluginImage: "registry.example.com/kms-plugin@sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890", + VaultAddress: "https://vault.example.com", + VaultNamespace: "test-ns", + TransitMount: "transit", + TransitKey: "my-key", + }, + } + + tests := []struct { + name string + modify func(configv1.KMSPluginConfig) configv1.KMSPluginConfig + expected bool + expectedErr bool + }{ + { + name: "identical configs", + modify: func(c configv1.KMSPluginConfig) configv1.KMSPluginConfig { return c }, + expected: false, + }, + { + name: "only image changed (in-place)", + modify: func(c configv1.KMSPluginConfig) configv1.KMSPluginConfig { + c.Vault.KMSPluginImage = "registry.example.com/kms-plugin@sha256:0000001234567890abcdef1234567890abcdef1234567890abcdef1234567890" + return c + }, + expected: false, + }, + { + name: "only TLS caBundle changed (in-place)", + modify: func(c configv1.KMSPluginConfig) configv1.KMSPluginConfig { + c.Vault.TLS.CABundle = configv1.VaultConfigMapReference{Name: "new-ca-bundle"} + return c + }, + expected: false, + }, + { + name: "only authentication secret changed (in-place)", + modify: func(c configv1.KMSPluginConfig) configv1.KMSPluginConfig { + c.Vault.Authentication.AppRole.Secret.Name = "new-secret" + return c + }, + expected: false, + }, + { + name: "vault address changed (migration)", + modify: func(c configv1.KMSPluginConfig) configv1.KMSPluginConfig { + c.Vault.VaultAddress = "https://vault2.example.com" + return c + }, + expected: true, + }, + { + name: "vault namespace changed (migration)", + modify: func(c configv1.KMSPluginConfig) configv1.KMSPluginConfig { + c.Vault.VaultNamespace = "other-ns" + return c + }, + expected: true, + }, + { + name: "transit mount changed (migration)", + modify: func(c configv1.KMSPluginConfig) configv1.KMSPluginConfig { + c.Vault.TransitMount = "transit-v2" + return c + }, + expected: true, + }, + { + name: "transit key changed (migration)", + modify: func(c configv1.KMSPluginConfig) configv1.KMSPluginConfig { + c.Vault.TransitKey = "different-key" + return c + }, + expected: true, + }, + { + name: "mixed: migration field + in-place field changed", + modify: func(c configv1.KMSPluginConfig) configv1.KMSPluginConfig { + c.Vault.TransitKey = "different-key" + c.Vault.KMSPluginImage = "registry.example.com/kms-plugin@sha256:0000001234567890abcdef1234567890abcdef1234567890abcdef1234567890" + return c + }, + expected: true, + }, + { + name: "unsupported KMS type returns error", + modify: func(c configv1.KMSPluginConfig) configv1.KMSPluginConfig { + c.Type = "UnsupportedProvider" + c.Vault.KMSPluginImage = "different-image" + return c + }, + expectedErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + current := tt.modify(base) + got, err := kmsMigrationRequired(base, current) + if tt.expectedErr { + if err == nil { + t.Errorf("kmsMigrationRequired() expected error, got nil") + } + return + } + if err != nil { + t.Errorf("kmsMigrationRequired() unexpected error: %v", err) + } + if got != tt.expected { + t.Errorf("kmsMigrationRequired() = %v, want %v", got, tt.expected) + } + }) + } +} diff --git a/pkg/operator/encryption/controllers/state_controller.go b/pkg/operator/encryption/controllers/state_controller.go index a0b7f1f677..4ef4c5982e 100644 --- a/pkg/operator/encryption/controllers/state_controller.go +++ b/pkg/operator/encryption/controllers/state_controller.go @@ -5,6 +5,8 @@ import ( "fmt" "time" + "k8s.io/apimachinery/pkg/api/equality" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" corev1client "k8s.io/client-go/kubernetes/typed/core/v1" @@ -16,6 +18,7 @@ import ( configv1informers "github.com/openshift/client-go/config/informers/externalversions/config/v1" "github.com/openshift/library-go/pkg/controller/factory" "github.com/openshift/library-go/pkg/operator/encryption/encryptiondata" + "github.com/openshift/library-go/pkg/operator/encryption/secrets" "github.com/openshift/library-go/pkg/operator/encryption/state" "github.com/openshift/library-go/pkg/operator/encryption/statemachine" "github.com/openshift/library-go/pkg/operator/events" @@ -132,6 +135,11 @@ func (c *stateController) generateAndApplyCurrentEncryptionConfigSecret(ctx cont return err } if len(transitioningReason) > 0 { + // Even when not converged, propagate in-place KMS plugin config changes + // so the revision controller can create a new revision with corrected sidecar config. + if err := c.maybeUpdateKMSDataInEncryptionConfigSecret(ctx, recorder); err != nil { + return err + } queue.AddAfter(stateWorkKey, 2*time.Minute) return nil } @@ -163,6 +171,84 @@ func (c *stateController) generateAndApplyCurrentEncryptionConfigSecret(ctx cont return nil } +// maybeUpdateKMSDataInEncryptionConfigSecret propagates in-place KMS plugin +// config changes to the encryption-config secret during non-convergence. It enriches +// the existing state with current key secrets (picking up updated plugin configs from +// key_controller), guards that the EncryptionConfiguration is unchanged (no key +// promotion or structural changes), and applies only kms-plugin-config updates. +func (c *stateController) maybeUpdateKMSDataInEncryptionConfigSecret(ctx context.Context, recorder events.Recorder) error { + keySecrets, err := secrets.ListKeySecrets(ctx, c.secretClient, c.encryptionSecretSelector) + if err != nil { + return err + } + if len(keySecrets) == 0 { + return nil + } + name := fmt.Sprintf("%s-%s", encryptiondata.EncryptionConfSecretName, c.instanceName) + namespace := "openshift-config-managed" + // Read the unrevisioned encryption-config from openshift-config-managed (not the + // revisioned copy in the target namespace) because during non-convergence the deployer + // cannot return a single converged revision. Updating this source secret triggers + // resource-sync → revision → rollout to unblock the stuck revision. + existingSecret, err := c.secretClient.Secrets(namespace).Get(ctx, name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return nil + } + if err != nil { + return err + } + + existingConfig, err := encryptiondata.FromSecret(existingSecret) + if err != nil { + return err + } + if existingConfig == nil || len(existingConfig.KMSPlugins) == 0 { + return nil + } + + // Round-trip through ToEncryptionState → FromEncryptionState to pick up + // updated KMS plugin configs from key secrets while preserving the existing + // EncryptionConfiguration structure. ToEncryptionState enriches each key in + // the config with its backed secret data (including any in-place plugin config + // updates), and FromEncryptionState rebuilds the Config from the enriched state. + enrichedState, _ := encryptiondata.ToEncryptionState(existingConfig, keySecrets) + if enrichedState == nil { + return nil + } + + rebuiltConfig, err := encryptiondata.FromEncryptionState(enrichedState) + if err != nil { + return err + } + + // Only proceed if the provider list, key ordering, and write key designation + // are unchanged. Structural changes require convergence to avoid a server + // encrypting with a key another server hasn't observed. + if !equality.Semantic.DeepEqual(existingConfig.Encryption.Resources, rebuiltConfig.Encryption.Resources) { + return nil + } + + // Error if the plugin key set changed unexpectedly — this indicates + // a bug in the enrichment round-trip or corrupted state. + if len(existingConfig.KMSPlugins) != len(rebuiltConfig.KMSPlugins) { + return fmt.Errorf("KMS plugin key set size changed unexpectedly: existing=%d, rebuilt=%d", len(existingConfig.KMSPlugins), len(rebuiltConfig.KMSPlugins)) + } + for keyID := range existingConfig.KMSPlugins { + if _, ok := rebuiltConfig.KMSPlugins[keyID]; !ok { + return fmt.Errorf("KMS plugin config for keyID %s disappeared after enrichment round-trip", keyID) + } + } + + changed, err := c.applyEncryptionConfigSecret(ctx, rebuiltConfig, recorder) + if err != nil { + return err + } + if changed { + recorder.Eventf("EncryptionKMSPluginConfigPropagated", "Updated KMS plugin config in encryption-config secret %s/%s during non-convergence", namespace, name) + } + return nil +} + func (c *stateController) applyEncryptionConfigSecret(ctx context.Context, secretData *encryptiondata.Config, recorder events.Recorder) (bool, error) { s, err := encryptiondata.ToSecret("openshift-config-managed", fmt.Sprintf("%s-%s", encryptiondata.EncryptionConfSecretName, c.instanceName), secretData) if err != nil { diff --git a/pkg/operator/encryption/controllers/state_controller_test.go b/pkg/operator/encryption/controllers/state_controller_test.go index d809e44ce3..34e2d892cb 100644 --- a/pkg/operator/encryption/controllers/state_controller_test.go +++ b/pkg/operator/encryption/controllers/state_controller_test.go @@ -1170,6 +1170,162 @@ func TestStateController(t *testing.T) { } }, }, + + // In-place KMS plugin config propagation during non-convergence + { + name: "KMS: propagates updated plugin config to encryption-config during non-convergence", + targetNamespace: "kms", + targetGRs: []schema.GroupResource{ + {Group: "", Resource: "secrets"}, + }, + initialResources: []runtime.Object{ + // No pods — non-converged + // Key secret with updated plugin config (new image) + encryptiontesting.CreateEncryptionKeySecretWithCustomKMSPluginConfig("kms", []schema.GroupResource{{Group: "", Resource: "secrets"}}, 1, func() configv1.KMSPluginConfig { + updated := encryptiontesting.DefaultKMSPluginConfig + updated.Vault.KMSPluginImage = "registry.example.com/kms-plugin@sha256:1111111111111111111111111111111111111111111111111111111111111111" + return updated + }()), + // Existing encryption-config in openshift-config-managed with old plugin config + func() *corev1.Secret { + ec := &encryptiondata.Config{ + Encryption: &apiserverconfigv1.EncryptionConfiguration{ + Resources: []apiserverconfigv1.ResourceConfiguration{{ + Resources: []string{"secrets"}, + Providers: []apiserverconfigv1.ProviderConfiguration{{ + KMS: &apiserverconfigv1.KMSConfiguration{ + APIVersion: "v2", + Name: "1_secrets", + Endpoint: "unix:///var/run/kmsplugin/kms-1.sock", + Timeout: &metav1.Duration{Duration: 10 * time.Second}, + }, + }, { + Identity: &apiserverconfigv1.IdentityConfiguration{}, + }}, + }}, + }, + KMSPlugins: map[string]configv1.KMSPluginConfig{"1": encryptiontesting.DefaultKMSPluginConfig}, + } + ecs := createEncryptionCfgSecret(t, "openshift-config-managed", "1", ec) + ecs.Name = "encryption-config-kms" + return ecs + }(), + }, + expectedActions: []string{ + "list:pods:kms", + "list:secrets:openshift-config-managed", + "get:secrets:openshift-config-managed", + "get:secrets:openshift-config-managed", + "update:secrets:openshift-config-managed", + "create:events:kms", + "create:events:kms", + }, + validateFunc: func(ts *testing.T, actions []clientgotesting.Action, destName string, expectedEncryptionCfg *encryptiondata.Config) { + for _, action := range actions { + if action.Matches("update", "secrets") { + updateAction := action.(clientgotesting.UpdateAction) + actualSecret := updateAction.GetObject().(*corev1.Secret) + actualConfig, err := encryptiondata.FromSecret(actualSecret) + if err != nil { + ts.Fatalf("failed to parse updated encryption config: %v", err) + } + // Verify plugin config was updated + updatedPlugin := encryptiontesting.DefaultKMSPluginConfig + updatedPlugin.Vault.KMSPluginImage = "registry.example.com/kms-plugin@sha256:1111111111111111111111111111111111111111111111111111111111111111" + if !equality.Semantic.DeepEqual(actualConfig.KMSPlugins["1"], updatedPlugin) { + ts.Errorf("plugin config not propagated: got %+v", actualConfig.KMSPlugins["1"]) + } + // Verify EncryptionConfiguration (structural part) is unchanged + if len(actualConfig.Encryption.Resources) != 1 || len(actualConfig.Encryption.Resources[0].Providers) != 2 { + ts.Errorf("EncryptionConfiguration structure changed unexpectedly") + } + return + } + } + ts.Error("expected an update action for the encryption-config secret") + }, + }, + { + name: "KMS: no-op propagation when plugin configs match during non-convergence", + targetNamespace: "kms", + targetGRs: []schema.GroupResource{ + {Group: "", Resource: "secrets"}, + }, + initialResources: []runtime.Object{ + encryptiontesting.CreateEncryptionKeySecretWithKMSPluginConfig("kms", []schema.GroupResource{{Group: "", Resource: "secrets"}}, 1), + func() *corev1.Secret { + ec := &encryptiondata.Config{ + Encryption: &apiserverconfigv1.EncryptionConfiguration{ + Resources: []apiserverconfigv1.ResourceConfiguration{{ + Resources: []string{"secrets"}, + Providers: []apiserverconfigv1.ProviderConfiguration{{ + KMS: &apiserverconfigv1.KMSConfiguration{ + APIVersion: "v2", + Name: "1_secrets", + Endpoint: "unix:///var/run/kmsplugin/kms-1.sock", + Timeout: &metav1.Duration{Duration: 10 * time.Second}, + }, + }, { + Identity: &apiserverconfigv1.IdentityConfiguration{}, + }}, + }}, + }, + KMSPlugins: map[string]configv1.KMSPluginConfig{"1": encryptiontesting.DefaultKMSPluginConfig}, + } + ecs := createEncryptionCfgSecret(t, "openshift-config-managed", "1", ec) + ecs.Name = "encryption-config-kms" + return ecs + }(), + }, + expectedActions: []string{ + "list:pods:kms", + "list:secrets:openshift-config-managed", + "get:secrets:openshift-config-managed", + "get:secrets:openshift-config-managed", + }, + }, + { + name: "KMS: no encryption-config secret yet during non-convergence", + targetNamespace: "kms", + targetGRs: []schema.GroupResource{ + {Group: "", Resource: "secrets"}, + }, + initialResources: []runtime.Object{ + encryptiontesting.CreateEncryptionKeySecretWithKMSPluginConfig("kms", []schema.GroupResource{{Group: "", Resource: "secrets"}}, 1), + }, + expectedActions: []string{ + "list:pods:kms", + "list:secrets:openshift-config-managed", + "get:secrets:openshift-config-managed", + }, + }, + { + name: "non-KMS mode skips propagation during non-convergence", + targetNamespace: "kms", + targetGRs: []schema.GroupResource{ + {Group: "", Resource: "secrets"}, + }, + initialResources: []runtime.Object{ + encryptiontesting.CreateEncryptionKeySecretWithRawKey("kms", []schema.GroupResource{{Group: "", Resource: "secrets"}}, 1, []byte("61def964fb967f5d7c44a2af8dab6865")), + func() *corev1.Secret { + ec := encryptiondatatesting.CreateEncryptionCfgWithWriteKey([]encryptiondatatesting.EncryptionKeysResourceTuple{{ + Resource: "secrets", + Keys: []apiserverconfigv1.Key{{ + Name: "1", + Secret: "NjFkZWY5NjRmYjk2N2Y1ZDdjNDRhMmFmOGRhYjY4NjU=", + }}, + }}) + ecs := createEncryptionCfgSecret(t, "openshift-config-managed", "1", ec) + ecs.Name = "encryption-config-kms" + return ecs + }(), + }, + expectedActions: []string{ + "list:pods:kms", + "list:secrets:openshift-config-managed", + "get:secrets:openshift-config-managed", + }, + }, } for _, scenario := range scenarios { diff --git a/pkg/operator/encryption/encryptiondata/config_test.go b/pkg/operator/encryption/encryptiondata/config_test.go index 4e6f1d3a81..fd8fc709ed 100644 --- a/pkg/operator/encryption/encryptiondata/config_test.go +++ b/pkg/operator/encryption/encryptiondata/config_test.go @@ -452,6 +452,33 @@ func TestToEncryptionState(t *testing.T) { } } +// ToEncryptionState must not mutate Config object. +// This test will catch it. +func TestToEncryptionStateDoesNotMutateInput(t *testing.T) { + input := encryptiondatatesting.CreateEncryptionCfgWithWriteKey([]encryptiondatatesting.EncryptionKeysResourceTuple{ + { + Resource: "secrets", + Keys: []apiserverconfigv1.Key{ + {Name: "2", Secret: "AAAAAAAAAAAAAAAAAAAAAA=="}, + {Name: "1", Secret: "MTcxNTgyYTBmY2Q2YzVmZGI2NWNiZjVhM2U5MjQ5ZDc="}, + }, + Modes: []string{"KMS", "aescbc"}, + }, + }) + inputCopy := input.Encryption.DeepCopy() + + keySecrets := []*corev1.Secret{ + encryptiontesting.CreateEncryptionKeySecretWithKMSPluginConfig("test", []schema.GroupResource{{Resource: "secrets"}}, 2), + encryptiontesting.CreateEncryptionKeySecretWithRawKey("test", []schema.GroupResource{{Resource: "secrets"}}, 1, []byte("61def964fb967f5d7c44a2af8dab6865")), + } + + encryptiondata.ToEncryptionState(input, keySecrets) + + if diff := cmp.Diff(inputCopy, input.Encryption); diff != "" { + t.Fatalf("ToEncryptionState mutated the input Config.Encryption:\n%s", diff) + } +} + func TestFromEncryptionState(t *testing.T) { scenarios := []struct { name string diff --git a/test/e2e-encryption/encryption_test.go b/test/e2e-encryption/encryption_test.go index 37f558e7bf..d0fe51a578 100644 --- a/test/e2e-encryption/encryption_test.go +++ b/test/e2e-encryption/encryption_test.go @@ -13,6 +13,7 @@ import ( corev1 "k8s.io/api/core/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" v1 "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1" + "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -284,20 +285,39 @@ func TestEncryptionIntegration(tt *testing.T) { } } - for keyID := range expectedKeyIDs { - pluginConfig, ok := cfg.KMSPlugins[keyID] - if !ok { - t.Fatalf("expected kms-plugin-config for keyID %s but not found in encryption-config secret", keyID) + err = wait.PollUntilContextTimeout(ctx, time.Millisecond*500, wait.ForeverTestTimeout, true, func(ctx context.Context) (bool, error) { + encryptionConfigSecret, err = kubeClient.CoreV1().Secrets("openshift-config-managed").Get(ctx, fmt.Sprintf("encryption-config-%s", component), metav1.GetOptions{}) + if err != nil { + return false, err } - - keySecret, err := kubeClient.CoreV1().Secrets("openshift-config-managed").Get(ctx, fmt.Sprintf("encryption-key-%s-%s", component, keyID), metav1.GetOptions{}) - require.NoError(t, err) - keyPluginData := keySecret.Data["encryption.apiserver.operator.openshift.io-kms-plugin-config"] - require.NotEmpty(t, keyPluginData, "key secret %s missing kms-plugin-config data", keyID) - keyPluginConfig, err := encoding.DecodeKMSPluginConfig(keyPluginData) - require.NoError(t, err) - require.Equal(t, keyPluginConfig, pluginConfig, "kms-plugin-config for keyID %s in encryption-config secret does not match key secret", keyID) - } + cfg, err = encryptiondata.FromSecret(encryptionConfigSecret) + if err != nil { + return false, nil + } + for keyID := range expectedKeyIDs { + pluginConfig, ok := cfg.KMSPlugins[keyID] + if !ok { + return false, nil + } + keySecret, err := kubeClient.CoreV1().Secrets("openshift-config-managed").Get(ctx, fmt.Sprintf("encryption-key-%s-%s", component, keyID), metav1.GetOptions{}) + if err != nil { + return false, err + } + keyPluginData := keySecret.Data["encryption.apiserver.operator.openshift.io-kms-plugin-config"] + if len(keyPluginData) == 0 { + return false, nil + } + keyPluginConfig, err := encoding.DecodeKMSPluginConfig(keyPluginData) + if err != nil { + return false, nil + } + if !equality.Semantic.DeepEqual(keyPluginConfig, pluginConfig) { + return false, nil + } + } + return true, nil + }) + require.NoError(t, err) for keyID := range cfg.KMSPlugins { if !expectedKeyIDs[keyID] { t.Fatalf("unexpected kms-plugin-config for keyID %s in encryption-config secret", keyID) @@ -606,6 +626,37 @@ func TestEncryptionIntegration(tt *testing.T) { ) waitForConditionStatus("Encrypted", operatorv1.ConditionTrue) + t.Logf("In-place KMS plugin config update: change image on key 12 (non-migration field)") + _, err = fakeApiServerClient.Patch(ctx, "cluster", types.MergePatchType, []byte(`{"spec":{"encryption":{"type":"KMS","kms":{"type":"Vault","vault":{"kmsPluginImage":"registry.example.com/kms-plugin@sha256:1111111111111111111111111111111111111111111111111111111111111111","vaultAddress":"https://vault.example.com","authentication":{"type":"AppRole","appRole":{"secret":{"name":"vault-approle-secret"}}},"transitKey":"test-transit-key"}}}}}`), metav1.PatchOptions{}) + require.NoError(t, err) + + // Wait for the key secret's plugin config to be updated in-place + err = wait.PollUntilContextTimeout(ctx, time.Millisecond*500, wait.ForeverTestTimeout, true, func(ctx context.Context) (bool, error) { + s, err := kubeClient.CoreV1().Secrets("openshift-config-managed").Get(ctx, fmt.Sprintf("encryption-key-%s-%s", component, "12"), metav1.GetOptions{}) + if err != nil { + return false, err + } + pluginData := s.Data["encryption.apiserver.operator.openshift.io-kms-plugin-config"] + if len(pluginData) == 0 { + return false, nil + } + cfg, err := encoding.DecodeKMSPluginConfig(pluginData) + if err != nil { + return false, fmt.Errorf("failed to decode kms-plugin-config for key 12: %w", err) + } + return cfg.Vault.KMSPluginImage == "registry.example.com/kms-plugin@sha256:1111111111111111111111111111111111111111111111111111111111111111", nil + }) + require.NoError(t, err) + // Verify no new key was created — in-place update should not trigger key creation + waitForKeys(11) + // Verify the encryption-config secret also has the updated plugin config + verifyKMSPlugins() + // The in-place plugin config update on key 12 triggers the state controller to + // propagate the updated plugin data into the encryption-config secret, consume it. + waitForConfigs( + fmt.Sprintf("kubeapiservers.operator.openshift.io=kms:%s,aescbc:11,identity;kubeschedulers.operator.openshift.io=kms:%s,aescbc:11,identity", kms12, kms12Sched), + ) + t.Logf("Switch to identity from KMS") _, err = fakeApiServerClient.Patch(ctx, "cluster", types.MergePatchType, []byte(`{"spec":{"encryption":{"type":"identity","kms":null}}}`), metav1.PatchOptions{}) require.NoError(t, err) From d94ed6169d86fab2c6dfc646eb0dd6941a9f48c1 Mon Sep 17 00:00:00 2001 From: gangwgr Date: Tue, 26 May 2026 18:15:39 +0530 Subject: [PATCH 4/4] test/kms: add InvalidImageVaultEncryptionProvider for degradation tests The invalid image uses a properly formatted OCI reference with sha256 digest that passes API validation but points to a non-existent image, causing ImagePullBackOff and operator degradation. --- test/library/encryption/helpers.go | 31 ++++++++++++++++++ test/library/encryption/kms/vault.go | 34 +++++++++++++++++++ test/library/encryption/scenarios.go | 49 +++++++++++----------------- 3 files changed, 84 insertions(+), 30 deletions(-) diff --git a/test/library/encryption/helpers.go b/test/library/encryption/helpers.go index 890a52ef3c..329fc0e98e 100644 --- a/test/library/encryption/helpers.go +++ b/test/library/encryption/helpers.go @@ -346,3 +346,34 @@ func setUpTearDown(namespace string) func(testing.TB, bool) { } } } + +// WaitForPodImagePullBackOff polls pods in the given namespace until at least one pod +// has an init container or container stuck in ImagePullBackOff. This is useful for +// detecting that an invalid KMS plugin image is causing static pod failure. +func WaitForPodImagePullBackOff(ctx context.Context, t testing.TB, kubeClient kubernetes.Interface, namespace, labelSelector string, timeout time.Duration) { + t.Helper() + t.Logf("Waiting up to %s for a pod in %s (selector=%s) to enter ImagePullBackOff", timeout, namespace, labelSelector) + err := wait.PollUntilContextTimeout(ctx, waitPollInterval, timeout, true, func(ctx context.Context) (bool, error) { + pods, err := kubeClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector}) + if err != nil { + t.Logf("Error listing pods: %v", err) + return false, nil + } + for _, pod := range pods.Items { + for _, cs := range pod.Status.InitContainerStatuses { + if cs.State.Waiting != nil && (cs.State.Waiting.Reason == "ImagePullBackOff" || cs.State.Waiting.Reason == "ErrImagePull") { + t.Logf("Pod %s init container %s is in %s", pod.Name, cs.Name, cs.State.Waiting.Reason) + return true, nil + } + } + for _, cs := range pod.Status.ContainerStatuses { + if cs.State.Waiting != nil && (cs.State.Waiting.Reason == "ImagePullBackOff" || cs.State.Waiting.Reason == "ErrImagePull") { + t.Logf("Pod %s container %s is in %s", pod.Name, cs.Name, cs.State.Waiting.Reason) + return true, nil + } + } + } + return false, nil + }) + require.NoError(t, err, "timed out waiting for pod to enter ImagePullBackOff in namespace %s", namespace) +} diff --git a/test/library/encryption/kms/vault.go b/test/library/encryption/kms/vault.go index 5585ec5a2a..2c44aefd0e 100644 --- a/test/library/encryption/kms/vault.go +++ b/test/library/encryption/kms/vault.go @@ -33,6 +33,11 @@ const ( defaultVaultTransitKey = "kms-key" defaultAppRoleTargetNamespace = "openshift-config" vaultCommandTimeout = 30 * time.Second + + // InvalidVaultKMSPluginImage is an OCI image reference that passes API validation + // (correct format, sha256 digest, sufficient length) but does not exist in any registry. + // Use this to test degradation when the KMS plugin image cannot be pulled. + InvalidVaultKMSPluginImage = "quay.io/openshifttest/mock-kms-plugin-nonexistent@sha256:0000000000000000000000000000000000000000000000000000000000000000" ) // DefaultVaultEncryptionProvider is a ready-to-use Vault KMS EncryptionProvider for e2e tests. @@ -47,6 +52,35 @@ var DefaultFakeVaultEncryptionProvider = library.EncryptionProvider{ Setup: ensureDefaultVaultAppRoleSecret, } +// InvalidImageVaultEncryptionProvider is a Vault KMS EncryptionProvider with a +// non-existent plugin image. Use this for testing degradation when the image cannot be pulled. +var InvalidImageVaultEncryptionProvider = library.EncryptionProvider{ + APIServerEncryption: InvalidImageVaultKMSPluginConfig, + Setup: ensureDefaultVaultAppRoleSecret, +} + +// InvalidImageVaultKMSPluginConfig is identical to DefaultVaultKMSPluginConfig +// but uses a non-existent image that will fail to pull. +var InvalidImageVaultKMSPluginConfig = configv1.APIServerEncryption{ + Type: configv1.EncryptionTypeKMS, + KMS: configv1.KMSPluginConfig{ + Type: configv1.VaultKMSProvider, + Vault: configv1.VaultKMSPluginConfig{ + KMSPluginImage: InvalidVaultKMSPluginImage, + VaultAddress: defaultVaultAddress, + VaultNamespace: defaultVaultEnterpriseNS, + TransitMount: defaultVaultTransitMount, + TransitKey: defaultVaultTransitKey, + Authentication: configv1.VaultAuthentication{ + Type: configv1.VaultAuthenticationTypeAppRole, + AppRole: configv1.VaultAppRoleAuthentication{ + Secret: configv1.VaultSecretReference{Name: defaultVaultAppRoleSecretName}, + }, + }, + }, + }, +} + // DefaultVaultKMSPluginConfig is the standard Vault KMS encryption config // used by CI e2e tests. var DefaultVaultKMSPluginConfig = configv1.APIServerEncryption{ diff --git a/test/library/encryption/scenarios.go b/test/library/encryption/scenarios.go index 1abf781ad9..27bb057519 100644 --- a/test/library/encryption/scenarios.go +++ b/test/library/encryption/scenarios.go @@ -279,14 +279,14 @@ func TestEncryptionRotation(ctx context.Context, t testing.TB, scenario Rotation } // InvalidImageRecoveryScenario tests that an invalid KMS plugin image causes -// the cluster to degrade and that correcting the image restores normal operation. +// the operator to degrade (even when switching to aescbc) and that providing +// a valid KMS image restores normal operation. // -// Arda's scenario: -// 1. Enable KMS with invalid image -// 2. Attempt to switch to aescbc (cluster should be stuck on KMS) -// 3. See degraded -// 4. Update with KMS with valid image -// 5. See everything works +// Flow: +// 1. Enable KMS with invalid (non-existent) plugin image +// 2. Switch to aescbc — cluster should remain stuck on KMS (rollout blocked) +// 3. Operator reports degraded (stuck NodeInstaller, ImagePullBackOff, etc.) +// 4. Fix by applying KMS with valid image and wait for full encryption migration type InvalidImageRecoveryScenario struct { BasicScenario // InvalidImageProvider is the KMS EncryptionProvider configured with an invalid @@ -297,19 +297,15 @@ type InvalidImageRecoveryScenario struct { ValidImageProvider EncryptionProvider // WaitForDegraded should block until the operator reports a degraded condition. WaitForDegraded func(ctx context.Context, t testing.TB) - // WaitForRecovery should block until the operator reports a healthy condition - // and encryption is fully operational. - WaitForRecovery func(ctx context.Context, t testing.TB) } // TestEncryptionInvalidImageRecovery tests that: -// 1. Enabling KMS with an invalid plugin image causes degradation -// 2. The cluster remains stuck and cannot migrate to another mode (e.g. aescbc) -// 3. Fixing the KMS config with a valid image restores the cluster +// 1. Enabling KMS with an invalid plugin image causes the operator to degrade +// 2. Switching to aescbc does NOT recover (cluster is stuck on the failed KMS rollout) +// 3. Providing a valid KMS image restores the cluster (uses SetAndWaitForEncryptionType) func TestEncryptionInvalidImageRecovery(ctx context.Context, t testing.TB, scenario InvalidImageRecoveryScenario) { e := NewE(t, PrintEventsOnFailure(scenario.OperatorNamespace)) require.NotNil(t, scenario.WaitForDegraded, "WaitForDegraded must not be nil") - require.NotNil(t, scenario.WaitForRecovery, "WaitForRecovery must not be nil") require.Equal(t, configv1.EncryptionTypeKMS, scenario.InvalidImageProvider.Type, "InvalidImageProvider must be KMS type") require.Equal(t, configv1.EncryptionTypeKMS, scenario.ValidImageProvider.Type, "ValidImageProvider must be KMS type") @@ -326,7 +322,8 @@ func TestEncryptionInvalidImageRecovery(ctx context.Context, t testing.TB, scena _, err = cs.ApiServerConfig.Update(ctx, apiServer, metav1.UpdateOptions{}) require.NoError(t, err) - // step 2: attempt to switch to aescbc — cluster should remain stuck on KMS + // step 2: attempt to switch to aescbc — the cluster should remain stuck on KMS + // because the static pod rollout with the invalid image is blocking progress t.Log("Attempting to switch to aescbc (cluster should remain stuck on KMS)") apiServer, err = cs.ApiServerConfig.Get(ctx, "cluster", metav1.GetOptions{}) require.NoError(t, err) @@ -334,24 +331,16 @@ func TestEncryptionInvalidImageRecovery(ctx context.Context, t testing.TB, scena _, err = cs.ApiServerConfig.Update(ctx, apiServer, metav1.UpdateOptions{}) require.NoError(t, err) - // step 3: wait for degraded + // step 3: wait for degraded — the operator is stuck because the invalid KMS image + // prevents the NodeInstaller from completing the rollout t.Log("Waiting for operator to report degraded status") scenario.WaitForDegraded(ctx, e) - // step 4: fix the config with valid KMS image - t.Log("Updating KMS encryption with valid plugin image to recover") - if scenario.ValidImageProvider.Setup != nil { - scenario.ValidImageProvider.Setup(ctx, e) - } - apiServer, err = cs.ApiServerConfig.Get(ctx, "cluster", metav1.GetOptions{}) - require.NoError(t, err) - apiServer.Spec.Encryption = scenario.ValidImageProvider.APIServerEncryption - _, err = cs.ApiServerConfig.Update(ctx, apiServer, metav1.UpdateOptions{}) - require.NoError(t, err) - - // step 5: wait for recovery — everything should work - t.Log("Waiting for operator to recover with valid KMS image") - scenario.WaitForRecovery(ctx, e) + // step 4: fix by applying KMS with valid image and wait for full recovery + // (uses SetAndWaitForEncryptionType which waits for encryption key migration, + // same as the standard encryption on/off and migration tests) + t.Log("Recovering: applying KMS with valid plugin image and waiting for full migration") + SetAndWaitForEncryptionType(ctx, e, scenario.ValidImageProvider, scenario.TargetGRs, scenario.Namespace, scenario.LabelSelector) t.Log("Invalid image recovery test passed") }