From f5b221e9c1e8caffcf984576e524c327c7daf72e Mon Sep 17 00:00:00 2001 From: David Porter Date: Thu, 9 Oct 2025 19:34:24 -0700 Subject: [PATCH 1/9] Refactors, with no change in behavior updateReplicationConfig Signed-off-by: David Porter - Changes the flags being passed around to be consistent with their use outside - s/clusterUpdated/replicationConfigChanged/g - s/activeClusterUpdated/activeClusterChanged/g - Documents their intended behaviour --- common/domain/handler.go | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/common/domain/handler.go b/common/domain/handler.go index 35b1e901949..9c57eacad5d 100644 --- a/common/domain/handler.go +++ b/common/domain/handler.go @@ -1566,12 +1566,15 @@ func (d *handlerImpl) updateReplicationConfig( domainName string, config *persistence.DomainReplicationConfig, updateRequest *types.UpdateDomainRequest, -) (*persistence.DomainReplicationConfig, bool, bool, error) { +) ( + mutatedCfg *persistence.DomainReplicationConfig, + replicationConfigChanged bool, // this being turned on will trigger an increment in the configVersion + activeClusterChanged bool, // this indicates a failover is happening and a failover version is to be incremented + err error, +) { - clusterUpdated := false - activeClusterUpdated := false if len(updateRequest.Clusters) != 0 { - clusterUpdated = true + replicationConfigChanged = true clustersNew := []*persistence.ClusterReplicationConfig{} for _, clusterConfig := range updateRequest.Clusters { clustersNew = append(clustersNew, &persistence.ClusterReplicationConfig{ @@ -1589,7 +1592,7 @@ func (d *handlerImpl) updateReplicationConfig( } if updateRequest.ActiveClusterName != nil { - activeClusterUpdated = true + activeClusterChanged = true config.ActiveClusterName = *updateRequest.ActiveClusterName } @@ -1639,7 +1642,7 @@ func (d *handlerImpl) updateReplicationConfig( ActiveClustersByRegion: finalActiveClusters, } d.logger.Debugf("Setting active clusters to %v, updateRequest.ActiveClusters.ActiveClustersByRegion: %v", finalActiveClusters, updateRequest.ActiveClusters.ActiveClustersByRegion) - activeClusterUpdated = true + activeClusterChanged = true } if updateRequest != nil && updateRequest.ActiveClusters != nil && updateRequest.ActiveClusters.AttributeScopes != nil { @@ -1653,12 +1656,12 @@ func (d *handlerImpl) updateReplicationConfig( } config.ActiveClusters.AttributeScopes = result.AttributeScopes - activeClusterUpdated = true - clusterUpdated = true + activeClusterChanged = true + replicationConfigChanged = true } } - return config, clusterUpdated, activeClusterUpdated, nil + return config, replicationConfigChanged, activeClusterChanged, nil } func (d *handlerImpl) handleGracefulFailover( From 8410e9ba476b3b29c8440a99ee25a79598e4a9f9 Mon Sep 17 00:00:00 2001 From: David Porter Date: Thu, 9 Oct 2025 19:46:10 -0700 Subject: [PATCH 2/9] Carefully rearranges functions in the domain handler Signed-off-by: David Porter --- common/domain/handler.go | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/common/domain/handler.go b/common/domain/handler.go index 9c57eacad5d..97217108afd 100644 --- a/common/domain/handler.go +++ b/common/domain/handler.go @@ -1596,7 +1596,25 @@ func (d *handlerImpl) updateReplicationConfig( config.ActiveClusterName = *updateRequest.ActiveClusterName } - if updateRequest.ActiveClusters != nil && updateRequest.ActiveClusters.ActiveClustersByRegion != nil { + if updateRequest.ActiveClusters != nil { + return d.updateReplicationConfigForActiveActive(domainName, config, updateRequest) + } + + return config, replicationConfigChanged, activeClusterChanged, nil +} + +func (d *handlerImpl) updateReplicationConfigForActiveActive( + domainName string, + config *persistence.DomainReplicationConfig, + updateRequest *types.UpdateDomainRequest, +) ( + mutatedCfg *persistence.DomainReplicationConfig, + replicationConfigChanged bool, // this being turned on will trigger an increment in the configVersion + activeClusterChanged bool, // this indicates a failover is happening and a failover version is to be incremented + err error, +) { + + if updateRequest.ActiveClusters.ActiveClustersByRegion != nil { existingActiveClusters := config.ActiveClusters if existingActiveClusters == nil { // migration from active-passive to active-active existingActiveClusters = &types.ActiveClusters{ @@ -1645,7 +1663,7 @@ func (d *handlerImpl) updateReplicationConfig( activeClusterChanged = true } - if updateRequest != nil && updateRequest.ActiveClusters != nil && updateRequest.ActiveClusters.AttributeScopes != nil { + if updateRequest.ActiveClusters.AttributeScopes != nil { result, isCh := d.buildActiveActiveClusterScopesFromUpdateRequest(updateRequest, config, domainName) if isCh { From 5961cb8d3c92c1255a2b654bf312fc79a755f01e Mon Sep 17 00:00:00 2001 From: David Porter Date: Thu, 9 Oct 2025 20:29:30 -0700 Subject: [PATCH 3/9] Adds some validation Signed-off-by: David Porter --- common/domain/attrValidator.go | 37 +++++++ common/domain/attrValidator_test.go | 146 ++++++++++++++++++++++++++++ common/domain/handler.go | 4 + 3 files changed, 187 insertions(+) diff --git a/common/domain/attrValidator.go b/common/domain/attrValidator.go index f48efd57096..ed9e02ce125 100644 --- a/common/domain/attrValidator.go +++ b/common/domain/attrValidator.go @@ -182,3 +182,40 @@ func (d *AttrValidatorImpl) validateClusterName( } return nil } + +func (d *AttrValidatorImpl) validateActiveActiveDomainReplicationConfig( + activeClusters *types.ActiveClusters, +) error { + + clusters := d.clusterMetadata.GetEnabledClusterInfo() + + for _, scopeData := range activeClusters.AttributeScopes { + for _, activeCluster := range scopeData.ClusterAttributes { + _, ok := clusters[activeCluster.ActiveClusterName] + if !ok { + return &types.BadRequestError{Message: fmt.Sprintf( + "Invalid active cluster name: %v", + activeCluster.ActiveClusterName, + )} + } + if activeCluster.FailoverVersion < 0 { + return &types.BadRequestError{Message: fmt.Sprintf( + "invalid failover version: %d", + activeCluster.FailoverVersion, + )} + } + } + } + + // todo (david.porter) remove this once we have completely migrated to AttributeScopes + for _, activeCluster := range activeClusters.ActiveClustersByRegion { + if _, ok := clusters[activeCluster.ActiveClusterName]; !ok { + return &types.BadRequestError{Message: fmt.Sprintf( + "Invalid active cluster name: %v", + activeCluster.ActiveClusterName, + )} + } + } + + return nil +} diff --git a/common/domain/attrValidator_test.go b/common/domain/attrValidator_test.go index f542de119de..1203007e9e8 100644 --- a/common/domain/attrValidator_test.go +++ b/common/domain/attrValidator_test.go @@ -23,9 +23,13 @@ package domain import ( "testing" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" "github.com/uber/cadence/common/cluster" + "github.com/uber/cadence/common/config" + "github.com/uber/cadence/common/log" + "github.com/uber/cadence/common/metrics" "github.com/uber/cadence/common/persistence" "github.com/uber/cadence/common/types" ) @@ -333,3 +337,145 @@ func (s *attrValidatorSuite) TestValidateDomainReplicationConfigClustersDoesNotR ) s.IsType(&types.BadRequestError{}, err) } + +func TestValidateActiveActiveDomainReplicationConfig(t *testing.T) { + // Define test cluster names + const ( + clusterA = "cluster-a" + clusterB = "cluster-b" + ) + + // Create explicit cluster metadata for the test + clusterMetadata := cluster.NewMetadata( + config.ClusterGroupMetadata{ + FailoverVersionIncrement: 10, + PrimaryClusterName: clusterA, + CurrentClusterName: clusterA, + ClusterGroup: map[string]config.ClusterInformation{ + clusterA: { + Enabled: true, + InitialFailoverVersion: 1, + }, + clusterB: { + Enabled: true, + InitialFailoverVersion: 2, + }, + }, + }, + func(d string) bool { return false }, + metrics.NewNoopMetricsClient(), + log.NewNoop(), + ) + + validator := newAttrValidator(clusterMetadata, 1) + + testCases := []struct { + name string + activeClusters *types.ActiveClusters + expectedErr bool + errType interface{} + }{ + { + name: "valid clusters in ActiveClustersByRegion only", + activeClusters: &types.ActiveClusters{ + AttributeScopes: map[string]types.ClusterAttributeScope{ + "city": { + ClusterAttributes: map[string]types.ActiveClusterInfo{ + "portland": { + ActiveClusterName: clusterB, + FailoverVersion: 200, + }, + }, + }, + }, + ActiveClustersByRegion: map[string]types.ActiveClusterInfo{ + "region1": { + ActiveClusterName: clusterA, + FailoverVersion: 100, + }, + "region2": { + ActiveClusterName: clusterB, + FailoverVersion: 200, + }, + }, + }, + expectedErr: false, + }, + { + name: "invalid cluster in ActiveClustersByRegion", + activeClusters: &types.ActiveClusters{ + ActiveClustersByRegion: map[string]types.ActiveClusterInfo{ + "region1": { + ActiveClusterName: "invalid-cluster", + FailoverVersion: 100, + }, + }, + }, + expectedErr: true, + errType: &types.BadRequestError{}, + }, + { + name: "invalid cluster in AttributeScopes", + activeClusters: &types.ActiveClusters{ + AttributeScopes: map[string]types.ClusterAttributeScope{ + "city": { + ClusterAttributes: map[string]types.ActiveClusterInfo{ + "seattle": { + ActiveClusterName: "invalid-cluster", + FailoverVersion: 100, + }, + }, + }, + }, + }, + expectedErr: true, + errType: &types.BadRequestError{}, + }, + { + name: "empty ActiveClusters - all maps nil", + activeClusters: &types.ActiveClusters{ + ActiveClustersByRegion: nil, + AttributeScopes: nil, + }, + expectedErr: false, + }, + { + name: "empty ActiveClusters - maps initialized but empty", + activeClusters: &types.ActiveClusters{ + ActiveClustersByRegion: map[string]types.ActiveClusterInfo{}, + AttributeScopes: map[string]types.ClusterAttributeScope{}, + }, + expectedErr: false, + }, + { + name: "an invalid failover version should return an error", + activeClusters: &types.ActiveClusters{ + AttributeScopes: map[string]types.ClusterAttributeScope{ + "city": { + ClusterAttributes: map[string]types.ActiveClusterInfo{ + "seattle": { + ActiveClusterName: clusterA, + FailoverVersion: -1, + }, + }, + }, + }, + }, + expectedErr: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + err := validator.validateActiveActiveDomainReplicationConfig(tc.activeClusters) + if tc.expectedErr { + assert.Error(t, err) + if tc.errType != nil { + assert.IsType(t, tc.errType, err) + } + } else { + assert.NoError(t, err) + } + }) + } +} diff --git a/common/domain/handler.go b/common/domain/handler.go index 97217108afd..5e36b1bfdbd 100644 --- a/common/domain/handler.go +++ b/common/domain/handler.go @@ -1614,6 +1614,10 @@ func (d *handlerImpl) updateReplicationConfigForActiveActive( err error, ) { + if err := d.domainAttrValidator.validateActiveActiveDomainReplicationConfig(updateRequest.ActiveClusters); err != nil { + return nil, false, false, err + } + if updateRequest.ActiveClusters.ActiveClustersByRegion != nil { existingActiveClusters := config.ActiveClusters if existingActiveClusters == nil { // migration from active-passive to active-active From ab65fc076cf235de63eeaa13ac3b7c8f0c08276c Mon Sep 17 00:00:00 2001 From: David Porter Date: Fri, 10 Oct 2025 11:33:40 -0700 Subject: [PATCH 4/9] fix tests Signed-off-by: David Porter --- .../testdata/replication_simulation_activeactive.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/simulation/replication/testdata/replication_simulation_activeactive.yaml b/simulation/replication/testdata/replication_simulation_activeactive.yaml index e44d8c43c83..d92f6126c71 100644 --- a/simulation/replication/testdata/replication_simulation_activeactive.yaml +++ b/simulation/replication/testdata/replication_simulation_activeactive.yaml @@ -37,7 +37,7 @@ operations: activeClusterSelectionPolicy: clusterAttribute: scope: region - name: region0 + name: cluster0 # start workflow in cluster1 - op: start_workflow @@ -51,7 +51,7 @@ operations: activeClusterSelectionPolicy: clusterAttribute: scope: region - name: region1 + name: cluster1 # validate that wf1 is started in cluster0 and completed in cluster1 - op: validate From d9d57b5bf87f6d3b31971b93feb128c1ff9ca630 Mon Sep 17 00:00:00 2001 From: David Porter Date: Fri, 10 Oct 2025 14:05:42 -0700 Subject: [PATCH 5/9] revert mistake Signed-off-by: David Porter --- .../testdata/replication_simulation_activeactive.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/simulation/replication/testdata/replication_simulation_activeactive.yaml b/simulation/replication/testdata/replication_simulation_activeactive.yaml index d92f6126c71..e44d8c43c83 100644 --- a/simulation/replication/testdata/replication_simulation_activeactive.yaml +++ b/simulation/replication/testdata/replication_simulation_activeactive.yaml @@ -37,7 +37,7 @@ operations: activeClusterSelectionPolicy: clusterAttribute: scope: region - name: cluster0 + name: region0 # start workflow in cluster1 - op: start_workflow @@ -51,7 +51,7 @@ operations: activeClusterSelectionPolicy: clusterAttribute: scope: region - name: cluster1 + name: region1 # validate that wf1 is started in cluster0 and completed in cluster1 - op: validate From ffef4e95fd4ba32fb8536a0e9069f010c572df1b Mon Sep 17 00:00:00 2001 From: David Porter Date: Wed, 22 Oct 2025 15:22:47 -0700 Subject: [PATCH 6/9] removing unnneded bits Signed-off-by: David Porter --- common/domain/attrValidator.go | 11 -------- common/domain/attrValidator_test.go | 41 ----------------------------- 2 files changed, 52 deletions(-) diff --git a/common/domain/attrValidator.go b/common/domain/attrValidator.go index 07bac0a815b..fa45bb015c6 100644 --- a/common/domain/attrValidator.go +++ b/common/domain/attrValidator.go @@ -208,16 +208,5 @@ func (d *AttrValidatorImpl) validateActiveActiveDomainReplicationConfig( } } } - - // todo (david.porter) remove this once we have completely migrated to AttributeScopes - for _, activeCluster := range activeClusters.ActiveClustersByRegion { - if _, ok := clusters[activeCluster.ActiveClusterName]; !ok { - return &types.BadRequestError{Message: fmt.Sprintf( - "Invalid active cluster name: %v", - activeCluster.ActiveClusterName, - )} - } - } - return nil } diff --git a/common/domain/attrValidator_test.go b/common/domain/attrValidator_test.go index 9c3f317c2ba..1cfa5b8b432 100644 --- a/common/domain/attrValidator_test.go +++ b/common/domain/attrValidator_test.go @@ -383,45 +383,6 @@ func TestValidateActiveActiveDomainReplicationConfig(t *testing.T) { expectedErr bool errType interface{} }{ - { - name: "valid clusters in ActiveClustersByRegion only", - activeClusters: &types.ActiveClusters{ - AttributeScopes: map[string]types.ClusterAttributeScope{ - "city": { - ClusterAttributes: map[string]types.ActiveClusterInfo{ - "portland": { - ActiveClusterName: clusterB, - FailoverVersion: 200, - }, - }, - }, - }, - ActiveClustersByRegion: map[string]types.ActiveClusterInfo{ - "region1": { - ActiveClusterName: clusterA, - FailoverVersion: 100, - }, - "region2": { - ActiveClusterName: clusterB, - FailoverVersion: 200, - }, - }, - }, - expectedErr: false, - }, - { - name: "invalid cluster in ActiveClustersByRegion", - activeClusters: &types.ActiveClusters{ - ActiveClustersByRegion: map[string]types.ActiveClusterInfo{ - "region1": { - ActiveClusterName: "invalid-cluster", - FailoverVersion: 100, - }, - }, - }, - expectedErr: true, - errType: &types.BadRequestError{}, - }, { name: "invalid cluster in AttributeScopes", activeClusters: &types.ActiveClusters{ @@ -442,7 +403,6 @@ func TestValidateActiveActiveDomainReplicationConfig(t *testing.T) { { name: "empty ActiveClusters - all maps nil", activeClusters: &types.ActiveClusters{ - ActiveClustersByRegion: nil, AttributeScopes: nil, }, expectedErr: false, @@ -450,7 +410,6 @@ func TestValidateActiveActiveDomainReplicationConfig(t *testing.T) { { name: "empty ActiveClusters - maps initialized but empty", activeClusters: &types.ActiveClusters{ - ActiveClustersByRegion: map[string]types.ActiveClusterInfo{}, AttributeScopes: map[string]types.ClusterAttributeScope{}, }, expectedErr: false, From eb7c92127d767372e820565f6c2ddfb514282eb2 Mon Sep 17 00:00:00 2001 From: David Porter Date: Wed, 22 Oct 2025 15:38:20 -0700 Subject: [PATCH 7/9] Fix lint Signed-off-by: David Porter --- common/domain/attrValidator_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/domain/attrValidator_test.go b/common/domain/attrValidator_test.go index 1cfa5b8b432..075198cbddf 100644 --- a/common/domain/attrValidator_test.go +++ b/common/domain/attrValidator_test.go @@ -403,14 +403,14 @@ func TestValidateActiveActiveDomainReplicationConfig(t *testing.T) { { name: "empty ActiveClusters - all maps nil", activeClusters: &types.ActiveClusters{ - AttributeScopes: nil, + AttributeScopes: nil, }, expectedErr: false, }, { name: "empty ActiveClusters - maps initialized but empty", activeClusters: &types.ActiveClusters{ - AttributeScopes: map[string]types.ClusterAttributeScope{}, + AttributeScopes: map[string]types.ClusterAttributeScope{}, }, expectedErr: false, }, From 376274b3e11be8a92f1f0865f7b3a50bd5e1a0d5 Mon Sep 17 00:00:00 2001 From: David Porter Date: Wed, 22 Oct 2025 16:13:29 -0700 Subject: [PATCH 8/9] remember to test Signed-off-by: David Porter --- common/domain/attrValidator.go | 5 ++++- common/domain/handler.go | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/common/domain/attrValidator.go b/common/domain/attrValidator.go index fa45bb015c6..a6403b07859 100644 --- a/common/domain/attrValidator.go +++ b/common/domain/attrValidator.go @@ -127,7 +127,6 @@ func (d *AttrValidatorImpl) validateDomainReplicationConfigForGlobalDomain( if !isInClusters(activeCluster) { return errActiveClusterNotInClusters } - // For active-active domains, also validate that all clusters in AttributeScopes are valid if activeClusters != nil && activeClusters.AttributeScopes != nil { for _, scope := range activeClusters.AttributeScopes { @@ -189,6 +188,10 @@ func (d *AttrValidatorImpl) validateActiveActiveDomainReplicationConfig( activeClusters *types.ActiveClusters, ) error { + if activeClusters == nil || activeClusters.AttributeScopes == nil { + return nil + } + clusters := d.clusterMetadata.GetEnabledClusterInfo() for _, scopeData := range activeClusters.AttributeScopes { diff --git a/common/domain/handler.go b/common/domain/handler.go index 7378611270c..a8b5de24476 100644 --- a/common/domain/handler.go +++ b/common/domain/handler.go @@ -1577,8 +1577,11 @@ func (d *handlerImpl) updateReplicationConfig( config.ActiveClusterName = *updateRequest.ActiveClusterName } - // ActiveClustersByRegion field has been removed - this logic is now handled by AttributeScopes - // The legacy ActiveClustersByRegion functionality is replaced by using AttributeScopes with "region" scope + err = d.domainAttrValidator.validateActiveActiveDomainReplicationConfig(updateRequest.ActiveClusters) + if err != nil { + return nil, false, false, err + } + if updateRequest != nil && updateRequest.ActiveClusters != nil && updateRequest.ActiveClusters.AttributeScopes != nil { result, isCh := d.buildActiveActiveClusterScopesFromUpdateRequest(updateRequest, config, domainName) if isCh { From 4efd7a7ce965e790f9c2d2038c7eddc4c8d827d3 Mon Sep 17 00:00:00 2001 From: David Porter Date: Wed, 22 Oct 2025 16:21:13 -0700 Subject: [PATCH 9/9] Fix Signed-off-by: David Porter --- common/domain/handler.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/common/domain/handler.go b/common/domain/handler.go index a8b5de24476..a7c21b3b89f 100644 --- a/common/domain/handler.go +++ b/common/domain/handler.go @@ -1543,14 +1543,18 @@ func (d *handlerImpl) updateDeleteBadBinary( return config, false, nil } +// updateReplicationConfig is the function which takes the input request and current state and edits and returns it +// to the desired state by merging the request values with the current state. +// replicationConfigChanged being turned on will trigger an increment in the configVersion. +// activeClusterChanged indicates a failover is happening and a failover version is to be incremented func (d *handlerImpl) updateReplicationConfig( domainName string, config *persistence.DomainReplicationConfig, updateRequest *types.UpdateDomainRequest, ) ( mutatedCfg *persistence.DomainReplicationConfig, - replicationConfigChanged bool, // this being turned on will trigger an increment in the configVersion - activeClusterChanged bool, // this indicates a failover is happening and a failover version is to be incremented + replicationConfigChanged bool, + activeClusterChanged bool, err error, ) {