@@ -939,242 +939,27 @@ func (d *handlerImpl) FailoverDomain(
939939 failoverRequest * types.FailoverDomainRequest ,
940940) (* types.FailoverDomainResponse , error ) {
941941
942- // must get the metadata (notificationVersion) first
943- // this version can be regarded as the lock on the v2 domain table
944- // and since we do not know which table will return the domain afterwards
945- // this call has to be made
946- metadata , err := d .domainManager .GetMetadata (ctx )
947- if err != nil {
948- return nil , err
949- }
950- notificationVersion := metadata .NotificationVersion
951- getResponse , err := d .domainManager .GetDomain (ctx , & persistence.GetDomainRequest {Name : failoverRequest .GetDomainName ()})
952- if err != nil {
953- return nil , err
954- }
955-
956- info := getResponse .Info
957- config := getResponse .Config
958- replicationConfig := getResponse .ReplicationConfig
959- wasActiveActive := replicationConfig .IsActiveActive ()
960- configVersion := getResponse .ConfigVersion
961- failoverVersion := getResponse .FailoverVersion
962- failoverNotificationVersion := getResponse .FailoverNotificationVersion
963- isGlobalDomain := getResponse .IsGlobalDomain
964- gracefulFailoverEndTime := getResponse .FailoverEndTime
965- currentActiveCluster := replicationConfig .ActiveClusterName
966- currentActiveClusters := replicationConfig .ActiveClusters .DeepCopy ()
967- previousFailoverVersion := getResponse .PreviousFailoverVersion
968- lastUpdatedTime := time .Unix (0 , getResponse .LastUpdatedTime )
969-
970- updateRequest := & types.UpdateDomainRequest {
971- Name : failoverRequest .DomainName ,
972- ActiveClusterName : failoverRequest .DomainActiveClusterName ,
973- }
974-
975- // Update replication config
976- replicationConfig , replicationConfigChanged , activeClusterChanged , err := d .updateReplicationConfig (
977- getResponse .Info .Name ,
978- replicationConfig ,
979- updateRequest ,
980- )
942+ currentDomainState , err := d .domainManager .GetDomain (ctx , & persistence.GetDomainRequest {Name : failoverRequest .GetDomainName ()})
981943 if err != nil {
982944 return nil , err
983945 }
984946
985- // Handle graceful failover request
986- if updateRequest .FailoverTimeoutInSeconds != nil {
987- gracefulFailoverEndTime , previousFailoverVersion , err = d .handleGracefulFailover (
988- updateRequest ,
989- replicationConfig ,
990- currentActiveCluster ,
991- gracefulFailoverEndTime ,
992- failoverVersion ,
993- activeClusterChanged ,
994- isGlobalDomain ,
995- )
996- if err != nil {
997- return nil , err
998- }
999- }
1000-
1001- err = d .validateGlobalDomainReplicationConfigForUpdateDomain (replicationConfig , replicationConfigChanged , activeClusterChanged )
1002- if err != nil {
1003- return nil , err
1004- }
1005-
1006- now := d .timeSource .Now ()
1007- // Check the failover cool down time
1008- if lastUpdatedTime .Add (d .config .FailoverCoolDown (info .Name )).After (now ) {
1009- d .logger .Debugf ("Domain was last updated at %v, failoverCoolDown: %v, current time: %v." , lastUpdatedTime , d .config .FailoverCoolDown (info .Name ), now )
1010- return nil , errDomainUpdateTooFrequent
1011- }
1012-
1013- // set the version
1014- if replicationConfigChanged {
1015- configVersion ++
1016- }
1017-
1018- if activeClusterChanged && isGlobalDomain {
1019- var failoverType constants.FailoverType = constants .FailoverTypeGrace
1020-
1021- // Force failover cleans graceful failover state
1022- if updateRequest .FailoverTimeoutInSeconds == nil {
1023- failoverType = constants .FailoverTypeForce
1024- gracefulFailoverEndTime = nil
1025- previousFailoverVersion = constants .InitialPreviousFailoverVersion
1026- }
1027-
1028- // Cases:
1029- // 1. active-passive domain's ActiveClusterName is changed
1030- // 2. active-passive domain is being migrated to active-active
1031- // 3. active-active domain's ActiveClusters is changed
1032- isActiveActive := replicationConfig .IsActiveActive ()
1033-
1034- // case 1. active-passive domain's ActiveClusterName is changed
1035- if ! wasActiveActive && ! isActiveActive {
1036- failoverVersion = d .clusterMetadata .GetNextFailoverVersion (
1037- replicationConfig .ActiveClusterName ,
1038- failoverVersion ,
1039- updateRequest .Name ,
1040- )
1041-
1042- d .logger .Debug ("active-passive domain failover" ,
1043- tag .WorkflowDomainName (info .Name ),
1044- tag .Dynamic ("failover-version" , failoverVersion ),
1045- tag .Dynamic ("failover-type" , failoverType ),
1046- )
1047-
1048- err = updateFailoverHistoryInDomainData (info , d .config , NewFailoverEvent (
1049- now ,
1050- failoverType ,
1051- & currentActiveCluster ,
1052- updateRequest .ActiveClusterName ,
1053- nil ,
1054- nil ,
1055- ))
1056- if err != nil {
1057- d .logger .Warn ("failed to update failover history" , tag .Error (err ))
1058- }
1059- }
1060-
1061- // case 2. active-passive domain is being migrated to active-active
1062- if ! wasActiveActive && isActiveActive {
1063- // for active-passive to active-active migration,
1064- // we increment failover version so top level failoverVersion is updated and domain data is replicated.
1065-
1066- failoverVersion = d .clusterMetadata .GetNextFailoverVersion (
1067- replicationConfig .ActiveClusterName ,
1068- failoverVersion + 1 , //todo: (active-active): Let's review if we need to increment
1069- // this for cluster-attr failover changes. It may not be necessary to increment
1070- updateRequest .Name ,
1071- )
1072-
1073- d .logger .Debug ("active-passive domain is being migrated to active-active" ,
1074- tag .WorkflowDomainName (info .Name ),
1075- tag .Dynamic ("failover-version" , failoverVersion ),
1076- tag .Dynamic ("failover-type" , failoverType ),
1077- )
1078-
1079- err = updateFailoverHistoryInDomainData (info , d .config , NewFailoverEvent (
1080- now ,
1081- failoverType ,
1082- & currentActiveCluster ,
1083- updateRequest .ActiveClusterName ,
1084- nil ,
1085- replicationConfig .ActiveClusters ,
1086- ))
1087- if err != nil {
1088- d .logger .Warn ("failed to update failover history" , tag .Error (err ))
1089- }
1090- }
1091-
1092- // case 3. active-active domain's ActiveClusters is changed
1093- if wasActiveActive && isActiveActive {
1094- // top level failover version is not used for task versions for active-active domains but we still increment it
1095- // to indicate there was a change in replication config
1096- failoverVersion = d .clusterMetadata .GetNextFailoverVersion (
1097- replicationConfig .ActiveClusterName ,
1098- failoverVersion + 1 , //todo: (active-active): Let's review if we need to increment
1099- // this for cluster-attr failover changes. It may not be necessary to increment
1100- updateRequest .Name ,
1101- )
1102-
1103- d .logger .Debug ("active-active domain failover" ,
1104- tag .WorkflowDomainName (info .Name ),
1105- tag .Dynamic ("failover-version" , failoverVersion ),
1106- tag .Dynamic ("failover-type" , failoverType ),
1107- )
1108-
1109- err = updateFailoverHistoryInDomainData (info , d .config , NewFailoverEvent (
1110- now ,
1111- failoverType ,
1112- & currentActiveCluster ,
1113- nil ,
1114- currentActiveClusters ,
1115- replicationConfig .ActiveClusters ,
1116- ))
1117- if err != nil {
1118- d .logger .Warn ("failed to update failover history" , tag .Error (err ))
1119- }
1120- }
1121-
1122- failoverNotificationVersion = notificationVersion
947+ if ! currentDomainState .IsGlobalDomain {
948+ return nil , errLocalDomainsCannotFailover
1123949 }
1124950
1125- lastUpdatedTime = now
951+ notificationVersion := currentDomainState . NotificationVersion
1126952
1127- updateReq := createUpdateRequest (
1128- info ,
1129- config ,
1130- replicationConfig ,
1131- configVersion ,
1132- failoverVersion ,
1133- failoverNotificationVersion ,
1134- gracefulFailoverEndTime ,
1135- previousFailoverVersion ,
1136- lastUpdatedTime ,
953+ response , err := d .handleFailoverRequest (
954+ ctx ,
955+ failoverRequest .ToUpdateDomainRequest (),
956+ currentDomainState ,
1137957 notificationVersion ,
1138958 )
1139-
1140- err = d .domainManager .UpdateDomain (ctx , & updateReq )
1141959 if err != nil {
1142- d .logger .Info ("Update domain's replication configs failed" ,
1143- tag .WorkflowDomainName (info .Name ),
1144- tag .WorkflowDomainID (info .ID ),
1145- )
1146960 return nil , err
1147961 }
1148-
1149- if isGlobalDomain {
1150- if err = d .domainReplicator .HandleTransmissionTask (
1151- ctx ,
1152- types .DomainOperationUpdate ,
1153- info ,
1154- config ,
1155- replicationConfig ,
1156- configVersion ,
1157- failoverVersion ,
1158- previousFailoverVersion ,
1159- isGlobalDomain ,
1160- ); err != nil {
1161- return nil , err
1162- }
1163- }
1164-
1165- domainInfo , configuration , replicationConfiguration := d .createResponse (info , config , replicationConfig )
1166-
1167- d .logger .Info ("Update domain's replication configs succeeded" ,
1168- tag .WorkflowDomainName (info .Name ),
1169- tag .WorkflowDomainID (info .ID ),
1170- )
1171- return & types.FailoverDomainResponse {
1172- DomainInfo : domainInfo ,
1173- Configuration : configuration ,
1174- ReplicationConfiguration : replicationConfiguration ,
1175- FailoverVersion : failoverVersion ,
1176- IsGlobalDomain : isGlobalDomain ,
1177- }, nil
962+ return response .ToFailoverDomainResponse (), nil
1178963}
1179964
1180965// DeleteDomain deletes a domain
0 commit comments