7 changes: 7 additions & 0 deletions CHANGELOG.rst
@@ -12,6 +12,13 @@ and this project adheres to
Unreleased
-------------------------------------------------------------------------------

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Fixed
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- Invalid leader appointment in Raft failover when there are not enough instances
in the replicaset.

-------------------------------------------------------------------------------
[2.15.3] - 2025-04-24
-------------------------------------------------------------------------------
3 changes: 1 addition & 2 deletions cartridge/failover.lua
@@ -958,18 +958,17 @@ local function cfg(clusterwide_config, opts)
and #topology.get_leaders_order(
topology_cfg, vars.replicaset_uuid, nil, {only_electable = false, only_enabled = true}) < 3
then
first_appointments = _get_appointments_disabled_mode(topology_cfg)
log.warn('Not enough instances to enable Raft failover')
raft_failover.disable()
else
local ok, err = ApplyConfigError:pcall(raft_failover.cfg)
if not ok then
return nil, err
end
first_appointments = raft_failover.get_appointments(topology_cfg)
log.info('Raft failover enabled')
end

first_appointments = raft_failover.get_appointments(topology_cfg)
vars.failover_fiber = fiber.new(failover_loop, {
get_appointments = function()
vars.membership_notification:wait()
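The hunk above is the core of the fix: previously `first_appointments` was overwritten after the if/else by an unconditional `raft_failover.get_appointments()` call, so the disabled-mode appointments computed for small replicasets were discarded. A minimal sketch of the resulting control flow (the leading condition is cut off by the hunk; `vars.disable_raft_on_small_clusters` is assumed from cartridge/failover/raft.lua, and the rest of `cfg()` is elided):

    -- Sketch only: simplified from the hunk above, not the full cfg() body.
    local first_appointments
    if vars.disable_raft_on_small_clusters
    and #topology.get_leaders_order(
        topology_cfg, vars.replicaset_uuid, nil,
        {only_electable = false, only_enabled = true}) < 3
    then
        -- Fewer than 3 instances: fall back to 'disabled'-mode appointments.
        first_appointments = _get_appointments_disabled_mode(topology_cfg)
        log.warn('Not enough instances to enable Raft failover')
        raft_failover.disable()
    else
        local ok, err = ApplyConfigError:pcall(raft_failover.cfg)
        if not ok then
            return nil, err
        end
        -- Raft is enabled: each branch now sets its own appointments.
        first_appointments = raft_failover.get_appointments(topology_cfg)
        log.info('Raft failover enabled')
    end
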
17 changes: 16 additions & 1 deletion cartridge/failover/raft.lua
@@ -58,7 +58,22 @@ local function get_appointments(topology_cfg)
::next_rs::
end

appointments[vars.replicaset_uuid] = vars.leader_uuid
-- If Raft is enabled but `disable_raft_on_small_clusters` is active,
-- and the replicaset has fewer than 3 instances,
-- Raft failover is considered disabled for this replicaset.
-- In this case, we appoint a leader as in 'disabled' failover mode.
for replicaset_uuid, _ in pairs(replicasets) do
Copilot AI commented on Jun 6, 2025:
The threshold '3' for small clusters is a magic number; consider extracting it into a named constant (e.g. MIN_RAFT_REPLICASET_SIZE) or a config parameter for clarity and easier adjustment.
local leaders = topology.get_leaders_order(
topology_cfg, replicaset_uuid, nil, {only_electable = false, only_enabled = true}
)
if vars.disable_raft_on_small_clusters and #leaders < 3 then
appointments[replicaset_uuid] = leaders[1]
end
end

if vars.leader_uuid ~= nil then
appointments[vars.replicaset_uuid] = vars.leader_uuid
end
return appointments
end

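Following up on the review comment above, a hedged sketch of how the threshold could be lifted into a named constant. `MIN_RAFT_REPLICASET_SIZE` is the name suggested in the comment, not something this PR introduces:

    -- Hypothetical refactoring, not part of this PR: name the threshold once
    -- instead of repeating the literal 3 in failover.lua and raft.lua.
    local MIN_RAFT_REPLICASET_SIZE = 3

    for replicaset_uuid, _ in pairs(replicasets) do
        local leaders = topology.get_leaders_order(
            topology_cfg, replicaset_uuid, nil,
            {only_electable = false, only_enabled = true}
        )
        if vars.disable_raft_on_small_clusters
        and #leaders < MIN_RAFT_REPLICASET_SIZE then
            -- Small replicaset: appoint the first leader, as in 'disabled' mode.
            appointments[replicaset_uuid] = leaders[1]
        end
    end
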
4 changes: 4 additions & 0 deletions test/entrypoint/srv_basic.lua
@@ -32,6 +32,7 @@ package.preload['mymodule'] = function()
local httpd = service_registry.get('httpd')
local validated = {}
local master_switches = {}
local leaders_history = {}

if httpd ~= nil then
httpd:route(
@@ -72,6 +73,7 @@ package.preload['mymodule'] = function()
get_state = function() return state end,
is_master = function() return master end,
get_master_switches = function() return master_switches end,
get_leaders_history = function() return leaders_history end,
validate_config = function()
table.insert(validated, true)
return true
@@ -99,6 +101,8 @@ package.preload['mymodule'] = function()
end
master = opts.is_master
table.remove(validated, #validated)
local failover = require('cartridge.failover')
table.insert(leaders_history, failover.get_active_leaders())
end,
stop = function()
state = 'stopped'
132 changes: 132 additions & 0 deletions test/integration/raft_test.lua
@@ -8,6 +8,7 @@ local g_not_enough_instances = t.group('integration.raft_not_enough_instances')
local g_unelectable = t.group('integration.raft_unelectable')
local g_disable = t.group('integration.raft_disabled_instances')
local g_expel = t.group('integration.raft_expelled_instances')
local g_appointments = t.group('integration.raft_appointments')
local g_all_rw = t.group('integration.raft_all_rw')
local h = require('test.helper')

@@ -341,6 +342,7 @@ g.test_kill_master = function()

start_server('storage-1')
start_server('storage-2')
g.cluster:wait_until_healthy()

local after_2pc = get_2pc_count()

@@ -794,3 +796,133 @@ g_all_rw.test_raft_in_all_rw_mode_fails = function()
return require('cartridge.confapplier').get_state()
end), 'RolesConfigured')
end

----------------------------------------------------------------

setup_group(g_appointments, {
{
alias = 'router',
uuid = h.uuid('a'),
roles = {
'vshard-router',
'myrole',
},
servers = 1,
},
{
alias = 'storage',
uuid = replicaset_uuid,
roles = {
'vshard-storage',
},
servers = {
{
instance_uuid = storage_1_uuid,
},
{
instance_uuid = storage_2_uuid,
},
{
instance_uuid = storage_3_uuid,
},
}
},
})

g_appointments.before_each(function()
t.assert_equals(set_failover_params(g_appointments, { mode = 'raft' }), { mode = 'raft' })
end)

g_appointments.after_each(function()
-- return original leader
g_appointments.cluster:retrying({}, function()
g_appointments.cluster:server('storage-1'):call('box.ctl.promote')
end)

h.retrying({}, function()
t.assert_equals(g_appointments.cluster:server('storage-1'):eval(q_leadership), storage_1_uuid)
end)
end)

g_appointments.test_leader_persists_after_config_apply = function()
-- There was a bug with leader selection when Raft failover was enabled
-- on replicasets with fewer than 3 instances.
-- Raft is disabled on such replicasets. These were often routers.
-- As a result, the wrong storages were elected as masters,
-- and vshard returned a NON_MASTER error.
-- The bug is reproduced as follows:
-- 1. Change the leader in the replica set
-- 2. After the leader change, the router learns about the new leader via membership
-- 3. Apply a config update, which triggers failover.cfg
-- 4. The router incorrectly determines the leader and sets the old master as the leader,
-- because it appears first in the topology (lexicographically), as in 'disabled' mode.

-- Step 1: change the leader in the replica set
g_appointments.cluster:retrying({}, function()
g_appointments.cluster:server('storage-2'):call('box.ctl.promote')
end)

local storage_rs_uuid = g_appointments.cluster:server('storage-1').replicaset_uuid

local leader_switched_index
-- Step 2: wait for the router to update the leader via membership after the master change
h.retrying({}, function()
local res = g_appointments.cluster:server('router-1'):exec(function()
local cartridge = require('cartridge')
return cartridge.service_get('myrole').get_leaders_history()
end)

leader_switched_index = #res
t.assert_equals(res[leader_switched_index][storage_rs_uuid], storage_2_uuid)
end)

-- Step 3: Apply new config to simulate a configuration change and trigger apply_config
g_appointments.cluster:server('router-1'):exec(function()
return require("cartridge").config_patch_clusterwide({uuid = require("uuid").str()})
end)

h.wish_state(g_appointments.cluster:server('router-1'), 'RolesConfigured', 10)

-- Step 4: Check the updated leaders on the router
-- Ensure the leader did not revert from storage-2 back to storage-1
h.retrying({}, function()
local res = g_appointments.cluster:server('router-1'):exec(function()
local cartridge = require('cartridge')
return cartridge.service_get('myrole').get_leaders_history()
end)

t.assert(#res > leader_switched_index, 'Wait for failover.cfg to be called again after config apply')
for i = leader_switched_index, #res do
local leader_list = res[i]
-- After the switch, storage-2 must remain the leader
t.assert_equals(leader_list[storage_rs_uuid], storage_2_uuid)
end
end)
end

g_appointments.test_on_small_replicaset = function()
-- Check that if there are fewer than 3 instances in the replicaset,
-- the router behaves as in 'disabled' failover mode and appoints
-- the first (lexicographically) enabled instance as leader.

    -- Change the master first
g_appointments.cluster:retrying({}, function()
g_appointments.cluster:server('storage-2'):call('box.ctl.promote')
end)

h.retrying({}, function()
t.assert_equals(g_appointments.cluster:server('router-1'):eval(q_leadership), storage_2_uuid)
end)

-- Disable one instance to simulate a small replicaset (<3 instances)
g_appointments.cluster.main_server:exec(function(uuid)
require('cartridge.lua-api.topology').disable_servers({uuid})
end, {storage_2_uuid})

h.wish_state(g_appointments.cluster:server('router-1'), 'RolesConfigured', 10)

h.retrying({}, function()
-- storage-1 should be appointed as leader on router, as in 'disabled' mode
t.assert_equals(g_appointments.cluster:server('router-1'):eval(q_leadership), storage_1_uuid)
end)
end