7 changes: 7 additions & 0 deletions CHANGELOG.rst
@@ -12,6 +12,13 @@ and this project adheres to
Unreleased
-------------------------------------------------------------------------------

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Fixed
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- Invalid leader appointment in Raft failover when there are not enough instances
in the replicaset.

-------------------------------------------------------------------------------
[2.15.3] - 2025-04-24
-------------------------------------------------------------------------------
3 changes: 1 addition & 2 deletions cartridge/failover.lua
@@ -958,18 +958,17 @@ local function cfg(clusterwide_config, opts)
and #topology.get_leaders_order(
topology_cfg, vars.replicaset_uuid, nil, {only_electable = false, only_enabled = true}) < 3
then
first_appointments = _get_appointments_disabled_mode(topology_cfg)
log.warn('Not enough instances to enable Raft failover')
raft_failover.disable()
else
local ok, err = ApplyConfigError:pcall(raft_failover.cfg)
if not ok then
return nil, err
end
first_appointments = raft_failover.get_appointments(topology_cfg)
log.info('Raft failover enabled')
end

first_appointments = raft_failover.get_appointments(topology_cfg)
vars.failover_fiber = fiber.new(failover_loop, {
get_appointments = function()
vars.membership_notification:wait()
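The hunk above is the core of the fix: previously `first_appointments` was overwritten after the if/else by an unconditional `raft_failover.get_appointments()` call, so the disabled-mode appointments computed for small replicasets were discarded. A minimal sketch of the resulting control flow (the leading condition is cut off by the hunk; `vars.disable_raft_on_small_clusters` is assumed from cartridge/failover/raft.lua, and the rest of `cfg()` is elided):

    -- Sketch only: simplified from the hunk above, not the full cfg() body.
    local first_appointments
    if vars.disable_raft_on_small_clusters
    and #topology.get_leaders_order(
        topology_cfg, vars.replicaset_uuid, nil,
        {only_electable = false, only_enabled = true}) < 3
    then
        -- Fewer than 3 instances: fall back to 'disabled'-mode appointments.
        first_appointments = _get_appointments_disabled_mode(topology_cfg)
        log.warn('Not enough instances to enable Raft failover')
        raft_failover.disable()
    else
        local ok, err = ApplyConfigError:pcall(raft_failover.cfg)
        if not ok then
            return nil, err
        end
        -- Raft is enabled: each branch now sets its own appointments.
        first_appointments = raft_failover.get_appointments(topology_cfg)
        log.info('Raft failover enabled')
    end
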
17 changes: 16 additions & 1 deletion cartridge/failover/raft.lua
@@ -58,7 +58,22 @@ local function get_appointments(topology_cfg)
::next_rs::
end

appointments[vars.replicaset_uuid] = vars.leader_uuid
-- If Raft is enabled but `disable_raft_on_small_clusters` is active,
-- and the replicaset has fewer than 3 instances,
-- Raft failover is considered disabled for this replicaset.
-- In this case, we appoint a leader as in 'disabled' failover mode.
for replicaset_uuid, _ in pairs(replicasets) do
Copilot AI commented on Jun 6, 2025:
The threshold '3' for small clusters is a magic number; consider extracting it into a named constant (e.g. MIN_RAFT_REPLICASET_SIZE) or a config parameter for clarity and easier adjustment.
local leaders = topology.get_leaders_order(
topology_cfg, replicaset_uuid, nil, {only_electable = false, only_enabled = true}
)
if vars.disable_raft_on_small_clusters and #leaders < 3 then
appointments[replicaset_uuid] = leaders[1]
end
end

if vars.leader_uuid ~= nil then
appointments[vars.replicaset_uuid] = vars.leader_uuid
end
return appointments
end

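Following up on the review comment above, a hedged sketch of how the threshold could be lifted into a named constant. `MIN_RAFT_REPLICASET_SIZE` is the name suggested in the comment, not something this PR introduces:

    -- Hypothetical refactoring, not part of this PR: name the threshold once
    -- instead of repeating the literal 3 in failover.lua and raft.lua.
    local MIN_RAFT_REPLICASET_SIZE = 3

    for replicaset_uuid, _ in pairs(replicasets) do
        local leaders = topology.get_leaders_order(
            topology_cfg, replicaset_uuid, nil,
            {only_electable = false, only_enabled = true}
        )
        if vars.disable_raft_on_small_clusters
        and #leaders < MIN_RAFT_REPLICASET_SIZE then
            -- Small replicaset: appoint the first leader, as in 'disabled' mode.
            appointments[replicaset_uuid] = leaders[1]
        end
    end
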
4 changes: 4 additions & 0 deletions test/entrypoint/srv_basic.lua
@@ -32,6 +32,7 @@ package.preload['mymodule'] = function()
local httpd = service_registry.get('httpd')
local validated = {}
local master_switches = {}
local leaders_history = {}

if httpd ~= nil then
httpd:route(
@@ -72,6 +73,7 @@ package.preload['mymodule'] = function()
get_state = function() return state end,
is_master = function() return master end,
get_master_switches = function() return master_switches end,
get_leaders_history = function() return leaders_history end,
validate_config = function()
table.insert(validated, true)
return true
@@ -99,6 +101,8 @@ package.preload['mymodule'] = function()
end
master = opts.is_master
table.remove(validated, #validated)
local failover = require('cartridge.failover')
table.insert(leaders_history, failover.get_active_leaders())
end,
stop = function()
state = 'stopped'
132 changes: 132 additions & 0 deletions test/integration/raft_test.lua
@@ -8,6 +8,7 @@ local g_not_enough_instances = t.group('integration.raft_not_enough_instances')
local g_unelectable = t.group('integration.raft_unelectable')
local g_disable = t.group('integration.raft_disabled_instances')
local g_expel = t.group('integration.raft_expelled_instances')
local g_appointments = t.group('integration.raft_appointments')
local g_all_rw = t.group('integration.raft_all_rw')
local h = require('test.helper')

@@ -341,6 +342,7 @@ g.test_kill_master = function()

start_server('storage-1')
start_server('storage-2')
g.cluster:wait_until_healthy()

local after_2pc = get_2pc_count()

@@ -794,3 +796,133 @@ g_all_rw.test_raft_in_all_rw_mode_fails = function()
return require('cartridge.confapplier').get_state()
end), 'RolesConfigured')
end

----------------------------------------------------------------

setup_group(g_appointments, {
{
alias = 'router',
uuid = h.uuid('a'),
roles = {
'vshard-router',
'myrole',
},
servers = 1,
},
{
alias = 'storage',
uuid = replicaset_uuid,
roles = {
'vshard-storage',
},
servers = {
{
instance_uuid = storage_1_uuid,
},
{
instance_uuid = storage_2_uuid,
},
{
instance_uuid = storage_3_uuid,
},
}
},
})

g_appointments.before_each(function()
t.assert_equals(set_failover_params(g_appointments, { mode = 'raft' }), { mode = 'raft' })
end)

g_appointments.after_each(function()
-- return original leader
g_appointments.cluster:retrying({}, function()
g_appointments.cluster:server('storage-1'):call('box.ctl.promote')
end)

h.retrying({}, function()
t.assert_equals(g_appointments.cluster:server('storage-1'):eval(q_leadership), storage_1_uuid)
end)
end)

g_appointments.test_leader_persists_after_config_apply = function()
-- There was a bug with leader selection when Raft failover was enabled
-- on replicasets with fewer than 3 instances.
-- Raft is disabled on such replicasets. These were often routers.
-- As a result, the wrong storages were elected as masters,
-- and vshard returned a NON_MASTER error.
-- The bug is reproduced as follows:
-- 1. Change the leader in the replica set
-- 2. After the leader change, the router learns about the new leader via membership
-- 3. Apply a config update, which triggers failover.cfg
-- 4. The router incorrectly determines the leader and sets the old master as the leader,
-- because it appears first in the topology (lexicographically), as in 'disabled' mode.

-- Step 1: change the leader in the replica set
g_appointments.cluster:retrying({}, function()
g_appointments.cluster:server('storage-2'):call('box.ctl.promote')
end)

local storage_rs_uuid = g_appointments.cluster:server('storage-1').replicaset_uuid

local leader_switched_index
-- Step 2: wait for the router to update the leader via membership after the master change
h.retrying({}, function()
local res = g_appointments.cluster:server('router-1'):exec(function()
local cartridge = require('cartridge')
return cartridge.service_get('myrole').get_leaders_history()
end)

leader_switched_index = #res
t.assert_equals(res[leader_switched_index][storage_rs_uuid], storage_2_uuid)
end)

-- Step 3: Apply new config to simulate a configuration change and trigger apply_config
g_appointments.cluster:server('router-1'):exec(function()
return require("cartridge").config_patch_clusterwide({uuid = require("uuid").str()})
end)

h.wish_state(g_appointments.cluster:server('router-1'), 'RolesConfigured', 10)

-- Step 4: Check the updated leaders on the router
-- Ensure the leader did not revert from storage-2 back to storage-1
h.retrying({}, function()
local res = g_appointments.cluster:server('router-1'):exec(function()
local cartridge = require('cartridge')
return cartridge.service_get('myrole').get_leaders_history()
end)

t.assert(#res > leader_switched_index, 'Wait for failover.cfg to be called again after config apply')
for i = leader_switched_index, #res do
local leader_list = res[i]
-- After the switch, storage-2 must remain the leader
t.assert_equals(leader_list[storage_rs_uuid], storage_2_uuid)
end
end)
end

g_appointments.test_on_small_replicaset = function()
-- Check that if there are fewer than 3 instances in the replicaset,
-- the router behaves as in 'disabled' failover mode and appoints
-- the first (lexicographically) enabled instance as leader.

    -- Change the master first
g_appointments.cluster:retrying({}, function()
g_appointments.cluster:server('storage-2'):call('box.ctl.promote')
end)

h.retrying({}, function()
t.assert_equals(g_appointments.cluster:server('router-1'):eval(q_leadership), storage_2_uuid)
end)

-- Disable one instance to simulate a small replicaset (<3 instances)
g_appointments.cluster.main_server:exec(function(uuid)
require('cartridge.lua-api.topology').disable_servers({uuid})
end, {storage_2_uuid})

h.wish_state(g_appointments.cluster:server('router-1'), 'RolesConfigured', 10)

h.retrying({}, function()
-- storage-1 should be appointed as leader on router, as in 'disabled' mode
t.assert_equals(g_appointments.cluster:server('router-1'):eval(q_leadership), storage_1_uuid)
end)
end