Skip to content

Commit ded876f

Browse files
committed
recovery: reduce spam of "Finish bucket recovery step" logs
Before this patch "Finish bucket recovery step ..." logs were printed at the end of recovery even if no buckets were successfully recovered. It led to unnecessary log records. This patch fixes the issue by adding an additional check for the number of recovered buckets. Also we print id of every bucket which was successfully recovered in order to track buckets' duplication. Part of #212 NO_DOC=bugfix
1 parent 30d27ac commit ded876f

File tree

2 files changed

+95
-1
lines changed

2 files changed

+95
-1
lines changed

test/storage-luatest/storage_1_1_1_test.lua

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,96 @@ test_group.test_manual_bucket_send_doubled_buckets = function(g)
101101
ilt.assert_equals(box.space._bucket:get(bid), nil)
102102
end, {bid})
103103
end
104+
105+
local rebalancer_recovery_group = t.group('rebalancer-recovery-logging')
106+
107+
local function start_bucket_move(src_storage, dest_storage, bucket_id)
108+
src_storage:exec(function(bucket_id, replicaset_id)
109+
ivshard.storage.bucket_send(bucket_id, replicaset_id)
110+
end, {bucket_id, dest_storage:replicaset_uuid()})
111+
112+
dest_storage:exec(function(bucket_id)
113+
t.helpers.retrying({timeout = 10}, function()
114+
t.assert(box.space._bucket:select(bucket_id))
115+
end)
116+
end, {bucket_id})
117+
end
118+
119+
local function assert_bucket_is_transferred(src_storage, dest_storage,
120+
bucket_id)
121+
src_storage:exec(function(bucket_id)
122+
t.helpers.retrying({}, function()
123+
t.assert_equals(box.space._bucket:select(bucket_id), {})
124+
end)
125+
end, {bucket_id})
126+
dest_storage:exec(function(bucket_id)
127+
t.helpers.retrying({}, function()
128+
t.assert_equals(box.space._bucket:select(bucket_id)[1].status,
129+
'active')
130+
end)
131+
end, {bucket_id})
132+
end
133+
134+
rebalancer_recovery_group.before_all(function(g)
135+
global_cfg = vtest.config_new(cfg_template)
136+
vtest.cluster_new(g, global_cfg)
137+
g.router = vtest.router_new(g, 'router', global_cfg)
138+
vtest.cluster_bootstrap(g, global_cfg)
139+
vtest.cluster_wait_vclock_all(g)
140+
141+
vtest.cluster_exec_each_master(g, function()
142+
box.schema.create_space('test_space')
143+
box.space.test_space:format({
144+
{name = 'pk', type = 'unsigned'},
145+
{name = 'bucket_id', type = 'unsigned'},
146+
})
147+
box.space.test_space:create_index('primary', {parts = {'pk'}})
148+
box.space.test_space:create_index(
149+
'bucket_id', {parts = {'bucket_id'}, unique = false})
150+
end)
151+
end)
152+
153+
rebalancer_recovery_group.after_all(function(g)
154+
g.cluster:drop()
155+
end)
156+
157+
--
158+
-- Improve logging of rebalancer and recovery (gh-212).
159+
--
160+
rebalancer_recovery_group.test_no_logs_while_unsuccess_recovery = function(g)
161+
g.replica_2_a:exec(function()
162+
ivshard.storage.internal.errinj.ERRINJ_RECEIVE_PARTIALLY = true
163+
rawset(_G, 'old_call', ivshard.storage._call)
164+
ivshard.storage._call = function(service_name, ...)
165+
if service_name == 'recovery_bucket_stat' then
166+
return error('TimedOut')
167+
end
168+
return _G.old_call(service_name, ...)
169+
end
170+
end)
171+
local hanged_bucket_id_1 = vtest.storage_first_bucket(g.replica_1_a)
172+
start_bucket_move(g.replica_1_a, g.replica_2_a, hanged_bucket_id_1)
173+
local hanged_bucket_id_2 = vtest.storage_first_bucket(g.replica_1_a)
174+
start_bucket_move(g.replica_1_a, g.replica_2_a, hanged_bucket_id_2)
175+
t.helpers.retrying({}, function()
176+
t.assert(g.replica_1_a:grep_log('Error during recovery of bucket 1'))
177+
end)
178+
t.assert_not(g.replica_1_a:grep_log('Finish bucket recovery step, 0'))
179+
g.replica_2_a:exec(function()
180+
ivshard.storage.internal.errinj.ERRINJ_RECEIVE_PARTIALLY = false
181+
ivshard.storage._call = _G.old_call
182+
end)
183+
t.helpers.retrying({}, function()
184+
g.replica_2_a:exec(function()
185+
ivshard.storage.garbage_collector_wakeup()
186+
ivshard.storage.recovery_wakeup()
187+
end)
188+
g.replica_1_a:exec(function() ivshard.storage.recovery_wakeup() end)
189+
t.assert(g.replica_1_a:grep_log('Finish bucket recovery step, 2 ' ..
190+
'sending buckets are recovered among'))
191+
end)
192+
assert_bucket_is_transferred(g.replica_2_a, g.replica_1_a,
193+
hanged_bucket_id_1)
194+
assert_bucket_is_transferred(g.replica_2_a, g.replica_1_a,
195+
hanged_bucket_id_2)
196+
end

vshard/storage/init.lua

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ local lmsgpack = require('msgpack')
55
local netbox = require('net.box') -- for net.box:self()
66
local trigger = require('internal.trigger')
77
local ffi = require('ffi')
8+
local json_encode = require('json').encode
89
local yaml_encode = require('yaml').encode
910
local fiber_clock = lfiber.clock
1011
local fiber_yield = lfiber.yield
@@ -1003,7 +1004,7 @@ local function recovery_step_by_type(type)
10031004
is_step_empty = false
10041005
::continue::
10051006
end
1006-
if not is_step_empty then
1007+
if recovered > 0 then
10071008
log.info('Finish bucket recovery step, %d %s buckets are recovered '..
10081009
'among %d', recovered, type, total)
10091010
end

0 commit comments

Comments
 (0)