Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions test/storage-luatest/storage_1_1_1_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ local function wait_for_bucket_is_transferred(src_storage, dest_storage,
end

--
-- Reduce spam of "Finish bucket recovery step" logs in recovery
-- service (gh-212).
-- Reduce spam of "Finish bucket recovery step" logs and add logging of
-- recovered buckets in recovery service (gh-212).
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is actually nice how you made this message evolve together with the commits.

--
test_group.test_no_logs_while_unsuccess_recovery = function(g)
g.replica_2_a:exec(function()
Expand Down Expand Up @@ -183,8 +183,13 @@ test_group.test_no_logs_while_unsuccess_recovery = function(g)
ivshard.storage.recovery_wakeup()
end)
g.replica_1_a:exec(function() ivshard.storage.recovery_wakeup() end)
t.assert(g.replica_1_a:grep_log('Finish bucket recovery step, 2 ' ..
'sending buckets are recovered among'))
-- In some rare cases the recovery service can recover buckets one
-- by one. As a result we get multiple "Finish bucket recovery" and
-- "Recovered buckets" logs with different bucket ids and buckets'
-- count. That is why we should grep general logs without buckets'
-- count and bucket ids to avoid flakiness.
t.assert(g.replica_1_a:grep_log('Finish bucket recovery step'))
t.assert(g.replica_1_a:grep_log('Recovered buckets'))
end)
wait_for_bucket_is_transferred(g.replica_2_a, g.replica_1_a,
hanged_bucket_id_1)
Expand Down
14 changes: 13 additions & 1 deletion vshard/storage/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ local lmsgpack = require('msgpack')
local netbox = require('net.box') -- for net.box:self()
local trigger = require('internal.trigger')
local ffi = require('ffi')
local json_encode = require('json').encode
local yaml_encode = require('yaml').encode
local fiber_clock = lfiber.clock
local fiber_yield = lfiber.yield
Expand Down Expand Up @@ -922,6 +923,12 @@ local function recovery_local_bucket_is_active(local_bucket, remote_bucket)
return status == BSENT or status == BGARBAGE
end

--
-- Remember a recovered bucket id under the status it was recovered
-- to. The dictionary maps a status to a list of bucket ids; the list
-- for a status is created when its first id is saved.
--
local function save_recovered(dict, id, status)
    local bucket_ids = dict[status]
    if not bucket_ids then
        bucket_ids = {}
        dict[status] = bucket_ids
    end
    bucket_ids[#bucket_ids + 1] = id
end

--
-- Check status of each transferring bucket. Resolve status where
-- possible.
Expand All @@ -932,6 +939,7 @@ local function recovery_step_by_type(type, limiter)
local recovered = 0
local total = 0
local start_format = 'Starting %s buckets recovery step'
local recovered_buckets = {}
for _, bucket in _bucket.index.status:pairs(type) do
lfiber.testcancel()
total = total + 1
Expand Down Expand Up @@ -992,12 +1000,15 @@ local function recovery_step_by_type(type, limiter)
if recovery_local_bucket_is_sent(bucket, remote_bucket) then
_bucket:update({bucket_id}, {{'=', 2, BSENT}})
recovered = recovered + 1
save_recovered(recovered_buckets, bucket_id, BSENT)
elseif recovery_local_bucket_is_garbage(bucket, remote_bucket) then
_bucket:update({bucket_id}, {{'=', 2, BGARBAGE}})
recovered = recovered + 1
save_recovered(recovered_buckets, bucket_id, BGARBAGE)
elseif recovery_local_bucket_is_active(bucket, remote_bucket) then
_bucket:replace({bucket_id, BACTIVE})
recovered = recovered + 1
save_recovered(recovered_buckets, bucket_id, BACTIVE)
elseif is_step_empty then
log.info('Bucket %s is %s local and %s on replicaset %s, waiting',
bucket_id, bucket.status, remote_bucket.status, peer_id)
Expand All @@ -1007,7 +1018,8 @@ local function recovery_step_by_type(type, limiter)
end
if recovered > 0 then
log.info('Finish bucket recovery step, %d %s buckets are recovered '..
'among %d', recovered, type, total)
'among %d. Recovered buckets: %s', recovered, type, total,
json_encode(recovered_buckets))
end
return total, recovered
end
Expand Down