
Commit 1db1337

Improve compaction task status updates
Previously, the emsort-related operations did not update the compaction task status. For large databases this led to very long waits while the compaction task sat at 100%. This change adds progress reports to the steps that sort document ids and copy them back into the database file.
Parent: 21c8d37
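These task properties are what CouchDB surfaces through the /_active_tasks endpoint. As a sketch of the intended effect (values and database name are illustrative; the field set is taken from the properties in the diffs below), a compaction entry during the new sort_ids phase would render roughly as:

    {
        "type": "database_compaction",
        "database": "mydb",
        "phase": "sort_ids",
        "changes_done": 250000,
        "total_changes": 1000000,
        "progress": 25
    }

With this change the task advances through four phases over one compaction run: seq_tree (copy_compact), sort_ids_init and sort_ids (sort_meta_data), and copy_ids (copy_meta_data), instead of sitting at 100% while emsort works.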

2 files changed (127 additions, 31 deletions)

src/couch_db_updater.erl (81 additions, 6 deletions)
@@ -1147,6 +1147,7 @@ copy_compact(Db, NewDb0, Retry) ->
     TaskProps0 = [
         {type, database_compaction},
         {database, Db#db.name},
+        {phase, seq_tree},
         {progress, 0},
         {changes_done, 0},
         {total_changes, TotalChanges}

@@ -1193,6 +1194,8 @@ start_copy_compact(#db{}=Db) ->
         open_compaction_files(Name, Header, Filepath, Options),
     erlang:monitor(process, MFd),
 
+    {ok, DocCount} = couch_db:get_doc_count(Db),
+
     % This is a bit worrisome. init_db/4 will monitor the data fd
     % but it doesn't know about the meta fd. For now I'll maintain
     % that the data fd is the old normal fd and meta fd is special

@@ -1201,9 +1204,9 @@ start_copy_compact(#db{}=Db) ->
 
     NewDb1 = copy_purge_info(Db, NewDb),
     NewDb2 = copy_compact(Db, NewDb1, Retry),
-    NewDb3 = sort_meta_data(NewDb2),
+    NewDb3 = sort_meta_data(NewDb2, DocCount),
     NewDb4 = commit_compaction_data(NewDb3),
-    NewDb5 = copy_meta_data(NewDb4),
+    NewDb5 = copy_meta_data(NewDb4, DocCount),
     NewDb6 = sync_header(NewDb5, db_to_header(NewDb5, NewDb5#db.header)),
     close_db(NewDb6),
 

@@ -1323,12 +1326,82 @@ bind_id_tree(Db, Fd, State) ->
     Db#db{id_tree=IdBtree}.
 
 
-sort_meta_data(Db0) ->
-    {ok, Ems} = couch_emsort:merge(Db0#db.id_tree),
-    Db0#db{id_tree=Ems}.
+sort_meta_data(Db0, DocCount) ->
+    couch_task_status:update([
+        {phase, sort_ids_init},
+        {total_changes, DocCount},
+        {changes_done, 0},
+        {progress, 0}
+    ]),
+    Ems0 = Db0#db.id_tree,
+    Options = [
+        {event_cb, fun emsort_cb/3},
+        {event_st, {init, 0, 0}}
+    ],
+    Ems1 = couch_emsort:set_options(Ems0, Options),
+    {ok, Ems2} = couch_emsort:merge(Ems1),
+    Db0#db{id_tree=Ems2}.
+
+
+emsort_cb(_Ems, {merge, chain}, {init, Copied, Nodes}) ->
+    {init, Copied, Nodes + 1};
+emsort_cb(_Ems, row_copy, {init, Copied, Nodes}) when Copied >= 1000 ->
+    update_compact_task(Copied + 1),
+    {init, 0, Nodes};
+emsort_cb(_Ems, row_copy, {init, Copied, Nodes}) ->
+    {init, Copied + 1, Nodes};
+emsort_cb(Ems, {merge_start, reverse}, {init, Copied, Nodes}) ->
+    BBChunkSize = couch_emsort:get_bb_chunk_size(Ems),
+
+    % Subtract one because we already finished the first
+    % iteration when we were counting the number of nodes
+    % in the backbone.
+    Iters = calculate_sort_iters(Nodes, BBChunkSize, 0) - 1,
+
+    % Compaction retries mean we may have copied more than
+    % doc count rows. This accounts for that by using the
+    % number we've actually copied.
+    [PrevCopied] = couch_task_status:get([changes_done]),
+    TotalCopied = PrevCopied + Copied,
+
+    couch_task_status:update([
+        {phase, sort_ids},
+        {total_changes, Iters * TotalCopied},
+        {changes_done, 0},
+        {progress, 0}
+    ]),
+    0;
+
+emsort_cb(_Ems, row_copy, Copied) when is_integer(Copied), Copied > 1000 ->
+    update_compact_task(Copied + 1),
+    0;
+
+emsort_cb(_Ems, row_copy, Copied) when is_integer(Copied) ->
+    Copied + 1;
+
+emsort_cb(_Ems, _Event, St) ->
+    St.
 
 
-copy_meta_data(#db{fd=Fd, header=Header}=Db) ->
+calculate_sort_iters(Nodes, BBChunk, Count) when Nodes < BBChunk ->
+    Count;
+calculate_sort_iters(Nodes0, BBChunk, Count) when BBChunk > 1 ->
+    Calc = fun(N0) ->
+        N1 = N0 div BBChunk,
+        N1 + if N1 rem BBChunk == 0 -> 0; true -> 1 end
+    end,
+    Nodes1 = Calc(Nodes0),
+    Nodes2 = Calc(Nodes1),
+    calculate_sort_iters(Nodes2, BBChunk, Count + 2).
+
+
+copy_meta_data(#db{fd=Fd, header=Header}=Db, DocCount) ->
+    couch_task_status:update([
+        {phase, copy_ids},
+        {changes_done, 0},
+        {total_changes, DocCount},
+        {progress, 0}
+    ]),
     Src = Db#db.id_tree,
     DstState = couch_db_header:id_tree_state(Header),
     {ok, IdTree0} = couch_btree:open(DstState, Fd, [

@@ -1348,6 +1421,7 @@ copy_meta_data(#db{fd=Fd, header=Header}=Db) ->
     {ok, SeqTree} = couch_btree:add_remove(
         Acc#merge_st.seq_tree, [], Acc#merge_st.rem_seqs
     ),
+    update_compact_task(length(Acc#merge_st.infos)),
     Db#db{id_tree=IdTree, seq_tree=SeqTree}.
 
 

@@ -1359,6 +1433,7 @@ merge_docids(Iter, #merge_st{infos=Infos}=Acc) when length(Infos) > 1000 ->
     } = Acc,
     {ok, IdTree1} = couch_btree:add(IdTree0, Infos),
     {ok, SeqTree1} = couch_btree:add_remove(SeqTree0, [], RemSeqs),
+    update_compact_task(length(Infos)),
     Acc1 = Acc#merge_st{
         id_tree=IdTree1,
         seq_tree=SeqTree1,
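As a rough check on calculate_sort_iters/3 above, assuming couch_emsort's default backbone chunk size of 10 (bb_chunk in the #ems record): each recursion applies Calc twice because every decimation round makes a forward and a reverse merge pass. Traced by hand:

    %% calculate_sort_iters(1000, 10, 0)
    %%   Calc(1000) = 100, Calc(100) = 10  ->  calculate_sort_iters(10, 10, 2)
    %%   Calc(10)   = 2,   Calc(2)   = 0   ->  calculate_sort_iters(0, 10, 4)
    %%   0 < 10 terminates: a backbone of 1000 nodes costs 4 merge passes,
    %%   so emsort_cb sizes the sort_ids phase at (4 - 1) * TotalCopied rows.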

src/couch_emsort.erl (46 additions, 25 deletions)
@@ -129,15 +129,18 @@
 % CA3 CD3
 %
 
--export([open/1, open/2, get_fd/1, get_state/1]).
+-export([open/1, open/2, set_options/2, get_fd/1, get_state/1]).
+-export([get_bb_chunk_size/1]).
 -export([add/2, merge/1, sort/1, iter/1, next/1]).
 
 
 -record(ems, {
     fd,
     root,
     bb_chunk = 10,
-    chain_chunk = 100
+    chain_chunk = 100,
+    event_cb,
+    event_st
 }).
 
 

@@ -156,7 +159,11 @@ set_options(Ems, [{root, Root} | Rest]) ->
 set_options(Ems, [{chain_chunk, Count} | Rest]) when is_integer(Count) ->
     set_options(Ems#ems{chain_chunk=Count}, Rest);
 set_options(Ems, [{back_bone_chunk, Count} | Rest]) when is_integer(Count) ->
-    set_options(Ems#ems{bb_chunk=Count}, Rest).
+    set_options(Ems#ems{bb_chunk=Count}, Rest);
+set_options(Ems, [{event_cb, EventCB} | Rest]) when is_function(EventCB, 3) ->
+    set_options(Ems#ems{event_cb=EventCB}, Rest);
+set_options(Ems, [{event_st, EventSt} | Rest]) ->
+    set_options(Ems#ems{event_st=EventSt}, Rest).
 
 
 get_fd(#ems{fd=Fd}) ->

@@ -167,6 +174,10 @@ get_state(#ems{root=Root}) ->
     Root.
 
 
+get_bb_chunk_size(#ems{bb_chunk = Size}) ->
+    Size.
+
+
 add(Ems, []) ->
     {ok, Ems};
 add(Ems, KVs) ->

@@ -224,51 +235,55 @@ decimate(#ems{root={_BB, nil}}=Ems) ->
     % We have less than bb_chunk backbone pointers so we're
     % good to start streaming KV's back to the client.
     Ems;
-decimate(#ems{root={BB, NextBB}}=Ems) ->
+decimate(#ems{}=Ems0) ->
     % To make sure we have a bounded amount of data in RAM
     % at any given point we first need to decimate the data
     % by performing the first couple iterations of a merge
     % sort writing the intermediate results back to disk.
 
     % The first pass gives us a sort with pointers linked from
     % largest to smallest.
-    {RevBB, RevNextBB} = merge_back_bone(Ems, small, BB, NextBB),
+    {ok, Ems1} = event_notify(Ems0, {merge_start, forward}),
+    {ok, Ems2} = merge_back_bone(Ems1, small),
 
     % We have to run a second pass so that links are pointed
     % back from smallest to largest.
-    {FwdBB, FwdNextBB} = merge_back_bone(Ems, big, RevBB, RevNextBB),
+    {ok, Ems3} = event_notify(Ems2, {merge_start, reverse}),
+    {ok, Ems4} = merge_back_bone(Ems3, big),
 
     % Continue decimating until we have an acceptable bound on
     % the number of keys to use.
-    decimate(Ems#ems{root={FwdBB, FwdNextBB}}).
+    decimate(Ems4).
 
 
-merge_back_bone(Ems, Choose, BB, NextBB) ->
-    BBPos = merge_chains(Ems, Choose, BB),
-    merge_rest_back_bone(Ems, Choose, NextBB, {[BBPos], nil}).
+merge_back_bone(#ems{root={BB, NextBB}}=Ems0, Choose) ->
+    {ok, Ems1, BBPos} = merge_chains(Ems0, Choose, BB),
+    merge_rest_back_bone(Ems1, Choose, NextBB, {[BBPos], nil}).
 
 
-merge_rest_back_bone(_Ems, _Choose, nil, Acc) ->
-    Acc;
-merge_rest_back_bone(Ems, Choose, BBPos, Acc) ->
-    {ok, {BB, NextBB}} = couch_file:pread_term(Ems#ems.fd, BBPos),
-    NewPos = merge_chains(Ems, Choose, BB),
-    {NewBB, NewPrev} = append_item(Ems, Acc, NewPos, Ems#ems.bb_chunk),
-    merge_rest_back_bone(Ems, Choose, NextBB, {NewBB, NewPrev}).
+merge_rest_back_bone(Ems, _Choose, nil, Acc) ->
+    {ok, Ems#ems{root=Acc}};
+merge_rest_back_bone(Ems0, Choose, BBPos, Acc) ->
+    {ok, {BB, NextBB}} = couch_file:pread_term(Ems0#ems.fd, BBPos),
+    {ok, Ems1, NewPos} = merge_chains(Ems0, Choose, BB),
+    {NewBB, NewPrev} = append_item(Ems1, Acc, NewPos, Ems1#ems.bb_chunk),
+    merge_rest_back_bone(Ems1, Choose, NextBB, {NewBB, NewPrev}).
 
 
-merge_chains(Ems, Choose, BB) ->
-    Chains = init_chains(Ems, Choose, BB),
-    merge_chains(Ems, Choose, Chains, {[], nil}).
+merge_chains(Ems0, Choose, BB) ->
+    {ok, Ems1} = event_notify(Ems0, {merge, chain}),
+    Chains = init_chains(Ems1, Choose, BB),
+    merge_chains(Ems1, Choose, Chains, {[], nil}).
 
 
 merge_chains(Ems, _Choose, [], ChainAcc) ->
     {ok, CPos, _} = couch_file:append_term(Ems#ems.fd, ChainAcc),
-    CPos;
-merge_chains(#ems{chain_chunk=CC}=Ems, Choose, Chains, Acc) ->
-    {KV, RestChains} = choose_kv(Choose, Ems, Chains),
-    {NewKVs, NewPrev} = append_item(Ems, Acc, KV, CC),
-    merge_chains(Ems, Choose, RestChains, {NewKVs, NewPrev}).
+    {ok, Ems, CPos};
+merge_chains(#ems{chain_chunk=CC}=Ems0, Choose, Chains, Acc) ->
+    {KV, RestChains} = choose_kv(Choose, Ems0, Chains),
+    {NewKVs, NewPrev} = append_item(Ems0, Acc, KV, CC),
+    {ok, Ems1} = event_notify(Ems0, row_copy),
+    merge_chains(Ems1, Choose, RestChains, {NewKVs, NewPrev}).
 
 
 init_chains(Ems, Choose, BB) ->

@@ -316,3 +331,9 @@ append_item(Ems, {List, Prev}, Pos, Size) when length(List) >= Size ->
 append_item(_Ems, {List, Prev}, Pos, _Size) ->
     {[Pos | List], Prev}.
 
+
+event_notify(#ems{event_cb = undefined} = Ems, _) ->
+    {ok, Ems};
+event_notify(#ems{event_cb=EventCB, event_st=EventSt}=Ems, Event) ->
+    NewSt = EventCB(Ems, Event, EventSt),
+    {ok, Ems#ems{event_st=NewSt}}.
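The hook is deliberately generic: any caller can attach an arity-3 fun via set_options/2 and thread its own state through the merge, with the callback's return value becoming the event_st passed to the next event. A minimal sketch, assuming Ems0 is a handle already obtained from couch_emsort:open/2 (the callback itself is hypothetical; the events emitted during merge/1 are {merge_start, forward}, {merge_start, reverse}, {merge, chain} and row_copy):

    %% Count every row copied during the merge; leave the state
    %% untouched for any other event.
    CB = fun
        (_Ems, row_copy, Copied) -> Copied + 1;
        (_Ems, _Event, St) -> St
    end,
    Ems1 = couch_emsort:set_options(Ems0, [{event_cb, CB}, {event_st, 0}]),
    {ok, Ems2} = couch_emsort:merge(Ems1).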
