Skip to content

Commit caa5e23

Browse files
committed
mango: rolling execution statistics
In case of map-reduce views, the arrival of the `complete` message is not guaranteed for the view callback (at the shard) when a `stop` is issued during the aggregation (at the coordinator). Due to that, internally collected shard-level statistics may not be fed back to the coordinator which can cause data loss hence inaccuracy in the overall execution statistics. Address this issue by switching to a "rolling" model where row-level statistics are immediately streamed back to the coordinator. Support mixed-version cluster upgrades by activating this model only if requested through the map-reduce arguments and the given shard supports that. Fixes #4560
1 parent f88ba94 commit caa5e23

File tree

3 files changed

+194
-38
lines changed

3 files changed

+194
-38
lines changed

src/fabric/src/fabric_view_row.erl

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
get_value/1,
1919
get_doc/1,
2020
get_worker/1,
21+
get_stats/1,
2122
set_key/2,
2223
set_doc/2,
2324
set_worker/2,
25+
set_stats/2,
2426
transform/1
2527
]).
2628

@@ -91,6 +93,14 @@ set_worker(#view_row{} = Row, Worker) ->
9193
set_worker({view_row, #{} = Row}, Worker) ->
9294
{view_row, Row#{worker => Worker}}.
9395

96+
get_stats({view_row, #{stats := Stats}}) ->
97+
Stats;
98+
get_stats({view_row, #{}}) ->
99+
undefined.
100+
101+
set_stats({view_row, #{} = Row}, Stats) ->
102+
{view_row, Row#{stats => Stats}}.
103+
94104
transform(#view_row{value = {[{reduce_overflow_error, Msg}]}}) ->
95105
{row, [{key, null}, {id, error}, {value, reduce_overflow_error}, {reason, Msg}]};
96106
transform(#view_row{key = Key, id = reduced, value = Value}) ->
@@ -109,8 +119,13 @@ transform({view_row, #{} = Row0}) ->
109119
Value = maps:get(value, Row0, undefined),
110120
Doc = maps:get(doc, Row0, undefined),
111121
Worker = maps:get(worker, Row0, undefined),
122+
Stats = maps:get(stats, Row0, undefined),
112123
Row = #view_row{id = Id, key = Key, value = Value, doc = Doc, worker = Worker},
113-
transform(Row).
124+
{row, Props} = RowProps = transform(Row),
125+
case Stats of
126+
undefined -> RowProps;
127+
#{} -> {row, [{stats, Stats} | Props]}
128+
end.
114129

115130
-ifdef(TEST).
116131
-include_lib("couch/include/couch_eunit.hrl").

src/mango/src/mango_cursor_view.erl

Lines changed: 109 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@
5555
covering_index => 'maybe'(#idx{})
5656
}.
5757

58+
-type mrargs_extra_item() ::
59+
{callback, {atom(), atom()}}
60+
| {selector, any()}
61+
| {callback_args, viewcbargs()}
62+
| {ignore_partition_query_limit, boolean()}
63+
| {execution_stats_map, boolean()}
64+
| {execution_stats_rolling, boolean()}.
65+
-type mrargs_extra() :: [mrargs_extra_item()].
66+
5867
-spec viewcbargs_new(Selector, Fields, CoveringIndex) -> ViewCBArgs when
5968
Selector :: selector(),
6069
Fields :: fields(),
@@ -207,7 +216,9 @@ base_args(#cursor{index = Idx, selector = Selector, fields = Fields} = Cursor) -
207216
% - Return execution statistics in a map
208217
{execution_stats_map, true},
209218
% - Return view rows in a map
210-
{view_row_map, true}
219+
{view_row_map, true},
220+
% - Stream execution statistics
221+
{execution_stats_rolling, true}
211222
]
212223
}.
213224

@@ -341,6 +352,43 @@ choose_best_index(IndexRanges) ->
341352
{SelectedIndex, SelectedIndexRanges, _} = hd(SortedIndexRanges),
342353
{{SelectedIndex, SelectedIndexRanges}, SortedIndexRanges}.
343354

355+
-spec format_stats(RawStats, Options) -> FormattedStats when
356+
RawStats :: shard_stats_v2(),
357+
Options :: mrargs_extra(),
358+
FormattedStats :: shard_stats_v1() | shard_stats_v2().
359+
format_stats(Stats, Options) when is_list(Options) ->
360+
case couch_util:get_value(execution_stats_map, Options, false) of
361+
true ->
362+
Stats;
363+
false ->
364+
#{docs_examined := DocsExamined} = Stats,
365+
{docs_examined, DocsExamined}
366+
end.
367+
368+
-spec submit_stats(Options) -> ok when
369+
Options :: mrargs_extra().
370+
submit_stats(Options) when is_list(Options) ->
371+
ShardStats = mango_execution_stats:shard_get_stats(),
372+
Stats = format_stats(ShardStats, Options),
373+
% Send execution stats in batch (shard-level)
374+
ok = rexi:stream2({execution_stats, Stats}).
375+
376+
-spec roll_stats(ViewRow, Options) -> ViewRow when
377+
ViewRow :: view_row(),
378+
Options :: mrargs_extra().
379+
roll_stats(ViewRow, Options) when is_list(Options) ->
380+
ViewRowMap = couch_util:get_value(view_row_map, Options, false),
381+
RollingStats = couch_util:get_value(execution_stats_rolling, Options, false),
382+
case ViewRowMap andalso RollingStats of
383+
true ->
384+
ShardStats = mango_execution_stats:shard_get_stats(),
385+
mango_execution_stats:shard_init(),
386+
Stats = format_stats(ShardStats, Options),
387+
fabric_view_row:set_stats(ViewRow, Stats);
388+
false ->
389+
ViewRow
390+
end.
391+
344392
-spec view_cb
345393
(Message, #mrargs{}) -> Response when
346394
Message :: {meta, any()} | {row, row_properties()} | complete,
@@ -382,7 +430,8 @@ view_cb({row, Props}, #mrargs{extra = Options} = Acc) ->
382430
case match_and_extract_doc(Doc, Selector, Fields) of
383431
{match, FinalDoc} ->
384432
ViewRow1 = fabric_view_row:set_doc(ViewRow, FinalDoc),
385-
ok = rexi:stream2(ViewRow1),
433+
ViewRow2 = roll_stats(ViewRow1, Options),
434+
ok = rexi:stream2(ViewRow2),
386435
set_mango_msg_timestamp();
387436
{no_match, undefined} ->
388437
maybe_send_mango_ping()
@@ -397,7 +446,8 @@ view_cb({row, Props}, #mrargs{extra = Options} = Acc) ->
397446
Process(Doc);
398447
{undefined, _} ->
399448
% include_docs=false. Use quorum fetch at coordinator
400-
ok = rexi:stream2(ViewRow),
449+
ViewRow1 = roll_stats(ViewRow, Options),
450+
ok = rexi:stream2(ViewRow1),
401451
set_mango_msg_timestamp();
402452
{Doc, _} ->
403453
mango_execution_stats:shard_incr_docs_examined(),
@@ -406,17 +456,7 @@ view_cb({row, Props}, #mrargs{extra = Options} = Acc) ->
406456
end,
407457
{ok, Acc};
408458
view_cb(complete, #mrargs{extra = Options} = Acc) ->
409-
ShardStats = mango_execution_stats:shard_get_stats(),
410-
Stats =
411-
case couch_util:get_value(execution_stats_map, Options, false) of
412-
true ->
413-
ShardStats;
414-
false ->
415-
DocsExamined = maps:get(docs_examined, ShardStats),
416-
{docs_examined, DocsExamined}
417-
end,
418-
% Send shard-level execution stats
419-
ok = rexi:stream2({execution_stats, Stats}),
459+
submit_stats(Options),
420460
% Finish view output
421461
ok = rexi:stream_last(complete),
422462
{ok, Acc};
@@ -472,6 +512,21 @@ maybe_send_mango_ping() ->
472512
set_mango_msg_timestamp() ->
473513
put(mango_last_msg_timestamp, os:timestamp()).
474514

515+
-spec add_shard_stats(#execution_stats{}, shard_stats()) -> #execution_stats{}.
516+
add_shard_stats(Stats0, {docs_examined, DocsExamined}) ->
517+
mango_execution_stats:incr_docs_examined(Stats0, DocsExamined);
518+
add_shard_stats(Stats0, #{} = ShardStats) ->
519+
DocsExamined = shard_stats_get(docs_examined, ShardStats),
520+
KeysExamined = shard_stats_get(keys_examined, ShardStats),
521+
Stats = mango_execution_stats:incr_docs_examined(Stats0, DocsExamined),
522+
mango_execution_stats:incr_keys_examined(Stats, KeysExamined).
523+
524+
-spec handle_execution_stats(#cursor{}, shard_stats()) -> {ok, #cursor{}}.
525+
handle_execution_stats(Cursor0, ShardStats) ->
526+
#cursor{execution_stats = Stats} = Cursor0,
527+
Cursor = Cursor0#cursor{execution_stats = add_shard_stats(Stats, ShardStats)},
528+
{ok, Cursor}.
529+
475530
-spec handle_message(message(), #cursor{}) -> Response when
476531
Response ::
477532
{ok, #cursor{}}
@@ -495,20 +550,10 @@ handle_message({row, Props}, Cursor) ->
495550
couch_log:error("~s :: Error loading doc: ~p", [?MODULE, Error]),
496551
{ok, Cursor}
497552
end;
498-
handle_message({execution_stats, {docs_examined, DocsExamined}}, Cursor0) ->
499-
#cursor{execution_stats = Stats} = Cursor0,
500-
Cursor = Cursor0#cursor{
501-
execution_stats = mango_execution_stats:incr_docs_examined(Stats, DocsExamined)
502-
},
503-
{ok, Cursor};
553+
handle_message({execution_stats, {docs_examined, _} = ShardStats}, Cursor0) ->
554+
handle_execution_stats(Cursor0, ShardStats);
504555
handle_message({execution_stats, #{} = ShardStats}, Cursor0) ->
505-
DocsExamined = shard_stats_get(docs_examined, ShardStats),
506-
KeysExamined = shard_stats_get(keys_examined, ShardStats),
507-
#cursor{execution_stats = Stats0} = Cursor0,
508-
Stats1 = mango_execution_stats:incr_docs_examined(Stats0, DocsExamined),
509-
Stats = mango_execution_stats:incr_keys_examined(Stats1, KeysExamined),
510-
Cursor = Cursor0#cursor{execution_stats = Stats},
511-
{ok, Cursor};
556+
handle_execution_stats(Cursor0, ShardStats);
512557
handle_message(complete, Cursor) ->
513558
{ok, Cursor};
514559
handle_message({error, Reason}, _Cursor) ->
@@ -648,9 +693,14 @@ consider_index_coverage(Index, Fields, #mrargs{include_docs = IncludeDocs0} = Ar
648693
| {no_match, null, {execution_stats, shard_stats()}}
649694
| any().
650695
doc_member_and_extract(Cursor, RowProps) ->
651-
Db = Cursor#cursor.db,
652-
Opts = Cursor#cursor.opts,
653-
ExecutionStats = Cursor#cursor.execution_stats,
696+
#cursor{db = Db, opts = Opts, execution_stats = ExecutionStats0} = Cursor,
697+
ExecutionStats =
698+
case couch_util:get_value(stats, RowProps) of
699+
undefined ->
700+
ExecutionStats0;
701+
ShardStats ->
702+
add_shard_stats(ExecutionStats0, ShardStats)
703+
end,
654704
Selector = Cursor#cursor.selector,
655705
case couch_util:get_value(doc, RowProps) of
656706
{DocProps} ->
@@ -748,7 +798,8 @@ base_opts_test() ->
748798
}},
749799
{ignore_partition_query_limit, true},
750800
{execution_stats_map, true},
751-
{view_row_map, true}
801+
{view_row_map, true},
802+
{execution_stats_rolling, true}
752803
],
753804
MRArgs =
754805
#mrargs{
@@ -1093,7 +1144,8 @@ t_execute_ok_all_docs(_) ->
10931144
}},
10941145
{ignore_partition_query_limit, true},
10951146
{execution_stats_map, true},
1096-
{view_row_map, true}
1147+
{view_row_map, true},
1148+
{execution_stats_rolling, true}
10971149
],
10981150
Args =
10991151
#mrargs{
@@ -1180,7 +1232,8 @@ t_execute_ok_query_view(_) ->
11801232
}},
11811233
{ignore_partition_query_limit, true},
11821234
{execution_stats_map, true},
1183-
{view_row_map, true}
1235+
{view_row_map, true},
1236+
{execution_stats_rolling, true}
11841237
],
11851238
Args =
11861239
#mrargs{
@@ -1279,7 +1332,8 @@ t_execute_ok_all_docs_with_execution_stats(_) ->
12791332
}},
12801333
{ignore_partition_query_limit, true},
12811334
{execution_stats_map, true},
1282-
{view_row_map, true}
1335+
{view_row_map, true},
1336+
{execution_stats_rolling, true}
12831337
],
12841338
Args =
12851339
#mrargs{
@@ -1394,6 +1448,8 @@ t_view_cb_row_matching_regular_doc(_) ->
13941448
fields => all_fields,
13951449
covering_index => undefined
13961450
}},
1451+
{execution_stats_map, true},
1452+
{execution_stats_rolling, true},
13971453
{view_row_map, true}
13981454
]
13991455
},
@@ -1413,6 +1469,8 @@ t_view_cb_row_non_matching_regular_doc(_) ->
14131469
fields => all_fields,
14141470
covering_index => undefined
14151471
}},
1472+
{execution_stats_map, true},
1473+
{execution_stats_rolling, true},
14161474
{view_row_map, true}
14171475
]
14181476
},
@@ -1432,6 +1490,8 @@ t_view_cb_row_null_doc(_) ->
14321490
fields => all_fields,
14331491
covering_index => undefined
14341492
}},
1493+
{execution_stats_map, true},
1494+
{execution_stats_rolling, true},
14351495
{view_row_map, true}
14361496
]
14371497
},
@@ -1452,6 +1512,8 @@ t_view_cb_row_missing_doc_triggers_quorum_fetch(_) ->
14521512
fields => all_fields,
14531513
covering_index => undefined
14541514
}},
1515+
{execution_stats_map, true},
1516+
{execution_stats_rolling, true},
14551517
{view_row_map, true}
14561518
]
14571519
},
@@ -1479,6 +1541,8 @@ t_view_cb_row_matching_covered_doc(_) ->
14791541
fields => Fields,
14801542
covering_index => Index
14811543
}},
1544+
{execution_stats_map, true},
1545+
{execution_stats_rolling, true},
14821546
{view_row_map, true}
14831547
]
14841548
},
@@ -1503,6 +1567,8 @@ t_view_cb_row_non_matching_covered_doc(_) ->
15031567
fields => Fields,
15041568
covering_index => Index
15051569
}},
1570+
{execution_stats_map, true},
1571+
{execution_stats_rolling, true},
15061572
{view_row_map, true}
15071573
]
15081574
},
@@ -1638,10 +1704,13 @@ t_handle_message_row_ok_above_limit(_) ->
16381704
user_acc = accumulator,
16391705
user_fun = fun foo:bar/2
16401706
},
1641-
Row = [{id, id}, {key, key}, {doc, Doc}],
1707+
ShardStats = #{keys_examined => 2, docs_examined => 3},
1708+
Row = [{id, id}, {key, key}, {doc, Doc}, {stats, ShardStats}],
16421709
Cursor1 =
16431710
Cursor#cursor{
1644-
execution_stats = #execution_stats{resultsReturned = 1},
1711+
execution_stats = #execution_stats{
1712+
resultsReturned = 1, totalKeysExamined = 2, totalDocsExamined = 3
1713+
},
16451714
limit = 8,
16461715
user_acc = updated_accumulator,
16471716
bookmark_docid = id,
@@ -1689,12 +1758,15 @@ t_handle_message_row_ok_triggers_quorum_fetch_match(_) ->
16891758
user_acc = accumulator,
16901759
limit = 1
16911760
},
1692-
Row = [{id, id}, {doc, undefined}],
1761+
ShardStats = #{keys_examined => 2, docs_examined => 3},
1762+
Row = [{id, id}, {doc, undefined}, {stats, ShardStats}],
16931763
Cursor1 =
16941764
Cursor#cursor{
16951765
execution_stats =
16961766
#execution_stats{
16971767
totalQuorumDocsExamined = 1,
1768+
totalKeysExamined = 2,
1769+
totalDocsExamined = 3,
16981770
resultsReturned = 1
16991771
},
17001772
user_acc = updated_accumulator,

0 commit comments

Comments
 (0)