Skip to content

Commit 7ad68f1

Browse files
boryaskdave
authored andcommitted
btrfs: track current commit duration in commit_stats
When debugging/detecting outlier commit stalls, having an indicator that we are currently in a long commit critical section can be very useful. Extend the commit_stats sysfs file to also include the current commit critical section duration. Since this requires storing the last commit start time, use that rather than a separate stack variable for storing the finished commit durations as well. This also requires slightly moving up the timing of the stats updating to *inside* the critical section to avoid the transaction T+1 setting the critical_section_start_time to 0 before transaction T can update its stats, which would trigger the new ASSERT. This is an improvement in and of itself, as it makes the stats more accurately represent the true critical section time. It may be yet better to pull the stats up to where start_transaction gets unblocked, rather than the next commit, but this seems like a good enough place as well. Signed-off-by: Boris Burkov <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent 648412b commit 7ad68f1

File tree

3 files changed

+19
-9
lines changed

3 files changed

+19
-9
lines changed

fs/btrfs/fs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,8 @@ struct btrfs_commit_stats {
420420
u64 last_commit_dur;
421421
/* The total commit duration in ns */
422422
u64 total_commit_dur;
423+
/* Start of the last critical section in ns. */
424+
u64 critical_section_start_time;
423425
};
424426

425427
struct btrfs_fs_info {

fs/btrfs/sysfs.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1138,13 +1138,21 @@ static ssize_t btrfs_commit_stats_show(struct kobject *kobj,
11381138
struct kobj_attribute *a, char *buf)
11391139
{
11401140
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
1141+
u64 now = ktime_get_ns();
1142+
u64 start_time = fs_info->commit_stats.critical_section_start_time;
1143+
u64 pending = 0;
1144+
1145+
if (start_time)
1146+
pending = now - start_time;
11411147

11421148
return sysfs_emit(buf,
11431149
"commits %llu\n"
1150+
"cur_commit_ms %llu\n"
11441151
"last_commit_ms %llu\n"
11451152
"max_commit_ms %llu\n"
11461153
"total_commit_ms %llu\n",
11471154
fs_info->commit_stats.commit_count,
1155+
div_u64(pending, NSEC_PER_MSEC),
11481156
div_u64(fs_info->commit_stats.last_commit_dur, NSEC_PER_MSEC),
11491157
div_u64(fs_info->commit_stats.max_commit_dur, NSEC_PER_MSEC),
11501158
div_u64(fs_info->commit_stats.total_commit_dur, NSEC_PER_MSEC));

fs/btrfs/transaction.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2163,13 +2163,19 @@ static void add_pending_snapshot(struct btrfs_trans_handle *trans)
21632163
list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
21642164
}
21652165

2166-
static void update_commit_stats(struct btrfs_fs_info *fs_info, ktime_t interval)
2166+
static void update_commit_stats(struct btrfs_fs_info *fs_info)
21672167
{
2168+
ktime_t now = ktime_get_ns();
2169+
ktime_t interval = now - fs_info->commit_stats.critical_section_start_time;
2170+
2171+
ASSERT(fs_info->commit_stats.critical_section_start_time);
2172+
21682173
fs_info->commit_stats.commit_count++;
21692174
fs_info->commit_stats.last_commit_dur = interval;
21702175
fs_info->commit_stats.max_commit_dur =
21712176
max_t(u64, fs_info->commit_stats.max_commit_dur, interval);
21722177
fs_info->commit_stats.total_commit_dur += interval;
2178+
fs_info->commit_stats.critical_section_start_time = 0;
21732179
}
21742180

21752181
int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
@@ -2178,8 +2184,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
21782184
struct btrfs_transaction *cur_trans = trans->transaction;
21792185
struct btrfs_transaction *prev_trans = NULL;
21802186
int ret;
2181-
ktime_t start_time;
2182-
ktime_t interval;
21832187

21842188
ASSERT(refcount_read(&trans->use_count) == 1);
21852189
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
@@ -2312,8 +2316,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
23122316
* Get the time spent on the work done by the commit thread and not
23132317
* the time spent waiting on a previous commit
23142318
*/
2315-
start_time = ktime_get_ns();
2316-
2319+
fs_info->commit_stats.critical_section_start_time = ktime_get_ns();
23172320
extwriter_counter_dec(cur_trans, trans->type);
23182321

23192322
ret = btrfs_start_delalloc_flush(fs_info);
@@ -2545,6 +2548,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
25452548
if (ret)
25462549
goto scrub_continue;
25472550

2551+
update_commit_stats(fs_info);
25482552
/*
25492553
* We needn't acquire the lock here because there is no other task
25502554
* which can change it.
@@ -2581,17 +2585,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
25812585

25822586
trace_btrfs_transaction_commit(fs_info);
25832587

2584-
interval = ktime_get_ns() - start_time;
2585-
25862588
btrfs_scrub_continue(fs_info);
25872589

25882590
if (current->journal_info == trans)
25892591
current->journal_info = NULL;
25902592

25912593
kmem_cache_free(btrfs_trans_handle_cachep, trans);
25922594

2593-
update_commit_stats(fs_info, interval);
2594-
25952595
return ret;
25962596

25972597
unlock_reloc:

0 commit comments

Comments
 (0)