Skip to content

Commit 4ad7b7b

Browse files
craig[bot]irfansharif
craig[bot] and irfansharif committed
Merge #110060
110060: admission: fix wait queue histograms r=irfansharif a=irfansharif We previously did not record anything into the {IO,CPU} wait queue histograms when work bypassed admission control, either because of the nature of the work or because certain admission queues were disabled through cluster settings. Since such work was missing from the histograms, their percentiles were not accurate. This problem didn't exist at the flow control level, where work may not be subject to flow control depending on the mode selected ('apply_to_elastic', 'apply_to_all'): there we'd still record a measured wait duration (~0ms), so we had accurate waiting-for-flow-tokens histograms. Part of #82743. Release note: None Co-authored-by: irfan sharif <[email protected]>
2 parents c18029b + 4da2627 commit 4ad7b7b

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

pkg/kv/kvserver/kvadmission/kvadmission.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,6 @@ func (n *controllerImpl) AdmitKVWork(
334334
AdmissionOriginNode: n.nodeID.Get(),
335335
}
336336
}
337-
338337
}
339338
// If flow control is disabled or if work bypasses flow control, we still
340339
// subject it above-raft, leaseholder-only IO admission control.

pkg/util/admission/work_queue.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,7 @@ func (q *WorkQueue) Admit(ctx context.Context, info WorkInfo) (enabled bool, err
564564
if !info.ReplicatedWorkInfo.Enabled {
565565
enabledSetting := admissionControlEnabledSettings[q.workKind]
566566
if enabledSetting != nil && !enabledSetting.Get(&q.settings.SV) {
567+
q.metrics.recordBypassedAdmission(info.Priority)
567568
return false, nil
568569
}
569570
}
@@ -618,6 +619,7 @@ func (q *WorkQueue) Admit(ctx context.Context, info WorkInfo) (enabled bool, err
618619
q.admitMu.Unlock()
619620
q.granter.tookWithoutPermission(info.RequestedCount)
620621
q.metrics.incAdmitted(info.Priority)
622+
q.metrics.recordBypassedAdmission(info.Priority)
621623
return true, nil
622624
}
623625
// Work is subject to admission control.
@@ -663,6 +665,7 @@ func (q *WorkQueue) Admit(ctx context.Context, info WorkInfo) (enabled bool, err
663665
false, /* coordMuLocked */
664666
)
665667
}
668+
q.metrics.recordFastPathAdmission(info.Priority)
666669
return true, nil
667670
}
668671
// Did not get token/slot.
@@ -1757,6 +1760,26 @@ func (m *WorkQueueMetrics) recordFinishWait(priority admissionpb.WorkPriority, d
17571760
priorityStats.WaitDurations.RecordValue(dur.Nanoseconds())
17581761
}
17591762

1763+
func (m *WorkQueueMetrics) recordBypassedAdmission(priority admissionpb.WorkPriority) {
1764+
// For work that either bypasses admission queues (because of the nature of
1765+
// the work itself or because certain queues are disabled), we'll explicitly
1766+
// record a zero wait duration so that the histogram percentiles remain
1767+
// accurate.
1768+
m.total.WaitDurations.RecordValue(0)
1769+
priorityStats := m.getOrCreate(priority)
1770+
priorityStats.WaitDurations.RecordValue(0)
1771+
}
1772+
1773+
func (m *WorkQueueMetrics) recordFastPathAdmission(priority admissionpb.WorkPriority) {
1774+
// Explicitly record a zero wait queue duration when we're able to acquire
1775+
// tokens/slots without needing to add ourselves to tenant heaps. Explicitly
1776+
// recording zeros ensures that our histograms are accurate with respect to
1777+
// all work going through admission control.
1778+
m.total.WaitDurations.RecordValue(0)
1779+
priorityStats := m.getOrCreate(priority)
1780+
priorityStats.WaitDurations.RecordValue(0)
1781+
}
1782+
17601783
// MetricStruct implements the metric.Struct interface.
17611784
func (*WorkQueueMetrics) MetricStruct() {}
17621785

0 commit comments

Comments
 (0)