Skip to content

Commit 2e733b3

Browse files
authored
feat(metrics): add scheduler related metrics (#2076)
Signed-off-by: Alexei Dodon <[email protected]>
1 parent 8bac653 commit 2e733b3

File tree

21 files changed

+564
-67
lines changed

21 files changed

+564
-67
lines changed

pkg/api/controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@ func (c *Controller) Shutdown() {
377377
}
378378

379379
func (c *Controller) StartBackgroundTasks(reloadCtx context.Context) {
380-
c.taskScheduler = scheduler.NewScheduler(c.Config, c.Log)
380+
c.taskScheduler = scheduler.NewScheduler(c.Config, c.Metrics, c.Log)
381381
c.taskScheduler.RunScheduler(reloadCtx)
382382

383383
// Enable running garbage-collect periodically for DefaultStore

pkg/api/cookiestore.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package api
33
import (
44
"context"
55
"encoding/gob"
6+
"fmt"
67
"io/fs"
78
"os"
89
"path"
@@ -157,3 +158,12 @@ func (cleanTask *CleanTask) DoWork(ctx context.Context) error {
157158

158159
return nil
159160
}
161+
162+
func (cleanTask *CleanTask) String() string {
163+
return fmt.Sprintf("{Name: %s, sessions: %s}",
164+
cleanTask.Name(), cleanTask.sessions)
165+
}
166+
167+
func (cleanTask *CleanTask) Name() string {
168+
return "SessionCleanupTask"
169+
}

pkg/exporter/api/controller_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"fmt"
1111
"math/big"
1212
"net/http"
13+
"runtime"
1314
"strings"
1415
"sync"
1516
"testing"
@@ -25,6 +26,7 @@ import (
2526
zotcfg "zotregistry.io/zot/pkg/api/config"
2627
"zotregistry.io/zot/pkg/exporter/api"
2728
"zotregistry.io/zot/pkg/extensions/monitoring"
29+
"zotregistry.io/zot/pkg/scheduler"
2830
. "zotregistry.io/zot/pkg/test/common"
2931
)
3032

@@ -69,12 +71,22 @@ func readDefaultMetrics(collector *api.Collector, chMetric chan prometheus.Metri
6971
So(err, ShouldBeNil)
7072
So(*metric.Gauge.Value, ShouldEqual, 1)
7173

74+
pmMetric = <-chMetric
75+
So(pmMetric.Desc().String(), ShouldEqual, collector.MetricsDesc["zot_scheduler_workers_total"].String())
76+
77+
err = pmMetric.Write(&metric)
78+
So(err, ShouldBeNil)
79+
So(*metric.Gauge.Value, ShouldEqual, runtime.NumCPU()*scheduler.NumWorkersMultiplier)
80+
7281
pmMetric = <-chMetric
7382
So(pmMetric.Desc().String(), ShouldEqual, collector.MetricsDesc["zot_info"].String())
7483

7584
err = pmMetric.Write(&metric)
7685
So(err, ShouldBeNil)
7786
So(*metric.Gauge.Value, ShouldEqual, 0)
87+
88+
pmMetric = <-chMetric
89+
So(pmMetric.Desc().String(), ShouldEqual, collector.MetricsDesc["zot_scheduler_generators_total"].String())
7890
}
7991

8092
func TestNewExporter(t *testing.T) {

pkg/extensions/extension_userprefs_disable.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@ func IsBuiltWithUserPrefsExtension() bool {
1818
func SetupUserPreferencesRoutes(config *config.Config, router *mux.Router,
1919
metaDB mTypes.MetaDB, log log.Logger,
2020
) {
21-
log.Warn().Msg("userprefs extension is disabled because given zot binary doesn't" +
21+
log.Warn().Msg("userprefs extension is disabled because given zot binary doesn't " +
2222
"include this feature please build a binary that does so")
2323
}

pkg/extensions/imagetrust/image_trust.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package imagetrust
55

66
import (
77
"context"
8+
"fmt"
89
"time"
910

1011
"github.com/aws/aws-sdk-go-v2/aws"
@@ -274,3 +275,13 @@ func (validityT *validityTask) DoWork(ctx context.Context) error {
274275

275276
return nil
276277
}
278+
279+
func (validityT *validityTask) String() string {
280+
return fmt.Sprintf("{sigValidityTaskGenerator: %s, repo: %s}",
281+
"signatures validity task", // description of generator's task purpose
282+
validityT.repo.Name)
283+
}
284+
285+
func (validityT *validityTask) Name() string {
286+
return "SignatureValidityTask"
287+
}

pkg/extensions/monitoring/extension.go

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,53 @@ var (
8383
},
8484
[]string{"storageName", "lockType"},
8585
)
86+
schedulerGenerators = promauto.NewCounter( //nolint: gochecknoglobals
87+
prometheus.CounterOpts{
88+
Namespace: metricsNamespace,
89+
Name: "scheduler_generators_total",
90+
Help: "Total number of generators registered in scheduler",
91+
},
92+
)
93+
schedulerGeneratorsStatus = promauto.NewGaugeVec( //nolint: gochecknoglobals
94+
prometheus.GaugeOpts{
95+
Namespace: metricsNamespace,
96+
Name: "scheduler_generators_status",
97+
Help: "Scheduler generators by priority & state",
98+
},
99+
[]string{"priority", "state"},
100+
)
101+
schedulerNumWorkers = promauto.NewGauge( //nolint: gochecknoglobals
102+
prometheus.GaugeOpts{ //nolint: promlinter
103+
Namespace: metricsNamespace,
104+
Name: "scheduler_workers_total",
105+
Help: "Total number of available workers to perform scheduler tasks",
106+
},
107+
)
108+
schedulerWorkers = promauto.NewGaugeVec( //nolint: gochecknoglobals
109+
prometheus.GaugeOpts{
110+
Namespace: metricsNamespace,
111+
Name: "scheduler_workers",
112+
Help: "Scheduler workers state",
113+
},
114+
[]string{"state"},
115+
)
116+
schedulerTasksQueue = promauto.NewGaugeVec( //nolint: gochecknoglobals
117+
prometheus.GaugeOpts{
118+
Namespace: metricsNamespace,
119+
Name: "scheduler_tasksqueue_length",
120+
Help: "Number of tasks waiting in the queue to be processed by scheduler workers",
121+
},
122+
[]string{"priority"},
123+
)
124+
workersTasksDuration = promauto.NewHistogramVec( //nolint: gochecknoglobals
125+
prometheus.HistogramOpts{
126+
Namespace: metricsNamespace,
127+
Name: "scheduler_workers_tasks_duration_seconds",
128+
Help: "How long it takes for a worker to execute a task",
129+
Buckets: GetDefaultBuckets(),
130+
},
131+
[]string{"name"},
132+
)
86133
)
87134

88135
type metricServer struct {
@@ -169,7 +216,7 @@ func IncDownloadCounter(ms MetricServer, repo string) {
169216
}
170217

171218
func SetStorageUsage(ms MetricServer, rootDir, repo string) {
172-
ms.SendMetric(func() {
219+
ms.ForceSendMetric(func() {
173220
dir := path.Join(rootDir, repo)
174221
repoSize, err := GetDirSize(dir)
175222

@@ -196,3 +243,47 @@ func ObserveStorageLockLatency(ms MetricServer, latency time.Duration, storageNa
196243
storageLockLatency.WithLabelValues(storageName, lockType).Observe(latency.Seconds())
197244
})
198245
}
246+
247+
func IncSchedulerGenerators(ms MetricServer) {
248+
ms.ForceSendMetric(func() {
249+
schedulerGenerators.Inc()
250+
})
251+
}
252+
253+
func SetSchedulerGenerators(ms MetricServer, gen map[string]map[string]uint64) {
254+
ms.SendMetric(func() {
255+
for priority, states := range gen {
256+
for state, value := range states {
257+
schedulerGeneratorsStatus.WithLabelValues(priority, state).Set(float64(value))
258+
}
259+
}
260+
})
261+
}
262+
263+
func SetSchedulerNumWorkers(ms MetricServer, total int) {
264+
ms.SendMetric(func() {
265+
schedulerNumWorkers.Set(float64(total))
266+
})
267+
}
268+
269+
func SetSchedulerWorkers(ms MetricServer, w map[string]int) {
270+
ms.SendMetric(func() {
271+
for state, value := range w {
272+
schedulerWorkers.WithLabelValues(state).Set(float64(value))
273+
}
274+
})
275+
}
276+
277+
func SetSchedulerTasksQueue(ms MetricServer, tq map[string]int) {
278+
ms.SendMetric(func() {
279+
for priority, value := range tq {
280+
schedulerTasksQueue.WithLabelValues(priority).Set(float64(value))
281+
}
282+
})
283+
}
284+
285+
func ObserveWorkersTasksDuration(ms MetricServer, taskName string, duration time.Duration) {
286+
ms.SendMetric(func() {
287+
workersTasksDuration.WithLabelValues(taskName).Observe(duration.Seconds())
288+
})
289+
}

0 commit comments

Comments
 (0)