17
17
package collector
18
18
19
19
import (
20
+ "errors"
20
21
"fmt"
21
22
"log/slog"
22
23
"os"
@@ -26,15 +27,17 @@ import (
26
27
"strconv"
27
28
"sync"
28
29
30
+ "golang.org/x/exp/maps"
31
+
29
32
"github.com/alecthomas/kingpin/v2"
30
33
"github.com/prometheus/client_golang/prometheus"
31
34
"github.com/prometheus/procfs"
32
35
"github.com/prometheus/procfs/sysfs"
33
- "golang.org/x/exp/maps"
34
36
)
35
37
36
38
type cpuCollector struct {
37
- fs procfs.FS
39
+ procfs procfs.FS
40
+ sysfs sysfs.FS
38
41
cpu * prometheus.Desc
39
42
cpuInfo * prometheus.Desc
40
43
cpuFrequencyHz * prometheus.Desc
@@ -45,6 +48,7 @@ type cpuCollector struct {
45
48
cpuPackageThrottle * prometheus.Desc
46
49
cpuIsolated * prometheus.Desc
47
50
logger * slog.Logger
51
+ cpuOnline * prometheus.Desc
48
52
cpuStats map [int64 ]procfs.CPUStat
49
53
cpuStatsMutex sync.Mutex
50
54
isolatedCpus []uint16
@@ -70,17 +74,17 @@ func init() {
70
74
71
75
// NewCPUCollector returns a new Collector exposing kernel/system statistics.
72
76
func NewCPUCollector (logger * slog.Logger ) (Collector , error ) {
73
- fs , err := procfs .NewFS (* procPath )
77
+ pfs , err := procfs .NewFS (* procPath )
74
78
if err != nil {
75
79
return nil , fmt .Errorf ("failed to open procfs: %w" , err )
76
80
}
77
81
78
- sysfs , err := sysfs .NewFS (* sysPath )
82
+ sfs , err := sysfs .NewFS (* sysPath )
79
83
if err != nil {
80
84
return nil , fmt .Errorf ("failed to open sysfs: %w" , err )
81
85
}
82
86
83
- isolcpus , err := sysfs .IsolatedCPUs ()
87
+ isolcpus , err := sfs .IsolatedCPUs ()
84
88
if err != nil {
85
89
if ! os .IsNotExist (err ) {
86
90
return nil , fmt .Errorf ("Unable to get isolated cpus: %w" , err )
@@ -89,8 +93,9 @@ func NewCPUCollector(logger *slog.Logger) (Collector, error) {
89
93
}
90
94
91
95
c := & cpuCollector {
92
- fs : fs ,
93
- cpu : nodeCPUSecondsDesc ,
96
+ procfs : pfs ,
97
+ sysfs : sfs ,
98
+ cpu : nodeCPUSecondsDesc ,
94
99
cpuInfo : prometheus .NewDesc (
95
100
prometheus .BuildFQName (namespace , cpuCollectorSubsystem , "info" ),
96
101
"CPU information from /proc/cpuinfo." ,
@@ -131,6 +136,11 @@ func NewCPUCollector(logger *slog.Logger) (Collector, error) {
131
136
"Whether each core is isolated, information from /sys/devices/system/cpu/isolated." ,
132
137
[]string {"cpu" }, nil ,
133
138
),
139
+ cpuOnline : prometheus .NewDesc (
140
+ prometheus .BuildFQName (namespace , cpuCollectorSubsystem , "online" ),
141
+ "CPUs that are online and being scheduled." ,
142
+ []string {"cpu" }, nil ,
143
+ ),
134
144
logger : logger ,
135
145
isolatedCpus : isolcpus ,
136
146
cpuStats : make (map [int64 ]procfs.CPUStat ),
@@ -177,12 +187,21 @@ func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error {
177
187
if c .isolatedCpus != nil {
178
188
c .updateIsolated (ch )
179
189
}
180
- return c .updateThermalThrottle (ch )
190
+ err := c .updateThermalThrottle (ch )
191
+ if err != nil {
192
+ return err
193
+ }
194
+ err = c .updateOnline (ch )
195
+ if err != nil {
196
+ return err
197
+ }
198
+
199
+ return nil
181
200
}
182
201
183
202
// updateInfo reads /proc/cpuinfo
184
203
func (c * cpuCollector ) updateInfo (ch chan <- prometheus.Metric ) error {
185
- info , err := c .fs .CPUInfo ()
204
+ info , err := c .procfs .CPUInfo ()
186
205
if err != nil {
187
206
return err
188
207
}
@@ -333,9 +352,31 @@ func (c *cpuCollector) updateIsolated(ch chan<- prometheus.Metric) {
333
352
}
334
353
}
335
354
355
+ // updateOnline reads /sys/devices/system/cpu/cpu*/online through sysfs and exports online status metrics.
356
+ func (c * cpuCollector ) updateOnline (ch chan <- prometheus.Metric ) error {
357
+ cpus , err := c .sysfs .CPUs ()
358
+ if err != nil {
359
+ return err
360
+ }
361
+ // No-op if the system does not support CPU online stats.
362
+ cpu0 := cpus [0 ]
363
+ if _ , err := cpu0 .Online (); err != nil && errors .Is (err , os .ErrNotExist ) {
364
+ return nil
365
+ }
366
+ for _ , cpu := range cpus {
367
+ setOnline := float64 (0 )
368
+ if online , _ := cpu .Online (); online {
369
+ setOnline = 1
370
+ }
371
+ ch <- prometheus .MustNewConstMetric (c .cpuOnline , prometheus .GaugeValue , setOnline , cpu .Number ())
372
+ }
373
+
374
+ return nil
375
+ }
376
+
336
377
// updateStat reads /proc/stat through procfs and exports CPU-related metrics.
337
378
func (c * cpuCollector ) updateStat (ch chan <- prometheus.Metric ) error {
338
- stats , err := c .fs .Stat ()
379
+ stats , err := c .procfs .Stat ()
339
380
if err != nil {
340
381
return err
341
382
}
0 commit comments