Skip to content

udp_queues_linux.go: Expose UDP drops via gauge analogous to queue sizes #2993

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
fi

if [[ -f "$(pwd)/.build/darwin-amd64/node_exporter" ]]; then
promu codesign "$(pwd)/.build/darwin-amd64/node_exporter"
promu codesign "$(pwd)/.build/darwin-amd64/node_exporter"
fi
- persist_to_workspace:
root: .
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ thermal | Exposes thermal statistics like `pmset -g therm`. | Darwin
thermal\_zone | Exposes thermal zone & cooling device statistics from `/sys/class/thermal`. | Linux
time | Exposes the current system time. | _any_
timex | Exposes selected adjtimex(2) system call stats. | Linux
udp_queues | Exposes UDP total lengths of the rx_queue and tx_queue from `/proc/net/udp` and `/proc/net/udp6`. | Linux
udp | Exposes UDP statistics from `/proc/net/udp` and `/proc/net/udp6`. | Linux
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Guess it makes sense to rename this but:

  1. We should rename the file as well
  2. Technically it's a breaking change, e.g. people might enable/disable the collector explicitly, so after upgrading it will refuse to start

@SuperQ what do you think? Maybe we should give registerCollector a list of aliases and have it print a deprecation warning if any of them is used?

Or, to just get this merged, maybe simply don't rename it for now.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I figured this wasn't an ideal solution, but I recognize that I'm too ignorant to know what sorts of API-ish guarantees are in place. I opted for "make sure that the metric names stay the same", although since I changed the namespace from "udp_queues" to "udp" it's possible I didn't even achieve that.

FWIW, I chose this after looking at some of the other metrics, and settled on something similar to the pattern used by cpu.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi guys, I contributed this PR based on your work, @cleeland.
As @discordianfish said, the existing collector name udp_queues might already be used by people explicitly disabling that collector.
So it's worth not changing it; otherwise it would be a breaking change and would have to wait for a major release of node exporter.
If that's fine with you, shall we continue the work in my PR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nicolas-laduguie thanks! I left my thoughts over on the new PR. Thanks for continuing to help shepherd this.

uname | Exposes system information as provided by the uname system call. | Darwin, FreeBSD, Linux, OpenBSD
vmstat | Exposes statistics from `/proc/vmstat`. | Linux
watchdog | Exposes statistics from `/sys/class/watchdog` | Linux
Expand Down
5 changes: 4 additions & 1 deletion collector/fixtures/e2e-64k-page-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2991,7 +2991,7 @@ node_scrape_collector_success{collector="tapestats"} 1
node_scrape_collector_success{collector="textfile"} 1
node_scrape_collector_success{collector="thermal_zone"} 1
node_scrape_collector_success{collector="time"} 1
node_scrape_collector_success{collector="udp_queues"} 1
node_scrape_collector_success{collector="udp"} 1
node_scrape_collector_success{collector="vmstat"} 1
node_scrape_collector_success{collector="watchdog"} 1
node_scrape_collector_success{collector="wifi"} 1
Expand Down Expand Up @@ -3242,6 +3242,9 @@ node_time_clocksource_current_info{clocksource="tsc",device="0"} 1
# TYPE node_time_seconds gauge
# HELP node_time_zone_offset_seconds System time zone offset in seconds.
# TYPE node_time_zone_offset_seconds gauge
# HELP node_udp_drops_total Total number of datagrams dropped.
# TYPE node_udp_drops_total counter
node_udp_drops_total{ip="v4"} 100
# HELP node_udp_queues Number of allocated memory in the kernel for UDP datagrams in bytes.
# TYPE node_udp_queues gauge
node_udp_queues{ip="v4",queue="rx"} 0
Expand Down
5 changes: 4 additions & 1 deletion collector/fixtures/e2e-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3013,7 +3013,7 @@ node_scrape_collector_success{collector="tapestats"} 1
node_scrape_collector_success{collector="textfile"} 1
node_scrape_collector_success{collector="thermal_zone"} 1
node_scrape_collector_success{collector="time"} 1
node_scrape_collector_success{collector="udp_queues"} 1
node_scrape_collector_success{collector="udp"} 1
node_scrape_collector_success{collector="vmstat"} 1
node_scrape_collector_success{collector="watchdog"} 1
node_scrape_collector_success{collector="wifi"} 1
Expand Down Expand Up @@ -3264,6 +3264,9 @@ node_time_clocksource_current_info{clocksource="tsc",device="0"} 1
# TYPE node_time_seconds gauge
# HELP node_time_zone_offset_seconds System time zone offset in seconds.
# TYPE node_time_zone_offset_seconds gauge
# HELP node_udp_drops_total Total number of datagrams dropped.
# TYPE node_udp_drops_total counter
node_udp_drops_total{ip="v4"} 100
# HELP node_udp_queues Number of allocated memory in the kernel for UDP datagrams in bytes.
# TYPE node_udp_queues gauge
node_udp_queues{ip="v4",queue="rx"} 0
Expand Down
34 changes: 23 additions & 11 deletions collector/udp_queues_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,40 +27,49 @@ import (
)

type (
udpQueuesCollector struct {
udpCollector struct {
fs procfs.FS
desc *prometheus.Desc
queues *prometheus.Desc
drops *prometheus.Desc
logger *slog.Logger
}
)

func init() {
registerCollector("udp_queues", defaultEnabled, NewUDPqueuesCollector)
registerCollector("udp", defaultEnabled, NewUDPCollector)
}

// NewUDPqueuesCollector returns a new Collector exposing network udp queued bytes.
func NewUDPqueuesCollector(logger *slog.Logger) (Collector, error) {
func NewUDPCollector(logger *slog.Logger) (Collector, error) {
fs, err := procfs.NewFS(*procPath)
if err != nil {
return nil, fmt.Errorf("failed to open procfs: %w", err)
}
return &udpQueuesCollector{
return &udpCollector{
fs: fs,
desc: prometheus.NewDesc(
queues: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "udp", "queues"),
"Number of allocated memory in the kernel for UDP datagrams in bytes.",
[]string{"queue", "ip"}, nil,
),
drops: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "udp", "drops_total"),
"Total number of datagrams dropped.",
[]string{"ip"}, nil,
),
logger: logger,
}, nil
}

func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error {
func (c *udpCollector) Update(ch chan<- prometheus.Metric) error {

s4, errIPv4 := c.fs.NetUDPSummary()
if errIPv4 == nil {
ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.TxQueueLength), "tx", "v4")
ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.RxQueueLength), "rx", "v4")
ch <- prometheus.MustNewConstMetric(c.queues, prometheus.GaugeValue, float64(s4.TxQueueLength), "tx", "v4")
ch <- prometheus.MustNewConstMetric(c.queues, prometheus.GaugeValue, float64(s4.RxQueueLength), "rx", "v4")
if s4.Drops != nil {
ch <- prometheus.MustNewConstMetric(c.drops, prometheus.CounterValue, float64(*s4.Drops), "v4")
}
} else {
if errors.Is(errIPv4, os.ErrNotExist) {
c.logger.Debug("not collecting ipv4 based metrics")
Expand All @@ -71,8 +80,11 @@ func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error {

s6, errIPv6 := c.fs.NetUDP6Summary()
if errIPv6 == nil {
ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.TxQueueLength), "tx", "v6")
ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.RxQueueLength), "rx", "v6")
ch <- prometheus.MustNewConstMetric(c.queues, prometheus.GaugeValue, float64(s6.TxQueueLength), "tx", "v6")
ch <- prometheus.MustNewConstMetric(c.queues, prometheus.GaugeValue, float64(s6.RxQueueLength), "rx", "v6")
if s6.Drops != nil {
ch <- prometheus.MustNewConstMetric(c.drops, prometheus.CounterValue, float64(*s6.Drops), "v6")
}
} else {
if errors.Is(errIPv6, os.ErrNotExist) {
c.logger.Debug("not collecting ipv6 based metrics")
Expand Down
2 changes: 1 addition & 1 deletion end-to-end-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ enabled_collectors=$(cat << COLLECTORS
sysctl
textfile
thermal_zone
udp_queues
udp
vmstat
watchdog
wifi
Expand Down
Loading