Skip to content

Commit bde1501

Browse files
committed
Adding symbolizer instrumentation
1 parent 3e97366 commit bde1501

File tree

7 files changed

+249
-24
lines changed

7 files changed

+249
-24
lines changed

cmd/symbolization/main.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ const (
1616
)
1717

1818
func main() {
19-
client := symbolizer.NewDebuginfodClient(debuginfodBaseURL)
19+
client := symbolizer.NewDebuginfodClient(debuginfodBaseURL, nil)
2020

2121
// Alternatively, use a local debug info file:
2222
//client := &localDebuginfodClient{debugFilePath: "/path/to/your/debug/file"}
2323

24-
s := symbolizer.NewSymbolizer(client, nil)
24+
s := symbolizer.NewSymbolizer(client, nil, nil)
2525
ctx := context.Background()
2626

2727
_, err := client.FetchDebuginfo(buildID)

pkg/experiment/query_backend/backend.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ func New(
6666
var sym *symbolizer.Symbolizer
6767
if config.Symbolizer.DebuginfodURL != "" {
6868
var err error
69-
sym, err = symbolizer.NewFromConfig(context.Background(), config.Symbolizer)
69+
sym, err = symbolizer.NewFromConfig(context.Background(), config.Symbolizer, reg)
7070
if err != nil {
7171
return nil, fmt.Errorf("create symbolizer: %w", err)
7272
}

pkg/experiment/symbolizer/cache.go

+30-6
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@ type CacheConfig struct {
1515
MaxAge time.Duration `yaml:"max_age"`
1616
}
1717

18-
func NewObjstoreCache(bucket objstore.Bucket, maxAge time.Duration) *ObjstoreCache {
18+
func NewObjstoreCache(bucket objstore.Bucket, maxAge time.Duration, metrics *Metrics) *ObjstoreCache {
1919
return &ObjstoreCache{
20-
bucket: bucket,
21-
maxAge: maxAge,
20+
bucket: bucket,
21+
maxAge: maxAge,
22+
metrics: metrics,
2223
}
2324
}
2425

@@ -30,46 +31,69 @@ type DebugInfoCache interface {
3031

3132
// ObjstoreCache implements DebugInfoCache on top of an objstore.Bucket
3233
type ObjstoreCache struct {
33-
bucket objstore.Bucket
34-
maxAge time.Duration
34+
bucket objstore.Bucket
35+
maxAge time.Duration
36+
metrics *Metrics
3537
}
3638

3739
func (c *ObjstoreCache) Get(ctx context.Context, buildID string) (io.ReadCloser, error) {
40+
c.metrics.cacheRequestsTotal.WithLabelValues("get").Inc()
41+
start := time.Now()
42+
defer func() {
43+
c.metrics.cacheOperationDuration.WithLabelValues("get").Observe(time.Since(start).Seconds())
44+
}()
45+
3846
// Fetch the object directly; a not-found error is counted as a cache miss
3947
reader, err := c.bucket.Get(ctx, buildID)
4048
if err != nil {
4149
if c.bucket.IsObjNotFoundErr(err) {
50+
c.metrics.cacheMissesTotal.Inc()
4251
return nil, err
4352
}
53+
c.metrics.cacheRequestErrorsTotal.WithLabelValues("get", "read_error").Inc()
4454
return nil, fmt.Errorf("get from cache: %w", err)
4555
}
4656

4757
// Fetch the object's attributes (a separate bucket call) to read LastModified for the expiry check
4858
attrs, err := c.bucket.Attributes(ctx, buildID)
4959
if err != nil {
5060
reader.Close()
61+
c.metrics.cacheRequestErrorsTotal.WithLabelValues("get", "attribute_error").Inc()
5162
return nil, fmt.Errorf("get cache attributes: %w", err)
5263
}
5364

5465
// Check if expired
5566
if time.Since(attrs.LastModified) > c.maxAge {
5667
reader.Close()
68+
c.metrics.cacheExpiredTotal.Inc()
69+
5770
// Async deletion to not block the request
5871
go func() {
5972
delCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
6073
defer cancel()
61-
_ = c.bucket.Delete(delCtx, buildID)
74+
if err = c.bucket.Delete(delCtx, buildID); err != nil {
75+
c.metrics.cacheRequestErrorsTotal.WithLabelValues("delete", "delete_error").Inc()
76+
}
6277
}()
6378
return nil, fmt.Errorf("cached object expired")
6479
}
6580

81+
c.metrics.cacheHitsTotal.Inc()
6682
return reader, nil
6783
}
6884

6985
func (c *ObjstoreCache) Put(ctx context.Context, buildID string, reader io.Reader) error {
86+
c.metrics.cacheRequestsTotal.WithLabelValues("put").Inc()
87+
start := time.Now()
88+
defer func() {
89+
c.metrics.cacheOperationDuration.WithLabelValues("put").Observe(time.Since(start).Seconds())
90+
}()
91+
7092
if err := c.bucket.Upload(ctx, buildID, reader); err != nil {
93+
c.metrics.cacheRequestErrorsTotal.WithLabelValues("put", "upload_error").Inc()
7194
return fmt.Errorf("upload to cache: %w", err)
7295
}
96+
7397
return nil
7498
}
7599

pkg/experiment/symbolizer/debuginfod_client.go

+23-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"os"
88
"path/filepath"
99
"regexp"
10+
"time"
1011
)
1112

1213
type DebuginfodClient interface {
@@ -15,48 +16,69 @@ type DebuginfodClient interface {
1516

1617
type debuginfodClient struct {
1718
baseURL string
19+
metrics *Metrics
1820
}
1921

20-
func NewDebuginfodClient(baseURL string) DebuginfodClient {
22+
func NewDebuginfodClient(baseURL string, metrics *Metrics) DebuginfodClient {
2123
return &debuginfodClient{
2224
baseURL: baseURL,
25+
metrics: metrics,
2326
}
2427
}
2528

2629
// FetchDebuginfo fetches the debuginfo file for a specific build ID.
2730
func (c *debuginfodClient) FetchDebuginfo(buildID string) (string, error) {
31+
c.metrics.debuginfodRequestsTotal.Inc()
32+
start := time.Now()
33+
2834
sanitizedBuildID, err := sanitizeBuildID(buildID)
2935
if err != nil {
36+
c.metrics.debuginfodRequestErrorsTotal.WithLabelValues("invalid_id").Inc()
3037
return "", err
3138
}
3239

3340
url := fmt.Sprintf("%s/buildid/%s/debuginfo", c.baseURL, sanitizedBuildID)
3441

3542
resp, err := http.Get(url)
3643
if err != nil {
44+
c.metrics.debuginfodRequestErrorsTotal.WithLabelValues("http").Inc()
45+
c.metrics.debuginfodRequestDuration.WithLabelValues("error").Observe(time.Since(start).Seconds())
3746
return "", fmt.Errorf("failed to fetch debuginfod: %w", err)
3847
}
3948
defer resp.Body.Close()
4049

4150
if resp.StatusCode != http.StatusOK {
51+
c.metrics.debuginfodRequestErrorsTotal.WithLabelValues("http").Inc()
52+
c.metrics.debuginfodRequestDuration.WithLabelValues("error").Observe(time.Since(start).Seconds())
4253
return "", fmt.Errorf("unexpected HTTP status: %s", resp.Status)
4354
}
4455

56+
// Record file size from Content-Length if available
57+
if contentLength := resp.ContentLength; contentLength > 0 {
58+
c.metrics.debuginfodFileSize.Observe(float64(contentLength))
59+
}
60+
4561
// TODO: Avoid file operations and handle debuginfo in memory.
4662
// Save the debuginfo to a temporary file
4763
tempDir := os.TempDir()
4864
filePath := filepath.Join(tempDir, fmt.Sprintf("%s.elf", sanitizedBuildID))
4965
outFile, err := os.Create(filePath)
5066
if err != nil {
67+
c.metrics.debuginfodRequestErrorsTotal.WithLabelValues("file_create").Inc()
68+
c.metrics.debuginfodRequestDuration.WithLabelValues("error").Observe(time.Since(start).Seconds())
5169
return "", fmt.Errorf("failed to create temp file: %w", err)
5270
}
5371
defer outFile.Close()
5472

5573
_, err = io.Copy(outFile, resp.Body)
5674
if err != nil {
75+
c.metrics.debuginfodRequestErrorsTotal.WithLabelValues("write").Inc()
76+
c.metrics.debuginfodRequestDuration.WithLabelValues("error").Observe(time.Since(start).Seconds())
5777
return "", fmt.Errorf("failed to write debuginfod to file: %w", err)
5878
}
5979

80+
c.metrics.debuginfodRequestDuration.WithLabelValues("success").Observe(time.Since(start).Seconds())
81+
6082
return filePath, nil
6183
}
6284

pkg/experiment/symbolizer/metrics.go

+160
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
package symbolizer
2+
3+
import "github.com/prometheus/client_golang/prometheus"
4+
5+
type Metrics struct {
6+
registerer prometheus.Registerer
7+
8+
// Debuginfod metrics
9+
debuginfodRequestDuration *prometheus.HistogramVec
10+
debuginfodFileSize prometheus.Histogram
11+
debuginfodRequestsTotal prometheus.Counter
12+
debuginfodRequestErrorsTotal *prometheus.CounterVec
13+
14+
// Cache metrics
15+
cacheRequestsTotal *prometheus.CounterVec
16+
cacheRequestErrorsTotal *prometheus.CounterVec
17+
cacheHitsTotal prometheus.Counter
18+
cacheMissesTotal prometheus.Counter
19+
cacheOperationDuration *prometheus.HistogramVec
20+
cacheExpiredTotal prometheus.Counter
21+
22+
// Symbolization metrics
23+
//symbolizationDuration prometheus.Histogram
24+
//symbolizationLocations *prometheus.CounterVec
25+
symbolizationRequestsTotal prometheus.Counter
26+
symbolizationRequestErrorsTotal *prometheus.CounterVec
27+
symbolizationDuration prometheus.Histogram
28+
symbolizationLocationTotal *prometheus.CounterVec
29+
}
30+
31+
func NewMetrics(reg prometheus.Registerer) *Metrics {
32+
m := &Metrics{
33+
registerer: reg,
34+
debuginfodRequestDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
35+
Name: "pyroscope_symbolizer_debuginfod_request_duration_seconds",
36+
Help: "Time spent performing debuginfod requests",
37+
Buckets: []float64{0.1, 0.5, 1, 5, 10, 30, 60, 120, 300},
38+
}, []string{"status"},
39+
),
40+
debuginfodFileSize: prometheus.NewHistogram(
41+
prometheus.HistogramOpts{
42+
Name: "pyroscope_symbolizer_debuginfo_file_size_bytes",
43+
Help: "Size of debug info files fetched from debuginfod",
44+
// 12 buckets, doubling from 1 MiB up to 2 GiB
45+
Buckets: prometheus.ExponentialBuckets(1024*1024, 2, 12),
46+
},
47+
),
48+
debuginfodRequestsTotal: prometheus.NewCounter(prometheus.CounterOpts{
49+
Name: "pyroscope_symbolizer_debuginfod_requests_total",
50+
Help: "Total number of debuginfod requests attempted",
51+
}),
52+
debuginfodRequestErrorsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
53+
Name: "pyroscope_symbolizer_debuginfod_request_errors_total",
54+
Help: "Total number of debuginfod request errors",
55+
}, []string{"reason"}),
56+
cacheRequestsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
57+
Name: "pyroscope_symbolizer_cache_requests_total",
58+
Help: "Total number of cache requests",
59+
}, []string{"operation"}),
60+
cacheRequestErrorsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
61+
Name: "pyroscope_symbolizer_cache_request_errors_total",
62+
Help: "Total number of cache request errors",
63+
}, []string{"operation", "reason"}), // operation is get/put/delete; reason is the specific error reason
64+
cacheHitsTotal: prometheus.NewCounter(prometheus.CounterOpts{
65+
Name: "pyroscope_symbolizer_cache_hits_total",
66+
Help: "Total number of cache hits",
67+
}),
68+
cacheMissesTotal: prometheus.NewCounter(prometheus.CounterOpts{
69+
Name: "pyroscope_symbolizer_cache_misses_total",
70+
Help: "Total number of cache misses",
71+
}),
72+
cacheOperationDuration: prometheus.NewHistogramVec(
73+
prometheus.HistogramOpts{
74+
Name: "pyroscope_symbolizer_cache_operation_duration_seconds",
75+
Help: "Time spent performing cache operations",
76+
Buckets: []float64{.01, .05, .1, .5, 1, 5, 10, 30, 60},
77+
},
78+
[]string{"operation"},
79+
),
80+
cacheExpiredTotal: prometheus.NewCounter(prometheus.CounterOpts{
81+
Name: "pyroscope_symbolizer_cache_expired_total",
82+
Help: "Total number of expired items removed from cache",
83+
}),
84+
symbolizationRequestsTotal: prometheus.NewCounter(prometheus.CounterOpts{
85+
Name: "pyroscope_symbolizer_requests_total",
86+
Help: "Total number of symbolization requests",
87+
}),
88+
symbolizationRequestErrorsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
89+
Name: "pyroscope_symbolizer_request_errors_total",
90+
Help: "Total number of symbolization errors",
91+
}, []string{"reason"}),
92+
symbolizationDuration: prometheus.NewHistogram(
93+
prometheus.HistogramOpts{
94+
Name: "pyroscope_symbolizer_duration_seconds",
95+
Help: "Time spent performing symbolization",
96+
Buckets: []float64{.01, .05, .1, .5, 1, 5, 10, 30},
97+
},
98+
),
99+
symbolizationLocationTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
100+
Name: "pyroscope_symbolizer_locations_total",
101+
Help: "Total number of locations processed",
102+
}, []string{"status"}),
103+
}
104+
m.register()
105+
return m
106+
}
107+
108+
func (m *Metrics) register() {
109+
if m.registerer == nil {
110+
return
111+
}
112+
113+
collectors := []prometheus.Collector{
114+
m.debuginfodRequestDuration,
115+
m.debuginfodFileSize,
116+
m.debuginfodRequestErrorsTotal,
117+
m.debuginfodRequestsTotal,
118+
m.cacheRequestsTotal,
119+
m.cacheRequestErrorsTotal,
120+
m.cacheHitsTotal,
121+
m.cacheMissesTotal,
122+
m.cacheOperationDuration,
123+
m.cacheExpiredTotal,
124+
m.symbolizationRequestsTotal,
125+
m.symbolizationRequestErrorsTotal,
126+
m.symbolizationDuration,
127+
m.symbolizationLocationTotal,
128+
}
129+
130+
for _, collector := range collectors {
131+
m.registerer.MustRegister(collector)
132+
}
133+
}
134+
135+
func (m *Metrics) Unregister() {
136+
if m.registerer == nil {
137+
return
138+
}
139+
140+
collectors := []prometheus.Collector{
141+
m.debuginfodRequestDuration,
142+
m.debuginfodFileSize,
143+
m.debuginfodRequestErrorsTotal,
144+
m.debuginfodRequestsTotal,
145+
m.cacheRequestsTotal,
146+
m.cacheRequestErrorsTotal,
147+
m.cacheHitsTotal,
148+
m.cacheMissesTotal,
149+
m.cacheOperationDuration,
150+
m.cacheExpiredTotal,
151+
m.symbolizationRequestsTotal,
152+
m.symbolizationRequestErrorsTotal,
153+
m.symbolizationDuration,
154+
m.symbolizationLocationTotal,
155+
}
156+
157+
for _, collector := range collectors {
158+
m.registerer.Unregister(collector)
159+
}
160+
}

0 commit comments

Comments
 (0)