Skip to content

Commit b5104a1

Browse files
committed
Add symbolization inside the read path
1 parent 412698c commit b5104a1

File tree

13 files changed

+372
-17
lines changed

13 files changed

+372
-17
lines changed

cmd/symbolization/main.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import (
77

88
pprof "github.com/google/pprof/profile"
99

10-
"github.com/grafana/pyroscope/pkg/experiment/symbolization"
10+
"github.com/grafana/pyroscope/pkg/experiment/symbolizer"
1111
)
1212

1313
const (
@@ -16,12 +16,12 @@ const (
1616
)
1717

1818
func main() {
19-
client := symbolization.NewDebuginfodClient(debuginfodBaseURL)
19+
client := symbolizer.NewDebuginfodClient(debuginfodBaseURL)
2020

2121
// Alternatively, use a local debug info file:
2222
//client := &localDebuginfodClient{debugFilePath: "/path/to/your/debug/file"}
2323

24-
symbolizer := symbolization.NewSymbolizer(client)
24+
s := symbolizer.NewSymbolizer(client)
2525
ctx := context.Background()
2626

2727
_, err := client.FetchDebuginfo(buildID)
@@ -31,11 +31,11 @@ func main() {
3131
//defer os.Remove(debugFilePath)
3232

3333
// Create a request to symbolize specific addresses
34-
req := symbolization.Request{
34+
req := symbolizer.Request{
3535
BuildID: buildID,
36-
Mappings: []symbolization.RequestMapping{
36+
Mappings: []symbolizer.RequestMapping{
3737
{
38-
Locations: []*symbolization.Location{
38+
Locations: []*symbolizer.Location{
3939
{
4040
Address: 0x1500,
4141
Mapping: &pprof.Mapping{},
@@ -53,7 +53,7 @@ func main() {
5353
},
5454
}
5555

56-
if err := symbolizer.Symbolize(ctx, req); err != nil {
56+
if err := s.Symbolize(ctx, req); err != nil {
5757
log.Fatalf("Failed to symbolize: %v", err)
5858
}
5959

pkg/experiment/query_backend/backend.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,19 @@ import (
1414

1515
metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
1616
queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1"
17+
"github.com/grafana/pyroscope/pkg/experiment/symbolizer"
1718
"github.com/grafana/pyroscope/pkg/util"
1819
)
1920

2021
type Config struct {
2122
Address string `yaml:"address"`
2223
GRPCClientConfig grpcclient.Config `yaml:"grpc_client_config" doc:"description=Configures the gRPC client used to communicate between the query-frontends and the query-schedulers."`
24+
DebuginfodURL string `yaml:"debuginfod_url"`
2325
}
2426

2527
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
2628
f.StringVar(&cfg.Address, "query-backend.address", "localhost:9095", "")
29+
f.StringVar(&cfg.DebuginfodURL, "query-backend.debuginfod-url", "https://debuginfod.elfutils.org", "URL of the debuginfod server")
2730
cfg.GRPCClientConfig.RegisterFlagsWithPrefix("query-backend.grpc-client-config", f)
2831
}
2932

@@ -48,6 +51,8 @@ type QueryBackend struct {
4851

4952
backendClient QueryHandler
5053
blockReader QueryHandler
54+
55+
symbolizer *symbolizer.Symbolizer
5156
}
5257

5358
func New(
@@ -57,13 +62,27 @@ func New(
5762
backendClient QueryHandler,
5863
blockReader QueryHandler,
5964
) (*QueryBackend, error) {
65+
var sym *symbolizer.Symbolizer
66+
if config.DebuginfodURL != "" {
67+
sym = symbolizer.NewSymbolizer(
68+
symbolizer.NewDebuginfodClient(config.DebuginfodURL),
69+
)
70+
}
71+
6072
q := QueryBackend{
6173
config: config,
6274
logger: logger,
6375
reg: reg,
6476
backendClient: backendClient,
6577
blockReader: blockReader,
78+
symbolizer: sym,
6679
}
80+
81+
// Pass symbolizer to BlockReader if it's the right type
82+
if br, ok := blockReader.(*BlockReader); ok {
83+
br.symbolizer = sym
84+
}
85+
6786
q.service = services.NewIdleService(q.starting, q.stopping)
6887
return &q, nil
6988
}

pkg/experiment/query_backend/query_tree.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ func queryTree(q *queryContext, query *queryv1.Query) (*queryv1.Report, error) {
5656
defer runutil.CloseWithErrCapture(&err, profiles, "failed to close profile stream")
5757

5858
resolver := symdb.NewResolver(q.ctx, q.ds.Symbols(),
59-
symdb.WithResolverMaxNodes(query.Tree.GetMaxNodes()))
59+
symdb.WithResolverMaxNodes(query.Tree.GetMaxNodes()),
60+
symdb.WithSymbolizer(q.symbolizer))
61+
6062
defer resolver.Release()
6163

6264
if len(spanSelector) > 0 {

pkg/experiment/symbolization/addrmapper.go renamed to pkg/experiment/symbolizer/addrmapper.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package symbolization
1+
package symbolizer
22

33
import (
44
"debug/elf"

pkg/experiment/symbolization/debuginfod_client.go renamed to pkg/experiment/symbolizer/debuginfod_client.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
package symbolization
1+
package symbolizer
22

33
import (
44
"fmt"
55
"io"
66
"net/http"
77
"os"
88
"path/filepath"
9+
"regexp"
910
)
1011

1112
type DebuginfodClient interface {
@@ -24,7 +25,12 @@ func NewDebuginfodClient(baseURL string) DebuginfodClient {
2425

2526
// FetchDebuginfo fetches the debuginfo file for a specific build ID.
2627
func (c *debuginfodClient) FetchDebuginfo(buildID string) (string, error) {
27-
url := fmt.Sprintf("%s/buildid/%s/debuginfo", c.baseURL, buildID)
28+
sanitizedBuildID, err := sanitizeBuildID(buildID)
29+
if err != nil {
30+
return "", err
31+
}
32+
33+
url := fmt.Sprintf("%s/buildid/%s/debuginfo", c.baseURL, sanitizedBuildID)
2834

2935
resp, err := http.Get(url)
3036
if err != nil {
@@ -36,9 +42,10 @@ func (c *debuginfodClient) FetchDebuginfo(buildID string) (string, error) {
3642
return "", fmt.Errorf("unexpected HTTP status: %s", resp.Status)
3743
}
3844

45+
// TODO: Avoid file operations and handle debuginfo in memory.
3946
// Save the debuginfo to a temporary file
4047
tempDir := os.TempDir()
41-
filePath := filepath.Join(tempDir, fmt.Sprintf("%s.elf", buildID))
48+
filePath := filepath.Join(tempDir, fmt.Sprintf("%s.elf", sanitizedBuildID))
4249
outFile, err := os.Create(filePath)
4350
if err != nil {
4451
return "", fmt.Errorf("failed to create temp file: %w", err)
@@ -52,3 +59,13 @@ func (c *debuginfodClient) FetchDebuginfo(buildID string) (string, error) {
5259

5360
return filePath, nil
5461
}
62+
63+
// sanitizeBuildID ensures that the buildID is a safe and valid string for use in file paths.
64+
func sanitizeBuildID(buildID string) (string, error) {
65+
// Allow only alphanumeric characters, dashes, and underscores.
66+
validBuildID := regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
67+
if !validBuildID.MatchString(buildID) {
68+
return "", fmt.Errorf("invalid build ID: %s", buildID)
69+
}
70+
return buildID, nil
71+
}

pkg/experiment/symbolization/dwarf.go renamed to pkg/experiment/symbolizer/dwarf.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package symbolization
1+
package symbolizer
22

33
import (
44
"context"

pkg/experiment/symbolization/symbolizer.go renamed to pkg/experiment/symbolizer/symbolizer.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package symbolization
1+
package symbolizer
22

33
import (
44
"context"
@@ -92,7 +92,7 @@ func (s *Symbolizer) Symbolize(ctx context.Context, req Request) error {
9292
continue // Skip errors for individual addresses
9393
}
9494

95-
// Update the location directly (this is why Parca modifies the request - it's updating the shared locations)
95+
// Update the location directly
9696
loc.Lines = lines
9797
}
9898
}

pkg/experiment/symbolization/types.go renamed to pkg/experiment/symbolizer/types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package symbolization
1+
package symbolizer
22

33
import (
44
"context"

pkg/phlaredb/symdb/resolver.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
1313
typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
14+
"github.com/grafana/pyroscope/pkg/experiment/symbolizer"
1415
"github.com/grafana/pyroscope/pkg/model"
1516
schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
1617
"github.com/grafana/pyroscope/pkg/pprof"
@@ -37,6 +38,8 @@ type Resolver struct {
3738

3839
maxNodes int64
3940
sts *typesv1.StackTraceSelector
41+
42+
symbolizer *symbolizer.Symbolizer
4043
}
4144

4245
type ResolverOption func(*Resolver)
@@ -57,6 +60,12 @@ func WithResolverMaxNodes(n int64) ResolverOption {
5760
}
5861
}
5962

63+
func WithSymbolizer(s *symbolizer.Symbolizer) ResolverOption {
64+
return func(r *Resolver) {
65+
r.symbolizer = s
66+
}
67+
}
68+
6069
// WithResolverStackTraceSelector specifies the stack trace selector.
6170
// Only stack traces that belong to the callSite (have the prefix provided)
6271
// will be selected. If empty, the filter is ignored.
@@ -273,7 +282,9 @@ func (r *Resolver) withSymbols(ctx context.Context, fn func(*Symbols, *SampleApp
273282
if err := p.fetch(ctx); err != nil {
274283
return err
275284
}
276-
return fn(p.reader.Symbols(), p.samples)
285+
symbols := p.reader.Symbols()
286+
symbols.SetSymbolizer(r.symbolizer)
287+
return fn(symbols, p.samples)
277288
}))
278289
}
279290
return g.Wait()
@@ -295,3 +306,18 @@ func (r *Symbols) Tree(
295306
) (*model.Tree, error) {
296307
return buildTree(ctx, r, appender, maxNodes)
297308
}
309+
310+
func (r *Symbols) SetSymbolizer(sym *symbolizer.Symbolizer) {
311+
r.Symbolizer = sym
312+
}
313+
314+
func (r *Symbols) needsDebuginfodSymbolization(loc *schemav1.InMemoryLocation, mapping *schemav1.InMemoryMapping) bool {
315+
if r.Symbolizer == nil {
316+
return false
317+
}
318+
if len(loc.Line) == 0 {
319+
// Must have mapping with build ID
320+
return mapping != nil && mapping.BuildId != 0
321+
}
322+
return false
323+
}

pkg/phlaredb/symdb/resolver_tree.go

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@ import (
44
"context"
55
"sync"
66

7+
pprof "github.com/google/pprof/profile"
78
"golang.org/x/sync/errgroup"
89

10+
"github.com/grafana/pyroscope/pkg/experiment/symbolizer"
911
"github.com/grafana/pyroscope/pkg/iter"
1012
"github.com/grafana/pyroscope/pkg/model"
1113
schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
@@ -19,6 +21,13 @@ func buildTree(
1921
appender *SampleAppender,
2022
maxNodes int64,
2123
) (*model.Tree, error) {
24+
// Try debuginfod symbolization first
25+
if symbols != nil && symbols.Symbolizer != nil {
26+
if err := symbolizeLocations(ctx, symbols); err != nil {
27+
// TODO: Log error but continue? partial symbolization is better than none
28+
}
29+
}
30+
2231
// If the number of samples is large (> 128K) and the StacktraceResolver
2332
// implements the range iterator, we will be building the tree based on
2433
// the parent pointer tree of the partition (a copy of). The only exception
@@ -239,3 +248,85 @@ func minValue(nodes []Node, maxNodes int64) int64 {
239248
}
240249
return h[0]
241250
}
251+
252+
func symbolizeLocations(ctx context.Context, symbols *Symbols) error {
253+
type locToSymbolize struct {
254+
idx int32
255+
loc *schemav1.InMemoryLocation
256+
mapping *schemav1.InMemoryMapping
257+
}
258+
locsByBuildId := make(map[string][]locToSymbolize)
259+
260+
// Find all locations needing symbolization
261+
for i, loc := range symbols.Locations {
262+
if mapping := &symbols.Mappings[loc.MappingId]; symbols.needsDebuginfodSymbolization(&loc, mapping) {
263+
buildIDStr := symbols.Strings[mapping.BuildId]
264+
locsByBuildId[buildIDStr] = append(locsByBuildId[buildIDStr], locToSymbolize{
265+
idx: int32(i),
266+
loc: &loc,
267+
mapping: mapping,
268+
})
269+
}
270+
}
271+
272+
for buildID, locs := range locsByBuildId {
273+
req := symbolizer.Request{
274+
BuildID: buildID,
275+
Mappings: []symbolizer.RequestMapping{{
276+
Locations: make([]*symbolizer.Location, len(locs)),
277+
}},
278+
}
279+
280+
for i, loc := range locs {
281+
req.Mappings[0].Locations[i] = &symbolizer.Location{
282+
Address: loc.loc.Address,
283+
Mapping: &pprof.Mapping{
284+
Start: loc.mapping.MemoryStart,
285+
Limit: loc.mapping.MemoryLimit,
286+
Offset: loc.mapping.FileOffset,
287+
BuildID: buildID,
288+
},
289+
}
290+
}
291+
292+
if err := symbols.Symbolizer.Symbolize(ctx, req); err != nil {
293+
// TODO: log/process errors but continue with other build IDs
294+
continue
295+
}
296+
297+
// Store symbolization results back
298+
for i, symLoc := range req.Mappings[0].Locations {
299+
if len(symLoc.Lines) > 0 {
300+
// Get the original location we're updating
301+
locIdx := locs[i].idx
302+
303+
// Clear the existing lines for the location
304+
symbols.Locations[locIdx].Line = nil
305+
306+
for _, line := range symLoc.Lines {
307+
// Create string entries first
308+
nameIdx := uint32(len(symbols.Strings))
309+
symbols.Strings = append(symbols.Strings, line.Function.Name)
310+
311+
filenameIdx := uint32(len(symbols.Strings))
312+
symbols.Strings = append(symbols.Strings, line.Function.Filename)
313+
314+
// Create function entry
315+
funcId := uint32(len(symbols.Functions))
316+
symbols.Functions = append(symbols.Functions, schemav1.InMemoryFunction{
317+
Id: uint64(funcId),
318+
Name: nameIdx,
319+
Filename: filenameIdx,
320+
StartLine: uint32(line.Function.StartLine),
321+
})
322+
323+
symbols.Locations[locIdx].Line = append(symbols.Locations[locIdx].Line, schemav1.InMemoryLine{
324+
FunctionId: funcId,
325+
Line: int32(line.Line),
326+
})
327+
}
328+
}
329+
}
330+
}
331+
return nil
332+
}

0 commit comments

Comments
 (0)