From acbfc9bb0ebdae93540dec99b9ce4cd8a1115e15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leandro=20L=C3=B3pez=20=28inkel=29?= Date: Fri, 21 Mar 2025 10:03:21 -0300 Subject: [PATCH 1/4] Add benchmark for `*PartitionWriter.WriteProfileSymbols` I had to perform the initialization within the for loop as otherwise it always panics. --- pkg/phlaredb/symdb/symdb_test.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pkg/phlaredb/symdb/symdb_test.go b/pkg/phlaredb/symdb/symdb_test.go index 2f8804081d..ff0957c02e 100644 --- a/pkg/phlaredb/symdb/symdb_test.go +++ b/pkg/phlaredb/symdb/symdb_test.go @@ -227,3 +227,23 @@ func TestWritePartition(t *testing.T) { ` require.Equal(t, expected, resolved.String()) } + +func BenchmarkPartitionWriter_WriteProfileSymbols(b *testing.B) { + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + // If opening the profile or creating the config, db, or + // partition writer outside the loop it always results in a + // panic. + b.StopTimer() + p, err := pprof.OpenFile("testdata/profile.pb.gz") + require.NoError(b, err) + p.Normalize() + cfg := DefaultConfig().WithDirectory(b.TempDir()) + db := NewSymDB(cfg) + pw := db.PartitionWriter(0) + b.StartTimer() + + pw.WriteProfileSymbols(p.Profile) + } +} From 196a1a0a9e53744f7e1ffad6e029f32b11321c67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leandro=20L=C3=B3pez?= Date: Fri, 21 Mar 2025 16:40:56 +0000 Subject: [PATCH 2/4] Refactor `*PartitionWriter.WriteProfileSymbols` for DRY Thank you @simonswine for the suggestion! Co-authored-by: Christian Simon --- pkg/phlaredb/symdb/symdb_test.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/phlaredb/symdb/symdb_test.go b/pkg/phlaredb/symdb/symdb_test.go index ff0957c02e..d804b28fd6 100644 --- a/pkg/phlaredb/symdb/symdb_test.go +++ b/pkg/phlaredb/symdb/symdb_test.go @@ -231,19 +231,19 @@ func TestWritePartition(t *testing.T) { func BenchmarkPartitionWriter_WriteProfileSymbols(b *testing.B) { b.ReportAllocs() + p, err := pprof.OpenFile("testdata/profile.pb.gz") + require.NoError(b, err) + p.Normalize() + cfg := DefaultConfig().WithDirectory(b.TempDir()) + + db := NewSymDB(cfg) + for i := 0; i < b.N; i++ { - // If opening the profile or creating the config, db, or - // partition writer outside the loop it always results in a - // panic. b.StopTimer() - p, err := pprof.OpenFile("testdata/profile.pb.gz") - require.NoError(b, err) - p.Normalize() - cfg := DefaultConfig().WithDirectory(b.TempDir()) - db := NewSymDB(cfg) - pw := db.PartitionWriter(0) + newP := p.CloneVT() + pw := db.PartitionWriter(uint64(i)) b.StartTimer() - pw.WriteProfileSymbols(p.Profile) + pw.WriteProfileSymbols(newP) } } From fc7e5d2439c72aadd123c9d6bb93b241838abd23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leandro=20L=C3=B3pez=20=28inkel=29?= Date: Fri, 21 Mar 2025 11:19:23 -0300 Subject: [PATCH 3/4] Create rewriting map with maximum capacity By doing this we avoid having to regrow the map, which improves CPU and memory consumption. --- pkg/phlaredb/symdb/dedup_slice.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/phlaredb/symdb/dedup_slice.go b/pkg/phlaredb/symdb/dedup_slice.go index 78056f893e..a603740d18 100644 --- a/pkg/phlaredb/symdb/dedup_slice.go +++ b/pkg/phlaredb/symdb/dedup_slice.go @@ -254,7 +254,7 @@ func (s *deduplicatingSlice[M, K, H]) Size() uint64 { func (s *deduplicatingSlice[M, K, H]) ingest(elems []M, rewriter *rewriter) { var ( - rewritingMap = make(map[int64]int64) + rewritingMap = make(map[int64]int64, len(elems)) missing = int64SlicePool.Get() ) missing = missing[:0] From 44cf7d9dcf4c4ede218fe3580efa1ad9ed845093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leandro=20L=C3=B3pez=20=28inkel=29?= Date: Fri, 21 Mar 2025 11:21:01 -0300 Subject: [PATCH 4/4] Use temp slice to append only once Instead of checking `s.slice` on each call to append and regrow if necessary, create a temporary slice with the maximum possible capacity, and append that slice to `s.slice` outside the for loop, effectively checking and regrowing `s.slice` only once. This further improves CPU and memory consumption. --- pkg/phlaredb/symdb/dedup_slice.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/phlaredb/symdb/dedup_slice.go b/pkg/phlaredb/symdb/dedup_slice.go index a603740d18..f0f9face8f 100644 --- a/pkg/phlaredb/symdb/dedup_slice.go +++ b/pkg/phlaredb/symdb/dedup_slice.go @@ -279,6 +279,7 @@ func (s *deduplicatingSlice[M, K, H]) ingest(elems []M, rewriter *rewriter) { if len(missing) > 0 { s.lock.Lock() posSlice := int64(len(s.slice)) + misSlice := make([]M, 0, len(missing)) for _, pos := range missing { // check again if element exists k := s.helper.key(elems[pos]) @@ -288,12 +289,13 @@ func (s *deduplicatingSlice[M, K, H]) ingest(elems []M, rewriter *rewriter) { } // add element to slice/map - s.slice = append(s.slice, s.helper.clone(elems[pos])) + misSlice = append(misSlice, s.helper.clone(elems[pos])) s.lookup[k] = posSlice rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), &elems[pos]))] = posSlice posSlice++ s.size.Add(s.helper.size(elems[pos])) } + s.slice = append(s.slice, misSlice...) s.lock.Unlock() }