Skip to content

Commit ff7b42c

Browse files
[memprof] Speed up llvm-profdata (#117446)
CallStackRadixTreeBuilder::build takes the parameter MemProfFrameIndexes by value, involving copies: std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>> MemProfFrameIndexes Then "build" makes another copy of MemProfFrameIndexe and passes it to encodeCallStack for every call stack, which is painfully slow. This patch changes the type to a pointer so that we don't have to make a copy every time we pass the argument. Without this patch, it takes 553 seconds to run "llvm-profdata merge" on a large MemProf raw profile. This patch shortenes that down to 67 seconds.
1 parent 9e3215a commit ff7b42c

File tree

5 files changed

+17
-20
lines changed

5 files changed

+17
-20
lines changed

llvm/include/llvm/ProfileData/MemProf.h

+9-10
Original file line numberDiff line numberDiff line change
@@ -1117,21 +1117,20 @@ template <typename FrameIdTy> class CallStackRadixTreeBuilder {
11171117

11181118
// Encode a call stack into RadixArray. Return the starting index within
11191119
// RadixArray.
1120-
LinearCallStackId
1121-
encodeCallStack(const llvm::SmallVector<FrameIdTy> *CallStack,
1122-
const llvm::SmallVector<FrameIdTy> *Prev,
1123-
std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
1124-
MemProfFrameIndexes);
1120+
LinearCallStackId encodeCallStack(
1121+
const llvm::SmallVector<FrameIdTy> *CallStack,
1122+
const llvm::SmallVector<FrameIdTy> *Prev,
1123+
const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes);
11251124

11261125
public:
11271126
CallStackRadixTreeBuilder() = default;
11281127

11291128
// Build a radix tree array.
1130-
void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
1131-
&&MemProfCallStackData,
1132-
std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
1133-
MemProfFrameIndexes,
1134-
llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram);
1129+
void
1130+
build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
1131+
&&MemProfCallStackData,
1132+
const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes,
1133+
llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram);
11351134

11361135
ArrayRef<LinearFrameId> getRadixArray() const { return RadixArray; }
11371136

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -4235,7 +4235,7 @@ static DenseMap<CallStackId, LinearCallStackId> writeMemoryProfileRadixTree(
42354235
CallStackRadixTreeBuilder<LinearFrameId> Builder;
42364236
// We don't need a MemProfFrameIndexes map as we have already converted the
42374237
// full stack id hash to a linear offset into the StackIds array.
4238-
Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/std::nullopt,
4238+
Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/nullptr,
42394239
FrameHistogram);
42404240
Stream.EmitRecord(bitc::FS_CONTEXT_RADIX_TREE_ARRAY, Builder.getRadixArray(),
42414241
RadixAbbrev);

llvm/lib/ProfileData/InstrProfWriter.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,7 @@ writeMemProfCallStackArray(
641641
MemProfCallStackIndexes;
642642

643643
memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
644-
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
644+
Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
645645
FrameHistogram);
646646
for (auto I : Builder.getRadixArray())
647647
OS.write32(I);

llvm/lib/ProfileData/MemProf.cpp

+2-4
Original file line numberDiff line numberDiff line change
@@ -335,8 +335,7 @@ template <typename FrameIdTy>
335335
LinearCallStackId CallStackRadixTreeBuilder<FrameIdTy>::encodeCallStack(
336336
const llvm::SmallVector<FrameIdTy> *CallStack,
337337
const llvm::SmallVector<FrameIdTy> *Prev,
338-
std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
339-
MemProfFrameIndexes) {
338+
const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes) {
340339
// Compute the length of the common root prefix between Prev and CallStack.
341340
uint32_t CommonLen = 0;
342341
if (Prev) {
@@ -381,8 +380,7 @@ template <typename FrameIdTy>
381380
void CallStackRadixTreeBuilder<FrameIdTy>::build(
382381
llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
383382
&&MemProfCallStackData,
384-
std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
385-
MemProfFrameIndexes,
383+
const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes,
386384
llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram) {
387385
// Take the vector portion of MemProfCallStackData. The vector is exactly
388386
// what we need to sort. Also, we no longer need its lookup capability.

llvm/unittests/ProfileData/MemProfTest.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -628,7 +628,7 @@ TEST(MemProf, RadixTreeBuilderEmpty) {
628628
FrameHistogram =
629629
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
630630
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
631-
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
631+
Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
632632
FrameHistogram);
633633
ASSERT_THAT(Builder.getRadixArray(), testing::IsEmpty());
634634
const auto Mappings = Builder.takeCallStackPos();
@@ -646,7 +646,7 @@ TEST(MemProf, RadixTreeBuilderOne) {
646646
FrameHistogram =
647647
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
648648
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
649-
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
649+
Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
650650
FrameHistogram);
651651
EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({
652652
3U, // Size of CS1,
@@ -673,7 +673,7 @@ TEST(MemProf, RadixTreeBuilderTwo) {
673673
FrameHistogram =
674674
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
675675
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
676-
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
676+
Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
677677
FrameHistogram);
678678
EXPECT_THAT(Builder.getRadixArray(),
679679
testing::ElementsAreArray({
@@ -711,7 +711,7 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) {
711711
FrameHistogram =
712712
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
713713
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
714-
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
714+
Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
715715
FrameHistogram);
716716
EXPECT_THAT(Builder.getRadixArray(),
717717
testing::ElementsAreArray({

0 commit comments

Comments
 (0)