Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Modify optimized compaction to cover edge cases #25594

Open
wants to merge 23 commits into
base: master-1.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d631314
feat: Modify optimized compaction to cover edge cases
devanbenz Dec 16, 2024
67849ae
feat: Modify the PR to include optimized compaction
devanbenz Dec 17, 2024
827e859
feat: Use named variables for PlanOptimize
devanbenz Dec 17, 2024
5387ca3
feat: adjust test comments
devanbenz Dec 17, 2024
3153596
feat: code removal from debugging
devanbenz Dec 17, 2024
83d28ec
feat: setting BlockCount idx value to 1
devanbenz Dec 17, 2024
f896a01
feat: Adjust testing and add sprintf for magic vars
devanbenz Dec 18, 2024
f15d9be
feat: need to use int64 instead of int
devanbenz Dec 18, 2024
54c8e1c
feat: touch
devanbenz Dec 18, 2024
403d888
feat: Adjust tests to include lower level planning function calls
devanbenz Dec 18, 2024
23d12e1
feat: Fix up some tests that I forgot to adjust
devanbenz Dec 18, 2024
d3afb03
feat: fix typo
devanbenz Dec 18, 2024
cf657a8
feat: touch
devanbenz Dec 18, 2024
fc6ca13
feat: Call SingleGenerationReason() once by initializing a
devanbenz Dec 19, 2024
4fc4d55
feat: clarify file counts for reason we are not fully compacted
devanbenz Dec 19, 2024
c93bdfb
feat: grammar typo
devanbenz Dec 19, 2024
2dd5ef4
feat: missed a test when updating the variable! whoops!
devanbenz Dec 19, 2024
479de96
feat: Add test for another edge case found;
devanbenz Dec 20, 2024
c392906
feat: Remove some overlapping tests
devanbenz Dec 20, 2024
f444518
feat: Adds check for block counts and adjusts tests to use require.Ze…
devanbenz Dec 26, 2024
5e4e2da
feat: Adds test for planning lower level TSMs with block sizes at agg…
devanbenz Dec 26, 2024
c315b1f
chore: rerun ci
devanbenz Dec 26, 2024
eb0a77d
feat: Add a mock backfill test with mixed generations, mixed levels, …
devanbenz Dec 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions tsdb/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,15 @@ const (

// MaxTSMFileSize is the maximum size of TSM files.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

MaxTSMFileSize = uint32(2048 * 1024 * 1024) // 2GB

)

var SingleGenerationReasonText string = SingleGenerationReason()

// SingleGenerationReason outputs a log message for our single generation compaction
// when checked for full compaction.
// 1048576000 (1000 * 1024 * 1024) is used as an approximate bytes-per-gigabyte divisor; integer division of MaxTSMFileSize by it yields 2.
func SingleGenerationReason() string {
return fmt.Sprintf("not fully compacted and not idle because single generation with many files under %d GB and many files under aggressive compaction points per block count (%d points)", int(MaxTSMFileSize/1048576000), AggressiveMaxPointsPerBlock)
return fmt.Sprintf("not fully compacted and not idle because single generation with more than 2 files under %d GB and more than 1 file(s) under aggressive compaction points per block count (%d points)", int(MaxTSMFileSize/1048576000), AggressiveMaxPointsPerBlock)
}

// Config holds the configuration for the tsdb package.
Expand Down
4 changes: 2 additions & 2 deletions tsdb/engine/tsm1/compact.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ func (c *DefaultPlanner) FullyCompacted() (bool, string) {
}

if filesUnderMaxTsmSizeCount > 1 && aggressivePointsPerBlockCount < len(gens[0].files) {
return false, tsdb.SingleGenerationReason()
return false, tsdb.SingleGenerationReasonText
}
}
return true, ""
Expand Down Expand Up @@ -397,7 +397,7 @@ func (c *DefaultPlanner) PlanOptimize() (compactGroup []CompactionGroup, compact
}
}

if len(currentGen) == 0 || currentGen.level() == cur.level() {
if len(currentGen) == 0 || currentGen.level() >= cur.level() {
devanbenz marked this conversation as resolved.
Show resolved Hide resolved
currentGen = append(currentGen, cur)
continue
}
Expand Down
175 changes: 58 additions & 117 deletions tsdb/engine/tsm1/compact_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2212,65 +2212,6 @@ func TestDefaultPlanner_PlanOptimize_NoLevel4(t *testing.T) {
}
}

func TestDefaultPlanner_PlanOptimize_Level4(t *testing.T) {
data := []tsm1.FileStat{
{
Path: "01-04.tsm1",
Size: 251 * 1024 * 1024,
},
{
Path: "02-04.tsm1",
Size: 1 * 1024 * 1024,
},
{
Path: "03-04.tsm1",
Size: 1 * 1024 * 1024,
},
{
Path: "04-04.tsm1",
Size: 1 * 1024 * 1024,
},
{
Path: "05-03.tsm1",
Size: 2 * 1024 * 1024 * 1024,
},
{
Path: "06-04.tsm1",
Size: 2 * 1024 * 1024 * 1024,
},
{
Path: "07-03.tsm1",
Size: 2 * 1024 * 1024 * 1024,
},
}

cp := tsm1.NewDefaultPlanner(
&fakeFileStore{
PathsFn: func() []tsm1.FileStat {
return data
},
}, tsdb.DefaultCompactFullWriteColdDuration,
)

expFiles1 := []tsm1.FileStat{data[0], data[1], data[2], data[3], data[4], data[5]}
tsm, pLen, _ := cp.PlanOptimize()
if exp, got := 1, len(tsm); exp != got {
t.Fatalf("group length mismatch: got %v, exp %v", got, exp)
} else if pLen != int64(len(tsm)) {
t.Fatalf("tsm file plan length mismatch: got %v, exp %v", pLen, exp)
}

if exp, got := len(expFiles1), len(tsm[0]); got != exp {
t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp)
}

for i, p := range expFiles1 {
if got, exp := tsm[0][i], p.Path; got != exp {
t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp)
}
}
}

// This test is added to account for many TSM files within a group being over 2 GB;
// we want to ensure that the shard will be planned.
func TestDefaultPlanner_PlanOptimize_LargeMultiGeneration(t *testing.T) {
Expand Down Expand Up @@ -2487,7 +2428,7 @@ func TestDefaultPlanner_FullyCompacted_SmallSingleGeneration(t *testing.T) {
cp := tsm1.NewDefaultPlanner(fs, tsdb.DefaultCompactFullWriteColdDuration)

compacted, reason := cp.FullyCompacted()
require.Equal(t, reason, tsdb.SingleGenerationReason(), "fullyCompacted reason")
require.Equal(t, reason, tsdb.SingleGenerationReasonText, "fullyCompacted reason")
require.False(t, compacted, "is fully compacted")

_, cgLen := cp.PlanLevel(1)
Expand Down Expand Up @@ -2607,7 +2548,7 @@ func TestDefaultPlanner_FullyCompacted_LargeSingleGenerationUnderAggressiveBlock

cp := tsm1.NewDefaultPlanner(fs, tsdb.DefaultCompactFullWriteColdDuration)
compacted, reason := cp.FullyCompacted()
require.Equal(t, reason, tsdb.SingleGenerationReason(), "fullyCompacted reason")
require.Equal(t, reason, tsdb.SingleGenerationReasonText, "fullyCompacted reason")
require.False(t, compacted, "is fully compacted")

_, cgLen := cp.PlanLevel(1)
Expand Down Expand Up @@ -2822,11 +2763,8 @@ func TestDefaultPlanner_FullyCompacted_ManySingleGenLessThen2GBNotMaxAggrBlocks(

cp := tsm1.NewDefaultPlanner(fs, tsdb.DefaultCompactFullWriteColdDuration)

// 1048576000 is a magic number for bytes per gigabyte
reasonExp := fmt.Sprintf("not fully compacted and not idle because single generation with many files under %d GB and many files under aggressive compaction points per block count (%d points)", int(tsdb.MaxTSMFileSize/1048576000), tsdb.AggressiveMaxPointsPerBlock)

compacted, reason := cp.FullyCompacted()
require.Equal(t, reason, reasonExp, "fullyCompacted reason")
require.Equal(t, reason, tsdb.SingleGenerationReasonText, "fullyCompacted reason")
require.False(t, compacted, "is fully compacted")

_, cgLen := cp.PlanLevel(1)
Expand All @@ -2844,87 +2782,90 @@ func TestDefaultPlanner_FullyCompacted_ManySingleGenLessThen2GBNotMaxAggrBlocks(
require.Equal(t, int64(1), genLen, "generation count")
}

func TestDefaultPlanner_PlanOptimize_Multiple(t *testing.T) {
// This test is added to account for multiple generations over level 4
// compaction and over 2 GB group size, with a level 3 start generation
// over 2 GB group size.
func TestDefaultPlanner_FullyCompacted_ManySingleGen2GBLastLevel2(t *testing.T) {
// > 2 GB total group size
// 100% of files are at aggressive max block size
data := []tsm1.FileStat{
{
Path: "01-04.tsm1",
Size: 251 * 1024 * 1024,
Path: "01-05.tsm1",
Size: 2048 * 1024 * 1024,
},
{
Path: "02-04.tsm1",
Size: 1 * 1024 * 1024,
Path: "01-06.tsm1",
Size: 2048 * 1024 * 1024,
},
{
Path: "03-04.tsm1",
Size: 1 * 1024 * 1024,
Path: "01-07.tsm1",
Size: 2048 * 1024 * 1024,
},
{
Path: "04-04.tsm1",
Size: 1 * 1024 * 1024,
Path: "01-08.tsm1",
Size: 1048 * 1024 * 1024,
},
{
Path: "05-03.tsm1",
Size: 2 * 1024 * 1024 * 1024,
Path: "02-05.tsm1",
Size: 2048 * 1024 * 1024,
},
{
Path: "06-03.tsm1",
Size: 2 * 1024 * 1024 * 1024,
Path: "02-06.tsm1",
Size: 2048 * 1024 * 1024,
},
{
Path: "07-04.tsm1",
Size: 2 * 1024 * 1024 * 1024,
Path: "02-07.tsm1",
Size: 2048 * 1024 * 1024,
},
{
Path: "08-04.tsm1",
Size: 2 * 1024 * 1024 * 1024,
Path: "02-08.tsm1",
Size: 1048 * 1024 * 1024,
},
{
Path: "09-04.tsm1",
Size: 2 * 1024 * 1024 * 1024,
Path: "03-03.tsm1",
Size: 2048 * 1024 * 1024,
},
{
Path: "10-04.tsm1",
Size: 2 * 1024 * 1024 * 1024,
Path: "03-04.tsm1",
Size: 2048 * 1024 * 1024,
},
{
Path: "03-04.tsm1",
Size: 600 * 1024 * 1024,
},
{
Path: "03-06.tsm1",
Size: 500 * 1024 * 1024,
},
}

cp := tsm1.NewDefaultPlanner(
&fakeFileStore{
PathsFn: func() []tsm1.FileStat {
return data
},
}, tsdb.DefaultCompactFullWriteColdDuration,
)

expFiles1 := []tsm1.FileStat{data[0], data[1], data[2], data[3]}
expFiles2 := []tsm1.FileStat{data[6], data[7], data[8], data[9]}

tsm, pLen, _ := cp.PlanOptimize()
if exp, got := 2, len(tsm); exp != got {
t.Fatalf("group length mismatch: got %v, exp %v", got, exp)
} else if pLen != int64(len(tsm)) {
t.Fatalf("tsm file plan length mismatch: got %v, exp %v", pLen, exp)
fs := &fakeFileStore{
PathsFn: func() []tsm1.FileStat {
return data
},
}

if exp, got := len(expFiles1), len(tsm[0]); got != exp {
t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp)
}
cp := tsm1.NewDefaultPlanner(fs, tsdb.DefaultCompactFullWriteColdDuration)

for i, p := range expFiles1 {
if got, exp := tsm[0][i], p.Path; got != exp {
t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp)
}
expFiles := make([]tsm1.FileStat, 0)
for _, file := range data {
expFiles = append(expFiles, file)
}

if exp, got := len(expFiles2), len(tsm[1]); got != exp {
t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp)
}
_, cgLen := cp.PlanLevel(1)
require.Equal(t, int64(0), cgLen, "compaction group length; PlanLevel(1)")
devanbenz marked this conversation as resolved.
Show resolved Hide resolved
_, cgLen = cp.PlanLevel(2)
require.Equal(t, int64(0), cgLen, "compaction group length; PlanLevel(2)")
_, cgLen = cp.PlanLevel(3)
require.Equal(t, int64(0), cgLen, "compaction group length; PlanLevel(3)")

for i, p := range expFiles2 {
if got, exp := tsm[1][i], p.Path; got != exp {
t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp)
}
}
_, cgLen = cp.Plan(time.Now().Add(-1))
require.Equal(t, int64(0), cgLen, "compaction group length; Plan()")

tsm, cgLen, genLen := cp.PlanOptimize()
require.Equal(t, int64(1), cgLen, "compaction group length")
require.Equal(t, int64(3), genLen, "generation count")
require.Equal(t, len(expFiles), len(tsm[0]), "tsm files in compaction group")
}

func TestDefaultPlanner_PlanOptimize_Tombstones(t *testing.T) {
Expand Down
Loading