Skip to content

Commit 17d3132

Browse files
Remove partialPlan and use plan with virtualPredicate() for branch-and-bound
Remove the partialPlan struct and its duplicated cost calculation methods. Instead, use the plan struct directly with its virtualPredicate() method and numDecidedPredicates field to compute lower bound costs.

Changes:
- Removed partialPlan struct
- Removed all cost methods from partialPlan (now inherited from plan)
- Changed partialPlans heap type to plans
- Updated pools to use plan instead of partialPlan
- Branch-and-bound now uses plan directly with numDecidedPredicates tracking
- TotalCost() automatically computes lower bound when numDecidedPredicates < len(predicates)

This simplifies the code by eliminating duplication while maintaining the same behavior.

Signed-off-by: Dimitar Dimitrov <[email protected]>
1 parent 5dd5ae0 commit 17d3132

File tree

3 files changed

+57
-195
lines changed

3 files changed

+57
-195
lines changed

pkg/ingester/lookupplan/branch_and_bound.go

Lines changed: 36 additions & 183 deletions
Original file line numberDiff line numberDiff line change
@@ -6,197 +6,53 @@ import (
66
"container/heap"
77
"context"
88
"iter"
9+
"math"
910

1011
"github.com/prometheus/prometheus/model/labels"
1112
"github.com/prometheus/prometheus/tsdb/index"
1213

1314
"github.com/grafana/mimir/pkg/storage/sharding"
1415
)
1516

16-
// partialPlan represents a plan where only some predicates have been decided.
17-
// Predicates are decided in order from 0 to len(predicates)-1.
18-
type partialPlan struct {
17+
type planWithCost struct {
1918
plan
20-
21-
// lowerBoundCost is the value of LowerBoundCost() cached for efficiency.
22-
lowerBoundCost float64
23-
// numDecidedPredicates tracks how many predicates have been decided (0 to len(predicates)).
24-
// Predicates [0, numDecidedPredicates) have been decided.
25-
numDecidedPredicates int
26-
}
27-
28-
func partialPlanWithLowerBound(p plan, numDecided int) partialPlan {
29-
partial := partialPlan{
30-
plan: p,
31-
numDecidedPredicates: numDecided,
32-
}
33-
partial.lowerBoundCost = partial.LowerBoundCost()
34-
return partial
35-
}
36-
37-
func (p partialPlan) hasAnyIndexPredicate() bool {
38-
for _, useIndex := range p.indexPredicate {
39-
if useIndex {
40-
return true
41-
}
42-
}
43-
return false
44-
}
45-
46-
func (p partialPlan) LowerBoundCost() float64 {
47-
return p.indexLookupCost() + p.intersectionCost() + p.seriesRetrievalCost() + p.filterCost()
48-
}
49-
50-
// indexLookupCost returns the cost of performing index lookups for all predicates that use the index
51-
func (p partialPlan) indexLookupCost() float64 {
52-
cost := 0.0
53-
for i := range p.predicates {
54-
pr, ok := p.virtualPredicate(i)
55-
if !ok {
56-
continue
57-
}
58-
59-
cost += pr.indexLookupCost()
60-
}
61-
return cost
19+
cost float64
6220
}
6321

64-
// virtualPredicate returns the predicate at idx and whether it's an index predicate.
65-
// For undecided predicates:
66-
// - The first undecided predicate is treated as an index predicate for lower bound calculation
67-
// - All other undecided predicates are treated as scan predicates with minimal cost
68-
// This goal of virtual undecided predicates is to minimize the cost of the whole plan.
69-
func (p partialPlan) virtualPredicate(idx int) (planPredicate, bool) {
70-
if idx < p.numDecidedPredicates {
71-
return p.predicates[idx], p.indexPredicate[idx]
22+
func newPlanWithCost(p plan, numDecidedPredicates int) planWithCost {
23+
p.numDecidedPredicates = numDecidedPredicates
24+
return planWithCost{
25+
plan: p,
26+
cost: p.TotalCost(),
7227
}
73-
74-
virtualPred := p.predicates[idx]
75-
// Very cheap single match cost, but still non-zero so that there is a difference between using index and not using index for a predicate.
76-
virtualPred.singleMatchCost = 1
77-
// Don't assume 0 cardinality because that might make the whole plan have 0 cardinality which is unrealistic.
78-
virtualPred.cardinality = 1
79-
// Don't assume 0 unique label values because that might make the whole plan have 0 cardinality which is unrealistic.
80-
virtualPred.labelNameUniqueVals = 1
81-
// We don't want selectivity of 0 because then the cost of the rest of the predicates might not matter.
82-
virtualPred.selectivity = 1
83-
// Assume extremely cheap index scan cost.
84-
virtualPred.indexScanCost = 1
85-
86-
return virtualPred, idx == p.numDecidedPredicates
8728
}
8829

89-
// intersectionCost returns the cost of intersecting posting lists from multiple index predicates
90-
// This includes retrieving the series' labels from the index.
91-
func (p partialPlan) intersectionCost() float64 {
92-
iteratedPostings := uint64(0)
93-
for i := range p.predicates {
94-
pred, ok := p.virtualPredicate(i)
95-
if !ok {
96-
continue
97-
}
98-
99-
iteratedPostings += pred.cardinality
100-
}
30+
// plans implements heap.Interface for a min-heap of plans ordered by lower bound cost.
31+
type plans []planWithCost
10132

102-
return float64(iteratedPostings) * p.config.RetrievedPostingCost
103-
}
33+
func (pq plans) Len() int { return len(pq) }
10434

105-
// seriesRetrievalCost returns the cost of retrieving series from the index after intersecting posting lists.
106-
// This includes retrieving the series' labels from the index and checking if the series belongs to the query's shard.
107-
// Realistically we don't retrieve every series because we have the series hash cache, but we ignore that for simplicity.
108-
func (p partialPlan) seriesRetrievalCost() float64 {
109-
return float64(p.NumSelectedPostings()) * p.config.RetrievedSeriesCost
35+
func (pq plans) Less(i, j int) bool {
36+
return pq[i].cost < pq[j].cost
11037
}
11138

112-
// filterCost returns the cost of applying scan predicates to the fetched series.
113-
// The sequence is: intersection → retrieve series → check shard → apply scan matchers.
114-
func (p partialPlan) filterCost() float64 {
115-
cost := 0.0
116-
seriesToFilter := p.numSelectedPostingsInOurShard()
117-
for i := range p.predicates {
118-
// In reality, we will apply all the predicates for each series and stop once one predicate doesn't match.
119-
// But we calculate for the worst case where we have to run all predicates for all series.
120-
pred, ok := p.virtualPredicate(i)
121-
if ok {
122-
continue
123-
}
124-
125-
cost += pred.filterCost(seriesToFilter)
126-
}
127-
return cost
128-
}
129-
130-
func (p partialPlan) numSelectedPostingsInOurShard() uint64 {
131-
return shardedCardinality(p.NumSelectedPostings(), p.shard)
132-
}
133-
134-
func (p partialPlan) NumSelectedPostings() uint64 {
135-
finalSelectivity := 1.0
136-
for i := range p.predicates {
137-
pred, ok := p.virtualPredicate(i)
138-
if !ok {
139-
continue
140-
}
141-
142-
// We use the selectivity across all series instead of the selectivity across label values.
143-
// For example, if {protocol=~.*} matches all values, it doesn't mean it won't reduce the result set after intersection.
144-
//
145-
// We also assume independence between the predicates. This is a simplification.
146-
// For example, the selectivity of {pod=~prometheus.*} doesn't depend on if we have already applied {statefulset=prometheus}.
147-
// While finalSelectivity is neither an upper bound nor a lower bound, assuming independence allows us to come up with cost estimates comparable between plans.
148-
finalSelectivity *= float64(pred.cardinality) / float64(p.totalSeries)
149-
}
150-
return uint64(finalSelectivity * float64(p.totalSeries))
151-
}
152-
153-
// nonShardedCardinality returns an estimate of the total number of series before query sharding is applied.
154-
// This is the base cardinality considering only the selectivity of all predicates.
155-
func (p partialPlan) nonShardedCardinality() uint64 {
156-
finalSelectivity := 1.0
157-
for i := range p.predicates {
158-
pred, _ := p.virtualPredicate(i)
159-
// We use the selectivity across all series instead of the selectivity across label values.
160-
// For example, if {protocol=~.*} matches all values, it could still reduce the result set after intersection.
161-
//
162-
// We also assume independence between the predicates. This is a simplification.
163-
// For example, the selectivity of {pod=~prometheus.*} doesn't depend on if we have already applied {statefulset=prometheus}.
164-
finalSelectivity *= float64(pred.cardinality) / float64(p.totalSeries)
165-
}
166-
return uint64(finalSelectivity * float64(p.totalSeries))
167-
}
168-
169-
// FinalCardinality returns an estimate of the total number of series that this plan would return.
170-
func (p partialPlan) FinalCardinality() uint64 {
171-
return shardedCardinality(p.nonShardedCardinality(), p.shard)
172-
}
173-
174-
// partialPlans implements heap.Interface for a min-heap of partial plans ordered by lower bound.
175-
type partialPlans []partialPlan
176-
177-
func (pq partialPlans) Len() int { return len(pq) }
178-
179-
func (pq partialPlans) Less(i, j int) bool {
180-
return pq[i].lowerBoundCost < pq[j].lowerBoundCost
181-
}
182-
183-
func (pq partialPlans) Swap(i, j int) {
39+
func (pq plans) Swap(i, j int) {
18440
pq[i], pq[j] = pq[j], pq[i]
18541
}
18642

187-
func (pq *partialPlans) Push(x interface{}) {
188-
*pq = append(*pq, x.(partialPlan))
43+
func (pq *plans) Push(x interface{}) {
44+
*pq = append(*pq, x.(planWithCost))
18945
}
19046

191-
func (pq *partialPlans) Pop() interface{} {
47+
func (pq *plans) Pop() interface{} {
19248
old := *pq
19349
n := len(old)
19450
item := old[n-1]
19551
*pq = old[0 : n-1]
19652
return item
19753
}
19854

199-
func (pq partialPlans) Iterator() iter.Seq[plan] {
55+
func (pq plans) Iterator() iter.Seq[plan] {
20056
return func(f func(plan) bool) {
20157
for _, p := range pq {
20258
if !f(p.plan) {
@@ -209,20 +65,20 @@ func (pq partialPlans) Iterator() iter.Seq[plan] {
20965
// generatePlansBranchAndBound uses branch-and-bound to explore the space of possible plans.
21066
// It prunes branches that cannot possibly lead to a better plan than the current best.
21167
func (p CostBasedPlanner) generatePlansBranchAndBound(ctx context.Context, statistics index.Statistics, matchers []*labels.Matcher, pools *costBasedPlannerPools, shard *sharding.ShardSelector) iter.Seq[plan] {
212-
// Initialize priority queue with the root partial plan (all predicates undecided)
213-
prospectPlans := pools.GetPartialPlans(maxPlansForPlanning)
68+
// Initialize priority queue with the root plan (all predicates undecided)
69+
prospectPlans := pools.GetPlans(maxPlansForPlanning)
21470
scanOnlyPlan := newScanOnlyPlan(ctx, statistics, p.config, matchers, pools.indexPredicatesPool, shard)
215-
heap.Push(prospectPlans, partialPlanWithLowerBound(scanOnlyPlan, 0))
71+
heap.Push(prospectPlans, newPlanWithCost(scanOnlyPlan, 0))
21672

217-
completePlans := pools.GetPartialPlans(maxPlansForPlanning)
218-
bestCompleteCost := float64(1<<63 - 1) // Start with max float64
73+
completePlans := pools.GetPlans(maxPlansForPlanning)
74+
bestCompleteCost := math.MaxFloat64
21975
numPredicates := len(scanOnlyPlan.predicates)
22076

22177
for i := maxPlansForPlanning; prospectPlans.Len() > 0 && i > 0; i-- {
222-
current := heap.Pop(prospectPlans).(partialPlan)
78+
current := heap.Pop(prospectPlans).(planWithCost)
22379

22480
// Prune: if lower bound is worse than best complete plan, skip this branch
225-
if current.lowerBoundCost >= bestCompleteCost {
81+
if current.cost >= bestCompleteCost {
22682
continue
22783
}
22884

@@ -234,40 +90,37 @@ func (p CostBasedPlanner) generatePlansBranchAndBound(ctx context.Context, stati
23490
// This means we should also not use scan-only plans for pruning because their low cost is not a cost we can actually achieve.
23591
continue
23692
}
237-
actualCost := current.plan.TotalCost()
238-
current.lowerBoundCost = actualCost
23993
heap.Push(completePlans, current)
24094

24195
// Update best complete cost for pruning
242-
if actualCost < bestCompleteCost {
243-
bestCompleteCost = actualCost
96+
if current.cost < bestCompleteCost {
97+
bestCompleteCost = current.cost
24498
}
24599
continue
246100
}
247101

248102
// Branch: create children by deciding the next undecided predicate
249-
nextPredicateIdx := current.numDecidedPredicates
250-
251-
indexChild := current.plan.UseIndexFor(nextPredicateIdx)
252-
heap.Push(prospectPlans, partialPlanWithLowerBound(indexChild, nextPredicateIdx+1))
253-
heap.Push(prospectPlans, partialPlanWithLowerBound(current.plan, nextPredicateIdx+1))
103+
indexChild := current.UseIndexFor(current.numDecidedPredicates)
104+
heap.Push(prospectPlans, newPlanWithCost(indexChild, current.numDecidedPredicates+1))
105+
heap.Push(prospectPlans, newPlanWithCost(current.plan, current.numDecidedPredicates+1))
254106
}
255107

256108
// Fall back to index-only plan to ensure that our code doesn't choose a more expensive plan than the naive plan.
257109
indexOnlyPlan := newIndexOnlyPlan(ctx, statistics, p.config, matchers, pools.indexPredicatesPool, shard)
258-
heap.Push(completePlans, partialPlanWithLowerBound(indexOnlyPlan, numPredicates))
110+
heap.Push(completePlans, newPlanWithCost(indexOnlyPlan, numPredicates))
259111

260112
// Push all plans from the smaller heap into the larger one
261113
// We need this because we will need to find a plan with at least one index matcher later,
262114
// and we might not find that in either of the heaps alone.
263115
return mergePlans(completePlans, prospectPlans).Iterator()
264116
}
265117

266-
func mergePlans(completePlans, prospectPlans *partialPlans) *partialPlans {
118+
func mergePlans(completePlans, prospectPlans *plans) *plans {
267119
for prospectPlans.Len() > 0 {
268-
p := heap.Pop(prospectPlans).(partialPlan)
269-
// At this point we'll be choosing the cheapest plan. we shouldn't be considering the lower bound as the cost of the plan.
270-
p.lowerBoundCost = p.plan.TotalCost()
120+
p := heap.Pop(prospectPlans).(planWithCost)
121+
// Ensure plan is marked as complete for proper cost calculation
122+
p.numDecidedPredicates = len(p.predicates)
123+
p.cost = p.TotalCost()
271124
heap.Push(completePlans, p)
272125
}
273126
return completePlans

pkg/ingester/lookupplan/plan.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,14 @@ func newScanOnlyPlan(ctx context.Context, stats index.Statistics, config CostCon
5858
return p
5959
}
6060

61+
func newIndexOnlyPlan(ctx context.Context, stats index.Statistics, config CostConfig, matchers []*labels.Matcher, predicatesPool *pool.SlabPool[bool], shard *sharding.ShardSelector) plan {
62+
p := newScanOnlyPlan(ctx, stats, config, matchers, predicatesPool, shard)
63+
for i := range p.indexPredicate {
64+
p.indexPredicate[i] = true
65+
}
66+
return p
67+
}
68+
6169
// virtualPredicate returns the predicate at idx and whether it's an index predicate.
6270
// For undecided predicates:
6371
// - The first undecided predicate is treated as an index predicate for lower bound calculation
@@ -83,12 +91,13 @@ func (p plan) virtualPredicate(idx int) (planPredicate, bool) {
8391
return virtualPred, idx == p.numDecidedPredicates
8492
}
8593

86-
func newIndexOnlyPlan(ctx context.Context, stats index.Statistics, config CostConfig, matchers []*labels.Matcher, predicatesPool *pool.SlabPool[bool], shard *sharding.ShardSelector) plan {
87-
p := newScanOnlyPlan(ctx, stats, config, matchers, predicatesPool, shard)
88-
for i := range p.indexPredicate {
89-
p.indexPredicate[i] = true
94+
func (p plan) hasAnyIndexPredicate() bool {
95+
for _, useIndex := range p.indexPredicate {
96+
if useIndex {
97+
return true
98+
}
9099
}
91-
return p
100+
return false
92101
}
93102

94103
func (p plan) IndexMatchers() []*labels.Matcher {

pkg/ingester/lookupplan/planner.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ func (i NoopPlanner) PlanIndexLookup(_ context.Context, plan index.LookupPlan, _
2828
}
2929

3030
var (
31-
rawPartialPlansPool = &sync.Pool{}
31+
rawPlansWithCostPool = &sync.Pool{}
3232
rawIndexPredicatesPool = &sync.Pool{}
3333
)
3434

@@ -47,24 +47,24 @@ const (
4747
)
4848

4949
type costBasedPlannerPools struct {
50-
partialPlansPool *pool.SlabPool[partialPlan]
50+
plansWithCostPool *pool.SlabPool[planWithCost]
5151
indexPredicatesPool *pool.SlabPool[bool]
5252
}
5353

5454
func newCostBasedPlannerPools() *costBasedPlannerPools {
5555
return &costBasedPlannerPools{
56-
partialPlansPool: pool.NewSlabPool[partialPlan](rawPartialPlansPool, maxPlansForPlanning),
56+
plansWithCostPool: pool.NewSlabPool[planWithCost](rawPlansWithCostPool, maxPlansForPlanning),
5757
indexPredicatesPool: pool.NewSlabPool[bool](rawIndexPredicatesPool, predicateIndexSlicesTotalLen),
5858
}
5959
}
6060

61-
func (p *costBasedPlannerPools) GetPartialPlans(num int) *partialPlans {
62-
plans := p.partialPlansPool.Get(num)[:0]
63-
return (*partialPlans)(&plans)
61+
func (p *costBasedPlannerPools) GetPlans(num int) *plans {
62+
pls := p.plansWithCostPool.Get(num)[:0]
63+
return (*plans)(&pls)
6464
}
6565

6666
func (p *costBasedPlannerPools) Release() {
67-
p.partialPlansPool.Release()
67+
p.plansWithCostPool.Release()
6868
p.indexPredicatesPool.Release()
6969
}
7070

0 commit comments

Comments
 (0)