Skip to content

Commit 9a9da0c

Browse files
Remove partialPlan and use plan with virtualPredicate() for branch-and-bound
Remove the partialPlan struct and its duplicated cost calculation methods. Instead, use the plan struct directly with its virtualPredicate() method and numDecidedPredicates field to compute lower bound costs. Changes: - Removed partialPlan struct - Removed all cost methods from partialPlan (now inherited from plan) - Changed partialPlans heap type to plans - Updated pools to use plan instead of partialPlan - Branch-and-bound now uses plan directly with numDecidedPredicates tracking - TotalCost() automatically computes lower bound when numDecidedPredicates < len(predicates) This simplifies the code by eliminating duplication while maintaining the same behavior.
1 parent 4d94509 commit 9a9da0c

File tree

4 files changed

+333
-183
lines changed

4 files changed

+333
-183
lines changed

pkg/ingester/lookupplan/branch_and_bound.go

Lines changed: 33 additions & 176 deletions
Original file line numberDiff line numberDiff line change
@@ -13,28 +13,7 @@ import (
1313
"github.com/grafana/mimir/pkg/storage/sharding"
1414
)
1515

16-
// partialPlan represents a plan where only some predicates have been decided.
17-
// Predicates are decided in order from 0 to len(predicates)-1.
18-
type partialPlan struct {
19-
plan
20-
21-
// lowerBoundCost is the value of LowerBoundCost() cached for efficiency.
22-
lowerBoundCost float64
23-
// numDecidedPredicates tracks how many predicates have been decided (0 to len(predicates)).
24-
// Predicates [0, numDecidedPredicates) have been decided.
25-
numDecidedPredicates int
26-
}
27-
28-
func partialPlanWithLowerBound(p plan, numDecided int) partialPlan {
29-
partial := partialPlan{
30-
plan: p,
31-
numDecidedPredicates: numDecided,
32-
}
33-
partial.lowerBoundCost = partial.LowerBoundCost()
34-
return partial
35-
}
36-
37-
func (p partialPlan) hasAnyIndexPredicate() bool {
16+
func hasAnyIndexPredicate(p plan) bool {
3817
for _, useIndex := range p.indexPredicate {
3918
if useIndex {
4019
return true
@@ -43,163 +22,40 @@ func (p partialPlan) hasAnyIndexPredicate() bool {
4322
return false
4423
}
4524

46-
func (p partialPlan) LowerBoundCost() float64 {
47-
return p.indexLookupCost() + p.intersectionCost() + p.seriesRetrievalCost() + p.filterCost()
48-
}
49-
50-
// indexLookupCost returns the cost of performing index lookups for all predicates that use the index
51-
func (p partialPlan) indexLookupCost() float64 {
52-
cost := 0.0
53-
for i := range p.predicates {
54-
pr, ok := p.virtualPredicate(i)
55-
if !ok {
56-
continue
57-
}
58-
59-
cost += pr.indexLookupCost()
60-
}
61-
return cost
62-
}
63-
64-
// virtualPredicate returns the predicate at idx and whether it's an index predicate.
65-
// For undecided predicates:
66-
// - The first undecided predicate is treated as an index predicate for lower bound calculation
67-
// - All other undecided predicates are treated as scan predicates with minimal cost
68-
// This goal of virtual undecided predicates is to minimize the cost of the whole plan.
69-
func (p partialPlan) virtualPredicate(idx int) (planPredicate, bool) {
70-
if idx < p.numDecidedPredicates {
71-
return p.predicates[idx], p.indexPredicate[idx]
72-
}
73-
74-
virtualPred := p.predicates[idx]
75-
// Very cheap single match cost, but still non-zero so that there is a difference between using index and not using index for a predicate.
76-
virtualPred.singleMatchCost = 1
77-
// Don't assume 0 cardinality because that might make the whole plan have 0 cardinality which is unrealistic.
78-
virtualPred.cardinality = 1
79-
// Don't assume 0 unique label values because that might make the whole plan have 0 cardinality which is unrealistic.
80-
virtualPred.labelNameUniqueVals = 1
81-
// We don't want selectivity of 0 because then the cost of the rest of the predicates might not matter.
82-
virtualPred.selectivity = 1
83-
// Assume extremely cheap index scan cost.
84-
virtualPred.indexScanCost = 1
85-
86-
return virtualPred, idx == p.numDecidedPredicates
87-
}
88-
89-
// intersectionCost returns the cost of intersecting posting lists from multiple index predicates
90-
// This includes retrieving the series' labels from the index.
91-
func (p partialPlan) intersectionCost() float64 {
92-
iteratedPostings := uint64(0)
93-
for i := range p.predicates {
94-
pred, ok := p.virtualPredicate(i)
95-
if !ok {
96-
continue
97-
}
98-
99-
iteratedPostings += pred.cardinality
100-
}
101-
102-
return float64(iteratedPostings) * p.config.RetrievedPostingCost
103-
}
104-
105-
// seriesRetrievalCost returns the cost of retrieving series from the index after intersecting posting lists.
106-
// This includes retrieving the series' labels from the index and checking if the series belongs to the query's shard.
107-
// Realistically we don't retrieve every series because we have the series hash cache, but we ignore that for simplicity.
108-
func (p partialPlan) seriesRetrievalCost() float64 {
109-
return float64(p.NumSelectedPostings()) * p.config.RetrievedSeriesCost
110-
}
111-
112-
// filterCost returns the cost of applying scan predicates to the fetched series.
113-
// The sequence is: intersection → retrieve series → check shard → apply scan matchers.
114-
func (p partialPlan) filterCost() float64 {
115-
cost := 0.0
116-
seriesToFilter := p.numSelectedPostingsInOurShard()
117-
for i := range p.predicates {
118-
// In reality, we will apply all the predicates for each series and stop once one predicate doesn't match.
119-
// But we calculate for the worst case where we have to run all predicates for all series.
120-
pred, ok := p.virtualPredicate(i)
121-
if ok {
122-
continue
123-
}
124-
125-
cost += pred.filterCost(seriesToFilter)
126-
}
127-
return cost
128-
}
129-
130-
func (p partialPlan) numSelectedPostingsInOurShard() uint64 {
131-
return shardedCardinality(p.NumSelectedPostings(), p.shard)
132-
}
133-
134-
func (p partialPlan) NumSelectedPostings() uint64 {
135-
finalSelectivity := 1.0
136-
for i := range p.predicates {
137-
pred, ok := p.virtualPredicate(i)
138-
if !ok {
139-
continue
140-
}
141-
142-
// We use the selectivity across all series instead of the selectivity across label values.
143-
// For example, if {protocol=~.*} matches all values, it doesn't mean it won't reduce the result set after intersection.
144-
//
145-
// We also assume independence between the predicates. This is a simplification.
146-
// For example, the selectivity of {pod=~prometheus.*} doesn't depend on if we have already applied {statefulset=prometheus}.
147-
// While finalSelectivity is neither an upper bound nor a lower bound, assuming independence allows us to come up with cost estimates comparable between plans.
148-
finalSelectivity *= float64(pred.cardinality) / float64(p.totalSeries)
149-
}
150-
return uint64(finalSelectivity * float64(p.totalSeries))
151-
}
152-
153-
// nonShardedCardinality returns an estimate of the total number of series before query sharding is applied.
154-
// This is the base cardinality considering only the selectivity of all predicates.
155-
func (p partialPlan) nonShardedCardinality() uint64 {
156-
finalSelectivity := 1.0
157-
for i := range p.predicates {
158-
pred, _ := p.virtualPredicate(i)
159-
// We use the selectivity across all series instead of the selectivity across label values.
160-
// For example, if {protocol=~.*} matches all values, it could still reduce the result set after intersection.
161-
//
162-
// We also assume independence between the predicates. This is a simplification.
163-
// For example, the selectivity of {pod=~prometheus.*} doesn't depend on if we have already applied {statefulset=prometheus}.
164-
finalSelectivity *= float64(pred.cardinality) / float64(p.totalSeries)
165-
}
166-
return uint64(finalSelectivity * float64(p.totalSeries))
167-
}
168-
169-
// FinalCardinality returns an estimate of the total number of series that this plan would return.
170-
func (p partialPlan) FinalCardinality() uint64 {
171-
return shardedCardinality(p.nonShardedCardinality(), p.shard)
25+
func planWithLowerBound(p plan, numDecided int) plan {
26+
p.numDecidedPredicates = numDecided
27+
return p
17228
}
17329

174-
// partialPlans implements heap.Interface for a min-heap of partial plans ordered by lower bound.
175-
type partialPlans []partialPlan
30+
// plans implements heap.Interface for a min-heap of plans ordered by lower bound cost.
31+
type plans []plan
17632

177-
func (pq partialPlans) Len() int { return len(pq) }
33+
func (pq plans) Len() int { return len(pq) }
17834

179-
func (pq partialPlans) Less(i, j int) bool {
180-
return pq[i].lowerBoundCost < pq[j].lowerBoundCost
35+
func (pq plans) Less(i, j int) bool {
36+
return pq[i].TotalCost() < pq[j].TotalCost()
18137
}
18238

183-
func (pq partialPlans) Swap(i, j int) {
39+
func (pq plans) Swap(i, j int) {
18440
pq[i], pq[j] = pq[j], pq[i]
18541
}
18642

187-
func (pq *partialPlans) Push(x interface{}) {
188-
*pq = append(*pq, x.(partialPlan))
43+
func (pq *plans) Push(x interface{}) {
44+
*pq = append(*pq, x.(plan))
18945
}
19046

191-
func (pq *partialPlans) Pop() interface{} {
47+
func (pq *plans) Pop() interface{} {
19248
old := *pq
19349
n := len(old)
19450
item := old[n-1]
19551
*pq = old[0 : n-1]
19652
return item
19753
}
19854

199-
func (pq partialPlans) Iterator() iter.Seq[plan] {
55+
func (pq plans) Iterator() iter.Seq[plan] {
20056
return func(f func(plan) bool) {
20157
for _, p := range pq {
202-
if !f(p.plan) {
58+
if !f(p) {
20359
return
20460
}
20561
}
@@ -209,33 +65,34 @@ func (pq partialPlans) Iterator() iter.Seq[plan] {
20965
// generatePlansBranchAndBound uses branch-and-bound to explore the space of possible plans.
21066
// It prunes branches that cannot possibly lead to a better plan than the current best.
21167
func (p CostBasedPlanner) generatePlansBranchAndBound(ctx context.Context, statistics index.Statistics, matchers []*labels.Matcher, pools *costBasedPlannerPools, shard *sharding.ShardSelector) iter.Seq[plan] {
212-
// Initialize priority queue with the root partial plan (all predicates undecided)
213-
prospectPlans := pools.GetPartialPlans(maxPlansForPlanning)
68+
// Initialize priority queue with the root plan (all predicates undecided)
69+
prospectPlans := pools.GetPlans(maxPlansForPlanning)
21470
scanOnlyPlan := newScanOnlyPlan(ctx, statistics, p.config, matchers, pools.indexPredicatesPool, shard)
215-
heap.Push(prospectPlans, partialPlanWithLowerBound(scanOnlyPlan, 0))
71+
heap.Push(prospectPlans, planWithLowerBound(scanOnlyPlan, 0))
21672

217-
completePlans := pools.GetPartialPlans(maxPlansForPlanning)
73+
completePlans := pools.GetPlans(maxPlansForPlanning)
21874
bestCompleteCost := float64(1<<63 - 1) // Start with max float64
21975
numPredicates := len(scanOnlyPlan.predicates)
22076

22177
for i := maxPlansForPlanning; prospectPlans.Len() > 0 && i > 0; i-- {
222-
current := heap.Pop(prospectPlans).(partialPlan)
78+
current := heap.Pop(prospectPlans).(plan)
22379

22480
// Prune: if lower bound is worse than best complete plan, skip this branch
225-
if current.lowerBoundCost >= bestCompleteCost {
81+
if current.TotalCost() >= bestCompleteCost {
22682
continue
22783
}
22884

22985
// Check if this is a complete plan (all predicates decided)
23086
if current.numDecidedPredicates == numPredicates {
231-
if !current.hasAnyIndexPredicate() {
87+
if !hasAnyIndexPredicate(current) {
23288
// We only want plans with at least one index predicate here.
23389
// Plans without index predicates will return no postings.
23490
// This means we should also not use scan-only plans for pruning because their low cost is not a cost we can actually achieve.
23591
continue
23692
}
237-
actualCost := current.plan.TotalCost()
238-
current.lowerBoundCost = actualCost
93+
// Mark plan as complete (all predicates decided)
94+
current.numDecidedPredicates = len(current.predicates)
95+
actualCost := current.TotalCost()
23996
heap.Push(completePlans, current)
24097

24198
// Update best complete cost for pruning
@@ -248,26 +105,26 @@ func (p CostBasedPlanner) generatePlansBranchAndBound(ctx context.Context, stati
248105
// Branch: create children by deciding the next undecided predicate
249106
nextPredicateIdx := current.numDecidedPredicates
250107

251-
indexChild := current.plan.UseIndexFor(nextPredicateIdx)
252-
heap.Push(prospectPlans, partialPlanWithLowerBound(indexChild, nextPredicateIdx+1))
253-
heap.Push(prospectPlans, partialPlanWithLowerBound(current.plan, nextPredicateIdx+1))
108+
indexChild := current.UseIndexFor(nextPredicateIdx)
109+
heap.Push(prospectPlans, planWithLowerBound(indexChild, nextPredicateIdx+1))
110+
heap.Push(prospectPlans, planWithLowerBound(current, nextPredicateIdx+1))
254111
}
255112

256113
// Fall back to index-only plan to ensure that our code doesn't choose a more expensive plan than the naive plan.
257114
indexOnlyPlan := newIndexOnlyPlan(ctx, statistics, p.config, matchers, pools.indexPredicatesPool, shard)
258-
heap.Push(completePlans, partialPlanWithLowerBound(indexOnlyPlan, numPredicates))
115+
heap.Push(completePlans, indexOnlyPlan)
259116

260117
// Push all plans from the smaller heap into the larger one
261118
// We need this because we will need to find a plan with at least one index matcher later,
262119
// and we might not find that in either of the heaps alone.
263120
return mergePlans(completePlans, prospectPlans).Iterator()
264121
}
265122

266-
func mergePlans(completePlans, prospectPlans *partialPlans) *partialPlans {
123+
func mergePlans(completePlans, prospectPlans *plans) *plans {
267124
for prospectPlans.Len() > 0 {
268-
p := heap.Pop(prospectPlans).(partialPlan)
269-
// At this point we'll be choosing the cheapest plan. we shouldn't be considering the lower bound as the cost of the plan.
270-
p.lowerBoundCost = p.plan.TotalCost()
125+
p := heap.Pop(prospectPlans).(plan)
126+
// Ensure plan is marked as complete for proper cost calculation
127+
p.numDecidedPredicates = len(p.predicates)
271128
heap.Push(completePlans, p)
272129
}
273130
return completePlans

pkg/ingester/lookupplan/planner.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ func (i NoopPlanner) PlanIndexLookup(_ context.Context, plan index.LookupPlan, _
2828
}
2929

3030
var (
31-
rawPartialPlansPool = &sync.Pool{}
31+
rawPlansPool = &sync.Pool{}
3232
rawIndexPredicatesPool = &sync.Pool{}
3333
)
3434

@@ -47,24 +47,24 @@ const (
4747
)
4848

4949
type costBasedPlannerPools struct {
50-
partialPlansPool *pool.SlabPool[partialPlan]
50+
plansPool *pool.SlabPool[plan]
5151
indexPredicatesPool *pool.SlabPool[bool]
5252
}
5353

5454
func newCostBasedPlannerPools() *costBasedPlannerPools {
5555
return &costBasedPlannerPools{
56-
partialPlansPool: pool.NewSlabPool[partialPlan](rawPartialPlansPool, maxPlansForPlanning),
56+
plansPool: pool.NewSlabPool[plan](rawPlansPool, maxPlansForPlanning),
5757
indexPredicatesPool: pool.NewSlabPool[bool](rawIndexPredicatesPool, predicateIndexSlicesTotalLen),
5858
}
5959
}
6060

61-
func (p *costBasedPlannerPools) GetPartialPlans(num int) *partialPlans {
62-
plans := p.partialPlansPool.Get(num)[:0]
63-
return (*partialPlans)(&plans)
61+
func (p *costBasedPlannerPools) GetPlans(num int) *plans {
62+
pls := p.plansPool.Get(num)[:0]
63+
return (*plans)(&pls)
6464
}
6565

6666
func (p *costBasedPlannerPools) Release() {
67-
p.partialPlansPool.Release()
67+
p.plansPool.Release()
6868
p.indexPredicatesPool.Release()
6969
}
7070

0 commit comments

Comments
 (0)