Remove partialPlan and use plan with virtualPredicate() for branch-and-bound

dimitarvdimitrov · dimitarvdimitrov · commit 17d31325aa0a · 2025-11-19T16:34:12.000+01:00
Remove the partialPlan struct and its duplicated cost calculation methods.
Instead, use the plan struct directly with its virtualPredicate() method
and numDecidedPredicates field to compute lower bound costs.

Changes:
- Removed partialPlan struct
- Removed all cost methods from partialPlan (now inherited from plan)
- Changed partialPlans heap type to plans
- Updated pools to use plan instead of partialPlan
- Branch-and-bound now uses plan directly with numDecidedPredicates tracking
- TotalCost() automatically computes lower bound when numDecidedPredicates < len(predicates)

This simplifies the code by eliminating duplication while maintaining the same behavior.

Signed-off-by: Dimitar Dimitrov <dimitar.dimitrov@grafana.com>
diff --git a/pkg/ingester/lookupplan/branch_and_bound.go b/pkg/ingester/lookupplan/branch_and_bound.go
@@ -6,197 +6,53 @@ import (
 	"container/heap"
 	"context"
 	"iter"
+	"math"
 
 	"github.com/prometheus/prometheus/model/labels"
 	"github.com/prometheus/prometheus/tsdb/index"
 
 	"github.com/grafana/mimir/pkg/storage/sharding"
 )
 
-// partialPlan represents a plan where only some predicates have been decided.
-// Predicates are decided in order from 0 to len(predicates)-1.
-type partialPlan struct {
+type planWithCost struct {
 	plan
-
-	// lowerBoundCost is the value of LowerBoundCost() cached for efficiency.
-	lowerBoundCost float64
-	// numDecidedPredicates tracks how many predicates have been decided (0 to len(predicates)).
-	// Predicates [0, numDecidedPredicates) have been decided.
-	numDecidedPredicates int
-}
-
-func partialPlanWithLowerBound(p plan, numDecided int) partialPlan {
-	partial := partialPlan{
-		plan:                 p,
-		numDecidedPredicates: numDecided,
-	}
-	partial.lowerBoundCost = partial.LowerBoundCost()
-	return partial
-}
-
-func (p partialPlan) hasAnyIndexPredicate() bool {
-	for _, useIndex := range p.indexPredicate {
-		if useIndex {
-			return true
-		}
-	}
-	return false
-}
-
-func (p partialPlan) LowerBoundCost() float64 {
-	return p.indexLookupCost() + p.intersectionCost() + p.seriesRetrievalCost() + p.filterCost()
-}
-
-// indexLookupCost returns the cost of performing index lookups for all predicates that use the index
-func (p partialPlan) indexLookupCost() float64 {
-	cost := 0.0
-	for i := range p.predicates {
-		pr, ok := p.virtualPredicate(i)
-		if !ok {
-			continue
-		}
-
-		cost += pr.indexLookupCost()
-	}
-	return cost
+	cost float64
 }
 
-// virtualPredicate returns the predicate at idx and whether it's an index predicate.
-// For undecided predicates:
-// - The first undecided predicate is treated as an index predicate for lower bound calculation
-// - All other undecided predicates are treated as scan predicates with minimal cost
-// This goal of virtual undecided predicates is to minimize the cost of the whole plan.
-func (p partialPlan) virtualPredicate(idx int) (planPredicate, bool) {
-	if idx < p.numDecidedPredicates {
-		return p.predicates[idx], p.indexPredicate[idx]
+func newPlanWithCost(p plan, numDecidedPredicates int) planWithCost {
+	p.numDecidedPredicates = numDecidedPredicates
+	return planWithCost{
+		plan: p,
+		cost: p.TotalCost(),
 	}
-
-	virtualPred := p.predicates[idx]
-	// Very cheap single match cost, but still non-zero so that there is a difference between using index and not using index for a predicate.
-	virtualPred.singleMatchCost = 1
-	// Don't assume 0 cardinality because that might make the whole plan have 0 cardinality which is unrealistic.
-	virtualPred.cardinality = 1
-	// Don't assume 0 unique label values because that might make the whole plan have 0 cardinality which is unrealistic.
-	virtualPred.labelNameUniqueVals = 1
-	// We don't want selectivity of 0 because then the cost of the rest of the predicates might not matter.
-	virtualPred.selectivity = 1
-	// Assume extremely cheap index scan cost.
-	virtualPred.indexScanCost = 1
-
-	return virtualPred, idx == p.numDecidedPredicates
 }
 
-// intersectionCost returns the cost of intersecting posting lists from multiple index predicates
-// This includes retrieving the series' labels from the index.
-func (p partialPlan) intersectionCost() float64 {
-	iteratedPostings := uint64(0)
-	for i := range p.predicates {
-		pred, ok := p.virtualPredicate(i)
-		if !ok {
-			continue
-		}
-
-		iteratedPostings += pred.cardinality
-	}
+// plans implements heap.Interface for a min-heap of plans ordered by lower bound cost.
+type plans []planWithCost
 
-	return float64(iteratedPostings) * p.config.RetrievedPostingCost
-}
+func (pq plans) Len() int { return len(pq) }
 
-// seriesRetrievalCost returns the cost of retrieving series from the index after intersecting posting lists.
-// This includes retrieving the series' labels from the index and checking if the series belongs to the query's shard.
-// Realistically we don't retrieve every series because we have the series hash cache, but we ignore that for simplicity.
-func (p partialPlan) seriesRetrievalCost() float64 {
-	return float64(p.NumSelectedPostings()) * p.config.RetrievedSeriesCost
+func (pq plans) Less(i, j int) bool {
+	return pq[i].cost < pq[j].cost
 }
 
-// filterCost returns the cost of applying scan predicates to the fetched series.
-// The sequence is: intersection → retrieve series → check shard → apply scan matchers.
-func (p partialPlan) filterCost() float64 {
-	cost := 0.0
-	seriesToFilter := p.numSelectedPostingsInOurShard()
-	for i := range p.predicates {
-		// In reality, we will apply all the predicates for each series and stop once one predicate doesn't match.
-		// But we calculate for the worst case where we have to run all predicates for all series.
-		pred, ok := p.virtualPredicate(i)
-		if ok {
-			continue
-		}
-
-		cost += pred.filterCost(seriesToFilter)
-	}
-	return cost
-}
-
-func (p partialPlan) numSelectedPostingsInOurShard() uint64 {
-	return shardedCardinality(p.NumSelectedPostings(), p.shard)
-}
-
-func (p partialPlan) NumSelectedPostings() uint64 {
-	finalSelectivity := 1.0
-	for i := range p.predicates {
-		pred, ok := p.virtualPredicate(i)
-		if !ok {
-			continue
-		}
-
-		// We use the selectivity across all series instead of the selectivity across label values.
-		// For example, if {protocol=~.*} matches all values, it doesn't mean it won't reduce the result set after intersection.
-		//
-		// We also assume independence between the predicates. This is a simplification.
-		// For example, the selectivity of {pod=~prometheus.*} doesn't depend on if we have already applied {statefulset=prometheus}.
-		// While finalSelectivity is neither an upper bound nor a lower bound, assuming independence allows us to come up with cost estimates comparable between plans.
-		finalSelectivity *= float64(pred.cardinality) / float64(p.totalSeries)
-	}
-	return uint64(finalSelectivity * float64(p.totalSeries))
-}
-
-// nonShardedCardinality returns an estimate of the total number of series before query sharding is applied.
-// This is the base cardinality considering only the selectivity of all predicates.
-func (p partialPlan) nonShardedCardinality() uint64 {
-	finalSelectivity := 1.0
-	for i := range p.predicates {
-		pred, _ := p.virtualPredicate(i)
-		// We use the selectivity across all series instead of the selectivity across label values.
-		// For example, if {protocol=~.*} matches all values, it could still reduce the result set after intersection.
-		//
-		// We also assume independence between the predicates. This is a simplification.
-		// For example, the selectivity of {pod=~prometheus.*} doesn't depend on if we have already applied {statefulset=prometheus}.
-		finalSelectivity *= float64(pred.cardinality) / float64(p.totalSeries)
-	}
-	return uint64(finalSelectivity * float64(p.totalSeries))
-}
-
-// FinalCardinality returns an estimate of the total number of series that this plan would return.
-func (p partialPlan) FinalCardinality() uint64 {
-	return shardedCardinality(p.nonShardedCardinality(), p.shard)
-}
-
-// partialPlans implements heap.Interface for a min-heap of partial plans ordered by lower bound.
-type partialPlans []partialPlan
-
-func (pq partialPlans) Len() int { return len(pq) }
-
-func (pq partialPlans) Less(i, j int) bool {
-	return pq[i].lowerBoundCost < pq[j].lowerBoundCost
-}
-
-func (pq partialPlans) Swap(i, j int) {
+func (pq plans) Swap(i, j int) {
 	pq[i], pq[j] = pq[j], pq[i]
 }
 
-func (pq *partialPlans) Push(x interface{}) {
-	*pq = append(*pq, x.(partialPlan))
+func (pq *plans) Push(x interface{}) {
+	*pq = append(*pq, x.(planWithCost))
 }
 
-func (pq *partialPlans) Pop() interface{} {
+func (pq *plans) Pop() interface{} {
 	old := *pq
 	n := len(old)
 	item := old[n-1]
 	*pq = old[0 : n-1]
 	return item
 }
 
-func (pq partialPlans) Iterator() iter.Seq[plan] {
+func (pq plans) Iterator() iter.Seq[plan] {
 	return func(f func(plan) bool) {
 		for _, p := range pq {
 			if !f(p.plan) {
@@ -209,20 +65,20 @@ func (pq partialPlans) Iterator() iter.Seq[plan] {
 // generatePlansBranchAndBound uses branch-and-bound to explore the space of possible plans.
 // It prunes branches that cannot possibly lead to a better plan than the current best.
 func (p CostBasedPlanner) generatePlansBranchAndBound(ctx context.Context, statistics index.Statistics, matchers []*labels.Matcher, pools *costBasedPlannerPools, shard *sharding.ShardSelector) iter.Seq[plan] {
-	// Initialize priority queue with the root partial plan (all predicates undecided)
-	prospectPlans := pools.GetPartialPlans(maxPlansForPlanning)
+	// Initialize priority queue with the root plan (all predicates undecided)
+	prospectPlans := pools.GetPlans(maxPlansForPlanning)
 	scanOnlyPlan := newScanOnlyPlan(ctx, statistics, p.config, matchers, pools.indexPredicatesPool, shard)
-	heap.Push(prospectPlans, partialPlanWithLowerBound(scanOnlyPlan, 0))
+	heap.Push(prospectPlans, newPlanWithCost(scanOnlyPlan, 0))
 
-	completePlans := pools.GetPartialPlans(maxPlansForPlanning)
-	bestCompleteCost := float64(1<<63 - 1) // Start with max float64
+	completePlans := pools.GetPlans(maxPlansForPlanning)
+	bestCompleteCost := math.MaxFloat64
 	numPredicates := len(scanOnlyPlan.predicates)
 
 	for i := maxPlansForPlanning; prospectPlans.Len() > 0 && i > 0; i-- {
-		current := heap.Pop(prospectPlans).(partialPlan)
+		current := heap.Pop(prospectPlans).(planWithCost)
 
 		// Prune: if lower bound is worse than best complete plan, skip this branch
-		if current.lowerBoundCost >= bestCompleteCost {
+		if current.cost >= bestCompleteCost {
 			continue
 		}
 
@@ -234,40 +90,37 @@ func (p CostBasedPlanner) generatePlansBranchAndBound(ctx context.Context, stati
 				// This means we should also not use scan-only plans for pruning because their low cost is not a cost we can actually achieve.
 				continue
 			}
-			actualCost := current.plan.TotalCost()
-			current.lowerBoundCost = actualCost
 			heap.Push(completePlans, current)
 
 			// Update best complete cost for pruning
-			if actualCost < bestCompleteCost {
-				bestCompleteCost = actualCost
+			if current.cost < bestCompleteCost {
+				bestCompleteCost = current.cost
 			}
 			continue
 		}
 
 		// Branch: create children by deciding the next undecided predicate
-		nextPredicateIdx := current.numDecidedPredicates
-
-		indexChild := current.plan.UseIndexFor(nextPredicateIdx)
-		heap.Push(prospectPlans, partialPlanWithLowerBound(indexChild, nextPredicateIdx+1))
-		heap.Push(prospectPlans, partialPlanWithLowerBound(current.plan, nextPredicateIdx+1))
+		indexChild := current.UseIndexFor(current.numDecidedPredicates)
+		heap.Push(prospectPlans, newPlanWithCost(indexChild, current.numDecidedPredicates+1))
+		heap.Push(prospectPlans, newPlanWithCost(current.plan, current.numDecidedPredicates+1))
 	}
 
 	// Fall back to index-only plan to ensure that our code doesn't choose a more expensive plan than the naive plan.
 	indexOnlyPlan := newIndexOnlyPlan(ctx, statistics, p.config, matchers, pools.indexPredicatesPool, shard)
-	heap.Push(completePlans, partialPlanWithLowerBound(indexOnlyPlan, numPredicates))
+	heap.Push(completePlans, newPlanWithCost(indexOnlyPlan, numPredicates))
 
 	// Push all plans from the smaller heap into the larger one
 	// We need this because we will need to find a plan with at least one index matcher later,
 	// and we might not find that in either of the heaps alone.
 	return mergePlans(completePlans, prospectPlans).Iterator()
 }
 
-func mergePlans(completePlans, prospectPlans *partialPlans) *partialPlans {
+func mergePlans(completePlans, prospectPlans *plans) *plans {
 	for prospectPlans.Len() > 0 {
-		p := heap.Pop(prospectPlans).(partialPlan)
-		// At this point we'll be choosing the cheapest plan. we shouldn't be considering the lower bound as the cost of the plan.
-		p.lowerBoundCost = p.plan.TotalCost()
+		p := heap.Pop(prospectPlans).(planWithCost)
+		// Ensure plan is marked as complete for proper cost calculation
+		p.numDecidedPredicates = len(p.predicates)
+		p.cost = p.TotalCost()
 		heap.Push(completePlans, p)
 	}
 	return completePlans
diff --git a/pkg/ingester/lookupplan/plan.go b/pkg/ingester/lookupplan/plan.go
@@ -58,6 +58,14 @@ func newScanOnlyPlan(ctx context.Context, stats index.Statistics, config CostCon
 	return p
 }
 
+func newIndexOnlyPlan(ctx context.Context, stats index.Statistics, config CostConfig, matchers []*labels.Matcher, predicatesPool *pool.SlabPool[bool], shard *sharding.ShardSelector) plan {
+	p := newScanOnlyPlan(ctx, stats, config, matchers, predicatesPool, shard)
+	for i := range p.indexPredicate {
+		p.indexPredicate[i] = true
+	}
+	return p
+}
+
 // virtualPredicate returns the predicate at idx and whether it's an index predicate.
 // For undecided predicates:
 // - The first undecided predicate is treated as an index predicate for lower bound calculation
@@ -83,12 +91,13 @@ func (p plan) virtualPredicate(idx int) (planPredicate, bool) {
 	return virtualPred, idx == p.numDecidedPredicates
 }
 
-func newIndexOnlyPlan(ctx context.Context, stats index.Statistics, config CostConfig, matchers []*labels.Matcher, predicatesPool *pool.SlabPool[bool], shard *sharding.ShardSelector) plan {
-	p := newScanOnlyPlan(ctx, stats, config, matchers, predicatesPool, shard)
-	for i := range p.indexPredicate {
-		p.indexPredicate[i] = true
+func (p plan) hasAnyIndexPredicate() bool {
+	for _, useIndex := range p.indexPredicate {
+		if useIndex {
+			return true
+		}
 	}
-	return p
+	return false
 }
 
 func (p plan) IndexMatchers() []*labels.Matcher {
diff --git a/pkg/ingester/lookupplan/planner.go b/pkg/ingester/lookupplan/planner.go
@@ -28,7 +28,7 @@ func (i NoopPlanner) PlanIndexLookup(_ context.Context, plan index.LookupPlan, _
 }
 
 var (
-	rawPartialPlansPool    = &sync.Pool{}
+	rawPlansWithCostPool   = &sync.Pool{}
 	rawIndexPredicatesPool = &sync.Pool{}
 )
 
@@ -47,24 +47,24 @@ const (
 )
 
 type costBasedPlannerPools struct {
-	partialPlansPool    *pool.SlabPool[partialPlan]
+	plansWithCostPool   *pool.SlabPool[planWithCost]
 	indexPredicatesPool *pool.SlabPool[bool]
 }
 
 func newCostBasedPlannerPools() *costBasedPlannerPools {
 	return &costBasedPlannerPools{
-		partialPlansPool:    pool.NewSlabPool[partialPlan](rawPartialPlansPool, maxPlansForPlanning),
+		plansWithCostPool:   pool.NewSlabPool[planWithCost](rawPlansWithCostPool, maxPlansForPlanning),
 		indexPredicatesPool: pool.NewSlabPool[bool](rawIndexPredicatesPool, predicateIndexSlicesTotalLen),
 	}
 }
 
-func (p *costBasedPlannerPools) GetPartialPlans(num int) *partialPlans {
-	plans := p.partialPlansPool.Get(num)[:0]
-	return (*partialPlans)(&plans)
+func (p *costBasedPlannerPools) GetPlans(num int) *plans {
+	pls := p.plansWithCostPool.Get(num)[:0]
+	return (*plans)(&pls)
 }
 
 func (p *costBasedPlannerPools) Release() {
-	p.partialPlansPool.Release()
+	p.plansWithCostPool.Release()
 	p.indexPredicatesPool.Release()
 }
 

Original file line number	Diff line number	Diff line change
`@@ -28,7 +28,7 @@ func (i NoopPlanner) PlanIndexLookup(_ context.Context, plan index.LookupPlan, _`
`28`	`28`	`}`
`29`	`29`
`30`	`30`	`var (`
`31`		`- rawPartialPlansPool = &sync.Pool{}`
	`31`	`+ rawPlansWithCostPool = &sync.Pool{}`
`32`	`32`	`rawIndexPredicatesPool = &sync.Pool{}`
`33`	`33`	`)`
`34`	`34`
`@@ -47,24 +47,24 @@ const (`
`47`	`47`	`)`
`48`	`48`
`49`	`49`	`type costBasedPlannerPools struct {`
`50`		`- partialPlansPool *pool.SlabPool[partialPlan]`
	`50`	`+ plansWithCostPool *pool.SlabPool[planWithCost]`
`51`	`51`	`indexPredicatesPool *pool.SlabPool[bool]`
`52`	`52`	`}`
`53`	`53`
`54`	`54`	`func newCostBasedPlannerPools() *costBasedPlannerPools {`
`55`	`55`	`return &costBasedPlannerPools{`
`56`		`- partialPlansPool: pool.NewSlabPool[partialPlan](rawPartialPlansPool, maxPlansForPlanning),`
	`56`	`+ plansWithCostPool: pool.NewSlabPool[planWithCost](rawPlansWithCostPool, maxPlansForPlanning),`
`57`	`57`	`indexPredicatesPool: pool.NewSlabPool[bool](rawIndexPredicatesPool, predicateIndexSlicesTotalLen),`
`58`	`58`	`}`
`59`	`59`	`}`
`60`	`60`
`61`		`-func (p *costBasedPlannerPools) GetPartialPlans(num int) *partialPlans {`
`62`		`- plans := p.partialPlansPool.Get(num)[:0]`
`63`		`- return (*partialPlans)(&plans)`
	`61`	`+func (p *costBasedPlannerPools) GetPlans(num int) *plans {`
	`62`	`+ pls := p.plansWithCostPool.Get(num)[:0]`
	`63`	`+ return (*plans)(&pls)`
`64`	`64`	`}`
`65`	`65`
`66`	`66`	`func (p *costBasedPlannerPools) Release() {`
`67`		`- p.partialPlansPool.Release()`
	`67`	`+ p.plansWithCostPool.Release()`
`68`	`68`	`p.indexPredicatesPool.Release()`
`69`	`69`	`}`
`70`	`70`