
Commit 295d471

sql: harden recent fix to top-level query stats with routines
This commit relaxes the requirements for when it's safe to set the routine metadata forwarder. Previously, we only set it when we ended up using the RootTxn in a local plan; now we examine all possible kinds of concurrency within the local plan to determine whether it's actually safe to set the metadata forwarder. This also works out to a nice cleanup.

Note that this commit independently fixes the issue mentioned in the previous commit.

Release note: None
1 parent 69187f8 commit 295d471
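
For readers unfamiliar with the pattern, the sketch below shows the mechanics this change relies on: each kind of local concurrency is a power-of-two flag, every kind observed during planning is OR-ed into a single mask, and a LeafTxn is required whenever the plan is distributed or the mask is non-zero. The names mirror those introduced in pkg/sql/distsql/server.go, but the snippet is a simplified, illustrative stand-in, not the actual CockroachDB code.

package main

import "fmt"

// ConcurrencyKind, localState, and their methods are illustrative stand-ins
// for the API added in pkg/sql/distsql/server.go below, not the real package.
type ConcurrencyKind uint32

const (
    ConcurrencyHasParallelProcessors ConcurrencyKind = 1 << iota
    ConcurrencyStreamer
    ConcurrencyParallelChecks
    ConcurrencyWithOuterPlan
)

type localState struct {
    isLocal     bool
    concurrency ConcurrencyKind
}

// addConcurrency ORs an observed concurrency kind into the accumulated mask.
func (l *localState) addConcurrency(kind ConcurrencyKind) { l.concurrency |= kind }

// mustUseLeafTxn reports whether a LeafTxn is required: any recorded
// concurrency, or a non-local (distributed) plan, rules out the RootTxn.
func (l localState) mustUseLeafTxn() bool { return !l.isLocal || l.concurrency != 0 }

func main() {
    ls := localState{isLocal: true}
    fmt.Println(ls.mustUseLeafTxn()) // false: fully local, no concurrency
    ls.addConcurrency(ConcurrencyStreamer)
    ls.addConcurrency(ConcurrencyWithOuterPlan)
    fmt.Println(ls.mustUseLeafTxn())     // true: any flag forces the LeafTxn
    fmt.Printf("%04b\n", ls.concurrency) // 1010: both kinds are recorded
}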

6 files changed: +99 −31 lines


pkg/sql/apply_join.go

Lines changed: 5 additions & 2 deletions
@@ -11,6 +11,7 @@ import (
 
     "github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
     "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
+    "github.com/cockroachdb/cockroach/pkg/sql/distsql"
     "github.com/cockroachdb/cockroach/pkg/sql/opt/exec"
     "github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
     "github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
@@ -321,7 +322,7 @@ func runPlanInsidePlan(
         recv,
         &subqueryResultMemAcc,
         false, /* skipDistSQLDiagramGeneration */
-        params.p.mustUseLeafTxn(),
+        params.p.innerPlansMustUseLeafTxn(),
     ) {
         return recv.stats, resultWriter.Err()
     }
@@ -336,7 +337,9 @@ func runPlanInsidePlan(
     planCtx := execCfg.DistSQLPlanner.NewPlanningCtx(ctx, evalCtx, &plannerCopy, plannerCopy.txn, distributeType)
     planCtx.distSQLProhibitedErr = distSQLProhibitedErr
     planCtx.stmtType = recv.stmtType
-    planCtx.mustUseLeafTxn = params.p.mustUseLeafTxn()
+    if params.p.innerPlansMustUseLeafTxn() {
+        planCtx.flowConcurrency = distsql.ConcurrencyWithOuterPlan
+    }
     planCtx.stmtForDistSQLDiagram = stmtForDistSQLDiagram
 
     // Wrap PlanAndRun in a function call so that we clean up immediately.

pkg/sql/distsql/server.go

Lines changed: 39 additions & 9 deletions
@@ -533,6 +533,28 @@ func newFlow(
     return rowflow.NewRowBasedFlow(base)
 }
 
+// ConcurrencyKind indicates which concurrency type is present within the local
+// DistSQL flow. Note that inter-node concurrency (i.e. whether we have a
+// distributed plan) is not reflected here.
+type ConcurrencyKind uint32
+
+const (
+    // ConcurrencyHasParallelProcessors, if set, indicates that we have multiple
+    // processors running for the same plan stage.
+    ConcurrencyHasParallelProcessors ConcurrencyKind = (1 << iota)
+    // ConcurrencyStreamer, if set, indicates we have concurrency due to usage
+    // of the Streamer API.
+    ConcurrencyStreamer
+    // ConcurrencyParallelChecks, if set, indicates that we're running
+    // post-query CHECKs in parallel with each other (i.e. the concurrency is
+    // with _other_ local flows).
+    ConcurrencyParallelChecks
+    // ConcurrencyWithOuterPlan, if set, indicates that - if we're running an
+    // "inner" plan (like an apply-join iteration or a routine) - we might have
+    // concurrency with the "outer" plan.
+    ConcurrencyWithOuterPlan
+)
+
 // LocalState carries information that is required to set up a flow with wrapped
 // planNodes.
 type LocalState struct {
@@ -547,13 +569,9 @@ type LocalState struct {
     // remote flows.
     IsLocal bool
 
-    // HasConcurrency indicates whether the local flow uses multiple goroutines.
-    HasConcurrency bool
-
-    // MustUseLeaf indicates whether the local flow must use the LeafTxn even if
-    // there is no concurrency in the flow on its own because there would be
-    // concurrency with other flows which prohibits the usage of the RootTxn.
-    MustUseLeaf bool
+    // concurrency tracks the types of concurrency present when accessing the
+    // Txn.
+    concurrency ConcurrencyKind
 
     // Txn is filled in on the gateway only. It is the RootTxn that the query is running in.
     // This will be used directly by the flow if the flow has no concurrency and IsLocal is set.
@@ -570,10 +588,22 @@ type LocalState struct {
     LocalVectorSources map[int32]any
 }
 
+// AddConcurrency marks the given concurrency kinds as present in the local
+// flow.
+func (l *LocalState) AddConcurrency(kind ConcurrencyKind) {
+    l.concurrency |= kind
+}
+
+// GetConcurrency returns the bit-mask representing all concurrency kinds
+// present in the local flow.
+func (l LocalState) GetConcurrency() ConcurrencyKind {
+    return l.concurrency
+}
+
 // MustUseLeafTxn returns true if a LeafTxn must be used. It is valid to call
-// this method only after IsLocal and HasConcurrency have been set correctly.
+// this method only after IsLocal and all concurrency kinds have been set.
 func (l LocalState) MustUseLeafTxn() bool {
-    return !l.IsLocal || l.HasConcurrency || l.MustUseLeaf
+    return !l.IsLocal || l.concurrency != 0
 }
 
 // SetupLocalSyncFlow sets up a synchronous flow on the current (planning) node,
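
A small property of the API above worth noting: AddConcurrency is a plain bitwise OR, so forwarding a zero value (a PlanningCtx that recorded no concurrency) is a harmless no-op, and kinds observed at different points accumulate into one mask. That is what lets pkg/sql/distsql_running.go below call AddConcurrency(planCtx.flowConcurrency) unconditionally. A brief sketch with illustrative stand-in types, not the real distsql package:

package main

import "fmt"

// Illustrative stand-ins for the distsql types introduced in this commit.
type ConcurrencyKind uint32

const (
    ConcurrencyHasParallelProcessors ConcurrencyKind = 1 << iota
    ConcurrencyStreamer
    ConcurrencyParallelChecks
    ConcurrencyWithOuterPlan
)

type localState struct{ concurrency ConcurrencyKind }

// addConcurrency mirrors LocalState.AddConcurrency: a plain bitwise OR.
func (l *localState) addConcurrency(kind ConcurrencyKind) { l.concurrency |= kind }

func main() {
    var ls localState
    var fromPlanCtx ConcurrencyKind      // zero: the PlanningCtx recorded no concurrency
    ls.addConcurrency(fromPlanCtx)       // OR-ing zero is a no-op
    fmt.Printf("%04b\n", ls.concurrency) // 0000

    fromPlanCtx = ConcurrencyWithOuterPlan // e.g. an apply-join iteration or a routine
    ls.addConcurrency(fromPlanCtx)
    ls.addConcurrency(ConcurrencyStreamer) // separately observed kinds accumulate
    fmt.Printf("%04b\n", ls.concurrency)   // 1010
}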

pkg/sql/distsql_physical_planner.go

Lines changed: 2 additions & 2 deletions
@@ -1032,11 +1032,11 @@ type PlanningCtx struct {
     // query).
     subOrPostQuery bool
 
-    // mustUseLeafTxn, if set, indicates that this PlanningCtx is used to handle
+    // flowConcurrency will be non-zero when this PlanningCtx is used to handle
     // one of the plans that will run in parallel with other plans. As such, the
     // DistSQL planner will need to use the LeafTxn (even if it's not needed
     // based on other "regular" factors).
-    mustUseLeafTxn bool
+    flowConcurrency distsql.ConcurrencyKind
 
     // onFlowCleanup contains non-nil functions that will be called after the
     // local flow finished running and is being cleaned up. It allows us to

pkg/sql/distsql_running.go

Lines changed: 49 additions & 15 deletions
@@ -745,7 +745,7 @@ func (dsp *DistSQLPlanner) Run(
     // the line.
     localState.EvalContext = evalCtx
     localState.IsLocal = planCtx.isLocal
-    localState.MustUseLeaf = planCtx.mustUseLeafTxn
+    localState.AddConcurrency(planCtx.flowConcurrency)
     localState.Txn = txn
     localState.LocalProcs = plan.LocalProcessors
     localState.LocalVectorSources = plan.LocalVectorSources
@@ -768,15 +768,19 @@ func (dsp *DistSQLPlanner) Run(
         // cannot create a LeafTxn, so we cannot parallelize scans.
         planCtx.parallelizeScansIfLocal = false
         for _, flow := range flows {
-            localState.HasConcurrency = localState.HasConcurrency || execinfra.HasParallelProcessors(flow)
+            if execinfra.HasParallelProcessors(flow) {
+                localState.AddConcurrency(distsql.ConcurrencyHasParallelProcessors)
+            }
         }
     } else {
         if planCtx.isLocal && noMutations && planCtx.parallelizeScansIfLocal {
             // Even though we have a single flow on the gateway node, we might
             // have decided to parallelize the scans. If that's the case, we
            // will need to use the Leaf txn.
             for _, flow := range flows {
-                localState.HasConcurrency = localState.HasConcurrency || execinfra.HasParallelProcessors(flow)
+                if execinfra.HasParallelProcessors(flow) {
+                    localState.AddConcurrency(distsql.ConcurrencyHasParallelProcessors)
+                }
             }
         }
         if noMutations {
@@ -877,7 +881,7 @@ func (dsp *DistSQLPlanner) Run(
                 // Both index and lookup joins, with and without
                 // ordering, are executed via the Streamer API that has
                 // concurrency.
-                localState.HasConcurrency = true
+                localState.AddConcurrency(distsql.ConcurrencyStreamer)
                 break
             }
         }
@@ -1000,11 +1004,37 @@ func (dsp *DistSQLPlanner) Run(
         return
     }
 
-    if len(flows) == 1 && evalCtx.Txn != nil && evalCtx.Txn.Type() == kv.RootTxn {
-        // If we have a fully local plan and a RootTxn, we don't expect any
-        // concurrency, so it's safe to use the DistSQLReceiver to push the
-        // metadata into directly from routines.
-        if planCtx.planner != nil {
+    if len(flows) == 1 && planCtx.planner != nil {
+        // We have a fully local plan, so check whether it'll be safe to use the
+        // DistSQLReceiver to push the metadata into directly from routines
+        // (which is the case when we don't have any concurrency between
+        // routines themselves as well as a routine and the "head" processor -
+        // the one pushing into the DistSQLReceiver).
+        var safe bool
+        if evalCtx.Txn != nil && evalCtx.Txn.Type() == kv.RootTxn {
+            // We have a RootTxn, so we don't expect any concurrency whatsoever.
+            safe = true
+        } else {
+            // We have a LeafTxn, so we need to examine what kind of concurrency
+            // is present in the flow.
+            var safeConcurrency distsql.ConcurrencyKind
+            // We don't care whether we use the Streamer API - it has
+            // concurrency only at the KV client level and below.
+            safeConcurrency |= distsql.ConcurrencyStreamer
+            // If we have "outer plan" concurrency, the "inner" and the
+            // "outer" plans have their own DistSQLReceivers.
+            //
+            // Note that the same is the case with parallel CHECKs concurrency,
+            // but then planCtx.planner is shared between goroutines, so we'll
+            // avoid mutating it. (We can't have routines in post-query CHECKs
+            // since only FK and UNIQUE checks are run in parallel.)
+            safeConcurrency |= distsql.ConcurrencyWithOuterPlan
+            unsafeConcurrency := ^safeConcurrency
+            if localState.GetConcurrency()&unsafeConcurrency == 0 {
+                safe = true
+            }
+        }
+        if safe {
             planCtx.planner.routineMetadataForwarder = recv
         }
     }
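
To make the mask arithmetic in the hunk above concrete, here is a self-contained sketch: Streamer and with-outer-plan concurrency are treated as safe for forwarding routine metadata directly to the DistSQLReceiver, while parallel processors and parallel CHECKs are not. The types are illustrative copies of the flags added in pkg/sql/distsql/server.go, not the real package, and safeToForwardMetadata only mirrors the LeafTxn branch of the check.

package main

import "fmt"

// Illustrative copies of the flags added in pkg/sql/distsql/server.go.
type ConcurrencyKind uint32

const (
    ConcurrencyHasParallelProcessors ConcurrencyKind = 1 << iota
    ConcurrencyStreamer
    ConcurrencyParallelChecks
    ConcurrencyWithOuterPlan
)

// safeToForwardMetadata mirrors the LeafTxn branch of the new check: the
// Streamer's concurrency lives at the KV client level and below, and an
// "outer" plan has its own DistSQLReceiver, so neither blocks forwarding.
func safeToForwardMetadata(present ConcurrencyKind) bool {
    var safe ConcurrencyKind
    safe |= ConcurrencyStreamer
    safe |= ConcurrencyWithOuterPlan
    unsafeKinds := ^safe
    return present&unsafeKinds == 0
}

func main() {
    // A local flow that only uses the Streamer API: forwarding is safe.
    fmt.Println(safeToForwardMetadata(ConcurrencyStreamer)) // true
    // An inner plan running concurrently with its outer plan: still safe.
    fmt.Println(safeToForwardMetadata(ConcurrencyStreamer | ConcurrencyWithOuterPlan)) // true
    // Parallel post-query CHECKs share the planner, so forwarding is avoided.
    fmt.Println(safeToForwardMetadata(ConcurrencyParallelChecks)) // false
    // Multiple processors running in parallel: unsafe as well.
    fmt.Println(safeToForwardMetadata(ConcurrencyHasParallelProcessors)) // false
}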
@@ -1859,7 +1889,7 @@ func (dsp *DistSQLPlanner) PlanAndRunAll(
         // Skip the diagram generation since on this "main" query path we
         // can get it via the statement bundle.
         true, /* skipDistSQLDiagramGeneration */
-        false, /* mustUseLeafTxn */
+        false, /* innerPlansMustUseLeafTxn */
     ) {
         return recv.commErr
     }
@@ -1926,7 +1956,7 @@ func (dsp *DistSQLPlanner) PlanAndRunSubqueries(
     recv *DistSQLReceiver,
     subqueryResultMemAcc *mon.BoundAccount,
     skipDistSQLDiagramGeneration bool,
-    mustUseLeafTxn bool,
+    innerPlansMustUseLeafTxn bool,
 ) bool {
     for planIdx, subqueryPlan := range subqueryPlans {
         if err := dsp.planAndRunSubquery(
@@ -1939,7 +1969,7 @@ func (dsp *DistSQLPlanner) PlanAndRunSubqueries(
             recv,
             subqueryResultMemAcc,
             skipDistSQLDiagramGeneration,
-            mustUseLeafTxn,
+            innerPlansMustUseLeafTxn,
         ); err != nil {
             recv.SetError(err)
             return false
@@ -1963,7 +1993,7 @@ func (dsp *DistSQLPlanner) planAndRunSubquery(
     recv *DistSQLReceiver,
     subqueryResultMemAcc *mon.BoundAccount,
     skipDistSQLDiagramGeneration bool,
-    mustUseLeafTxn bool,
+    innerPlansMustUseLeafTxn bool,
 ) error {
     subqueryDistribution, distSQLProhibitedErr := planner.getPlanDistribution(ctx, subqueryPlan.plan)
     distribute := DistributionType(LocalDistribution)
@@ -1975,7 +2005,9 @@ func (dsp *DistSQLPlanner) planAndRunSubquery(
     subqueryPlanCtx.stmtType = tree.Rows
     subqueryPlanCtx.skipDistSQLDiagramGeneration = skipDistSQLDiagramGeneration
     subqueryPlanCtx.subOrPostQuery = true
-    subqueryPlanCtx.mustUseLeafTxn = mustUseLeafTxn
+    if innerPlansMustUseLeafTxn {
+        subqueryPlanCtx.flowConcurrency = distsql.ConcurrencyWithOuterPlan
+    }
     if planner.instrumentation.ShouldSaveFlows() {
         subqueryPlanCtx.saveFlows = getDefaultSaveFlowsFunc(ctx, planner, planComponentTypeSubquery)
     }
@@ -2590,7 +2622,9 @@ func (dsp *DistSQLPlanner) planAndRunPostquery(
     }
     postqueryPlanCtx.associateNodeWithComponents = associateNodeWithComponents
     postqueryPlanCtx.collectExecStats = planner.instrumentation.ShouldCollectExecStats()
-    postqueryPlanCtx.mustUseLeafTxn = parallelCheck
+    if parallelCheck {
+        postqueryPlanCtx.flowConcurrency = distsql.ConcurrencyParallelChecks
+    }
 
     postqueryPhysPlan, physPlanCleanup, err := dsp.createPhysPlan(ctx, postqueryPlanCtx, postqueryPlan)
     defer physPlanCleanup()

pkg/sql/planner.go

Lines changed: 3 additions & 2 deletions
@@ -1071,8 +1071,9 @@ func (p *planner) ClearTableStatsCache() {
     }
 }
 
-// mustUseLeafTxn returns true if inner plans must use a leaf transaction.
-func (p *planner) mustUseLeafTxn() bool {
+// innerPlansMustUseLeafTxn returns true if inner plans must use a leaf
+// transaction.
+func (p *planner) innerPlansMustUseLeafTxn() bool {
     return atomic.LoadInt32(&p.atomic.innerPlansMustUseLeafTxn) >= 1
 }
 
pkg/sql/schema_changer.go

Lines changed: 1 addition & 1 deletion
@@ -470,7 +470,7 @@ func (sc *SchemaChanger) backfillQueryIntoTable(
             ctx, localPlanner, localPlanner.ExtendedEvalContextCopy,
             localPlanner.curPlan.subqueryPlans, recv, &subqueryResultMemAcc,
             false, /* skipDistSQLDiagramGeneration */
-            false, /* mustUseLeafTxn */
+            false, /* innerPlansMustUseLeafTxn */
         ) {
             if planAndRunErr = rw.Err(); planAndRunErr != nil {
                 return
