Skip to content

Commit 6b3786d

Browse files
committed
kvfollowerreadsccl: deflake TestBoundedStalenessDataDriven
When using tenants, this test appears more prone to flakes. The flakes seem to come from two sources:

1. A lease would move off of node 1 and thus we'd see a "local read" succeed. Here we try to make this less likely by turning off the lease, replicate, and split queues and adding an early assertion that all of the leaseholders live on n1.

2. The schema change the test performs would take > 10s, which then invalidates many later assumptions that 10s in the past should be strictly before the schema change. We've kicked the can here and bumped it to 20s.

Unfortunately, this still doesn't seem to be enough as we still occasionally see this fail. One note is that this test is intended to be skipped under stress, but in CI it is still run under stress because of changes to the meaning of skip.UnderStress.

Informs #154710

Release note: None
1 parent fe15853 commit 6b3786d

File tree

2 files changed

+39
-22
lines changed

2 files changed

+39
-22
lines changed

pkg/ccl/kvccl/kvfollowerreadsccl/boundedstaleness_test.go

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,11 @@ func TestBoundedStalenessDataDriven(t *testing.T) {
311311
skip.IgnoreLint(t, "test doesn't apply to external process multi-tenancy")
312312
}
313313

314+
require.NoError(t, tc.WaitForFullReplication())
315+
tc.ToggleLeaseQueues(false)
316+
tc.ToggleSplitQueues(false)
317+
tc.ToggleReplicateQueues(false)
318+
314319
savedTraceStmt := ""
315320
datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string {
316321
// Early exit non-query execution related commands.
@@ -326,7 +331,7 @@ func TestBoundedStalenessDataDriven(t *testing.T) {
326331
return ""
327332
}
328333

329-
var showEvents bool
334+
var showEvents *bool
330335
var waitUntilFollowerReads bool
331336
var waitUntilMatch bool
332337
defer func() {
@@ -353,6 +358,9 @@ func TestBoundedStalenessDataDriven(t *testing.T) {
353358
serverNum, err := strconv.ParseInt(arg.Vals[0], 10, 64)
354359
require.NoError(t, err)
355360
dbConn = tc.ServerConn(int(serverNum))
361+
case "ignore-events":
362+
f := false
363+
showEvents = &f
356364
default:
357365
t.Fatalf("unknown arg: %s", arg.Key)
358366
}
@@ -376,9 +384,12 @@ func TestBoundedStalenessDataDriven(t *testing.T) {
376384
}
377385
return ""
378386
case "query":
379-
// Always show events.
387+
// Default to showing events
388+
if showEvents == nil {
389+
t := true
390+
showEvents = &t
391+
}
380392
bse.setStmt(traceStmt)
381-
showEvents = true
382393
rows, err := dbConn.Query(d.Input)
383394
if err != nil {
384395
return err.Error()
@@ -394,7 +405,7 @@ func TestBoundedStalenessDataDriven(t *testing.T) {
394405
testutils.SucceedsSoon(t, func() error {
395406
ret = executeCmd()
396407
// Append events to the output if desired.
397-
if showEvents {
408+
if showEvents != nil && *showEvents {
398409
if !strings.HasSuffix(ret, "\n") {
399410
ret += "\n"
400411
}

pkg/ccl/kvccl/kvfollowerreadsccl/testdata/boundedstaleness/single_row

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@ CREATE TABLE t(pk INT PRIMARY KEY) WITH (schema_locked = false);
1111
INSERT INTO t VALUES (1);
1212
----
1313

14+
# The tests assume that the leaseholder is node_idx=0 (n1), assert that here.
15+
query ignore-events
16+
SELECT DISTINCT(lease_holder) FROM [SHOW RANGES FROM TABLE t WITH DETAILS];
17+
----
18+
1
19+
1420
# If we try to read a timestamp that is impossible to satisfy with a follower
1521
# read, we should always be looking at the leaseholder in the nearest_only=False
1622
# case. We always do bounded staleness reads from node_idx 2, as node_idx 0 in a
@@ -62,57 +68,57 @@ events (1 found):
6268
# Note we have to wait until a match here, in case a follower read reads an
6369
# older version of the data.
6470
query idx=2 wait-until-follower-read wait-until-match
65-
SELECT * FROM t AS OF SYSTEM TIME with_max_staleness('10s') WHERE pk = 1
71+
SELECT * FROM t AS OF SYSTEM TIME with_max_staleness('20s') WHERE pk = 1
6672
----
6773
1
6874
events (1 found):
6975
* event 1: colbatchscan trace on node_idx 2: local follower read
7076

7177
query idx=2
72-
SELECT * FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '10s') WHERE pk = 1
78+
SELECT * FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '20s') WHERE pk = 1
7379
----
7480
1
7581
events (1 found):
7682
* event 1: colbatchscan trace on node_idx 2: local follower read
7783

7884
query idx=2
79-
SELECT * FROM t AS OF SYSTEM TIME with_max_staleness('10s', false) WHERE pk = 1
85+
SELECT * FROM t AS OF SYSTEM TIME with_max_staleness('20s', false) WHERE pk = 1
8086
----
8187
1
8288
events (1 found):
8389
* event 1: colbatchscan trace on node_idx 2: local follower read
8490

8591
query idx=2
86-
SELECT * FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '10s', false) WHERE pk = 1
92+
SELECT * FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '20s', false) WHERE pk = 1
8793
----
8894
1
8995
events (1 found):
9096
* event 1: colbatchscan trace on node_idx 2: local follower read
9197

9298
query idx=2
93-
SELECT * FROM t AS OF SYSTEM TIME with_max_staleness('10s', true) WHERE pk = 1
99+
SELECT * FROM t AS OF SYSTEM TIME with_max_staleness('20s', true) WHERE pk = 1
94100
----
95101
1
96102
events (1 found):
97103
* event 1: colbatchscan trace on node_idx 2: local follower read
98104

99105
query idx=2
100-
SELECT * FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '10s', true) WHERE pk = 1
106+
SELECT * FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '20s', true) WHERE pk = 1
101107
----
102108
1
103109
events (1 found):
104110
* event 1: colbatchscan trace on node_idx 2: local follower read
105111

106112
exec idx=2
107-
PREPARE max_staleness_prep AS SELECT pk FROM t AS OF SYSTEM TIME with_max_staleness('10s') WHERE pk = 1;
113+
PREPARE max_staleness_prep AS SELECT pk FROM t AS OF SYSTEM TIME with_max_staleness('20s') WHERE pk = 1;
108114
----
109115

110116
exec idx=2
111-
PREPARE min_timestamp_prep AS SELECT pk FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '10s') WHERE pk = 1
117+
PREPARE min_timestamp_prep AS SELECT pk FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '20s') WHERE pk = 1
112118
----
113119

114120
override-matching-stmt-for-tracing
115-
SELECT pk FROM t AS OF SYSTEM TIME with_max_staleness('10s') WHERE pk = 1
121+
SELECT pk FROM t AS OF SYSTEM TIME with_max_staleness('20s') WHERE pk = 1
116122
----
117123

118124
query idx=2
@@ -123,7 +129,7 @@ events (1 found):
123129
* event 1: colbatchscan trace on node_idx 2: local follower read
124130

125131
override-matching-stmt-for-tracing
126-
SELECT pk FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '10s') WHERE pk = 1
132+
SELECT pk FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '20s') WHERE pk = 1
127133
----
128134

129135
query idx=2
@@ -149,28 +155,28 @@ ALTER TABLE t ADD COLUMN new_col INT NOT NULL DEFAULT 2
149155
# Ensure we resort to the leaseholder as the schema change requires a recent read
150156
# in the nearest_only=False case.
151157
query idx=2
152-
SELECT * FROM t AS OF SYSTEM TIME with_max_staleness('10s') WHERE pk = 1
158+
SELECT * FROM t AS OF SYSTEM TIME with_max_staleness('20s') WHERE pk = 1
153159
----
154160
1 2
155161
events (1 found):
156162
* event 1: colbatchscan trace on node_idx 2: local read then remote leaseholder read
157163

158164
query idx=2
159-
SELECT * FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '10s') WHERE pk = 1
165+
SELECT * FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '20s') WHERE pk = 1
160166
----
161167
1 2
162168
events (1 found):
163169
* event 1: colbatchscan trace on node_idx 2: local read then remote leaseholder read
164170

165171
query idx=2
166-
SELECT * FROM t AS OF SYSTEM TIME with_max_staleness('10s', false) WHERE pk = 1
172+
SELECT * FROM t AS OF SYSTEM TIME with_max_staleness('20s', false) WHERE pk = 1
167173
----
168174
1 2
169175
events (1 found):
170176
* event 1: colbatchscan trace on node_idx 2: local read then remote leaseholder read
171177

172178
query idx=2
173-
SELECT * FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '10s', false) WHERE pk = 1
179+
SELECT * FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '20s', false) WHERE pk = 1
174180
----
175181
1 2
176182
events (1 found):
@@ -182,7 +188,7 @@ events (1 found):
182188
# Note that we retry until follower read here as the first schema read of
183189
# historical schema descriptors result in non-follower reads.
184190
query idx=2 wait-until-follower-read
185-
SELECT * FROM t AS OF SYSTEM TIME with_max_staleness('10s', true) WHERE pk = 1
191+
SELECT * FROM t AS OF SYSTEM TIME with_max_staleness('20s', true) WHERE pk = 1
186192
----
187193
1
188194
events (17 found):
@@ -205,7 +211,7 @@ events (17 found):
205211
* event 17: colbatchscan trace on node_idx 2: local follower read
206212

207213
query idx=2
208-
SELECT * FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '10s', true) WHERE pk = 1
214+
SELECT * FROM t AS OF SYSTEM TIME with_min_timestamp(now() - '20s', true) WHERE pk = 1
209215
----
210216
1
211217
events (17 found):
@@ -242,7 +248,7 @@ ALTER TABLE t2 ADD COLUMN new_col INT
242248
----
243249

244250
query idx=2
245-
SELECT * FROM t2 AS OF SYSTEM TIME with_min_timestamp(now() - '10s', true) WHERE pk = 2
251+
SELECT * FROM t2 AS OF SYSTEM TIME with_min_timestamp(now() - '20s', true) WHERE pk = 2
246252
----
247253
pq: referenced descriptor ID 105: looking up ID 105: descriptor not found
248254
events (10 found):
@@ -258,7 +264,7 @@ events (10 found):
258264
* event 10: transaction retry on node_idx: 2
259265

260266
query idx=2
261-
SELECT * FROM t2 AS OF SYSTEM TIME with_min_timestamp(now() - '10s', true) WHERE pk = 2
267+
SELECT * FROM t2 AS OF SYSTEM TIME with_min_timestamp(now() - '20s', true) WHERE pk = 2
262268
----
263269
pq: referenced descriptor ID 105: looking up ID 105: descriptor not found
264270
events (10 found):

0 commit comments

Comments
 (0)