Skip to content

Commit 0b8aa69

Browse files
authored
GT-379 Retriable batch reads in AQL cursors - V1 (#500)
1 parent 84832af commit 0b8aa69

12 files changed

+269
-80
lines changed

.travis.yml

+12-13
Original file line numberDiff line numberDiff line change
@@ -30,23 +30,22 @@ go:
3030
- 1.20.3
3131

3232
env:
33-
global:
34-
- GOIMAGE=gcr.io/gcr-for-testing/golang:1.20.3
35-
- ALPINE_IMAGE=gcr.io/gcr-for-testing/alpine:3.17
36-
- STARTER=gcr.io/gcr-for-testing/arangodb/arangodb-starter:0.15.7
3733
jobs:
3834
- TEST_SUITE=run-unit-tests ALWAYS=1
3935

40-
- TEST_SUITE=run-tests-single ARANGODB=gcr.io/gcr-for-testing/arangodb/arangodb:3.9.6
41-
- TEST_SUITE=run-tests-single ARANGODB=gcr.io/gcr-for-testing/arangodb/arangodb-preview:3.10.5 TEST_DISALLOW_UNKNOWN_FIELDS=false ALWAYS=1
42-
- TEST_SUITE=run-tests-resilientsingle ARANGODB=gcr.io/gcr-for-testing/arangodb/arangodb-preview:3.10.5
43-
- TEST_SUITE=run-tests-cluster ARANGODB=gcr.io/gcr-for-testing/arangodb/arangodb-preview:3.10.5
44-
- TEST_SUITE=run-tests-cluster ARANGODB=gcr.io/gcr-for-testing/arangodb/enterprise:3.10.5
36+
- TEST_SUITE=run-tests-single
37+
- TEST_SUITE=run-tests-cluster
38+
# - TEST_SUITE=run-tests-resilientsingle
4539

46-
- TEST_SUITE=run-v2-tests-single ARANGODB=gcr.io/gcr-for-testing/arangodb/arangodb:3.9.6
47-
- TEST_SUITE=run-v2-tests-single ARANGODB=gcr.io/gcr-for-testing/arangodb/arangodb-preview:3.10.5 ALWAYS=1
48-
- TEST_SUITE=run-v2-tests-cluster ARANGODB=gcr.io/gcr-for-testing/arangodb/arangodb-preview:3.10.5
49-
- TEST_SUITE=run-v2-tests-cluster ARANGODB=gcr.io/gcr-for-testing/arangodb/enterprise:3.10.5
40+
- TEST_SUITE=run-v2-tests-single
41+
- TEST_SUITE=run-v2-tests-cluster
42+
# - TEST_SUITE=run-v2-tests-resilientsingle
43+
global:
44+
- ARANGODB=gcr.io/gcr-for-testing/arangodb/enterprise-preview:3.11.0-beta.1
45+
- GOIMAGE=gcr.io/gcr-for-testing/golang:1.20.3
46+
- ALPINE_IMAGE=gcr.io/gcr-for-testing/alpine:3.17
47+
- STARTER=gcr.io/gcr-for-testing/arangodb/arangodb-starter:0.15.7
48+
- TEST_DISALLOW_UNKNOWN_FIELDS=false
5049

5150
before_script:
5251
- |

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
- Use Go 1.20.3 for testing. Add govulncheck to pipeline
1111
- Fix test for extended names
1212
- Fix potential bug with DB name escaping for URL when requesting replication-related API
13+
- Retriable batch reads in AQL cursors
1314

1415
## [1.5.2](https://github.com/arangodb/go-driver/tree/v1.5.2) (2023-03-01)
1516
- Bump `DRIVER_VERSION`

cursor.go

+23-15
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//
22
// DISCLAIMER
33
//
4-
// Copyright 2017 ArangoDB GmbH, Cologne, Germany
4+
// Copyright 2023 ArangoDB GmbH, Cologne, Germany
55
//
66
// Licensed under the Apache License, Version 2.0 (the "License");
77
// you may not use this file except in compliance with the License.
@@ -16,9 +16,6 @@
1616
// limitations under the License.
1717
//
1818
// Copyright holder is ArangoDB GmbH, Cologne, Germany
19-
//
20-
// Author Ewout Prangsma
21-
//
2219

2320
package driver
2421

@@ -36,28 +33,34 @@ type QueryExtra interface {
3633
// GetProfileRaw returns raw profile information in json
3734
GetProfileRaw() ([]byte, bool, error)
3835

39-
// PlanRaw returns raw plan
36+
// GetPlanRaw returns raw plan
4037
GetPlanRaw() ([]byte, bool, error)
4138
}
4239

43-
// Statistics returned with the query cursor
40+
// QueryStatistics provides the statistics returned with the query cursor.
4441
type QueryStatistics interface {
45-
// the total number of data-modification operations successfully executed.
42+
// WritesExecuted returns the total number of data-modification operations successfully executed.
4643
WritesExecuted() int64
47-
// The total number of data-modification operations that were unsuccessful
44+
45+
// WritesIgnored returns the total number of data-modification operations that were unsuccessful.
4846
WritesIgnored() int64
49-
// The total number of documents iterated over when scanning a collection without an index.
47+
48+
// ScannedFull returns the total number of documents iterated over when scanning a collection without an index.
5049
ScannedFull() int64
51-
// The total number of documents iterated over when scanning a collection using an index.
50+
51+
// ScannedIndex returns the total number of documents iterated over when scanning a collection using an index.
5252
ScannedIndex() int64
53-
// the total number of documents that were removed after executing a filter condition in a FilterNode
53+
54+
// Filtered returns the total number of documents that were removed after executing a filter condition in a FilterNode.
5455
Filtered() int64
55-
// Returns the numer of results before the last LIMIT in the query was applied.
56+
57+
// FullCount returns the number of results before the last LIMIT in the query was applied.
5658
// A valid return value is only available when the cursor has been created with a context that was
57-
// prepared with `WithFullCount`. Additionally this will also not return a valid value if
59+
// prepared with `WithFullCount`. Additionally, this will also not return a valid value if
5860
// the context was prepared with `WithStream`.
5961
FullCount() int64
60-
// Execution time of the query (wall-clock time). value will be set from the outside
62+
63+
// ExecutionTime returns the execution time of the query (wall-clock time). The value will be set from the outside.
6164
ExecutionTime() time.Duration
6265
}
6366

@@ -70,12 +73,17 @@ type Cursor interface {
7073
HasMore() bool
7174

7275
// ReadDocument reads the next document from the cursor.
73-
// The document data is stored into result, the document meta data is returned.
76+
// The document data is stored into result, the document metadata is returned.
7477
// If the cursor has no more documents, a NoMoreDocuments error is returned.
7578
// Note: If the query (resulting in this cursor) does not return documents,
7679
// then the returned DocumentMeta will be empty.
7780
ReadDocument(ctx context.Context, result interface{}) (DocumentMeta, error)
7881

82+
// RetryReadDocument reads the last document from the cursor one more time.
83+
// It can be used, e.g., in case of a network error during ReadDocument.
84+
// It requires 'driver.WithQueryAllowRetry' to be set to true on the Context during Cursor creation.
85+
RetryReadDocument(ctx context.Context, result interface{}) (DocumentMeta, error)
86+
7987
// Count returns the total number of result documents available.
8088
// A valid return value is only available when the cursor has been created with a context that was
8189
// prepared with `WithQueryCount` and not with `WithQueryStream`.

cursor_impl.go

+82-35
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//
22
// DISCLAIMER
33
//
4-
// Copyright 2017 ArangoDB GmbH, Cologne, Germany
4+
// Copyright 2023 ArangoDB GmbH, Cologne, Germany
55
//
66
// Licensed under the Apache License, Version 2.0 (the "License");
77
// you may not use this file except in compliance with the License.
@@ -17,8 +17,6 @@
1717
//
1818
// Copyright holder is ArangoDB GmbH, Cologne, Germany
1919
//
20-
// Author Ewout Prangsma
21-
//
2220

2321
package driver
2422

@@ -37,19 +35,30 @@ func newCursor(data cursorData, endpoint string, db *database, allowDirtyReads b
3735
if db == nil {
3836
return nil, WithStack(InvalidArgumentError{Message: "db is nil"})
3937
}
40-
return &cursor{
38+
39+
c := &cursor{
4140
cursorData: data,
4241
endpoint: endpoint,
4342
db: db,
4443
conn: db.conn,
4544
allowDirtyReads: allowDirtyReads,
46-
}, nil
45+
}
46+
47+
if data.NextBatchID != "" {
48+
c.retryData = &retryData{
49+
cursorID: data.ID,
50+
currentBatchID: "1",
51+
}
52+
}
53+
54+
return c, nil
4755
}
4856

4957
type cursor struct {
5058
cursorData
5159
endpoint string
5260
resultIndex int
61+
retryData *retryData
5362
db *database
5463
conn Connection
5564
closed int32
@@ -58,6 +67,11 @@ type cursor struct {
5867
lastReadWasDirty bool
5968
}
6069

70+
type retryData struct {
71+
cursorID string
72+
currentBatchID string
73+
}
74+
6175
// CursorStats TODO: all these int64 should be changed into uint64
6276
type cursorStats struct {
6377
// The total number of data-modification operations successfully executed.
@@ -86,11 +100,11 @@ type cursorStats struct {
86100
CursorsRearmed uint64 `json:"cursorsRearmed,omitempty"`
87101
// CacheHits the total number of index entries read from in-memory caches for indexes of type edge or persistent.
88102
// This value will only be non-zero when reading from indexes that have an in-memory cache enabled,
89-
// and when the query allows using the in-memory cache (i.e. using equality lookups on all index attributes).
103+
// and when the query allows using the in-memory cache (i.e., using equality lookups on all index attributes).
90104
CacheHits uint64 `json:"cacheHits,omitempty"`
91105
// CacheMisses the total number of cache read attempts for index entries that could not be served from in-memory caches for indexes of type edge or persistent.
92106
// This value will only be non-zero when reading from indexes that have an in-memory cache enabled,
93-
// the query allows using the in-memory cache (i.e. using equality lookups on all index attributes) and the looked up values are not present in the cache.
107+
// the query allows using the in-memory cache (i.e., using equality lookups on all index attributes), and the looked-up values are not present in the cache.
94108
CacheMisses uint64 `json:"cacheMisses,omitempty"`
95109
}
96110

@@ -163,13 +177,14 @@ type cursorPlanNodes map[string]interface{}
163177
type cursorProfile map[string]interface{}
164178

165179
type cursorData struct {
166-
Key string `json:"_key,omitempty"`
167-
Count int64 `json:"count,omitempty"` // the total number of result documents available (only available if the query was executed with the count attribute set)
168-
ID string `json:"id"` // id of temporary cursor created on the server (optional, see above)
169-
Result []*RawObject `json:"result,omitempty"` // an array of result documents (might be empty if query has no results)
170-
HasMore bool `json:"hasMore,omitempty"` // A boolean indicator whether there are more results available for the cursor on the server
171-
Extra cursorExtra `json:"extra"`
172-
Cached bool `json:"cached,omitempty"`
180+
Key string `json:"_key,omitempty"`
181+
Count int64 `json:"count,omitempty"` // the total number of result documents available (only available if the query was executed with the count attribute set)
182+
ID string `json:"id"` // id of temporary cursor created on the server (optional, see above)
183+
Result []*RawObject `json:"result,omitempty"` // an array of result documents (might be empty if the query has no results)
184+
HasMore bool `json:"hasMore,omitempty"` // A boolean indicator whether there are more results available for the cursor on the server
185+
Extra cursorExtra `json:"extra"`
186+
Cached bool `json:"cached,omitempty"`
187+
NextBatchID string `json:"nextBatchId,omitempty"`
173188
ArangoError
174189
}
175190

@@ -178,22 +193,22 @@ func (c *cursor) relPath() string {
178193
return path.Join(c.db.relPath(), "_api", "cursor")
179194
}
180195

181-
// Name returns the name of the collection.
196+
// HasMore reports whether unread documents remain in the current batch or the server has indicated that more results are available.
182197
func (c *cursor) HasMore() bool {
183198
return c.resultIndex < len(c.Result) || c.cursorData.HasMore
184199
}
185200

186201
// Count returns the total number of result documents available.
187202
// A valid return value is only available when the cursor has been created with a context that was
188-
// prepare with `WithQueryCount`.
203+
// prepared with `WithQueryCount`.
189204
func (c *cursor) Count() int64 {
190205
return c.cursorData.Count
191206
}
192207

193208
// Close deletes the cursor and frees the resources associated with it.
194209
func (c *cursor) Close() error {
195210
if c == nil {
196-
// Avoid panics in the case that someone defer's a close before checking that the cursor is not nil.
211+
// Avoid panics in the case that someone defers a close before checking that the cursor is not nil.
197212
return nil
198213
}
199214
if c := atomic.LoadInt32(&c.closed); c != 0 {
@@ -224,28 +239,60 @@ func (c *cursor) Close() error {
224239
}
225240

226241
// ReadDocument reads the next document from the cursor.
227-
// The document data is stored into result, the document meta data is returned.
242+
// The document data is stored into the result, the document metadata is returned.
228243
// If the cursor has no more documents, a NoMoreDocuments error is returned.
229244
func (c *cursor) ReadDocument(ctx context.Context, result interface{}) (DocumentMeta, error) {
245+
return c.readDocument(ctx, result, "")
246+
}
247+
248+
// RetryReadDocument reads the last document from the cursor one more time.
249+
// It can be used, e.g., in case of a network error during ReadDocument.
250+
// It requires 'driver.WithQueryAllowRetry' to be set to true on the Context during Cursor creation.
251+
func (c *cursor) RetryReadDocument(ctx context.Context, result interface{}) (DocumentMeta, error) {
252+
if c.resultIndex > 0 {
253+
c.resultIndex--
254+
}
255+
return c.readDocument(ctx, result, c.retryData.currentBatchID)
256+
}
257+
258+
func (c *cursor) readDocument(ctx context.Context, result interface{}, retryBatchID string) (DocumentMeta, error) {
230259
// Force use of initial endpoint
231260
ctx = WithEndpoint(ctx, c.endpoint)
232261

233-
if c.resultIndex >= len(c.Result) && c.cursorData.HasMore {
234-
// This is required since we are interested if this was a dirty read
262+
if c.resultIndex >= len(c.Result) && (c.cursorData.HasMore || retryBatchID != "") {
263+
// This is required since we are interested if this was a dirty read,
235264
// but we do not want to trash the user's bool reference.
236265
var wasDirtyRead bool
237-
fetchctx := ctx
266+
fetchCtx := ctx
238267
if c.allowDirtyReads {
239-
fetchctx = WithAllowDirtyReads(ctx, &wasDirtyRead)
268+
fetchCtx = WithAllowDirtyReads(ctx, &wasDirtyRead)
269+
}
270+
271+
p := path.Join(c.relPath(), c.cursorData.ID)
272+
273+
// If we have a NextBatchID, use it
274+
if c.NextBatchID != "" {
275+
p = path.Join(c.relPath(), c.cursorData.ID, c.NextBatchID)
240276
}
241277

242-
// Fetch next batch
243-
req, err := c.conn.NewRequest("PUT", path.Join(c.relPath(), c.cursorData.ID))
278+
// We have to retry the batch instead of fetching the next one
279+
if retryBatchID != "" {
280+
p = path.Join(c.relPath(), c.retryData.cursorID, retryBatchID)
281+
}
282+
283+
// Update currentBatchID before fetching the next batch (no retry case)
284+
if c.NextBatchID != "" && retryBatchID == "" {
285+
c.retryData.currentBatchID = c.NextBatchID
286+
}
287+
288+
// Fetch the next batch
289+
req, err := c.conn.NewRequest("POST", p)
244290
if err != nil {
245291
return DocumentMeta{}, WithStack(err)
246292
}
247-
cs := applyContextSettings(fetchctx, req)
248-
resp, err := c.conn.Do(fetchctx, req)
293+
294+
cs := applyContextSettings(fetchCtx, req)
295+
resp, err := c.conn.Do(fetchCtx, req)
249296
if err != nil {
250297
return DocumentMeta{}, WithStack(err)
251298
}
@@ -295,7 +342,7 @@ func (c *cursor) ReadDocument(ctx context.Context, result interface{}) (Document
295342
return meta, nil
296343
}
297344

298-
// Return execution statistics for this cursor. This might not
345+
// Statistics returns execution statistics for this cursor. This might not
299346
// be valid if the cursor has been created with a context that was
300347
// prepared with `WithStream`
301348
func (c *cursor) Statistics() QueryStatistics {
@@ -306,40 +353,40 @@ func (c *cursor) Extra() QueryExtra {
306353
return c.cursorData.Extra
307354
}
308355

309-
// the total number of data-modification operations successfully executed.
356+
// WritesExecuted returns the total number of data-modification operations successfully executed.
310357
func (cs cursorStats) WritesExecuted() int64 {
311358
return cs.WritesExecutedInt
312359
}
313360

314-
// The total number of data-modification operations that were unsuccessful
361+
// WritesIgnored returns the total number of data-modification operations that were unsuccessful.
315362
func (cs cursorStats) WritesIgnored() int64 {
316363
return cs.WritesIgnoredInt
317364
}
318365

319-
// The total number of documents iterated over when scanning a collection without an index.
366+
// ScannedFull returns the total number of documents iterated over when scanning a collection without an index.
320367
func (cs cursorStats) ScannedFull() int64 {
321368
return cs.ScannedFullInt
322369
}
323370

324-
// The total number of documents iterated over when scanning a collection using an index.
371+
// ScannedIndex returns the total number of documents iterated over when scanning a collection using an index.
325372
func (cs cursorStats) ScannedIndex() int64 {
326373
return cs.ScannedIndexInt
327374
}
328375

329-
// the total number of documents that were removed after executing a filter condition in a FilterNode
376+
// Filtered returns the total number of documents that were removed after executing a filter condition in a FilterNode.
330377
func (cs cursorStats) Filtered() int64 {
331378
return cs.FilteredInt
332379
}
333380

334-
// Returns the numer of results before the last LIMIT in the query was applied.
381+
// FullCount returns the number of results before the last LIMIT in the query was applied.
335382
// A valid return value is only available when the cursor has been created with a context that was
336-
// prepared with `WithFullCount`. Additionally this will also not return a valid value if
383+
// prepared with `WithFullCount`. Additionally, this will also not return a valid value if
337384
// the context was prepared with `WithStream`.
338385
func (cs cursorStats) FullCount() int64 {
339386
return cs.FullCountInt
340387
}
341388

342-
// query execution time (wall-clock time). value will be set from the outside
389+
// ExecutionTime returns the query execution time (wall-clock time). The value will be set from the outside.
343390
func (cs cursorStats) ExecutionTime() time.Duration {
344391
return time.Duration(cs.ExecutionTimeInt * float64(time.Second))
345392
}

0 commit comments

Comments
 (0)