@@ -107,15 +107,16 @@ impl<'a> SyncOperation<'a> {
107
107
while statement. step ( ) . into_db_result ( self . db ) ? == ResultCode :: ROW {
108
108
let type_name = statement. column_text ( 0 ) ?;
109
109
let id = statement. column_text ( 1 ) ?;
110
- let buckets = statement. column_int ( 3 ) ;
111
110
let data = statement. column_text ( 2 ) ;
112
111
113
112
let table_name = internal_table_name ( type_name) ;
114
113
115
114
if self . data_tables . contains ( & table_name) {
116
115
let quoted = quote_internal_name ( type_name, false ) ;
117
116
118
- if buckets == 0 {
117
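+ // is_err() doubles as a NULL check here (column_text is expected to fail on NULL). NOTE(review): any other read error would take this branch too - confirm that is intended.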
118
+ // NULL data means no PUT operations found, so we delete the row.
119
+ if data. is_err ( ) {
119
120
// DELETE
120
121
let delete_statement = self
121
122
. db
@@ -134,7 +135,7 @@ impl<'a> SyncOperation<'a> {
134
135
insert_statement. exec ( ) ?;
135
136
}
136
137
} else {
137
- if buckets == 0 {
138
+ if data . is_err ( ) {
138
139
// DELETE
139
140
// language=SQLite
140
141
let delete_statement = self
@@ -185,32 +186,29 @@ impl<'a> SyncOperation<'a> {
185
186
Ok ( match & self . partial {
186
187
None => {
187
188
// Complete sync
189
+ // See dart/test/sync_local_performance_test.dart for an annotated version of this query.
188
190
self . db
189
191
. prepare_v2 (
190
192
"\
191
- -- 1. Filter oplog by the ops added but not applied yet (oplog b).
192
- -- SELECT DISTINCT / UNION is important for cases with many duplicate ids.
193
193
WITH updated_rows AS (
194
- SELECT DISTINCT b.row_type, b.row_id FROM ps_buckets AS buckets
195
- CROSS JOIN ps_oplog AS b ON b.bucket = buckets.id
196
- AND (b.op_id > buckets.last_applied_op)
197
- UNION SELECT row_type, row_id FROM ps_updated_rows
194
+ SELECT b.row_type, b.row_id FROM ps_buckets AS buckets
195
+ CROSS JOIN ps_oplog AS b ON b.bucket = buckets.id
196
+ AND (b.op_id > buckets.last_applied_op)
197
+ UNION ALL SELECT row_type, row_id FROM ps_updated_rows
198
198
)
199
199
200
- -- 3. Group the objects from different buckets together into a single one (ops).
201
- SELECT b.row_type as type,
202
- b.row_id as id,
203
- r.data as data,
204
- count(r.bucket) as buckets,
205
- /* max() affects which row is used for 'data' */
206
- max(r.op_id) as op_id
207
- -- 2. Find *all* current ops over different buckets for those objects (oplog r).
208
- FROM updated_rows b
209
- LEFT OUTER JOIN ps_oplog AS r
210
- ON r.row_type = b.row_type
211
- AND r.row_id = b.row_id
212
- -- Group for (3)
213
- GROUP BY b.row_type, b.row_id" ,
200
+ SELECT
201
+ b.row_type,
202
+ b.row_id,
203
+ (
204
+ SELECT iif(max(r.op_id), r.data, null)
205
+ FROM ps_oplog r
206
+ WHERE r.row_type = b.row_type
207
+ AND r.row_id = b.row_id
208
+
209
+ ) as data
210
+ FROM updated_rows b
211
+ GROUP BY b.row_type, b.row_id;" ,
214
212
)
215
213
. into_db_result ( self . db ) ?
216
214
}
@@ -220,33 +218,38 @@ GROUP BY b.row_type, b.row_id",
220
218
. prepare_v2 (
221
219
"\
222
220
-- 1. Filter oplog by the ops added but not applied yet (oplog b).
223
- -- SELECT DISTINCT / UNION is important for cases with many duplicate ids.
221
+ -- We do not do any DISTINCT operation here, since that introduces a temp b-tree.
222
+ -- We filter out duplicates using the GROUP BY below.
224
223
WITH
225
224
involved_buckets (id) AS MATERIALIZED (
226
225
SELECT id FROM ps_buckets WHERE ?1 IS NULL
227
226
OR name IN (SELECT value FROM json_each(json_extract(?1, '$.buckets')))
228
227
),
229
228
updated_rows AS (
230
- SELECT DISTINCT FALSE as local, b.row_type, b.row_id FROM ps_buckets AS buckets
231
- CROSS JOIN ps_oplog AS b ON b.bucket = buckets.id AND (b.op_id > buckets.last_applied_op)
232
- WHERE buckets.id IN (SELECT id FROM involved_buckets)
229
+ SELECT b.row_type, b.row_id FROM ps_buckets AS buckets
230
+ CROSS JOIN ps_oplog AS b ON b.bucket = buckets.id
231
+ AND (b.op_id > buckets.last_applied_op)
232
+ WHERE buckets.id IN (SELECT id FROM involved_buckets)
233
233
)
234
234
235
- -- 3. Group the objects from different buckets together into a single one (ops).
236
- SELECT b.row_type as type,
237
- b.row_id as id,
238
- r.data as data,
239
- count(r.bucket) as buckets,
240
- /* max() affects which row is used for 'data' */
241
- max(r.op_id) as op_id
242
235
-- 2. Find *all* current ops over different buckets for those objects (oplog r).
243
- FROM updated_rows b
244
- LEFT OUTER JOIN ps_oplog AS r
245
- ON r.row_type = b.row_type
246
- AND r.row_id = b.row_id
247
- AND r.bucket IN (SELECT id FROM involved_buckets)
248
- -- Group for (3)
249
- GROUP BY b.row_type, b.row_id" ,
236
+ SELECT
237
+ b.row_type,
238
+ b.row_id,
239
+ (
240
+ -- 3. For each unique row, select the data from the latest oplog entry.
241
+ -- The max(r.op_id) clause is used to select the latest oplog entry.
242
+ -- The iif is to avoid the max(r.op_id) column ending up in the results.
243
+ SELECT iif(max(r.op_id), r.data, null)
244
+ FROM ps_oplog r
245
+ WHERE r.row_type = b.row_type
246
+ AND r.row_id = b.row_id
247
+ AND r.bucket IN (SELECT id FROM involved_buckets)
248
+
249
+ ) as data
250
+ FROM updated_rows b
251
+ -- Group for (2)
252
+ GROUP BY b.row_type, b.row_id;" ,
250
253
)
251
254
. into_db_result ( self . db ) ?;
252
255
stmt. bind_text ( 1 , partial. args , Destructor :: STATIC ) ?;
0 commit comments