@@ -48,8 +48,6 @@ protected function getUpdateKey() {
48
48
}
49
49
50
50
protected function doDBUpdates () {
51
- $ batchSize = $ this ->getBatchSize ();
52
-
53
51
$ dbw = $ this ->getDB ( DB_PRIMARY );
54
52
if ( !$ dbw ->fieldExists ( 'revision ' , 'rev_comment_id ' , __METHOD__ ) ) {
55
53
$ this ->output ( "Run update.php to create rev_comment_id. \n" );
@@ -61,48 +59,41 @@ protected function doDBUpdates() {
61
59
}
62
60
63
61
$ this ->output ( "Merging the revision_comment_temp table into the revision table... \n" );
64
- $ conds = [];
65
62
$ updated = 0 ;
66
- $ sleep = (int )$ this ->getOption ( 'sleep ' , 0 );
63
+ $ highestRevId = (int )$ dbw ->newSelectQueryBuilder ()
64
+ ->select ( 'rev_id ' )
65
+ ->from ( 'revision ' )
66
+ ->limit ( 1 )
67
+ ->caller ( __METHOD__ )
68
+ ->orderBy ( 'rev_id ' , 'DESC ' )
69
+ ->fetchField ();
70
+ $ this ->output ( "Max rev_id $ highestRevId. \n" );
71
+ // Default batchSize from "$this->getBatchSize()" is 200, use 1000 to speed migration up
72
+ // There is "$this->waitForReplication()" after each batch anyway
73
+ $ batchSize = 1000 ;
74
+ $ lowId = -1 ;
75
+ $ highId = $ batchSize ;
67
76
while ( true ) {
68
- $ res = $ dbw ->newSelectQueryBuilder ()
69
- ->select ( [ 'rev_id ' , 'revcomment_comment_id ' ] )
70
- ->from ( 'revision ' )
71
- ->join ( 'revision_comment_temp ' , null , 'rev_id=revcomment_rev ' )
72
- ->where ( [ 'rev_comment_id ' => 0 ] )
73
- ->andWhere ( $ conds )
74
- ->limit ( $ batchSize )
75
- ->orderBy ( 'rev_id ' )
76
- ->caller ( __METHOD__ )
77
- ->fetchResultSet ();
78
-
79
- $ numRows = $ res ->numRows ();
80
-
81
- $ last = null ;
82
- foreach ( $ res as $ row ) {
83
- $ last = $ row ->rev_id ;
84
- $ dbw ->newUpdateQueryBuilder ()
85
- ->update ( 'revision ' )
86
- ->set ( [ 'rev_comment_id ' => $ row ->revcomment_comment_id ] )
87
- ->where ( [ 'rev_id ' => $ row ->rev_id ] )
88
- ->caller ( __METHOD__ )->execute ();
89
- $ updated += $ dbw ->affectedRows ();
90
- }
77
+ // `coalesce` covers the case where a revision has no matching row in revision_comment_temp.
78
+ // The original script used `join`, which skipped a revision row when its `revision_comment_temp` row was missing.
79
+ //
80
+ // Not sure whether we should try to fix the data first
81
+ // RevisionSelectQueryBuilder::joinComment suggests that all revisions should have rev_comment_id set
82
+ $ query = "UPDATE revision
83
+ SET rev_comment_id = COALESCE((SELECT revcomment_comment_id FROM revision_comment_temp WHERE rev_id=revcomment_rev), rev_comment_id)
84
+ WHERE rev_id > $ lowId AND rev_id <= $ highId " ;
85
+ $ dbw ->query ( $ query , __METHOD__ );
86
+ $ affected = $ dbw ->affectedRows ();
87
+ $ updated += $ affected ;
88
+ $ this ->output ( "Updated $ affected revision rows from $ lowId to $ highId \n" );
89
+ $ this ->waitForReplication ();
91
90
92
- if ( $ numRows < $ batchSize ) {
93
- // We must have reached the end
91
+ if ( $ highId > $ highestRevId ) {
92
+ // We reached the end
94
93
break ;
95
94
}
96
-
97
- // @phan-suppress-next-line PhanTypeSuspiciousStringExpression last is not-null when used
98
- $ this ->output ( "... rev_id= $ last, updated $ updated \n" );
99
- $ conds = [ $ dbw ->expr ( 'rev_id ' , '> ' , $ last ) ];
100
-
101
- // Sleep between batches for replication to catch up
102
- $ this ->waitForReplication ();
103
- if ( $ sleep > 0 ) {
104
- sleep ( $ sleep );
105
- }
95
+ $ lowId = $ highId ;
96
+ $ highId = $ lowId + $ batchSize ;
106
97
}
107
98
$ this ->output (
108
99
"Completed merge of revision_comment_temp into the revision table, "
0 commit comments