1
1
/*-
2
2
* Copyright 2003-2005 Colin Percival
3
3
* Copyright 2012 Matthew Endsley
4
- * Copyright 2024 Erick Ortiz
5
4
* All rights reserved
6
5
*
7
6
* Redistribution and use in source and binary forms, with or without
8
- * modification, are permitted providing that the following conditions
7
+ * modification, are permitted providing that the following conditions
9
8
* are met:
10
9
* 1. Redistributions of source code must retain the above copyright
11
10
* notice, this list of conditions and the following disclaimer.
34
33
35
34
#define MIN (x ,y ) (((x)<(y)) ? (x) : (y))
36
35
36
/*
 * Return the median (middle value) of a, b and c.
 *
 * Used by the pivot selection in split() to combine sampled key values.
 * When two or more arguments are equal, that shared value is returned.
 */
static int64_t median3(int64_t a, int64_t b, int64_t c) {
	if (a < b) {
		/* a < b: the median is b unless c falls at or below b. */
		if (b < c) {
			return b;
		}
		return (a < c) ? c : a;
	}
	/* a >= b: the median is b unless c falls at or above b. */
	if (b > c) {
		return b;
	}
	return (a > c) ? c : a;
}
39
+
37
40
static void split (int64_t * indices , int64_t * values , int64_t start , int64_t length , int64_t offset ) {
38
41
int64_t i , j , k , pivotValue , tmp , rangeStart , rangeEnd ;
42
+ int64_t pivotStartValue , pivotEndValue ;
39
43
if (length < 16 ) {
40
44
for (k = start ; k < start + length ; k += j ) {
41
45
j = 1 ;
@@ -57,7 +61,24 @@ static void split(int64_t* indices, int64_t* values, int64_t start, int64_t leng
57
61
}
58
62
return ;
59
63
}
60
- pivotValue = values [indices [start + length / 2 ] + offset ];
64
+
65
+ /* Select pivot, algorithm by Bentley & McIlroy */
66
+ j = start + length / 2 ;
67
+ k = start + length - 1 ;
68
+ pivotValue = values [indices [j ] + offset ];
69
+ pivotStartValue = values [indices [start ] + offset ];
70
+ pivotEndValue = values [indices [k ] + offset ];
71
+ if (length > 40 ) {
72
+ /* Big array: Pseudomedian of 9 */
73
+ tmp = length / 8 ;
74
+ pivotValue = median3 (pivotValue , values [indices [j - tmp ] + offset ], values [indices [j + tmp ] + offset ]);
75
+ pivotStartValue = median3 (pivotStartValue , values [indices [start + tmp ] + offset ],
76
+ values [indices [start + tmp + tmp ] + offset ]);
77
+ pivotEndValue = median3 (pivotEndValue , values [indices [k - tmp ] + offset ],
78
+ values [indices [k - tmp - tmp ] + offset ]);
79
+ } /* Else medium array: Pseudomedian of 3 */
80
+ pivotValue = median3 (pivotValue , pivotStartValue , pivotEndValue );
81
+
61
82
rangeStart = 0 ;
62
83
rangeEnd = 0 ;
63
84
for (i = start ; i < start + length ; i ++ ) {
@@ -115,7 +136,6 @@ static void quickSuffixSort(int64_t* suffixArray, int64_t* sortedGroup, const ui
115
136
charFreq [0 ] = 0 ;
116
137
for (i = 0 ; i < inputSize ; i ++ )
117
138
suffixArray [++ charFreq [inputString [i ]]] = i ;
118
- suffixArray [0 ] = inputSize ;
119
139
for (i = 0 ; i < inputSize ; i ++ )
120
140
sortedGroup [i ] = charFreq [inputString [i ]];
121
141
sortedGroup [inputSize ] = 0 ;
@@ -130,7 +150,8 @@ static void quickSuffixSort(int64_t* suffixArray, int64_t* sortedGroup, const ui
130
150
groupLen -= suffixArray [i ];
131
151
i -= suffixArray [i ];
132
152
} else {
133
- if (groupLen ) suffixArray [i - groupLen ] = - groupLen ;
153
+ if (groupLen )
154
+ suffixArray [i - groupLen ] = - groupLen ;
134
155
groupLen = sortedGroup [suffixArray [i ]] + 1 - i ;
135
156
split (suffixArray , sortedGroup , i , groupLen , height );
136
157
i += groupLen ;
@@ -156,22 +177,26 @@ static int64_t calcMatchingLength(const uint8_t* oldData, int64_t oldDataSize, c
156
177
/*
 * Binary-search suffixArray[start..end] for the suffix of oldData that best
 * matches the beginning of newData.
 *
 * suffixArray        - offsets into oldData; the search assumes they are
 *                      ordered so that the suffixes compare ascending
 *                      (presumably the output of quickSuffixSort - confirm
 *                      against the caller).
 * oldData/oldDataSize - the buffer the suffixes point into.
 * newData/newDataSize - the bytes to look up.
 * start, end         - inclusive index range of suffixArray to search.
 * bestMatchPosition  - out: offset in oldData of the chosen suffix.
 *
 * Returns the value calcMatchingLength() reports for the chosen suffix
 * (calcMatchingLength is defined elsewhere in this file; presumably the
 * length of the common prefix).
 */
static int64_t binSearchSuffixArray(const int64_t* suffixArray, const uint8_t* oldData, int64_t oldDataSize,
									const uint8_t* newData, int64_t newDataSize, int64_t start, int64_t end,
									int64_t* bestMatchPosition) {
	int64_t matchLengthStart, matchLengthEnd, midIndex, cmpsize;
	int res;

	/* Narrow the range until at most two candidates remain. */
	while (end - start >= 2) {
		midIndex = start + (end - start) / 2;
		/* Never compare past the end of either buffer. */
		cmpsize = MIN(oldDataSize - suffixArray[midIndex], newDataSize);
		res = memcmp(oldData + suffixArray[midIndex], newData, (size_t)cmpsize);
		/*
		 * The middle suffix sorts before newData when it compares lower, or
		 * when it is a proper prefix of newData (equal bytes, but the old
		 * suffix ran out first).  In both cases continue in the upper half.
		 */
		if (res < 0 || (res == 0 && cmpsize < newDataSize)) {
			start = midIndex;
		} else {
			end = midIndex;
		}
	}

	/* Pick whichever of the two remaining candidates matches longer. */
	matchLengthStart = calcMatchingLength(oldData + suffixArray[start], oldDataSize - suffixArray[start],
										  newData, newDataSize);
	matchLengthEnd = calcMatchingLength(oldData + suffixArray[end], oldDataSize - suffixArray[end],
										newData, newDataSize);
	if (matchLengthStart > matchLengthEnd) {
		*bestMatchPosition = suffixArray[start];
		return matchLengthStart;
	}
	/* On a tie the candidate at `end` wins. */
	*bestMatchPosition = suffixArray[end];
	return matchLengthEnd;
}
176
201
177
202
static void offsetToBytes (const int64_t offset , uint8_t * bytebuf ) {
@@ -224,28 +249,29 @@ static int64_t writedata(struct bsdiff_stream* stream, const void* buffer, int64
224
249
}
225
250
226
251
/*
 * Everything bsdiff_internal() needs for one diff run, bundled by bsdiff().
 */
struct bsdiff_request {
	const uint8_t* oldData;       /* bytes of the old file */
	int64_t oldDataSize;          /* number of bytes in oldData */
	const uint8_t* newData;       /* bytes of the new file */
	int64_t newDataSize;          /* number of bytes in newData */
	struct bsdiff_stream* stream; /* supplies malloc/free and receives the output via writedata() */
	int64_t* indices;             /* suffix-array storage; bsdiff() allocates oldsize + 1 entries */
	uint8_t* buffer;              /* scratch for diff/extra bytes; bsdiff() allocates newsize + 1 bytes */
};
235
260
236
261
static int bsdiff_internal (const struct bsdiff_request req ) {
237
262
int64_t * suffix_array ,* rank_array ;
238
263
int64_t currentScan , matchedPosition , matchedLength ;
239
- int64_t lastScan , lastMatchedPosition , lastOffset ;
264
+ int64_t lastScan , lastMatchedPosition , lastOffset , lastWriteNewScan , lastWriteOldPosition ;
265
+ int64_t currentControlBlock [3 ], nextControlBlock [3 ];
240
266
int64_t oldscore , scoreCompare ;
241
267
int64_t score , scoreFront , lengthFront , scoreBack , lengthBack ;
242
268
int64_t overlapLength , scoreOverlap , lengthOverlap ;
243
269
int64_t i ;
244
270
uint8_t * diffBuf ;
245
271
uint8_t controlBuf [8 * 3 ];
246
- if ((rank_array = req .stream -> malloc ((req .oldsize + 1 ) * sizeof (int64_t ))) == NULL ) return -1 ;
247
- suffix_array = req .I ;
248
- quickSuffixSort (suffix_array , rank_array , req .old , req .oldsize );
272
+ if ((rank_array = req .stream -> malloc ((req .oldDataSize + 1 ) * sizeof (int64_t ))) == NULL ) return -1 ;
273
+ suffix_array = req .indices ;
274
+ quickSuffixSort (suffix_array , rank_array , req .oldData , req .oldDataSize );
249
275
req .stream -> free (rank_array );
250
276
diffBuf = req .buffer ;
251
277
/* Compute the differences, writing ctrl as we go */
@@ -255,96 +281,139 @@ static int bsdiff_internal(const struct bsdiff_request req) {
255
281
lastScan = 0 ;
256
282
lastMatchedPosition = 0 ;
257
283
lastOffset = 0 ;
258
- while (currentScan < req .newsize ) {
284
+ lastWriteNewScan = 0 ;
285
+ lastWriteOldPosition = 0 ;
286
+ memset (currentControlBlock , 0 , 3 );
287
+ while (currentScan < req .newDataSize ) {
259
288
oldscore = 0 ;
260
- for (scoreCompare = currentScan += matchedLength ; currentScan < req .newsize ; currentScan ++ ) {
261
- matchedLength = binSearchSuffixArray (suffix_array , req .old , req .oldsize , req .new + currentScan ,
262
- req .newsize - currentScan ,
263
- 0 , req .oldsize , & matchedPosition );
289
+ for (scoreCompare = currentScan += matchedLength ; currentScan < req .newDataSize ; currentScan ++ ) {
290
+ matchedLength = binSearchSuffixArray (suffix_array , req .oldData , req .oldDataSize , req .newData + currentScan ,
291
+ req .newDataSize - currentScan ,
292
+ 0 , req .oldDataSize , & matchedPosition );
264
293
for (; scoreCompare < currentScan + matchedLength ; scoreCompare ++ )
265
- if (( scoreCompare + lastOffset < req .oldsize ) &&
266
- ( req .old [scoreCompare + lastOffset ] == req .new [scoreCompare ]) )
294
+ if (scoreCompare + lastOffset < req .oldDataSize &&
295
+ req .oldData [scoreCompare + lastOffset ] == req .newData [scoreCompare ])
267
296
oldscore ++ ;
268
- if ((( matchedLength == oldscore ) && ( matchedLength != 0 ) ) ||
269
- ( matchedLength > oldscore + 8 ) )
297
+ if ((matchedLength == oldscore && matchedLength != 0 ) ||
298
+ matchedLength > oldscore + 8 )
270
299
break ;
271
- if (( currentScan + lastOffset < req .oldsize ) &&
272
- ( req .old [currentScan + lastOffset ] == req .new [currentScan ]) )
300
+ if (currentScan + lastOffset < req .oldDataSize &&
301
+ req .oldData [currentScan + lastOffset ] == req .newData [currentScan ])
273
302
oldscore -- ;
274
303
}
275
- if (matchedLength != oldscore || currentScan == req .newsize ) {
304
+ if (matchedLength != oldscore || currentScan == req .newDataSize ) {
276
305
score = 0 ;
277
306
scoreFront = 0 ;
278
307
lengthFront = 0 ;
279
- for (i = 0 ; ( lastScan + i < currentScan ) && ( lastMatchedPosition + i < req .oldsize ) ;) {
280
- if (req .old [lastMatchedPosition + i ] == req .new [lastScan + i ]) score ++ ;
308
+ for (i = 0 ; lastScan + i < currentScan && lastMatchedPosition + i < req .oldDataSize ;) {
309
+ if (req .oldData [lastMatchedPosition + i ] == req .newData [lastScan + i ]) score ++ ;
281
310
i ++ ;
282
311
if (score * 2 - i > scoreFront * 2 - lengthFront ) {
283
312
scoreFront = score ;
284
313
lengthFront = i ;
285
- };
286
- };
314
+ }
315
+ }
287
316
288
317
lengthBack = 0 ;
289
- if (currentScan < req .newsize ) {
318
+ if (currentScan < req .newDataSize ) {
290
319
score = 0 ;
291
320
scoreBack = 0 ;
292
321
for (i = 1 ; (currentScan >= lastScan + i ) && (matchedPosition >= i ); i ++ ) {
293
- if (req .old [matchedPosition - i ] == req .new [currentScan - i ]) score ++ ;
322
+ if (req .oldData [matchedPosition - i ] == req .newData [currentScan - i ]) score ++ ;
294
323
if (score * 2 - i > scoreBack * 2 - lengthBack ) {
295
324
scoreBack = score ;
296
325
lengthBack = i ;
297
- };
298
- };
299
- };
326
+ }
327
+ }
328
+ }
300
329
301
330
if (lastScan + lengthFront > currentScan - lengthBack ) {
302
331
overlapLength = (lastScan + lengthFront ) - (currentScan - lengthBack );
303
332
score = 0 ;
304
333
scoreOverlap = 0 ;
305
334
lengthOverlap = 0 ;
306
335
for (i = 0 ; i < overlapLength ; i ++ ) {
307
- if (req .new [lastScan + lengthFront - overlapLength + i ] ==
308
- req .old [lastMatchedPosition + lengthFront - overlapLength + i ])
336
+ if (req .newData [lastScan + lengthFront - overlapLength + i ] ==
337
+ req .oldData [lastMatchedPosition + lengthFront - overlapLength + i ])
309
338
score ++ ;
310
- if (req .new [currentScan - lengthBack + i ] ==
311
- req .old [matchedPosition - lengthBack + i ])
339
+ if (req .newData [currentScan - lengthBack + i ] ==
340
+ req .oldData [matchedPosition - lengthBack + i ])
312
341
score -- ;
313
342
if (score > scoreOverlap ) {
314
343
scoreOverlap = score ;
315
344
lengthOverlap = i + 1 ;
316
- };
317
- };
345
+ }
346
+ }
318
347
319
348
lengthFront += lengthOverlap - overlapLength ;
320
349
lengthBack -= lengthOverlap ;
321
- };
350
+ }
322
351
323
- offsetToBytes ( lengthFront , controlBuf ) ;
324
- offsetToBytes (( currentScan - lengthBack ) - (lastScan + lengthFront ), controlBuf + 8 );
325
- offsetToBytes (( matchedPosition - lengthBack ) - (lastMatchedPosition + lengthFront ), controlBuf + 16 );
352
+ nextControlBlock [ 0 ] = lengthFront ;
353
+ nextControlBlock [ 1 ] = currentScan - lengthBack - (lastScan + lengthFront );
354
+ nextControlBlock [ 2 ] = matchedPosition - lengthBack - (lastMatchedPosition + lengthFront );
326
355
327
- /* Write control data */
328
- if (writedata (req .stream , controlBuf , sizeof (controlBuf )))
329
- return -1 ;
356
+ if (nextControlBlock [0 ]) {
357
+ if (currentControlBlock [0 ] || currentControlBlock [1 ] || currentControlBlock [2 ]) {
358
+ offsetToBytes (currentControlBlock [0 ], controlBuf );
359
+ offsetToBytes (currentControlBlock [1 ], controlBuf + 8 );
360
+ offsetToBytes (currentControlBlock [2 ], controlBuf + 16 );
330
361
331
- /* Write diff data */
332
- for (i = 0 ; i < lengthFront ; i ++ )
333
- diffBuf [i ] = req .new [lastScan + i ] - req .old [lastMatchedPosition + i ];
334
- if (writedata (req .stream , diffBuf , lengthFront ))
335
- return -1 ;
362
+ /* Write control data */
363
+ if (writedata (req .stream , controlBuf , sizeof (controlBuf )))
364
+ return -1 ;
336
365
337
- /* Write extra data */
338
- for (i = 0 ; i < (currentScan - lengthBack ) - (lastScan + lengthFront ); i ++ )
339
- diffBuf [i ] = req .new [lastScan + lengthFront + i ];
340
- if (writedata (req .stream , diffBuf , (currentScan - lengthBack ) - (lastScan + lengthFront )))
341
- return -1 ;
366
+ /* Write diff data */
367
+ for (i = 0 ; i < currentControlBlock [0 ]; i ++ )
368
+ diffBuf [i ] = req .newData [lastWriteNewScan + i ] - req .oldData [lastWriteOldPosition + i ];
369
+
370
+ if (writedata (req .stream , diffBuf , currentControlBlock [0 ]))
371
+ return -1 ;
372
+
373
+ /* Write extra data */
374
+ for (i = 0 ; i < currentControlBlock [1 ]; i ++ )
375
+ diffBuf [i ] = req .newData [lastWriteNewScan + currentControlBlock [0 ] + i ];
376
+ if (writedata (req .stream , diffBuf , currentControlBlock [1 ]))
377
+ return -1 ;
378
+
379
+ lastWriteNewScan = lastScan ;
380
+ lastWriteOldPosition = lastMatchedPosition ;
381
+ }
382
+ currentControlBlock [0 ] = nextControlBlock [0 ];
383
+ currentControlBlock [1 ] = nextControlBlock [1 ];
384
+ currentControlBlock [2 ] = nextControlBlock [2 ];
385
+ } else {
386
+ currentControlBlock [1 ] += nextControlBlock [1 ];
387
+ currentControlBlock [2 ] += nextControlBlock [2 ];
388
+ }
342
389
343
390
lastScan = currentScan - lengthBack ;
344
391
lastMatchedPosition = matchedPosition - lengthBack ;
345
392
lastOffset = matchedPosition - currentScan ;
346
- };
347
- };
393
+ }
394
+ }
395
+
396
+ if (currentControlBlock [0 ] || currentControlBlock [1 ]) {
397
+ offsetToBytes (currentControlBlock [0 ], controlBuf );
398
+ offsetToBytes (currentControlBlock [1 ], controlBuf + 8 );
399
+ offsetToBytes (currentControlBlock [2 ], controlBuf + 16 );
400
+
401
+ /* Write control data */
402
+ if (writedata (req .stream , controlBuf , sizeof (controlBuf )))
403
+ return -1 ;
404
+
405
+ /* Write diff data */
406
+ for (i = 0 ; i < currentControlBlock [0 ]; i ++ )
407
+ diffBuf [i ] = req .newData [lastWriteNewScan + i ] - req .oldData [lastWriteOldPosition + i ];
408
+ if (writedata (req .stream , diffBuf , currentControlBlock [0 ]))
409
+ return -1 ;
410
+
411
+ /* Write extra data */
412
+ for (i = 0 ; i < currentControlBlock [1 ]; i ++ )
413
+ diffBuf [i ] = req .newData [lastWriteNewScan + currentControlBlock [0 ] + i ];
414
+ if (writedata (req .stream , diffBuf , currentControlBlock [1 ]))
415
+ return -1 ;
416
+ }
348
417
349
418
return 0 ;
350
419
}
@@ -353,24 +422,24 @@ int bsdiff(const uint8_t* old, int64_t oldsize, const uint8_t* new, int64_t news
353
422
int result ;
354
423
struct bsdiff_request req ;
355
424
356
- if ((req .I = stream -> malloc ((oldsize + 1 ) * sizeof (int64_t ))) == NULL )
425
+ if ((req .indices = stream -> malloc ((oldsize + 1 ) * sizeof (int64_t ))) == NULL )
357
426
return -1 ;
358
427
359
428
if ((req .buffer = stream -> malloc (newsize + 1 )) == NULL ) {
360
- stream -> free (req .I );
429
+ stream -> free (req .indices );
361
430
return -1 ;
362
431
}
363
432
364
- req .old = old ;
365
- req .oldsize = oldsize ;
366
- req .new = new ;
367
- req .newsize = newsize ;
433
+ req .oldData = old ;
434
+ req .oldDataSize = oldsize ;
435
+ req .newData = new ;
436
+ req .newDataSize = newsize ;
368
437
req .stream = stream ;
369
438
370
439
result = bsdiff_internal (req );
371
440
372
441
stream -> free (req .buffer );
373
- stream -> free (req .I );
442
+ stream -> free (req .indices );
374
443
375
444
return result ;
376
445
}
@@ -464,7 +533,7 @@ int main(int argc, char* argv[]) {
464
533
return 1 ;
465
534
}
466
535
467
- /* Write header (signature+newsize)*/
536
+ /* Write header (signature+newsize) */
468
537
offsetToBytes (newsize , buf );
469
538
if (fwrite ("ENDSLEY/BSDIFF43" , 16 , 1 , pf ) != 1 ||
470
539
fwrite (buf , sizeof (buf ), 1 , pf ) != 1 ) {
0 commit comments