Skip to content

Commit 123771b

Browse files
committed
1 parent 1cc74cc commit 123771b

File tree

1 file changed

+148
-79
lines changed

1 file changed

+148
-79
lines changed

bsdiff.c

+148-79
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
/*-
22
* Copyright 2003-2005 Colin Percival
33
* Copyright 2012 Matthew Endsley
4-
* Copyright 2024 Erick Ortiz
54
* All rights reserved
65
*
76
* Redistribution and use in source and binary forms, with or without
8-
* modification, are permitted providing that the following conditions
7+
* modification, are permitted providing that the following conditions
98
* are met:
109
* 1. Redistributions of source code must retain the above copyright
1110
* notice, this list of conditions and the following disclaimer.
@@ -34,8 +33,13 @@
3433

3534
#define MIN(x,y) (((x)<(y)) ? (x) : (y))
3635

36+
/* Return the middle value of the three arguments (the median of a, b and c). */
static int64_t median3(int64_t a, int64_t b, int64_t c) {
    if (a < b) {
        if (b < c) {
            return b;               /* a < b < c */
        }
        return (a < c) ? c : a;     /* c <= b, so the median is the larger of a and c */
    }
    if (b > c) {
        return b;                   /* a >= b > c */
    }
    return (a > c) ? c : a;         /* b <= c, so the median is the smaller of a and c */
}
39+
3740
static void split(int64_t* indices, int64_t* values, int64_t start, int64_t length, int64_t offset) {
3841
int64_t i, j, k, pivotValue, tmp, rangeStart, rangeEnd;
42+
int64_t pivotStartValue, pivotEndValue;
3943
if (length < 16) {
4044
for (k = start; k < start + length; k += j) {
4145
j = 1;
@@ -57,7 +61,24 @@ static void split(int64_t* indices, int64_t* values, int64_t start, int64_t leng
5761
}
5862
return;
5963
}
60-
pivotValue = values[indices[start + length / 2] + offset];
64+
65+
/* Select pivot, algorithm by Bentley & McIlroy */
66+
j = start + length / 2;
67+
k = start + length - 1;
68+
pivotValue = values[indices[j] + offset];
69+
pivotStartValue = values[indices[start] + offset];
70+
pivotEndValue = values[indices[k] + offset];
71+
if (length > 40) {
72+
/* Big array: Pseudomedian of 9 */
73+
tmp = length / 8;
74+
pivotValue = median3(pivotValue, values[indices[j - tmp] + offset], values[indices[j + tmp] + offset]);
75+
pivotStartValue = median3(pivotStartValue, values[indices[start + tmp] + offset],
76+
values[indices[start + tmp + tmp] + offset]);
77+
pivotEndValue = median3(pivotEndValue, values[indices[k - tmp] + offset],
78+
values[indices[k - tmp - tmp] + offset]);
79+
} /* Else medium array: Pseudomedian of 3 */
80+
pivotValue = median3(pivotValue, pivotStartValue, pivotEndValue);
81+
6182
rangeStart = 0;
6283
rangeEnd = 0;
6384
for (i = start; i < start + length; i++) {
@@ -115,7 +136,6 @@ static void quickSuffixSort(int64_t* suffixArray, int64_t* sortedGroup, const ui
115136
charFreq[0] = 0;
116137
for (i = 0; i < inputSize; i++)
117138
suffixArray[++charFreq[inputString[i]]] = i;
118-
suffixArray[0] = inputSize;
119139
for (i = 0; i < inputSize; i++)
120140
sortedGroup[i] = charFreq[inputString[i]];
121141
sortedGroup[inputSize] = 0;
@@ -130,7 +150,8 @@ static void quickSuffixSort(int64_t* suffixArray, int64_t* sortedGroup, const ui
130150
groupLen -= suffixArray[i];
131151
i -= suffixArray[i];
132152
} else {
133-
if (groupLen) suffixArray[i - groupLen] = -groupLen;
153+
if (groupLen)
154+
suffixArray[i - groupLen] = -groupLen;
134155
groupLen = sortedGroup[suffixArray[i]] + 1 - i;
135156
split(suffixArray, sortedGroup, i, groupLen, height);
136157
i += groupLen;
@@ -156,22 +177,26 @@ static int64_t calcMatchingLength(const uint8_t* oldData, int64_t oldDataSize, c
156177
/*
 * Binary-search suffixArray[start..end] (both ends inclusive) for the suffix of
 * oldData that shares the longest prefix with newData.
 *
 * suffixArray        start offsets into oldData; the caller passes the array
 *                    filled in by quickSuffixSort
 * oldData            old buffer, oldDataSize bytes long
 * newData            bytes to look up, newDataSize bytes long
 * start, end         current search window (indices into suffixArray)
 * bestMatchPosition  out: offset into oldData of the chosen suffix
 *
 * Returns the length of the match reported by calcMatchingLength for the
 * chosen suffix.
 */
static int64_t binSearchSuffixArray(const int64_t* suffixArray, const uint8_t* oldData, int64_t oldDataSize,
                                    const uint8_t* newData, int64_t newDataSize, int64_t start, int64_t end,
                                    int64_t* bestMatchPosition) {
    int64_t matchLengthStart, matchLengthEnd, midIndex, cmpsize;
    int res;

    /* Window narrowed to at most two candidates: measure both, keep the longer. */
    if (end - start < 2) {
        matchLengthStart = calcMatchingLength(oldData + suffixArray[start], oldDataSize - suffixArray[start],
                                              newData, newDataSize);
        matchLengthEnd = calcMatchingLength(oldData + suffixArray[end], oldDataSize - suffixArray[end],
                                            newData, newDataSize);
        if (matchLengthStart > matchLengthEnd) {
            *bestMatchPosition = suffixArray[start];
            return matchLengthStart;
        }
        *bestMatchPosition = suffixArray[end];
        return matchLengthEnd;
    }

    midIndex = start + (end - start) / 2;

    /*
     * Compare the middle suffix with newData exactly once. The comparison
     * length must be derived from midIndex; matchLengthStart is not assigned on
     * this path and must never be used as an index here.
     */
    cmpsize = MIN(oldDataSize - suffixArray[midIndex], newDataSize);
    res = memcmp(oldData + suffixArray[midIndex], newData, cmpsize);

    /*
     * res == 0 with cmpsize < newDataSize means the old suffix ended before
     * newData did while every compared byte was equal, i.e. the old suffix is a
     * proper prefix of newData. It is treated as ordered before newData, so
     * the search continues in the upper half.
     */
    if (res < 0 || (res == 0 && cmpsize < newDataSize))
        return binSearchSuffixArray(suffixArray, oldData, oldDataSize, newData, newDataSize,
                                    midIndex, end, bestMatchPosition);

    return binSearchSuffixArray(suffixArray, oldData, oldDataSize, newData, newDataSize,
                                start, midIndex, bestMatchPosition);
}
176201

177202
static void offsetToBytes(const int64_t offset, uint8_t* bytebuf) {
@@ -224,28 +249,29 @@ static int64_t writedata(struct bsdiff_stream* stream, const void* buffer, int64
224249
}
225250

226251
/* Everything bsdiff_internal() needs for one diff run, filled in by bsdiff(). */
struct bsdiff_request {
    const uint8_t* oldData;         /* old input buffer */
    int64_t oldDataSize;            /* number of bytes in oldData */
    const uint8_t* newData;         /* new input buffer */
    int64_t newDataSize;            /* number of bytes in newData */
    struct bsdiff_stream* stream;   /* supplies malloc/free and receives the written output */
    int64_t* indices;               /* suffix-array storage, allocated with oldsize + 1 entries */
    uint8_t* buffer;                /* scratch for diff/extra bytes, allocated with newsize + 1 bytes */
};
235260

236261
static int bsdiff_internal(const struct bsdiff_request req) {
237262
int64_t* suffix_array,* rank_array;
238263
int64_t currentScan, matchedPosition, matchedLength;
239-
int64_t lastScan, lastMatchedPosition, lastOffset;
264+
int64_t lastScan, lastMatchedPosition, lastOffset, lastWriteNewScan, lastWriteOldPosition;
265+
int64_t currentControlBlock[3], nextControlBlock[3];
240266
int64_t oldscore, scoreCompare;
241267
int64_t score, scoreFront, lengthFront, scoreBack, lengthBack;
242268
int64_t overlapLength, scoreOverlap, lengthOverlap;
243269
int64_t i;
244270
uint8_t* diffBuf;
245271
uint8_t controlBuf[8 * 3];
246-
if ((rank_array = req.stream->malloc((req.oldsize + 1) * sizeof(int64_t))) == NULL) return -1;
247-
suffix_array = req.I;
248-
quickSuffixSort(suffix_array, rank_array, req.old, req.oldsize);
272+
if ((rank_array = req.stream->malloc((req.oldDataSize + 1) * sizeof(int64_t))) == NULL) return -1;
273+
suffix_array = req.indices;
274+
quickSuffixSort(suffix_array, rank_array, req.oldData, req.oldDataSize);
249275
req.stream->free(rank_array);
250276
diffBuf = req.buffer;
251277
/* Compute the differences, writing ctrl as we go */
@@ -255,96 +281,139 @@ static int bsdiff_internal(const struct bsdiff_request req) {
255281
lastScan = 0;
256282
lastMatchedPosition = 0;
257283
lastOffset = 0;
258-
while (currentScan < req.newsize) {
284+
lastWriteNewScan = 0;
285+
lastWriteOldPosition = 0;
286+
/* Clear the whole pending control block: memset counts bytes, so a literal 3
 * would zero only three bytes of the first int64_t and leave the rest
 * indeterminate before it is tested and accumulated into below. */
memset(currentControlBlock, 0, sizeof(currentControlBlock));
287+
while (currentScan < req.newDataSize) {
259288
oldscore = 0;
260-
for (scoreCompare = currentScan += matchedLength; currentScan < req.newsize; currentScan++) {
261-
matchedLength = binSearchSuffixArray(suffix_array, req.old, req.oldsize, req.new + currentScan,
262-
req.newsize - currentScan,
263-
0, req.oldsize, &matchedPosition);
289+
for (scoreCompare = currentScan += matchedLength; currentScan < req.newDataSize; currentScan++) {
290+
matchedLength = binSearchSuffixArray(suffix_array, req.oldData, req.oldDataSize, req.newData + currentScan,
291+
req.newDataSize - currentScan,
292+
0, req.oldDataSize, &matchedPosition);
264293
for (; scoreCompare < currentScan + matchedLength; scoreCompare++)
265-
if ((scoreCompare + lastOffset < req.oldsize) &&
266-
(req.old[scoreCompare + lastOffset] == req.new[scoreCompare]))
294+
if (scoreCompare + lastOffset < req.oldDataSize &&
295+
req.oldData[scoreCompare + lastOffset] == req.newData[scoreCompare])
267296
oldscore++;
268-
if (((matchedLength == oldscore) && (matchedLength != 0)) ||
269-
(matchedLength > oldscore + 8))
297+
if ((matchedLength == oldscore && matchedLength != 0) ||
298+
matchedLength > oldscore + 8)
270299
break;
271-
if ((currentScan + lastOffset < req.oldsize) &&
272-
(req.old[currentScan + lastOffset] == req.new[currentScan]))
300+
if (currentScan + lastOffset < req.oldDataSize &&
301+
req.oldData[currentScan + lastOffset] == req.newData[currentScan])
273302
oldscore--;
274303
}
275-
if (matchedLength != oldscore || currentScan == req.newsize) {
304+
if (matchedLength != oldscore || currentScan == req.newDataSize) {
276305
score = 0;
277306
scoreFront = 0;
278307
lengthFront = 0;
279-
for (i = 0; (lastScan + i < currentScan) && (lastMatchedPosition + i < req.oldsize);) {
280-
if (req.old[lastMatchedPosition + i] == req.new[lastScan + i]) score++;
308+
for (i = 0; lastScan + i < currentScan && lastMatchedPosition + i < req.oldDataSize;) {
309+
if (req.oldData[lastMatchedPosition + i] == req.newData[lastScan + i]) score++;
281310
i++;
282311
if (score * 2 - i > scoreFront * 2 - lengthFront) {
283312
scoreFront = score;
284313
lengthFront = i;
285-
};
286-
};
314+
}
315+
}
287316

288317
lengthBack = 0;
289-
if (currentScan < req.newsize) {
318+
if (currentScan < req.newDataSize) {
290319
score = 0;
291320
scoreBack = 0;
292321
for (i = 1; (currentScan >= lastScan + i) && (matchedPosition >= i); i++) {
293-
if (req.old[matchedPosition - i] == req.new[currentScan - i]) score++;
322+
if (req.oldData[matchedPosition - i] == req.newData[currentScan - i]) score++;
294323
if (score * 2 - i > scoreBack * 2 - lengthBack) {
295324
scoreBack = score;
296325
lengthBack = i;
297-
};
298-
};
299-
};
326+
}
327+
}
328+
}
300329

301330
if (lastScan + lengthFront > currentScan - lengthBack) {
302331
overlapLength = (lastScan + lengthFront) - (currentScan - lengthBack);
303332
score = 0;
304333
scoreOverlap = 0;
305334
lengthOverlap = 0;
306335
for (i = 0; i < overlapLength; i++) {
307-
if (req.new[lastScan + lengthFront - overlapLength + i] ==
308-
req.old[lastMatchedPosition + lengthFront - overlapLength + i])
336+
if (req.newData[lastScan + lengthFront - overlapLength + i] ==
337+
req.oldData[lastMatchedPosition + lengthFront - overlapLength + i])
309338
score++;
310-
if (req.new[currentScan - lengthBack + i] ==
311-
req.old[matchedPosition - lengthBack + i])
339+
if (req.newData[currentScan - lengthBack + i] ==
340+
req.oldData[matchedPosition - lengthBack + i])
312341
score--;
313342
if (score > scoreOverlap) {
314343
scoreOverlap = score;
315344
lengthOverlap = i + 1;
316-
};
317-
};
345+
}
346+
}
318347

319348
lengthFront += lengthOverlap - overlapLength;
320349
lengthBack -= lengthOverlap;
321-
};
350+
}
322351

323-
offsetToBytes(lengthFront, controlBuf);
324-
offsetToBytes((currentScan - lengthBack) - (lastScan + lengthFront), controlBuf + 8);
325-
offsetToBytes((matchedPosition - lengthBack) - (lastMatchedPosition + lengthFront), controlBuf + 16);
352+
nextControlBlock[0] = lengthFront;
353+
nextControlBlock[1] = currentScan - lengthBack - (lastScan + lengthFront);
354+
nextControlBlock[2] = matchedPosition - lengthBack - (lastMatchedPosition + lengthFront);
326355

327-
/* Write control data */
328-
if (writedata(req.stream, controlBuf, sizeof(controlBuf)))
329-
return -1;
356+
if (nextControlBlock[0]) {
357+
if (currentControlBlock[0] || currentControlBlock[1] || currentControlBlock[2]) {
358+
offsetToBytes(currentControlBlock[0], controlBuf);
359+
offsetToBytes(currentControlBlock[1], controlBuf + 8);
360+
offsetToBytes(currentControlBlock[2], controlBuf + 16);
330361

331-
/* Write diff data */
332-
for (i = 0; i < lengthFront; i++)
333-
diffBuf[i] = req.new[lastScan + i] - req.old[lastMatchedPosition + i];
334-
if (writedata(req.stream, diffBuf, lengthFront))
335-
return -1;
362+
/* Write control data */
363+
if (writedata(req.stream, controlBuf, sizeof(controlBuf)))
364+
return -1;
336365

337-
/* Write extra data */
338-
for (i = 0; i < (currentScan - lengthBack) - (lastScan + lengthFront); i++)
339-
diffBuf[i] = req.new[lastScan + lengthFront + i];
340-
if (writedata(req.stream, diffBuf, (currentScan - lengthBack) - (lastScan + lengthFront)))
341-
return -1;
366+
/* Write diff data */
367+
for (i = 0; i < currentControlBlock[0]; i++)
368+
diffBuf[i] = req.newData[lastWriteNewScan + i] - req.oldData[lastWriteOldPosition + i];
369+
370+
if (writedata(req.stream, diffBuf, currentControlBlock[0]))
371+
return -1;
372+
373+
/* Write extra data */
374+
for (i = 0; i < currentControlBlock[1]; i++)
375+
diffBuf[i] = req.newData[lastWriteNewScan + currentControlBlock[0] + i];
376+
if (writedata(req.stream, diffBuf, currentControlBlock[1]))
377+
return -1;
378+
379+
lastWriteNewScan = lastScan;
380+
lastWriteOldPosition = lastMatchedPosition;
381+
}
382+
currentControlBlock[0] = nextControlBlock[0];
383+
currentControlBlock[1] = nextControlBlock[1];
384+
currentControlBlock[2] = nextControlBlock[2];
385+
} else {
386+
currentControlBlock[1] += nextControlBlock[1];
387+
currentControlBlock[2] += nextControlBlock[2];
388+
}
342389

343390
lastScan = currentScan - lengthBack;
344391
lastMatchedPosition = matchedPosition - lengthBack;
345392
lastOffset = matchedPosition - currentScan;
346-
};
347-
};
393+
}
394+
}
395+
396+
if (currentControlBlock[0] || currentControlBlock[1]) {
397+
offsetToBytes(currentControlBlock[0], controlBuf);
398+
offsetToBytes(currentControlBlock[1], controlBuf + 8);
399+
offsetToBytes(currentControlBlock[2], controlBuf + 16);
400+
401+
/* Write control data */
402+
if (writedata(req.stream, controlBuf, sizeof(controlBuf)))
403+
return -1;
404+
405+
/* Write diff data */
406+
for (i = 0; i < currentControlBlock[0]; i++)
407+
diffBuf[i] = req.newData[lastWriteNewScan + i] - req.oldData[lastWriteOldPosition + i];
408+
if (writedata(req.stream, diffBuf, currentControlBlock[0]))
409+
return -1;
410+
411+
/* Write extra data */
412+
for (i = 0; i < currentControlBlock[1]; i++)
413+
diffBuf[i] = req.newData[lastWriteNewScan + currentControlBlock[0] + i];
414+
if (writedata(req.stream, diffBuf, currentControlBlock[1]))
415+
return -1;
416+
}
348417

349418
return 0;
350419
}
@@ -353,24 +422,24 @@ int bsdiff(const uint8_t* old, int64_t oldsize, const uint8_t* new, int64_t news
353422
int result;
354423
struct bsdiff_request req;
355424

356-
if ((req.I = stream->malloc((oldsize + 1) * sizeof(int64_t))) == NULL)
425+
if ((req.indices = stream->malloc((oldsize + 1) * sizeof(int64_t))) == NULL)
357426
return -1;
358427

359428
if ((req.buffer = stream->malloc(newsize + 1)) == NULL) {
360-
stream->free(req.I);
429+
stream->free(req.indices);
361430
return -1;
362431
}
363432

364-
req.old = old;
365-
req.oldsize = oldsize;
366-
req.new = new;
367-
req.newsize = newsize;
433+
req.oldData = old;
434+
req.oldDataSize = oldsize;
435+
req.newData = new;
436+
req.newDataSize = newsize;
368437
req.stream = stream;
369438

370439
result = bsdiff_internal(req);
371440

372441
stream->free(req.buffer);
373-
stream->free(req.I);
442+
stream->free(req.indices);
374443

375444
return result;
376445
}
@@ -464,7 +533,7 @@ int main(int argc, char* argv[]) {
464533
return 1;
465534
}
466535

467-
/* Write header (signature+newsize)*/
536+
/* Write header (signature+newsize) */
468537
offsetToBytes(newsize, buf);
469538
if (fwrite("ENDSLEY/BSDIFF43", 16, 1, pf) != 1 ||
470539
fwrite(buf, sizeof(buf), 1, pf) != 1) {

0 commit comments

Comments
 (0)