Skip to content

Commit cc7d23b

Browse files
authored
Merge pull request #2965 from facebook/offbase
Converge sumtype (offset | repcode) numeric representation towards offBase
2 parents 70df5de + 03903f5 commit cc7d23b

File tree

12 files changed

+275
-218
lines changed

12 files changed

+275
-218
lines changed

contrib/seekable_format/zstdseek_decompress.c

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,64 @@
2323
# endif
2424
#endif
2525

26+
/* ************************************************************
27+
* Detect POSIX version
28+
* PLATFORM_POSIX_VERSION = 0 for non-Unix e.g. Windows
29+
* PLATFORM_POSIX_VERSION = 1 for Unix-like but non-POSIX
30+
* PLATFORM_POSIX_VERSION > 1 is equal to found _POSIX_VERSION
31+
* Value of PLATFORM_POSIX_VERSION can be forced on command line
32+
***************************************************************/
33+
#ifndef PLATFORM_POSIX_VERSION
34+
35+
# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \
36+
|| defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */
37+
/* exception rule : force posix version to 200112L,
38+
* note: it's better to use unistd.h's _POSIX_VERSION whenever possible */
39+
# define PLATFORM_POSIX_VERSION 200112L
40+
41+
/* try to determine posix version through official unistd.h's _POSIX_VERSION (http://pubs.opengroup.org/onlinepubs/7908799/xsh/unistd.h.html).
42+
* note : there is no simple way to know in advance if <unistd.h> is present or not on target system,
43+
* Posix specification mandates its presence and its content, but target system must respect this spec.
44+
* It's necessary to _not_ #include <unistd.h> whenever target OS is not unix-like
45+
* otherwise it will block preprocessing stage.
46+
* The following list of build macros tries to "guess" if target OS is likely unix-like, and therefore can #include <unistd.h>
47+
*/
48+
# elif !defined(_WIN32) \
49+
&& ( defined(__unix__) || defined(__unix) \
50+
|| defined(__midipix__) || defined(__VMS) || defined(__HAIKU__) )
51+
52+
# if defined(__linux__) || defined(__linux) || defined(__CYGWIN__)
53+
# ifndef _POSIX_C_SOURCE
54+
# define _POSIX_C_SOURCE 200809L /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */
55+
# endif
56+
# endif
57+
# include <unistd.h> /* declares _POSIX_VERSION */
58+
# if defined(_POSIX_VERSION) /* POSIX compliant */
59+
# define PLATFORM_POSIX_VERSION _POSIX_VERSION
60+
# else
61+
# define PLATFORM_POSIX_VERSION 1
62+
# endif
63+
64+
# ifdef __UCLIBC__
65+
# ifndef __USE_MISC
66+
# define __USE_MISC /* enable st_mtim on uclibc */
67+
# endif
68+
# endif
69+
70+
# else /* non-unix target platform (like Windows) */
71+
# define PLATFORM_POSIX_VERSION 0
72+
# endif
73+
74+
#endif /* PLATFORM_POSIX_VERSION */
75+
76+
2677
/* ************************************************************
2778
* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
2879
***************************************************************/
2980
#if defined(_MSC_VER) && _MSC_VER >= 1400
3081
# define LONG_SEEK _fseeki64
3182
#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
32-
# define LONG_SEEK fseeko
83+
# define LONG_SEEK fseeko
3384
#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
3485
# define LONG_SEEK fseeko64
3586
#elif defined(_WIN32) && !defined(__DJGPP__)

lib/compress/zstd_compress.c

Lines changed: 37 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2940,7 +2940,7 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
29402940
/* seqStoreSeqs[i].offBase already uses the numeric representation
29412941
expected by ZSTD_updateRep(), so it is passed through unchanged */
29422942
ZSTD_updateRep(updatedRepcodes.rep,
2943-
seqStoreSeqs[i].offBase - 1,
2943+
seqStoreSeqs[i].offBase,
29442944
seqStoreSeqs[i].litLength == 0);
29452945
literalsRead += outSeqs[i].litLength;
29462946
}
@@ -3433,20 +3433,22 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
34333433
}
34343434

34353435
/**
3436-
* Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
3437-
* offCode must represent a repcode in the numeric representation of ZSTD_storeSeq().
3436+
* Returns the raw offset represented by the combination of offBase, ll0, and repcode history.
3437+
* offBase must represent a repcode in the numeric representation of ZSTD_storeSeq().
34383438
*/
34393439
static U32
3440-
ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0)
3440+
ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offBase, const U32 ll0)
34413441
{
3442-
U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0; /* [ 0 - 3 ] */
3443-
assert(STORED_IS_REPCODE(offCode));
3444-
if (adjustedOffCode == ZSTD_REP_NUM) {
3445-
/* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
3446-
assert(rep[0] > 0);
3442+
U32 const adjustedRepCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; /* [ 0 - 3 ] */
3443+
assert(OFFBASE_IS_REPCODE(offBase));
3444+
if (adjustedRepCode == ZSTD_REP_NUM) {
3445+
/* litlength == 0 and offBase == 3 (repcode 3) implies selection of first repcode - 1
3446+
* This is only valid if it results in a valid offset value, aka > 0.
3447+
*/
3448+
assert(rep[0] > 1);
34473449
return rep[0] - 1;
34483450
}
3449-
return rep[adjustedOffCode];
3451+
return rep[adjustedRepCode];
34503452
}
34513453

34523454
/**
@@ -3468,11 +3470,11 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
34683470
for (; idx < nbSeq; ++idx) {
34693471
seqDef* const seq = seqStore->sequencesStart + idx;
34703472
U32 const ll0 = (seq->litLength == 0);
3471-
U32 const offCode = OFFBASE_TO_STORED(seq->offBase);
3473+
U32 const offBase = seq->offBase;
34723474
assert(seq->offBase > 0);
3473-
if (STORED_IS_REPCODE(offCode)) {
3474-
U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
3475-
U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
3475+
if (OFFBASE_IS_REPCODE(offBase)) {
3476+
U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offBase, ll0);
3477+
U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offBase, ll0);
34763478
/* Adjust simulated decompression repcode history if we come across a mismatch. Replace
34773479
* the repcode with the offset it actually references, determined by the compression
34783480
* repcode history.
@@ -3484,8 +3486,8 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
34843486
/* Compression repcode history is always updated with values directly from the unmodified seqStore.
34853487
* Decompression repcode history may use modified seq->offBase value taken from compression repcode history.
34863488
*/
3487-
ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0);
3488-
ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
3489+
ZSTD_updateRep(dRepcodes->rep, seq->offBase, ll0);
3490+
ZSTD_updateRep(cRepcodes->rep, offBase, ll0);
34893491
}
34903492
}
34913493

@@ -5770,26 +5772,26 @@ ZSTD_validateSequence(U32 offCode, U32 matchLength,
57705772
* window size. After output surpasses windowSize, we're limited to windowSize offsets again.
57715773
*/
57725774
size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
5773-
RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!");
5775+
RETURN_ERROR_IF(offCode > OFFSET_TO_OFFBASE(offsetBound), corruption_detected, "Offset too large!");
57745776
RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small");
57755777
return 0;
57765778
}
57775779

57785780
/* Returns an offBase value (repcode ID, or raw offset shifted by ZSTD_REP_NUM), given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
5779-
static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
5781+
static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
57805782
{
5781-
U32 offCode = STORE_OFFSET(rawOffset);
5783+
U32 offBase = OFFSET_TO_OFFBASE(rawOffset);
57825784

57835785
if (!ll0 && rawOffset == rep[0]) {
5784-
offCode = STORE_REPCODE_1;
5786+
offBase = REPCODE1_TO_OFFBASE;
57855787
} else if (rawOffset == rep[1]) {
5786-
offCode = STORE_REPCODE(2 - ll0);
5788+
offBase = REPCODE_TO_OFFBASE(2 - ll0);
57875789
} else if (rawOffset == rep[2]) {
5788-
offCode = STORE_REPCODE(3 - ll0);
5790+
offBase = REPCODE_TO_OFFBASE(3 - ll0);
57895791
} else if (ll0 && rawOffset == rep[0] - 1) {
5790-
offCode = STORE_REPCODE_3;
5792+
offBase = REPCODE3_TO_OFFBASE;
57915793
}
5792-
return offCode;
5794+
return offBase;
57935795
}
57945796

57955797
/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
@@ -5819,19 +5821,19 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
58195821
U32 const litLength = inSeqs[idx].litLength;
58205822
U32 const ll0 = (litLength == 0);
58215823
U32 const matchLength = inSeqs[idx].matchLength;
5822-
U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
5823-
ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
5824+
U32 const offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
5825+
ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
58245826

5825-
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
5827+
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
58265828
if (cctx->appliedParams.validateSequences) {
58275829
seqPos->posInSrc += litLength + matchLength;
5828-
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
5830+
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, seqPos->posInSrc,
58295831
cctx->appliedParams.cParams.windowLog, dictSize),
58305832
"Sequence validation failed");
58315833
}
58325834
RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
58335835
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
5834-
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
5836+
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
58355837
ip += matchLength + litLength;
58365838
}
58375839
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
@@ -5888,7 +5890,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
58885890
U32 litLength = currSeq.litLength;
58895891
U32 matchLength = currSeq.matchLength;
58905892
U32 const rawOffset = currSeq.offset;
5891-
U32 offCode;
5893+
U32 offBase;
58925894

58935895
/* Modify the sequence depending on where endPosInSequence lies */
58945896
if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
@@ -5942,20 +5944,20 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
59425944
}
59435945
/* Check if this offset can be represented with a repcode */
59445946
{ U32 const ll0 = (litLength == 0);
5945-
offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
5946-
ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
5947+
offBase = ZSTD_finalizeOffBase(rawOffset, updatedRepcodes.rep, ll0);
5948+
ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
59475949
}
59485950

59495951
if (cctx->appliedParams.validateSequences) {
59505952
seqPos->posInSrc += litLength + matchLength;
5951-
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
5953+
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, seqPos->posInSrc,
59525954
cctx->appliedParams.cParams.windowLog, dictSize),
59535955
"Sequence validation failed");
59545956
}
5955-
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
5957+
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
59565958
RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
59575959
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
5958-
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
5960+
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
59595961
ip += matchLength + litLength;
59605962
}
59615963
DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);

lib/compress/zstd_compress_internal.h

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -577,29 +577,27 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con
577577
while (ip < iend) *op++ = *ip++;
578578
}
579579

580-
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
581-
#define STORE_REPCODE_1 STORE_REPCODE(1)
582-
#define STORE_REPCODE_2 STORE_REPCODE(2)
583-
#define STORE_REPCODE_3 STORE_REPCODE(3)
584-
#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
585-
#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE)
586-
#define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE)
587-
#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
588-
#define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
589-
#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */
590-
#define STORED_TO_OFFBASE(o) ((o)+1)
591-
#define OFFBASE_TO_STORED(o) ((o)-1)
580+
581+
#define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
582+
#define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
583+
#define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
584+
#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
585+
#define OFFSET_TO_OFFBASE(o) (assert((o)>0), (o) + ZSTD_REP_NUM) /* raw offset (must be > 0) -> offBase; argument is parenthesized so compound expressions expand correctly */
586+
#define OFFBASE_IS_OFFSET(o) ((o) > ZSTD_REP_NUM)
587+
#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
588+
#define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
589+
#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o)) /* returns ID 1,2,3 */
592590

593591
/*! ZSTD_storeSeq() :
594-
* Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
595-
* @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
592+
* Store a sequence (litlen, litPtr, offBase and matchLength) into seqStore_t.
593+
* @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
596594
* @matchLength : must be >= MINMATCH
597-
* Allowed to overread literals up to litLimit.
595+
* Allowed to over-read literals up to litLimit.
598596
*/
599597
HINT_INLINE UNUSED_ATTR void
600598
ZSTD_storeSeq(seqStore_t* seqStorePtr,
601599
size_t litLength, const BYTE* literals, const BYTE* litLimit,
602-
U32 offBase_minus1,
600+
U32 offBase,
603601
size_t matchLength)
604602
{
605603
BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
@@ -608,8 +606,8 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
608606
static const BYTE* g_start = NULL;
609607
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
610608
{ U32 const pos = (U32)((const BYTE*)literals - g_start);
611-
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
612-
pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
609+
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
610+
pos, (U32)litLength, (U32)matchLength, (U32)offBase);
613611
}
614612
#endif
615613
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
@@ -619,9 +617,9 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
619617
assert(literals + litLength <= litLimit);
620618
if (litEnd <= litLimit_w) {
621619
/* Common case we can use wildcopy.
622-
* First copy 16 bytes, because literals are likely short.
623-
*/
624-
assert(WILDCOPY_OVERLENGTH >= 16);
620+
* First copy 16 bytes, because literals are likely short.
621+
*/
622+
ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16);
625623
ZSTD_copy16(seqStorePtr->lit, literals);
626624
if (litLength > 16) {
627625
ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
@@ -640,7 +638,7 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
640638
seqStorePtr->sequences[0].litLength = (U16)litLength;
641639

642640
/* match offset */
643-
seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
641+
seqStorePtr->sequences[0].offBase = offBase;
644642

645643
/* match Length */
646644
assert(matchLength >= MINMATCH);
@@ -658,17 +656,17 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
658656

659657
/* ZSTD_updateRep() :
660658
* updates in-place @rep (array of repeat offsets)
661-
* @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
659+
* @offBase : sum-type, using numeric representation of ZSTD_storeSeq()
662660
*/
663661
MEM_STATIC void
664-
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
662+
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
665663
{
666-
if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */
664+
if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */
667665
rep[2] = rep[1];
668666
rep[1] = rep[0];
669-
rep[0] = STORED_OFFSET(offBase_minus1);
667+
rep[0] = OFFBASE_TO_OFFSET(offBase);
670668
} else { /* repcode */
671-
U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
669+
U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;
672670
if (repCode > 0) { /* note : if repCode==0, no change */
673671
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
674672
rep[2] = (repCode >= 2) ? rep[1] : rep[2];
@@ -685,11 +683,11 @@ typedef struct repcodes_s {
685683
} repcodes_t;
686684

687685
MEM_STATIC repcodes_t
688-
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
686+
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
689687
{
690688
repcodes_t newReps;
691689
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
692-
ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
690+
ZSTD_updateRep(newReps.rep, offBase, ll0);
693691
return newReps;
694692
}
695693

0 commit comments

Comments
 (0)