diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 11bf201bc36..d228a57944b 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -40,6 +40,7 @@ #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" #endif +#define PREFETCH_DISTANCE (8*CACHELINE_SIZE) /*_******************************************************* * Memory operations @@ -1414,6 +1415,8 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + int counter = 0; + const int kPrefetchRatio = 16; DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq); /* Literals are split between internal buffer & output buffer */ @@ -1501,6 +1504,12 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); if (litPtr + sequence.litLength > dctx->litBufferEnd) break; { size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); + /* Don't prefetch too often, because it is actually slower */ + counter++; + if ((counter / kPrefetchRatio) == 0) { + PREFETCH_L1(op+PREFETCH_DISTANCE); + PREFETCH_L1(litPtr+PREFETCH_DISTANCE); + } #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); @@ -1562,6 +1571,8 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, for ( ; nbSeq ; nbSeq--) { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); + PREFETCH_L1(op+PREFETCH_DISTANCE); + PREFETCH_L1(litPtr+PREFETCH_DISTANCE); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); @@ -1655,6 +1666,8 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, for ( ; nbSeq ; nbSeq--) { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); + PREFETCH_L1(op+PREFETCH_DISTANCE); + PREFETCH_L1(litPtr+PREFETCH_DISTANCE); #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) assert(!ZSTD_isError(oneSeqSize)); ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);