From a90e0c0eb9a230176284d23d61fe521ce11af610 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 12 Oct 2020 18:27:54 -0400 Subject: [PATCH 01/15] Move block header write into compressBlock_internal() --- lib/compress/zstd_compress.c | 40 +++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 93c4075c521..2f4d5b4c3de 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2693,9 +2693,17 @@ static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) zc->blockState.nextCBlock = tmp; } +/* Writes the block header */ +static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); +} + static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 frame) + const void* src, size_t srcSize, U32 frame, U32 lastBlock) { /* This the upper bound for the length of an rle block. * This isn't the actual upper bound. Finding the real threshold @@ -2704,7 +2712,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, const U32 rleMaxLength = 25; size_t cSize; const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; + BYTE* op = (BYTE*)(dst + ZSTD_blockHeaderSize); DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); @@ -2724,7 +2732,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, cSize = ZSTD_entropyCompressSequences(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, - dst, dstCapacity, + op, dstCapacity, srcSize, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); @@ -2758,7 +2766,13 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, */ if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; - + if (cSize == 0) { + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + } else { + writeBlockHeader(dst, cSize, srcSize, lastBlock); + cSize += ZSTD_blockHeaderSize; + } return cSize; } @@ -2910,23 +2924,11 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, assert(cSize <= blockSize + ZSTD_blockHeaderSize); } else { cSize = ZSTD_compressBlock_internal(cctx, - op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, - ip, blockSize, 1 /* frame */); + op, dstCapacity, + ip, blockSize, 1 /* frame */, lastBlock); FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); - - if (cSize == 0) { /* block is not compressible */ - cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); - } else { - U32 const cBlockHeader = cSize == 1 ? - lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : - lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(op, cBlockHeader); - cSize += ZSTD_blockHeaderSize; - } } - ip += blockSize; assert(remaining >= blockSize); remaining -= blockSize; @@ -3080,7 +3082,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); { size_t const cSize = frame ? ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : - ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); + ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */, 0); FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); cctx->consumedSrcSize += srcSize; cctx->producedCSize += (cSize + fhSize); From ad42e058ee0ddc7a9a42ea839cb64bfd34719a6a Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 12 Oct 2020 18:33:11 -0400 Subject: [PATCH 02/15] Add a nbSeq argument to compressSequences() Refactor ZSTD_compressBlock_internal() to do the block header write within and add nbSeq argument to compressSequences() --- lib/compress/zstd_compress.c | 208 ++++++++++++++++++++++++- lib/decompress/zstd_decompress.c | 9 +- lib/decompress/zstd_decompress_block.c | 13 +- 3 files changed, 221 insertions(+), 9 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 2f4d5b4c3de..d19f4a2a636 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2196,13 +2196,13 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ const seqDef* const sequences = seqStorePtr->sequencesStart; + const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; const BYTE* const ofCodeTable = seqStorePtr->ofCode; const BYTE* const llCodeTable = seqStorePtr->llCode; const BYTE* const mlCodeTable = seqStorePtr->mlCode; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; - size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); BYTE* seqHead; BYTE* lastNCount = NULL; @@ -2472,6 +2472,16 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; +static U32 countLiteralsBytes2(const seqStore_t* seqStore) { + U32 literalsBytes = 0; + U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; + for (int i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + literalsBytes += seq.litLength; + } + return literalsBytes; +} + static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) { ZSTD_matchState_t* const ms = &zc->blockState.matchState; @@ -2695,12 +2705,194 @@ static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) /* Writes the block header */ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { + DEBUGLOG(3, "writeBlockHeader: cSize: %u blockSize: %u lastBlock: %u", cSize, blockSize, lastBlock); U32 const cBlockHeader = cSize == 1 ? lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); MEM_writeLE24(op, cBlockHeader); } +static U32 countLiteralsBytes(const seqStore_t* seqStore) { + U32 literalsBytes = 0; + U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; + for (int i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + literalsBytes += seq.litLength; + if (i == seqStore->longLengthPos && seqStore->longLengthID == 1) { + literalsBytes += 0x10000; + } + } + return literalsBytes; +} + +static U32 countMatchBytes(const seqStore_t* seqStore) { + U32 matchBytes = 0; + U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; + for (int i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + matchBytes += seq.matchLength + MINMATCH; + if (i == seqStore->longLengthPos && seqStore->longLengthID == 2) { + matchBytes += 0x10000; + } + } + return matchBytes; +} + +/* The issue is with setting the end of the literals. Existence of last literals in the seq store make it so that + we have to be careful with where we put our litEnds and whatnot. */ +static void setUpSeqStores(seqStore_t* firstSeqStore, seqStore_t* secondSeqStore, U32 nbSeq, U32 srcSize) { + size_t nbSeqFirstHalf = nbSeq/2; + size_t nbSeqSecondHalf = (nbSeq % 2 == 0) ? nbSeq/2 : nbSeq/2 + 1; + DEBUGLOG(2, "first half nbseq: %u second half nbseq: %u", nbSeqFirstHalf, nbSeqSecondHalf); + + const BYTE* const litEnd = firstSeqStore->lit; + const BYTE* const seqEnd = firstSeqStore->sequences; + + if (firstSeqStore->longLengthID != 0) { + DEBUGLOG(2, "long lenght ID present"); + if (firstSeqStore->longLengthPos < nbSeqFirstHalf) { + secondSeqStore->longLengthID = 0; + } else { + firstSeqStore->longLengthID = 0; + secondSeqStore->longLengthPos = secondSeqStore->longLengthPos - nbSeqFirstHalf; + } + } + + firstSeqStore->sequences = firstSeqStore->sequencesStart+nbSeqFirstHalf; + + U32 literalsBytesFirstHalf = countLiteralsBytes(firstSeqStore); + firstSeqStore->lit = firstSeqStore->litStart+literalsBytesFirstHalf; + + secondSeqStore->sequencesStart += nbSeqFirstHalf; + secondSeqStore->sequences = seqEnd; + secondSeqStore->litStart += literalsBytesFirstHalf; + secondSeqStore->lit = litEnd; + secondSeqStore->llCode += nbSeqFirstHalf; + secondSeqStore->mlCode += nbSeqFirstHalf; + secondSeqStore->ofCode += nbSeqFirstHalf; + +} + +static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 frame, U32 lastBlock, U32 nbSeq) { + /* This the upper bound for the length of an rle block. + * This isn't the actual upper bound. Finding the real threshold + * needs further investigation. + */ + const U32 rleMaxLength = 25; + size_t cSize; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + DEBUGLOG(5, "ZSTD_compressBlock_splitBlock (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + /* Attempt block splitting here */ + DEBUGLOG(3, "Block size pre-split is: %u - lastBlock: %u, dst ptr: %u op: %u", srcSize, lastBlock, dst, op); + DEBUGLOG(3, "srcSize: %u seq store size: %u", srcSize, countLiteralsBytes(&zc->seqStore) + countMatchBytes(&zc->seqStore)); + seqStore_t firstHalfSeqStore = zc->seqStore; + seqStore_t secondHalfSeqStore = zc->seqStore; + + setUpSeqStores(&firstHalfSeqStore, &secondHalfSeqStore, nbSeq, srcSize); + + assert((U32)(firstHalfSeqStore.lit - firstHalfSeqStore.litStart) + (U32)(secondHalfSeqStore.lit - secondHalfSeqStore.litStart) == (U32)(zc->seqStore.lit - zc->seqStore.litStart)); + assert((U32)(firstHalfSeqStore.sequences - firstHalfSeqStore.sequencesStart) + (U32)(secondHalfSeqStore.sequences - secondHalfSeqStore.sequencesStart) == (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart)); + + size_t cSizeFirstHalf; + size_t cSizeSecondHalf; + + size_t literalsBytesFirstHalf = countLiteralsBytes(&firstHalfSeqStore); + size_t srcBytesFirstHalf = literalsBytesFirstHalf + countMatchBytes(&firstHalfSeqStore); + size_t srcBytesSecondHalf = srcSize - srcBytesFirstHalf; + DEBUGLOG(3, "literals bytes first half: %zu literals bytes second half: %zu, orig: %zu", literalsBytesFirstHalf, countLiteralsBytes(&secondHalfSeqStore), countLiteralsBytes(&zc->seqStore)); + DEBUGLOG(3, "match bytes first half: %zu match bytes second half: %zu, orig: %zu", countMatchBytes(&firstHalfSeqStore), countMatchBytes(&secondHalfSeqStore), countMatchBytes(&zc->seqStore)); + DEBUGLOG(2, "Src bytes first half: %zu src bytes second half: %zu", srcBytesFirstHalf, srcBytesSecondHalf); + + { U32 cSeqsSizeFirstHalf = ZSTD_compressSequences(&firstHalfSeqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, + srcBytesFirstHalf, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + + if (!zc->isFirstBlock && + ZSTD_maybeRLE(&firstHalfSeqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + cSeqsSizeFirstHalf = 1; + } + + if (cSeqsSizeFirstHalf == 0) { + cSizeFirstHalf = ZSTD_noCompressBlock(op, dstCapacity, ip, srcBytesFirstHalf, 0); + FORWARD_IF_ERROR(cSizeFirstHalf, "Nocompress block failed"); + DEBUGLOG(2, "1: Writing out nocompress block, size: %zu", cSizeFirstHalf); + } else if (cSeqsSizeFirstHalf == 1) { + cSizeFirstHalf = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcBytesFirstHalf, 0); + FORWARD_IF_ERROR(cSizeFirstHalf, "RLE compress block failed"); + DEBUGLOG(2, "1: Writing out RLE block, size: %zu", cSizeFirstHalf); + } else { + U32 cBlockHeader; + /* Error checking and repcodes update */ + ZSTD_confirmRepcodesAndEntropyTables(zc); + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + writeBlockHeader(op, cSeqsSizeFirstHalf, srcBytesFirstHalf, 0); + cSizeFirstHalf = ZSTD_blockHeaderSize + cSeqsSizeFirstHalf; + DEBUGLOG(3, "1: Writing out compressed block, size: %zu", cSizeFirstHalf); + } + } + + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } + + ip += srcBytesFirstHalf; + op += cSizeFirstHalf; + dstCapacity - cSizeFirstHalf; + + { U32 cSeqsSizeSecondHalf = ZSTD_compressSequences(&secondHalfSeqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, + srcBytesSecondHalf, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + if (ZSTD_maybeRLE(&firstHalfSeqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + cSeqsSizeSecondHalf = 1; + } + + if (cSeqsSizeSecondHalf == 0) { + cSizeSecondHalf = ZSTD_noCompressBlock(op, dstCapacity, ip, srcBytesSecondHalf, lastBlock); + FORWARD_IF_ERROR(cSizeFirstHalf, "Nocompress block failed"); + DEBUGLOG(2, "2: Writing out nocompress block, size: %zu", cSizeSecondHalf); + } else if (cSeqsSizeSecondHalf == 1) { + cSizeSecondHalf = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcBytesSecondHalf, lastBlock); + FORWARD_IF_ERROR(cSizeFirstHalf, "RLE compress block failed"); + DEBUGLOG(2, "2: Writing out RLE block, size: %zu", cSizeSecondHalf); + } else { + U32 cBlockHeader; + /* Error checking and repcodes update */ + ZSTD_confirmRepcodesAndEntropyTables(zc); + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + writeBlockHeader(op, cSeqsSizeSecondHalf, srcBytesSecondHalf, lastBlock); + cSizeSecondHalf = ZSTD_blockHeaderSize + cSeqsSizeSecondHalf; + DEBUGLOG(3, "2: Writing out compressed block, size: %zu", cSizeSecondHalf); + } + } + + DEBUGLOG(2, "cSizeFirstHalf: %u cSizeSecondHalf: %u", cSizeFirstHalf, cSizeSecondHalf); + cSize = cSizeFirstHalf + cSizeSecondHalf; + return cSize; +} + static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 frame, U32 lastBlock) @@ -2711,8 +2903,9 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, */ const U32 rleMaxLength = 25; size_t cSize; + size_t nbSeq; const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)(dst + ZSTD_blockHeaderSize); + BYTE* op = (BYTE*)dst; DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); @@ -2720,6 +2913,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } + nbSeq = (size_t)(zc->seqStore.sequences - zc->seqStore.sequencesStart); } if (zc->seqCollector.collectSequences) { @@ -2728,11 +2922,15 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, return 0; } + if (nbSeq >= 2) { + return ZSTD_compressBlock_splitBlock(zc, dst, dstCapacity, src, srcSize, frame, lastBlock, nbSeq); + } + /* encode sequences and literals */ cSize = ZSTD_entropyCompressSequences(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, - op, dstCapacity, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, srcSize, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); @@ -2770,7 +2968,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); } else { - writeBlockHeader(dst, cSize, srcSize, lastBlock); + writeBlockHeader(op, cSize, srcSize, lastBlock); cSize += ZSTD_blockHeaderSize; } return cSize; @@ -2936,7 +3134,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, assert(dstCapacity >= cSize); dstCapacity -= cSize; cctx->isFirstBlock = 0; - DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + DEBUGLOG(2, "ZSTD_compress_frameChunk: adding a block of size %u", (unsigned)cSize); } } diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 15139501bea..9f753911d0d 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -763,7 +763,7 @@ size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSiz static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - DEBUGLOG(5, "ZSTD_copyRawBlock"); + DEBUGLOG(2, "ZSTD_copyRawBlock: %u", srcSize); RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); if (dst == NULL) { if (srcSize == 0) return 0; @@ -847,6 +847,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, /* Loop on each block */ while (1) { + DEBUGLOG(2, "Remaining dstCap: %u", (size_t)(oend-op)); size_t decodedSize; blockProperties_t blockProperties; size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); @@ -875,8 +876,10 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, if (ZSTD_isError(decodedSize)) return decodedSize; if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, op, decodedSize); - if (decodedSize != 0) + if (decodedSize != 0) { + DEBUGLOG(2, "Decoded: %u", decodedSize); op += decodedSize; + } assert(ip != NULL); ip += cBlockSize; remainingSrcSize -= cBlockSize; @@ -1189,7 +1192,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c } FORWARD_IF_ERROR(rSize, ""); RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); - DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); + DEBUGLOG(2, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); dctx->decodedSize += rSize; if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); dctx->previousDstEnd = (char*)dst + rSize; diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index b71bc20d65b..56e4b5ef87a 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -775,6 +775,9 @@ size_t ZSTD_execSequenceEnd(BYTE* op, /* bounds checks : careful of address space overflow in 32-bit mode */ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + DEBUGLOG(2, "sequence length: %u", sequenceLength); + DEBUGLOG(2, "oLitEnd: %u iLitEnd: %u match: %u", oLitEnd, iLitEnd, match); + DEBUGLOG(2, "seq ll: %u, condition: %u", sequence.litLength, (size_t)(litLimit - *litPtr)); RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); assert(op < op + sequenceLength); assert(oLitEnd < op + sequenceLength); @@ -850,8 +853,13 @@ size_t ZSTD_execSequence(BYTE* op, op = oLitEnd; *litPtr = iLitEnd; /* update for next sequence */ + /* Copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /*DEBUGLOG(2, "oLitEnd: %u, oMatchEnd: %u iLitEnd: %u matchPos: %u", oLitEnd, oMatchEnd, iLitEnd, match); + DEBUGLOG(2, "off: %u ml: %u ll: %u", sequence.offset, sequence.matchLength, sequence.litLength); + DEBUGLOG(2, "first condition: %u", (size_t)(oLitEnd - prefixStart)); + DEBUGLOG(2, "break condition: %u", (size_t)(oLitEnd - virtualStart));*/ /* offset beyond prefix -> go into extDict */ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); match = dictEnd + (match - prefixStart); @@ -1210,6 +1218,9 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, /* last literal segment */ { size_t const lastLLSize = litEnd - litPtr; + if (lastLLSize > (size_t)(oend-op)) { + DEBUGLOG(2, "too small lastll"); + } RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); if (op != NULL) { ZSTD_memcpy(op, litPtr, lastLLSize); @@ -1458,7 +1469,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, * (note: but it could be evaluated from current-lowLimit) */ ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); - DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); + DEBUGLOG(2, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); From b1c47646eacce50999a46e158114443e07f95b55 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 30 Nov 2020 13:41:03 -0500 Subject: [PATCH 03/15] Messy copy to bring superblock estimation over --- lib/compress/zstd_compress.c | 451 ++++++++++++++++++++++++++++++++++- 1 file changed, 444 insertions(+), 7 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index d19f4a2a636..e508b19e2b1 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2226,6 +2226,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, bmi2); FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); assert(cSize <= dstCapacity); + DEBUGLOG(2, "Actual litSize: %u", cSize); op += cSize; } @@ -2364,6 +2365,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, "emitting an uncompressed block."); return 0; } + DEBUGLOG(2, "Actual seqSize: %u", bitstreamSize); } DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); @@ -2712,6 +2714,383 @@ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastB MEM_writeLE24(op, cBlockHeader); } +typedef struct { + symbolEncodingType_e hType; + BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/** ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. + * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; + size_t fseTablesSize; + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + +/** ZSTD_buildSuperBlockEntropy_literal() : + * Builds entropy for the super-block literals. + * Stores literals block type (raw, rle, compressed, repeat) and + * huffman description table to hufMetadata. + * @return : size of huffman description table or error code */ +static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + const int disableLiteralsCompression, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); + BYTE* const nodeWksp = countWkspStart + countWkspSize; + const size_t nodeWkspSize = wkspEnd-nodeWksp; + unsigned maxSymbolValue = 255; + unsigned huffLog = HUF_TABLELOG_DEFAULT; + HUF_repeat repeat = prevHuf->repeatMode; + + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralsCompression) { + DEBUGLOG(5, "set_basic - disabled"); + hufMetadata->hType = set_basic; + return 0; + } + + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + + /* Build Huffman Tree */ + ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } + } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } + } +} + +/** ZSTD_buildSuperBlockEntropy_sequences() : + * Builds entropy for the super-block sequences. + * Stores symbol compression modes and fse table to fseMetadata. + * @return : size of fse tables or error code */ +static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); + BYTE* const cTableWksp = countWkspStart + countWkspSize; + const size_t cTableWkspSize = wkspEnd-cTableWksp; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + + assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); + ZSTD_memset(workspace, 0, wkspSize); + + fseMetadata->lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { U32 LLtype; + unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); + if (LLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->llType = (symbolEncodingType_e) LLtype; + } } + /* build CTable for Offsets */ + { U32 Offtype; + unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWksp, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); + if (Offtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->ofType = (symbolEncodingType_e) Offtype; + } } + /* build CTable for MatchLengths */ + { U32 MLtype; + unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); + if (MLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->mlType = (symbolEncodingType_e) MLtype; + } } + assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); + return op-ostart; +} + + +/** ZSTD_buildSuperBlockEntropy() : + * Builds entropy for the super-block. + * @return : 0 on success or error code */ +static size_t +ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) +{ + size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + ZSTD_disableLiteralsCompression(cctxParams), + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); + return 0; +} + +static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; + size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB); + U32 singleStream = litSize < 256; + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */ + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, unsigned maxCode, + size_t nbSeq, const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. */ + assert(max <= defaultMax); + cSymbolTypeSizeEstimateInBits = max <= defaultMax + ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max) + : ERROR(GENERIC); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits / 8; +} + +static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, + nbSeq, fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, + nbSeq, fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, + nbSeq, fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + DEBUGLOG(2, "fseMetadata->fseTablesSize: %u", fseMetadata->fseTablesSize); + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t literalsSize, seqSize; + literalsSize = ZSTD_estimateSubBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + seqSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + DEBUGLOG(2, "Estimated litSize: %u seqSize: %u", literalsSize, seqSize); + return seqSize + literalsSize + ZSTD_blockHeaderSize; +} + static U32 countLiteralsBytes(const seqStore_t* seqStore) { U32 literalsBytes = 0; U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; @@ -2745,8 +3124,8 @@ static void setUpSeqStores(seqStore_t* firstSeqStore, seqStore_t* secondSeqStore size_t nbSeqSecondHalf = (nbSeq % 2 == 0) ? nbSeq/2 : nbSeq/2 + 1; DEBUGLOG(2, "first half nbseq: %u second half nbseq: %u", nbSeqFirstHalf, nbSeqSecondHalf); - const BYTE* const litEnd = firstSeqStore->lit; - const BYTE* const seqEnd = firstSeqStore->sequences; + const BYTE* litEnd = firstSeqStore->lit; + const seqDef* const seqEnd = firstSeqStore->sequences; if (firstSeqStore->longLengthID != 0) { DEBUGLOG(2, "long lenght ID present"); @@ -2797,8 +3176,62 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, setUpSeqStores(&firstHalfSeqStore, &secondHalfSeqStore, nbSeq, srcSize); assert((U32)(firstHalfSeqStore.lit - firstHalfSeqStore.litStart) + (U32)(secondHalfSeqStore.lit - secondHalfSeqStore.litStart) == (U32)(zc->seqStore.lit - zc->seqStore.litStart)); - assert((U32)(firstHalfSeqStore.sequences - firstHalfSeqStore.sequencesStart) + (U32)(secondHalfSeqStore.sequences - secondHalfSeqStore.sequencesStart) == (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart)); + assert((U32)(firstHalfSeqStore.sequences - firstHalfSeqStore.sequencesStart) + (U32)(secondHalfSeqStore.sequences - secondHalfSeqStore.sequencesStart) + == (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart)); + /* Check that splitting would actually improve compression. Return 0 if not */ + { + ZSTD_entropyCTablesMetadata_t entropyMetadataOriginal; + size_t estimatedOriginalSize; + ZSTD_entropyCTablesMetadata_t entropyMetadataFirstHalf; + size_t estimatedFirstHalfSize; + ZSTD_entropyCTablesMetadata_t entropyMetadataSecondHalf; + size_t estimatedSecondHalfSize; + size_t estimatedSplitBlocksCompressedSize; + FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadataOriginal, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + estimatedOriginalSize = ZSTD_estimateSubBlockSize(zc->seqStore.litStart, (size_t)(zc->seqStore.lit - zc->seqStore.litStart), + zc->seqStore.ofCode, zc->seqStore.llCode, zc->seqStore.mlCode, + (size_t)(zc->seqStore.sequences - zc->seqStore.sequencesStart), + &zc->blockState.nextCBlock->entropy, &entropyMetadataOriginal, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, + (int)(entropyMetadataOriginal.hufMetadata.hType == set_compressed), 1); + FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&firstHalfSeqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadataFirstHalf, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + estimatedFirstHalfSize = ZSTD_estimateSubBlockSize(firstHalfSeqStore.litStart, (size_t)(firstHalfSeqStore.lit - firstHalfSeqStore.litStart), + firstHalfSeqStore.ofCode, firstHalfSeqStore.llCode, firstHalfSeqStore.mlCode, + (size_t)(firstHalfSeqStore.sequences - firstHalfSeqStore.sequencesStart), + &zc->blockState.nextCBlock->entropy, &entropyMetadataFirstHalf, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, + (int)(entropyMetadataFirstHalf.hufMetadata.hType == set_compressed), 1); + FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&secondHalfSeqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadataSecondHalf, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + estimatedSecondHalfSize = ZSTD_estimateSubBlockSize(secondHalfSeqStore.litStart, (size_t)(secondHalfSeqStore.lit - secondHalfSeqStore.litStart), + secondHalfSeqStore.ofCode, secondHalfSeqStore.llCode, secondHalfSeqStore.mlCode, + (size_t)(secondHalfSeqStore.sequences - secondHalfSeqStore.sequencesStart), + &zc->blockState.nextCBlock->entropy, &entropyMetadataSecondHalf, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, + (int)(entropyMetadataSecondHalf.hufMetadata.hType == set_compressed), 1); + estimatedSplitBlocksCompressedSize = estimatedFirstHalfSize + estimatedSecondHalfSize; + + DEBUGLOG(2, "Estimated original block size is: %u", estimatedOriginalSize); + DEBUGLOG(2, "Estimated split block size is: %u - split: %u - %u", estimatedSplitBlocksCompressedSize, estimatedFirstHalfSize, estimatedSecondHalfSize); + if (estimatedSplitBlocksCompressedSize > estimatedOriginalSize) { + DEBUGLOG(2, "BAIL SPLIT"); + return 0; + } else { + DEBUGLOG(2, "WILL SPLIT"); + } + } size_t cSizeFirstHalf; size_t cSizeSecondHalf; @@ -2809,7 +3242,7 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, DEBUGLOG(3, "match bytes first half: %zu match bytes second half: %zu, orig: %zu", countMatchBytes(&firstHalfSeqStore), countMatchBytes(&secondHalfSeqStore), countMatchBytes(&zc->seqStore)); DEBUGLOG(2, "Src bytes first half: %zu src bytes second half: %zu", srcBytesFirstHalf, srcBytesSecondHalf); - { U32 cSeqsSizeFirstHalf = ZSTD_compressSequences(&firstHalfSeqStore, + { U32 cSeqsSizeFirstHalf = ZSTD_entropyCompressSequences(&firstHalfSeqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, @@ -2854,9 +3287,9 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, ip += srcBytesFirstHalf; op += cSizeFirstHalf; - dstCapacity - cSizeFirstHalf; + dstCapacity -= cSizeFirstHalf; - { U32 cSeqsSizeSecondHalf = ZSTD_compressSequences(&secondHalfSeqStore, + { U32 cSeqsSizeSecondHalf = ZSTD_entropyCompressSequences(&secondHalfSeqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, @@ -2923,7 +3356,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, } if (nbSeq >= 2) { - return ZSTD_compressBlock_splitBlock(zc, dst, dstCapacity, src, srcSize, frame, lastBlock, nbSeq); + size_t splitBlocksCompressedSize; + splitBlocksCompressedSize = ZSTD_compressBlock_splitBlock(zc, dst, dstCapacity, src, srcSize, frame, lastBlock, nbSeq); + if (splitBlocksCompressedSize != 0) { + return splitBlocksCompressedSize; + } } /* encode sequences and literals */ From 14cbfe9d31a2bda19452f716fd7a8c74036d2394 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 7 Dec 2020 11:31:09 -0500 Subject: [PATCH 04/15] Right before nuking the existing superblocks entropy counter --- lib/compress/zstd_compress.c | 332 ++++++++++---------- lib/compress/zstd_compress_internal.h | 56 ++++ lib/compress/zstd_compress_superblock.c | 384 +----------------------- 3 files changed, 221 insertions(+), 551 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e508b19e2b1..824987e6698 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2177,93 +2177,34 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) return (cctxParams->targetCBlockSize != 0); } -/* ZSTD_entropyCompressSequences_internal(): - * actually compresses both literals and sequences */ MEM_STATIC size_t -ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - void* entropyWorkspace, size_t entropyWkspSize, - const int bmi2) -{ - const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - unsigned* count = (unsigned*)entropyWorkspace; - FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; +ZSTD_buildEntropyStatistics(seqStore_t* seqStorePtr, size_t nbSeq, + FSE_CTable* CTable_LitLength, + FSE_CTable* CTable_OffsetBits, + FSE_CTable* CTable_MatchLength, + const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, + BYTE* dst, const BYTE* const dstEnd, + ZSTD_strategy strategy, BYTE* lastNCount, ZSTD_fseCTablesMetadata_t* fseMetadata, + void* countWorkspace, void* entropyWorkspace, size_t entropyWkspSize) { U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - const seqDef* const sequences = seqStorePtr->sequencesStart; - const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; const BYTE* const ofCodeTable = seqStorePtr->ofCode; const BYTE* const llCodeTable = seqStorePtr->llCode; const BYTE* const mlCodeTable = seqStorePtr->mlCode; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; + BYTE* const ostart = dst; + BYTE* const oend = dstEnd; BYTE* op = ostart; - BYTE* seqHead; - BYTE* lastNCount = NULL; + BYTE* seqHead = op++; - entropyWorkspace = count + (MaxSeq + 1); - entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); - - DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq); - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= HUF_WORKSPACE_SIZE); - - /* Compress literals */ - { const BYTE* const literals = seqStorePtr->litStart; - size_t const litSize = (size_t)(seqStorePtr->lit - literals); - size_t const cSize = ZSTD_compressLiterals( - &prevEntropy->huf, &nextEntropy->huf, - cctxParams->cParams.strategy, - ZSTD_disableLiteralsCompression(cctxParams), - op, dstCapacity, - literals, litSize, - entropyWorkspace, entropyWkspSize, - bmi2); - FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); - assert(cSize <= dstCapacity); - DEBUGLOG(2, "Actual litSize: %u", cSize); - op += cSize; - } - - /* Sequences Header */ - RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, - dstSize_tooSmall, "Can't fit seq hdr in output buf!"); - if (nbSeq < 128) { - *op++ = (BYTE)nbSeq; - } else if (nbSeq < LONGNBSEQ) { - op[0] = (BYTE)((nbSeq>>8) + 0x80); - op[1] = (BYTE)nbSeq; - op+=2; - } else { - op[0]=0xFF; - MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); - op+=3; - } assert(op <= oend); - if (nbSeq==0) { - /* Copy the old tables over as if we repeated them */ - ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); - return (size_t)(op - ostart); - } - - /* seqHead : flags for FSE encoding type */ - seqHead = op++; - assert(op <= oend); - /* convert length/distances into codes */ ZSTD_seqToCodes(seqStorePtr); /* build CTable for Literal Lengths */ { unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ DEBUGLOG(5, "Building LL table"); nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, - count, max, mostFrequent, nbSeq, + countWorkspace, max, mostFrequent, nbSeq, LLFSELog, prevEntropy->fse.litlengthCTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy); @@ -2272,7 +2213,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, { size_t const countSize = ZSTD_buildCTable( op, (size_t)(oend - op), CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - count, max, llCodeTable, nbSeq, + countWorkspace, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), @@ -2281,18 +2222,22 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, if (LLtype == set_compressed) lastNCount = op; op += countSize; + if (fseMetadata) { + if (LLtype == set_compressed) fseMetadata->lastCountSize = countSize; + fseMetadata->llType = (symbolEncodingType_e) LLtype; + } assert(op <= oend); } } /* build CTable for Offsets */ { unsigned max = MaxOff; size_t const mostFrequent = HIST_countFast_wksp( - count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; DEBUGLOG(5, "Building OF table"); nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, - count, max, mostFrequent, nbSeq, + countWorkspace, max, mostFrequent, nbSeq, OffFSELog, prevEntropy->fse.offcodeCTable, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy); @@ -2300,7 +2245,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, { size_t const countSize = ZSTD_buildCTable( op, (size_t)(oend - op), CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - count, max, ofCodeTable, nbSeq, + countWorkspace, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), @@ -2309,16 +2254,20 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, if (Offtype == set_compressed) lastNCount = op; op += countSize; + if (fseMetadata) { + if (Offtype == set_compressed) fseMetadata->lastCountSize = countSize; + fseMetadata->ofType = (symbolEncodingType_e) Offtype; + } assert(op <= oend); } } /* build CTable for MatchLengths */ { unsigned max = MaxML; size_t const mostFrequent = HIST_countFast_wksp( - count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, - count, max, mostFrequent, nbSeq, + countWorkspace, max, mostFrequent, nbSeq, MLFSELog, prevEntropy->fse.matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy); @@ -2326,7 +2275,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, { size_t const countSize = ZSTD_buildCTable( op, (size_t)(oend - op), CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - count, max, mlCodeTable, nbSeq, + countWorkspace, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), @@ -2335,10 +2284,99 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, if (MLtype == set_compressed) lastNCount = op; op += countSize; + if (fseMetadata) { + if (MLtype == set_compressed) fseMetadata->lastCountSize = countSize; + fseMetadata->mlType = (symbolEncodingType_e) MLtype; + } assert(op <= oend); } } *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + return op-ostart; +} + +/* ZSTD_entropyCompressSequences_internal(): + * actually compresses both literals and sequences */ +MEM_STATIC size_t +ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + void* entropyWorkspace, size_t entropyWkspSize, + const int bmi2) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + unsigned* count = (unsigned*)entropyWorkspace; + FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + U32 entropyStatisticsSize; + const seqDef* const sequences = seqStorePtr->sequencesStart; + const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + BYTE* seqHead; + BYTE* lastNCount = NULL; + + entropyWorkspace = count + (MaxSeq + 1); + entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); + + DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq); + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= HUF_WORKSPACE_SIZE); + + /* Compress literals */ + { const BYTE* const literals = seqStorePtr->litStart; + size_t const litSize = (size_t)(seqStorePtr->lit - literals); + size_t const cSize = ZSTD_compressLiterals( + &prevEntropy->huf, &nextEntropy->huf, + cctxParams->cParams.strategy, + ZSTD_disableLiteralsCompression(cctxParams), + op, dstCapacity, + literals, litSize, + entropyWorkspace, entropyWkspSize, + bmi2); + FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); + assert(cSize <= dstCapacity); + DEBUGLOG(2, "Actual litSize: %u", cSize); + op += cSize; + } + + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, "Can't fit seq hdr in output buf!"); + if (nbSeq < 128) { + *op++ = (BYTE)nbSeq; + } else if (nbSeq < LONGNBSEQ) { + op[0] = (BYTE)((nbSeq>>8) + 0x80); + op[1] = (BYTE)nbSeq; + op+=2; + } else { + op[0]=0xFF; + MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); + op+=3; + } + assert(op <= oend); + if (nbSeq==0) { + /* Copy the old tables over as if we repeated them */ + ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); + return (size_t)(op - ostart); + } + + entropyStatisticsSize = ZSTD_buildEntropyStatistics(seqStorePtr, nbSeq, + CTable_LitLength, CTable_OffsetBits, CTable_MatchLength, + prevEntropy, nextEntropy, op, oend, + strategy, lastNCount, NULL /* no fseMetadata needed */, + count, entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(entropyStatisticsSize, "Entropy building failed!"); + op += entropyStatisticsSize; { size_t const bitstreamSize = ZSTD_encodeSequences( op, (size_t)(oend - op), @@ -2474,7 +2512,7 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; -static U32 countLiteralsBytes2(const seqStore_t* seqStore) { +static U32 countSeqStoreLiteralsBytes2(const seqStore_t* seqStore) { U32 literalsBytes = 0; U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; for (int i = 0; i < nbSeqs; ++i) { @@ -2714,31 +2752,6 @@ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastB MEM_writeLE24(op, cBlockHeader); } -typedef struct { - symbolEncodingType_e hType; - BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; - size_t hufDesSize; -} ZSTD_hufCTablesMetadata_t; - -/** ZSTD_fseCTablesMetadata_t : - * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and - * fse tables in fseTablesBuffer. - * fseTablesSize refers to the size of fse tables in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ -typedef struct { - symbolEncodingType_e llType; - symbolEncodingType_e ofType; - symbolEncodingType_e mlType; - BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; - size_t fseTablesSize; - size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ -} ZSTD_fseCTablesMetadata_t; - -typedef struct { - ZSTD_hufCTablesMetadata_t hufMetadata; - ZSTD_fseCTablesMetadata_t fseMetadata; -} ZSTD_entropyCTablesMetadata_t; - /** ZSTD_buildSuperBlockEntropy_literal() : * Builds entropy for the super-block literals. * Stores literals block type (raw, rle, compressed, repeat) and @@ -2871,6 +2884,7 @@ static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, BYTE* const ostart = fseMetadata->fseTablesBuffer; BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); BYTE* op = ostart; + U32 entropyStatisticsSize; assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); @@ -2956,13 +2970,12 @@ static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, /** ZSTD_buildSuperBlockEntropy() : * Builds entropy for the super-block. * @return : 0 on success or error code */ -static size_t -ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize) +size_t ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) { size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); @@ -3071,15 +3084,15 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, return cSeqSizeEstimate + sequencesSectionHeaderSize; } -static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, - const BYTE* ofCodeTable, - const BYTE* llCodeTable, - const BYTE* mlCodeTable, - size_t nbSeq, - const ZSTD_entropyCTables_t* entropy, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize, - int writeLitEntropy, int writeSeqEntropy) { +size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { size_t literalsSize, seqSize; literalsSize = ZSTD_estimateSubBlockSize_literal(literals, litSize, &entropy->huf, &entropyMetadata->hufMetadata, @@ -3091,7 +3104,25 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, return seqSize + literalsSize + ZSTD_blockHeaderSize; } -static U32 countLiteralsBytes(const seqStore_t* seqStore) { +/* Builds entropy statistics and uses them for blocksize estimation */ +static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(const ZSTD_CCtx* zc, seqStore_t* seqStore) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; + size_t estimatedSize; + FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + estimatedSize = ZSTD_estimateSubBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), + seqStore->ofCode, seqStore->llCode, seqStore->mlCode, + (size_t)(seqStore->sequences - seqStore->sequencesStart), + &zc->blockState.nextCBlock->entropy, &entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, + (int)(entropyMetadata.hufMetadata.hType == set_compressed), 1); + return estimatedSize; +} + +static U32 countSeqStoreLiteralsBytes(const seqStore_t* seqStore) { U32 literalsBytes = 0; U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; for (int i = 0; i < nbSeqs; ++i) { @@ -3104,7 +3135,7 @@ static U32 countLiteralsBytes(const seqStore_t* seqStore) { return literalsBytes; } -static U32 countMatchBytes(const seqStore_t* seqStore) { +static U32 countSeqStoreMatchBytes(const seqStore_t* seqStore) { U32 matchBytes = 0; U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; for (int i = 0; i < nbSeqs; ++i) { @@ -3117,8 +3148,6 @@ static U32 countMatchBytes(const seqStore_t* seqStore) { return matchBytes; } -/* The issue is with setting the end of the literals. Existence of last literals in the seq store make it so that - we have to be careful with where we put our litEnds and whatnot. */ static void setUpSeqStores(seqStore_t* firstSeqStore, seqStore_t* secondSeqStore, U32 nbSeq, U32 srcSize) { size_t nbSeqFirstHalf = nbSeq/2; size_t nbSeqSecondHalf = (nbSeq % 2 == 0) ? nbSeq/2 : nbSeq/2 + 1; @@ -3139,7 +3168,7 @@ static void setUpSeqStores(seqStore_t* firstSeqStore, seqStore_t* secondSeqStore firstSeqStore->sequences = firstSeqStore->sequencesStart+nbSeqFirstHalf; - U32 literalsBytesFirstHalf = countLiteralsBytes(firstSeqStore); + U32 literalsBytesFirstHalf = countSeqStoreLiteralsBytes(firstSeqStore); firstSeqStore->lit = firstSeqStore->litStart+literalsBytesFirstHalf; secondSeqStore->sequencesStart += nbSeqFirstHalf; @@ -3169,7 +3198,7 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, /* Attempt block splitting here */ DEBUGLOG(3, "Block size pre-split is: %u - lastBlock: %u, dst ptr: %u op: %u", srcSize, lastBlock, dst, op); - DEBUGLOG(3, "srcSize: %u seq store size: %u", srcSize, countLiteralsBytes(&zc->seqStore) + countMatchBytes(&zc->seqStore)); + DEBUGLOG(3, "srcSize: %u seq store size: %u", srcSize, countSeqStoreLiteralsBytes(&zc->seqStore) + countSeqStoreMatchBytes(&zc->seqStore)); seqStore_t firstHalfSeqStore = zc->seqStore; seqStore_t secondHalfSeqStore = zc->seqStore; @@ -3181,48 +3210,15 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, /* Check that splitting would actually improve compression. Return 0 if not */ { - ZSTD_entropyCTablesMetadata_t entropyMetadataOriginal; size_t estimatedOriginalSize; - ZSTD_entropyCTablesMetadata_t entropyMetadataFirstHalf; size_t estimatedFirstHalfSize; - ZSTD_entropyCTablesMetadata_t entropyMetadataSecondHalf; size_t estimatedSecondHalfSize; size_t estimatedSplitBlocksCompressedSize; - FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, - &zc->blockState.prevCBlock->entropy, - &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - &entropyMetadataOriginal, - zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); - estimatedOriginalSize = ZSTD_estimateSubBlockSize(zc->seqStore.litStart, (size_t)(zc->seqStore.lit - zc->seqStore.litStart), - zc->seqStore.ofCode, zc->seqStore.llCode, zc->seqStore.mlCode, - (size_t)(zc->seqStore.sequences - zc->seqStore.sequencesStart), - &zc->blockState.nextCBlock->entropy, &entropyMetadataOriginal, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, - (int)(entropyMetadataOriginal.hufMetadata.hType == set_compressed), 1); - FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&firstHalfSeqStore, - &zc->blockState.prevCBlock->entropy, - &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - &entropyMetadataFirstHalf, - zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); - estimatedFirstHalfSize = ZSTD_estimateSubBlockSize(firstHalfSeqStore.litStart, (size_t)(firstHalfSeqStore.lit - firstHalfSeqStore.litStart), - firstHalfSeqStore.ofCode, firstHalfSeqStore.llCode, firstHalfSeqStore.mlCode, - (size_t)(firstHalfSeqStore.sequences - firstHalfSeqStore.sequencesStart), - &zc->blockState.nextCBlock->entropy, &entropyMetadataFirstHalf, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, - (int)(entropyMetadataFirstHalf.hufMetadata.hType == set_compressed), 1); - FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&secondHalfSeqStore, - &zc->blockState.prevCBlock->entropy, - &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - &entropyMetadataSecondHalf, - zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); - estimatedSecondHalfSize = ZSTD_estimateSubBlockSize(secondHalfSeqStore.litStart, (size_t)(secondHalfSeqStore.lit - secondHalfSeqStore.litStart), - secondHalfSeqStore.ofCode, secondHalfSeqStore.llCode, secondHalfSeqStore.mlCode, - (size_t)(secondHalfSeqStore.sequences - secondHalfSeqStore.sequencesStart), - &zc->blockState.nextCBlock->entropy, &entropyMetadataSecondHalf, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, - (int)(entropyMetadataSecondHalf.hufMetadata.hType == set_compressed), 1); + estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &zc->seqStore); + estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &firstHalfSeqStore); + estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &secondHalfSeqStore); estimatedSplitBlocksCompressedSize = estimatedFirstHalfSize + estimatedSecondHalfSize; - + DEBUGLOG(2, "Estimated original block size is: %u", estimatedOriginalSize); DEBUGLOG(2, "Estimated split block size is: %u - split: %u - %u", estimatedSplitBlocksCompressedSize, estimatedFirstHalfSize, estimatedSecondHalfSize); if (estimatedSplitBlocksCompressedSize > estimatedOriginalSize) { @@ -3235,11 +3231,11 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, size_t cSizeFirstHalf; size_t cSizeSecondHalf; - size_t literalsBytesFirstHalf = countLiteralsBytes(&firstHalfSeqStore); - size_t srcBytesFirstHalf = literalsBytesFirstHalf + countMatchBytes(&firstHalfSeqStore); + size_t literalsBytesFirstHalf = countSeqStoreLiteralsBytes(&firstHalfSeqStore); + size_t srcBytesFirstHalf = literalsBytesFirstHalf + countSeqStoreMatchBytes(&firstHalfSeqStore); size_t srcBytesSecondHalf = srcSize - srcBytesFirstHalf; - DEBUGLOG(3, "literals bytes first half: %zu literals bytes second half: %zu, orig: %zu", literalsBytesFirstHalf, countLiteralsBytes(&secondHalfSeqStore), countLiteralsBytes(&zc->seqStore)); - DEBUGLOG(3, "match bytes first half: %zu match bytes second half: %zu, orig: %zu", countMatchBytes(&firstHalfSeqStore), countMatchBytes(&secondHalfSeqStore), countMatchBytes(&zc->seqStore)); + DEBUGLOG(3, "literals bytes first half: %zu literals bytes second half: %zu, orig: %zu", literalsBytesFirstHalf, countSeqStoreLiteralsBytes(&secondHalfSeqStore), countSeqStoreLiteralsBytes(&zc->seqStore)); + DEBUGLOG(3, "match bytes first half: %zu match bytes second half: %zu, orig: %zu", countSeqStoreMatchBytes(&firstHalfSeqStore), countSeqStoreMatchBytes(&secondHalfSeqStore), countSeqStoreMatchBytes(&zc->seqStore)); DEBUGLOG(2, "Src bytes first half: %zu src bytes second half: %zu", srcBytesFirstHalf, srcBytesSecondHalf); { U32 cSeqsSizeFirstHalf = ZSTD_entropyCompressSequences(&firstHalfSeqStore, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 6083ed66418..303f8feccf3 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -82,6 +82,62 @@ typedef struct { ZSTD_fseCTables_t fse; } ZSTD_entropyCTables_t; +/*-************************************* +* Entropy buffer statistics structs +***************************************/ +/** ZSTD_hufCTablesMetadata_t : + * Stores Literals Block Type for a super-block in hType, and + * huffman tree description in hufDesBuffer. + * hufDesSize refers to the size of huffman tree description in bytes. + * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ +typedef struct { + symbolEncodingType_e hType; + BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/** ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. + * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; + size_t fseTablesSize; + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + +/** ZSTD_buildSuperBlockEntropy() : + * Builds entropy for the super-block. + * @return : 0 on success or error code */ +size_t ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize); + +/** ZSTD_estimateSubBlockSize() : + * Estimates the size that the block will be, based on literals and sequences. + * @return : estimated size or error code */ +size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy); + typedef struct { U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */ U32 len; /* Raw length of match */ diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index 6a7b02342fd..66fffd1b6ac 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -15,288 +15,10 @@ #include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */ #include "hist.h" /* HIST_countFast_wksp */ -#include "zstd_compress_internal.h" +#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */ #include "zstd_compress_sequences.h" #include "zstd_compress_literals.h" -/*-************************************* -* Superblock entropy buffer structs -***************************************/ -/** ZSTD_hufCTablesMetadata_t : - * Stores Literals Block Type for a super-block in hType, and - * huffman tree description in hufDesBuffer. - * hufDesSize refers to the size of huffman tree description in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ -typedef struct { - symbolEncodingType_e hType; - BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; - size_t hufDesSize; -} ZSTD_hufCTablesMetadata_t; - -/** ZSTD_fseCTablesMetadata_t : - * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and - * fse tables in fseTablesBuffer. - * fseTablesSize refers to the size of fse tables in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ -typedef struct { - symbolEncodingType_e llType; - symbolEncodingType_e ofType; - symbolEncodingType_e mlType; - BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; - size_t fseTablesSize; - size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ -} ZSTD_fseCTablesMetadata_t; - -typedef struct { - ZSTD_hufCTablesMetadata_t hufMetadata; - ZSTD_fseCTablesMetadata_t fseMetadata; -} ZSTD_entropyCTablesMetadata_t; - - -/** ZSTD_buildSuperBlockEntropy_literal() : - * Builds entropy for the super-block literals. - * Stores literals block type (raw, rle, compressed, repeat) and - * huffman description table to hufMetadata. - * @return : size of huffman description table or error code */ -static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, - const ZSTD_hufCTables_t* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_hufCTablesMetadata_t* hufMetadata, - const int disableLiteralsCompression, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); - BYTE* const nodeWksp = countWkspStart + countWkspSize; - const size_t nodeWkspSize = wkspEnd-nodeWksp; - unsigned maxSymbolValue = 255; - unsigned huffLog = HUF_TABLELOG_DEFAULT; - HUF_repeat repeat = prevHuf->repeatMode; - - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); - - /* Prepare nextEntropy assuming reusing the existing table */ - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - - if (disableLiteralsCompression) { - DEBUGLOG(5, "set_basic - disabled"); - hufMetadata->hType = set_basic; - return 0; - } - - /* small ? don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) { - DEBUGLOG(5, "set_basic - too small"); - hufMetadata->hType = set_basic; - return 0; - } - } - - /* Scan input and build symbol stats */ - { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); - FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); - if (largest == srcSize) { - DEBUGLOG(5, "set_rle"); - hufMetadata->hType = set_rle; - return 0; - } - if (largest <= (srcSize >> 7)+4) { - DEBUGLOG(5, "set_basic - no gain"); - hufMetadata->hType = set_basic; - return 0; - } - } - - /* Validate the previous Huffman table */ - if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { - repeat = HUF_repeat_none; - } - - /* Build Huffman Tree */ - ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); - { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, - maxSymbolValue, huffLog, - nodeWksp, nodeWkspSize); - FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); - huffLog = (U32)maxBits; - { /* Build and write the CTable */ - size_t const newCSize = HUF_estimateCompressedSize( - (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); - size_t const hSize = HUF_writeCTable( - hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), - (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); - /* Check against repeating the previous CTable */ - if (repeat != HUF_repeat_none) { - size_t const oldCSize = HUF_estimateCompressedSize( - (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); - if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { - DEBUGLOG(5, "set_repeat - smaller"); - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_repeat; - return 0; - } - } - if (newCSize + hSize >= srcSize) { - DEBUGLOG(5, "set_basic - no gains"); - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_basic; - return 0; - } - DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); - hufMetadata->hType = set_compressed; - nextHuf->repeatMode = HUF_repeat_check; - return hSize; - } - } -} - -/** ZSTD_buildSuperBlockEntropy_sequences() : - * Builds entropy for the super-block sequences. - * Stores symbol compression modes and fse table to fseMetadata. - * @return : size of fse tables or error code */ -static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, - const ZSTD_fseCTables_t* prevEntropy, - ZSTD_fseCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_fseCTablesMetadata_t* fseMetadata, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); - BYTE* const cTableWksp = countWkspStart + countWkspSize; - const size_t cTableWkspSize = wkspEnd-cTableWksp; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; - BYTE* const ostart = fseMetadata->fseTablesBuffer; - BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); - BYTE* op = ostart; - - assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); - ZSTD_memset(workspace, 0, wkspSize); - - fseMetadata->lastCountSize = 0; - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { U32 LLtype; - unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->litlengthCTable, - LL_defaultNorm, LL_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->llType = (symbolEncodingType_e) LLtype; - } } - /* build CTable for Offsets */ - { U32 Offtype; - unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, - countWksp, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->offcodeCTable, - OF_defaultNorm, OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->ofType = (symbolEncodingType_e) Offtype; - } } - /* build CTable for MatchLengths */ - { U32 MLtype; - unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->matchlengthCTable, - ML_defaultNorm, ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, - prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->mlType = (symbolEncodingType_e) MLtype; - } } - assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); - return op-ostart; -} - - -/** ZSTD_buildSuperBlockEntropy() : - * Builds entropy for the super-block. - * @return : 0 on success or error code */ -static size_t -ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize) -{ - size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); - entropyMetadata->hufMetadata.hufDesSize = - ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, - &prevEntropy->huf, &nextEntropy->huf, - &entropyMetadata->hufMetadata, - ZSTD_disableLiteralsCompression(cctxParams), - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); - entropyMetadata->fseMetadata.fseTablesSize = - ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, - &prevEntropy->fse, &nextEntropy->fse, - cctxParams, - &entropyMetadata->fseMetadata, - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); - return 0; -} - /** ZSTD_compressSubBlock_literal() : * Compresses literals section for a sub-block. * When we have to write the Huffman table we will sometimes choose a header @@ -576,110 +298,6 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, return op-ostart; } -static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, - const ZSTD_hufCTables_t* huf, - const ZSTD_hufCTablesMetadata_t* hufMetadata, - void* workspace, size_t wkspSize, - int writeEntropy) -{ - unsigned* const countWksp = (unsigned*)workspace; - unsigned maxSymbolValue = 255; - size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ - - if (hufMetadata->hType == set_basic) return litSize; - else if (hufMetadata->hType == set_rle) return 1; - else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { - size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); - if (ZSTD_isError(largest)) return litSize; - { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); - if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; - return cLitSizeEstimate + literalSectionHeaderSize; - } } - assert(0); /* impossible */ - return 0; -} - -static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, - const BYTE* codeTable, unsigned maxCode, - size_t nbSeq, const FSE_CTable* fseCTable, - const U32* additionalBits, - short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, - void* workspace, size_t wkspSize) -{ - unsigned* const countWksp = (unsigned*)workspace; - const BYTE* ctp = codeTable; - const BYTE* const ctStart = ctp; - const BYTE* const ctEnd = ctStart + nbSeq; - size_t cSymbolTypeSizeEstimateInBits = 0; - unsigned max = maxCode; - - HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ - if (type == set_basic) { - /* We selected this encoding type, so it must be valid. */ - assert(max <= defaultMax); - cSymbolTypeSizeEstimateInBits = max <= defaultMax - ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max) - : ERROR(GENERIC); - } else if (type == set_rle) { - cSymbolTypeSizeEstimateInBits = 0; - } else if (type == set_compressed || type == set_repeat) { - cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); - } - if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; - while (ctp < ctEnd) { - if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; - else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ - ctp++; - } - return cSymbolTypeSizeEstimateInBits / 8; -} - -static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, - const BYTE* llCodeTable, - const BYTE* mlCodeTable, - size_t nbSeq, - const ZSTD_fseCTables_t* fseTables, - const ZSTD_fseCTablesMetadata_t* fseMetadata, - void* workspace, size_t wkspSize, - int writeEntropy) -{ - size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ - size_t cSeqSizeEstimate = 0; - cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, - nbSeq, fseTables->offcodeCTable, NULL, - OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - workspace, wkspSize); - cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, - nbSeq, fseTables->litlengthCTable, LL_bits, - LL_defaultNorm, LL_defaultNormLog, MaxLL, - workspace, wkspSize); - cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, - nbSeq, fseTables->matchlengthCTable, ML_bits, - ML_defaultNorm, ML_defaultNormLog, MaxML, - workspace, wkspSize); - if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; - return cSeqSizeEstimate + sequencesSectionHeaderSize; -} - -static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, - const BYTE* ofCodeTable, - const BYTE* llCodeTable, - const BYTE* mlCodeTable, - size_t nbSeq, - const ZSTD_entropyCTables_t* entropy, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize, - int writeLitEntropy, int writeSeqEntropy) { - size_t cSizeEstimate = 0; - cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, - &entropy->huf, &entropyMetadata->hufMetadata, - workspace, wkspSize, writeLitEntropy); - cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, - nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, - workspace, wkspSize, writeSeqEntropy); - return cSizeEstimate + ZSTD_blockHeaderSize; -} - static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) { if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) From 3efd5105b51e3e0500e67b4adef2e9791be76924 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 7 Dec 2020 11:52:27 -0500 Subject: [PATCH 05/15] Refactor FSE statistics calc --- lib/compress/zstd_compress.c | 151 ++++++++++------------------------- 1 file changed, 43 insertions(+), 108 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 824987e6698..0b4959038b0 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2178,34 +2178,32 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) } MEM_STATIC size_t -ZSTD_buildEntropyStatistics(seqStore_t* seqStorePtr, size_t nbSeq, - FSE_CTable* CTable_LitLength, - FSE_CTable* CTable_OffsetBits, - FSE_CTable* CTable_MatchLength, - const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, - BYTE* dst, const BYTE* const dstEnd, - ZSTD_strategy strategy, BYTE* lastNCount, ZSTD_fseCTablesMetadata_t* fseMetadata, - void* countWorkspace, void* entropyWorkspace, size_t entropyWkspSize) { +ZSTD_buildFseStatistics(const BYTE* const ofCodeTable, + const BYTE* const llCodeTable, + const BYTE* const mlCodeTable, + FSE_CTable* CTable_LitLength, + FSE_CTable* CTable_OffsetBits, + FSE_CTable* CTable_MatchLength, + size_t nbSeq, + const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, + BYTE* dst, const BYTE* const dstEnd, + ZSTD_strategy strategy, BYTE* lastNCount, ZSTD_fseCTablesMetadata_t* fseMetadata, + void* countWorkspace, void* entropyWorkspace, size_t entropyWkspSize) { U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; BYTE* const ostart = dst; - BYTE* const oend = dstEnd; + const BYTE* const oend = dstEnd; BYTE* op = ostart; BYTE* seqHead = op++; assert(op <= oend); - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); /* build CTable for Literal Lengths */ { unsigned max = MaxLL; size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ DEBUGLOG(5, "Building LL table"); - nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, countWorkspace, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->fse.litlengthCTable, + LLFSELog, prevEntropy->litlengthCTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy); assert(set_basic < set_compressed && set_rle < set_compressed); @@ -2215,8 +2213,8 @@ ZSTD_buildEntropyStatistics(seqStore_t* seqStorePtr, size_t nbSeq, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, countWorkspace, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->fse.litlengthCTable, - sizeof(prevEntropy->fse.litlengthCTable), + prevEntropy->litlengthCTable, + sizeof(prevEntropy->litlengthCTable), entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); if (LLtype == set_compressed) @@ -2235,10 +2233,10 @@ ZSTD_buildEntropyStatistics(seqStore_t* seqStorePtr, size_t nbSeq, /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; DEBUGLOG(5, "Building OF table"); - nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, countWorkspace, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->fse.offcodeCTable, + OffFSELog, prevEntropy->offcodeCTable, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy); assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ @@ -2247,8 +2245,8 @@ ZSTD_buildEntropyStatistics(seqStore_t* seqStorePtr, size_t nbSeq, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, countWorkspace, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->fse.offcodeCTable, - sizeof(prevEntropy->fse.offcodeCTable), + prevEntropy->offcodeCTable, + sizeof(prevEntropy->offcodeCTable), entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); if (Offtype == set_compressed) @@ -2265,10 +2263,10 @@ ZSTD_buildEntropyStatistics(seqStore_t* seqStorePtr, size_t nbSeq, size_t const mostFrequent = HIST_countFast_wksp( countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, countWorkspace, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->fse.matchlengthCTable, + MLFSELog, prevEntropy->matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy); assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ @@ -2277,8 +2275,8 @@ ZSTD_buildEntropyStatistics(seqStore_t* seqStorePtr, size_t nbSeq, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, countWorkspace, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, - prevEntropy->fse.matchlengthCTable, - sizeof(prevEntropy->fse.matchlengthCTable), + prevEntropy->matchlengthCTable, + sizeof(prevEntropy->matchlengthCTable), entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); if (MLtype == set_compressed) @@ -2370,12 +2368,14 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, return (size_t)(op - ostart); } - entropyStatisticsSize = ZSTD_buildEntropyStatistics(seqStorePtr, nbSeq, + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + entropyStatisticsSize = ZSTD_buildFseStatistics(ofCodeTable, llCodeTable, mlCodeTable, CTable_LitLength, CTable_OffsetBits, CTable_MatchLength, - prevEntropy, nextEntropy, op, oend, + nbSeq, &prevEntropy->fse, &nextEntropy->fse, op, oend, strategy, lastNCount, NULL /* no fseMetadata needed */, count, entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(entropyStatisticsSize, "Entropy building failed!"); + FORWARD_IF_ERROR(entropyStatisticsSize, "FSE statistics building failed!"); op += entropyStatisticsSize; { size_t const bitstreamSize = ZSTD_encodeSequences( @@ -2866,6 +2866,7 @@ static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, ZSTD_fseCTablesMetadata_t* fseMetadata, void* workspace, size_t wkspSize) { + /* Size the workspaces */ BYTE* const wkspStart = (BYTE*)workspace; BYTE* const wkspEnd = wkspStart + wkspSize; BYTE* const countWkspStart = wkspStart; @@ -2873,97 +2874,31 @@ static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); BYTE* const cTableWksp = countWkspStart + countWkspSize; const size_t cTableWkspSize = wkspEnd-cTableWksp; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; - FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; const BYTE* const ofCodeTable = seqStorePtr->ofCode; const BYTE* const llCodeTable = seqStorePtr->llCode; const BYTE* const mlCodeTable = seqStorePtr->mlCode; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; BYTE* const ostart = fseMetadata->fseTablesBuffer; BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); BYTE* op = ostart; U32 entropyStatisticsSize; + BYTE* lastNCount = NULL; assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); ZSTD_memset(workspace, 0, wkspSize); fseMetadata->lastCountSize = 0; - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { U32 LLtype; - unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->litlengthCTable, - LL_defaultNorm, LL_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->llType = (symbolEncodingType_e) LLtype; - } } - /* build CTable for Offsets */ - { U32 Offtype; - unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, - countWksp, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->offcodeCTable, - OF_defaultNorm, OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->ofType = (symbolEncodingType_e) Offtype; - } } - /* build CTable for MatchLengths */ - { U32 MLtype; - unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->matchlengthCTable, - ML_defaultNorm, ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, - prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->mlType = (symbolEncodingType_e) MLtype; - } } - assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); - return op-ostart; + return ZSTD_buildFseStatistics(ofCodeTable, llCodeTable, mlCodeTable, + CTable_LitLength, CTable_OffsetBits, CTable_MatchLength, + nbSeq, prevEntropy, nextEntropy, op, oend, + strategy, lastNCount, fseMetadata, + countWksp, cTableWksp, cTableWkspSize); } From d225955cf00ade6a07b77ce39176dc445d9efa7c Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 7 Dec 2020 13:12:41 -0500 Subject: [PATCH 06/15] Rudimentary linear sweep for split points --- lib/compress/zstd_compress.c | 154 +++++++++++++++++++++-------------- 1 file changed, 91 insertions(+), 63 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 0b4959038b0..763982ab7b9 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2178,17 +2178,17 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) } MEM_STATIC size_t -ZSTD_buildFseStatistics(const BYTE* const ofCodeTable, - const BYTE* const llCodeTable, - const BYTE* const mlCodeTable, - FSE_CTable* CTable_LitLength, - FSE_CTable* CTable_OffsetBits, - FSE_CTable* CTable_MatchLength, - size_t nbSeq, - const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, - BYTE* dst, const BYTE* const dstEnd, - ZSTD_strategy strategy, BYTE* lastNCount, ZSTD_fseCTablesMetadata_t* fseMetadata, - void* countWorkspace, void* entropyWorkspace, size_t entropyWkspSize) { +ZSTD_buildSequencesStatistics(const BYTE* const ofCodeTable, + const BYTE* const llCodeTable, + const BYTE* const mlCodeTable, + FSE_CTable* CTable_LitLength, + FSE_CTable* CTable_OffsetBits, + FSE_CTable* CTable_MatchLength, + size_t nbSeq, + const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, + BYTE* dst, const BYTE* const dstEnd, + ZSTD_strategy strategy, BYTE* lastNCount, ZSTD_fseCTablesMetadata_t* fseMetadata, + void* countWorkspace, void* entropyWorkspace, size_t entropyWkspSize) { U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ BYTE* const ostart = dst; const BYTE* const oend = dstEnd; @@ -2207,7 +2207,7 @@ ZSTD_buildFseStatistics(const BYTE* const ofCodeTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy); assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ { size_t const countSize = ZSTD_buildCTable( op, (size_t)(oend - op), CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, @@ -2239,7 +2239,7 @@ ZSTD_buildFseStatistics(const BYTE* const ofCodeTable, OffFSELog, prevEntropy->offcodeCTable, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ { size_t const countSize = ZSTD_buildCTable( op, (size_t)(oend - op), CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, @@ -2269,7 +2269,7 @@ ZSTD_buildFseStatistics(const BYTE* const ofCodeTable, MLFSELog, prevEntropy->matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ { size_t const countSize = ZSTD_buildCTable( op, (size_t)(oend - op), CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, @@ -2343,7 +2343,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, bmi2); FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); assert(cSize <= dstCapacity); - DEBUGLOG(2, "Actual litSize: %u", cSize); + DEBUGLOG(2, "Actual litSize: %zu", cSize); op += cSize; } @@ -2370,7 +2370,8 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, /* convert length/distances into codes */ ZSTD_seqToCodes(seqStorePtr); - entropyStatisticsSize = ZSTD_buildFseStatistics(ofCodeTable, llCodeTable, mlCodeTable, + /* build stats for sequences */ + entropyStatisticsSize = ZSTD_buildSequencesStatistics(ofCodeTable, llCodeTable, mlCodeTable, CTable_LitLength, CTable_OffsetBits, CTable_MatchLength, nbSeq, &prevEntropy->fse, &nextEntropy->fse, op, oend, strategy, lastNCount, NULL /* no fseMetadata needed */, @@ -2403,7 +2404,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, "emitting an uncompressed block."); return 0; } - DEBUGLOG(2, "Actual seqSize: %u", bitstreamSize); + DEBUGLOG(2, "Actual seqSize: %zu", bitstreamSize); } DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); @@ -2745,7 +2746,7 @@ static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) /* Writes the block header */ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { - DEBUGLOG(3, "writeBlockHeader: cSize: %u blockSize: %u lastBlock: %u", cSize, blockSize, lastBlock); + DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); U32 const cBlockHeader = cSize == 1 ? lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); @@ -2894,7 +2895,7 @@ static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, ZSTD_memset(workspace, 0, wkspSize); fseMetadata->lastCountSize = 0; - return ZSTD_buildFseStatistics(ofCodeTable, llCodeTable, mlCodeTable, + return ZSTD_buildSequencesStatistics(ofCodeTable, llCodeTable, mlCodeTable, CTable_LitLength, CTable_OffsetBits, CTable_MatchLength, nbSeq, prevEntropy, nextEntropy, op, oend, strategy, lastNCount, fseMetadata, @@ -2972,17 +2973,23 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ if (type == set_basic) { + DEBUGLOG(2, "Type == set_basic"); /* We selected this encoding type, so it must be valid. */ assert(max <= defaultMax); cSymbolTypeSizeEstimateInBits = max <= defaultMax ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max) : ERROR(GENERIC); } else if (type == set_rle) { + DEBUGLOG(2, "Type == RLE"); cSymbolTypeSizeEstimateInBits = 0; } else if (type == set_compressed || type == set_repeat) { + DEBUGLOG(2, "Type == set_compressed"); cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); } - if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) { + DEBUGLOG(2, "Returning inaccurate"); + return nbSeq * 10; + } while (ctp < ctEnd) { if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ @@ -3015,7 +3022,6 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, ML_defaultNorm, ML_defaultNormLog, MaxML, workspace, wkspSize); if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; - DEBUGLOG(2, "fseMetadata->fseTablesSize: %u", fseMetadata->fseTablesSize); return cSeqSizeEstimate + sequencesSectionHeaderSize; } @@ -3035,7 +3041,7 @@ size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, seqSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, workspace, wkspSize, writeSeqEntropy); - DEBUGLOG(2, "Estimated litSize: %u seqSize: %u", literalsSize, seqSize); + DEBUGLOG(2, "Estimated litSize: %zu seqSize: %zu", literalsSize, seqSize); return seqSize + literalsSize + ZSTD_blockHeaderSize; } @@ -3083,16 +3089,16 @@ static U32 countSeqStoreMatchBytes(const seqStore_t* seqStore) { return matchBytes; } -static void setUpSeqStores(seqStore_t* firstSeqStore, seqStore_t* secondSeqStore, U32 nbSeq, U32 srcSize) { - size_t nbSeqFirstHalf = nbSeq/2; - size_t nbSeqSecondHalf = (nbSeq % 2 == 0) ? nbSeq/2 : nbSeq/2 + 1; - DEBUGLOG(2, "first half nbseq: %u second half nbseq: %u", nbSeqFirstHalf, nbSeqSecondHalf); - - const BYTE* litEnd = firstSeqStore->lit; - const seqDef* const seqEnd = firstSeqStore->sequences; +static void splitSeqStores(const seqStore_t* originalSeqStore, + seqStore_t* firstSeqStore, seqStore_t* secondSeqStore, + size_t nbSeqFirstHalf) { + + BYTE* const litEnd = originalSeqStore->lit; + seqDef* const seqEnd = originalSeqStore->sequences; + *firstSeqStore = *originalSeqStore; + *secondSeqStore = *originalSeqStore; if (firstSeqStore->longLengthID != 0) { - DEBUGLOG(2, "long lenght ID present"); if (firstSeqStore->longLengthPos < nbSeqFirstHalf) { secondSeqStore->longLengthID = 0; } else { @@ -3113,16 +3119,57 @@ static void setUpSeqStores(seqStore_t* firstSeqStore, seqStore_t* secondSeqStore secondSeqStore->llCode += nbSeqFirstHalf; secondSeqStore->mlCode += nbSeqFirstHalf; secondSeqStore->ofCode += nbSeqFirstHalf; + DEBUGLOG(2, "Split into: %u and %u", (U32)(firstSeqStore->sequences - firstSeqStore->sequencesStart), + (U32)(secondSeqStore->sequences - secondSeqStore->sequencesStart)); +} + +#define NB_SPLIT_POINTS_TO_TEST 2 +static int setUpSeqStores(ZSTD_CCtx* zc, + seqStore_t* firstSeqStore, seqStore_t* secondSeqStore, + U32 nbSeq, U32 srcSize) { + size_t increment = nbSeq/NB_SPLIT_POINTS_TO_TEST + 1; + size_t estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &zc->seqStore); + size_t minEstimatedCSize = estimatedOriginalSize; + size_t minEstimatedCSizeIdx = 0; + size_t i; + + if (increment == 0) { + return 0; + } + + DEBUGLOG(2, "total nbseq: %u, increment: %zu", nbSeq, increment); + for (i = increment; i < nbSeq; i += increment) { + /* Check that splitting would actually improve compression. Return 0 if not */ + size_t estimatedFirstHalfSize; + size_t estimatedSecondHalfSize; + size_t estimatedSplitBlocksCompressedSize; + size_t nbSeqFirstHalf = i; + splitSeqStores(&zc->seqStore, firstSeqStore, secondSeqStore, nbSeqFirstHalf); + estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, firstSeqStore); + estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, secondSeqStore); + estimatedSplitBlocksCompressedSize = estimatedFirstHalfSize + estimatedSecondHalfSize; + DEBUGLOG(2, "Estimated original block size is: %zu", estimatedOriginalSize); + DEBUGLOG(2, "Estimated split block size is: %zu - split: %zu - %zu", estimatedSplitBlocksCompressedSize, estimatedFirstHalfSize, estimatedSecondHalfSize); + if (estimatedSplitBlocksCompressedSize < minEstimatedCSize) { + minEstimatedCSizeIdx = i; + minEstimatedCSize = estimatedSplitBlocksCompressedSize; + } + } + + if (minEstimatedCSizeIdx != 0) { + DEBUGLOG(2, "WILL SPLIT"); + splitSeqStores(&zc->seqStore, firstSeqStore, secondSeqStore, minEstimatedCSizeIdx); + return 1; + } else { + DEBUGLOG(2, "NOT SPLITTING"); + return 0; + } } static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 frame, U32 lastBlock, U32 nbSeq) { - /* This the upper bound for the length of an rle block. - * This isn't the actual upper bound. Finding the real threshold - * needs further investigation. - */ const U32 rleMaxLength = 25; size_t cSize; const BYTE* ip = (const BYTE*)src; @@ -3132,45 +3179,26 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, (unsigned)zc->blockState.matchState.nextToUpdate); /* Attempt block splitting here */ - DEBUGLOG(3, "Block size pre-split is: %u - lastBlock: %u, dst ptr: %u op: %u", srcSize, lastBlock, dst, op); - DEBUGLOG(3, "srcSize: %u seq store size: %u", srcSize, countSeqStoreLiteralsBytes(&zc->seqStore) + countSeqStoreMatchBytes(&zc->seqStore)); - seqStore_t firstHalfSeqStore = zc->seqStore; - seqStore_t secondHalfSeqStore = zc->seqStore; - - setUpSeqStores(&firstHalfSeqStore, &secondHalfSeqStore, nbSeq, srcSize); + DEBUGLOG(3, "Block size pre-split is: %zu - lastBlock: %u", srcSize, lastBlock); + DEBUGLOG(3, "srcSize: %zu seq store size: %u", srcSize, countSeqStoreLiteralsBytes(&zc->seqStore) + countSeqStoreMatchBytes(&zc->seqStore)); + seqStore_t firstHalfSeqStore; + seqStore_t secondHalfSeqStore; + if (setUpSeqStores(zc, &firstHalfSeqStore, &secondHalfSeqStore, nbSeq, srcSize) != 1) { + return 0; + } assert((U32)(firstHalfSeqStore.lit - firstHalfSeqStore.litStart) + (U32)(secondHalfSeqStore.lit - secondHalfSeqStore.litStart) == (U32)(zc->seqStore.lit - zc->seqStore.litStart)); assert((U32)(firstHalfSeqStore.sequences - firstHalfSeqStore.sequencesStart) + (U32)(secondHalfSeqStore.sequences - secondHalfSeqStore.sequencesStart) == (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart)); - /* Check that splitting would actually improve compression. Return 0 if not */ - { - size_t estimatedOriginalSize; - size_t estimatedFirstHalfSize; - size_t estimatedSecondHalfSize; - size_t estimatedSplitBlocksCompressedSize; - estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &zc->seqStore); - estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &firstHalfSeqStore); - estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &secondHalfSeqStore); - estimatedSplitBlocksCompressedSize = estimatedFirstHalfSize + estimatedSecondHalfSize; - - DEBUGLOG(2, "Estimated original block size is: %u", estimatedOriginalSize); - DEBUGLOG(2, "Estimated split block size is: %u - split: %u - %u", estimatedSplitBlocksCompressedSize, estimatedFirstHalfSize, estimatedSecondHalfSize); - if (estimatedSplitBlocksCompressedSize > estimatedOriginalSize) { - DEBUGLOG(2, "BAIL SPLIT"); - return 0; - } else { - DEBUGLOG(2, "WILL SPLIT"); - } - } size_t cSizeFirstHalf; size_t cSizeSecondHalf; size_t literalsBytesFirstHalf = countSeqStoreLiteralsBytes(&firstHalfSeqStore); size_t srcBytesFirstHalf = literalsBytesFirstHalf + countSeqStoreMatchBytes(&firstHalfSeqStore); size_t srcBytesSecondHalf = srcSize - srcBytesFirstHalf; - DEBUGLOG(3, "literals bytes first half: %zu literals bytes second half: %zu, orig: %zu", literalsBytesFirstHalf, countSeqStoreLiteralsBytes(&secondHalfSeqStore), countSeqStoreLiteralsBytes(&zc->seqStore)); - DEBUGLOG(3, "match bytes first half: %zu match bytes second half: %zu, orig: %zu", countSeqStoreMatchBytes(&firstHalfSeqStore), countSeqStoreMatchBytes(&secondHalfSeqStore), countSeqStoreMatchBytes(&zc->seqStore)); + DEBUGLOG(3, "literals bytes first half: %zu literals bytes second half: %u, orig: %u", literalsBytesFirstHalf, countSeqStoreLiteralsBytes(&secondHalfSeqStore), countSeqStoreLiteralsBytes(&zc->seqStore)); + DEBUGLOG(3, "match bytes first half: %u match bytes second half: %u, orig: %u", countSeqStoreMatchBytes(&firstHalfSeqStore), countSeqStoreMatchBytes(&secondHalfSeqStore), countSeqStoreMatchBytes(&zc->seqStore)); DEBUGLOG(2, "Src bytes first half: %zu src bytes second half: %zu", srcBytesFirstHalf, srcBytesSecondHalf); { U32 cSeqsSizeFirstHalf = ZSTD_entropyCompressSequences(&firstHalfSeqStore, @@ -3252,7 +3280,7 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, } } - DEBUGLOG(2, "cSizeFirstHalf: %u cSizeSecondHalf: %u", cSizeFirstHalf, cSizeSecondHalf); + DEBUGLOG(2, "cSizeFirstHalf: %zu cSizeSecondHalf: %zu", cSizeFirstHalf, cSizeSecondHalf); cSize = cSizeFirstHalf + cSizeSecondHalf; return cSize; } From 8a72b7ff13e8e93978e3ae5bff555b985b8c6f01 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 7 Dec 2020 17:02:24 -0500 Subject: [PATCH 07/15] Refactor block compression logic into single function --- lib/compress/zstd_compress.c | 143 +++++++++++++++-------------------- 1 file changed, 61 insertions(+), 82 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 763982ab7b9..6b079d3fa6f 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3167,6 +3167,60 @@ static int setUpSeqStores(ZSTD_CCtx* zc, } } +static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqStore, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock) { + const U32 rleMaxLength = 25; + BYTE* op = (BYTE*)dst; + const BYTE* ip = (const BYTE*)src; + size_t cSize; + size_t cSeqsSize = ZSTD_entropyCompressSequences(seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, + srcSize, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + + if (!zc->isFirstBlock && + cSeqsSize < rleMaxLength && + ZSTD_maybeRLE(seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + cSeqsSize = 1; + } + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + return 0; + } + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + if (cSeqsSize == 0) { + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "Nocompress block failed"); + DEBUGLOG(2, "1: Writing out nocompress block, size: %zu", cSize); + } else if (cSeqsSize == 1) { + cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "RLE compress block failed"); + DEBUGLOG(2, "1: Writing out RLE block, size: %zu", cSize); + } else { + U32 cBlockHeader; + /* Error checking and repcodes update */ + ZSTD_confirmRepcodesAndEntropyTables(zc); + writeBlockHeader(op, cSeqsSize, srcSize, lastBlock); + cSize = ZSTD_blockHeaderSize + cSeqsSize; + DEBUGLOG(3, "1: Writing out compressed block, size: %zu", cSize); + } + return cSize; +} + static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 frame, U32 lastBlock, U32 nbSeq) { @@ -3201,85 +3255,16 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, DEBUGLOG(3, "match bytes first half: %u match bytes second half: %u, orig: %u", countSeqStoreMatchBytes(&firstHalfSeqStore), countSeqStoreMatchBytes(&secondHalfSeqStore), countSeqStoreMatchBytes(&zc->seqStore)); DEBUGLOG(2, "Src bytes first half: %zu src bytes second half: %zu", srcBytesFirstHalf, srcBytesSecondHalf); - { U32 cSeqsSizeFirstHalf = ZSTD_entropyCompressSequences(&firstHalfSeqStore, - &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, - srcBytesFirstHalf, - zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, - zc->bmi2); - - if (!zc->isFirstBlock && - ZSTD_maybeRLE(&firstHalfSeqStore) && - ZSTD_isRLE((BYTE const*)src, srcSize)) { - /* We don't want to emit our first block as a RLE even if it qualifies because - * doing so will cause the decoder (cli only) to throw a "should consume all input error." - * This is only an issue for zstd <= v1.4.3 - */ - cSeqsSizeFirstHalf = 1; - } - - if (cSeqsSizeFirstHalf == 0) { - cSizeFirstHalf = ZSTD_noCompressBlock(op, dstCapacity, ip, srcBytesFirstHalf, 0); - FORWARD_IF_ERROR(cSizeFirstHalf, "Nocompress block failed"); - DEBUGLOG(2, "1: Writing out nocompress block, size: %zu", cSizeFirstHalf); - } else if (cSeqsSizeFirstHalf == 1) { - cSizeFirstHalf = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcBytesFirstHalf, 0); - FORWARD_IF_ERROR(cSizeFirstHalf, "RLE compress block failed"); - DEBUGLOG(2, "1: Writing out RLE block, size: %zu", cSizeFirstHalf); - } else { - U32 cBlockHeader; - /* Error checking and repcodes update */ - ZSTD_confirmRepcodesAndEntropyTables(zc); - if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) - zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; - writeBlockHeader(op, cSeqsSizeFirstHalf, srcBytesFirstHalf, 0); - cSizeFirstHalf = ZSTD_blockHeaderSize + cSeqsSizeFirstHalf; - DEBUGLOG(3, "1: Writing out compressed block, size: %zu", cSizeFirstHalf); - } - } - - { int i; + cSizeFirstHalf = ZSTD_compressSequences_singleBlock(zc, &firstHalfSeqStore, op, dstCapacity, ip, srcBytesFirstHalf, 0 /* lastBlock */); + { + int i; for (i = 0; i < ZSTD_REP_NUM; ++i) zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + ip += srcBytesFirstHalf; + op += cSizeFirstHalf; + dstCapacity -= cSizeFirstHalf; } - - ip += srcBytesFirstHalf; - op += cSizeFirstHalf; - dstCapacity -= cSizeFirstHalf; - - { U32 cSeqsSizeSecondHalf = ZSTD_entropyCompressSequences(&secondHalfSeqStore, - &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, - srcBytesSecondHalf, - zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, - zc->bmi2); - if (ZSTD_maybeRLE(&firstHalfSeqStore) && - ZSTD_isRLE((BYTE const*)src, srcSize)) { - cSeqsSizeSecondHalf = 1; - } - - if (cSeqsSizeSecondHalf == 0) { - cSizeSecondHalf = ZSTD_noCompressBlock(op, dstCapacity, ip, srcBytesSecondHalf, lastBlock); - FORWARD_IF_ERROR(cSizeFirstHalf, "Nocompress block failed"); - DEBUGLOG(2, "2: Writing out nocompress block, size: %zu", cSizeSecondHalf); - } else if (cSeqsSizeSecondHalf == 1) { - cSizeSecondHalf = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcBytesSecondHalf, lastBlock); - FORWARD_IF_ERROR(cSizeFirstHalf, "RLE compress block failed"); - DEBUGLOG(2, "2: Writing out RLE block, size: %zu", cSizeSecondHalf); - } else { - U32 cBlockHeader; - /* Error checking and repcodes update */ - ZSTD_confirmRepcodesAndEntropyTables(zc); - if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) - zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; - writeBlockHeader(op, cSeqsSizeSecondHalf, srcBytesSecondHalf, lastBlock); - cSizeSecondHalf = ZSTD_blockHeaderSize + cSeqsSizeSecondHalf; - DEBUGLOG(3, "2: Writing out compressed block, size: %zu", cSizeSecondHalf); - } - } - + cSizeSecondHalf = ZSTD_compressSequences_singleBlock(zc, &secondHalfSeqStore, op, dstCapacity, ip, srcBytesSecondHalf, lastBlock /* lastBlock */); DEBUGLOG(2, "cSizeFirstHalf: %zu cSizeSecondHalf: %zu", cSizeFirstHalf, cSizeSecondHalf); cSize = cSizeFirstHalf + cSizeSecondHalf; return cSize; @@ -3308,12 +3293,6 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, nbSeq = (size_t)(zc->seqStore.sequences - zc->seqStore.sequencesStart); } - if (zc->seqCollector.collectSequences) { - ZSTD_copyBlockSequences(zc); - ZSTD_confirmRepcodesAndEntropyTables(zc); - return 0; - } - if (nbSeq >= 2) { size_t splitBlocksCompressedSize; splitBlocksCompressedSize = ZSTD_compressBlock_splitBlock(zc, dst, dstCapacity, src, srcSize, frame, lastBlock, nbSeq); From 44b68c8d13aa026bcebf09bb85ab729c6d1d2eb9 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 7 Dec 2020 17:11:23 -0500 Subject: [PATCH 08/15] Add block splitter to experimental params --- lib/compress/zstd_compress.c | 20 ++++++++++++++++++-- lib/compress/zstd_compress_internal.h | 3 +++ lib/zstd.h | 11 ++++++++++- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 6b079d3fa6f..1490b044a21 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -485,6 +485,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.lowerBound = 0; bounds.upperBound = 1; return bounds; + + case ZSTD_c_splitBlocks: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; default: bounds.error = ERROR(parameter_unsupported); @@ -547,6 +552,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_stableOutBuffer: case ZSTD_c_blockDelimiters: case ZSTD_c_validateSequences: + case ZSTD_c_splitBlocks: default: return 0; } @@ -599,6 +605,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_stableOutBuffer: case ZSTD_c_blockDelimiters: case ZSTD_c_validateSequences: + case ZSTD_c_splitBlocks: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -810,6 +817,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->validateSequences = value; return CCtxParams->validateSequences; + case ZSTD_c_splitBlocks: + BOUNDCHECK(ZSTD_c_splitBlocks, value); + CCtxParams->splitBlocks = value; + return CCtxParams->splitBlocks; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -933,6 +945,9 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_validateSequences : *value = (int)CCtxParams->validateSequences; break; + case ZSTD_c_splitBlocks : + *value = (int)CCtxParams->splitBlocks; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -3293,14 +3308,15 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, nbSeq = (size_t)(zc->seqStore.sequences - zc->seqStore.sequencesStart); } - if (nbSeq >= 2) { + zc->appliedParams.splitBlocks = 1; /* remove */ + if (zc->appliedParams.splitBlocks && nbSeq >= 2) { size_t splitBlocksCompressedSize; splitBlocksCompressedSize = ZSTD_compressBlock_splitBlock(zc, dst, dstCapacity, src, srcSize, frame, lastBlock, nbSeq); if (splitBlocksCompressedSize != 0) { return splitBlocksCompressedSize; } } - + /* encode sequences and literals */ cSize = ZSTD_entropyCompressSequences(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 303f8feccf3..48b6c084109 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -312,6 +312,9 @@ struct ZSTD_CCtx_params_s { ZSTD_sequenceFormat_e blockDelimiters; int validateSequences; + /* Block splitting */ + int splitBlocks; + /* Internal use, for createCCtxParams() and freeCCtxParams() only */ ZSTD_customMem customMem; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ diff --git a/lib/zstd.h b/lib/zstd.h index 64265f68286..e876c40ba26 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -419,6 +419,7 @@ typedef enum { * ZSTD_c_stableOutBuffer * ZSTD_c_blockDelimiters * ZSTD_c_validateSequences + * ZSTD_c_splitBlocks * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -434,7 +435,8 @@ typedef enum { ZSTD_c_experimentalParam9=1006, ZSTD_c_experimentalParam10=1007, ZSTD_c_experimentalParam11=1008, - ZSTD_c_experimentalParam12=1009 + ZSTD_c_experimentalParam12=1009, + ZSTD_c_experimentalParam13=1010 } ZSTD_cParameter; typedef struct { @@ -1832,6 +1834,13 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre */ #define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 +/* ZSTD_c_splitBlocks + * Default is 0 == disabled. Set to 1 to enable block splitting. + * + * Will attempt to split blocks in order to improve compression ratio at the cost of speed. + */ +#define ZSTD_c_splitBlocks ZSTD_c_experimentalParam13 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. From 25265093430f9d9112c3f1c829a1d2714c0afa21 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Tue, 8 Dec 2020 13:06:18 -0500 Subject: [PATCH 09/15] Update function names for consistency --- lib/compress/zstd_compress.c | 198 ++++++++++++++---------- lib/compress/zstd_compress_internal.h | 8 +- lib/compress/zstd_compress_superblock.c | 2 +- lib/decompress/zstd_decompress.c | 9 +- lib/decompress/zstd_decompress_block.c | 13 +- 5 files changed, 127 insertions(+), 103 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 1490b044a21..1a430e544b0 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2192,6 +2192,32 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) return (cctxParams->targetCBlockSize != 0); } +/* Pseudocode algorithm for finding the optimal partition: + * Given n sequences: + * Let epsilon = 1 + * + +typedef struct { + size_t startIdx; + size_t endIdx; +} ZSTD_sequenceWindow; + +size_t ZSTD_sequenceWindow_moveStartIdx(ZSTD_sequenceWindow* sequenceWindow) { + ++sequenceWindow->startIdx; +} + +size_t ZSTD_sequenceWindow_moveEndIdx(ZSTD_sequenceWindow* sequenceWindow) { + ++sequenceWindow->endIdx; +} + +size_t ZSTD_sequenceWindow_currentCost(ZSTD_sequenceWindow* sequenceWindow) { + return 0; +} + +/* ZSTD_buildSequencesStatistics(): + * Returns the size of the statistics for a given set of sequences, or a ZSTD error code + */ + MEM_STATIC size_t ZSTD_buildSequencesStatistics(const BYTE* const ofCodeTable, const BYTE* const llCodeTable, @@ -2325,7 +2351,6 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; - U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ U32 entropyStatisticsSize; const seqDef* const sequences = seqStorePtr->sequencesStart; const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; @@ -2335,7 +2360,6 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; - BYTE* seqHead; BYTE* lastNCount = NULL; entropyWorkspace = count + (MaxSeq + 1); @@ -2528,16 +2552,6 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; -static U32 countSeqStoreLiteralsBytes2(const seqStore_t* seqStore) { - U32 literalsBytes = 0; - U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; - for (int i = 0; i < nbSeqs; ++i) { - seqDef seq = seqStore->sequencesStart[i]; - literalsBytes += seq.litLength; - } - return literalsBytes; -} - static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) { ZSTD_matchState_t* const ms = &zc->blockState.matchState; @@ -2761,19 +2775,19 @@ static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) /* Writes the block header */ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { - DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); U32 const cBlockHeader = cSize == 1 ? lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); MEM_writeLE24(op, cBlockHeader); + DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); } -/** ZSTD_buildSuperBlockEntropy_literal() : +/** ZSTD_buildBlockEntropyStats_literals() : * Builds entropy for the super-block literals. * Stores literals block type (raw, rle, compressed, repeat) and * huffman description table to hufMetadata. * @return : size of huffman description table or error code */ -static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, +static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, const ZSTD_hufCTables_t* prevHuf, ZSTD_hufCTables_t* nextHuf, ZSTD_hufCTablesMetadata_t* hufMetadata, @@ -2791,7 +2805,7 @@ static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSiz unsigned huffLog = HUF_TABLELOG_DEFAULT; HUF_repeat repeat = prevHuf->repeatMode; - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize); /* Prepare nextEntropy assuming reusing the existing table */ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); @@ -2871,11 +2885,11 @@ static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSiz } } -/** ZSTD_buildSuperBlockEntropy_sequences() : +/** ZSTD_buildBlockEntropyStats_sequences() : * Builds entropy for the super-block sequences. * Stores symbol compression modes and fse table to fseMetadata. * @return : size of fse tables or error code */ -static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, +static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, const ZSTD_CCtx_params* cctxParams, @@ -2902,11 +2916,10 @@ static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, BYTE* const ostart = fseMetadata->fseTablesBuffer; BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); BYTE* op = ostart; - U32 entropyStatisticsSize; BYTE* lastNCount = NULL; assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq); ZSTD_memset(workspace, 0, wkspSize); fseMetadata->lastCountSize = 0; @@ -2918,10 +2931,10 @@ static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, } -/** ZSTD_buildSuperBlockEntropy() : +/** ZSTD_buildBlockEntropyStats() : * Builds entropy for the super-block. * @return : 0 on success or error code */ -size_t ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, +size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, const ZSTD_CCtx_params* cctxParams, @@ -2929,21 +2942,21 @@ size_t ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, void* workspace, size_t wkspSize) { size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats"); entropyMetadata->hufMetadata.hufDesSize = - ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, + ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, &prevEntropy->huf, &nextEntropy->huf, &entropyMetadata->hufMetadata, ZSTD_disableLiteralsCompression(cctxParams), workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed"); entropyMetadata->fseMetadata.fseTablesSize = - ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, + ZSTD_buildBlockEntropyStats_sequences(seqStorePtr, &prevEntropy->fse, &nextEntropy->fse, cctxParams, &entropyMetadata->fseMetadata, workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed"); return 0; } @@ -3064,7 +3077,7 @@ size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(const ZSTD_CCtx* zc, seqStore_t* seqStore) { ZSTD_entropyCTablesMetadata_t entropyMetadata; size_t estimatedSize; - FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(seqStore, + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, @@ -3078,10 +3091,12 @@ static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(const ZSTD_CCtx return estimatedSize; } -static U32 countSeqStoreLiteralsBytes(const seqStore_t* seqStore) { - U32 literalsBytes = 0; - U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; - for (int i = 0; i < nbSeqs; ++i) { +/* Returns literals bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* seqStore) { + size_t literalsBytes = 0; + size_t nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t i; + for (i = 0; i < nbSeqs; ++i) { seqDef seq = seqStore->sequencesStart[i]; literalsBytes += seq.litLength; if (i == seqStore->longLengthPos && seqStore->longLengthID == 1) { @@ -3091,10 +3106,12 @@ static U32 countSeqStoreLiteralsBytes(const seqStore_t* seqStore) { return literalsBytes; } -static U32 countSeqStoreMatchBytes(const seqStore_t* seqStore) { - U32 matchBytes = 0; - U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; - for (int i = 0; i < nbSeqs; ++i) { +/* Returns match bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* seqStore) { + size_t matchBytes = 0; + size_t nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t i; + for (i = 0; i < nbSeqs; ++i) { seqDef seq = seqStore->sequencesStart[i]; matchBytes += seq.matchLength + MINMATCH; if (i == seqStore->longLengthPos && seqStore->longLengthID == 2) { @@ -3104,12 +3121,17 @@ static U32 countSeqStoreMatchBytes(const seqStore_t* seqStore) { return matchBytes; } -static void splitSeqStores(const seqStore_t* originalSeqStore, +/* ZSTD_splitSeqStores(): + * Splits the original seqStore into two, with nbSeqFirstHalf sequences in the first + * seqStore, and the remainder in the second. + */ +static void ZSTD_splitSeqStores(const seqStore_t* originalSeqStore, seqStore_t* firstSeqStore, seqStore_t* secondSeqStore, size_t nbSeqFirstHalf) { BYTE* const litEnd = originalSeqStore->lit; seqDef* const seqEnd = originalSeqStore->sequences; + U32 literalsBytesFirstHalf; *firstSeqStore = *originalSeqStore; *secondSeqStore = *originalSeqStore; @@ -3124,7 +3146,7 @@ static void splitSeqStores(const seqStore_t* originalSeqStore, firstSeqStore->sequences = firstSeqStore->sequencesStart+nbSeqFirstHalf; - U32 literalsBytesFirstHalf = countSeqStoreLiteralsBytes(firstSeqStore); + literalsBytesFirstHalf = ZSTD_countSeqStoreLiteralsBytes(firstSeqStore); firstSeqStore->lit = firstSeqStore->litStart+literalsBytesFirstHalf; secondSeqStore->sequencesStart += nbSeqFirstHalf; @@ -3134,15 +3156,21 @@ static void splitSeqStores(const seqStore_t* originalSeqStore, secondSeqStore->llCode += nbSeqFirstHalf; secondSeqStore->mlCode += nbSeqFirstHalf; secondSeqStore->ofCode += nbSeqFirstHalf; - DEBUGLOG(2, "Split into: %u and %u", (U32)(firstSeqStore->sequences - firstSeqStore->sequencesStart), - (U32)(secondSeqStore->sequences - secondSeqStore->sequencesStart)); + DEBUGLOG(2, "Split into: %u and %u seqs", (U32)(firstSeqStore->sequences - firstSeqStore->sequencesStart), + (U32)(secondSeqStore->sequences - secondSeqStore->sequencesStart)); } -#define NB_SPLIT_POINTS_TO_TEST 2 -static int setUpSeqStores(ZSTD_CCtx* zc, +/* ZSTD_deriveSplitSeqstores() + * Simple block splitting approach: test a set number of fixed block partitions. + * For now, just a single split down the middle of the block. + * + * Returns 1 if the a split was performed, 0 if not. + */ +#define NB_BLOCK_SEGMENTS_TO_TEST 2 +static int ZSTD_deriveSplitSeqstores(ZSTD_CCtx* zc, seqStore_t* firstSeqStore, seqStore_t* secondSeqStore, - U32 nbSeq, U32 srcSize) { - size_t increment = nbSeq/NB_SPLIT_POINTS_TO_TEST + 1; + U32 nbSeq) { + size_t increment = nbSeq/NB_BLOCK_SEGMENTS_TO_TEST + 1; size_t estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &zc->seqStore); size_t minEstimatedCSize = estimatedOriginalSize; size_t minEstimatedCSizeIdx = 0; @@ -3152,6 +3180,7 @@ static int setUpSeqStores(ZSTD_CCtx* zc, return 0; } + DEBUGLOG(2, "Estimated original block size is: %zu", estimatedOriginalSize); DEBUGLOG(2, "total nbseq: %u, increment: %zu", nbSeq, increment); for (i = increment; i < nbSeq; i += increment) { /* Check that splitting would actually improve compression. Return 0 if not */ @@ -3159,12 +3188,11 @@ static int setUpSeqStores(ZSTD_CCtx* zc, size_t estimatedSecondHalfSize; size_t estimatedSplitBlocksCompressedSize; size_t nbSeqFirstHalf = i; - splitSeqStores(&zc->seqStore, firstSeqStore, secondSeqStore, nbSeqFirstHalf); + ZSTD_splitSeqStores(&zc->seqStore, firstSeqStore, secondSeqStore, nbSeqFirstHalf); estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, firstSeqStore); estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, secondSeqStore); estimatedSplitBlocksCompressedSize = estimatedFirstHalfSize + estimatedSecondHalfSize; - - DEBUGLOG(2, "Estimated original block size is: %zu", estimatedOriginalSize); + DEBUGLOG(2, "Estimated split block size is: %zu - split: %zu - %zu", estimatedSplitBlocksCompressedSize, estimatedFirstHalfSize, estimatedSecondHalfSize); if (estimatedSplitBlocksCompressedSize < minEstimatedCSize) { minEstimatedCSizeIdx = i; @@ -3174,7 +3202,7 @@ static int setUpSeqStores(ZSTD_CCtx* zc, if (minEstimatedCSizeIdx != 0) { DEBUGLOG(2, "WILL SPLIT"); - splitSeqStores(&zc->seqStore, firstSeqStore, secondSeqStore, minEstimatedCSizeIdx); + ZSTD_splitSeqStores(&zc->seqStore, firstSeqStore, secondSeqStore, minEstimatedCSizeIdx); return 1; } else { DEBUGLOG(2, "NOT SPLITTING"); @@ -3182,6 +3210,13 @@ static int setUpSeqStores(ZSTD_CCtx* zc, } } +/* ZSTD_compressSequences_singleBlock(): + * Compresses a seqStore into a block with a block header, into the buffer dst. + * + * Returns the size of that block or a ZSTD error code + */ + +/* TODO: Migrate compressBlock_internal and compressSequences_internal to use this as well */ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqStore, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -3226,7 +3261,6 @@ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqS FORWARD_IF_ERROR(cSize, "RLE compress block failed"); DEBUGLOG(2, "1: Writing out RLE block, size: %zu", cSize); } else { - U32 cBlockHeader; /* Error checking and repcodes update */ ZSTD_confirmRepcodesAndEntropyTables(zc); writeBlockHeader(op, cSeqsSize, srcSize, lastBlock); @@ -3236,52 +3270,56 @@ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqS return cSize; } +/* ZSTD_compressBlock_splitBlock(): + * Attempts to split a given block into multiple (currently 2) blocks to improve compression ratio. + * + * Returns 0 if it would not be advantageous to split the block. Otherwise, returns the combined size + * of the multiple blocks, or a ZSTD error code. + */ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 frame, U32 lastBlock, U32 nbSeq) { - const U32 rleMaxLength = 25; + const void* src, size_t srcSize, U32 lastBlock, U32 nbSeq) { size_t cSize; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; + seqStore_t firstHalfSeqStore; + seqStore_t secondHalfSeqStore; + size_t cSizeFirstHalf; + size_t cSizeSecondHalf; DEBUGLOG(5, "ZSTD_compressBlock_splitBlock (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); - - /* Attempt block splitting here */ DEBUGLOG(3, "Block size pre-split is: %zu - lastBlock: %u", srcSize, lastBlock); - DEBUGLOG(3, "srcSize: %zu seq store size: %u", srcSize, countSeqStoreLiteralsBytes(&zc->seqStore) + countSeqStoreMatchBytes(&zc->seqStore)); - seqStore_t firstHalfSeqStore; - seqStore_t secondHalfSeqStore; - if (setUpSeqStores(zc, &firstHalfSeqStore, &secondHalfSeqStore, nbSeq, srcSize) != 1) { + DEBUGLOG(3, "srcSize: %zu seq store size: %zu", srcSize, ZSTD_countSeqStoreLiteralsBytes(&zc->seqStore) + ZSTD_countSeqStoreMatchBytes(&zc->seqStore)); + /* Attempt block splitting here */ + if (!ZSTD_deriveSplitSeqstores(zc, &firstHalfSeqStore, &secondHalfSeqStore, nbSeq)) { + /* Not advantageous to split blocks */ return 0; } assert((U32)(firstHalfSeqStore.lit - firstHalfSeqStore.litStart) + (U32)(secondHalfSeqStore.lit - secondHalfSeqStore.litStart) == (U32)(zc->seqStore.lit - zc->seqStore.litStart)); assert((U32)(firstHalfSeqStore.sequences - firstHalfSeqStore.sequencesStart) + (U32)(secondHalfSeqStore.sequences - secondHalfSeqStore.sequencesStart) - == (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart)); + == (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart)); - size_t cSizeFirstHalf; - size_t cSizeSecondHalf; - - size_t literalsBytesFirstHalf = countSeqStoreLiteralsBytes(&firstHalfSeqStore); - size_t srcBytesFirstHalf = literalsBytesFirstHalf + countSeqStoreMatchBytes(&firstHalfSeqStore); - size_t srcBytesSecondHalf = srcSize - srcBytesFirstHalf; - DEBUGLOG(3, "literals bytes first half: %zu literals bytes second half: %u, orig: %u", literalsBytesFirstHalf, countSeqStoreLiteralsBytes(&secondHalfSeqStore), countSeqStoreLiteralsBytes(&zc->seqStore)); - DEBUGLOG(3, "match bytes first half: %u match bytes second half: %u, orig: %u", countSeqStoreMatchBytes(&firstHalfSeqStore), countSeqStoreMatchBytes(&secondHalfSeqStore), countSeqStoreMatchBytes(&zc->seqStore)); - DEBUGLOG(2, "Src bytes first half: %zu src bytes second half: %zu", srcBytesFirstHalf, srcBytesSecondHalf); - - cSizeFirstHalf = ZSTD_compressSequences_singleBlock(zc, &firstHalfSeqStore, op, dstCapacity, ip, srcBytesFirstHalf, 0 /* lastBlock */); { - int i; - for (i = 0; i < ZSTD_REP_NUM; ++i) - zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; - ip += srcBytesFirstHalf; - op += cSizeFirstHalf; - dstCapacity -= cSizeFirstHalf; - } - cSizeSecondHalf = ZSTD_compressSequences_singleBlock(zc, &secondHalfSeqStore, op, dstCapacity, ip, srcBytesSecondHalf, lastBlock /* lastBlock */); - DEBUGLOG(2, "cSizeFirstHalf: %zu cSizeSecondHalf: %zu", cSizeFirstHalf, cSizeSecondHalf); - cSize = cSizeFirstHalf + cSizeSecondHalf; + size_t literalsBytesFirstHalf = ZSTD_countSeqStoreLiteralsBytes(&firstHalfSeqStore); + size_t srcBytesFirstHalf = literalsBytesFirstHalf + ZSTD_countSeqStoreMatchBytes(&firstHalfSeqStore); + size_t srcBytesSecondHalf = srcSize - srcBytesFirstHalf; + DEBUGLOG(3, "literals bytes first half: %zu literals bytes second half: %zu, orig: %zu", literalsBytesFirstHalf, ZSTD_countSeqStoreLiteralsBytes(&secondHalfSeqStore), ZSTD_countSeqStoreLiteralsBytes(&zc->seqStore)); + DEBUGLOG(3, "match bytes first half: %zu match bytes second half: %zu, orig: %zu", ZSTD_countSeqStoreMatchBytes(&firstHalfSeqStore), ZSTD_countSeqStoreMatchBytes(&secondHalfSeqStore), ZSTD_countSeqStoreMatchBytes(&zc->seqStore)); + DEBUGLOG(2, "Src bytes first half: %zu src bytes second half: %zu", srcBytesFirstHalf, srcBytesSecondHalf); + + cSizeFirstHalf = ZSTD_compressSequences_singleBlock(zc, &firstHalfSeqStore, op, dstCapacity, ip, srcBytesFirstHalf, 0 /* lastBlock */); + { /* Perform necessary updates before compressing next block */ + ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, ZSTD_REP_NUM); + ip += srcBytesFirstHalf; + op += cSizeFirstHalf; + dstCapacity -= cSizeFirstHalf; + } + cSizeSecondHalf = ZSTD_compressSequences_singleBlock(zc, &secondHalfSeqStore, op, dstCapacity, ip, srcBytesSecondHalf, lastBlock /* lastBlock */); + DEBUGLOG(2, "cSizeFirstHalf: %zu cSizeSecondHalf: %zu", cSizeFirstHalf, cSizeSecondHalf); + cSize = cSizeFirstHalf + cSizeSecondHalf; + } return cSize; } @@ -3311,7 +3349,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, zc->appliedParams.splitBlocks = 1; /* remove */ if (zc->appliedParams.splitBlocks && nbSeq >= 2) { size_t splitBlocksCompressedSize; - splitBlocksCompressedSize = ZSTD_compressBlock_splitBlock(zc, dst, dstCapacity, src, srcSize, frame, lastBlock, nbSeq); + splitBlocksCompressedSize = ZSTD_compressBlock_splitBlock(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq); if (splitBlocksCompressedSize != 0) { return splitBlocksCompressedSize; } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 48b6c084109..33c653e27e0 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -89,7 +89,7 @@ typedef struct { * Stores Literals Block Type for a super-block in hType, and * huffman tree description in hufDesBuffer. * hufDesSize refers to the size of huffman tree description in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ + * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ typedef struct { symbolEncodingType_e hType; BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; @@ -100,7 +100,7 @@ typedef struct { * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and * fse tables in fseTablesBuffer. * fseTablesSize refers to the size of fse tables in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ + * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */ typedef struct { symbolEncodingType_e llType; symbolEncodingType_e ofType; @@ -115,10 +115,10 @@ typedef struct { ZSTD_fseCTablesMetadata_t fseMetadata; } ZSTD_entropyCTablesMetadata_t; -/** ZSTD_buildSuperBlockEntropy() : +/** ZSTD_buildBlockEntropyStats() : * Builds entropy for the super-block. * @return : 0 on success or error code */ -size_t ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, +size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, const ZSTD_CCtx_params* cctxParams, diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index 66fffd1b6ac..948203628cf 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -448,7 +448,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, unsigned lastBlock) { ZSTD_entropyCTablesMetadata_t entropyMetadata; - FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 9f753911d0d..15139501bea 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -763,7 +763,7 @@ size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSiz static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - DEBUGLOG(2, "ZSTD_copyRawBlock: %u", srcSize); + DEBUGLOG(5, "ZSTD_copyRawBlock"); RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); if (dst == NULL) { if (srcSize == 0) return 0; @@ -847,7 +847,6 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, /* Loop on each block */ while (1) { - DEBUGLOG(2, "Remaining dstCap: %u", (size_t)(oend-op)); size_t decodedSize; blockProperties_t blockProperties; size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); @@ -876,10 +875,8 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, if (ZSTD_isError(decodedSize)) return decodedSize; if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, op, decodedSize); - if (decodedSize != 0) { - DEBUGLOG(2, "Decoded: %u", decodedSize); + if (decodedSize != 0) op += decodedSize; - } assert(ip != NULL); ip += cBlockSize; remainingSrcSize -= cBlockSize; @@ -1192,7 +1189,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c } FORWARD_IF_ERROR(rSize, ""); RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); - DEBUGLOG(2, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); + DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); dctx->decodedSize += rSize; if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); dctx->previousDstEnd = (char*)dst + rSize; diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 56e4b5ef87a..b71bc20d65b 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -775,9 +775,6 @@ size_t ZSTD_execSequenceEnd(BYTE* op, /* bounds checks : careful of address space overflow in 32-bit mode */ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); - DEBUGLOG(2, "sequence length: %u", sequenceLength); - DEBUGLOG(2, "oLitEnd: %u iLitEnd: %u match: %u", oLitEnd, iLitEnd, match); - DEBUGLOG(2, "seq ll: %u, condition: %u", sequence.litLength, (size_t)(litLimit - *litPtr)); RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); assert(op < op + sequenceLength); assert(oLitEnd < op + sequenceLength); @@ -853,13 +850,8 @@ size_t ZSTD_execSequence(BYTE* op, op = oLitEnd; *litPtr = iLitEnd; /* update for next sequence */ - /* Copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { - /*DEBUGLOG(2, "oLitEnd: %u, oMatchEnd: %u iLitEnd: %u matchPos: %u", oLitEnd, oMatchEnd, iLitEnd, match); - DEBUGLOG(2, "off: %u ml: %u ll: %u", sequence.offset, sequence.matchLength, sequence.litLength); - DEBUGLOG(2, "first condition: %u", (size_t)(oLitEnd - prefixStart)); - DEBUGLOG(2, "break condition: %u", (size_t)(oLitEnd - virtualStart));*/ /* offset beyond prefix -> go into extDict */ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); match = dictEnd + (match - prefixStart); @@ -1218,9 +1210,6 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, /* last literal segment */ { size_t const lastLLSize = litEnd - litPtr; - if (lastLLSize > (size_t)(oend-op)) { - DEBUGLOG(2, "too small lastll"); - } RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); if (op != NULL) { ZSTD_memcpy(op, litPtr, lastLLSize); @@ -1469,7 +1458,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, * (note: but it could be evaluated from current-lowLimit) */ ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); - DEBUGLOG(2, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); + DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); From 7b462b4694f0f04da600986a871533a3fdb8de3f Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Thu, 10 Dec 2020 12:18:43 -0500 Subject: [PATCH 10/15] Integrate optimal algorithm --- lib/compress/zstd_compress.c | 270 +++++++++++++++--- .../dynamic/libzstd.1.4.6.dylib.ld_Ak6QYf | 0 2 files changed, 238 insertions(+), 32 deletions(-) create mode 100644 lib/obj/conf_2c4cce56e07d576a865fce347f466405/dynamic/libzstd.1.4.6.dylib.ld_Ak6QYf diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 1a430e544b0..dfa439d8ce7 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2192,28 +2192,6 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) return (cctxParams->targetCBlockSize != 0); } -/* Pseudocode algorithm for finding the optimal partition: - * Given n sequences: - * Let epsilon = 1 - * - -typedef struct { - size_t startIdx; - size_t endIdx; -} ZSTD_sequenceWindow; - -size_t ZSTD_sequenceWindow_moveStartIdx(ZSTD_sequenceWindow* sequenceWindow) { - ++sequenceWindow->startIdx; -} - -size_t ZSTD_sequenceWindow_moveEndIdx(ZSTD_sequenceWindow* sequenceWindow) { - ++sequenceWindow->endIdx; -} - -size_t ZSTD_sequenceWindow_currentCost(ZSTD_sequenceWindow* sequenceWindow) { - return 0; -} - /* ZSTD_buildSequencesStatistics(): * Returns the size of the statistics for a given set of sequences, or a ZSTD error code */ @@ -3001,17 +2979,14 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ if (type == set_basic) { - DEBUGLOG(2, "Type == set_basic"); /* We selected this encoding type, so it must be valid. */ assert(max <= defaultMax); cSymbolTypeSizeEstimateInBits = max <= defaultMax ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max) : ERROR(GENERIC); } else if (type == set_rle) { - DEBUGLOG(2, "Type == RLE"); cSymbolTypeSizeEstimateInBits = 0; } else if (type == set_compressed || type == set_repeat) { - DEBUGLOG(2, "Type == set_compressed"); cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); } if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) { @@ -3069,7 +3044,6 @@ size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, seqSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, workspace, wkspSize, writeSeqEntropy); - DEBUGLOG(2, "Estimated litSize: %zu seqSize: %zu", literalsSize, seqSize); return seqSize + literalsSize + ZSTD_blockHeaderSize; } @@ -3121,6 +3095,53 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* seqStore) { return matchBytes; } + +#include +void reverse(size_t arr[], int n) +{ + for (int low = 0, high = n - 1; low < high; low++, high--) + { + size_t temp = arr[low]; + arr[low] = arr[high]; + arr[high] = temp; + } +} + +static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_t* originalSeqStore, + size_t startIdx, size_t endIdx) { + BYTE* const litEnd = originalSeqStore->lit; + seqDef* const seqEnd = originalSeqStore->sequences; + U32 literalsBytes; + U32 literalsBytesPreceding = 0; + *resultSeqStore = *originalSeqStore; + + /* First calculate the number of literal bytes before startIdx */ + if (startIdx > 0) { + resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; + literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); + } + + if (originalSeqStore->longLengthID != 0) { + if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) { + resultSeqStore->longLengthID = 0; + } else { + resultSeqStore->longLengthPos -= startIdx; + } + } + resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx; + resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx; + literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); + resultSeqStore->litStart += literalsBytesPreceding; + if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) { + resultSeqStore->lit = litEnd; + } else { + resultSeqStore->lit = resultSeqStore->litStart+literalsBytes; + } + resultSeqStore->llCode += startIdx; + resultSeqStore->mlCode += startIdx; + resultSeqStore->ofCode += startIdx; +} + /* ZSTD_splitSeqStores(): * Splits the original seqStore into two, with nbSeqFirstHalf sequences in the first * seqStore, and the remainder in the second. @@ -3160,6 +3181,99 @@ static void ZSTD_splitSeqStores(const seqStore_t* originalSeqStore, (U32)(secondSeqStore->sequences - secondSeqStore->sequencesStart)); } +/* Pseudocode algorithm for finding the optimal partition: + * Given n sequences: + * Let epsilon = 1 + */ +typedef struct { + size_t startIdx; + size_t endIdx; + size_t costBound; +} ZSTD_sequenceWindow; + +size_t chunkCost(ZSTD_CCtx* zc, seqStore_t* seqStore, size_t startIdx, size_t endIdx) { + seqStore_t seqStoreChunk; + size_t size; + ZSTD_deriveSeqStoreChunk(&seqStoreChunk, seqStore, startIdx, endIdx); + size = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &seqStoreChunk); + return size + ZSTD_blockHeaderSize; +} + +size_t optimalPartition(size_t* partition, ZSTD_CCtx* zc, seqStore_t* seqStore, size_t nbSeq) { + const static size_t kInvalidCost = INT_MAX; + size_t singleChunkCost = chunkCost(zc, seqStore, 0, nbSeq-1); + ZSTD_sequenceWindow* windows = (ZSTD_sequenceWindow*)ZSTD_customMalloc(sizeof(ZSTD_sequenceWindow)*50, ZSTD_defaultCMem); + double costBound = 5000 /* smallest possible block size */; + double eps = 0.9; + for (int i = 0; i < 50; ++i) { + ZSTD_sequenceWindow currWindow = {0, 0, costBound}; + windows[i] = currWindow; + if (costBound >= singleChunkCost) { + /* Mark end of the array */ + windows[i+1].endIdx = INT_MAX; + printf("Exists: %d windows\n", i); + break; + } + costBound *= 1 + eps; + } + + size_t* predecessors = (size_t*)ZSTD_customMalloc(sizeof(size_t)*(nbSeq+1), ZSTD_defaultCMem); + size_t* minCost = (size_t*)ZSTD_customMalloc(sizeof(size_t)*(nbSeq+1), ZSTD_defaultCMem); + memset(minCost, kInvalidCost, sizeof(size_t)*(nbSeq+1)); + minCost[0] = 0; + int ops = 0; + for (size_t i = 0; i < nbSeq; i += 1) { + assert(minCost[i] != kInvalidCost); + size_t lastEnd = i + 1; + size_t windowIdx = 0; + while (windows[windowIdx].endIdx != INT_MAX) { + ZSTD_sequenceWindow window = windows[windowIdx]; + assert(window.startIdx == i); + if (window.endIdx < lastEnd) { + window.endIdx = lastEnd; + } + size_t windowCost; + while (1) { + ++ops; + windowCost = chunkCost(zc, seqStore, window.startIdx, window.endIdx - 1); + if (minCost[i] + windowCost < minCost[window.endIdx]) { + minCost[window.endIdx] = minCost[i] + windowCost; + predecessors[window.endIdx] = i; + } + lastEnd = window.endIdx; + if (window.endIdx == nbSeq) break; + if (windowCost >= window.costBound) break; + window.endIdx += 1; + } + window.startIdx += 1; + windows[windowIdx++] = window; + } + } + size_t* finalPartition = (size_t*)ZSTD_customMalloc(sizeof(size_t)*nbSeq /* can be smaller */, ZSTD_defaultCMem); + size_t currPos = nbSeq; + size_t i = 0; + printf("nbSeq: %zu\n", nbSeq); + while (currPos != 0) { + finalPartition[i] = currPos; + currPos = predecessors[currPos]; + ++i; + } + finalPartition[i] = INT_MAX; + reverse(finalPartition, i); + i = 0; + printf("Final partition: "); + while (finalPartition[i] != INT_MAX) { + printf("%zu ", finalPartition[i++]); + } + memcpy(partition, finalPartition, (i+1)*sizeof(size_t)); + printf("\nminCost: %zu\n", minCost[nbSeq]); + if (i <= 1) { + printf("no good partition\n"); + return 0; + } + return minCost[nbSeq]; +} + /* ZSTD_deriveSplitSeqstores() * Simple block splitting approach: test a set number of fixed block partitions. * For now, just a single split down the middle of the block. @@ -3188,7 +3302,10 @@ static int ZSTD_deriveSplitSeqstores(ZSTD_CCtx* zc, size_t estimatedSecondHalfSize; size_t estimatedSplitBlocksCompressedSize; size_t nbSeqFirstHalf = i; - ZSTD_splitSeqStores(&zc->seqStore, firstSeqStore, secondSeqStore, nbSeqFirstHalf); + printf("1 start: %u, end: %u\n", 0, nbSeqFirstHalf); + ZSTD_deriveSeqStoreChunk(firstSeqStore, &zc->seqStore, 0, nbSeqFirstHalf); + printf("2 start: %u, end: %u\n", nbSeqFirstHalf, nbSeq); + ZSTD_deriveSeqStoreChunk(secondSeqStore, &zc->seqStore, nbSeqFirstHalf, nbSeq); estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, firstSeqStore); estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, secondSeqStore); estimatedSplitBlocksCompressedSize = estimatedFirstHalfSize + estimatedSecondHalfSize; @@ -3270,6 +3387,41 @@ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqS return cSize; } +static int equalSeqStores(const seqStore_t* seqStore1, const seqStore_t* seqStore2) { + int exit = 0; + if (seqStore1->lit != seqStore2->lit) { + printf("different lits\n"); + exit = 1; + } if (seqStore1->litStart != seqStore2->litStart) { + printf("different litstarts\n"); + exit = 1; + } if (seqStore1->sequences != seqStore2->sequences) { + printf("different sequences\n"); + exit = 1; + } if (seqStore1->sequencesStart != seqStore2->sequencesStart) { + printf("different sequencesStart\n"); + exit = 1; + } if (seqStore1->llCode != seqStore2->llCode) { + printf("different llCode\n"); + exit = 1; + } if (seqStore1->mlCode != seqStore2->mlCode) { + printf("different mlCode\n"); + exit = 1; + } if (seqStore1->ofCode != seqStore2->ofCode) { + printf("different ofCode\n"); + exit = 1; + } if (seqStore1->longLengthID != seqStore2->longLengthID) { + printf("different longLengthID\n"); + exit = 1; + } if (seqStore1->longLengthPos != seqStore2->longLengthPos) { + printf("different longLengthPos\n"); + exit = 1; + } + + return exit; +} +#include +static size_t optimalSize = 0; /* ZSTD_compressBlock_splitBlock(): * Attempts to split a given block into multiple (currently 2) blocks to improve compression ratio. * @@ -3279,7 +3431,7 @@ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqS static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock, U32 nbSeq) { - size_t cSize; + size_t cSize = 0; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; seqStore_t firstHalfSeqStore; @@ -3289,8 +3441,58 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, DEBUGLOG(5, "ZSTD_compressBlock_splitBlock (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); - DEBUGLOG(3, "Block size pre-split is: %zu - lastBlock: %u", srcSize, lastBlock); - DEBUGLOG(3, "srcSize: %zu seq store size: %zu", srcSize, ZSTD_countSeqStoreLiteralsBytes(&zc->seqStore) + ZSTD_countSeqStoreMatchBytes(&zc->seqStore)); + printf("Block size pre-split is: %zu - lastBlock: %u\n", srcSize, lastBlock); + printf("srcSize: %zu seq store size: %zu\n", srcSize, ZSTD_countSeqStoreLiteralsBytes(&zc->seqStore) + ZSTD_countSeqStoreMatchBytes(&zc->seqStore)); + { + size_t* partitions = (size_t*)ZSTD_customMalloc(sizeof(size_t)*10, ZSTD_defaultCMem); + size_t projectedBest = optimalPartition(partitions, zc, &zc->seqStore, nbSeq); + if (projectedBest == 0) { + printf("No projected gain from split\n"); + goto _shortcut; + } + optimalSize += projectedBest; + printf("Supposed best is: %zu running: %u\n", projectedBest, optimalSize); + size_t startIdx = 0; + size_t endIdx = 0; + size_t i = 0; + size_t srcBytesCum = 0; + /*if (!ZSTD_deriveSplitSeqstores(zc, &firstHalfSeqStore, &secondHalfSeqStore, nbSeq)) { + return 0; + } + partitions[0] = 0; + partitions[0] = (size_t)(firstHalfSeqStore.sequences-firstHalfSeqStore.sequencesStart); + partitions[1] = nbSeq; + partitions[2] = INT_MAX;*/ + while (partitions[i] != INT_MAX) { + endIdx = partitions[i]; + printf("start: %u, end: %u\n", startIdx, endIdx); + seqStore_t chunkSeqStore = zc->seqStore; + ZSTD_deriveSeqStoreChunk(&chunkSeqStore, &zc->seqStore, startIdx, endIdx); + size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(&chunkSeqStore) + ZSTD_countSeqStoreMatchBytes(&chunkSeqStore); + size_t lastBlockFinal = lastBlock && (nbSeq == endIdx); + srcBytesCum += srcBytes; + printf("srcbytes: %u cum: %u lastLL: %u\n", srcBytes, srcBytesCum, 131072 - srcBytesCum); + if (nbSeq == endIdx) { + srcBytes += 131072 - srcBytesCum; + srcBytesCum += 131072 - srcBytesCum; + printf("srcBytesCum final: %zu\n", srcBytesCum); + } + printf("srcBytes final: %zu\n", srcBytes); + size_t cSizeChunk = ZSTD_compressSequences_singleBlock(zc, &chunkSeqStore, op, dstCapacity, ip, srcBytes, lastBlockFinal); + printf("cSize final: %zu", cSizeChunk); + { + ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM); + ip += srcBytes; + op += cSizeChunk; + dstCapacity -= cSizeChunk; + } + startIdx = partitions[i]; + ++i; + cSize += cSizeChunk; + } + return cSize; + } +_shortcut: /* Attempt block splitting here */ if (!ZSTD_deriveSplitSeqstores(zc, &firstHalfSeqStore, &secondHalfSeqStore, nbSeq)) { /* Not advantageous to split blocks */ @@ -3305,18 +3507,22 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, size_t literalsBytesFirstHalf = ZSTD_countSeqStoreLiteralsBytes(&firstHalfSeqStore); size_t srcBytesFirstHalf = literalsBytesFirstHalf + ZSTD_countSeqStoreMatchBytes(&firstHalfSeqStore); size_t srcBytesSecondHalf = srcSize - srcBytesFirstHalf; + size_t srcBytesSecondHalfSeqStore = ZSTD_countSeqStoreLiteralsBytes(&secondHalfSeqStore) + ZSTD_countSeqStoreMatchBytes(&firstHalfSeqStore); DEBUGLOG(3, "literals bytes first half: %zu literals bytes second half: %zu, orig: %zu", literalsBytesFirstHalf, ZSTD_countSeqStoreLiteralsBytes(&secondHalfSeqStore), ZSTD_countSeqStoreLiteralsBytes(&zc->seqStore)); DEBUGLOG(3, "match bytes first half: %zu match bytes second half: %zu, orig: %zu", ZSTD_countSeqStoreMatchBytes(&firstHalfSeqStore), ZSTD_countSeqStoreMatchBytes(&secondHalfSeqStore), ZSTD_countSeqStoreMatchBytes(&zc->seqStore)); - DEBUGLOG(2, "Src bytes first half: %zu src bytes second half: %zu", srcBytesFirstHalf, srcBytesSecondHalf); + printf("Src bytes first half: %zu src bytes second half: %zu (seqStore: %u) - total: %zu", srcBytesFirstHalf, srcBytesSecondHalf, srcBytesSecondHalfSeqStore, srcBytesFirstHalf + srcBytesSecondHalf); cSizeFirstHalf = ZSTD_compressSequences_singleBlock(zc, &firstHalfSeqStore, op, dstCapacity, ip, srcBytesFirstHalf, 0 /* lastBlock */); { /* Perform necessary updates before compressing next block */ - ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, ZSTD_REP_NUM); + ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM); ip += srcBytesFirstHalf; op += cSizeFirstHalf; dstCapacity -= cSizeFirstHalf; } cSizeSecondHalf = ZSTD_compressSequences_singleBlock(zc, &secondHalfSeqStore, op, dstCapacity, ip, srcBytesSecondHalf, lastBlock /* lastBlock */); + { /* Perform necessary updates before compressing next block */ + ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM); + } DEBUGLOG(2, "cSizeFirstHalf: %zu cSizeSecondHalf: %zu", cSizeFirstHalf, cSizeSecondHalf); cSize = cSizeFirstHalf + cSizeSecondHalf; } diff --git a/lib/obj/conf_2c4cce56e07d576a865fce347f466405/dynamic/libzstd.1.4.6.dylib.ld_Ak6QYf b/lib/obj/conf_2c4cce56e07d576a865fce347f466405/dynamic/libzstd.1.4.6.dylib.ld_Ak6QYf new file mode 100644 index 00000000000..e69de29bb2d From 6560b88534ec6611d40b662cdd81eb55c82261f9 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Wed, 16 Dec 2020 17:21:24 -0500 Subject: [PATCH 11/15] Attempt at a recursive solution --- lib/compress/zstd_compress.c | 125 +++++++++++++++++------------------ 1 file changed, 59 insertions(+), 66 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index dfa439d8ce7..0541a3de4e5 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -29,6 +29,7 @@ #include "zstd_opt.h" #include "zstd_ldm.h" #include "zstd_compress_superblock.h" +#include /* *************************************************************** * Tuning parameters @@ -2782,7 +2783,6 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi unsigned maxSymbolValue = 255; unsigned huffLog = HUF_TABLELOG_DEFAULT; HUF_repeat repeat = prevHuf->repeatMode; - DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize); /* Prepare nextEntropy assuming reusing the existing table */ @@ -2920,7 +2920,6 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, void* workspace, size_t wkspSize) { size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; - DEBUGLOG(5, "ZSTD_buildBlockEntropyStats"); entropyMetadata->hufMetadata.hufDesSize = ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, &prevEntropy->huf, &nextEntropy->huf, @@ -3096,7 +3095,6 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* seqStore) { } -#include void reverse(size_t arr[], int n) { for (int low = 0, high = n - 1; low < high; low++, high--) @@ -3113,8 +3111,6 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_ seqDef* const seqEnd = originalSeqStore->sequences; U32 literalsBytes; U32 literalsBytesPreceding = 0; - *resultSeqStore = *originalSeqStore; - /* First calculate the number of literal bytes before startIdx */ if (startIdx > 0) { resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; @@ -3420,8 +3416,56 @@ static int equalSeqStores(const seqStore_t* seqStore1, const seqStore_t* seqStor return exit; } -#include -static size_t optimalSize = 0; + +typedef struct { + size_t* splitLocations; + size_t idx; + size_t depth; +} seqStoreSplits; + +#define MIN_SEQUENCES_BLOCK_SPLITTING 300 + +static size_t deriveBlockBoundsHelper(size_t startIdx, size_t endIdx, ZSTD_CCtx* zc, seqStore_t* origSeqStore, seqStoreSplits* splits) { + if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING) { + return 0; + } + seqStore_t origSeqStoreChunk = *origSeqStore; + seqStore_t firstHalfSeqStore = *origSeqStore; + seqStore_t secondHalfSeqStore = *origSeqStore; + ZSTD_deriveSeqStoreChunk(&origSeqStoreChunk, origSeqStore, startIdx, endIdx); + ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, (startIdx + endIdx)/2); + ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, (startIdx + endIdx)/2, endIdx); + size_t estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &origSeqStoreChunk); + size_t estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &firstHalfSeqStore); + size_t estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &secondHalfSeqStore); + if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { + deriveBlockBoundsHelper(startIdx, (startIdx + endIdx)/2, zc, origSeqStore, splits); + splits->splitLocations[splits->idx] = (startIdx + endIdx)/2; + splits->idx++; + deriveBlockBoundsHelper((startIdx + endIdx)/2, endIdx, zc, origSeqStore, splits); + return (startIdx + endIdx)/2; + } else { + return 0; + } +} + +static size_t deriveBlockBounds(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* partitions, U32 nbSeq) { + seqStoreSplits splits; + splits.idx = 0; + splits.splitLocations = (size_t*)partitions; + + deriveBlockBoundsHelper(0, nbSeq, zc, &zc->seqStore, &splits); + splits.splitLocations[splits.idx] = nbSeq; + + if (splits.idx == 0) { + return 0; + } else { + return splits.idx; + } +} + /* ZSTD_compressBlock_splitBlock(): * Attempts to split a given block into multiple (currently 2) blocks to improve compression ratio. * @@ -3441,45 +3485,28 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, DEBUGLOG(5, "ZSTD_compressBlock_splitBlock (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); - printf("Block size pre-split is: %zu - lastBlock: %u\n", srcSize, lastBlock); - printf("srcSize: %zu seq store size: %zu\n", srcSize, ZSTD_countSeqStoreLiteralsBytes(&zc->seqStore) + ZSTD_countSeqStoreMatchBytes(&zc->seqStore)); { - size_t* partitions = (size_t*)ZSTD_customMalloc(sizeof(size_t)*10, ZSTD_defaultCMem); - size_t projectedBest = optimalPartition(partitions, zc, &zc->seqStore, nbSeq); - if (projectedBest == 0) { - printf("No projected gain from split\n"); - goto _shortcut; + size_t* partitions = (size_t*)ZSTD_customMalloc(sizeof(size_t)*32, ZSTD_defaultCMem); + size_t numPartitions = deriveBlockBounds(zc, dst, dstCapacity, src, srcSize, partitions, nbSeq); + if (numPartitions == 0) { + return 0; } - optimalSize += projectedBest; - printf("Supposed best is: %zu running: %u\n", projectedBest, optimalSize); + size_t i = 0; size_t startIdx = 0; size_t endIdx = 0; - size_t i = 0; size_t srcBytesCum = 0; - /*if (!ZSTD_deriveSplitSeqstores(zc, &firstHalfSeqStore, &secondHalfSeqStore, nbSeq)) { - return 0; - } - partitions[0] = 0; - partitions[0] = (size_t)(firstHalfSeqStore.sequences-firstHalfSeqStore.sequencesStart); - partitions[1] = nbSeq; - partitions[2] = INT_MAX;*/ - while (partitions[i] != INT_MAX) { + while (i <= numPartitions) { endIdx = partitions[i]; - printf("start: %u, end: %u\n", startIdx, endIdx); seqStore_t chunkSeqStore = zc->seqStore; ZSTD_deriveSeqStoreChunk(&chunkSeqStore, &zc->seqStore, startIdx, endIdx); size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(&chunkSeqStore) + ZSTD_countSeqStoreMatchBytes(&chunkSeqStore); size_t lastBlockFinal = lastBlock && (nbSeq == endIdx); srcBytesCum += srcBytes; - printf("srcbytes: %u cum: %u lastLL: %u\n", srcBytes, srcBytesCum, 131072 - srcBytesCum); if (nbSeq == endIdx) { srcBytes += 131072 - srcBytesCum; srcBytesCum += 131072 - srcBytesCum; - printf("srcBytesCum final: %zu\n", srcBytesCum); } - printf("srcBytes final: %zu\n", srcBytes); size_t cSizeChunk = ZSTD_compressSequences_singleBlock(zc, &chunkSeqStore, op, dstCapacity, ip, srcBytes, lastBlockFinal); - printf("cSize final: %zu", cSizeChunk); { ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM); ip += srcBytes; @@ -3487,44 +3514,10 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, dstCapacity -= cSizeChunk; } startIdx = partitions[i]; - ++i; cSize += cSizeChunk; + ++i; } - return cSize; - } -_shortcut: - /* Attempt block splitting here */ - if (!ZSTD_deriveSplitSeqstores(zc, &firstHalfSeqStore, &secondHalfSeqStore, nbSeq)) { - /* Not advantageous to split blocks */ - return 0; - } - - assert((U32)(firstHalfSeqStore.lit - firstHalfSeqStore.litStart) + (U32)(secondHalfSeqStore.lit - secondHalfSeqStore.litStart) == (U32)(zc->seqStore.lit - zc->seqStore.litStart)); - assert((U32)(firstHalfSeqStore.sequences - firstHalfSeqStore.sequencesStart) + (U32)(secondHalfSeqStore.sequences - secondHalfSeqStore.sequencesStart) - == (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart)); - - { - size_t literalsBytesFirstHalf = ZSTD_countSeqStoreLiteralsBytes(&firstHalfSeqStore); - size_t srcBytesFirstHalf = literalsBytesFirstHalf + ZSTD_countSeqStoreMatchBytes(&firstHalfSeqStore); - size_t srcBytesSecondHalf = srcSize - srcBytesFirstHalf; - size_t srcBytesSecondHalfSeqStore = ZSTD_countSeqStoreLiteralsBytes(&secondHalfSeqStore) + ZSTD_countSeqStoreMatchBytes(&firstHalfSeqStore); - DEBUGLOG(3, "literals bytes first half: %zu literals bytes second half: %zu, orig: %zu", literalsBytesFirstHalf, ZSTD_countSeqStoreLiteralsBytes(&secondHalfSeqStore), ZSTD_countSeqStoreLiteralsBytes(&zc->seqStore)); - DEBUGLOG(3, "match bytes first half: %zu match bytes second half: %zu, orig: %zu", ZSTD_countSeqStoreMatchBytes(&firstHalfSeqStore), ZSTD_countSeqStoreMatchBytes(&secondHalfSeqStore), ZSTD_countSeqStoreMatchBytes(&zc->seqStore)); - printf("Src bytes first half: %zu src bytes second half: %zu (seqStore: %u) - total: %zu", srcBytesFirstHalf, srcBytesSecondHalf, srcBytesSecondHalfSeqStore, srcBytesFirstHalf + srcBytesSecondHalf); - - cSizeFirstHalf = ZSTD_compressSequences_singleBlock(zc, &firstHalfSeqStore, op, dstCapacity, ip, srcBytesFirstHalf, 0 /* lastBlock */); - { /* Perform necessary updates before compressing next block */ - ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM); - ip += srcBytesFirstHalf; - op += cSizeFirstHalf; - dstCapacity -= cSizeFirstHalf; - } - cSizeSecondHalf = ZSTD_compressSequences_singleBlock(zc, &secondHalfSeqStore, op, dstCapacity, ip, srcBytesSecondHalf, lastBlock /* lastBlock */); - { /* Perform necessary updates before compressing next block */ - ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM); - } - DEBUGLOG(2, "cSizeFirstHalf: %zu cSizeSecondHalf: %zu", cSizeFirstHalf, cSizeSecondHalf); - cSize = cSizeFirstHalf + cSizeSecondHalf; + ZSTD_customFree(partitions, ZSTD_defaultCMem); } return cSize; } From 0f3d2f5d740ece2df77e8c731823782042d255e0 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Tue, 29 Dec 2020 12:35:22 -0500 Subject: [PATCH 12/15] Clean up old code, refactor --- lib/compress/zstd_compress.c | 322 ++++++----------------------------- 1 file changed, 56 insertions(+), 266 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 0541a3de4e5..873f3b5099e 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3046,8 +3046,11 @@ size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, return seqSize + literalsSize + ZSTD_blockHeaderSize; } -/* Builds entropy statistics and uses them for blocksize estimation */ -static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(const ZSTD_CCtx* zc, seqStore_t* seqStore) { +/* Builds entropy statistics and uses them for blocksize estimation. + * + * Returns the estimated compressed size of the seqStore, or a zstd error. + */ +static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, const ZSTD_CCtx* zc) { ZSTD_entropyCTablesMetadata_t entropyMetadata; size_t estimatedSize; FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, @@ -3094,23 +3097,16 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* seqStore) { return matchBytes; } - -void reverse(size_t arr[], int n) -{ - for (int low = 0, high = n - 1; low < high; low++, high--) - { - size_t temp = arr[low]; - arr[low] = arr[high]; - arr[high] = temp; - } -} - +/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx). + */ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_t* originalSeqStore, size_t startIdx, size_t endIdx) { BYTE* const litEnd = originalSeqStore->lit; seqDef* const seqEnd = originalSeqStore->sequences; U32 literalsBytes; U32 literalsBytesPreceding = 0; + + *resultSeqStore = *originalSeqStore; /* First calculate the number of literal bytes before startIdx */ if (startIdx > 0) { resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; @@ -3138,191 +3134,6 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_ resultSeqStore->ofCode += startIdx; } -/* ZSTD_splitSeqStores(): - * Splits the original seqStore into two, with nbSeqFirstHalf sequences in the first - * seqStore, and the remainder in the second. - */ -static void ZSTD_splitSeqStores(const seqStore_t* originalSeqStore, - seqStore_t* firstSeqStore, seqStore_t* secondSeqStore, - size_t nbSeqFirstHalf) { - - BYTE* const litEnd = originalSeqStore->lit; - seqDef* const seqEnd = originalSeqStore->sequences; - U32 literalsBytesFirstHalf; - *firstSeqStore = *originalSeqStore; - *secondSeqStore = *originalSeqStore; - - if (firstSeqStore->longLengthID != 0) { - if (firstSeqStore->longLengthPos < nbSeqFirstHalf) { - secondSeqStore->longLengthID = 0; - } else { - firstSeqStore->longLengthID = 0; - secondSeqStore->longLengthPos = secondSeqStore->longLengthPos - nbSeqFirstHalf; - } - } - - firstSeqStore->sequences = firstSeqStore->sequencesStart+nbSeqFirstHalf; - - literalsBytesFirstHalf = ZSTD_countSeqStoreLiteralsBytes(firstSeqStore); - firstSeqStore->lit = firstSeqStore->litStart+literalsBytesFirstHalf; - - secondSeqStore->sequencesStart += nbSeqFirstHalf; - secondSeqStore->sequences = seqEnd; - secondSeqStore->litStart += literalsBytesFirstHalf; - secondSeqStore->lit = litEnd; - secondSeqStore->llCode += nbSeqFirstHalf; - secondSeqStore->mlCode += nbSeqFirstHalf; - secondSeqStore->ofCode += nbSeqFirstHalf; - DEBUGLOG(2, "Split into: %u and %u seqs", (U32)(firstSeqStore->sequences - firstSeqStore->sequencesStart), - (U32)(secondSeqStore->sequences - secondSeqStore->sequencesStart)); -} - -/* Pseudocode algorithm for finding the optimal partition: - * Given n sequences: - * Let epsilon = 1 - */ -typedef struct { - size_t startIdx; - size_t endIdx; - size_t costBound; -} ZSTD_sequenceWindow; - -size_t chunkCost(ZSTD_CCtx* zc, seqStore_t* seqStore, size_t startIdx, size_t endIdx) { - seqStore_t seqStoreChunk; - size_t size; - ZSTD_deriveSeqStoreChunk(&seqStoreChunk, seqStore, startIdx, endIdx); - size = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &seqStoreChunk); - return size + ZSTD_blockHeaderSize; -} - -size_t optimalPartition(size_t* partition, ZSTD_CCtx* zc, seqStore_t* seqStore, size_t nbSeq) { - const static size_t kInvalidCost = INT_MAX; - size_t singleChunkCost = chunkCost(zc, seqStore, 0, nbSeq-1); - ZSTD_sequenceWindow* windows = (ZSTD_sequenceWindow*)ZSTD_customMalloc(sizeof(ZSTD_sequenceWindow)*50, ZSTD_defaultCMem); - double costBound = 5000 /* smallest possible block size */; - double eps = 0.9; - for (int i = 0; i < 50; ++i) { - ZSTD_sequenceWindow currWindow = {0, 0, costBound}; - windows[i] = currWindow; - if (costBound >= singleChunkCost) { - /* Mark end of the array */ - windows[i+1].endIdx = INT_MAX; - printf("Exists: %d windows\n", i); - break; - } - costBound *= 1 + eps; - } - - size_t* predecessors = (size_t*)ZSTD_customMalloc(sizeof(size_t)*(nbSeq+1), ZSTD_defaultCMem); - size_t* minCost = (size_t*)ZSTD_customMalloc(sizeof(size_t)*(nbSeq+1), ZSTD_defaultCMem); - memset(minCost, kInvalidCost, sizeof(size_t)*(nbSeq+1)); - minCost[0] = 0; - int ops = 0; - for (size_t i = 0; i < nbSeq; i += 1) { - assert(minCost[i] != kInvalidCost); - size_t lastEnd = i + 1; - size_t windowIdx = 0; - while (windows[windowIdx].endIdx != INT_MAX) { - ZSTD_sequenceWindow window = windows[windowIdx]; - assert(window.startIdx == i); - if (window.endIdx < lastEnd) { - window.endIdx = lastEnd; - } - size_t windowCost; - while (1) { - ++ops; - windowCost = chunkCost(zc, seqStore, window.startIdx, window.endIdx - 1); - if (minCost[i] + windowCost < minCost[window.endIdx]) { - minCost[window.endIdx] = minCost[i] + windowCost; - predecessors[window.endIdx] = i; - } - lastEnd = window.endIdx; - if (window.endIdx == nbSeq) break; - if (windowCost >= window.costBound) break; - window.endIdx += 1; - } - window.startIdx += 1; - windows[windowIdx++] = window; - } - } - size_t* finalPartition = (size_t*)ZSTD_customMalloc(sizeof(size_t)*nbSeq /* can be smaller */, ZSTD_defaultCMem); - size_t currPos = nbSeq; - size_t i = 0; - printf("nbSeq: %zu\n", nbSeq); - while (currPos != 0) { - finalPartition[i] = currPos; - currPos = predecessors[currPos]; - ++i; - } - finalPartition[i] = INT_MAX; - reverse(finalPartition, i); - i = 0; - printf("Final partition: "); - while (finalPartition[i] != INT_MAX) { - printf("%zu ", finalPartition[i++]); - } - memcpy(partition, finalPartition, (i+1)*sizeof(size_t)); - printf("\nminCost: %zu\n", minCost[nbSeq]); - if (i <= 1) { - printf("no good partition\n"); - return 0; - } - return minCost[nbSeq]; -} - -/* ZSTD_deriveSplitSeqstores() - * Simple block splitting approach: test a set number of fixed block partitions. - * For now, just a single split down the middle of the block. - * - * Returns 1 if the a split was performed, 0 if not. - */ -#define NB_BLOCK_SEGMENTS_TO_TEST 2 -static int ZSTD_deriveSplitSeqstores(ZSTD_CCtx* zc, - seqStore_t* firstSeqStore, seqStore_t* secondSeqStore, - U32 nbSeq) { - size_t increment = nbSeq/NB_BLOCK_SEGMENTS_TO_TEST + 1; - size_t estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &zc->seqStore); - size_t minEstimatedCSize = estimatedOriginalSize; - size_t minEstimatedCSizeIdx = 0; - size_t i; - - if (increment == 0) { - return 0; - } - - DEBUGLOG(2, "Estimated original block size is: %zu", estimatedOriginalSize); - DEBUGLOG(2, "total nbseq: %u, increment: %zu", nbSeq, increment); - for (i = increment; i < nbSeq; i += increment) { - /* Check that splitting would actually improve compression. Return 0 if not */ - size_t estimatedFirstHalfSize; - size_t estimatedSecondHalfSize; - size_t estimatedSplitBlocksCompressedSize; - size_t nbSeqFirstHalf = i; - printf("1 start: %u, end: %u\n", 0, nbSeqFirstHalf); - ZSTD_deriveSeqStoreChunk(firstSeqStore, &zc->seqStore, 0, nbSeqFirstHalf); - printf("2 start: %u, end: %u\n", nbSeqFirstHalf, nbSeq); - ZSTD_deriveSeqStoreChunk(secondSeqStore, &zc->seqStore, nbSeqFirstHalf, nbSeq); - estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, firstSeqStore); - estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, secondSeqStore); - estimatedSplitBlocksCompressedSize = estimatedFirstHalfSize + estimatedSecondHalfSize; - - DEBUGLOG(2, "Estimated split block size is: %zu - split: %zu - %zu", estimatedSplitBlocksCompressedSize, estimatedFirstHalfSize, estimatedSecondHalfSize); - if (estimatedSplitBlocksCompressedSize < minEstimatedCSize) { - minEstimatedCSizeIdx = i; - minEstimatedCSize = estimatedSplitBlocksCompressedSize; - } - } - - if (minEstimatedCSizeIdx != 0) { - DEBUGLOG(2, "WILL SPLIT"); - ZSTD_splitSeqStores(&zc->seqStore, firstSeqStore, secondSeqStore, minEstimatedCSizeIdx); - return 1; - } else { - DEBUGLOG(2, "NOT SPLITTING"); - return 0; - } -} - /* ZSTD_compressSequences_singleBlock(): * Compresses a seqStore into a block with a block header, into the buffer dst. * @@ -3383,87 +3194,65 @@ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqS return cSize; } -static int equalSeqStores(const seqStore_t* seqStore1, const seqStore_t* seqStore2) { - int exit = 0; - if (seqStore1->lit != seqStore2->lit) { - printf("different lits\n"); - exit = 1; - } if (seqStore1->litStart != seqStore2->litStart) { - printf("different litstarts\n"); - exit = 1; - } if (seqStore1->sequences != seqStore2->sequences) { - printf("different sequences\n"); - exit = 1; - } if (seqStore1->sequencesStart != seqStore2->sequencesStart) { - printf("different sequencesStart\n"); - exit = 1; - } if (seqStore1->llCode != seqStore2->llCode) { - printf("different llCode\n"); - exit = 1; - } if (seqStore1->mlCode != seqStore2->mlCode) { - printf("different mlCode\n"); - exit = 1; - } if (seqStore1->ofCode != seqStore2->ofCode) { - printf("different ofCode\n"); - exit = 1; - } if (seqStore1->longLengthID != seqStore2->longLengthID) { - printf("different longLengthID\n"); - exit = 1; - } if (seqStore1->longLengthPos != seqStore2->longLengthPos) { - printf("different longLengthPos\n"); - exit = 1; - } - - return exit; -} - +/* Struct to keep track of where we are in our recursive calls. */ typedef struct { - size_t* splitLocations; - size_t idx; - size_t depth; + U32* splitLocations; /* Array of split indices */ + size_t idx; /* The current index within splitLocations being worked on */ } seqStoreSplits; #define MIN_SEQUENCES_BLOCK_SPLITTING 300 +#define MAX_NB_SPLITS 196 -static size_t deriveBlockBoundsHelper(size_t startIdx, size_t endIdx, ZSTD_CCtx* zc, seqStore_t* origSeqStore, seqStoreSplits* splits) { - if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING) { +/* Helper function to perform the recursive search for block splits. + * Estimates the cost of the original seqStore, and estimates the cost of splitting the sequences in half. + * If advantageous to split, then we recursive down the two sub-blocks. + * The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. + * In practice, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). + * + * Returns the sequence index at which to split, or 0 if we should not split. + */ +static size_t deriveBlockBoundsHelper(ZSTD_CCtx* zc, seqStoreSplits* splits, size_t startIdx, size_t endIdx, const seqStore_t* origSeqStore) { + seqStore_t origSeqStoreChunk; + seqStore_t firstHalfSeqStore; + seqStore_t secondHalfSeqStore; + size_t estimatedOriginalSize; + size_t estimatedFirstHalfSize; + size_t estimatedSecondHalfSize; + + if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= MAX_NB_SPLITS) { return 0; } - seqStore_t origSeqStoreChunk = *origSeqStore; - seqStore_t firstHalfSeqStore = *origSeqStore; - seqStore_t secondHalfSeqStore = *origSeqStore; ZSTD_deriveSeqStoreChunk(&origSeqStoreChunk, origSeqStore, startIdx, endIdx); ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, (startIdx + endIdx)/2); ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, (startIdx + endIdx)/2, endIdx); - size_t estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &origSeqStoreChunk); - size_t estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &firstHalfSeqStore); - size_t estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(zc, &secondHalfSeqStore); + estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&origSeqStoreChunk, zc); + estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&firstHalfSeqStore, zc); + estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&secondHalfSeqStore, zc); if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { - deriveBlockBoundsHelper(startIdx, (startIdx + endIdx)/2, zc, origSeqStore, splits); + deriveBlockBoundsHelper(zc, splits, startIdx, (startIdx + endIdx)/2, origSeqStore); splits->splitLocations[splits->idx] = (startIdx + endIdx)/2; splits->idx++; - deriveBlockBoundsHelper((startIdx + endIdx)/2, endIdx, zc, origSeqStore, splits); + deriveBlockBoundsHelper(zc, splits, (startIdx + endIdx)/2, endIdx, origSeqStore); return (startIdx + endIdx)/2; } else { return 0; } } +/* Base recursive function. Populates a table of partitions indices. + * + * Returns the number of splits made (which equals the size of the partition table - 1). + */ static size_t deriveBlockBounds(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize, - void* partitions, U32 nbSeq) { + U32 partitions[], U32 nbSeq) { seqStoreSplits splits; splits.idx = 0; - splits.splitLocations = (size_t*)partitions; + splits.splitLocations = partitions; - deriveBlockBoundsHelper(0, nbSeq, zc, &zc->seqStore, &splits); + deriveBlockBoundsHelper(zc, &splits, 0, nbSeq, &zc->seqStore); splits.splitLocations[splits.idx] = nbSeq; - - if (splits.idx == 0) { - return 0; - } else { - return splits.idx; - } + return splits.idx; } /* ZSTD_compressBlock_splitBlock(): @@ -3486,27 +3275,29 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); { - size_t* partitions = (size_t*)ZSTD_customMalloc(sizeof(size_t)*32, ZSTD_defaultCMem); - size_t numPartitions = deriveBlockBounds(zc, dst, dstCapacity, src, srcSize, partitions, nbSeq); - if (numPartitions == 0) { - return 0; - } + U32 partitions[MAX_NB_SPLITS]; + size_t numSplits = deriveBlockBounds(zc, dst, dstCapacity, src, srcSize, partitions, nbSeq); size_t i = 0; size_t startIdx = 0; size_t endIdx = 0; size_t srcBytesCum = 0; - while (i <= numPartitions) { + + if (numSplits == 0) { + return 0; + } + while (i <= numSplits) { endIdx = partitions[i]; seqStore_t chunkSeqStore = zc->seqStore; ZSTD_deriveSeqStoreChunk(&chunkSeqStore, &zc->seqStore, startIdx, endIdx); size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(&chunkSeqStore) + ZSTD_countSeqStoreMatchBytes(&chunkSeqStore); - size_t lastBlockFinal = lastBlock && (nbSeq == endIdx); + size_t lastBlock = lastBlock && (nbSeq == endIdx); srcBytesCum += srcBytes; - if (nbSeq == endIdx) { - srcBytes += 131072 - srcBytesCum; - srcBytesCum += 131072 - srcBytesCum; + if (endIdx == nbSeq) { + /* This is the final partition, need to account for last literals */ + srcBytes += zc->blockSize - srcBytesCum; + srcBytesCum += zc->blockSize - srcBytesCum; } - size_t cSizeChunk = ZSTD_compressSequences_singleBlock(zc, &chunkSeqStore, op, dstCapacity, ip, srcBytes, lastBlockFinal); + size_t cSizeChunk = ZSTD_compressSequences_singleBlock(zc, &chunkSeqStore, op, dstCapacity, ip, srcBytes, lastBlock); { ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM); ip += srcBytes; @@ -3517,7 +3308,6 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, cSize += cSizeChunk; ++i; } - ZSTD_customFree(partitions, ZSTD_defaultCMem); } return cSize; } From f61b4179e6bbec7cfeda6046ece9bec2a1f9e427 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Tue, 29 Dec 2020 15:56:13 -0500 Subject: [PATCH 13/15] Add unit tests and fuzzer param --- lib/compress/zstd_compress.c | 345 ++++++++++-------- .../dynamic/libzstd.1.4.6.dylib.ld_Ak6QYf | 0 tests/fuzz/zstd_helpers.c | 1 + tests/fuzzer.c | 9 + 4 files changed, 198 insertions(+), 157 deletions(-) delete mode 100644 lib/obj/conf_2c4cce56e07d576a865fce347f466405/dynamic/libzstd.1.4.6.dylib.ld_Ak6QYf diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 873f3b5099e..9c4835b1007 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -29,7 +29,6 @@ #include "zstd_opt.h" #include "zstd_ldm.h" #include "zstd_compress_superblock.h" -#include /* *************************************************************** * Tuning parameters @@ -2193,28 +2192,42 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) return (cctxParams->targetCBlockSize != 0); } +/* ZSTD_useBlockSplitting(): + * Returns if block splitting param is being used + * If used, compression will do best effort to split a block in order to improve compression ratio. + * Returns 1 if true, 0 otherwise. */ +static int ZSTD_useBlockSplitting(const ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_useBlockSplitting(splitBlocks=%d)", cctxParams->splitBlocks); + return (cctxParams->splitBlocks != 0); +} + /* ZSTD_buildSequencesStatistics(): - * Returns the size of the statistics for a given set of sequences, or a ZSTD error code + * Returns the size of the statistics for a given set of sequences, or a ZSTD error code, */ - MEM_STATIC size_t -ZSTD_buildSequencesStatistics(const BYTE* const ofCodeTable, - const BYTE* const llCodeTable, - const BYTE* const mlCodeTable, - FSE_CTable* CTable_LitLength, - FSE_CTable* CTable_OffsetBits, - FSE_CTable* CTable_MatchLength, - size_t nbSeq, +ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, BYTE* dst, const BYTE* const dstEnd, - ZSTD_strategy strategy, BYTE* lastNCount, ZSTD_fseCTablesMetadata_t* fseMetadata, - void* countWorkspace, void* entropyWorkspace, size_t entropyWkspSize) { + ZSTD_strategy strategy, BYTE** lastNCount, ZSTD_fseCTablesMetadata_t* fseMetadata, + void* entropyWorkspace, size_t entropyWkspSize) { U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ BYTE* const ostart = dst; const BYTE* const oend = dstEnd; BYTE* op = ostart; BYTE* seqHead = op++; + unsigned* const countWorkspace = (unsigned*)entropyWorkspace; + + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); assert(op <= oend); /* build CTable for Literal Lengths */ { unsigned max = MaxLL; @@ -2238,7 +2251,7 @@ ZSTD_buildSequencesStatistics(const BYTE* const ofCodeTable, entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); if (LLtype == set_compressed) - lastNCount = op; + *lastNCount = op; op += countSize; if (fseMetadata) { if (LLtype == set_compressed) fseMetadata->lastCountSize = countSize; @@ -2270,7 +2283,7 @@ ZSTD_buildSequencesStatistics(const BYTE* const ofCodeTable, entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); if (Offtype == set_compressed) - lastNCount = op; + *lastNCount = op; op += countSize; if (fseMetadata) { if (Offtype == set_compressed) fseMetadata->lastCountSize = countSize; @@ -2300,7 +2313,7 @@ ZSTD_buildSequencesStatistics(const BYTE* const ofCodeTable, entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); if (MLtype == set_compressed) - lastNCount = op; + *lastNCount = op; op += countSize; if (fseMetadata) { if (MLtype == set_compressed) fseMetadata->lastCountSize = countSize; @@ -2330,7 +2343,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; - U32 entropyStatisticsSize; + size_t entropyStatisticsSize; const seqDef* const sequences = seqStorePtr->sequencesStart; const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; const BYTE* const ofCodeTable = seqStorePtr->ofCode; @@ -2361,7 +2374,6 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, bmi2); FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); assert(cSize <= dstCapacity); - DEBUGLOG(2, "Actual litSize: %zu", cSize); op += cSize; } @@ -2386,14 +2398,11 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, return (size_t)(op - ostart); } - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); /* build stats for sequences */ - entropyStatisticsSize = ZSTD_buildSequencesStatistics(ofCodeTable, llCodeTable, mlCodeTable, - CTable_LitLength, CTable_OffsetBits, CTable_MatchLength, + entropyStatisticsSize = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, &prevEntropy->fse, &nextEntropy->fse, op, oend, - strategy, lastNCount, NULL /* no fseMetadata needed */, - count, entropyWorkspace, entropyWkspSize); + strategy, &lastNCount, NULL /* no fseMetadata needed */, + entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(entropyStatisticsSize, "FSE statistics building failed!"); op += entropyStatisticsSize; @@ -2422,7 +2431,6 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, "emitting an uncompressed block."); return 0; } - DEBUGLOG(2, "Actual seqSize: %zu", bitstreamSize); } DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); @@ -2762,7 +2770,7 @@ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastB } /** ZSTD_buildBlockEntropyStats_literals() : - * Builds entropy for the super-block literals. + * Builds entropy for the literals. * Stores literals block type (raw, rle, compressed, repeat) and * huffman description table to hufMetadata. * @return : size of huffman description table or error code */ @@ -2780,7 +2788,7 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); BYTE* const nodeWksp = countWkspStart + countWkspSize; const size_t nodeWkspSize = wkspEnd-nodeWksp; - unsigned maxSymbolValue = 255; + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; unsigned huffLog = HUF_TABLELOG_DEFAULT; HUF_repeat repeat = prevHuf->repeatMode; DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize); @@ -2795,7 +2803,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi } /* small ? don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 +#ifndef COMPRESS_LITERALS_SIZE_MIN +#define COMPRESS_LITERALS_SIZE_MIN 63 +#endif { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; if (srcSize <= minLitSize) { DEBUGLOG(5, "set_basic - too small"); @@ -2864,7 +2874,7 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi } /** ZSTD_buildBlockEntropyStats_sequences() : - * Builds entropy for the super-block sequences. + * Builds entropy for the sequences. * Stores symbol compression modes and fse table to fseMetadata. * @return : size of fse tables or error code */ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, @@ -2874,38 +2884,19 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, ZSTD_fseCTablesMetadata_t* fseMetadata, void* workspace, size_t wkspSize) { - /* Size the workspaces */ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); - BYTE* const cTableWksp = countWkspStart + countWkspSize; - const size_t cTableWkspSize = wkspEnd-cTableWksp; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; - FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; BYTE* const ostart = fseMetadata->fseTablesBuffer; BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); BYTE* op = ostart; BYTE* lastNCount = NULL; - - assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq); ZSTD_memset(workspace, 0, wkspSize); fseMetadata->lastCountSize = 0; - return ZSTD_buildSequencesStatistics(ofCodeTable, llCodeTable, mlCodeTable, - CTable_LitLength, CTable_OffsetBits, CTable_MatchLength, - nbSeq, prevEntropy, nextEntropy, op, oend, - strategy, lastNCount, fseMetadata, - countWksp, cTableWksp, cTableWkspSize); + return ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, prevEntropy, nextEntropy, op, oend, + strategy, &lastNCount, fseMetadata, + workspace, wkspSize); } @@ -2937,6 +2928,7 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, return 0; } +/* Returns the size estimate for the literals section (header + content) of a block */ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, const ZSTD_hufCTables_t* huf, const ZSTD_hufCTablesMetadata_t* hufMetadata, @@ -2962,6 +2954,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit return 0; } +/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, const BYTE* codeTable, unsigned maxCode, size_t nbSeq, const FSE_CTable* fseCTable, @@ -2989,7 +2982,6 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); } if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) { - DEBUGLOG(2, "Returning inaccurate"); return nbSeq * 10; } while (ctp < ctEnd) { @@ -2997,9 +2989,10 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ ctp++; } - return cSymbolTypeSizeEstimateInBits / 8; + return cSymbolTypeSizeEstimateInBits >> 3; } +/* Returns the size estimate for the sequences section (header + content) of a block */ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, const BYTE* llCodeTable, const BYTE* mlCodeTable, @@ -3009,7 +3002,7 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, void* workspace, size_t wkspSize, int writeEntropy) { - size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); size_t cSeqSizeEstimate = 0; cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, nbSeq, fseTables->offcodeCTable, NULL, @@ -3027,6 +3020,7 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, return cSeqSizeEstimate + sequencesSectionHeaderSize; } +/* Returns the size estimate for a given stream of literals, of, ll, ml */ size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, const BYTE* ofCodeTable, const BYTE* llCodeTable, @@ -3098,21 +3092,21 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* seqStore) { } /* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx). + * Stores the result in resultSeqStore. */ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_t* originalSeqStore, size_t startIdx, size_t endIdx) { BYTE* const litEnd = originalSeqStore->lit; - seqDef* const seqEnd = originalSeqStore->sequences; U32 literalsBytes; U32 literalsBytesPreceding = 0; *resultSeqStore = *originalSeqStore; - /* First calculate the number of literal bytes before startIdx */ if (startIdx > 0) { resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); } + /* Move longLengthPos into the correct position if necessary */ if (originalSeqStore->longLengthID != 0) { if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) { resultSeqStore->longLengthID = 0; @@ -3125,6 +3119,7 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_ literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); resultSeqStore->litStart += literalsBytesPreceding; if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) { + /* This accounts for possible last literals if the derived chunk reaches the end of the block */ resultSeqStore->lit = litEnd; } else { resultSeqStore->lit = resultSeqStore->litStart+literalsBytes; @@ -3137,14 +3132,14 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_ /* ZSTD_compressSequences_singleBlock(): * Compresses a seqStore into a block with a block header, into the buffer dst. * - * Returns the size of that block or a ZSTD error code + * Returns the total size of that block (including header) or a ZSTD error code. + * + * TODO: Migrate compressBlock_internal and compressSequences_internal to use this as well */ - -/* TODO: Migrate compressBlock_internal and compressSequences_internal to use this as well */ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqStore, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 lastBlock) { + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock) { const U32 rleMaxLength = 25; BYTE* op = (BYTE*)dst; const BYTE* ip = (const BYTE*)src; @@ -3159,7 +3154,6 @@ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqS if (!zc->isFirstBlock && cSeqsSize < rleMaxLength && - ZSTD_maybeRLE(seqStore) && ZSTD_isRLE((BYTE const*)src, srcSize)) { /* We don't want to emit our first block as a RLE even if it qualifies because * doing so will cause the decoder (cli only) to throw a "should consume all input error." @@ -3179,17 +3173,17 @@ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqS if (cSeqsSize == 0) { cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); FORWARD_IF_ERROR(cSize, "Nocompress block failed"); - DEBUGLOG(2, "1: Writing out nocompress block, size: %zu", cSize); + DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize); } else if (cSeqsSize == 1) { cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock); FORWARD_IF_ERROR(cSize, "RLE compress block failed"); - DEBUGLOG(2, "1: Writing out RLE block, size: %zu", cSize); + DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize); } else { /* Error checking and repcodes update */ ZSTD_confirmRepcodesAndEntropyTables(zc); writeBlockHeader(op, cSeqsSize, srcSize, lastBlock); cSize = ZSTD_blockHeaderSize + cSeqsSize; - DEBUGLOG(3, "1: Writing out compressed block, size: %zu", cSize); + DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize); } return cSize; } @@ -3204,117 +3198,149 @@ typedef struct { #define MAX_NB_SPLITS 196 /* Helper function to perform the recursive search for block splits. - * Estimates the cost of the original seqStore, and estimates the cost of splitting the sequences in half. - * If advantageous to split, then we recursive down the two sub-blocks. - * The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. - * In practice, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). + * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half. + * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then + * we do not recurse. * - * Returns the sequence index at which to split, or 0 if we should not split. + * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. + * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). + * In practice, recursion depth usually doesn't go beyond 4. + * + * Furthermore, the number of splits is capped by MAX_NB_SPLITS. At MAX_NB_SPLITS == 196 with the current existing blockSize + * maximum of 128 KB, this value is actually impossible to reach. */ -static size_t deriveBlockBoundsHelper(ZSTD_CCtx* zc, seqStoreSplits* splits, size_t startIdx, size_t endIdx, const seqStore_t* origSeqStore) { - seqStore_t origSeqStoreChunk; +static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, + const ZSTD_CCtx* zc, const seqStore_t* origSeqStore) { + seqStore_t fullSeqStoreChunk; seqStore_t firstHalfSeqStore; seqStore_t secondHalfSeqStore; size_t estimatedOriginalSize; size_t estimatedFirstHalfSize; size_t estimatedSecondHalfSize; + size_t midIdx = (startIdx + endIdx)/2; if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= MAX_NB_SPLITS) { - return 0; + return; } - ZSTD_deriveSeqStoreChunk(&origSeqStoreChunk, origSeqStore, startIdx, endIdx); - ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, (startIdx + endIdx)/2); - ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, (startIdx + endIdx)/2, endIdx); - estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&origSeqStoreChunk, zc); + ZSTD_deriveSeqStoreChunk(&fullSeqStoreChunk, origSeqStore, startIdx, endIdx); + ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, midIdx); + ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, midIdx, endIdx); + estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&fullSeqStoreChunk, zc); estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&firstHalfSeqStore, zc); estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&secondHalfSeqStore, zc); + if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) { + return; + } if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { - deriveBlockBoundsHelper(zc, splits, startIdx, (startIdx + endIdx)/2, origSeqStore); - splits->splitLocations[splits->idx] = (startIdx + endIdx)/2; + ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore); + splits->splitLocations[splits->idx] = midIdx; splits->idx++; - deriveBlockBoundsHelper(zc, splits, (startIdx + endIdx)/2, endIdx, origSeqStore); - return (startIdx + endIdx)/2; - } else { - return 0; + ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore); } } -/* Base recursive function. Populates a table of partitions indices. +/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio. * * Returns the number of splits made (which equals the size of the partition table - 1). */ -static size_t deriveBlockBounds(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 partitions[], U32 nbSeq) { - seqStoreSplits splits; - splits.idx = 0; - splits.splitLocations = partitions; - - deriveBlockBoundsHelper(zc, &splits, 0, nbSeq, &zc->seqStore); +static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) { + seqStoreSplits splits = {partitions, 0}; + if (nbSeq <= 4) { + /* Refuse to try and split anything with less than 4 sequences */ + return 0; + } + ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore); splits.splitLocations[splits.idx] = nbSeq; return splits.idx; } /* ZSTD_compressBlock_splitBlock(): - * Attempts to split a given block into multiple (currently 2) blocks to improve compression ratio. + * Attempts to split a given block into multiple blocks to improve compression ratio. * - * Returns 0 if it would not be advantageous to split the block. Otherwise, returns the combined size - * of the multiple blocks, or a ZSTD error code. + * Returns combined size of all blocks (which includes headers), or a ZSTD error code. */ -static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 lastBlock, U32 nbSeq) { +static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, + const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) { size_t cSize = 0; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; - seqStore_t firstHalfSeqStore; - seqStore_t secondHalfSeqStore; - size_t cSizeFirstHalf; - size_t cSizeSecondHalf; + U32 partitions[MAX_NB_SPLITS]; + size_t i = 0; + size_t startIdx = 0; + size_t endIdx; + size_t srcBytesTotal = 0; + size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); + DEBUGLOG(5, "ZSTD_compressBlock_splitBlock (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); - { - U32 partitions[MAX_NB_SPLITS]; - size_t numSplits = deriveBlockBounds(zc, dst, dstCapacity, src, srcSize, partitions, nbSeq); - size_t i = 0; - size_t startIdx = 0; - size_t endIdx = 0; - size_t srcBytesCum = 0; - - if (numSplits == 0) { - return 0; + + if (numSplits == 0) { + size_t cSizeSingleBlock = ZSTD_compressSequences_singleBlock(zc, &zc->seqStore, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); + return cSizeSingleBlock; + } + + for (i = 0; i <= numSplits; ++i) { + seqStore_t chunkSeqStore; + size_t srcBytes; + size_t cSizeChunk; + U32 lastBlockActual; + + endIdx = partitions[i]; + ZSTD_deriveSeqStoreChunk(&chunkSeqStore, &zc->seqStore, startIdx, endIdx); + srcBytes = ZSTD_countSeqStoreLiteralsBytes(&chunkSeqStore) + ZSTD_countSeqStoreMatchBytes(&chunkSeqStore); + lastBlockActual = lastBlock && (nbSeq == endIdx); + srcBytesTotal += srcBytes; + if (i == numSplits) { + /* This is the final partition, need to account for possible last literals */ + srcBytes += blockSize - srcBytesTotal; } - while (i <= numSplits) { - endIdx = partitions[i]; - seqStore_t chunkSeqStore = zc->seqStore; - ZSTD_deriveSeqStoreChunk(&chunkSeqStore, &zc->seqStore, startIdx, endIdx); - size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(&chunkSeqStore) + ZSTD_countSeqStoreMatchBytes(&chunkSeqStore); - size_t lastBlock = lastBlock && (nbSeq == endIdx); - srcBytesCum += srcBytes; - if (endIdx == nbSeq) { - /* This is the final partition, need to account for last literals */ - srcBytes += zc->blockSize - srcBytesCum; - srcBytesCum += zc->blockSize - srcBytesCum; - } - size_t cSizeChunk = ZSTD_compressSequences_singleBlock(zc, &chunkSeqStore, op, dstCapacity, ip, srcBytes, lastBlock); - { - ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM); - ip += srcBytes; - op += cSizeChunk; - dstCapacity -= cSizeChunk; - } - startIdx = partitions[i]; - cSize += cSizeChunk; - ++i; + + cSizeChunk = ZSTD_compressSequences_singleBlock(zc, &chunkSeqStore, op, dstCapacity, ip, srcBytes, lastBlockActual); + FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!"); + ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM); + + ip += srcBytes; + op += cSizeChunk; + dstCapacity -= cSizeChunk; + cSize += cSizeChunk; + startIdx = partitions[i]; + } + return cSize; +} + +static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 lastBlock) { + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + size_t nbSeq; + size_t cSize; + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { + cSize = 0; + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + return cSize; } + nbSeq = (size_t)(zc->seqStore.sequences - zc->seqStore.sequencesStart); } + + assert(zc->appliedParams.splitBlocks == 1); + cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq); + FORWARD_IF_ERROR(cSize, "Splitting blocks failed!"); return cSize; } static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 frame, U32 lastBlock) + const void* src, size_t srcSize, U32 frame) { /* This the upper bound for the length of an rle block. * This isn't the actual upper bound. Finding the real threshold @@ -3322,7 +3348,6 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, */ const U32 rleMaxLength = 25; size_t cSize; - size_t nbSeq; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", @@ -3332,23 +3357,19 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } - nbSeq = (size_t)(zc->seqStore.sequences - zc->seqStore.sequencesStart); } - zc->appliedParams.splitBlocks = 1; /* remove */ - if (zc->appliedParams.splitBlocks && nbSeq >= 2) { - size_t splitBlocksCompressedSize; - splitBlocksCompressedSize = ZSTD_compressBlock_splitBlock(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq); - if (splitBlocksCompressedSize != 0) { - return splitBlocksCompressedSize; - } + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + ZSTD_confirmRepcodesAndEntropyTables(zc); + return 0; } /* encode sequences and literals */ cSize = ZSTD_entropyCompressSequences(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, - op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + dst, dstCapacity, srcSize, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); @@ -3382,13 +3403,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, */ if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; - if (cSize == 0) { - cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); - } else { - writeBlockHeader(op, cSize, srcSize, lastBlock); - cSize += ZSTD_blockHeaderSize; - } + return cSize; } @@ -3498,7 +3513,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, * Frame is supposed already started (header already produced) * @return : compressed size, or an error code */ -static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, +static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastFrameChunk) @@ -3538,13 +3553,29 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); assert(cSize > 0); assert(cSize <= blockSize + ZSTD_blockHeaderSize); + } else if (ZSTD_useBlockSplitting(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed"); + assert(cSize > 0); } else { cSize = ZSTD_compressBlock_internal(cctx, - op, dstCapacity, - ip, blockSize, 1 /* frame */, lastBlock); + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize, 1 /* frame */); FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); + + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + } else { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + cSize += ZSTD_blockHeaderSize; + } } + ip += blockSize; assert(remaining >= blockSize); remaining -= blockSize; @@ -3552,7 +3583,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, assert(dstCapacity >= cSize); dstCapacity -= cSize; cctx->isFirstBlock = 0; - DEBUGLOG(2, "ZSTD_compress_frameChunk: adding a block of size %u", + DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", (unsigned)cSize); } } @@ -3698,7 +3729,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); { size_t const cSize = frame ? ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : - ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */, 0); + ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); cctx->consumedSrcSize += srcSize; cctx->producedCSize += (cSize + fhSize); diff --git a/lib/obj/conf_2c4cce56e07d576a865fce347f466405/dynamic/libzstd.1.4.6.dylib.ld_Ak6QYf b/lib/obj/conf_2c4cce56e07d576a865fce347f466405/dynamic/libzstd.1.4.6.dylib.ld_Ak6QYf deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index 8ef77869261..ec263cbde5f 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -94,6 +94,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer); setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer); setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer); + setRand(cctx, ZSTD_c_splitBlocks, 0, 1, producer); if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer); } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 5f707e027d5..dbd98bb73d5 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1544,6 +1544,15 @@ static int basicUnitTests(U32 const seed, double compressibility) ZSTD_freeCCtx(cctx); } + DISPLAYLEVEL(3, "test%3i : compress with block splitting : ", testNb++) + { ZSTD_CCtx* cctx = ZSTD_createCCtx(); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_splitBlocks, 1) ); + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + CHECK(cSize); + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : compress -T2 with/without literals compression : ", testNb++) { ZSTD_CCtx* cctx = ZSTD_createCCtx(); size_t cSize1, cSize2; From df71c3cfea8e1d52c8e1ae8c10c2149325132f05 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Fri, 8 Jan 2021 15:50:19 -0500 Subject: [PATCH 14/15] Refactor buildSequencesStatistics() to avoid pointer increment for superblocks --- lib/compress/zstd_compress.c | 85 +++++++++++++------------ lib/compress/zstd_compress_internal.h | 2 +- lib/compress/zstd_compress_superblock.c | 4 +- 3 files changed, 49 insertions(+), 42 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 9c4835b1007..854a8e39891 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2204,18 +2204,18 @@ static int ZSTD_useBlockSplitting(const ZSTD_CCtx_params* cctxParams) /* ZSTD_buildSequencesStatistics(): * Returns the size of the statistics for a given set of sequences, or a ZSTD error code, + * Also modifies LLtype, Offtype, MLtype, and lastNCount to the appropriate values. */ MEM_STATIC size_t ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, + U32* LLtype, U32* Offtype, U32* MLtype, BYTE** lastNCount, const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, BYTE* dst, const BYTE* const dstEnd, - ZSTD_strategy strategy, BYTE** lastNCount, ZSTD_fseCTablesMetadata_t* fseMetadata, + ZSTD_strategy strategy, void* entropyWorkspace, size_t entropyWkspSize) { - U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ BYTE* const ostart = dst; const BYTE* const oend = dstEnd; BYTE* op = ostart; - BYTE* seqHead = op++; unsigned* const countWorkspace = (unsigned*)entropyWorkspace; @@ -2229,34 +2229,31 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, /* convert length/distances into codes */ ZSTD_seqToCodes(seqStorePtr); assert(op <= oend); + assert(LLtype && Offtype && MLtype); /* build CTable for Literal Lengths */ { unsigned max = MaxLL; size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ DEBUGLOG(5, "Building LL table"); nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + *LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, countWorkspace, max, mostFrequent, nbSeq, LLFSELog, prevEntropy->litlengthCTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy); assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + assert(!(*LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ { size_t const countSize = ZSTD_buildCTable( op, (size_t)(oend - op), - CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + CTable_LitLength, LLFSELog, (symbolEncodingType_e)*LLtype, countWorkspace, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) + if (*LLtype == set_compressed) *lastNCount = op; op += countSize; - if (fseMetadata) { - if (LLtype == set_compressed) fseMetadata->lastCountSize = countSize; - fseMetadata->llType = (symbolEncodingType_e) LLtype; - } assert(op <= oend); } } /* build CTable for Offsets */ @@ -2267,28 +2264,24 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; DEBUGLOG(5, "Building OF table"); nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + *Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, countWorkspace, max, mostFrequent, nbSeq, OffFSELog, prevEntropy->offcodeCTable, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + assert(!(*Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ { size_t const countSize = ZSTD_buildCTable( op, (size_t)(oend - op), - CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)*Offtype, countWorkspace, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) + if (*Offtype == set_compressed) *lastNCount = op; op += countSize; - if (fseMetadata) { - if (Offtype == set_compressed) fseMetadata->lastCountSize = countSize; - fseMetadata->ofType = (symbolEncodingType_e) Offtype; - } assert(op <= oend); } } /* build CTable for MatchLengths */ @@ -2297,32 +2290,26 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + *MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, countWorkspace, max, mostFrequent, nbSeq, MLFSELog, prevEntropy->matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + assert(!(*MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ { size_t const countSize = ZSTD_buildCTable( op, (size_t)(oend - op), - CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + CTable_MatchLength, MLFSELog, (symbolEncodingType_e)*MLtype, countWorkspace, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) + if (*MLtype == set_compressed) *lastNCount = op; op += countSize; - if (fseMetadata) { - if (MLtype == set_compressed) fseMetadata->lastCountSize = countSize; - fseMetadata->mlType = (symbolEncodingType_e) MLtype; - } assert(op <= oend); } } - - *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); return op-ostart; } @@ -2353,6 +2340,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; BYTE* lastNCount = NULL; + BYTE* seqHead; entropyWorkspace = count + (MaxSeq + 1); entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); @@ -2398,13 +2386,22 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, return (size_t)(op - ostart); } - /* build stats for sequences */ - entropyStatisticsSize = ZSTD_buildSequencesStatistics(seqStorePtr, - nbSeq, &prevEntropy->fse, &nextEntropy->fse, op, oend, - strategy, &lastNCount, NULL /* no fseMetadata needed */, + { + U32 LLtype; + U32 Offtype; + U32 MLtype; + seqHead = op++; + /* build stats for sequences */ + entropyStatisticsSize = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + &LLtype, &Offtype, &MLtype, &lastNCount, + &prevEntropy->fse, &nextEntropy->fse, + op, oend, + strategy, entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(entropyStatisticsSize, "FSE statistics building failed!"); - op += entropyStatisticsSize; + FORWARD_IF_ERROR(entropyStatisticsSize, "FSE statistics building failed!"); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + op += entropyStatisticsSize; + } { size_t const bitstreamSize = ZSTD_encodeSequences( op, (size_t)(oend - op), @@ -2890,13 +2887,23 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); BYTE* op = ostart; BYTE* lastNCount = NULL; + size_t hSize; + U32 LLtype; + U32 Offtype; + U32 MLtype; + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq); ZSTD_memset(workspace, 0, wkspSize); - - fseMetadata->lastCountSize = 0; - return ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, prevEntropy, nextEntropy, op, oend, - strategy, &lastNCount, fseMetadata, + hSize = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + &LLtype, &Offtype, &MLtype, &lastNCount, + prevEntropy, nextEntropy, op, oend, + strategy, workspace, wkspSize); + fseMetadata->lastNCount = lastNCount; + fseMetadata->llType = (symbolEncodingType_e) LLtype; + fseMetadata->ofType = (symbolEncodingType_e) Offtype; + fseMetadata->mlType = (symbolEncodingType_e) MLtype; + return hSize; } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 33c653e27e0..6cfeb628c5d 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -107,7 +107,7 @@ typedef struct { symbolEncodingType_e mlType; BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; size_t fseTablesSize; - size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ + BYTE* lastNCount; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ } ZSTD_fseCTablesMetadata_t; typedef struct { diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index 948203628cf..1fceb90bd63 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -221,9 +221,9 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables * block, since it isn't worth optimizing. */ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { + if (writeEntropy && fseMetadata->lastNCount && (op - fseMetadata->lastNCount) < 4) { /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ - assert(fseMetadata->lastCountSize + bitstreamSize == 3); + assert(op - fseMetadata->lastNCount == 3); DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " "emitting an uncompressed block."); return 0; From af3d67984ca89993d998840dd789e6ebe4eafb80 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 11 Jan 2021 08:30:16 -0500 Subject: [PATCH 15/15] Change 1.3.4 bugfix to be cross-compatible with superblocks and normal compression --- lib/compress/zstd_compress.c | 47 ++++++++++++------------- lib/compress/zstd_compress_internal.h | 2 +- lib/compress/zstd_compress_superblock.c | 4 +-- 3 files changed, 25 insertions(+), 28 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 854a8e39891..3be9e1df8bc 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2208,7 +2208,7 @@ static int ZSTD_useBlockSplitting(const ZSTD_CCtx_params* cctxParams) */ MEM_STATIC size_t ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, - U32* LLtype, U32* Offtype, U32* MLtype, BYTE** lastNCount, + U32* LLtype, U32* Offtype, U32* MLtype, size_t* lastCountSize, const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, BYTE* dst, const BYTE* const dstEnd, ZSTD_strategy strategy, @@ -2252,7 +2252,7 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); if (*LLtype == set_compressed) - *lastNCount = op; + *lastCountSize = countSize; op += countSize; assert(op <= oend); } } @@ -2280,7 +2280,7 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); if (*Offtype == set_compressed) - *lastNCount = op; + *lastCountSize = countSize; op += countSize; assert(op <= oend); } } @@ -2306,7 +2306,7 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); if (*MLtype == set_compressed) - *lastNCount = op; + *lastCountSize = countSize; op += countSize; assert(op <= oend); } } @@ -2339,8 +2339,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; - BYTE* lastNCount = NULL; - BYTE* seqHead; + size_t lastCountSize = 0; entropyWorkspace = count + (MaxSeq + 1); entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); @@ -2385,15 +2384,14 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); return (size_t)(op - ostart); } - { U32 LLtype; U32 Offtype; U32 MLtype; - seqHead = op++; + BYTE* seqHead = op++; /* build stats for sequences */ entropyStatisticsSize = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, - &LLtype, &Offtype, &MLtype, &lastNCount, + &LLtype, &Offtype, &MLtype, &lastCountSize, &prevEntropy->fse, &nextEntropy->fse, op, oend, strategy, @@ -2421,9 +2419,9 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, * In this exceedingly rare case, we will simply emit an uncompressed * block, since it isn't worth optimizing. */ - if (lastNCount && (op - lastNCount) < 4) { - /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ - assert(op - lastNCount == 3); + if (lastCountSize && (lastCountSize + bitstreamSize) < 4) { + /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(lastCountSize + bitstreamSize == 3); DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " "emitting an uncompressed block."); return 0; @@ -2886,20 +2884,20 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, BYTE* const ostart = fseMetadata->fseTablesBuffer; BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); BYTE* op = ostart; - BYTE* lastNCount = NULL; size_t hSize; - U32 LLtype; - U32 Offtype; - U32 MLtype; + + /* ZSTD_buildSequencesStatistics() is guaranteed to overwrite these values */ + U32 LLtype = set_basic; + U32 Offtype = set_basic; + U32 MLtype = set_basic; DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq); ZSTD_memset(workspace, 0, wkspSize); hSize = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, - &LLtype, &Offtype, &MLtype, &lastNCount, + &LLtype, &Offtype, &MLtype, &fseMetadata->lastCountSize, prevEntropy, nextEntropy, op, oend, strategy, workspace, wkspSize); - fseMetadata->lastNCount = lastNCount; fseMetadata->llType = (symbolEncodingType_e) LLtype; fseMetadata->ofType = (symbolEncodingType_e) Offtype; fseMetadata->mlType = (symbolEncodingType_e) MLtype; @@ -3104,8 +3102,8 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* seqStore) { static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_t* originalSeqStore, size_t startIdx, size_t endIdx) { BYTE* const litEnd = originalSeqStore->lit; - U32 literalsBytes; - U32 literalsBytesPreceding = 0; + size_t literalsBytes; + size_t literalsBytesPreceding = 0; *resultSeqStore = *originalSeqStore; if (startIdx > 0) { @@ -3118,7 +3116,7 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_ if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) { resultSeqStore->longLengthID = 0; } else { - resultSeqStore->longLengthPos -= startIdx; + resultSeqStore->longLengthPos -= (U32)startIdx; } } resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx; @@ -3240,7 +3238,7 @@ static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx } if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore); - splits->splitLocations[splits->idx] = midIdx; + splits->splitLocations[splits->idx] = (U32)midIdx; splits->idx++; ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore); } @@ -3322,21 +3320,20 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, const void* src, size_t srcSize, U32 lastBlock) { const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; - size_t nbSeq; + U32 nbSeq; size_t cSize; DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); if (bss == ZSTDbss_noCompress) { - cSize = 0; if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); return cSize; } - nbSeq = (size_t)(zc->seqStore.sequences - zc->seqStore.sequencesStart); + nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart); } assert(zc->appliedParams.splitBlocks == 1); diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 6cfeb628c5d..43252aee425 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -107,7 +107,7 @@ typedef struct { symbolEncodingType_e mlType; BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; size_t fseTablesSize; - BYTE* lastNCount; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSequences_internal() */ } ZSTD_fseCTablesMetadata_t; typedef struct { diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index 1fceb90bd63..70d78ef854e 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -221,9 +221,9 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables * block, since it isn't worth optimizing. */ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (writeEntropy && fseMetadata->lastNCount && (op - fseMetadata->lastNCount) < 4) { + if (writeEntropy && fseMetadata->lastCountSize && (bitstreamSize + fseMetadata->lastCountSize) < 4) { /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ - assert(op - fseMetadata->lastNCount == 3); + assert(bitstreamSize + fseMetadata->lastCountSize == 3); DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " "emitting an uncompressed block."); return 0;