diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 8c8c33a859d..b4e455f5a19 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2328,6 +2328,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
             for (i = 0; i < ZSTD_REP_NUM; ++i)
                 zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
         }
+        DEBUGLOG(5, "-----NEW BLOCK-----");
         if (zc->externSeqStore.pos < zc->externSeqStore.size) {
             assert(!zc->appliedParams.ldmParams.enableLdm);
             /* Updates ldmSeqStore.pos */
@@ -2338,7 +2339,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                                        src, srcSize);
             assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
         } else if (zc->appliedParams.ldmParams.enableLdm) {
-            rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
+            rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0, 0};
 
             ldmSeqStore.seq = zc->ldmSequences;
             ldmSeqStore.capacity = zc->maxNbLdmSequences;
@@ -2360,6 +2361,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
         {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
             ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
     }   }
+    DEBUGLOG(5, "Finished BuildSeqStore()");
     return ZSTDbss_compress;
 }
 
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index b161a208cf0..a982dbb8492 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -131,6 +131,29 @@ typedef struct {
     U32 lowLimit;           /* below that point, no more valid data */
 } ZSTD_window_t;
 
+typedef struct {
+    U32 offset;
+    U32 litLength;
+    U32 matchLength;
+} rawSeq;
+
+typedef struct {
+  rawSeq* seq;     /* The start of the sequences */
+  size_t pos;      /* The position where reading stopped. <= size. */
+  size_t size;     /* The number of sequences. <= capacity. */
+  size_t capacity; /* The capacity starting from `seq` pointer */
+
+  U32 rangeFlag;   /* 0 : `seq` holds plain (offset, litLength, matchLength) sequences.
+                    * 1 or 2 : members of this rawSeqStore represent different things:
+                    *   seq.matchLength == start of a match
+                    *   seq.litLength == end of a match
+                    *   capacity == reference start index of this ldm seq store
+                    * 1 : start/end are counted from the start of the block.
+                    * 2 : the store spans multiple blocks; start/end are indexes from window.base.
+                    */
+
+} rawSeqStore_t;
+
 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
 struct ZSTD_matchState_t {
     ZSTD_window_t window;   /* State for window round buffer management */
@@ -150,6 +173,7 @@ struct ZSTD_matchState_t {
                                * dedicated dictionary search structure.
                                */
     optState_t opt;         /* optimal parser state */
+    rawSeqStore_t ldmSeqStore;  /* raw seq store containing LDMs */
     const ZSTD_matchState_t* dictMatchState;
     ZSTD_compressionParameters cParams;
 };
@@ -183,19 +207,6 @@ typedef struct {
     U32 windowLog;          /* Window log for the LDM */
 } ldmParams_t;
 
-typedef struct {
-    U32 offset;
-    U32 litLength;
-    U32 matchLength;
-} rawSeq;
-
-typedef struct {
-  rawSeq* seq;     /* The start of the sequences */
-  size_t pos;      /* The position where reading stopped. <= size. */
-  size_t size;     /* The number of sequences. <= capacity. */
-  size_t capacity; /* The capacity starting from `seq` pointer */
-} rawSeqStore_t;
-
 typedef struct {
     int collectSequences;
     ZSTD_Sequence* seqStart;
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index dbfce3dce7f..f242529a245 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -562,6 +562,73 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
     return sequence;
 }
 
+/* Debug helper: logs the read position of rawSeqStore, followed by the
+ * (offset, matchLength, litLength) fields of each of its sequences. */
+static void printSeqStore(const rawSeqStore_t* rawSeqStore) {
+    size_t i;
+    DEBUGLOG(6, "rawSeqStore: pos: %zu", rawSeqStore->pos);
+    for (i = 0; i < rawSeqStore->size; ++i) {
+        DEBUGLOG(6, "(of:%u ml:%u ll: %u)", rawSeqStore->seq[i].offset, rawSeqStore->seq[i].matchLength, rawSeqStore->seq[i].litLength);
+    }
+}
+
+/* Shifts the matchLength and litLength fields of every sequence in rawSeqStore
+ * by baseDiff, which may be negative. */
+static void adjustLdmSeqStore(rawSeqStore_t* rawSeqStore, int baseDiff) {
+    size_t i;
+    for (i = 0; i < rawSeqStore->size; ++i) {
+        /* both fields are U32 : unsigned wrap-around also covers baseDiff < 0 */
+        rawSeqStore->seq[i].matchLength += (U32)baseDiff;
+        rawSeqStore->seq[i].litLength += (U32)baseDiff;
+    }
+}
+
+/**
+ * Converts the elements of a rawSeqStore into a series of ranges representing
+ * the beginning and end of a match. We store the start of a match in "matchLength"
+ * and end of a match in "litLength". So a rawSeqStore containing:
+ * (litLength: 1000, matchLength: 500)
+ * (litLength: 2000, matchLength: 1000)
+ * (litLength: 4000, matchLength: 1000)
+ *
+ * would be converted into:
+ *
+ * (matchStart: 1000, matchEnd: 1500)
+ * (matchStart: 3500, matchEnd: 4500)
+ * (matchStart: 8500, matchEnd: 9500)
+ *
+ * Sets rangeFlag to 1. When the last range ends beyond srcSize, the store is
+ * taken to span several blocks: rangeFlag becomes 2 and every range is shifted
+ * by seqStoreStartPos. An empty store is left untouched.
+ */
+static void convertSeqStoreToRanges(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 seqStoreStartPos) {
+    size_t i;
+    size_t currPos = 0;
+    if (rawSeqStore->size == 0)
+        return;
+    DEBUGLOG(6, "Conversion...");
+    rawSeqStore->rangeFlag = 1;
+    for (i = 0; i < rawSeqStore->size; ++i) {
+        size_t matchStart;
+        size_t matchEnd;
+        currPos += rawSeqStore->seq[i].litLength;
+        matchStart = currPos;
+        currPos += rawSeqStore->seq[i].matchLength;
+        matchEnd = currPos;
+        rawSeqStore->seq[i].matchLength = (U32)matchStart;
+        rawSeqStore->seq[i].litLength = (U32)matchEnd;
+        DEBUGLOG(6, "(%zu, %zu)", matchStart, matchEnd);
+    }
+    DEBUGLOG(6, "size:%zu", rawSeqStore->size);
+    /* this is maybe a lil bit janky of a way to check for a multi-block seqstore */
+    if (rawSeqStore->seq[rawSeqStore->size - 1].litLength > srcSize) {
+        DEBUGLOG(6, "SETTING RANGEFLAG TO 2");
+        rawSeqStore->rangeFlag = 2;  /* Signifies that this is a seqstore that spans
+                                      * multiple blocks. */
+        adjustLdmSeqStore(rawSeqStore, (int)seqStoreStartPos);
+    }
+}
+
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
     void const* src, size_t srcSize)
@@ -576,6 +643,32 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     /* Input positions */
     BYTE const* ip = istart;
 
+    /* If compression strategy uses optimal parser, use LDMs only as candidates
+     * rather than accepting all LDMs and calling regular match finder on literal
+     * blocks in between.
+     */
+    if (cParams->strategy >= ZSTD_btopt) {
+        const BYTE* const prevBase = (BYTE const*)ms->window.base;
+        U32 const blockStartIdx = (U32)(istart - ms->window.base);
+        size_t cLen;
+        DEBUGLOG(5, "ldmSeqStore start idx: %u", blockStartIdx);
+        if (rawSeqStore->rangeFlag == 0) {
+            /* only convert the rawSeqStore once, in case it spans multiple blocks */
+            printSeqStore(rawSeqStore);
+            convertSeqStoreToRanges(rawSeqStore, srcSize, blockStartIdx); /* sets rangeFlag to 1 or 2, unless the store is empty */
+        }
+        rawSeqStore->capacity = blockStartIdx;
+        ms->ldmSeqStore = *rawSeqStore;
+        cLen = blockCompressor(ms, seqStore, rep, src, srcSize);
+        if (prevBase != ms->window.base) {
+            int const baseDiff = (int)(prevBase - ms->window.base);
+            DEBUGLOG(5, "Bases were different, adjusting, diff = %d", baseDiff);
+            adjustLdmSeqStore(rawSeqStore, baseDiff);
+            printSeqStore(rawSeqStore);
+        }
+        return cLen;
+    }
+
     DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
     assert(rawSeqStore->pos <= rawSeqStore->size);
     assert(rawSeqStore->size <= rawSeqStore->capacity);
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 5acc9e0b680..05bb750740c 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -763,8 +763,122 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
     case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
     }
 }
+/*********************************
+*  LDM functions
+*********************************/
+
+/* getNextLdm() :
+ * Advances ldmSeqStore->pos and loads the range found there into *ldmStart / *ldmEnd
+ * once `current` is beyond the range they hold (current > *ldmEnd when
+ * rangeFlag == 2, current >= *ldmEnd + startBlockIdx otherwise).
+ * Does nothing when the store is empty, was not converted to ranges
+ * (rangeFlag == 0), or pos already designates its last range.
+ * With rangeFlag == 2, a range straddling startBlockIdx or endBlockIdx is
+ * additionally clipped to that bound.
+ * ldmSeqStoreStartPos and currPosInBlock are accepted but not used yet.
+ * TODO: Increment by SBI */
+static void getNextLdm(U32* ldmStart, U32* ldmEnd, rawSeqStore_t* ldmSeqStore,
+                       const U32 ldmSeqStoreStartPos, U32 currPosInBlock, U32 current, U32 startBlockIdx, U32 endBlockIdx) {
+    (void)ldmSeqStoreStartPos;
+    (void)currPosInBlock;
+    /* size == 0 is tested first, so that `size - 1` cannot wrap around */
+    if (ldmSeqStore->size == 0 || ldmSeqStore->rangeFlag == 0 ||
+        ldmSeqStore->pos >= ldmSeqStore->size - 1)  /* pos == size-1 means currLdm is the last one, should never fetch another */
+        return;
+
+    if (ldmSeqStore->rangeFlag == 2) {
+        /* No need to adjust if we have an absolute seq store */
+        if (current > *ldmEnd) {
+            /* If our current pos is greater than current match end, we need to fetch a new match */
+            ldmSeqStore->pos++;
+            *ldmStart = ldmSeqStore->seq[ldmSeqStore->pos].matchLength;
+            *ldmEnd = ldmSeqStore->seq[ldmSeqStore->pos].litLength;
+            DEBUGLOG(7, "Newly fetched ldm: (%u, %u)", *ldmStart, *ldmEnd);
+        }
+        /* Handle match splitting, which only applies for multi-threaded cases
+         * If an LDM starts before the block ends, and ends after the block ends, we split the match into two.
+         * Don't increment pos so we stay on this match until it ends.
+         */
+        if (*ldmStart < endBlockIdx && *ldmEnd > endBlockIdx) {
+            *ldmEnd = endBlockIdx;
+        }
+
+        if (*ldmStart < startBlockIdx && *ldmEnd > startBlockIdx) {
+            *ldmStart = startBlockIdx;
+        }
+    } else {
+        /* In this case, all of the LDMs are within this one block */
+        U32 const ldmEndAdjusted = *ldmEnd + startBlockIdx;
+        if (current >= ldmEndAdjusted) {
+            ldmSeqStore->pos++;
+            *ldmStart = ldmSeqStore->seq[ldmSeqStore->pos].matchLength;
+            *ldmEnd = ldmSeqStore->seq[ldmSeqStore->pos].litLength;
+            DEBUGLOG(7, "New raw ldm range: (%u, %u) at pos: %zu", *ldmStart, *ldmEnd, ldmSeqStore->pos);
+        }
+    }
+}
+
+/* maybeAddLdm() :
+ * Appends the LDM range (ldmStart, ldmEnd) to `matches` as one more candidate and
+ * increments *nbMatches, provided that `current` is exactly the start of the range,
+ * the range is at least ZSTD_LDM_MINMATCH_MIN long, it is not shorter than the last
+ * entry of `matches`, and *nbMatches < ZSTD_OPT_NUM. Otherwise nothing changes.
+ * ldmStart / ldmEnd are offset by startBlockIdx when rangeFlag == 1, used as-is otherwise.
+ */
+static void maybeAddLdm(const rawSeqStore_t* const ldmSeqStore, ZSTD_match_t* matches,
+                        U32* nbMatches, U32 ldmStart, U32 ldmEnd, U32 current, U32 startBlockIdx) {
+    U32 ldmStartAdjusted;
+    U32 ldmEndAdjusted;
+    U32 originalMatchLength;
+    U32 posDifference;
+    U32 candidateOffCode;
+    U32 candidateMatchLength;
+    U32 candidateMatchIdx;
+
+    if (ldmSeqStore->size == 0)
+        return;
+    assert(ldmSeqStore->rangeFlag != 0);
+    /* Adjusted ldms for when the ldm seq store was calculated for this block only */
+    ldmStartAdjusted = ldmSeqStore->rangeFlag == 1 ? ldmStart + startBlockIdx : ldmStart;
+    ldmEndAdjusted = ldmSeqStore->rangeFlag == 1 ? ldmEnd + startBlockIdx : ldmEnd;
+
+    /* Current must be within the adjusted ldm */
+    if (current < ldmStartAdjusted || current >= ldmEndAdjusted)
+        return;
+
+    originalMatchLength = ldmEndAdjusted - ldmStartAdjusted;
+    posDifference = current - ldmStartAdjusted;
+    if (posDifference > 0 /* TODO: change */ || posDifference >= originalMatchLength)
+        return;
+    DEBUGLOG(7, "Considering LDM range (%u, %u) -> abs: (%u, %u) @ current = %u (original matchlen: %u)",
+                ldmStart, ldmEnd, ldmStartAdjusted, ldmEndAdjusted, current, originalMatchLength);
+    candidateOffCode = ldmSeqStore->seq[ldmSeqStore->pos].offset + posDifference + ZSTD_REP_MOVE;
+    candidateMatchLength = originalMatchLength - posDifference;
+    if (candidateMatchLength < ZSTD_LDM_MINMATCH_MIN)
+        return;
+    if (*nbMatches >= ZSTD_OPT_NUM)
+        return;
+    if (*nbMatches > 0 && candidateMatchLength < matches[*nbMatches-1].len)
+        return;
+
+    /* Append the candidate, then sift it towards the front for as long as the
+     * previous entry has the same length and a smaller offset code. */
+    candidateMatchIdx = *nbMatches;
+    matches[candidateMatchIdx].len = candidateMatchLength;
+    matches[candidateMatchIdx].off = candidateOffCode;
+    while (candidateMatchIdx > 0 &&
+           matches[candidateMatchIdx].off > matches[candidateMatchIdx - 1].off &&
+           matches[candidateMatchIdx].len == matches[candidateMatchIdx - 1].len) {
+        ZSTD_match_t const tmp = matches[candidateMatchIdx - 1];
+        matches[candidateMatchIdx - 1] = matches[candidateMatchIdx];
+        matches[candidateMatchIdx] = tmp;
+        --candidateMatchIdx;
+    }
+    (*nbMatches)++;
+}
+
 
 
 /*-*******************************
 *  Optimal parser
 *********************************/
@@ -813,6 +927,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
     U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
     U32 nextToUpdate3 = ms->nextToUpdate;
+    U32 const startBlockIdx = (U32)(istart - base);
+    U32 const endBlockIdx = startBlockIdx + (U32)srcSize;
+
+    U32 currLdmStart;
+    U32 currLdmEnd;
 
     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
     ZSTD_match_t* const matches = optStatePtr->matchTable;
@@ -821,9 +940,19 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
     /* init */
     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
                 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
     assert(optLevel <= 2);
     ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
     ip += (ip==prefixStart);
+
+    /* Set current LDM candidate to whatever might have been considered in prev block */
+    if (ms->ldmSeqStore.size != 0) {
+        size_t const readIdx = ms->ldmSeqStore.pos == 0 ? 0 : ms->ldmSeqStore.pos - 1;
+        currLdmStart = ms->ldmSeqStore.seq[readIdx].matchLength;
+        currLdmEnd = ms->ldmSeqStore.seq[readIdx].litLength;
+        DEBUGLOG(6, "Starting opt with ldm : (%u, %u)", currLdmStart, currLdmEnd);
+    } else {
+        currLdmStart = currLdmEnd = 0;
+    }
 
     /* Match Loop */
     while (ip < ilimit) {
@@ -832,7 +961,13 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
         /* find first match */
         {   U32 const litlen = (U32)(ip - anchor);
             U32 const ll0 = !litlen;
-            U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            U32 const current = (U32)(ip - base);
+            U32 nbMatches;
+
+            getNextLdm(&currLdmStart, &currLdmEnd, &ms->ldmSeqStore, (U32)ms->ldmSeqStore.capacity, (U32)(ip-istart), current, startBlockIdx, endBlockIdx);
+            nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            maybeAddLdm(&ms->ldmSeqStore, matches, &nbMatches, currLdmStart, currLdmEnd, current, startBlockIdx);
+
             if (!nbMatches) { ip++; continue; }
 
             /* initialize opt[0] */
@@ -890,6 +1025,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
         /* check further positions */
         for (cur = 1; cur <= last_pos; cur++) {
             const BYTE* const inr = ip + cur;
+            U32 const current = (U32)(inr - base);
             assert(cur < ZSTD_OPT_NUM);
             DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
 
@@ -945,7 +1081,13 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                 U32 const previousPrice = opt[cur].price;
                 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
-                U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
+                U32 nbMatches;
                 U32 matchNb;
+
+                /* Fetch next LDM if necessary, then let it compete with the regular candidates */
+                getNextLdm(&currLdmStart, &currLdmEnd, &ms->ldmSeqStore, (U32)ms->ldmSeqStore.capacity, (U32)(inr-istart), current, startBlockIdx, endBlockIdx);
+                nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
+                maybeAddLdm(&ms->ldmSeqStore, matches, &nbMatches, currLdmStart, currLdmEnd, current, startBlockIdx);
+
                 if (!nbMatches) {
                     DEBUGLOG(7, "rPos:%u : no match found", cur);
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index baf6ef4ca6d..f789ae5bfdd 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -266,7 +266,7 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
 
 /* =====   Seq Pool Wrapper   ====== */
 
-static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0};
+static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
 
 typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
 
@@ -277,7 +277,7 @@ static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
 
 static rawSeqStore_t bufferToSeq(buffer_t buffer)
 {
-    rawSeqStore_t seq = {NULL, 0, 0, 0};
+    rawSeqStore_t seq = {NULL, 0, 0, 0, 0};
     seq.seq = (rawSeq*)buffer.start;
     seq.capacity = buffer.capacity / sizeof(rawSeq);
     return seq;