From 9f7d8ae1a37a74a13d9f01766b2195cc751ccd19 Mon Sep 17 00:00:00 2001 From: Vianney Tran Date: Thu, 4 Feb 2021 13:32:48 -0500 Subject: [PATCH] Update vendored zstd to 1.4.8 --- bitstream.h | 105 +- compiler.h | 135 ++- cover.c | 51 +- cover.h | 12 +- cpu.h | 4 +- debug.c | 42 +- debug.h | 71 +- entropy_common.c | 276 +++-- error_private.c | 4 +- error_private.h | 10 +- fastcover.c | 49 +- fse.h | 92 +- fse_compress.c | 98 +- fse_decompress.c | 186 ++- hist.c | 98 +- hist.h | 46 +- huf.h | 79 +- huf_compress.c | 450 ++++--- huf_decompress.c | 586 +++++---- mem.h | 161 ++- pool.c | 40 +- pool.h | 4 +- threading.c | 18 +- threading.h | 7 +- xxhash.c | 120 +- xxhash.h | 44 +- zbuff.h | 2 +- zbuff_common.c | 2 +- zbuff_compress.c | 2 +- zbuff_decompress.c | 2 +- zdict.c | 51 +- zdict.h | 81 +- zstd.h | 478 ++++++-- zstd_common.c | 20 +- zstd_compress.c | 2315 ++++++++++++++++++++++++++---------- zstd_compress_internal.h | 290 ++++- zstd_compress_literals.c | 24 +- zstd_compress_literals.h | 2 +- zstd_compress_sequences.c | 60 +- zstd_compress_sequences.h | 9 +- zstd_compress_superblock.c | 849 +++++++++++++ zstd_compress_superblock.h | 32 + zstd_cwksp.h | 98 +- zstd_ddict.c | 28 +- zstd_ddict.h | 4 +- zstd_decompress.c | 567 +++++---- zstd_decompress_block.c | 669 +++++++---- zstd_decompress_block.h | 9 +- zstd_decompress_internal.h | 27 +- zstd_deps.h | 111 ++ zstd_double_fast.c | 57 +- zstd_double_fast.h | 2 +- zstd_errors.h | 4 +- zstd_fast.c | 90 +- zstd_fast.h | 2 +- zstd_internal.h | 254 +++- zstd_lazy.c | 459 +++++-- zstd_lazy.h | 22 +- zstd_ldm.c | 107 +- zstd_ldm.h | 13 +- zstd_legacy.h | 2 +- zstd_opt.c | 347 ++++-- zstd_opt.h | 2 +- zstd_v01.c | 34 +- zstd_v01.h | 2 +- zstd_v02.c | 20 +- zstd_v02.h | 2 +- zstd_v03.c | 20 +- zstd_v03.h | 2 +- zstd_v04.c | 24 +- zstd_v04.h | 2 +- zstd_v05.c | 34 +- zstd_v05.h | 2 +- zstd_v06.c | 34 +- zstd_v06.h | 2 +- zstd_v07.c | 38 +- zstd_v07.h | 2 +- zstdmt_compress.c | 525 ++------ zstdmt_compress.h | 136 +-- 79 files changed, 7315 insertions(+), 3445 deletions(-) create mode 100644 zstd_compress_superblock.c create mode 100644 zstd_compress_superblock.h create mode 100644 zstd_deps.h diff --git a/bitstream.h b/bitstream.h index 1c294b8..d9a2730 100644 --- a/bitstream.h +++ b/bitstream.h @@ -1,35 +1,15 @@ /* ****************************************************************** - bitstream - Part of FSE library - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * bitstream + * Part of FSE library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ #ifndef BITSTREAM_H_MODULE #define BITSTREAM_H_MODULE @@ -37,7 +17,6 @@ #if defined (__cplusplus) extern "C" { #endif - /* * This API consists of small unitary functions, which must be inlined for best performance. * Since link-time-optimization is not available for all compilers, @@ -48,6 +27,7 @@ extern "C" { * Dependencies ******************************************/ #include "mem.h" /* unaligned access routines */ +#include "compiler.h" /* UNLIKELY() */ #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ #include "error_private.h" /* error codes and messages */ @@ -55,10 +35,12 @@ extern "C" { /*========================================= * Target specific =========================================*/ -#if defined(__BMI__) && defined(__GNUC__) -# include /* support for bextr (experimental) */ -#elif defined(__ICCARM__) -# include +#ifndef ZSTD_NO_INTRINSICS +# if defined(__BMI__) && defined(__GNUC__) +# include /* support for bextr (experimental) */ +# elif defined(__ICCARM__) +# include +# endif #endif #define STREAM_ACCUMULATOR_MIN_32 25 @@ -160,9 +142,12 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val) assert(val != 0); { # if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - _BitScanReverse ( &r, val ); - return (unsigned) r; +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val) ^ 31; +# else + unsigned long r = 0; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# endif # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ return __builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */ @@ -218,7 +203,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits) { - MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); + DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); assert(nbBits < BIT_MASK_SIZE); assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; @@ -291,7 +276,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) */ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) { - if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } bitD->start = (const char*)srcBuffer; bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); @@ -337,12 +322,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si return srcSize; } -MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) { return bitContainer >> start; } -MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { U32 const regMask = sizeof(bitContainer)*8 - 1; /* if start > regMask, bitstream is corrupted, and result is undefined */ @@ -350,10 +335,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; } -MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { +#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 + return _bzhi_u64(bitContainer, nbBits); +#else assert(nbBits < BIT_MASK_SIZE); return bitContainer & BIT_mask[nbBits]; +#endif } /*! BIT_lookBits() : @@ -362,7 +351,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) * On 32-bits, maxNbBits==24. * On 64-bits, maxNbBits==56. * @return : value extracted */ -MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { /* arbitrate between double-shift and shift+mask */ #if 1 @@ -385,7 +374,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); } -MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) { bitD->bitsConsumed += nbBits; } @@ -394,7 +383,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) * Read (consume) next n bits from local register and update. * Pay attention to not read more than nbBits contained into local register. * @return : extracted value. */ -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) { size_t const value = BIT_lookBits(bitD, nbBits); BIT_skipBits(bitD, nbBits); @@ -411,6 +400,23 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) return value; } +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +{ + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; +} + /*! BIT_reloadDStream() : * Refill `bitD` from buffer previously set in BIT_initDStream() . * This function is safe, it guarantees it will not read beyond src buffer. @@ -422,10 +428,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) return BIT_DStream_overflow; if (bitD->ptr >= bitD->limitPtr) { - bitD->ptr -= bitD->bitsConsumed >> 3; - bitD->bitsConsumed &= 7; - bitD->bitContainer = MEM_readLEST(bitD->ptr); - return BIT_DStream_unfinished; + return BIT_reloadDStreamFast(bitD); } if (bitD->ptr == bitD->start) { if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; diff --git a/compiler.h b/compiler.h index 1877a0c..3e454f3 100644 --- a/compiler.h +++ b/compiler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -17,7 +17,7 @@ /* force inlining */ #if !defined(ZSTD_NO_INLINE) -#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ # define INLINE_KEYWORD inline #else # define INLINE_KEYWORD @@ -38,6 +38,17 @@ #endif +/** + On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC). + This explictly marks such functions as __cdecl so that the code will still compile + if a CC other than __cdecl has been made the default. +*/ +#if defined(_MSC_VER) +# define WIN_CDECL __cdecl +#else +# define WIN_CDECL +#endif + /** * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant * parameters. They must be inlined for the compiler to eliminate the constant @@ -117,6 +128,9 @@ # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +# elif defined(__aarch64__) +# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) +# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) # else # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ @@ -136,7 +150,7 @@ /* vectorization * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ -#if !defined(__clang__) && defined(__GNUC__) +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) # else @@ -146,6 +160,19 @@ # define DONT_VECTORIZE #endif +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. + */ +#if defined(__GNUC__) +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + /* disable warnings */ #ifdef _MSC_VER /* Visual Studio */ # include /* For Visual 2005 */ @@ -156,4 +183,106 @@ # pragma warning(disable : 4324) /* disable: C4324: padded structure */ #endif +/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ +#ifndef STATIC_BMI2 +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) +# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 +# define STATIC_BMI2 1 +# endif +# endif +#endif + +#ifndef STATIC_BMI2 + #define STATIC_BMI2 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +/* detects whether we are being compiled under msan */ +#ifndef ZSTD_MEMORY_SANITIZER +# if __has_feature(memory_sanitizer) +# define ZSTD_MEMORY_SANITIZER 1 +# else +# define ZSTD_MEMORY_SANITIZER 0 +# endif +#endif + +#if ZSTD_MEMORY_SANITIZER +/* Not all platforms that support msan provide sanitizers/msan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ +#include /* size_t */ +#define ZSTD_DEPS_NEED_STDINT +#include "zstd_deps.h" /* intptr_t */ + +/* Make memory region fully initialized (without changing its contents). */ +void __msan_unpoison(const volatile void *a, size_t size); + +/* Make memory region fully uninitialized (without changing its contents). + This is a legacy interface that does not update origin information. Use + __msan_allocated_memory() instead. */ +void __msan_poison(const volatile void *a, size_t size); + +/* Returns the offset of the first (at least partially) poisoned byte in the + memory range, or -1 if the whole range is good. */ +intptr_t __msan_test_shadow(const volatile void *x, size_t size); +#endif + +/* detects whether we are being compiled under asan */ +#ifndef ZSTD_ADDRESS_SANITIZER +# if __has_feature(address_sanitizer) +# define ZSTD_ADDRESS_SANITIZER 1 +# elif defined(__SANITIZE_ADDRESS__) +# define ZSTD_ADDRESS_SANITIZER 1 +# else +# define ZSTD_ADDRESS_SANITIZER 0 +# endif +#endif + +#if ZSTD_ADDRESS_SANITIZER +/* Not all platforms that support asan provide sanitizers/asan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ +#include /* size_t */ + +/** + * Marks a memory region ([addr, addr+size)) as unaddressable. + * + * This memory must be previously allocated by your program. Instrumented + * code is forbidden from accessing addresses in this region until it is + * unpoisoned. This function is not guaranteed to poison the entire region - + * it could poison only a subregion of [addr, addr+size) due to ASan + * alignment restrictions. + * + * \note This function is not thread-safe because no two threads can poison or + * unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. */ +void __asan_poison_memory_region(void const volatile *addr, size_t size); + +/** + * Marks a memory region ([addr, addr+size)) as addressable. + * + * This memory must be previously allocated by your program. Accessing + * addresses in this region is allowed until this region is poisoned again. + * This function could unpoison a super-region of [addr, addr+size) due + * to ASan alignment restrictions. + * + * \note This function is not thread-safe because no two threads can + * poison or unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. */ +void __asan_unpoison_memory_region(void const volatile *addr, size_t size); +#endif + #endif /* ZSTD_COMPILER_H */ diff --git a/cover.c b/cover.c index 2e129dd..697f483 100644 --- a/cover.c +++ b/cover.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -40,33 +40,42 @@ * Constants ***************************************/ #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) -#define DEFAULT_SPLITPOINT 1.0 +#define COVER_DEFAULT_SPLITPOINT 1.0 /*-************************************* * Console display ***************************************/ +#ifndef LOCALDISPLAYLEVEL static int g_displayLevel = 2; +#endif +#undef DISPLAY #define DISPLAY(...) \ { \ fprintf(stderr, __VA_ARGS__); \ fflush(stderr); \ } +#undef LOCALDISPLAYLEVEL #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ if (displayLevel >= l) { \ DISPLAY(__VA_ARGS__); \ } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ +#undef DISPLAYLEVEL #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) +#ifndef LOCALDISPLAYUPDATE +static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; +#endif +#undef LOCALDISPLAYUPDATE #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ if (displayLevel >= l) { \ - if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \ + if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \ g_time = clock(); \ DISPLAY(__VA_ARGS__); \ } \ } +#undef DISPLAYUPDATE #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) -static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; -static clock_t g_time = 0; /*-************************************* * Hash table @@ -120,9 +129,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) { /** * Internal hash function */ -static const U32 prime4bytes = 2654435761U; +static const U32 COVER_prime4bytes = 2654435761U; static U32 COVER_map_hash(COVER_map_t *map, U32 key) { - return (key * prime4bytes) >> (32 - map->sizeLog); + return (key * COVER_prime4bytes) >> (32 - map->sizeLog); } /** @@ -215,7 +224,7 @@ typedef struct { } COVER_ctx_t; /* We need a global context for qsort... */ -static COVER_ctx_t *g_ctx = NULL; +static COVER_ctx_t *g_coverCtx = NULL; /*-************************************* * Helper functions @@ -258,11 +267,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) { /** * Same as COVER_cmp() except ties are broken by pointer value - * NOTE: g_ctx must be set to call this function. A global is required because + * NOTE: g_coverCtx must be set to call this function. A global is required because * qsort doesn't take an opaque pointer. */ -static int COVER_strict_cmp(const void *lp, const void *rp) { - int result = COVER_cmp(g_ctx, lp, rp); +static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) { + int result = COVER_cmp(g_coverCtx, lp, rp); if (result == 0) { result = lp < rp ? -1 : 1; } @@ -271,8 +280,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) { /** * Faster version for d <= 8. */ -static int COVER_strict_cmp8(const void *lp, const void *rp) { - int result = COVER_cmp8(g_ctx, lp, rp); +static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) { + int result = COVER_cmp8(g_coverCtx, lp, rp); if (result == 0) { result = lp < rp ? -1 : 1; } @@ -603,7 +612,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, /* qsort doesn't take an opaque pointer, so pass as a global. * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is. */ - g_ctx = ctx; + g_coverCtx = ctx; #if defined(__OpenBSD__) mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32), (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); @@ -946,7 +955,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){ free(selection.dictContent); } -COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, +COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) { @@ -954,8 +963,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t largestCompressed = 0; BYTE* customDictContentEnd = customDictContent + dictContentSize; - BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize); - BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize); + BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity); + BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity); double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00; if (!largestDictbuffer || !candidateDictBuffer) { @@ -967,7 +976,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, /* Initial dictionary size and compressed size */ memcpy(largestDictbuffer, customDictContent, dictContentSize); dictContentSize = ZDICT_finalizeDictionary( - largestDictbuffer, dictContentSize, customDictContent, dictContentSize, + largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize, samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); if (ZDICT_isError(dictContentSize)) { @@ -1001,7 +1010,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, while (dictContentSize < largestDict) { memcpy(candidateDictBuffer, largestDictbuffer, largestDict); dictContentSize = ZDICT_finalizeDictionary( - candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize, + candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize, samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); if (ZDICT_isError(dictContentSize)) { @@ -1079,7 +1088,7 @@ static void COVER_tryParameters(void *opaque) { { const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, dictBufferCapacity, parameters); - selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail, + selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, totalCompressedSize); @@ -1106,7 +1115,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( /* constants */ const unsigned nbThreads = parameters->nbThreads; const double splitPoint = - parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint; + parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint; const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; diff --git a/cover.h b/cover.h index d9e0636..f2fbc53 100644 --- a/cover.h +++ b/cover.h @@ -1,3 +1,13 @@ +/* + * Copyright (c) 2017-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + #include /* fprintf */ #include /* malloc, free, qsort */ #include /* memset */ @@ -142,6 +152,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection); * smallest dictionary within a specified regression of the compressed size * from the largest dictionary. */ - COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, + COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize); diff --git a/cpu.h b/cpu.h index 5f0923f..cb21059 100644 --- a/cpu.h +++ b/cpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-present, Facebook, Inc. + * Copyright (c) 2018-2020, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -16,8 +16,6 @@ * https://github.com/facebook/folly/blob/master/folly/CpuId.h */ -#include - #include "mem.h" #ifdef _MSC_VER diff --git a/debug.c b/debug.c index 3ebdd1c..f303f4a 100644 --- a/debug.c +++ b/debug.c @@ -1,35 +1,15 @@ /* ****************************************************************** - debug - Part of FSE library - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * debug + * Part of FSE library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ diff --git a/debug.h b/debug.h index b4fc89d..8b57343 100644 --- a/debug.h +++ b/debug.h @@ -1,35 +1,15 @@ /* ****************************************************************** - debug - Part of FSE library - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * debug + * Part of FSE library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ @@ -71,15 +51,6 @@ extern "C" { #endif -/* DEBUGFILE can be defined externally, - * typically through compiler command line. - * note : currently useless. - * Value must be stderr or stdout */ -#ifndef DEBUGFILE -# define DEBUGFILE stderr -#endif - - /* recommended values for DEBUGLEVEL : * 0 : release mode, no debug, all run-time checks disabled * 1 : enables assert() only, no display @@ -96,7 +67,8 @@ extern "C" { */ #if (DEBUGLEVEL>=1) -# include +# define ZSTD_DEPS_NEED_ASSERT +# include "zstd_deps.h" #else # ifndef assert /* assert may be already defined, due to prior #include */ # define assert(condition) ((void)0) /* disable assert (default) */ @@ -104,7 +76,8 @@ extern "C" { #endif #if (DEBUGLEVEL>=2) -# include +# define ZSTD_DEPS_NEED_IO +# include "zstd_deps.h" extern int g_debuglevel; /* the variable is only declared, it actually lives in debug.c, and is shared by the whole process. @@ -112,14 +85,14 @@ extern int g_debuglevel; /* the variable is only declared, It's useful when enabling very verbose levels on selective conditions (such as position in src) */ -# define RAWLOG(l, ...) { \ - if (l<=g_debuglevel) { \ - fprintf(stderr, __VA_ARGS__); \ +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__VA_ARGS__); \ } } -# define DEBUGLOG(l, ...) { \ - if (l<=g_debuglevel) { \ - fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ - fprintf(stderr, " \n"); \ +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ + ZSTD_DEBUG_PRINT(" \n"); \ } } #else # define RAWLOG(l, ...) {} /* disabled */ diff --git a/entropy_common.c b/entropy_common.c index b12944e..f9fcb1a 100644 --- a/entropy_common.c +++ b/entropy_common.c @@ -1,36 +1,16 @@ -/* - Common functions of New Generation Entropy library - Copyright (C) 2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c -*************************************************************************** */ +/* ****************************************************************** + * Common functions of New Generation Entropy library + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ /* ************************************* * Dependencies @@ -58,8 +38,31 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } /*-************************************************************** * FSE NCount encoding-decoding ****************************************************************/ -size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, - const void* headerBuffer, size_t hbSize) +static U32 FSE_ctz(U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + return _BitScanForward(&r, val) ? (unsigned)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_ctz(val); +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return __CTZ(val); +# else /* Software version */ + U32 count = 0; + while ((val & 1) == 0) { + val >>= 1; + ++count; + } + return count; +# endif + } +} + +FORCE_INLINE_TEMPLATE +size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) { const BYTE* const istart = (const BYTE*) headerBuffer; const BYTE* const iend = istart + hbSize; @@ -70,23 +73,23 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t U32 bitStream; int bitCount; unsigned charnum = 0; + unsigned const maxSV1 = *maxSVPtr + 1; int previous0 = 0; - if (hbSize < 4) { - /* This function only works when hbSize >= 4 */ - char buffer[4]; - memset(buffer, 0, sizeof(buffer)); - memcpy(buffer, headerBuffer, hbSize); + if (hbSize < 8) { + /* This function only works when hbSize >= 8 */ + char buffer[8] = {0}; + ZSTD_memcpy(buffer, headerBuffer, hbSize); { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, buffer, sizeof(buffer)); if (FSE_isError(countSize)) return countSize; if (countSize > hbSize) return ERROR(corruption_detected); return countSize; } } - assert(hbSize >= 4); + assert(hbSize >= 8); /* init */ - memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ + ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ bitStream = MEM_readLE32(ip); nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); @@ -97,36 +100,58 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t threshold = 1<1) & (charnum<=*maxSVPtr)) { + for (;;) { if (previous0) { - unsigned n0 = charnum; - while ((bitStream & 0xFFFF) == 0xFFFF) { - n0 += 24; - if (ip < iend-5) { - ip += 2; - bitStream = MEM_readLE32(ip) >> bitCount; + /* Count the number of repeats. Each time the + * 2-bit repeat code is 0b11 there is another + * repeat. + * Avoid UB by setting the high bit to 1. + */ + int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; + while (repeats >= 12) { + charnum += 3 * 12; + if (LIKELY(ip <= iend-7)) { + ip += 3; } else { - bitStream >>= 16; - bitCount += 16; - } } - while ((bitStream & 3) == 3) { - n0 += 3; - bitStream >>= 2; - bitCount += 2; + bitCount -= (int)(8 * (iend - 7 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; } - n0 += bitStream & 3; + charnum += 3 * repeats; + bitStream >>= 2 * repeats; + bitCount += 2 * repeats; + + /* Add the final repeat which isn't 0b11. */ + assert((bitStream & 3) < 3); + charnum += bitStream & 3; bitCount += 2; - if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); - while (charnum < n0) normalizedCounter[charnum++] = 0; - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + + /* This is an error, but break and return an error + * at the end, because returning out of a loop makes + * it harder for the compiler to optimize. + */ + if (charnum >= maxSV1) break; + + /* We don't need to set the normalized count to 0 + * because we already memset the whole buffer to 0. + */ + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { assert((bitCount >> 3) <= 3); /* For first condition to work */ ip += bitCount>>3; bitCount &= 7; - bitStream = MEM_readLE32(ip) >> bitCount; } else { - bitStream >>= 2; - } } - { int const max = (2*threshold-1) - remaining; + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } + { + int const max = (2*threshold-1) - remaining; int count; if ((bitStream & (threshold-1)) < (U32)max) { @@ -139,24 +164,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t } count--; /* extra accuracy */ - remaining -= count < 0 ? -count : count; /* -1 means +1 */ + /* When it matters (small blocks), this is a + * predictable branch, because we don't use -1. + */ + if (count >= 0) { + remaining -= count; + } else { + assert(count == -1); + remaining += count; + } normalizedCounter[charnum++] = (short)count; previous0 = !count; - while (remaining < threshold) { - nbBits--; - threshold >>= 1; + + assert(threshold > 1); + if (remaining < threshold) { + /* This branch can be folded into the + * threshold update condition because we + * know that threshold > 1. + */ + if (remaining <= 1) break; + nbBits = BIT_highbit32(remaining) + 1; + threshold = 1 << (nbBits - 1); } + if (charnum >= maxSV1) break; - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { ip += bitCount>>3; bitCount &= 7; } else { bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; ip = iend - 4; } - bitStream = MEM_readLE32(ip) >> (bitCount & 31); - } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ + bitStream = MEM_readLE32(ip) >> bitCount; + } } if (remaining != 1) return ERROR(corruption_detected); + /* Only possible when there are too many zeros. */ + if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall); if (bitCount > 32) return ERROR(corruption_detected); *maxSVPtr = charnum-1; @@ -164,6 +208,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t return ip-istart; } +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_readNCount_body_default( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} +#endif + +size_t FSE_readNCount_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); + } +#endif + (void)bmi2; + return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +size_t FSE_readNCount( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); +} + /*! HUF_readStats() : Read compact Huffman tree, saved by HUF_writeCTable(). @@ -175,6 +256,17 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize) +{ + U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); +} + +FORCE_INLINE_TEMPLATE size_t +HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) { U32 weightTotal; const BYTE* ip = (const BYTE*) src; @@ -183,7 +275,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, if (!srcSize) return ERROR(srcSize_wrong); iSize = ip[0]; - /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ + /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) { /* special header */ oSize = iSize - 127; @@ -197,14 +289,14 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, huffWeight[n+1] = ip[n/2] & 15; } } } else { /* header compressed with FSE (normal case) */ - FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ + /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2); if (FSE_isError(oSize)) return oSize; } /* collect weight stats */ - memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); weightTotal = 0; { U32 n; for (n=0; n= HUF_TABLELOG_MAX) return ERROR(corruption_detected); @@ -234,3 +326,37 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, *nbSymbolsPtr = (U32)(oSize+1); return iSize+1; } + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1); +} +#endif + +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); + } +#endif + (void)bmi2; + return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); +} diff --git a/error_private.c b/error_private.c index 7c1bb67..45bba53 100644 --- a/error_private.c +++ b/error_private.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -47,6 +47,8 @@ const char* ERR_getErrorString(ERR_enum code) /* following error codes are not stable and may be removed or changed in a future version */ case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; + case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; case PREFIX(maxCode): default: return notErrorCode; } diff --git a/error_private.h b/error_private.h index 0d2fa7e..71b37b8 100644 --- a/error_private.h +++ b/error_private.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -21,7 +21,7 @@ extern "C" { /* **************************************** * Dependencies ******************************************/ -#include /* size_t */ +#include "zstd_deps.h" /* size_t */ #include "zstd_errors.h" /* enum list */ @@ -49,7 +49,7 @@ typedef ZSTD_ErrorCode ERR_enum; /*-**************************************** * Error codes handling ******************************************/ -#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#undef ERROR /* already defined on Visual Studio */ #define ERROR(name) ZSTD_ERROR(name) #define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) @@ -57,6 +57,10 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } +/* check and forward error code */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + /*-**************************************** * Error Strings diff --git a/fastcover.c b/fastcover.c index 941bb5a..dad521c 100644 --- a/fastcover.c +++ b/fastcover.c @@ -1,3 +1,13 @@ +/* + * Copyright (c) 2018-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + /*-************************************* * Dependencies ***************************************/ @@ -11,6 +21,7 @@ #include "threading.h" #include "cover.h" #include "zstd_internal.h" /* includes zstd.h */ +#include "zstd_compress_internal.h" /* ZSTD_hash*() */ #ifndef ZDICT_STATIC_LINKING_ONLY #define ZDICT_STATIC_LINKING_ONLY #endif @@ -23,7 +34,7 @@ #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) #define FASTCOVER_MAX_F 31 #define FASTCOVER_MAX_ACCEL 10 -#define DEFAULT_SPLITPOINT 0.75 +#define FASTCOVER_DEFAULT_SPLITPOINT 0.75 #define DEFAULT_F 20 #define DEFAULT_ACCEL 1 @@ -31,50 +42,50 @@ /*-************************************* * Console display ***************************************/ +#ifndef LOCALDISPLAYLEVEL static int g_displayLevel = 2; +#endif +#undef DISPLAY #define DISPLAY(...) \ { \ fprintf(stderr, __VA_ARGS__); \ fflush(stderr); \ } +#undef LOCALDISPLAYLEVEL #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ if (displayLevel >= l) { \ DISPLAY(__VA_ARGS__); \ } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ +#undef DISPLAYLEVEL #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) +#ifndef LOCALDISPLAYUPDATE +static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; +#endif +#undef LOCALDISPLAYUPDATE #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ if (displayLevel >= l) { \ - if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \ + if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \ g_time = clock(); \ DISPLAY(__VA_ARGS__); \ } \ } +#undef DISPLAYUPDATE #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) -static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; -static clock_t g_time = 0; /*-************************************* * Hash Functions ***************************************/ -static const U64 prime6bytes = 227718039650203ULL; -static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } -static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } - -static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; -static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } -static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } - - /** - * Hash the d-byte value pointed to by p and mod 2^f + * Hash the d-byte value pointed to by p and mod 2^f into the frequency vector */ -static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) { +static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) { if (d == 6) { - return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1); + return ZSTD_hash6Ptr(p, f); } - return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1); + return ZSTD_hash8Ptr(p, f); } @@ -476,7 +487,7 @@ static void FASTCOVER_tryParameters(void *opaque) parameters, segmentFreqs); const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100); - selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail, + selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, totalCompressedSize); @@ -607,7 +618,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover( /* constants */ const unsigned nbThreads = parameters->nbThreads; const double splitPoint = - parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint; + parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint; const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; diff --git a/fse.h b/fse.h index a7553e3..dd5fc44 100644 --- a/fse.h +++ b/fse.h @@ -1,35 +1,15 @@ /* ****************************************************************** - FSE : Finite State Entropy codec - Public Prototypes declaration - Copyright (C) 2013-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * FSE : Finite State Entropy codec + * Public Prototypes declaration + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ #if defined (__cplusplus) @@ -43,7 +23,7 @@ extern "C" { /*-***************************************** * Dependencies ******************************************/ -#include /* size_t, ptrdiff_t */ +#include "zstd_deps.h" /* size_t, ptrdiff_t */ /*-***************************************** @@ -157,10 +137,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize /*! FSE_normalizeCount(): normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + useLowProbCount is a boolean parameter which trades off compressed size for + faster header decoding. When it is set to 1, the compressed data will be slightly + smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be + faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 + is a good default, since header deserialization makes a big speed difference. + Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. @return : tableLog, or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, - const unsigned* count, size_t srcSize, unsigned maxSymbolValue); + const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount); /*! FSE_NCountWriteBound(): Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. @@ -248,6 +234,13 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); +/*! FSE_readNCount_bmi2(): + * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. + */ +FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize, int bmi2); + /*! Constructor and Destructor of FSE_DTable. Note that its size depends on 'tableLog' */ typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ @@ -308,12 +301,12 @@ If there is an error, the function will return an error code, which can be teste *******************************************/ /* FSE buffer bounds */ #define FSE_NCOUNTBOUND 512 -#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) +#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ -#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) -#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1< 12) ? (1 << (maxTableLog - 2)) : 1024) ) +#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); @@ -342,18 +335,30 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); /* FSE_buildCTable_wksp() : * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). - * `wkspSize` must be >= `(1<= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. */ +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2))) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) +#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) +FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ + size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); /**< build a fake FSE_DTable, designed to always generate the same symbolValue */ -size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog); -/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */ +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)) +#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); +/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ typedef enum { FSE_repeat_none, /**< Cannot use the previous table */ @@ -664,6 +669,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) #ifndef FSE_DEFAULT_MEMORY_USAGE # define FSE_DEFAULT_MEMORY_USAGE 13 #endif +#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE) +# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE" +#endif /*!FSE_MAX_SYMBOL_VALUE : * Maximum symbol value authorized. @@ -697,7 +705,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" #endif -#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3) +#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3) #endif /* FSE_STATIC_LINKING_ONLY */ diff --git a/fse_compress.c b/fse_compress.c index 68b47e1..89cb9df 100644 --- a/fse_compress.c +++ b/fse_compress.c @@ -1,42 +1,20 @@ /* ****************************************************************** - FSE : Finite State Entropy encoder - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c + * FSE : Finite State Entropy encoder + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ /* ************************************************************** * Includes ****************************************************************/ -#include /* malloc, free, qsort */ -#include /* memcpy, memset */ #include "compiler.h" #include "mem.h" /* U32, U16, etc. */ #include "debug.h" /* assert, DEBUGLOG */ @@ -45,6 +23,9 @@ #define FSE_STATIC_LINKING_ONLY #include "fse.h" #include "error_private.h" +#define ZSTD_DEPS_NEED_MALLOC +#define ZSTD_DEPS_NEED_MATH64 +#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */ /* ************************************************************** @@ -94,13 +75,15 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ; FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); U32 const step = FSE_TABLESTEP(tableSize); - U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; - FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace; + U32* cumul = (U32*)workSpace; + FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2)); + U32 highThreshold = tableSize-1; + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */ + if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge); /* CTable header */ - if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); tableU16[-2] = (U16) tableLog; tableU16[-1] = (U16) maxSymbolValue; assert(tableLog < 16); /* required for threshold strategy to work */ @@ -109,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ #ifdef __clang_analyzer__ - memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ + ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ #endif /* symbol start positions */ @@ -188,12 +171,13 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, return 0; } - +#ifndef ZSTD_NO_UNUSED_FUNCTIONS size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */ return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol)); } +#endif @@ -327,10 +311,10 @@ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) size_t size; if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); - return (FSE_CTable*)malloc(size); + return (FSE_CTable*)ZSTD_malloc(size); } -void FSE_freeCTable (FSE_CTable* ct) { free(ct); } +void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); } /* provides the minimum logSize to safely represent a distribution */ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) @@ -361,11 +345,10 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); } - /* Secondary normalization method. To be used when primary method fails. */ -static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount) { short const NOT_YET_ASSIGNED = -2; U32 s; @@ -382,7 +365,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, continue; } if (count[s] <= lowThreshold) { - norm[s] = -1; + norm[s] = lowProbCount; distributed++; total -= count[s]; continue; @@ -434,7 +417,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, { U64 const vStepLog = 62 - tableLog; U64 const mid = (1ULL << (vStepLog-1)) - 1; - U64 const rStep = ((((U64)1<> scale); @@ -490,7 +473,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, } } if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { /* corner case, need another normalization method */ - size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount); if (FSE_isError(errorCode)) return errorCode; } else normalizedCounter[largest] += (short)stillToDistribute; @@ -645,9 +628,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize, size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } -#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e -#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } - +#ifndef ZSTD_NO_UNUSED_FUNCTIONS /* FSE_compress_wksp() : * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). * `wkspSize` size must be `(1<= 2048) ); /* Write table description header */ { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); @@ -701,13 +682,16 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src typedef struct { FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; - BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; + union { + U32 hist_wksp[HIST_WKSP_SIZE_U32]; + BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; + } workspace; } fseWkspMax_t; size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) { fseWkspMax_t scratchBuffer; - DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ + DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); } @@ -716,6 +700,6 @@ size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcS { return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); } - +#endif #endif /* FSE_COMMONDEFS_ONLY */ diff --git a/fse_decompress.c b/fse_decompress.c index 4f07378..c164430 100644 --- a/fse_decompress.c +++ b/fse_decompress.c @@ -1,48 +1,29 @@ /* ****************************************************************** - FSE : Finite State Entropy decoder - Copyright (C) 2013-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c + * FSE : Finite State Entropy decoder + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ /* ************************************************************** * Includes ****************************************************************/ -#include /* malloc, free, qsort */ -#include /* memcpy, memset */ +#include "debug.h" /* assert */ #include "bitstream.h" #include "compiler.h" #define FSE_STATIC_LINKING_ONLY #include "fse.h" #include "error_private.h" +#define ZSTD_DEPS_NEED_MALLOC +#include "zstd_deps.h" /* ************************************************************** @@ -51,11 +32,6 @@ #define FSE_isError ERR_isError #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ -/* check and forward error code */ -#ifndef CHECK_F -#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } -#endif - /* ************************************************************** * Templates @@ -84,25 +60,27 @@ FSE_DTable* FSE_createDTable (unsigned tableLog) { if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; - return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); + return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); } void FSE_freeDTable (FSE_DTable* dt) { - free(dt); + ZSTD_free(dt); } -size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) { void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); - U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; + U16* symbolNext = (U16*)workSpace; + BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); U32 const maxSV1 = maxSymbolValue + 1; U32 const tableSize = 1 << tableLog; U32 highThreshold = tableSize-1; /* Sanity Checks */ + if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); @@ -120,11 +98,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; symbolNext[s] = normalizedCounter[s]; } } } - memcpy(dt, &DTableH, sizeof(DTableH)); + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); } /* Spread symbols */ - { U32 const tableMask = tableSize-1; + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. + */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + assert(NCountLength <= cSrcSize); ip += NCountLength; cSrcSize -= NCountLength; - CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) ); + if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); + workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog); + wkspSize -= FSE_DTABLE_SIZE(tableLog); + + CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) ); + + { + const void* ptr = dtable; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0); + } +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); +} - return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */ +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); +} +#endif + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); + } +#endif + (void)bmi2; + return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); } typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { + U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)]; + return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp)); +} + size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) { - DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ - return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG); + /* Static analyzer seems unable to understand this table will be properly initialized later */ + U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp)); } - +#endif #endif /* FSE_COMMONDEFS_ONLY */ diff --git a/hist.c b/hist.c index 45b7bab..8be9a30 100644 --- a/hist.c +++ b/hist.c @@ -1,36 +1,16 @@ /* ****************************************************************** - hist : Histogram functions - part of Finite State Entropy project - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ /* --- dependencies --- */ @@ -54,7 +34,7 @@ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, unsigned maxSymbolValue = *maxSymbolValuePtr; unsigned largestCount=0; - memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); + ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } while (ip= HIST_WKSP_SIZE_U32. + * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32. * @return : largest histogram frequency, - * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */ + * or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */ static size_t HIST_count_parallel_wksp( unsigned* count, unsigned* maxSymbolValuePtr, const void* source, size_t sourceSize, @@ -91,22 +71,21 @@ static size_t HIST_count_parallel_wksp( { const BYTE* ip = (const BYTE*)source; const BYTE* const iend = ip+sourceSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; + size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count); unsigned max=0; U32* const Counting1 = workSpace; U32* const Counting2 = Counting1 + 256; U32* const Counting3 = Counting2 + 256; U32* const Counting4 = Counting3 + 256; - memset(workSpace, 0, 4*256*sizeof(unsigned)); - /* safety checks */ + assert(*maxSymbolValuePtr <= 255); if (!sourceSize) { - memset(count, 0, maxSymbolValue + 1); + ZSTD_memset(count, 0, countSize); *maxSymbolValuePtr = 0; return 0; } - if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ + ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned)); /* by stripes of 16 bytes */ { U32 cached = MEM_read32(ip); ip += 4; @@ -138,21 +117,18 @@ static size_t HIST_count_parallel_wksp( /* finish last symbols */ while (ipmaxSymbolValue; s--) { - Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; - if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); - } } - { U32 s; - if (maxSymbolValue > 255) maxSymbolValue = 255; - for (s=0; s<=maxSymbolValue; s++) { - count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; - if (count[s] > max) max = count[s]; + for (s=0; s<256; s++) { + Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; + if (Counting1[s] > max) max = Counting1[s]; } } - while (!count[maxSymbolValue]) maxSymbolValue--; - *maxSymbolValuePtr = maxSymbolValue; + { unsigned maxSymbolValue = 255; + while (!Counting1[maxSymbolValue]) maxSymbolValue--; + if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall); + *maxSymbolValuePtr = maxSymbolValue; + ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */ + } return (size_t)max; } @@ -172,14 +148,6 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); } -/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ -size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize) -{ - unsigned tmpCounters[HIST_WKSP_SIZE_U32]; - return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); -} - /* HIST_count_wksp() : * Same as HIST_count(), but using an externally provided scratch buffer. * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ @@ -195,9 +163,19 @@ size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); } +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); +} + size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize) { unsigned tmpCounters[HIST_WKSP_SIZE_U32]; return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); } +#endif diff --git a/hist.h b/hist.h index 8b38935..245be35 100644 --- a/hist.h +++ b/hist.h @@ -1,40 +1,20 @@ /* ****************************************************************** - hist : Histogram functions - part of Finite State Entropy project - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ /* --- dependencies --- */ -#include /* size_t */ +#include "zstd_deps.h" /* size_t */ /* --- simple histogram functions --- */ diff --git a/huf.h b/huf.h index 6b572c4..1afef90 100644 --- a/huf.h +++ b/huf.h @@ -1,35 +1,15 @@ /* ****************************************************************** - huff0 huffman codec, - part of Finite State Entropy library - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * huff0 huffman codec, + * part of Finite State Entropy library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ #if defined (__cplusplus) @@ -40,7 +20,7 @@ extern "C" { #define HUF_H_298734234 /* *** Dependencies *** */ -#include /* size_t */ +#include "zstd_deps.h" /* size_t */ /* *** library symbols visibility *** */ @@ -110,7 +90,7 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, /** HUF_compress4X_wksp() : * Same as HUF_compress2(), but uses externally allocated `workSpace`. * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ -#define HUF_WORKSPACE_SIZE (6 << 10) +#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) #define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -131,6 +111,8 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, /* *** Dependencies *** */ #include "mem.h" /* U32 */ +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" /* *** Constants *** */ @@ -153,12 +135,16 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ /* static allocation of HUF's Compression Table */ +/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt */ +typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */ #define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ #define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ - U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \ - void* name##hv = &(name##hb); \ - HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ + HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */ /* static allocation of HUF's DTable */ typedef U32 HUF_DTable; @@ -204,10 +190,11 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, * or to save and regenerate 'CTable' using external methods. */ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); -typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); typedef enum { HUF_repeat_none, /**< Cannot use the previous table */ @@ -244,9 +231,22 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize); +/*! HUF_readStats_wksp() : + * Same as HUF_readStats() but takes an external workspace which must be + * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. + */ +#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) +#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + int bmi2); + /** HUF_readCTable() : * Loading a CTable saved with HUF_writeCTable() */ -size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); /** HUF_getNbBits() : * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX @@ -350,6 +350,9 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS #endif size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif #endif /* HUF_STATIC_LINKING_ONLY */ diff --git a/huf_compress.c b/huf_compress.c index f074f1e..fed76ca 100644 --- a/huf_compress.c +++ b/huf_compress.c @@ -1,35 +1,15 @@ /* ****************************************************************** - Huffman encoder, part of New Generation Entropy library - Copyright (C) 2013-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c + * Huffman encoder, part of New Generation Entropy library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ /* ************************************************************** @@ -43,8 +23,7 @@ /* ************************************************************** * Includes ****************************************************************/ -#include /* memcpy, memset */ -#include /* printf (debug) */ +#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ #include "compiler.h" #include "bitstream.h" #include "hist.h" @@ -60,8 +39,6 @@ ****************************************************************/ #define HUF_isError ERR_isError #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ -#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e -#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } /* ************************************************************** @@ -92,7 +69,7 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; - BYTE scratchBuffer[1< 0); /* check result */ if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); @@ -186,15 +159,15 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void /* Prepare base value per rank */ { U32 n, nextRankStart = 0; for (n=1; n<=tableLog; n++) { - U32 current = nextRankStart; + U32 curr = nextRankStart; nextRankStart += (rankVal[n] << (n-1)); - rankVal[n] = current; + rankVal[n] = curr; } } /* fill nbBits */ { U32 n; for (n=0; n maxNbBits to be maxNbBits. Then it adjusts + * the tree to so that it is a valid canonical Huffman tree. + * + * @pre The sum of the ranks of each symbol == 2^largestBits, + * where largestBits == huffNode[lastNonNull].nbBits. + * @post The sum of the ranks of each symbol == 2^largestBits, + * where largestBits is the return value <= maxNbBits. + * + * @param huffNode The Huffman tree modified in place to enforce maxNbBits. + * @param lastNonNull The symbol with the lowest count in the Huffman tree. + * @param maxNbBits The maximum allowed number of bits, which the Huffman tree + * may not respect. After this function the Huffman tree will + * respect maxNbBits. + * @return The maximum number of bits of the Huffman tree after adjustment, + * necessarily no more than maxNbBits. + */ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) { const U32 largestBits = huffNode[lastNonNull].nbBits; - if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */ + /* early exit : no elt > maxNbBits, so the tree is already valid. */ + if (largestBits <= maxNbBits) return largestBits; /* there are several too large elements (at least >= 2) */ { int totalCost = 0; const U32 baseCost = 1 << (largestBits - maxNbBits); - U32 n = lastNonNull; + int n = (int)lastNonNull; + /* Adjust any ranks > maxNbBits to maxNbBits. + * Compute totalCost, which is how far the sum of the ranks is + * we are over 2^largestBits after adjust the offending ranks. + */ while (huffNode[n].nbBits > maxNbBits) { totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); huffNode[n].nbBits = (BYTE)maxNbBits; - n --; - } /* n stops at huffNode[n].nbBits <= maxNbBits */ - while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ + n--; + } + /* n stops at huffNode[n].nbBits <= maxNbBits */ + assert(huffNode[n].nbBits <= maxNbBits); + /* n end at index of smallest symbol using < maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) --n; - /* renorm totalCost */ - totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ + /* renorm totalCost from 2^largestBits to 2^maxNbBits + * note : totalCost is necessarily a multiple of baseCost */ + assert((totalCost & (baseCost - 1)) == 0); + totalCost >>= (largestBits - maxNbBits); + assert(totalCost > 0); /* repay normalized cost */ { U32 const noSymbol = 0xF0F0F0F0; U32 rankLast[HUF_TABLELOG_MAX+2]; - int pos; - /* Get pos of last (smallest) symbol per rank */ - memset(rankLast, 0xF0, sizeof(rankLast)); + /* Get pos of last (smallest = lowest cum. count) symbol per rank */ + ZSTD_memset(rankLast, 0xF0, sizeof(rankLast)); { U32 currentNbBits = maxNbBits; + int pos; for (pos=n ; pos >= 0; pos--) { if (huffNode[pos].nbBits >= currentNbBits) continue; currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ - rankLast[maxNbBits-currentNbBits] = pos; + rankLast[maxNbBits-currentNbBits] = (U32)pos; } } while (totalCost > 0) { - U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; + /* Try to reduce the next power of 2 above totalCost because we + * gain back half the rank. + */ + U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { - U32 highPos = rankLast[nBitsToDecrease]; - U32 lowPos = rankLast[nBitsToDecrease-1]; + U32 const highPos = rankLast[nBitsToDecrease]; + U32 const lowPos = rankLast[nBitsToDecrease-1]; if (highPos == noSymbol) continue; + /* Decrease highPos if no symbols of lowPos or if it is + * not cheaper to remove 2 lowPos than highPos. + */ if (lowPos == noSymbol) break; { U32 const highTotal = huffNode[highPos].count; U32 const lowTotal = 2 * huffNode[lowPos].count; if (highTotal <= lowTotal) break; } } /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ + assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1); /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) - nBitsToDecrease ++; + nBitsToDecrease++; + assert(rankLast[nBitsToDecrease] != noSymbol); + /* Increase the number of bits to gain back half the rank cost. */ totalCost -= 1 << (nBitsToDecrease-1); + huffNode[rankLast[nBitsToDecrease]].nbBits++; + + /* Fix up the new rank. + * If the new rank was empty, this symbol is now its smallest. + * Otherwise, this symbol will be the largest in the new rank so no adjustment. + */ if (rankLast[nBitsToDecrease-1] == noSymbol) - rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ - huffNode[rankLast[nBitsToDecrease]].nbBits ++; + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; + /* Fix up the old rank. + * If the symbol was at position 0, meaning it was the highest weight symbol in the tree, + * it must be the only symbol in its rank, so the old rank now has no symbols. + * Otherwise, since the Huffman nodes are sorted by count, the previous position is now + * the smallest node in the rank. If the previous position belongs to a different rank, + * then the rank is now empty. + */ if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ rankLast[nBitsToDecrease] = noSymbol; else { rankLast[nBitsToDecrease]--; if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ - } } /* while (totalCost > 0) */ - + } + } /* while (totalCost > 0) */ + + /* If we've removed too much weight, then we have to add it back. + * To avoid overshooting again, we only adjust the smallest rank. + * We take the largest nodes from the lowest rank 0 and move them + * to rank 1. There's guaranteed to be enough rank 0 symbols because + * TODO. + */ while (totalCost < 0) { /* Sometimes, cost correction overshoot */ - if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ + /* special case : no rank 1 symbol (using maxNbBits-1); + * let's create one from largest rank 0 (using maxNbBits). + */ + if (rankLast[1] == noSymbol) { while (huffNode[n].nbBits == maxNbBits) n--; huffNode[n+1].nbBits--; - rankLast[1] = n+1; + assert(n >= 0); + rankLast[1] = (U32)(n+1); totalCost++; continue; } huffNode[ rankLast[1] + 1 ].nbBits--; rankLast[1]++; totalCost ++; - } } } /* there are several too large elements (at least >= 2) */ + } + } /* repay normalized cost */ + } /* there are several too large elements (at least >= 2) */ return maxNbBits; } - typedef struct { U32 base; - U32 current; + U32 curr; } rankPos; -static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue) -{ - rankPos rank[32]; - U32 n; +typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; + +#define RANK_POSITION_TABLE_SIZE 32 - memset(rank, 0, sizeof(rank)); - for (n=0; n<=maxSymbolValue; n++) { - U32 r = BIT_highbit32(count[n] + 1); - rank[r].base ++; +typedef struct { + huffNodeTable huffNodeTbl; + rankPos rankPosition[RANK_POSITION_TABLE_SIZE]; +} HUF_buildCTable_wksp_tables; + +/** + * HUF_sort(): + * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order. + * + * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled. + * Must have (maxSymbolValue + 1) entries. + * @param[in] count Histogram of the symbols. + * @param[in] maxSymbolValue Maximum symbol value. + * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries. + */ +static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) +{ + int n; + int const maxSymbolValue1 = (int)maxSymbolValue + 1; + + /* Compute base and set curr to base. + * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1. + * Then 2^lowerRank <= count[n]+1 <= 2^rank. + * We attribute each symbol to lowerRank's base value, because we want to know where + * each rank begins in the output, so for rank R we want to count ranks R+1 and above. + */ + ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); + for (n = 0; n < maxSymbolValue1; ++n) { + U32 lowerRank = BIT_highbit32(count[n] + 1); + rankPosition[lowerRank].base++; } - for (n=30; n>0; n--) rank[n-1].base += rank[n].base; - for (n=0; n<32; n++) rank[n].current = rank[n].base; - for (n=0; n<=maxSymbolValue; n++) { + assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0); + for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) { + rankPosition[n-1].base += rankPosition[n].base; + rankPosition[n-1].curr = rankPosition[n-1].base; + } + /* Sort */ + for (n = 0; n < maxSymbolValue1; ++n) { U32 const c = count[n]; U32 const r = BIT_highbit32(c+1) + 1; - U32 pos = rank[r].current++; - while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) { + U32 pos = rankPosition[r].curr++; + /* Insert into the correct position in the rank. + * We have at most 256 symbols, so this insertion should be fine. + */ + while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { huffNode[pos] = huffNode[pos-1]; pos--; } @@ -343,45 +412,40 @@ static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValu /** HUF_buildCTable_wksp() : * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned. + * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). */ #define STARTNODE (HUF_SYMBOLVALUE_MAX+1) -typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; -size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) + +/* HUF_buildTree(): + * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree. + * + * @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array. + * @param maxSymbolValue The maximum symbol value. + * @return The smallest node in the Huffman tree (by count). + */ +static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue) { - nodeElt* const huffNode0 = (nodeElt*)workSpace; - nodeElt* const huffNode = huffNode0+1; - U32 n, nonNullRank; + nodeElt* const huffNode0 = huffNode - 1; + int nonNullRank; int lowS, lowN; - U16 nodeNb = STARTNODE; - U32 nodeRoot; - - /* safety checks */ - if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall); - if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); - memset(huffNode0, 0, sizeof(huffNodeTable)); - - /* sort, decreasing order */ - HUF_sort(huffNode, count, maxSymbolValue); - + int nodeNb = STARTNODE; + int n, nodeRoot; /* init for parents */ - nonNullRank = maxSymbolValue; + nonNullRank = (int)maxSymbolValue; while(huffNode[nonNullRank].count == 0) nonNullRank--; lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; - huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; + huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb; nodeNb++; lowS-=2; for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ /* create parents */ while (nodeNb <= nodeRoot) { - U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; - U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; - huffNode[n1].parent = huffNode[n2].parent = nodeNb; + huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb; nodeNb++; } @@ -392,42 +456,73 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo for (n=0; n<=nonNullRank; n++) huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; - /* enforce maxTableLog */ - maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); - - /* fill result into tree (val, nbBits) */ - { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; - U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; - if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ - for (n=0; n<=nonNullRank; n++) - nbPerRank[huffNode[n].nbBits]++; - /* determine stating value per rank */ - { U16 min = 0; - for (n=maxNbBits; n>0; n--) { - valPerRank[n] = min; /* get starting value within each rank */ - min += nbPerRank[n]; - min >>= 1; - } } - for (n=0; n<=maxSymbolValue; n++) - tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ - for (n=0; n<=maxSymbolValue; n++) - tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ - } - - return maxNbBits; + return nonNullRank; } -/** HUF_buildCTable() : - * @return : maxNbBits - * Note : count is used before tree is written, so they can safely overlap +/** + * HUF_buildCTableFromTree(): + * Build the CTable given the Huffman tree in huffNode. + * + * @param[out] CTable The output Huffman CTable. + * @param huffNode The Huffman tree. + * @param nonNullRank The last and smallest node in the Huffman tree. + * @param maxSymbolValue The maximum symbol value. + * @param maxNbBits The exact maximum number of bits used in the Huffman tree. */ -size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) +static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits) { - huffNodeTable nodeTable; - return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); + /* fill result into ctable (val, nbBits) */ + int n; + U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + int const alphabetSize = (int)(maxSymbolValue + 1); + for (n=0; n<=nonNullRank; n++) + nbPerRank[huffNode[n].nbBits]++; + /* determine starting value per rank */ + { U16 min = 0; + for (n=(int)maxNbBits; n>0; n--) { + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + for (n=0; nhuffNodeTbl; + nodeElt* const huffNode = huffNode0+1; + int nonNullRank; + + /* safety checks */ + if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) + return ERROR(workSpace_tooSmall); + if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) + return ERROR(maxSymbolValue_tooLarge); + ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable)); + + /* sort, decreasing order */ + HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); + + /* build tree */ + nonNullRank = HUF_buildTree(huffNode, maxSymbolValue); + + /* enforce maxTableLog */ + maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); + if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ + + HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits); + + return maxNbBits; +} + +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { size_t nbBits = 0; int s; @@ -437,7 +532,7 @@ static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count return nbBits >> 3; } -static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { int bad = 0; int s; for (s = 0; s <= (int)maxSymbolValue; ++s) { @@ -476,7 +571,7 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, /* init */ if (dstSize < 8) return 0; /* not enough space to compress */ - { size_t const initErr = BIT_initCStream(&bitC, op, oend-op); + { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); if (HUF_isError(initErr)) return 0; } n = srcSize & ~3; /* join to mod 4 */ @@ -573,7 +668,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, if (srcSize < 12) return 0; /* no saving possible : too small input */ op += 6; /* jumpTable */ - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); if (cSize==0) return 0; assert(cSize <= 65535); MEM_writeLE16(ostart, (U16)cSize); @@ -581,7 +677,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, } ip += segmentSize; - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); if (cSize==0) return 0; assert(cSize <= 65535); MEM_writeLE16(ostart+2, (U16)cSize); @@ -589,7 +686,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, } ip += segmentSize; - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); if (cSize==0) return 0; assert(cSize <= 65535); MEM_writeLE16(ostart+4, (U16)cSize); @@ -597,12 +695,14 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, } ip += segmentSize; - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) ); + assert(op <= oend); + assert(ip <= iend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); if (cSize==0) return 0; op += cSize; } - return op-ostart; + return (size_t)(op-ostart); } size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) @@ -618,40 +718,44 @@ static size_t HUF_compressCTable_internal( HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) { size_t const cSize = (nbStreams==HUF_singleStream) ? - HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) : - HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2); + HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : + HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); if (HUF_isError(cSize)) { return cSize; } if (cSize==0) { return 0; } /* uncompressible */ op += cSize; /* check compressibility */ + assert(op >= ostart); if ((size_t)(op-ostart) >= srcSize-1) { return 0; } - return op-ostart; + return (size_t)(op-ostart); } typedef struct { unsigned count[HUF_SYMBOLVALUE_MAX + 1]; HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; - huffNodeTable nodeTable; + HUF_buildCTable_wksp_tables buildCTable_wksp; } HUF_compress_tables_t; /* HUF_compress_internal() : - * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ + * `workSpace_align4` must be aligned on 4-bytes boundaries, + * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */ static size_t HUF_compress_internal (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, HUF_nbStreams_e nbStreams, - void* workSpace, size_t wkspSize, + void* workSpace_align4, size_t wkspSize, HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, const int bmi2) { - HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstSize; BYTE* op = ostart; + HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); + assert(((size_t)workSpace_align4 & 3) == 0); /* must be aligned on 4-bytes boundaries */ + /* checks & inits */ - if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); if (!srcSize) return 0; /* Uncompressed */ if (!dstSize) return 0; /* cannot fit anything within dst budget */ @@ -669,7 +773,7 @@ HUF_compress_internal (void* dst, size_t dstSize, } /* Scan input and build symbol stats */ - { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) ); + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) ); if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ } @@ -691,11 +795,11 @@ HUF_compress_internal (void* dst, size_t dstSize, huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, maxSymbolValue, huffLog, - table->nodeTable, sizeof(table->nodeTable)); + &table->buildCTable_wksp, sizeof(table->buildCTable_wksp)); CHECK_F(maxBits); huffLog = (U32)maxBits; /* Zero unused symbols in CTable, so we can check it for validity */ - memset(table->CTable + (maxSymbolValue + 1), 0, + ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0, sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); } @@ -716,7 +820,7 @@ HUF_compress_internal (void* dst, size_t dstSize, op += hSize; if (repeat) { *repeat = HUF_repeat_none; } if (oldHufTable) - memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ + ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ } return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, @@ -747,14 +851,6 @@ size_t HUF_compress1X_repeat (void* dst, size_t dstSize, repeat, preferRepeat, bmi2); } -size_t HUF_compress1X (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog) -{ - unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; - return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); -} - /* HUF_compress4X_repeat(): * compress input using 4 streams. * provide workspace to generate compression tables */ @@ -784,6 +880,25 @@ size_t HUF_compress4X_repeat (void* dst, size_t dstSize, hufTable, repeat, preferRepeat, bmi2); } +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/** HUF_buildCTable() : + * @return : maxNbBits + * Note : count is used before tree is written, so they can safely overlap + */ +size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) +{ + HUF_buildCTable_wksp_tables workspace; + return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace)); +} + +size_t HUF_compress1X (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog) @@ -796,3 +911,4 @@ size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSi { return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); } +#endif diff --git a/huf_decompress.c b/huf_decompress.c index bb2d0a9..65e157e 100644 --- a/huf_decompress.c +++ b/huf_decompress.c @@ -1,41 +1,21 @@ /* ****************************************************************** - huff0 huffman decoder, - part of Finite State Entropy library - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * huff0 huffman decoder, + * part of Finite State Entropy library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ /* ************************************************************** * Dependencies ****************************************************************/ -#include /* memcpy, memset */ +#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ #include "compiler.h" #include "bitstream.h" /* BIT_* */ #include "fse.h" /* to compress headers */ @@ -61,9 +41,6 @@ * Error Management ****************************************************************/ #define HUF_isError ERR_isError -#ifndef CHECK_F -#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } -#endif /* ************************************************************** @@ -126,7 +103,7 @@ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) { DTableDesc dtd; - memcpy(&dtd, table, sizeof(dtd)); + ZSTD_memcpy(&dtd, table, sizeof(dtd)); return dtd; } @@ -138,29 +115,51 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) /*-***************************/ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ +/** + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. + */ +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + D4 = symbol + (nbBits << 8); + } else { + D4 = (symbol << 8) + nbBits; + } + D4 *= 0x0001000100010001ULL; + return D4; +} + +typedef struct { + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; + + size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +{ + return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) { U32 tableLog = 0; U32 nbSymbols = 0; size_t iSize; void* const dtPtr = DTable + 1; HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; - U32* rankVal; - BYTE* huffWeight; - size_t spaceUsed32 = 0; - - rankVal = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; - huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); + DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); + if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); - /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ + /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); if (HUF_isError(iSize)) return iSize; /* Table header */ @@ -168,40 +167,117 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ dtd.tableType = 0; dtd.tableLog = (BYTE)tableLog; - memcpy(DTable, &dtd, sizeof(dtd)); + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); } - /* Calculate starting value for each rank */ - { U32 n, nextRankStart = 0; - for (n=1; n> 1; - U32 u; - HUF_DEltX1 D; - D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); - for (u = rankVal[w]; u < rankVal[w] + length; u++) - dt[u] = D; - rankVal[w] += length; - } } + /* Compute symbols and rankStart given rankVal: + * + * rankVal already contains the number of values of each weight. + * + * symbols contains the symbols ordered by weight. First are the rankVal[0] + * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. + * symbols[0] is filled (but unused) to avoid a branch. + * + * rankStart contains the offset where each rank belongs in the DTable. + * rankStart[0] is not filled because there are no entries in the table for + * weight 0. + */ + { + int n; + int nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int)nbSymbols - unroll + 1; + for (n=0; n<(int)tableLog+1; n++) { + U32 const curr = nextRankStart; + nextRankStart += wksp->rankVal[n]; + wksp->rankStart[n] = curr; + } + for (n=0; n < nLimit; n += unroll) { + int u; + for (u=0; u < unroll; ++u) { + size_t const w = wksp->huffWeight[n+u]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); + } + } + for (; n < (int)nbSymbols; ++n) { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; + } + } + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outter loop. + * We can switch based on the length to a different inner loop which is + * optimized for that particular case. + */ + { + U32 w; + int symbol=wksp->rankVal[0]; + int rankStart=0; + for (w=1; wrankVal[w]; + int const length = (1 << w) >> 1; + int uStart = rankStart; + BYTE const nbBits = (BYTE)(tableLog + 1 - w); + int s; + int u; + switch (length) { + case 1: + for (s=0; ssymbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart] = D; + uStart += 1; + } + break; + case 2: + for (s=0; ssymbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart+0] = D; + dt[uStart+1] = D; + uStart += 2; + } + break; + case 4: + for (s=0; ssymbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + uStart += 4; + } + break; + case 8: + for (s=0; ssymbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + MEM_write64(dt + uStart + 4, D4); + uStart += 8; + } + break; + default: + for (s=0; ssymbols[symbol + s], nbBits); + for (u=0; u < length; u += 16) { + MEM_write64(dt + uStart + u + 0, D4); + MEM_write64(dt + uStart + u + 4, D4); + MEM_write64(dt + uStart + u + 8, D4); + MEM_write64(dt + uStart + u + 12, D4); + } + assert(u == length); + uStart += length; + } + break; + } + symbol += symbolCount; + rankStart += symbolCount * length; + } + } return iSize; } -size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_readDTableX1_wksp(DTable, src, srcSize, - workSpace, sizeof(workSpace)); -} - FORCE_INLINE_TEMPLATE BYTE HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) { @@ -282,6 +358,7 @@ HUF_decompress4X1_usingDTable_internal_body( { const BYTE* const istart = (const BYTE*) cSrc; BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; const void* const dtPtr = DTable + 1; const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; @@ -306,9 +383,9 @@ HUF_decompress4X1_usingDTable_internal_body( BYTE* op2 = opStart2; BYTE* op3 = opStart3; BYTE* op4 = opStart4; - U32 endSignal = BIT_DStream_unfinished; DTableDesc const dtd = HUF_getDTableDesc(DTable); U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); @@ -317,8 +394,7 @@ HUF_decompress4X1_usingDTable_internal_body( CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) { + for ( ; (endSignal) & (op4 < olimit) ; ) { HUF_DECODE_SYMBOLX1_2(op1, &bitD1); HUF_DECODE_SYMBOLX1_2(op2, &bitD2); HUF_DECODE_SYMBOLX1_2(op3, &bitD3); @@ -335,10 +411,10 @@ HUF_decompress4X1_usingDTable_internal_body( HUF_DECODE_SYMBOLX1_0(op2, &bitD2); HUF_DECODE_SYMBOLX1_0(op3, &bitD3); HUF_DECODE_SYMBOLX1_0(op4, &bitD4); - BIT_reloadDStream(&bitD1); - BIT_reloadDStream(&bitD2); - BIT_reloadDStream(&bitD3); - BIT_reloadDStream(&bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; } /* check corruption */ @@ -400,20 +476,6 @@ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, } -size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); -} - size_t HUF_decompress4X1_usingDTable( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, @@ -430,8 +492,7 @@ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, - workSpace, wkspSize); + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; @@ -447,18 +508,6 @@ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, } -size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} -size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} - #endif /* HUF_FORCE_DECOMPRESS_X2 */ @@ -485,7 +534,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 co U32 rankVal[HUF_TABLELOG_MAX + 1]; /* get pre-calculated rankVal */ - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal)); /* fill skipped values */ if (minWeight>1) { @@ -527,7 +576,7 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, const U32 minBits = nbBitsBaseline - maxWeight; U32 s; - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal)); /* fill DTable */ for (s=0; s wkspSize) return ERROR(tableLog_tooLarge); rankStart = rankStart0 + 1; - memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); + ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ + /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); if (HUF_isError(iSize)) return iSize; @@ -610,9 +659,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, /* Get start index of each weight */ { U32 w, nextRankStart = 0; for (w=1; w= 1 */ - memcpy(op, dt+val, 2); + ZSTD_memcpy(op, dt+val, 2); BIT_skipBits(DStream, dt[val].nbBits); return dt[val].length; } @@ -680,7 +722,7 @@ FORCE_INLINE_TEMPLATE U32 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) { size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 1); + ZSTD_memcpy(op, dt+val, 1); if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); else { if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { @@ -757,7 +799,6 @@ HUF_decompress1X2_usingDTable_internal_body( return dstSize; } - FORCE_INLINE_TEMPLATE size_t HUF_decompress4X2_usingDTable_internal_body( void* dst, size_t dstSize, @@ -769,6 +810,7 @@ HUF_decompress4X2_usingDTable_internal_body( { const BYTE* const istart = (const BYTE*) cSrc; BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); const void* const dtPtr = DTable+1; const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; @@ -793,7 +835,7 @@ HUF_decompress4X2_usingDTable_internal_body( BYTE* op2 = opStart2; BYTE* op3 = opStart3; BYTE* op4 = opStart4; - U32 endSignal; + U32 endSignal = 1; DTableDesc const dtd = HUF_getDTableDesc(DTable); U32 const dtLog = dtd.tableLog; @@ -804,8 +846,29 @@ HUF_decompress4X2_usingDTable_internal_body( CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); /* 16-32 symbols per loop (4-8 symbols per stream) */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) { + for ( ; (endSignal) & (op4 < olimit); ) { +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else HUF_DECODE_SYMBOLX2_2(op1, &bitD1); HUF_DECODE_SYMBOLX2_2(op2, &bitD2); HUF_DECODE_SYMBOLX2_2(op3, &bitD3); @@ -822,8 +885,12 @@ HUF_decompress4X2_usingDTable_internal_body( HUF_DECODE_SYMBOLX2_0(op2, &bitD2); HUF_DECODE_SYMBOLX2_0(op3, &bitD3); HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + endSignal = (U32)LIKELY( + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#endif } /* check corruption */ @@ -876,20 +943,6 @@ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, } -size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} - size_t HUF_decompress4X2_usingDTable( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, @@ -923,20 +976,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, } -size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} - #endif /* HUF_FORCE_DECOMPRESS_X1 */ @@ -1037,67 +1076,6 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) } -typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); - -size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) - static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; -#endif - - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); -#else - return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); -#endif - } -} - -size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#else - return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : - HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; -#endif - } -} - -size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - - size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, @@ -1131,8 +1109,8 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, /* validation checks */ if (dstSize == 0) return ERROR(dstSize_tooSmall); if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); #if defined(HUF_FORCE_DECOMPRESS_X1) @@ -1154,14 +1132,6 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, } } -size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { @@ -1185,7 +1155,7 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; @@ -1232,3 +1202,149 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds #endif } } + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX1_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX2_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); + +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) + static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#endif + + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); +#else + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); +#endif + } +} + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#else + return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#endif + } +} + +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +#endif diff --git a/mem.h b/mem.h index 530d30c..4728ef7 100644 --- a/mem.h +++ b/mem.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -18,8 +18,10 @@ extern "C" { /*-**************************************** * Dependencies ******************************************/ -#include /* size_t, ptrdiff_t */ -#include /* memcpy */ +#include /* size_t, ptrdiff_t */ +#include "compiler.h" /* __has_builtin */ +#include "debug.h" /* DEBUG_STATIC_ASSERT */ +#include "zstd_deps.h" /* ZSTD_memcpy */ /*-**************************************** @@ -39,93 +41,15 @@ extern "C" { # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ #endif -#ifndef __has_builtin -# define __has_builtin(x) 0 /* compat. with non-clang compilers */ -#endif - -/* code only tested on 32 and 64 bits systems */ -#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } -MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } - -/* detects whether we are being compiled under msan */ -#if defined (__has_feature) -# if __has_feature(memory_sanitizer) -# define MEMORY_SANITIZER 1 -# endif -#endif - -#if defined (MEMORY_SANITIZER) -/* Not all platforms that support msan provide sanitizers/msan_interface.h. - * We therefore declare the functions we need ourselves, rather than trying to - * include the header file... */ - -#include /* intptr_t */ - -/* Make memory region fully initialized (without changing its contents). */ -void __msan_unpoison(const volatile void *a, size_t size); - -/* Make memory region fully uninitialized (without changing its contents). - This is a legacy interface that does not update origin information. Use - __msan_allocated_memory() instead. */ -void __msan_poison(const volatile void *a, size_t size); - -/* Returns the offset of the first (at least partially) poisoned byte in the - memory range, or -1 if the whole range is good. */ -intptr_t __msan_test_shadow(const volatile void *x, size_t size); -#endif - -/* detects whether we are being compiled under asan */ -#if defined (__has_feature) -# if __has_feature(address_sanitizer) -# define ADDRESS_SANITIZER 1 -# endif -#elif defined(__SANITIZE_ADDRESS__) -# define ADDRESS_SANITIZER 1 -#endif - -#if defined (ADDRESS_SANITIZER) -/* Not all platforms that support asan provide sanitizers/asan_interface.h. - * We therefore declare the functions we need ourselves, rather than trying to - * include the header file... */ - -/** - * Marks a memory region ([addr, addr+size)) as unaddressable. - * - * This memory must be previously allocated by your program. Instrumented - * code is forbidden from accessing addresses in this region until it is - * unpoisoned. This function is not guaranteed to poison the entire region - - * it could poison only a subregion of [addr, addr+size) due to ASan - * alignment restrictions. - * - * \note This function is not thread-safe because no two threads can poison or - * unpoison memory in the same memory region simultaneously. - * - * \param addr Start of memory region. - * \param size Size of memory region. */ -void __asan_poison_memory_region(void const volatile *addr, size_t size); - -/** - * Marks a memory region ([addr, addr+size)) as addressable. - * - * This memory must be previously allocated by your program. Accessing - * addresses in this region is allowed until this region is poisoned again. - * This function could unpoison a super-region of [addr, addr+size) due - * to ASan alignment restrictions. - * - * \note This function is not thread-safe because no two threads can - * poison or unpoison memory in the same memory region simultaneously. - * - * \param addr Start of memory region. - * \param size Size of memory region. */ -void __asan_unpoison_memory_region(void const volatile *addr, size_t size); -#endif - - /*-************************************************************** * Basic Types *****************************************************************/ #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include +# if defined(_AIX) +# include +# else +# include /* intptr_t */ +# endif typedef uint8_t BYTE; typedef uint16_t U16; typedef int16_t S16; @@ -157,7 +81,53 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); /*-************************************************************** -* Memory I/O +* Memory I/O API +*****************************************************************/ +/*=== Static platform detection ===*/ +MEM_STATIC unsigned MEM_32bits(void); +MEM_STATIC unsigned MEM_64bits(void); +MEM_STATIC unsigned MEM_isLittleEndian(void); + +/*=== Native unaligned read/write ===*/ +MEM_STATIC U16 MEM_read16(const void* memPtr); +MEM_STATIC U32 MEM_read32(const void* memPtr); +MEM_STATIC U64 MEM_read64(const void* memPtr); +MEM_STATIC size_t MEM_readST(const void* memPtr); + +MEM_STATIC void MEM_write16(void* memPtr, U16 value); +MEM_STATIC void MEM_write32(void* memPtr, U32 value); +MEM_STATIC void MEM_write64(void* memPtr, U64 value); + +/*=== Little endian unaligned read/write ===*/ +MEM_STATIC U16 MEM_readLE16(const void* memPtr); +MEM_STATIC U32 MEM_readLE24(const void* memPtr); +MEM_STATIC U32 MEM_readLE32(const void* memPtr); +MEM_STATIC U64 MEM_readLE64(const void* memPtr); +MEM_STATIC size_t MEM_readLEST(const void* memPtr); + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val); +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val); +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val); + +/*=== Big endian unaligned read/write ===*/ +MEM_STATIC U32 MEM_readBE32(const void* memPtr); +MEM_STATIC U64 MEM_readBE64(const void* memPtr); +MEM_STATIC size_t MEM_readBEST(const void* memPtr); + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val); + +/*=== Byteswap ===*/ +MEM_STATIC U32 MEM_swap32(U32 in); +MEM_STATIC U64 MEM_swap64(U64 in); +MEM_STATIC size_t MEM_swapST(size_t in); + + +/*-************************************************************** +* Memory I/O Implementation *****************************************************************/ /* MEM_FORCE_MEMORY_ACCESS : * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. @@ -236,37 +206,37 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = MEM_STATIC U16 MEM_read16(const void* memPtr) { - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; + U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } MEM_STATIC U32 MEM_read32(const void* memPtr) { - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; + U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } MEM_STATIC U64 MEM_read64(const void* memPtr) { - U64 val; memcpy(&val, memPtr, sizeof(val)); return val; + U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } MEM_STATIC size_t MEM_readST(const void* memPtr) { - size_t val; memcpy(&val, memPtr, sizeof(val)); return val; + size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } MEM_STATIC void MEM_write16(void* memPtr, U16 value) { - memcpy(memPtr, &value, sizeof(value)); + ZSTD_memcpy(memPtr, &value, sizeof(value)); } MEM_STATIC void MEM_write32(void* memPtr, U32 value) { - memcpy(memPtr, &value, sizeof(value)); + ZSTD_memcpy(memPtr, &value, sizeof(value)); } MEM_STATIC void MEM_write64(void* memPtr, U64 value) { - memcpy(memPtr, &value, sizeof(value)); + ZSTD_memcpy(memPtr, &value, sizeof(value)); } #endif /* MEM_FORCE_MEMORY_ACCESS */ @@ -445,6 +415,9 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) MEM_writeBE64(memPtr, (U64)val); } +/* code only tested on 32 and 64 bits systems */ +MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } + #if defined (__cplusplus) } diff --git a/pool.c b/pool.c index f575935..4c1b833 100644 --- a/pool.c +++ b/pool.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -10,9 +10,9 @@ /* ====== Dependencies ======= */ -#include /* size_t */ +#include "zstd_deps.h" /* size_t */ #include "debug.h" /* assert */ -#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */ +#include "zstd_internal.h" /* ZSTD_customMalloc, ZSTD_customFree */ #include "pool.h" /* ====== Compiler specifics ====== */ @@ -105,6 +105,10 @@ static void* POOL_thread(void* opaque) { assert(0); /* Unreachable */ } +POOL_ctx* ZSTD_createThreadPool(size_t numThreads) { + return POOL_create (numThreads, 0); +} + POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); } @@ -115,14 +119,14 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, /* Check parameters */ if (!numThreads) { return NULL; } /* Allocate the context and zero initialize */ - ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem); + ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem); if (!ctx) { return NULL; } /* Initialize the job queue. * It needs one extra space since one space is wasted to differentiate * empty and full queues. */ ctx->queueSize = queueSize + 1; - ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem); + ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem); ctx->queueHead = 0; ctx->queueTail = 0; ctx->numThreadsBusy = 0; @@ -136,7 +140,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, } ctx->shutdown = 0; /* Allocate space for the thread handles */ - ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem); + ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem); ctx->threadCapacity = 0; ctx->customMem = customMem; /* Check for errors */ @@ -179,12 +183,14 @@ void POOL_free(POOL_ctx *ctx) { ZSTD_pthread_mutex_destroy(&ctx->queueMutex); ZSTD_pthread_cond_destroy(&ctx->queuePushCond); ZSTD_pthread_cond_destroy(&ctx->queuePopCond); - ZSTD_free(ctx->queue, ctx->customMem); - ZSTD_free(ctx->threads, ctx->customMem); - ZSTD_free(ctx, ctx->customMem); + ZSTD_customFree(ctx->queue, ctx->customMem); + ZSTD_customFree(ctx->threads, ctx->customMem); + ZSTD_customFree(ctx, ctx->customMem); } - +void ZSTD_freeThreadPool (ZSTD_threadPool* pool) { + POOL_free (pool); +} size_t POOL_sizeof(POOL_ctx *ctx) { if (ctx==NULL) return 0; /* supports sizeof NULL */ @@ -203,11 +209,11 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) return 0; } /* numThreads > threadCapacity */ - { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); + { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); if (!threadPool) return 1; /* replace existing thread pool */ - memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); - ZSTD_free(ctx->threads, ctx->customMem); + ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); + ZSTD_customFree(ctx->threads, ctx->customMem); ctx->threads = threadPool; /* Initialize additional threads */ { size_t threadId; @@ -301,7 +307,7 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) struct POOL_ctx_s { int dummy; }; -static POOL_ctx g_ctx; +static POOL_ctx g_poolCtx; POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); @@ -311,11 +317,11 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customM (void)numThreads; (void)queueSize; (void)customMem; - return &g_ctx; + return &g_poolCtx; } void POOL_free(POOL_ctx* ctx) { - assert(!ctx || ctx == &g_ctx); + assert(!ctx || ctx == &g_poolCtx); (void)ctx; } @@ -337,7 +343,7 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { size_t POOL_sizeof(POOL_ctx* ctx) { if (ctx==NULL) return 0; /* supports sizeof NULL */ - assert(ctx == &g_ctx); + assert(ctx == &g_poolCtx); return sizeof(*ctx); } diff --git a/pool.h b/pool.h index 458d37f..816e25f 100644 --- a/pool.h +++ b/pool.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -16,7 +16,7 @@ extern "C" { #endif -#include /* size_t */ +#include "zstd_deps.h" #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ #include "zstd.h" diff --git a/threading.c b/threading.c index 482664b..92cf57c 100644 --- a/threading.c +++ b/threading.c @@ -2,12 +2,13 @@ * Copyright (c) 2016 Tino Reichardt * All rights reserved. * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). - * - * You can contact the author at: - * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * You may select, at your option, one of the above-listed licenses. */ /** @@ -77,11 +78,12 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) #if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32) -#include +#define ZSTD_DEPS_NEED_MALLOC +#include "zstd_deps.h" int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) { - *mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t)); + *mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t)); if (!*mutex) return 1; return pthread_mutex_init(*mutex, attr); @@ -93,14 +95,14 @@ int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) return 0; { int const ret = pthread_mutex_destroy(*mutex); - free(*mutex); + ZSTD_free(*mutex); return ret; } } int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) { - *cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t)); + *cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t)); if (!*cond) return 1; return pthread_cond_init(*cond, attr); @@ -112,7 +114,7 @@ int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) return 0; { int const ret = pthread_cond_destroy(*cond); - free(*cond); + ZSTD_free(*cond); return ret; } } diff --git a/threading.h b/threading.h index 3193ca7..fd0060d 100644 --- a/threading.h +++ b/threading.h @@ -2,12 +2,13 @@ * Copyright (c) 2016 Tino Reichardt * All rights reserved. * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). - * - * You can contact the author at: - * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * You may select, at your option, one of the above-listed licenses. */ #ifndef THREADING_H_938743 diff --git a/xxhash.c b/xxhash.c index 99d2459..e708df3 100644 --- a/xxhash.c +++ b/xxhash.c @@ -1,35 +1,15 @@ /* -* xxHash - Fast Hash algorithm -* Copyright (C) 2012-2016, Yann Collet -* -* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) -* -* Redistribution and use in source and binary forms, with or without -* modification, are permitted provided that the following conditions are -* met: -* -* * Redistributions of source code must retain the above copyright -* notice, this list of conditions and the following disclaimer. -* * Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following disclaimer -* in the documentation and/or other materials provided with the -* distribution. -* -* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -* -* You can contact the author at : -* - xxHash homepage: http://www.xxhash.com -* - xxHash source repository : https://github.com/Cyan4973/xxHash + * xxHash - Fast Hash algorithm + * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash homepage: http://www.xxhash.com + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ @@ -97,14 +77,12 @@ * Includes & Memory related functions ***************************************/ /* Modify the local functions below should you wish to use some other memory routines */ -/* for malloc(), free() */ -#include -#include /* size_t */ -static void* XXH_malloc(size_t s) { return malloc(s); } -static void XXH_free (void* p) { free(p); } -/* for memcpy() */ -#include -static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } +/* for ZSTD_malloc(), ZSTD_free() */ +#define ZSTD_DEPS_NEED_MALLOC +#include "zstd_deps.h" /* size_t, ZSTD_malloc, ZSTD_free, ZSTD_memcpy */ +static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); } +static void XXH_free (void* p) { ZSTD_free(p); } +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); } #ifndef XXH_STATIC_LINKING_ONLY # define XXH_STATIC_LINKING_ONLY @@ -115,49 +93,13 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp /* ************************************* * Compiler Specific Options ***************************************/ -#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# define INLINE_KEYWORD inline -#else -# define INLINE_KEYWORD -#endif - -#if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_INLINE_ATTR __attribute__((always_inline)) -#elif defined(_MSC_VER) -# define FORCE_INLINE_ATTR __forceinline -#else -# define FORCE_INLINE_ATTR -#endif - -#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR - - -#ifdef _MSC_VER -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#endif +#include "compiler.h" /* ************************************* * Basic Types ***************************************/ -#ifndef MEM_MODULE -# define MEM_MODULE -# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -# else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */ -# endif -#endif - +#include "mem.h" /* BYTE, U32, U64, size_t */ #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) @@ -183,14 +125,14 @@ static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } static U32 XXH_read32(const void* memPtr) { U32 val; - memcpy(&val, memPtr, sizeof(val)); + ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } static U64 XXH_read64(const void* memPtr) { U64 val; - memcpy(&val, memPtr, sizeof(val)); + ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } @@ -327,12 +269,12 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } ****************************/ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) { - memcpy(dstState, srcState, sizeof(*dstState)); + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); } XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) { - memcpy(dstState, srcState, sizeof(*dstState)); + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); } @@ -574,12 +516,12 @@ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) { XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + ZSTD_memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ state.v1 = seed + PRIME32_1 + PRIME32_2; state.v2 = seed + PRIME32_2; state.v3 = seed + 0; state.v4 = seed - PRIME32_1; - memcpy(statePtr, &state, sizeof(state)); + ZSTD_memcpy(statePtr, &state, sizeof(state)); return XXH_OK; } @@ -587,12 +529,12 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int s XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) { XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + ZSTD_memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ state.v1 = seed + PRIME64_1 + PRIME64_2; state.v2 = seed + PRIME64_2; state.v3 = seed + 0; state.v4 = seed - PRIME64_1; - memcpy(statePtr, &state, sizeof(state)); + ZSTD_memcpy(statePtr, &state, sizeof(state)); return XXH_OK; } @@ -729,7 +671,9 @@ FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, c state->total_len += len; if (state->memsize + len < 32) { /* fill in tmp buffer */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + if (input != NULL) { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + } state->memsize += (U32)len; return XXH_OK; } @@ -861,14 +805,14 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t { XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); - memcpy(dst, &hash, sizeof(*dst)); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); } XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) { XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); - memcpy(dst, &hash, sizeof(*dst)); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); } XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) diff --git a/xxhash.h b/xxhash.h index 9bad1f5..eceb55d 100644 --- a/xxhash.h +++ b/xxhash.h @@ -1,35 +1,15 @@ /* - xxHash - Extremely Fast Hash algorithm - Header File - Copyright (C) 2012-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - xxHash source repository : https://github.com/Cyan4973/xxHash + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ /* Notice extracted from xxHash homepage : @@ -75,7 +55,7 @@ extern "C" { /* **************************** * Definitions ******************************/ -#include /* size_t */ +#include "zstd_deps.h" typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; diff --git a/zbuff.h b/zbuff.h index 04183ea..7686f76 100644 --- a/zbuff.h +++ b/zbuff.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zbuff_common.c b/zbuff_common.c index 661b9b0..e2b9613 100644 --- a/zbuff_common.c +++ b/zbuff_common.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zbuff_compress.c b/zbuff_compress.c index f39c60d..2d20b13 100644 --- a/zbuff_compress.c +++ b/zbuff_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zbuff_decompress.c b/zbuff_decompress.c index 923c22b..d3c49e8 100644 --- a/zbuff_decompress.c +++ b/zbuff_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zdict.c b/zdict.c index 4a263d8..7ac7c60 100644 --- a/zdict.c +++ b/zdict.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -48,6 +48,7 @@ # define ZDICT_STATIC_LINKING_ONLY #endif #include "zdict.h" +#include "zstd_compress_internal.h" /* ZSTD_loadCEntropy() */ /*-************************************* @@ -61,14 +62,15 @@ #define NOISELENGTH 32 -static const int g_compressionLevel_default = 3; static const U32 g_selectivity_default = 9; /*-************************************* * Console display ***************************************/ +#undef DISPLAY #define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } +#undef DISPLAYLEVEL #define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; } @@ -99,6 +101,26 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize) return MEM_readLE32((const char*)dictBuffer + 4); } +size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) +{ + size_t headerSize; + if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted); + + { ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t)); + U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE); + if (!bs || !wksp) { + headerSize = ERROR(memory_allocation); + } else { + ZSTD_reset_compressedBlockState(bs); + headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize); + } + + free(bs); + free(wksp); + } + + return headerSize; +} /*-******************************************************** * Dictionary training functions @@ -508,6 +530,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize, clock_t displayClock = 0; clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10; +# undef DISPLAYUPDATE # define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \ if (ZDICT_clockSpan(displayClock) > refreshRate) \ { displayClock = clock(); DISPLAY(__VA_ARGS__); \ @@ -588,12 +611,12 @@ typedef struct #define MAXREPOFFSET 1024 -static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params, +static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params, unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets, const void* src, size_t srcSize, U32 notificationLevel) { - size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog); + size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog); size_t cSize; if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */ @@ -682,7 +705,7 @@ static void ZDICT_flatLit(unsigned* countLit) #define OFFCODE_MAX 30 /* only applicable to first block */ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, - unsigned compressionLevel, + int compressionLevel, const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles, const void* dictBuffer, size_t dictBufferSize, unsigned notificationLevel) @@ -717,7 +740,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, memset(repOffset, 0, sizeof(repOffset)); repOffset[1] = repOffset[4] = repOffset[8] = 1; memset(bestRepOffset, 0, sizeof(bestRepOffset)); - if (compressionLevel==0) compressionLevel = g_compressionLevel_default; + if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT; params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize); esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem); @@ -731,7 +754,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, /* collect stats on all samples */ for (u=0; u=1) params.zParams.notificationLevel = DEBUGLEVEL; #endif diff --git a/zdict.h b/zdict.h index 37978ec..b782993 100644 --- a/zdict.h +++ b/zdict.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -61,9 +61,57 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); +typedef struct { + int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */ + unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ + unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value) */ +} ZDICT_params_t; + +/*! ZDICT_finalizeDictionary(): + * Given a custom content as a basis for dictionary, and a set of samples, + * finalize dictionary by adding headers and statistics according to the zstd + * dictionary format. + * + * Samples must be stored concatenated in a flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each + * sample in order. The samples are used to construct the statistics, so they + * should be representative of what you will compress with this dictionary. + * + * The compression level can be set in `parameters`. You should pass the + * compression level you expect to use in production. The statistics for each + * compression level differ, so tuning the dictionary for the compression level + * can help quite a bit. + * + * You can set an explicit dictionary ID in `parameters`, or allow us to pick + * a random dictionary ID for you, but we can't guarantee no collisions. + * + * The dstDictBuffer and the dictContent may overlap, and the content will be + * appended to the end of the header. If the header + the content doesn't fit in + * maxDictSize the beginning of the content is truncated to make room, since it + * is presumed that the most profitable content is at the end of the dictionary, + * since that is the cheapest to reference. + * + * `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes. + * `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN). + * + * @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`), + * or an error code, which can be tested by ZDICT_isError(). + * Note: ZDICT_finalizeDictionary() will push notifications into stderr if + * instructed to, using notificationLevel>0. + * NOTE: This function currently may fail in several edge cases including: + * * Not enough samples + * * Samples are uncompressible + * * Samples are all exactly the same + */ +ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize, + const void* dictContent, size_t dictContentSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t parameters); + /*====== Helper functions ======*/ ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ +ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */ ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); @@ -78,11 +126,8 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); * Use them only in association with static linking. * ==================================================================================== */ -typedef struct { - int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */ - unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ - unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */ -} ZDICT_params_t; +#define ZDICT_CONTENTSIZE_MIN 128 +#define ZDICT_DICTSIZE_MIN 256 /*! ZDICT_cover_params_t: * k and d are the only required parameters. @@ -198,28 +243,6 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t* parameters); -/*! ZDICT_finalizeDictionary(): - * Given a custom content as a basis for dictionary, and a set of samples, - * finalize dictionary by adding headers and statistics. - * - * Samples must be stored concatenated in a flat buffer `samplesBuffer`, - * supplied with an array of sizes `samplesSizes`, providing the size of each sample in order. - * - * dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes. - * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes. - * - * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), - * or an error code, which can be tested by ZDICT_isError(). - * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0. - * Note 2: dictBuffer and dictContent can overlap - */ -#define ZDICT_CONTENTSIZE_MIN 128 -#define ZDICT_DICTSIZE_MIN 256 -ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, - const void* dictContent, size_t dictContentSize, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_params_t parameters); - typedef struct { unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ ZDICT_params_t zParams; @@ -256,7 +279,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy( # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ # define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API -# elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__) +# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405) # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message))) # elif (ZDICT_GCC_VERSION >= 301) # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated)) diff --git a/zstd.h b/zstd.h index 72080ea..b0ecdf5 100644 --- a/zstd.h +++ b/zstd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -72,16 +72,21 @@ extern "C" { /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 4 - +#define ZSTD_VERSION_RELEASE 8 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) -ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ + +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE #define ZSTD_QUOTE(str) #str #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) -ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ + +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. */ +ZSTDLIB_API const char* ZSTD_versionString(void); /* ************************************* * Default constant @@ -274,7 +279,10 @@ typedef enum { * Default level is ZSTD_CLEVEL_DEFAULT==3. * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. * Note 1 : it's possible to pass a negative compression level. - * Note 2 : setting a level resets all other compression parameters to default */ + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ /* Advanced compression parameters : * It's possible to pin down compression parameters to some specific values. * In which case, these values are no longer dynamically selected by the compressor */ @@ -331,7 +339,9 @@ typedef enum { * for large inputs, by finding large matches at long distance. * It increases memory usage and window size. * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB - * except when expressly set to a different value. */ + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. * Larger values increase memory usage and compression ratio, * but decrease compression speed. @@ -362,16 +372,20 @@ typedef enum { ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ /* multi-threading parameters */ - /* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). - * They return an error otherwise. */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. + */ ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. - * When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() : + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, - * while compression work is performed in parallel, within worker threads. + * while compression is performed in parallel, within worker thread(s). * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). * More workers improve speed, but also increase memory usage. - * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */ + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. * 0 means default, which is dynamically determined based on compression parameters. @@ -400,6 +414,11 @@ typedef enum { * ZSTD_c_literalCompressionMode * ZSTD_c_targetCBlockSize * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -410,7 +429,12 @@ typedef enum { ZSTD_c_experimentalParam4=1001, ZSTD_c_experimentalParam5=1002, ZSTD_c_experimentalParam6=1003, - ZSTD_c_experimentalParam7=1004 + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009 } ZSTD_cParameter; typedef struct { @@ -519,11 +543,15 @@ typedef enum { /* note : additional experimental parameters are also available * within the experimental section of the API. * At the time of this writing, they include : - * ZSTD_c_format + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly */ - ZSTD_d_experimentalParam1=1000 + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002 } ZSTD_dParameter; @@ -659,8 +687,9 @@ typedef enum { * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) * - output->pos must be <= dstCapacity, input->pos must be <= srcSize * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. + * - endOp must be a valid directive * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. - * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available, + * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available, * and then immediately returns, just indicating that there is some data remaining to be flushed. * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. @@ -763,7 +792,7 @@ ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* This function is redundant with the advanced API and equivalent to: * - * ZSTD_DCtx_reset(zds); + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); * ZSTD_DCtx_refDDict(zds, NULL); */ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); @@ -1095,21 +1124,40 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; typedef struct { - unsigned int matchPos; /* Match pos in dst */ - /* If seqDef.offset > 3, then this is seqDef.offset - 3 - * If seqDef.offset < 3, then this is the corresponding repeat offset - * But if seqDef.offset < 3 and litLength == 0, this is the - * repeat offset before the corresponding repeat offset - * And if seqDef.offset == 3 and litLength == 0, this is the - * most recent repeat offset - 1 - */ - unsigned int offset; - unsigned int litLength; /* Literal length */ - unsigned int matchLength; /* Match length */ - /* 0 when seq not rep and seqDef.offset otherwise - * when litLength == 0 this will be <= 4, otherwise <= 3 like normal - */ - unsigned int rep; + unsigned int offset; /* The offset of the match. (NOT the same as the offset code) + * If offset == 0 and matchLength == 0, this sequence represents the last + * literals in the block of litLength size. + */ + + unsigned int litLength; /* Literal length of the sequence. */ + unsigned int matchLength; /* Match length of the sequence. */ + + /* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0. + * In this case, we will treat the sequence as a marker for a block boundary. + */ + + unsigned int rep; /* Represents which repeat offset is represented by the field 'offset'. + * Ranges from [0, 3]. + * + * Repeat offsets are essentially previous offsets from previous sequences sorted in + * recency order. For more detail, see doc/zstd_compression_format.md + * + * If rep == 0, then 'offset' does not contain a repeat offset. + * If rep > 0: + * If litLength != 0: + * rep == 1 --> offset == repeat_offset_1 + * rep == 2 --> offset == repeat_offset_2 + * rep == 3 --> offset == repeat_offset_3 + * If litLength == 0: + * rep == 1 --> offset == repeat_offset_2 + * rep == 2 --> offset == repeat_offset_3 + * rep == 3 --> offset == repeat_offset_1 - 1 + * + * Note: This field is optional. ZSTD_generateSequences() will calculate the value of + * 'rep', but repeat offsets do not necessarily need to be calculated from an external + * sequence provider's perspective. For example, ZSTD_compressSequences() does not + * use this 'rep' field at all (as of now). + */ } ZSTD_Sequence; typedef struct { @@ -1151,6 +1199,12 @@ typedef enum { * Decoder cannot recognise automatically this format, requiring this instruction. */ } ZSTD_format_e; +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + typedef enum { /* Note: this enum and the behavior it controls are effectively internal * implementation details of the compressor. They are expected to continue @@ -1248,14 +1302,74 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS * or an error code (if srcSize is too small) */ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); -/*! ZSTD_getSequences() : - * Extract sequences from the sequence store +typedef enum { + ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ + ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ +} ZSTD_sequenceFormat_e; + +/*! ZSTD_generateSequences() : + * Generate sequences using ZSTD_compress2, given a source buffer. + * + * Each block will end with a dummy sequence + * with offset == 0, matchLength == 0, and litLength == length of last literals. + * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. + * * zc can be used to insert custom compression params. * This function invokes ZSTD_compress2 - * @return : number of sequences extracted + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters + * @return : number of sequences generated + */ + +ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); + +/*! ZSTD_mergeBlockDelimiters() : + * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals + * by merging them into into the literals of the next sequence. + * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters + * @return : number of sequences left after merging + */ +ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); + +/*! ZSTD_compressSequences() : + * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. + * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * The entire source is compressed into a single frame. + * + * The compression behavior changes based on cctx params. In particular: + * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on + * the block size derived from the cctx, and sequences may be split. This is the default setting. + * + * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, there are other important cctx params. + * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN. + * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression. + * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset + * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md + * + * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. + * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, + * and cannot emit an RLE block that disagrees with the repcode history + * @return : final compressed size or a ZSTD error. */ -ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); /*************************************** @@ -1263,23 +1377,28 @@ ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, ***************************************/ /*! ZSTD_estimate*() : - * These functions make it possible to estimate memory usage of a future - * {D,C}Ctx, before its creation. - * - * ZSTD_estimateCCtxSize() will provide a budget large enough for any - * compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(), - * this estimate does not include space for a window buffer, so this estimate - * is guaranteed to be enough for single-shot compressions, but not streaming - * compressions. It will however assume the input may be arbitrarily large, - * which is the worst case. If srcSize is known to always be small, - * ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. - * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with - * ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with - * ZSTD_CCtxParams_setParameter(). - * - * Note: only single-threaded compression is supported. This function will - * return an error code if ZSTD_c_nbWorkers is >= 1. */ + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); @@ -1362,7 +1481,11 @@ ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); typedef void (*ZSTD_freeFunction) (void* opaque, void* address); typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; -static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ +static +#ifdef __GNUC__ +__attribute__((__unused__)) +#endif +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); @@ -1375,13 +1498,36 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS ZSTD_compressionParameters cParams, ZSTD_customMem customMem); +/* ! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + +/* + * This API is temporary and is expected to change or disappear in the future! + */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); + ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, ZSTD_customMem customMem); - - /*************************************** * Advanced compression functions ***************************************/ @@ -1394,6 +1540,12 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictS * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + /*! ZSTD_getCParams() : * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. * `estimatedSrcSize` value is optional, select 0 if not known */ @@ -1508,6 +1660,143 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre * but compression ratio may regress significantly if guess considerably underestimates */ #define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior! + * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. + * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 + +/* ZSTD_c_stableInBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the compressor, and + * compression will fail if it ever changes. This means the only flush + * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end + * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos) + * MUST not be modified during compression or you will get data corruption. + * + * When this flag is enabled zstd won't allocate an input window buffer, + * because the user guarantees it can reference the ZSTD_inBuffer until + * the frame is complete. But, it will still allocate an output buffer + * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also + * avoid the memcpy() from the input buffer to the input window buffer. + * + * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used. + * That means this flag cannot be used with ZSTD_compressStream(). + * + * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, compression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_inBuffer in the range [dst, dst + pos) MUST + * not be modified during compression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_inBuffer to find + * matches. Normally zstd maintains its own window buffer for this purpose, + * but passing this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9 + +/* ZSTD_c_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells he compressor that the ZSTD_outBuffer will not be resized between + * calls. Specifically: (out.size - out.pos) will never grow. This gives the + * compressor the freedom to say: If the compressed data doesn't fit in the + * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to + * always decompress directly into the output buffer, instead of decompressing + * into an internal buffer and copying to the output buffer. + * + * When this flag is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer. It will still allocate the + * input window buffer (see ZSTD_c_stableInBuffer). + * + * Zstd will check that (out.size - out.pos) never grows and return an error + * if it does. While not strictly necessary, this should prevent surprises. + */ +#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10 + +/* ZSTD_c_blockDelimiters + * Default is 0 == ZSTD_sf_noBlockDelimiters. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * + * Designates whether or not the given array of ZSTD_Sequence contains block delimiters + * and last literals, which are defined as sequences with offset == 0 and matchLength == 0. + * See the definition of ZSTD_Sequence for more specifics. + */ +#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11 + +/* ZSTD_c_validateSequences + * Default is 0 == disabled. Set to 1 to enable sequence validation. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * Designates whether or not we validate sequences provided to ZSTD_compressSequences() + * during function execution. + * + * Without validation, providing a sequence that does not conform to the zstd spec will cause + * undefined behavior, and may produce a corrupted block. + * + * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and + * return an error. + * + */ +#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. @@ -1556,8 +1845,10 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, Z /*! ZSTD_CCtxParams_setParameter() : * Similar to ZSTD_CCtx_setParameter. * Set one compression parameter, selected by enum ZSTD_cParameter. - * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); @@ -1637,11 +1928,60 @@ ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* pre */ ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); +/*! ZSTD_DCtx_getParameter() : + * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); + /* ZSTD_d_format * experimental parameter, * allowing selection between ZSTD_format_e input compression formats */ #define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. + * + * When this flags is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 + +/* ZSTD_d_forceIgnoreChecksum + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * Tells the decompressor to skip checksum validation during decompression, regardless + * of whether checksumming was specified during compression. This offers some + * slight performance benefits, and may be useful for debugging. + * Param has values of type ZSTD_forceIgnoreChecksum_e + */ +#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 /*! ZSTD_DCtx_setFormat() : * Instruct the decoder context about what kind of data to decode next. @@ -1670,7 +2010,8 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( ********************************************************************/ /*===== Advanced Streaming compression functions =====*/ -/**! ZSTD_initCStream_srcSize() : + +/*! ZSTD_initCStream_srcSize() : * This function is deprecated, and equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) @@ -1687,7 +2028,7 @@ ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); -/**! ZSTD_initCStream_usingDict() : +/*! ZSTD_initCStream_usingDict() : * This function is deprecated, and is equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); @@ -1704,7 +2045,7 @@ ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); -/**! ZSTD_initCStream_advanced() : +/*! ZSTD_initCStream_advanced() : * This function is deprecated, and is approximately equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * // Pseudocode: Set each zstd parameter and leave the rest as-is. @@ -1725,7 +2066,7 @@ ZSTD_initCStream_advanced(ZSTD_CStream* zcs, ZSTD_parameters params, unsigned long long pledgedSrcSize); -/**! ZSTD_initCStream_usingCDict() : +/*! ZSTD_initCStream_usingCDict() : * This function is deprecated, and equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_refCDict(zcs, cdict); @@ -1735,7 +2076,7 @@ ZSTD_initCStream_advanced(ZSTD_CStream* zcs, */ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); -/**! ZSTD_initCStream_usingCDict_advanced() : +/*! ZSTD_initCStream_usingCDict_advanced() : * This function is DEPRECATED, and is approximately equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. @@ -1808,7 +2149,8 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); /*===== Advanced Streaming decompression functions =====*/ -/** + +/*! * This function is deprecated, and is equivalent to: * * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); @@ -1819,7 +2161,7 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); */ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); -/** +/*! * This function is deprecated, and is equivalent to: * * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); @@ -1830,7 +2172,7 @@ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dic */ ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); -/** +/*! * This function is deprecated, and is equivalent to: * * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); @@ -1892,7 +2234,7 @@ ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstC ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -/*- +/** Buffer-less streaming decompression (synchronous mode) A ZSTD_DCtx object is required to track streaming operations. diff --git a/zstd_common.c b/zstd_common.c index 667f4a2..939e9f0 100644 --- a/zstd_common.c +++ b/zstd_common.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -13,8 +13,8 @@ /*-************************************* * Dependencies ***************************************/ -#include /* malloc, calloc, free */ -#include /* memset */ +#define ZSTD_DEPS_NEED_MALLOC +#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ #include "error_private.h" #include "zstd_internal.h" @@ -53,31 +53,31 @@ const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString /*=************************************************************** * Custom allocator ****************************************************************/ -void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) { if (customMem.customAlloc) return customMem.customAlloc(customMem.opaque, size); - return malloc(size); + return ZSTD_malloc(size); } -void* ZSTD_calloc(size_t size, ZSTD_customMem customMem) +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) { if (customMem.customAlloc) { /* calloc implemented as malloc+memset; * not as efficient as calloc, but next best guess for custom malloc */ void* const ptr = customMem.customAlloc(customMem.opaque, size); - memset(ptr, 0, size); + ZSTD_memset(ptr, 0, size); return ptr; } - return calloc(1, size); + return ZSTD_calloc(1, size); } -void ZSTD_free(void* ptr, ZSTD_customMem customMem) +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) { if (ptr!=NULL) { if (customMem.customFree) customMem.customFree(customMem.opaque, ptr); else - free(ptr); + ZSTD_free(ptr); } } diff --git a/zstd_compress.c b/zstd_compress.c index 35346b9..dd6f68d 100644 --- a/zstd_compress.c +++ b/zstd_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -11,8 +11,7 @@ /*-************************************* * Dependencies ***************************************/ -#include /* INT_MAX */ -#include /* memset */ +#include "zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */ #include "cpu.h" #include "mem.h" #include "hist.h" /* HIST_countFast_wksp */ @@ -28,11 +27,32 @@ #include "zstd_lazy.h" #include "zstd_opt.h" #include "zstd_ldm.h" +#include "zstd_compress_superblock.h" + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * COMPRESS_HEAPMODE : + * Select how default decompression function ZSTD_compress() allocates its context, + * on stack (0, default), or into heap (1). + * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected. + */ +#ifndef ZSTD_COMPRESS_HEAPMODE +# define ZSTD_COMPRESS_HEAPMODE 0 +#endif /*-************************************* * Helper functions ***************************************/ +/* ZSTD_compressBound() + * Note that the result from this function is only compatible with the "normal" + * full-block strategy. + * When there are a lot of small blocks due to frequent flush in streaming mode + * the overhead of headers can make the compressed data to be larger than the + * return value of ZSTD_compressBound(). + */ size_t ZSTD_compressBound(size_t srcSize) { return ZSTD_COMPRESSBOUND(srcSize); } @@ -44,6 +64,7 @@ size_t ZSTD_compressBound(size_t srcSize) { struct ZSTD_CDict_s { const void* dictContent; size_t dictContentSize; + ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */ U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ ZSTD_cwksp workspace; ZSTD_matchState_t matchState; @@ -61,7 +82,7 @@ ZSTD_CCtx* ZSTD_createCCtx(void) static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) { assert(cctx != NULL); - memset(cctx, 0, sizeof(*cctx)); + ZSTD_memset(cctx, 0, sizeof(*cctx)); cctx->customMem = memManager; cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); @@ -74,36 +95,34 @@ ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) { ZSTD_STATIC_ASSERT(zcss_init==0); ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem); if (!cctx) return NULL; ZSTD_initCCtx(cctx, customMem); return cctx; } } -ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize) +ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) { ZSTD_cwksp ws; ZSTD_CCtx* cctx; if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ - ZSTD_cwksp_init(&ws, workspace, workspaceSize); + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); - if (cctx == NULL) { - return NULL; - } - memset(cctx, 0, sizeof(ZSTD_CCtx)); + if (cctx == NULL) return NULL; + + ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx)); ZSTD_cwksp_move(&cctx->workspace, &ws); cctx->staticSize = workspaceSize; /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ - if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; + if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); - cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object( - &cctx->workspace, HUF_WORKSPACE_SIZE); + cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE); cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); return cctx; } @@ -113,10 +132,10 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize) */ static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) { - ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem); + ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem); ZSTD_freeCDict(cctx->localDict.cdict); - memset(&cctx->localDict, 0, sizeof(cctx->localDict)); - memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); + ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict)); + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); cctx->cdict = NULL; } @@ -147,7 +166,7 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); ZSTD_freeCCtxContent(cctx); if (!cctxInWorkspace) { - ZSTD_free(cctx, cctx->customMem); + ZSTD_customFree(cctx, cctx->customMem); } } return 0; @@ -183,15 +202,32 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) /* private API call, for dictBuilder only */ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } +/* Returns 1 if compression parameters are such that we should + * enable long distance matching (wlog >= 27, strategy >= btopt). + * Returns 0 otherwise. + */ +static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) { + return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27; +} + static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( ZSTD_compressionParameters cParams) { ZSTD_CCtx_params cctxParams; - memset(&cctxParams, 0, sizeof(cctxParams)); + /* should not matter, as all cParams are presumed properly defined */ + ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT); cctxParams.cParams = cParams; - cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + + if (ZSTD_CParams_shouldEnableLdm(&cParams)) { + DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params"); + cctxParams.ldmParams.enableLdm = 1; + /* LDM is enabled by default for optimal parser and window size >= 128MB */ + ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams); + assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog); + assert(cctxParams.ldmParams.hashRateLog < 32); + } + assert(!ZSTD_checkCParams(cParams)); - cctxParams.fParams.contentSizeFlag = 1; return cctxParams; } @@ -199,13 +235,12 @@ static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( ZSTD_customMem customMem) { ZSTD_CCtx_params* params; - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - params = (ZSTD_CCtx_params*)ZSTD_calloc( + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + params = (ZSTD_CCtx_params*)ZSTD_customCalloc( sizeof(ZSTD_CCtx_params), customMem); if (!params) { return NULL; } + ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); params->customMem = customMem; - params->compressionLevel = ZSTD_CLEVEL_DEFAULT; - params->fParams.contentSizeFlag = 1; return params; } @@ -217,7 +252,7 @@ ZSTD_CCtx_params* ZSTD_createCCtxParams(void) size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) { if (params == NULL) { return 0; } - ZSTD_free(params, params->customMem); + ZSTD_customFree(params, params->customMem); return 0; } @@ -227,8 +262,8 @@ size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) } size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { - RETURN_ERROR_IF(!cctxParams, GENERIC); - memset(cctxParams, 0, sizeof(*cctxParams)); + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); cctxParams->compressionLevel = compressionLevel; cctxParams->fParams.contentSizeFlag = 1; return 0; @@ -236,9 +271,9 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) { - RETURN_ERROR_IF(!cctxParams, GENERIC); - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); - memset(cctxParams, 0, sizeof(*cctxParams)); + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); assert(!ZSTD_checkCParams(params.cParams)); cctxParams->cParams = params.cParams; cctxParams->fParams = params.fParams; @@ -249,12 +284,12 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete /* ZSTD_assignParamsToCCtxParams() : * params is presumed valid at this stage */ static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( - const ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) + const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) { ZSTD_CCtx_params ret = *cctxParams; - assert(!ZSTD_checkCParams(params.cParams)); - ret.cParams = params.cParams; - ret.fParams = params.fParams; + assert(!ZSTD_checkCParams(params->cParams)); + ret.cParams = params->cParams; + ret.fParams = params->fParams; ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ return ret; } @@ -339,8 +374,18 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) return bounds; case ZSTD_c_overlapLog: +#ifdef ZSTD_MULTITHREAD bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; bounds.upperBound = ZSTD_OVERLAPLOG_MAX; +#else + bounds.lowerBound = 0; + bounds.upperBound = 0; +#endif + return bounds; + + case ZSTD_c_enableDedicatedDictSearch: + bounds.lowerBound = 0; + bounds.upperBound = 1; return bounds; case ZSTD_c_enableLongDistanceMatching: @@ -386,7 +431,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) return bounds; case ZSTD_c_forceAttachDict: - ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); + ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad); bounds.lowerBound = ZSTD_dictDefaultAttach; bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ return bounds; @@ -407,10 +452,25 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; return bounds; + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + + case ZSTD_c_blockDelimiters: + bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters; + bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters; + return bounds; + + case ZSTD_c_validateSequences: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + default: - { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; - return boundError; - } + bounds.error = ERROR(parameter_unsupported); + return bounds; } } @@ -428,7 +488,7 @@ static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) #define BOUNDCHECK(cParam, val) { \ RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ - parameter_outOfBound); \ + parameter_outOfBound, "Param out of bounds"); \ } @@ -455,6 +515,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_jobSize: case ZSTD_c_overlapLog: case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: case ZSTD_c_enableLongDistanceMatching: case ZSTD_c_ldmHashLog: case ZSTD_c_ldmMinMatch: @@ -464,6 +525,10 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_literalCompressionMode: case ZSTD_c_targetCBlockSize: case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: default: return 0; } @@ -476,7 +541,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) if (ZSTD_isUpdateAuthorized(param)) { cctx->cParamsChanged = 1; } else { - RETURN_ERROR(stage_wrong); + RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); } } switch(param) @@ -505,15 +570,20 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_jobSize: case ZSTD_c_overlapLog: case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: case ZSTD_c_enableLongDistanceMatching: case ZSTD_c_ldmHashLog: case ZSTD_c_ldmMinMatch: case ZSTD_c_ldmBucketSizeLog: case ZSTD_c_targetCBlockSize: case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: break; - default: RETURN_ERROR(parameter_unsupported); + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); } @@ -530,10 +600,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, return (size_t)CCtxParams->format; case ZSTD_c_compressionLevel : { - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); - if (value) { /* 0 : does not change current level */ + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + if (value == 0) + CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + else CCtxParams->compressionLevel = value; - } if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; return 0; /* return type (size_t) cannot represent negative values */ } @@ -618,7 +689,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); return 0; #else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); CCtxParams->nbWorkers = value; return CCtxParams->nbWorkers; #endif @@ -631,7 +702,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, /* Adjust to the minimum non-default value. */ if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) value = ZSTDMT_JOBSIZE_MIN; - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); assert(value >= 0); CCtxParams->jobSize = value; return CCtxParams->jobSize; @@ -642,7 +713,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); return 0; #else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value)); + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); CCtxParams->overlapLog = value; return CCtxParams->overlapLog; #endif @@ -652,11 +723,15 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); return 0; #else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value)); + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); CCtxParams->rsyncable = value; return CCtxParams->rsyncable; #endif + case ZSTD_c_enableDedicatedDictSearch : + CCtxParams->enableDedicatedDictSearch = (value!=0); + return CCtxParams->enableDedicatedDictSearch; + case ZSTD_c_enableLongDistanceMatching : CCtxParams->ldmParams.enableLdm = (value!=0); return CCtxParams->ldmParams.enableLdm; @@ -681,7 +756,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, case ZSTD_c_ldmHashRateLog : RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, - parameter_outOfBound); + parameter_outOfBound, "Param out of bounds!"); CCtxParams->ldmParams.hashRateLog = value; return CCtxParams->ldmParams.hashRateLog; @@ -697,6 +772,26 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->srcSizeHint = value; return CCtxParams->srcSizeHint; + case ZSTD_c_stableInBuffer: + BOUNDCHECK(ZSTD_c_stableInBuffer, value); + CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->inBufferMode; + + case ZSTD_c_stableOutBuffer: + BOUNDCHECK(ZSTD_c_stableOutBuffer, value); + CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->outBufferMode; + + case ZSTD_c_blockDelimiters: + BOUNDCHECK(ZSTD_c_blockDelimiters, value); + CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; + return CCtxParams->blockDelimiters; + + case ZSTD_c_validateSequences: + BOUNDCHECK(ZSTD_c_validateSequences, value); + CCtxParams->validateSequences = value; + return CCtxParams->validateSequences; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -784,6 +879,9 @@ size_t ZSTD_CCtxParams_getParameter( *value = CCtxParams->rsyncable; break; #endif + case ZSTD_c_enableDedicatedDictSearch : + *value = CCtxParams->enableDedicatedDictSearch; + break; case ZSTD_c_enableLongDistanceMatching : *value = CCtxParams->ldmParams.enableLdm; break; @@ -805,6 +903,18 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_srcSizeHint : *value = (int)CCtxParams->srcSizeHint; break; + case ZSTD_c_stableInBuffer : + *value = (int)CCtxParams->inBufferMode; + break; + case ZSTD_c_stableOutBuffer : + *value = (int)CCtxParams->outBufferMode; + break; + case ZSTD_c_blockDelimiters : + *value = (int)CCtxParams->blockDelimiters; + break; + case ZSTD_c_validateSequences : + *value = (int)CCtxParams->validateSequences; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -821,8 +931,11 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) { DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); - RETURN_ERROR_IF(cctx->cdict, stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "The context is in the wrong stage!"); + RETURN_ERROR_IF(cctx->cdict, stage_wrong, + "Can't override parameters with cdict attached (some must " + "be inherited from the cdict)."); cctx->requestedParams = *params; return 0; @@ -831,11 +944,20 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't set pledgedSrcSize when not in init stage."); cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; return 0; } +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams( + int const compressionLevel, + size_t const dictSize); +static int ZSTD_dedicatedDictSearch_isSupported( + const ZSTD_compressionParameters* cParams); +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams); + /** * Initializes the local dict using the requested parameters. * NOTE: This does not use the pledged src size, because it may be used for more @@ -844,8 +966,6 @@ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long lo static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) { ZSTD_localDict* const dl = &cctx->localDict; - ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams( - &cctx->requestedParams, 0, dl->dictSize); if (dl->dict == NULL) { /* No local dictionary. */ assert(dl->dictBuffer == NULL); @@ -862,14 +982,14 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) assert(cctx->cdict == NULL); assert(cctx->prefixDict.dict == NULL); - dl->cdict = ZSTD_createCDict_advanced( + dl->cdict = ZSTD_createCDict_advanced2( dl->dict, dl->dictSize, ZSTD_dlm_byRef, dl->dictContentType, - cParams, + &cctx->requestedParams, cctx->customMem); - RETURN_ERROR_IF(!dl->cdict, memory_allocation); + RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); cctx->cdict = dl->cdict; return 0; } @@ -878,9 +998,8 @@ size_t ZSTD_CCtx_loadDictionary_advanced( ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) { - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); - RETURN_ERROR_IF(cctx->staticSize, memory_allocation, - "no malloc for static CCtx"); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't load a dictionary when ctx is not in init stage."); DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); ZSTD_clearAllDicts(cctx); /* in case one already exists */ if (dict == NULL || dictSize == 0) /* no dictionary mode */ @@ -888,9 +1007,12 @@ size_t ZSTD_CCtx_loadDictionary_advanced( if (dictLoadMethod == ZSTD_dlm_byRef) { cctx->localDict.dict = dict; } else { - void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem); - RETURN_ERROR_IF(!dictBuffer, memory_allocation); - memcpy(dictBuffer, dict, dictSize); + void* dictBuffer; + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "no malloc for static CCtx"); + dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem); + RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); + ZSTD_memcpy(dictBuffer, dict, dictSize); cctx->localDict.dictBuffer = dictBuffer; cctx->localDict.dict = dictBuffer; } @@ -915,13 +1037,22 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) { - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a dict when ctx not in init stage."); /* Free the existing local cdict (if any) to save memory. */ ZSTD_clearAllDicts(cctx); cctx->cdict = cdict; return 0; } +size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a pool when ctx not in init stage."); + cctx->pool = pool; + return 0; +} + size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) { return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); @@ -930,11 +1061,14 @@ size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSiz size_t ZSTD_CCtx_refPrefix_advanced( ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) { - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a prefix when ctx not in init stage."); ZSTD_clearAllDicts(cctx); - cctx->prefixDict.dict = prefix; - cctx->prefixDict.dictSize = prefixSize; - cctx->prefixDict.dictContentType = dictContentType; + if (prefix != NULL && prefixSize > 0) { + cctx->prefixDict.dict = prefix; + cctx->prefixDict.dictSize = prefixSize; + cctx->prefixDict.dictContentType = dictContentType; + } return 0; } @@ -949,7 +1083,8 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) } if ( (reset == ZSTD_reset_parameters) || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't reset parameters only when not in init stage."); ZSTD_clearAllDicts(cctx); return ZSTD_CCtxParams_reset(&cctx->requestedParams); } @@ -996,31 +1131,78 @@ ZSTD_clampCParams(ZSTD_compressionParameters cParams) /** ZSTD_cycleLog() : * condition for correct operation : hashLog > 1 */ -static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) { U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); return hashLog - btScale; } +/** ZSTD_dictAndWindowLog() : + * Returns an adjusted window log that is large enough to fit the source and the dictionary. + * The zstd format says that the entire dictionary is valid if one byte of the dictionary + * is within the window. So the hashLog and chainLog should be large enough to reference both + * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing + * the hashLog and windowLog. + * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN. + */ +static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize) +{ + const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX; + /* No dictionary ==> No change */ + if (dictSize == 0) { + return windowLog; + } + assert(windowLog <= ZSTD_WINDOWLOG_MAX); + assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */ + { + U64 const windowSize = 1ULL << windowLog; + U64 const dictAndWindowSize = dictSize + windowSize; + /* If the window size is already large enough to fit both the source and the dictionary + * then just use the window size. Otherwise adjust so that it fits the dictionary and + * the window. + */ + if (windowSize >= dictSize + srcSize) { + return windowLog; /* Window size large enough already */ + } else if (dictAndWindowSize >= maxWindowSize) { + return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */ + } else { + return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1; + } + } +} + /** ZSTD_adjustCParams_internal() : * optimize `cPar` for a specified input (`srcSize` and `dictSize`). * mostly downsize to reduce memory consumption and initialization latency. * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. - * note : for the time being, `srcSize==0` means "unknown" too, for compatibility with older convention. + * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`. + * note : `srcSize==0` means 0! * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ static ZSTD_compressionParameters ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, unsigned long long srcSize, - size_t dictSize) + size_t dictSize, + ZSTD_cParamMode_e mode) { - static const U64 minSrcSize = 513; /* (1<<9) + 1 */ - static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); + const U64 minSrcSize = 513; /* (1<<9) + 1 */ + const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); assert(ZSTD_checkCParams(cPar)==0); - if (dictSize && (srcSize+1<2) /* ZSTD_CONTENTSIZE_UNKNOWN and 0 mean "unknown" */ ) - srcSize = minSrcSize; /* presumed small when there is a dictionary */ - else if (srcSize == 0) - srcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* 0 == unknown : presumed large */ + if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) + srcSize = minSrcSize; + + switch (mode) { + case ZSTD_cpm_noAttachDict: + case ZSTD_cpm_unknown: + case ZSTD_cpm_createCDict: + break; + case ZSTD_cpm_attachDict: + dictSize = 0; + break; + default: + assert(0); + break; + } /* resize windowLog if input is small enough, to use less memory */ if ( (srcSize < maxWindowResize) @@ -1031,10 +1213,11 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, ZSTD_highbit32(tSize-1) + 1; if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; } - if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1; - { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); - if (cycleLog > cPar.windowLog) - cPar.chainLog -= (cycleLog - cPar.windowLog); + { U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); + U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); + if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1; + if (cycleLog > dictAndWindowLog) + cPar.chainLog -= (cycleLog - dictAndWindowLog); } if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) @@ -1049,27 +1232,39 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar, size_t dictSize) { cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ - return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); + if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown); +} + +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +static void ZSTD_overrideCParams( + ZSTD_compressionParameters* cParams, + const ZSTD_compressionParameters* overrides) +{ + if (overrides->windowLog) cParams->windowLog = overrides->windowLog; + if (overrides->hashLog) cParams->hashLog = overrides->hashLog; + if (overrides->chainLog) cParams->chainLog = overrides->chainLog; + if (overrides->searchLog) cParams->searchLog = overrides->searchLog; + if (overrides->minMatch) cParams->minMatch = overrides->minMatch; + if (overrides->targetLength) cParams->targetLength = overrides->targetLength; + if (overrides->strategy) cParams->strategy = overrides->strategy; } ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( - const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { ZSTD_compressionParameters cParams; if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { srcSizeHint = CCtxParams->srcSizeHint; } - cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); + cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; - if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; - if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; - if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog; - if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog; - if (CCtxParams->cParams.minMatch) cParams.minMatch = CCtxParams->cParams.minMatch; - if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength; - if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy; + ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); assert(!ZSTD_checkCParams(cParams)); - return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize); + /* srcSizeHint == 0 means 0 */ + return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode); } static size_t @@ -1100,32 +1295,61 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, return tableSpace + optSpace; } -size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) +static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( + const ZSTD_compressionParameters* cParams, + const ldmParams_t* ldmParams, + const int isStatic, + const size_t buffInSize, + const size_t buffOutSize, + const U64 pledgedSrcSize) { - RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); - { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, 0, 0); - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); - U32 const divider = (cParams.minMatch==3) ? 3 : 4; - size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) - + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) - + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); - size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); - size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); - size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1); + size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (cParams->minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) + + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); + size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); + size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); + size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1); - size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams); - size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq)); + size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams); + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize); + size_t const ldmSeqSpace = ldmParams->enableLdm ? + ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; - size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace + - matchStateSize + ldmSpace + ldmSeqSpace; - size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)); - DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)cctxSpace); - DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); - return cctxSpace + neededSpace; - } + size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + + ZSTD_cwksp_alloc_size(buffOutSize); + + size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; + + size_t const neededSpace = + cctxSpace + + entropySpace + + blockStateSpace + + ldmSpace + + ldmSeqSpace + + matchStateSize + + tokenSpace + + bufferSpace; + + DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); + return neededSpace; +} + +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + /* estimateCCtxSize is for one-shot compression. So no buffers should + * be needed. However, we still allocate two 0-sized buffers, which can + * take space under ASAN. */ + return ZSTD_estimateCCtxSize_usingCCtxParams_internal( + &cParams, ¶ms->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); } size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) @@ -1136,7 +1360,7 @@ size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) { - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); return ZSTD_estimateCCtxSize_usingCParams(cParams); } @@ -1155,15 +1379,18 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) { RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, 0, 0); - size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); - size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize; - size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; - size_t const streamingSize = ZSTD_cwksp_alloc_size(inBuffSize) - + ZSTD_cwksp_alloc_size(outBuffSize); - - return CCtxSize + streamingSize; + size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered) + ? ((size_t)1 << cParams.windowLog) + blockSize + : 0; + size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered) + ? ZSTD_compressBound(blockSize) + 1 + : 0; + + return ZSTD_estimateCCtxSize_usingCCtxParams_internal( + &cParams, ¶ms->ldmParams, 1, inBuffSize, outBuffSize, + ZSTD_CONTENTSIZE_UNKNOWN); } } @@ -1175,7 +1402,7 @@ size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) { - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); return ZSTD_estimateCStreamSize_usingCParams(cParams); } @@ -1243,7 +1470,7 @@ static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, assert(cParams1.strategy == cParams2.strategy); } -static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) { int i; for (i = 0; i < ZSTD_REP_NUM; ++i) @@ -1268,16 +1495,6 @@ static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) ms->dictMatchState = NULL; } -/** - * Indicates whether this compression proceeds directly from user-provided - * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or - * whether the context needs to buffer the input/output (ZSTDb_buffered). - */ -typedef enum { - ZSTDb_not_buffered, - ZSTDb_buffered -} ZSTD_buffered_policy_e; - /** * Controls, for this matchState reset, whether the tables need to be cleared / * prepared for the coming compression (ZSTDcrp_makeClean), or whether the @@ -1320,10 +1537,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); if (forceResetIndex == ZSTDirp_reset) { - memset(&ms->window, 0, sizeof(ms->window)); - ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */ - ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ - ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */ + ZSTD_window_init(&ms->window); ZSTD_cwksp_mark_tables_dirty(ws); } @@ -1408,45 +1622,32 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); U32 const divider = (params.cParams.minMatch==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) - + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) - + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); - size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; - size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0; - size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1); + size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered) + ? ZSTD_compressBound(blockSize) + 1 + : 0; + size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered) + ? windowSize + blockSize + : 0; size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); - ZSTD_indexResetPolicy_e needsIndexReset = ZSTDirp_continue; + int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window); + ZSTD_indexResetPolicy_e needsIndexReset = + (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset; - if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { - needsIndexReset = ZSTDirp_reset; - } + size_t const neededSpace = + ZSTD_estimateCCtxSize_usingCCtxParams_internal( + ¶ms.cParams, ¶ms.ldmParams, zc->staticSize != 0, + buffInSize, buffOutSize, pledgedSrcSize); + FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); - ZSTD_cwksp_bump_oversized_duration(ws, 0); + if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); /* Check if workspace is large enough, alloc a new one if needed */ - { size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; - size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); - size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); - size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize); - size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams); - size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)); - - size_t const neededSpace = - cctxSpace + - entropySpace + - blockStateSpace + - ldmSpace + - ldmSeqSpace + - matchStateSize + - tokenSpace + - bufferSpace; - + { int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); - DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers", - neededSpace>>10, matchStateSize>>10, bufferSpace>>10); + DEBUGLOG(4, "Need %zu B workspace", neededSpace); DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); if (workspaceTooSmall || workspaceWasteful) { @@ -1459,7 +1660,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, needsIndexReset = ZSTDirp_reset; ZSTD_cwksp_free(ws, zc->customMem); - FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem)); + FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), ""); DEBUGLOG(5, "reserving object space"); /* Statically sized space. @@ -1470,7 +1671,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); - zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE); + zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE); RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); } } @@ -1501,6 +1702,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->seqStore.maxNbLit = blockSize; /* buffers */ + zc->bufferedPolicy = zbuff; zc->inBuffSize = buffInSize; zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); zc->outBuffSize = buffOutSize; @@ -1513,7 +1715,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ((size_t)1) << (params.ldmParams.hashLog - params.ldmParams.bucketSizeLog); zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize); - memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize); + ZSTD_memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize); } /* sequences storage */ @@ -1530,22 +1732,30 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ¶ms.cParams, crp, needsIndexReset, - ZSTD_resetTarget_CCtx)); + ZSTD_resetTarget_CCtx), ""); /* ldm hash table */ if (params.ldmParams.enableLdm) { /* TODO: avoid memset? */ size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); - memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); + ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); zc->maxNbLdmSequences = maxNbLdmSeq; - memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); + ZSTD_window_init(&zc->ldmState.window); ZSTD_window_clear(&zc->ldmState.window); + zc->ldmState.loadedDictEnd = 0; } + /* Due to alignment, when reusing a workspace, we can actually consume + * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h + */ + assert(ZSTD_cwksp_used(ws) >= neededSpace && + ZSTD_cwksp_used(ws) <= neededSpace + 3); + DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); + zc->initialized = 1; return 0; } @@ -1583,12 +1793,14 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, U64 pledgedSrcSize) { size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; - return ( pledgedSrcSize <= cutoff - || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || params->attachDictPref == ZSTD_dictForceAttach ) - && params->attachDictPref != ZSTD_dictForceCopy - && !params->forceWindow; /* dictMatchState isn't correctly - * handled in _enforceMaxDist */ + int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch; + return dedicatedDictSearch + || ( ( pledgedSrcSize <= cutoff + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || params->attachDictPref == ZSTD_dictForceAttach ) + && params->attachDictPref != ZSTD_dictForceCopy + && !params->forceWindow ); /* dictMatchState isn't correctly + * handled in _enforceMaxDist */ } static size_t @@ -1598,16 +1810,24 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { - { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams; + { + ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; unsigned const windowLog = params.cParams.windowLog; assert(windowLog != 0); /* Resize working context table params for input only, since the dict * has its own tables. */ - params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0); + /* pledgedSrcSize == 0 means 0! */ + + if (cdict->matchState.dedicatedDictSearch) { + ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams); + } + + params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, + cdict->dictContentSize, ZSTD_cpm_attachDict); params.cParams.windowLog = windowLog; FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_makeClean, zbuff)); - assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); + ZSTDcrp_makeClean, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); } { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc @@ -1634,7 +1854,7 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, cctx->dictID = cdict->dictID; /* copy block state */ - memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); return 0; } @@ -1647,6 +1867,8 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, { const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + assert(!cdict->matchState.dedicatedDictSearch); + DEBUGLOG(4, "copying dictionary into context"); { unsigned const windowLog = params.cParams.windowLog; @@ -1655,7 +1877,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, params.cParams = *cdict_cParams; params.cParams.windowLog = windowLog; FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_leaveDirty, zbuff)); + ZSTDcrp_leaveDirty, zbuff), ""); assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); @@ -1667,10 +1889,10 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); size_t const hSize = (size_t)1 << cdict_cParams->hashLog; - memcpy(cctx->blockState.matchState.hashTable, + ZSTD_memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, hSize * sizeof(U32)); - memcpy(cctx->blockState.matchState.chainTable, + ZSTD_memcpy(cctx->blockState.matchState.chainTable, cdict->matchState.chainTable, chainSize * sizeof(U32)); } @@ -1679,7 +1901,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, { int const h3log = cctx->blockState.matchState.hashLog3; size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; assert(cdict->matchState.hashLog3 == 0); - memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); } ZSTD_cwksp_mark_tables_clean(&cctx->workspace); @@ -1695,7 +1917,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, cctx->dictID = cdict->dictID; /* copy block state */ - memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); return 0; } @@ -1736,9 +1958,10 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, ZSTD_buffered_policy_e zbuff) { DEBUGLOG(5, "ZSTD_copyCCtx_internal"); - RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong); + RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, + "Can't copy a ctx that's not in init stage."); - memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); { ZSTD_CCtx_params params = dstCCtx->requestedParams; /* Copy only compression parameters related to tables. */ params.cParams = srcCCtx->appliedParams.cParams; @@ -1760,13 +1983,13 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, int const h3log = srcCCtx->blockState.matchState.hashLog3; size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; - memcpy(dstCCtx->blockState.matchState.hashTable, + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, hSize * sizeof(U32)); - memcpy(dstCCtx->blockState.matchState.chainTable, + ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable, srcCCtx->blockState.matchState.chainTable, chainSize * sizeof(U32)); - memcpy(dstCCtx->blockState.matchState.hashTable3, + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3, srcCCtx->blockState.matchState.hashTable3, h3Size * sizeof(U32)); } @@ -1784,7 +2007,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, dstCCtx->dictID = srcCCtx->dictID; /* copy block state */ - memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); + ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); return 0; } @@ -1797,7 +2020,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) { ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0); + ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy; ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); @@ -1824,7 +2047,7 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ assert(size < (1U<<31)); /* can be casted to int */ -#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) /* To validate that the table re-use logic is sound, and that we don't * access table space that we haven't cleaned, we re-"poison" the table * space every time we mark it dirty. @@ -1889,16 +2112,6 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par /* See doc/zstd_compression_format.md for detailed format description */ -static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) -{ - U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); - RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, - dstSize_tooSmall); - MEM_writeLE24(dst, cBlockHeader24); - memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); - return ZSTD_blockHeaderSize + srcSize; -} - void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) { const seqDef* const sequences = seqStorePtr->sequencesStart; @@ -1921,25 +2134,20 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) mlCodeTable[seqStorePtr->longLengthPos] = MaxML; } -static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +/* ZSTD_useTargetCBlockSize(): + * Returns if target compressed block size param is being used. + * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. + * Returns 1 if true, 0 otherwise. */ +static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) { - switch (cctxParams->literalCompressionMode) { - case ZSTD_lcm_huffman: - return 0; - case ZSTD_lcm_uncompressed: - return 1; - default: - assert(0 /* impossible: pre-validated */); - /* fall-through */ - case ZSTD_lcm_auto: - return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); - } + DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); + return (cctxParams->targetCBlockSize != 0); } -/* ZSTD_compressSequences_internal(): +/* ZSTD_entropyCompressSequences_internal(): * actually compresses both literals and sequences */ MEM_STATIC size_t -ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, +ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, const ZSTD_CCtx_params* cctxParams, @@ -1949,7 +2157,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, { const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; ZSTD_strategy const strategy = cctxParams->cParams.strategy; - unsigned count[MaxSeq+1]; + unsigned* count = (unsigned*)entropyWorkspace; FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; @@ -1965,8 +2173,12 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, BYTE* seqHead; BYTE* lastNCount = NULL; - DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq); + entropyWorkspace = count + (MaxSeq + 1); + entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); + + DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq); ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= HUF_WORKSPACE_SIZE); /* Compress literals */ { const BYTE* const literals = seqStorePtr->litStart; @@ -1979,14 +2191,14 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, literals, litSize, entropyWorkspace, entropyWkspSize, bmi2); - FORWARD_IF_ERROR(cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); assert(cSize <= dstCapacity); op += cSize; } /* Sequences Header */ RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, - dstSize_tooSmall); + dstSize_tooSmall, "Can't fit seq hdr in output buf!"); if (nbSeq < 128) { *op++ = (BYTE)nbSeq; } else if (nbSeq < LONGNBSEQ) { @@ -2001,7 +2213,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, assert(op <= oend); if (nbSeq==0) { /* Copy the old tables over as if we repeated them */ - memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); + ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); return (size_t)(op - ostart); } @@ -2031,7 +2243,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); if (LLtype == set_compressed) lastNCount = op; op += countSize; @@ -2059,7 +2271,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); if (Offtype == set_compressed) lastNCount = op; op += countSize; @@ -2085,7 +2297,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); if (MLtype == set_compressed) lastNCount = op; op += countSize; @@ -2101,7 +2313,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, CTable_LitLength, llCodeTable, sequences, nbSeq, longOffsets, bmi2); - FORWARD_IF_ERROR(bitstreamSize); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); op += bitstreamSize; assert(op <= oend); /* zstd versions <= 1.3.4 mistakenly report corruption when @@ -2126,7 +2338,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, } MEM_STATIC size_t -ZSTD_compressSequences(seqStore_t* seqStorePtr, +ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, const ZSTD_CCtx_params* cctxParams, @@ -2135,7 +2347,7 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr, void* entropyWorkspace, size_t entropyWkspSize, int bmi2) { - size_t const cSize = ZSTD_compressSequences_internal( + size_t const cSize = ZSTD_entropyCompressSequences_internal( seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity, entropyWorkspace, entropyWkspSize, bmi2); @@ -2145,13 +2357,13 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr, */ if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) return 0; /* block not compressed */ - FORWARD_IF_ERROR(cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed"); /* Check compressibility */ { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); if (cSize >= maxCSize) return 0; /* block not compressed */ } - + DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize); return cSize; } @@ -2160,7 +2372,7 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr, * assumption : strat is a valid strategy */ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) { - static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = { + static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { { ZSTD_compressBlock_fast /* default for 0 */, ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, @@ -2190,7 +2402,17 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo ZSTD_compressBlock_btlazy2_dictMatchState, ZSTD_compressBlock_btopt_dictMatchState, ZSTD_compressBlock_btultra_dictMatchState, - ZSTD_compressBlock_btultra_dictMatchState } + ZSTD_compressBlock_btultra_dictMatchState }, + { NULL /* default for 0 */, + NULL, + NULL, + ZSTD_compressBlock_greedy_dedicatedDictSearch, + ZSTD_compressBlock_lazy_dedicatedDictSearch, + ZSTD_compressBlock_lazy2_dedicatedDictSearch, + NULL, + NULL, + NULL, + NULL } }; ZSTD_blockCompressor selectedCompressor; ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); @@ -2204,7 +2426,7 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, const BYTE* anchor, size_t lastLLSize) { - memcpy(seqStorePtr->lit, anchor, lastLLSize); + ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; } @@ -2225,7 +2447,11 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* Assert that we have correctly flushed the ctx params into the ms's copy */ ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { - ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); + if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { + ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); + } else { + ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); + } return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ } ZSTD_resetSeqStore(&(zc->seqStore)); @@ -2241,10 +2467,10 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* limited update after a very long match */ { const BYTE* const base = ms->window.base; const BYTE* const istart = (const BYTE*)src; - const U32 current = (U32)(istart-base); + const U32 curr = (U32)(istart-base); if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ - if (current > ms->nextToUpdate + 384) - ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384)); + if (curr > ms->nextToUpdate + 384) + ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384)); } /* select and store sequences */ @@ -2264,14 +2490,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) src, srcSize); assert(zc->externSeqStore.pos <= zc->externSeqStore.size); } else if (zc->appliedParams.ldmParams.enableLdm) { - rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0}; + rawSeqStore_t ldmSeqStore = kNullRawSeqStore; ldmSeqStore.seq = zc->ldmSequences; ldmSeqStore.capacity = zc->maxNbLdmSequences; /* Updates ldmSeqStore.size */ FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, &zc->appliedParams.ldmParams, - src, srcSize)); + src, srcSize), ""); /* Updates ldmSeqStore.pos */ lastLLSize = ZSTD_ldm_blockCompress(&ldmSeqStore, @@ -2281,6 +2507,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) assert(ldmSeqStore.pos == ldmSeqStore.size); } else { /* not long range mode */ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); + ms->ldmSeqStore = NULL; lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; @@ -2292,17 +2519,25 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) { const seqStore_t* seqStore = ZSTD_getSeqStore(zc); - const seqDef* seqs = seqStore->sequencesStart; - size_t seqsSize = seqStore->sequences - seqs; + const seqDef* seqStoreSeqs = seqStore->sequencesStart; + size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; + size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); + size_t literalsRead = 0; + size_t lastLLSize; ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; - size_t i; size_t position; int repIdx; + size_t i; + repcodes_t updatedRepcodes; assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); - for (i = 0, position = 0; i < seqsSize; ++i) { - outSeqs[i].offset = seqs[i].offset; - outSeqs[i].litLength = seqs[i].litLength; - outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH; + /* Ensure we have enough space for last literals "sequence" */ + assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); + ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (i = 0; i < seqStoreSeqSize; ++i) { + U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM; + outSeqs[i].litLength = seqStoreSeqs[i].litLength; + outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH; + outSeqs[i].rep = 0; if (i == seqStore->longLengthPos) { if (seqStore->longLengthID == 1) { @@ -2312,42 +2547,47 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) } } - if (outSeqs[i].offset <= ZSTD_REP_NUM) { - outSeqs[i].rep = outSeqs[i].offset; - repIdx = (unsigned int)i - outSeqs[i].offset; - - if (outSeqs[i].litLength == 0) { - if (outSeqs[i].offset < 3) { - --repIdx; + if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) { + /* Derive the correct offset corresponding to a repcode */ + outSeqs[i].rep = seqStoreSeqs[i].offset; + if (outSeqs[i].litLength != 0) { + rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; + } else { + if (outSeqs[i].rep == 3) { + rawOffset = updatedRepcodes.rep[0] - 1; } else { - repIdx = (unsigned int)i - 1; + rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; } - ++outSeqs[i].rep; - } - assert(repIdx >= -3); - outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1]; - if (outSeqs[i].rep == 4) { - --outSeqs[i].offset; } - } else { - outSeqs[i].offset -= ZSTD_REP_NUM; } - - position += outSeqs[i].litLength; - outSeqs[i].matchPos = (unsigned int)position; - position += outSeqs[i].matchLength; + outSeqs[i].offset = rawOffset; + /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode + so we provide seqStoreSeqs[i].offset - 1 */ + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, + seqStoreSeqs[i].offset - 1, + seqStoreSeqs[i].litLength == 0); + literalsRead += outSeqs[i].litLength; } - zc->seqCollector.seqIndex += seqsSize; + /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. + * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker + * for the block boundary, according to the API. + */ + assert(seqStoreLiteralsSize >= literalsRead); + lastLLSize = seqStoreLiteralsSize - literalsRead; + outSeqs[i].litLength = (U32)lastLLSize; + outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0; + seqStoreSeqSize++; + zc->seqCollector.seqIndex += seqStoreSeqSize; } -size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize) +size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize) { const size_t dstCapacity = ZSTD_compressBound(srcSize); - void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem); + void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); SeqCollector seqCollector; - RETURN_ERROR_IF(dst == NULL, memory_allocation); + RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); seqCollector.collectSequences = 1; seqCollector.seqStart = outSeqs; @@ -2356,20 +2596,70 @@ size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, zc->seqCollector = seqCollector; ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); - ZSTD_free(dst, ZSTD_defaultCMem); + ZSTD_customFree(dst, ZSTD_defaultCMem); return zc->seqCollector.seqIndex; } -/* Returns true if the given block is a RLE block */ -static int ZSTD_isRLE(const BYTE *ip, size_t length) { +size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) { + size_t in = 0; + size_t out = 0; + for (; in < seqsSize; ++in) { + if (sequences[in].offset == 0 && sequences[in].matchLength == 0) { + if (in != seqsSize - 1) { + sequences[in+1].litLength += sequences[in].litLength; + } + } else { + sequences[out] = sequences[in]; + ++out; + } + } + return out; +} + +/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */ +static int ZSTD_isRLE(const BYTE* src, size_t length) { + const BYTE* ip = src; + const BYTE value = ip[0]; + const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL); + const size_t unrollSize = sizeof(size_t) * 4; + const size_t unrollMask = unrollSize - 1; + const size_t prefixLength = length & unrollMask; size_t i; - if (length < 2) return 1; - for (i = 1; i < length; ++i) { - if (ip[0] != ip[i]) return 0; + size_t u; + if (length == 1) return 1; + /* Check if prefix is RLE first before using unrolled loop */ + if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) { + return 0; + } + for (i = prefixLength; i != length; i += unrollSize) { + for (u = 0; u < unrollSize; u += sizeof(size_t)) { + if (MEM_readST(ip + i + u) != valueST) { + return 0; + } + } } return 1; } +/* Returns true if the given block may be RLE. + * This is just a heuristic based on the compressibility. + * It may return both false positives and false negatives. + */ +static int ZSTD_maybeRLE(seqStore_t const* seqStore) +{ + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); + size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); + + return nbSeqs < 4 && nbLits < 10; +} + +static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) +{ + ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; + zc->blockState.prevCBlock = zc->blockState.nextCBlock; + zc->blockState.nextCBlock = tmp; +} + static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 frame) @@ -2387,24 +2677,31 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, (unsigned)zc->blockState.matchState.nextToUpdate); { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); - FORWARD_IF_ERROR(bss); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } } if (zc->seqCollector.collectSequences) { ZSTD_copyBlockSequences(zc); + ZSTD_confirmRepcodesAndEntropyTables(zc); return 0; } /* encode sequences and literals */ - cSize = ZSTD_compressSequences(&zc->seqStore, + cSize = ZSTD_entropyCompressSequences(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, dst, dstCapacity, srcSize, - zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + return 0; + } + + if (frame && /* We don't want to emit our first block as a RLE even if it qualifies because * doing so will cause the decoder (cli only) to throw a "should consume all input error." @@ -2420,10 +2717,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, out: if (!ZSTD_isError(cSize) && cSize > 1) { - /* confirm repcodes and entropy tables when emitting a compressed block */ - ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; - zc->blockState.prevCBlock = zc->blockState.nextCBlock; - zc->blockState.nextCBlock = tmp; + ZSTD_confirmRepcodesAndEntropyTables(zc); } /* We check that dictionaries have offset codes available for the first * block. After the first block, the offcode table might not have large @@ -2435,6 +2729,80 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, return cSize; } +static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const size_t bss, U32 lastBlock) +{ + DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); + if (bss == ZSTDbss_compress) { + if (/* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + ZSTD_maybeRLE(&zc->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) + { + return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); + } + /* Attempt superblock compression. + * + * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the + * standard ZSTD_compressBound(). This is a problem, because even if we have + * space now, taking an extra byte now could cause us to run out of space later + * and violate ZSTD_compressBound(). + * + * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. + * + * In order to respect ZSTD_compressBound() we must attempt to emit a raw + * uncompressed block in these cases: + * * cSize == 0: Return code for an uncompressed block. + * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). + * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of + * output space. + * * cSize >= blockBound(srcSize): We have expanded the block too much so + * emit an uncompressed block. + */ + { + size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); + if (cSize != ERROR(dstSize_tooSmall)) { + size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); + if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { + ZSTD_confirmRepcodesAndEntropyTables(zc); + return cSize; + } + } + } + } + + DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); + /* Superblock compression failed, attempt to emit a single no compress block. + * The decoder will be able to stream this block since it is uncompressed. + */ + return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); +} + +static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock) +{ + size_t cSize = 0; + const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + + cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_cwksp* ws, @@ -2478,9 +2846,10 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); - DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); + DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); if (cctx->appliedParams.fParams.checksumFlag && srcSize) XXH64_update(&cctx->xxhState, src, srcSize); @@ -2500,21 +2869,31 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; - { size_t cSize = ZSTD_compressBlock_internal(cctx, - op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, - ip, blockSize, 1 /* frame */); - FORWARD_IF_ERROR(cSize); - if (cSize == 0) { /* block is not compressible */ - cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); - FORWARD_IF_ERROR(cSize); + { size_t cSize; + if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); + assert(cSize > 0); + assert(cSize <= blockSize + ZSTD_blockHeaderSize); } else { - const U32 cBlockHeader = cSize == 1 ? - lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : - lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(op, cBlockHeader); - cSize += ZSTD_blockHeaderSize; + cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize, 1 /* frame */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); + + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + } else { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + cSize += ZSTD_blockHeaderSize; + } } + ip += blockSize; assert(remaining >= blockSize); remaining -= blockSize; @@ -2546,10 +2925,10 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, size_t pos=0; assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); - RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, + "dst buf is too small to fit worst-case frame header size."); DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); - if (params->format == ZSTD_f_zstd1) { MEM_writeLE32(dst, ZSTD_MAGICNUMBER); pos = 4; @@ -2582,7 +2961,8 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, */ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) { - RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, + "dst buf is too small to write frame trailer empty block."); { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */ MEM_writeLE24(dst, cBlockHeader24); return ZSTD_blockHeaderSize; @@ -2591,13 +2971,16 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) { - RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong); + RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, + "wrong cctx stage"); RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, - parameter_unsupported); + parameter_unsupported, + "incompatible with ldm"); cctx->externSeqStore.seq = seq; cctx->externSeqStore.size = nbSeq; cctx->externSeqStore.capacity = nbSeq; cctx->externSeqStore.pos = 0; + cctx->externSeqStore.posInSequence = 0; return 0; } @@ -2618,7 +3001,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, if (frame && (cctx->stage==ZSTDcs_init)) { fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, cctx->pledgedSrcSizePlusOne-1, cctx->dictID); - FORWARD_IF_ERROR(fhSize); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); assert(fhSize <= dstCapacity); dstCapacity -= fhSize; dst = (char*)dst + fhSize; @@ -2645,7 +3028,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, { size_t const cSize = frame ? ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); - FORWARD_IF_ERROR(cSize); + FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); cctx->consumedSrcSize += srcSize; cctx->producedCSize += (cSize + fhSize); assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); @@ -2682,7 +3065,7 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const { DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); - RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); } + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); } @@ -2691,6 +3074,7 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const * @return : 0, or an error code */ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, + ldmState_t* ls, ZSTD_cwksp* ws, ZSTD_CCtx_params const* params, const void* src, size_t srcSize, @@ -2702,6 +3086,11 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_window_update(&ms->window, src, srcSize); ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); + if (params->ldmParams.enableLdm && ls != NULL) { + ZSTD_window_update(&ls->window, src, srcSize); + ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base); + } + /* Assert that we the ms params match the params we're being given */ ZSTD_assertEqualCParams(params->cParams, ms->cParams); @@ -2714,6 +3103,9 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); + if (params->ldmParams.enableLdm && ls != NULL) + ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, ¶ms->ldmParams); + switch(params->cParams.strategy) { case ZSTD_fast: @@ -2726,8 +3118,12 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, case ZSTD_greedy: case ZSTD_lazy: case ZSTD_lazy2: - if (chunk >= HASH_READ_SIZE) + if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) { + assert(chunk == remaining); /* must load everything in one go */ + ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); + } else if (chunk >= HASH_READ_SIZE) { ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); + } break; case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ @@ -2751,102 +3147,91 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, /* Dictionaries that assign zero probability to symbols that show up causes problems - when FSE encoding. Refuse dictionaries that assign zero probability to symbols - that we may encounter during compression. - NOTE: This behavior is not standard and could be improved in the future. */ -static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { + * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check + * and only dictionaries with 100% valid symbols can be assumed valid. + */ +static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) +{ U32 s; - RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted); + if (dictMaxSymbolValue < maxSymbolValue) { + return FSE_repeat_check; + } for (s = 0; s <= maxSymbolValue; ++s) { - RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted); + if (normalizedCounter[s] == 0) { + return FSE_repeat_check; + } } - return 0; + return FSE_repeat_valid; } - -/* Dictionary format : - * See : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format - */ -/*! ZSTD_loadZstdDictionary() : - * @return : dictID, or an error code - * assumptions : magic number supposed already checked - * dictSize supposed >= 8 - */ -static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, - ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - const void* dict, size_t dictSize, - ZSTD_dictTableLoadMethod_e dtlm, - void* workspace) +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + const void* const dict, size_t dictSize) { - const BYTE* dictPtr = (const BYTE*)dict; - const BYTE* const dictEnd = dictPtr + dictSize; short offcodeNCount[MaxOff+1]; unsigned offcodeMaxValue = MaxOff; - size_t dictID; + const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ + const BYTE* const dictEnd = dictPtr + dictSize; + dictPtr += 8; + bs->entropy.huf.repeatMode = HUF_repeat_check; - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); - assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + { unsigned maxSymbolValue = 255; + unsigned hasZeroWeights = 1; + size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, + dictEnd-dictPtr, &hasZeroWeights); - dictPtr += 4; /* skip magic number */ - dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr); - dictPtr += 4; + /* We only set the loaded table as valid if it contains all non-zero + * weights. Otherwise, we set it to check */ + if (!hasZeroWeights) + bs->entropy.huf.repeatMode = HUF_repeat_valid; - { unsigned maxSymbolValue = 255; - size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted); + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); dictPtr += hufHeaderSize; } { unsigned offcodeLog; size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); - /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); /* fill all offset symbols to avoid garbage at end of table */ RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( bs->entropy.fse.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted); + dictionary_corrupted, ""); + /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ dictPtr += offcodeHeaderSize; } { short matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog; size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); - /* Every match length code must have non-zero probability */ - FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( bs->entropy.fse.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted); + dictionary_corrupted, ""); + bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML); dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog; size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); - /* Every literal length code must have non-zero probability */ - FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( bs->entropy.fse.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted); + dictionary_corrupted, ""); + bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL); dictPtr += litlengthHeaderSize; } - RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); bs->rep[0] = MEM_readLE32(dictPtr+0); bs->rep[1] = MEM_readLE32(dictPtr+4); bs->rep[2] = MEM_readLE32(dictPtr+8); @@ -2858,23 +3243,56 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ } - /* All offset values <= dictContentSize + 128 KB must be representable */ - FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); - /* All repCodes must be <= dictContentSize and != 0*/ + /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */ + bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)); + + /* All repCodes must be <= dictContentSize and != 0 */ { U32 u; for (u=0; u<3; u++) { - RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted); - RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted); - } } + RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); + RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); + } } } + + return dictPtr - (const BYTE*)dict; +} - bs->entropy.huf.repeatMode = HUF_repeat_valid; - bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; - bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; - bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; +/* Dictionary format : + * See : + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format + */ +/*! ZSTD_loadZstdDictionary() : + * @return : dictID, or an error code + * assumptions : magic number supposed already checked + * dictSize supposed >= 8 + */ +static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* dict, size_t dictSize, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + size_t dictID; + size_t eSize; + + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + + dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); + eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize); + FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed"); + dictPtr += eSize; + + { + size_t const dictContentSize = (size_t)(dictEnd - dictPtr); FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( - ms, ws, params, dictPtr, dictContentSize, dtlm)); - return dictID; + ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); } + return dictID; } /** ZSTD_compress_insertDictionary() : @@ -2882,6 +3300,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, + ldmState_t* ls, ZSTD_cwksp* ws, const ZSTD_CCtx_params* params, const void* dict, size_t dictSize, @@ -2891,7 +3310,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, { DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); if ((dict==NULL) || (dictSize<8)) { - RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); return 0; } @@ -2899,15 +3318,15 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, /* dict restricted modes */ if (dictContentType == ZSTD_dct_rawContent) - return ZSTD_loadDictionaryContent(ms, ws, params, dict, dictSize, dtlm); + return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { if (dictContentType == ZSTD_dct_auto) { DEBUGLOG(4, "raw content dictionary detected"); return ZSTD_loadDictionaryContent( - ms, ws, params, dict, dictSize, dtlm); + ms, ls, ws, params, dict, dictSize, dtlm); } - RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); assert(0); /* impossible */ } @@ -2917,7 +3336,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, } #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) -#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6) +#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL) /*! ZSTD_compressBegin_internal() : * @return : 0, or an error code */ @@ -2944,17 +3363,18 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, } FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, - ZSTDcrp_makeClean, zbuff) ); + ZSTDcrp_makeClean, zbuff) , ""); { size_t const dictID = cdict ? ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->workspace, params, cdict->dictContent, cdict->dictContentSize, - dictContentType, dtlm, cctx->entropyWorkspace) + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, + cdict->dictContentSize, cdict->dictContentType, dtlm, + cctx->entropyWorkspace) : ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->workspace, params, dict, dictSize, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); - FORWARD_IF_ERROR(dictID); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; } @@ -2971,7 +3391,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, { DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); /* compression parameters verification and optimization */ - FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) ); + FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); return ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType, dtlm, cdict, @@ -2986,7 +3406,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, ZSTD_parameters params, unsigned long long pledgedSrcSize) { ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); return ZSTD_compressBegin_advanced_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL /*cdict*/, @@ -2995,9 +3415,9 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); @@ -3024,7 +3444,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) /* special case : empty frame */ if (cctx->stage == ZSTDcs_init) { fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); - FORWARD_IF_ERROR(fhSize); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); dstCapacity -= fhSize; op += fhSize; cctx->stage = ZSTDcs_ongoing; @@ -3033,7 +3453,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) if (cctx->stage != ZSTDcs_ending) { /* write one last empty block, make it the "last" block */ U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; - RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); MEM_writeLE32(op, cBlockHeader24); op += ZSTD_blockHeaderSize; dstCapacity -= ZSTD_blockHeaderSize; @@ -3041,7 +3461,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) if (cctx->appliedParams.fParams.checksumFlag) { U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); - RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); MEM_writeLE32(op, checksum); op += 4; @@ -3059,9 +3479,9 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 1 /* last chunk */); - FORWARD_IF_ERROR(cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed"); endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); - FORWARD_IF_ERROR(endResult); + FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed"); assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); @@ -3076,12 +3496,11 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, return cSize + endResult; } - static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, - ZSTD_parameters params) + const ZSTD_parameters* params) { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); @@ -3100,12 +3519,12 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, ZSTD_parameters params) { DEBUGLOG(4, "ZSTD_compress_advanced"); - FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams)); + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); return ZSTD_compress_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, - params); + ¶ms); } /* Internal */ @@ -3119,7 +3538,7 @@ size_t ZSTD_compress_advanced_internal( DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, - params, srcSize, ZSTDb_not_buffered) ); + params, srcSize, ZSTDb_not_buffered) , ""); return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); } @@ -3129,8 +3548,9 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0); - ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); + ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); + DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); assert(params.fParams.contentSizeFlag == 1); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); } @@ -3150,10 +3570,17 @@ size_t ZSTD_compress(void* dst, size_t dstCapacity, int compressionLevel) { size_t result; +#if ZSTD_COMPRESS_HEAPMODE + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed"); + result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtx(cctx); +#else ZSTD_CCtx ctxBody; ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem); result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ +#endif return result; } @@ -3176,7 +3603,7 @@ size_t ZSTD_estimateCDictSize_advanced( size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); } @@ -3194,20 +3621,25 @@ static size_t ZSTD_initCDict_internal( const void* dictBuffer, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams) + ZSTD_CCtx_params params) { DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); - assert(!ZSTD_checkCParams(cParams)); - cdict->matchState.cParams = cParams; + assert(!ZSTD_checkCParams(params.cParams)); + cdict->matchState.cParams = params.cParams; + cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; + if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) { + cdict->matchState.dedicatedDictSearch = 0; + } if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { cdict->dictContent = dictBuffer; } else { void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); - RETURN_ERROR_IF(!internalBuffer, memory_allocation); + RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!"); cdict->dictContent = internalBuffer; - memcpy(internalBuffer, dictBuffer, dictSize); + ZSTD_memcpy(internalBuffer, dictBuffer, dictSize); } cdict->dictContentSize = dictSize; + cdict->dictContentType = dictContentType; cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); @@ -3217,23 +3649,20 @@ static size_t ZSTD_initCDict_internal( FORWARD_IF_ERROR(ZSTD_reset_matchState( &cdict->matchState, &cdict->workspace, - &cParams, + ¶ms.cParams, ZSTDcrp_makeClean, ZSTDirp_reset, - ZSTD_resetTarget_CDict)); + ZSTD_resetTarget_CDict), ""); /* (Maybe) load the dictionary * Skips loading the dictionary if it is < 8 bytes. */ - { ZSTD_CCtx_params params; - memset(¶ms, 0, sizeof(params)); - params.compressionLevel = ZSTD_CLEVEL_DEFAULT; + { params.compressionLevel = ZSTD_CLEVEL_DEFAULT; params.fParams.contentSizeFlag = 1; - params.cParams = cParams; { size_t const dictID = ZSTD_compress_insertDictionary( - &cdict->cBlockState, &cdict->matchState, &cdict->workspace, + &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, ¶ms, cdict->dictContent, cdict->dictContentSize, dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); - FORWARD_IF_ERROR(dictID); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); assert(dictID <= (size_t)(U32)-1); cdict->dictID = (U32)dictID; } @@ -3242,13 +3671,11 @@ static size_t ZSTD_initCDict_internal( return 0; } -ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, +static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, ZSTD_compressionParameters cParams, ZSTD_customMem customMem) { - DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType); - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; { size_t const workspaceSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + @@ -3256,16 +3683,16 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); - void* const workspace = ZSTD_malloc(workspaceSize, customMem); + void* const workspace = ZSTD_customMalloc(workspaceSize, customMem); ZSTD_cwksp ws; ZSTD_CDict* cdict; if (!workspace) { - ZSTD_free(workspace, customMem); + ZSTD_customFree(workspace, customMem); return NULL; } - ZSTD_cwksp_init(&ws, workspace, workspaceSize); + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc); cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); assert(cdict != NULL); @@ -3273,35 +3700,94 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, cdict->customMem = customMem; cdict->compressionLevel = 0; /* signals advanced API usage */ - if (ZSTD_isError( ZSTD_initCDict_internal(cdict, - dictBuffer, dictSize, - dictLoadMethod, dictContentType, - cParams) )) { - ZSTD_freeCDict(cdict); - return NULL; - } - return cdict; } } +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_memset(&cctxParams, 0, sizeof(cctxParams)); + ZSTD_CCtxParams_init(&cctxParams, 0); + cctxParams.cParams = cParams; + cctxParams.customMem = customMem; + return ZSTD_createCDict_advanced2( + dictBuffer, dictSize, + dictLoadMethod, dictContentType, + &cctxParams, customMem); +} + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* originalCctxParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams = *originalCctxParams; + ZSTD_compressionParameters cParams; + ZSTD_CDict* cdict; + + DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + if (cctxParams.enableDedicatedDictSearch) { + cParams = ZSTD_dedicatedDictSearch_getCParams( + cctxParams.compressionLevel, dictSize); + ZSTD_overrideCParams(&cParams, &cctxParams.cParams); + } else { + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) { + /* Fall back to non-DDSS params */ + cctxParams.enableDedicatedDictSearch = 0; + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + cctxParams.cParams = cParams; + + cdict = ZSTD_createCDict_advanced_internal(dictSize, + dictLoadMethod, cctxParams.cParams, + customMem); + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + cctxParams) )) { + ZSTD_freeCDict(cdict); + return NULL; + } + + return cdict; +} + ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); - ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); if (cdict) - cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; return cdict; } ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); - return ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; } size_t ZSTD_freeCDict(ZSTD_CDict* cdict) @@ -3311,7 +3797,7 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict) int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); ZSTD_cwksp_free(&cdict->workspace, cMem); if (!cdictInWorkspace) { - ZSTD_free(cdict, cMem); + ZSTD_customFree(cdict, cMem); } return 0; } @@ -3344,12 +3830,13 @@ const ZSTD_CDict* ZSTD_initStaticCDict( + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + matchStateSize; ZSTD_CDict* cdict; + ZSTD_CCtx_params params; if ((size_t)workspace & 7) return NULL; /* 8-aligned */ { ZSTD_cwksp ws; - ZSTD_cwksp_init(&ws, workspace, workspaceSize); + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); if (cdict == NULL) return NULL; ZSTD_cwksp_move(&cdict->workspace, &ws); @@ -3359,10 +3846,13 @@ const ZSTD_CDict* ZSTD_initStaticCDict( (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); if (workspaceSize < neededSize) return NULL; + ZSTD_CCtxParams_init(¶ms, 0); + params.cParams = cParams; + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dict, dictSize, dictLoadMethod, dictContentType, - cParams) )) + params) )) return NULL; return cdict; @@ -3374,6 +3864,17 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) return cdict->matchState.cParams; } +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; + return cdict->dictID; +} + + /* ZSTD_compressBegin_usingCDict_advanced() : * cdict must be != NULL */ size_t ZSTD_compressBegin_usingCDict_advanced( @@ -3381,7 +3882,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced( ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) { DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); - RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); + RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); { ZSTD_CCtx_params params = cctx->requestedParams; params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER @@ -3425,7 +3926,7 @@ size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, const void* src, size_t srcSize, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) { - FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); } @@ -3481,32 +3982,12 @@ size_t ZSTD_CStreamOutSize(void) return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; } -static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, - const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType, - const ZSTD_CDict* const cdict, - ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize) +static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) { - DEBUGLOG(4, "ZSTD_resetCStream_internal"); - /* Finalize the compression parameters */ - params.cParams = ZSTD_getCParamsFromCCtxParams(¶ms, pledgedSrcSize, dictSize); - /* params are supposed to be fully validated at this point */ - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); - assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - - FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, - dict, dictSize, dictContentType, ZSTD_dtlm_fast, - cdict, - ¶ms, pledgedSrcSize, - ZSTDb_buffered) ); - - cctx->inToCompress = 0; - cctx->inBuffPos = 0; - cctx->inBuffTarget = cctx->blockSize - + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */ - cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; - cctx->streamStage = zcss_load; - cctx->frameEnded = 0; - return 0; /* ready to go */ + if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) + return ZSTD_cpm_attachDict; + else + return ZSTD_cpm_noAttachDict; } /* ZSTD_resetCStream(): @@ -3519,8 +4000,8 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) */ U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); return 0; } @@ -3534,16 +4015,16 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_initCStream_internal"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); zcs->requestedParams = *params; assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if (dict) { - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); } else { /* Dictionary is cleared if !cdict */ - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); } return 0; } @@ -3556,10 +4037,10 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); zcs->requestedParams.fParams = fParams; - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); return 0; } @@ -3567,8 +4048,8 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) { DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); return 0; } @@ -3587,20 +4068,20 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, */ U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; DEBUGLOG(4, "ZSTD_initCStream_advanced"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); - zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, params); - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, ¶ms); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); return 0; } size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) { DEBUGLOG(4, "ZSTD_initCStream_usingDict"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); return 0; } @@ -3612,19 +4093,19 @@ size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigne */ U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; DEBUGLOG(4, "ZSTD_initCStream_srcSize"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); return 0; } size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) { DEBUGLOG(4, "ZSTD_initCStream"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); return 0; } @@ -3637,14 +4118,6 @@ static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) return hintInSize; } -static size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - size_t const length = MIN(dstCapacity, srcSize); - if (length) memcpy(dst, src, length); - return length; -} - /** ZSTD_compressStream_generic(): * internal function for all *compressStream*() variants * non-static, because can be called from zstdmt_compress.c @@ -3655,21 +4128,26 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, ZSTD_EndDirective const flushMode) { const char* const istart = (const char*)input->src; - const char* const iend = istart + input->size; - const char* ip = istart + input->pos; + const char* const iend = input->size != 0 ? istart + input->size : istart; + const char* ip = input->pos != 0 ? istart + input->pos : istart; char* const ostart = (char*)output->dst; - char* const oend = ostart + output->size; - char* op = ostart + output->pos; + char* const oend = output->size != 0 ? ostart + output->size : ostart; + char* op = output->pos != 0 ? ostart + output->pos : ostart; U32 someMoreWork = 1; /* check expectations */ DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); - assert(zcs->inBuff != NULL); - assert(zcs->inBuffSize > 0); - assert(zcs->outBuff != NULL); - assert(zcs->outBuffSize > 0); + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + assert(zcs->inBuff != NULL); + assert(zcs->inBuffSize > 0); + } + if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) { + assert(zcs->outBuff != NULL); + assert(zcs->outBuffSize > 0); + } assert(output->pos <= output->size); assert(input->pos <= input->size); + assert((U32)flushMode <= (U32)ZSTD_e_end); while (someMoreWork) { switch(zcs->streamStage) @@ -3679,26 +4157,29 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, case zcss_load: if ( (flushMode == ZSTD_e_end) - && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)) /* enough dstCapacity */ + && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */ + || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ && (zcs->inBuffPos == 0) ) { /* shortcut to compression pass directly into output buffer */ size_t const cSize = ZSTD_compressEnd(zcs, op, oend-op, ip, iend-ip); DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); - FORWARD_IF_ERROR(cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); ip = iend; op += cSize; zcs->frameEnded = 1; ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); someMoreWork = 0; break; } - /* complete loading into inBuffer */ - { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; + /* complete loading into inBuffer in buffered mode */ + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; size_t const loaded = ZSTD_limitCopy( zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend-ip); zcs->inBuffPos += loaded; - ip += loaded; + if (loaded != 0) + ip += loaded; if ( (flushMode == ZSTD_e_continue) && (zcs->inBuffPos < zcs->inBuffTarget) ) { /* not enough input to fill full block : stop here */ @@ -3712,31 +4193,49 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, } /* compress current block (note : this stage cannot be stopped in the middle) */ DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); - { void* cDst; + { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered); + void* cDst; size_t cSize; - size_t const iSize = zcs->inBuffPos - zcs->inToCompress; size_t oSize = oend-op; - unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); - if (oSize >= ZSTD_compressBound(iSize)) + size_t const iSize = inputBuffered + ? zcs->inBuffPos - zcs->inToCompress + : MIN((size_t)(iend - ip), zcs->blockSize); + if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) cDst = op; /* compress into output buffer, to skip flush stage */ else cDst = zcs->outBuff, oSize = zcs->outBuffSize; - cSize = lastBlock ? - ZSTD_compressEnd(zcs, cDst, oSize, - zcs->inBuff + zcs->inToCompress, iSize) : - ZSTD_compressContinue(zcs, cDst, oSize, - zcs->inBuff + zcs->inToCompress, iSize); - FORWARD_IF_ERROR(cSize); - zcs->frameEnded = lastBlock; - /* prepare next block */ - zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; - if (zcs->inBuffTarget > zcs->inBuffSize) - zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; - DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", - (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); - if (!lastBlock) - assert(zcs->inBuffTarget <= zcs->inBuffSize); - zcs->inToCompress = zcs->inBuffPos; + if (inputBuffered) { + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize); + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + /* prepare next block */ + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + if (zcs->inBuffTarget > zcs->inBuffSize) + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; + DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", + (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); + if (!lastBlock) + assert(zcs->inBuffTarget <= zcs->inBuffSize); + zcs->inToCompress = zcs->inBuffPos; + } else { + unsigned const lastBlock = (ip + iSize == iend); + assert(flushMode == ZSTD_e_end /* Already validated */); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize); + /* Consume the input prior to error checking to mirror buffered mode. */ + if (iSize > 0) + ip += iSize; + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + if (lastBlock) + assert(ip == iend); + } if (cDst == op) { /* no need to flush */ op += cSize; if (zcs->frameEnded) { @@ -3753,12 +4252,14 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, /* fall-through */ case zcss_flush: DEBUGLOG(5, "flush stage"); + assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered); { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), zcs->outBuff + zcs->outBuffFlushedSize, toFlush); DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); - op += flushed; + if (flushed) + op += flushed; zcs->outBuffFlushedSize += flushed; if (toFlush!=flushed) { /* flush not fully completed, presumably because dst is too small */ @@ -3802,10 +4303,120 @@ static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { - FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) ); + FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); return ZSTD_nextInputSizeHint_MTorST(zcs); } +/* After a compression call set the expected input/output buffer. + * This is validated at the start of the next compression call. + */ +static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + cctx->expectedInBuffer = *input; + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + cctx->expectedOutBufferSize = output->size - output->pos; + } +} + +/* Validate that the input/output buffers match the expectations set by + * ZSTD_setBufferExpectations. + */ +static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx, + ZSTD_outBuffer const* output, + ZSTD_inBuffer const* input, + ZSTD_EndDirective endOp) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + ZSTD_inBuffer const expect = cctx->expectedInBuffer; + if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size) + RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!"); + if (endOp != ZSTD_e_end) + RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!"); + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + size_t const outBufferSize = output->size - output->pos; + if (cctx->expectedOutBufferSize != outBufferSize) + RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!"); + } + return 0; +} + +static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, + ZSTD_EndDirective endOp, + size_t inSize) { + ZSTD_CCtx_params params = cctx->requestedParams; + ZSTD_prefixDict const prefixDict = cctx->prefixDict; + FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ + assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ + if (cctx->cdict) + params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */ + DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ + { + size_t const dictSize = prefixDict.dict + ? prefixDict.dictSize + : (cctx->cdict ? cctx->cdict->dictContentSize : 0); + ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1); + params.cParams = ZSTD_getCParamsFromCCtxParams( + ¶ms, cctx->pledgedSrcSizePlusOne-1, + dictSize, mode); + } + + if (ZSTD_CParams_shouldEnableLdm(¶ms.cParams)) { + /* Enable LDM by default for optimal parser and window size >= 128MB */ + DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)"); + params.ldmParams.enableLdm = 1; + } + +#ifdef ZSTD_MULTITHREAD + if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { + params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ + } + if (params.nbWorkers > 0) { + /* mt context creation */ + if (cctx->mtctx == NULL) { + DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", + params.nbWorkers); + cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool); + RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!"); + } + /* mt compression */ + DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); + FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( + cctx->mtctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, + cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); + cctx->streamStage = zcss_load; + cctx->appliedParams = params; + } else +#endif + { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast, + cctx->cdict, + ¶ms, pledgedSrcSize, + ZSTDb_buffered) , ""); + assert(cctx->appliedParams.nbWorkers == 0); + cctx->inToCompress = 0; + cctx->inBuffPos = 0; + if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) { + /* for small input: avoid automatic flush on reaching end of block, since + * it would require to add a 3-bytes null block to end frame + */ + cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize); + } else { + cctx->inBuffTarget = 0; + } + cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; + cctx->streamStage = zcss_load; + cctx->frameEnded = 0; + } + return 0; +} size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, ZSTD_outBuffer* output, @@ -3814,82 +4425,65 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, { DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); /* check conditions */ - RETURN_ERROR_IF(output->pos > output->size, GENERIC); - RETURN_ERROR_IF(input->pos > input->size, GENERIC); - assert(cctx!=NULL); + RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer"); + RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer"); + RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective"); + assert(cctx != NULL); /* transparent initialization stage */ if (cctx->streamStage == zcss_init) { - ZSTD_CCtx_params params = cctx->requestedParams; - ZSTD_prefixDict const prefixDict = cctx->prefixDict; - FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) ); /* Init the local dict if present. */ - memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ - assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ - DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); - if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */ - params.cParams = ZSTD_getCParamsFromCCtxParams( - &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); - - -#ifdef ZSTD_MULTITHREAD - if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { - params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ - } - if (params.nbWorkers > 0) { - /* mt context creation */ - if (cctx->mtctx == NULL) { - DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", - params.nbWorkers); - cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem); - RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation); - } - /* mt compression */ - DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); - FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( - cctx->mtctx, - prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent, - cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); - cctx->streamStage = zcss_load; - cctx->appliedParams.nbWorkers = params.nbWorkers; - } else -#endif - { FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx, - prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, - cctx->cdict, - params, cctx->pledgedSrcSizePlusOne-1) ); - assert(cctx->streamStage == zcss_load); - assert(cctx->appliedParams.nbWorkers == 0); - } } + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed"); + ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ + } /* end of transparent initialization stage */ + FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers"); /* compression stage */ #ifdef ZSTD_MULTITHREAD if (cctx->appliedParams.nbWorkers > 0) { - int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end); size_t flushMin; - assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */); if (cctx->cParamsChanged) { ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); cctx->cParamsChanged = 0; } - do { + for (;;) { + size_t const ipos = input->pos; + size_t const opos = output->pos; flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); if ( ZSTD_isError(flushMin) || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); } - FORWARD_IF_ERROR(flushMin); - } while (forceMaxProgress && flushMin != 0 && output->pos < output->size); + FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); + + if (endOp == ZSTD_e_continue) { + /* We only require some progress with ZSTD_e_continue, not maximal progress. + * We're done if we've consumed or produced any bytes, or either buffer is + * full. + */ + if (input->pos != ipos || output->pos != opos || input->pos == input->size || output->pos == output->size) + break; + } else { + assert(endOp == ZSTD_e_flush || endOp == ZSTD_e_end); + /* We require maximal progress. We're done when the flush is complete or the + * output buffer is full. + */ + if (flushMin == 0 || output->pos == output->size) + break; + } + } DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); /* Either we don't require maximum forward progress, we've finished the * flush, or we are out of output space. */ - assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size); + assert(endOp == ZSTD_e_continue || flushMin == 0 || output->pos == output->size); + ZSTD_setBufferExpectations(cctx, output, input); return flushMin; } #endif - FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) ); + FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , ""); DEBUGLOG(5, "completed ZSTD_compressStream2"); + ZSTD_setBufferExpectations(cctx, output, input); return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ } @@ -3912,23 +4506,435 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { + ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode; + ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode; + DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + /* Enable stable input/output buffers. */ + cctx->requestedParams.inBufferMode = ZSTD_bm_stable; + cctx->requestedParams.outBufferMode = ZSTD_bm_stable; { size_t oPos = 0; size_t iPos = 0; size_t const result = ZSTD_compressStream2_simpleArgs(cctx, dst, dstCapacity, &oPos, src, srcSize, &iPos, ZSTD_e_end); - FORWARD_IF_ERROR(result); + /* Reset to the original values. */ + cctx->requestedParams.inBufferMode = originalInBufferMode; + cctx->requestedParams.outBufferMode = originalOutBufferMode; + FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); if (result != 0) { /* compression not completed, due to lack of output space */ assert(oPos == dstCapacity); - RETURN_ERROR(dstSize_tooSmall); + RETURN_ERROR(dstSize_tooSmall, ""); } assert(iPos == srcSize); /* all input is expected consumed */ return oPos; } } +typedef struct { + U32 idx; /* Index in array of ZSTD_Sequence */ + U32 posInSequence; /* Position within sequence at idx */ + size_t posInSrc; /* Number of bytes given by sequences provided so far */ +} ZSTD_sequencePosition; + +/* Returns a ZSTD error code if sequence is not valid */ +static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength, + size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) { + size_t offsetBound; + U32 windowSize = 1 << windowLog; + /* posInSrc represents the amount of data the the decoder would decode up to this point. + * As long as the amount of data decoded is less than or equal to window size, offsets may be + * larger than the total length of output decoded in order to reference the dict, even larger than + * window size. After output surpasses windowSize, we're limited to windowSize offsets again. + */ + offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; + RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!"); + RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small"); + return 0; +} + +/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ +static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) { + U32 offCode = rawOffset + ZSTD_REP_MOVE; + U32 repCode = 0; + + if (!ll0 && rawOffset == rep[0]) { + repCode = 1; + } else if (rawOffset == rep[1]) { + repCode = 2 - ll0; + } else if (rawOffset == rep[2]) { + repCode = 3 - ll0; + } else if (ll0 && rawOffset == rep[0] - 1) { + repCode = 3; + } + if (repCode) { + /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */ + offCode = repCode - 1; + } + return offCode; +} + +/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of + * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. + */ +static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + BYTE const* ip = (BYTE const*)(src); + const BYTE* const iend = ip + blockSize; + repcodes_t updatedRepcodes; + U32 dictSize; + U32 litLength; + U32 matchLength; + U32 ll0; + U32 offCode; + + if (cctx->cdict) { + dictSize = (U32)cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = (U32)cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { + litLength = inSeqs[idx].litLength; + matchLength = inSeqs[idx].matchLength; + ll0 = litLength == 0; + offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + if (inSeqs[idx].litLength) { + DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength); + ip += inSeqs[idx].litLength; + seqPos->posInSrc += inSeqs[idx].litLength; + } + RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!"); + seqPos->idx = idx+1; + return 0; +} + +/* Returns the number of bytes to move the current read position back by. Only non-zero + * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something + * went wrong. + * + * This function will attempt to scan through blockSize bytes represented by the sequences + * in inSeqs, storing any (partial) sequences. + * + * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to + * avoid splitting a match, or to avoid splitting a match such that it would produce a match + * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. + */ +static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + U32 startPosInSequence = seqPos->posInSequence; + U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; + size_t dictSize; + BYTE const* ip = (BYTE const*)(src); + BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ + repcodes_t updatedRepcodes; + U32 bytesAdjustment = 0; + U32 finalMatchSplit = 0; + U32 litLength; + U32 matchLength; + U32 rawOffset; + U32 offCode; + + if (cctx->cdict) { + dictSize = cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); + DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { + const ZSTD_Sequence currSeq = inSeqs[idx]; + litLength = currSeq.litLength; + matchLength = currSeq.matchLength; + rawOffset = currSeq.offset; + + /* Modify the sequence depending on where endPosInSequence lies */ + if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { + if (startPosInSequence >= litLength) { + startPosInSequence -= litLength; + litLength = 0; + matchLength -= startPosInSequence; + } else { + litLength -= startPosInSequence; + } + /* Move to the next sequence */ + endPosInSequence -= currSeq.litLength + currSeq.matchLength; + startPosInSequence = 0; + idx++; + } else { + /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence + does not reach the end of the match. So, we have to split the sequence */ + DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", + currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence); + if (endPosInSequence > litLength) { + U32 firstHalfMatchLength; + litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence; + firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength; + if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) { + /* Only ever split the match if it is larger than the block size */ + U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence; + if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) { + /* Move the endPosInSequence backward so that it creates match of minMatch length */ + endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + firstHalfMatchLength -= bytesAdjustment; + } + matchLength = firstHalfMatchLength; + /* Flag that we split the last match - after storing the sequence, exit the loop, + but keep the value of endPosInSequence */ + finalMatchSplit = 1; + } else { + /* Move the position in sequence backwards so that we don't split match, and break to store + * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence + * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so + * would cause the first half of the match to be too small + */ + bytesAdjustment = endPosInSequence - currSeq.litLength; + endPosInSequence = currSeq.litLength; + break; + } + } else { + /* This sequence ends inside the literals, break to store the last literals */ + break; + } + } + /* Check if this offset can be represented with a repcode */ + { U32 ll0 = (litLength == 0); + offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + } + + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); + seqPos->idx = idx; + seqPos->posInSequence = endPosInSequence; + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + iend -= bytesAdjustment; + if (ip != iend) { + /* Store any last literals */ + U32 lastLLSize = (U32)(iend - ip); + assert(ip <= iend); + DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize); + seqPos->posInSrc += lastLLSize; + } + + return bytesAdjustment; +} + +typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); +static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) { + ZSTD_sequenceCopier sequenceCopier = NULL; + assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); + if (mode == ZSTD_sf_explicitBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreExplicitBlockDelim; + } else if (mode == ZSTD_sf_noBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreNoBlockDelim; + } + assert(sequenceCopier != NULL); + return sequenceCopier; +} + +/* Compress, block-by-block, all of the sequences given. + * + * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error. + */ +static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) { + size_t cSize = 0; + U32 lastBlock; + size_t blockSize; + size_t compressedSeqsSize; + size_t remaining = srcSize; + ZSTD_sequencePosition seqPos = {0, 0, 0}; + + BYTE const* ip = (BYTE const*)src; + BYTE* op = (BYTE*)dst; + ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); + + DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); + /* Special case: empty frame */ + if (remaining == 0) { + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header"); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + cSize += ZSTD_blockHeaderSize; + } + + while (remaining) { + size_t cBlockSize; + size_t additionalByteAdjustment; + lastBlock = remaining <= cctx->blockSize; + blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize; + ZSTD_resetSeqStore(&cctx->seqStore); + DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize); + + additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize); + FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy"); + blockSize -= additionalByteAdjustment; + + /* If blocks are too small, emit as a nocompress block */ + if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); + DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); + cSize += cBlockSize; + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + continue; + } + + compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore, + &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, + &cctx->appliedParams, + op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, + blockSize, + cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + cctx->bmi2); + FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); + DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize); + + if (!cctx->isFirstBlock && + ZSTD_maybeRLE(&cctx->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + compressedSeqsSize = 1; + } + + if (compressedSeqsSize == 0) { + /* ZSTD_noCompressBlock writes the block header as well */ + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); + DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize); + } else if (compressedSeqsSize == 1) { + cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed"); + DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize); + } else { + U32 cBlockHeader; + /* Error checking and repcodes update */ + ZSTD_confirmRepcodesAndEntropyTables(cctx); + if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + /* Write block header into beginning of block*/ + cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); + MEM_writeLE24(op, cBlockHeader); + cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; + DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize); + } + + cSize += cBlockSize; + DEBUGLOG(4, "cSize running total: %zu", cSize); + + if (lastBlock) { + break; + } else { + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + cctx->isFirstBlock = 0; + } + } + + return cSize; +} + +size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) { + BYTE* op = (BYTE*)dst; + size_t cSize = 0; + size_t compressedBlocksSize = 0; + size_t frameHeaderSize = 0; + + /* Transparent initialization stage, same as compressStream2() */ + DEBUGLOG(3, "ZSTD_compressSequences()"); + assert(cctx != NULL); + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed"); + /* Begin writing output, starting with frame header */ + frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID); + op += frameHeaderSize; + dstCapacity -= frameHeaderSize; + cSize += frameHeaderSize; + if (cctx->appliedParams.fParams.checksumFlag && srcSize) { + XXH64_update(&cctx->xxhState, src, srcSize); + } + /* cSize includes block header size and compressed sequences size */ + compressedBlocksSize = ZSTD_compressSequences_internal(cctx, + op, dstCapacity, + inSeqs, inSeqsSize, + src, srcSize); + FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!"); + cSize += compressedBlocksSize; + dstCapacity -= compressedBlocksSize; + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum); + MEM_writeLE32((char*)dst + cSize, checksum); + cSize += 4; + } + + DEBUGLOG(3, "Final compressed size: %zu", cSize); + return cSize; +} + /*====== Finalize ======*/ /*! ZSTD_flushStream() : @@ -3944,7 +4950,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) { ZSTD_inBuffer input = { NULL, 0, 0 }; size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); - FORWARD_IF_ERROR( remainingToFlush ); + FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ /* single thread mode : attempt to calculate remaining to flush more precisely */ { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; @@ -4069,35 +5075,134 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV }, }; -/*! ZSTD_getCParams() : +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict); + switch (cParams.strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } + return cParams; +} + +static int ZSTD_dedicatedDictSearch_isSupported( + ZSTD_compressionParameters const* cParams) +{ + return (cParams->strategy >= ZSTD_greedy) && (cParams->strategy <= ZSTD_lazy2); +} + +/** + * Reverses the adjustment applied to cparams when enabling dedicated dict + * search. This is used to recover the params set to be used in the working + * context. (Otherwise, those tables would also grow.) + */ +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams) { + switch (cParams->strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } +} + +static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + switch (mode) { + case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + case ZSTD_cpm_createCDict: + break; + case ZSTD_cpm_attachDict: + dictSize = 0; + break; + default: + assert(0); + break; + } + { int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; + size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; + return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; + } +} + +/*! ZSTD_getCParams_internal() : * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. - * Size values are optional, provide 0 if not known or unused */ -ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) + * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. + * Use dictSize == 0 for unknown or unused. + * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */ +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { - size_t const addedSize = srcSizeHint ? 0 : 500; - U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : ZSTD_CONTENTSIZE_UNKNOWN; /* intentional overflow for srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN */ + U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode); U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); - int row = compressionLevel; - DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel); + int row; + DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel); + + /* row */ if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ - if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ - if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; + else if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ + else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; + else row = compressionLevel; + { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; - if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ - return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); /* refine parameters based on srcSize & dictSize */ + /* acceleration factor */ + if (compressionLevel < 0) { + int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel); + cp.targetLength = (unsigned)(-clampedCompressionLevel); + } + /* refine parameters based on srcSize & dictSize */ + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode); } } +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. + * Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) +{ + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} + /*! ZSTD_getParams() : * same idea as ZSTD_getCParams() * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). * Fields of `ZSTD_frameParameters` are set to default values */ -ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { ZSTD_parameters params; - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); - memset(¶ms, 0, sizeof(params)); + ZSTD_memset(¶ms, 0, sizeof(params)); params.cParams = cParams; params.fParams.contentSizeFlag = 1; return params; } + +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} diff --git a/zstd_compress_internal.h b/zstd_compress_internal.h index 14036f8..80d150a 100644 --- a/zstd_compress_internal.h +++ b/zstd_compress_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -28,7 +28,6 @@ extern "C" { #endif - /*-************************************* * Constants ***************************************/ @@ -64,7 +63,7 @@ typedef struct { } ZSTD_localDict; typedef struct { - U32 CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)]; HUF_repeat repeatMode; } ZSTD_hufCTables_t; @@ -83,10 +82,27 @@ typedef struct { } ZSTD_entropyCTables_t; typedef struct { - U32 off; - U32 len; + U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */ + U32 len; /* Raw length of match */ } ZSTD_match_t; +typedef struct { + U32 offset; /* Offset of sequence */ + U32 litLength; /* Length of literals prior to match */ + U32 matchLength; /* Raw length of match */ +} rawSeq; + +typedef struct { + rawSeq* seq; /* The start of the sequences */ + size_t pos; /* The index in seq where reading stopped. pos <= size. */ + size_t posInSequence; /* The position within the sequence at seq[pos] where reading + stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */ + size_t size; /* The number of sequences. <= capacity. */ + size_t capacity; /* The capacity starting from `seq` pointer */ +} rawSeqStore_t; + +UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; + typedef struct { int price; U32 off; @@ -147,9 +163,13 @@ struct ZSTD_matchState_t { U32* hashTable; U32* hashTable3; U32* chainTable; + int dedicatedDictSearch; /* Indicates whether this matchState is using the + * dedicated dictionary search structure. + */ optState_t opt; /* optimal parser state */ const ZSTD_matchState_t* dictMatchState; ZSTD_compressionParameters cParams; + const rawSeqStore_t* ldmSeqStore; }; typedef struct { @@ -166,6 +186,7 @@ typedef struct { typedef struct { ZSTD_window_t window; /* State for the window round buffer management */ ldmEntry_t* hashTable; + U32 loadedDictEnd; BYTE* bucketOffsets; /* Next position in bucket to insert entry */ U64 hashPower; /* Used to compute the rolling hash. * Depends on ldmParams.minMatchLength */ @@ -180,19 +201,6 @@ typedef struct { U32 windowLog; /* Window log for the LDM */ } ldmParams_t; -typedef struct { - U32 offset; - U32 litLength; - U32 matchLength; -} rawSeq; - -typedef struct { - rawSeq* seq; /* The start of the sequences */ - size_t pos; /* The position where reading stopped. <= size. */ - size_t size; /* The number of sequences. <= capacity. */ - size_t capacity; /* The capacity starting from `seq` pointer */ -} rawSeqStore_t; - typedef struct { int collectSequences; ZSTD_Sequence* seqStart; @@ -227,10 +235,34 @@ struct ZSTD_CCtx_params_s { /* Long distance matching parameters */ ldmParams_t ldmParams; + /* Dedicated dict search algorithm trigger */ + int enableDedicatedDictSearch; + + /* Input/output buffer modes */ + ZSTD_bufferMode_e inBufferMode; + ZSTD_bufferMode_e outBufferMode; + + /* Sequence compression API */ + ZSTD_sequenceFormat_e blockDelimiters; + int validateSequences; + /* Internal use, for createCCtxParams() and freeCCtxParams() only */ ZSTD_customMem customMem; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ +#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) +#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE) + +/** + * Indicates whether this compression proceeds directly from user-provided + * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or + * whether the context needs to buffer the input/output (ZSTDb_buffered). + */ +typedef enum { + ZSTDb_not_buffered, + ZSTDb_buffered +} ZSTD_buffered_policy_e; + struct ZSTD_CCtx_s { ZSTD_compressionStage_e stage; int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ @@ -246,9 +278,11 @@ struct ZSTD_CCtx_s { unsigned long long producedCSize; XXH64_state_t xxhState; ZSTD_customMem customMem; + ZSTD_threadPool* pool; size_t staticSize; SeqCollector seqCollector; int isFirstBlock; + int initialized; seqStore_t seqStore; /* sequences storage ptrs */ ldmState_t ldmState; /* long distance matching state */ @@ -256,7 +290,10 @@ struct ZSTD_CCtx_s { size_t maxNbLdmSequences; rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */ ZSTD_blockState_t blockState; - U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ + U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */ + + /* Wether we are streaming or not */ + ZSTD_buffered_policy_e bufferedPolicy; /* streaming */ char* inBuff; @@ -271,6 +308,10 @@ struct ZSTD_CCtx_s { ZSTD_cStreamStage streamStage; U32 frameEnded; + /* Stable in/out buffer verification */ + ZSTD_inBuffer expectedInBuffer; + size_t expectedOutBufferSize; + /* Dictionary */ ZSTD_localDict localDict; const ZSTD_CDict* cdict; @@ -284,8 +325,32 @@ struct ZSTD_CCtx_s { typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; -typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e; - +typedef enum { + ZSTD_noDict = 0, + ZSTD_extDict = 1, + ZSTD_dictMatchState = 2, + ZSTD_dedicatedDictSearch = 3 +} ZSTD_dictMode_e; + +typedef enum { + ZSTD_cpm_noAttachDict = 0, /* Compression with ZSTD_noDict or ZSTD_extDict. + * In this mode we use both the srcSize and the dictSize + * when selecting and adjusting parameters. + */ + ZSTD_cpm_attachDict = 1, /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch. + * In this mode we only take the srcSize into account when selecting + * and adjusting parameters. + */ + ZSTD_cpm_createCDict = 2, /* Creating a CDict. + * In this mode we take both the source size and the dictionary size + * into account when selecting and adjusting the parameters. + */ + ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams. + * We don't know what these parameters are for. We default to the legacy + * behavior of taking both the source size and the dict size into account + * when selecting and adjusting parameters. + */ +} ZSTD_cParamMode_e; typedef size_t (*ZSTD_blockCompressor) ( ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -324,6 +389,31 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; } +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; + +MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) +{ + repcodes_t newReps; + if (offset >= ZSTD_REP_NUM) { /* full offset */ + newReps.rep[2] = rep[1]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = offset - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offset + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = currentOffset; + } else { /* repCode == 0 */ + ZSTD_memcpy(&newReps, rep, sizeof(newReps)); + } + } + return newReps; +} + /* ZSTD_cParam_withinBounds: * @return 1 if value is within cParam bounds, * 0 otherwise */ @@ -336,6 +426,30 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) return 1; } +/* ZSTD_noCompressBlock() : + * Writes uncompressed block to dst buffer from given src. + * Returns the size of the block */ +MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) +{ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, + dstSize_tooSmall, "dst buf too small for uncompressed block"); + MEM_writeLE24(dst, cBlockHeader24); + ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + return ZSTD_blockHeaderSize + srcSize; +} + +MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) +{ + BYTE* const op = (BYTE*)dst; + U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); + MEM_writeLE24(op, cBlockHeader); + op[3] = src; + return 4; +} + + /* ZSTD_minGain() : * minimum compression required * to generate a compress block or a compressed literals section. @@ -348,6 +462,21 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) return (srcSize >> minlog) + 2; } +MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +{ + switch (cctxParams->literalCompressionMode) { + case ZSTD_lcm_huffman: + return 0; + case ZSTD_lcm_uncompressed: + return 1; + default: + assert(0 /* impossible: pre-validated */); + /* fall-through */ + case ZSTD_lcm_auto: + return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); + } +} + /*! ZSTD_safecopyLiterals() : * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single @@ -432,9 +561,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val) if (MEM_isLittleEndian()) { if (MEM_64bits()) { # if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - _BitScanForward64( &r, (U64)val ); - return (unsigned)(r>>3); +# if STATIC_BMI2 + return _tzcnt_u64(val) >> 3; +# else + unsigned long r = 0; + return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0; +# endif # elif defined(__GNUC__) && (__GNUC__ >= 4) return (__builtin_ctzll((U64)val) >> 3); # else @@ -451,8 +583,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val) } else { /* 32 bits */ # if defined(_MSC_VER) unsigned long r=0; - _BitScanForward( &r, (U32)val ); - return (unsigned)(r>>3); + return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0; # elif defined(__GNUC__) && (__GNUC__ >= 3) return (__builtin_ctz((U32)val) >> 3); # else @@ -466,9 +597,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val) } else { /* Big Endian CPU */ if (MEM_64bits()) { # if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (unsigned)(r>>3); +# if STATIC_BMI2 + return _lzcnt_u64(val) >> 3; +# else + unsigned long r = 0; + return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0; +# endif # elif defined(__GNUC__) && (__GNUC__ >= 4) return (__builtin_clzll(val) >> 3); # else @@ -482,8 +616,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val) } else { /* 32 bits */ # if defined(_MSC_VER) unsigned long r = 0; - _BitScanReverse( &r, (unsigned long)val ); - return (unsigned)(r>>3); + return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0; # elif defined(__GNUC__) && (__GNUC__ >= 3) return (__builtin_clz((U32)val) >> 3); # else @@ -564,7 +697,8 @@ static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } -MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +MEM_STATIC FORCE_INLINE_ATTR +size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) { switch(mls) { @@ -680,7 +814,7 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) return ZSTD_window_hasExtDict(ms->window) ? ZSTD_extDict : ms->dictMatchState != NULL ? - ZSTD_dictMatchState : + (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) : ZSTD_noDict; } @@ -692,8 +826,8 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, void const* srcEnd) { - U32 const current = (U32)((BYTE const*)srcEnd - window.base); - return current > ZSTD_CURRENT_MAX; + U32 const curr = (U32)((BYTE const*)srcEnd - window.base); + return curr > ZSTD_CURRENT_MAX; } /** @@ -729,18 +863,30 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, * windowLog <= 31 ==> 3<<29 + 1<base); - U32 const newCurrent = (current & cycleMask) + maxDist; - U32 const correction = current - newCurrent; + U32 const curr = (U32)((BYTE const*)src - window->base); + U32 const currentCycle0 = curr & cycleMask; + /* Exclude zero so that newCurrent - maxDist >= 1. */ + U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0; + U32 const newCurrent = currentCycle1 + maxDist; + U32 const correction = curr - newCurrent; assert((maxDist & cycleMask) == 0); - assert(current > newCurrent); + assert(curr > newCurrent); /* Loose bound, should be around 1<<29 (see above) */ assert(correction > 1<<28); window->base += correction; window->dictBase += correction; - window->lowLimit -= correction; - window->dictLimit -= correction; + if (window->lowLimit <= correction) window->lowLimit = 1; + else window->lowLimit -= correction; + if (window->dictLimit <= correction) window->dictLimit = 1; + else window->dictLimit -= correction; + + /* Ensure we can still reference the full window. */ + assert(newCurrent >= maxDist); + assert(newCurrent - maxDist >= 1); + /* Ensure that lowLimit and dictLimit didn't underflow. */ + assert(window->lowLimit <= newCurrent); + assert(window->dictLimit <= newCurrent); DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, window->lowLimit); @@ -844,6 +990,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window, } } } } +MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { + ZSTD_memset(window, 0, sizeof(*window)); + window->base = (BYTE const*)""; + window->dictBase = (BYTE const*)""; + window->dictLimit = 1; /* start from 1, so that 1st position is valid */ + window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ + window->nextSrc = window->base + 1; /* see issue #1241 */ +} + /** * ZSTD_window_update(): * Updates the window by appending [src, src + srcSize) to the window. @@ -857,6 +1012,10 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, BYTE const* const ip = (BYTE const*)src; U32 contiguous = 1; DEBUGLOG(5, "ZSTD_window_update"); + if (srcSize == 0) + return contiguous; + assert(window->base != NULL); + assert(window->dictBase != NULL); /* Check if blocks follow each other */ if (src != window->nextSrc) { /* not contiguous */ @@ -867,7 +1026,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, window->dictLimit = (U32)distanceFromBase; window->dictBase = window->base; window->base = ip - distanceFromBase; - // ms->nextToUpdate = window->dictLimit; + /* ms->nextToUpdate = window->dictLimit; */ if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ contiguous = 0; } @@ -883,12 +1042,35 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, return contiguous; } -MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) +/** + * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) { U32 const maxDistance = 1U << windowLog; U32 const lowestValid = ms->window.lowLimit; - U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; U32 const isDictionary = (ms->loadedDictEnd != 0); + /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary + * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't + * valid for the entire block. So this check is sufficient to find the lowest valid match index. + */ + U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + return matchLowest; +} + +/** + * Returns the lowest allowed match index in the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.dictLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + /* When computing the lowest prefix index we need to take the dictionary into account to handle + * the edge case where the dictionary and the source are contiguous in memory. + */ U32 const matchLowest = isDictionary ? lowestValid : withinWindow; return matchLowest; } @@ -931,6 +1113,20 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) } #endif +/* =============================================================== + * Shared internal declarations + * These prototypes may be called from sources not in lib/compress + * =============================================================== */ + +/* ZSTD_loadCEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * return : size of dictionary header (size of magic number + dict ID + entropy tables) + * assumptions : magic number supposed already checked + * and dictSize >= 8 */ +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + const void* const dict, size_t dictSize); + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); /* ============================================================== * Private declarations @@ -940,9 +1136,10 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) /* ZSTD_getCParamsFromCCtxParams() : * cParams are built depending on compressionLevel, src size hints, * LDM and manually set compression parameters. + * Note: srcSizeHint == 0 means 0! */ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( - const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize); + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); /*! ZSTD_initCStream_internal() : * Private use only. Init streaming operation. @@ -999,5 +1196,8 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); */ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); #endif /* ZSTD_COMPRESS_H */ diff --git a/zstd_compress_literals.c b/zstd_compress_literals.c index 6c13331..6dd1c14 100644 --- a/zstd_compress_literals.c +++ b/zstd_compress_literals.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -18,7 +18,7 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, BYTE* const ostart = (BYTE* const)dst; U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall); + RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); switch(flSize) { @@ -35,7 +35,8 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, assert(0); } - memcpy(ostart + flSize, src, srcSize); + ZSTD_memcpy(ostart + flSize, src, srcSize); + DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); return srcSize + flSize; } @@ -62,6 +63,7 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* } ostart[flSize] = *(const BYTE*)src; + DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); return flSize+1; } @@ -80,11 +82,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, symbolEncodingType_e hType = set_compressed; size_t cLitSize; - DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", - disableLiteralCompression); + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", + disableLiteralCompression, (U32)srcSize); /* Prepare nextEntropy assuming reusing the existing table */ - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); if (disableLiteralCompression) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); @@ -102,24 +104,25 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, cLitSize = singleStream ? HUF_compress1X_repeat( ostart+lhSize, dstCapacity-lhSize, src, srcSize, - 255, 11, entropyWorkspace, entropyWorkspaceSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : HUF_compress4X_repeat( ostart+lhSize, dstCapacity-lhSize, src, srcSize, - 255, 11, entropyWorkspace, entropyWorkspaceSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); if (repeat != HUF_repeat_none) { /* reused the existing table */ + DEBUGLOG(5, "Reusing previous huffman table"); hType = set_repeat; } } if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); } if (cLitSize==1) { - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); } @@ -150,5 +153,6 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, default: /* not possible : lhSize is {3,4,5} */ assert(0); } + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); return lhSize+cLitSize; } diff --git a/zstd_compress_literals.h b/zstd_compress_literals.h index 97273d7..8b08705 100644 --- a/zstd_compress_literals.h +++ b/zstd_compress_literals.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zstd_compress_sequences.c b/zstd_compress_sequences.c index 0ff7a26..be30c08 100644 --- a/zstd_compress_sequences.c +++ b/zstd_compress_sequences.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -50,6 +50,19 @@ static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { return maxSymbolValue; } +/** + * Returns true if we should use ncount=-1 else we should + * use ncount=1 for low probability symbols instead. + */ +static unsigned ZSTD_useLowProbCount(size_t const nbSeq) +{ + /* Heuristic: This should cover most blocks <= 16K and + * start to fade out after 16K to about 32K depending on + * comprssibility. + */ + return nbSeq >= 2048; +} + /** * Returns the cost in bytes of encoding the normalized count header. * Returns an error if any of the helper functions return an error. @@ -60,7 +73,7 @@ static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, BYTE wksp[FSE_NCOUNTBOUND]; S16 norm[MaxSeq + 1]; const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), ""); return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); } @@ -86,7 +99,7 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t * Returns the cost in bits of encoding the distribution in count using ctable. * Returns an error if ctable cannot represent all the symbols in count. */ -static size_t ZSTD_fseBitCost( +size_t ZSTD_fseBitCost( FSE_CTable const* ctable, unsigned const* count, unsigned const max) @@ -96,18 +109,22 @@ static size_t ZSTD_fseBitCost( unsigned s; FSE_CState_t cstate; FSE_initCState(&cstate, ctable); - RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC, - "Repeat FSE_CTable has maxSymbolValue %u < %u", + if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { + DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", ZSTD_getFSEMaxSymbolValue(ctable), max); + return ERROR(GENERIC); + } for (s = 0; s <= max; ++s) { unsigned const tableLog = cstate.stateLog; unsigned const badCost = (tableLog + 1) << kAccuracyLog; unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); if (count[s] == 0) continue; - RETURN_ERROR_IF(bitCost >= badCost, GENERIC, - "Repeat FSE_CTable has Prob[%u] == 0", s); - cost += count[s] * bitCost; + if (bitCost >= badCost) { + DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); + return ERROR(GENERIC); + } + cost += (size_t)count[s] * bitCost; } return cost >> kAccuracyLog; } @@ -117,15 +134,15 @@ static size_t ZSTD_fseBitCost( * table described by norm. The max symbol support by norm is assumed >= max. * norm must be valid for every symbol with non-zero probability in count. */ -static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, - unsigned const* count, unsigned const max) +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) { unsigned const shift = 8 - accuracyLog; size_t cost = 0; unsigned s; assert(accuracyLog <= 8); for (s = 0; s <= max; ++s) { - unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; + unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1; unsigned const norm256 = normAcc << shift; assert(norm256 > 0); assert(norm256 < 256); @@ -230,15 +247,15 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, switch (type) { case set_rle: - FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max)); - RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall); + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), ""); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space"); *op = codeTable[0]; return 1; case set_repeat: - memcpy(nextCTable, prevCTable, prevCTableSize); + ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize); return 0; case set_basic: - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize)); /* note : could be pre-calculated */ + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */ return 0; case set_compressed: { S16 norm[MaxSeq + 1]; @@ -249,14 +266,15 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, nbSeq_1--; } assert(nbSeq_1 > 1); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); + assert(entropyWorkspaceSize >= FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog)); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), ""); { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - FORWARD_IF_ERROR(NCountSize); - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize)); + FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), ""); return NCountSize; } } - default: assert(0); RETURN_ERROR(GENERIC); + default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach"); } } @@ -290,7 +308,7 @@ ZSTD_encodeSequences_body( if (MEM_32bits()) BIT_flushBits(&blockStream); if (longOffsets) { U32 const ofBits = ofCodeTable[nbSeq-1]; - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); if (extraBits) { BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); BIT_flushBits(&blockStream); @@ -327,7 +345,7 @@ ZSTD_encodeSequences_body( BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); if (longOffsets) { - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); if (extraBits) { BIT_addBits(&blockStream, sequences[n].offset, extraBits); BIT_flushBits(&blockStream); /* (7)*/ diff --git a/zstd_compress_sequences.h b/zstd_compress_sequences.h index 57e8e36..b884e45 100644 --- a/zstd_compress_sequences.h +++ b/zstd_compress_sequences.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -44,4 +44,11 @@ size_t ZSTD_encodeSequences( FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max); + +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max); #endif /* ZSTD_COMPRESS_SEQUENCES_H */ diff --git a/zstd_compress_superblock.c b/zstd_compress_superblock.c new file mode 100644 index 0000000..c227e4a --- /dev/null +++ b/zstd_compress_superblock.c @@ -0,0 +1,849 @@ +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +#include "zstd_compress_superblock.h" + +#include "zstd_internal.h" /* ZSTD_getSequenceLength */ +#include "hist.h" /* HIST_countFast_wksp */ +#include "zstd_compress_internal.h" +#include "zstd_compress_sequences.h" +#include "zstd_compress_literals.h" + +/*-************************************* +* Superblock entropy buffer structs +***************************************/ +/** ZSTD_hufCTablesMetadata_t : + * Stores Literals Block Type for a super-block in hType, and + * huffman tree description in hufDesBuffer. + * hufDesSize refers to the size of huffman tree description in bytes. + * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ +typedef struct { + symbolEncodingType_e hType; + BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/** ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. + * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; + size_t fseTablesSize; + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + + +/** ZSTD_buildSuperBlockEntropy_literal() : + * Builds entropy for the super-block literals. + * Stores literals block type (raw, rle, compressed, repeat) and + * huffman description table to hufMetadata. + * @return : size of huffman description table or error code */ +static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + const int disableLiteralsCompression, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); + BYTE* const nodeWksp = countWkspStart + countWkspSize; + const size_t nodeWkspSize = wkspEnd-nodeWksp; + unsigned maxSymbolValue = 255; + unsigned huffLog = HUF_TABLELOG_DEFAULT; + HUF_repeat repeat = prevHuf->repeatMode; + + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralsCompression) { + DEBUGLOG(5, "set_basic - disabled"); + hufMetadata->hType = set_basic; + return 0; + } + + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + + /* Build Huffman Tree */ + ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } + } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } + } +} + +/** ZSTD_buildSuperBlockEntropy_sequences() : + * Builds entropy for the super-block sequences. + * Stores symbol compression modes and fse table to fseMetadata. + * @return : size of fse tables or error code */ +static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); + BYTE* const cTableWksp = countWkspStart + countWkspSize; + const size_t cTableWkspSize = wkspEnd-cTableWksp; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + + assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); + ZSTD_memset(workspace, 0, wkspSize); + + fseMetadata->lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { U32 LLtype; + unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); + if (LLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->llType = (symbolEncodingType_e) LLtype; + } } + /* build CTable for Offsets */ + { U32 Offtype; + unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWksp, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); + if (Offtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->ofType = (symbolEncodingType_e) Offtype; + } } + /* build CTable for MatchLengths */ + { U32 MLtype; + unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); + if (MLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->mlType = (symbolEncodingType_e) MLtype; + } } + assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); + return op-ostart; +} + + +/** ZSTD_buildSuperBlockEntropy() : + * Builds entropy for the super-block. + * @return : 0 on success or error code */ +static size_t +ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) +{ + size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + ZSTD_disableLiteralsCompression(cctxParams), + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); + return 0; +} + +/** ZSTD_compressSubBlock_literal() : + * Compresses literals section for a sub-block. + * When we have to write the Huffman table we will sometimes choose a header + * size larger than necessary. This is because we have to pick the header size + * before we know the table size + compressed size, so we have a bound on the + * table size. If we guessed incorrectly, we fall back to uncompressed literals. + * + * We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded + * in writing the header, otherwise it is set to 0. + * + * hufMetadata->hType has literals block type info. + * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block. + * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block. + * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block + * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block + * and the following sub-blocks' literals sections will be Treeless_Literals_Block. + * @return : compressed size of literals section of a sub-block + * Or 0 if it unable to compress. + * Or error code */ +static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + const BYTE* literals, size_t litSize, + void* dst, size_t dstSize, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + size_t const header = writeEntropy ? 200 : 0; + size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart + lhSize; + U32 const singleStream = lhSize == 3; + symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; + size_t cLitSize = 0; + + (void)bmi2; /* TODO bmi2... */ + + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); + + *entropyWritten = 0; + if (litSize == 0 || hufMetadata->hType == set_basic) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } else if (hufMetadata->hType == set_rle) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal"); + return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize); + } + + assert(litSize > 0); + assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat); + + if (writeEntropy && hufMetadata->hType == set_compressed) { + ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); + op += hufMetadata->hufDesSize; + cLitSize += hufMetadata->hufDesSize; + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); + } + + /* TODO bmi2 */ + { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) + : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); + op += cSize; + cLitSize += cSize; + if (cSize == 0 || ERR_isError(cSize)) { + DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize)); + return 0; + } + /* If we expand and we aren't writing a header then emit uncompressed */ + if (!writeEntropy && cLitSize >= litSize) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + /* If we are writing headers then allow expansion that doesn't change our header size. */ + if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) { + assert(cLitSize > litSize); + DEBUGLOG(5, "Literals expanded beyond allowed header size"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize); + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + *entropyWritten = 1; + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); + return op-ostart; +} + +static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { + const seqDef* const sstart = sequences; + const seqDef* const send = sequences + nbSeq; + const seqDef* sp = sstart; + size_t matchLengthSum = 0; + size_t litLengthSum = 0; + while (send-sp > 0) { + ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); + litLengthSum += seqLen.litLength; + matchLengthSum += seqLen.matchLength; + sp++; + } + assert(litLengthSum <= litSize); + if (!lastSequence) { + assert(litLengthSum == litSize); + } + return matchLengthSum + litSize; +} + +/** ZSTD_compressSubBlock_sequences() : + * Compresses sequences section for a sub-block. + * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have + * symbol compression modes for the super-block. + * The first successfully compressed block will have these in its header. + * We set entropyWritten=1 when we succeed in compressing the sequences. + * The following sub-blocks will always have repeat mode. + * @return : compressed size of sequences section of a sub-block + * Or 0 if it is unable to compress + * Or error code. */ +static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + BYTE* seqHead; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets); + + *entropyWritten = 0; + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, ""); + if (nbSeq < 0x7F) + *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) + op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else + op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) { + return op - ostart; + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart)); + + if (writeEntropy) { + const U32 LLtype = fseMetadata->llType; + const U32 Offtype = fseMetadata->ofType; + const U32 MLtype = fseMetadata->mlType; + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); + op += fseMetadata->fseTablesSize; + } else { + const U32 repeat = set_repeat; + *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2)); + } + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, oend - op, + fseTables->matchlengthCTable, mlCode, + fseTables->offcodeCTable, ofCode, + fseTables->litlengthCTable, llCode, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(fseMetadata->lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } +#endif + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); + } + + /* zstd versions <= 1.4.0 mistakenly report error when + * sequences section body size is less than 3 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1664. + * This can happen when the previous sequences section block is compressed + * with rle mode and the current block's sequences section is compressed + * with repeat mode where sequences section body size can be 1 byte. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (op-seqHead < 4) { + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " + "an uncompressed block when sequences are < 4 bytes"); + return 0; + } +#endif + + *entropyWritten = 1; + return op - ostart; +} + +/** ZSTD_compressSubBlock() : + * Compresses a single sub-block. + * @return : compressed size of the sub-block + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* literals, size_t litSize, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, + int writeLitEntropy, int writeSeqEntropy, + int* litEntropyWritten, int* seqEntropyWritten, + U32 lastBlock) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart + ZSTD_blockHeaderSize; + DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)", + litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); + { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, + &entropyMetadata->hufMetadata, literals, litSize, + op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); + FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); + if (cLitSize == 0) return 0; + op += cLitSize; + } + { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, + &entropyMetadata->fseMetadata, + sequences, nbSeq, + llCode, mlCode, ofCode, + cctxParams, + op, oend-op, + bmi2, writeSeqEntropy, seqEntropyWritten); + FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); + if (cSeqSize == 0) return 0; + op += cSeqSize; + } + /* Write block header */ + { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize; + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(ostart, cBlockHeader24); + } + return op-ostart; +} + +static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = 255; + size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, unsigned maxCode, + size_t nbSeq, const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. */ + assert(max <= defaultMax); + cSymbolTypeSizeEstimateInBits = max <= defaultMax + ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max) + : ERROR(GENERIC); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits / 8; +} + +static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, + nbSeq, fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, + nbSeq, fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, + nbSeq, fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t cSizeEstimate = 0; + cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return cSizeEstimate + ZSTD_blockHeaderSize; +} + +static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) +{ + if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) + return 1; + if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle) + return 1; + if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle) + return 1; + return 0; +} + +/** ZSTD_compressSubBlock_multi() : + * Breaks super-block into multiple sub-blocks and compresses them. + * Entropy will be written to the first block. + * The following blocks will use repeat mode to compress. + * All sub-blocks are compressed blocks (no raw or rle blocks). + * @return : compressed size of the super block (which is multiple ZSTD blocks) + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, + const ZSTD_compressedBlockState_t* prevCBlock, + ZSTD_compressedBlockState_t* nextCBlock, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const int bmi2, U32 lastBlock, + void* workspace, size_t wkspSize) +{ + const seqDef* const sstart = seqStorePtr->sequencesStart; + const seqDef* const send = seqStorePtr->sequences; + const seqDef* sp = sstart; + const BYTE* const lstart = seqStorePtr->litStart; + const BYTE* const lend = seqStorePtr->lit; + const BYTE* lp = lstart; + BYTE const* ip = (BYTE const*)src; + BYTE const* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + const BYTE* llCodePtr = seqStorePtr->llCode; + const BYTE* mlCodePtr = seqStorePtr->mlCode; + const BYTE* ofCodePtr = seqStorePtr->ofCode; + size_t targetCBlockSize = cctxParams->targetCBlockSize; + size_t litSize, seqCount; + int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; + int writeSeqEntropy = 1; + int lastSequence = 0; + + DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", + (unsigned)(lend-lp), (unsigned)(send-sstart)); + + litSize = 0; + seqCount = 0; + do { + size_t cBlockSizeEstimate = 0; + if (sstart == send) { + lastSequence = 1; + } else { + const seqDef* const sequence = sp + seqCount; + lastSequence = sequence == send - 1; + litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; + seqCount++; + } + if (lastSequence) { + assert(lp <= lend); + assert(litSize <= (size_t)(lend - lp)); + litSize = (size_t)(lend - lp); + } + /* I think there is an optimization opportunity here. + * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful + * since it recalculates estimate from scratch. + * For example, it would recount literal distribution and symbol codes everytime. + */ + cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, + &nextCBlock->entropy, entropyMetadata, + workspace, wkspSize, writeLitEntropy, writeSeqEntropy); + if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { + int litEntropyWritten = 0; + int seqEntropyWritten = 0; + const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); + const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, oend-op, + bmi2, writeLitEntropy, writeSeqEntropy, + &litEntropyWritten, &seqEntropyWritten, + lastBlock && lastSequence); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + if (cSize > 0 && cSize < decompressedSize) { + DEBUGLOG(5, "Committed the sub-block"); + assert(ip + decompressedSize <= iend); + ip += decompressedSize; + sp += seqCount; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + litSize = 0; + seqCount = 0; + /* Entropy only needs to be written once */ + if (litEntropyWritten) { + writeLitEntropy = 0; + } + if (seqEntropyWritten) { + writeSeqEntropy = 0; + } + } + } + } while (!lastSequence); + if (writeLitEntropy) { + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); + ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); + } + if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { + /* If we haven't written our entropy tables, then we've violated our contract and + * must emit an uncompressed block. + */ + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); + return 0; + } + if (ip < iend) { + size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); + DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + assert(cSize != 0); + op += cSize; + /* We have to regenerate the repcodes because we've skipped some sequences */ + if (sp < send) { + seqDef const* seq; + repcodes_t rep; + ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); + for (seq = sstart; seq < sp; ++seq) { + rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); + } + ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); + } + } + DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); + return op-ostart; +} + +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; + + FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + + return ZSTD_compressSubBlock_multi(&zc->seqStore, + zc->blockState.prevCBlock, + zc->blockState.nextCBlock, + &entropyMetadata, + &zc->appliedParams, + dst, dstCapacity, + src, srcSize, + zc->bmi2, lastBlock, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */); +} diff --git a/zstd_compress_superblock.h b/zstd_compress_superblock.h new file mode 100644 index 0000000..35d2072 --- /dev/null +++ b/zstd_compress_superblock.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_ADVANCED_H +#define ZSTD_COMPRESS_ADVANCED_H + +/*-************************************* +* Dependencies +***************************************/ + +#include "zstd.h" /* ZSTD_CCtx */ + +/*-************************************* +* Target Compressed Block Size +***************************************/ + +/* ZSTD_compressSuperBlock() : + * Used to compress a super block when targetCBlockSize is being used. + * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock); + +#endif /* ZSTD_COMPRESS_ADVANCED_H */ diff --git a/zstd_cwksp.h b/zstd_cwksp.h index fc9765b..ffb3a8d 100644 --- a/zstd_cwksp.h +++ b/zstd_cwksp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -24,16 +24,6 @@ extern "C" { * Constants ***************************************/ -/* define "workspace is too large" as this number of times larger than needed */ -#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 - -/* when workspace is continuously too large - * during at least this number of times, - * context's memory usage is considered wasteful, - * because it's sized to handle a worst case scenario which rarely happens. - * In which case, resize it down to free some memory */ -#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 - /* Since the workspace is effectively its own little malloc implementation / * arena, when we run under ASAN, we should similarly insert redzones between * each internal element of the workspace, so ASAN will catch overruns that @@ -54,6 +44,16 @@ typedef enum { ZSTD_cwksp_alloc_aligned } ZSTD_cwksp_alloc_phase_e; +/** + * Used to describe whether the workspace is statically allocated (and will not + * necessarily ever be freed), or if it's dynamically allocated and we can + * expect a well-formed caller to free this. + */ +typedef enum { + ZSTD_cwksp_dynamic_alloc, + ZSTD_cwksp_static_alloc +} ZSTD_cwksp_static_alloc_e; + /** * Zstd fits all its internal datastructures into a single continuous buffer, * so that it only needs to perform a single OS allocation (or so that a buffer @@ -102,7 +102,7 @@ typedef enum { * * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, * so that literally everything fits in a single buffer. Note: if present, - * this must be the first object in the workspace, since ZSTD_free{CCtx, + * this must be the first object in the workspace, since ZSTD_customFree{CCtx, * CDict}() rely on a pointer comparison to see whether one or two frees are * required. * @@ -147,9 +147,10 @@ typedef struct { void* tableValidEnd; void* allocStart; - int allocFailed; + BYTE allocFailed; int workspaceOversizedDuration; ZSTD_cwksp_alloc_phase_e phase; + ZSTD_cwksp_static_alloc_e isStatic; } ZSTD_cwksp; /*-************************************* @@ -188,7 +189,9 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { * else is though. */ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + if (size == 0) + return 0; +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; #else return size; @@ -238,7 +241,10 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal( ZSTD_cwksp_internal_advance_phase(ws, phase); alloc = (BYTE *)ws->allocStart - bytes; -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + if (bytes == 0) + return NULL; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) /* over-reserve space */ alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; #endif @@ -257,11 +263,13 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal( } ws->allocStart = alloc; -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on * either size. */ alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; - __asan_unpoison_memory_region(alloc, bytes); + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } #endif return alloc; @@ -306,8 +314,10 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { } ws->tableEnd = end; -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - __asan_unpoison_memory_region(alloc, bytes); +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } #endif return alloc; @@ -321,7 +331,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { void* alloc = ws->objectEnd; void* end = (BYTE*)alloc + roundedBytes; -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) /* over-reserve space */ end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; #endif @@ -342,11 +352,13 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { ws->tableEnd = end; ws->tableValidEnd = end; -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on * either size. */ alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; - __asan_unpoison_memory_region(alloc, bytes); + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } #endif return alloc; @@ -355,7 +367,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); -#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) /* To validate that the table re-use logic is sound, and that we don't * access table space that we haven't cleaned, we re-"poison" the table * space every time we mark it dirty. */ @@ -390,7 +402,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { assert(ws->tableValidEnd >= ws->objectEnd); assert(ws->tableValidEnd <= ws->allocStart); if (ws->tableValidEnd < ws->tableEnd) { - memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); + ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); } ZSTD_cwksp_mark_tables_clean(ws); } @@ -402,8 +414,12 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { DEBUGLOG(4, "cwksp: clearing tables!"); -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - { +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* We don't do this when the workspace is statically allocated, because + * when that is the case, we have no capability to hook into the end of the + * workspace's lifecycle to unpoison the memory. + */ + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; __asan_poison_memory_region(ws->objectEnd, size); } @@ -420,7 +436,7 @@ MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { DEBUGLOG(4, "cwksp: clearing!"); -#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) /* To validate that the context re-use logic is sound, and that we don't * access stuff that this compression hasn't initialized, we re-"poison" * the workspace (or at least the non-static, non-table parts of it) @@ -431,8 +447,12 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { } #endif -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - { +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* We don't do this when the workspace is statically allocated, because + * when that is the case, we have no capability to hook into the end of the + * workspace's lifecycle to unpoison the memory. + */ + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd; __asan_poison_memory_region(ws->objectEnd, size); } @@ -452,7 +472,7 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { * Any existing values in the workspace are ignored (the previously managed * buffer, if present, must be separately freed). */ -MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) { +MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) { DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ ws->workspace = start; @@ -460,24 +480,25 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) { ws->objectEnd = ws->workspace; ws->tableValidEnd = ws->objectEnd; ws->phase = ZSTD_cwksp_alloc_objects; + ws->isStatic = isStatic; ZSTD_cwksp_clear(ws); ws->workspaceOversizedDuration = 0; ZSTD_cwksp_assert_internal_consistency(ws); } MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { - void* workspace = ZSTD_malloc(size, customMem); + void* workspace = ZSTD_customMalloc(size, customMem); DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); - RETURN_ERROR_IF(workspace == NULL, memory_allocation); - ZSTD_cwksp_init(ws, workspace, size); + RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); + ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc); return 0; } MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { void *ptr = ws->workspace; DEBUGLOG(4, "cwksp: freeing workspace"); - memset(ws, 0, sizeof(ZSTD_cwksp)); - ZSTD_free(ptr, customMem); + ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp)); + ZSTD_customFree(ptr, customMem); } /** @@ -486,13 +507,18 @@ MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { */ MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { *dst = *src; - memset(src, 0, sizeof(ZSTD_cwksp)); + ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); } MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); } +MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) + + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); +} + MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { return ws->allocFailed; } diff --git a/zstd_ddict.c b/zstd_ddict.c index 0af3d23..5bfee8d 100644 --- a/zstd_ddict.c +++ b/zstd_ddict.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -14,7 +14,7 @@ /*-******************************************************* * Dependencies *********************************************************/ -#include /* memcpy, memmove, memset */ +#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ #include "cpu.h" /* bmi2 */ #include "mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY @@ -25,7 +25,7 @@ #include "zstd_ddict.h" #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -# include "zstd_legacy.h" +# include "../legacy/zstd_legacy.h" #endif @@ -65,6 +65,10 @@ void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) dctx->virtualStart = ddict->dictContent; dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif if (ddict->entropyPresent) { dctx->litEntropy = 1; dctx->fseEntropy = 1; @@ -107,7 +111,7 @@ ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, /* load entropy tables */ RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( &ddict->entropy, ddict->dictContent, ddict->dictSize)), - dictionary_corrupted); + dictionary_corrupted, ""); ddict->entropyPresent = 1; return 0; } @@ -123,17 +127,17 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, ddict->dictContent = dict; if (!dict) dictSize = 0; } else { - void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); + void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); ddict->dictBuffer = internalBuffer; ddict->dictContent = internalBuffer; if (!internalBuffer) return ERROR(memory_allocation); - memcpy(internalBuffer, dict, dictSize); + ZSTD_memcpy(internalBuffer, dict, dictSize); } ddict->dictSize = dictSize; ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ /* parse dictionary content */ - FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) ); + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); return 0; } @@ -143,9 +147,9 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, ZSTD_customMem customMem) { - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; - { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); if (ddict == NULL) return NULL; ddict->cMem = customMem; { size_t const initResult = ZSTD_initDDict_internal(ddict, @@ -194,7 +198,7 @@ const ZSTD_DDict* ZSTD_initStaticDDict( if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ if (sBufferSize < neededSpace) return NULL; if (dictLoadMethod == ZSTD_dlm_byCopy) { - memcpy(ddict+1, dict, dictSize); /* local copy */ + ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ dict = ddict+1; } if (ZSTD_isError( ZSTD_initDDict_internal(ddict, @@ -209,8 +213,8 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict) { if (ddict==NULL) return 0; /* support free on NULL */ { ZSTD_customMem const cMem = ddict->cMem; - ZSTD_free(ddict->dictBuffer, cMem); - ZSTD_free(ddict, cMem); + ZSTD_customFree(ddict->dictBuffer, cMem); + ZSTD_customFree(ddict, cMem); return 0; } } diff --git a/zstd_ddict.h b/zstd_ddict.h index 0479d11..b427f9a 100644 --- a/zstd_ddict.h +++ b/zstd_ddict.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -15,7 +15,7 @@ /*-******************************************************* * Dependencies *********************************************************/ -#include /* size_t */ +#include "zstd_deps.h" /* size_t */ #include "zstd.h" /* ZSTD_DDict, and several public functions */ diff --git a/zstd_decompress.c b/zstd_decompress.c index dd4591b..e0e4695 100644 --- a/zstd_decompress.c +++ b/zstd_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -55,7 +55,7 @@ /*-******************************************************* * Dependencies *********************************************************/ -#include /* memcpy, memmove, memset */ +#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ #include "cpu.h" /* bmi2 */ #include "mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY @@ -68,7 +68,7 @@ #include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -# include "zstd_legacy.h" +# include "../legacy/zstd_legacy.h" #endif @@ -94,11 +94,18 @@ static size_t ZSTD_startingInputLength(ZSTD_format_e format) return startingInputLength; } +static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) +{ + assert(dctx->streamStage == zdss_init); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->outBufferMode = ZSTD_bm_buffered; + dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; +} + static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) { - dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */ dctx->staticSize = 0; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; dctx->ddict = NULL; dctx->ddictLocal = NULL; dctx->dictEnd = NULL; @@ -111,7 +118,13 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->legacyContext = NULL; dctx->previousLegacyVersion = 0; dctx->noForwardProgress = 0; + dctx->oversizedDuration = 0; dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + ZSTD_DCtx_resetParameters(dctx); + dctx->validateChecksum = 1; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif } ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) @@ -129,9 +142,9 @@ ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) { - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; - { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem); + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem); if (!dctx) return NULL; dctx->customMem = customMem; ZSTD_initDCtx_internal(dctx); @@ -159,13 +172,13 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); { ZSTD_customMem const cMem = dctx->customMem; ZSTD_clearDict(dctx); - ZSTD_free(dctx->inBuff, cMem); + ZSTD_customFree(dctx->inBuff, cMem); dctx->inBuff = NULL; #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (dctx->legacyContext) ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); #endif - ZSTD_free(dctx, cMem); + ZSTD_customFree(dctx, cMem); return 0; } } @@ -174,7 +187,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) { size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); - memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ + ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ } @@ -208,7 +221,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size) static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) { size_t const minInputSize = ZSTD_startingInputLength(format); - RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; U32 const dictID= fhd & 3; @@ -241,7 +254,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s const BYTE* ip = (const BYTE*)src; size_t const minInputSize = ZSTD_startingInputLength(format); - memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ if (srcSize < minInputSize) return minInputSize; RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); @@ -251,12 +264,12 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s /* skippable frame */ if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ - memset(zfhPtr, 0, sizeof(*zfhPtr)); + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); zfhPtr->frameType = ZSTD_skippableFrame; return 0; } - RETURN_ERROR(prefix_unknown); + RETURN_ERROR(prefix_unknown, ""); } /* ensure there is enough `srcSize` to fully read/decode frame header */ @@ -280,7 +293,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s if (!singleSegment) { BYTE const wlByte = ip[pos++]; U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge); + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); windowSize = (1ULL << windowLog); windowSize += (windowSize >> 3) * (wlByte&7); } @@ -352,14 +365,14 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize) size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; U32 sizeU32; - RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong); + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, - frameParameter_unsupported); + frameParameter_unsupported, ""); { size_t const skippableSize = skippableHeaderSize + sizeU32; - RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong); + RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); return skippableSize; } } @@ -439,9 +452,10 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he * harder. */ RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), - dictionary_wrong); + dictionary_wrong, ""); #endif - if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); + dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; + if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0); return 0; } @@ -456,7 +470,7 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) { ZSTD_frameSizeInfo frameSizeInfo; - memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); + ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (ZSTD_isLegacy(src, srcSize)) @@ -511,7 +525,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize ip += 4; } - frameSizeInfo.compressedSize = ip - ipstart; + frameSizeInfo.compressedSize = (size_t)(ip - ipstart); frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) ? zfh.frameContentSize : nbBlocks * zfh.blockSizeMax; @@ -559,17 +573,6 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) * Frame decoding ***************************************************************/ - -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) -{ - if (dst != dctx->previousDstEnd) { /* not contiguous */ - dctx->dictEnd = dctx->previousDstEnd; - dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); - dctx->prefixStart = dst; - dctx->previousDstEnd = dst; - } -} - /** ZSTD_insertBlock() : * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) @@ -585,12 +588,12 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { DEBUGLOG(5, "ZSTD_copyRawBlock"); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); if (dst == NULL) { if (srcSize == 0) return 0; - RETURN_ERROR(dstBuffer_null); + RETURN_ERROR(dstBuffer_null, ""); } - RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall); - memcpy(dst, src, srcSize); + ZSTD_memcpy(dst, src, srcSize); return srcSize; } @@ -598,12 +601,12 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, BYTE b, size_t regenSize) { + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); if (dst == NULL) { if (regenSize == 0) return 0; - RETURN_ERROR(dstBuffer_null); + RETURN_ERROR(dstBuffer_null, ""); } - RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall); - memset(dst, b, regenSize); + ZSTD_memset(dst, b, regenSize); return regenSize; } @@ -618,7 +621,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, { const BYTE* ip = (const BYTE*)(*srcPtr); BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + dstCapacity; + BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; BYTE* op = ostart; size_t remainingSrcSize = *srcSizePtr; @@ -627,15 +630,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, /* check */ RETURN_ERROR_IF( remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize, - srcSize_wrong); + srcSize_wrong, ""); /* Frame Header */ { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, - srcSize_wrong); - FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) ); + srcSize_wrong, ""); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; } @@ -648,28 +651,30 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, ip += ZSTD_blockHeaderSize; remainingSrcSize -= ZSTD_blockHeaderSize; - RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong); + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); switch(blockProperties.blockType) { case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1); + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1); break; case bt_raw : - decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); + decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); break; case bt_rle : - decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize); + decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize); break; case bt_reserved : default: - RETURN_ERROR(corruption_detected); + RETURN_ERROR(corruption_detected, "invalid block type"); } if (ZSTD_isError(decodedSize)) return decodedSize; - if (dctx->fParams.checksumFlag) + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, op, decodedSize); - op += decodedSize; + if (decodedSize != 0) + op += decodedSize; + assert(ip != NULL); ip += cBlockSize; remainingSrcSize -= cBlockSize; if (blockProperties.lastBlock) break; @@ -677,14 +682,16 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, - corruption_detected); + corruption_detected, ""); } if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ - U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); - U32 checkRead; - RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong); - checkRead = MEM_readLE32(ip); - RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong); + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); + if (!dctx->forceIgnoreChecksum) { + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + } ip += 4; remainingSrcSize -= 4; } @@ -692,7 +699,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, /* Allow caller to get size read */ *srcPtr = ip; *srcSizePtr = remainingSrcSize; - return op-ostart; + return (size_t)(op-ostart); } static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, @@ -725,7 +732,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); if (ZSTD_isError(decodedSize)) return decodedSize; - assert(decodedSize <=- dstCapacity); + assert(decodedSize <= dstCapacity); dst = (BYTE*)dst + decodedSize; dstCapacity -= decodedSize; @@ -741,7 +748,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, (unsigned)magicNumber, ZSTD_MAGICNUMBER); if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { size_t const skippableSize = readSkippableFrameSize(src, srcSize); - FORWARD_IF_ERROR(skippableSize); + FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); assert(skippableSize <= srcSize); src = (const BYTE *)src + skippableSize; @@ -751,11 +758,11 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, if (ddict) { /* we were called from ZSTD_decompress_usingDDict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict)); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); } else { /* this will initialize correctly with no dict if dict == NULL, so * use this in all cases but ddict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); } ZSTD_checkContinuity(dctx, dst); @@ -765,18 +772,17 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) && (moreThan1Frame==1), srcSize_wrong, - "at least one frame successfully completed, but following " - "bytes are garbage: it's more likely to be a srcSize error, " - "specifying more bytes than compressed size of frame(s). This " - "error message replaces ERROR(prefix_unknown), which would be " - "confusing, as the first header is actually correct. Note that " - "one could be unlucky, it might be a corruption error instead, " - "happening right at the place where we expect zstd magic " - "bytes. But this is _much_ less likely than a srcSize field " - "error."); + "At least one frame successfully completed, " + "but following bytes are garbage: " + "it's more likely to be a srcSize error, " + "specifying more input bytes than size of frame(s). " + "Note: one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic bytes. " + "But this is _much_ less likely than a srcSize field error."); if (ZSTD_isError(res)) return res; assert(res <= dstCapacity); - dst = (BYTE*)dst + res; + if (res != 0) + dst = (BYTE*)dst + res; dstCapacity -= res; } moreThan1Frame = 1; @@ -784,7 +790,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); - return (BYTE*)dst - (BYTE*)dststart; + return (size_t)((BYTE*)dst - (BYTE*)dststart); } size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, @@ -824,7 +830,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) size_t regenSize; ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - RETURN_ERROR_IF(dctx==NULL, memory_allocation); + RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); ZSTD_freeDCtx(dctx); return regenSize; @@ -842,6 +848,24 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr ****************************************/ size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } +/** + * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, + * we allow taking a partial block as the input. Currently only raw uncompressed blocks can + * be streamed. + * + * For blocks that can be streamed, this allows us to reduce the latency until we produce + * output, and avoid copying the input. + * + * @param inputSize - The total amount of input that the caller currently has. + */ +static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) { + if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock)) + return dctx->expected; + if (dctx->bType != bt_raw) + return dctx->expected; + return MIN(MAX(inputSize, 1), dctx->expected); +} + ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { switch(dctx->stage) { @@ -874,7 +898,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); /* Sanity check */ - RETURN_ERROR_IF(srcSize != dctx->expected, srcSize_wrong, "not allowed"); + RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); if (dstCapacity) ZSTD_checkContinuity(dctx, dst); switch (dctx->stage) @@ -884,22 +908,22 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c if (dctx->format == ZSTD_f_zstd1) { /* allows header */ assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - memcpy(dctx->headerBuffer, src, srcSize); + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ dctx->stage = ZSTDds_decodeSkippableHeader; return 0; } } dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; - memcpy(dctx->headerBuffer, src, srcSize); + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); dctx->expected = dctx->headerSize - srcSize; dctx->stage = ZSTDds_decodeFrameHeader; return 0; case ZSTDds_decodeFrameHeader: assert(src != NULL); - memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); + ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); dctx->expected = ZSTD_blockHeaderSize; dctx->stage = ZSTDds_decodeBlockHeader; return 0; @@ -941,29 +965,41 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case bt_compressed: DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); + dctx->expected = 0; /* Streaming not supported */ break; case bt_raw : + assert(srcSize <= dctx->expected); rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); + assert(rSize == srcSize); + dctx->expected -= rSize; break; case bt_rle : rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); + dctx->expected = 0; /* Streaming not supported */ break; case bt_reserved : /* should never happen */ default: - RETURN_ERROR(corruption_detected); + RETURN_ERROR(corruption_detected, "invalid block type"); } - if (ZSTD_isError(rSize)) return rSize; + FORWARD_IF_ERROR(rSize, ""); RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); dctx->decodedSize += rSize; - if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); + dctx->previousDstEnd = (char*)dst + rSize; + + /* Stay on the same stage until we are finished streaming the block. */ + if (dctx->expected > 0) { + return rSize; + } if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); RETURN_ERROR_IF( dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN && dctx->decodedSize != dctx->fParams.frameContentSize, - corruption_detected); + corruption_detected, ""); if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ dctx->expected = 4; dctx->stage = ZSTDds_checkChecksum; @@ -974,17 +1010,19 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c } else { dctx->stage = ZSTDds_decodeBlockHeader; dctx->expected = ZSTD_blockHeaderSize; - dctx->previousDstEnd = (char*)dst + rSize; } return rSize; } case ZSTDds_checkChecksum: assert(srcSize == 4); /* guaranteed by dctx->expected */ - { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); - U32 const check32 = MEM_readLE32(src); - DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); - RETURN_ERROR_IF(check32 != h32, checksum_wrong); + { + if (dctx->validateChecksum) { + U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + } dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; return 0; @@ -993,7 +1031,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_decodeSkippableHeader: assert(src != NULL); assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); - memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ + ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ dctx->stage = ZSTDds_skipFrame; return 0; @@ -1005,7 +1043,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c default: assert(0); /* impossible */ - RETURN_ERROR(GENERIC); /* some compiler require default to do something */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ } } @@ -1016,6 +1054,10 @@ static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dict dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); dctx->prefixStart = dict; dctx->previousDstEnd = (const char*)dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif return 0; } @@ -1029,7 +1071,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; - RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted); + RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ dictPtr += 8; /* skip header = magic + dictID */ @@ -1045,63 +1087,69 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, workspace, workspaceSize); #else size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, - dictPtr, dictEnd - dictPtr, + dictPtr, (size_t)(dictEnd - dictPtr), workspace, workspaceSize); #endif - RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted); + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); dictPtr += hSize; } { short offcodeNCount[MaxOff+1]; unsigned offcodeMaxValue = MaxOff, offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted); - RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->OFTable, offcodeNCount, offcodeMaxValue, OF_base, OF_bits, - offcodeLog); + offcodeLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */0); dictPtr += offcodeHeaderSize; } { short matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted); - RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->MLTable, matchlengthNCount, matchlengthMaxValue, ML_base, ML_bits, - matchlengthLog); + matchlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted); - RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->LLTable, litlengthNCount, litlengthMaxValue, LL_base, LL_bits, - litlengthLog); + litlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); dictPtr += litlengthHeaderSize; } - RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); { int i; size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); for (i=0; i<3; i++) { U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; RETURN_ERROR_IF(rep==0 || rep > dictContentSize, - dictionary_corrupted); + dictionary_corrupted, ""); entropy->rep[i] = rep; } } - return dictPtr - (const BYTE*)dict; + return (size_t)(dictPtr - (const BYTE*)dict); } static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) @@ -1115,7 +1163,7 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict /* load entropy tables */ { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); - RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); dict = (const char*)dict + eSize; dictSize -= eSize; } @@ -1138,8 +1186,9 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ dctx->litEntropy = dctx->fseEntropy = 0; dctx->dictID = 0; + dctx->bType = bt_reserved; ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); - memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ dctx->LLTptr = dctx->entropy.LLTable; dctx->MLTptr = dctx->entropy.MLTable; dctx->OFTptr = dctx->entropy.OFTable; @@ -1149,11 +1198,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); if (dict && dictSize) RETURN_ERROR_IF( ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), - dictionary_corrupted); + dictionary_corrupted, ""); return 0; } @@ -1172,7 +1221,7 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) DEBUGLOG(4, "DDict is %s", dctx->ddictIsCold ? "~cold~" : "hot!"); } - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); if (ddict) { /* NULL ddict is equivalent to no dictionary */ ZSTD_copyDDictParameters(dctx, ddict); } @@ -1263,11 +1312,11 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); ZSTD_clearDict(dctx); if (dict && dictSize != 0) { dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); - RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); dctx->ddict = dctx->ddictLocal; dctx->dictUses = ZSTD_use_indefinitely; } @@ -1286,7 +1335,7 @@ size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSi size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) { - FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType)); + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); dctx->dictUses = ZSTD_use_once; return 0; } @@ -1303,8 +1352,8 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) { DEBUGLOG(4, "ZSTD_initDStream_usingDict"); - FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); return ZSTD_startingInputLength(zds->format); } @@ -1320,8 +1369,8 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds) * this function cannot fail */ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) { - FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) ); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); return ZSTD_startingInputLength(dctx->format); } @@ -1330,14 +1379,14 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) * this function cannot fail */ size_t ZSTD_resetDStream(ZSTD_DStream* dctx) { - FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only)); + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); return ZSTD_startingInputLength(dctx->format); } size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); ZSTD_clearDict(dctx); if (ddict) { dctx->ddict = ddict; @@ -1354,16 +1403,16 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); size_t const min = (size_t)1 << bounds.lowerBound; size_t const max = (size_t)1 << bounds.upperBound; - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); - RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound); - RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); dctx->maxWindowSize = maxWindowSize; return 0; } size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) { - return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format); } ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) @@ -1379,6 +1428,14 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) bounds.upperBound = (int)ZSTD_f_zstd1_magicless; ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); return bounds; + case ZSTD_d_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + case ZSTD_d_forceIgnoreChecksum: + bounds.lowerBound = (int)ZSTD_d_validateChecksum; + bounds.upperBound = (int)ZSTD_d_ignoreChecksum; + return bounds; default:; } bounds.error = ERROR(parameter_unsupported); @@ -1398,12 +1455,32 @@ static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) } #define CHECK_DBOUNDS(p,v) { \ - RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound); \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ +} + +size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value) +{ + switch (param) { + case ZSTD_d_windowLogMax: + *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize); + return 0; + case ZSTD_d_format: + *value = (int)dctx->format; + return 0; + case ZSTD_d_stableOutBuffer: + *value = (int)dctx->outBufferMode; + return 0; + case ZSTD_d_forceIgnoreChecksum: + *value = (int)dctx->forceIgnoreChecksum; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); } size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); switch(dParam) { case ZSTD_d_windowLogMax: if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; @@ -1414,9 +1491,17 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value CHECK_DBOUNDS(ZSTD_d_format, value); dctx->format = (ZSTD_format_e)value; return 0; + case ZSTD_d_stableOutBuffer: + CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); + dctx->outBufferMode = (ZSTD_bufferMode_e)value; + return 0; + case ZSTD_d_forceIgnoreChecksum: + CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); + dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; + return 0; default:; } - RETURN_ERROR(parameter_unsupported); + RETURN_ERROR(parameter_unsupported, ""); } size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) @@ -1428,10 +1513,9 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) } if ( (reset == ZSTD_reset_parameters) || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); ZSTD_clearDict(dctx); - dctx->format = ZSTD_f_zstd1; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + ZSTD_DCtx_resetParameters(dctx); } return 0; } @@ -1449,7 +1533,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); size_t const minRBSize = (size_t) neededSize; RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, - frameParameter_windowTooLarge); + frameParameter_windowTooLarge, ""); return minRBSize; } @@ -1467,30 +1551,94 @@ size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) ZSTD_frameHeader zfh; size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); if (ZSTD_isError(err)) return err; - RETURN_ERROR_IF(err>0, srcSize_wrong); + RETURN_ERROR_IF(err>0, srcSize_wrong, ""); RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, - frameParameter_windowTooLarge); + frameParameter_windowTooLarge, ""); return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); } /* ***** Decompression ***** */ -MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) { - size_t const length = MIN(dstCapacity, srcSize); - memcpy(dst, src, length); - return length; + return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; } +static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) + zds->oversizedDuration++; + else + zds->oversizedDuration = 0; +} + +static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) +{ + return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ +static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) +{ + ZSTD_outBuffer const expect = zds->expectedOutBuffer; + /* No requirement when ZSTD_obm_stable is not enabled. */ + if (zds->outBufferMode != ZSTD_bm_stable) + return 0; + /* Any buffer is allowed in zdss_init, this must be the same for every other call until + * the context is reset. + */ + if (zds->streamStage == zdss_init) + return 0; + /* The buffer must match our expectation exactly. */ + if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) + return 0; + RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!"); +} + +/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() + * and updates the stage and the output buffer state. This call is extracted so it can be + * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. + * NOTE: You must break after calling this function since the streamStage is modified. + */ +static size_t ZSTD_decompressContinueStream( + ZSTD_DStream* zds, char** op, char* oend, + void const* src, size_t srcSize) { + int const isSkipFrame = ZSTD_isSkipFrame(zds); + if (zds->outBufferMode == ZSTD_bm_buffered) { + size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart; + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + if (!decodedSize && !isSkipFrame) { + zds->streamStage = zdss_read; + } else { + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + } + } else { + /* Write directly into the output buffer */ + size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op); + size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + *op += decodedSize; + /* Flushing is not needed. */ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_bm_stable); + } + return 0; +} size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { - const char* const istart = (const char*)(input->src) + input->pos; - const char* const iend = (const char*)(input->src) + input->size; + const char* const src = (const char*)input->src; + const char* const istart = input->pos != 0 ? src + input->pos : src; + const char* const iend = input->size != 0 ? src + input->size : src; const char* ip = istart; - char* const ostart = (char*)(output->dst) + output->pos; - char* const oend = (char*)(output->dst) + output->size; + char* const dst = (char*)output->dst; + char* const ostart = output->pos != 0 ? dst + output->pos : dst; + char* const oend = output->size != 0 ? dst + output->size : dst; char* op = ostart; U32 someMoreWork = 1; @@ -1506,6 +1654,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB "forbidden. out: pos: %u vs size: %u", (U32)output->pos, (U32)output->size); DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); while (someMoreWork) { switch(zds->streamStage) @@ -1516,6 +1665,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; zds->legacyVersion = 0; zds->hostageByte = 0; + zds->expectedOutBuffer = *output; /* fall-through */ case zdss_loadHeader : @@ -1543,7 +1693,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB "legacy support is incompatible with static dctx"); FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion, - dict, dictSize)); + dict, dictSize), ""); zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ @@ -1558,24 +1708,25 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB assert(iend >= ip); if (toLoad > remainingInput) { /* not enough input to load full header */ if (remainingInput > 0) { - memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); zds->lhSize += remainingInput; } input->pos = input->size; return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ } assert(ip != NULL); - memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; break; } } /* check for single-pass mode opportunity */ - if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { - size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); + size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); if (cSize <= (size_t)(iend-istart)) { /* shortcut : using single-pass mode */ - size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds)); + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); if (ZSTD_isError(decompressedSize)) return decompressedSize; DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") ip = istart + cSize; @@ -1586,15 +1737,23 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB break; } } + /* Check output buffer is large enough for ZSTD_odm_stable. */ + if (zds->outBufferMode == ZSTD_bm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { + RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + /* Consume header (see ZSTDds_decodeFrameHeader) */ DEBUGLOG(4, "Consume header"); - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds))); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); zds->stage = ZSTDds_skipFrame; } else { - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize)); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); zds->expected = ZSTD_blockHeaderSize; zds->stage = ZSTDds_decodeBlockHeader; } @@ -1605,40 +1764,48 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB (U32)(zds->maxWindowSize >> 10) ); zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, - frameParameter_windowTooLarge); + frameParameter_windowTooLarge, ""); /* Adapt buffer sizes to frame header instructions */ { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); - size_t const neededOutBuffSize = ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize); - if ((zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize)) { - size_t const bufferSize = neededInBuffSize + neededOutBuffSize; - DEBUGLOG(4, "inBuff : from %u to %u", - (U32)zds->inBuffSize, (U32)neededInBuffSize); - DEBUGLOG(4, "outBuff : from %u to %u", - (U32)zds->outBuffSize, (U32)neededOutBuffSize); - if (zds->staticSize) { /* static DCtx */ - DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); - assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ - RETURN_ERROR_IF( - bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), - memory_allocation); - } else { - ZSTD_free(zds->inBuff, zds->customMem); - zds->inBuffSize = 0; - zds->outBuffSize = 0; - zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); - RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation); - } - zds->inBuffSize = neededInBuffSize; - zds->outBuff = zds->inBuff + zds->inBuffSize; - zds->outBuffSize = neededOutBuffSize; - } } + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered + ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } else { + ZSTD_customFree(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } } zds->streamStage = zdss_read; /* fall-through */ case zdss_read: DEBUGLOG(5, "stage zdss_read"); - { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)); DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); if (neededInSize==0) { /* end of frame */ zds->streamStage = zdss_init; @@ -1646,15 +1813,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB break; } if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ - int const isSkipFrame = ZSTD_isSkipFrame(zds); - size_t const decodedSize = ZSTD_decompressContinue(zds, - zds->outBuff + zds->outStart, (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart), - ip, neededInSize); - if (ZSTD_isError(decodedSize)) return decodedSize; + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); ip += neededInSize; - if (!decodedSize && !isSkipFrame) break; /* this was just a header */ - zds->outEnd = zds->outStart + decodedSize; - zds->streamStage = zdss_flush; + /* Function modifies the stage so we must break */ break; } } if (ip==iend) { someMoreWork = 0; break; } /* no more input */ @@ -1666,33 +1827,29 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB size_t const toLoad = neededInSize - zds->inPos; int const isSkipFrame = ZSTD_isSkipFrame(zds); size_t loadedSize; + /* At this point we shouldn't be decompressing a block that we can stream. */ + assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); if (isSkipFrame) { loadedSize = MIN(toLoad, (size_t)(iend-ip)); } else { RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, corruption_detected, "should never happen"); - loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); } ip += loadedSize; zds->inPos += loadedSize; if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ /* decode loaded input */ - { size_t const decodedSize = ZSTD_decompressContinue(zds, - zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, - zds->inBuff, neededInSize); - if (ZSTD_isError(decodedSize)) return decodedSize; - zds->inPos = 0; /* input is consumed */ - if (!decodedSize && !isSkipFrame) { zds->streamStage = zdss_read; break; } /* this was just a header */ - zds->outEnd = zds->outStart + decodedSize; - } } - zds->streamStage = zdss_flush; - /* fall-through */ - + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } case zdss_flush: { size_t const toFlushSize = zds->outEnd - zds->outStart; - size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize); + size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); op += flushedSize; zds->outStart += flushedSize; if (flushedSize == toFlushSize) { /* flush completed */ @@ -1712,17 +1869,21 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB default: assert(0); /* impossible */ - RETURN_ERROR(GENERIC); /* some compiler require default to do something */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ } } /* result */ input->pos = (size_t)(ip - (const char*)(input->src)); output->pos = (size_t)(op - (char*)(output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. */ + zds->expectedOutBuffer = *output; + if ((ip==istart) && (op==ostart)) { /* no forward progress */ zds->noForwardProgress ++; if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { - RETURN_ERROR_IF(op==oend, dstSize_tooSmall); - RETURN_ERROR_IF(ip==iend, srcSize_wrong); + RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); + RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); assert(0); } } else { diff --git a/zstd_decompress_block.c b/zstd_decompress_block.c index 767e5f9..19ddb7a 100644 --- a/zstd_decompress_block.c +++ b/zstd_decompress_block.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -14,7 +14,7 @@ /*-******************************************************* * Dependencies *********************************************************/ -#include /* memcpy, memmove, memset */ +#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ #include "compiler.h" /* prefetch */ #include "cpu.h" /* bmi2 */ #include "mem.h" /* low level memory routines */ @@ -44,7 +44,7 @@ /*_******************************************************* * Memory operations **********************************************************/ -static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } +static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } /*-************************************************************* @@ -56,7 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { - RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong); + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); { U32 const cBlockHeader = MEM_readLE24(src); U32 const cSize = cBlockHeader >> 3; @@ -64,7 +64,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); bpPtr->origSize = cSize; /* only useful for RLE */ if (bpPtr->blockType == bt_rle) return 1; - RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected); + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); return cSize; } } @@ -80,7 +80,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ { DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); - RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); { const BYTE* const istart = (const BYTE*) src; symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); @@ -89,7 +89,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, { case set_repeat: DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); - RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); /* fall-through */ case set_compressed: @@ -121,8 +121,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); break; } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); - RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected); + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); /* prefetch huffman table if cold */ if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { @@ -160,13 +160,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } } - RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected); + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; dctx->litEntropy = 1; if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); return litCSize + lhSize; } @@ -190,11 +190,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ - RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected); - memcpy(dctx->litBuffer, istart+lhSize, litSize); + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); + ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); return lhSize+litSize; } /* direct reference into compressed stream */ @@ -222,8 +222,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); break; } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); - memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; return lhSize+1; @@ -236,7 +236,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, /* Default FSE distribution tables. * These are pre-calculated FSE decoding tables using default distributions as defined in specification : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions * They were generated programmatically with following method : * - start from default distributions, present in /lib/common/zstd_internal.h * - generate tables normally, using ZSTD_buildFSETable() @@ -364,23 +364,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB * generate FSE decoding table for one symbol (ll, ml or off) * cannot fail if input is valid => * all inputs are presumed validated at this stage */ -void -ZSTD_buildFSETable(ZSTD_seqSymbol* dt, +FORCE_INLINE_TEMPLATE +void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog) + unsigned tableLog, void* wksp, size_t wkspSize) { ZSTD_seqSymbol* const tableDecode = dt+1; - U16 symbolNext[MaxSeq+1]; - U32 const maxSV1 = maxSymbolValue + 1; U32 const tableSize = 1 << tableLog; - U32 highThreshold = tableSize-1; + + U16* symbolNext = (U16*)wksp; + BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1); + U32 highThreshold = tableSize - 1; + /* Sanity Checks */ assert(maxSymbolValue <= MaxSeq); assert(tableLog <= MaxFSELog); - + assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void)wkspSize; /* Init, lay down lowprob symbols */ { ZSTD_seqSymbol_header DTableH; DTableH.tableLog = tableLog; @@ -396,16 +399,69 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, assert(normalizedCounter[s]>=0); symbolNext[s] = (U16)normalizedCounter[s]; } } } - memcpy(dt, &DTableH, sizeof(DTableH)); + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); } /* Spread symbols */ - { U32 const tableMask = tableSize-1; + assert(tableSize <= 512); + /* Specialized symbol spreading for the case when there are + * no low probability (-1 count) symbols. When compressing + * small blocks we avoid low probability symbols to hit this + * case, since header decoding speed matters more. + */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. + */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ @@ -414,7 +470,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, } /* Build Decoding table */ - { U32 u; + { + U32 u; for (u=0; u max, corruption_detected); + RETURN_ERROR_IF(!srcSize, srcSize_wrong, ""); + RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, ""); { U32 const symbol = *(const BYTE*)src; U32 const baseline = baseValue[symbol]; U32 const nbBits = nbAdditionalBits[symbol]; @@ -453,7 +550,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb *DTablePtr = defaultTable; return 0; case set_repeat: - RETURN_ERROR_IF(!flagRepeatTable, corruption_detected); + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); /* prefetch FSE table if used */ if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { const void* const pStart = *DTablePtr; @@ -465,9 +562,9 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb { unsigned tableLog; S16 norm[MaxSeq+1]; size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected); - RETURN_ERROR_IF(tableLog > maxLog, corruption_detected); - ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); + RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2); *DTablePtr = DTableSpace; return headerSize; } @@ -487,28 +584,29 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); /* check */ - RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong); + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); /* SeqHead */ nbSeq = *ip++; if (!nbSeq) { *nbSeqPtr=0; - RETURN_ERROR_IF(srcSize != 1, srcSize_wrong); + RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); return 1; } if (nbSeq > 0x7F) { if (nbSeq == 0xFF) { - RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong); - nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ; + ip+=2; } else { - RETURN_ERROR_IF(ip >= iend, srcSize_wrong); + RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); nbSeq = ((nbSeq-0x80)<<8) + *ip++; } } *nbSeqPtr = nbSeq; /* FSE table descriptors */ - RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); @@ -520,8 +618,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, LL_base, LL_bits, LL_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected); + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += llhSize; } @@ -530,8 +630,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, OF_base, OF_bits, OF_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected); + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += ofhSize; } @@ -540,8 +642,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, ML_base, ML_bits, ML_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected); + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += mlhSize; } } @@ -580,7 +684,7 @@ typedef struct { * Precondition: *ip <= *op * Postcondition: *op - *op >= 8 */ -static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { +HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { assert(*ip <= *op); if (offset < 8) { /* close range match, overlap */ @@ -665,15 +769,15 @@ size_t ZSTD_execSequenceEnd(BYTE* op, { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; - /* bounds checks */ - assert(oLitEnd < oMatchEnd); - RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer"); - RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer"); + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); /* copy literals */ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); @@ -683,15 +787,15 @@ size_t ZSTD_execSequenceEnd(BYTE* op, /* copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); match = dictEnd - (prefixStart-match); if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); + ZSTD_memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; } /* span extDict & currentPrefixSegment */ { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); + ZSTD_memmove(oLitEnd, match, length1); op = oLitEnd + length1; sequence.matchLength -= length1; match = prefixStart; @@ -709,16 +813,27 @@ size_t ZSTD_execSequence(BYTE* op, BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; - /* Errors and uncommon cases handled here. */ - assert(oLitEnd < oMatchEnd); - if (iLitEnd > litLimit || oMatchEnd > oend_w) + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); assert(iLitEnd <= litLimit /* Literal length is in bounds */); assert(oLitEnd <= oend_w /* Can wildcopy literals */); assert(oMatchEnd <= oend_w /* Can wildcopy matches */); @@ -729,7 +844,7 @@ size_t ZSTD_execSequence(BYTE* op, */ assert(WILDCOPY_OVERLENGTH >= 16); ZSTD_copy16(op, (*litPtr)); - if (sequence.litLength > 16) { + if (UNLIKELY(sequence.litLength > 16)) { ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); } op = oLitEnd; @@ -738,15 +853,15 @@ size_t ZSTD_execSequence(BYTE* op, /* Copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix -> go into extDict */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); match = dictEnd + (match - prefixStart); if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); + ZSTD_memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; } /* span extDict & currentPrefixSegment */ { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); + ZSTD_memmove(oLitEnd, match, length1); op = oLitEnd + length1; sequence.matchLength -= length1; match = prefixStart; @@ -760,7 +875,7 @@ size_t ZSTD_execSequence(BYTE* op, /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy * without overlap checking. */ - if (sequence.offset >= WILDCOPY_VECLEN) { + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { /* We bet on a full wildcopy for matches, since we expect matches to be * longer than literals (in general). In silesia, ~10% of matches are longer * than 16 bytes. @@ -802,6 +917,14 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) DStatePtr->state = DInfo.nextState + lowBits; } +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) +{ + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) * bits before reloading. This value is the maximum number of bytes we read @@ -813,25 +936,26 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) : 0) typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) { seq_t seq; - U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; - U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; - U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; - U32 const totalBits = llBits+mlBits+ofBits; - U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; - U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; - U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; + ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; + ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; + ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; + U32 const llBase = llDInfo.baseValue; + U32 const mlBase = mlDInfo.baseValue; + U32 const ofBase = ofDInfo.baseValue; + BYTE const llBits = llDInfo.nbAdditionalBits; + BYTE const mlBits = mlDInfo.nbAdditionalBits; + BYTE const ofBits = ofDInfo.nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; /* sequence */ { size_t offset; - if (!ofBits) - offset = 0; - else { + if (ofBits > 1) { ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); assert(ofBits <= MaxOff); @@ -845,59 +969,142 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); } - } - - if (ofBits <= 1) { - offset += (llBase==0); - if (offset) { - size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { /* offset == 0 */ - offset = seqState->prevOffset[0]; - } - } else { seqState->prevOffset[2] = seqState->prevOffset[1]; seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[0] = offset; - } + } else { + U32 const ll0 = (llBase == 0); + if (LIKELY((ofBits == 0))) { + if (LIKELY(!ll0)) + offset = seqState->prevOffset[0]; + else { + offset = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } seq.offset = offset; } - seq.matchLength = mlBase - + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */ + seq.matchLength = mlBase; + if (mlBits > 0) + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) BIT_reloadDStream(&seqState->DStream); /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - seq.litLength = llBase - + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */ + seq.litLength = llBase; + if (llBits > 0) + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - /* ANS state update */ - ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + if (prefetch == ZSTD_p_prefetch) { + size_t const pos = seqState->pos + seq.litLength; + const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; + seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : no memory access will occur, offset is only used for prefetching */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update + * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). + * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). + * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the + * better option, so it is the default for other compilers. But, if you + * measure that it is worse, please put up a pull request. + */ + { +#if defined(__GNUC__) && !defined(__clang__) + const int kUseUpdateFseState = 1; +#else + const int kUseUpdateFseState = 0; +#endif + if (kUseUpdateFseState) { + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + } else { + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ + } + } return seq; } +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. */ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. */ + return 1; +} + +MEM_STATIC void ZSTD_assertValidSequence( + ZSTD_DCtx const* dctx, + BYTE const* op, BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, BYTE const* virtualStart) +{ +#if DEBUGLEVEL >= 1 + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. */ + assert(seq.offset <= windowSize); + } +#else + (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; +#endif +} +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG FORCE_INLINE_TEMPLATE size_t DONT_VECTORIZE ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; @@ -910,46 +1117,104 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); DEBUGLOG(5, "ZSTD_decompressSequences_body"); + (void)frame; /* Regen sequences */ if (nbSeq) { seqState_t seqState; + size_t error = 0; dctx->fseEntropy = 1; { U32 i; for (i=0; ientropy.rep[i]; } RETURN_ERROR_IF( ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected); + corruption_detected, ""); ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); ZSTD_STATIC_ASSERT( BIT_DStream_unfinished < BIT_DStream_completed && BIT_DStream_endOfBuffer < BIT_DStream_completed && BIT_DStream_completed < BIT_DStream_overflow); - for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { - nbSeq--; - { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - op += oneSeqSize; - } } +#if defined(__GNUC__) && defined(__x86_64__) + /* Align the decompression loop to 32 + 16 bytes. + * + * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression + * speed swings based on the alignment of the decompression loop. This + * performance swing is caused by parts of the decompression loop falling + * out of the DSB. The entire decompression loop should fit in the DSB, + * when it can't we get much worse performance. You can measure if you've + * hit the good case or the bad case with this perf command for some + * compressed file test.zst: + * + * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ + * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst + * + * If you see most cycles served out of the MITE you've hit the bad case. + * If you see most cycles served out of the DSB you've hit the good case. + * If it is pretty even then you may be in an okay case. + * + * I've been able to reproduce this issue on the following CPUs: + * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 + * Use Instruments->Counters to get DSB/MITE cycles. + * I never got performance swings, but I was able to + * go from the good case of mostly DSB to half of the + * cycles served from MITE. + * - Coffeelake: Intel i9-9900k + * + * I haven't been able to reproduce the instability or DSB misses on any + * of the following CPUS: + * - Haswell + * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH + * - Skylake + * + * If you are seeing performance stability this script can help test. + * It tests on 4 commits in zstd where I saw performance change. + * + * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 + */ + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 4"); +#endif + for ( ; ; ) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + BIT_reloadDStream(&(seqState.DStream)); + op += oneSeqSize; + /* gcc and clang both don't like early returns in this loop. + * Instead break and check for an error at the end of the loop. + */ + if (UNLIKELY(ZSTD_isError(oneSeqSize))) { + error = oneSeqSize; + break; + } + if (UNLIKELY(!--nbSeq)) break; + } /* check if reached exact end */ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); - RETURN_ERROR_IF(nbSeq, corruption_detected); - RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected); + if (ZSTD_isError(error)) return error; + RETURN_ERROR_IF(nbSeq, corruption_detected, ""); + RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); /* save reps for next block */ { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } } /* last literal segment */ { size_t const lastLLSize = litEnd - litPtr; - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } return op-ostart; @@ -959,99 +1224,21 @@ static size_t ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - - #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets) -{ - seq_t seq; - U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; - U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; - U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; - U32 const totalBits = llBits+mlBits+ofBits; - U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; - U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; - U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; - - /* sequence */ - { size_t offset; - if (!ofBits) - offset = 0; - else { - ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); - assert(ofBits <= MaxOff); - if (MEM_32bits() && longOffsets) { - U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1); - offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); - if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); - } else { - offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); - } - } - - if (ofBits <= 1) { - offset += (llBase==0); - if (offset) { - size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { - offset = seqState->prevOffset[0]; - } - } else { - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } - seq.offset = offset; - } - - seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) - BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */ - ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - - seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if (MEM_32bits()) - BIT_reloadDStream(&seqState->DStream); - - { size_t const pos = seqState->pos + seq.litLength; - const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; - seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. - * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */ - seqState->pos = pos + seq.matchLength; - } - - /* ANS state update */ - ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - - return seq; -} - FORCE_INLINE_TEMPLATE size_t ZSTD_decompressSequencesLong_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; @@ -1063,6 +1250,7 @@ ZSTD_decompressSequencesLong_body( const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + (void)frame; /* Regen sequences */ if (nbSeq) { @@ -1078,36 +1266,45 @@ ZSTD_decompressSequencesLong_body( seqState.prefixStart = prefixStart; seqState.pos = (size_t)(op-prefixStart); seqState.dictEnd = dictEnd; + assert(dst != NULL); assert(iend >= ip); RETURN_ERROR_IF( ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected); + corruption_detected, ""); ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); /* prepare in advance */ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb (size_t)(oend-op), dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } return op-ostart; @@ -1130,9 +1329,10 @@ static size_t ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -1146,9 +1346,10 @@ DONT_VECTORIZE ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1157,9 +1358,10 @@ static TARGET_ATTRIBUTE("bmi2") size_t ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -1169,21 +1371,23 @@ typedef size_t (*ZSTD_decompressSequences_t)( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset); + const ZSTD_longOffset_e isLongOffset, + const int frame); #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { DEBUGLOG(5, "ZSTD_decompressSequences"); #if DYNAMIC_BMI2 if (dctx->bmi2) { - return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif - return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1198,15 +1402,16 @@ static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { DEBUGLOG(5, "ZSTD_decompressSequencesLong"); #if DYNAMIC_BMI2 if (dctx->bmi2) { - return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif - return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -1240,7 +1445,6 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) } #endif - size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, @@ -1256,7 +1460,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); - RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong); + RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); /* Decode literals section */ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); @@ -1282,6 +1486,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ip += seqHSize; srcSize -= seqHSize; + RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); + #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) if ( !usePrefetchDecoder @@ -1300,17 +1506,28 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, if (usePrefetchDecoder) #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT - return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG /* else */ - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); #endif } } +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) +{ + if (dst != dctx->previousDstEnd) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dst; + dctx->previousDstEnd = dst; + } +} + + size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) diff --git a/zstd_decompress_block.h b/zstd_decompress_block.h index 7e92960..e6bd839 100644 --- a/zstd_decompress_block.h +++ b/zstd_decompress_block.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -15,7 +15,7 @@ /*-******************************************************* * Dependencies *********************************************************/ -#include /* size_t */ +#include "zstd_deps.h" /* size_t */ #include "zstd.h" /* DCtx, and some public functions */ #include "zstd_internal.h" /* blockProperties_t, and some public functions */ #include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */ @@ -48,12 +48,15 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, * this function must be called with valid parameters only * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) * in which case it cannot fail. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is + * defined in zstd_decompress_internal.h. * Internal use only. */ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog); + unsigned tableLog, void* wksp, size_t wkspSize, + int bmi2); #endif /* ZSTD_DEC_BLOCK_H */ diff --git a/zstd_decompress_internal.h b/zstd_decompress_internal.h index ccbdfa0..b604b22 100644 --- a/zstd_decompress_internal.h +++ b/zstd_decompress_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -27,26 +27,26 @@ /*-******************************************************* * Constants *********************************************************/ -static const U32 LL_base[MaxLL+1] = { +static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; -static const U32 OF_base[MaxOff+1] = { +static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; -static const U32 OF_bits[MaxOff+1] = { +static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }; -static const U32 ML_base[MaxML+1] = { +static UNUSED_ATTR const U32 ML_base[MaxML+1] = { 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, @@ -73,12 +73,16 @@ static const U32 ML_base[MaxML+1] = { #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) + typedef struct { ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; } ZSTD_entropyDTables_t; typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, @@ -117,6 +121,8 @@ struct ZSTD_DCtx_s XXH64_state_t xxhState; size_t headerSize; ZSTD_format_e format; + ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ const BYTE* litPtr; ZSTD_customMem customMem; size_t litSize; @@ -147,10 +153,19 @@ struct ZSTD_DCtx_s U32 legacyVersion; U32 hostageByte; int noForwardProgress; + ZSTD_bufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; /* workspace */ BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + + size_t oversizedDuration; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ @@ -160,7 +175,7 @@ struct ZSTD_DCtx_s /*! ZSTD_loadDEntropy() : * dict : must point at beginning of a valid zstd dictionary. - * @return : size of entropy tables read */ + * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, const void* const dict, size_t const dictSize); diff --git a/zstd_deps.h b/zstd_deps.h new file mode 100644 index 0000000..0fb8b78 --- /dev/null +++ b/zstd_deps.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This file provides common libc dependencies that zstd requires. + * The purpose is to allow replacing this file with a custom implementation + * to compile zstd without libc support. + */ + +/* Need: + * NULL + * INT_MAX + * UINT_MAX + * ZSTD_memcpy() + * ZSTD_memset() + * ZSTD_memmove() + */ +#ifndef ZSTD_DEPS_COMMON +#define ZSTD_DEPS_COMMON + +#include +#include +#include + +#if defined(__GNUC__) && __GNUC__ >= 4 +# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l)) +#else +# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) memset((p),(v),(l)) +#endif + +#endif /* ZSTD_DEPS_COMMON */ + +/* Need: + * ZSTD_malloc() + * ZSTD_free() + * ZSTD_calloc() + */ +#ifdef ZSTD_DEPS_NEED_MALLOC +#ifndef ZSTD_DEPS_MALLOC +#define ZSTD_DEPS_MALLOC + +#include + +#define ZSTD_malloc(s) malloc(s) +#define ZSTD_calloc(n,s) calloc((n), (s)) +#define ZSTD_free(p) free((p)) + +#endif /* ZSTD_DEPS_MALLOC */ +#endif /* ZSTD_DEPS_NEED_MALLOC */ + +/* + * Provides 64-bit math support. + * Need: + * U64 ZSTD_div64(U64 dividend, U32 divisor) + */ +#ifdef ZSTD_DEPS_NEED_MATH64 +#ifndef ZSTD_DEPS_MATH64 +#define ZSTD_DEPS_MATH64 + +#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor)) + +#endif /* ZSTD_DEPS_MATH64 */ +#endif /* ZSTD_DEPS_NEED_MATH64 */ + +/* Need: + * assert() + */ +#ifdef ZSTD_DEPS_NEED_ASSERT +#ifndef ZSTD_DEPS_ASSERT +#define ZSTD_DEPS_ASSERT + +#include + +#endif /* ZSTD_DEPS_ASSERT */ +#endif /* ZSTD_DEPS_NEED_ASSERT */ + +/* Need: + * ZSTD_DEBUG_PRINT() + */ +#ifdef ZSTD_DEPS_NEED_IO +#ifndef ZSTD_DEPS_IO +#define ZSTD_DEPS_IO + +#include +#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) + +#endif /* ZSTD_DEPS_IO */ +#endif /* ZSTD_DEPS_NEED_IO */ + +/* Only requested when is known to be present. + * Need: + * intptr_t + */ +#ifdef ZSTD_DEPS_NEED_STDINT +#ifndef ZSTD_DEPS_STDINT +#define ZSTD_DEPS_STDINT + +#include + +#endif /* ZSTD_DEPS_STDINT */ +#endif /* ZSTD_DEPS_NEED_STDINT */ diff --git a/zstd_double_fast.c b/zstd_double_fast.c index a661a48..ef12a52 100644 --- a/zstd_double_fast.c +++ b/zstd_double_fast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -31,15 +31,15 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, * is empty. */ for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { - U32 const current = (U32)(ip - base); + U32 const curr = (U32)(ip - base); U32 i; for (i = 0; i < fastHashFillStep; ++i) { size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); if (i == 0) - hashSmall[smHash] = current + i; + hashSmall[smHash] = curr + i; if (i == 0 || hashLarge[lgHash] == 0) - hashLarge[lgHash] = current + i; + hashLarge[lgHash] = curr + i; /* Only load extra positions for ZSTD_dtlm_full */ if (dtlm == ZSTD_dtlm_fast) break; @@ -63,10 +63,8 @@ size_t ZSTD_compressBlock_doubleFast_generic( const BYTE* ip = istart; const BYTE* anchor = istart; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 lowestValid = ms->window.dictLimit; - const U32 maxDistance = 1U << cParams->windowLog; /* presumes that, if there is a dictionary, it must be using Attach mode */ - const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid; + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); const BYTE* const prefixLowest = base + prefixLowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; @@ -96,7 +94,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( dictCParams->hashLog : hBitsL; const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? dictCParams->chainLog : hBitsS; - const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart); + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); @@ -104,13 +102,15 @@ size_t ZSTD_compressBlock_doubleFast_generic( /* if a dictionary is attached, it must be within window range */ if (dictMode == ZSTD_dictMatchState) { - assert(lowestValid + maxDistance >= endIndex); + assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); } /* init */ ip += (dictAndPrefixLength == 0); if (dictMode == ZSTD_noDict) { - U32 const maxRep = (U32)(ip - prefixLowest); + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } @@ -129,17 +129,17 @@ size_t ZSTD_compressBlock_doubleFast_generic( size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); - U32 const current = (U32)(ip-base); + U32 const curr = (U32)(ip-base); U32 const matchIndexL = hashLong[h2]; U32 matchIndexS = hashSmall[h]; const BYTE* matchLong = base + matchIndexL; const BYTE* match = base + matchIndexS; - const U32 repIndex = current + 1 - offset_1; + const U32 repIndex = curr + 1 - offset_1; const BYTE* repMatch = (dictMode == ZSTD_dictMatchState && repIndex < prefixLowestIndex) ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; - hashLong[h2] = hashSmall[h] = current; /* update hash tables */ + hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ /* check dictMatchState repcode */ if (dictMode == ZSTD_dictMatchState @@ -177,7 +177,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; - offset = (U32)(current - dictMatchIndexL - dictIndexDelta); + offset = (U32)(curr - dictMatchIndexL - dictIndexDelta); while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ goto _match_found; } } @@ -198,6 +198,9 @@ size_t ZSTD_compressBlock_doubleFast_generic( } } ip += ((ip-anchor) >> kSearchStrength) + 1; +#if defined(__aarch64__) + PREFETCH_L1(ip+256); +#endif continue; _search_next_long: @@ -206,7 +209,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); U32 const matchIndexL3 = hashLong[hl3]; const BYTE* matchL3 = base + matchIndexL3; - hashLong[hl3] = current + 1; + hashLong[hl3] = curr + 1; /* check prefix long +1 match */ if (matchIndexL3 > prefixLowestIndex) { @@ -225,7 +228,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; ip++; - offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); + offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta); while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ goto _match_found; } } } @@ -233,7 +236,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( /* if no long +1 match, explore the short match we found */ if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; - offset = (U32)(current - matchIndexS); + offset = (U32)(curr - matchIndexS); while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ } else { mLength = ZSTD_count(ip+4, match+4, iend) + 4; @@ -257,7 +260,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( if (ip <= ilimit) { /* Complementary insertion */ /* done after iLimit test, as candidates could be > iend-8 */ - { U32 const indexToInsert = current+2; + { U32 const indexToInsert = curr+2; hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; @@ -271,7 +274,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( U32 const repIndex2 = current2 - offset_2; const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState && repIndex2 < prefixLowestIndex ? - dictBase - dictIndexDelta + repIndex2 : + dictBase + repIndex2 - dictIndexDelta : base + repIndex2; if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { @@ -398,12 +401,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; const BYTE* matchLong = matchLongBase + matchLongIndex; - const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; size_t mLength; - hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ + hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ & (repIndex > dictStartIndex)) @@ -418,7 +421,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; U32 offset; mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; - offset = current - matchLongIndex; + offset = curr - matchLongIndex; while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; @@ -430,19 +433,19 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; const BYTE* match3 = match3Base + matchIndex3; U32 offset; - hashLong[h3] = current + 1; + hashLong[h3] = curr + 1; if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; ip++; - offset = current+1 - matchIndex3; + offset = curr+1 - matchIndex3; while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ } else { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; - offset = current - matchIndex; + offset = curr - matchIndex; while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ } offset_2 = offset_1; @@ -461,7 +464,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( if (ip <= ilimit) { /* Complementary insertion */ /* done after iLimit test, as candidates could be > iend-8 */ - { U32 const indexToInsert = current+2; + { U32 const indexToInsert = curr+2; hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; diff --git a/zstd_double_fast.h b/zstd_double_fast.h index 4fa31ac..73a2002 100644 --- a/zstd_double_fast.h +++ b/zstd_double_fast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zstd_errors.h b/zstd_errors.h index 92a3433..6d0d003 100644 --- a/zstd_errors.h +++ b/zstd_errors.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -76,6 +76,8 @@ typedef enum { /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ ZSTD_error_frameIndex_tooLarge = 100, ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_srcBuffer_wrong = 105, ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ } ZSTD_ErrorCode; diff --git a/zstd_fast.c b/zstd_fast.c index 6dbefee..db7ce83 100644 --- a/zstd_fast.c +++ b/zstd_fast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -29,16 +29,16 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, * Insert the other positions if their hash entry is empty. */ for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { - U32 const current = (U32)(ip - base); + U32 const curr = (U32)(ip - base); size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); - hashTable[hash0] = current; + hashTable[hash0] = curr; if (dtlm == ZSTD_dtlm_fast) continue; /* Only load extra positions for ZSTD_dtlm_full */ { U32 p; for (p = 1; p < fastHashFillStep; ++p) { size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); if (hashTable[hash] == 0) { /* not yet filled */ - hashTable[hash] = current + p; + hashTable[hash] = curr + p; } } } } } @@ -61,9 +61,7 @@ ZSTD_compressBlock_fast_generic( const BYTE* ip1; const BYTE* anchor = istart; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 maxDistance = 1U << cParams->windowLog; - const U32 validStartIndex = ms->window.dictLimit; - const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex; + const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; @@ -74,12 +72,21 @@ ZSTD_compressBlock_fast_generic( DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); ip0 += (ip0 == prefixStart); ip1 = ip0 + 1; - { U32 const maxRep = (U32)(ip0 - prefixStart); + { U32 const curr = (U32)(ip0 - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } /* Main Search Loop */ +#ifdef __INTEL_COMPILER + /* From intel 'The vector pragma indicates that the loop should be + * vectorized if it is legal to do so'. Can be used together with + * #pragma ivdep (but have opted to exclude that because intel + * warns against using it).*/ + #pragma vector always +#endif while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ size_t mLength; BYTE const* ip2 = ip0 + 2; @@ -91,19 +98,25 @@ ZSTD_compressBlock_fast_generic( U32 const current1 = (U32)(ip1-base); U32 const matchIndex0 = hashTable[h0]; U32 const matchIndex1 = hashTable[h1]; - BYTE const* repMatch = ip2-offset_1; + BYTE const* repMatch = ip2 - offset_1; const BYTE* match0 = base + matchIndex0; const BYTE* match1 = base + matchIndex1; U32 offcode; + +#if defined(__aarch64__) + PREFETCH_L1(ip0+256); +#endif + hashTable[h0] = current0; /* update hash table */ hashTable[h1] = current1; /* update hash table */ assert(ip0 + 1 == ip1); if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { - mLength = ip2[-1] == repMatch[-1] ? 1 : 0; + mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0; ip0 = ip2 - mLength; match0 = repMatch - mLength; + mLength += 4; offcode = 0; goto _match; } @@ -128,19 +141,18 @@ ZSTD_compressBlock_fast_generic( offset_2 = offset_1; offset_1 = (U32)(ip0-match0); offcode = offset_1 + ZSTD_REP_MOVE; - mLength = 0; + mLength = 4; /* Count the backwards match length */ while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ _match: /* Requires: ip0, match0, offcode */ /* Count the forward length */ - mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4; + mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); /* match found */ ip0 += mLength; anchor = ip0; - ip1 = ip0 + 1; if (ip0 <= ilimit) { /* Fill Table */ @@ -148,19 +160,18 @@ ZSTD_compressBlock_fast_generic( hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); - while ( ((ip0 <= ilimit) & (offset_2>0)) /* offset_2==0 means offset_2 is invalidated */ - && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); - ip0 += rLength; - ip1 = ip0 + 1; - ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); - anchor = ip0; - continue; /* faster when present (confirmed on gcc-8) ... (?) */ - } - } + if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); + anchor = ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } } } + ip1 = ip0 + 1; } /* save reps for next block */ @@ -247,14 +258,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ size_t mLength; size_t const h = ZSTD_hashPtr(ip, hlog, mls); - U32 const current = (U32)(ip-base); + U32 const curr = (U32)(ip-base); U32 const matchIndex = hashTable[h]; const BYTE* match = base + matchIndex; - const U32 repIndex = current + 1 - offset_1; + const U32 repIndex = curr + 1 - offset_1; const BYTE* repMatch = (repIndex < prefixStartIndex) ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; - hashTable[h] = current; /* update hash table */ + hashTable[h] = curr; /* update hash table */ if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { @@ -273,7 +284,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( continue; } else { /* found a dict match */ - U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); + U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta); mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; while (((ip>anchor) & (dictMatch>dictStart)) && (ip[-1] == dictMatch[-1])) { @@ -305,8 +316,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( if (ip <= ilimit) { /* Fill Table */ - assert(base+current+2 > istart); /* check base overflow */ - hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ + assert(base+curr+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); /* check immediate repcode */ @@ -387,7 +398,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const ilimit = iend - 8; U32 offset_1=rep[0], offset_2=rep[1]; - DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic"); + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); /* switch to "regular" variant if extDict is invalidated due to maxDistance */ if (prefixStartIndex == dictStartIndex) @@ -399,12 +410,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const U32 matchIndex = hashTable[h]; const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; const BYTE* match = matchBase + matchIndex; - const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - hashTable[h] = current; /* update hash table */ - assert(offset_1 <= current +1); /* check repIndex */ + hashTable[h] = curr; /* update hash table */ + DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); + assert(offset_1 <= curr +1); /* check repIndex */ if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { @@ -423,7 +435,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( } { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; - U32 const offset = current - matchIndex; + U32 const offset = curr - matchIndex; size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; /* update offset history */ @@ -434,7 +446,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( if (ip <= ilimit) { /* Fill Table */ - hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); /* check immediate repcode */ while (ip <= ilimit) { diff --git a/zstd_fast.h b/zstd_fast.h index b74a88c..3e15cc8 100644 --- a/zstd_fast.h +++ b/zstd_fast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zstd_internal.h b/zstd_internal.h index dcdcbdb..d94c592 100644 --- a/zstd_internal.h +++ b/zstd_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -19,6 +19,9 @@ /*-************************************* * Dependencies ***************************************/ +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) +#include +#endif #include "compiler.h" #include "mem.h" #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ @@ -53,6 +56,31 @@ extern "C" { #define MIN(a,b) ((a)<(b) ? (a) : (b)) #define MAX(a,b) ((a)>(b) ? (a) : (b)) +/** + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR +void _force_has_format_string(const char *format, ...) { + (void)format; +} + +/** + * Ignore: this is an internal helper. + * + * We want to force this function invocation to be syntactically correct, but + * we don't want to force runtime evaluation of its arguments. + */ +#define _FORCE_HAS_FORMAT_STRING(...) \ + if (0) { \ + _force_has_format_string(__VA_ARGS__); \ + } + /** * Return the specified error if the condition evaluates to true. * @@ -62,7 +90,9 @@ extern "C" { */ #define RETURN_ERROR_IF(cond, err, ...) \ if (cond) { \ - RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ RAWLOG(3, ": " __VA_ARGS__); \ RAWLOG(3, "\n"); \ return ERROR(err); \ @@ -75,7 +105,9 @@ extern "C" { */ #define RETURN_ERROR(err, ...) \ do { \ - RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ RAWLOG(3, ": " __VA_ARGS__); \ RAWLOG(3, "\n"); \ return ERROR(err); \ @@ -90,7 +122,9 @@ extern "C" { do { \ size_t const err_code = (err); \ if (ERR_isError(err_code)) { \ - RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ RAWLOG(3, ": " __VA_ARGS__); \ RAWLOG(3, "\n"); \ return err_code; \ @@ -105,7 +139,7 @@ extern "C" { #define ZSTD_REP_NUM 3 /* number of repcodes */ #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) -static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; +static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; #define KB *(1 <<10) #define MB *(1 <<20) @@ -119,15 +153,17 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; #define BIT0 1 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 -static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; -static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; +static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; #define ZSTD_FRAMEIDSIZE 4 /* magic number size */ #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ -static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; +#define ZSTD_FRAMECHECKSUMSIZE 4 + #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ @@ -150,51 +186,77 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy #define OffFSELog 8 #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog) -static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 3, 3, - 4, 6, 7, 8, 9,10,11,12, - 13,14,15,16 }; -static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 3, 2, 1, 1, 1, 1, 1, - -1,-1,-1,-1 }; +#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */ +/* Each table cannot take more than #symbols * FSELog bits */ +#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8) + +static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 3, 3, + 4, 6, 7, 8, 9,10,11,12, + 13,14,15,16 +}; +static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = { + 4, 3, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 3, 2, 1, 1, 1, 1, 1, + -1,-1,-1,-1 +}; #define LL_DEFAULTNORMLOG 6 /* for static allocation */ -static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; - -static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 3, 3, - 4, 4, 5, 7, 8, 9,10,11, - 12,13,14,15,16 }; -static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, - 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1,-1,-1, - -1,-1,-1,-1,-1 }; +static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; + +static UNUSED_ATTR const U32 ML_bits[MaxML+1] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 7, 8, 9,10,11, + 12,13,14,15,16 +}; +static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = { + 1, 4, 3, 2, 2, 2, 2, 2, + 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1,-1,-1, + -1,-1,-1,-1,-1 +}; #define ML_DEFAULTNORMLOG 6 /* for static allocation */ -static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; - -static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, - 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - -1,-1,-1,-1,-1 }; +static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; + +static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = { + 1, 1, 1, 1, 1, 1, 2, 2, + 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + -1,-1,-1,-1,-1 +}; #define OF_DEFAULTNORMLOG 5 /* for static allocation */ -static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; +static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; /*-******************************************* * Shared functions to include for inlining *********************************************/ -static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } +static void ZSTD_copy8(void* dst, const void* src) { +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) + vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src)); +#else + ZSTD_memcpy(dst, src, 8); +#endif +} #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } -static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); } +static void ZSTD_copy16(void* dst, const void* src) { +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) + vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src)); +#else + ZSTD_memcpy(dst, src, 16); +#endif +} #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } #define WILDCOPY_OVERLENGTH 32 @@ -207,13 +269,13 @@ typedef enum { } ZSTD_overlap_e; /*! ZSTD_wildcopy() : - * Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) + * Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) * @param ovtype controls the overlap detection * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. * The src buffer must be before the dst buffer. */ -MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE +MEM_STATIC FORCE_INLINE_ATTR void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) { ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; @@ -230,47 +292,110 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e } while (op < oend); } else { assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); - /* Separate out the first two COPY16() calls because the copy length is + /* Separate out the first COPY16() call because the copy length is * almost certain to be short, so the branches have different - * probabilities. - * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%. - * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%. + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. */ - COPY16(op, ip); - COPY16(op, ip); - if (op >= oend) return; +#ifdef __aarch64__ + do { + COPY16(op, ip); + } + while (op < oend); +#else + ZSTD_copy16(op, ip); + if (16 >= length) return; + op += 16; + ip += 16; do { COPY16(op, ip); COPY16(op, ip); } while (op < oend); +#endif + } +} + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) { + ZSTD_memcpy(dst, src, length); } + return length; } +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 + +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 + +/* Controls whether the input/output buffer is buffered or stable. */ +typedef enum { + ZSTD_bm_buffered = 0, /* Buffer the input/output */ + ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ +} ZSTD_bufferMode_e; + /*-******************************************* * Private declarations *********************************************/ typedef struct seqDef_s { - U32 offset; + U32 offset; /* Offset code of the sequence */ U16 litLength; U16 matchLength; } seqDef; typedef struct { seqDef* sequencesStart; - seqDef* sequences; + seqDef* sequences; /* ptr to end of sequences */ BYTE* litStart; - BYTE* lit; + BYTE* lit; /* ptr to end of literals */ BYTE* llCode; BYTE* mlCode; BYTE* ofCode; size_t maxNbSeq; size_t maxNbLit; - U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ - U32 longLengthPos; + + /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. + */ + U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */ + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ } seqStore_t; +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; + +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->matchLength + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthID == 1) { + seqLen.litLength += 0xFFFF; + } + if (seqStore->longLengthID == 2) { + seqLen.matchLength += 0xFFFF; + } + } + return seqLen; +} + /** * Contains the compressed frame size and an upper-bound for the decompressed frame size. * Note: before using `compressedSize`, check for errors using ZSTD_isError(). @@ -286,9 +411,9 @@ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBu void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ /* custom memory allocation functions */ -void* ZSTD_malloc(size_t size, ZSTD_customMem customMem); -void* ZSTD_calloc(size_t size, ZSTD_customMem customMem); -void ZSTD_free(void* ptr, ZSTD_customMem customMem); +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ @@ -296,9 +421,12 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus assert(val != 0); { # if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - _BitScanReverse(&r, val); - return (unsigned)r; +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val)^31; +# else + unsigned long r=0; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# endif # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ return __builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */ diff --git a/zstd_lazy.c b/zstd_lazy.c index 9ad7e03..49ec1b0 100644 --- a/zstd_lazy.c +++ b/zstd_lazy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -58,11 +58,11 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms, /** ZSTD_insertDUBT1() : * sort one already inserted but unsorted position - * assumption : current >= btlow == (current - btmask) + * assumption : curr >= btlow == (curr - btmask) * doesn't fail */ static void ZSTD_insertDUBT1(ZSTD_matchState_t* ms, - U32 current, const BYTE* inputEnd, + U32 curr, const BYTE* inputEnd, U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode) { @@ -74,41 +74,41 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms, const BYTE* const base = ms->window.base; const BYTE* const dictBase = ms->window.dictBase; const U32 dictLimit = ms->window.dictLimit; - const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current; - const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit; + const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr; + const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* match; - U32* smallerPtr = bt + 2*(current&btMask); + U32* smallerPtr = bt + 2*(curr&btMask); U32* largerPtr = smallerPtr + 1; U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ U32 dummy32; /* to be nullified at the end */ U32 const windowValid = ms->window.lowLimit; U32 const maxDistance = 1U << cParams->windowLog; - U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid; + U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid; DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", - current, dictLimit, windowLow); - assert(current >= btLow); + curr, dictLimit, windowLow); + assert(curr >= btLow); assert(ip < iend); /* condition for ZSTD_count */ while (nbCompares-- && (matchIndex > windowLow)) { U32* const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - assert(matchIndex < current); + assert(matchIndex < curr); /* note : all candidates are now supposed sorted, * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ if ( (dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ - || (current < dictLimit) /* both in extDict */) { + || (curr < dictLimit) /* both in extDict */) { const BYTE* const mBase = ( (dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) ? base : dictBase; assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ - || (current < dictLimit) ); + || (curr < dictLimit) ); match = mBase + matchIndex; matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); } else { @@ -119,7 +119,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms, } DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", - current, matchIndex, (U32)matchLength); + curr, matchIndex, (U32)matchLength); if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ @@ -168,7 +168,7 @@ ZSTD_DUBT_findBetterDictMatch ( const BYTE* const base = ms->window.base; const BYTE* const prefixStart = base + ms->window.dictLimit; - U32 const current = (U32)(ip-base); + U32 const curr = (U32)(ip-base); const BYTE* const dictBase = dms->window.base; const BYTE* const dictEnd = dms->window.nextSrc; U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); @@ -195,10 +195,10 @@ ZSTD_DUBT_findBetterDictMatch ( if (matchLength > bestLength) { U32 matchIndex = dictMatchIndex + dictIndexDelta; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", - current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex); - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex); + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; } if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ break; /* drop, to guarantee consistency (miss a little bit of compression) */ @@ -218,9 +218,9 @@ ZSTD_DUBT_findBetterDictMatch ( } if (bestLength >= MINMATCH) { - U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", - current, (U32)bestLength, (U32)*offsetPtr, mIndex); + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); } return bestLength; @@ -241,13 +241,13 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, U32 matchIndex = hashTable[h]; const BYTE* const base = ms->window.base; - U32 const current = (U32)(ip-base); - U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); + U32 const curr = (U32)(ip-base); + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); U32* const bt = ms->chainTable; U32 const btLog = cParams->chainLog - 1; U32 const btMask = (1 << btLog) - 1; - U32 const btLow = (btMask >= current) ? 0 : current - btMask; + U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; U32 const unsortLimit = MAX(btLow, windowLow); U32* nextCandidate = bt + 2*(matchIndex&btMask); @@ -256,8 +256,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, U32 nbCandidates = nbCompares; U32 previousCandidate = 0; - DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current); + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr); assert(ip <= iend-8); /* required for h calculation */ + assert(dictMode != ZSTD_dedicatedDictSearch); /* reach end of unsorted candidates list */ while ( (matchIndex > unsortLimit) @@ -299,14 +300,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, const U32 dictLimit = ms->window.dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current + 8 + 1; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr + 8 + 1; U32 dummy32; /* to be nullified at the end */ size_t bestLength = 0; matchIndex = hashTable[h]; - hashTable[h] = current; /* Update Hash Table */ + hashTable[h] = curr; /* Update Hash Table */ while (nbCompares-- && (matchIndex > windowLow)) { U32* const nextPtr = bt + 2*(matchIndex & btMask); @@ -326,8 +327,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, if (matchLength > bestLength) { if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ if (dictMode == ZSTD_dictMatchState) { nbCompares = 0; /* in addition to avoiding checking any @@ -363,12 +364,12 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, mls, dictMode); } - assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */ + assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ if (bestLength >= MINMATCH) { - U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", - current, (U32)bestLength, (U32)*offsetPtr, mIndex); + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); } return bestLength; } @@ -446,7 +447,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( /* Update chains up to ip (excluded) Assumption : always within prefix (i.e. not within extDict) */ -static U32 ZSTD_insertAndFindFirstIndex_internal( +FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( ZSTD_matchState_t* ms, const ZSTD_compressionParameters* const cParams, const BYTE* ip, U32 const mls) @@ -475,6 +476,121 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); } +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32* const hashTable = ms->hashTable; + U32* const chainTable = ms->chainTable; + U32 const chainSize = 1 << ms->cParams.chainLog; + U32 idx = ms->nextToUpdate; + U32 const minChain = chainSize < target ? target - chainSize : idx; + U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const cacheSize = bucketSize - 1; + U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize; + U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts; + + /* We know the hashtable is oversized by a factor of `bucketSize`. + * We are going to temporarily pretend `bucketSize == 1`, keeping only a + * single entry. We will use the rest of the space to construct a temporary + * chaintable. + */ + U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + U32* const tmpHashTable = hashTable; + U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog); + U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; + U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx; + + U32 hashIdx; + + assert(ms->cParams.chainLog <= 24); + assert(ms->cParams.hashLog >= ms->cParams.chainLog); + assert(idx != 0); + assert(tmpMinChain <= minChain); + + /* fill conventional hash table and conventional chain table */ + for ( ; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch); + if (idx >= tmpMinChain) { + tmpChainTable[idx - tmpMinChain] = hashTable[h]; + } + tmpHashTable[h] = idx; + } + + /* sort chains into ddss chain table */ + { + U32 chainPos = 0; + for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) { + U32 count; + U32 countBeyondMinChain = 0; + U32 i = tmpHashTable[hashIdx]; + for (count = 0; i >= tmpMinChain && count < cacheSize; count++) { + /* skip through the chain to the first position that won't be + * in the hash cache bucket */ + if (i < minChain) { + countBeyondMinChain++; + } + i = tmpChainTable[i - tmpMinChain]; + } + if (count == cacheSize) { + for (count = 0; count < chainLimit;) { + if (i < minChain) { + if (!i || countBeyondMinChain++ > cacheSize) { + /* only allow pulling `cacheSize` number of entries + * into the cache or chainTable beyond `minChain`, + * to replace the entries pulled out of the + * chainTable into the cache. This lets us reach + * back further without increasing the total number + * of entries in the chainTable, guaranteeing the + * DDSS chain table will fit into the space + * allocated for the regular one. */ + break; + } + } + chainTable[chainPos++] = i; + count++; + if (i < tmpMinChain) { + break; + } + i = tmpChainTable[i - tmpMinChain]; + } + } else { + count = 0; + } + if (count) { + tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count; + } else { + tmpHashTable[hashIdx] = 0; + } + } + assert(chainPos <= chainSize); /* I believe this is guaranteed... */ + } + + /* move chain pointers into the last entry of each hash bucket */ + for (hashIdx = (1 << hashLog); hashIdx; ) { + U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const chainPackedPointer = tmpHashTable[hashIdx]; + U32 i; + for (i = 0; i < cacheSize; i++) { + hashTable[bucketIdx + i] = 0; + } + hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer; + } + + /* fill the buckets of the hash table */ + for (idx = ms->nextToUpdate; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch) + << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 i; + /* Shift hash cache down 1. */ + for (i = cacheSize - 1; i; i--) + hashTable[h + i] = hashTable[h + i - 1]; + hashTable[h] = idx; + } + + ms->nextToUpdate = target; +} + /* inlining is important to hardwire a hot branch (template emulation) */ FORCE_INLINE_TEMPLATE @@ -493,20 +609,33 @@ size_t ZSTD_HcFindBestMatch_generic ( const U32 dictLimit = ms->window.dictLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; - const U32 current = (U32)(ip-base); + const U32 curr = (U32)(ip-base); const U32 maxDistance = 1U << cParams->windowLog; const U32 lowestValid = ms->window.lowLimit; - const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; const U32 isDictionary = (ms->loadedDictEnd != 0); const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; - const U32 minChain = current > chainSize ? current - chainSize : 0; + const U32 minChain = curr > chainSize ? curr - chainSize : 0; U32 nbAttempts = 1U << cParams->searchLog; size_t ml=4-1; + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch + ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch + ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + + U32 matchIndex; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32* entry = &dms->hashTable[ddsIdx]; + PREFETCH_L1(entry); + } + /* HC4 match finder */ - U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); - for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { + for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { size_t currentMl=0; if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { const BYTE* const match = base + matchIndex; @@ -523,7 +652,7 @@ size_t ZSTD_HcFindBestMatch_generic ( /* save best solution */ if (currentMl > ml) { ml = currentMl; - *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; + *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ } @@ -531,8 +660,92 @@ size_t ZSTD_HcFindBestMatch_generic ( matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); } - if (dictMode == ZSTD_dictMatchState) { - const ZSTD_matchState_t* const dms = ms->dictMatchState; + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32 ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; + U32 ddsAttempt; + + for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 const chainIndex = chainPackedPointer >> 8; + + PREFETCH_L1(&dms->chainTable[chainIndex]); + } + + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (!matchIndex) { + return ml; + } + + /* guaranteed by table construction */ + (void)ddsLowestIndex; + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } + } + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 chainIndex = chainPackedPointer >> 8; + U32 const chainLength = chainPackedPointer & 0xFF; + U32 const chainAttempts = nbAttempts - ddsAttempt; + U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts; + U32 chainAttempt; + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { + PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); + } + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[chainIndex]; + match = ddsBase + matchIndex; + + /* guaranteed by table construction */ + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + } else if (dictMode == ZSTD_dictMatchState) { const U32* const dmsChainTable = dms->chainTable; const U32 dmsChainSize = (1 << dms->cParams.chainLog); const U32 dmsChainMask = dmsChainSize - 1; @@ -545,7 +758,7 @@ size_t ZSTD_HcFindBestMatch_generic ( matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; - for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { + for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { size_t currentMl=0; const BYTE* const match = dmsBase + matchIndex; assert(match+4 <= dmsEnd); @@ -555,11 +768,12 @@ size_t ZSTD_HcFindBestMatch_generic ( /* save best solution */ if (currentMl > ml) { ml = currentMl; - *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ } if (matchIndex <= dmsMinChain) break; + matchIndex = dmsChainTable[matchIndex & dmsChainMask]; } } @@ -600,6 +814,22 @@ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( } +static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch); + } +} + + FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* const iLimit, @@ -641,35 +871,62 @@ ZSTD_compressBlock_lazy_generic( typedef size_t (*searchMax_f)( ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? - (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS - : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : - (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS - : ZSTD_HcFindBestMatch_selectMLS); + + /** + * This table is indexed first by the four ZSTD_dictMode_e values, and then + * by the two searchMethod_e values. NULLs are placed for configurations + * that should never occur (extDict modes go to the other implementation + * below and there is no DDSS for binary tree search yet). + */ + const searchMax_f searchFuncs[4][2] = { + { + ZSTD_HcFindBestMatch_selectMLS, + ZSTD_BtFindBestMatch_selectMLS + }, + { + NULL, + NULL + }, + { + ZSTD_HcFindBestMatch_dictMatchState_selectMLS, + ZSTD_BtFindBestMatch_dictMatchState_selectMLS + }, + { + ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS, + NULL + } + }; + + searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree]; U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; + const int isDMS = dictMode == ZSTD_dictMatchState; + const int isDDS = dictMode == ZSTD_dedicatedDictSearch; + const int isDxS = isDMS || isDDS; const ZSTD_matchState_t* const dms = ms->dictMatchState; - const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ? - dms->window.dictLimit : 0; - const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? - dms->window.base : NULL; - const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? - dictBase + dictLowestIndex : NULL; - const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? - dms->window.nextSrc : NULL; - const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0; + const BYTE* const dictBase = isDxS ? dms->window.base : NULL; + const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL; + const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL; + const U32 dictIndexDelta = isDxS ? prefixLowestIndex - (U32)(dictEnd - dictBase) : 0; - const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest); + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); + + assert(searchMax != NULL); + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); /* init */ ip += (dictAndPrefixLength == 0); if (dictMode == ZSTD_noDict) { - U32 const maxRep = (U32)(ip - prefixLowest); + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); + U32 const maxRep = curr - windowLow; if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; } - if (dictMode == ZSTD_dictMatchState) { + if (isDxS) { /* dictMatchState repCode checks don't currently handle repCode == 0 * disabling. */ assert(offset_1 <= dictAndPrefixLength); @@ -677,15 +934,21 @@ ZSTD_compressBlock_lazy_generic( } /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif while (ip < ilimit) { size_t matchLength=0; size_t offset=0; const BYTE* start=ip+1; /* check repCode */ - if (dictMode == ZSTD_dictMatchState) { + if (isDxS) { const U32 repIndex = (U32)(ip - base) + 1 - offset_1; - const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) && repIndex < prefixLowestIndex) ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; @@ -726,7 +989,7 @@ ZSTD_compressBlock_lazy_generic( if ((mlRep >= 4) && (gain2 > gain1)) matchLength = mlRep, offset = 0, start = ip; } - if (dictMode == ZSTD_dictMatchState) { + if (isDxS) { const U32 repIndex = (U32)(ip - base) - offset_1; const BYTE* repMatch = repIndex < prefixLowestIndex ? dictBase + (repIndex - dictIndexDelta) : @@ -761,7 +1024,7 @@ ZSTD_compressBlock_lazy_generic( if ((mlRep >= 4) && (gain2 > gain1)) matchLength = mlRep, offset = 0, start = ip; } - if (dictMode == ZSTD_dictMatchState) { + if (isDxS) { const U32 repIndex = (U32)(ip - base) - offset_1; const BYTE* repMatch = repIndex < prefixLowestIndex ? dictBase + (repIndex - dictIndexDelta) : @@ -799,7 +1062,7 @@ ZSTD_compressBlock_lazy_generic( && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ { start--; matchLength++; } } - if (dictMode == ZSTD_dictMatchState) { + if (isDxS) { U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; @@ -815,12 +1078,11 @@ ZSTD_compressBlock_lazy_generic( } /* check immediate repcode */ - if (dictMode == ZSTD_dictMatchState) { + if (isDxS) { while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); U32 const repIndex = current2 - offset_2; - const BYTE* repMatch = dictMode == ZSTD_dictMatchState - && repIndex < prefixLowestIndex ? + const BYTE* repMatch = repIndex < prefixLowestIndex ? dictBase - dictIndexDelta + repIndex : base + repIndex; if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) @@ -915,6 +1177,28 @@ size_t ZSTD_compressBlock_greedy_dictMatchState( } +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); +} + + FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_extDict_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, @@ -929,11 +1213,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* const ilimit = iend - 8; const BYTE* const base = ms->window.base; const U32 dictLimit = ms->window.dictLimit; - const U32 lowestIndex = ms->window.lowLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictBase = ms->window.dictBase; const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const dictStart = dictBase + lowestIndex; + const BYTE* const dictStart = dictBase + ms->window.lowLimit; + const U32 windowLog = ms->cParams.windowLog; typedef size_t (*searchMax_f)( ZSTD_matchState_t* ms, @@ -942,21 +1226,30 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( U32 offset_1 = rep[0], offset_2 = rep[1]; + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); + /* init */ ip += (ip == prefixStart); /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif while (ip < ilimit) { size_t matchLength=0; size_t offset=0; const BYTE* start=ip+1; - U32 current = (U32)(ip-base); + U32 curr = (U32)(ip-base); /* check repCode */ - { const U32 repIndex = (U32)(current+1 - offset_1); + { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog); + const U32 repIndex = (U32)(curr+1 - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ if (MEM_read32(ip+1) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -980,13 +1273,14 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( if (depth>=1) while (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1010,13 +1304,14 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( /* let's find an even better one */ if ((depth==2) && (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1057,10 +1352,12 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( /* check immediate repcode */ while (ip <= ilimit) { - const U32 repIndex = (U32)((ip-base) - offset_2); + const U32 repCurrent = (U32)(ip-base); + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); + const U32 repIndex = repCurrent - offset_2; const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; diff --git a/zstd_lazy.h b/zstd_lazy.h index bb17630..d0214d5 100644 --- a/zstd_lazy.h +++ b/zstd_lazy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -17,8 +17,18 @@ extern "C" { #include "zstd_compress_internal.h" +/** + * Dedicated Dictionary Search Structure bucket log. In the + * ZSTD_dedicatedDictSearch mode, the hashTable has + * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just + * one. + */ +#define ZSTD_LAZY_DDSS_BUCKET_LOG 2 + U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); + void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ size_t ZSTD_compressBlock_btlazy2( @@ -47,6 +57,16 @@ size_t ZSTD_compressBlock_greedy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + size_t ZSTD_compressBlock_greedy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); diff --git a/zstd_ldm.c b/zstd_ldm.c index c3312ad..3776dd1 100644 --- a/zstd_ldm.c +++ b/zstd_ldm.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ #include "zstd_ldm.h" @@ -26,13 +27,6 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params, DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; - if (cParams->strategy >= ZSTD_btopt) { - /* Get out of the way of the optimal parser */ - U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength); - assert(minMatch >= ZSTD_LDM_MINMATCH_MIN); - assert(minMatch <= ZSTD_LDM_MINMATCH_MAX); - params->minMatchLength = minMatch; - } if (params->hashLog == 0) { params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); assert(params->hashLog <= ZSTD_HASHLOG_MAX); @@ -149,10 +143,10 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ static size_t ZSTD_ldm_countBackwardsMatch( const BYTE* pIn, const BYTE* pAnchor, - const BYTE* pMatch, const BYTE* pBase) + const BYTE* pMatch, const BYTE* pMatchBase) { size_t matchLength = 0; - while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { + while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) { pIn--; pMatch--; matchLength++; @@ -160,6 +154,27 @@ static size_t ZSTD_ldm_countBackwardsMatch( return matchLength; } +/** ZSTD_ldm_countBackwardsMatch_2segments() : + * Returns the number of bytes that match backwards from pMatch, + * even with the backwards match spanning 2 different segments. + * + * On reaching `pMatchBase`, start counting from mEnd */ +static size_t ZSTD_ldm_countBackwardsMatch_2segments( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase, + const BYTE* pExtDictStart, const BYTE* pExtDictEnd) +{ + size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase); + if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) { + /* If backwards match is entirely in the extDict or prefix, immediately return */ + return matchLength; + } + DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength); + matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart); + DEBUGLOG(7, "final backwards match length = %zu", matchLength); + return matchLength; +} + /** ZSTD_ldm_fillFastTables() : * * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. @@ -223,6 +238,20 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, return rollingHash; } +void ZSTD_ldm_fillHashTable( + ldmState_t* state, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params) +{ + DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); + if ((size_t)(iend - ip) >= params->minMatchLength) { + U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength); + ZSTD_ldm_fillLdmHashTable( + state, startingHash, ip, iend - params->minMatchLength, state->window.base, + params->hashLog - params->bucketSizeLog, + *params); + } +} + /** ZSTD_ldm_limitTableUpdate() : * @@ -231,10 +260,10 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, * (after a long match, only update tables a limited amount). */ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) { - U32 const current = (U32)(anchor - ms->window.base); - if (current > ms->nextToUpdate + 1024) { + U32 const curr = (U32)(anchor - ms->window.base); + if (curr > ms->nextToUpdate + 1024) { ms->nextToUpdate = - current - MIN(512, current - ms->nextToUpdate - 1024); + curr - MIN(512, curr - ms->nextToUpdate - 1024); } } @@ -271,7 +300,7 @@ static size_t ZSTD_ldm_generateSequences_internal( while (ip <= ilimit) { size_t mLength; - U32 const current = (U32)(ip - base); + U32 const curr = (U32)(ip - base); size_t forwardMatchLength = 0, backwardMatchLength = 0; ldmEntry_t* bestEntry = NULL; if (ip != istart) { @@ -321,8 +350,9 @@ static size_t ZSTD_ldm_generateSequences_internal( continue; } curBackwardMatchLength = - ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, - lowMatchPtr); + ZSTD_ldm_countBackwardsMatch_2segments(ip, anchor, + pMatch, lowMatchPtr, + dictStart, dictEnd); curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; } else { /* !extDict */ @@ -350,7 +380,7 @@ static size_t ZSTD_ldm_generateSequences_internal( /* No match found -- continue searching */ if (bestEntry == NULL) { ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, - hBits, current, + hBits, curr, *params); ip++; continue; @@ -362,11 +392,11 @@ static size_t ZSTD_ldm_generateSequences_internal( { /* Store the sequence: - * ip = current - backwardMatchLength + * ip = curr - backwardMatchLength * The match is at (bestEntry->offset - backwardMatchLength) */ U32 const matchIndex = bestEntry->offset; - U32 const offset = current - matchIndex; + U32 const offset = curr - matchIndex; rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; /* Out of sequence storage */ @@ -449,6 +479,8 @@ size_t ZSTD_ldm_generateSequences( U32 const correction = ZSTD_window_correctOverflow( &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + /* invalidate dictionaries on overflow correction */ + ldmState->loadedDictEnd = 0; } /* 2. We enforce the maximum offset allowed. * @@ -457,8 +489,14 @@ size_t ZSTD_ldm_generateSequences( * TODO: * Test the chunk size. * * Try invalidation after the sequence generation and test the * the offset against maxDist directly. + * + * NOTE: Because of dictionaries + sequence splitting we MUST make sure + * that any offset used is valid at the END of the sequence, since it may + * be split into two sequences. This condition holds when using + * ZSTD_window_enforceMaxDist(), but if we move to checking offsets + * against maxDist directly, we'll have to carefully handle that case. */ - ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL); + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ newLeftoverSize = ZSTD_ldm_generateSequences_internal( ldmState, sequences, params, chunkStart, chunkSize); @@ -539,6 +577,23 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, return sequence; } +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) @@ -554,6 +609,15 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, BYTE const* ip = istart; DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); + /* If using opt parser, use LDMs only as candidates rather than always accepting them */ + if (cParams->strategy >= ZSTD_btopt) { + size_t lastLLSize; + ms->ldmSeqStore = rawSeqStore; + lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize); + ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize); + return lastLLSize; + } + assert(rawSeqStore->pos <= rawSeqStore->size); assert(rawSeqStore->size <= rawSeqStore->capacity); /* Loop through each sequence and apply the block compressor to the lits */ @@ -566,14 +630,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, if (sequence.offset == 0) break; - assert(sequence.offset <= (1U << cParams->windowLog)); assert(ip + sequence.litLength + sequence.matchLength <= iend); /* Fill tables for block compressor */ ZSTD_ldm_limitTableUpdate(ms, ip); ZSTD_ldm_fillFastTables(ms, ip); /* Run the block compressor */ - DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength); + DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); { size_t const newLitLength = blockCompressor(ms, seqStore, rep, ip, sequence.litLength); diff --git a/zstd_ldm.h b/zstd_ldm.h index a478461..e95cfd1 100644 --- a/zstd_ldm.h +++ b/zstd_ldm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ #ifndef ZSTD_LDM_H @@ -23,6 +24,10 @@ extern "C" { #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT +void ZSTD_ldm_fillHashTable( + ldmState_t* state, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params); + /** * ZSTD_ldm_generateSequences(): * @@ -73,6 +78,12 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch); +/* ZSTD_ldm_skipRawSeqStoreBytes(): + * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'. + * Not to be used in conjunction with ZSTD_ldm_skipSequences(). + * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes); /** ZSTD_ldm_getTableSize() : * Estimate the space needed for long distance matching tables or 0 if LDM is diff --git a/zstd_legacy.h b/zstd_legacy.h index 0dbd3c7..ecd9682 100644 --- a/zstd_legacy.h +++ b/zstd_legacy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zstd_opt.c b/zstd_opt.c index 2e50fca..e55c459 100644 --- a/zstd_opt.c +++ b/zstd_opt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -249,40 +249,6 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP } } -/* ZSTD_litLengthContribution() : - * @return ( cost(litlength) - cost(0) ) - * this value can then be added to rawLiteralsCost() - * to provide a cost which is directly comparable to a match ending at same position */ -static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel) -{ - if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel); - - /* dynamic statistics */ - { U32 const llCode = ZSTD_LLcode(litLength); - int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER) - + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */ - - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel); -#if 1 - return contribution; -#else - return MAX(0, contribution); /* sometimes better, sometimes not ... */ -#endif - } -} - -/* ZSTD_literalsContribution() : - * creates a fake cost for the literals part of a sequence - * which can be compared to the ending cost of a match - * should a new match start at this position */ -static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength, - const optState_t* const optPtr, - int optLevel) -{ - int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel) - + ZSTD_litLengthContribution(litLength, optPtr, optLevel); - return contribution; -} - /* ZSTD_getMatchPrice() : * Provides the cost of the match part (offset + matchLength) of a sequence * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. @@ -420,32 +386,32 @@ static U32 ZSTD_insertBt1( const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* match; - const U32 current = (U32)(ip-base); - const U32 btLow = btMask >= current ? 0 : current - btMask; - U32* smallerPtr = bt + 2*(current&btMask); + const U32 curr = (U32)(ip-base); + const U32 btLow = btMask >= curr ? 0 : curr - btMask; + U32* smallerPtr = bt + 2*(curr&btMask); U32* largerPtr = smallerPtr + 1; U32 dummy32; /* to be nullified at the end */ U32 const windowLow = ms->window.lowLimit; - U32 matchEndIdx = current+8+1; + U32 matchEndIdx = curr+8+1; size_t bestLength = 8; U32 nbCompares = 1U << cParams->searchLog; #ifdef ZSTD_C_PREDICT - U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); - U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); + U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1); predictedSmall += (predictedSmall>0); predictedLarge += (predictedLarge>0); #endif /* ZSTD_C_PREDICT */ - DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr); assert(ip <= iend-8); /* required for h calculation */ - hashTable[h] = current; /* Update Hash Table */ + hashTable[h] = curr; /* Update Hash Table */ assert(windowLow > 0); while (nbCompares-- && (matchIndex >= windowLow)) { U32* const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - assert(matchIndex < current); + assert(matchIndex < curr); #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ @@ -508,8 +474,8 @@ static U32 ZSTD_insertBt1( *smallerPtr = *largerPtr = 0; { U32 positions = 0; if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ - assert(matchEndIdx > current + 8); - return MAX(positions, matchEndIdx - (current + 8)); + assert(matchEndIdx > curr + 8); + return MAX(positions, matchEndIdx - (curr + 8)); } } @@ -553,7 +519,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( const ZSTD_compressionParameters* const cParams = &ms->cParams; U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); const BYTE* const base = ms->window.base; - U32 const current = (U32)(ip-base); + U32 const curr = (U32)(ip-base); U32 const hashLog = cParams->hashLog; U32 const minMatch = (mls==3) ? 3 : 4; U32* const hashTable = ms->hashTable; @@ -567,12 +533,12 @@ U32 ZSTD_insertBtAndGetAllMatches ( U32 const dictLimit = ms->window.dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; - U32 const btLow = (btMask >= current) ? 0 : current - btMask; - U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); + U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); U32 const matchLow = windowLow ? windowLow : 1; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */ + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */ U32 dummy32; /* to be nullified at the end */ U32 mnum = 0; U32 nbCompares = 1U << cParams->searchLog; @@ -591,7 +557,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit; size_t bestLength = lengthToBeat-1; - DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current); + DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr); /* check repCode */ assert(ll0 <= 1); /* necessarily 1 or 0 */ @@ -599,26 +565,29 @@ U32 ZSTD_insertBtAndGetAllMatches ( U32 repCode; for (repCode = ll0; repCode < lastR; repCode++) { U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - U32 const repIndex = current - repOffset; + U32 const repIndex = curr - repOffset; U32 repLen = 0; - assert(current >= dictLimit); - if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */ - if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) { + assert(curr >= dictLimit); + if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */ + /* We must validate the repcode offset because when we're using a dictionary the + * valid offset range shrinks when the dictionary goes out of bounds. + */ + if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; } - } else { /* repIndex < dictLimit || repIndex >= current */ + } else { /* repIndex < dictLimit || repIndex >= curr */ const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? dmsBase + repIndex - dmsIndexDelta : dictBase + repIndex; - assert(current >= windowLow); + assert(curr >= windowLow); if ( dictMode == ZSTD_extDict - && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */ + && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */ & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; } if (dictMode == ZSTD_dictMatchState - && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */ + && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */ & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; @@ -640,7 +609,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( if ((mls == 3) /*static*/ && (bestLength < mls)) { U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); if ((matchIndex3 >= matchLow) - & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { + & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { size_t mlen; if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { const BYTE* const match = base + matchIndex3; @@ -655,26 +624,26 @@ U32 ZSTD_insertBtAndGetAllMatches ( DEBUGLOG(8, "found small match with hlog3, of length %u", (U32)mlen); bestLength = mlen; - assert(current > matchIndex3); + assert(curr > matchIndex3); assert(mnum==0); /* no prior solution */ - matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE; + matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE; matches[0].len = (U32)mlen; mnum = 1; if ( (mlen > sufficient_len) | (ip+mlen == iLimit) ) { /* best possible length */ - ms->nextToUpdate = current+1; /* skip insertion */ + ms->nextToUpdate = curr+1; /* skip insertion */ return 1; } } } /* no dictMatchState lookup: dicts don't have a populated HC3 table */ } - hashTable[h] = current; /* Update Hash Table */ + hashTable[h] = curr; /* Update Hash Table */ while (nbCompares-- && (matchIndex >= matchLow)) { U32* const nextPtr = bt + 2*(matchIndex & btMask); const BYTE* match; size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - assert(current > matchIndex); + assert(curr > matchIndex); if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ @@ -691,12 +660,12 @@ U32 ZSTD_insertBtAndGetAllMatches ( if (matchLength > bestLength) { DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", - (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); + (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); assert(matchEndIdx > matchIndex); if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; bestLength = matchLength; - matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; matches[mnum].len = (U32)matchLength; mnum++; if ( (matchLength > ZSTD_OPT_NUM) @@ -739,11 +708,11 @@ U32 ZSTD_insertBtAndGetAllMatches ( if (matchLength > bestLength) { matchIndex = dictMatchIndex + dmsIndexDelta; DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", - (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); + (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; bestLength = matchLength; - matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; matches[mnum].len = (U32)matchLength; mnum++; if ( (matchLength > ZSTD_OPT_NUM) @@ -764,7 +733,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( } } - assert(matchEndIdx > current+8); + assert(matchEndIdx > curr+8); ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ return mnum; } @@ -795,35 +764,145 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( } } +/************************* +* LDM helper functions * +*************************/ -/*-******************************* -* Optimal parser -*********************************/ -typedef struct repcodes_s { - U32 rep[3]; -} repcodes_t; +/* Struct containing info needed to make decision about ldm inclusion */ +typedef struct { + rawSeqStore_t seqStore; /* External match candidates store for this block */ + U32 startPosInBlock; /* Start position of the current match candidate */ + U32 endPosInBlock; /* End position of the current match candidate */ + U32 offset; /* Offset of the match candidate */ +} ZSTD_optLdm_t; -static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) -{ - repcodes_t newReps; - if (offset >= ZSTD_REP_NUM) { /* full offset */ - newReps.rep[2] = rep[1]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = offset - ZSTD_REP_MOVE; - } else { /* repcode */ - U32 const repCode = offset + ll0; - if (repCode > 0) { /* note : if repCode==0, no change */ - U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = currentOffset; - } else { /* repCode == 0 */ - memcpy(&newReps, rep, sizeof(newReps)); +/* ZSTD_optLdm_skipRawSeqStoreBytes(): + * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'. + */ +static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; } } - return newReps; + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +/* ZSTD_opt_getNextMatchAndUpdateSeqStore(): + * Calculates the beginning and end of the next match in the current block. + * Updates 'pos' and 'posInSequence' of the ldmSeqStore. + */ +static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock, + U32 blockBytesRemaining) { + rawSeq currSeq; + U32 currBlockEndPos; + U32 literalsBytesRemaining; + U32 matchBytesRemaining; + + /* Setting match end position to MAX to ensure we never use an LDM during this block */ + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + return; + } + /* Calculate appropriate bytes left in matchLength and litLength after adjusting + based on ldmSeqStore->posInSequence */ + currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos]; + assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength); + currBlockEndPos = currPosInBlock + blockBytesRemaining; + literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ? + currSeq.litLength - (U32)optLdm->seqStore.posInSequence : + 0; + matchBytesRemaining = (literalsBytesRemaining == 0) ? + currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) : + currSeq.matchLength; + + /* If there are more literal bytes than bytes remaining in block, no ldm is possible */ + if (literalsBytesRemaining >= blockBytesRemaining) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining); + return; + } + + /* Matches may be < MINMATCH by this process. In that case, we will reject them + when we are deciding whether or not to add the ldm */ + optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining; + optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining; + optLdm->offset = currSeq.offset; + + if (optLdm->endPosInBlock > currBlockEndPos) { + /* Match ends after the block ends, we can't use the whole match */ + optLdm->endPosInBlock = currBlockEndPos; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock); + } else { + /* Consume nb of bytes equal to size of sequence left */ + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining); + } +} + +/* ZSTD_optLdm_maybeAddMatch(): + * Adds a match if it's long enough, based on it's 'matchStartPosInBlock' + * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches' + */ +static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, + ZSTD_optLdm_t* optLdm, U32 currPosInBlock) { + U32 posDiff = currPosInBlock - optLdm->startPosInBlock; + /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ + U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; + U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE; + + /* Ensure that current block position is not outside of the match */ + if (currPosInBlock < optLdm->startPosInBlock + || currPosInBlock >= optLdm->endPosInBlock + || candidateMatchLength < MINMATCH) { + return; + } + + if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { + DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", + candidateOffCode, candidateMatchLength, currPosInBlock); + matches[*nbMatches].len = candidateMatchLength; + matches[*nbMatches].off = candidateOffCode; + (*nbMatches)++; + } } +/* ZSTD_optLdm_processMatchCandidate(): + * Wrapper function to update ldm seq store and call ldm functions as necessary. + */ +static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches, + U32 currPosInBlock, U32 remainingBytes) { + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + return; + } + + if (currPosInBlock >= optLdm->endPosInBlock) { + if (currPosInBlock > optLdm->endPosInBlock) { + /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily + * at the end of a match from the ldm seq store, and will often be some bytes + * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots" + */ + U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot); + } + ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); + } + ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); +} + +/*-******************************* +* Optimal parser +*********************************/ + static U32 ZSTD_totalLen(ZSTD_optimal_t sol) { @@ -839,7 +918,7 @@ listStats(const U32* table, int lastEltID) int enb; for (enb=0; enb < nbElts; enb++) { (void)table; - //RAWLOG(2, "%3i:%3i, ", enb, table[enb]); + /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ RAWLOG(2, "%4i,", table[enb]); } RAWLOG(2, " \n"); @@ -872,6 +951,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, ZSTD_optimal_t* const opt = optStatePtr->priceTable; ZSTD_match_t* const matches = optStatePtr->matchTable; ZSTD_optimal_t lastSequence; + ZSTD_optLdm_t optLdm; + + optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; + optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; + ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip)); /* init */ DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", @@ -887,14 +971,21 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, /* find first match */ { U32 const litlen = (U32)(ip - anchor); U32 const ll0 = !litlen; - U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); + U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(ip-istart), (U32)(iend - ip)); if (!nbMatches) { ip++; continue; } /* initialize opt[0] */ { U32 i ; for (i=0; i immediate encoding */ { U32 const maxML = matches[nbMatches-1].len; @@ -923,7 +1014,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, for (matchNb = 0; matchNb < nbMatches; matchNb++) { U32 const offset = matches[matchNb].off; U32 const end = matches[matchNb].len; - repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0); for ( ; pos <= end ; pos++ ) { U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); U32 const sequencePrice = literalsPrice + matchPrice; @@ -933,8 +1023,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, opt[pos].off = offset; opt[pos].litlen = litlen; opt[pos].price = sequencePrice; - ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory)); - memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); } } last_pos = pos-1; } @@ -961,7 +1049,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, opt[cur].off = 0; opt[cur].litlen = litlen; opt[cur].price = price; - memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep)); } else { DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), @@ -969,6 +1056,21 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, } } + /* Set the repcodes of the current position. We must do it here + * because we rely on the repcodes of the 2nd to last sequence being + * correct to set the next chunks repcodes during the backward + * traversal. + */ + ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); + assert(cur >= opt[cur].mlen); + if (opt[cur].mlen != 0) { + U32 const prev = cur - opt[cur].mlen; + repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); + ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); + } else { + ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); + } + /* last match must start at a minimum distance of 8 from oend */ if (inr > ilimit) continue; @@ -984,8 +1086,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; U32 const previousPrice = opt[cur].price; U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); - U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); + U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); U32 matchNb; + + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(inr-istart), (U32)(iend-inr)); + if (!nbMatches) { DEBUGLOG(7, "rPos:%u : no match found", cur); continue; @@ -1009,7 +1115,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, /* set prices using matches found at position == cur */ for (matchNb = 0; matchNb < nbMatches; matchNb++) { U32 const offset = matches[matchNb].off; - repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0); U32 const lastML = matches[matchNb].len; U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch; U32 mlen; @@ -1029,8 +1134,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, opt[pos].off = offset; opt[pos].litlen = litlen; opt[pos].price = price; - ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory)); - memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); } else { DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); @@ -1046,6 +1149,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ assert(opt[0].mlen == 0); + /* Set the next chunk's repcodes based on the repcodes of the beginning + * of the last match, and the last sequence. This avoids us having to + * update them while traversing the sequences. + */ + if (lastSequence.mlen != 0) { + repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); + ZSTD_memcpy(rep, &reps, sizeof(reps)); + } else { + ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); + } + { U32 const storeEnd = cur + 1; U32 storeStart = storeEnd; U32 seqPos = cur; @@ -1082,20 +1196,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, continue; /* will finish */ } - /* repcodes update : like ZSTD_updateRep(), but update in place */ - if (offCode >= ZSTD_REP_NUM) { /* full offset */ - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offCode - ZSTD_REP_MOVE; - } else { /* repcode */ - U32 const repCode = offCode + (llen==0); - if (repCode) { /* note : if repCode==0, no change */ - U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - if (repCode >= 2) rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = currentOffset; - } } - assert(anchor + llen <= iend); ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); @@ -1104,7 +1204,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, } } ZSTD_setBasePrices(optStatePtr, optLevel); } - } /* while (ip < ilimit) */ /* Return the last literals size */ @@ -1156,7 +1255,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms, const void* src, size_t srcSize) { U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ - memcpy(tmpRep, rep, sizeof(tmpRep)); + ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); assert(ms->opt.litLengthSum == 0); /* first block */ @@ -1189,7 +1288,7 @@ size_t ZSTD_compressBlock_btultra2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize) { - U32 const current = (U32)((const BYTE*)src - ms->window.base); + U32 const curr = (U32)((const BYTE*)src - ms->window.base); DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); /* 2-pass strategy: @@ -1204,7 +1303,7 @@ size_t ZSTD_compressBlock_btultra2( if ( (ms->opt.litLengthSum==0) /* first block */ && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ - && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ + && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ && (srcSize > ZSTD_PREDEF_THRESHOLD) ) { ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); diff --git a/zstd_opt.h b/zstd_opt.h index 094f747..9aba8a9 100644 --- a/zstd_opt.h +++ b/zstd_opt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zstd_v01.c b/zstd_v01.c index 8112527..3eb34eb 100644 --- a/zstd_v01.c +++ b/zstd_v01.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -257,7 +257,7 @@ static U64 FSE_read64(const void* memPtr) U64 val; memcpy(&val, memPtr, sizeof(val)); return val; } -#endif // FSE_FORCE_MEMORY_ACCESS +#endif /* FSE_FORCE_MEMORY_ACCESS */ static U16 FSE_readLE16(const void* memPtr) { @@ -1078,7 +1078,7 @@ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */ BYTE* const ostart = (BYTE*) dst; BYTE* op = ostart; BYTE* const omax = op + maxDstSize; - BYTE* const olimit = omax-15; + BYTE* const olimit = maxDstSize < 15 ? op : omax-15; const void* ptr = DTable; const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1; @@ -1092,7 +1092,7 @@ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */ const size_t length1 = FSE_readLE16(jumpTable); const size_t length2 = FSE_readLE16(jumpTable+1); const size_t length3 = FSE_readLE16(jumpTable+2); - const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !! + const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; /* check coherency !! */ const char* const start1 = (const char*)(cSrc) + 6; const char* const start2 = start1 + length1; const char* const start3 = start2 + length2; @@ -1150,11 +1150,11 @@ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */ /* tail */ { - // bitTail = bitD1; // *much* slower : -20% !??! + /* bitTail = bitD1; */ /* *much* slower : -20% !??! */ FSE_DStream_t bitTail; bitTail.ptr = bitD1.ptr; bitTail.bitsConsumed = bitD1.bitsConsumed; - bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer + bitTail.bitContainer = bitD1.bitContainer; /* required in case of FSE_DStream_endOfBuffer */ bitTail.start = start1; for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op= 199901L /* C99 */ -# include +# if defined(_AIX) +# include +# else +# include /* intptr_t */ +# endif typedef uint8_t BYTE; typedef uint16_t U16; typedef int16_t S16; @@ -1483,7 +1487,9 @@ static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProper static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) { if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); - memcpy(dst, src, srcSize); + if (srcSize > 0) { + memcpy(dst, src, srcSize); + } return srcSize; } @@ -1502,7 +1508,7 @@ static size_t ZSTD_decompressLiterals(void* ctx, if (srcSize <= 3) return ERROR(corruption_detected); litSize = ip[1] + (ip[0]<<8); - litSize += ((ip[-3] >> 3) & 7) << 16; // mmmmh.... + litSize += ((ip[-3] >> 3) & 7) << 16; /* mmmmh.... */ op = oend - litSize; (void)ctx; @@ -1541,7 +1547,9 @@ static size_t ZSTDv01_decodeLiteralsBlock(void* ctx, size_t rleSize = litbp.origSize; if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall); if (!srcSize) return ERROR(srcSize_wrong); - memset(oend - rleSize, *ip, rleSize); + if (rleSize > 0) { + memset(oend - rleSize, *ip, rleSize); + } *litStart = oend - rleSize; *litSize = rleSize; ip++; @@ -1901,8 +1909,10 @@ static size_t ZSTD_decompressSequences( { size_t lastLLSize = litEnd - litPtr; if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); - if (op != litPtr) memmove(op, litPtr, lastLLSize); - op += lastLLSize; + if (lastLLSize > 0) { + if (op != litPtr) memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } } } diff --git a/zstd_v01.h b/zstd_v01.h index 245f9dd..7910351 100644 --- a/zstd_v01.h +++ b/zstd_v01.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zstd_v02.c b/zstd_v02.c index c878379..3bb580f 100644 --- a/zstd_v02.c +++ b/zstd_v02.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -89,7 +89,11 @@ extern "C" { * Basic Types *****************************************************************/ #if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# include +# if defined(_AIX) +# include +# else +# include /* intptr_t */ +# endif typedef uint8_t BYTE; typedef uint16_t U16; typedef int16_t S16; @@ -189,7 +193,7 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) memcpy(memPtr, &value, sizeof(value)); } -#endif // MEM_FORCE_MEMORY_ACCESS +#endif /* MEM_FORCE_MEMORY_ACCESS */ MEM_STATIC U16 MEM_readLE16(const void* memPtr) @@ -2836,7 +2840,9 @@ static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockPropertie static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) { if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); - memcpy(dst, src, srcSize); + if (srcSize > 0) { + memcpy(dst, src, srcSize); + } return srcSize; } @@ -3229,8 +3235,10 @@ static size_t ZSTD_decompressSequences( size_t lastLLSize = litEnd - litPtr; if (litPtr > litEnd) return ERROR(corruption_detected); if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); - if (op != litPtr) memmove(op, litPtr, lastLLSize); - op += lastLLSize; + if (lastLLSize > 0) { + if (op != litPtr) memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } } } diff --git a/zstd_v02.h b/zstd_v02.h index 9d7d8d9..5f8f6cd 100644 --- a/zstd_v02.h +++ b/zstd_v02.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zstd_v03.c b/zstd_v03.c index 162bd63..7dedf03 100644 --- a/zstd_v03.c +++ b/zstd_v03.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -90,7 +90,11 @@ extern "C" { * Basic Types *****************************************************************/ #if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# include +# if defined(_AIX) +# include +# else +# include /* intptr_t */ +# endif typedef uint8_t BYTE; typedef uint16_t U16; typedef int16_t S16; @@ -191,7 +195,7 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) } -#endif // MEM_FORCE_MEMORY_ACCESS +#endif /* MEM_FORCE_MEMORY_ACCESS */ MEM_STATIC U16 MEM_readLE16(const void* memPtr) @@ -2477,7 +2481,9 @@ static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockPropertie static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) { if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); - memcpy(dst, src, srcSize); + if (srcSize > 0) { + memcpy(dst, src, srcSize); + } return srcSize; } @@ -2870,8 +2876,10 @@ static size_t ZSTD_decompressSequences( size_t lastLLSize = litEnd - litPtr; if (litPtr > litEnd) return ERROR(corruption_detected); if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); - if (op != litPtr) memmove(op, litPtr, lastLLSize); - op += lastLLSize; + if (lastLLSize > 0) { + if (op != litPtr) memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } } } diff --git a/zstd_v03.h b/zstd_v03.h index efd8c2b..5fc7273 100644 --- a/zstd_v03.h +++ b/zstd_v03.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zstd_v04.c b/zstd_v04.c index 4dec308..7abb698 100644 --- a/zstd_v04.c +++ b/zstd_v04.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -52,7 +52,11 @@ extern "C" { * Basic Types *****************************************************************/ #if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# include +# if defined(_AIX) +# include +# else +# include /* intptr_t */ +# endif typedef uint8_t BYTE; typedef uint16_t U16; typedef int16_t S16; @@ -161,7 +165,7 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) memcpy(memPtr, &value, sizeof(value)); } -#endif // MEM_FORCE_MEMORY_ACCESS +#endif /* MEM_FORCE_MEMORY_ACCESS */ MEM_STATIC U16 MEM_readLE16(const void* memPtr) @@ -2603,7 +2607,9 @@ static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockPropertie static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) { if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); - memcpy(dst, src, srcSize); + if (srcSize > 0) { + memcpy(dst, src, srcSize); + } return srcSize; } @@ -3008,8 +3014,10 @@ static size_t ZSTD_decompressSequences( size_t lastLLSize = litEnd - litPtr; if (litPtr > litEnd) return ERROR(corruption_detected); if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); - if (op != litPtr) memcpy(op, litPtr, lastLLSize); - op += lastLLSize; + if (lastLLSize > 0) { + if (op != litPtr) memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } } @@ -3407,7 +3415,9 @@ static size_t ZBUFF_decompressWithDictionary(ZBUFF_DCtx* zbc, const void* src, s static size_t ZBUFF_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize) { size_t length = MIN(maxDstSize, srcSize); - memcpy(dst, src, length); + if (length > 0) { + memcpy(dst, src, length); + } return length; } diff --git a/zstd_v04.h b/zstd_v04.h index bb5f3b7..15fce0d 100644 --- a/zstd_v04.h +++ b/zstd_v04.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zstd_v05.c b/zstd_v05.c index 570e0ff..b2053be 100644 --- a/zstd_v05.c +++ b/zstd_v05.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -80,7 +80,11 @@ extern "C" { * Basic Types *****************************************************************/ #if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# include +# if defined(_AIX) +# include +# else +# include /* intptr_t */ +# endif typedef uint8_t BYTE; typedef uint16_t U16; typedef int16_t S16; @@ -1804,7 +1808,7 @@ static size_t HUFv05_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, if (!srcSize) return ERROR(srcSize_wrong); iSize = ip[0]; - //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */ + /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) { /* special header */ if (iSize >= (242)) { /* RLE */ @@ -1879,7 +1883,7 @@ size_t HUFv05_readDTableX2 (U16* DTable, const void* src, size_t srcSize) HUFv05_DEltX2* const dt = (HUFv05_DEltX2*)dtPtr; HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */ - //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */ + /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ iSize = HUFv05_readStats(huffWeight, HUFv05_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); if (HUFv05_isError(iSize)) return iSize; @@ -2210,7 +2214,7 @@ size_t HUFv05_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize) HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX4) == sizeof(unsigned)); /* if compilation fails here, assertion is false */ if (memLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge); - //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */ + /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ iSize = HUFv05_readStats(weightList, HUFv05_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); if (HUFv05_isError(iSize)) return iSize; @@ -2539,9 +2543,9 @@ size_t HUFv05_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cS return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); - //return HUFv05_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */ - //return HUFv05_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */ - //return HUFv05_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */ + /* return HUFv05_decompress4X2(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams single-symbol decoding */ + /* return HUFv05_decompress4X4(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams double-symbols decoding */ + /* return HUFv05_decompress4X6(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams quad-symbols decoding */ } /* zstd - standard compression library @@ -3362,8 +3366,10 @@ static size_t ZSTDv05_decompressSequences( size_t lastLLSize = litEnd - litPtr; if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */ if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; + if (lastLLSize > 0) { + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } return op-ostart; @@ -3791,7 +3797,9 @@ static size_t ZBUFFv05_blockHeaderSize = 3; static size_t ZBUFFv05_limitCopy(void* dst, size_t maxDstSize, const void* src, size_t srcSize) { size_t length = MIN(maxDstSize, srcSize); - memcpy(dst, src, length); + if (length > 0) { + memcpy(dst, src, length); + } return length; } @@ -3928,7 +3936,7 @@ size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* zbc, void* dst, size_t* maxDst *maxDstSizePtr = 0; return headerSize - zbc->hPos; } - // zbc->stage = ZBUFFv05ds_decodeHeader; break; /* useless : stage follows */ + /* zbc->stage = ZBUFFv05ds_decodeHeader; break; */ /* useless : stage follows */ } /* fall-through */ case ZBUFFv05ds_decodeHeader: @@ -4001,7 +4009,7 @@ size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* zbc, void* dst, size_t* maxDst if (!decodedSize) { zbc->stage = ZBUFFv05ds_read; break; } /* this was just a header */ zbc->outEnd = zbc->outStart + decodedSize; zbc->stage = ZBUFFv05ds_flush; - // break; /* ZBUFFv05ds_flush follows */ + /* break; */ /* ZBUFFv05ds_flush follows */ } } /* fall-through */ diff --git a/zstd_v05.h b/zstd_v05.h index 4a97985..6c46e8f 100644 --- a/zstd_v05.h +++ b/zstd_v05.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zstd_v06.c b/zstd_v06.c index 2a08e8d..cf17623 100644 --- a/zstd_v06.c +++ b/zstd_v06.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -82,7 +82,11 @@ extern "C" { * Basic Types *****************************************************************/ #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include +# if defined(_AIX) +# include +# else +# include /* intptr_t */ +# endif typedef uint8_t BYTE; typedef uint16_t U16; typedef int16_t S16; @@ -1862,7 +1866,7 @@ MEM_STATIC size_t HUFv06_readStats(BYTE* huffWeight, size_t hwSize, U32* rankSta if (!srcSize) return ERROR(srcSize_wrong); iSize = ip[0]; - //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */ + /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) { /* special header */ if (iSize >= (242)) { /* RLE */ @@ -2014,7 +2018,7 @@ size_t HUFv06_readDTableX2 (U16* DTable, const void* src, size_t srcSize) HUFv06_DEltX2* const dt = (HUFv06_DEltX2*)dtPtr; HUFv06_STATIC_ASSERT(sizeof(HUFv06_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */ - //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */ + /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ iSize = HUFv06_readStats(huffWeight, HUFv06_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); if (HUFv06_isError(iSize)) return iSize; @@ -2340,7 +2344,7 @@ size_t HUFv06_readDTableX4 (U32* DTable, const void* src, size_t srcSize) HUFv06_STATIC_ASSERT(sizeof(HUFv06_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */ if (memLog > HUFv06_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge); - //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */ + /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ iSize = HUFv06_readStats(weightList, HUFv06_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); if (HUFv06_isError(iSize)) return iSize; @@ -2664,13 +2668,13 @@ size_t HUFv06_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cS { U32 algoNb = 0; if (Dtime[1] < Dtime[0]) algoNb = 1; - // if (Dtime[2] < Dtime[algoNb]) algoNb = 2; /* current speed of HUFv06_decompress4X6 is not good */ + /* if (Dtime[2] < Dtime[algoNb]) algoNb = 2; */ /* current speed of HUFv06_decompress4X6 is not good */ return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); } - //return HUFv06_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */ - //return HUFv06_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */ - //return HUFv06_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */ + /* return HUFv06_decompress4X2(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams single-symbol decoding */ + /* return HUFv06_decompress4X4(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams double-symbols decoding */ + /* return HUFv06_decompress4X6(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams quad-symbols decoding */ } /* Common functions of Zstd compression library @@ -3501,8 +3505,10 @@ static size_t ZSTDv06_decompressSequences( { size_t const lastLLSize = litEnd - litPtr; if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */ if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; + if (lastLLSize > 0) { + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } return op-ostart; @@ -4000,7 +4006,9 @@ size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* zbd) MEM_STATIC size_t ZBUFFv06_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { size_t length = MIN(dstCapacity, srcSize); - memcpy(dst, src, length); + if (length > 0) { + memcpy(dst, src, length); + } return length; } @@ -4109,7 +4117,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd, if (!decodedSize) { zbd->stage = ZBUFFds_read; break; } /* this was just a header */ zbd->outEnd = zbd->outStart + decodedSize; zbd->stage = ZBUFFds_flush; - // break; /* ZBUFFds_flush follows */ + /* break; */ /* ZBUFFds_flush follows */ } } /* fall-through */ diff --git a/zstd_v06.h b/zstd_v06.h index 0781857..2fd99e6 100644 --- a/zstd_v06.h +++ b/zstd_v06.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zstd_v07.c b/zstd_v07.c index a2eeff8..f486eb9 100644 --- a/zstd_v07.c +++ b/zstd_v07.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -242,7 +242,11 @@ extern "C" { * Basic Types *****************************************************************/ #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include +# if defined(_AIX) +# include +# else +# include /* intptr_t */ +# endif typedef uint8_t BYTE; typedef uint16_t U16; typedef int16_t S16; @@ -1314,7 +1318,7 @@ size_t HUFv07_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, if (!srcSize) return ERROR(srcSize_wrong); iSize = ip[0]; - //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */ + /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) { /* special header */ if (iSize >= (242)) { /* RLE */ @@ -1784,7 +1788,7 @@ size_t HUFv07_readDTableX2 (HUFv07_DTable* DTable, const void* src, size_t srcSi HUFv07_DEltX2* const dt = (HUFv07_DEltX2*)dtPtr; HUFv07_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUFv07_DTable)); - //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */ + /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ iSize = HUFv07_readStats(huffWeight, HUFv07_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); if (HUFv07_isError(iSize)) return iSize; @@ -2148,7 +2152,7 @@ size_t HUFv07_readDTableX4 (HUFv07_DTable* DTable, const void* src, size_t srcSi HUFv07_STATIC_ASSERT(sizeof(HUFv07_DEltX4) == sizeof(HUFv07_DTable)); /* if compilation fails here, assertion is false */ if (maxTableLog > HUFv07_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge); - //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */ + /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ iSize = HUFv07_readStats(weightList, HUFv07_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); if (HUFv07_isError(iSize)) return iSize; @@ -2530,8 +2534,8 @@ size_t HUFv07_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cS return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); } - //return HUFv07_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */ - //return HUFv07_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */ + /* return HUFv07_decompress4X2(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams single-symbol decoding */ + /* return HUFv07_decompress4X4(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams double-symbols decoding */ } size_t HUFv07_decompress4X_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) @@ -3272,7 +3276,9 @@ static size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProper static size_t ZSTDv07_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall); - memcpy(dst, src, srcSize); + if (srcSize > 0) { + memcpy(dst, src, srcSize); + } return srcSize; } @@ -3712,10 +3718,12 @@ static size_t ZSTDv07_decompressSequences( /* last literal segment */ { size_t const lastLLSize = litEnd - litPtr; - //if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */ + /* if (litPtr > litEnd) return ERROR(corruption_detected); */ /* too many literals already used */ if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; + if (lastLLSize > 0) { + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } return op-ostart; @@ -3776,7 +3784,9 @@ ZSTDLIBv07_API size_t ZSTDv07_insertBlock(ZSTDv07_DCtx* dctx, const void* blockS static size_t ZSTDv07_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length) { if (length > dstCapacity) return ERROR(dstSize_tooSmall); - memset(dst, byte, length); + if (length > 0) { + memset(dst, byte, length); + } return length; } @@ -4378,7 +4388,9 @@ size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* zbd) MEM_STATIC size_t ZBUFFv07_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { size_t const length = MIN(dstCapacity, srcSize); - memcpy(dst, src, length); + if (length > 0) { + memcpy(dst, src, length); + } return length; } diff --git a/zstd_v07.h b/zstd_v07.h index a566c1d..9da50c4 100644 --- a/zstd_v07.h +++ b/zstd_v07.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zstdmt_compress.c b/zstdmt_compress.c index bc3062b..084e48b 100644 --- a/zstdmt_compress.c +++ b/zstdmt_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -20,8 +20,7 @@ /* ====== Dependencies ====== */ -#include /* memcpy, memset */ -#include /* INT_MAX, UINT_MAX */ +#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */ #include "mem.h" /* MEM_STATIC */ #include "pool.h" /* threadpool */ #include "threading.h" /* mutex */ @@ -106,11 +105,11 @@ typedef struct ZSTDMT_bufferPool_s { static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem) { unsigned const maxNbBuffers = 2*nbWorkers + 3; - ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc( + ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc( sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem); if (bufPool==NULL) return NULL; if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) { - ZSTD_free(bufPool, cMem); + ZSTD_customFree(bufPool, cMem); return NULL; } bufPool->bufferSize = 64 KB; @@ -127,10 +126,10 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool) if (!bufPool) return; /* compatibility with free on NULL */ for (u=0; utotalBuffers; u++) { DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start); - ZSTD_free(bufPool->bTable[u].start, bufPool->cMem); + ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem); } ZSTD_pthread_mutex_destroy(&bufPool->poolMutex); - ZSTD_free(bufPool, bufPool->cMem); + ZSTD_customFree(bufPool, bufPool->cMem); } /* only works at initialization, not during compression */ @@ -201,13 +200,13 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool) } /* size conditions not respected : scratch this buffer, create new one */ DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing"); - ZSTD_free(buf.start, bufPool->cMem); + ZSTD_customFree(buf.start, bufPool->cMem); } ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); /* create new buffer */ DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer"); { buffer_t buffer; - void* const start = ZSTD_malloc(bSize, bufPool->cMem); + void* const start = ZSTD_customMalloc(bSize, bufPool->cMem); buffer.start = start; /* note : start can be NULL if malloc fails ! */ buffer.capacity = (start==NULL) ? 0 : bSize; if (start==NULL) { @@ -229,13 +228,13 @@ static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer) { size_t const bSize = bufPool->bufferSize; if (buffer.capacity < bSize) { - void* const start = ZSTD_malloc(bSize, bufPool->cMem); + void* const start = ZSTD_customMalloc(bSize, bufPool->cMem); buffer_t newBuffer; newBuffer.start = start; newBuffer.capacity = start == NULL ? 0 : bSize; if (start != NULL) { assert(newBuffer.capacity >= buffer.capacity); - memcpy(newBuffer.start, buffer.start, buffer.capacity); + ZSTD_memcpy(newBuffer.start, buffer.start, buffer.capacity); DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize); return newBuffer; } @@ -261,14 +260,12 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf) ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); /* Reached bufferPool capacity (should not happen) */ DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing "); - ZSTD_free(buf.start, bufPool->cMem); + ZSTD_customFree(buf.start, bufPool->cMem); } /* ===== Seq Pool Wrapper ====== */ -static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0}; - typedef ZSTDMT_bufferPool ZSTDMT_seqPool; static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool) @@ -278,7 +275,7 @@ static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool) static rawSeqStore_t bufferToSeq(buffer_t buffer) { - rawSeqStore_t seq = {NULL, 0, 0, 0}; + rawSeqStore_t seq = kNullRawSeqStore; seq.seq = (rawSeq*)buffer.start; seq.capacity = buffer.capacity / sizeof(rawSeq); return seq; @@ -354,7 +351,7 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) for (cid=0; cidtotalCCtx; cid++) ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */ ZSTD_pthread_mutex_destroy(&pool->poolMutex); - ZSTD_free(pool, pool->cMem); + ZSTD_customFree(pool, pool->cMem); } /* ZSTDMT_createCCtxPool() : @@ -362,12 +359,12 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers, ZSTD_customMem cMem) { - ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc( + ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc( sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem); assert(nbWorkers > 0); if (!cctxPool) return NULL; if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) { - ZSTD_free(cctxPool, cMem); + ZSTD_customFree(cctxPool, cMem); return NULL; } cctxPool->cMem = cMem; @@ -461,7 +458,13 @@ typedef struct { ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */ } serialState_t; -static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize) +static int +ZSTDMT_serialState_reset(serialState_t* serialState, + ZSTDMT_seqPool* seqPool, + ZSTD_CCtx_params params, + size_t jobSize, + const void* dict, size_t const dictSize, + ZSTD_dictContentType_e dictContentType) { /* Adjust parameters */ if (params.ldmParams.enableLdm) { @@ -472,7 +475,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* serialState->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength); } else { - memset(¶ms.ldmParams, 0, sizeof(params.ldmParams)); + ZSTD_memset(¶ms.ldmParams, 0, sizeof(params.ldmParams)); } serialState->nextJobID = 0; if (params.fParams.checksumFlag) @@ -490,23 +493,39 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* /* Size the seq pool tables */ ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize)); /* Reset the window */ - ZSTD_window_clear(&serialState->ldmState.window); - serialState->ldmWindow = serialState->ldmState.window; + ZSTD_window_init(&serialState->ldmState.window); /* Resize tables and output space if necessary. */ if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) { - ZSTD_free(serialState->ldmState.hashTable, cMem); - serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_malloc(hashSize, cMem); + ZSTD_customFree(serialState->ldmState.hashTable, cMem); + serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_customMalloc(hashSize, cMem); } if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) { - ZSTD_free(serialState->ldmState.bucketOffsets, cMem); - serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_malloc(bucketSize, cMem); + ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem); + serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(bucketSize, cMem); } if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets) return 1; /* Zero the tables */ - memset(serialState->ldmState.hashTable, 0, hashSize); - memset(serialState->ldmState.bucketOffsets, 0, bucketSize); + ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize); + ZSTD_memset(serialState->ldmState.bucketOffsets, 0, bucketSize); + + /* Update window state and fill hash table with dict */ + serialState->ldmState.loadedDictEnd = 0; + if (dictSize > 0) { + if (dictContentType == ZSTD_dct_rawContent) { + BYTE const* const dictEnd = (const BYTE*)dict + dictSize; + ZSTD_window_update(&serialState->ldmState.window, dict, dictSize); + ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, ¶ms.ldmParams); + serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base); + } else { + /* don't even load anything */ + } + } + + /* Initialize serialState's copy of ldmWindow. */ + serialState->ldmWindow = serialState->ldmState.window; } + serialState->params = params; serialState->params.jobSize = (U32)jobSize; return 0; @@ -515,7 +534,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* static int ZSTDMT_serialState_init(serialState_t* serialState) { int initError = 0; - memset(serialState, 0, sizeof(*serialState)); + ZSTD_memset(serialState, 0, sizeof(*serialState)); initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL); initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL); initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL); @@ -530,8 +549,8 @@ static void ZSTDMT_serialState_free(serialState_t* serialState) ZSTD_pthread_cond_destroy(&serialState->cond); ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex); ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond); - ZSTD_free(serialState->ldmState.hashTable, cMem); - ZSTD_free(serialState->ldmState.bucketOffsets, cMem); + ZSTD_customFree(serialState->ldmState.hashTable, cMem); + ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem); } static void ZSTDMT_serialState_update(serialState_t* serialState, @@ -798,7 +817,6 @@ struct ZSTDMT_CCtx_s { roundBuff_t roundBuff; serialState_t serial; rsyncState_t rsync; - unsigned singleBlockingThread; unsigned jobIDMask; unsigned doneJobID; unsigned nextJobID; @@ -810,6 +828,7 @@ struct ZSTDMT_CCtx_s { ZSTD_customMem cMem; ZSTD_CDict* cdictLocal; const ZSTD_CDict* cdict; + unsigned providedFactory: 1; }; static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem) @@ -820,7 +839,7 @@ static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZS ZSTD_pthread_mutex_destroy(&jobTable[jobNb].job_mutex); ZSTD_pthread_cond_destroy(&jobTable[jobNb].job_cond); } - ZSTD_free(jobTable, cMem); + ZSTD_customFree(jobTable, cMem); } /* ZSTDMT_allocJobsTable() @@ -832,7 +851,7 @@ static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_custom U32 const nbJobs = 1 << nbJobsLog2; U32 jobNb; ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*) - ZSTD_calloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem); + ZSTD_customCalloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem); int initError = 0; if (jobTable==NULL) return NULL; *nbJobsPtr = nbJobs; @@ -863,12 +882,12 @@ static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) { /* ZSTDMT_CCtxParam_setNbWorkers(): * Internal use only */ -size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers) +static size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers) { return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers); } -MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem) +MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool) { ZSTDMT_CCtx* mtctx; U32 nbJobs = nbWorkers + 2; @@ -881,12 +900,19 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, /* invalid custom allocator */ return NULL; - mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem); + mtctx = (ZSTDMT_CCtx*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtx), cMem); if (!mtctx) return NULL; ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers); mtctx->cMem = cMem; mtctx->allJobsCompleted = 1; - mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem); + if (pool != NULL) { + mtctx->factory = pool; + mtctx->providedFactory = 1; + } + else { + mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem); + mtctx->providedFactory = 0; + } mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem); assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */ mtctx->jobIDMask = nbJobs - 1; @@ -903,22 +929,18 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, return mtctx; } -ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem) +ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool) { #ifdef ZSTD_MULTITHREAD - return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem); + return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem, pool); #else (void)nbWorkers; (void)cMem; + (void)pool; return NULL; #endif } -ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers) -{ - return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem); -} - /* ZSTDMT_releaseAllJobResources() : * note : ensure all workers are killed first ! */ @@ -935,7 +957,7 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); /* Clear the job description, but keep the mutex/cond */ - memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID])); + ZSTD_memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID])); mtctx->jobs[jobID].job_mutex = mutex; mtctx->jobs[jobID].job_cond = cond; } @@ -962,7 +984,8 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx) size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) { if (mtctx==NULL) return 0; /* compatible with free on NULL */ - POOL_free(mtctx->factory); /* stop and free worker threads */ + if (!mtctx->providedFactory) + POOL_free(mtctx->factory); /* stop and free worker threads */ ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */ ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); ZSTDMT_freeBufferPool(mtctx->bufPool); @@ -971,8 +994,8 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) ZSTDMT_serialState_free(&mtctx->serial); ZSTD_freeCDict(mtctx->cdictLocal); if (mtctx->roundBuff.buffer) - ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem); - ZSTD_free(mtctx, mtctx->cMem); + ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem); + ZSTD_customFree(mtctx, mtctx->cMem); return 0; } @@ -989,72 +1012,13 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx) + mtctx->roundBuff.capacity; } -/* Internal only */ -size_t -ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, - ZSTDMT_parameter parameter, - int value) -{ - DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter"); - switch(parameter) - { - case ZSTDMT_p_jobSize : - DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value); - return ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, value); - case ZSTDMT_p_overlapLog : - DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value); - return ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, value); - case ZSTDMT_p_rsyncable : - DEBUGLOG(4, "ZSTD_p_rsyncable : %i", value); - return ZSTD_CCtxParams_setParameter(params, ZSTD_c_rsyncable, value); - default : - return ERROR(parameter_unsupported); - } -} - -size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value) -{ - DEBUGLOG(4, "ZSTDMT_setMTCtxParameter"); - return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value); -} - -size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value) -{ - switch (parameter) { - case ZSTDMT_p_jobSize: - return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_jobSize, value); - case ZSTDMT_p_overlapLog: - return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_overlapLog, value); - case ZSTDMT_p_rsyncable: - return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_rsyncable, value); - default: - return ERROR(parameter_unsupported); - } -} - -/* Sets parameters relevant to the compression job, - * initializing others to default values. */ -static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params) -{ - ZSTD_CCtx_params jobParams = *params; - /* Clear parameters related to multithreading */ - jobParams.forceWindow = 0; - jobParams.nbWorkers = 0; - jobParams.jobSize = 0; - jobParams.overlapLog = 0; - jobParams.rsyncable = 0; - memset(&jobParams.ldmParams, 0, sizeof(ldmParams_t)); - memset(&jobParams.customMem, 0, sizeof(ZSTD_customMem)); - return jobParams; -} - /* ZSTDMT_resize() : * @return : error code if fails, 0 on success */ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) { if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation); - FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) ); + FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , ""); mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers); if (mtctx->bufPool == NULL) return ERROR(memory_allocation); mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers); @@ -1076,7 +1040,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)", compressionLevel); mtctx->params.compressionLevel = compressionLevel; - { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0); + { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); cParams.windowLog = saved_wlog; mtctx->params.cParams = cParams; } @@ -1163,8 +1127,8 @@ static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params) if (params->ldmParams.enableLdm) { /* In Long Range Mode, the windowLog is typically oversized. * In which case, it's preferable to determine the jobSize - * based on chainLog instead. */ - jobLog = MAX(21, params->cParams.chainLog + 4); + * based on cycleLog instead. */ + jobLog = MAX(21, ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy) + 3); } else { jobLog = MAX(20, params->cParams.windowLog + 2); } @@ -1218,172 +1182,6 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params) return (ovLog==0) ? 0 : (size_t)1 << ovLog; } -static unsigned -ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers) -{ - assert(nbWorkers>0); - { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params); - size_t const jobMaxSize = jobSizeTarget << 2; - size_t const passSizeMax = jobMaxSize * nbWorkers; - unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1; - unsigned const nbJobsLarge = multiplier * nbWorkers; - unsigned const nbJobsMax = (unsigned)(srcSize / jobSizeTarget) + 1; - unsigned const nbJobsSmall = MIN(nbJobsMax, nbWorkers); - return (multiplier>1) ? nbJobsLarge : nbJobsSmall; -} } - -/* ZSTDMT_compress_advanced_internal() : - * This is a blocking function : it will only give back control to caller after finishing its compression job. - */ -static size_t ZSTDMT_compress_advanced_internal( - ZSTDMT_CCtx* mtctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, - ZSTD_CCtx_params params) -{ - ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(¶ms); - size_t const overlapSize = ZSTDMT_computeOverlapSize(¶ms); - unsigned const nbJobs = ZSTDMT_computeNbJobs(¶ms, srcSize, params.nbWorkers); - size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs; - size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */ - const char* const srcStart = (const char*)src; - size_t remainingSrcSize = srcSize; - unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbJobs : (unsigned)(dstCapacity / ZSTD_compressBound(avgJobSize)); /* presumes avgJobSize >= 256 KB, which should be the case */ - size_t frameStartPos = 0, dstBufferPos = 0; - assert(jobParams.nbWorkers == 0); - assert(mtctx->cctxPool->totalCCtx == params.nbWorkers); - - params.jobSize = (U32)avgJobSize; - DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbJobs=%2u (rawSize=%u bytes; fixedSize=%u) ", - nbJobs, (U32)proposedJobSize, (U32)avgJobSize); - - if ((nbJobs==1) | (params.nbWorkers<=1)) { /* fallback to single-thread mode : this is a blocking invocation anyway */ - ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; - DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode"); - if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams); - return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams); - } - - assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */ - ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) ); - if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize)) - return ERROR(memory_allocation); - - FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */ - - { unsigned u; - for (u=0; ujobs[u].prefix.start = srcStart + frameStartPos - dictSize; - mtctx->jobs[u].prefix.size = dictSize; - mtctx->jobs[u].src.start = srcStart + frameStartPos; - mtctx->jobs[u].src.size = jobSize; assert(jobSize > 0); /* avoid job.src.size == 0 */ - mtctx->jobs[u].consumed = 0; - mtctx->jobs[u].cSize = 0; - mtctx->jobs[u].cdict = (u==0) ? cdict : NULL; - mtctx->jobs[u].fullFrameSize = srcSize; - mtctx->jobs[u].params = jobParams; - /* do not calculate checksum within sections, but write it in header for first section */ - mtctx->jobs[u].dstBuff = dstBuffer; - mtctx->jobs[u].cctxPool = mtctx->cctxPool; - mtctx->jobs[u].bufPool = mtctx->bufPool; - mtctx->jobs[u].seqPool = mtctx->seqPool; - mtctx->jobs[u].serial = &mtctx->serial; - mtctx->jobs[u].jobID = u; - mtctx->jobs[u].firstJob = (u==0); - mtctx->jobs[u].lastJob = (u==nbJobs-1); - - DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)jobSize); - DEBUG_PRINTHEX(6, mtctx->jobs[u].prefix.start, 12); - POOL_add(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[u]); - - frameStartPos += jobSize; - dstBufferPos += dstBufferCapacity; - remainingSrcSize -= jobSize; - } } - - /* collect result */ - { size_t error = 0, dstPos = 0; - unsigned jobID; - for (jobID=0; jobIDjobs[jobID].job_mutex); - while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) { - DEBUGLOG(5, "waiting for jobCompleted signal from job %u", jobID); - ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex); - } - ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex); - DEBUGLOG(5, "ready to write job %u ", jobID); - - { size_t const cSize = mtctx->jobs[jobID].cSize; - if (ZSTD_isError(cSize)) error = cSize; - if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall); - if (jobID) { /* note : job 0 is written directly at dst, which is correct position */ - if (!error) - memmove((char*)dst + dstPos, mtctx->jobs[jobID].dstBuff.start, cSize); /* may overlap when job compressed within dst */ - if (jobID >= compressWithinDst) { /* job compressed into its own buffer, which must be released */ - DEBUGLOG(5, "releasing buffer %u>=%u", jobID, compressWithinDst); - ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); - } } - mtctx->jobs[jobID].dstBuff = g_nullBuffer; - mtctx->jobs[jobID].cSize = 0; - dstPos += cSize ; - } - } /* for (jobID=0; jobIDserial.xxhState); - if (dstPos + 4 > dstCapacity) { - error = ERROR(dstSize_tooSmall); - } else { - DEBUGLOG(4, "writing checksum : %08X \n", checksum); - MEM_writeLE32((char*)dst + dstPos, checksum); - dstPos += 4; - } } - - if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos); - return error ? error : dstPos; - } -} - -size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, - ZSTD_parameters params, - int overlapLog) -{ - ZSTD_CCtx_params cctxParams = mtctx->params; - cctxParams.cParams = params.cParams; - cctxParams.fParams = params.fParams; - assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX); - cctxParams.overlapLog = overlapLog; - return ZSTDMT_compress_advanced_internal(mtctx, - dst, dstCapacity, - src, srcSize, - cdict, cctxParams); -} - - -size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel) -{ - ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0); - int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy); - params.fParams.contentSizeFlag = 1; - return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog); -} - - /* ====================================== */ /* ======= Streaming API ======= */ /* ====================================== */ @@ -1403,21 +1201,11 @@ size_t ZSTDMT_initCStream_internal( /* init */ if (params.nbWorkers != mtctx->params.nbWorkers) - FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) ); + FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , ""); if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN; if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX; - mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ - if (mtctx->singleBlockingThread) { - ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(¶ms); - DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode"); - assert(singleThreadParams.nbWorkers == 0); - return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0], - dict, dictSize, cdict, - &singleThreadParams, pledgedSrcSize); - } - DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers); if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */ @@ -1480,8 +1268,8 @@ size_t ZSTDMT_initCStream_internal( size_t const capacity = MAX(windowSize, sectionsSize) + slackSize; if (mtctx->roundBuff.capacity < capacity) { if (mtctx->roundBuff.buffer) - ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem); - mtctx->roundBuff.buffer = (BYTE*)ZSTD_malloc(capacity, mtctx->cMem); + ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem); + mtctx->roundBuff.buffer = (BYTE*)ZSTD_customMalloc(capacity, mtctx->cMem); if (mtctx->roundBuff.buffer == NULL) { mtctx->roundBuff.capacity = 0; return ERROR(memory_allocation); @@ -1500,58 +1288,12 @@ size_t ZSTDMT_initCStream_internal( mtctx->allJobsCompleted = 0; mtctx->consumed = 0; mtctx->produced = 0; - if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize)) + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize, + dict, dictSize, dictContentType)) return ERROR(memory_allocation); return 0; } -size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, - const void* dict, size_t dictSize, - ZSTD_parameters params, - unsigned long long pledgedSrcSize) -{ - ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */ - DEBUGLOG(4, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize); - cctxParams.cParams = params.cParams; - cctxParams.fParams = params.fParams; - return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dct_auto, NULL, - cctxParams, pledgedSrcSize); -} - -size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams, - unsigned long long pledgedSrcSize) -{ - ZSTD_CCtx_params cctxParams = mtctx->params; - if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */ - cctxParams.cParams = ZSTD_getCParamsFromCDict(cdict); - cctxParams.fParams = fParams; - return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dct_auto, cdict, - cctxParams, pledgedSrcSize); -} - - -/* ZSTDMT_resetCStream() : - * pledgedSrcSize can be zero == unknown (for the time being) - * prefer using ZSTD_CONTENTSIZE_UNKNOWN, - * as `0` might mean "empty" in the future */ -size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize) -{ - if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, 0, mtctx->params, - pledgedSrcSize); -} - -size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); - ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */ - DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel); - cctxParams.cParams = params.cParams; - cctxParams.fParams = params.fParams; - return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN); -} - /* ZSTDMT_writeLastEmptyBlock() * Write a single empty block with an end-of-frame to finish a frame. @@ -1714,9 +1456,11 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u assert(mtctx->doneJobID < mtctx->nextJobID); assert(cSize >= mtctx->jobs[wJobID].dstFlushed); assert(mtctx->jobs[wJobID].dstBuff.start != NULL); - memcpy((char*)output->dst + output->pos, - (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, - toFlush); + if (toFlush > 0) { + ZSTD_memcpy((char*)output->dst + output->pos, + (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, + toFlush); + } output->pos += toFlush; mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */ @@ -1786,7 +1530,7 @@ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range) BYTE const* const bufferStart = (BYTE const*)buffer.start; BYTE const* const bufferEnd = bufferStart + buffer.capacity; BYTE const* const rangeStart = (BYTE const*)range.start; - BYTE const* const rangeEnd = rangeStart + range.size; + BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart; if (rangeStart == NULL || bufferStart == NULL) return 0; @@ -1867,7 +1611,7 @@ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx) return 0; } ZSTDMT_waitForLdmComplete(mtctx, buffer); - memmove(start, mtctx->inBuff.prefix.start, prefixSize); + ZSTD_memmove(start, mtctx->inBuff.prefix.start, prefixSize); mtctx->inBuff.prefix.start = start; mtctx->roundBuff.pos = prefixSize; } @@ -1941,6 +1685,16 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input) pos = 0; prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH; hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH); + if ((hash & hitMask) == hitMask) { + /* We're already at a sync point so don't load any more until + * we're able to flush this sync point. + * This likely happened because the job table was full so we + * couldn't add our job. + */ + syncPoint.toLoad = 0; + syncPoint.flush = 1; + return syncPoint; + } } else { /* We don't have enough bytes buffered to initialize the hash, but * we know we have at least RSYNC_LENGTH bytes total. @@ -1995,34 +1749,11 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, assert(output->pos <= output->size); assert(input->pos <= input->size); - if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */ - return ZSTD_compressStream2(mtctx->cctxPool->cctx[0], output, input, endOp); - } - if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { /* current frame being ended. Only flush/end are allowed */ return ERROR(stage_wrong); } - /* single-pass shortcut (note : synchronous-mode) */ - if ( (!mtctx->params.rsyncable) /* rsyncable mode is disabled */ - && (mtctx->nextJobID == 0) /* just started */ - && (mtctx->inBuff.filled == 0) /* nothing buffered */ - && (!mtctx->jobReady) /* no job already created */ - && (endOp == ZSTD_e_end) /* end order */ - && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough space in dst */ - size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx, - (char*)output->dst + output->pos, output->size - output->pos, - (const char*)input->src + input->pos, input->size - input->pos, - mtctx->cdict, mtctx->params); - if (ZSTD_isError(cSize)) return cSize; - input->pos = input->size; - output->pos += cSize; - mtctx->allJobsCompleted = 1; - mtctx->frameEnded = 1; - return 0; - } - /* fill input buffer */ if ( (!mtctx->jobReady) && (input->size > input->pos) ) { /* support NULL input */ @@ -2045,13 +1776,21 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize); DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u", (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize); - memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad); + ZSTD_memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad); input->pos += syncPoint.toLoad; mtctx->inBuff.filled += syncPoint.toLoad; forwardInputProgress = syncPoint.toLoad>0; } - if ((input->pos < input->size) && (endOp == ZSTD_e_end)) - endOp = ZSTD_e_flush; /* can't end now : not all input consumed */ + } + if ((input->pos < input->size) && (endOp == ZSTD_e_end)) { + /* Can't end yet because the input is not fully consumed. + * We are in one of these cases: + * - mtctx->inBuff is NULL & empty: we couldn't get an input buffer so don't create a new job. + * - We filled the input buffer: flush this job but don't end the frame. + * - We hit a synchronization point: flush this job but don't end the frame. + */ + assert(mtctx->inBuff.filled == 0 || mtctx->inBuff.filled == mtctx->targetSectionSize || mtctx->params.rsyncable); + endOp = ZSTD_e_flush; } if ( (mtctx->jobReady) @@ -2060,7 +1799,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ size_t const jobSize = mtctx->inBuff.filled; assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); - FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) ); + FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , ""); } /* check for potential compressed data ready to be flushed */ @@ -2070,47 +1809,3 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, return remainingToFlush; } } - - -size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input) -{ - FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) ); - - /* recommended next input size : fill current input buffer */ - return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */ -} - - -static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame) -{ - size_t const srcSize = mtctx->inBuff.filled; - DEBUGLOG(5, "ZSTDMT_flushStream_internal"); - - if ( mtctx->jobReady /* one job ready for a worker to pick up */ - || (srcSize > 0) /* still some data within input buffer */ - || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */ - DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)", - (U32)srcSize, (U32)endFrame); - FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) ); - } - - /* check if there is any data available to flush */ - return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */, endFrame); -} - - -size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output) -{ - DEBUGLOG(5, "ZSTDMT_flushStream"); - if (mtctx->singleBlockingThread) - return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output); - return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_flush); -} - -size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output) -{ - DEBUGLOG(4, "ZSTDMT_endStream"); - if (mtctx->singleBlockingThread) - return ZSTD_endStream(mtctx->cctxPool->cctx[0], output); - return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_end); -} diff --git a/zstdmt_compress.h b/zstdmt_compress.h index 12a5260..e306964 100644 --- a/zstdmt_compress.h +++ b/zstdmt_compress.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -19,26 +19,14 @@ /* Note : This is an internal API. * These APIs used to be exposed with ZSTDLIB_API, * because it used to be the only way to invoke MT compression. - * Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2() - * instead. - * - * If you depend on these APIs and can't switch, then define - * ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library. - * However, we may completely remove these functions in a future - * release, so please switch soon. + * Now, you must use ZSTD_compress2 and ZSTD_compressStream2() instead. * * This API requires ZSTD_MULTITHREAD to be defined during compilation, * otherwise ZSTDMT_createCCtx*() will fail. */ -#ifdef ZSTD_LEGACY_MULTITHREADED_API -# define ZSTDMT_API ZSTDLIB_API -#else -# define ZSTDMT_API -#endif - /* === Dependencies === */ -#include /* size_t */ +#include "zstd_deps.h" /* size_t */ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */ @@ -54,78 +42,34 @@ #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB)) +/* ======================================================== + * === Private interface, for use by ZSTD_compress.c === + * === Not exposed in libzstd. Never invoke directly === + * ======================================================== */ + /* === Memory management === */ typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ -ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers); -/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ -ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, - ZSTD_customMem cMem); -ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); - -ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); - - -/* === Simple one-pass compression function === */ - -ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); - +ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, + ZSTD_customMem cMem, + ZSTD_threadPool *pool); +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); /* === Streaming functions === */ -ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel); -ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */ - -ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx); -ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input); - -ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ -ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ - - -/* === Advanced functions and parameters === */ - -ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, - ZSTD_parameters params, - int overlapLog); - -ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, - const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */ - ZSTD_parameters params, - unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */ - -ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fparams, - unsigned long long pledgedSrcSize); /* note : zero means empty */ - -/* ZSTDMT_parameter : - * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ -typedef enum { - ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */ - ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */ - ZSTDMT_p_rsyncable /* Enables rsyncable mode. */ -} ZSTDMT_parameter; - -/* ZSTDMT_setMTCtxParameter() : - * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter. - * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__ - * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions. - * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ -ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value); - -/* ZSTDMT_getMTCtxParameter() : - * Query the ZSTDMT_CCtx for a parameter value. - * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ -ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value); +size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx); +/*! ZSTDMT_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); /*! ZSTDMT_compressStream_generic() : * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream() @@ -134,16 +78,10 @@ ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter * 0 if fully flushed * or an error code * note : needs to be init using any ZSTD_initCStream*() variant */ -ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective endOp); - - -/* ======================================================== - * === Private interface, for use by ZSTD_compress.c === - * === Not exposed in libzstd. Never invoke directly === - * ======================================================== */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); /*! ZSTDMT_toFlushNow() * Tell how many bytes are ready to be flushed immediately. @@ -153,15 +91,6 @@ ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, * therefore flushing is limited by speed of oldest job. */ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx); -/*! ZSTDMT_CCtxParam_setMTCtxParameter() - * like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */ -size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value); - -/*! ZSTDMT_CCtxParam_setNbWorkers() - * Set nbWorkers, and clamp it. - * Also reset jobSize and overlapLog */ -size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers); - /*! ZSTDMT_updateCParams_whileCompressing() : * Updates only a selected set of compression parameters, to remain compatible with current frame. * New parameters will be applied to next compression job. */ @@ -174,17 +103,6 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx); -/*! ZSTDMT_initCStream_internal() : - * Private use only. Init streaming operation. - * expects params to be valid. - * must receive dict, or cdict, or none, but not both. - * @return : 0, or an error code */ -size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, - const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, - const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); - - #if defined (__cplusplus) } #endif