diff --git a/include/aarch64_label.h b/include/aarch64_label.h index a4e6d06..25dc8aa 100644 --- a/include/aarch64_label.h +++ b/include/aarch64_label.h @@ -3,16 +3,16 @@ #ifdef __USER_LABEL_PREFIX__ #define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a ## b -#define cdecl(x) CONCAT1 (__USER_LABEL_PREFIX__, x) +#define CONCAT2(a, b) a##b +#define cdecl(x) CONCAT1(__USER_LABEL_PREFIX__, x) #else #define cdecl(x) x #endif #ifdef __APPLE__ -#define ASM_DEF_RODATA .section __TEXT,__const +#define ASM_DEF_RODATA .section __TEXT, __const #else -#define ASM_DEF_RODATA .section .rodata +#define ASM_DEF_RODATA .section .rodata #endif #endif diff --git a/include/crc.h b/include/crc.h index 0714960..e2c9902 100644 --- a/include/crc.h +++ b/include/crc.h @@ -27,13 +27,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ - /** * @file crc.h * @brief CRC functions. */ - #ifndef _CRC_H_ #define _CRC_H_ @@ -43,7 +41,6 @@ extern "C" { #endif - /* Multi-binary functions */ /** @@ -54,12 +51,11 @@ extern "C" { * * @returns 16 bit CRC */ -uint16_t crc16_t10dif( - uint16_t init_crc, //!< initial CRC value, 16 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); - +uint16_t +crc16_t10dif(uint16_t init_crc, //!< initial CRC value, 16 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC and copy T10 standard, runs appropriate version. 
@@ -68,13 +64,12 @@ uint16_t crc16_t10dif( * * @returns 16 bit CRC */ -uint16_t crc16_t10dif_copy( - uint16_t init_crc, //!< initial CRC value, 16 bits - uint8_t *dst, //!< buffer destination for copy - uint8_t *src, //!< buffer source to crc + copy - uint64_t len //!< buffer length in bytes (64-bit data) - ); - +uint16_t +crc16_t10dif_copy(uint16_t init_crc, //!< initial CRC value, 16 bits + uint8_t *dst, //!< buffer destination for copy + uint8_t *src, //!< buffer source to crc + copy + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from the IEEE standard, runs appropriate version. @@ -96,11 +91,11 @@ uint16_t crc16_t10dif_copy( * @returns 32 bit CRC */ -uint32_t crc32_ieee( - uint32_t init_crc, //!< initial CRC value, 32 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint32_t +crc32_ieee(uint32_t init_crc, //!< initial CRC value, 32 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate the customized CRC @@ -124,12 +119,11 @@ uint32_t crc32_ieee( * * @returns 32 bit CRC */ -uint32_t crc32_gzip_refl( - uint32_t init_crc, //!< initial CRC value, 32 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); - +uint32_t +crc32_gzip_refl(uint32_t init_crc, //!< initial CRC value, 32 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief ISCSI CRC function, runs appropriate version. 
@@ -139,12 +133,11 @@ uint32_t crc32_gzip_refl( * * @returns 32 bit CRC */ -unsigned int crc32_iscsi( - unsigned char *buffer, //!< buffer to calculate CRC on - int len, //!< buffer length in bytes - unsigned int init_crc //!< initial CRC value - ); - +unsigned int +crc32_iscsi(unsigned char *buffer, //!< buffer to calculate CRC on + int len, //!< buffer length in bytes + unsigned int init_crc //!< initial CRC value +); /* Base functions */ @@ -152,45 +145,42 @@ unsigned int crc32_iscsi( * @brief ISCSI CRC function, baseline version * @returns 32 bit CRC */ -unsigned int crc32_iscsi_base( - unsigned char *buffer, //!< buffer to calculate CRC on - int len, //!< buffer length in bytes - unsigned int crc_init //!< initial CRC value - ); - +unsigned int +crc32_iscsi_base(unsigned char *buffer, //!< buffer to calculate CRC on + int len, //!< buffer length in bytes + unsigned int crc_init //!< initial CRC value +); /** * @brief Generate CRC from the T10 standard, runs baseline version * @returns 16 bit CRC */ -uint16_t crc16_t10dif_base( - uint16_t seed, //!< initial CRC value, 16 bits - uint8_t *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); - +uint16_t +crc16_t10dif_base(uint16_t seed, //!< initial CRC value, 16 bits + uint8_t *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC and copy T10 standard, runs baseline version. 
* @returns 16 bit CRC */ -uint16_t crc16_t10dif_copy_base( - uint16_t init_crc, //!< initial CRC value, 16 bits - uint8_t *dst, //!< buffer destination for copy - uint8_t *src, //!< buffer source to crc + copy - uint64_t len //!< buffer length in bytes (64-bit data) - ); - +uint16_t +crc16_t10dif_copy_base(uint16_t init_crc, //!< initial CRC value, 16 bits + uint8_t *dst, //!< buffer destination for copy + uint8_t *src, //!< buffer source to crc + copy + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from the IEEE standard, runs baseline version * @returns 32 bit CRC */ -uint32_t crc32_ieee_base( - uint32_t seed, //!< initial CRC value, 32 bits - uint8_t *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint32_t +crc32_ieee_base(uint32_t seed, //!< initial CRC value, 32 bits + uint8_t *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate the customized CRC @@ -198,12 +188,11 @@ uint32_t crc32_ieee_base( * runs baseline version * @returns 32 bit CRC */ -uint32_t crc32_gzip_refl_base( - uint32_t seed, //!< initial CRC value, 32 bits - uint8_t *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); - +uint32_t +crc32_gzip_refl_base(uint32_t seed, //!< initial CRC value, 32 bits + uint8_t *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); #ifdef __cplusplus } diff --git a/include/crc64.h b/include/crc64.h index c4a5697..717f5b4 100644 --- a/include/crc64.h +++ b/include/crc64.h @@ -27,13 +27,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ - /** * @file crc64.h * @brief CRC64 functions. 
*/ - #ifndef _CRC64_H_ #define _CRC64_H_ @@ -43,7 +41,6 @@ extern "C" { #endif - /* Multi-binary functions */ /** @@ -54,11 +51,11 @@ extern "C" { * selects the appropriate version at runtime. * @returns 64 bit CRC */ -uint64_t crc64_ecma_refl( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_ecma_refl(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from ECMA-182 standard in normal format, runs @@ -68,11 +65,11 @@ uint64_t crc64_ecma_refl( * selects the appropriate version at runtime. * @returns 64 bit CRC */ -uint64_t crc64_ecma_norm( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_ecma_norm(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from ISO standard in reflected format, runs @@ -82,11 +79,11 @@ uint64_t crc64_ecma_norm( * selects the appropriate version at runtime. * @returns 64 bit CRC */ -uint64_t crc64_iso_refl( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_iso_refl(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from ISO standard in normal format, runs @@ -96,11 +93,11 @@ uint64_t crc64_iso_refl( * selects the appropriate version at runtime. 
* @returns 64 bit CRC */ -uint64_t crc64_iso_norm( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_iso_norm(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from "Jones" coefficients in reflected format, runs @@ -110,11 +107,11 @@ uint64_t crc64_iso_norm( * selects the appropriate version at runtime. * @returns 64 bit CRC */ -uint64_t crc64_jones_refl( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_jones_refl(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from "Jones" coefficients in normal format, runs @@ -124,11 +121,11 @@ uint64_t crc64_jones_refl( * selects the appropriate version at runtime. * @returns 64 bit CRC */ -uint64_t crc64_jones_norm( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_jones_norm(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from "Rocksoft" coefficients in reflected format, runs @@ -138,11 +135,11 @@ uint64_t crc64_jones_norm( * selects the appropriate version at runtime. 
* @returns 64 bit CRC */ -uint64_t crc64_rocksoft_refl( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_rocksoft_refl(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from "Rocksoft" coefficients in normal format, runs @@ -152,11 +149,11 @@ uint64_t crc64_rocksoft_refl( * selects the appropriate version at runtime. * @returns 64 bit CRC */ -uint64_t crc64_rocksoft_norm( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_rocksoft_norm(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /* Arch specific versions */ @@ -167,11 +164,11 @@ uint64_t crc64_rocksoft_norm( * @returns 64 bit CRC */ -uint64_t crc64_ecma_refl_by8( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_ecma_refl_by8(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from ECMA-182 standard in normal format. 
@@ -180,31 +177,31 @@ uint64_t crc64_ecma_refl_by8( * @returns 64 bit CRC */ -uint64_t crc64_ecma_norm_by8( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_ecma_norm_by8(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from ECMA-182 standard in reflected format, runs baseline version * @returns 64 bit CRC */ -uint64_t crc64_ecma_refl_base( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_ecma_refl_base(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from ECMA-182 standard in normal format, runs baseline version * @returns 64 bit CRC */ -uint64_t crc64_ecma_norm_base( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_ecma_norm_base(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from ISO standard in reflected format. 
@@ -213,11 +210,11 @@ uint64_t crc64_ecma_norm_base( * @returns 64 bit CRC */ -uint64_t crc64_iso_refl_by8( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_iso_refl_by8(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from ISO standard in normal format. @@ -226,31 +223,31 @@ uint64_t crc64_iso_refl_by8( * @returns 64 bit CRC */ -uint64_t crc64_iso_norm_by8( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_iso_norm_by8(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from ISO standard in reflected format, runs baseline version * @returns 64 bit CRC */ -uint64_t crc64_iso_refl_base( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_iso_refl_base(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from ISO standard in normal format, runs baseline version * @returns 64 bit CRC */ -uint64_t crc64_iso_norm_base( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_iso_norm_base(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to 
calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from "Jones" coefficients in reflected format. @@ -259,11 +256,11 @@ uint64_t crc64_iso_norm_base( * @returns 64 bit CRC */ -uint64_t crc64_jones_refl_by8( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_jones_refl_by8(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from "Jones" coefficients in normal format. @@ -272,31 +269,31 @@ uint64_t crc64_jones_refl_by8( * @returns 64 bit CRC */ -uint64_t crc64_jones_norm_by8( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_jones_norm_by8(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from "Jones" coefficients in reflected format, runs baseline version * @returns 64 bit CRC */ -uint64_t crc64_jones_refl_base( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_jones_refl_base(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from "Jones" coefficients in normal format, runs baseline version * @returns 64 bit CRC */ -uint64_t crc64_jones_norm_base( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - 
uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_jones_norm_base(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from "Rocksoft" coefficients in reflected format. @@ -305,21 +302,21 @@ uint64_t crc64_jones_norm_base( * @returns 64 bit CRC */ -uint64_t crc64_rocksoft_refl_by8( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_rocksoft_refl_by8(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from "Rocksoft" coefficients in reflected format, runs baseline version * @returns 64 bit CRC */ -uint64_t crc64_rocksoft_refl_base( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_rocksoft_refl_base(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from "Rocksoft" coefficients in normal format. 
@@ -328,21 +325,21 @@ uint64_t crc64_rocksoft_refl_base( * @returns 64 bit CRC */ -uint64_t crc64_rocksoft_norm_by8( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_rocksoft_norm_by8(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); /** * @brief Generate CRC from "Rocksoft" coefficients in normal format, runs baseline version * @returns 64 bit CRC */ -uint64_t crc64_rocksoft_norm_base( - uint64_t init_crc, //!< initial CRC value, 64 bits - const unsigned char *buf, //!< buffer to calculate CRC on - uint64_t len //!< buffer length in bytes (64-bit data) - ); +uint64_t +crc64_rocksoft_norm_base(uint64_t init_crc, //!< initial CRC value, 64 bits + const unsigned char *buf, //!< buffer to calculate CRC on + uint64_t len //!< buffer length in bytes (64-bit data) +); #ifdef __cplusplus } diff --git a/include/erasure_code.h b/include/erasure_code.h index 3db109d..6a142a7 100644 --- a/include/erasure_code.h +++ b/include/erasure_code.h @@ -27,7 +27,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ - #ifndef _ERASURE_CODE_H_ #define _ERASURE_CODE_H_ @@ -71,7 +70,8 @@ extern "C" { * @returns none */ -void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls); +void +ec_init_tables(int k, int rows, unsigned char *a, unsigned char *gftbls); /** * @brief Initialize tables for fast Erasure Code encode and decode, runs baseline version. @@ -79,7 +79,8 @@ void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls); * Baseline version of ec_encode_data() with same parameters. 
*/ -void ec_init_tables_base(int k, int rows, unsigned char* a, unsigned char* gftbls); +void +ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *gftbls); /** * @brief Generate or decode erasure codes on blocks of data, runs appropriate version. @@ -103,19 +104,22 @@ void ec_init_tables_base(int k, int rows, unsigned char* a, unsigned char* gftbl * @returns none */ -void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, - unsigned char **coding); +void +ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, + unsigned char **coding); /** * @brief Generate or decode erasure codes on blocks of data, runs baseline version. * * Baseline version of ec_encode_data() with same parameters. */ -void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, - unsigned char **dest); +void +ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, + unsigned char **dest); /** - * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate version. + * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate + * version. * * Given one source data block, update one or multiple blocks of encoded data as * specified by a matrix of GF(2^8) coefficients. When given a suitable set of @@ -136,8 +140,9 @@ void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigne * @param coding Array of pointers to coded output buffers. * @returns none */ -void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls, - unsigned char *data, unsigned char **coding); +void +ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding); /** * @brief Generate update for encode or decode of erasure codes from single source. 
@@ -145,8 +150,9 @@ void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g * Baseline version of ec_encode_data_update(). */ -void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v, - unsigned char *data, unsigned char **dest); +void +ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v, + unsigned char *data, unsigned char **dest); /** * @brief GF(2^8) vector dot product, runs baseline version. @@ -168,9 +174,9 @@ void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned ch * @returns none */ - -void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char *dest); +void +gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char *dest); /** * @brief GF(2^8) vector dot product, runs appropriate version. @@ -192,8 +198,9 @@ void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char *dest); +void +gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char *dest); /** * @brief GF(2^8) vector multiply accumulate, runs appropriate version. @@ -218,8 +225,9 @@ void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char *dest); +void +gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char *dest); /** * @brief GF(2^8) vector multiply accumulate, baseline version. @@ -227,8 +235,9 @@ void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned ch * Baseline version of gf_vect_mad() with same parameters. 
*/ -void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, - unsigned char *dest); +void +gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, + unsigned char *dest); // x86 only #if defined(__i386__) || defined(__x86_64__) @@ -239,8 +248,9 @@ void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned ch * Arch specific version of ec_encode_data() with same parameters. * @requires SSE4.1 */ -void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, - unsigned char **coding); +void +ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, + unsigned char **coding); /** * @brief Generate or decode erasure codes on blocks of data. @@ -248,8 +258,9 @@ void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigne * Arch specific version of ec_encode_data() with same parameters. * @requires AVX */ -void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, - unsigned char **coding); +void +ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, + unsigned char **coding); /** * @brief Generate or decode erasure codes on blocks of data. @@ -257,8 +268,9 @@ void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigne * Arch specific version of ec_encode_data() with same parameters. * @requires AVX2 */ -void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, - unsigned char **coding); +void +ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, + unsigned char **coding); /** * @brief Generate update for encode or decode of erasure codes from single source. 
@@ -267,8 +279,9 @@ void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsign * @requires SSE4.1 */ -void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls, - unsigned char *data, unsigned char **coding); +void +ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding); /** * @brief Generate update for encode or decode of erasure codes from single source. @@ -277,8 +290,9 @@ void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned cha * @requires AVX */ -void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls, - unsigned char *data, unsigned char **coding); +void +ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding); /** * @brief Generate update for encode or decode of erasure codes from single source. @@ -287,8 +301,9 @@ void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned cha * @requires AVX2 */ -void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls, - unsigned char *data, unsigned char **coding); +void +ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding); /** * @brief GF(2^8) vector dot product. @@ -308,8 +323,9 @@ void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned ch * @returns none */ -void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char *dest); +void +gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char *dest); /** * @brief GF(2^8) vector dot product. 
@@ -329,8 +345,9 @@ void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char *dest); +void +gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char *dest); /** * @brief GF(2^8) vector dot product. @@ -350,8 +367,9 @@ void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char *dest); +void +gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char *dest); /** * @brief GF(2^8) vector dot product with two outputs. @@ -372,8 +390,9 @@ void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with two outputs. @@ -394,8 +413,9 @@ void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with two outputs. @@ -416,8 +436,9 @@ void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with three outputs. 
@@ -438,8 +459,9 @@ void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with three outputs. @@ -460,8 +482,9 @@ void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with three outputs. @@ -482,8 +505,9 @@ void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with four outputs. @@ -504,8 +528,9 @@ void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with four outputs. 
@@ -526,8 +551,9 @@ void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with four outputs. @@ -548,8 +574,9 @@ void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with five outputs. @@ -570,8 +597,9 @@ void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with five outputs. @@ -592,8 +620,9 @@ void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with five outputs. 
@@ -614,8 +643,9 @@ void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with six outputs. @@ -636,8 +666,9 @@ void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with six outputs. @@ -658,8 +689,9 @@ void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector dot product with six outputs. @@ -680,8 +712,9 @@ void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, * @returns none */ -void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char **dest); +void +gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply accumulate, arch specific version. 
@@ -690,8 +723,9 @@ void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, * @requires SSE4.1 */ -void gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char *dest); +void +gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char *dest); /** * @brief GF(2^8) vector multiply accumulate, arch specific version. * @@ -699,8 +733,9 @@ void gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigne * @requires AVX */ -void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char *dest); +void +gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char *dest); /** * @brief GF(2^8) vector multiply accumulate, arch specific version. @@ -709,9 +744,9 @@ void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigne * @requires AVX2 */ -void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char *dest); - +void +gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char *dest); /** * @brief GF(2^8) vector multiply with 2 accumulate. SSE version. @@ -734,21 +769,24 @@ void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsign * @returns none */ -void gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse(). 
* @requires AVX */ -void gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse(). * @requires AVX2 */ -void gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 3 accumulate. SSE version. @@ -771,22 +809,25 @@ void gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsig * @returns none */ -void gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse(). * @requires AVX */ -void gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse(). * @requires AVX2 */ -void gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 4 accumulate. SSE version. 
@@ -809,61 +850,70 @@ void gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsig * @returns none */ -void gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse(). * @requires AVX */ -void gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse(). * @requires AVX2 */ -void gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 5 accumulate. SSE version. * @requires SSE4.1 */ -void gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 5 accumulate. AVX version. * @requires AVX */ -void gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version. 
* @requires AVX2 */ -void gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 6 accumulate. SSE version. * @requires SSE4.1 */ -void gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 6 accumulate. AVX version. * @requires AVX */ -void gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); /** * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version. * @requires AVX2 */ -void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char **dest); +void +gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); #endif @@ -879,7 +929,8 @@ void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsig * @returns Product of a and b in GF(2^8) */ -unsigned char gf_mul(unsigned char a, unsigned char b); +unsigned char +gf_mul(unsigned char a, unsigned char b); /** * @brief Single element GF(2^8) inverse. @@ -888,7 +939,8 @@ unsigned char gf_mul(unsigned char a, unsigned char b); * @returns Field element b such that a x b = {1} */ -unsigned char gf_inv(unsigned char a); +unsigned char +gf_inv(unsigned char a); /** * @brief Generate a matrix of coefficients to be used for encoding. 
@@ -914,7 +966,8 @@ unsigned char gf_inv(unsigned char a); * @returns none */ -void gf_gen_rs_matrix(unsigned char *a, int m, int k); +void +gf_gen_rs_matrix(unsigned char *a, int m, int k); /** * @brief Generate a Cauchy matrix of coefficients to be used for encoding. @@ -929,7 +982,8 @@ void gf_gen_rs_matrix(unsigned char *a, int m, int k); * @returns none */ -void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k); +void +gf_gen_cauchy1_matrix(unsigned char *a, int m, int k); /** * @brief Invert a matrix in GF(2^8) @@ -943,8 +997,8 @@ void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k); * @returns 0 successful, other fail on singular input matrix */ -int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n); - +int +gf_invert_matrix(unsigned char *in, unsigned char *out, const int n); /*************************************************************/ diff --git a/include/gf_vect_mul.h b/include/gf_vect_mul.h index 7cd9544..c8ffbd6 100644 --- a/include/gf_vect_mul.h +++ b/include/gf_vect_mul.h @@ -27,7 +27,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ - #ifndef _GF_VECT_MUL_H #define _GF_VECT_MUL_H @@ -46,7 +45,7 @@ extern "C" { // x86 only #if defined(__i386__) || defined(__x86_64__) - /** +/** * @brief GF(2^8) vector multiply by constant. * * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C @@ -64,10 +63,10 @@ extern "C" { * @returns 0 pass, other fail */ -int gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest); +int +gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest); - - /** +/** * @brief GF(2^8) vector multiply by constant. 
* * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C @@ -85,7 +84,8 @@ int gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest); * @returns 0 pass, other fail */ -int gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest); +int +gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest); #endif @@ -109,8 +109,8 @@ int gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest); * @returns 0 pass, other fail */ -int gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest); - +int +gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest); /** * @brief Initialize 32-byte constant array for GF(2^8) vector multiply @@ -122,8 +122,8 @@ int gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest); * @param gftbl Table output. */ -void gf_vect_mul_init(unsigned char c, unsigned char* gftbl); - +void +gf_vect_mul_init(unsigned char c, unsigned char *gftbl); /** * @brief GF(2^8) vector multiply by constant, runs baseline version. 
@@ -143,8 +143,8 @@ void gf_vect_mul_init(unsigned char c, unsigned char* gftbl); * @returns 0 pass, other fail */ -int gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, - unsigned char *dest); +int +gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest); #ifdef __cplusplus } diff --git a/include/igzip_lib.h b/include/igzip_lib.h index bc60aa0..ee9b72a 100644 --- a/include/igzip_lib.h +++ b/include/igzip_lib.h @@ -80,17 +80,17 @@ extern "C" { /******************************************************************************/ /* Deflate Compression Standard Defines */ /******************************************************************************/ -#define IGZIP_K 1024 -#define ISAL_DEF_MAX_HDR_SIZE 328 -#define ISAL_DEF_MAX_CODE_LEN 15 -#define ISAL_DEF_HIST_SIZE (32*IGZIP_K) +#define IGZIP_K 1024 +#define ISAL_DEF_MAX_HDR_SIZE 328 +#define ISAL_DEF_MAX_CODE_LEN 15 +#define ISAL_DEF_HIST_SIZE (32 * IGZIP_K) #define ISAL_DEF_MAX_HIST_BITS 15 -#define ISAL_DEF_MAX_MATCH 258 -#define ISAL_DEF_MIN_MATCH 3 +#define ISAL_DEF_MAX_MATCH 258 +#define ISAL_DEF_MIN_MATCH 3 -#define ISAL_DEF_LIT_SYMBOLS 257 -#define ISAL_DEF_LEN_SYMBOLS 29 -#define ISAL_DEF_DIST_SYMBOLS 30 +#define ISAL_DEF_LIT_SYMBOLS 257 +#define ISAL_DEF_LEN_SYMBOLS 29 +#define ISAL_DEF_DIST_SYMBOLS 30 #define ISAL_DEF_LIT_LEN_SYMBOLS (ISAL_DEF_LIT_SYMBOLS + ISAL_DEF_LEN_SYMBOLS) /* Max repeat length, rounded up to 32 byte boundary */ @@ -118,53 +118,53 @@ extern "C" { #define ISAL_LIMIT_HASH_UPDATE -#define IGZIP_HASH8K_HASH_SIZE (8 * IGZIP_K) -#define IGZIP_HASH_HIST_SIZE IGZIP_HIST_SIZE +#define IGZIP_HASH8K_HASH_SIZE (8 * IGZIP_K) +#define IGZIP_HASH_HIST_SIZE IGZIP_HIST_SIZE #define IGZIP_HASH_MAP_HASH_SIZE IGZIP_HIST_SIZE -#define IGZIP_LVL0_HASH_SIZE (8 * IGZIP_K) -#define IGZIP_LVL1_HASH_SIZE IGZIP_HASH8K_HASH_SIZE -#define IGZIP_LVL2_HASH_SIZE IGZIP_HASH_HIST_SIZE -#define IGZIP_LVL3_HASH_SIZE IGZIP_HASH_MAP_HASH_SIZE +#define IGZIP_LVL0_HASH_SIZE (8 * 
IGZIP_K) +#define IGZIP_LVL1_HASH_SIZE IGZIP_HASH8K_HASH_SIZE +#define IGZIP_LVL2_HASH_SIZE IGZIP_HASH_HIST_SIZE +#define IGZIP_LVL3_HASH_SIZE IGZIP_HASH_MAP_HASH_SIZE #ifdef LONGER_HUFFTABLE -enum {IGZIP_DIST_TABLE_SIZE = 8*1024}; +enum { IGZIP_DIST_TABLE_SIZE = 8 * 1024 }; /* DECODE_OFFSET is dist code index corresponding to DIST_TABLE_SIZE + 1 */ enum { IGZIP_DECODE_OFFSET = 26 }; #else -enum {IGZIP_DIST_TABLE_SIZE = 2}; +enum { IGZIP_DIST_TABLE_SIZE = 2 }; /* DECODE_OFFSET is dist code index corresponding to DIST_TABLE_SIZE + 1 */ enum { IGZIP_DECODE_OFFSET = 0 }; #endif -enum {IGZIP_LEN_TABLE_SIZE = 256}; -enum {IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS}; +enum { IGZIP_LEN_TABLE_SIZE = 256 }; +enum { IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS }; -#define IGZIP_HUFFTABLE_CUSTOM 0 +#define IGZIP_HUFFTABLE_CUSTOM 0 #define IGZIP_HUFFTABLE_DEFAULT 1 -#define IGZIP_HUFFTABLE_STATIC 2 +#define IGZIP_HUFFTABLE_STATIC 2 /* Flush Flags */ -#define NO_FLUSH 0 /* Default */ -#define SYNC_FLUSH 1 -#define FULL_FLUSH 2 -#define FINISH_FLUSH 0 /* Deprecated */ +#define NO_FLUSH 0 /* Default */ +#define SYNC_FLUSH 1 +#define FULL_FLUSH 2 +#define FINISH_FLUSH 0 /* Deprecated */ /* Gzip Flags */ -#define IGZIP_DEFLATE 0 /* Default */ -#define IGZIP_GZIP 1 -#define IGZIP_GZIP_NO_HDR 2 -#define IGZIP_ZLIB 3 -#define IGZIP_ZLIB_NO_HDR 4 +#define IGZIP_DEFLATE 0 /* Default */ +#define IGZIP_GZIP 1 +#define IGZIP_GZIP_NO_HDR 2 +#define IGZIP_ZLIB 3 +#define IGZIP_ZLIB_NO_HDR 4 /* Compression Return values */ -#define COMP_OK 0 -#define INVALID_FLUSH -7 -#define INVALID_PARAM -8 -#define STATELESS_OVERFLOW -1 +#define COMP_OK 0 +#define INVALID_FLUSH -7 +#define INVALID_PARAM -8 +#define STATELESS_OVERFLOW -1 #define ISAL_INVALID_OPERATION -9 -#define ISAL_INVALID_STATE -3 -#define ISAL_INVALID_LEVEL -4 /* Invalid Compression level set */ +#define ISAL_INVALID_STATE -3 +#define ISAL_INVALID_LEVEL -4 /* Invalid Compression level set */ #define ISAL_INVALID_LEVEL_BUF -5 /* Invalid 
buffer specified for the compression level */ /** @@ -172,35 +172,34 @@ enum {IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS}; * @brief Compression State please note ZSTATE_TRL only applies for GZIP compression */ - /* When the state is set to ZSTATE_NEW_HDR or TMP_ZSTATE_NEW_HEADER, the * hufftable being used for compression may be swapped */ enum isal_zstate_state { - ZSTATE_NEW_HDR, //!< Header to be written - ZSTATE_HDR, //!< Header state - ZSTATE_CREATE_HDR, //!< Header to be created - ZSTATE_BODY, //!< Body state - ZSTATE_FLUSH_READ_BUFFER, //!< Flush buffer - ZSTATE_FLUSH_ICF_BUFFER, - ZSTATE_TYPE0_HDR, //! Type0 block header to be written - ZSTATE_TYPE0_BODY, //!< Type0 block body to be written - ZSTATE_SYNC_FLUSH, //!< Write sync flush block - ZSTATE_FLUSH_WRITE_BUFFER, //!< Flush bitbuf - ZSTATE_TRL, //!< Trailer state - ZSTATE_END, //!< End state - ZSTATE_TMP_NEW_HDR, //!< Temporary Header to be written - ZSTATE_TMP_HDR, //!< Temporary Header state - ZSTATE_TMP_CREATE_HDR, //!< Temporary Header to be created state - ZSTATE_TMP_BODY, //!< Temporary Body state - ZSTATE_TMP_FLUSH_READ_BUFFER, //!< Flush buffer - ZSTATE_TMP_FLUSH_ICF_BUFFER, - ZSTATE_TMP_TYPE0_HDR, //! Temporary Type0 block header to be written - ZSTATE_TMP_TYPE0_BODY, //!< Temporary Type0 block body to be written - ZSTATE_TMP_SYNC_FLUSH, //!< Write sync flush block - ZSTATE_TMP_FLUSH_WRITE_BUFFER, //!< Flush bitbuf - ZSTATE_TMP_TRL, //!< Temporary Trailer state - ZSTATE_TMP_END //!< Temporary End state + ZSTATE_NEW_HDR, //!< Header to be written + ZSTATE_HDR, //!< Header state + ZSTATE_CREATE_HDR, //!< Header to be created + ZSTATE_BODY, //!< Body state + ZSTATE_FLUSH_READ_BUFFER, //!< Flush buffer + ZSTATE_FLUSH_ICF_BUFFER, + ZSTATE_TYPE0_HDR, //! 
Type0 block header to be written + ZSTATE_TYPE0_BODY, //!< Type0 block body to be written + ZSTATE_SYNC_FLUSH, //!< Write sync flush block + ZSTATE_FLUSH_WRITE_BUFFER, //!< Flush bitbuf + ZSTATE_TRL, //!< Trailer state + ZSTATE_END, //!< End state + ZSTATE_TMP_NEW_HDR, //!< Temporary Header to be written + ZSTATE_TMP_HDR, //!< Temporary Header state + ZSTATE_TMP_CREATE_HDR, //!< Temporary Header to be created state + ZSTATE_TMP_BODY, //!< Temporary Body state + ZSTATE_TMP_FLUSH_READ_BUFFER, //!< Flush buffer + ZSTATE_TMP_FLUSH_ICF_BUFFER, + ZSTATE_TMP_TYPE0_HDR, //! Temporary Type0 block header to be written + ZSTATE_TMP_TYPE0_BODY, //!< Temporary Type0 block body to be written + ZSTATE_TMP_SYNC_FLUSH, //!< Write sync flush block + ZSTATE_TMP_FLUSH_WRITE_BUFFER, //!< Flush bitbuf + ZSTATE_TMP_TRL, //!< Temporary Trailer state + ZSTATE_TMP_END //!< Temporary End state }; /* Offset used to switch between TMP states and non-tmp states */ @@ -209,49 +208,49 @@ enum isal_zstate_state { /******************************************************************************/ /* Inflate Implementation Specific Defines */ /******************************************************************************/ -#define ISAL_DECODE_LONG_BITS 12 +#define ISAL_DECODE_LONG_BITS 12 #define ISAL_DECODE_SHORT_BITS 10 /* Current state of decompression */ enum isal_block_state { - ISAL_BLOCK_NEW_HDR, /* Just starting a new block */ - ISAL_BLOCK_HDR, /* In the middle of reading in a block header */ - ISAL_BLOCK_TYPE0, /* Decoding a type 0 block */ - ISAL_BLOCK_CODED, /* Decoding a huffman coded block */ - ISAL_BLOCK_INPUT_DONE, /* Decompression of input is completed */ - ISAL_BLOCK_FINISH, /* Decompression of input is completed and all data has been flushed to output */ - ISAL_GZIP_EXTRA_LEN, - ISAL_GZIP_EXTRA, - ISAL_GZIP_NAME, - ISAL_GZIP_COMMENT, - ISAL_GZIP_HCRC, - ISAL_ZLIB_DICT, - ISAL_CHECKSUM_CHECK, + ISAL_BLOCK_NEW_HDR, /* Just starting a new block */ + ISAL_BLOCK_HDR, /* In the middle of 
reading in a block header */ + ISAL_BLOCK_TYPE0, /* Decoding a type 0 block */ + ISAL_BLOCK_CODED, /* Decoding a huffman coded block */ + ISAL_BLOCK_INPUT_DONE, /* Decompression of input is completed */ + ISAL_BLOCK_FINISH, /* Decompression of input is completed and all data has been flushed to + output */ + ISAL_GZIP_EXTRA_LEN, + ISAL_GZIP_EXTRA, + ISAL_GZIP_NAME, + ISAL_GZIP_COMMENT, + ISAL_GZIP_HCRC, + ISAL_ZLIB_DICT, + ISAL_CHECKSUM_CHECK, }; - /* Inflate Flags */ -#define ISAL_DEFLATE 0 /* Default */ -#define ISAL_GZIP 1 -#define ISAL_GZIP_NO_HDR 2 -#define ISAL_ZLIB 3 -#define ISAL_ZLIB_NO_HDR 4 -#define ISAL_ZLIB_NO_HDR_VER 5 -#define ISAL_GZIP_NO_HDR_VER 6 +#define ISAL_DEFLATE 0 /* Default */ +#define ISAL_GZIP 1 +#define ISAL_GZIP_NO_HDR 2 +#define ISAL_ZLIB 3 +#define ISAL_ZLIB_NO_HDR 4 +#define ISAL_ZLIB_NO_HDR_VER 5 +#define ISAL_GZIP_NO_HDR_VER 6 /* Inflate Return values */ -#define ISAL_DECOMP_OK 0 /* No errors encountered while decompressing */ -#define ISAL_END_INPUT 1 /* End of input reached */ -#define ISAL_OUT_OVERFLOW 2 /* End of output reached */ -#define ISAL_NAME_OVERFLOW 3 /* End of gzip name buffer reached */ -#define ISAL_COMMENT_OVERFLOW 4 /* End of gzip name buffer reached */ -#define ISAL_EXTRA_OVERFLOW 5 /* End of extra buffer reached */ -#define ISAL_NEED_DICT 6 /* Stream needs a dictionary to continue */ -#define ISAL_INVALID_BLOCK -1 /* Invalid deflate block found */ -#define ISAL_INVALID_SYMBOL -2 /* Invalid deflate symbol found */ -#define ISAL_INVALID_LOOKBACK -3 /* Invalid lookback distance found */ -#define ISAL_INVALID_WRAPPER -4 /* Invalid gzip/zlib wrapper found */ -#define ISAL_UNSUPPORTED_METHOD -5 /* Gzip/zlib wrapper specifies unsupported compress method */ +#define ISAL_DECOMP_OK 0 /* No errors encountered while decompressing */ +#define ISAL_END_INPUT 1 /* End of input reached */ +#define ISAL_OUT_OVERFLOW 2 /* End of output reached */ +#define ISAL_NAME_OVERFLOW 3 /* End of gzip name buffer reached */ +#define 
ISAL_COMMENT_OVERFLOW 4 /* End of gzip name buffer reached */ +#define ISAL_EXTRA_OVERFLOW 5 /* End of extra buffer reached */ +#define ISAL_NEED_DICT 6 /* Stream needs a dictionary to continue */ +#define ISAL_INVALID_BLOCK -1 /* Invalid deflate block found */ +#define ISAL_INVALID_SYMBOL -2 /* Invalid deflate symbol found */ +#define ISAL_INVALID_LOOKBACK -3 /* Invalid lookback distance found */ +#define ISAL_INVALID_WRAPPER -4 /* Invalid gzip/zlib wrapper found */ +#define ISAL_UNSUPPORTED_METHOD -5 /* Gzip/zlib wrapper specifies unsupported compress method */ #define ISAL_INCORRECT_CHECKSUM -6 /* Incorrect checksum found */ /******************************************************************************/ @@ -259,15 +258,16 @@ enum isal_block_state { /******************************************************************************/ /** @brief Holds histogram of deflate symbols*/ struct isal_huff_histogram { - uint64_t lit_len_histogram[ISAL_DEF_LIT_LEN_SYMBOLS]; //!< Histogram of Literal/Len symbols seen - uint64_t dist_histogram[ISAL_DEF_DIST_SYMBOLS]; //!< Histogram of Distance Symbols seen - uint16_t hash_table[IGZIP_LVL0_HASH_SIZE]; //!< Tmp space used as a hash table + uint64_t lit_len_histogram[ISAL_DEF_LIT_LEN_SYMBOLS]; //!< Histogram of Literal/Len symbols + //!< seen + uint64_t dist_histogram[ISAL_DEF_DIST_SYMBOLS]; //!< Histogram of Distance Symbols seen + uint16_t hash_table[IGZIP_LVL0_HASH_SIZE]; //!< Tmp space used as a hash table }; /** @brief Holds modified histogram */ struct isal_mod_hist { - uint32_t d_hist[30]; //!< Distance - uint32_t ll_hist[513]; //! Literal/length + uint32_t d_hist[30]; //!< Distance + uint32_t ll_hist[513]; //! 
Literal/length }; #define ISAL_DEF_MIN_LEVEL 0 @@ -275,80 +275,80 @@ struct isal_mod_hist { /* Defines used set level data sizes */ /* has to be at least sizeof(struct level_buf) + sizeof(struct lvlX_buf */ -#define ISAL_DEF_LVL0_REQ 0 -#define ISAL_DEF_LVL1_REQ (4 * IGZIP_K + 2 * IGZIP_LVL1_HASH_SIZE) +#define ISAL_DEF_LVL0_REQ 0 +#define ISAL_DEF_LVL1_REQ (4 * IGZIP_K + 2 * IGZIP_LVL1_HASH_SIZE) #define ISAL_DEF_LVL1_TOKEN_SIZE 4 -#define ISAL_DEF_LVL2_REQ (4 * IGZIP_K + 2 * IGZIP_LVL2_HASH_SIZE) +#define ISAL_DEF_LVL2_REQ (4 * IGZIP_K + 2 * IGZIP_LVL2_HASH_SIZE) #define ISAL_DEF_LVL2_TOKEN_SIZE 4 -#define ISAL_DEF_LVL3_REQ 4 * IGZIP_K + 4 * 4 * IGZIP_K + 2 * IGZIP_LVL3_HASH_SIZE +#define ISAL_DEF_LVL3_REQ 4 * IGZIP_K + 4 * 4 * IGZIP_K + 2 * IGZIP_LVL3_HASH_SIZE #define ISAL_DEF_LVL3_TOKEN_SIZE 4 /* Data sizes for level specific data options */ -#define ISAL_DEF_LVL0_MIN ISAL_DEF_LVL0_REQ -#define ISAL_DEF_LVL0_SMALL ISAL_DEF_LVL0_REQ -#define ISAL_DEF_LVL0_MEDIUM ISAL_DEF_LVL0_REQ -#define ISAL_DEF_LVL0_LARGE ISAL_DEF_LVL0_REQ +#define ISAL_DEF_LVL0_MIN ISAL_DEF_LVL0_REQ +#define ISAL_DEF_LVL0_SMALL ISAL_DEF_LVL0_REQ +#define ISAL_DEF_LVL0_MEDIUM ISAL_DEF_LVL0_REQ +#define ISAL_DEF_LVL0_LARGE ISAL_DEF_LVL0_REQ #define ISAL_DEF_LVL0_EXTRA_LARGE ISAL_DEF_LVL0_REQ -#define ISAL_DEF_LVL0_DEFAULT ISAL_DEF_LVL0_REQ +#define ISAL_DEF_LVL0_DEFAULT ISAL_DEF_LVL0_REQ -#define ISAL_DEF_LVL1_MIN (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 1 * IGZIP_K) -#define ISAL_DEF_LVL1_SMALL (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 16 * IGZIP_K) -#define ISAL_DEF_LVL1_MEDIUM (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 32 * IGZIP_K) -#define ISAL_DEF_LVL1_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 64 * IGZIP_K) +#define ISAL_DEF_LVL1_MIN (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 1 * IGZIP_K) +#define ISAL_DEF_LVL1_SMALL (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 16 * IGZIP_K) +#define ISAL_DEF_LVL1_MEDIUM (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE 
* 32 * IGZIP_K) +#define ISAL_DEF_LVL1_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 64 * IGZIP_K) #define ISAL_DEF_LVL1_EXTRA_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 128 * IGZIP_K) -#define ISAL_DEF_LVL1_DEFAULT ISAL_DEF_LVL1_LARGE +#define ISAL_DEF_LVL1_DEFAULT ISAL_DEF_LVL1_LARGE -#define ISAL_DEF_LVL2_MIN (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 1 * IGZIP_K) -#define ISAL_DEF_LVL2_SMALL (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 16 * IGZIP_K) -#define ISAL_DEF_LVL2_MEDIUM (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 32 * IGZIP_K) -#define ISAL_DEF_LVL2_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 64 * IGZIP_K) +#define ISAL_DEF_LVL2_MIN (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 1 * IGZIP_K) +#define ISAL_DEF_LVL2_SMALL (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 16 * IGZIP_K) +#define ISAL_DEF_LVL2_MEDIUM (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 32 * IGZIP_K) +#define ISAL_DEF_LVL2_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 64 * IGZIP_K) #define ISAL_DEF_LVL2_EXTRA_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 128 * IGZIP_K) -#define ISAL_DEF_LVL2_DEFAULT ISAL_DEF_LVL2_LARGE +#define ISAL_DEF_LVL2_DEFAULT ISAL_DEF_LVL2_LARGE -#define ISAL_DEF_LVL3_MIN (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 1 * IGZIP_K) -#define ISAL_DEF_LVL3_SMALL (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 16 * IGZIP_K) -#define ISAL_DEF_LVL3_MEDIUM (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 32 * IGZIP_K) -#define ISAL_DEF_LVL3_LARGE (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 64 * IGZIP_K) +#define ISAL_DEF_LVL3_MIN (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 1 * IGZIP_K) +#define ISAL_DEF_LVL3_SMALL (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 16 * IGZIP_K) +#define ISAL_DEF_LVL3_MEDIUM (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 32 * IGZIP_K) +#define ISAL_DEF_LVL3_LARGE (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 64 * IGZIP_K) #define ISAL_DEF_LVL3_EXTRA_LARGE 
(ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 128 * IGZIP_K) -#define ISAL_DEF_LVL3_DEFAULT ISAL_DEF_LVL3_LARGE +#define ISAL_DEF_LVL3_DEFAULT ISAL_DEF_LVL3_LARGE -#define IGZIP_NO_HIST 0 -#define IGZIP_HIST 1 -#define IGZIP_DICT_HIST 2 +#define IGZIP_NO_HIST 0 +#define IGZIP_HIST 1 +#define IGZIP_DICT_HIST 2 #define IGZIP_DICT_HASH_SET 3 /** @brief Holds Bit Buffer information*/ struct BitBuf2 { - uint64_t m_bits; //!< bits in the bit buffer - uint32_t m_bit_count; //!< number of valid bits in the bit buffer - uint8_t *m_out_buf; //!< current index of buffer to write to - uint8_t *m_out_end; //!< end of buffer to write to - uint8_t *m_out_start; //!< start of buffer to write to + uint64_t m_bits; //!< bits in the bit buffer + uint32_t m_bit_count; //!< number of valid bits in the bit buffer + uint8_t *m_out_buf; //!< current index of buffer to write to + uint8_t *m_out_end; //!< end of buffer to write to + uint8_t *m_out_start; //!< start of buffer to write to }; /** @brief Holds Zlib header information */ struct isal_zlib_header { - uint32_t info; //!< base-2 logarithm of the LZ77 window size minus 8 - uint32_t level; //!< Compression level (fastest, fast, default, maximum) - uint32_t dict_id; //!< Dictionary id - uint32_t dict_flag; //!< Whether to use a dictionary + uint32_t info; //!< base-2 logarithm of the LZ77 window size minus 8 + uint32_t level; //!< Compression level (fastest, fast, default, maximum) + uint32_t dict_id; //!< Dictionary id + uint32_t dict_flag; //!< Whether to use a dictionary }; /** @brief Holds Gzip header information */ struct isal_gzip_header { - uint32_t text; //!< Optional Text hint - uint32_t time; //!< Unix modification time in gzip header - uint32_t xflags; //!< xflags in gzip header - uint32_t os; //!< OS in gzip header - uint8_t *extra; //!< Extra field in gzip header - uint32_t extra_buf_len; //!< Length of extra buffer - uint32_t extra_len; //!< Actual length of gzip header extra field - char *name; //!< Name in gzip header 
- uint32_t name_buf_len; //!< Length of name buffer - char *comment; //!< Comments in gzip header - uint32_t comment_buf_len; //!< Length of comment buffer - uint32_t hcrc; //!< Header crc or header crc flag - uint32_t flags; //!< Internal data + uint32_t text; //!< Optional Text hint + uint32_t time; //!< Unix modification time in gzip header + uint32_t xflags; //!< xflags in gzip header + uint32_t os; //!< OS in gzip header + uint8_t *extra; //!< Extra field in gzip header + uint32_t extra_buf_len; //!< Length of extra buffer + uint32_t extra_len; //!< Actual length of gzip header extra field + char *name; //!< Name in gzip header + uint32_t name_buf_len; //!< Length of name buffer + char *comment; //!< Comments in gzip header + uint32_t comment_buf_len; //!< Length of comment buffer + uint32_t hcrc; //!< Header crc or header crc flag + uint32_t flags; //!< Internal data }; /* Variable prefixes: @@ -358,65 +358,67 @@ struct isal_gzip_header { /** @brief Holds the internal state information for input and output compression streams*/ struct isal_zstate { - uint32_t total_in_start; //!< Not used, may be replaced with something else - uint32_t block_next; //!< Start of current deflate block in the input - uint32_t block_end; //!< End of current deflate block in the input - uint32_t dist_mask; //!< Distance mask used. - uint32_t hash_mask; - enum isal_zstate_state state; //!< Current state in processing the data stream - struct BitBuf2 bitbuf; //!< Bit Buffer - uint32_t crc; //!< Current checksum without finalize step if any (adler) - uint8_t has_wrap_hdr; //!< keeps track of wrapper header - uint8_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set) - uint8_t has_eob; //!< keeps track of eob on the last deflate block - uint8_t has_hist; //!< flag to track if there is match history - uint16_t has_level_buf_init; //!< flag to track if user supplied memory has been initialized. 
- uint32_t count; //!< used for partial header/trailer writes - uint8_t tmp_out_buff[16]; //!< temporary array - uint32_t tmp_out_start; //!< temporary variable - uint32_t tmp_out_end; //!< temporary variable - uint32_t b_bytes_valid; //!< number of valid bytes in buffer - uint32_t b_bytes_processed; //!< number of bytes processed in buffer - uint8_t buffer[2 * IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD]; //!< Internal buffer + uint32_t total_in_start; //!< Not used, may be replaced with something else + uint32_t block_next; //!< Start of current deflate block in the input + uint32_t block_end; //!< End of current deflate block in the input + uint32_t dist_mask; //!< Distance mask used. + uint32_t hash_mask; + enum isal_zstate_state state; //!< Current state in processing the data stream + struct BitBuf2 bitbuf; //!< Bit Buffer + uint32_t crc; //!< Current checksum without finalize step if any (adler) + uint8_t has_wrap_hdr; //!< keeps track of wrapper header + uint8_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set) + uint8_t has_eob; //!< keeps track of eob on the last deflate block + uint8_t has_hist; //!< flag to track if there is match history + uint16_t + has_level_buf_init; //!< flag to track if user supplied memory has been initialized. 
+ uint32_t count; //!< used for partial header/trailer writes + uint8_t tmp_out_buff[16]; //!< temporary array + uint32_t tmp_out_start; //!< temporary variable + uint32_t tmp_out_end; //!< temporary variable + uint32_t b_bytes_valid; //!< number of valid bytes in buffer + uint32_t b_bytes_processed; //!< number of bytes processed in buffer + uint8_t buffer[2 * IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD]; //!< Internal buffer - /* Stream should be setup such that the head is cache aligned*/ - uint16_t head[IGZIP_LVL0_HASH_SIZE]; //!< Hash array + /* Stream should be setup such that the head is cache aligned*/ + uint16_t head[IGZIP_LVL0_HASH_SIZE]; //!< Hash array }; /** @brief Holds the huffman tree used to huffman encode the input stream **/ struct isal_hufftables { - uint8_t deflate_hdr[ISAL_DEF_MAX_HDR_SIZE]; //!< deflate huffman tree header - uint32_t deflate_hdr_count; //!< Number of whole bytes in deflate_huff_hdr - uint32_t deflate_hdr_extra_bits; //!< Number of bits in the partial byte in header - uint32_t dist_table[IGZIP_DIST_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are the code - uint32_t len_table[IGZIP_LEN_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are the code - uint16_t lit_table[IGZIP_LIT_TABLE_SIZE]; //!< literal code - uint8_t lit_table_sizes[IGZIP_LIT_TABLE_SIZE]; //!< literal code length - uint16_t dcodes[30 - IGZIP_DECODE_OFFSET]; //!< distance code - uint8_t dcodes_sizes[30 - IGZIP_DECODE_OFFSET]; //!< distance code length - + uint8_t deflate_hdr[ISAL_DEF_MAX_HDR_SIZE]; //!< deflate huffman tree header + uint32_t deflate_hdr_count; //!< Number of whole bytes in deflate_huff_hdr + uint32_t deflate_hdr_extra_bits; //!< Number of bits in the partial byte in header + uint32_t dist_table[IGZIP_DIST_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are + //!< the code + uint32_t len_table[IGZIP_LEN_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are + //!< the code + uint16_t lit_table[IGZIP_LIT_TABLE_SIZE]; 
//!< literal code + uint8_t lit_table_sizes[IGZIP_LIT_TABLE_SIZE]; //!< literal code length + uint16_t dcodes[30 - IGZIP_DECODE_OFFSET]; //!< distance code + uint8_t dcodes_sizes[30 - IGZIP_DECODE_OFFSET]; //!< distance code length }; /** @brief Holds stream information*/ struct isal_zstream { - uint8_t *next_in; //!< Next input byte - uint32_t avail_in; //!< number of bytes available at next_in - uint32_t total_in; //!< total number of bytes read so far + uint8_t *next_in; //!< Next input byte + uint32_t avail_in; //!< number of bytes available at next_in + uint32_t total_in; //!< total number of bytes read so far - uint8_t *next_out; //!< Next output byte - uint32_t avail_out; //!< number of bytes available at next_out - uint32_t total_out; //!< total number of bytes written so far + uint8_t *next_out; //!< Next output byte + uint32_t avail_out; //!< number of bytes available at next_out + uint32_t total_out; //!< total number of bytes written so far - struct isal_hufftables *hufftables; //!< Huffman encoding used when compressing - uint32_t level; //!< Compression level to use - uint32_t level_buf_size; //!< Size of level_buf - uint8_t * level_buf; //!< User allocated buffer required for different compression levels - uint16_t end_of_stream; //!< non-zero if this is the last input buffer - uint16_t flush; //!< Flush type can be NO_FLUSH, SYNC_FLUSH or FULL_FLUSH - uint16_t gzip_flag; //!< Indicate if gzip compression is to be performed - uint16_t hist_bits; //!< Log base 2 of maximum lookback distance, 0 is use default - struct isal_zstate internal_state; //!< Internal state for this stream + struct isal_hufftables *hufftables; //!< Huffman encoding used when compressing + uint32_t level; //!< Compression level to use + uint32_t level_buf_size; //!< Size of level_buf + uint8_t *level_buf; //!< User allocated buffer required for different compression levels + uint16_t end_of_stream; //!< non-zero if this is the last input buffer + uint16_t flush; //!< Flush type 
can be NO_FLUSH, SYNC_FLUSH or FULL_FLUSH + uint16_t gzip_flag; //!< Indicate if gzip compression is to be performed + uint16_t hist_bits; //!< Log base 2 of maximum lookback distance, 0 is use default + struct isal_zstate internal_state; //!< Internal state for this stream }; /******************************************************************************/ @@ -482,8 +484,10 @@ struct isal_zstream { #define ISAL_L_DUP ((1 << ISAL_L_REM) - (ISAL_L_REM + 1)) #define ISAL_S_DUP ((1 << ISAL_S_REM) - (ISAL_S_REM + 1)) -#define ISAL_L_UNUSED ((1 << ISAL_L_REM) - (1 << ((ISAL_L_REM)/2)) - (1 << ((ISAL_L_REM + 1)/2)) + 1) -#define ISAL_S_UNUSED ((1 << ISAL_S_REM) - (1 << ((ISAL_S_REM)/2)) - (1 << ((ISAL_S_REM + 1)/2)) + 1) +#define ISAL_L_UNUSED \ + ((1 << ISAL_L_REM) - (1 << ((ISAL_L_REM) / 2)) - (1 << ((ISAL_L_REM + 1) / 2)) + 1) +#define ISAL_S_UNUSED \ + ((1 << ISAL_S_REM) - (1 << ((ISAL_S_REM) / 2)) - (1 << ((ISAL_S_REM + 1) / 2)) + 1) #define ISAL_L_SIZE (ISAL_DEF_LIT_LEN_SYMBOLS + ISAL_L_DUP + ISAL_L_UNUSED) #define ISAL_S_SIZE (ISAL_DEF_DIST_SYMBOLS + ISAL_S_DUP + ISAL_S_UNUSED) @@ -493,48 +497,52 @@ struct isal_zstream { /** @brief Large lookup table for decoding huffman codes */ struct inflate_huff_code_large { - uint32_t short_code_lookup[1 << (ISAL_DECODE_LONG_BITS)]; //!< Short code lookup table - uint16_t long_code_lookup[ISAL_HUFF_CODE_LARGE_LONG_ALIGNED]; //!< Long code lookup table + uint32_t short_code_lookup[1 << (ISAL_DECODE_LONG_BITS)]; //!< Short code lookup table + uint16_t long_code_lookup[ISAL_HUFF_CODE_LARGE_LONG_ALIGNED]; //!< Long code lookup table }; /** @brief Small lookup table for decoding huffman codes */ struct inflate_huff_code_small { - uint16_t short_code_lookup[1 << (ISAL_DECODE_SHORT_BITS)]; //! 3. * @param len Length of each vector in bytes. Must be 16B aligned. 
@@ -296,7 +298,8 @@ int xor_check_base(int vects, int len, void **array); * @returns 0 pass, other fail */ -int pq_check_base(int vects, int len, void **array); +int +pq_check_base(int vects, int len, void **array); #ifdef __cplusplus } diff --git a/include/test.h b/include/test.h index 3229c44..9f32638 100644 --- a/include/test.h +++ b/include/test.h @@ -47,35 +47,39 @@ extern "C" { #include #ifdef _MSC_VER -# define inline __inline +#define inline __inline #endif /* Make os-independent alignment attribute, alloc and free. */ -#if defined __unix__ || defined __APPLE__ -# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval))) -# define __forceinline static inline -# define aligned_free(x) free(x) +#if defined __unix__ || defined __APPLE__ +#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval))) +#define __forceinline static inline +#define aligned_free(x) free(x) #else -# ifdef __MINGW32__ -# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval))) -# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn))) -# define aligned_free(x) _aligned_free(x) -# else -# define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl -# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn))) -# define aligned_free(x) _aligned_free(x) -# endif +#ifdef __MINGW32__ +#define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval))) +#define posix_memalign(p, algn, len) \ + (NULL == (*((char **) (p)) = (void *) _aligned_malloc(len, algn))) +#define aligned_free(x) _aligned_free(x) +#else +#define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl +#define posix_memalign(p, algn, len) \ + (NULL == (*((char **) (p)) = (void *) _aligned_malloc(len, algn))) +#define aligned_free(x) _aligned_free(x) +#endif #endif #ifdef DEBUG -# define DEBUG_PRINT(x) printf x +#define DEBUG_PRINT(x) printf x 
#else -# define DEBUG_PRINT(x) do {} while (0) +#define DEBUG_PRINT(x) \ + do { \ + } while (0) #endif /* Decide whether to use benchmark time as an approximation or a minimum. Fewer * calls to the timer are required for the approximation case.*/ -#define BENCHMARK_MIN_TIME 0 +#define BENCHMARK_MIN_TIME 0 #define BENCHMARK_APPROX_TIME 1 #ifndef BENCHMARK_TYPE #define BENCHMARK_TYPE BENCHMARK_MIN_TIME @@ -86,228 +90,260 @@ extern "C" { * standardized clock source. To obtain a meaningful result it may be * necessary to fix the CPU clock to match the rtdsc tick rate. */ -# include -# include -# define USE_CYCLES +#include +#include +#define USE_CYCLES #else -# include +#include #define USE_SECONDS #endif #ifdef USE_RDTSC #ifndef BENCHMARK_TIME -# define BENCHMARK_TIME 6 +#define BENCHMARK_TIME 6 #endif -# define GHZ 1000000000 -# define UNIT_SCALE (GHZ) -# define CALIBRATE_TIME (UNIT_SCALE / 2) -static inline long long get_time(void) { - unsigned int dummy; - return __rdtscp(&dummy); +#define GHZ 1000000000 +#define UNIT_SCALE (GHZ) +#define CALIBRATE_TIME (UNIT_SCALE / 2) +static inline long long +get_time(void) +{ + unsigned int dummy; + return __rdtscp(&dummy); } -static inline long long get_res(void) { - return 1; +static inline long long +get_res(void) +{ + return 1; } #else #ifndef BENCHMARK_TIME -# define BENCHMARK_TIME 3 +#define BENCHMARK_TIME 3 #endif #ifdef _MSC_VER -#define UNIT_SCALE get_res() +#define UNIT_SCALE get_res() #define CALIBRATE_TIME (UNIT_SCALE / 4) -static inline long long get_time(void) { - long long ret = 0; - QueryPerformanceCounter(&ret); - return ret; +static inline long long +get_time(void) +{ + long long ret = 0; + QueryPerformanceCounter(&ret); + return ret; } -static inline long long get_res(void) { - long long ret = 0; - QueryPerformanceFrequency(&ret); - return ret; +static inline long long +get_res(void) +{ + long long ret = 0; + QueryPerformanceFrequency(&ret); + return ret; } #else -# define NANO_SCALE 1000000000 -# define 
UNIT_SCALE NANO_SCALE -# define CALIBRATE_TIME (UNIT_SCALE / 4) +#define NANO_SCALE 1000000000 +#define UNIT_SCALE NANO_SCALE +#define CALIBRATE_TIME (UNIT_SCALE / 4) #ifdef __FreeBSD__ -# define CLOCK_ID CLOCK_MONOTONIC_PRECISE +#define CLOCK_ID CLOCK_MONOTONIC_PRECISE #else -# define CLOCK_ID CLOCK_MONOTONIC +#define CLOCK_ID CLOCK_MONOTONIC #endif -static inline long long get_time(void) { - struct timespec time; - long long nano_total; - clock_gettime(CLOCK_ID, &time); - nano_total = time.tv_sec; - nano_total *= NANO_SCALE; - nano_total += time.tv_nsec; - return nano_total; +static inline long long +get_time(void) +{ + struct timespec time; + long long nano_total; + clock_gettime(CLOCK_ID, &time); + nano_total = time.tv_sec; + nano_total *= NANO_SCALE; + nano_total += time.tv_nsec; + return nano_total; } -static inline long long get_res(void) { - struct timespec time; - long long nano_total; - clock_getres(CLOCK_ID, &time); - nano_total = time.tv_sec; - nano_total *= NANO_SCALE; - nano_total += time.tv_nsec; - return nano_total; +static inline long long +get_res(void) +{ + struct timespec time; + long long nano_total; + clock_getres(CLOCK_ID, &time); + nano_total = time.tv_sec; + nano_total *= NANO_SCALE; + nano_total += time.tv_nsec; + return nano_total; } #endif #endif struct perf { - long long start; - long long stop; - long long run_total; - long long iterations; + long long start; + long long stop; + long long run_total; + long long iterations; }; -static inline void perf_init(struct perf *p) { - p->start = 0; - p->stop = 0; - p->run_total = 0; +static inline void +perf_init(struct perf *p) +{ + p->start = 0; + p->stop = 0; + p->run_total = 0; } -static inline void perf_continue(struct perf *p) { - p->start = get_time(); +static inline void +perf_continue(struct perf *p) +{ + p->start = get_time(); } -static inline void perf_pause(struct perf *p) { - p->stop = get_time(); - p->run_total = p->run_total + p->stop - p->start; - p->start = p->stop; +static 
inline void +perf_pause(struct perf *p) +{ + p->stop = get_time(); + p->run_total = p->run_total + p->stop - p->start; + p->start = p->stop; } -static inline void perf_start(struct perf *p) { - perf_init(p); - perf_continue(p); +static inline void +perf_start(struct perf *p) +{ + perf_init(p); + perf_continue(p); } -static inline void perf_stop(struct perf *p) { - perf_pause(p); +static inline void +perf_stop(struct perf *p) +{ + perf_pause(p); } -static inline double get_time_elapsed(struct perf *p) { - return 1.0 * p->run_total / UNIT_SCALE; +static inline double +get_time_elapsed(struct perf *p) +{ + return 1.0 * p->run_total / UNIT_SCALE; } -static inline long long get_base_elapsed(struct perf *p) { - return p->run_total; +static inline long long +get_base_elapsed(struct perf *p) +{ + return p->run_total; } -static inline unsigned long long estimate_perf_iterations(struct perf *p, - unsigned long long runs, - unsigned long long total) { - total = total * runs; - if (get_base_elapsed(p) > 0) - return (total + get_base_elapsed(p) - 1) / get_base_elapsed(p); - else - return (total + get_res() - 1) / get_res(); +static inline unsigned long long +estimate_perf_iterations(struct perf *p, unsigned long long runs, unsigned long long total) +{ + total = total * runs; + if (get_base_elapsed(p) > 0) + return (total + get_base_elapsed(p) - 1) / get_base_elapsed(p); + else + return (total + get_res() - 1) / get_res(); } -#define CALIBRATE(PERF, FUNC_CALL) { \ - unsigned long long _i, _iter = 1; \ - perf_start(PERF); \ - FUNC_CALL; \ - perf_pause(PERF); \ - \ - while (get_base_elapsed(PERF) < CALIBRATE_TIME) { \ - _iter = estimate_perf_iterations(PERF, _iter, \ - 2 * CALIBRATE_TIME); \ - perf_start(PERF); \ - for (_i = 0; _i < _iter; _i++) { \ - FUNC_CALL; \ - } \ - perf_stop(PERF); \ - } \ - (PERF)->iterations=_iter; \ -} +#define CALIBRATE(PERF, FUNC_CALL) \ + { \ + unsigned long long _i, _iter = 1; \ + perf_start(PERF); \ + FUNC_CALL; \ + perf_pause(PERF); \ + \ + while 
(get_base_elapsed(PERF) < CALIBRATE_TIME) { \ + _iter = estimate_perf_iterations(PERF, _iter, 2 * CALIBRATE_TIME); \ + perf_start(PERF); \ + for (_i = 0; _i < _iter; _i++) { \ + FUNC_CALL; \ + } \ + perf_stop(PERF); \ + } \ + (PERF)->iterations = _iter; \ + } -#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL) { \ - unsigned long long _i, _iter = (PERF)->iterations; \ - unsigned long long _run_total = RUN_TIME; \ - _run_total *= UNIT_SCALE; \ - _iter = estimate_perf_iterations(PERF, _iter, _run_total);\ - (PERF)->iterations = 0; \ - perf_start(PERF); \ - for (_i = 0; _i < _iter; _i++) { \ - FUNC_CALL; \ - } \ - perf_pause(PERF); \ - (PERF)->iterations += _iter; \ - \ - if(get_base_elapsed(PERF) < _run_total && \ - BENCHMARK_TYPE == BENCHMARK_MIN_TIME) { \ - _iter = estimate_perf_iterations(PERF, _iter, \ - _run_total - get_base_elapsed(PERF) + \ - (UNIT_SCALE / 16)); \ - perf_continue(PERF); \ - for (_i = 0; _i < _iter; _i++) { \ - FUNC_CALL; \ - } \ - perf_pause(PERF); \ - (PERF)->iterations += _iter; \ - } \ -} +#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL) \ + { \ + unsigned long long _i, _iter = (PERF)->iterations; \ + unsigned long long _run_total = RUN_TIME; \ + _run_total *= UNIT_SCALE; \ + _iter = estimate_perf_iterations(PERF, _iter, _run_total); \ + (PERF)->iterations = 0; \ + perf_start(PERF); \ + for (_i = 0; _i < _iter; _i++) { \ + FUNC_CALL; \ + } \ + perf_pause(PERF); \ + (PERF)->iterations += _iter; \ + \ + if (get_base_elapsed(PERF) < _run_total && BENCHMARK_TYPE == BENCHMARK_MIN_TIME) { \ + _iter = estimate_perf_iterations(PERF, _iter, \ + _run_total - get_base_elapsed(PERF) + \ + (UNIT_SCALE / 16)); \ + perf_continue(PERF); \ + for (_i = 0; _i < _iter; _i++) { \ + FUNC_CALL; \ + } \ + perf_pause(PERF); \ + (PERF)->iterations += _iter; \ + } \ + } -#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL) { \ - if((RUN_TIME) > 0) { \ - CALIBRATE(PERF, FUNC_CALL); \ - PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL); \ - \ - } else { \ - (PERF)->iterations = 
1; \ - perf_start(PERF); \ - FUNC_CALL; \ - perf_stop(PERF); \ - } \ -} +#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL) \ + { \ + if ((RUN_TIME) > 0) { \ + CALIBRATE(PERF, FUNC_CALL); \ + PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL); \ + \ + } else { \ + (PERF)->iterations = 1; \ + perf_start(PERF); \ + FUNC_CALL; \ + perf_stop(PERF); \ + } \ + } #ifdef USE_CYCLES -static inline void perf_print(struct perf p, long long unit_count) { - long long total_units = p.iterations * unit_count; +static inline void +perf_print(struct perf p, long long unit_count) +{ + long long total_units = p.iterations * unit_count; - printf("runtime = %10lld ticks", get_base_elapsed(&p)); - if (total_units != 0) { - printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte", - total_units / (1000000), get_time_elapsed(&p), - get_base_elapsed(&p) / (double)total_units); - } - printf("\n"); + printf("runtime = %10lld ticks", get_base_elapsed(&p)); + if (total_units != 0) { + printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte", total_units / (1000000), + get_time_elapsed(&p), get_base_elapsed(&p) / (double) total_units); + } + printf("\n"); } #else -static inline void perf_print(struct perf p, double unit_count) { - long long total_units = p.iterations * unit_count; - long long usecs = (long long)(get_time_elapsed(&p) * 1000000); +static inline void +perf_print(struct perf p, double unit_count) +{ + long long total_units = p.iterations * unit_count; + long long usecs = (long long) (get_time_elapsed(&p) * 1000000); - printf("runtime = %10lld usecs", usecs); - if (total_units != 0) { - printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s", - total_units / (1000000), get_time_elapsed(&p), - ((double)total_units) / (1000000 * get_time_elapsed(&p))); - } - printf("\n"); + printf("runtime = %10lld usecs", usecs); + if (total_units != 0) { + printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s", total_units / (1000000), + get_time_elapsed(&p), + ((double) total_units) / (1000000 * 
get_time_elapsed(&p))); + } + printf("\n"); } #endif -static inline uint64_t get_filesize(FILE * fp) { - uint64_t file_size; - fpos_t pos, pos_curr; +static inline uint64_t +get_filesize(FILE *fp) +{ + uint64_t file_size; + fpos_t pos, pos_curr; - fgetpos(fp, &pos_curr); /* Save current position */ + fgetpos(fp, &pos_curr); /* Save current position */ #if defined(_WIN32) || defined(_WIN64) - _fseeki64(fp, 0, SEEK_END); + _fseeki64(fp, 0, SEEK_END); #else - fseeko(fp, 0, SEEK_END); + fseeko(fp, 0, SEEK_END); #endif - fgetpos(fp, &pos); - file_size = *(uint64_t *) & pos; - fsetpos(fp, &pos_curr); /* Restore position */ + fgetpos(fp, &pos); + file_size = *(uint64_t *) &pos; + fsetpos(fp, &pos_curr); /* Restore position */ - return file_size; + return file_size; } #ifdef __cplusplus diff --git a/include/unaligned.h b/include/unaligned.h index 002cb32..e67bed2 100644 --- a/include/unaligned.h +++ b/include/unaligned.h @@ -37,168 +37,188 @@ #ifdef __FreeBSD__ #include #include -# define isal_bswap16(x) bswap16(x) -# define isal_bswap32(x) bswap32(x) -# define isal_bswap64(x) bswap64(x) -#elif defined (__APPLE__) +#define isal_bswap16(x) bswap16(x) +#define isal_bswap32(x) bswap32(x) +#define isal_bswap64(x) bswap64(x) +#elif defined(__APPLE__) #include -# define isal_bswap16(x) OSSwapInt16(x) -# define isal_bswap32(x) OSSwapInt32(x) -# define isal_bswap64(x) OSSwapInt64(x) -#elif defined (__GNUC__) && !defined (__MINGW32__) -# include -# define isal_bswap16(x) bswap_16(x) -# define isal_bswap32(x) bswap_32(x) -# define isal_bswap64(x) bswap_64(x) +#define isal_bswap16(x) OSSwapInt16(x) +#define isal_bswap32(x) OSSwapInt32(x) +#define isal_bswap64(x) OSSwapInt64(x) +#elif defined(__GNUC__) && !defined(__MINGW32__) +#include +#define isal_bswap16(x) bswap_16(x) +#define isal_bswap32(x) bswap_32(x) +#define isal_bswap64(x) bswap_64(x) #elif defined _WIN64 -# define isal_bswap16(x) _byteswap_ushort(x) -# define isal_bswap32(x) _byteswap_ulong(x) -# define isal_bswap64(x) 
_byteswap_uint64(x) +#define isal_bswap16(x) _byteswap_ushort(x) +#define isal_bswap32(x) _byteswap_ulong(x) +#define isal_bswap64(x) _byteswap_uint64(x) #endif #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -# define to_be16(x) isal_bswap16(x) -# define from_be16(x) isal_bswap16(x) -# define to_be32(x) isal_bswap32(x) -# define from_be32(x) isal_bswap32(x) -# define to_be64(x) isal_bswap64(x) -# define from_be64(x) isal_bswap64(x) -# define to_le16(x) (x) -# define from_le16(x) (x) -# define to_le32(x) (x) -# define from_le32(x) (x) -# define to_le64(x) (x) -# define from_le64(x) (x) +#define to_be16(x) isal_bswap16(x) +#define from_be16(x) isal_bswap16(x) +#define to_be32(x) isal_bswap32(x) +#define from_be32(x) isal_bswap32(x) +#define to_be64(x) isal_bswap64(x) +#define from_be64(x) isal_bswap64(x) +#define to_le16(x) (x) +#define from_le16(x) (x) +#define to_le32(x) (x) +#define from_le32(x) (x) +#define to_le64(x) (x) +#define from_le64(x) (x) #else -# define to_be16(x) (x) -# define from_be16(x) (x) -# define to_be32(x) (x) -# define from_be32(x) (x) -# define to_be64(x) (x) -# define from_be64(x) (x) -# define to_le16(x) isal_bswap16(x) -# define from_le16(x) isal_bswap16(x) -# define to_le32(x) isal_bswap32(x) -# define from_le32(x) isal_bswap32(x) -# define to_le64(x) isal_bswap64(x) -# define from_le64(x) isal_bswap64(x) +#define to_be16(x) (x) +#define from_be16(x) (x) +#define to_be32(x) (x) +#define from_be32(x) (x) +#define to_be64(x) (x) +#define from_be64(x) (x) +#define to_le16(x) isal_bswap16(x) +#define from_le16(x) isal_bswap16(x) +#define to_le32(x) isal_bswap32(x) +#define from_le32(x) isal_bswap32(x) +#define to_le64(x) isal_bswap64(x) +#define from_le64(x) isal_bswap64(x) #endif -static inline uint16_t load_native_u16(uint8_t * buf) +static inline uint16_t +load_native_u16(uint8_t *buf) { - uint16_t ret; - memcpy(&ret, buf, sizeof(ret)); - return ret; + uint16_t ret; + memcpy(&ret, buf, sizeof(ret)); + return ret; } -static inline uint16_t 
load_le_u16(uint8_t * buf) +static inline uint16_t +load_le_u16(uint8_t *buf) { - return from_le16(load_native_u16(buf)); + return from_le16(load_native_u16(buf)); } -static inline uint16_t load_be_u16(uint8_t * buf) +static inline uint16_t +load_be_u16(uint8_t *buf) { - return from_be16(load_native_u16(buf)); + return from_be16(load_native_u16(buf)); } -static inline uint32_t load_native_u32(uint8_t * buf) +static inline uint32_t +load_native_u32(uint8_t *buf) { - uint32_t ret; - memcpy(&ret, buf, sizeof(ret)); - return ret; + uint32_t ret; + memcpy(&ret, buf, sizeof(ret)); + return ret; } -static inline uint32_t load_le_u32(uint8_t * buf) +static inline uint32_t +load_le_u32(uint8_t *buf) { - return from_le32(load_native_u32(buf)); + return from_le32(load_native_u32(buf)); } -static inline uint32_t load_be_u32(uint8_t * buf) +static inline uint32_t +load_be_u32(uint8_t *buf) { - return from_be32(load_native_u32(buf)); + return from_be32(load_native_u32(buf)); } -static inline uint64_t load_native_u64(uint8_t * buf) +static inline uint64_t +load_native_u64(uint8_t *buf) { - uint64_t ret; - memcpy(&ret, buf, sizeof(ret)); - return ret; + uint64_t ret; + memcpy(&ret, buf, sizeof(ret)); + return ret; } -static inline uint64_t load_le_u64(uint8_t * buf) +static inline uint64_t +load_le_u64(uint8_t *buf) { - return from_le64(load_native_u64(buf)); + return from_le64(load_native_u64(buf)); } -static inline uint64_t load_be_u64(uint8_t * buf) +static inline uint64_t +load_be_u64(uint8_t *buf) { - return from_be64(load_native_u64(buf)); + return from_be64(load_native_u64(buf)); } -static inline uintmax_t load_le_umax(uint8_t * buf) +static inline uintmax_t +load_le_umax(uint8_t *buf) { - switch (sizeof(uintmax_t)) { - case sizeof(uint32_t): - return from_le32(load_native_u32(buf)); - case sizeof(uint64_t): - return from_le64(load_native_u64(buf)); - default: - return 0; - } + switch (sizeof(uintmax_t)) { + case sizeof(uint32_t): + return from_le32(load_native_u32(buf)); + 
case sizeof(uint64_t): + return from_le64(load_native_u64(buf)); + default: + return 0; + } } -static inline void store_native_u16(uint8_t * buf, uint16_t val) +static inline void +store_native_u16(uint8_t *buf, uint16_t val) { - memcpy(buf, &val, sizeof(val)); + memcpy(buf, &val, sizeof(val)); } -static inline void store_le_u16(uint8_t * buf, uint16_t val) +static inline void +store_le_u16(uint8_t *buf, uint16_t val) { - store_native_u16(buf, to_le16(val)); + store_native_u16(buf, to_le16(val)); } -static inline void store_be_u16(uint8_t * buf, uint16_t val) +static inline void +store_be_u16(uint8_t *buf, uint16_t val) { - store_native_u16(buf, to_be16(val)); + store_native_u16(buf, to_be16(val)); } -static inline void store_native_u16_to_u64(uint64_t * buf, uint16_t val) +static inline void +store_native_u16_to_u64(uint64_t *buf, uint16_t val) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - store_native_u16((uint8_t *) buf, val); + store_native_u16((uint8_t *) buf, val); #else - store_native_u16((uint8_t *) buf + 6, val); + store_native_u16((uint8_t *) buf + 6, val); #endif } -static inline void store_native_u32(uint8_t * buf, uint32_t val) +static inline void +store_native_u32(uint8_t *buf, uint32_t val) { - memcpy(buf, &val, sizeof(val)); + memcpy(buf, &val, sizeof(val)); } -static inline void store_le_u32(uint8_t * buf, uint32_t val) +static inline void +store_le_u32(uint8_t *buf, uint32_t val) { - store_native_u32(buf, to_le32(val)); + store_native_u32(buf, to_le32(val)); } -static inline void store_be_u32(uint8_t * buf, uint32_t val) +static inline void +store_be_u32(uint8_t *buf, uint32_t val) { - store_native_u32(buf, to_be32(val)); + store_native_u32(buf, to_be32(val)); } -static inline void store_native_u64(uint8_t * buf, uint64_t val) +static inline void +store_native_u64(uint8_t *buf, uint64_t val) { - memcpy(buf, &val, sizeof(val)); + memcpy(buf, &val, sizeof(val)); } -static inline void store_le_u64(uint8_t * buf, uint64_t val) +static inline void 
+store_le_u64(uint8_t *buf, uint64_t val) { - store_native_u64(buf, to_le64(val)); + store_native_u64(buf, to_le64(val)); } -static inline void store_be_u64(uint8_t * buf, uint64_t val) +static inline void +store_be_u64(uint8_t *buf, uint64_t val) { - store_native_u64(buf, to_be64(val)); + store_native_u64(buf, to_be64(val)); } #endif