# Patch summary (free text ahead of the first "diff --git" header; not part of any hunk).
#
# Purpose: confine x86-specific build checks and x86-specific prototypes so that
# they are only active when targeting x86.
#
# configure.ac
#   * Adds a shell variable is_x86, set to "yes" when $CPU is "x86_64" or
#     "x86_32" and to "no" otherwise.
#   * Moves the yasm/nasm detection, the assembler selection ($AS) and the
#     HAVE_AS_KNOWS_AVX512 define inside `if test x"$is_x86" = x"yes"`.
#     The checks themselves are re-indented but otherwise unchanged.
#   * The AM_CONDITIONALs USE_YASM, USE_NASM and WITH_AVX512 stay outside the
#     new `if`, so they are still evaluated when is_x86 is "no".
#
# include/erasure_code.h
#   * Moves the declarations of ec_encode_data_update_base(),
#     gf_vect_dot_prod_base(), gf_vect_dot_prod(), gf_vect_mad() and
#     gf_vect_mad_base() (with their doc comments) up, ahead of the
#     arch-specific declarations.
#   * Wraps the arch-specific declarations, starting at ec_encode_data_sse()
#     and ending after gf_6vect_mad_avx2(), in
#     `#if defined(__i386__) || defined(__x86_64__)` ... `#endif`.
#
# include/gf_vect_mul.h
#   * Opens the same guard before the first "GF(2^8) vector multiply by
#     constant" doc comment and closes it after gf_vect_mul_avx().
#
# include/raid.h
#   * Opens the same guard after the "Arch specific versions" comment and
#     closes it after pq_check_sse().
#
# NOTE(review): the guard tests only __i386__ / __x86_64__. A compiler that
# does not define those macros on x86 (MSVC uses _M_IX86 / _M_X64) would no
# longer see the guarded prototypes -- confirm whether such builds matter here.
# NOTE(review): the line breaks of this patch appear to have been collapsed;
# it presumably needs its original newlines restored before it can be applied.
#
diff --git a/configure.ac b/configure.ac index f544e9e..d5ef0a0 100644 --- a/configure.ac +++ b/configure.ac @@ -33,6 +33,16 @@ AM_CONDITIONAL([CPU_X86_64], [test "$CPU" = "x86_64"]) AM_CONDITIONAL([CPU_X86_32], [test "$CPU" = "x86_32"]) AM_CONDITIONAL([CPU_UNDEFINED], [test "x$CPU" = "x"]) +if test "$CPU" = "x86_64"; then + is_x86=yes +else + if test "$CPU" = "x86_32"; then + is_x86=yes + else + is_x86=no + fi +fi + # Check for programs AC_PROG_CC_STDC AC_USE_SYSTEM_EXTENSIONS @@ -50,76 +60,80 @@ AS_IF([test "x$enable_debug" = "xyes"], [ AC_DEFINE(ENABLE_DEBUG, [1], [Debug messages.]) ]) -# Check for yasm and yasm features -AC_CHECK_PROG(HAVE_YASM, yasm, yes, no) -if test "$HAVE_YASM" = "no"; then - AC_MSG_RESULT([no yasm]) -else - AC_MSG_CHECKING([for modern yasm]) - AC_LANG_CONFTEST([AC_LANG_SOURCE([[vmovdqa %xmm0, %xmm1;]])]) - if yasm -f elf64 -p gas conftest.c ; then - with_modern_yasm=yes - AC_MSG_RESULT([yes]) - AC_MSG_CHECKING([for optional yasm AVX512 support]) - AC_LANG_CONFTEST([AC_LANG_SOURCE([[vpshufb %zmm0, %zmm1, %zmm2;]])]) - if yasm -f elf64 -p gas conftest.c 2> /dev/null; then - yasm_knows_avx512=yes +# If this build is for x86, look for yasm and nasm +if test x"$is_x86" = x"yes"; then + # Check for yasm and yasm features + AC_CHECK_PROG(HAVE_YASM, yasm, yes, no) + if test "$HAVE_YASM" = "no"; then + AC_MSG_RESULT([no yasm]) + else + AC_MSG_CHECKING([for modern yasm]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([[vmovdqa %xmm0, %xmm1;]])]) + if yasm -f elf64 -p gas conftest.c ; then + with_modern_yasm=yes AC_MSG_RESULT([yes]) + AC_MSG_CHECKING([for optional yasm AVX512 support]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([[vpshufb %zmm0, %zmm1, %zmm2;]])]) + if yasm -f elf64 -p gas conftest.c 2> /dev/null; then + yasm_knows_avx512=yes + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + else + AC_MSG_FAILURE([no]) + fi + fi + + # Check for nasm and nasm features + AC_CHECK_PROG(HAVE_NASM, nasm, yes, no) + if test "$HAVE_NASM" = "no"; then + 
AC_MSG_RESULT([no nasm]) + else + AC_MSG_CHECKING([for modern nasm]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([[pblendvb xmm2, xmm1;]])]) + sed -i -e '/pblendvb/!d' conftest.c + if nasm -f elf64 conftest.c 2> /dev/null; then + with_modern_nasm=yes + AC_MSG_RESULT([yes]) + AC_MSG_CHECKING([for optional nasm AVX512 support]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([[vinserti32x8 zmm0, ymm1, 1;]])]) + sed -i -e '/vinsert/!d' conftest.c + if nasm -f elf64 conftest.c 2> /dev/null; then + nasm_knows_avx512=yes + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi else AC_MSG_RESULT([no]) fi - else - AC_MSG_FAILURE([no]) fi -fi -# Check for nasm and nasm features -AC_CHECK_PROG(HAVE_NASM, nasm, yes, no) -if test "$HAVE_NASM" = "no"; then - AC_MSG_RESULT([no nasm]) -else - AC_MSG_CHECKING([for modern nasm]) - AC_LANG_CONFTEST([AC_LANG_SOURCE([[pblendvb xmm2, xmm1;]])]) - sed -i -e '/pblendvb/!d' conftest.c - if nasm -f elf64 conftest.c 2> /dev/null; then - with_modern_nasm=yes - AC_MSG_RESULT([yes]) - AC_MSG_CHECKING([for optional nasm AVX512 support]) - AC_LANG_CONFTEST([AC_LANG_SOURCE([[vinserti32x8 zmm0, ymm1, 1;]])]) - sed -i -e '/vinsert/!d' conftest.c - if nasm -f elf64 conftest.c 2> /dev/null; then - nasm_knows_avx512=yes - AC_MSG_RESULT([yes]) + # Pick an assembler yasm or nasm + if test x"$AS" = x""; then + if test x"$yasm_knows_avx512" = x"yes"; then + AS=yasm + elif test x"$nasm_knows_avx512" = x"yes"; then + AS=nasm + elif test x"$with_modern_yasm" = x"yes"; then + AS=yasm + elif test x"$with_modern_nasm" = x"yes"; then + AS=nasm else - AC_MSG_RESULT([no]) + AC_MSG_ERROR([No modern yasm or nasm found as required. 
Yasm should be 1.2.0 or later, and nasm should be v2.11.01 or later (v2.13 for AVX512).]) fi + fi + echo "Using assembler $AS" + + if test \( x"$AS" = x"yasm" -a x"$yasm_knows_avx512" = x"yes" \) -o \( x"$AS" = x"nasm" -a x"$nasm_knows_avx512" = x"yes" \); then + AC_DEFINE(HAVE_AS_KNOWS_AVX512, [1], [Assembler can do AVX512.]) + have_as_knows_avx512=yes else - AC_MSG_RESULT([no]) + AC_MSG_RESULT([Assembler does not understand AVX512 opcodes. Consider upgrading for best performance.]) fi fi -# Pick an assembler yasm or nasm -if test x"$AS" = x""; then - if test x"$yasm_knows_avx512" = x"yes"; then - AS=yasm - elif test x"$nasm_knows_avx512" = x"yes"; then - AS=nasm - elif test x"$with_modern_yasm" = x"yes"; then - AS=yasm - elif test x"$with_modern_nasm" = x"yes"; then - AS=nasm - else - AC_MSG_ERROR([No modern yasm or nasm found as required. Yasm should be 1.2.0 or later, and nasm should be v2.11.01 or later (v2.13 for AVX512).]) - fi -fi -echo "Using assembler $AS" - -if test \( x"$AS" = x"yasm" -a x"$yasm_knows_avx512" = x"yes" \) -o \( x"$AS" = x"nasm" -a x"$nasm_knows_avx512" = x"yes" \); then - AC_DEFINE(HAVE_AS_KNOWS_AVX512, [1], [Assembler can do AVX512.]) - have_as_knows_avx512=yes -else - AC_MSG_RESULT([Assembler does not understand AVX512 opcodes. Consider upgrading for best performance.]) -fi AM_CONDITIONAL(USE_YASM, test x"$AS" = x"yasm") AM_CONDITIONAL(USE_NASM, test x"$AS" = x"nasm") AM_CONDITIONAL(WITH_AVX512, test x"$have_as_knows_avx512" = x"yes") diff --git a/include/erasure_code.h b/include/erasure_code.h index 4f2db8a..5045625 100644 --- a/include/erasure_code.h +++ b/include/erasure_code.h @@ -98,33 +98,6 @@ void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls); void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, unsigned char **coding); -/** - * @brief Generate or decode erasure codes on blocks of data. - * - * Arch specific version of ec_encode_data() with same parameters. 
- * @requires SSE4.1 - */ -void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, - unsigned char **coding); - -/** - * @brief Generate or decode erasure codes on blocks of data. - * - * Arch specific version of ec_encode_data() with same parameters. - * @requires AVX - */ -void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, - unsigned char **coding); - -/** - * @brief Generate or decode erasure codes on blocks of data. - * - * Arch specific version of ec_encode_data() with same parameters. - * @requires AVX2 - */ -void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, - unsigned char **coding); - /** * @brief Generate or decode erasure codes on blocks of data, runs baseline version. * @@ -158,6 +131,127 @@ void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigne void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls, unsigned char *data, unsigned char **coding); +/** + * @brief Generate update for encode or decode of erasure codes from single source. + * + * Baseline version of ec_encode_data_update(). + */ + +void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v, + unsigned char *data, unsigned char **dest); + +/** + * @brief GF(2^8) vector dot product, runs baseline version. + * + * Does a GF(2^8) dot product across each byte of the input array and a constant + * set of coefficients to produce each byte of the output. Can be used for + * erasure coding encode and decode. Function requires pre-calculation of a + * 32*vlen byte constant array based on the input coefficients. + * + * @param len Length of each vector in bytes. Must be >= 16. + * @param vlen Number of vector sources. + * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based + * on the array of input coefficients. 
Only elements 32*CONST*j + 1 + * of this array are used, where j = (0, 1, 2...) and CONST is the + * number of elements in the array of input coefficients. The + * elements used correspond to the original input coefficients. + * @param src Array of pointers to source inputs. + * @param dest Pointer to destination data array. + * @returns none + */ + + +void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char *dest); + +/** + * @brief GF(2^8) vector dot product, runs appropriate version. + * + * Does a GF(2^8) dot product across each byte of the input array and a constant + * set of coefficients to produce each byte of the output. Can be used for + * erasure coding encode and decode. Function requires pre-calculation of a + * 32*vlen byte constant array based on the input coefficients. + * + * This function determines what instruction sets are enabled and + * selects the appropriate version at runtime. + * + * @param len Length of each vector in bytes. Must be >= 32. + * @param vlen Number of vector sources. + * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based + * on the array of input coefficients. + * @param src Array of pointers to source inputs. + * @param dest Pointer to destination data array. + * @returns none + */ + +void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char *dest); + +/** + * @brief GF(2^8) vector multiply accumulate, runs appropriate version. + * + * Does a GF(2^8) multiply across each byte of input source with expanded + * constant and add to destination array. Can be used for erasure coding encode + * and decode update when only one source is available at a time. Function + * requires pre-calculation of a 32*vec byte constant array based on the input + * coefficients. + * + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. 
+ * + * @param len Length of each vector in bytes. Must be >= 32. + * @param vec The number of vector sources or rows in the generator matrix + * for coding. + * @param vec_i The vector index corresponding to the single input source. + * @param gftbls Pointer to array of input tables generated from coding + * coefficients in ec_init_tables(). Must be of size 32*vec. + * @param src Array of pointers to source inputs. + * @param dest Pointer to destination data array. + * @returns none + */ + +void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char *dest); + +/** + * @brief GF(2^8) vector multiply accumulate, baseline version. + * + * Baseline version of gf_vect_mad() with same parameters. + */ + +void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, + unsigned char *dest); + +// x86 only +#if defined(__i386__) || defined(__x86_64__) + +/** + * @brief Generate or decode erasure codes on blocks of data. + * + * Arch specific version of ec_encode_data() with same parameters. + * @requires SSE4.1 + */ +void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, + unsigned char **coding); + +/** + * @brief Generate or decode erasure codes on blocks of data. + * + * Arch specific version of ec_encode_data() with same parameters. + * @requires AVX + */ +void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, + unsigned char **coding); + +/** + * @brief Generate or decode erasure codes on blocks of data. + * + * Arch specific version of ec_encode_data() with same parameters. + * @requires AVX2 + */ +void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, + unsigned char **coding); + /** * @brief Generate update for encode or decode of erasure codes from single source. 
* @@ -188,16 +282,6 @@ void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned cha void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls, unsigned char *data, unsigned char **coding); -/** - * @brief Generate update for encode or decode of erasure codes from single source. - * - * Baseline version of ec_encode_data_update(). - */ - -void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v, - unsigned char *data, unsigned char **dest); - - /** * @brief GF(2^8) vector dot product. * @@ -591,79 +675,6 @@ void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char **dest); -/** - * @brief GF(2^8) vector dot product, runs baseline version. - * - * Does a GF(2^8) dot product across each byte of the input array and a constant - * set of coefficients to produce each byte of the output. Can be used for - * erasure coding encode and decode. Function requires pre-calculation of a - * 32*vlen byte constant array based on the input coefficients. - * - * @param len Length of each vector in bytes. Must be >= 16. - * @param vlen Number of vector sources. - * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based - * on the array of input coefficients. Only elements 32*CONST*j + 1 - * of this array are used, where j = (0, 1, 2...) and CONST is the - * number of elements in the array of input coefficients. The - * elements used correspond to the original input coefficients. - * @param src Array of pointers to source inputs. - * @param dest Pointer to destination data array. - * @returns none - */ - -void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char *dest); - -/** - * @brief GF(2^8) vector dot product, runs appropriate version. 
- * - * Does a GF(2^8) dot product across each byte of the input array and a constant - * set of coefficients to produce each byte of the output. Can be used for - * erasure coding encode and decode. Function requires pre-calculation of a - * 32*vlen byte constant array based on the input coefficients. - * - * This function determines what instruction sets are enabled and - * selects the appropriate version at runtime. - * - * @param len Length of each vector in bytes. Must be >= 32. - * @param vlen Number of vector sources. - * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based - * on the array of input coefficients. - * @param src Array of pointers to source inputs. - * @param dest Pointer to destination data array. - * @returns none - */ - -void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char *dest); - - -/** - * @brief GF(2^8) vector multiply accumulate, runs appropriate version. - * - * Does a GF(2^8) multiply across each byte of input source with expanded - * constant and add to destination array. Can be used for erasure coding encode - * and decode update when only one source is available at a time. Function - * requires pre-calculation of a 32*vec byte constant array based on the input - * coefficients. - * - * This function determines what instruction sets are enabled and selects the - * appropriate version at runtime. - * - * @param len Length of each vector in bytes. Must be >= 32. - * @param vec The number of vector sources or rows in the generator matrix - * for coding. - * @param vec_i The vector index corresponding to the single input source. - * @param gftbls Pointer to array of input tables generated from coding - * coefficients in ec_init_tables(). Must be of size 32*vec. - * @param src Array of pointers to source inputs. - * @param dest Pointer to destination data array. 
- * @returns none - */ - -void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, - unsigned char *dest); - /** * @brief GF(2^8) vector multiply accumulate, arch specific version. * @@ -693,14 +704,6 @@ void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigne void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, unsigned char *dest); -/** - * @brief GF(2^8) vector multiply accumulate, baseline version. - * - * Baseline version of gf_vect_mad() with same parameters. - */ - -void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, - unsigned char *dest); /** * @brief GF(2^8) vector multiply with 2 accumulate. SSE version. @@ -854,6 +857,7 @@ void gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsign void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, unsigned char **dest); +#endif /********************************************************************** * The remaining are lib support functions used in GF(2^8) operations. diff --git a/include/gf_vect_mul.h b/include/gf_vect_mul.h index bf4fd01..70a0ab2 100644 --- a/include/gf_vect_mul.h +++ b/include/gf_vect_mul.h @@ -43,6 +43,9 @@ extern "C" { #endif +// x86 only +#if defined(__i386__) || defined(__x86_64__) + /** * @brief GF(2^8) vector multiply by constant. * @@ -84,6 +87,7 @@ int gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest); int gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest); +#endif /** * @brief GF(2^8) vector multiply by constant, runs appropriate version. 
diff --git a/include/raid.h b/include/raid.h index 192fca2..6100a48 100644 --- a/include/raid.h +++ b/include/raid.h @@ -120,6 +120,8 @@ int pq_check(int vects, int len, void **array); /* Arch specific versions */ +// x86 only +#if defined(__i386__) || defined(__x86_64__) /** * @brief Generate XOR parity vector from N sources. @@ -236,6 +238,7 @@ int pq_gen_avx2(int vects, int len, void **array); int pq_check_sse(int vects, int len, void **array); +#endif /** * @brief Generate P+Q parity vectors from N sources, runs baseline version.