build: Add multi-arch autoconf support

Added multi-arch support to configure.ac. Updated header files to only export SSE and AVX functions on x86. Change-Id: I4d1f8d0eccabad55ee887dc092a565c468f5c629 Signed-off-by: John Kariuki <John.K.Kariuki@intel.com>
2024-12-13 09:52:56 +01:00 · 2018-12-07 02:01:35 -07:00 · 2018-12-07 02:01:35 -07:00 · 2393791654
commit 2393791654
parent ebab4454ef
4 changed files with 201 additions and 176 deletions
--- a/configure.ac
+++ b/configure.ac
@ -33,6 +33,16 @@ AM_CONDITIONAL([CPU_X86_64], [test "$CPU" = "x86_64"])
 AM_CONDITIONAL([CPU_X86_32], [test "$CPU" = "x86_32"])
 AM_CONDITIONAL([CPU_UNDEFINED], [test "x$CPU" = "x"])

+# x86 (32- or 64-bit) builds need the yasm/nasm probing further down.
+case "$CPU" in
+   x86_64 | x86_32)
+      is_x86=yes
+      ;;
+   *)
+      is_x86=no
+      ;;
+esac
+
 # Check for programs
 AC_PROG_CC_STDC
 AC_USE_SYSTEM_EXTENSIONS
@ -50,76 +60,80 @@ AS_IF([test "x$enable_debug" = "xyes"], [
        AC_DEFINE(ENABLE_DEBUG, [1], [Debug messages.])
 ])

-# Check for yasm and yasm features
-AC_CHECK_PROG(HAVE_YASM, yasm, yes, no)
-if test "$HAVE_YASM" = "no"; then
-  AC_MSG_RESULT([no yasm])
-else
-  AC_MSG_CHECKING([for modern yasm])
-  AC_LANG_CONFTEST([AC_LANG_SOURCE([[vmovdqa %xmm0, %xmm1;]])])
-  if yasm -f elf64 -p gas conftest.c ; then
-    with_modern_yasm=yes
-    AC_MSG_RESULT([yes])
-    AC_MSG_CHECKING([for optional yasm AVX512 support])
-    AC_LANG_CONFTEST([AC_LANG_SOURCE([[vpshufb %zmm0, %zmm1, %zmm2;]])])
-    if yasm -f elf64 -p gas conftest.c 2> /dev/null; then
-      yasm_knows_avx512=yes
+# If this build is for x86, look for yasm and nasm
+if test x"$is_x86" = x"yes"; then
+  # Check for yasm and yasm features (an outdated yasm is not fatal: nasm may still qualify)
+  AC_CHECK_PROG(HAVE_YASM, yasm, yes, no)
+  if test "$HAVE_YASM" = "no"; then
+    AC_MSG_RESULT([no yasm])
+  else
+    AC_MSG_CHECKING([for modern yasm])
+    AC_LANG_CONFTEST([AC_LANG_SOURCE([[vmovdqa %xmm0, %xmm1;]])])
+    if yasm -f elf64 -p gas conftest.c ; then
+      with_modern_yasm=yes
      AC_MSG_RESULT([yes])
+      AC_MSG_CHECKING([for optional yasm AVX512 support])
+      AC_LANG_CONFTEST([AC_LANG_SOURCE([[vpshufb %zmm0, %zmm1, %zmm2;]])])
+      if yasm -f elf64 -p gas conftest.c 2> /dev/null; then
+        yasm_knows_avx512=yes
+        AC_MSG_RESULT([yes])
+      else
+        AC_MSG_RESULT([no])
+      fi
+    else
+      AC_MSG_RESULT([no])
+    fi
+  fi
+
+  # Check for nasm: it counts as modern if pblendvb assembles; AVX512 (vinserti32x8) support is optional
+  AC_CHECK_PROG(HAVE_NASM, nasm, yes, no)
+  if test "$HAVE_NASM" = "no"; then
+    AC_MSG_RESULT([no nasm])
+  else
+    AC_MSG_CHECKING([for modern nasm])
+    AC_LANG_CONFTEST([AC_LANG_SOURCE([[pblendvb xmm2, xmm1;]])])
+    sed -i -e '/pblendvb/!d' conftest.c
+    if nasm -f elf64 conftest.c 2> /dev/null; then
+      with_modern_nasm=yes
+      AC_MSG_RESULT([yes])
+      AC_MSG_CHECKING([for optional nasm AVX512 support])
+      AC_LANG_CONFTEST([AC_LANG_SOURCE([[vinserti32x8 zmm0, ymm1, 1;]])])
+      sed -i -e '/vinsert/!d' conftest.c
+      if nasm -f elf64  conftest.c 2> /dev/null; then
+        nasm_knows_avx512=yes
+        AC_MSG_RESULT([yes])
+      else
+        AC_MSG_RESULT([no])
+      fi
    else
      AC_MSG_RESULT([no])
    fi
-  else
-    AC_MSG_FAILURE([no])
  fi
-fi

-# Check for nasm and nasm features
-AC_CHECK_PROG(HAVE_NASM, nasm, yes, no)
-if test "$HAVE_NASM" = "no"; then
-  AC_MSG_RESULT([no nasm])
-else
-  AC_MSG_CHECKING([for modern nasm])
-  AC_LANG_CONFTEST([AC_LANG_SOURCE([[pblendvb xmm2, xmm1;]])])
-  sed -i -e '/pblendvb/!d' conftest.c
-  if nasm -f elf64 conftest.c 2> /dev/null; then
-    with_modern_nasm=yes
-    AC_MSG_RESULT([yes])
-    AC_MSG_CHECKING([for optional nasm AVX512 support])
-    AC_LANG_CONFTEST([AC_LANG_SOURCE([[vinserti32x8 zmm0, ymm1, 1;]])])
-    sed -i -e '/vinsert/!d' conftest.c
-    if nasm -f elf64  conftest.c 2> /dev/null; then
-      nasm_knows_avx512=yes
-      AC_MSG_RESULT([yes])
+  # Pick an assembler yasm or nasm
+  if test x"$AS" = x""; then
+    if test x"$yasm_knows_avx512" = x"yes"; then
+      AS=yasm
+    elif test x"$nasm_knows_avx512" = x"yes"; then
+      AS=nasm
+    elif test x"$with_modern_yasm" = x"yes"; then
+      AS=yasm
+    elif test x"$with_modern_nasm" = x"yes"; then
+      AS=nasm
    else
-      AC_MSG_RESULT([no])
+      AC_MSG_ERROR([No modern yasm or nasm found as required. Yasm should be 1.2.0 or later, and nasm should be v2.11.01 or later (v2.13 for AVX512).])
    fi
+  fi
+  echo "Using assembler $AS"
+  # AVX512 is enabled only if the selected assembler passed its AVX512 probe (grouped tests: test -a/-o is unportable)
+  if { test x"$AS" = x"yasm" && test x"$yasm_knows_avx512" = x"yes"; } || { test x"$AS" = x"nasm" && test x"$nasm_knows_avx512" = x"yes"; }; then
+    AC_DEFINE(HAVE_AS_KNOWS_AVX512, [1], [Assembler can do AVX512.])
+    have_as_knows_avx512=yes
  else
-    AC_MSG_RESULT([no])
+    AC_MSG_RESULT([Assembler does not understand AVX512 opcodes.  Consider upgrading for best performance.])
  fi
 fi

-# Pick an assembler yasm or nasm
-if test x"$AS" = x""; then
-  if test x"$yasm_knows_avx512" = x"yes"; then
-    AS=yasm
-  elif test x"$nasm_knows_avx512" = x"yes"; then
-    AS=nasm
-  elif test x"$with_modern_yasm" = x"yes"; then
-    AS=yasm
-  elif test x"$with_modern_nasm" = x"yes"; then
-    AS=nasm
-  else
-    AC_MSG_ERROR([No modern yasm or nasm found as required. Yasm should be 1.2.0 or later, and nasm should be v2.11.01 or later (v2.13 for AVX512).])
-  fi
-fi
-echo "Using assembler $AS"
-
-if test \( x"$AS" = x"yasm" -a x"$yasm_knows_avx512" = x"yes" \) -o \( x"$AS" = x"nasm" -a x"$nasm_knows_avx512" = x"yes" \); then
-  AC_DEFINE(HAVE_AS_KNOWS_AVX512, [1], [Assembler can do AVX512.])
-  have_as_knows_avx512=yes
-else
-  AC_MSG_RESULT([Assembler does not understand AVX512 opcodes.  Consider upgrading for best performance.])
-fi
 AM_CONDITIONAL(USE_YASM, test x"$AS" = x"yasm")
 AM_CONDITIONAL(USE_NASM, test x"$AS" = x"nasm")
 AM_CONDITIONAL(WITH_AVX512, test x"$have_as_knows_avx512" = x"yes")
--- a/include/erasure_code.h
+++ b/include/erasure_code.h
@ -98,33 +98,6 @@ void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls);
 void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
 		    unsigned char **coding);

-/**
- * @brief Generate or decode erasure codes on blocks of data.
- *
- * Arch specific version of ec_encode_data() with same parameters.
- * @requires SSE4.1
- */
-void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
-			unsigned char **coding);
-
-/**
- * @brief Generate or decode erasure codes on blocks of data.
- *
- * Arch specific version of ec_encode_data() with same parameters.
- * @requires AVX
- */
-void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
-			unsigned char **coding);
-
-/**
- * @brief Generate or decode erasure codes on blocks of data.
- *
- * Arch specific version of ec_encode_data() with same parameters.
- * @requires AVX2
- */
-void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
-			 unsigned char **coding);
-
 /**
 * @brief Generate or decode erasure codes on blocks of data, runs baseline version.
 *
@ -158,6 +131,127 @@ void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigne
 void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
 			   unsigned char *data, unsigned char **coding);

+/**
+ * @brief Generate update for encode or decode of erasure codes from single source.
+ *
+ * Baseline version of ec_encode_data_update().
+ */
+
+void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
+				unsigned char *data, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product, runs baseline version.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ *
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ *               on the array of input coefficients. Only elements 32*CONST*j + 1
+ *               of this array are used, where j = (0, 1, 2...) and CONST is the
+ *               number of elements in the array of input coefficients. The
+ *               elements used correspond to the original input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Pointer to destination data array.
+ * @returns none
+ */
+
+
+void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls,
+                        unsigned char **src, unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector dot product, runs appropriate version.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
+ * @param len    Length of each vector in bytes. Must be >= 32.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ *               on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls,
+                        unsigned char **src, unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector multiply accumulate, runs appropriate version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constant and add to destination array. Can be used for erasure coding encode
+ * and decode update when only one source is available at a time. Function
+ * requires pre-calculation of a 32*vec byte constant array based on the input
+ * coefficients.
+ *
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param len    Length of each vector in bytes. Must be >= 32.
+ * @param vec    The number of vector sources or rows in the generator matrix
+ * 		 for coding.
+ * @param vec_i  The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * 		 coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src    Pointer to source input array.
+ * @param dest   Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+		 unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector multiply accumulate, baseline version.
+ *
+ * Baseline version of gf_vect_mad() with same parameters.
+ */
+
+void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
+		      unsigned char *dest);
+
+// x86 only (GCC/Clang or MSVC target macros)
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
+
+/**
+ * @brief Generate or decode erasure codes on blocks of data.
+ *
+ * Arch specific version of ec_encode_data() with same parameters.
+ * @requires SSE4.1
+ */
+void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+			unsigned char **coding);
+
+/**
+ * @brief Generate or decode erasure codes on blocks of data.
+ *
+ * Arch specific version of ec_encode_data() with same parameters.
+ * @requires AVX
+ */
+void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+			unsigned char **coding);
+
+/**
+ * @brief Generate or decode erasure codes on blocks of data.
+ *
+ * Arch specific version of ec_encode_data() with same parameters.
+ * @requires AVX2
+ */
+void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+			 unsigned char **coding);
+
 /**
 * @brief Generate update for encode or decode of erasure codes from single source.
 *
@ -188,16 +282,6 @@ void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned cha
 void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
 				unsigned char *data, unsigned char **coding);

-/**
- * @brief Generate update for encode or decode of erasure codes from single source.
- *
- * Baseline version of ec_encode_data_update().
- */
-
-void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
-				unsigned char *data, unsigned char **dest);
-
-
 /**
 * @brief GF(2^8) vector dot product.
 *
@ -591,79 +675,6 @@ void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
 void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
 			unsigned char **src, unsigned char **dest);

-/**
- * @brief GF(2^8) vector dot product, runs baseline version.
- *
- * Does a GF(2^8) dot product across each byte of the input array and a constant
- * set of coefficients to produce each byte of the output. Can be used for
- * erasure coding encode and decode. Function requires pre-calculation of a
- * 32*vlen byte constant array based on the input coefficients.
- *
- * @param len    Length of each vector in bytes. Must be >= 16.
- * @param vlen   Number of vector sources.
- * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
- *               on the array of input coefficients. Only elements 32*CONST*j + 1
- *               of this array are used, where j = (0, 1, 2...) and CONST is the
- *               number of elements in the array of input coefficients. The
- *               elements used correspond to the original input coefficients.
- * @param src    Array of pointers to source inputs.
- * @param dest   Pointer to destination data array.
- * @returns none
- */
-
-void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls,
-                        unsigned char **src, unsigned char *dest);
-
-/**
- * @brief GF(2^8) vector dot product, runs appropriate version.
- *
- * Does a GF(2^8) dot product across each byte of the input array and a constant
- * set of coefficients to produce each byte of the output. Can be used for
- * erasure coding encode and decode. Function requires pre-calculation of a
- * 32*vlen byte constant array based on the input coefficients.
- *
- * This function determines what instruction sets are enabled and
- * selects the appropriate version at runtime.
- *
- * @param len    Length of each vector in bytes. Must be >= 32.
- * @param vlen   Number of vector sources.
- * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
- *               on the array of input coefficients.
- * @param src    Array of pointers to source inputs.
- * @param dest   Pointer to destination data array.
- * @returns none
- */
-
-void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls,
-                        unsigned char **src, unsigned char *dest);
-
-
-/**
- * @brief GF(2^8) vector multiply accumulate, runs appropriate version.
- *
- * Does a GF(2^8) multiply across each byte of input source with expanded
- * constant and add to destination array. Can be used for erasure coding encode
- * and decode update when only one source is available at a time. Function
- * requires pre-calculation of a 32*vec byte constant array based on the input
- * coefficients.
- *
- * This function determines what instruction sets are enabled and selects the
- * appropriate version at runtime.
- *
- * @param len    Length of each vector in bytes. Must be >= 32.
- * @param vec    The number of vector sources or rows in the generator matrix
- * 		 for coding.
- * @param vec_i  The vector index corresponding to the single input source.
- * @param gftbls Pointer to array of input tables generated from coding
- * 		 coefficients in ec_init_tables(). Must be of size 32*vec.
- * @param src    Array of pointers to source inputs.
- * @param dest   Pointer to destination data array.
- * @returns none
- */
-
-void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
-		 unsigned char *dest);
-
 /**
 * @brief GF(2^8) vector multiply accumulate, arch specific version.
 *
@ -693,14 +704,6 @@ void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigne
 void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 		      unsigned char *dest);

-/**
- * @brief GF(2^8) vector multiply accumulate, baseline version.
- *
- * Baseline version of gf_vect_mad() with same parameters.
- */
-
-void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
-		      unsigned char *dest);

 /**
 * @brief GF(2^8) vector multiply with 2 accumulate.  SSE version.
@ -854,6 +857,7 @@ void gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
 void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 		       unsigned char **dest);

+#endif // x86 only

 /**********************************************************************
 * The remaining are lib support functions used in GF(2^8) operations.
--- a/include/gf_vect_mul.h
+++ b/include/gf_vect_mul.h
@ -43,6 +43,9 @@
 extern "C" {
 #endif

+// x86 only (GCC/Clang or MSVC target macros)
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
+
 /**
 * @brief GF(2^8) vector multiply by constant.
 *
@ -84,6 +87,7 @@ int gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest);

 int gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest);

+#endif // x86 only

 /**
 * @brief GF(2^8) vector multiply by constant, runs appropriate version.
--- a/include/raid.h
+++ b/include/raid.h
@ -120,6 +120,8 @@ int pq_check(int vects, int len, void **array);


 /* Arch specific versions */
+// x86 only (GCC/Clang or MSVC target macros)
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)

 /**
 * @brief Generate XOR parity vector from N sources.
@ -236,6 +238,7 @@ int pq_gen_avx2(int vects, int len, void **array);

 int pq_check_sse(int vects, int len, void **array);

+#endif // x86 only

 /**
 * @brief Generate P+Q parity vectors from N sources, runs baseline version.