Mirror of https://github.com/intel/isa-l.git (synced 2024-12-12 09:23:50 +01:00)

commit 180c74aefd (parent a3d5cd8642)

Enable VSX SIMD in ISA-L for ppc64le

1) Implement the erasure code functions with AltiVec/VSX intrinsics.
2) Coding style update.

Change-Id: I2c81d035f4083e9b011dbf3b741f628813b68606
Thanks-to: Daniel Axtens <dja@axtens.net>
Signed-off-by: Hong Bo Peng <penghb@cn.ibm.com>
@@ -27,9 +27,11 @@ other_tests=
other_tests_x86_64=
other_tests_x86_32=
other_tests_aarch64=
other_tests_ppc64le=
lsrc_x86_64=
lsrc_x86_32=
lsrc_aarch64=
lsrc_ppc64le=
lsrc_base_aliases=
lsrc32=
unit_tests32=
@@ -71,6 +73,11 @@ libisal_la_SOURCES += ${lsrc_aarch64}
other_tests += ${other_tests_aarch64}
endif

if CPU_PPC64LE
libisal_la_SOURCES += ${lsrc_ppc64le}
other_tests += ${other_tests_ppc64le}
endif

if CPU_UNDEFINED
libisal_la_SOURCES += ${lsrc_base_aliases}
endif
@@ -30,10 +30,13 @@ AS_CASE([$host_cpu],
  [i?86], [CPU="x86_32"],
  [aarch64], [CPU="aarch64"],
  [arm64], [CPU="aarch64"],
  [powerpc64le], [CPU="ppc64le"],
  [ppc64le], [CPU="ppc64le"],
)
AM_CONDITIONAL([CPU_X86_64], [test "$CPU" = "x86_64"])
AM_CONDITIONAL([CPU_X86_32], [test "$CPU" = "x86_32"])
AM_CONDITIONAL([CPU_AARCH64], [test "$CPU" = "aarch64"])
AM_CONDITIONAL([CPU_PPC64LE], [test "$CPU" = "ppc64le"])
AM_CONDITIONAL([CPU_UNDEFINED], [test "x$CPU" = "x"])

if test "$CPU" = "x86_64"; then
@@ -35,6 +35,7 @@ lsrc += \

lsrc_base_aliases += crc/crc_base_aliases.c
lsrc_x86_32 += crc/crc_base_aliases.c
lsrc_ppc64le += crc/crc_base_aliases.c

lsrc_x86_64 += \
	crc/crc16_t10dif_01.asm \
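The crc_base_aliases.c entry for lsrc_ppc64le means the CRC routines are not vectorized by this patch: on ppc64le the public CRC entry points resolve to the portable base implementations, and only the erasure code paths below gain VSX kernels. A minimal caller sketch, assuming the standard ISA-L CRC API from crc.h (buffer contents are arbitrary) -- the same code links unchanged on every architecture:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "crc.h"	/* ISA-L CRC API; installed builds may include <isa-l.h> instead */

int main(void)
{
	unsigned char buf[4096];
	memset(buf, 0x5a, sizeof(buf));

	/* On ppc64le these land in the _base implementations via crc_base_aliases.c;
	 * on x86_64/aarch64 they pick up the SIMD versions. */
	uint16_t c16 = crc16_t10dif(0, buf, sizeof(buf));
	uint32_t c32 = crc32_ieee(0, buf, sizeof(buf));

	printf("crc16_t10dif=0x%04x crc32_ieee=0x%08x\n", c16, c32);
	return 0;
}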
@@ -29,6 +29,8 @@

include erasure_code/aarch64/Makefile.am

include erasure_code/ppc64le/Makefile.am

lsrc += erasure_code/ec_base.c

lsrc_base_aliases += erasure_code/ec_base_aliases.c
erasure_code/ppc64le/Makefile.am (new file, 15 lines)
@@ -0,0 +1,15 @@
lsrc_ppc64le += erasure_code/ppc64le/ec_base_vsx.c \
	erasure_code/ppc64le/gf_vect_mul_vsx.c \
	erasure_code/ppc64le/gf_vect_dot_prod_vsx.c \
	erasure_code/ppc64le/gf_vect_mad_vsx.c \
	erasure_code/ppc64le/gf_2vect_dot_prod_vsx.c \
	erasure_code/ppc64le/gf_2vect_mad_vsx.c \
	erasure_code/ppc64le/gf_3vect_dot_prod_vsx.c \
	erasure_code/ppc64le/gf_3vect_mad_vsx.c \
	erasure_code/ppc64le/gf_4vect_dot_prod_vsx.c \
	erasure_code/ppc64le/gf_4vect_mad_vsx.c \
	erasure_code/ppc64le/gf_5vect_dot_prod_vsx.c \
	erasure_code/ppc64le/gf_5vect_mad_vsx.c \
	erasure_code/ppc64le/gf_6vect_dot_prod_vsx.c \
	erasure_code/ppc64le/gf_6vect_mad_vsx.c
erasure_code/ppc64le/ec_base_vsx.c (new file, 97 lines)
@@ -0,0 +1,97 @@
#include "erasure_code.h"
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_vect_dot_prod(int len, int vlen, unsigned char *v,
|
||||
unsigned char **src, unsigned char *dest)
|
||||
{
|
||||
gf_vect_dot_prod_vsx(len, vlen, v, src, dest);
|
||||
}
|
||||
|
||||
void gf_vect_mad(int len, int vec, int vec_i, unsigned char *v,
|
||||
unsigned char *src, unsigned char *dest)
|
||||
{
|
||||
gf_vect_mad_vsx(len, vec, vec_i, v, src, dest);
|
||||
|
||||
}
|
||||
|
||||
void ec_encode_data(int len, int srcs, int dests, unsigned char *v,
|
||||
unsigned char **src, unsigned char **dest)
|
||||
{
|
||||
if (len < 64) {
|
||||
ec_encode_data_base(len, srcs, dests, v, src, dest);
|
||||
return;
|
||||
}
|
||||
|
||||
while (dests >= 6) {
|
||||
gf_6vect_dot_prod_vsx(len, srcs, v, src, dest);
|
||||
v += 6 * srcs * 32;
|
||||
dest += 6;
|
||||
dests -= 6;
|
||||
}
|
||||
switch (dests) {
|
||||
case 6:
|
||||
gf_6vect_dot_prod_vsx(len, srcs, v, src, dest);
|
||||
break;
|
||||
case 5:
|
||||
gf_5vect_dot_prod_vsx(len, srcs, v, src, dest);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_dot_prod_vsx(len, srcs, v, src, dest);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_dot_prod_vsx(len, srcs, v, src, dest);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_vsx(len, srcs, v, src, dest);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_vsx(len, srcs, v, src, *dest);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v,
|
||||
unsigned char *data, unsigned char **dest)
|
||||
{
|
||||
if (len < 64) {
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows >= 6) {
|
||||
gf_6vect_mad_vsx(len, k, vec_i, v, data, dest);
|
||||
v += 6 * k * 32;
|
||||
dest += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 6:
|
||||
gf_6vect_mad_vsx(len, k, vec_i, v, data, dest);
|
||||
break;
|
||||
case 5:
|
||||
gf_5vect_mad_vsx(len, k, vec_i, v, data, dest);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_vsx(len, k, vec_i, v, data, dest);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_vsx(len, k, vec_i, v, data, dest);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_vsx(len, k, vec_i, v, data, dest);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_vsx(len, k, vec_i, v, data, *dest);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
|
||||
{
|
||||
gf_vect_mul_vsx(len, a, (unsigned char *)src, (unsigned char *)dest);
|
||||
return 0;
|
||||
}
|
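For context, these wrappers are what let the generic ISA-L entry points reach the VSX kernels on ppc64le builds. A minimal encode sketch against the public API declared in erasure_code.h (the K/P/LEN values and the Reed-Solomon matrix choice are illustrative assumptions, not part of this commit):

#include <stdlib.h>
#include <string.h>
#include "erasure_code.h"

#define K   10		/* source (data) buffers */
#define P   4		/* destination (parity) buffers */
#define LEN 4096	/* bytes per buffer; >= 64, so ec_encode_data() above takes the VSX path */

int main(void)
{
	unsigned char *data[K], *parity[P];
	unsigned char encode_matrix[(K + P) * K], g_tbls[K * P * 32];
	int i;

	for (i = 0; i < K; i++) {
		data[i] = malloc(LEN);
		memset(data[i], i + 1, LEN);
	}
	for (i = 0; i < P; i++)
		parity[i] = malloc(LEN);

	/* Build a (K+P) x K generator matrix and expand its parity rows into the
	 * 32-byte-per-coefficient tables the VSX kernels consume. */
	gf_gen_rs_matrix(encode_matrix, K + P, K);
	ec_init_tables(K, P, &encode_matrix[K * K], g_tbls);

	/* Dispatches to gf_4vect/gf_6vect/..._dot_prod_vsx() per the switch above. */
	ec_encode_data(LEN, K, P, g_tbls, data, parity);
	return 0;
}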
erasure_code/ppc64le/ec_base_vsx.h (new file, 338 lines)
@@ -0,0 +1,338 @@
#ifndef _ERASURE_CODE_PPC64LE_H_
#define _ERASURE_CODE_PPC64LE_H_

#include "erasure_code.h"
#include <altivec.h>

#ifdef __cplusplus
extern "C" {
#endif

#if defined(__ibmxl__)
#define EC_vec_xl(a, b) vec_xl_be(a, b)
#define EC_vec_permxor(va, vb, vc) __vpermxor(va, vb, vc)
#elif defined __GNUC__ && __GNUC__ >= 8
#define EC_vec_xl(a, b) vec_xl_be(a, b)
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vc)
#elif defined __GNUC__ && __GNUC__ >= 7
#if defined _ARCH_PWR9
#define EC_vec_xl(a, b) vec_vsx_ld(a, b)
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
#else
inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
	vector unsigned char vc;
	__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0" : "=wa" (vc) : "r" (off), "r" (ptr));
	return vc;
}
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
#endif
#else
#if defined _ARCH_PWR8
inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
	vector unsigned char vc;
	__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0" : "=wa" (vc) : "r" (off), "r" (ptr));
	return vc;
}
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
#else
#error "This code is only supported on ppc64le."
#endif
#endif

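Everything below builds on the permute-xor selection above: ec_init_tables() expands each coding coefficient into a 32-byte table, and vpermxor looks up the low-nibble half and the high-nibble half of that table for every source byte and XORs the two results. A scalar model of what one EC_vec_permxor(vhi, vlo, vsrc) application computes per byte, illustrative only -- it assumes the usual ISA-L table layout (16 low-nibble products followed by 16 high-nibble products) and ignores the endian fix-ups done by EC_vec_xl():

/* GF(2^8) multiply of byte x by the coefficient encoded in tbl32,
 * i.e. the per-byte effect of one permute-xor step. */
static unsigned char gf_mul_by_table(const unsigned char *tbl32, unsigned char x)
{
	const unsigned char *lo = tbl32;	/* coefficient times 0x00..0x0f */
	const unsigned char *hi = tbl32 + 16;	/* coefficient times 0x00, 0x10, .., 0xf0 */

	return lo[x & 0x0f] ^ hi[x >> 4];
}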
/**
|
||||
* @brief GF(2^8) vector multiply. VSX version.
|
||||
*
|
||||
* Does a GF(2^8) multiply across each byte of input source with expanded
|
||||
* constant and save to destination array. Can be used for erasure coding encode
|
||||
* and decode update when only one source is available at a time. Function
|
||||
* requires pre-calculation of a 32 byte constant array based on the input
|
||||
* coefficients.
|
||||
* @requires VSX
|
||||
*
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product. VSX version.
|
||||
*
|
||||
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||
* set of coefficients to produce each byte of the output. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 32*vlen byte constant array based on the input coefficients.
|
||||
* @requires VSX
|
||||
*
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||
* on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with two outputs. VSX version.
|
||||
*
|
||||
* Vector dot product optimized to calculate two outputs at a time. Does two
|
||||
* GF(2^8) dot products across each byte of the input array and two constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 2*32*vlen byte constant array based on the two sets of input coefficients.
|
||||
* @requires VSX
|
||||
*
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with three outputs. VSX version.
|
||||
*
|
||||
* Vector dot product optimized to calculate three outputs at a time. Does three
|
||||
* GF(2^8) dot products across each byte of the input array and three constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 3*32*vlen byte constant array based on the three sets of input coefficients.
|
||||
* @requires VSX
|
||||
*
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with four outputs. VSX version.
|
||||
*
|
||||
* Vector dot product optimized to calculate four outputs at a time. Does four
|
||||
* GF(2^8) dot products across each byte of the input array and four constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 4*32*vlen byte constant array based on the four sets of input coefficients.
|
||||
* @requires VSX
|
||||
*
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with five outputs. VSX version.
|
||||
*
|
||||
* Vector dot product optimized to calculate five outputs at a time. Does five
|
||||
* GF(2^8) dot products across each byte of the input array and five constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 5*32*vlen byte constant array based on the five sets of input coefficients.
|
||||
* @requires VSX
|
||||
*
|
||||
* @param len    Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with six outputs. VSX version.
|
||||
*
|
||||
* Vector dot product optimized to calculate six outputs at a time. Does six
|
||||
* GF(2^8) dot products across each byte of the input array and six constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 6*32*vlen byte constant array based on the six sets of input coefficients.
|
||||
* @requires VSX
|
||||
*
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply accumulate. VSX version.
|
||||
*
|
||||
* Does a GF(2^8) multiply across each byte of input source with expanded
|
||||
* constant and add to destination array. Can be used for erasure coding encode
|
||||
* and decode update when only one source is available at a time. Function
|
||||
* requires pre-calculation of a 32*vec byte constant array based on the input
|
||||
* coefficients.
|
||||
* @requires VSX
|
||||
*
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param vec The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param vec_i The vector index corresponding to the single input source.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*vec.
|
||||
* @param src    Pointer to source input array.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char *dest);
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 2 accumulate. VSX version.
|
||||
*
|
||||
* Does a GF(2^8) multiply across each byte of input source with expanded
|
||||
* constants and add to destination arrays. Can be used for erasure coding
|
||||
* encode and decode update when only one source is available at a
|
||||
* time. Function requires pre-calculation of a 32*vec byte constant array based
|
||||
* on the input coefficients.
|
||||
* @requires VSX
|
||||
*
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param vec The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param vec_i The vector index corresponding to the single input source.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*vec.
|
||||
* @param src Pointer to source input array.
|
||||
* @param dest Array of pointers to destination input/outputs.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 3 accumulate. VSX version.
|
||||
*
|
||||
* Does a GF(2^8) multiply across each byte of input source with expanded
|
||||
* constants and add to destination arrays. Can be used for erasure coding
|
||||
* encode and decode update when only one source is available at a
|
||||
* time. Function requires pre-calculation of a 32*vec byte constant array based
|
||||
* on the input coefficients.
|
||||
* @requires VSX
|
||||
*
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param vec The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param vec_i The vector index corresponding to the single input source.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*vec.
|
||||
* @param src Pointer to source input array.
|
||||
* @param dest Array of pointers to destination input/outputs.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 4 accumulate. VSX version.
|
||||
*
|
||||
* Does a GF(2^8) multiply across each byte of input source with expanded
|
||||
* constants and add to destination arrays. Can be used for erasure coding
|
||||
* encode and decode update when only one source is available at a
|
||||
* time. Function requires pre-calculation of a 32*vec byte constant array based
|
||||
* on the input coefficients.
|
||||
* @requires VSX
|
||||
*
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param vec The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param vec_i The vector index corresponding to the single input source.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*vec.
|
||||
* @param src Pointer to source input array.
|
||||
* @param dest Array of pointers to destination input/outputs.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 5 accumulate. VSX version.
|
||||
*
|
||||
* Does a GF(2^8) multiply across each byte of input source with expanded
|
||||
* constants and add to destination arrays. Can be used for erasure coding
|
||||
* encode and decode update when only one source is available at a
|
||||
* time. Function requires pre-calculation of a 32*vec byte constant array based
|
||||
* on the input coefficients.
|
||||
* @requires VSX
|
||||
*
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param vec The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param vec_i The vector index corresponding to the single input source.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*vec.
|
||||
* @param src Pointer to source input array.
|
||||
* @param dest Array of pointers to destination input/outputs.
|
||||
* @returns none
|
||||
*/
|
||||
void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 6 accumulate. VSX version.
|
||||
*
|
||||
* Does a GF(2^8) multiply across each byte of input source with expanded
|
||||
* constants and add to destination arrays. Can be used for erasure coding
|
||||
* encode and decode update when only one source is available at a
|
||||
* time. Function requires pre-calculation of a 32*vec byte constant array based
|
||||
* on the input coefficients.
|
||||
* @requires VSX
|
||||
*
|
||||
* @param len Length of each vector in bytes.
|
||||
* @param vec The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param vec_i The vector index corresponding to the single input source.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*vec.
|
||||
* @param src Pointer to source input array.
|
||||
* @param dest Array of pointers to destination input/outputs.
|
||||
* @returns none
|
||||
*/
|
||||
void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ERASURE_CODE_PPC64LE_H_
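The *_mad_vsx() routines declared above serve the incremental path: when sources are only available one at a time, ec_encode_data_update() folds each new source into the existing parity instead of recomputing the whole dot product. A short sketch of that flow (same assumed setup as the encode example after ec_base_vsx.c; buffer management is left to the caller):

#include <string.h>
#include "erasure_code.h"

/* Build p parity buffers from k sources that arrive one at a time.
 * g_tbls is the k*p*32-byte table from ec_init_tables(); parity buffers
 * must be zero-filled before the first update call. */
void encode_incremental(int len, int k, int p, unsigned char *g_tbls,
			unsigned char **src, unsigned char **parity)
{
	int i;

	for (i = 0; i < p; i++)
		memset(parity[i], 0, len);

	/* Each call lands in gf_Nvect_mad_vsx() on ppc64le (len >= 64). */
	for (i = 0; i < k; i++)
		ec_encode_data_update(len, k, p, i, g_tbls, src[i], parity);
}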
erasure_code/ppc64le/gf_2vect_dot_prod_vsx.c (new file, 83 lines)
@@ -0,0 +1,83 @@
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4;
|
||||
vector unsigned char vYD, vYE, vYF, vYG;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1;
|
||||
int i, j, head;
|
||||
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
|
||||
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_2vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
}
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *)src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
}
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
}
|
||||
return;
|
||||
}
|
erasure_code/ppc64le/gf_2vect_mad_vsx.c (new file, 65 lines)
@@ -0,0 +1,65 @@
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4;
|
||||
vector unsigned char vYD, vYE, vYF, vYG;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1;
|
||||
int i, head;
|
||||
|
||||
s = (unsigned char *)src;
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
}
|
||||
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
}
|
||||
return;
|
||||
}
|
erasure_code/ppc64le/gf_3vect_dot_prod_vsx.c (new file, 104 lines)
@@ -0,0 +1,104 @@
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
||||
int i, j, head;
|
||||
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
|
||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
|
||||
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_3vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
||||
}
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
vY5 = vY5 ^ vY5;
|
||||
vY6 = vY6 ^ vY6;
|
||||
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
vYH = vYH ^ vYH;
|
||||
vYI = vYI ^ vYI;
|
||||
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
unsigned char *g2 = &gftbls[2 * 32 * vlen];
|
||||
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *)src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vlo2 = vec_xl(0, g2);
|
||||
vhi2 = vec_xl(16, g2);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
g2 += 32;
|
||||
}
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
}
|
||||
return;
|
||||
}
|
erasure_code/ppc64le/gf_3vect_mad_vsx.c (new file, 84 lines)
@@ -0,0 +1,84 @@
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
||||
int i, head;
|
||||
|
||||
s = (unsigned char *)src;
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
|
||||
}
|
||||
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vY5 = vec_xl(0, t2 + i);
|
||||
vY6 = vec_xl(16, t2 + i);
|
||||
vYH = vec_xl(32, t2 + i);
|
||||
vYI = vec_xl(48, t2 + i);
|
||||
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
}
|
||||
return;
|
||||
}
|
erasure_code/ppc64le/gf_4vect_dot_prod_vsx.c (new file, 124 lines)
@@ -0,0 +1,124 @@
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2, *t3;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
|
||||
int i, j, head;
|
||||
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
|
||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
|
||||
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]);
|
||||
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_4vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
t3 = (unsigned char *)dest[3];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
|
||||
}
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
vY5 = vY5 ^ vY5;
|
||||
vY6 = vY6 ^ vY6;
|
||||
vY7 = vY7 ^ vY7;
|
||||
vY8 = vY8 ^ vY8;
|
||||
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
vYH = vYH ^ vYH;
|
||||
vYI = vYI ^ vYI;
|
||||
vYJ = vYJ ^ vYJ;
|
||||
vYK = vYK ^ vYK;
|
||||
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
unsigned char *g2 = &gftbls[2 * 32 * vlen];
|
||||
unsigned char *g3 = &gftbls[3 * 32 * vlen];
|
||||
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *)src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vlo2 = vec_xl(0, g2);
|
||||
vhi2 = vec_xl(16, g2);
|
||||
vlo3 = vec_xl(0, g3);
|
||||
vhi3 = vec_xl(16, g3);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
g2 += 32;
|
||||
g3 += 32;
|
||||
}
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
}
|
||||
return;
|
||||
}
|
erasure_code/ppc64le/gf_4vect_mad_vsx.c (new file, 103 lines)
@@ -0,0 +1,103 @@
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2, *t3;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
|
||||
int i, head;
|
||||
|
||||
s = (unsigned char *)src;
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
t3 = (unsigned char *)dest[3];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
|
||||
}
|
||||
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vY5 = vec_xl(0, t2 + i);
|
||||
vY6 = vec_xl(16, t2 + i);
|
||||
vYH = vec_xl(32, t2 + i);
|
||||
vYI = vec_xl(48, t2 + i);
|
||||
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
vY7 = vec_xl(0, t3 + i);
|
||||
vY8 = vec_xl(16, t3 + i);
|
||||
vYJ = vec_xl(32, t3 + i);
|
||||
vYK = vec_xl(48, t3 + i);
|
||||
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
}
|
||||
return;
|
||||
}
|
erasure_code/ppc64le/gf_5vect_dot_prod_vsx.c (new file, 145 lines)
@@ -0,0 +1,145 @@
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2, *t3, *t4;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
|
||||
int i, j, head;
|
||||
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
|
||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
|
||||
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]);
|
||||
gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *)dest[4]);
|
||||
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_5vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
t3 = (unsigned char *)dest[3];
|
||||
t4 = (unsigned char *)dest[4];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
|
||||
}
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
vY5 = vY5 ^ vY5;
|
||||
vY6 = vY6 ^ vY6;
|
||||
vY7 = vY7 ^ vY7;
|
||||
vY8 = vY8 ^ vY8;
|
||||
vY9 = vY9 ^ vY9;
|
||||
vYA = vYA ^ vYA;
|
||||
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
vYH = vYH ^ vYH;
|
||||
vYI = vYI ^ vYI;
|
||||
vYJ = vYJ ^ vYJ;
|
||||
vYK = vYK ^ vYK;
|
||||
vYL = vYL ^ vYL;
|
||||
vYM = vYM ^ vYM;
|
||||
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
unsigned char *g2 = &gftbls[2 * 32 * vlen];
|
||||
unsigned char *g3 = &gftbls[3 * 32 * vlen];
|
||||
unsigned char *g4 = &gftbls[4 * 32 * vlen];
|
||||
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *)src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vlo2 = vec_xl(0, g2);
|
||||
vhi2 = vec_xl(16, g2);
|
||||
vlo3 = vec_xl(0, g3);
|
||||
vhi3 = vec_xl(16, g3);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vlo4 = vec_xl(0, g4);
|
||||
vhi4 = vec_xl(16, g4);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
|
||||
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
|
||||
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
|
||||
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
|
||||
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
|
||||
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
g2 += 32;
|
||||
g3 += 32;
|
||||
g4 += 32;
|
||||
}
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vY9, 0, t4 + i);
|
||||
vec_xst(vYA, 16, t4 + i);
|
||||
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
vec_xst(vYL, 32, t4 + i);
|
||||
vec_xst(vYM, 48, t4 + i);
|
||||
}
|
||||
return;
|
||||
}
|
erasure_code/ppc64le/gf_5vect_mad_vsx.c (new file, 122 lines)
@@ -0,0 +1,122 @@
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2, *t3, *t4;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
|
||||
int i, head;
|
||||
|
||||
s = (unsigned char *)src;
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
t3 = (unsigned char *)dest[3];
|
||||
t4 = (unsigned char *)dest[4];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
|
||||
}
|
||||
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
|
||||
vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vY5 = vec_xl(0, t2 + i);
|
||||
vY6 = vec_xl(16, t2 + i);
|
||||
vYH = vec_xl(32, t2 + i);
|
||||
vYI = vec_xl(48, t2 + i);
|
||||
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
vY7 = vec_xl(0, t3 + i);
|
||||
vY8 = vec_xl(16, t3 + i);
|
||||
vYJ = vec_xl(32, t3 + i);
|
||||
vYK = vec_xl(48, t3 + i);
|
||||
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
|
||||
vY9 = vec_xl(0, t4 + i);
|
||||
vYA = vec_xl(16, t4 + i);
|
||||
vYL = vec_xl(32, t4 + i);
|
||||
vYM = vec_xl(48, t4 + i);
|
||||
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
|
||||
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
|
||||
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
|
||||
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
|
||||
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
|
||||
|
||||
vec_xst(vY9, 0, t4 + i);
|
||||
vec_xst(vYA, 16, t4 + i);
|
||||
vec_xst(vYL, 32, t4 + i);
|
||||
vec_xst(vYM, 48, t4 + i);
|
||||
}
|
||||
return;
|
||||
}
|
erasure_code/ppc64le/gf_6vect_dot_prod_vsx.c (new file, 166 lines)
@@ -0,0 +1,166 @@
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
||||
vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
|
||||
int i, j, head;
|
||||
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
|
||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
|
||||
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]);
|
||||
gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *)dest[4]);
|
||||
gf_vect_mul_vsx(len, &gftbls[5 * 32 * vlen], src[0], (unsigned char *)dest[5]);
|
||||
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_6vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
t3 = (unsigned char *)dest[3];
|
||||
t4 = (unsigned char *)dest[4];
|
||||
t5 = (unsigned char *)dest[5];
|
||||
|
||||
head = len % 64;
|
||||
	if (head != 0) {
		gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
		gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
		gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
		gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
		gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
		gf_vect_dot_prod_base(head, vlen, &gftbls[5 * 32 * vlen], src, t5);
	}

	for (i = head; i < len - 63; i += 64) {
		vY1 = vY1 ^ vY1;
		vY2 = vY2 ^ vY2;
		vY3 = vY3 ^ vY3;
		vY4 = vY4 ^ vY4;
		vY5 = vY5 ^ vY5;
		vY6 = vY6 ^ vY6;
		vY7 = vY7 ^ vY7;
		vY8 = vY8 ^ vY8;
		vY9 = vY9 ^ vY9;
		vYA = vYA ^ vYA;
		vYB = vYB ^ vYB;
		vYC = vYC ^ vYC;

		vYD = vYD ^ vYD;
		vYE = vYE ^ vYE;
		vYF = vYF ^ vYF;
		vYG = vYG ^ vYG;
		vYH = vYH ^ vYH;
		vYI = vYI ^ vYI;
		vYJ = vYJ ^ vYJ;
		vYK = vYK ^ vYK;
		vYL = vYL ^ vYL;
		vYM = vYM ^ vYM;
		vYN = vYN ^ vYN;
		vYO = vYO ^ vYO;

		unsigned char *g0 = &gftbls[0 * 32 * vlen];
		unsigned char *g1 = &gftbls[1 * 32 * vlen];
		unsigned char *g2 = &gftbls[2 * 32 * vlen];
		unsigned char *g3 = &gftbls[3 * 32 * vlen];
		unsigned char *g4 = &gftbls[4 * 32 * vlen];
		unsigned char *g5 = &gftbls[5 * 32 * vlen];

		for (j = 0; j < vlen; j++) {
			s = (unsigned char *)src[j];
			vX1 = vec_xl(0, s + i);
			vX2 = vec_xl(16, s + i);
			vX3 = vec_xl(32, s + i);
			vX4 = vec_xl(48, s + i);

			vlo0 = EC_vec_xl(0, g0);
			vhi0 = EC_vec_xl(16, g0);
			vlo1 = EC_vec_xl(0, g1);
			vhi1 = EC_vec_xl(16, g1);

			vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
			vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
			vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
			vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);

			vlo2 = EC_vec_xl(0, g2);
			vhi2 = EC_vec_xl(16, g2);
			vlo3 = EC_vec_xl(0, g3);
			vhi3 = EC_vec_xl(16, g3);

			vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
			vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
			vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
			vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);

			vlo4 = EC_vec_xl(0, g4);
			vhi4 = EC_vec_xl(16, g4);
			vlo5 = EC_vec_xl(0, g5);
			vhi5 = EC_vec_xl(16, g5);

			vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
			vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
			vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
			vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);

			vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
			vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
			vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
			vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);

			vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
			vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
			vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
			vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);

			vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
			vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
			vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
			vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);

			g0 += 32;
			g1 += 32;
			g2 += 32;
			g3 += 32;
			g4 += 32;
			g5 += 32;
		}

		vec_xst(vY1, 0, t0 + i);
		vec_xst(vY2, 16, t0 + i);
		vec_xst(vY3, 0, t1 + i);
		vec_xst(vY4, 16, t1 + i);
		vec_xst(vY5, 0, t2 + i);
		vec_xst(vY6, 16, t2 + i);
		vec_xst(vY7, 0, t3 + i);
		vec_xst(vY8, 16, t3 + i);
		vec_xst(vY9, 0, t4 + i);
		vec_xst(vYA, 16, t4 + i);
		vec_xst(vYB, 0, t5 + i);
		vec_xst(vYC, 16, t5 + i);

		vec_xst(vYD, 32, t0 + i);
		vec_xst(vYE, 48, t0 + i);
		vec_xst(vYF, 32, t1 + i);
		vec_xst(vYG, 48, t1 + i);
		vec_xst(vYH, 32, t2 + i);
		vec_xst(vYI, 48, t2 + i);
		vec_xst(vYJ, 32, t3 + i);
		vec_xst(vYK, 48, t3 + i);
		vec_xst(vYL, 32, t4 + i);
		vec_xst(vYM, 48, t4 + i);
		vec_xst(vYN, 32, t5 + i);
		vec_xst(vYO, 48, t5 + i);
	}
	return;
}
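EC_vec_xl and EC_vec_permxor come from ec_base_vsx.h. As a rough, hypothetical illustration of the nibble-table technique used by these kernels (not the actual implementation, which may instead use the POWER8 vpermxor instruction), the per-byte GF(2^8) multiply can be sketched with plain Altivec intrinsics:

/* Illustrative sketch only; the real EC_vec_permxor lives in ec_base_vsx.h
 * and may differ.  Each 32-byte gftbls entry typically holds two 16-byte
 * lookup tables: products of the coefficient with every low nibble (lo)
 * and with every high nibble (hi). */
#include <altivec.h>

static inline vector unsigned char
permxor_sketch(vector unsigned char hi, vector unsigned char lo,
	       vector unsigned char in)
{
	vector unsigned char nib_mask = vec_splats((unsigned char)0x0f);
	vector unsigned char four = vec_splats((unsigned char)4);
	vector unsigned char lo_nib = vec_and(in, nib_mask);
	vector unsigned char hi_nib = vec_sr(in, four);

	/* Table lookup per nibble, then XOR the two partial products. */
	return vec_xor(vec_perm(lo, lo, lo_nib), vec_perm(hi, hi, hi_nib));
}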
142
erasure_code/ppc64le/gf_6vect_mad_vsx.c
Normal file
@ -0,0 +1,142 @@
#include "ec_base_vsx.h"

void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
		      unsigned char *src, unsigned char **dest)
{
	unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
	vector unsigned char vX1, vX2, vX3, vX4;
	vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
	vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
	vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
	vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
	int i, head;

	s = (unsigned char *)src;
	t0 = (unsigned char *)dest[0];
	t1 = (unsigned char *)dest[1];
	t2 = (unsigned char *)dest[2];
	t3 = (unsigned char *)dest[3];
	t4 = (unsigned char *)dest[4];
	t5 = (unsigned char *)dest[5];

	head = len % 64;
	if (head != 0) {
		gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
		gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
		gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
		gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
		gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
		gf_vect_mad_base(head, vec, vec_i, &gftbls[5 * 32 * vec], src, t5);
	}

	vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
	vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
	vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
	vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
	vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
	vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
	vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
	vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
	vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
	vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
	vlo5 = EC_vec_xl(0, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
	vhi5 = EC_vec_xl(16, gftbls + (((5 * vec) << 5) + (vec_i << 5)));

	for (i = head; i < len - 63; i += 64) {
		vX1 = vec_xl(0, s + i);
		vX2 = vec_xl(16, s + i);
		vX3 = vec_xl(32, s + i);
		vX4 = vec_xl(48, s + i);

		vY1 = vec_xl(0, t0 + i);
		vY2 = vec_xl(16, t0 + i);
		vYD = vec_xl(32, t0 + i);
		vYE = vec_xl(48, t0 + i);

		vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
		vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
		vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
		vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);

		vec_xst(vY1, 0, t0 + i);
		vec_xst(vY2, 16, t0 + i);
		vec_xst(vYD, 32, t0 + i);
		vec_xst(vYE, 48, t0 + i);

		vY3 = vec_xl(0, t1 + i);
		vY4 = vec_xl(16, t1 + i);
		vYF = vec_xl(32, t1 + i);
		vYG = vec_xl(48, t1 + i);

		vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
		vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
		vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
		vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);

		vec_xst(vY3, 0, t1 + i);
		vec_xst(vY4, 16, t1 + i);
		vec_xst(vYF, 32, t1 + i);
		vec_xst(vYG, 48, t1 + i);

		vY5 = vec_xl(0, t2 + i);
		vY6 = vec_xl(16, t2 + i);
		vYH = vec_xl(32, t2 + i);
		vYI = vec_xl(48, t2 + i);

		vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
		vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
		vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
		vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);

		vY7 = vec_xl(0, t3 + i);
		vY8 = vec_xl(16, t3 + i);
		vYJ = vec_xl(32, t3 + i);
		vYK = vec_xl(48, t3 + i);

		vec_xst(vY5, 0, t2 + i);
		vec_xst(vY6, 16, t2 + i);
		vec_xst(vYH, 32, t2 + i);
		vec_xst(vYI, 48, t2 + i);

		vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
		vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
		vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
		vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);

		vY9 = vec_xl(0, t4 + i);
		vYA = vec_xl(16, t4 + i);
		vYL = vec_xl(32, t4 + i);
		vYM = vec_xl(48, t4 + i);

		vec_xst(vY7, 0, t3 + i);
		vec_xst(vY8, 16, t3 + i);
		vec_xst(vYJ, 32, t3 + i);
		vec_xst(vYK, 48, t3 + i);

		vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
		vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
		vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
		vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);

		vYB = vec_xl(0, t5 + i);
		vYC = vec_xl(16, t5 + i);
		vYN = vec_xl(32, t5 + i);
		vYO = vec_xl(48, t5 + i);

		vec_xst(vY9, 0, t4 + i);
		vec_xst(vYA, 16, t4 + i);
		vec_xst(vYL, 32, t4 + i);
		vec_xst(vYM, 48, t4 + i);

		vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
		vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
		vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
		vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);

		vec_xst(vYB, 0, t5 + i);
		vec_xst(vYC, 16, t5 + i);
		vec_xst(vYN, 32, t5 + i);
		vec_xst(vYO, 48, t5 + i);
	}
	return;
}
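gf_6vect_mad_vsx accumulates into the six destination buffers rather than overwriting them, so a fresh encode has to start from zeroed parity. A hypothetical driver, not part of this commit, assuming gftbls was expanded with ISA-L's ec_init_tables(k, 6, ...):

/* Hypothetical driver sketch: compute six parity buffers from k data
 * fragments with the 6-destination MAD kernel. */
#include <string.h>
#include "ec_base_vsx.h"	/* assumed to declare gf_6vect_mad_vsx() */

static void encode_6_parity_sketch(int len, int k, unsigned char *gftbls,
				   unsigned char **data, unsigned char **parity)
{
	int p, j;

	for (p = 0; p < 6; p++)
		memset(parity[p], 0, len);	/* MAD adds into dest */

	for (j = 0; j < k; j++)
		gf_6vect_mad_vsx(len, k, j, gftbls, data[j], parity);
}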
85
erasure_code/ppc64le/gf_vect_dot_prod_vsx.c
Normal file
@ -0,0 +1,85 @@
#include "ec_base_vsx.h"

void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
			  unsigned char **src, unsigned char *dest)
{
	unsigned char *s, *t0;
	vector unsigned char vX1, vY1;
	vector unsigned char vX2, vY2;
	vector unsigned char vX3, vY3;
	vector unsigned char vX4, vY4;
	vector unsigned char vX5, vY5;
	vector unsigned char vX6, vY6;
	vector unsigned char vX7, vY7;
	vector unsigned char vX8, vY8;
	vector unsigned char vhi0, vlo0;
	int i, j, head;

	if (vlen < 128) {
		gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest);

		for (j = 1; j < vlen; j++) {
			gf_vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
		}
		return;
	}

	t0 = (unsigned char *)dest;

	head = len % 128;
	if (head != 0) {
		gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
	}

	for (i = head; i < len - 127; i += 128) {
		vY1 = vY1 ^ vY1;
		vY2 = vY2 ^ vY2;
		vY3 = vY3 ^ vY3;
		vY4 = vY4 ^ vY4;

		vY5 = vY5 ^ vY5;
		vY6 = vY6 ^ vY6;
		vY7 = vY7 ^ vY7;
		vY8 = vY8 ^ vY8;

		unsigned char *g0 = &gftbls[0 * 32 * vlen];

		for (j = 0; j < vlen; j++) {
			s = (unsigned char *)src[j];
			vX1 = vec_xl(0, s + i);
			vX2 = vec_xl(16, s + i);
			vX3 = vec_xl(32, s + i);
			vX4 = vec_xl(48, s + i);

			vlo0 = EC_vec_xl(0, g0);
			vhi0 = EC_vec_xl(16, g0);

			vX5 = vec_xl(64, s + i);
			vX6 = vec_xl(80, s + i);
			vX7 = vec_xl(96, s + i);
			vX8 = vec_xl(112, s + i);

			vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
			vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
			vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
			vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);

			vY5 = vY5 ^ EC_vec_permxor(vhi0, vlo0, vX5);
			vY6 = vY6 ^ EC_vec_permxor(vhi0, vlo0, vX6);
			vY7 = vY7 ^ EC_vec_permxor(vhi0, vlo0, vX7);
			vY8 = vY8 ^ EC_vec_permxor(vhi0, vlo0, vX8);

			g0 += 32;
		}
		vec_xst(vY1, 0, t0 + i);
		vec_xst(vY2, 16, t0 + i);
		vec_xst(vY3, 32, t0 + i);
		vec_xst(vY4, 48, t0 + i);

		vec_xst(vY5, 64, t0 + i);
		vec_xst(vY6, 80, t0 + i);
		vec_xst(vY7, 96, t0 + i);
		vec_xst(vY8, 112, t0 + i);
	}
	return;
}
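gf_vect_dot_prod_vsx produces one output buffer as the GF(2^8) dot product of all vlen sources against one expanded coefficient row. A usage sketch, hypothetical and not part of this commit, assuming the standard ISA-L helpers gf_gen_cauchy1_matrix() and ec_init_tables():

/* Hypothetical usage: one parity fragment over k data fragments.
 * gf_gen_cauchy1_matrix() builds k identity rows plus one coding row, and
 * ec_init_tables() expands that row into the 32-byte-per-coefficient table
 * format these kernels expect. */
#include <stdlib.h>
#include "erasure_code.h"
#include "ec_base_vsx.h"	/* assumed to declare gf_vect_dot_prod_vsx() */

static int one_parity_sketch(int len, int k, unsigned char **data, unsigned char *parity)
{
	unsigned char *a = malloc((size_t)(k + 1) * k);
	unsigned char *gftbls = malloc((size_t)k * 32);

	if (a == NULL || gftbls == NULL) {
		free(a);
		free(gftbls);
		return -1;
	}

	gf_gen_cauchy1_matrix(a, k + 1, k);		/* rows 0..k-1 identity, row k coding */
	ec_init_tables(k, 1, &a[k * k], gftbls);	/* expand the coding row */
	gf_vect_dot_prod_vsx(len, k, gftbls, data, parity);

	free(a);
	free(gftbls);
	return 0;
}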
48
erasure_code/ppc64le/gf_vect_mad_vsx.c
Normal file
@ -0,0 +1,48 @@
#include "ec_base_vsx.h"

void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
		     unsigned char *src, unsigned char *dest)
{
	unsigned char *s, *t0;
	vector unsigned char vX1, vY1;
	vector unsigned char vX2, vY2;
	vector unsigned char vX3, vY3;
	vector unsigned char vX4, vY4;
	vector unsigned char vhi0, vlo0;
	int i, head;

	s = (unsigned char *)src;
	t0 = (unsigned char *)dest;

	head = len % 64;
	if (head != 0) {
		gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, dest);
	}

	vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
	vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));

	for (i = head; i < len - 63; i += 64) {
		vX1 = vec_xl(0, s + i);
		vX2 = vec_xl(16, s + i);
		vX3 = vec_xl(32, s + i);
		vX4 = vec_xl(48, s + i);

		vY1 = vec_xl(0, t0 + i);
		vY2 = vec_xl(16, t0 + i);
		vY3 = vec_xl(32, t0 + i);
		vY4 = vec_xl(48, t0 + i);

		vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
		vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
		vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
		vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);

		vec_xst(vY1, 0, t0 + i);
		vec_xst(vY2, 16, t0 + i);
		vec_xst(vY3, 32, t0 + i);
		vec_xst(vY4, 48, t0 + i);
	}

	return;
}
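Because the code is linear over GF(2^8), the single-destination MAD kernel can also patch an existing parity buffer when one data fragment changes: multiply the XOR difference of the old and new fragment by that fragment's coefficient and fold it in. A hypothetical sketch; delta, frag_idx and the scratch handling are illustrative only:

/* Hypothetical sketch: parity' = parity ^ coeff * (old ^ new). */
#include "ec_base_vsx.h"	/* assumed to declare gf_vect_mad_vsx() */

static void patch_parity_sketch(int len, int k, int frag_idx, unsigned char *gftbls,
				const unsigned char *old_frag, const unsigned char *new_frag,
				unsigned char *delta, unsigned char *parity)
{
	int i;

	for (i = 0; i < len; i++)
		delta[i] = old_frag[i] ^ new_frag[i];

	gf_vect_mad_vsx(len, k, frag_idx, gftbls, delta, parity);
}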
61
erasure_code/ppc64le/gf_vect_mul_vsx.c
Normal file
@ -0,0 +1,61 @@
#include "ec_base_vsx.h"

void gf_vect_mul_vsx(int len, unsigned char *gftbl, unsigned char *src, unsigned char *dest)
{
	unsigned char *s, *t0;
	vector unsigned char vX1, vY1;
	vector unsigned char vX2, vY2;
	vector unsigned char vX3, vY3;
	vector unsigned char vX4, vY4;
	vector unsigned char vX5, vY5;
	vector unsigned char vX6, vY6;
	vector unsigned char vX7, vY7;
	vector unsigned char vX8, vY8;
	vector unsigned char vhi0, vlo0;
	int i, head;

	s = (unsigned char *)src;
	t0 = (unsigned char *)dest;

	head = len % 128;
	if (head != 0) {
		gf_vect_mul_base(head, gftbl, src, dest);
	}

	vlo0 = EC_vec_xl(0, gftbl);
	vhi0 = EC_vec_xl(16, gftbl);

	for (i = head; i < len - 127; i += 128) {
		vX1 = vec_xl(0, s + i);
		vX2 = vec_xl(16, s + i);
		vX3 = vec_xl(32, s + i);
		vX4 = vec_xl(48, s + i);

		vX5 = vec_xl(64, s + i);
		vX6 = vec_xl(80, s + i);
		vX7 = vec_xl(96, s + i);
		vX8 = vec_xl(112, s + i);

		vY1 = EC_vec_permxor(vhi0, vlo0, vX1);
		vY2 = EC_vec_permxor(vhi0, vlo0, vX2);
		vY3 = EC_vec_permxor(vhi0, vlo0, vX3);
		vY4 = EC_vec_permxor(vhi0, vlo0, vX4);

		vY5 = EC_vec_permxor(vhi0, vlo0, vX5);
		vY6 = EC_vec_permxor(vhi0, vlo0, vX6);
		vY7 = EC_vec_permxor(vhi0, vlo0, vX7);
		vY8 = EC_vec_permxor(vhi0, vlo0, vX8);

		vec_xst(vY1, 0, t0 + i);
		vec_xst(vY2, 16, t0 + i);
		vec_xst(vY3, 32, t0 + i);
		vec_xst(vY4, 48, t0 + i);

		vec_xst(vY5, 64, t0 + i);
		vec_xst(vY6, 80, t0 + i);
		vec_xst(vY7, 96, t0 + i);
		vec_xst(vY8, 112, t0 + i);
	}

	return;
}
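The 32-byte gftbl argument is the expanded form of a single GF(2^8) constant, normally produced by ISA-L's gf_vect_mul_init(). A brief, hypothetical usage sketch:

/* Hypothetical usage: multiply an entire buffer by the constant 0x1d.
 * gf_vect_mul_init() expands the constant into the two 16-byte nibble
 * tables consumed by the kernel above. */
#include "erasure_code.h"
#include "ec_base_vsx.h"	/* assumed to declare gf_vect_mul_vsx() */

static void scale_buffer_sketch(int len, unsigned char *src, unsigned char *dest)
{
	unsigned char gftbl[32];

	gf_vect_mul_init(0x1d, gftbl);
	gf_vect_mul_vsx(len, gftbl, src, dest);
}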
@ -38,6 +38,7 @@ lsrc += igzip/igzip.c \

lsrc_base_aliases += igzip/igzip_base_aliases.c igzip/proc_heap_base.c
lsrc_x86_32 += igzip/igzip_base_aliases.c igzip/proc_heap_base.c
lsrc_ppc64le += igzip/igzip_base_aliases.c igzip/proc_heap_base.c

lsrc_aarch64 += igzip/aarch64/igzip_inflate_multibinary_arm64.S \
	igzip/aarch64/igzip_multibinary_arm64.S \
@ -32,6 +32,7 @@ include mem/aarch64/Makefile.am
lsrc += mem/mem_zero_detect_base.c

lsrc_base_aliases += mem/mem_zero_detect_base_aliases.c
lsrc_ppc64le += mem/mem_zero_detect_base_aliases.c

lsrc_x86_64 += mem/mem_zero_detect_avx.asm \
	mem/mem_zero_detect_sse.asm \
@ -32,6 +32,7 @@ include raid/aarch64/Makefile.am
lsrc += raid/raid_base.c

lsrc_base_aliases += raid/raid_base_aliases.c
lsrc_ppc64le += raid/raid_base_aliases.c

lsrc_x86_64 += \
	raid/xor_gen_sse.asm \