mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 09:23:50 +01:00
erasure_code: reformat using new code style
Signed-off-by: Marcel Cornu <marcel.d.cornu@intel.com>
This commit is contained in:
parent
671e67b62d
commit
300260a4d9
@ -31,94 +31,86 @@
|
||||
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_neon);
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_neon);
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_sve);
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_neon);
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_sve);
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_neon);
|
||||
#endif
|
||||
return PROVIDER_BASIC(gf_vect_dot_prod);
|
||||
|
||||
return PROVIDER_BASIC(gf_vect_dot_prod);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(gf_vect_mad_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(gf_vect_mad_neon);
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(gf_vect_mad_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(gf_vect_mad_neon);
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(gf_vect_mad_sve);
|
||||
return PROVIDER_INFO(gf_vect_mad_neon);
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(gf_vect_mad_sve);
|
||||
return PROVIDER_INFO(gf_vect_mad_neon);
|
||||
#endif
|
||||
return PROVIDER_BASIC(gf_vect_mad);
|
||||
|
||||
return PROVIDER_BASIC(gf_vect_mad);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(ec_encode_data_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(ec_encode_data_neon);
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(ec_encode_data_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(ec_encode_data_neon);
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(ec_encode_data_sve);
|
||||
return PROVIDER_INFO(ec_encode_data_neon);
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(ec_encode_data_sve);
|
||||
return PROVIDER_INFO(ec_encode_data_neon);
|
||||
#endif
|
||||
return PROVIDER_BASIC(ec_encode_data);
|
||||
|
||||
return PROVIDER_BASIC(ec_encode_data);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(ec_encode_data_update_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(ec_encode_data_update_neon);
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(ec_encode_data_update_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(ec_encode_data_update_neon);
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(ec_encode_data_update_sve);
|
||||
return PROVIDER_INFO(ec_encode_data_update_neon);
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(ec_encode_data_update_sve);
|
||||
return PROVIDER_INFO(ec_encode_data_update_neon);
|
||||
#endif
|
||||
return PROVIDER_BASIC(ec_encode_data_update);
|
||||
|
||||
return PROVIDER_BASIC(ec_encode_data_update);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(gf_vect_mul_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(gf_vect_mul_neon);
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(gf_vect_mul_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(gf_vect_mul_neon);
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(gf_vect_mul_sve);
|
||||
return PROVIDER_INFO(gf_vect_mul_neon);
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(gf_vect_mul_sve);
|
||||
return PROVIDER_INFO(gf_vect_mul_neon);
|
||||
#endif
|
||||
return PROVIDER_BASIC(gf_vect_mul);
|
||||
|
||||
return PROVIDER_BASIC(gf_vect_mul);
|
||||
}
|
||||
|
||||
/* Dispatcher for ec_init_tables: always selects the basic provider. */
DEFINE_INTERFACE_DISPATCHER(ec_init_tables) { return PROVIDER_BASIC(ec_init_tables); }
|
||||
|
@ -29,236 +29,265 @@
|
||||
#include "erasure_code.h"
|
||||
|
||||
/*external function*/
|
||||
/*
 * NEON kernels defined outside this translation unit.
 * The single-vector variants take one destination buffer; the N-vector
 * variants take an array of destination buffers.
 */
extern void
gf_vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char *dest);
extern void
gf_2vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                       unsigned char **dest);
extern void
gf_3vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                       unsigned char **dest);
extern void
gf_4vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                       unsigned char **dest);
extern void
gf_5vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                       unsigned char **dest);
extern void
gf_vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char *dest);
extern void
gf_2vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
extern void
gf_3vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
extern void
gf_4vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
extern void
gf_5vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
extern void
gf_6vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
|
||||
|
||||
/*
 * Encode `rows` coding buffers from `k` source buffers using the NEON
 * dot-product kernels.
 *
 * len     length passed through to the kernels; values below 16 are handed
 *         to ec_encode_data_base instead.
 * k       number of source buffers in `data`.
 * rows    number of output buffers in `coding`.
 * g_tbls  expanded multiplication tables; this function advances through
 *         them by k * 32 bytes per output row.
 * data    array of source buffer pointers.
 * coding  array of output buffer pointers.
 *
 * Rows are processed five at a time while more than five remain; the final
 * 1..5 rows go to the kernel of matching width.
 */
void
ec_encode_data_neon(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                    unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        while (rows > 5) {
                gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding);
                g_tbls += 5 * k * 32;
                coding += 5;
                rows -= 5;
        }

        switch (rows) {
        case 5:
                gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_neon(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_neon(len, k, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_dot_prod_neon(len, k, g_tbls, data, coding);
                break;
        case 1:
                /* Single-vector kernel takes the buffer itself, not the array. */
                gf_vect_dot_prod_neon(len, k, g_tbls, data, *coding);
                break;
        case 0:
                break;
        default:
                break;
        }
}
|
||||
|
||||
/*
 * Update `rows` coding buffers with the contribution of one source buffer,
 * using the NEON multiply-accumulate kernels.
 *
 * len     length passed through to the kernels; values below 16 are handed
 *         to ec_encode_data_update_base instead.
 * k       passed to the kernels as their `vec` argument.
 * rows    number of output buffers in `coding`.
 * vec_i   passed through to the kernels unchanged.
 * g_tbls  expanded multiplication tables; this function advances through
 *         them by k * 32 bytes per output row.
 * data    the single source buffer.
 * coding  array of output buffer pointers.
 *
 * Rows are processed six at a time while more than six remain; the final
 * 1..6 rows go to the kernel of matching width.
 */
void
ec_encode_data_update_neon(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                           unsigned char *data, unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        while (rows > 6) {
                gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        switch (rows) {
        case 6:
                gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 1:
                /* Single-vector kernel takes the buffer itself, not the array. */
                gf_vect_mad_neon(len, k, vec_i, g_tbls, data, *coding);
                break;
        case 0:
                break;
        default:
                /* rows < 0: nothing to do. */
                break;
        }
}
|
||||
|
||||
/* SVE */
|
||||
/*
 * SVE kernels defined outside this translation unit.
 * The single-vector variants take one destination buffer; the N-vector
 * variants take an array of destination buffers.
 */
extern void
gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                     unsigned char *dest);
extern void
gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_8vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                unsigned char *dest);
extern void
gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
extern void
gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
extern void
gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
extern void
gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
extern void
gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
|
||||
|
||||
/*
 * Encode `rows` coding buffers from `k` source buffers using the SVE
 * dot-product kernels.
 *
 * len     length passed through to the kernels; values below 16 are handed
 *         to ec_encode_data_base instead.
 * k       number of source buffers in `data`.
 * rows    number of output buffers in `coding`.
 * g_tbls  expanded multiplication tables; this function advances through
 *         them by k * 32 bytes per output row.
 * data    array of source buffer pointers.
 * coding  array of output buffer pointers.
 *
 * Rows are processed six at a time while more than eleven remain. The
 * remaining 8..11 rows are split into two kernel calls (N + 4); 1..7 rows
 * go to the kernel of matching width.
 */
void
ec_encode_data_sve(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                   unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        while (rows > 11) {
                gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        switch (rows) {
        case 11:
                /* 7 + 4 */
                gf_7vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 7 * k * 32;
                coding += 7;
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 10:
                /* 6 + 4 */
                gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 9:
                /* 5 + 4 */
                gf_5vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 5 * k * 32;
                coding += 5;
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 8:
                /* 4 + 4 */
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 4 * k * 32;
                coding += 4;
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 7:
                gf_7vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 6:
                gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 1:
                /* Single-vector kernel takes the buffer itself, not the array. */
                gf_vect_dot_prod_sve(len, k, g_tbls, data, *coding);
                break;
        default:
                break;
        }
}
|
||||
|
||||
/*
 * Update `rows` coding buffers with the contribution of one source buffer,
 * using the SVE multiply-accumulate kernels.
 *
 * len     length passed through to the kernels; values below 16 are handed
 *         to ec_encode_data_update_base instead.
 * k       passed to the kernels as their `vec` argument.
 * rows    number of output buffers in `coding`.
 * vec_i   passed through to the kernels unchanged.
 * g_tbls  expanded multiplication tables; this function advances through
 *         them by k * 32 bytes per output row.
 * data    the single source buffer.
 * coding  array of output buffer pointers.
 *
 * Rows are processed six at a time while more than six remain; the final
 * 1..6 rows go to the kernel of matching width.
 */
void
ec_encode_data_update_sve(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                          unsigned char *data, unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        while (rows > 6) {
                gf_6vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        switch (rows) {
        case 6:
                gf_6vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 1:
                /* Single-vector kernel takes the buffer itself, not the array. */
                gf_vect_mad_sve(len, k, vec_i, g_tbls, data, *coding);
                break;
        default:
                break;
        }
}
|
||||
|
@ -28,322 +28,331 @@
|
||||
**********************************************************************/
|
||||
|
||||
#include <limits.h>
|
||||
#include <string.h> // for memset
|
||||
#include <string.h> // for memset
|
||||
#include <stdint.h>
|
||||
|
||||
#include "erasure_code.h"
|
||||
#include "ec_base.h" // for GF tables
|
||||
#include "ec_base.h" // for GF tables
|
||||
|
||||
/*
 * Expand a rows x k coefficient matrix into multiplication tables.
 *
 * k, rows  dimensions of the coefficient matrix `a`.
 * a        coefficients, consumed sequentially (rows * k bytes are read).
 * g_tbls   output; gf_vect_mul_init is called once per coefficient and the
 *          output pointer advances 32 bytes each time, so rows * k * 32
 *          bytes are addressed in total.
 */
void
ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
        int i, j;

        for (i = 0; i < rows; i++) {
                for (j = 0; j < k; j++) {
                        gf_vect_mul_init(*a++, g_tbls);
                        g_tbls += 32;
                }
        }
}
|
||||
|
||||
unsigned char gf_mul(unsigned char a, unsigned char b)
|
||||
unsigned char
|
||||
gf_mul(unsigned char a, unsigned char b)
|
||||
{
|
||||
#ifndef GF_LARGE_TABLES
|
||||
int i;
|
||||
int i;
|
||||
|
||||
if ((a == 0) || (b == 0))
|
||||
return 0;
|
||||
if ((a == 0) || (b == 0))
|
||||
return 0;
|
||||
|
||||
return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ? i - 255 : i];
|
||||
return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ? i - 255 : i];
|
||||
#else
|
||||
return gf_mul_table_base[b * 256 + a];
|
||||
return gf_mul_table_base[b * 256 + a];
|
||||
#endif
|
||||
}
|
||||
|
||||
unsigned char gf_inv(unsigned char a)
|
||||
unsigned char
|
||||
gf_inv(unsigned char a)
|
||||
{
|
||||
#ifndef GF_LARGE_TABLES
|
||||
if (a == 0)
|
||||
return 0;
|
||||
if (a == 0)
|
||||
return 0;
|
||||
|
||||
return gff_base[255 - gflog_base[a]];
|
||||
return gff_base[255 - gflog_base[a]];
|
||||
#else
|
||||
return gf_inv_table_base[a];
|
||||
return gf_inv_table_base[a];
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Fill `a` (m rows by k columns, row-major) with an encoding matrix.
 *
 * The first k rows form the identity. Each later row i holds successive
 * powers of a per-row generator: a[k*i + j] = gen^j, where gen is 1 for
 * row k and is multiplied by 2 (in the field) for each following row.
 *
 * a  output buffer of at least k * m bytes.
 * m  total number of rows.
 * k  number of columns.
 */
void
gf_gen_rs_matrix(unsigned char *a, int m, int k)
{
        int i, j;
        unsigned char p, gen = 1;

        memset(a, 0, k * m);
        for (i = 0; i < k; i++)
                a[k * i + i] = 1;

        for (i = k; i < m; i++) {
                p = 1;
                for (j = 0; j < k; j++) {
                        a[k * i + j] = p;
                        p = gf_mul(p, gen);
                }
                gen = gf_mul(gen, 2);
        }
}
|
||||
|
||||
/*
 * Fill `a` (m rows by k columns, row-major) with an encoding matrix.
 *
 * The first k rows form the identity. Every entry in rows k..m-1 is
 * gf_inv(i ^ j) for row i and column j.
 *
 * a  output buffer of at least k * m bytes.
 * m  total number of rows.
 * k  number of columns.
 */
void
gf_gen_cauchy1_matrix(unsigned char *a, int m, int k)
{
        int i, j;
        unsigned char *p;

        // Identity matrix in high position
        memset(a, 0, k * m);
        for (i = 0; i < k; i++)
                a[k * i + i] = 1;

        // For the rest choose 1/(i + j) | i != j
        p = &a[k * k];
        for (i = k; i < m; i++)
                for (j = 0; j < k; j++)
                        *p++ = gf_inv(i ^ j);
}
|
||||
|
||||
/*
 * Invert an n x n matrix over the field by Gauss-Jordan elimination.
 *
 * in_mat   matrix to invert, row-major; it is modified in place while the
 *          elimination runs.
 * out_mat  receives the inverse; initialised to the identity here.
 * n        matrix dimension.
 *
 * Returns 0 on success, or -1 if no non-zero pivot can be found for some
 * column (the matrix is singular).
 */
int
gf_invert_matrix(unsigned char *in_mat, unsigned char *out_mat, const int n)
{
        int i, j, k;
        unsigned char temp;

        // Set out_mat[] to the identity matrix
        for (i = 0; i < n * n; i++) // memset(out_mat, 0, n*n)
                out_mat[i] = 0;

        for (i = 0; i < n; i++)
                out_mat[i * n + i] = 1;

        // Inverse
        for (i = 0; i < n; i++) {
                // Check for 0 in pivot element
                if (in_mat[i * n + i] == 0) {
                        // Find a row with non-zero in current column and swap
                        for (j = i + 1; j < n; j++)
                                if (in_mat[j * n + i])
                                        break;

                        if (j == n) // Couldn't find means it's singular
                                return -1;

                        for (k = 0; k < n; k++) { // Swap rows i,j
                                temp = in_mat[i * n + k];
                                in_mat[i * n + k] = in_mat[j * n + k];
                                in_mat[j * n + k] = temp;

                                temp = out_mat[i * n + k];
                                out_mat[i * n + k] = out_mat[j * n + k];
                                out_mat[j * n + k] = temp;
                        }
                }

                temp = gf_inv(in_mat[i * n + i]); // 1/pivot
                for (j = 0; j < n; j++) {         // Scale row i by 1/pivot
                        in_mat[i * n + j] = gf_mul(in_mat[i * n + j], temp);
                        out_mat[i * n + j] = gf_mul(out_mat[i * n + j], temp);
                }

                // Clear column i in every other row by adding a multiple of row i
                for (j = 0; j < n; j++) {
                        if (j == i)
                                continue;

                        temp = in_mat[j * n + i];
                        for (k = 0; k < n; k++) {
                                out_mat[j * n + k] ^= gf_mul(temp, out_mat[i * n + k]);
                                in_mat[j * n + k] ^= gf_mul(temp, in_mat[i * n + k]);
                        }
                }
        }
        return 0;
}
|
||||
|
||||
// Calculates const table gftbl in GF(2^8) from single input A
|
||||
// gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, ... , A{f0} }
|
||||
|
||||
void gf_vect_mul_init(unsigned char c, unsigned char *tbl)
|
||||
void
|
||||
gf_vect_mul_init(unsigned char c, unsigned char *tbl)
|
||||
{
|
||||
unsigned char c2 = (c << 1) ^ ((c & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
unsigned char c4 = (c2 << 1) ^ ((c2 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
unsigned char c8 = (c4 << 1) ^ ((c4 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
unsigned char c2 = (c << 1) ^ ((c & 0x80) ? 0x1d : 0); // Mult by GF{2}
|
||||
unsigned char c4 = (c2 << 1) ^ ((c2 & 0x80) ? 0x1d : 0); // Mult by GF{2}
|
||||
unsigned char c8 = (c4 << 1) ^ ((c4 & 0x80) ? 0x1d : 0); // Mult by GF{2}
|
||||
|
||||
#if (__WORDSIZE == 64 || _WIN64 || __x86_64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
unsigned long long v1, v2, v4, v8, *t;
|
||||
unsigned long long v10, v20, v40, v80;
|
||||
unsigned char c17, c18, c20, c24;
|
||||
unsigned long long v1, v2, v4, v8, *t;
|
||||
unsigned long long v10, v20, v40, v80;
|
||||
unsigned char c17, c18, c20, c24;
|
||||
|
||||
t = (unsigned long long *)tbl;
|
||||
t = (unsigned long long *) tbl;
|
||||
|
||||
v1 = c * 0x0100010001000100ull;
|
||||
v2 = c2 * 0x0101000001010000ull;
|
||||
v4 = c4 * 0x0101010100000000ull;
|
||||
v8 = c8 * 0x0101010101010101ull;
|
||||
v1 = c * 0x0100010001000100ull;
|
||||
v2 = c2 * 0x0101000001010000ull;
|
||||
v4 = c4 * 0x0101010100000000ull;
|
||||
v8 = c8 * 0x0101010101010101ull;
|
||||
|
||||
v4 = v1 ^ v2 ^ v4;
|
||||
t[0] = v4;
|
||||
t[1] = v8 ^ v4;
|
||||
v4 = v1 ^ v2 ^ v4;
|
||||
t[0] = v4;
|
||||
t[1] = v8 ^ v4;
|
||||
|
||||
c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); // Mult by GF{2}
|
||||
c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); // Mult by GF{2}
|
||||
c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); // Mult by GF{2}
|
||||
c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); // Mult by GF{2}
|
||||
|
||||
v10 = c17 * 0x0100010001000100ull;
|
||||
v20 = c18 * 0x0101000001010000ull;
|
||||
v40 = c20 * 0x0101010100000000ull;
|
||||
v80 = c24 * 0x0101010101010101ull;
|
||||
v10 = c17 * 0x0100010001000100ull;
|
||||
v20 = c18 * 0x0101000001010000ull;
|
||||
v40 = c20 * 0x0101010100000000ull;
|
||||
v80 = c24 * 0x0101010101010101ull;
|
||||
|
||||
v40 = v10 ^ v20 ^ v40;
|
||||
t[2] = v40;
|
||||
t[3] = v80 ^ v40;
|
||||
v40 = v10 ^ v20 ^ v40;
|
||||
t[2] = v40;
|
||||
t[3] = v80 ^ v40;
|
||||
|
||||
#else // 32-bit or other
|
||||
unsigned char c3, c5, c6, c7, c9, c10, c11, c12, c13, c14, c15;
|
||||
unsigned char c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30,
|
||||
c31;
|
||||
unsigned char c3, c5, c6, c7, c9, c10, c11, c12, c13, c14, c15;
|
||||
unsigned char c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31;
|
||||
|
||||
c3 = c2 ^ c;
|
||||
c5 = c4 ^ c;
|
||||
c6 = c4 ^ c2;
|
||||
c7 = c4 ^ c3;
|
||||
c3 = c2 ^ c;
|
||||
c5 = c4 ^ c;
|
||||
c6 = c4 ^ c2;
|
||||
c7 = c4 ^ c3;
|
||||
|
||||
c9 = c8 ^ c;
|
||||
c10 = c8 ^ c2;
|
||||
c11 = c8 ^ c3;
|
||||
c12 = c8 ^ c4;
|
||||
c13 = c8 ^ c5;
|
||||
c14 = c8 ^ c6;
|
||||
c15 = c8 ^ c7;
|
||||
c9 = c8 ^ c;
|
||||
c10 = c8 ^ c2;
|
||||
c11 = c8 ^ c3;
|
||||
c12 = c8 ^ c4;
|
||||
c13 = c8 ^ c5;
|
||||
c14 = c8 ^ c6;
|
||||
c15 = c8 ^ c7;
|
||||
|
||||
tbl[0] = 0;
|
||||
tbl[1] = c;
|
||||
tbl[2] = c2;
|
||||
tbl[3] = c3;
|
||||
tbl[4] = c4;
|
||||
tbl[5] = c5;
|
||||
tbl[6] = c6;
|
||||
tbl[7] = c7;
|
||||
tbl[8] = c8;
|
||||
tbl[9] = c9;
|
||||
tbl[10] = c10;
|
||||
tbl[11] = c11;
|
||||
tbl[12] = c12;
|
||||
tbl[13] = c13;
|
||||
tbl[14] = c14;
|
||||
tbl[15] = c15;
|
||||
tbl[0] = 0;
|
||||
tbl[1] = c;
|
||||
tbl[2] = c2;
|
||||
tbl[3] = c3;
|
||||
tbl[4] = c4;
|
||||
tbl[5] = c5;
|
||||
tbl[6] = c6;
|
||||
tbl[7] = c7;
|
||||
tbl[8] = c8;
|
||||
tbl[9] = c9;
|
||||
tbl[10] = c10;
|
||||
tbl[11] = c11;
|
||||
tbl[12] = c12;
|
||||
tbl[13] = c13;
|
||||
tbl[14] = c14;
|
||||
tbl[15] = c15;
|
||||
|
||||
c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c19 = c18 ^ c17;
|
||||
c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c21 = c20 ^ c17;
|
||||
c22 = c20 ^ c18;
|
||||
c23 = c20 ^ c19;
|
||||
c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c25 = c24 ^ c17;
|
||||
c26 = c24 ^ c18;
|
||||
c27 = c24 ^ c19;
|
||||
c28 = c24 ^ c20;
|
||||
c29 = c24 ^ c21;
|
||||
c30 = c24 ^ c22;
|
||||
c31 = c24 ^ c23;
|
||||
c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); // Mult by GF{2}
|
||||
c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); // Mult by GF{2}
|
||||
c19 = c18 ^ c17;
|
||||
c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); // Mult by GF{2}
|
||||
c21 = c20 ^ c17;
|
||||
c22 = c20 ^ c18;
|
||||
c23 = c20 ^ c19;
|
||||
c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); // Mult by GF{2}
|
||||
c25 = c24 ^ c17;
|
||||
c26 = c24 ^ c18;
|
||||
c27 = c24 ^ c19;
|
||||
c28 = c24 ^ c20;
|
||||
c29 = c24 ^ c21;
|
||||
c30 = c24 ^ c22;
|
||||
c31 = c24 ^ c23;
|
||||
|
||||
tbl[16] = 0;
|
||||
tbl[17] = c17;
|
||||
tbl[18] = c18;
|
||||
tbl[19] = c19;
|
||||
tbl[20] = c20;
|
||||
tbl[21] = c21;
|
||||
tbl[22] = c22;
|
||||
tbl[23] = c23;
|
||||
tbl[24] = c24;
|
||||
tbl[25] = c25;
|
||||
tbl[26] = c26;
|
||||
tbl[27] = c27;
|
||||
tbl[28] = c28;
|
||||
tbl[29] = c29;
|
||||
tbl[30] = c30;
|
||||
tbl[31] = c31;
|
||||
tbl[16] = 0;
|
||||
tbl[17] = c17;
|
||||
tbl[18] = c18;
|
||||
tbl[19] = c19;
|
||||
tbl[20] = c20;
|
||||
tbl[21] = c21;
|
||||
tbl[22] = c22;
|
||||
tbl[23] = c23;
|
||||
tbl[24] = c24;
|
||||
tbl[25] = c25;
|
||||
tbl[26] = c26;
|
||||
tbl[27] = c27;
|
||||
tbl[28] = c28;
|
||||
tbl[29] = c29;
|
||||
tbl[30] = c30;
|
||||
tbl[31] = c31;
|
||||
|
||||
#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
|
||||
}
|
||||
|
||||
void gf_vect_dot_prod_base(int len, int vlen, unsigned char *v,
|
||||
unsigned char **src, unsigned char *dest)
|
||||
void
|
||||
gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char s;
|
||||
for (i = 0; i < len; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < vlen; j++)
|
||||
s ^= gf_mul(src[j][i], v[j * 32 + 1]);
|
||||
int i, j;
|
||||
unsigned char s;
|
||||
for (i = 0; i < len; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < vlen; j++)
|
||||
s ^= gf_mul(src[j][i], v[j * 32 + 1]);
|
||||
|
||||
dest[i] = s;
|
||||
}
|
||||
dest[i] = s;
|
||||
}
|
||||
}
|
||||
|
||||
void gf_vect_mad_base(int len, int vec, int vec_i,
|
||||
unsigned char *v, unsigned char *src, unsigned char *dest)
|
||||
void
|
||||
gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
|
||||
unsigned char *dest)
|
||||
{
|
||||
int i;
|
||||
unsigned char s;
|
||||
for (i = 0; i < len; i++) {
|
||||
s = dest[i];
|
||||
s ^= gf_mul(src[i], v[vec_i * 32 + 1]);
|
||||
dest[i] = s;
|
||||
}
|
||||
int i;
|
||||
unsigned char s;
|
||||
for (i = 0; i < len; i++) {
|
||||
s = dest[i];
|
||||
s ^= gf_mul(src[i], v[vec_i * 32 + 1]);
|
||||
dest[i] = s;
|
||||
}
|
||||
}
|
||||
|
||||
void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v,
|
||||
unsigned char **src, unsigned char **dest)
|
||||
void
|
||||
ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
|
||||
unsigned char **dest)
|
||||
{
|
||||
int i, j, l;
|
||||
unsigned char s;
|
||||
int i, j, l;
|
||||
unsigned char s;
|
||||
|
||||
for (l = 0; l < dests; l++) {
|
||||
for (i = 0; i < len; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < srcs; j++)
|
||||
s ^= gf_mul(src[j][i], v[j * 32 + l * srcs * 32 + 1]);
|
||||
for (l = 0; l < dests; l++) {
|
||||
for (i = 0; i < len; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < srcs; j++)
|
||||
s ^= gf_mul(src[j][i], v[j * 32 + l * srcs * 32 + 1]);
|
||||
|
||||
dest[l][i] = s;
|
||||
}
|
||||
}
|
||||
dest[l][i] = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
|
||||
unsigned char *data, unsigned char **dest)
|
||||
void
|
||||
ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
|
||||
unsigned char *data, unsigned char **dest)
|
||||
{
|
||||
int i, l;
|
||||
unsigned char s;
|
||||
int i, l;
|
||||
unsigned char s;
|
||||
|
||||
for (l = 0; l < rows; l++) {
|
||||
for (i = 0; i < len; i++) {
|
||||
s = dest[l][i];
|
||||
s ^= gf_mul(data[i], v[vec_i * 32 + l * k * 32 + 1]);
|
||||
for (l = 0; l < rows; l++) {
|
||||
for (i = 0; i < len; i++) {
|
||||
s = dest[l][i];
|
||||
s ^= gf_mul(data[i], v[vec_i * 32 + l * k * 32 + 1]);
|
||||
|
||||
dest[l][i] = s;
|
||||
}
|
||||
}
|
||||
dest[l][i] = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
|
||||
int
|
||||
gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
|
||||
{
|
||||
//2nd element of table array is ref value used to fill it in
|
||||
unsigned char c = a[1];
|
||||
// 2nd element of table array is ref value used to fill it in
|
||||
unsigned char c = a[1];
|
||||
|
||||
// Len must be aligned to 32B
|
||||
if ((len % 32) != 0) {
|
||||
return -1;
|
||||
}
|
||||
// Len must be aligned to 32B
|
||||
if ((len % 32) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
while (len-- > 0)
|
||||
*dest++ = gf_mul(c, *src++);
|
||||
return 0;
|
||||
while (len-- > 0)
|
||||
*dest++ = gf_mul(c, *src++);
|
||||
return 0;
|
||||
}
|
||||
|
11184
erasure_code/ec_base.h
11184
erasure_code/ec_base.h
File diff suppressed because it is too large
Load Diff
@ -29,37 +29,40 @@
|
||||
|
||||
#include "erasure_code.h"
|
||||
|
||||
void gf_vect_dot_prod(int len, int vlen, unsigned char *v,
|
||||
unsigned char **src, unsigned char *dest)
|
||||
void
|
||||
gf_vect_dot_prod(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest)
|
||||
{
|
||||
gf_vect_dot_prod_base(len, vlen, v, src, dest);
|
||||
gf_vect_dot_prod_base(len, vlen, v, src, dest);
|
||||
}
|
||||
|
||||
void gf_vect_mad(int len, int vec, int vec_i,
|
||||
unsigned char *v, unsigned char *src, unsigned char *dest)
|
||||
void
|
||||
gf_vect_mad(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, unsigned char *dest)
|
||||
{
|
||||
gf_vect_mad_base(len, vec, vec_i, v, src, dest);
|
||||
|
||||
gf_vect_mad_base(len, vec, vec_i, v, src, dest);
|
||||
}
|
||||
|
||||
void ec_encode_data(int len, int srcs, int dests, unsigned char *v,
|
||||
unsigned char **src, unsigned char **dest)
|
||||
void
|
||||
ec_encode_data(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
|
||||
unsigned char **dest)
|
||||
{
|
||||
ec_encode_data_base(len, srcs, dests, v, src, dest);
|
||||
ec_encode_data_base(len, srcs, dests, v, src, dest);
|
||||
}
|
||||
|
||||
void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v,
|
||||
unsigned char *data, unsigned char **dest)
|
||||
void
|
||||
ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v, unsigned char *data,
|
||||
unsigned char **dest)
|
||||
{
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
|
||||
}
|
||||
|
||||
int gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
|
||||
int
|
||||
gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
|
||||
{
|
||||
return gf_vect_mul_base(len, a, (unsigned char *)src, (unsigned char *)dest);
|
||||
return gf_vect_mul_base(len, a, (unsigned char *) src, (unsigned char *) dest);
|
||||
}
|
||||
|
||||
void ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
|
||||
void
|
||||
ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
|
||||
{
|
||||
return ec_init_tables_base(k, rows, a, g_tbls);
|
||||
return ec_init_tables_base(k, rows, a, g_tbls);
|
||||
}
|
||||
|
@ -28,387 +28,423 @@
|
||||
**********************************************************************/
|
||||
#include <limits.h>
|
||||
#include "erasure_code.h"
|
||||
#include "ec_base.h" /* for GF tables */
|
||||
#include "ec_base.h" /* for GF tables */
|
||||
|
||||
#if __x86_64__ || __i386__ || _M_X64 || _M_IX86
|
||||
void ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding)
|
||||
#if __x86_64__ || __i386__ || _M_X64 || _M_IX86
|
||||
void
|
||||
ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding)
|
||||
{
|
||||
|
||||
if (len < 16) {
|
||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows >= 6) {
|
||||
gf_6vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
if (len < 16) {
|
||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows >= 6) {
|
||||
gf_6vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding)
|
||||
void
|
||||
ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding)
|
||||
{
|
||||
if (len < 16) {
|
||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows >= 6) {
|
||||
gf_6vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
if (len < 16) {
|
||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows >= 6) {
|
||||
gf_6vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding)
|
||||
void
|
||||
ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding)
|
||||
{
|
||||
|
||||
if (len < 32) {
|
||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows >= 6) {
|
||||
gf_6vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
if (len < 32) {
|
||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows >= 6) {
|
||||
gf_6vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_AS_KNOWS_AVX512
|
||||
|
||||
extern int gf_vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char *dest);
|
||||
extern int gf_2vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding);
|
||||
extern int gf_3vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding);
|
||||
extern int gf_4vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding);
|
||||
extern int gf_5vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding);
|
||||
extern int gf_6vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding);
|
||||
extern void gf_vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char *dest);
|
||||
extern void gf_2vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern void gf_3vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern void gf_4vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern void gf_5vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern void gf_6vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern int
|
||||
gf_vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char *dest);
|
||||
extern int
|
||||
gf_2vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
extern int
|
||||
gf_3vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
extern int
|
||||
gf_4vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
extern int
|
||||
gf_5vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
extern int
|
||||
gf_6vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
extern void
|
||||
gf_vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char *dest);
|
||||
extern void
|
||||
gf_2vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
extern void
|
||||
gf_3vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
extern void
|
||||
gf_4vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
extern void
|
||||
gf_5vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
extern void
|
||||
gf_6vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
void ec_encode_data_avx512(int len, int k, int rows, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding)
|
||||
void
|
||||
ec_encode_data_avx512(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding)
|
||||
{
|
||||
|
||||
if (len < 64) {
|
||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
if (len < 64) {
|
||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows >= 6) {
|
||||
gf_6vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_avx512(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
while (rows >= 6) {
|
||||
gf_6vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_avx512(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ec_encode_data_update_avx512(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding)
|
||||
void
|
||||
ec_encode_data_update_avx512(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding)
|
||||
{
|
||||
if (len < 64) {
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
if (len < 64) {
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows >= 6) {
|
||||
gf_6vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_avx512(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
while (rows >= 6) {
|
||||
gf_6vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_avx512(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#if AS_FEATURE_LEVEL >= 10
|
||||
|
||||
extern void gf_vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char *dest);
|
||||
extern void gf_2vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding);
|
||||
extern void gf_3vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding);
|
||||
extern void gf_4vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding);
|
||||
extern void gf_5vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding);
|
||||
extern void gf_6vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding);
|
||||
extern void
|
||||
gf_vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char *dest);
|
||||
extern void
|
||||
gf_2vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
extern void
|
||||
gf_3vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
extern void
|
||||
gf_4vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
extern void
|
||||
gf_5vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
extern void
|
||||
gf_6vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
|
||||
extern void gf_vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char *dest);
|
||||
extern void gf_2vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern void gf_3vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern void gf_4vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern void gf_5vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern void gf_6vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern void
|
||||
gf_vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char *dest);
|
||||
extern void
|
||||
gf_2vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
extern void
|
||||
gf_3vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
extern void
|
||||
gf_4vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
extern void
|
||||
gf_5vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
extern void
|
||||
gf_6vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
extern void gf_vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char *dest);
|
||||
extern void gf_2vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding);
|
||||
extern void gf_3vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding);
|
||||
extern void gf_vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char *dest);
|
||||
extern void gf_2vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern void gf_3vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern void gf_4vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern void gf_5vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
extern void
|
||||
gf_vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char *dest);
|
||||
extern void
|
||||
gf_2vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
extern void
|
||||
gf_3vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
extern void
|
||||
gf_vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char *dest);
|
||||
extern void
|
||||
gf_2vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
extern void
|
||||
gf_3vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
extern void
|
||||
gf_4vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
extern void
|
||||
gf_5vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
void ec_init_tables_gfni(int k, int rows, unsigned char *a, unsigned char *g_tbls)
|
||||
void
|
||||
ec_init_tables_gfni(int k, int rows, unsigned char *a, unsigned char *g_tbls)
|
||||
{
|
||||
int i, j;
|
||||
int i, j;
|
||||
|
||||
uint64_t *g64 = (uint64_t *) g_tbls;
|
||||
|
||||
for (i = 0; i < rows; i++)
|
||||
for (j = 0; j < k; j++)
|
||||
*(g64++) = gf_table_gfni[*a++];
|
||||
uint64_t *g64 = (uint64_t *) g_tbls;
|
||||
|
||||
for (i = 0; i < rows; i++)
|
||||
for (j = 0; j < k; j++)
|
||||
*(g64++) = gf_table_gfni[*a++];
|
||||
}
|
||||
|
||||
void ec_encode_data_avx512_gfni(int len, int k, int rows, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding)
|
||||
void
|
||||
ec_encode_data_avx512_gfni(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding)
|
||||
{
|
||||
|
||||
while (rows >= 6) {
|
||||
gf_6vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 8;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_avx512_gfni(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
while (rows >= 6) {
|
||||
gf_6vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 8;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_avx512_gfni(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ec_encode_data_avx2_gfni(int len, int k, int rows, unsigned char *g_tbls,
|
||||
unsigned char **data, unsigned char **coding)
|
||||
void
|
||||
ec_encode_data_avx2_gfni(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding)
|
||||
{
|
||||
while (rows >= 3) {
|
||||
gf_3vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
|
||||
g_tbls += 3 * k * 8;
|
||||
coding += 3;
|
||||
rows -= 3;
|
||||
}
|
||||
switch (rows) {
|
||||
case 2:
|
||||
gf_2vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_avx2_gfni(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
while (rows >= 3) {
|
||||
gf_3vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
|
||||
g_tbls += 3 * k * 8;
|
||||
coding += 3;
|
||||
rows -= 3;
|
||||
}
|
||||
switch (rows) {
|
||||
case 2:
|
||||
gf_2vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_avx2_gfni(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ec_encode_data_update_avx512_gfni(int len, int k, int rows, int vec_i,
|
||||
unsigned char *g_tbls, unsigned char *data,
|
||||
unsigned char **coding)
|
||||
void
|
||||
ec_encode_data_update_avx512_gfni(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding)
|
||||
{
|
||||
while (rows >= 6) {
|
||||
gf_6vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 8;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
while (rows >= 6) {
|
||||
gf_6vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 8;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 5:
|
||||
gf_5vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ec_encode_data_update_avx2_gfni(int len, int k, int rows, int vec_i,
|
||||
unsigned char *g_tbls, unsigned char *data,
|
||||
unsigned char **coding)
|
||||
void
|
||||
ec_encode_data_update_avx2_gfni(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding)
|
||||
{
|
||||
while (rows >= 5) {
|
||||
gf_5vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 5 * k * 8;
|
||||
coding += 5;
|
||||
rows -= 5;
|
||||
}
|
||||
switch (rows) {
|
||||
case 4:
|
||||
gf_4vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
while (rows >= 5) {
|
||||
gf_5vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 5 * k * 8;
|
||||
coding += 5;
|
||||
rows -= 5;
|
||||
}
|
||||
switch (rows) {
|
||||
case 4:
|
||||
gf_4vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // AS_FEATURE_LEVEL >= 10
|
||||
@ -416,119 +452,119 @@ void ec_encode_data_update_avx2_gfni(int len, int k, int rows, int vec_i,
|
||||
|
||||
#if __WORDSIZE == 64 || _WIN64 || __x86_64__
|
||||
|
||||
void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding)
|
||||
void
|
||||
ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding)
|
||||
{
|
||||
if (len < 16) {
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows > 6) {
|
||||
gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 6:
|
||||
gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 5:
|
||||
gf_5vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_sse(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
if (len < 16) {
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows > 6) {
|
||||
gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 6:
|
||||
gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 5:
|
||||
gf_5vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_sse(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding)
|
||||
void
|
||||
ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding)
|
||||
{
|
||||
if (len < 16) {
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
while (rows > 6) {
|
||||
gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 6:
|
||||
gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 5:
|
||||
gf_5vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_avx(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
|
||||
if (len < 16) {
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
while (rows > 6) {
|
||||
gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 6:
|
||||
gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 5:
|
||||
gf_5vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_avx(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding)
|
||||
void
|
||||
ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding)
|
||||
{
|
||||
if (len < 32) {
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
while (rows > 6) {
|
||||
gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 6:
|
||||
gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 5:
|
||||
gf_5vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_avx2(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
|
||||
if (len < 32) {
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
while (rows > 6) {
|
||||
gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 6:
|
||||
gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 5:
|
||||
gf_5vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_avx2(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
|
||||
|
@ -29,27 +29,27 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include <assert.h>
|
||||
#include "erasure_code.h"
|
||||
#include "test.h"
|
||||
|
||||
#ifndef GT_L3_CACHE
|
||||
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||
#define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
|
||||
#endif
|
||||
|
||||
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
|
||||
// Cached test, loop many times over small dataset
|
||||
# define TEST_SOURCES 32
|
||||
# define TEST_LEN(m) ((128*1024 / m) & ~(64-1))
|
||||
# define TEST_TYPE_STR "_warm"
|
||||
#elif defined (COLD_TEST)
|
||||
#define TEST_SOURCES 32
|
||||
#define TEST_LEN(m) ((128 * 1024 / m) & ~(64 - 1))
|
||||
#define TEST_TYPE_STR "_warm"
|
||||
#elif defined(COLD_TEST)
|
||||
// Uncached test. Pull from large mem base.
|
||||
# define TEST_SOURCES 32
|
||||
# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1))
|
||||
# define TEST_TYPE_STR "_cold"
|
||||
#elif defined (TEST_CUSTOM)
|
||||
# define TEST_TYPE_STR "_cus"
|
||||
#define TEST_SOURCES 32
|
||||
#define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64 - 1))
|
||||
#define TEST_TYPE_STR "_cold"
|
||||
#elif defined(TEST_CUSTOM)
|
||||
#define TEST_TYPE_STR "_cus"
|
||||
#endif
|
||||
|
||||
#define MMAX TEST_SOURCES
|
||||
@ -59,117 +59,120 @@
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
void ec_encode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs)
|
||||
void
|
||||
ec_encode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs)
|
||||
{
|
||||
ec_init_tables_base(k, m - k, &a[k * k], g_tbls);
|
||||
ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
ec_init_tables_base(k, m - k, &a[k * k], g_tbls);
|
||||
ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
}
|
||||
|
||||
int ec_decode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, u8 * src_in_err,
|
||||
u8 * src_err_list, int nerrs, u8 ** temp_buffs)
|
||||
int
|
||||
ec_decode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs, u8 *src_in_err, u8 *src_err_list,
|
||||
int nerrs, u8 **temp_buffs)
|
||||
{
|
||||
int i, j, r;
|
||||
u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 *recov[TEST_SOURCES];
|
||||
int i, j, r;
|
||||
u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 *recov[TEST_SOURCES];
|
||||
|
||||
// Construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r])
|
||||
r++;
|
||||
recov[i] = buffs[r];
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
// Construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r])
|
||||
r++;
|
||||
recov[i] = buffs[r];
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix(b, d, k) < 0)
|
||||
return BAD_MATRIX;
|
||||
if (gf_invert_matrix(b, d, k) < 0)
|
||||
return BAD_MATRIX;
|
||||
|
||||
for (i = 0; i < nerrs; i++)
|
||||
for (j = 0; j < k; j++)
|
||||
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||
for (i = 0; i < nerrs; i++)
|
||||
for (j = 0; j < k; j++)
|
||||
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||
|
||||
// Recover data
|
||||
ec_init_tables_base(k, nerrs, c, g_tbls);
|
||||
ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
|
||||
// Recover data
|
||||
ec_init_tables_base(k, nerrs, c, g_tbls);
|
||||
ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, m, k, nerrs, check;
|
||||
void *buf;
|
||||
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||
u8 a[MMAX * KMAX];
|
||||
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 src_err_list[TEST_SOURCES];
|
||||
struct perf start;
|
||||
int i, j, m, k, nerrs, check;
|
||||
void *buf;
|
||||
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||
u8 a[MMAX * KMAX];
|
||||
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 src_err_list[TEST_SOURCES];
|
||||
struct perf start;
|
||||
|
||||
// Pick test parameters
|
||||
m = 14;
|
||||
k = 10;
|
||||
nerrs = 4;
|
||||
const u8 err_list[] = { 2, 4, 5, 7 };
|
||||
// Pick test parameters
|
||||
m = 14;
|
||||
k = 10;
|
||||
nerrs = 4;
|
||||
const u8 err_list[] = { 2, 4, 5, 7 };
|
||||
|
||||
printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||
printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||
|
||||
// check input parameters
|
||||
assert(!(m > MMAX || k > KMAX || nerrs > (m - k)));
|
||||
// check input parameters
|
||||
assert(!(m > MMAX || k > KMAX || nerrs > (m - k)));
|
||||
|
||||
memcpy(src_err_list, err_list, nerrs);
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0; i < nerrs; i++)
|
||||
src_in_err[src_err_list[i]] = 1;
|
||||
memcpy(src_err_list, err_list, nerrs);
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0; i < nerrs; i++)
|
||||
src_in_err[src_err_list[i]] = 1;
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < m; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("alloc error: Fail\n");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < m; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("alloc error: Fail\n");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
for (i = 0; i < (m - k); i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("alloc error: Fail\n");
|
||||
return -1;
|
||||
}
|
||||
temp_buffs[i] = buf;
|
||||
}
|
||||
for (i = 0; i < (m - k); i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("alloc error: Fail\n");
|
||||
return -1;
|
||||
}
|
||||
temp_buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN(m); j++)
|
||||
buffs[i][j] = rand();
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN(m); j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
|
||||
// Start encode test
|
||||
BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs));
|
||||
printf("erasure_code_base_encode" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)(TEST_LEN(m)) * (m));
|
||||
// Start encode test
|
||||
BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs));
|
||||
printf("erasure_code_base_encode" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) (TEST_LEN(m)) * (m));
|
||||
|
||||
// Start decode test
|
||||
BENCHMARK(&start, BENCHMARK_TIME, check =
|
||||
ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
|
||||
temp_buffs));
|
||||
// Start decode test
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
|
||||
temp_buffs));
|
||||
|
||||
if (check == BAD_MATRIX) {
|
||||
printf("BAD MATRIX\n");
|
||||
return check;
|
||||
}
|
||||
if (check == BAD_MATRIX) {
|
||||
printf("BAD MATRIX\n");
|
||||
return check;
|
||||
}
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("erasure_code_base_decode" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
|
||||
printf("erasure_code_base_decode" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) (TEST_LEN(m)) * (k + nerrs));
|
||||
|
||||
printf("done all: Pass\n");
|
||||
return 0;
|
||||
printf("done all: Pass\n");
|
||||
return 0;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -29,29 +29,29 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure_code.h"
|
||||
#include "test.h"
|
||||
|
||||
#ifndef GT_L3_CACHE
|
||||
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||
#define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
|
||||
#endif
|
||||
|
||||
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
|
||||
// Cached test, loop many times over small dataset
|
||||
# define TEST_SOURCES 32
|
||||
# define TEST_LEN(m) ((128*1024 / m) & ~(64-1))
|
||||
# define TEST_TYPE_STR "_warm"
|
||||
#elif defined (COLD_TEST)
|
||||
#define TEST_SOURCES 32
|
||||
#define TEST_LEN(m) ((128 * 1024 / m) & ~(64 - 1))
|
||||
#define TEST_TYPE_STR "_warm"
|
||||
#elif defined(COLD_TEST)
|
||||
// Uncached test. Pull from large mem base.
|
||||
# define TEST_SOURCES 32
|
||||
# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1))
|
||||
# define TEST_TYPE_STR "_cold"
|
||||
#elif defined (TEST_CUSTOM)
|
||||
# define TEST_TYPE_STR "_cus"
|
||||
#define TEST_SOURCES 32
|
||||
#define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64 - 1))
|
||||
#define TEST_TYPE_STR "_cold"
|
||||
#elif defined(TEST_CUSTOM)
|
||||
#define TEST_TYPE_STR "_cus"
|
||||
#endif
|
||||
#ifndef TEST_SEED
|
||||
# define TEST_SEED 0x1234
|
||||
#define TEST_SEED 0x1234
|
||||
#endif
|
||||
|
||||
#define MMAX TEST_SOURCES
|
||||
@ -61,215 +61,219 @@
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
void usage(const char *app_name)
|
||||
void
|
||||
usage(const char *app_name)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Usage: %s [options]\n"
|
||||
" -h Help\n"
|
||||
" -k <val> Number of source buffers\n"
|
||||
" -p <val> Number of parity buffers\n"
|
||||
" -e <val> Number of simulated buffers with errors (cannot be higher than p or k)\n",
|
||||
app_name);
|
||||
fprintf(stderr,
|
||||
"Usage: %s [options]\n"
|
||||
" -h Help\n"
|
||||
" -k <val> Number of source buffers\n"
|
||||
" -p <val> Number of parity buffers\n"
|
||||
" -e <val> Number of simulated buffers with errors (cannot be higher than p or "
|
||||
"k)\n",
|
||||
app_name);
|
||||
}
|
||||
|
||||
void ec_encode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, struct perf *start)
|
||||
void
|
||||
ec_encode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs, struct perf *start)
|
||||
{
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
BENCHMARK(start, BENCHMARK_TIME,
|
||||
ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]));
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
BENCHMARK(start, BENCHMARK_TIME,
|
||||
ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]));
|
||||
}
|
||||
|
||||
int ec_decode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, u8 * src_in_err,
|
||||
u8 * src_err_list, int nerrs, u8 ** temp_buffs, struct perf *start)
|
||||
int
|
||||
ec_decode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs, u8 *src_in_err, u8 *src_err_list,
|
||||
int nerrs, u8 **temp_buffs, struct perf *start)
|
||||
{
|
||||
int i, j, r;
|
||||
u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 *recov[TEST_SOURCES];
|
||||
int i, j, r;
|
||||
u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 *recov[TEST_SOURCES];
|
||||
|
||||
// Construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r])
|
||||
r++;
|
||||
recov[i] = buffs[r];
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
// Construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r])
|
||||
r++;
|
||||
recov[i] = buffs[r];
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix(b, d, k) < 0)
|
||||
return BAD_MATRIX;
|
||||
if (gf_invert_matrix(b, d, k) < 0)
|
||||
return BAD_MATRIX;
|
||||
|
||||
for (i = 0; i < nerrs; i++)
|
||||
for (j = 0; j < k; j++)
|
||||
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||
for (i = 0; i < nerrs; i++)
|
||||
for (j = 0; j < k; j++)
|
||||
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, c, g_tbls);
|
||||
BENCHMARK(start, BENCHMARK_TIME,
|
||||
ec_encode_data(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs));
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, c, g_tbls);
|
||||
BENCHMARK(start, BENCHMARK_TIME,
|
||||
ec_encode_data(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs));
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, m, k, p, nerrs, check, ret = -1;
|
||||
void *buf;
|
||||
u8 *temp_buffs[TEST_SOURCES] = { NULL };
|
||||
u8 *buffs[TEST_SOURCES] = { NULL };
|
||||
u8 a[MMAX * KMAX];
|
||||
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 src_err_list[TEST_SOURCES];
|
||||
struct perf start;
|
||||
int i, j, m, k, p, nerrs, check, ret = -1;
|
||||
void *buf;
|
||||
u8 *temp_buffs[TEST_SOURCES] = { NULL };
|
||||
u8 *buffs[TEST_SOURCES] = { NULL };
|
||||
u8 a[MMAX * KMAX];
|
||||
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 src_err_list[TEST_SOURCES];
|
||||
struct perf start;
|
||||
|
||||
/* Set default parameters */
|
||||
k = 8;
|
||||
p = 6;
|
||||
nerrs = 4;
|
||||
/* Set default parameters */
|
||||
k = 8;
|
||||
p = 6;
|
||||
nerrs = 4;
|
||||
|
||||
/* Parse arguments */
|
||||
for (i = 1; i < argc; i++) {
|
||||
if (strcmp(argv[i], "-k") == 0) {
|
||||
k = atoi(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-p") == 0) {
|
||||
p = atoi(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-e") == 0) {
|
||||
nerrs = atoi(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-h") == 0) {
|
||||
usage(argv[0]);
|
||||
return 0;
|
||||
} else {
|
||||
usage(argv[0]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
/* Parse arguments */
|
||||
for (i = 1; i < argc; i++) {
|
||||
if (strcmp(argv[i], "-k") == 0) {
|
||||
k = atoi(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-p") == 0) {
|
||||
p = atoi(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-e") == 0) {
|
||||
nerrs = atoi(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-h") == 0) {
|
||||
usage(argv[0]);
|
||||
return 0;
|
||||
} else {
|
||||
usage(argv[0]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (nerrs > k) {
|
||||
printf
|
||||
("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
|
||||
nerrs, k);
|
||||
return -1;
|
||||
}
|
||||
if (nerrs > k) {
|
||||
printf("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
|
||||
nerrs, k);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (k <= 0) {
|
||||
printf("Number of source buffers (%d) must be > 0\n", k);
|
||||
return -1;
|
||||
}
|
||||
if (k <= 0) {
|
||||
printf("Number of source buffers (%d) must be > 0\n", k);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (p <= 0) {
|
||||
printf("Number of parity buffers (%d) must be > 0\n", p);
|
||||
return -1;
|
||||
}
|
||||
if (p <= 0) {
|
||||
printf("Number of parity buffers (%d) must be > 0\n", p);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (nerrs <= 0) {
|
||||
printf("Number of errors (%d) must be > 0\n", nerrs);
|
||||
return -1;
|
||||
}
|
||||
if (nerrs <= 0) {
|
||||
printf("Number of errors (%d) must be > 0\n", nerrs);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (nerrs > p) {
|
||||
printf
|
||||
("Number of errors (%d) cannot be higher than number of parity buffers (%d)\n",
|
||||
nerrs, p);
|
||||
return -1;
|
||||
}
|
||||
if (nerrs > p) {
|
||||
printf("Number of errors (%d) cannot be higher than number of parity buffers "
|
||||
"(%d)\n",
|
||||
nerrs, p);
|
||||
return -1;
|
||||
}
|
||||
|
||||
m = k + p;
|
||||
m = k + p;
|
||||
|
||||
if (m > MMAX) {
|
||||
printf("Number of total buffers (data and parity) cannot be higher than %d\n",
|
||||
MMAX);
|
||||
return -1;
|
||||
}
|
||||
if (m > MMAX) {
|
||||
printf("Number of total buffers (data and parity) cannot be higher than %d\n",
|
||||
MMAX);
|
||||
return -1;
|
||||
}
|
||||
|
||||
u8 *err_list = malloc((size_t)nerrs);
|
||||
if (err_list == NULL) {
|
||||
printf("Error allocating list of array of error indices\n");
|
||||
return -1;
|
||||
}
|
||||
u8 *err_list = malloc((size_t) nerrs);
|
||||
if (err_list == NULL) {
|
||||
printf("Error allocating list of array of error indices\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
srand(TEST_SEED);
|
||||
srand(TEST_SEED);
|
||||
|
||||
for (i = 0; i < nerrs;) {
|
||||
u8 next_err = rand() % k;
|
||||
for (j = 0; j < i; j++)
|
||||
if (next_err == err_list[j])
|
||||
break;
|
||||
if (j != i)
|
||||
continue;
|
||||
err_list[i++] = next_err;
|
||||
}
|
||||
for (i = 0; i < nerrs;) {
|
||||
u8 next_err = rand() % k;
|
||||
for (j = 0; j < i; j++)
|
||||
if (next_err == err_list[j])
|
||||
break;
|
||||
if (j != i)
|
||||
continue;
|
||||
err_list[i++] = next_err;
|
||||
}
|
||||
|
||||
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k,
|
||||
p, nerrs);
|
||||
for (i = 0; i < nerrs; i++)
|
||||
printf("%d ", (int)err_list[i]);
|
||||
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k, p,
|
||||
nerrs);
|
||||
for (i = 0; i < nerrs; i++)
|
||||
printf("%d ", (int) err_list[i]);
|
||||
|
||||
printf("])\n");
|
||||
printf("])\n");
|
||||
|
||||
printf("erasure_code_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||
printf("erasure_code_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||
|
||||
memcpy(src_err_list, err_list, nerrs);
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0; i < nerrs; i++)
|
||||
src_in_err[src_err_list[i]] = 1;
|
||||
memcpy(src_err_list, err_list, nerrs);
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0; i < nerrs; i++)
|
||||
src_in_err[src_err_list[i]] = 1;
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < m; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("Error allocating buffers\n");
|
||||
goto exit;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < m; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("Error allocating buffers\n");
|
||||
goto exit;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
for (i = 0; i < p; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("Error allocating buffers\n");
|
||||
goto exit;
|
||||
}
|
||||
temp_buffs[i] = buf;
|
||||
}
|
||||
for (i = 0; i < p; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("Error allocating buffers\n");
|
||||
goto exit;
|
||||
}
|
||||
temp_buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN(m); j++)
|
||||
buffs[i][j] = rand();
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN(m); j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
|
||||
// Start encode test
|
||||
ec_encode_perf(m, k, a, g_tbls, buffs, &start);
|
||||
printf("erasure_code_encode" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)(TEST_LEN(m)) * (m));
|
||||
// Start encode test
|
||||
ec_encode_perf(m, k, a, g_tbls, buffs, &start);
|
||||
printf("erasure_code_encode" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) (TEST_LEN(m)) * (m));
|
||||
|
||||
// Start decode test
|
||||
check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
|
||||
temp_buffs, &start);
|
||||
// Start decode test
|
||||
check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs, temp_buffs,
|
||||
&start);
|
||||
|
||||
if (check == BAD_MATRIX) {
|
||||
printf("BAD MATRIX\n");
|
||||
ret = check;
|
||||
goto exit;
|
||||
}
|
||||
if (check == BAD_MATRIX) {
|
||||
printf("BAD MATRIX\n");
|
||||
ret = check;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
printf("erasure_code_decode" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
|
||||
printf("erasure_code_decode" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) (TEST_LEN(m)) * (k + nerrs));
|
||||
|
||||
printf("done all: Pass\n");
|
||||
printf("done all: Pass\n");
|
||||
|
||||
ret = 0;
|
||||
ret = 0;
|
||||
|
||||
exit:
|
||||
free(err_list);
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
free(buffs[i]);
|
||||
free(temp_buffs[i]);
|
||||
}
|
||||
return ret;
|
||||
exit:
|
||||
free(err_list);
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
free(buffs[i]);
|
||||
free(temp_buffs[i]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -29,43 +29,43 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure_code.h"
|
||||
#include "test.h"
|
||||
|
||||
//By default, test multibinary version
|
||||
// By default, test multibinary version
|
||||
#ifndef FUNCTION_UNDER_TEST
|
||||
# define FUNCTION_UNDER_TEST ec_encode_data_update
|
||||
# define REF_FUNCTION ec_encode_data
|
||||
#define FUNCTION_UNDER_TEST ec_encode_data_update
|
||||
#define REF_FUNCTION ec_encode_data
|
||||
#endif
|
||||
|
||||
//By default, test EC(8+4)
|
||||
// By default, test EC(8+4)
|
||||
#if (!defined(VECT))
|
||||
# define VECT 4
|
||||
#define VECT 4
|
||||
#endif
|
||||
|
||||
#define str(s) #s
|
||||
#define str(s) #s
|
||||
#define xstr(s) str(s)
|
||||
|
||||
#ifndef GT_L3_CACHE
|
||||
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||
#define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
|
||||
#endif
|
||||
|
||||
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
|
||||
// Cached test, loop many times over small dataset
|
||||
# define TEST_SOURCES 32
|
||||
# define TEST_LEN(m) ((128*1024 / m) & ~(64-1))
|
||||
# define TEST_TYPE_STR "_warm"
|
||||
#elif defined (COLD_TEST)
|
||||
#define TEST_SOURCES 32
|
||||
#define TEST_LEN(m) ((128 * 1024 / m) & ~(64 - 1))
|
||||
#define TEST_TYPE_STR "_warm"
|
||||
#elif defined(COLD_TEST)
|
||||
// Uncached test. Pull from large mem base.
|
||||
# define TEST_SOURCES 32
|
||||
# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1))
|
||||
# define TEST_TYPE_STR "_cold"
|
||||
#elif defined (TEST_CUSTOM)
|
||||
# define TEST_TYPE_STR "_cus"
|
||||
#define TEST_SOURCES 32
|
||||
#define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64 - 1))
|
||||
#define TEST_TYPE_STR "_cold"
|
||||
#elif defined(TEST_CUSTOM)
|
||||
#define TEST_TYPE_STR "_cus"
|
||||
#endif
|
||||
#ifndef TEST_SEED
|
||||
# define TEST_SEED 0x1234
|
||||
#define TEST_SEED 0x1234
|
||||
#endif
|
||||
|
||||
#define MMAX TEST_SOURCES
|
||||
@ -73,308 +73,316 @@
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
void usage(const char *app_name)
|
||||
void
|
||||
usage(const char *app_name)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Usage: %s [options]\n"
|
||||
" -h Help\n"
|
||||
" -k <val> Number of source buffers\n"
|
||||
" -p <val> Number of parity buffers\n"
|
||||
" -e <val> Number of simulated buffers with errors (cannot be higher than p or k)\n",
|
||||
app_name);
|
||||
fprintf(stderr,
|
||||
"Usage: %s [options]\n"
|
||||
" -h Help\n"
|
||||
" -k <val> Number of source buffers\n"
|
||||
" -p <val> Number of parity buffers\n"
|
||||
" -e <val> Number of simulated buffers with errors (cannot be higher than p or "
|
||||
"k)\n",
|
||||
app_name);
|
||||
}
|
||||
|
||||
void dump(unsigned char *buf, int len)
|
||||
void
|
||||
dump(unsigned char *buf, int len)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < len;) {
|
||||
printf(" %2x", 0xff & buf[i++]);
|
||||
if (i % 32 == 0)
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
int i;
|
||||
for (i = 0; i < len;) {
|
||||
printf(" %2x", 0xff & buf[i++]);
|
||||
if (i % 32 == 0)
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void encode_update_test_ref(int m, int k, u8 * g_tbls, u8 ** buffs, u8 * a)
|
||||
void
|
||||
encode_update_test_ref(int m, int k, u8 *g_tbls, u8 **buffs, u8 *a)
|
||||
{
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
REF_FUNCTION(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
REF_FUNCTION(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
}
|
||||
|
||||
void encode_update_test(int m, int k, u8 * g_tbls, u8 ** perf_update_buffs, u8 * a)
|
||||
void
|
||||
encode_update_test(int m, int k, u8 *g_tbls, u8 **perf_update_buffs, u8 *a)
|
||||
{
|
||||
int i;
|
||||
int i;
|
||||
|
||||
// Make parity vects
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
for (i = 0; i < k; i++) {
|
||||
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls,
|
||||
perf_update_buffs[i], &perf_update_buffs[k]);
|
||||
}
|
||||
// Make parity vects
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
for (i = 0; i < k; i++) {
|
||||
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, perf_update_buffs[i],
|
||||
&perf_update_buffs[k]);
|
||||
}
|
||||
}
|
||||
|
||||
int decode_test(int m, int k, u8 ** update_buffs, u8 ** recov, u8 * a, u8 * src_in_err,
|
||||
u8 * src_err_list, int nerrs, u8 * g_tbls, u8 ** perf_update_buffs)
|
||||
int
|
||||
decode_test(int m, int k, u8 **update_buffs, u8 **recov, u8 *a, u8 *src_in_err, u8 *src_err_list,
|
||||
int nerrs, u8 *g_tbls, u8 **perf_update_buffs)
|
||||
{
|
||||
int i, j, r;
|
||||
u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||
// Construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r])
|
||||
r++;
|
||||
recov[i] = update_buffs[r];
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
int i, j, r;
|
||||
u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||
// Construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r])
|
||||
r++;
|
||||
recov[i] = update_buffs[r];
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix(b, d, k) < 0) {
|
||||
printf("BAD MATRIX\n");
|
||||
return -1;
|
||||
}
|
||||
if (gf_invert_matrix(b, d, k) < 0) {
|
||||
printf("BAD MATRIX\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < nerrs; i++)
|
||||
for (j = 0; j < k; j++)
|
||||
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||
for (i = 0; i < nerrs; i++)
|
||||
for (j = 0; j < k; j++)
|
||||
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, c, g_tbls);
|
||||
for (i = 0; i < k; i++) {
|
||||
FUNCTION_UNDER_TEST(TEST_LEN(m), k, nerrs, i, g_tbls, recov[i],
|
||||
perf_update_buffs);
|
||||
}
|
||||
return 0;
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, c, g_tbls);
|
||||
for (i = 0; i < k; i++) {
|
||||
FUNCTION_UNDER_TEST(TEST_LEN(m), k, nerrs, i, g_tbls, recov[i], perf_update_buffs);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, check, m, k, p, nerrs, ret = -1;
|
||||
void *buf;
|
||||
u8 *temp_buffs[TEST_SOURCES] = { NULL };
|
||||
u8 *buffs[TEST_SOURCES] = { NULL };
|
||||
u8 *update_buffs[TEST_SOURCES] = { NULL };
|
||||
u8 *perf_update_buffs[TEST_SOURCES] = { NULL };
|
||||
u8 a[MMAX * KMAX];
|
||||
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||
struct perf start;
|
||||
int i, j, check, m, k, p, nerrs, ret = -1;
|
||||
void *buf;
|
||||
u8 *temp_buffs[TEST_SOURCES] = { NULL };
|
||||
u8 *buffs[TEST_SOURCES] = { NULL };
|
||||
u8 *update_buffs[TEST_SOURCES] = { NULL };
|
||||
u8 *perf_update_buffs[TEST_SOURCES] = { NULL };
|
||||
u8 a[MMAX * KMAX];
|
||||
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||
struct perf start;
|
||||
|
||||
/* Set default parameters */
|
||||
k = 10;
|
||||
p = VECT;
|
||||
nerrs = VECT;
|
||||
/* Set default parameters */
|
||||
k = 10;
|
||||
p = VECT;
|
||||
nerrs = VECT;
|
||||
|
||||
/* Parse arguments */
|
||||
for (i = 1; i < argc; i++) {
|
||||
if (strcmp(argv[i], "-k") == 0) {
|
||||
k = atoi(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-p") == 0) {
|
||||
p = atoi(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-e") == 0) {
|
||||
nerrs = atoi(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-h") == 0) {
|
||||
usage(argv[0]);
|
||||
return 0;
|
||||
} else {
|
||||
usage(argv[0]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
/* Parse arguments */
|
||||
for (i = 1; i < argc; i++) {
|
||||
if (strcmp(argv[i], "-k") == 0) {
|
||||
k = atoi(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-p") == 0) {
|
||||
p = atoi(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-e") == 0) {
|
||||
nerrs = atoi(argv[++i]);
|
||||
} else if (strcmp(argv[i], "-h") == 0) {
|
||||
usage(argv[0]);
|
||||
return 0;
|
||||
} else {
|
||||
usage(argv[0]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (nerrs > k) {
|
||||
printf
|
||||
("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
|
||||
nerrs, k);
|
||||
return -1;
|
||||
}
|
||||
if (nerrs > k) {
|
||||
printf("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
|
||||
nerrs, k);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (k <= 0) {
|
||||
printf("Number of source buffers (%d) must be > 0\n", k);
|
||||
return -1;
|
||||
}
|
||||
if (k <= 0) {
|
||||
printf("Number of source buffers (%d) must be > 0\n", k);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (p <= 0) {
|
||||
printf("Number of parity buffers (%d) must be > 0\n", p);
|
||||
return -1;
|
||||
}
|
||||
if (p <= 0) {
|
||||
printf("Number of parity buffers (%d) must be > 0\n", p);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (nerrs > p) {
|
||||
printf
|
||||
("Number of errors (%d) cannot be higher than number of parity buffers (%d)\n",
|
||||
nerrs, p);
|
||||
return -1;
|
||||
}
|
||||
if (nerrs > p) {
|
||||
printf("Number of errors (%d) cannot be higher than number of parity buffers "
|
||||
"(%d)\n",
|
||||
nerrs, p);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (nerrs <= 0) {
|
||||
printf("Number of errors (%d) must be > 0\n", nerrs);
|
||||
return -1;
|
||||
}
|
||||
if (nerrs <= 0) {
|
||||
printf("Number of errors (%d) must be > 0\n", nerrs);
|
||||
return -1;
|
||||
}
|
||||
|
||||
m = k + p;
|
||||
m = k + p;
|
||||
|
||||
if (m > MMAX) {
|
||||
printf("Number of total buffers (data and parity) cannot be higher than %d\n",
|
||||
MMAX);
|
||||
return -1;
|
||||
}
|
||||
if (m > MMAX) {
|
||||
printf("Number of total buffers (data and parity) cannot be higher than %d\n",
|
||||
MMAX);
|
||||
return -1;
|
||||
}
|
||||
|
||||
u8 *err_list = malloc((size_t)nerrs);
|
||||
if (err_list == NULL) {
|
||||
printf("Error allocating list of array of error indices\n");
|
||||
return -1;
|
||||
}
|
||||
u8 *err_list = malloc((size_t) nerrs);
|
||||
if (err_list == NULL) {
|
||||
printf("Error allocating list of array of error indices\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
srand(TEST_SEED);
|
||||
srand(TEST_SEED);
|
||||
|
||||
for (i = 0; i < nerrs;) {
|
||||
u8 next_err = rand() % k;
|
||||
for (j = 0; j < i; j++)
|
||||
if (next_err == err_list[j])
|
||||
break;
|
||||
if (j != i)
|
||||
continue;
|
||||
err_list[i++] = next_err;
|
||||
}
|
||||
for (i = 0; i < nerrs;) {
|
||||
u8 next_err = rand() % k;
|
||||
for (j = 0; j < i; j++)
|
||||
if (next_err == err_list[j])
|
||||
break;
|
||||
if (j != i)
|
||||
continue;
|
||||
err_list[i++] = next_err;
|
||||
}
|
||||
|
||||
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k,
|
||||
p, nerrs);
|
||||
for (i = 0; i < nerrs; i++)
|
||||
printf("%d ", err_list[i]);
|
||||
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k, p,
|
||||
nerrs);
|
||||
for (i = 0; i < nerrs; i++)
|
||||
printf("%d ", err_list[i]);
|
||||
|
||||
printf("])\n");
|
||||
printf("])\n");
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) "_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||
printf(xstr(FUNCTION_UNDER_TEST) "_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||
|
||||
memcpy(src_err_list, err_list, nerrs);
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0; i < nerrs; i++)
|
||||
src_in_err[src_err_list[i]] = 1;
|
||||
memcpy(src_err_list, err_list, nerrs);
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0; i < nerrs; i++)
|
||||
src_in_err[src_err_list[i]] = 1;
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < m; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("Error allocating buffers\n");
|
||||
goto exit;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < m; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("Error allocating buffers\n");
|
||||
goto exit;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
for (i = 0; i < (m - k); i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("Error allocating buffers\n");
|
||||
goto exit;
|
||||
}
|
||||
temp_buffs[i] = buf;
|
||||
memset(temp_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function
|
||||
}
|
||||
for (i = 0; i < (m - k); i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("Error allocating buffers\n");
|
||||
goto exit;
|
||||
}
|
||||
temp_buffs[i] = buf;
|
||||
memset(temp_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be
|
||||
// zero for update function
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("Error allocating buffers\n");
|
||||
goto exit;
|
||||
}
|
||||
update_buffs[i] = buf;
|
||||
memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function
|
||||
}
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("Error allocating buffers\n");
|
||||
goto exit;
|
||||
}
|
||||
perf_update_buffs[i] = buf;
|
||||
memset(perf_update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function
|
||||
}
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("Error allocating buffers\n");
|
||||
goto exit;
|
||||
}
|
||||
update_buffs[i] = buf;
|
||||
memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be
|
||||
// zero for update function
|
||||
}
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("Error allocating buffers\n");
|
||||
goto exit;
|
||||
}
|
||||
perf_update_buffs[i] = buf;
|
||||
memset(perf_update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer
|
||||
// to be zero for update function
|
||||
}
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN(m); j++) {
|
||||
buffs[i][j] = rand();
|
||||
update_buffs[i][j] = buffs[i][j];
|
||||
}
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN(m); j++) {
|
||||
buffs[i][j] = rand();
|
||||
update_buffs[i][j] = buffs[i][j];
|
||||
}
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
|
||||
encode_update_test_ref(m, k, g_tbls, buffs, a);
|
||||
encode_update_test(m, k, g_tbls, update_buffs, a);
|
||||
for (i = 0; i < m - k; i++) {
|
||||
if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN(m))) {
|
||||
printf("\nupdate_buffs%d :", i);
|
||||
dump(update_buffs[k + i], 25);
|
||||
printf("buffs%d :", i);
|
||||
dump(buffs[k + i], 25);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
encode_update_test_ref(m, k, g_tbls, buffs, a);
|
||||
encode_update_test(m, k, g_tbls, update_buffs, a);
|
||||
for (i = 0; i < m - k; i++) {
|
||||
if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN(m))) {
|
||||
printf("\nupdate_buffs%d :", i);
|
||||
dump(update_buffs[k + i], 25);
|
||||
printf("buffs%d :", i);
|
||||
dump(buffs[k + i], 25);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DO_REF_PERF
|
||||
// Start encode test
|
||||
BENCHMARK(&start, BENCHMARK_TIME, encode_update_test_ref(m, k, g_tbls, buffs, a));
|
||||
printf(xstr(REF_FUNCTION) TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)(TEST_LEN(m)) * (m));
|
||||
// Start encode test
|
||||
BENCHMARK(&start, BENCHMARK_TIME, encode_update_test_ref(m, k, g_tbls, buffs, a));
|
||||
printf(xstr(REF_FUNCTION) TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) (TEST_LEN(m)) * (m));
|
||||
#endif
|
||||
|
||||
// Start encode test
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
encode_update_test(m, k, g_tbls, perf_update_buffs, a));
|
||||
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)(TEST_LEN(m)) * (m));
|
||||
// Start encode test
|
||||
BENCHMARK(&start, BENCHMARK_TIME, encode_update_test(m, k, g_tbls, perf_update_buffs, a));
|
||||
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) (TEST_LEN(m)) * (m));
|
||||
|
||||
// Start encode test
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
// Make parity vects
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
|
||||
&perf_update_buffs[k]));
|
||||
printf(xstr(FUNCTION_UNDER_TEST) "_single_src" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1));
|
||||
// Start encode test
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
// Make parity vects
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
|
||||
&perf_update_buffs[k]));
|
||||
printf(xstr(FUNCTION_UNDER_TEST) "_single_src" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) (TEST_LEN(m)) * (m - k + 1));
|
||||
|
||||
// Start encode test
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
// Make parity vects
|
||||
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
|
||||
&perf_update_buffs[k]));
|
||||
printf(xstr(FUNCTION_UNDER_TEST) "_single_src_simple" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1));
|
||||
// Start encode test
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
// Make parity vects
|
||||
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
|
||||
&perf_update_buffs[k]));
|
||||
printf(xstr(FUNCTION_UNDER_TEST) "_single_src_simple" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) (TEST_LEN(m)) * (m - k + 1));
|
||||
|
||||
for (i = k; i < m; i++) {
|
||||
memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function
|
||||
}
|
||||
for (i = 0; i < k; i++) {
|
||||
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, update_buffs[i],
|
||||
&update_buffs[k]);
|
||||
}
|
||||
for (i = k; i < m; i++) {
|
||||
memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be
|
||||
// zero for update function
|
||||
}
|
||||
for (i = 0; i < k; i++) {
|
||||
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, update_buffs[i],
|
||||
&update_buffs[k]);
|
||||
}
|
||||
|
||||
decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list,
|
||||
nerrs, g_tbls, temp_buffs);
|
||||
BENCHMARK(&start, BENCHMARK_TIME, check =
|
||||
decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list,
|
||||
nerrs, g_tbls, perf_update_buffs));
|
||||
if (check) {
|
||||
printf("BAD_MATRIX\n");
|
||||
ret = check;
|
||||
goto exit;
|
||||
}
|
||||
decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list, nerrs, g_tbls,
|
||||
temp_buffs);
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
check = decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list, nerrs,
|
||||
g_tbls, perf_update_buffs));
|
||||
if (check) {
|
||||
printf("BAD_MATRIX\n");
|
||||
ret = check;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
if (0 != memcmp(temp_buffs[i], update_buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||
printf("Fail error recovery (%d, %d, %d) - \n", m, k, nerrs);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
if (0 != memcmp(temp_buffs[i], update_buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||
printf("Fail error recovery (%d, %d, %d) - \n", m, k, nerrs);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) "_decode" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
|
||||
printf(xstr(FUNCTION_UNDER_TEST) "_decode" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) (TEST_LEN(m)) * (k + nerrs));
|
||||
|
||||
printf("done all: Pass\n");
|
||||
printf("done all: Pass\n");
|
||||
|
||||
ret = 0;
|
||||
ret = 0;
|
||||
|
||||
exit:
|
||||
free(err_list);
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
free(buffs[i]);
|
||||
free(temp_buffs[i]);
|
||||
free(update_buffs[i]);
|
||||
free(perf_update_buffs[i]);
|
||||
}
|
||||
return ret;
|
||||
exit:
|
||||
free(err_list);
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
free(buffs[i]);
|
||||
free(temp_buffs[i]);
|
||||
free(update_buffs[i]);
|
||||
free(perf_update_buffs[i]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -3,114 +3,117 @@
|
||||
#include <stdio.h>
|
||||
#include "erasure_code.h"
|
||||
|
||||
#define MAX_CHECK 63 /* Size is limited by using uint64_t to represent subsets */
|
||||
#define M_MAX 0x20
|
||||
#define K_MAX 0x10
|
||||
#define ROWS M_MAX
|
||||
#define COLS K_MAX
|
||||
#define MAX_CHECK 63 /* Size is limited by using uint64_t to represent subsets */
|
||||
#define M_MAX 0x20
|
||||
#define K_MAX 0x10
|
||||
#define ROWS M_MAX
|
||||
#define COLS K_MAX
|
||||
|
||||
static inline uint64_t min(const uint64_t a, const uint64_t b)
|
||||
static inline uint64_t
|
||||
min(const uint64_t a, const uint64_t b)
|
||||
{
|
||||
if (a <= b)
|
||||
return a;
|
||||
else
|
||||
return b;
|
||||
if (a <= b)
|
||||
return a;
|
||||
else
|
||||
return b;
|
||||
}
|
||||
|
||||
void gen_sub_matrix(unsigned char *out_matrix, const uint64_t dim, unsigned char *in_matrix,
|
||||
const uint64_t rows, const uint64_t cols, const uint64_t row_indicator,
|
||||
const uint64_t col_indicator)
|
||||
void
|
||||
gen_sub_matrix(unsigned char *out_matrix, const uint64_t dim, unsigned char *in_matrix,
|
||||
const uint64_t rows, const uint64_t cols, const uint64_t row_indicator,
|
||||
const uint64_t col_indicator)
|
||||
{
|
||||
uint64_t i, j, r, s;
|
||||
uint64_t i, j, r, s;
|
||||
|
||||
for (i = 0, r = 0; i < rows; i++) {
|
||||
if (!(row_indicator & ((uint64_t) 1 << i)))
|
||||
continue;
|
||||
for (i = 0, r = 0; i < rows; i++) {
|
||||
if (!(row_indicator & ((uint64_t) 1 << i)))
|
||||
continue;
|
||||
|
||||
for (j = 0, s = 0; j < cols; j++) {
|
||||
if (!(col_indicator & ((uint64_t) 1 << j)))
|
||||
continue;
|
||||
out_matrix[dim * r + s] = in_matrix[cols * i + j];
|
||||
s++;
|
||||
}
|
||||
r++;
|
||||
}
|
||||
for (j = 0, s = 0; j < cols; j++) {
|
||||
if (!(col_indicator & ((uint64_t) 1 << j)))
|
||||
continue;
|
||||
out_matrix[dim * r + s] = in_matrix[cols * i + j];
|
||||
s++;
|
||||
}
|
||||
r++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Gosper's Hack */
|
||||
uint64_t next_subset(uint64_t * subset, uint64_t element_count, uint64_t subsize)
|
||||
uint64_t
|
||||
next_subset(uint64_t *subset, uint64_t element_count, uint64_t subsize)
|
||||
{
|
||||
uint64_t tmp1 = *subset & -*subset;
|
||||
uint64_t tmp2 = *subset + tmp1;
|
||||
*subset = (((*subset ^ tmp2) >> 2) / tmp1) | tmp2;
|
||||
if (*subset & (((uint64_t) 1 << element_count))) {
|
||||
/* Overflow on last subset */
|
||||
*subset = ((uint64_t) 1 << subsize) - 1;
|
||||
return 1;
|
||||
}
|
||||
uint64_t tmp1 = *subset & -*subset;
|
||||
uint64_t tmp2 = *subset + tmp1;
|
||||
*subset = (((*subset ^ tmp2) >> 2) / tmp1) | tmp2;
|
||||
if (*subset & (((uint64_t) 1 << element_count))) {
|
||||
/* Overflow on last subset */
|
||||
*subset = ((uint64_t) 1 << subsize) - 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int are_submatrices_singular(unsigned char *vmatrix, const uint64_t rows, const uint64_t cols)
|
||||
int
|
||||
are_submatrices_singular(unsigned char *vmatrix, const uint64_t rows, const uint64_t cols)
|
||||
{
|
||||
unsigned char matrix[COLS * COLS];
|
||||
unsigned char invert_matrix[COLS * COLS];
|
||||
uint64_t subsize;
|
||||
unsigned char matrix[COLS * COLS];
|
||||
unsigned char invert_matrix[COLS * COLS];
|
||||
uint64_t subsize;
|
||||
|
||||
/* Check all square subsize x subsize submatrices of the rows x cols
|
||||
* vmatrix for singularity*/
|
||||
for (subsize = 1; subsize <= min(rows, cols); subsize++) {
|
||||
const uint64_t subset_init = (1ULL << subsize) - 1ULL;
|
||||
uint64_t col_indicator = subset_init;
|
||||
do {
|
||||
uint64_t row_indicator = subset_init;
|
||||
do {
|
||||
gen_sub_matrix(matrix, subsize, vmatrix, rows,
|
||||
cols, row_indicator, col_indicator);
|
||||
if (gf_invert_matrix(matrix, invert_matrix, (int)subsize))
|
||||
return 1;
|
||||
/* Check all square subsize x subsize submatrices of the rows x cols
|
||||
* vmatrix for singularity*/
|
||||
for (subsize = 1; subsize <= min(rows, cols); subsize++) {
|
||||
const uint64_t subset_init = (1ULL << subsize) - 1ULL;
|
||||
uint64_t col_indicator = subset_init;
|
||||
do {
|
||||
uint64_t row_indicator = subset_init;
|
||||
do {
|
||||
gen_sub_matrix(matrix, subsize, vmatrix, rows, cols, row_indicator,
|
||||
col_indicator);
|
||||
if (gf_invert_matrix(matrix, invert_matrix, (int) subsize))
|
||||
return 1;
|
||||
|
||||
} while (next_subset(&row_indicator, rows, subsize) == 0);
|
||||
} while (next_subset(&col_indicator, cols, subsize) == 0);
|
||||
}
|
||||
} while (next_subset(&row_indicator, rows, subsize) == 0);
|
||||
} while (next_subset(&col_indicator, cols, subsize) == 0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
unsigned char vmatrix[(ROWS + COLS) * COLS];
|
||||
uint64_t rows, cols;
|
||||
unsigned char vmatrix[(ROWS + COLS) * COLS];
|
||||
uint64_t rows, cols;
|
||||
|
||||
if (K_MAX > MAX_CHECK) {
|
||||
printf("K_MAX too large for this test\n");
|
||||
return 0;
|
||||
}
|
||||
if (M_MAX > MAX_CHECK) {
|
||||
printf("M_MAX too large for this test\n");
|
||||
return 0;
|
||||
}
|
||||
if (M_MAX < K_MAX) {
|
||||
printf("M_MAX must be smaller than K_MAX");
|
||||
return 0;
|
||||
}
|
||||
if (K_MAX > MAX_CHECK) {
|
||||
printf("K_MAX too large for this test\n");
|
||||
return 0;
|
||||
}
|
||||
if (M_MAX > MAX_CHECK) {
|
||||
printf("M_MAX too large for this test\n");
|
||||
return 0;
|
||||
}
|
||||
if (M_MAX < K_MAX) {
|
||||
printf("M_MAX must be smaller than K_MAX");
|
||||
return 0;
|
||||
}
|
||||
|
||||
printf("Checking gen_rs_matrix for k <= %d and m <= %d.\n", K_MAX, M_MAX);
|
||||
printf("gen_rs_matrix creates erasure codes for:\n");
|
||||
printf("Checking gen_rs_matrix for k <= %d and m <= %d.\n", K_MAX, M_MAX);
|
||||
printf("gen_rs_matrix creates erasure codes for:\n");
|
||||
|
||||
for (cols = 1; cols <= K_MAX; cols++) {
|
||||
for (rows = 1; rows <= M_MAX - cols; rows++) {
|
||||
gf_gen_rs_matrix(vmatrix, rows + cols, cols);
|
||||
for (cols = 1; cols <= K_MAX; cols++) {
|
||||
for (rows = 1; rows <= M_MAX - cols; rows++) {
|
||||
gf_gen_rs_matrix(vmatrix, rows + cols, cols);
|
||||
|
||||
/* Verify the Vandermonde portion of vmatrix contains no
|
||||
* singular submatrix */
|
||||
if (are_submatrices_singular(&vmatrix[cols * cols], rows, cols))
|
||||
break;
|
||||
|
||||
}
|
||||
printf(" k = %2u, m <= %2u \n", (unsigned)cols, (unsigned)(rows + cols - 1));
|
||||
|
||||
}
|
||||
return 0;
|
||||
/* Verify the Vandermonde portion of vmatrix contains no
|
||||
* singular submatrix */
|
||||
if (are_submatrices_singular(&vmatrix[cols * cols], rows, cols))
|
||||
break;
|
||||
}
|
||||
printf(" k = %2u, m <= %2u \n", (unsigned) cols, (unsigned) (rows + cols - 1));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -29,7 +29,7 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include <assert.h>
|
||||
|
||||
#include "erasure_code.h"
|
||||
@ -37,202 +37,194 @@
|
||||
#define TEST_LEN 8192
|
||||
|
||||
#ifndef TEST_SOURCES
|
||||
# define TEST_SOURCES 128
|
||||
#define TEST_SOURCES 128
|
||||
#endif
|
||||
#ifndef RANDOMS
|
||||
# define RANDOMS 200
|
||||
#define RANDOMS 200
|
||||
#endif
|
||||
|
||||
#define KMAX TEST_SOURCES
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
void matrix_mult(u8 * a, u8 * b, u8 * c, int n)
|
||||
void
|
||||
matrix_mult(u8 *a, u8 *b, u8 *c, int n)
|
||||
{
|
||||
int i, j, k;
|
||||
u8 d;
|
||||
int i, j, k;
|
||||
u8 d;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
for (j = 0; j < n; j++) {
|
||||
d = 0;
|
||||
for (k = 0; k < n; k++) {
|
||||
d ^= gf_mul(a[n * i + k], b[n * k + j]);
|
||||
}
|
||||
c[i * n + j] = d;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < n; i++) {
|
||||
for (j = 0; j < n; j++) {
|
||||
d = 0;
|
||||
for (k = 0; k < n; k++) {
|
||||
d ^= gf_mul(a[n * i + k], b[n * k + j]);
|
||||
}
|
||||
c[i * n + j] = d;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void print_matrix(u8 * a, int n)
|
||||
void
|
||||
print_matrix(u8 *a, int n)
|
||||
{
|
||||
int i, j;
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
for (j = 0; j < n; j++) {
|
||||
printf(" %2x", a[i * n + j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
for (i = 0; i < n; i++) {
|
||||
for (j = 0; j < n; j++) {
|
||||
printf(" %2x", a[i * n + j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
int is_ident(u8 * a, const int n)
|
||||
int
|
||||
is_ident(u8 *a, const int n)
|
||||
{
|
||||
int i, j;
|
||||
u8 c;
|
||||
for (i = 0; i < n; i++) {
|
||||
for (j = 0; j < n; j++) {
|
||||
c = *a++;
|
||||
if (i == j)
|
||||
c--;
|
||||
if (c != 0)
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
int i, j;
|
||||
u8 c;
|
||||
for (i = 0; i < n; i++) {
|
||||
for (j = 0; j < n; j++) {
|
||||
c = *a++;
|
||||
if (i == j)
|
||||
c--;
|
||||
if (c != 0)
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int inv_test(u8 * in, u8 * inv, u8 * sav, int n)
|
||||
int
|
||||
inv_test(u8 *in, u8 *inv, u8 *sav, int n)
|
||||
{
|
||||
memcpy(sav, in, n * n);
|
||||
memcpy(sav, in, n * n);
|
||||
|
||||
if (gf_invert_matrix(in, inv, n)) {
|
||||
printf("Given singular matrix\n");
|
||||
print_matrix(sav, n);
|
||||
return -1;
|
||||
}
|
||||
if (gf_invert_matrix(in, inv, n)) {
|
||||
printf("Given singular matrix\n");
|
||||
print_matrix(sav, n);
|
||||
return -1;
|
||||
}
|
||||
|
||||
matrix_mult(inv, sav, in, n);
|
||||
matrix_mult(inv, sav, in, n);
|
||||
|
||||
if (is_ident(in, n)) {
|
||||
printf("fail\n");
|
||||
print_matrix(sav, n);
|
||||
print_matrix(inv, n);
|
||||
print_matrix(in, n);
|
||||
return -1;
|
||||
}
|
||||
if (is_ident(in, n)) {
|
||||
printf("fail\n");
|
||||
print_matrix(sav, n);
|
||||
print_matrix(inv, n);
|
||||
print_matrix(in, n);
|
||||
return -1;
|
||||
}
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int i, k, t;
|
||||
u8 *test_mat = NULL, *save_mat = NULL, *invr_mat = NULL;
|
||||
int ret = -1;
|
||||
int i, k, t;
|
||||
u8 *test_mat = NULL, *save_mat = NULL, *invr_mat = NULL;
|
||||
int ret = -1;
|
||||
|
||||
u8 test1[] = { 1, 1, 6,
|
||||
1, 1, 1,
|
||||
7, 1, 9
|
||||
};
|
||||
u8 test1[] = { 1, 1, 6, 1, 1, 1, 7, 1, 9 };
|
||||
|
||||
u8 test2[] = { 0, 1, 6,
|
||||
1, 0, 1,
|
||||
0, 1, 9
|
||||
};
|
||||
u8 test2[] = { 0, 1, 6, 1, 0, 1, 0, 1, 9 };
|
||||
|
||||
u8 test3[] = { 0, 0, 1,
|
||||
1, 0, 0,
|
||||
0, 1, 1
|
||||
};
|
||||
u8 test3[] = { 0, 0, 1, 1, 0, 0, 0, 1, 1 };
|
||||
|
||||
u8 test4[] = { 0, 1, 6, 7,
|
||||
1, 1, 0, 0,
|
||||
0, 1, 2, 3,
|
||||
3, 2, 2, 3
|
||||
}; // = row3+3*row2
|
||||
u8 test4[] = { 0, 1, 6, 7, 1, 1, 0, 0, 0, 1, 2, 3, 3, 2, 2, 3 }; // = row3+3*row2
|
||||
|
||||
printf("gf_inverse_test: max=%d ", KMAX);
|
||||
printf("gf_inverse_test: max=%d ", KMAX);
|
||||
|
||||
test_mat = malloc(KMAX * KMAX);
|
||||
save_mat = malloc(KMAX * KMAX);
|
||||
invr_mat = malloc(KMAX * KMAX);
|
||||
test_mat = malloc(KMAX * KMAX);
|
||||
save_mat = malloc(KMAX * KMAX);
|
||||
invr_mat = malloc(KMAX * KMAX);
|
||||
|
||||
if (NULL == test_mat || NULL == save_mat || NULL == invr_mat)
|
||||
goto exit;
|
||||
if (NULL == test_mat || NULL == save_mat || NULL == invr_mat)
|
||||
goto exit;
|
||||
|
||||
// Test with lots of leading 1's
|
||||
k = 3;
|
||||
memcpy(test_mat, test1, k * k);
|
||||
if (inv_test(test_mat, invr_mat, save_mat, k))
|
||||
goto exit;
|
||||
// Test with lots of leading 1's
|
||||
k = 3;
|
||||
memcpy(test_mat, test1, k * k);
|
||||
if (inv_test(test_mat, invr_mat, save_mat, k))
|
||||
goto exit;
|
||||
|
||||
// Test with leading zeros
|
||||
k = 3;
|
||||
memcpy(test_mat, test2, k * k);
|
||||
if (inv_test(test_mat, invr_mat, save_mat, k))
|
||||
goto exit;
|
||||
// Test with leading zeros
|
||||
k = 3;
|
||||
memcpy(test_mat, test2, k * k);
|
||||
if (inv_test(test_mat, invr_mat, save_mat, k))
|
||||
goto exit;
|
||||
|
||||
// Test 3
|
||||
k = 3;
|
||||
memcpy(test_mat, test3, k * k);
|
||||
if (inv_test(test_mat, invr_mat, save_mat, k))
|
||||
goto exit;
|
||||
// Test 3
|
||||
k = 3;
|
||||
memcpy(test_mat, test3, k * k);
|
||||
if (inv_test(test_mat, invr_mat, save_mat, k))
|
||||
goto exit;
|
||||
|
||||
// Test 4 - try a singular matrix
|
||||
k = 4;
|
||||
memcpy(test_mat, test4, k * k);
|
||||
if (!gf_invert_matrix(test_mat, invr_mat, k)) {
|
||||
printf("Fail: didn't catch singular matrix\n");
|
||||
print_matrix(test4, 4);
|
||||
goto exit;
|
||||
}
|
||||
// Do random test of size KMAX
|
||||
k = KMAX;
|
||||
// Test 4 - try a singular matrix
|
||||
k = 4;
|
||||
memcpy(test_mat, test4, k * k);
|
||||
if (!gf_invert_matrix(test_mat, invr_mat, k)) {
|
||||
printf("Fail: didn't catch singular matrix\n");
|
||||
print_matrix(test4, 4);
|
||||
goto exit;
|
||||
}
|
||||
// Do random test of size KMAX
|
||||
k = KMAX;
|
||||
|
||||
for (i = 0; i < k * k; i++)
|
||||
test_mat[i] = save_mat[i] = rand();
|
||||
for (i = 0; i < k * k; i++)
|
||||
test_mat[i] = save_mat[i] = rand();
|
||||
|
||||
if (gf_invert_matrix(test_mat, invr_mat, k)) {
|
||||
printf("rand picked a singular matrix, try again\n");
|
||||
goto exit;
|
||||
}
|
||||
if (gf_invert_matrix(test_mat, invr_mat, k)) {
|
||||
printf("rand picked a singular matrix, try again\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
matrix_mult(invr_mat, save_mat, test_mat, k);
|
||||
matrix_mult(invr_mat, save_mat, test_mat, k);
|
||||
|
||||
if (is_ident(test_mat, k)) {
|
||||
printf("fail\n");
|
||||
print_matrix(save_mat, k);
|
||||
print_matrix(invr_mat, k);
|
||||
print_matrix(test_mat, k);
|
||||
goto exit;
|
||||
}
|
||||
// Do Randoms. Random size and coefficients
|
||||
for (t = 0; t < RANDOMS; t++) {
|
||||
k = rand() % KMAX;
|
||||
if (is_ident(test_mat, k)) {
|
||||
printf("fail\n");
|
||||
print_matrix(save_mat, k);
|
||||
print_matrix(invr_mat, k);
|
||||
print_matrix(test_mat, k);
|
||||
goto exit;
|
||||
}
|
||||
// Do Randoms. Random size and coefficients
|
||||
for (t = 0; t < RANDOMS; t++) {
|
||||
k = rand() % KMAX;
|
||||
|
||||
for (i = 0; i < k * k; i++)
|
||||
test_mat[i] = save_mat[i] = rand();
|
||||
for (i = 0; i < k * k; i++)
|
||||
test_mat[i] = save_mat[i] = rand();
|
||||
|
||||
if (gf_invert_matrix(test_mat, invr_mat, k))
|
||||
continue;
|
||||
if (gf_invert_matrix(test_mat, invr_mat, k))
|
||||
continue;
|
||||
|
||||
matrix_mult(invr_mat, save_mat, test_mat, k);
|
||||
matrix_mult(invr_mat, save_mat, test_mat, k);
|
||||
|
||||
if (is_ident(test_mat, k)) {
|
||||
printf("fail rand k=%d\n", k);
|
||||
print_matrix(save_mat, k);
|
||||
print_matrix(invr_mat, k);
|
||||
print_matrix(test_mat, k);
|
||||
goto exit;
|
||||
}
|
||||
if (is_ident(test_mat, k)) {
|
||||
printf("fail rand k=%d\n", k);
|
||||
print_matrix(save_mat, k);
|
||||
print_matrix(invr_mat, k);
|
||||
print_matrix(test_mat, k);
|
||||
goto exit;
|
||||
}
|
||||
#ifdef TEST_VERBOSE
|
||||
if (0 == (t % 8))
|
||||
putchar('.');
|
||||
if (0 == (t % 8))
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
printf(" Pass\n");
|
||||
printf(" Pass\n");
|
||||
|
||||
ret = 0;
|
||||
ret = 0;
|
||||
|
||||
exit:
|
||||
free(test_mat);
|
||||
free(save_mat);
|
||||
free(invr_mat);
|
||||
exit:
|
||||
free(test_mat);
|
||||
free(save_mat);
|
||||
free(invr_mat);
|
||||
|
||||
return ret;
|
||||
return ret;
|
||||
}
|
||||
|
@ -29,26 +29,26 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "test.h"
|
||||
#include "erasure_code.h"
|
||||
|
||||
#ifndef GT_L3_CACHE
|
||||
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||
#define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
|
||||
#endif
|
||||
|
||||
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
|
||||
// Cached test, loop many times over small dataset
|
||||
# define TEST_SOURCES 10
|
||||
# define TEST_LEN 8*1024
|
||||
# define TEST_TYPE_STR "_warm"
|
||||
#elif defined (COLD_TEST)
|
||||
#define TEST_SOURCES 10
|
||||
#define TEST_LEN 8 * 1024
|
||||
#define TEST_TYPE_STR "_warm"
|
||||
#elif defined(COLD_TEST)
|
||||
// Uncached test. Pull from large mem base.
|
||||
# define TEST_SOURCES 10
|
||||
# define TEST_LEN (GT_L3_CACHE / TEST_SOURCES)
|
||||
# define TEST_TYPE_STR "_cold"
|
||||
#elif defined (TEST_CUSTOM)
|
||||
# define TEST_TYPE_STR "_cus"
|
||||
#define TEST_SOURCES 10
|
||||
#define TEST_LEN (GT_L3_CACHE / TEST_SOURCES)
|
||||
#define TEST_TYPE_STR "_cold"
|
||||
#elif defined(TEST_CUSTOM)
|
||||
#define TEST_TYPE_STR "_cus"
|
||||
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
@ -58,105 +58,108 @@ u8 gff[256];
|
||||
u8 gflog[256];
|
||||
u8 gf_mul_table[256 * 256];
|
||||
|
||||
void mk_gf_field(void)
|
||||
void
|
||||
mk_gf_field(void)
|
||||
{
|
||||
int i;
|
||||
u8 s = 1;
|
||||
gflog[0] = 0;
|
||||
int i;
|
||||
u8 s = 1;
|
||||
gflog[0] = 0;
|
||||
|
||||
for (i = 0; i < 256; i++) {
|
||||
gff[i] = s;
|
||||
gflog[s] = i;
|
||||
s = (s << 1) ^ ((s & 0x80) ? 0x1d : 0); // mult by GF{2}
|
||||
}
|
||||
for (i = 0; i < 256; i++) {
|
||||
gff[i] = s;
|
||||
gflog[s] = i;
|
||||
s = (s << 1) ^ ((s & 0x80) ? 0x1d : 0); // mult by GF{2}
|
||||
}
|
||||
}
|
||||
|
||||
void mk_gf_mul_table(u8 * table)
|
||||
void
|
||||
mk_gf_mul_table(u8 *table)
|
||||
{
|
||||
// Populate a single table with all multiply combinations for a fast,
|
||||
// single-table lookup of GF(2^8) multiply at the expense of memory.
|
||||
int i, j;
|
||||
for (i = 0; i < 256; i++)
|
||||
for (j = 0; j < 256; j++)
|
||||
table[i * 256 + j] = gf_mul(i, j);
|
||||
// Populate a single table with all multiply combinations for a fast,
|
||||
// single-table lookup of GF(2^8) multiply at the expense of memory.
|
||||
int i, j;
|
||||
for (i = 0; i < 256; i++)
|
||||
for (j = 0; j < 256; j++)
|
||||
table[i * 256 + j] = gf_mul(i, j);
|
||||
}
|
||||
|
||||
void gf_vect_dot_prod_ref(int len, int vlen, u8 * v, u8 ** src, u8 * dest)
|
||||
void
|
||||
gf_vect_dot_prod_ref(int len, int vlen, u8 *v, u8 **src, u8 *dest)
|
||||
{
|
||||
int i, j;
|
||||
u8 s;
|
||||
for (i = 0; i < len; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < vlen; j++)
|
||||
s ^= gf_mul(src[j][i], v[j]);
|
||||
int i, j;
|
||||
u8 s;
|
||||
for (i = 0; i < len; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < vlen; j++)
|
||||
s ^= gf_mul(src[j][i], v[j]);
|
||||
|
||||
dest[i] = s;
|
||||
}
|
||||
dest[i] = s;
|
||||
}
|
||||
}
|
||||
|
||||
void gf_vect_dot_prod_mult(int len, int vlen, u8 * v, u8 ** src, u8 * dest)
|
||||
void
|
||||
gf_vect_dot_prod_mult(int len, int vlen, u8 *v, u8 **src, u8 *dest)
|
||||
{
|
||||
int i, j;
|
||||
u8 s;
|
||||
for (i = 0; i < len; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s ^= gf_mul_table[v[j] * 256 + src[j][i]];
|
||||
}
|
||||
dest[i] = s;
|
||||
}
|
||||
|
||||
int i, j;
|
||||
u8 s;
|
||||
for (i = 0; i < len; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s ^= gf_mul_table[v[j] * 256 + src[j][i]];
|
||||
}
|
||||
dest[i] = s;
|
||||
}
|
||||
}
|
||||
|
||||
int main(void)
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
int i, j;
|
||||
u8 vec[TEST_SOURCES], *dest1, *dest2;
|
||||
u8 *matrix[TEST_SOURCES];
|
||||
struct perf start;
|
||||
int i, j;
|
||||
u8 vec[TEST_SOURCES], *dest1, *dest2;
|
||||
u8 *matrix[TEST_SOURCES];
|
||||
struct perf start;
|
||||
|
||||
dest1 = (u8 *) malloc(TEST_LEN);
|
||||
dest2 = (u8 *) malloc(TEST_LEN);
|
||||
dest1 = (u8 *) malloc(TEST_LEN);
|
||||
dest2 = (u8 *) malloc(TEST_LEN);
|
||||
|
||||
if (NULL == dest1 || NULL == dest2) {
|
||||
printf("buffer alloc error\n");
|
||||
return -1;
|
||||
}
|
||||
memset(dest1, 0xfe, TEST_LEN);
|
||||
memset(dest2, 0xfe, TEST_LEN);
|
||||
if (NULL == dest1 || NULL == dest2) {
|
||||
printf("buffer alloc error\n");
|
||||
return -1;
|
||||
}
|
||||
memset(dest1, 0xfe, TEST_LEN);
|
||||
memset(dest2, 0xfe, TEST_LEN);
|
||||
|
||||
mk_gf_field();
|
||||
mk_gf_mul_table(gf_mul_table);
|
||||
mk_gf_field();
|
||||
mk_gf_mul_table(gf_mul_table);
|
||||
|
||||
//generate random vector and matrix/data
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
vec[i] = rand();
|
||||
// generate random vector and matrix/data
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
vec[i] = rand();
|
||||
|
||||
if (!(matrix[i] = malloc(TEST_LEN))) {
|
||||
fprintf(stderr, "Error failure\n\n");
|
||||
return -1;
|
||||
}
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
matrix[i][j] = rand();
|
||||
if (!(matrix[i] = malloc(TEST_LEN))) {
|
||||
fprintf(stderr, "Error failure\n\n");
|
||||
return -1;
|
||||
}
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
matrix[i][j] = rand();
|
||||
}
|
||||
|
||||
}
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1));
|
||||
printf("gf_vect_dot_prod_2tbl" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));
|
||||
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1));
|
||||
printf("gf_vect_dot_prod_2tbl" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
gf_vect_dot_prod_mult(TEST_LEN, TEST_SOURCES, vec, matrix, dest2));
|
||||
printf("gf_vect_dot_prod_1tbl" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));
|
||||
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
gf_vect_dot_prod_mult(TEST_LEN, TEST_SOURCES, vec, matrix, dest2));
|
||||
printf("gf_vect_dot_prod_1tbl" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
|
||||
// Compare with reference function
|
||||
if (0 != memcmp(dest1, dest2, TEST_LEN)) {
|
||||
printf("Error, different results!\n\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Compare with reference function
|
||||
if (0 != memcmp(dest1, dest2, TEST_LEN)) {
|
||||
printf("Error, different results!\n\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("Pass functional test\n");
|
||||
return 0;
|
||||
printf("Pass functional test\n");
|
||||
return 0;
|
||||
}
|
||||
|
@ -29,19 +29,19 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include <assert.h>
|
||||
#include "erasure_code.h"
|
||||
#include "test.h"
|
||||
|
||||
#define TEST_LEN 8192
|
||||
#define TEST_SIZE (TEST_LEN/2)
|
||||
#define TEST_LEN 8192
|
||||
#define TEST_SIZE (TEST_LEN / 2)
|
||||
|
||||
#ifndef TEST_SOURCES
|
||||
# define TEST_SOURCES 250
|
||||
#define TEST_SOURCES 250
|
||||
#endif
|
||||
#ifndef RANDOMS
|
||||
# define RANDOMS 20
|
||||
#define RANDOMS 20
|
||||
#endif
|
||||
|
||||
#define MMAX TEST_SOURCES
|
||||
@ -49,244 +49,251 @@
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
void dump(unsigned char *buf, int len)
|
||||
void
|
||||
dump(unsigned char *buf, int len)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < len;) {
|
||||
printf(" %2x", 0xff & buf[i++]);
|
||||
if (i % 32 == 0)
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
int i;
|
||||
for (i = 0; i < len;) {
|
||||
printf(" %2x", 0xff & buf[i++]);
|
||||
if (i % 32 == 0)
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void dump_matrix(unsigned char **s, int k, int m)
|
||||
void
|
||||
dump_matrix(unsigned char **s, int k, int m)
|
||||
{
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", s[i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", s[i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void dump_u8xu8(unsigned char *s, int k, int m)
|
||||
void
|
||||
dump_u8xu8(unsigned char *s, int k, int m)
|
||||
{
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", 0xff & s[j + (i * m)]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", 0xff & s[j + (i * m)]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, m, k, nerrs, r, err;
|
||||
void *buf;
|
||||
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
|
||||
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||
int i, j, rtest, m, k, nerrs, r, err;
|
||||
void *buf;
|
||||
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
|
||||
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||
|
||||
printf("gf_vect_dot_prod_base: %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
printf("gf_vect_dot_prod_base: %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest = buf;
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref = buf;
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buff = buf;
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buff = buf;
|
||||
|
||||
// Init
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
// Init
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
memset(dest, 0, TEST_LEN);
|
||||
memset(temp_buff, 0, TEST_LEN);
|
||||
memset(dest_ref, 0, TEST_LEN);
|
||||
memset(g, 0, TEST_SOURCES);
|
||||
memset(dest, 0, TEST_LEN);
|
||||
memset(temp_buff, 0, TEST_LEN);
|
||||
memset(dest_ref, 0, TEST_LEN);
|
||||
memset(g, 0, TEST_SOURCES);
|
||||
|
||||
// Test erasure code using gf_vect_dot_prod
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
assert(!(m > MMAX || k > KMAX));
|
||||
// Test erasure code using gf_vect_dot_prod
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
assert(!(m > MMAX || k > KMAX));
|
||||
|
||||
gf_gen_cauchy1_matrix(a, m, k);
|
||||
gf_gen_cauchy1_matrix(a, m, k);
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
}
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
}
|
||||
|
||||
// Random buffers in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
// Random buffers in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Do more random tests
|
||||
// Do more random tests
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2)
|
||||
;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1)
|
||||
;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
gf_gen_cauchy1_matrix(a, m, k);
|
||||
gf_gen_cauchy1_matrix(a, m, k);
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
}
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
}
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
if (nerrs == 0) { // should have at least one error
|
||||
while ((err = (rand() % KMAX)) >= k) ;
|
||||
src_err_list[nerrs++] = err;
|
||||
src_in_err[err] = 1;
|
||||
}
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
if (nerrs == 0) { // should have at least one error
|
||||
while ((err = (rand() % KMAX)) >= k)
|
||||
;
|
||||
src_err_list[nerrs++] = err;
|
||||
src_in_err[err] = 1;
|
||||
}
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (i = 0; i < nerrs; i++)
|
||||
printf(" %d", src_err_list[i]);
|
||||
printf("\na:\n");
|
||||
dump_u8xu8((u8 *) a, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) d, k, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (i = 0; i < nerrs; i++)
|
||||
printf(" %d", src_err_list[i]);
|
||||
printf("\na:\n");
|
||||
dump_u8xu8((u8 *) a, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) d, k, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
printf("done all: Pass\n");
|
||||
return 0;
|
||||
printf("done all: Pass\n");
|
||||
return 0;
|
||||
}
|
||||
|
@ -29,146 +29,148 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure_code.h"
|
||||
#include "test.h"
|
||||
|
||||
#ifndef FUNCTION_UNDER_TEST
|
||||
# define FUNCTION_UNDER_TEST gf_vect_dot_prod
|
||||
#define FUNCTION_UNDER_TEST gf_vect_dot_prod
|
||||
#endif
|
||||
|
||||
#define str(s) #s
|
||||
#define str(s) #s
|
||||
#define xstr(s) str(s)
|
||||
|
||||
#ifndef GT_L3_CACHE
|
||||
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||
#define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
|
||||
#endif
|
||||
|
||||
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
|
||||
// Cached test, loop many times over small dataset
|
||||
# define TEST_SOURCES 10
|
||||
# define TEST_LEN 8*1024
|
||||
# define TEST_TYPE_STR "_warm"
|
||||
#elif defined (COLD_TEST)
|
||||
#define TEST_SOURCES 10
|
||||
#define TEST_LEN 8 * 1024
|
||||
#define TEST_TYPE_STR "_warm"
|
||||
#elif defined(COLD_TEST)
|
||||
// Uncached test. Pull from large mem base.
|
||||
# define TEST_SOURCES 10
|
||||
# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
|
||||
# define TEST_TYPE_STR "_cold"
|
||||
#elif defined (TEST_CUSTOM)
|
||||
# define TEST_TYPE_STR "_cus"
|
||||
#define TEST_SOURCES 10
|
||||
#define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64 - 1))
|
||||
#define TEST_TYPE_STR "_cold"
|
||||
#elif defined(TEST_CUSTOM)
|
||||
#define TEST_TYPE_STR "_cus"
|
||||
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
void dump(unsigned char *buf, int len)
|
||||
void
|
||||
dump(unsigned char *buf, int len)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < len;) {
|
||||
printf(" %2x", 0xff & buf[i++]);
|
||||
if (i % 32 == 0)
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
int i;
|
||||
for (i = 0; i < len;) {
|
||||
printf(" %2x", 0xff & buf[i++]);
|
||||
if (i % 32 == 0)
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void dump_matrix(unsigned char **s, int k, int m)
|
||||
void
|
||||
dump_matrix(unsigned char **s, int k, int m)
|
||||
{
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", s[i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", s[i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void vect_dot_prod_perf(void (*fun_ptr)
|
||||
(int, int, unsigned char *, unsigned char **, unsigned char *),
|
||||
u8 * g, u8 * g_tbls, u8 ** buffs, u8 * dest_ref)
|
||||
void
|
||||
vect_dot_prod_perf(void (*fun_ptr)(int, int, unsigned char *, unsigned char **, unsigned char *),
|
||||
u8 *g, u8 *g_tbls, u8 **buffs, u8 *dest_ref)
|
||||
{
|
||||
int j;
|
||||
for (j = 0; j < TEST_SOURCES; j++)
|
||||
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||
int j;
|
||||
for (j = 0; j < TEST_SOURCES; j++)
|
||||
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||
|
||||
(*fun_ptr) (TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
(*fun_ptr)(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int i, j;
|
||||
void *buf;
|
||||
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
|
||||
u8 *temp_buff, *buffs[TEST_SOURCES];
|
||||
struct perf start;
|
||||
int i, j;
|
||||
void *buf;
|
||||
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
|
||||
u8 *temp_buff, *buffs[TEST_SOURCES];
|
||||
struct perf start;
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest = buf;
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref = buf;
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buff = buf;
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buff = buf;
|
||||
|
||||
// Performance test
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
// Performance test
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
memset(dest, 0, TEST_LEN);
|
||||
memset(temp_buff, 0, TEST_LEN);
|
||||
memset(dest_ref, 0, TEST_LEN);
|
||||
memset(g, 0, TEST_SOURCES);
|
||||
memset(dest, 0, TEST_LEN);
|
||||
memset(temp_buff, 0, TEST_LEN);
|
||||
memset(dest_ref, 0, TEST_LEN);
|
||||
memset(g, 0, TEST_SOURCES);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
|
||||
#ifdef DO_REF_PERF
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref)
|
||||
);
|
||||
printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref));
|
||||
printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));
|
||||
#else
|
||||
vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref);
|
||||
vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref);
|
||||
#endif
|
||||
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
vect_dot_prod_perf(&FUNCTION_UNDER_TEST, g, g_tbls, buffs, dest));
|
||||
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
|
||||
BENCHMARK(&start, BENCHMARK_TIME,
|
||||
vect_dot_prod_perf(&FUNCTION_UNDER_TEST, g, g_tbls, buffs, dest));
|
||||
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("pass perf check\n");
|
||||
return 0;
|
||||
printf("pass perf check\n");
|
||||
return 0;
|
||||
}
|
||||
|
@ -29,28 +29,28 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure_code.h"
|
||||
#include "test.h"
|
||||
|
||||
#ifndef FUNCTION_UNDER_TEST
|
||||
# define FUNCTION_UNDER_TEST gf_vect_dot_prod
|
||||
#define FUNCTION_UNDER_TEST gf_vect_dot_prod
|
||||
#endif
|
||||
#ifndef TEST_MIN_SIZE
|
||||
# define TEST_MIN_SIZE 32
|
||||
#define TEST_MIN_SIZE 32
|
||||
#endif
|
||||
|
||||
#define str(s) #s
|
||||
#define str(s) #s
|
||||
#define xstr(s) str(s)
|
||||
|
||||
#define TEST_LEN 8192
|
||||
#define TEST_SIZE (TEST_LEN/2)
|
||||
#define TEST_LEN 8192
|
||||
#define TEST_SIZE (TEST_LEN / 2)
|
||||
|
||||
#ifndef TEST_SOURCES
|
||||
# define TEST_SOURCES 16
|
||||
#define TEST_SOURCES 16
|
||||
#endif
|
||||
#ifndef RANDOMS
|
||||
# define RANDOMS 20
|
||||
#define RANDOMS 20
|
||||
#endif
|
||||
|
||||
#define MMAX TEST_SOURCES
|
||||
@ -58,481 +58,486 @@
|
||||
|
||||
#ifdef EC_ALIGNED_ADDR
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 0
|
||||
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||
#define PTR_ALIGN_CHK_B 0
|
||||
#define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||
#else
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 32
|
||||
# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
|
||||
#define PTR_ALIGN_CHK_B 32
|
||||
#define LEN_ALIGN_CHK_B 32 // 0 for aligned only
|
||||
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
void dump(unsigned char *buf, int len)
|
||||
void
|
||||
dump(unsigned char *buf, int len)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < len;) {
|
||||
printf(" %2x", 0xff & buf[i++]);
|
||||
if (i % 32 == 0)
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
int i;
|
||||
for (i = 0; i < len;) {
|
||||
printf(" %2x", 0xff & buf[i++]);
|
||||
if (i % 32 == 0)
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void dump_matrix(unsigned char **s, int k, int m)
|
||||
void
|
||||
dump_matrix(unsigned char **s, int k, int m)
|
||||
{
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", s[i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", s[i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void dump_u8xu8(unsigned char *s, int k, int m)
|
||||
void
|
||||
dump_u8xu8(unsigned char *s, int k, int m)
|
||||
{
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", 0xff & s[j + (i * m)]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", 0xff & s[j + (i * m)]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, srcs, m, k, nerrs, r, err;
|
||||
void *buf;
|
||||
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
|
||||
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||
int i, j, rtest, srcs, m, k, nerrs, r, err;
|
||||
void *buf;
|
||||
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
|
||||
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||
|
||||
int align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *udest_ptr;
|
||||
int align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *udest_ptr;
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest = buf;
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref = buf;
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buff = buf;
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buff = buf;
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
memset(dest, 0, TEST_LEN);
|
||||
memset(temp_buff, 0, TEST_LEN);
|
||||
memset(dest_ref, 0, TEST_LEN);
|
||||
memset(g, 0, TEST_SOURCES);
|
||||
memset(dest, 0, TEST_LEN);
|
||||
memset(temp_buff, 0, TEST_LEN);
|
||||
memset(dest_ref, 0, TEST_LEN);
|
||||
memset(g, 0, TEST_SOURCES);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
#ifdef TEST_VERBOSE
|
||||
else
|
||||
putchar('.');
|
||||
else
|
||||
putchar('.');
|
||||
#endif
|
||||
|
||||
// Rand data test
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
// Rand data test
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Rand data test with varied parameters
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
// Rand data test with varied parameters
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
|
||||
dump_matrix(buffs, 5, srcs);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 5);
|
||||
printf("dprod:");
|
||||
dump(dest, 5);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
|
||||
dump_matrix(buffs, 5, srcs);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 5);
|
||||
printf("dprod:");
|
||||
dump(dest, 5);
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test erasure code using gf_vect_dot_prod
|
||||
// Test erasure code using gf_vect_dot_prod
|
||||
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
if (m > MMAX || k > KMAX)
|
||||
return -1;
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
if (m > MMAX || k > KMAX)
|
||||
return -1;
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Random buffers in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
// Random buffers in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||
#endif
|
||||
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Do more random tests
|
||||
// Do more random tests
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2)
|
||||
;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1)
|
||||
;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
if (nerrs == 0) { // should have at least one error
|
||||
while ((err = (rand() % KMAX)) >= k) ;
|
||||
src_err_list[nerrs++] = err;
|
||||
src_in_err[err] = 1;
|
||||
}
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
if (nerrs == 0) { // should have at least one error
|
||||
while ((err = (rand() % KMAX)) >= k)
|
||||
;
|
||||
src_err_list[nerrs++] = err;
|
||||
src_in_err[err] = 1;
|
||||
}
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||
#endif
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (i = 0; i < nerrs; i++)
|
||||
printf(" %d", src_err_list[i]);
|
||||
printf("\na:\n");
|
||||
dump_u8xu8((u8 *) a, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) d, k, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (i = 0; i < nerrs; i++)
|
||||
printf(" %d", src_err_list[i]);
|
||||
printf("\na:\n");
|
||||
dump_u8xu8((u8 *) a, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) d, k, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, align);
|
||||
printf("dprod:");
|
||||
dump(dest, align);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref, dest, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, align);
|
||||
printf("dprod:");
|
||||
dump(dest, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Test rand ptr alignment if available
|
||||
// Test rand ptr alignment if available
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||
srcs = rand() % TEST_SOURCES;
|
||||
if (srcs == 0)
|
||||
continue;
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||
srcs = rand() % TEST_SOURCES;
|
||||
if (srcs == 0)
|
||||
continue;
|
||||
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < srcs; i++)
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < srcs; i++)
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
memset(dest, 0, TEST_LEN); // zero pad to check write-over
|
||||
memset(dest, 0, TEST_LEN); // zero pad to check write-over
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
|
||||
|
||||
if (memcmp(dest_ref, udest_ptr, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(udest_ptr, 25);
|
||||
return -1;
|
||||
}
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
offset = udest_ptr - dest;
|
||||
if (memcmp(dest_ref, udest_ptr, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n", srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(udest_ptr, 25);
|
||||
return -1;
|
||||
}
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
offset = udest_ptr - dest;
|
||||
|
||||
if (memcmp(dest, dest_ref, offset)) {
|
||||
printf("Fail rand ualign pad start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad end\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest, dest_ref, offset)) {
|
||||
printf("Fail rand ualign pad start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Test all size alignment
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
// Test all size alignment
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
|
||||
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||
srcs = TEST_SOURCES;
|
||||
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||
srcs = TEST_SOURCES;
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
|
||||
|
||||
if (memcmp(dest_ref, dest, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (memcmp(dest_ref, dest, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n", size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("done all: Pass\n");
|
||||
return 0;
|
||||
printf("done all: Pass\n");
|
||||
return 0;
|
||||
}
|
||||
|
@ -29,503 +29,500 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure_code.h"
|
||||
#include "test.h"
|
||||
|
||||
#ifndef ALIGN_SIZE
|
||||
# define ALIGN_SIZE 32
|
||||
#define ALIGN_SIZE 32
|
||||
#endif
|
||||
|
||||
#ifndef FUNCTION_UNDER_TEST
|
||||
//By default, test multi-binary version
|
||||
# define FUNCTION_UNDER_TEST gf_vect_mad
|
||||
# define REF_FUNCTION gf_vect_dot_prod
|
||||
# define VECT 1
|
||||
// By default, test multi-binary version
|
||||
#define FUNCTION_UNDER_TEST gf_vect_mad
|
||||
#define REF_FUNCTION gf_vect_dot_prod
|
||||
#define VECT 1
|
||||
#endif
|
||||
|
||||
#ifndef TEST_MIN_SIZE
|
||||
# define TEST_MIN_SIZE 64
|
||||
#define TEST_MIN_SIZE 64
|
||||
#endif
|
||||
|
||||
#define str(s) #s
|
||||
#define str(s) #s
|
||||
#define xstr(s) str(s)
|
||||
|
||||
#define TEST_LEN 8192
|
||||
#define TEST_SIZE (TEST_LEN/2)
|
||||
#define TEST_MEM TEST_SIZE
|
||||
#define TEST_LOOPS 20000
|
||||
#define TEST_LEN 8192
|
||||
#define TEST_SIZE (TEST_LEN / 2)
|
||||
#define TEST_MEM TEST_SIZE
|
||||
#define TEST_LOOPS 20000
|
||||
#define TEST_TYPE_STR ""
|
||||
|
||||
#ifndef TEST_SOURCES
|
||||
# define TEST_SOURCES 16
|
||||
#define TEST_SOURCES 16
|
||||
#endif
|
||||
#ifndef RANDOMS
|
||||
# define RANDOMS 20
|
||||
#define RANDOMS 20
|
||||
#endif
|
||||
|
||||
#ifdef EC_ALIGNED_ADDR
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 0
|
||||
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||
#define PTR_ALIGN_CHK_B 0
|
||||
#define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||
#else
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B ALIGN_SIZE
|
||||
# define LEN_ALIGN_CHK_B ALIGN_SIZE // 0 for aligned only
|
||||
#define PTR_ALIGN_CHK_B ALIGN_SIZE
|
||||
#define LEN_ALIGN_CHK_B ALIGN_SIZE // 0 for aligned only
|
||||
#endif
|
||||
|
||||
#define str(s) #s
|
||||
#define str(s) #s
|
||||
#define xstr(s) str(s)
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
#if (VECT == 1)
|
||||
# define LAST_ARG *dest
|
||||
#define LAST_ARG *dest
|
||||
#else
|
||||
# define LAST_ARG **dest
|
||||
#define LAST_ARG **dest
|
||||
#endif
|
||||
|
||||
extern void FUNCTION_UNDER_TEST(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char LAST_ARG);
|
||||
extern void REF_FUNCTION(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||
unsigned char LAST_ARG);
|
||||
extern void
|
||||
FUNCTION_UNDER_TEST(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char LAST_ARG);
|
||||
extern void
|
||||
REF_FUNCTION(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char LAST_ARG);
|
||||
|
||||
void dump(unsigned char *buf, int len)
|
||||
void
|
||||
dump(unsigned char *buf, int len)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < len;) {
|
||||
printf(" %2x", 0xff & buf[i++]);
|
||||
if (i % 32 == 0)
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
int i;
|
||||
for (i = 0; i < len;) {
|
||||
printf(" %2x", 0xff & buf[i++]);
|
||||
if (i % 32 == 0)
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void dump_matrix(unsigned char **s, int k, int m)
|
||||
void
|
||||
dump_matrix(unsigned char **s, int k, int m)
|
||||
{
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", s[i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", s[i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void dump_u8xu8(unsigned char *s, int k, int m)
|
||||
void
|
||||
dump_u8xu8(unsigned char *s, int k, int m)
|
||||
{
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", 0xff & s[j + (i * m)]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
int i, j;
|
||||
for (i = 0; i < k; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
printf(" %2x", 0xff & s[j + (i * m)]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, srcs;
|
||||
void *buf;
|
||||
u8 gf[6][TEST_SOURCES];
|
||||
u8 *g_tbls;
|
||||
u8 *dest_ref[VECT];
|
||||
u8 *dest_ptrs[VECT], *buffs[TEST_SOURCES];
|
||||
int vector = VECT;
|
||||
int i, j, rtest, srcs;
|
||||
void *buf;
|
||||
u8 gf[6][TEST_SOURCES];
|
||||
u8 *g_tbls;
|
||||
u8 *dest_ref[VECT];
|
||||
u8 *dest_ptrs[VECT], *buffs[TEST_SOURCES];
|
||||
int vector = VECT;
|
||||
|
||||
int align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *udest_ptrs[VECT];
|
||||
printf("test" xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
int align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *udest_ptrs[VECT];
|
||||
printf("test" xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 16, 2 * (vector * TEST_SOURCES * 32))) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
g_tbls = buf;
|
||||
if (posix_memalign(&buf, 16, 2 * (vector * TEST_SOURCES * 32))) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
g_tbls = buf;
|
||||
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ptrs[i] = buf;
|
||||
memset(dest_ptrs[i], 0, TEST_LEN);
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ptrs[i] = buf;
|
||||
memset(dest_ptrs[i], 0, TEST_LEN);
|
||||
}
|
||||
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref[i] = buf;
|
||||
memset(dest_ref[i], 0, TEST_LEN);
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref[i] = buf;
|
||||
memset(dest_ref[i], 0, TEST_LEN);
|
||||
}
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
switch (vector) {
|
||||
case 6:
|
||||
memset(gf[5], 0xe6, TEST_SOURCES);
|
||||
case 5:
|
||||
memset(gf[4], 4, TEST_SOURCES);
|
||||
case 4:
|
||||
memset(gf[3], 9, TEST_SOURCES);
|
||||
case 3:
|
||||
memset(gf[2], 7, TEST_SOURCES);
|
||||
case 2:
|
||||
memset(gf[1], 1, TEST_SOURCES);
|
||||
case 1:
|
||||
memset(gf[0], 2, TEST_SOURCES);
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
switch (vector) {
|
||||
case 6:
|
||||
memset(gf[5], 0xe6, TEST_SOURCES);
|
||||
case 5:
|
||||
memset(gf[4], 4, TEST_SOURCES);
|
||||
case 4:
|
||||
memset(gf[3], 9, TEST_SOURCES);
|
||||
case 3:
|
||||
memset(gf[2], 7, TEST_SOURCES);
|
||||
case 2:
|
||||
memset(gf[1], 1, TEST_SOURCES);
|
||||
case 1:
|
||||
memset(gf[0], 2, TEST_SOURCES);
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf[i][j] = rand();
|
||||
gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
|
||||
}
|
||||
for (i = 0; i < vector; i++)
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf[i][j] = rand();
|
||||
gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
|
||||
}
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES],
|
||||
buffs, dest_ref[i]);
|
||||
for (i = 0; i < vector; i++)
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES], buffs,
|
||||
dest_ref[i]);
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
memset(dest_ptrs[i], 0, TEST_LEN);
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
for (i = 0; i < vector; i++)
|
||||
memset(dest_ptrs[i], 0, TEST_LEN);
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
#if (VECT == 1)
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs);
|
||||
#else
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
|
||||
#endif
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#if (VECT == 1)
|
||||
REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, *dest_ref);
|
||||
REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, *dest_ref);
|
||||
#else
|
||||
REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref);
|
||||
REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref);
|
||||
#endif
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
|
||||
// Rand data test
|
||||
// Rand data test
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf[i][j] = rand();
|
||||
gf_vect_mul_init(gf[i][j],
|
||||
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
|
||||
}
|
||||
for (i = 0; i < vector; i++)
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf[i][j] = rand();
|
||||
gf_vect_mul_init(gf[i][j],
|
||||
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
|
||||
}
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES,
|
||||
&g_tbls[i * 32 * TEST_SOURCES], buffs,
|
||||
dest_ref[i]);
|
||||
for (i = 0; i < vector; i++)
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES,
|
||||
&g_tbls[i * 32 * TEST_SOURCES], buffs, dest_ref[i]);
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
memset(dest_ptrs[i], 0, TEST_LEN);
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
for (i = 0; i < vector; i++)
|
||||
memset(dest_ptrs[i], 0, TEST_LEN);
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
#if (VECT == 1)
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i],
|
||||
*dest_ptrs);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i],
|
||||
*dest_ptrs);
|
||||
#else
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i],
|
||||
dest_ptrs);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
|
||||
#endif
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d %d\n",
|
||||
i, rtest);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d %d\n", i,
|
||||
rtest);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Rand data test with varied parameters
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
// Rand data test with varied parameters
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
for (j = 0; j < srcs; j++) {
|
||||
gf[i][j] = rand();
|
||||
gf_vect_mul_init(gf[i][j],
|
||||
&g_tbls[i * (32 * srcs) + j * 32]);
|
||||
}
|
||||
for (i = 0; i < vector; i++)
|
||||
for (j = 0; j < srcs; j++) {
|
||||
gf[i][j] = rand();
|
||||
gf_vect_mul_init(gf[i][j],
|
||||
&g_tbls[i * (32 * srcs) + j * 32]);
|
||||
}
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[i * 32 * srcs],
|
||||
buffs, dest_ref[i]);
|
||||
for (i = 0; i < vector; i++)
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[i * 32 * srcs], buffs,
|
||||
dest_ref[i]);
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
memset(dest_ptrs[i], 0, TEST_LEN);
|
||||
for (i = 0; i < srcs; i++) {
|
||||
for (i = 0; i < vector; i++)
|
||||
memset(dest_ptrs[i], 0, TEST_LEN);
|
||||
for (i = 0; i < srcs; i++) {
|
||||
#if (VECT == 1)
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i],
|
||||
*dest_ptrs);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i],
|
||||
*dest_ptrs);
|
||||
#else
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i],
|
||||
dest_ptrs);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i], dest_ptrs);
|
||||
#endif
|
||||
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test%d srcs=%d\n", i, srcs);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
|
||||
printf("Fail rand " xstr(
|
||||
FUNCTION_UNDER_TEST) " test%d srcs=%d\n",
|
||||
i, srcs);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
|
||||
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
|
||||
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf[i][j] = rand();
|
||||
gf_vect_mul_init(gf[i][j],
|
||||
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
|
||||
}
|
||||
for (i = 0; i < vector; i++)
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf[i][j] = rand();
|
||||
gf_vect_mul_init(gf[i][j],
|
||||
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
|
||||
}
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES,
|
||||
&g_tbls[i * 32 * TEST_SOURCES], efence_buffs,
|
||||
dest_ref[i]);
|
||||
for (i = 0; i < vector; i++)
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref[i]);
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
memset(dest_ptrs[i], 0, size);
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
for (i = 0; i < vector; i++)
|
||||
memset(dest_ptrs[i], 0, size);
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
#if (VECT == 1)
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
|
||||
*dest_ptrs);
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
|
||||
*dest_ptrs);
|
||||
#else
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
|
||||
dest_ptrs);
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
|
||||
dest_ptrs);
|
||||
#endif
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test%d size=%d\n", i, size);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], TEST_MIN_SIZE + align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[i], TEST_MIN_SIZE + align);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d size=%d\n",
|
||||
i, size);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], TEST_MIN_SIZE + align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[i], TEST_MIN_SIZE + align);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Test rand ptr alignment if available
|
||||
// Test rand ptr alignment if available
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||
srcs = rand() % TEST_SOURCES;
|
||||
if (srcs == 0)
|
||||
continue;
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||
srcs = rand() % TEST_SOURCES;
|
||||
if (srcs == 0)
|
||||
continue;
|
||||
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < srcs; i++)
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < srcs; i++)
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
for (i = 0; i < vector; i++) {
|
||||
udest_ptrs[i] = dest_ptrs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
udest_ptrs[i] = dest_ptrs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
for (j = 0; j < srcs; j++) {
|
||||
gf[i][j] = rand();
|
||||
gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * srcs) + j * 32]);
|
||||
}
|
||||
for (i = 0; i < vector; i++)
|
||||
for (j = 0; j < srcs; j++) {
|
||||
gf[i][j] = rand();
|
||||
gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * srcs) + j * 32]);
|
||||
}
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[i * 32 * srcs], ubuffs,
|
||||
dest_ref[i]);
|
||||
for (i = 0; i < vector; i++)
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[i * 32 * srcs], ubuffs,
|
||||
dest_ref[i]);
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
for (i = 0; i < srcs; i++) {
|
||||
#if (VECT == 1)
|
||||
FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], *udest_ptrs);
|
||||
FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], *udest_ptrs);
|
||||
#else
|
||||
FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], udest_ptrs);
|
||||
FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], udest_ptrs);
|
||||
#endif
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], udest_ptrs[i], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test%d ualign srcs=%d\n", i, srcs);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], udest_ptrs[i], size)) {
|
||||
printf("Fail rand " xstr(
|
||||
FUNCTION_UNDER_TEST) " test%d ualign srcs=%d\n",
|
||||
i, srcs);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(dest_ref[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(dest_ref[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
|
||||
for (i = 0; i < vector; i++) {
|
||||
offset = udest_ptrs[i] - dest_ptrs[i];
|
||||
if (memcmp(dest_ptrs[i], dest_ref[0], offset)) {
|
||||
printf("Fail rand ualign pad1 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp
|
||||
(dest_ptrs[i] + offset + size, dest_ref[0],
|
||||
PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad1 end\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
offset = udest_ptrs[i] - dest_ptrs[i];
|
||||
if (memcmp(dest_ptrs[i], dest_ref[0], offset)) {
|
||||
printf("Fail rand ualign pad1 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ptrs[i] + offset + size, dest_ref[0],
|
||||
PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad1 end\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Test all size alignment
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
|
||||
// Test all size alignment
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
|
||||
|
||||
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < vector; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf[i][j] = rand();
|
||||
gf_vect_mul_init(gf[i][j],
|
||||
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
|
||||
}
|
||||
memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf[i][j] = rand();
|
||||
gf_vect_mul_init(gf[i][j],
|
||||
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
|
||||
}
|
||||
memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||
}
|
||||
|
||||
for (i = 0; i < vector; i++)
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES,
|
||||
&g_tbls[i * 32 * TEST_SOURCES], buffs,
|
||||
dest_ref[i]);
|
||||
for (i = 0; i < vector; i++)
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES],
|
||||
buffs, dest_ref[i]);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
#if (VECT == 1)
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i],
|
||||
*dest_ptrs);
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs);
|
||||
#else
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i],
|
||||
dest_ptrs);
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
|
||||
#endif
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test%d ualign len=%d\n", i, size);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < vector; i++) {
|
||||
if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
|
||||
printf("Fail rand " xstr(
|
||||
FUNCTION_UNDER_TEST) " test%d ualign len=%d\n",
|
||||
i, size);
|
||||
dump_matrix(buffs, vector, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref[i], 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
printf("Pass\n");
|
||||
return 0;
|
||||
|
||||
printf("Pass\n");
|
||||
return 0;
|
||||
}
|
||||
|
@ -29,117 +29,116 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset
|
||||
#include <string.h> // for memset
|
||||
#include "erasure_code.h"
|
||||
|
||||
#define TEST_SIZE 8192
|
||||
#define TEST_MEM TEST_SIZE
|
||||
#define TEST_LOOPS 100000
|
||||
#define TEST_SIZE 8192
|
||||
#define TEST_MEM TEST_SIZE
|
||||
#define TEST_LOOPS 100000
|
||||
#define TEST_TYPE_STR ""
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
|
||||
int align, size;
|
||||
unsigned char *efence_buff1;
|
||||
unsigned char *efence_buff2;
|
||||
int i;
|
||||
u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
|
||||
int align, size;
|
||||
unsigned char *efence_buff1;
|
||||
unsigned char *efence_buff2;
|
||||
|
||||
printf("gf_vect_mul_base_test:\n");
|
||||
printf("gf_vect_mul_base_test:\n");
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
|
||||
buff1 = (u8 *) malloc(TEST_SIZE);
|
||||
buff2 = (u8 *) malloc(TEST_SIZE);
|
||||
buff3 = (u8 *) malloc(TEST_SIZE);
|
||||
buff1 = (u8 *) malloc(TEST_SIZE);
|
||||
buff2 = (u8 *) malloc(TEST_SIZE);
|
||||
buff3 = (u8 *) malloc(TEST_SIZE);
|
||||
|
||||
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
|
||||
printf("buffer alloc error\n");
|
||||
return -1;
|
||||
}
|
||||
// Fill with rand data
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
|
||||
printf("buffer alloc error\n");
|
||||
return -1;
|
||||
}
|
||||
// Fill with rand data
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
|
||||
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
|
||||
printf("fail fill with rand data\n");
|
||||
return 1;
|
||||
}
|
||||
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
|
||||
printf("fail fill with rand data\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
|
||||
gf_mul(2, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
|
||||
gf_mul(2, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) {
|
||||
printf("fail fill with rand data for buff1\n");
|
||||
return -1;
|
||||
}
|
||||
// Check reference function
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (buff2[i] != buff3[i]) {
|
||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) {
|
||||
printf("fail fill with rand data for buff1\n");
|
||||
return -1;
|
||||
}
|
||||
// Check reference function
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (buff2[i] != buff3[i]) {
|
||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", i, a, buff1[i], buff2[i],
|
||||
gf_mul(a, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
|
||||
// Check each possible constant
|
||||
printf("Random tests ");
|
||||
for (a = 0; a != 255; a++) {
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
|
||||
printf("fail random tests\n");
|
||||
return 1;
|
||||
}
|
||||
// Check each possible constant
|
||||
printf("Random tests ");
|
||||
for (a = 0; a != 255; a++) {
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
|
||||
printf("fail random tests\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", i, a, buff1[i],
|
||||
buff2[i], gf_mul(2, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = 32;
|
||||
a = 2;
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = 32;
|
||||
a = 2;
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
for (size = 0; size < TEST_SIZE; size += align) {
|
||||
// Line up TEST_SIZE from end
|
||||
efence_buff1 = buff1 + size;
|
||||
efence_buff2 = buff2 + size;
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
for (size = 0; size < TEST_SIZE; size += align) {
|
||||
// Line up TEST_SIZE from end
|
||||
efence_buff1 = buff1 + size;
|
||||
efence_buff2 = buff2 + size;
|
||||
|
||||
if (gf_vect_mul_base
|
||||
(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2) != 0) {
|
||||
printf("fail tests at end of buffer\n");
|
||||
return -1;
|
||||
}
|
||||
if (gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2) !=
|
||||
0) {
|
||||
printf("fail tests at end of buffer\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SIZE - size; i++)
|
||||
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
|
||||
i, efence_buff1[i], efence_buff2[i], gf_mul(2,
|
||||
efence_buff1
|
||||
[i]));
|
||||
return 1;
|
||||
}
|
||||
for (i = 0; i < TEST_SIZE - size; i++)
|
||||
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, efence_buff1[i],
|
||||
efence_buff2[i], gf_mul(2, efence_buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
printf(" done: Pass\n");
|
||||
return 0;
|
||||
printf(" done: Pass\n");
|
||||
return 0;
|
||||
}
|
||||
|
@ -29,63 +29,65 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset
|
||||
#include <string.h> // for memset
|
||||
#include "erasure_code.h"
|
||||
#include "test.h"
|
||||
|
||||
#ifndef GT_L3_CACHE
|
||||
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||
#define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
|
||||
#endif
|
||||
|
||||
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
|
||||
// Cached test, loop many times over small dataset
|
||||
# define TEST_SOURCES 10
|
||||
# define TEST_LEN 8*1024
|
||||
# define TEST_TYPE_STR "_warm"
|
||||
#elif defined (COLD_TEST)
|
||||
#define TEST_SOURCES 10
|
||||
#define TEST_LEN 8 * 1024
|
||||
#define TEST_TYPE_STR "_warm"
|
||||
#elif defined(COLD_TEST)
|
||||
// Uncached test. Pull from large mem base.
|
||||
# define TEST_SOURCES 10
|
||||
# define TEST_LEN (GT_L3_CACHE / 2)
|
||||
# define TEST_TYPE_STR "_cold"
|
||||
#elif defined (TEST_CUSTOM)
|
||||
# define TEST_TYPE_STR "_cus"
|
||||
#define TEST_SOURCES 10
|
||||
#define TEST_LEN (GT_L3_CACHE / 2)
|
||||
#define TEST_TYPE_STR "_cold"
|
||||
#elif defined(TEST_CUSTOM)
|
||||
#define TEST_TYPE_STR "_cus"
|
||||
#endif
|
||||
|
||||
#define TEST_MEM (2 * TEST_LEN)
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
void gf_vect_mul_perf(u8 a, u8 * gf_const_tbl, u8 * buff1, u8 * buff2)
|
||||
void
|
||||
gf_vect_mul_perf(u8 a, u8 *gf_const_tbl, u8 *buff1, u8 *buff2)
|
||||
{
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
|
||||
struct perf start;
|
||||
u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
|
||||
struct perf start;
|
||||
|
||||
printf("gf_vect_mul_perf:\n");
|
||||
printf("gf_vect_mul_perf:\n");
|
||||
|
||||
// Allocate large mem region
|
||||
buff1 = (u8 *) malloc(TEST_LEN);
|
||||
buff2 = (u8 *) malloc(TEST_LEN);
|
||||
if (NULL == buff1 || NULL == buff2) {
|
||||
printf("Failed to allocate %dB\n", TEST_LEN);
|
||||
return 1;
|
||||
}
|
||||
// Allocate large mem region
|
||||
buff1 = (u8 *) malloc(TEST_LEN);
|
||||
buff2 = (u8 *) malloc(TEST_LEN);
|
||||
if (NULL == buff1 || NULL == buff2) {
|
||||
printf("Failed to allocate %dB\n", TEST_LEN);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(buff1, 0, TEST_LEN);
|
||||
memset(buff2, 0, TEST_LEN);
|
||||
memset(buff1, 0, TEST_LEN);
|
||||
memset(buff2, 0, TEST_LEN);
|
||||
|
||||
printf("Start timed tests\n");
|
||||
fflush(0);
|
||||
printf("Start timed tests\n");
|
||||
fflush(0);
|
||||
|
||||
BENCHMARK(&start, BENCHMARK_TIME, gf_vect_mul_perf(a, gf_const_tbl, buff1, buff2));
|
||||
BENCHMARK(&start, BENCHMARK_TIME, gf_vect_mul_perf(a, gf_const_tbl, buff1, buff2));
|
||||
|
||||
printf("gf_vect_mul" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long)TEST_LEN);
|
||||
printf("gf_vect_mul" TEST_TYPE_STR ": ");
|
||||
perf_print(start, (long long) TEST_LEN);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
@ -31,165 +31,164 @@
|
||||
#include <stdlib.h>
|
||||
#include "erasure_code.h"
|
||||
|
||||
#define TEST_SIZE (128*1024)
|
||||
#define TEST_SIZE (128 * 1024)
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int i, ret = -1;
|
||||
u8 *buff1 = NULL, *buff2 = NULL, *buff3 = NULL, gf_const_tbl[64], a = 2;
|
||||
int tsize;
|
||||
int align, size;
|
||||
unsigned char *efence_buff1;
|
||||
unsigned char *efence_buff2;
|
||||
unsigned char *efence_buff3;
|
||||
int i, ret = -1;
|
||||
u8 *buff1 = NULL, *buff2 = NULL, *buff3 = NULL, gf_const_tbl[64], a = 2;
|
||||
int tsize;
|
||||
int align, size;
|
||||
unsigned char *efence_buff1;
|
||||
unsigned char *efence_buff2;
|
||||
unsigned char *efence_buff3;
|
||||
|
||||
printf("gf_vect_mul_test: ");
|
||||
printf("gf_vect_mul_test: ");
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
|
||||
buff1 = (u8 *) malloc(TEST_SIZE);
|
||||
buff2 = (u8 *) malloc(TEST_SIZE);
|
||||
buff3 = (u8 *) malloc(TEST_SIZE);
|
||||
buff1 = (u8 *) malloc(TEST_SIZE);
|
||||
buff2 = (u8 *) malloc(TEST_SIZE);
|
||||
buff3 = (u8 *) malloc(TEST_SIZE);
|
||||
|
||||
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
|
||||
printf("buffer alloc error\n");
|
||||
goto exit;
|
||||
}
|
||||
// Fill with rand data
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
|
||||
printf("buffer alloc error\n");
|
||||
goto exit;
|
||||
}
|
||||
// Fill with rand data
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
|
||||
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
|
||||
printf("fail creating buff2\n");
|
||||
goto exit;
|
||||
}
|
||||
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
|
||||
printf("fail creating buff2\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++) {
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i,
|
||||
buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < TEST_SIZE; i++) {
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
|
||||
gf_mul(2, buff1[i]));
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) {
|
||||
printf("fail fill with rand data\n");
|
||||
goto exit;
|
||||
}
|
||||
// Check reference function
|
||||
for (i = 0; i < TEST_SIZE; i++) {
|
||||
if (buff2[i] != buff3[i]) {
|
||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) {
|
||||
printf("fail fill with rand data\n");
|
||||
goto exit;
|
||||
}
|
||||
// Check reference function
|
||||
for (i = 0; i < TEST_SIZE; i++) {
|
||||
if (buff2[i] != buff3[i]) {
|
||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", i, a, buff1[i], buff2[i],
|
||||
gf_mul(a, buff1[i]));
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
|
||||
// Check each possible constant
|
||||
for (a = 0; a != 255; a++) {
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
|
||||
printf("fail creating buff2\n");
|
||||
goto exit;
|
||||
}
|
||||
// Check each possible constant
|
||||
for (a = 0; a != 255; a++) {
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
|
||||
printf("fail creating buff2\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
||||
goto exit;
|
||||
}
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", i, a, buff1[i],
|
||||
buff2[i], gf_mul(2, buff1[i]));
|
||||
goto exit;
|
||||
}
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Check buffer len
|
||||
for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) {
|
||||
a = rand();
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
if (gf_vect_mul(tsize, gf_const_tbl, buff1, buff2) != 0) {
|
||||
printf("fail creating buff2 (len %d)\n", tsize);
|
||||
goto exit;
|
||||
}
|
||||
// Check buffer len
|
||||
for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) {
|
||||
a = rand();
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
if (gf_vect_mul(tsize, gf_const_tbl, buff1, buff2) != 0) {
|
||||
printf("fail creating buff2 (len %d)\n", tsize);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (i = 0; i < tsize; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
||||
goto exit;
|
||||
}
|
||||
for (i = 0; i < tsize; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", i, a, buff1[i],
|
||||
buff2[i], gf_mul(2, buff1[i]));
|
||||
goto exit;
|
||||
}
|
||||
#ifdef TEST_VERBOSE
|
||||
if (0 == tsize % (32 * 8)) {
|
||||
putchar('.');
|
||||
fflush(0);
|
||||
}
|
||||
if (0 == tsize % (32 * 8)) {
|
||||
putchar('.');
|
||||
fflush(0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = 32;
|
||||
a = 2;
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = 32;
|
||||
a = 2;
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
for (size = 0; size < TEST_SIZE; size += align) {
|
||||
// Line up TEST_SIZE from end
|
||||
efence_buff1 = buff1 + size;
|
||||
efence_buff2 = buff2 + size;
|
||||
efence_buff3 = buff3 + size;
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
for (size = 0; size < TEST_SIZE; size += align) {
|
||||
// Line up TEST_SIZE from end
|
||||
efence_buff1 = buff1 + size;
|
||||
efence_buff2 = buff2 + size;
|
||||
efence_buff3 = buff3 + size;
|
||||
|
||||
gf_vect_mul(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
|
||||
gf_vect_mul(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
|
||||
|
||||
for (i = 0; i < TEST_SIZE - size; i++)
|
||||
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
|
||||
i, efence_buff1[i], efence_buff2[i],
|
||||
gf_mul(2, efence_buff1[i]));
|
||||
goto exit;
|
||||
}
|
||||
for (i = 0; i < TEST_SIZE - size; i++)
|
||||
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, efence_buff1[i],
|
||||
efence_buff2[i], gf_mul(2, efence_buff1[i]));
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (gf_vect_mul_base
|
||||
(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3) != 0) {
|
||||
printf("fail line up TEST_SIZE from end\n");
|
||||
goto exit;
|
||||
}
|
||||
// Check reference function
|
||||
for (i = 0; i < TEST_SIZE - size; i++)
|
||||
if (efence_buff2[i] != efence_buff3[i]) {
|
||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||
i, a, efence_buff2[i], efence_buff3[i],
|
||||
gf_mul(2, efence_buff1[i]));
|
||||
goto exit;
|
||||
}
|
||||
if (gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3) !=
|
||||
0) {
|
||||
printf("fail line up TEST_SIZE from end\n");
|
||||
goto exit;
|
||||
}
|
||||
// Check reference function
|
||||
for (i = 0; i < TEST_SIZE - size; i++)
|
||||
if (efence_buff2[i] != efence_buff3[i]) {
|
||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", i, a,
|
||||
efence_buff2[i], efence_buff3[i],
|
||||
gf_mul(2, efence_buff1[i]));
|
||||
goto exit;
|
||||
}
|
||||
#ifdef TEST_VERBOSE
|
||||
putchar('.');
|
||||
putchar('.');
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Test all unsupported sizes up to TEST_SIZE
|
||||
for (size = 0; size < TEST_SIZE; size++) {
|
||||
if (size % align != 0 && gf_vect_mul(size, gf_const_tbl, buff1, buff2) == 0) {
|
||||
printf
|
||||
("fail expecting nonzero return code for unaligned size param (%d)\n",
|
||||
size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
// Test all unsupported sizes up to TEST_SIZE
|
||||
for (size = 0; size < TEST_SIZE; size++) {
|
||||
if (size % align != 0 && gf_vect_mul(size, gf_const_tbl, buff1, buff2) == 0) {
|
||||
printf("fail expecting nonzero return code for unaligned size param (%d)\n",
|
||||
size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
printf(" done: Pass\n");
|
||||
fflush(0);
|
||||
printf(" done: Pass\n");
|
||||
fflush(0);
|
||||
|
||||
ret = 0;
|
||||
exit:
|
||||
ret = 0;
|
||||
exit:
|
||||
|
||||
free(buff1);
|
||||
free(buff2);
|
||||
free(buff3);
|
||||
free(buff1);
|
||||
free(buff2);
|
||||
free(buff3);
|
||||
|
||||
return ret;
|
||||
return ret;
|
||||
}
|
||||
|
@ -1,106 +1,109 @@
|
||||
#include "erasure_code.h"
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
/*
 * Public gf_vect_dot_prod entry point for this build: forwards every
 * argument unchanged to the VSX implementation.
 */
void
gf_vect_dot_prod(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest)
{
        gf_vect_dot_prod_vsx(len, vlen, v, src, dest);
}
|
||||
|
||||
/*
 * Public gf_vect_mad entry point for this build: forwards every argument
 * unchanged to the VSX implementation.
 */
void
gf_vect_mad(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, unsigned char *dest)
{
        gf_vect_mad_vsx(len, vec, vec_i, v, src, dest);
}
|
||||
|
||||
/*
 * Encode `dests` outputs from `srcs` sources using the VSX dot-product kernels.
 *
 * Buffers shorter than 64 bytes are handed to ec_encode_data_base().
 * Otherwise outputs are produced six at a time; after each group the table
 * pointer v advances by 6 * srcs * 32 bytes and dest by six entries. The
 * remaining 1..5 outputs are handled by the kernel of matching width.
 */
void
ec_encode_data(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
               unsigned char **dest)
{
        if (len < 64) {
                ec_encode_data_base(len, srcs, dests, v, src, dest);
                return;
        }

        while (dests >= 6) {
                gf_6vect_dot_prod_vsx(len, srcs, v, src, dest);
                v += 6 * srcs * 32;
                dest += 6;
                dests -= 6;
        }

        // dests < 6 here, so no six-wide case is needed
        switch (dests) {
        case 5:
                gf_5vect_dot_prod_vsx(len, srcs, v, src, dest);
                break;
        case 4:
                gf_4vect_dot_prod_vsx(len, srcs, v, src, dest);
                break;
        case 3:
                gf_3vect_dot_prod_vsx(len, srcs, v, src, dest);
                break;
        case 2:
                gf_2vect_dot_prod_vsx(len, srcs, v, src, dest);
                break;
        case 1:
                // The single-output kernel takes the buffer itself, not the pointer array
                gf_vect_dot_prod_vsx(len, srcs, v, src, *dest);
                break;
        default:
                // Nothing left to encode
                break;
        }
}
|
||||
|
||||
/*
 * Update `rows` outputs with the contribution of one source buffer (`data`,
 * source index vec_i out of k) using the VSX multiply-accumulate kernels.
 *
 * Buffers shorter than 64 bytes are handed to ec_encode_data_update_base().
 * Otherwise outputs are updated six at a time; after each group the table
 * pointer v advances by 6 * k * 32 bytes and dest by six entries. The
 * remaining 1..5 outputs are handled by the kernel of matching width.
 */
void
ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v, unsigned char *data,
                      unsigned char **dest)
{
        if (len < 64) {
                ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
                return;
        }

        while (rows >= 6) {
                gf_6vect_mad_vsx(len, k, vec_i, v, data, dest);
                v += 6 * k * 32;
                dest += 6;
                rows -= 6;
        }

        // rows < 6 here, so no six-wide case is needed
        switch (rows) {
        case 5:
                gf_5vect_mad_vsx(len, k, vec_i, v, data, dest);
                break;
        case 4:
                gf_4vect_mad_vsx(len, k, vec_i, v, data, dest);
                break;
        case 3:
                gf_3vect_mad_vsx(len, k, vec_i, v, data, dest);
                break;
        case 2:
                gf_2vect_mad_vsx(len, k, vec_i, v, data, dest);
                break;
        case 1:
                // The single-output kernel takes the buffer itself, not the pointer array
                gf_vect_mad_vsx(len, k, vec_i, v, data, *dest);
                break;
        default:
                // Nothing left to update
                break;
        }
}
|
||||
|
||||
/*
 * Public gf_vect_mul entry point for this build.
 *
 * Returns -1 without touching the buffers when len is not a multiple of 32;
 * otherwise runs gf_vect_mul_vsx() with table a from src into dest and
 * returns 0.
 */
int
gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
{
        /* Size must be aligned to 32 bytes */
        if ((len % 32) != 0)
                return -1;

        gf_vect_mul_vsx(len, a, (unsigned char *) src, (unsigned char *) dest);
        return 0;
}
|
||||
|
||||
/*
 * Public ec_init_tables entry point for this build: forwards every argument
 * unchanged to ec_init_tables_base().
 */
void
ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
        // Plain call: ISO C does not allow `return <expr>;` in a void function
        ec_init_tables_base(k, rows, a, g_tbls);
}
|
||||
|
@ -9,29 +9,37 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(__ibmxl__)
|
||||
#define EC_vec_xl(a, b) vec_xl_be(a, b)
|
||||
#define EC_vec_xl(a, b) vec_xl_be(a, b)
|
||||
#define EC_vec_permxor(va, vb, vc) __vpermxor(va, vb, vc)
|
||||
#elif defined __GNUC__ && __GNUC__ >= 8
|
||||
#define EC_vec_xl(a, b) vec_xl_be(a, b)
|
||||
#define EC_vec_xl(a, b) vec_xl_be(a, b)
|
||||
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vc)
|
||||
#elif defined __GNUC__ && __GNUC__ >= 7
|
||||
#if defined _ARCH_PWR9
|
||||
#define EC_vec_xl(a, b) vec_vsx_ld(a, b)
|
||||
#define EC_vec_xl(a, b) vec_vsx_ld(a, b)
|
||||
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
|
||||
#else
|
||||
inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
|
||||
vector unsigned char vc;
|
||||
__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0" : "=wa" (vc) : "r" (off), "r" (ptr));
|
||||
return vc;
|
||||
inline vector unsigned char
|
||||
EC_vec_xl(int off, unsigned char *ptr)
|
||||
{
|
||||
vector unsigned char vc;
|
||||
__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0"
|
||||
: "=wa"(vc)
|
||||
: "r"(off), "r"(ptr));
|
||||
return vc;
|
||||
}
|
||||
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
|
||||
#endif
|
||||
#else
|
||||
#if defined _ARCH_PWR8
|
||||
inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
|
||||
vector unsigned char vc;
|
||||
__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0" : "=wa" (vc) : "r" (off), "r" (ptr));
|
||||
return vc;
|
||||
inline vector unsigned char
|
||||
EC_vec_xl(int off, unsigned char *ptr)
|
||||
{
|
||||
vector unsigned char vc;
|
||||
__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0"
|
||||
: "=wa"(vc)
|
||||
: "r"(off), "r"(ptr));
|
||||
return vc;
|
||||
}
|
||||
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
|
||||
#else
|
||||
@ -57,7 +65,8 @@ inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigned char *dest);
|
||||
void
|
||||
gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product. VSX version.
|
||||
@ -77,8 +86,9 @@ void gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigne
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest);
|
||||
void
|
||||
gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||
unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with two outputs. VSX version.
|
||||
@ -99,8 +109,9 @@ void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
void
|
||||
gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with three outputs. VSX version.
|
||||
@ -121,8 +132,9 @@ void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
void
|
||||
gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with four outputs. VSX version.
|
||||
@ -143,8 +155,9 @@ void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
void
|
||||
gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with five outputs. VSX version.
|
||||
@ -165,8 +178,9 @@ void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
void
|
||||
gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with six outputs. VSX version.
|
||||
@ -187,8 +201,9 @@ void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
void
|
||||
gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply accumulate. VSX version.
|
||||
@ -211,8 +226,9 @@ void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char *dest);
|
||||
void
|
||||
gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char *dest);
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 2 accumulate. VSX version.
|
||||
*
|
||||
@ -234,8 +250,9 @@ void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigne
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
void
|
||||
gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 3 accumulate. VSX version.
|
||||
@ -258,8 +275,9 @@ void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
void
|
||||
gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 4 accumulate. VSX version.
|
||||
@ -282,8 +300,9 @@ void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
void
|
||||
gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 5 accumulate. VSX version.
|
||||
@ -305,8 +324,9 @@ void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
|
||||
* @param dest Array of pointers to destination input/outputs.
|
||||
* @returns none
|
||||
*/
|
||||
void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
void
|
||||
gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 6 accumulate. VSX version.
|
||||
@ -328,8 +348,9 @@ void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
|
||||
* @param dest Array of pointers to destination input/outputs.
|
||||
* @returns none
|
||||
*/
|
||||
void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
void
|
||||
gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -1,83 +1,84 @@
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest)
|
||||
void
|
||||
gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||
unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4;
|
||||
vector unsigned char vYD, vYE, vYF, vYG;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1;
|
||||
int i, j, head;
|
||||
unsigned char *s, *t0, *t1;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4;
|
||||
vector unsigned char vYD, vYE, vYF, vYG;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1;
|
||||
int i, j, head;
|
||||
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
|
||||
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_2vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_2vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t0 = (unsigned char *) dest[0];
|
||||
t1 = (unsigned char *) dest[1];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
}
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
}
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *)src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *) src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
}
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
}
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
}
|
||||
return;
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1,65 +1,66 @@
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest)
|
||||
void
|
||||
gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4;
|
||||
vector unsigned char vYD, vYE, vYF, vYG;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1;
|
||||
int i, head;
|
||||
unsigned char *s, *t0, *t1;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4;
|
||||
vector unsigned char vYD, vYE, vYF, vYG;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1;
|
||||
int i, head;
|
||||
|
||||
s = (unsigned char *)src;
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
s = (unsigned char *) src;
|
||||
t0 = (unsigned char *) dest[0];
|
||||
t1 = (unsigned char *) dest[1];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
}
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
}
|
||||
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
}
|
||||
return;
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1,104 +1,105 @@
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest)
|
||||
void
|
||||
gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||
unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
||||
int i, j, head;
|
||||
unsigned char *s, *t0, *t1, *t2;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
||||
int i, j, head;
|
||||
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
|
||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
|
||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
|
||||
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_3vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_3vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
t0 = (unsigned char *) dest[0];
|
||||
t1 = (unsigned char *) dest[1];
|
||||
t2 = (unsigned char *) dest[2];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
||||
}
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
||||
}
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
vY5 = vY5 ^ vY5;
|
||||
vY6 = vY6 ^ vY6;
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
vY5 = vY5 ^ vY5;
|
||||
vY6 = vY6 ^ vY6;
|
||||
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
vYH = vYH ^ vYH;
|
||||
vYI = vYI ^ vYI;
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
vYH = vYH ^ vYH;
|
||||
vYI = vYI ^ vYI;
|
||||
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
unsigned char *g2 = &gftbls[2 * 32 * vlen];
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
unsigned char *g2 = &gftbls[2 * 32 * vlen];
|
||||
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *)src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *) src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vlo2 = vec_xl(0, g2);
|
||||
vhi2 = vec_xl(16, g2);
|
||||
vlo2 = vec_xl(0, g2);
|
||||
vhi2 = vec_xl(16, g2);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
g2 += 32;
|
||||
}
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
g2 += 32;
|
||||
}
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
}
|
||||
return;
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1,84 +1,85 @@
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest)
|
||||
void
|
||||
gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
||||
int i, head;
|
||||
unsigned char *s, *t0, *t1, *t2;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
||||
int i, head;
|
||||
|
||||
s = (unsigned char *)src;
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
s = (unsigned char *) src;
|
||||
t0 = (unsigned char *) dest[0];
|
||||
t1 = (unsigned char *) dest[1];
|
||||
t2 = (unsigned char *) dest[2];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
|
||||
}
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
|
||||
}
|
||||
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vY5 = vec_xl(0, t2 + i);
|
||||
vY6 = vec_xl(16, t2 + i);
|
||||
vYH = vec_xl(32, t2 + i);
|
||||
vYI = vec_xl(48, t2 + i);
|
||||
vY5 = vec_xl(0, t2 + i);
|
||||
vY6 = vec_xl(16, t2 + i);
|
||||
vYH = vec_xl(32, t2 + i);
|
||||
vYI = vec_xl(48, t2 + i);
|
||||
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
}
|
||||
return;
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1,124 +1,125 @@
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest)
|
||||
void
|
||||
gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||
unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2, *t3;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
|
||||
int i, j, head;
|
||||
unsigned char *s, *t0, *t1, *t2, *t3;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
|
||||
int i, j, head;
|
||||
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
|
||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
|
||||
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]);
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
|
||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
|
||||
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *) dest[3]);
|
||||
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_4vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_4vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
t3 = (unsigned char *)dest[3];
|
||||
t0 = (unsigned char *) dest[0];
|
||||
t1 = (unsigned char *) dest[1];
|
||||
t2 = (unsigned char *) dest[2];
|
||||
t3 = (unsigned char *) dest[3];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
|
||||
}
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
|
||||
}
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
vY5 = vY5 ^ vY5;
|
||||
vY6 = vY6 ^ vY6;
|
||||
vY7 = vY7 ^ vY7;
|
||||
vY8 = vY8 ^ vY8;
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
vY5 = vY5 ^ vY5;
|
||||
vY6 = vY6 ^ vY6;
|
||||
vY7 = vY7 ^ vY7;
|
||||
vY8 = vY8 ^ vY8;
|
||||
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
vYH = vYH ^ vYH;
|
||||
vYI = vYI ^ vYI;
|
||||
vYJ = vYJ ^ vYJ;
|
||||
vYK = vYK ^ vYK;
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
vYH = vYH ^ vYH;
|
||||
vYI = vYI ^ vYI;
|
||||
vYJ = vYJ ^ vYJ;
|
||||
vYK = vYK ^ vYK;
|
||||
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
unsigned char *g2 = &gftbls[2 * 32 * vlen];
|
||||
unsigned char *g3 = &gftbls[3 * 32 * vlen];
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
unsigned char *g2 = &gftbls[2 * 32 * vlen];
|
||||
unsigned char *g3 = &gftbls[3 * 32 * vlen];
|
||||
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *)src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *) src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vlo2 = vec_xl(0, g2);
|
||||
vhi2 = vec_xl(16, g2);
|
||||
vlo3 = vec_xl(0, g3);
|
||||
vhi3 = vec_xl(16, g3);
|
||||
vlo2 = vec_xl(0, g2);
|
||||
vhi2 = vec_xl(16, g2);
|
||||
vlo3 = vec_xl(0, g3);
|
||||
vhi3 = vec_xl(16, g3);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
g2 += 32;
|
||||
g3 += 32;
|
||||
}
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
g2 += 32;
|
||||
g3 += 32;
|
||||
}
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
}
|
||||
return;
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1,103 +1,104 @@
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest)
|
||||
void
|
||||
gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2, *t3;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
|
||||
int i, head;
|
||||
unsigned char *s, *t0, *t1, *t2, *t3;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
|
||||
int i, head;
|
||||
|
||||
s = (unsigned char *)src;
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
t3 = (unsigned char *)dest[3];
|
||||
s = (unsigned char *) src;
|
||||
t0 = (unsigned char *) dest[0];
|
||||
t1 = (unsigned char *) dest[1];
|
||||
t2 = (unsigned char *) dest[2];
|
||||
t3 = (unsigned char *) dest[3];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
|
||||
}
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
|
||||
}
|
||||
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vY5 = vec_xl(0, t2 + i);
|
||||
vY6 = vec_xl(16, t2 + i);
|
||||
vYH = vec_xl(32, t2 + i);
|
||||
vYI = vec_xl(48, t2 + i);
|
||||
vY5 = vec_xl(0, t2 + i);
|
||||
vY6 = vec_xl(16, t2 + i);
|
||||
vYH = vec_xl(32, t2 + i);
|
||||
vYI = vec_xl(48, t2 + i);
|
||||
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
vY7 = vec_xl(0, t3 + i);
|
||||
vY8 = vec_xl(16, t3 + i);
|
||||
vYJ = vec_xl(32, t3 + i);
|
||||
vYK = vec_xl(48, t3 + i);
|
||||
vY7 = vec_xl(0, t3 + i);
|
||||
vY8 = vec_xl(16, t3 + i);
|
||||
vYJ = vec_xl(32, t3 + i);
|
||||
vYK = vec_xl(48, t3 + i);
|
||||
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
}
|
||||
return;
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1,145 +1,146 @@
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest)
|
||||
void
|
||||
gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||
unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2, *t3, *t4;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
|
||||
int i, j, head;
|
||||
unsigned char *s, *t0, *t1, *t2, *t3, *t4;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
|
||||
int i, j, head;
|
||||
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
|
||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
|
||||
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]);
|
||||
gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *)dest[4]);
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
|
||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
|
||||
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *) dest[3]);
|
||||
gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *) dest[4]);
|
||||
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_5vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_5vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
t3 = (unsigned char *)dest[3];
|
||||
t4 = (unsigned char *)dest[4];
|
||||
t0 = (unsigned char *) dest[0];
|
||||
t1 = (unsigned char *) dest[1];
|
||||
t2 = (unsigned char *) dest[2];
|
||||
t3 = (unsigned char *) dest[3];
|
||||
t4 = (unsigned char *) dest[4];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
|
||||
}
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
|
||||
}
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
vY5 = vY5 ^ vY5;
|
||||
vY6 = vY6 ^ vY6;
|
||||
vY7 = vY7 ^ vY7;
|
||||
vY8 = vY8 ^ vY8;
|
||||
vY9 = vY9 ^ vY9;
|
||||
vYA = vYA ^ vYA;
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
vY5 = vY5 ^ vY5;
|
||||
vY6 = vY6 ^ vY6;
|
||||
vY7 = vY7 ^ vY7;
|
||||
vY8 = vY8 ^ vY8;
|
||||
vY9 = vY9 ^ vY9;
|
||||
vYA = vYA ^ vYA;
|
||||
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
vYH = vYH ^ vYH;
|
||||
vYI = vYI ^ vYI;
|
||||
vYJ = vYJ ^ vYJ;
|
||||
vYK = vYK ^ vYK;
|
||||
vYL = vYL ^ vYL;
|
||||
vYM = vYM ^ vYM;
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
vYH = vYH ^ vYH;
|
||||
vYI = vYI ^ vYI;
|
||||
vYJ = vYJ ^ vYJ;
|
||||
vYK = vYK ^ vYK;
|
||||
vYL = vYL ^ vYL;
|
||||
vYM = vYM ^ vYM;
|
||||
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
unsigned char *g2 = &gftbls[2 * 32 * vlen];
|
||||
unsigned char *g3 = &gftbls[3 * 32 * vlen];
|
||||
unsigned char *g4 = &gftbls[4 * 32 * vlen];
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
unsigned char *g2 = &gftbls[2 * 32 * vlen];
|
||||
unsigned char *g3 = &gftbls[3 * 32 * vlen];
|
||||
unsigned char *g4 = &gftbls[4 * 32 * vlen];
|
||||
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *)src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *) src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vlo2 = vec_xl(0, g2);
|
||||
vhi2 = vec_xl(16, g2);
|
||||
vlo3 = vec_xl(0, g3);
|
||||
vhi3 = vec_xl(16, g3);
|
||||
vlo2 = vec_xl(0, g2);
|
||||
vhi2 = vec_xl(16, g2);
|
||||
vlo3 = vec_xl(0, g3);
|
||||
vhi3 = vec_xl(16, g3);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vlo4 = vec_xl(0, g4);
|
||||
vhi4 = vec_xl(16, g4);
|
||||
vlo4 = vec_xl(0, g4);
|
||||
vhi4 = vec_xl(16, g4);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
|
||||
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
|
||||
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
|
||||
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
|
||||
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
|
||||
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
|
||||
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
|
||||
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
|
||||
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
|
||||
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
g2 += 32;
|
||||
g3 += 32;
|
||||
g4 += 32;
|
||||
}
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
g2 += 32;
|
||||
g3 += 32;
|
||||
g4 += 32;
|
||||
}
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vY9, 0, t4 + i);
|
||||
vec_xst(vYA, 16, t4 + i);
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vY9, 0, t4 + i);
|
||||
vec_xst(vYA, 16, t4 + i);
|
||||
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
vec_xst(vYL, 32, t4 + i);
|
||||
vec_xst(vYM, 48, t4 + i);
|
||||
}
|
||||
return;
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
vec_xst(vYL, 32, t4 + i);
|
||||
vec_xst(vYM, 48, t4 + i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1,122 +1,123 @@
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest)
|
||||
void
|
||||
gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2, *t3, *t4;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
|
||||
int i, head;
|
||||
unsigned char *s, *t0, *t1, *t2, *t3, *t4;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
|
||||
int i, head;
|
||||
|
||||
s = (unsigned char *)src;
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
t3 = (unsigned char *)dest[3];
|
||||
t4 = (unsigned char *)dest[4];
|
||||
s = (unsigned char *) src;
|
||||
t0 = (unsigned char *) dest[0];
|
||||
t1 = (unsigned char *) dest[1];
|
||||
t2 = (unsigned char *) dest[2];
|
||||
t3 = (unsigned char *) dest[3];
|
||||
t4 = (unsigned char *) dest[4];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
|
||||
}
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
|
||||
}
|
||||
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
|
||||
vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
|
||||
vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vY5 = vec_xl(0, t2 + i);
|
||||
vY6 = vec_xl(16, t2 + i);
|
||||
vYH = vec_xl(32, t2 + i);
|
||||
vYI = vec_xl(48, t2 + i);
|
||||
vY5 = vec_xl(0, t2 + i);
|
||||
vY6 = vec_xl(16, t2 + i);
|
||||
vYH = vec_xl(32, t2 + i);
|
||||
vYI = vec_xl(48, t2 + i);
|
||||
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
vY7 = vec_xl(0, t3 + i);
|
||||
vY8 = vec_xl(16, t3 + i);
|
||||
vYJ = vec_xl(32, t3 + i);
|
||||
vYK = vec_xl(48, t3 + i);
|
||||
vY7 = vec_xl(0, t3 + i);
|
||||
vY8 = vec_xl(16, t3 + i);
|
||||
vYJ = vec_xl(32, t3 + i);
|
||||
vYK = vec_xl(48, t3 + i);
|
||||
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
|
||||
vY9 = vec_xl(0, t4 + i);
|
||||
vYA = vec_xl(16, t4 + i);
|
||||
vYL = vec_xl(32, t4 + i);
|
||||
vYM = vec_xl(48, t4 + i);
|
||||
vY9 = vec_xl(0, t4 + i);
|
||||
vYA = vec_xl(16, t4 + i);
|
||||
vYL = vec_xl(32, t4 + i);
|
||||
vYM = vec_xl(48, t4 + i);
|
||||
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
|
||||
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
|
||||
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
|
||||
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
|
||||
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
|
||||
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
|
||||
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
|
||||
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
|
||||
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
|
||||
|
||||
vec_xst(vY9, 0, t4 + i);
|
||||
vec_xst(vYA, 16, t4 + i);
|
||||
vec_xst(vYL, 32, t4 + i);
|
||||
vec_xst(vYM, 48, t4 + i);
|
||||
}
|
||||
return;
|
||||
vec_xst(vY9, 0, t4 + i);
|
||||
vec_xst(vYA, 16, t4 + i);
|
||||
vec_xst(vYL, 32, t4 + i);
|
||||
vec_xst(vYM, 48, t4 + i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1,166 +1,167 @@
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest)
|
||||
void
|
||||
gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||
unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
||||
vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
|
||||
int i, j, head;
|
||||
unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
||||
vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
|
||||
int i, j, head;
|
||||
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
|
||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
|
||||
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]);
|
||||
gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *)dest[4]);
|
||||
gf_vect_mul_vsx(len, &gftbls[5 * 32 * vlen], src[0], (unsigned char *)dest[5]);
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
|
||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
|
||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
|
||||
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *) dest[3]);
|
||||
gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *) dest[4]);
|
||||
gf_vect_mul_vsx(len, &gftbls[5 * 32 * vlen], src[0], (unsigned char *) dest[5]);
|
||||
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_6vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_6vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
t3 = (unsigned char *)dest[3];
|
||||
t4 = (unsigned char *)dest[4];
|
||||
t5 = (unsigned char *)dest[5];
|
||||
t0 = (unsigned char *) dest[0];
|
||||
t1 = (unsigned char *) dest[1];
|
||||
t2 = (unsigned char *) dest[2];
|
||||
t3 = (unsigned char *) dest[3];
|
||||
t4 = (unsigned char *) dest[4];
|
||||
t5 = (unsigned char *) dest[5];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[5 * 32 * vlen], src, t5);
|
||||
}
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[5 * 32 * vlen], src, t5);
|
||||
}
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
vY5 = vY5 ^ vY5;
|
||||
vY6 = vY6 ^ vY6;
|
||||
vY7 = vY7 ^ vY7;
|
||||
vY8 = vY8 ^ vY8;
|
||||
vY9 = vY9 ^ vY9;
|
||||
vYA = vYA ^ vYA;
|
||||
vYB = vYB ^ vYB;
|
||||
vYC = vYC ^ vYC;
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
vY5 = vY5 ^ vY5;
|
||||
vY6 = vY6 ^ vY6;
|
||||
vY7 = vY7 ^ vY7;
|
||||
vY8 = vY8 ^ vY8;
|
||||
vY9 = vY9 ^ vY9;
|
||||
vYA = vYA ^ vYA;
|
||||
vYB = vYB ^ vYB;
|
||||
vYC = vYC ^ vYC;
|
||||
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
vYH = vYH ^ vYH;
|
||||
vYI = vYI ^ vYI;
|
||||
vYJ = vYJ ^ vYJ;
|
||||
vYK = vYK ^ vYK;
|
||||
vYL = vYL ^ vYL;
|
||||
vYM = vYM ^ vYM;
|
||||
vYN = vYN ^ vYN;
|
||||
vYO = vYO ^ vYO;
|
||||
vYD = vYD ^ vYD;
|
||||
vYE = vYE ^ vYE;
|
||||
vYF = vYF ^ vYF;
|
||||
vYG = vYG ^ vYG;
|
||||
vYH = vYH ^ vYH;
|
||||
vYI = vYI ^ vYI;
|
||||
vYJ = vYJ ^ vYJ;
|
||||
vYK = vYK ^ vYK;
|
||||
vYL = vYL ^ vYL;
|
||||
vYM = vYM ^ vYM;
|
||||
vYN = vYN ^ vYN;
|
||||
vYO = vYO ^ vYO;
|
||||
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
unsigned char *g2 = &gftbls[2 * 32 * vlen];
|
||||
unsigned char *g3 = &gftbls[3 * 32 * vlen];
|
||||
unsigned char *g4 = &gftbls[4 * 32 * vlen];
|
||||
unsigned char *g5 = &gftbls[5 * 32 * vlen];
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||
unsigned char *g2 = &gftbls[2 * 32 * vlen];
|
||||
unsigned char *g3 = &gftbls[3 * 32 * vlen];
|
||||
unsigned char *g4 = &gftbls[4 * 32 * vlen];
|
||||
unsigned char *g5 = &gftbls[5 * 32 * vlen];
|
||||
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *)src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *) src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo1 = EC_vec_xl(0, g1);
|
||||
vhi1 = EC_vec_xl(16, g1);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vlo2 = EC_vec_xl(0, g2);
|
||||
vhi2 = EC_vec_xl(16, g2);
|
||||
vlo3 = EC_vec_xl(0, g3);
|
||||
vhi3 = EC_vec_xl(16, g3);
|
||||
vlo2 = EC_vec_xl(0, g2);
|
||||
vhi2 = EC_vec_xl(16, g2);
|
||||
vlo3 = EC_vec_xl(0, g3);
|
||||
vhi3 = EC_vec_xl(16, g3);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vlo4 = EC_vec_xl(0, g4);
|
||||
vhi4 = EC_vec_xl(16, g4);
|
||||
vlo5 = EC_vec_xl(0, g5);
|
||||
vhi5 = EC_vec_xl(16, g5);
|
||||
vlo4 = EC_vec_xl(0, g4);
|
||||
vhi4 = EC_vec_xl(16, g4);
|
||||
vlo5 = EC_vec_xl(0, g5);
|
||||
vhi5 = EC_vec_xl(16, g5);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
|
||||
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
|
||||
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
|
||||
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
|
||||
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
|
||||
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
|
||||
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
|
||||
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
|
||||
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
|
||||
|
||||
vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
|
||||
vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
|
||||
vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
|
||||
vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);
|
||||
vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
|
||||
vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
|
||||
vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
|
||||
vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);
|
||||
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
g2 += 32;
|
||||
g3 += 32;
|
||||
g4 += 32;
|
||||
g5 += 32;
|
||||
}
|
||||
g0 += 32;
|
||||
g1 += 32;
|
||||
g2 += 32;
|
||||
g3 += 32;
|
||||
g4 += 32;
|
||||
g5 += 32;
|
||||
}
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vY9, 0, t4 + i);
|
||||
vec_xst(vYA, 16, t4 + i);
|
||||
vec_xst(vYB, 0, t5 + i);
|
||||
vec_xst(vYC, 16, t5 + i);
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vY9, 0, t4 + i);
|
||||
vec_xst(vYA, 16, t4 + i);
|
||||
vec_xst(vYB, 0, t5 + i);
|
||||
vec_xst(vYC, 16, t5 + i);
|
||||
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
vec_xst(vYL, 32, t4 + i);
|
||||
vec_xst(vYM, 48, t4 + i);
|
||||
vec_xst(vYN, 32, t5 + i);
|
||||
vec_xst(vYO, 48, t5 + i);
|
||||
}
|
||||
return;
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
vec_xst(vYL, 32, t4 + i);
|
||||
vec_xst(vYM, 48, t4 + i);
|
||||
vec_xst(vYN, 32, t5 + i);
|
||||
vec_xst(vYO, 48, t5 + i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1,142 +1,143 @@
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest)
|
||||
void
|
||||
gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest)
|
||||
{
|
||||
unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
||||
vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
|
||||
int i, head;
|
||||
unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
|
||||
vector unsigned char vX1, vX2, vX3, vX4;
|
||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
|
||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
|
||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
||||
vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
|
||||
int i, head;
|
||||
|
||||
s = (unsigned char *)src;
|
||||
t0 = (unsigned char *)dest[0];
|
||||
t1 = (unsigned char *)dest[1];
|
||||
t2 = (unsigned char *)dest[2];
|
||||
t3 = (unsigned char *)dest[3];
|
||||
t4 = (unsigned char *)dest[4];
|
||||
t5 = (unsigned char *)dest[5];
|
||||
s = (unsigned char *) src;
|
||||
t0 = (unsigned char *) dest[0];
|
||||
t1 = (unsigned char *) dest[1];
|
||||
t2 = (unsigned char *) dest[2];
|
||||
t3 = (unsigned char *) dest[3];
|
||||
t4 = (unsigned char *) dest[4];
|
||||
t5 = (unsigned char *) dest[5];
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[5 * 32 * vec], src, t5);
|
||||
}
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[5 * 32 * vec], src, t5);
|
||||
}
|
||||
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
|
||||
vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
|
||||
vlo5 = EC_vec_xl(0, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
|
||||
vhi5 = EC_vec_xl(16, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
|
||||
vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
|
||||
vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
|
||||
vlo5 = EC_vec_xl(0, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
|
||||
vhi5 = EC_vec_xl(16, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vYD = vec_xl(32, t0 + i);
|
||||
vYE = vec_xl(48, t0 + i);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vYD, 32, t0 + i);
|
||||
vec_xst(vYE, 48, t0 + i);
|
||||
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
vY3 = vec_xl(0, t1 + i);
|
||||
vY4 = vec_xl(16, t1 + i);
|
||||
vYF = vec_xl(32, t1 + i);
|
||||
vYG = vec_xl(48, t1 + i);
|
||||
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
vec_xst(vY3, 0, t1 + i);
|
||||
vec_xst(vY4, 16, t1 + i);
|
||||
vec_xst(vYF, 32, t1 + i);
|
||||
vec_xst(vYG, 48, t1 + i);
|
||||
|
||||
vY5 = vec_xl(0, t2 + i);
|
||||
vY6 = vec_xl(16, t2 + i);
|
||||
vYH = vec_xl(32, t2 + i);
|
||||
vYI = vec_xl(48, t2 + i);
|
||||
vY5 = vec_xl(0, t2 + i);
|
||||
vY6 = vec_xl(16, t2 + i);
|
||||
vYH = vec_xl(32, t2 + i);
|
||||
vYI = vec_xl(48, t2 + i);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||
|
||||
vY7 = vec_xl(0, t3 + i);
|
||||
vY8 = vec_xl(16, t3 + i);
|
||||
vYJ = vec_xl(32, t3 + i);
|
||||
vYK = vec_xl(48, t3 + i);
|
||||
vY7 = vec_xl(0, t3 + i);
|
||||
vY8 = vec_xl(16, t3 + i);
|
||||
vYJ = vec_xl(32, t3 + i);
|
||||
vYK = vec_xl(48, t3 + i);
|
||||
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
vec_xst(vY5, 0, t2 + i);
|
||||
vec_xst(vY6, 16, t2 + i);
|
||||
vec_xst(vYH, 32, t2 + i);
|
||||
vec_xst(vYI, 48, t2 + i);
|
||||
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
|
||||
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
|
||||
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
|
||||
|
||||
vY9 = vec_xl(0, t4 + i);
|
||||
vYA = vec_xl(16, t4 + i);
|
||||
vYL = vec_xl(32, t4 + i);
|
||||
vYM = vec_xl(48, t4 + i);
|
||||
vY9 = vec_xl(0, t4 + i);
|
||||
vYA = vec_xl(16, t4 + i);
|
||||
vYL = vec_xl(32, t4 + i);
|
||||
vYM = vec_xl(48, t4 + i);
|
||||
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
vec_xst(vY7, 0, t3 + i);
|
||||
vec_xst(vY8, 16, t3 + i);
|
||||
vec_xst(vYJ, 32, t3 + i);
|
||||
vec_xst(vYK, 48, t3 + i);
|
||||
|
||||
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
|
||||
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
|
||||
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
|
||||
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
|
||||
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
|
||||
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
|
||||
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
|
||||
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
|
||||
|
||||
vYB = vec_xl(0, t5 + i);
|
||||
vYC = vec_xl(16, t5 + i);
|
||||
vYN = vec_xl(32, t5 + i);
|
||||
vYO = vec_xl(48, t5 + i);
|
||||
vYB = vec_xl(0, t5 + i);
|
||||
vYC = vec_xl(16, t5 + i);
|
||||
vYN = vec_xl(32, t5 + i);
|
||||
vYO = vec_xl(48, t5 + i);
|
||||
|
||||
vec_xst(vY9, 0, t4 + i);
|
||||
vec_xst(vYA, 16, t4 + i);
|
||||
vec_xst(vYL, 32, t4 + i);
|
||||
vec_xst(vYM, 48, t4 + i);
|
||||
vec_xst(vY9, 0, t4 + i);
|
||||
vec_xst(vYA, 16, t4 + i);
|
||||
vec_xst(vYL, 32, t4 + i);
|
||||
vec_xst(vYM, 48, t4 + i);
|
||||
|
||||
vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
|
||||
vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
|
||||
vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
|
||||
vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);
|
||||
vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
|
||||
vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
|
||||
vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
|
||||
vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);
|
||||
|
||||
vec_xst(vYB, 0, t5 + i);
|
||||
vec_xst(vYC, 16, t5 + i);
|
||||
vec_xst(vYN, 32, t5 + i);
|
||||
vec_xst(vYO, 48, t5 + i);
|
||||
}
|
||||
return;
|
||||
vec_xst(vYB, 0, t5 + i);
|
||||
vec_xst(vYC, 16, t5 + i);
|
||||
vec_xst(vYN, 32, t5 + i);
|
||||
vec_xst(vYO, 48, t5 + i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1,85 +1,86 @@
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest)
|
||||
void
|
||||
gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||
unsigned char *dest)
|
||||
{
|
||||
unsigned char *s, *t0;
|
||||
vector unsigned char vX1, vY1;
|
||||
vector unsigned char vX2, vY2;
|
||||
vector unsigned char vX3, vY3;
|
||||
vector unsigned char vX4, vY4;
|
||||
vector unsigned char vX5, vY5;
|
||||
vector unsigned char vX6, vY6;
|
||||
vector unsigned char vX7, vY7;
|
||||
vector unsigned char vX8, vY8;
|
||||
vector unsigned char vhi0, vlo0;
|
||||
int i, j, head;
|
||||
unsigned char *s, *t0;
|
||||
vector unsigned char vX1, vY1;
|
||||
vector unsigned char vX2, vY2;
|
||||
vector unsigned char vX3, vY3;
|
||||
vector unsigned char vX4, vY4;
|
||||
vector unsigned char vX5, vY5;
|
||||
vector unsigned char vX6, vY6;
|
||||
vector unsigned char vX7, vY7;
|
||||
vector unsigned char vX8, vY8;
|
||||
vector unsigned char vhi0, vlo0;
|
||||
int i, j, head;
|
||||
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest);
|
||||
if (vlen < 128) {
|
||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest);
|
||||
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
for (j = 1; j < vlen; j++) {
|
||||
gf_vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
t0 = (unsigned char *)dest;
|
||||
t0 = (unsigned char *) dest;
|
||||
|
||||
head = len % 128;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
}
|
||||
head = len % 128;
|
||||
if (head != 0) {
|
||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||
}
|
||||
|
||||
for (i = head; i < len - 127; i += 128) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
for (i = head; i < len - 127; i += 128) {
|
||||
vY1 = vY1 ^ vY1;
|
||||
vY2 = vY2 ^ vY2;
|
||||
vY3 = vY3 ^ vY3;
|
||||
vY4 = vY4 ^ vY4;
|
||||
|
||||
vY5 = vY5 ^ vY5;
|
||||
vY6 = vY6 ^ vY6;
|
||||
vY7 = vY7 ^ vY7;
|
||||
vY8 = vY8 ^ vY8;
|
||||
vY5 = vY5 ^ vY5;
|
||||
vY6 = vY6 ^ vY6;
|
||||
vY7 = vY7 ^ vY7;
|
||||
vY8 = vY8 ^ vY8;
|
||||
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *)src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
for (j = 0; j < vlen; j++) {
|
||||
s = (unsigned char *) src[j];
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
vlo0 = EC_vec_xl(0, g0);
|
||||
vhi0 = EC_vec_xl(16, g0);
|
||||
|
||||
vX5 = vec_xl(64, s + i);
|
||||
vX6 = vec_xl(80, s + i);
|
||||
vX7 = vec_xl(96, s + i);
|
||||
vX8 = vec_xl(112, s + i);
|
||||
vX5 = vec_xl(64, s + i);
|
||||
vX6 = vec_xl(80, s + i);
|
||||
vX7 = vec_xl(96, s + i);
|
||||
vX8 = vec_xl(112, s + i);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi0, vlo0, vX5);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi0, vlo0, vX6);
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi0, vlo0, vX7);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi0, vlo0, vX8);
|
||||
vY5 = vY5 ^ EC_vec_permxor(vhi0, vlo0, vX5);
|
||||
vY6 = vY6 ^ EC_vec_permxor(vhi0, vlo0, vX6);
|
||||
vY7 = vY7 ^ EC_vec_permxor(vhi0, vlo0, vX7);
|
||||
vY8 = vY8 ^ EC_vec_permxor(vhi0, vlo0, vX8);
|
||||
|
||||
g0 += 32;
|
||||
}
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 32, t0 + i);
|
||||
vec_xst(vY4, 48, t0 + i);
|
||||
g0 += 32;
|
||||
}
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 32, t0 + i);
|
||||
vec_xst(vY4, 48, t0 + i);
|
||||
|
||||
vec_xst(vY5, 64, t0 + i);
|
||||
vec_xst(vY6, 80, t0 + i);
|
||||
vec_xst(vY7, 96, t0 + i);
|
||||
vec_xst(vY8, 112, t0 + i);
|
||||
}
|
||||
return;
|
||||
vec_xst(vY5, 64, t0 + i);
|
||||
vec_xst(vY6, 80, t0 + i);
|
||||
vec_xst(vY7, 96, t0 + i);
|
||||
vec_xst(vY8, 112, t0 + i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1,48 +1,49 @@
|
||||
#include "ec_base_vsx.h"
|
||||
|
||||
void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char *dest)
|
||||
void
|
||||
gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char *dest)
|
||||
{
|
||||
unsigned char *s, *t0;
|
||||
vector unsigned char vX1, vY1;
|
||||
vector unsigned char vX2, vY2;
|
||||
vector unsigned char vX3, vY3;
|
||||
vector unsigned char vX4, vY4;
|
||||
vector unsigned char vhi0, vlo0;
|
||||
int i, head;
|
||||
unsigned char *s, *t0;
|
||||
vector unsigned char vX1, vY1;
|
||||
vector unsigned char vX2, vY2;
|
||||
vector unsigned char vX3, vY3;
|
||||
vector unsigned char vX4, vY4;
|
||||
vector unsigned char vhi0, vlo0;
|
||||
int i, head;
|
||||
|
||||
s = (unsigned char *)src;
|
||||
t0 = (unsigned char *)dest;
|
||||
s = (unsigned char *) src;
|
||||
t0 = (unsigned char *) dest;
|
||||
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, dest);
|
||||
}
|
||||
head = len % 64;
|
||||
if (head != 0) {
|
||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, dest);
|
||||
}
|
||||
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
for (i = head; i < len - 63; i += 64) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vY3 = vec_xl(32, t0 + i);
|
||||
vY4 = vec_xl(48, t0 + i);
|
||||
vY1 = vec_xl(0, t0 + i);
|
||||
vY2 = vec_xl(16, t0 + i);
|
||||
vY3 = vec_xl(32, t0 + i);
|
||||
vY4 = vec_xl(48, t0 + i);
|
||||
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 32, t0 + i);
|
||||
vec_xst(vY4, 48, t0 + i);
|
||||
}
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 32, t0 + i);
|
||||
vec_xst(vY4, 48, t0 + i);
|
||||
}
|
||||
|
||||
return;
|
||||
return;
|
||||
}
|
||||
|
@ -3,73 +3,74 @@
|
||||
/*
|
||||
* Same as gf_vect_mul_base in "ec_base.h" but without the size restriction.
|
||||
*/
|
||||
static void _gf_vect_mul_base(int len, unsigned char *a, unsigned char *src,
|
||||
unsigned char *dest)
|
||||
static void
|
||||
_gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
|
||||
{
|
||||
//2nd element of table array is ref value used to fill it in
|
||||
unsigned char c = a[1];
|
||||
// 2nd element of table array is ref value used to fill it in
|
||||
unsigned char c = a[1];
|
||||
|
||||
while (len-- > 0)
|
||||
*dest++ = gf_mul(c, *src++);
|
||||
return;
|
||||
while (len-- > 0)
|
||||
*dest++ = gf_mul(c, *src++);
|
||||
return;
|
||||
}
|
||||
|
||||
void gf_vect_mul_vsx(int len, unsigned char *gftbl, unsigned char *src, unsigned char *dest)
|
||||
void
|
||||
gf_vect_mul_vsx(int len, unsigned char *gftbl, unsigned char *src, unsigned char *dest)
|
||||
{
|
||||
unsigned char *s, *t0;
|
||||
vector unsigned char vX1, vY1;
|
||||
vector unsigned char vX2, vY2;
|
||||
vector unsigned char vX3, vY3;
|
||||
vector unsigned char vX4, vY4;
|
||||
vector unsigned char vX5, vY5;
|
||||
vector unsigned char vX6, vY6;
|
||||
vector unsigned char vX7, vY7;
|
||||
vector unsigned char vX8, vY8;
|
||||
vector unsigned char vhi0, vlo0;
|
||||
int i, head;
|
||||
unsigned char *s, *t0;
|
||||
vector unsigned char vX1, vY1;
|
||||
vector unsigned char vX2, vY2;
|
||||
vector unsigned char vX3, vY3;
|
||||
vector unsigned char vX4, vY4;
|
||||
vector unsigned char vX5, vY5;
|
||||
vector unsigned char vX6, vY6;
|
||||
vector unsigned char vX7, vY7;
|
||||
vector unsigned char vX8, vY8;
|
||||
vector unsigned char vhi0, vlo0;
|
||||
int i, head;
|
||||
|
||||
s = (unsigned char *)src;
|
||||
t0 = (unsigned char *)dest;
|
||||
s = (unsigned char *) src;
|
||||
t0 = (unsigned char *) dest;
|
||||
|
||||
head = len % 128;
|
||||
if (head != 0) {
|
||||
_gf_vect_mul_base(head, gftbl, src, dest);
|
||||
}
|
||||
head = len % 128;
|
||||
if (head != 0) {
|
||||
_gf_vect_mul_base(head, gftbl, src, dest);
|
||||
}
|
||||
|
||||
vlo0 = EC_vec_xl(0, gftbl);
|
||||
vhi0 = EC_vec_xl(16, gftbl);
|
||||
vlo0 = EC_vec_xl(0, gftbl);
|
||||
vhi0 = EC_vec_xl(16, gftbl);
|
||||
|
||||
for (i = head; i < len - 127; i += 128) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
for (i = head; i < len - 127; i += 128) {
|
||||
vX1 = vec_xl(0, s + i);
|
||||
vX2 = vec_xl(16, s + i);
|
||||
vX3 = vec_xl(32, s + i);
|
||||
vX4 = vec_xl(48, s + i);
|
||||
|
||||
vX5 = vec_xl(64, s + i);
|
||||
vX6 = vec_xl(80, s + i);
|
||||
vX7 = vec_xl(96, s + i);
|
||||
vX8 = vec_xl(112, s + i);
|
||||
vX5 = vec_xl(64, s + i);
|
||||
vX6 = vec_xl(80, s + i);
|
||||
vX7 = vec_xl(96, s + i);
|
||||
vX8 = vec_xl(112, s + i);
|
||||
|
||||
vY1 = EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vY3 = EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vY4 = EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
vY1 = EC_vec_permxor(vhi0, vlo0, vX1);
|
||||
vY2 = EC_vec_permxor(vhi0, vlo0, vX2);
|
||||
vY3 = EC_vec_permxor(vhi0, vlo0, vX3);
|
||||
vY4 = EC_vec_permxor(vhi0, vlo0, vX4);
|
||||
|
||||
vY5 = EC_vec_permxor(vhi0, vlo0, vX5);
|
||||
vY6 = EC_vec_permxor(vhi0, vlo0, vX6);
|
||||
vY7 = EC_vec_permxor(vhi0, vlo0, vX7);
|
||||
vY8 = EC_vec_permxor(vhi0, vlo0, vX8);
|
||||
vY5 = EC_vec_permxor(vhi0, vlo0, vX5);
|
||||
vY6 = EC_vec_permxor(vhi0, vlo0, vX6);
|
||||
vY7 = EC_vec_permxor(vhi0, vlo0, vX7);
|
||||
vY8 = EC_vec_permxor(vhi0, vlo0, vX8);
|
||||
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 32, t0 + i);
|
||||
vec_xst(vY4, 48, t0 + i);
|
||||
vec_xst(vY1, 0, t0 + i);
|
||||
vec_xst(vY2, 16, t0 + i);
|
||||
vec_xst(vY3, 32, t0 + i);
|
||||
vec_xst(vY4, 48, t0 + i);
|
||||
|
||||
vec_xst(vY5, 64, t0 + i);
|
||||
vec_xst(vY6, 80, t0 + i);
|
||||
vec_xst(vY7, 96, t0 + i);
|
||||
vec_xst(vY8, 112, t0 + i);
|
||||
}
|
||||
vec_xst(vY5, 64, t0 + i);
|
||||
vec_xst(vY6, 80, t0 + i);
|
||||
vec_xst(vY7, 96, t0 + i);
|
||||
vec_xst(vY8, 112, t0 + i);
|
||||
}
|
||||
|
||||
return;
|
||||
return;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user