erasure_code: reformat using new code style

Signed-off-by: Marcel Cornu <marcel.d.cornu@intel.com>
This commit is contained in:
Marcel Cornu 2024-04-19 17:08:53 +01:00 committed by Pablo de Lara
parent 671e67b62d
commit 300260a4d9
37 changed files with 10984 additions and 13068 deletions

View File

@ -31,94 +31,86 @@
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod) DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
{ {
#if defined(__linux__) #if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_SVE) if (auxval & HWCAP_SVE)
return PROVIDER_INFO(gf_vect_dot_prod_sve); return PROVIDER_INFO(gf_vect_dot_prod_sve);
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(gf_vect_dot_prod_neon); return PROVIDER_INFO(gf_vect_dot_prod_neon);
#elif defined(__APPLE__) #elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY)) if (sysctlEnabled(SYSCTL_SVE_KEY))
return PROVIDER_INFO(gf_vect_dot_prod_sve); return PROVIDER_INFO(gf_vect_dot_prod_sve);
return PROVIDER_INFO(gf_vect_dot_prod_neon); return PROVIDER_INFO(gf_vect_dot_prod_neon);
#endif #endif
return PROVIDER_BASIC(gf_vect_dot_prod); return PROVIDER_BASIC(gf_vect_dot_prod);
} }
DEFINE_INTERFACE_DISPATCHER(gf_vect_mad) DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
{ {
#if defined(__linux__) #if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_SVE) if (auxval & HWCAP_SVE)
return PROVIDER_INFO(gf_vect_mad_sve); return PROVIDER_INFO(gf_vect_mad_sve);
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(gf_vect_mad_neon); return PROVIDER_INFO(gf_vect_mad_neon);
#elif defined(__APPLE__) #elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY)) if (sysctlEnabled(SYSCTL_SVE_KEY))
return PROVIDER_INFO(gf_vect_mad_sve); return PROVIDER_INFO(gf_vect_mad_sve);
return PROVIDER_INFO(gf_vect_mad_neon); return PROVIDER_INFO(gf_vect_mad_neon);
#endif #endif
return PROVIDER_BASIC(gf_vect_mad); return PROVIDER_BASIC(gf_vect_mad);
} }
DEFINE_INTERFACE_DISPATCHER(ec_encode_data) DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
{ {
#if defined(__linux__) #if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_SVE) if (auxval & HWCAP_SVE)
return PROVIDER_INFO(ec_encode_data_sve); return PROVIDER_INFO(ec_encode_data_sve);
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(ec_encode_data_neon); return PROVIDER_INFO(ec_encode_data_neon);
#elif defined(__APPLE__) #elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY)) if (sysctlEnabled(SYSCTL_SVE_KEY))
return PROVIDER_INFO(ec_encode_data_sve); return PROVIDER_INFO(ec_encode_data_sve);
return PROVIDER_INFO(ec_encode_data_neon); return PROVIDER_INFO(ec_encode_data_neon);
#endif #endif
return PROVIDER_BASIC(ec_encode_data); return PROVIDER_BASIC(ec_encode_data);
} }
DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update) DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
{ {
#if defined(__linux__) #if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_SVE) if (auxval & HWCAP_SVE)
return PROVIDER_INFO(ec_encode_data_update_sve); return PROVIDER_INFO(ec_encode_data_update_sve);
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(ec_encode_data_update_neon); return PROVIDER_INFO(ec_encode_data_update_neon);
#elif defined(__APPLE__) #elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY)) if (sysctlEnabled(SYSCTL_SVE_KEY))
return PROVIDER_INFO(ec_encode_data_update_sve); return PROVIDER_INFO(ec_encode_data_update_sve);
return PROVIDER_INFO(ec_encode_data_update_neon); return PROVIDER_INFO(ec_encode_data_update_neon);
#endif #endif
return PROVIDER_BASIC(ec_encode_data_update); return PROVIDER_BASIC(ec_encode_data_update);
} }
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul) DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
{ {
#if defined(__linux__) #if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_SVE) if (auxval & HWCAP_SVE)
return PROVIDER_INFO(gf_vect_mul_sve); return PROVIDER_INFO(gf_vect_mul_sve);
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(gf_vect_mul_neon); return PROVIDER_INFO(gf_vect_mul_neon);
#elif defined(__APPLE__) #elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY)) if (sysctlEnabled(SYSCTL_SVE_KEY))
return PROVIDER_INFO(gf_vect_mul_sve); return PROVIDER_INFO(gf_vect_mul_sve);
return PROVIDER_INFO(gf_vect_mul_neon); return PROVIDER_INFO(gf_vect_mul_neon);
#endif #endif
return PROVIDER_BASIC(gf_vect_mul); return PROVIDER_BASIC(gf_vect_mul);
} }
DEFINE_INTERFACE_DISPATCHER(ec_init_tables) DEFINE_INTERFACE_DISPATCHER(ec_init_tables) { return PROVIDER_BASIC(ec_init_tables); }
{
return PROVIDER_BASIC(ec_init_tables);
}

View File

@ -29,236 +29,265 @@
#include "erasure_code.h" #include "erasure_code.h"
/*external function*/ /*external function*/
extern void gf_vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, extern void
unsigned char **src, unsigned char *dest); gf_vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
extern void gf_2vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char *dest);
unsigned char **src, unsigned char **dest); extern void
extern void gf_3vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, gf_2vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **src, unsigned char **dest); unsigned char **dest);
extern void gf_4vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, extern void
unsigned char **src, unsigned char **dest); gf_3vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
extern void gf_5vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **dest);
unsigned char **src, unsigned char **dest); extern void
extern void gf_vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, gf_4vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char *src, unsigned char *dest); unsigned char **dest);
extern void gf_2vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, extern void
unsigned char *src, unsigned char **dest); gf_5vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
extern void gf_3vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char **dest);
unsigned char *src, unsigned char **dest); extern void
extern void gf_4vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, gf_vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *src, unsigned char **dest); unsigned char *dest);
extern void gf_5vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, extern void
unsigned char *src, unsigned char **dest); gf_2vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
extern void gf_6vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char **dest);
unsigned char *src, unsigned char **dest); extern void
gf_3vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_4vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_5vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_6vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
void ec_encode_data_neon(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, void
unsigned char **coding) ec_encode_data_neon(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding)
{ {
if (len < 16) { if (len < 16) {
ec_encode_data_base(len, k, rows, g_tbls, data, coding); ec_encode_data_base(len, k, rows, g_tbls, data, coding);
return; return;
} }
while (rows > 5) { while (rows > 5) {
gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding); gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding);
g_tbls += 5 * k * 32; g_tbls += 5 * k * 32;
coding += 5; coding += 5;
rows -= 5; rows -= 5;
} }
switch (rows) { switch (rows) {
case 5: case 5:
gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding); gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding);
break; break;
case 4: case 4:
gf_4vect_dot_prod_neon(len, k, g_tbls, data, coding); gf_4vect_dot_prod_neon(len, k, g_tbls, data, coding);
break; break;
case 3: case 3:
gf_3vect_dot_prod_neon(len, k, g_tbls, data, coding); gf_3vect_dot_prod_neon(len, k, g_tbls, data, coding);
break; break;
case 2: case 2:
gf_2vect_dot_prod_neon(len, k, g_tbls, data, coding); gf_2vect_dot_prod_neon(len, k, g_tbls, data, coding);
break; break;
case 1: case 1:
gf_vect_dot_prod_neon(len, k, g_tbls, data, *coding); gf_vect_dot_prod_neon(len, k, g_tbls, data, *coding);
break; break;
case 0: case 0:
break; break;
default: default:
break; break;
} }
} }
void ec_encode_data_update_neon(int len, int k, int rows, int vec_i, unsigned char *g_tbls, void
unsigned char *data, unsigned char **coding) ec_encode_data_update_neon(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding)
{ {
if (len < 16) { if (len < 16) {
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
return; return;
} }
while (rows > 6) { while (rows > 6) {
gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding); gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
g_tbls += 6 * k * 32; g_tbls += 6 * k * 32;
coding += 6; coding += 6;
rows -= 6; rows -= 6;
} }
switch (rows) { switch (rows) {
case 6: case 6:
gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding); gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
break; break;
case 5: case 5:
gf_5vect_mad_neon(len, k, vec_i, g_tbls, data, coding); gf_5vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
break; break;
case 4: case 4:
gf_4vect_mad_neon(len, k, vec_i, g_tbls, data, coding); gf_4vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
break; break;
case 3: case 3:
gf_3vect_mad_neon(len, k, vec_i, g_tbls, data, coding); gf_3vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
break; break;
case 2: case 2:
gf_2vect_mad_neon(len, k, vec_i, g_tbls, data, coding); gf_2vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
break; break;
case 1: case 1:
gf_vect_mad_neon(len, k, vec_i, g_tbls, data, *coding); gf_vect_mad_neon(len, k, vec_i, g_tbls, data, *coding);
break; break;
case 0: case 0:
break; break;
} }
} }
/* SVE */ /* SVE */
extern void gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, extern void
unsigned char **src, unsigned char *dest); gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
extern void gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char *dest);
unsigned char **src, unsigned char **dest); extern void
extern void gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **src, unsigned char **dest); unsigned char **dest);
extern void gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, extern void
unsigned char **src, unsigned char **dest); gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
extern void gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **dest);
unsigned char **src, unsigned char **dest); extern void
extern void gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **src, unsigned char **dest); unsigned char **dest);
extern void gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, extern void
unsigned char **src, unsigned char **dest); gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
extern void gf_8vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **dest);
unsigned char **src, unsigned char **dest); extern void
extern void gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char *src, unsigned char *dest); unsigned char **dest);
extern void gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, extern void
unsigned char *src, unsigned char **dest); gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
extern void gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char **dest);
unsigned char *src, unsigned char **dest); extern void
extern void gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, gf_8vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char *src, unsigned char **dest); unsigned char **dest);
extern void gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, extern void
unsigned char *src, unsigned char **dest); gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
extern void gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *dest);
unsigned char *src, unsigned char **dest); extern void
gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
void ec_encode_data_sve(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, void
unsigned char **coding) ec_encode_data_sve(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding)
{ {
if (len < 16) { if (len < 16) {
ec_encode_data_base(len, k, rows, g_tbls, data, coding); ec_encode_data_base(len, k, rows, g_tbls, data, coding);
return; return;
} }
while (rows > 11) { while (rows > 11) {
gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
g_tbls += 6 * k * 32; g_tbls += 6 * k * 32;
coding += 6; coding += 6;
rows -= 6; rows -= 6;
} }
switch (rows) { switch (rows) {
case 11: case 11:
/* 7 + 4 */ /* 7 + 4 */
gf_7vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_7vect_dot_prod_sve(len, k, g_tbls, data, coding);
g_tbls += 7 * k * 32; g_tbls += 7 * k * 32;
coding += 7; coding += 7;
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
break; break;
case 10: case 10:
/* 6 + 4 */ /* 6 + 4 */
gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
g_tbls += 6 * k * 32; g_tbls += 6 * k * 32;
coding += 6; coding += 6;
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
break; break;
case 9: case 9:
/* 5 + 4 */ /* 5 + 4 */
gf_5vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_5vect_dot_prod_sve(len, k, g_tbls, data, coding);
g_tbls += 5 * k * 32; g_tbls += 5 * k * 32;
coding += 5; coding += 5;
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
break; break;
case 8: case 8:
/* 4 + 4 */ /* 4 + 4 */
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
g_tbls += 4 * k * 32; g_tbls += 4 * k * 32;
coding += 4; coding += 4;
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
break; break;
case 7: case 7:
gf_7vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_7vect_dot_prod_sve(len, k, g_tbls, data, coding);
break; break;
case 6: case 6:
gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
break; break;
case 5: case 5:
gf_5vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_5vect_dot_prod_sve(len, k, g_tbls, data, coding);
break; break;
case 4: case 4:
gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
break; break;
case 3: case 3:
gf_3vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_3vect_dot_prod_sve(len, k, g_tbls, data, coding);
break; break;
case 2: case 2:
gf_2vect_dot_prod_sve(len, k, g_tbls, data, coding); gf_2vect_dot_prod_sve(len, k, g_tbls, data, coding);
break; break;
case 1: case 1:
gf_vect_dot_prod_sve(len, k, g_tbls, data, *coding); gf_vect_dot_prod_sve(len, k, g_tbls, data, *coding);
break; break;
default: default:
break; break;
} }
} }
void ec_encode_data_update_sve(int len, int k, int rows, int vec_i, unsigned char *g_tbls, void
unsigned char *data, unsigned char **coding) ec_encode_data_update_sve(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding)
{ {
if (len < 16) { if (len < 16) {
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
return; return;
} }
while (rows > 6) { while (rows > 6) {
gf_6vect_mad_sve(len, k, vec_i, g_tbls, data, coding); gf_6vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
g_tbls += 6 * k * 32; g_tbls += 6 * k * 32;
coding += 6; coding += 6;
rows -= 6; rows -= 6;
} }
switch (rows) { switch (rows) {
case 6: case 6:
gf_6vect_mad_sve(len, k, vec_i, g_tbls, data, coding); gf_6vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
break; break;
case 5: case 5:
gf_5vect_mad_sve(len, k, vec_i, g_tbls, data, coding); gf_5vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
break; break;
case 4: case 4:
gf_4vect_mad_sve(len, k, vec_i, g_tbls, data, coding); gf_4vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
break; break;
case 3: case 3:
gf_3vect_mad_sve(len, k, vec_i, g_tbls, data, coding); gf_3vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
break; break;
case 2: case 2:
gf_2vect_mad_sve(len, k, vec_i, g_tbls, data, coding); gf_2vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
break; break;
case 1: case 1:
gf_vect_mad_sve(len, k, vec_i, g_tbls, data, *coding); gf_vect_mad_sve(len, k, vec_i, g_tbls, data, *coding);
break; break;
default: default:
break; break;
} }
} }

View File

@ -28,322 +28,331 @@
**********************************************************************/ **********************************************************************/
#include <limits.h> #include <limits.h>
#include <string.h> // for memset #include <string.h> // for memset
#include <stdint.h> #include <stdint.h>
#include "erasure_code.h" #include "erasure_code.h"
#include "ec_base.h" // for GF tables #include "ec_base.h" // for GF tables
void ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *g_tbls) void
ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{ {
int i, j; int i, j;
for (i = 0; i < rows; i++) { for (i = 0; i < rows; i++) {
for (j = 0; j < k; j++) { for (j = 0; j < k; j++) {
gf_vect_mul_init(*a++, g_tbls); gf_vect_mul_init(*a++, g_tbls);
g_tbls += 32; g_tbls += 32;
} }
} }
} }
unsigned char gf_mul(unsigned char a, unsigned char b) unsigned char
gf_mul(unsigned char a, unsigned char b)
{ {
#ifndef GF_LARGE_TABLES #ifndef GF_LARGE_TABLES
int i; int i;
if ((a == 0) || (b == 0)) if ((a == 0) || (b == 0))
return 0; return 0;
return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ? i - 255 : i]; return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ? i - 255 : i];
#else #else
return gf_mul_table_base[b * 256 + a]; return gf_mul_table_base[b * 256 + a];
#endif #endif
} }
unsigned char gf_inv(unsigned char a) unsigned char
gf_inv(unsigned char a)
{ {
#ifndef GF_LARGE_TABLES #ifndef GF_LARGE_TABLES
if (a == 0) if (a == 0)
return 0; return 0;
return gff_base[255 - gflog_base[a]]; return gff_base[255 - gflog_base[a]];
#else #else
return gf_inv_table_base[a]; return gf_inv_table_base[a];
#endif #endif
} }
void gf_gen_rs_matrix(unsigned char *a, int m, int k) void
gf_gen_rs_matrix(unsigned char *a, int m, int k)
{ {
int i, j; int i, j;
unsigned char p, gen = 1; unsigned char p, gen = 1;
memset(a, 0, k * m); memset(a, 0, k * m);
for (i = 0; i < k; i++) for (i = 0; i < k; i++)
a[k * i + i] = 1; a[k * i + i] = 1;
for (i = k; i < m; i++) { for (i = k; i < m; i++) {
p = 1; p = 1;
for (j = 0; j < k; j++) { for (j = 0; j < k; j++) {
a[k * i + j] = p; a[k * i + j] = p;
p = gf_mul(p, gen); p = gf_mul(p, gen);
} }
gen = gf_mul(gen, 2); gen = gf_mul(gen, 2);
} }
} }
void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k) void
gf_gen_cauchy1_matrix(unsigned char *a, int m, int k)
{ {
int i, j; int i, j;
unsigned char *p; unsigned char *p;
// Identity matrix in high position // Identity matrix in high position
memset(a, 0, k * m); memset(a, 0, k * m);
for (i = 0; i < k; i++) for (i = 0; i < k; i++)
a[k * i + i] = 1; a[k * i + i] = 1;
// For the rest choose 1/(i + j) | i != j
p = &a[k * k];
for (i = k; i < m; i++)
for (j = 0; j < k; j++)
*p++ = gf_inv(i ^ j);
// For the rest choose 1/(i + j) | i != j
p = &a[k * k];
for (i = k; i < m; i++)
for (j = 0; j < k; j++)
*p++ = gf_inv(i ^ j);
} }
int gf_invert_matrix(unsigned char *in_mat, unsigned char *out_mat, const int n) int
gf_invert_matrix(unsigned char *in_mat, unsigned char *out_mat, const int n)
{ {
int i, j, k; int i, j, k;
unsigned char temp; unsigned char temp;
// Set out_mat[] to the identity matrix // Set out_mat[] to the identity matrix
for (i = 0; i < n * n; i++) // memset(out_mat, 0, n*n) for (i = 0; i < n * n; i++) // memset(out_mat, 0, n*n)
out_mat[i] = 0; out_mat[i] = 0;
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
out_mat[i * n + i] = 1; out_mat[i * n + i] = 1;
// Inverse // Inverse
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
// Check for 0 in pivot element // Check for 0 in pivot element
if (in_mat[i * n + i] == 0) { if (in_mat[i * n + i] == 0) {
// Find a row with non-zero in current column and swap // Find a row with non-zero in current column and swap
for (j = i + 1; j < n; j++) for (j = i + 1; j < n; j++)
if (in_mat[j * n + i]) if (in_mat[j * n + i])
break; break;
if (j == n) // Couldn't find means it's singular if (j == n) // Couldn't find means it's singular
return -1; return -1;
for (k = 0; k < n; k++) { // Swap rows i,j for (k = 0; k < n; k++) { // Swap rows i,j
temp = in_mat[i * n + k]; temp = in_mat[i * n + k];
in_mat[i * n + k] = in_mat[j * n + k]; in_mat[i * n + k] = in_mat[j * n + k];
in_mat[j * n + k] = temp; in_mat[j * n + k] = temp;
temp = out_mat[i * n + k]; temp = out_mat[i * n + k];
out_mat[i * n + k] = out_mat[j * n + k]; out_mat[i * n + k] = out_mat[j * n + k];
out_mat[j * n + k] = temp; out_mat[j * n + k] = temp;
} }
} }
temp = gf_inv(in_mat[i * n + i]); // 1/pivot temp = gf_inv(in_mat[i * n + i]); // 1/pivot
for (j = 0; j < n; j++) { // Scale row i by 1/pivot for (j = 0; j < n; j++) { // Scale row i by 1/pivot
in_mat[i * n + j] = gf_mul(in_mat[i * n + j], temp); in_mat[i * n + j] = gf_mul(in_mat[i * n + j], temp);
out_mat[i * n + j] = gf_mul(out_mat[i * n + j], temp); out_mat[i * n + j] = gf_mul(out_mat[i * n + j], temp);
} }
for (j = 0; j < n; j++) { for (j = 0; j < n; j++) {
if (j == i) if (j == i)
continue; continue;
temp = in_mat[j * n + i]; temp = in_mat[j * n + i];
for (k = 0; k < n; k++) { for (k = 0; k < n; k++) {
out_mat[j * n + k] ^= gf_mul(temp, out_mat[i * n + k]); out_mat[j * n + k] ^= gf_mul(temp, out_mat[i * n + k]);
in_mat[j * n + k] ^= gf_mul(temp, in_mat[i * n + k]); in_mat[j * n + k] ^= gf_mul(temp, in_mat[i * n + k]);
} }
} }
} }
return 0; return 0;
} }
// Calculates const table gftbl in GF(2^8) from single input A // Calculates const table gftbl in GF(2^8) from single input A
// gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, ... , A{f0} } // gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, ... , A{f0} }
void gf_vect_mul_init(unsigned char c, unsigned char *tbl) void
gf_vect_mul_init(unsigned char c, unsigned char *tbl)
{ {
unsigned char c2 = (c << 1) ^ ((c & 0x80) ? 0x1d : 0); //Mult by GF{2} unsigned char c2 = (c << 1) ^ ((c & 0x80) ? 0x1d : 0); // Mult by GF{2}
unsigned char c4 = (c2 << 1) ^ ((c2 & 0x80) ? 0x1d : 0); //Mult by GF{2} unsigned char c4 = (c2 << 1) ^ ((c2 & 0x80) ? 0x1d : 0); // Mult by GF{2}
unsigned char c8 = (c4 << 1) ^ ((c4 & 0x80) ? 0x1d : 0); //Mult by GF{2} unsigned char c8 = (c4 << 1) ^ ((c4 & 0x80) ? 0x1d : 0); // Mult by GF{2}
#if (__WORDSIZE == 64 || _WIN64 || __x86_64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) #if (__WORDSIZE == 64 || _WIN64 || __x86_64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
unsigned long long v1, v2, v4, v8, *t; unsigned long long v1, v2, v4, v8, *t;
unsigned long long v10, v20, v40, v80; unsigned long long v10, v20, v40, v80;
unsigned char c17, c18, c20, c24; unsigned char c17, c18, c20, c24;
t = (unsigned long long *)tbl; t = (unsigned long long *) tbl;
v1 = c * 0x0100010001000100ull; v1 = c * 0x0100010001000100ull;
v2 = c2 * 0x0101000001010000ull; v2 = c2 * 0x0101000001010000ull;
v4 = c4 * 0x0101010100000000ull; v4 = c4 * 0x0101010100000000ull;
v8 = c8 * 0x0101010101010101ull; v8 = c8 * 0x0101010101010101ull;
v4 = v1 ^ v2 ^ v4; v4 = v1 ^ v2 ^ v4;
t[0] = v4; t[0] = v4;
t[1] = v8 ^ v4; t[1] = v8 ^ v4;
c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); //Mult by GF{2} c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); // Mult by GF{2}
c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); //Mult by GF{2} c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); // Mult by GF{2}
c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); //Mult by GF{2} c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); // Mult by GF{2}
c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); //Mult by GF{2} c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); // Mult by GF{2}
v10 = c17 * 0x0100010001000100ull; v10 = c17 * 0x0100010001000100ull;
v20 = c18 * 0x0101000001010000ull; v20 = c18 * 0x0101000001010000ull;
v40 = c20 * 0x0101010100000000ull; v40 = c20 * 0x0101010100000000ull;
v80 = c24 * 0x0101010101010101ull; v80 = c24 * 0x0101010101010101ull;
v40 = v10 ^ v20 ^ v40; v40 = v10 ^ v20 ^ v40;
t[2] = v40; t[2] = v40;
t[3] = v80 ^ v40; t[3] = v80 ^ v40;
#else // 32-bit or other #else // 32-bit or other
unsigned char c3, c5, c6, c7, c9, c10, c11, c12, c13, c14, c15; unsigned char c3, c5, c6, c7, c9, c10, c11, c12, c13, c14, c15;
unsigned char c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, unsigned char c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31;
c31;
c3 = c2 ^ c; c3 = c2 ^ c;
c5 = c4 ^ c; c5 = c4 ^ c;
c6 = c4 ^ c2; c6 = c4 ^ c2;
c7 = c4 ^ c3; c7 = c4 ^ c3;
c9 = c8 ^ c; c9 = c8 ^ c;
c10 = c8 ^ c2; c10 = c8 ^ c2;
c11 = c8 ^ c3; c11 = c8 ^ c3;
c12 = c8 ^ c4; c12 = c8 ^ c4;
c13 = c8 ^ c5; c13 = c8 ^ c5;
c14 = c8 ^ c6; c14 = c8 ^ c6;
c15 = c8 ^ c7; c15 = c8 ^ c7;
tbl[0] = 0; tbl[0] = 0;
tbl[1] = c; tbl[1] = c;
tbl[2] = c2; tbl[2] = c2;
tbl[3] = c3; tbl[3] = c3;
tbl[4] = c4; tbl[4] = c4;
tbl[5] = c5; tbl[5] = c5;
tbl[6] = c6; tbl[6] = c6;
tbl[7] = c7; tbl[7] = c7;
tbl[8] = c8; tbl[8] = c8;
tbl[9] = c9; tbl[9] = c9;
tbl[10] = c10; tbl[10] = c10;
tbl[11] = c11; tbl[11] = c11;
tbl[12] = c12; tbl[12] = c12;
tbl[13] = c13; tbl[13] = c13;
tbl[14] = c14; tbl[14] = c14;
tbl[15] = c15; tbl[15] = c15;
c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); //Mult by GF{2} c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); // Mult by GF{2}
c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); //Mult by GF{2} c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); // Mult by GF{2}
c19 = c18 ^ c17; c19 = c18 ^ c17;
c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); //Mult by GF{2} c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); // Mult by GF{2}
c21 = c20 ^ c17; c21 = c20 ^ c17;
c22 = c20 ^ c18; c22 = c20 ^ c18;
c23 = c20 ^ c19; c23 = c20 ^ c19;
c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); //Mult by GF{2} c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); // Mult by GF{2}
c25 = c24 ^ c17; c25 = c24 ^ c17;
c26 = c24 ^ c18; c26 = c24 ^ c18;
c27 = c24 ^ c19; c27 = c24 ^ c19;
c28 = c24 ^ c20; c28 = c24 ^ c20;
c29 = c24 ^ c21; c29 = c24 ^ c21;
c30 = c24 ^ c22; c30 = c24 ^ c22;
c31 = c24 ^ c23; c31 = c24 ^ c23;
tbl[16] = 0; tbl[16] = 0;
tbl[17] = c17; tbl[17] = c17;
tbl[18] = c18; tbl[18] = c18;
tbl[19] = c19; tbl[19] = c19;
tbl[20] = c20; tbl[20] = c20;
tbl[21] = c21; tbl[21] = c21;
tbl[22] = c22; tbl[22] = c22;
tbl[23] = c23; tbl[23] = c23;
tbl[24] = c24; tbl[24] = c24;
tbl[25] = c25; tbl[25] = c25;
tbl[26] = c26; tbl[26] = c26;
tbl[27] = c27; tbl[27] = c27;
tbl[28] = c28; tbl[28] = c28;
tbl[29] = c29; tbl[29] = c29;
tbl[30] = c30; tbl[30] = c30;
tbl[31] = c31; tbl[31] = c31;
#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__ #endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
} }
void gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, void
unsigned char **src, unsigned char *dest) gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest)
{ {
int i, j; int i, j;
unsigned char s; unsigned char s;
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
s = 0; s = 0;
for (j = 0; j < vlen; j++) for (j = 0; j < vlen; j++)
s ^= gf_mul(src[j][i], v[j * 32 + 1]); s ^= gf_mul(src[j][i], v[j * 32 + 1]);
dest[i] = s; dest[i] = s;
} }
} }
void gf_vect_mad_base(int len, int vec, int vec_i, void
unsigned char *v, unsigned char *src, unsigned char *dest) gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
unsigned char *dest)
{ {
int i; int i;
unsigned char s; unsigned char s;
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
s = dest[i]; s = dest[i];
s ^= gf_mul(src[i], v[vec_i * 32 + 1]); s ^= gf_mul(src[i], v[vec_i * 32 + 1]);
dest[i] = s; dest[i] = s;
} }
} }
void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, void
unsigned char **src, unsigned char **dest) ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
unsigned char **dest)
{ {
int i, j, l; int i, j, l;
unsigned char s; unsigned char s;
for (l = 0; l < dests; l++) { for (l = 0; l < dests; l++) {
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
s = 0; s = 0;
for (j = 0; j < srcs; j++) for (j = 0; j < srcs; j++)
s ^= gf_mul(src[j][i], v[j * 32 + l * srcs * 32 + 1]); s ^= gf_mul(src[j][i], v[j * 32 + l * srcs * 32 + 1]);
dest[l][i] = s; dest[l][i] = s;
} }
} }
} }
void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v, void
unsigned char *data, unsigned char **dest) ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
unsigned char *data, unsigned char **dest)
{ {
int i, l; int i, l;
unsigned char s; unsigned char s;
for (l = 0; l < rows; l++) { for (l = 0; l < rows; l++) {
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
s = dest[l][i]; s = dest[l][i];
s ^= gf_mul(data[i], v[vec_i * 32 + l * k * 32 + 1]); s ^= gf_mul(data[i], v[vec_i * 32 + l * k * 32 + 1]);
dest[l][i] = s; dest[l][i] = s;
} }
} }
} }
/*
 * Reference (plain C) multiply of a byte vector by one GF constant.
 *
 * The constant is taken from a[1]. Returns -1 without touching dest when len
 * is not a multiple of 32, otherwise writes gf_mul(a[1], src[i]) to dest[i]
 * for every i in [0, len) and returns 0.
 */
int
gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
{
        // The table's second byte holds the value the table was built from
        const unsigned char coeff = a[1];

        // Only whole 32-byte multiples are accepted
        if (len % 32)
                return -1;

        for (int i = 0; i < len; i++)
                dest[i] = gf_mul(coeff, src[i]);

        return 0;
}

File diff suppressed because it is too large Load Diff

View File

@ -29,37 +29,40 @@
#include "erasure_code.h" #include "erasure_code.h"
/*
 * Public entry point for this build: hands every argument, unchanged, to the
 * plain C implementation gf_vect_dot_prod_base().
 */
void
gf_vect_dot_prod(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest)
{
        gf_vect_dot_prod_base(len, vlen, v, src, dest);
}
/*
 * Public entry point for this build: hands every argument, unchanged, to the
 * plain C implementation gf_vect_mad_base().
 */
void
gf_vect_mad(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, unsigned char *dest)
{
        gf_vect_mad_base(len, vec, vec_i, v, src, dest);
}
/*
 * Public entry point for this build: hands every argument, unchanged, to the
 * plain C implementation ec_encode_data_base().
 */
void
ec_encode_data(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
               unsigned char **dest)
{
        ec_encode_data_base(len, srcs, dests, v, src, dest);
}
/*
 * Public entry point for this build: hands every argument, unchanged, to the
 * plain C implementation ec_encode_data_update_base().
 */
void
ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v, unsigned char *data,
                      unsigned char **dest)
{
        ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
}
/*
 * Public entry point for this build: views the untyped src/dest buffers as
 * byte arrays and returns whatever gf_vect_mul_base() returns.
 */
int
gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
{
        unsigned char *in = (unsigned char *) src;
        unsigned char *out = (unsigned char *) dest;

        return gf_vect_mul_base(len, a, in, out);
}
/*
 * Public entry point for this build: hands every argument, unchanged, to the
 * plain C implementation ec_init_tables_base().
 *
 * The callee is invoked as a plain statement: ISO C does not allow
 * "return expr;" inside a function whose return type is void.
 */
void
ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
        ec_init_tables_base(k, rows, a, g_tbls);
}

View File

@ -28,387 +28,423 @@
**********************************************************************/ **********************************************************************/
#include <limits.h> #include <limits.h>
#include "erasure_code.h" #include "erasure_code.h"
#include "ec_base.h" /* for GF tables */ #include "ec_base.h" /* for GF tables */
#if __x86_64__ || __i386__ || _M_X64 || _M_IX86 #if __x86_64__ || __i386__ || _M_X64 || _M_IX86
/*
 * SSE erasure-code encode.
 *
 * Lengths below 16 are handed to ec_encode_data_base(). Otherwise output rows
 * are consumed six at a time by gf_6vect_dot_prod_sse(), moving g_tbls forward
 * by 6 * k * 32 bytes and coding by six pointers per batch; the 1-5 rows left
 * over go to the kernel of matching width.
 */
void
ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                   unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        for (; rows >= 6; rows -= 6) {
                gf_6vect_dot_prod_sse(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
        }

        // Fewer than six rows remain; zero (or a negative count) needs no work
        if (rows == 5)
                gf_5vect_dot_prod_sse(len, k, g_tbls, data, coding);
        else if (rows == 4)
                gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
        else if (rows == 3)
                gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
        else if (rows == 2)
                gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
        else if (rows == 1)
                gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
}
/*
 * AVX erasure-code encode.
 *
 * Lengths below 16 are handed to ec_encode_data_base(). Otherwise output rows
 * are consumed six at a time by gf_6vect_dot_prod_avx(), moving g_tbls forward
 * by 6 * k * 32 bytes and coding by six pointers per batch; the 1-5 rows left
 * over go to the kernel of matching width.
 */
void
ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                   unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        for (; rows >= 6; rows -= 6) {
                gf_6vect_dot_prod_avx(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
        }

        // Fewer than six rows remain; zero (or a negative count) needs no work
        if (rows == 5)
                gf_5vect_dot_prod_avx(len, k, g_tbls, data, coding);
        else if (rows == 4)
                gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding);
        else if (rows == 3)
                gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding);
        else if (rows == 2)
                gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding);
        else if (rows == 1)
                gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding);
}
/*
 * AVX2 erasure-code encode.
 *
 * Lengths below 32 are handed to ec_encode_data_base(). Otherwise output rows
 * are consumed six at a time by gf_6vect_dot_prod_avx2(), moving g_tbls
 * forward by 6 * k * 32 bytes and coding by six pointers per batch; the 1-5
 * rows left over go to the kernel of matching width.
 */
void
ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                    unsigned char **coding)
{
        if (len < 32) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        for (; rows >= 6; rows -= 6) {
                gf_6vect_dot_prod_avx2(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
        }

        // Fewer than six rows remain; zero (or a negative count) needs no work
        if (rows == 5)
                gf_5vect_dot_prod_avx2(len, k, g_tbls, data, coding);
        else if (rows == 4)
                gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding);
        else if (rows == 3)
                gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding);
        else if (rows == 2)
                gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding);
        else if (rows == 1)
                gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding);
}
#ifdef HAVE_AS_KNOWS_AVX512 #ifdef HAVE_AS_KNOWS_AVX512
extern int gf_vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data, extern int
unsigned char *dest); gf_vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
extern int gf_2vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char *dest);
unsigned char **data, unsigned char **coding); extern int
extern int gf_3vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, gf_2vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **data, unsigned char **coding); unsigned char **coding);
extern int gf_4vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, extern int
unsigned char **data, unsigned char **coding); gf_3vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
extern int gf_5vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **coding);
unsigned char **data, unsigned char **coding); extern int
extern int gf_6vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, gf_4vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **data, unsigned char **coding); unsigned char **coding);
extern void gf_vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, extern int
unsigned char *src, unsigned char *dest); gf_5vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
extern void gf_2vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char **coding);
unsigned char *src, unsigned char **dest); extern int
extern void gf_3vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, gf_6vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char *src, unsigned char **dest); unsigned char **coding);
extern void gf_4vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, extern void
unsigned char *src, unsigned char **dest); gf_vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
extern void gf_5vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *dest);
unsigned char *src, unsigned char **dest); extern void
extern void gf_6vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, gf_2vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *src, unsigned char **dest); unsigned char **dest);
extern void
gf_3vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_4vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_5vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_6vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
/*
 * AVX-512 erasure-code encode.
 *
 * Lengths below 64 are handed to ec_encode_data_base(). Otherwise output rows
 * are consumed six at a time by gf_6vect_dot_prod_avx512(), moving g_tbls
 * forward by 6 * k * 32 bytes and coding by six pointers per batch; the 1-5
 * rows left over go to the kernel of matching width.
 */
void
ec_encode_data_avx512(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                      unsigned char **coding)
{
        if (len < 64) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        for (; rows >= 6; rows -= 6) {
                gf_6vect_dot_prod_avx512(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
        }

        // Fewer than six rows remain; zero (or a negative count) needs no work
        if (rows == 5)
                gf_5vect_dot_prod_avx512(len, k, g_tbls, data, coding);
        else if (rows == 4)
                gf_4vect_dot_prod_avx512(len, k, g_tbls, data, coding);
        else if (rows == 3)
                gf_3vect_dot_prod_avx512(len, k, g_tbls, data, coding);
        else if (rows == 2)
                gf_2vect_dot_prod_avx512(len, k, g_tbls, data, coding);
        else if (rows == 1)
                gf_vect_dot_prod_avx512(len, k, g_tbls, data, *coding);
}
/*
 * AVX-512 incremental encode for a single source buffer (index vec_i).
 *
 * Lengths below 64 are handed to ec_encode_data_update_base(). Otherwise
 * output rows are consumed six at a time by gf_6vect_mad_avx512(), moving
 * g_tbls forward by 6 * k * 32 bytes and coding by six pointers per batch;
 * the 1-5 rows left over go to the kernel of matching width.
 */
void
ec_encode_data_update_avx512(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                             unsigned char *data, unsigned char **coding)
{
        if (len < 64) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        for (; rows >= 6; rows -= 6) {
                gf_6vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
        }

        // Fewer than six rows remain; zero (or a negative count) needs no work
        if (rows == 5)
                gf_5vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 4)
                gf_4vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 3)
                gf_3vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 2)
                gf_2vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 1)
                gf_vect_mad_avx512(len, k, vec_i, g_tbls, data, *coding);
}
#if AS_FEATURE_LEVEL >= 10 #if AS_FEATURE_LEVEL >= 10
extern void gf_vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, extern void
unsigned char **data, unsigned char *dest); gf_vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
extern void gf_2vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char *dest);
unsigned char **data, unsigned char **coding); extern void
extern void gf_3vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, gf_2vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **data, unsigned char **coding); unsigned char **coding);
extern void gf_4vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, extern void
unsigned char **data, unsigned char **coding); gf_3vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
extern void gf_5vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **coding);
unsigned char **data, unsigned char **coding); extern void
extern void gf_6vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, gf_4vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **data, unsigned char **coding); unsigned char **coding);
extern void
gf_5vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_6vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void gf_vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, extern void
unsigned char *src, unsigned char *dest); gf_vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
extern void gf_2vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *dest);
unsigned char *src, unsigned char **dest); extern void
extern void gf_3vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, gf_2vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *src, unsigned char **dest); unsigned char **dest);
extern void gf_4vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, extern void
unsigned char *src, unsigned char **dest); gf_3vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
extern void gf_5vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char **dest);
unsigned char *src, unsigned char **dest); extern void
extern void gf_6vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, gf_4vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *src, unsigned char **dest); unsigned char **dest);
extern void
gf_5vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_6vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void gf_vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, extern void
unsigned char **data, unsigned char *dest); gf_vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
extern void gf_2vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, unsigned char *dest);
unsigned char **data, unsigned char **coding); extern void
extern void gf_3vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, gf_2vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **data, unsigned char **coding); unsigned char **coding);
extern void gf_vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, extern void
unsigned char *src, unsigned char *dest); gf_3vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
extern void gf_2vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char **coding);
unsigned char *src, unsigned char **dest); extern void
extern void gf_3vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, gf_vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *src, unsigned char **dest); unsigned char *dest);
extern void gf_4vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, extern void
unsigned char *src, unsigned char **dest); gf_2vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
extern void gf_5vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char **dest);
unsigned char *src, unsigned char **dest); extern void
gf_3vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_4vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_5vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
void ec_init_tables_gfni(int k, int rows, unsigned char *a, unsigned char *g_tbls) void
ec_init_tables_gfni(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{ {
int i, j; int i, j;
uint64_t *g64 = (uint64_t *) g_tbls; uint64_t *g64 = (uint64_t *) g_tbls;
for (i = 0; i < rows; i++)
for (j = 0; j < k; j++)
*(g64++) = gf_table_gfni[*a++];
for (i = 0; i < rows; i++)
for (j = 0; j < k; j++)
*(g64++) = gf_table_gfni[*a++];
} }
/*
 * AVX-512 + GFNI erasure-code encode.
 *
 * Output rows are consumed six at a time by gf_6vect_dot_prod_avx512_gfni(),
 * moving g_tbls forward by 6 * k * 8 bytes and coding by six pointers per
 * batch; the 1-5 rows left over go to the kernel of matching width. There is
 * no short-length fallback in this function.
 */
void
ec_encode_data_avx512_gfni(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                           unsigned char **coding)
{
        for (; rows >= 6; rows -= 6) {
                gf_6vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 8;
                coding += 6;
        }

        // Fewer than six rows remain; any other count needs no work
        if (rows == 5)
                gf_5vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
        else if (rows == 4)
                gf_4vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
        else if (rows == 3)
                gf_3vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
        else if (rows == 2)
                gf_2vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
        else if (rows == 1)
                gf_vect_dot_prod_avx512_gfni(len, k, g_tbls, data, *coding);
}
/*
 * AVX2 + GFNI erasure-code encode.
 *
 * Output rows are consumed three at a time by gf_3vect_dot_prod_avx2_gfni(),
 * moving g_tbls forward by 3 * k * 8 bytes and coding by three pointers per
 * batch; the 1-2 rows left over go to the kernel of matching width. There is
 * no short-length fallback in this function.
 */
void
ec_encode_data_avx2_gfni(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                         unsigned char **coding)
{
        for (; rows >= 3; rows -= 3) {
                gf_3vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
                g_tbls += 3 * k * 8;
                coding += 3;
        }

        // Fewer than three rows remain; any other count needs no work
        if (rows == 2)
                gf_2vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
        else if (rows == 1)
                gf_vect_dot_prod_avx2_gfni(len, k, g_tbls, data, *coding);
}
/*
 * AVX-512 + GFNI incremental encode for a single source buffer (index vec_i).
 *
 * Output rows are consumed six at a time by gf_6vect_mad_avx512_gfni(), moving
 * g_tbls forward by 6 * k * 8 bytes and coding by six pointers per batch; the
 * 1-5 rows left over go to the kernel of matching width. There is no
 * short-length fallback in this function.
 */
void
ec_encode_data_update_avx512_gfni(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                                  unsigned char *data, unsigned char **coding)
{
        for (; rows >= 6; rows -= 6) {
                gf_6vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 8;
                coding += 6;
        }

        // Fewer than six rows remain; any other count needs no work
        if (rows == 5)
                gf_5vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 4)
                gf_4vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 3)
                gf_3vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 2)
                gf_2vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 1)
                gf_vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, *coding);
}
/*
 * AVX2 + GFNI incremental encode for a single source buffer (index vec_i).
 *
 * Output rows are consumed five at a time by gf_5vect_mad_avx2_gfni(), moving
 * g_tbls forward by 5 * k * 8 bytes and coding by five pointers per batch; the
 * 1-4 rows left over go to the kernel of matching width. There is no
 * short-length fallback in this function.
 */
void
ec_encode_data_update_avx2_gfni(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                                unsigned char *data, unsigned char **coding)
{
        for (; rows >= 5; rows -= 5) {
                gf_5vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 5 * k * 8;
                coding += 5;
        }

        // Fewer than five rows remain; any other count needs no work
        if (rows == 4)
                gf_4vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 3)
                gf_3vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 2)
                gf_2vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 1)
                gf_vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, *coding);
}
#endif // AS_FEATURE_LEVEL >= 10 #endif // AS_FEATURE_LEVEL >= 10
@ -416,119 +452,119 @@ void ec_encode_data_update_avx2_gfni(int len, int k, int rows, int vec_i,
#if __WORDSIZE == 64 || _WIN64 || __x86_64__ #if __WORDSIZE == 64 || _WIN64 || __x86_64__
/*
 * SSE incremental encode for a single source buffer (index vec_i).
 *
 * Lengths below 16 are handed to ec_encode_data_update_base(). Otherwise,
 * while more than six output rows remain, gf_6vect_mad_sse() handles six of
 * them and g_tbls / coding move forward by 6 * k * 32 bytes / six pointers.
 * The final 1-6 rows go to the kernel of matching width.
 */
void
ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                          unsigned char *data, unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        for (; rows > 6; rows -= 6) {
                gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
        }

        // At most six rows remain; zero (or a negative count) needs no work
        if (rows == 6)
                gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 5)
                gf_5vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 4)
                gf_4vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 3)
                gf_3vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 2)
                gf_2vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 1)
                gf_vect_mad_sse(len, k, vec_i, g_tbls, data, *coding);
}
/*
 * AVX incremental encode for a single source buffer (index vec_i).
 *
 * Lengths below 16 are handed to ec_encode_data_update_base(). Otherwise,
 * while more than six output rows remain, gf_6vect_mad_avx() handles six of
 * them and g_tbls / coding move forward by 6 * k * 32 bytes / six pointers.
 * The final 1-6 rows go to the kernel of matching width.
 */
void
ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                          unsigned char *data, unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        for (; rows > 6; rows -= 6) {
                gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
        }

        // At most six rows remain; zero (or a negative count) needs no work
        if (rows == 6)
                gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 5)
                gf_5vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 4)
                gf_4vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 3)
                gf_3vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 2)
                gf_2vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 1)
                gf_vect_mad_avx(len, k, vec_i, g_tbls, data, *coding);
}
/*
 * AVX2 incremental encode for a single source buffer (index vec_i).
 *
 * Lengths below 32 are handed to ec_encode_data_update_base(). Otherwise,
 * while more than six output rows remain, gf_6vect_mad_avx2() handles six of
 * them and g_tbls / coding move forward by 6 * k * 32 bytes / six pointers.
 * The final 1-6 rows go to the kernel of matching width.
 */
void
ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                           unsigned char *data, unsigned char **coding)
{
        if (len < 32) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        for (; rows > 6; rows -= 6) {
                gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
        }

        // At most six rows remain; zero (or a negative count) needs no work
        if (rows == 6)
                gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 5)
                gf_5vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 4)
                gf_4vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 3)
                gf_3vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 2)
                gf_2vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
        else if (rows == 1)
                gf_vect_mad_avx2(len, k, vec_i, g_tbls, data, *coding);
}
#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__ #endif //__WORDSIZE == 64 || _WIN64 || __x86_64__

View File

@ -29,27 +29,27 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> // for memset, memcmp #include <string.h> // for memset, memcmp
#include <assert.h> #include <assert.h>
#include "erasure_code.h" #include "erasure_code.h"
#include "test.h" #include "test.h"
// Test sizing. The macro bodies and the TEST_LEN argument are parenthesized so
// that compound arguments (e.g. TEST_LEN(k + p)) and surrounding operators
// (e.g. x / GT_L3_CACHE) expand with the intended precedence.
#ifndef GT_L3_CACHE
#define GT_L3_CACHE (32 * 1024 * 1024) /* some number > last level cache */
#endif

#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset
#define TEST_SOURCES  32
#define TEST_LEN(m)   ((128 * 1024 / (m)) & ~(64 - 1))
#define TEST_TYPE_STR "_warm"
#elif defined(COLD_TEST)
// Uncached test. Pull from large mem base.
#define TEST_SOURCES  32
#define TEST_LEN(m)   (((GT_L3_CACHE) / (m)) & ~(64 - 1))
#define TEST_TYPE_STR "_cold"
#elif defined(TEST_CUSTOM)
#define TEST_TYPE_STR "_cus"
#endif
#define MMAX TEST_SOURCES #define MMAX TEST_SOURCES
@ -59,117 +59,120 @@
typedef unsigned char u8; typedef unsigned char u8;
/*
 * Encode step used by the benchmark: builds tables for the m - k rows of a
 * that start at a[k * k], then encodes the first k buffers of buffs into the
 * buffers starting at buffs[k], TEST_LEN(m) bytes each, using the base
 * (plain C) routines.
 */
void
ec_encode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs)
{
        const int parity_rows = m - k;
        u8 **parity_bufs = &buffs[k];

        ec_init_tables_base(k, parity_rows, &a[k * k], g_tbls);
        ec_encode_data_base(TEST_LEN(m), k, parity_rows, g_tbls, buffs, parity_bufs);
}
/*
 * Decode step used by the benchmark.
 *
 * Collects the first k rows of a (and the matching buffers) whose
 * src_in_err[] flag is zero, inverts that k x k matrix, copies the inverse
 * rows named by src_err_list[0..nerrs) into a decode matrix, and regenerates
 * the lost buffers into temp_buffs with the base (plain C) routines.
 *
 * Returns BAD_MATRIX when gf_invert_matrix() reports failure, otherwise 0.
 */
int
ec_decode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs, u8 *src_in_err, u8 *src_err_list,
               int nerrs, u8 **temp_buffs)
{
        u8 survivors[MMAX * KMAX], decode[MMAX * KMAX], inverse[MMAX * KMAX];
        u8 *recov[TEST_SOURCES];
        int row = 0;

        // Keep only the rows that are not flagged as erased
        for (int i = 0; i < k; i++) {
                while (src_in_err[row])
                        row++;
                recov[i] = buffs[row];
                for (int j = 0; j < k; j++)
                        survivors[k * i + j] = a[k * row + j];
                row++;
        }

        if (gf_invert_matrix(survivors, inverse, k) < 0)
                return BAD_MATRIX;

        // One decode row per erased source
        for (int e = 0; e < nerrs; e++) {
                for (int j = 0; j < k; j++)
                        decode[k * e + j] = inverse[k * src_err_list[e] + j];
        }

        // Recover data
        ec_init_tables_base(k, nerrs, decode, g_tbls);
        ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);

        return 0;
}
int main(int argc, char *argv[]) int
main(int argc, char *argv[])
{ {
int i, j, m, k, nerrs, check; int i, j, m, k, nerrs, check;
void *buf; void *buf;
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
u8 a[MMAX * KMAX]; u8 a[MMAX * KMAX];
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 src_err_list[TEST_SOURCES]; u8 src_err_list[TEST_SOURCES];
struct perf start; struct perf start;
// Pick test parameters // Pick test parameters
m = 14; m = 14;
k = 10; k = 10;
nerrs = 4; nerrs = 4;
const u8 err_list[] = { 2, 4, 5, 7 }; const u8 err_list[] = { 2, 4, 5, 7 };
printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs); printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
// check input parameters // check input parameters
assert(!(m > MMAX || k > KMAX || nerrs > (m - k))); assert(!(m > MMAX || k > KMAX || nerrs > (m - k)));
memcpy(src_err_list, err_list, nerrs); memcpy(src_err_list, err_list, nerrs);
memset(src_in_err, 0, TEST_SOURCES); memset(src_in_err, 0, TEST_SOURCES);
for (i = 0; i < nerrs; i++) for (i = 0; i < nerrs; i++)
src_in_err[src_err_list[i]] = 1; src_in_err[src_err_list[i]] = 1;
// Allocate the arrays // Allocate the arrays
for (i = 0; i < m; i++) { for (i = 0; i < m; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) { if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("alloc error: Fail\n"); printf("alloc error: Fail\n");
return -1; return -1;
} }
buffs[i] = buf; buffs[i] = buf;
} }
for (i = 0; i < (m - k); i++) { for (i = 0; i < (m - k); i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) { if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("alloc error: Fail\n"); printf("alloc error: Fail\n");
return -1; return -1;
} }
temp_buffs[i] = buf; temp_buffs[i] = buf;
} }
// Make random data // Make random data
for (i = 0; i < k; i++) for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN(m); j++) for (j = 0; j < TEST_LEN(m); j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
gf_gen_rs_matrix(a, m, k); gf_gen_rs_matrix(a, m, k);
// Start encode test // Start encode test
BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs)); BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs));
printf("erasure_code_base_encode" TEST_TYPE_STR ": "); printf("erasure_code_base_encode" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (m)); perf_print(start, (long long) (TEST_LEN(m)) * (m));
// Start decode test // Start decode test
BENCHMARK(&start, BENCHMARK_TIME, check = BENCHMARK(&start, BENCHMARK_TIME,
ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs, check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
temp_buffs)); temp_buffs));
if (check == BAD_MATRIX) { if (check == BAD_MATRIX) {
printf("BAD MATRIX\n"); printf("BAD MATRIX\n");
return check; return check;
} }
for (i = 0; i < nerrs; i++) { for (i = 0; i < nerrs; i++) {
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) { if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
return -1; return -1;
} }
} }
printf("erasure_code_base_decode" TEST_TYPE_STR ": "); printf("erasure_code_base_decode" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs)); perf_print(start, (long long) (TEST_LEN(m)) * (k + nerrs));
printf("done all: Pass\n"); printf("done all: Pass\n");
return 0; return 0;
} }

File diff suppressed because it is too large Load Diff

View File

@ -29,29 +29,29 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> // for memset, memcmp #include <string.h> // for memset, memcmp
#include "erasure_code.h" #include "erasure_code.h"
#include "test.h" #include "test.h"
#ifndef GT_L3_CACHE #ifndef GT_L3_CACHE
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ #define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
#endif #endif
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM) #if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset // Cached test, loop many times over small dataset
# define TEST_SOURCES 32 #define TEST_SOURCES 32
# define TEST_LEN(m) ((128*1024 / m) & ~(64-1)) #define TEST_LEN(m) ((128 * 1024 / m) & ~(64 - 1))
# define TEST_TYPE_STR "_warm" #define TEST_TYPE_STR "_warm"
#elif defined (COLD_TEST) #elif defined(COLD_TEST)
// Uncached test. Pull from large mem base. // Uncached test. Pull from large mem base.
# define TEST_SOURCES 32 #define TEST_SOURCES 32
# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1)) #define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64 - 1))
# define TEST_TYPE_STR "_cold" #define TEST_TYPE_STR "_cold"
#elif defined (TEST_CUSTOM) #elif defined(TEST_CUSTOM)
# define TEST_TYPE_STR "_cus" #define TEST_TYPE_STR "_cus"
#endif #endif
#ifndef TEST_SEED #ifndef TEST_SEED
# define TEST_SEED 0x1234 #define TEST_SEED 0x1234
#endif #endif
#define MMAX TEST_SOURCES #define MMAX TEST_SOURCES
@ -61,215 +61,219 @@
typedef unsigned char u8; typedef unsigned char u8;
/* Print the command-line help for app_name to stderr. */
void
usage(const char *app_name)
{
        fprintf(stderr,
                "Usage: %s [options]\n"
                " -h Help\n"
                " -k <val> Number of source buffers\n"
                " -p <val> Number of parity buffers\n"
                " -e <val> Number of simulated buffers with errors (cannot be higher than p or "
                "k)\n",
                app_name);
}
/*
 * Encode step of the benchmark.
 *
 * The tables are initialised once, outside the timed region; only the
 * ec_encode_data() call is wrapped in BENCHMARK() and recorded in *start.
 */
void
ec_encode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs, struct perf *start)
{
        ec_init_tables(k, m - k, &a[k * k], g_tbls);
        BENCHMARK(start, BENCHMARK_TIME,
                  ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]));
}
/*
 * Decode step of the benchmark.
 *
 * src_in_err[r] is non-zero for every row r that is treated as lost and
 * src_err_list[0..nerrs-1] holds the indices of those rows.  The first k
 * surviving rows of a are copied into b, b is inverted into d, and the rows
 * of d selected by src_err_list form the matrix c used to build g_tbls.
 * Only the final ec_encode_data() call, which writes temp_buffs, is timed
 * into *start.
 *
 * Returns BAD_MATRIX when gf_invert_matrix() reports failure, 0 otherwise.
 */
int
ec_decode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs, u8 *src_in_err, u8 *src_err_list,
               int nerrs, u8 **temp_buffs, struct perf *start)
{
        int i, j, r;
        u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
        u8 *recov[TEST_SOURCES];

        // Construct b by removing error rows
        for (i = 0, r = 0; i < k; i++, r++) {
                while (src_in_err[r])
                        r++;
                recov[i] = buffs[r];
                for (j = 0; j < k; j++)
                        b[k * i + j] = a[k * r + j];
        }

        if (gf_invert_matrix(b, d, k) < 0)
                return BAD_MATRIX;

        // Pick the rows of the inverse that correspond to the lost buffers
        for (i = 0; i < nerrs; i++)
                for (j = 0; j < k; j++)
                        c[k * i + j] = d[k * src_err_list[i] + j];

        // Recover data
        ec_init_tables(k, nerrs, c, g_tbls);
        BENCHMARK(start, BENCHMARK_TIME,
                  ec_encode_data(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs));

        return 0;
}
int main(int argc, char *argv[]) int
main(int argc, char *argv[])
{ {
int i, j, m, k, p, nerrs, check, ret = -1; int i, j, m, k, p, nerrs, check, ret = -1;
void *buf; void *buf;
u8 *temp_buffs[TEST_SOURCES] = { NULL }; u8 *temp_buffs[TEST_SOURCES] = { NULL };
u8 *buffs[TEST_SOURCES] = { NULL }; u8 *buffs[TEST_SOURCES] = { NULL };
u8 a[MMAX * KMAX]; u8 a[MMAX * KMAX];
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 src_err_list[TEST_SOURCES]; u8 src_err_list[TEST_SOURCES];
struct perf start; struct perf start;
/* Set default parameters */ /* Set default parameters */
k = 8; k = 8;
p = 6; p = 6;
nerrs = 4; nerrs = 4;
/* Parse arguments */ /* Parse arguments */
for (i = 1; i < argc; i++) { for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-k") == 0) { if (strcmp(argv[i], "-k") == 0) {
k = atoi(argv[++i]); k = atoi(argv[++i]);
} else if (strcmp(argv[i], "-p") == 0) { } else if (strcmp(argv[i], "-p") == 0) {
p = atoi(argv[++i]); p = atoi(argv[++i]);
} else if (strcmp(argv[i], "-e") == 0) { } else if (strcmp(argv[i], "-e") == 0) {
nerrs = atoi(argv[++i]); nerrs = atoi(argv[++i]);
} else if (strcmp(argv[i], "-h") == 0) { } else if (strcmp(argv[i], "-h") == 0) {
usage(argv[0]); usage(argv[0]);
return 0; return 0;
} else { } else {
usage(argv[0]); usage(argv[0]);
return -1; return -1;
} }
} }
if (nerrs > k) { if (nerrs > k) {
printf printf("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
("Number of errors (%d) cannot be higher than number of data buffers (%d)\n", nerrs, k);
nerrs, k); return -1;
return -1; }
}
if (k <= 0) { if (k <= 0) {
printf("Number of source buffers (%d) must be > 0\n", k); printf("Number of source buffers (%d) must be > 0\n", k);
return -1; return -1;
} }
if (p <= 0) { if (p <= 0) {
printf("Number of parity buffers (%d) must be > 0\n", p); printf("Number of parity buffers (%d) must be > 0\n", p);
return -1; return -1;
} }
if (nerrs <= 0) { if (nerrs <= 0) {
printf("Number of errors (%d) must be > 0\n", nerrs); printf("Number of errors (%d) must be > 0\n", nerrs);
return -1; return -1;
} }
if (nerrs > p) { if (nerrs > p) {
printf printf("Number of errors (%d) cannot be higher than number of parity buffers "
("Number of errors (%d) cannot be higher than number of parity buffers (%d)\n", "(%d)\n",
nerrs, p); nerrs, p);
return -1; return -1;
} }
m = k + p; m = k + p;
if (m > MMAX) { if (m > MMAX) {
printf("Number of total buffers (data and parity) cannot be higher than %d\n", printf("Number of total buffers (data and parity) cannot be higher than %d\n",
MMAX); MMAX);
return -1; return -1;
} }
u8 *err_list = malloc((size_t)nerrs); u8 *err_list = malloc((size_t) nerrs);
if (err_list == NULL) { if (err_list == NULL) {
printf("Error allocating list of array of error indices\n"); printf("Error allocating list of array of error indices\n");
return -1; return -1;
} }
srand(TEST_SEED); srand(TEST_SEED);
for (i = 0; i < nerrs;) { for (i = 0; i < nerrs;) {
u8 next_err = rand() % k; u8 next_err = rand() % k;
for (j = 0; j < i; j++) for (j = 0; j < i; j++)
if (next_err == err_list[j]) if (next_err == err_list[j])
break; break;
if (j != i) if (j != i)
continue; continue;
err_list[i++] = next_err; err_list[i++] = next_err;
} }
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k, printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k, p,
p, nerrs); nerrs);
for (i = 0; i < nerrs; i++) for (i = 0; i < nerrs; i++)
printf("%d ", (int)err_list[i]); printf("%d ", (int) err_list[i]);
printf("])\n"); printf("])\n");
printf("erasure_code_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs); printf("erasure_code_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
memcpy(src_err_list, err_list, nerrs); memcpy(src_err_list, err_list, nerrs);
memset(src_in_err, 0, TEST_SOURCES); memset(src_in_err, 0, TEST_SOURCES);
for (i = 0; i < nerrs; i++) for (i = 0; i < nerrs; i++)
src_in_err[src_err_list[i]] = 1; src_in_err[src_err_list[i]] = 1;
// Allocate the arrays // Allocate the arrays
for (i = 0; i < m; i++) { for (i = 0; i < m; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) { if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n"); printf("Error allocating buffers\n");
goto exit; goto exit;
} }
buffs[i] = buf; buffs[i] = buf;
} }
for (i = 0; i < p; i++) { for (i = 0; i < p; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) { if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n"); printf("Error allocating buffers\n");
goto exit; goto exit;
} }
temp_buffs[i] = buf; temp_buffs[i] = buf;
} }
// Make random data // Make random data
for (i = 0; i < k; i++) for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN(m); j++) for (j = 0; j < TEST_LEN(m); j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
gf_gen_rs_matrix(a, m, k); gf_gen_rs_matrix(a, m, k);
// Start encode test // Start encode test
ec_encode_perf(m, k, a, g_tbls, buffs, &start); ec_encode_perf(m, k, a, g_tbls, buffs, &start);
printf("erasure_code_encode" TEST_TYPE_STR ": "); printf("erasure_code_encode" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (m)); perf_print(start, (long long) (TEST_LEN(m)) * (m));
// Start decode test // Start decode test
check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs, check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs, temp_buffs,
temp_buffs, &start); &start);
if (check == BAD_MATRIX) { if (check == BAD_MATRIX) {
printf("BAD MATRIX\n"); printf("BAD MATRIX\n");
ret = check; ret = check;
goto exit; goto exit;
} }
for (i = 0; i < nerrs; i++) { for (i = 0; i < nerrs; i++) {
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) { if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
goto exit; goto exit;
} }
} }
printf("erasure_code_decode" TEST_TYPE_STR ": "); printf("erasure_code_decode" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs)); perf_print(start, (long long) (TEST_LEN(m)) * (k + nerrs));
printf("done all: Pass\n"); printf("done all: Pass\n");
ret = 0; ret = 0;
exit: exit:
free(err_list); free(err_list);
for (i = 0; i < TEST_SOURCES; i++) { for (i = 0; i < TEST_SOURCES; i++) {
free(buffs[i]); free(buffs[i]);
free(temp_buffs[i]); free(temp_buffs[i]);
} }
return ret; return ret;
} }

File diff suppressed because it is too large Load Diff

View File

@ -29,43 +29,43 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> // for memset, memcmp #include <string.h> // for memset, memcmp
#include "erasure_code.h" #include "erasure_code.h"
#include "test.h" #include "test.h"
//By default, test multibinary version // By default, test multibinary version
#ifndef FUNCTION_UNDER_TEST #ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST ec_encode_data_update #define FUNCTION_UNDER_TEST ec_encode_data_update
# define REF_FUNCTION ec_encode_data #define REF_FUNCTION ec_encode_data
#endif #endif
//By default, test EC(8+4) // By default, test EC(8+4)
#if (!defined(VECT)) #if (!defined(VECT))
# define VECT 4 #define VECT 4
#endif #endif
#define str(s) #s #define str(s) #s
#define xstr(s) str(s) #define xstr(s) str(s)
#ifndef GT_L3_CACHE #ifndef GT_L3_CACHE
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ #define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
#endif #endif
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM) #if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset // Cached test, loop many times over small dataset
# define TEST_SOURCES 32 #define TEST_SOURCES 32
# define TEST_LEN(m) ((128*1024 / m) & ~(64-1)) #define TEST_LEN(m) ((128 * 1024 / m) & ~(64 - 1))
# define TEST_TYPE_STR "_warm" #define TEST_TYPE_STR "_warm"
#elif defined (COLD_TEST) #elif defined(COLD_TEST)
// Uncached test. Pull from large mem base. // Uncached test. Pull from large mem base.
# define TEST_SOURCES 32 #define TEST_SOURCES 32
# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1)) #define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64 - 1))
# define TEST_TYPE_STR "_cold" #define TEST_TYPE_STR "_cold"
#elif defined (TEST_CUSTOM) #elif defined(TEST_CUSTOM)
# define TEST_TYPE_STR "_cus" #define TEST_TYPE_STR "_cus"
#endif #endif
#ifndef TEST_SEED #ifndef TEST_SEED
# define TEST_SEED 0x1234 #define TEST_SEED 0x1234
#endif #endif
#define MMAX TEST_SOURCES #define MMAX TEST_SOURCES
@ -73,308 +73,316 @@
typedef unsigned char u8; typedef unsigned char u8;
/* Print the command-line help for app_name to stderr. */
void
usage(const char *app_name)
{
        fprintf(stderr,
                "Usage: %s [options]\n"
                " -h Help\n"
                " -k <val> Number of source buffers\n"
                " -p <val> Number of parity buffers\n"
                " -e <val> Number of simulated buffers with errors (cannot be higher than p or "
                "k)\n",
                app_name);
}
/*
 * Hex-dump the first len bytes of buf to stdout.
 *
 * A newline is emitted after every 32nd byte, and one more newline always
 * terminates the dump.
 */
void
dump(unsigned char *buf, int len)
{
        int i;

        for (i = 0; i < len;) {
                printf(" %2x", 0xff & buf[i++]);
                /* i already counts the byte just printed */
                if (i % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}
/*
 * Reference encode: initialise g_tbls from the rows of a starting at
 * &a[k * k] and run REF_FUNCTION over buffs in a single call, with
 * &buffs[k] passed as the output array.
 */
void
encode_update_test_ref(int m, int k, u8 *g_tbls, u8 **buffs, u8 *a)
{
        ec_init_tables(k, m - k, &a[k * k], g_tbls);
        REF_FUNCTION(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
}
/*
 * Incremental encode: initialise g_tbls, then call FUNCTION_UNDER_TEST once
 * per source index i in [0, k), passing perf_update_buffs[i] as the single
 * source and &perf_update_buffs[k] as the output array.
 */
void
encode_update_test(int m, int k, u8 *g_tbls, u8 **perf_update_buffs, u8 *a)
{
        int i;

        // Make parity vects
        ec_init_tables(k, m - k, &a[k * k], g_tbls);
        for (i = 0; i < k; i++) {
                FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, perf_update_buffs[i],
                                    &perf_update_buffs[k]);
        }
}
/*
 * Decode through the update interface.
 *
 * src_in_err[r] is non-zero for every row r that is treated as lost and
 * src_err_list[0..nerrs-1] holds the indices of those rows.  The first k
 * surviving rows of a are copied into b (and the matching update_buffs
 * pointers into recov), b is inverted into d, and the rows of d selected by
 * src_err_list form the matrix c used to build g_tbls.  FUNCTION_UNDER_TEST
 * is then called once per surviving source with perf_update_buffs as output.
 *
 * Prints "BAD MATRIX" and returns -1 when gf_invert_matrix() reports
 * failure; returns 0 otherwise.
 */
int
decode_test(int m, int k, u8 **update_buffs, u8 **recov, u8 *a, u8 *src_in_err, u8 *src_err_list,
            int nerrs, u8 *g_tbls, u8 **perf_update_buffs)
{
        int i, j, r;
        u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];

        // Construct b by removing error rows
        for (i = 0, r = 0; i < k; i++, r++) {
                while (src_in_err[r])
                        r++;
                recov[i] = update_buffs[r];
                for (j = 0; j < k; j++)
                        b[k * i + j] = a[k * r + j];
        }

        if (gf_invert_matrix(b, d, k) < 0) {
                printf("BAD MATRIX\n");
                return -1;
        }

        // Pick the rows of the inverse that correspond to the lost buffers
        for (i = 0; i < nerrs; i++)
                for (j = 0; j < k; j++)
                        c[k * i + j] = d[k * src_err_list[i] + j];

        // Recover data
        ec_init_tables(k, nerrs, c, g_tbls);
        for (i = 0; i < k; i++) {
                FUNCTION_UNDER_TEST(TEST_LEN(m), k, nerrs, i, g_tbls, recov[i], perf_update_buffs);
        }
        return 0;
}
int main(int argc, char *argv[]) int
main(int argc, char *argv[])
{ {
int i, j, check, m, k, p, nerrs, ret = -1; int i, j, check, m, k, p, nerrs, ret = -1;
void *buf; void *buf;
u8 *temp_buffs[TEST_SOURCES] = { NULL }; u8 *temp_buffs[TEST_SOURCES] = { NULL };
u8 *buffs[TEST_SOURCES] = { NULL }; u8 *buffs[TEST_SOURCES] = { NULL };
u8 *update_buffs[TEST_SOURCES] = { NULL }; u8 *update_buffs[TEST_SOURCES] = { NULL };
u8 *perf_update_buffs[TEST_SOURCES] = { NULL }; u8 *perf_update_buffs[TEST_SOURCES] = { NULL };
u8 a[MMAX * KMAX]; u8 a[MMAX * KMAX];
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES]; u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
struct perf start; struct perf start;
/* Set default parameters */ /* Set default parameters */
k = 10; k = 10;
p = VECT; p = VECT;
nerrs = VECT; nerrs = VECT;
/* Parse arguments */ /* Parse arguments */
for (i = 1; i < argc; i++) { for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-k") == 0) { if (strcmp(argv[i], "-k") == 0) {
k = atoi(argv[++i]); k = atoi(argv[++i]);
} else if (strcmp(argv[i], "-p") == 0) { } else if (strcmp(argv[i], "-p") == 0) {
p = atoi(argv[++i]); p = atoi(argv[++i]);
} else if (strcmp(argv[i], "-e") == 0) { } else if (strcmp(argv[i], "-e") == 0) {
nerrs = atoi(argv[++i]); nerrs = atoi(argv[++i]);
} else if (strcmp(argv[i], "-h") == 0) { } else if (strcmp(argv[i], "-h") == 0) {
usage(argv[0]); usage(argv[0]);
return 0; return 0;
} else { } else {
usage(argv[0]); usage(argv[0]);
return -1; return -1;
} }
} }
if (nerrs > k) { if (nerrs > k) {
printf printf("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
("Number of errors (%d) cannot be higher than number of data buffers (%d)\n", nerrs, k);
nerrs, k); return -1;
return -1; }
}
if (k <= 0) { if (k <= 0) {
printf("Number of source buffers (%d) must be > 0\n", k); printf("Number of source buffers (%d) must be > 0\n", k);
return -1; return -1;
} }
if (p <= 0) { if (p <= 0) {
printf("Number of parity buffers (%d) must be > 0\n", p); printf("Number of parity buffers (%d) must be > 0\n", p);
return -1; return -1;
} }
if (nerrs > p) { if (nerrs > p) {
printf printf("Number of errors (%d) cannot be higher than number of parity buffers "
("Number of errors (%d) cannot be higher than number of parity buffers (%d)\n", "(%d)\n",
nerrs, p); nerrs, p);
return -1; return -1;
} }
if (nerrs <= 0) { if (nerrs <= 0) {
printf("Number of errors (%d) must be > 0\n", nerrs); printf("Number of errors (%d) must be > 0\n", nerrs);
return -1; return -1;
} }
m = k + p; m = k + p;
if (m > MMAX) { if (m > MMAX) {
printf("Number of total buffers (data and parity) cannot be higher than %d\n", printf("Number of total buffers (data and parity) cannot be higher than %d\n",
MMAX); MMAX);
return -1; return -1;
} }
u8 *err_list = malloc((size_t)nerrs); u8 *err_list = malloc((size_t) nerrs);
if (err_list == NULL) { if (err_list == NULL) {
printf("Error allocating list of array of error indices\n"); printf("Error allocating list of array of error indices\n");
return -1; return -1;
} }
srand(TEST_SEED); srand(TEST_SEED);
for (i = 0; i < nerrs;) { for (i = 0; i < nerrs;) {
u8 next_err = rand() % k; u8 next_err = rand() % k;
for (j = 0; j < i; j++) for (j = 0; j < i; j++)
if (next_err == err_list[j]) if (next_err == err_list[j])
break; break;
if (j != i) if (j != i)
continue; continue;
err_list[i++] = next_err; err_list[i++] = next_err;
} }
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k, printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k, p,
p, nerrs); nerrs);
for (i = 0; i < nerrs; i++) for (i = 0; i < nerrs; i++)
printf("%d ", err_list[i]); printf("%d ", err_list[i]);
printf("])\n"); printf("])\n");
printf(xstr(FUNCTION_UNDER_TEST) "_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs); printf(xstr(FUNCTION_UNDER_TEST) "_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
memcpy(src_err_list, err_list, nerrs); memcpy(src_err_list, err_list, nerrs);
memset(src_in_err, 0, TEST_SOURCES); memset(src_in_err, 0, TEST_SOURCES);
for (i = 0; i < nerrs; i++) for (i = 0; i < nerrs; i++)
src_in_err[src_err_list[i]] = 1; src_in_err[src_err_list[i]] = 1;
// Allocate the arrays // Allocate the arrays
for (i = 0; i < m; i++) { for (i = 0; i < m; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) { if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n"); printf("Error allocating buffers\n");
goto exit; goto exit;
} }
buffs[i] = buf; buffs[i] = buf;
} }
for (i = 0; i < (m - k); i++) { for (i = 0; i < (m - k); i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) { if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n"); printf("Error allocating buffers\n");
goto exit; goto exit;
} }
temp_buffs[i] = buf; temp_buffs[i] = buf;
memset(temp_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function memset(temp_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be
} // zero for update function
}
for (i = 0; i < TEST_SOURCES; i++) { for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) { if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n"); printf("Error allocating buffers\n");
goto exit; goto exit;
} }
update_buffs[i] = buf; update_buffs[i] = buf;
memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be
} // zero for update function
for (i = 0; i < TEST_SOURCES; i++) { }
if (posix_memalign(&buf, 64, TEST_LEN(m))) { for (i = 0; i < TEST_SOURCES; i++) {
printf("Error allocating buffers\n"); if (posix_memalign(&buf, 64, TEST_LEN(m))) {
goto exit; printf("Error allocating buffers\n");
} goto exit;
perf_update_buffs[i] = buf; }
memset(perf_update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function perf_update_buffs[i] = buf;
} memset(perf_update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer
// to be zero for update function
}
// Make random data // Make random data
for (i = 0; i < k; i++) for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN(m); j++) { for (j = 0; j < TEST_LEN(m); j++) {
buffs[i][j] = rand(); buffs[i][j] = rand();
update_buffs[i][j] = buffs[i][j]; update_buffs[i][j] = buffs[i][j];
} }
gf_gen_rs_matrix(a, m, k); gf_gen_rs_matrix(a, m, k);
encode_update_test_ref(m, k, g_tbls, buffs, a); encode_update_test_ref(m, k, g_tbls, buffs, a);
encode_update_test(m, k, g_tbls, update_buffs, a); encode_update_test(m, k, g_tbls, update_buffs, a);
for (i = 0; i < m - k; i++) { for (i = 0; i < m - k; i++) {
if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN(m))) { if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN(m))) {
printf("\nupdate_buffs%d :", i); printf("\nupdate_buffs%d :", i);
dump(update_buffs[k + i], 25); dump(update_buffs[k + i], 25);
printf("buffs%d :", i); printf("buffs%d :", i);
dump(buffs[k + i], 25); dump(buffs[k + i], 25);
goto exit; goto exit;
} }
} }
#ifdef DO_REF_PERF #ifdef DO_REF_PERF
// Start encode test // Start encode test
BENCHMARK(&start, BENCHMARK_TIME, encode_update_test_ref(m, k, g_tbls, buffs, a)); BENCHMARK(&start, BENCHMARK_TIME, encode_update_test_ref(m, k, g_tbls, buffs, a));
printf(xstr(REF_FUNCTION) TEST_TYPE_STR ": "); printf(xstr(REF_FUNCTION) TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (m)); perf_print(start, (long long) (TEST_LEN(m)) * (m));
#endif #endif
// Start encode test // Start encode test
BENCHMARK(&start, BENCHMARK_TIME, BENCHMARK(&start, BENCHMARK_TIME, encode_update_test(m, k, g_tbls, perf_update_buffs, a));
encode_update_test(m, k, g_tbls, perf_update_buffs, a)); printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": "); perf_print(start, (long long) (TEST_LEN(m)) * (m));
perf_print(start, (long long)(TEST_LEN(m)) * (m));
// Start encode test // Start encode test
BENCHMARK(&start, BENCHMARK_TIME, BENCHMARK(&start, BENCHMARK_TIME,
// Make parity vects // Make parity vects
ec_init_tables(k, m - k, &a[k * k], g_tbls); ec_init_tables(k, m - k, &a[k * k], g_tbls);
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0], FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
&perf_update_buffs[k])); &perf_update_buffs[k]));
printf(xstr(FUNCTION_UNDER_TEST) "_single_src" TEST_TYPE_STR ": "); printf(xstr(FUNCTION_UNDER_TEST) "_single_src" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1)); perf_print(start, (long long) (TEST_LEN(m)) * (m - k + 1));
// Start encode test // Start encode test
BENCHMARK(&start, BENCHMARK_TIME, BENCHMARK(&start, BENCHMARK_TIME,
// Make parity vects // Make parity vects
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0], FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
&perf_update_buffs[k])); &perf_update_buffs[k]));
printf(xstr(FUNCTION_UNDER_TEST) "_single_src_simple" TEST_TYPE_STR ": "); printf(xstr(FUNCTION_UNDER_TEST) "_single_src_simple" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1)); perf_print(start, (long long) (TEST_LEN(m)) * (m - k + 1));
for (i = k; i < m; i++) { for (i = k; i < m; i++) {
memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be
} // zero for update function
for (i = 0; i < k; i++) { }
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, update_buffs[i], for (i = 0; i < k; i++) {
&update_buffs[k]); FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, update_buffs[i],
} &update_buffs[k]);
}
decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list, decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list, nerrs, g_tbls,
nerrs, g_tbls, temp_buffs); temp_buffs);
BENCHMARK(&start, BENCHMARK_TIME, check = BENCHMARK(&start, BENCHMARK_TIME,
decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list, check = decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list, nerrs,
nerrs, g_tbls, perf_update_buffs)); g_tbls, perf_update_buffs));
if (check) { if (check) {
printf("BAD_MATRIX\n"); printf("BAD_MATRIX\n");
ret = check; ret = check;
goto exit; goto exit;
} }
for (i = 0; i < nerrs; i++) { for (i = 0; i < nerrs; i++) {
if (0 != memcmp(temp_buffs[i], update_buffs[src_err_list[i]], TEST_LEN(m))) { if (0 != memcmp(temp_buffs[i], update_buffs[src_err_list[i]], TEST_LEN(m))) {
printf("Fail error recovery (%d, %d, %d) - \n", m, k, nerrs); printf("Fail error recovery (%d, %d, %d) - \n", m, k, nerrs);
goto exit; goto exit;
} }
} }
printf(xstr(FUNCTION_UNDER_TEST) "_decode" TEST_TYPE_STR ": "); printf(xstr(FUNCTION_UNDER_TEST) "_decode" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs)); perf_print(start, (long long) (TEST_LEN(m)) * (k + nerrs));
printf("done all: Pass\n"); printf("done all: Pass\n");
ret = 0; ret = 0;
exit: exit:
free(err_list); free(err_list);
for (i = 0; i < TEST_SOURCES; i++) { for (i = 0; i < TEST_SOURCES; i++) {
free(buffs[i]); free(buffs[i]);
free(temp_buffs[i]); free(temp_buffs[i]);
free(update_buffs[i]); free(update_buffs[i]);
free(perf_update_buffs[i]); free(perf_update_buffs[i]);
} }
return ret; return ret;
} }

File diff suppressed because it is too large Load Diff

View File

@ -3,114 +3,117 @@
#include <stdio.h> #include <stdio.h>
#include "erasure_code.h" #include "erasure_code.h"
#define MAX_CHECK 63 /* Size is limited by using uint64_t to represent subsets */ #define MAX_CHECK 63 /* Size is limited by using uint64_t to represent subsets */
#define M_MAX 0x20 #define M_MAX 0x20
#define K_MAX 0x10 #define K_MAX 0x10
#define ROWS M_MAX #define ROWS M_MAX
#define COLS K_MAX #define COLS K_MAX
/* Return the smaller of a and b (a when they are equal). */
static inline uint64_t
min(const uint64_t a, const uint64_t b)
{
        if (a <= b)
                return a;
        else
                return b;
}
/*
 * Copy a sub-matrix of in_matrix into out_matrix.
 *
 * in_matrix is read as a row-major rows x cols byte matrix. Row i is kept when
 * bit i of row_indicator is set, column j is kept when bit j of col_indicator
 * is set. Kept elements are written densely, in order, into out_matrix using
 * dim as the output row stride.
 *
 * rows and cols must be below 64 because each index is used as a shift count
 * on a 64-bit mask.
 */
void
gen_sub_matrix(unsigned char *out_matrix, const uint64_t dim, unsigned char *in_matrix,
               const uint64_t rows, const uint64_t cols, const uint64_t row_indicator,
               const uint64_t col_indicator)
{
        uint64_t i, j, r, s;

        for (i = 0, r = 0; i < rows; i++) {
                if (!(row_indicator & ((uint64_t) 1 << i)))
                        continue;

                for (j = 0, s = 0; j < cols; j++) {
                        if (!(col_indicator & ((uint64_t) 1 << j)))
                                continue;
                        out_matrix[dim * r + s] = in_matrix[cols * i + j];
                        s++;
                }
                r++;
        }
}
/*
 * Gosper's Hack: advance *subset to the next larger bit mask that has the same
 * number of set bits.
 *
 * Returns 0 after a normal step. When the step sets bit element_count (the
 * previous value was the last subset of the element_count-element universe),
 * *subset is reset to the lowest subsize bits and 1 is returned.
 *
 * element_count and subsize must be below 64 because both are used as shift
 * counts on a 64-bit value.
 */
uint64_t
next_subset(uint64_t *subset, uint64_t element_count, uint64_t subsize)
{
        const uint64_t lowest = *subset & -*subset; /* lowest set bit */
        uint64_t ripple;

        /* An empty subset has no successor; without this guard the division
         * below would divide by zero. Report it as a wrap-around. */
        if (lowest == 0) {
                *subset = ((uint64_t) 1 << subsize) - 1;
                return 1;
        }

        ripple = *subset + lowest;
        *subset = (((*subset ^ ripple) >> 2) / lowest) | ripple;

        if (*subset & ((uint64_t) 1 << element_count)) {
                /* Overflow on last subset */
                *subset = ((uint64_t) 1 << subsize) - 1;
                return 1;
        }

        return 0;
}
int are_submatrices_singular(unsigned char *vmatrix, const uint64_t rows, const uint64_t cols) int
are_submatrices_singular(unsigned char *vmatrix, const uint64_t rows, const uint64_t cols)
{ {
unsigned char matrix[COLS * COLS]; unsigned char matrix[COLS * COLS];
unsigned char invert_matrix[COLS * COLS]; unsigned char invert_matrix[COLS * COLS];
uint64_t subsize; uint64_t subsize;
/* Check all square subsize x subsize submatrices of the rows x cols /* Check all square subsize x subsize submatrices of the rows x cols
* vmatrix for singularity*/ * vmatrix for singularity*/
for (subsize = 1; subsize <= min(rows, cols); subsize++) { for (subsize = 1; subsize <= min(rows, cols); subsize++) {
const uint64_t subset_init = (1ULL << subsize) - 1ULL; const uint64_t subset_init = (1ULL << subsize) - 1ULL;
uint64_t col_indicator = subset_init; uint64_t col_indicator = subset_init;
do { do {
uint64_t row_indicator = subset_init; uint64_t row_indicator = subset_init;
do { do {
gen_sub_matrix(matrix, subsize, vmatrix, rows, gen_sub_matrix(matrix, subsize, vmatrix, rows, cols, row_indicator,
cols, row_indicator, col_indicator); col_indicator);
if (gf_invert_matrix(matrix, invert_matrix, (int)subsize)) if (gf_invert_matrix(matrix, invert_matrix, (int) subsize))
return 1; return 1;
} while (next_subset(&row_indicator, rows, subsize) == 0); } while (next_subset(&row_indicator, rows, subsize) == 0);
} while (next_subset(&col_indicator, cols, subsize) == 0); } while (next_subset(&col_indicator, cols, subsize) == 0);
} }
return 0; return 0;
} }
int main(int argc, char **argv) int
main(int argc, char **argv)
{ {
unsigned char vmatrix[(ROWS + COLS) * COLS]; unsigned char vmatrix[(ROWS + COLS) * COLS];
uint64_t rows, cols; uint64_t rows, cols;
if (K_MAX > MAX_CHECK) { if (K_MAX > MAX_CHECK) {
printf("K_MAX too large for this test\n"); printf("K_MAX too large for this test\n");
return 0; return 0;
} }
if (M_MAX > MAX_CHECK) { if (M_MAX > MAX_CHECK) {
printf("M_MAX too large for this test\n"); printf("M_MAX too large for this test\n");
return 0; return 0;
} }
if (M_MAX < K_MAX) { if (M_MAX < K_MAX) {
printf("M_MAX must be smaller than K_MAX"); printf("M_MAX must be smaller than K_MAX");
return 0; return 0;
} }
printf("Checking gen_rs_matrix for k <= %d and m <= %d.\n", K_MAX, M_MAX); printf("Checking gen_rs_matrix for k <= %d and m <= %d.\n", K_MAX, M_MAX);
printf("gen_rs_matrix creates erasure codes for:\n"); printf("gen_rs_matrix creates erasure codes for:\n");
for (cols = 1; cols <= K_MAX; cols++) { for (cols = 1; cols <= K_MAX; cols++) {
for (rows = 1; rows <= M_MAX - cols; rows++) { for (rows = 1; rows <= M_MAX - cols; rows++) {
gf_gen_rs_matrix(vmatrix, rows + cols, cols); gf_gen_rs_matrix(vmatrix, rows + cols, cols);
/* Verify the Vandermonde portion of vmatrix contains no /* Verify the Vandermonde portion of vmatrix contains no
* singular submatrix */ * singular submatrix */
if (are_submatrices_singular(&vmatrix[cols * cols], rows, cols)) if (are_submatrices_singular(&vmatrix[cols * cols], rows, cols))
break; break;
}
} printf(" k = %2u, m <= %2u \n", (unsigned) cols, (unsigned) (rows + cols - 1));
printf(" k = %2u, m <= %2u \n", (unsigned)cols, (unsigned)(rows + cols - 1)); }
return 0;
}
return 0;
} }

View File

@ -29,7 +29,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> // for memset, memcmp #include <string.h> // for memset, memcmp
#include <assert.h> #include <assert.h>
#include "erasure_code.h" #include "erasure_code.h"
@ -37,202 +37,194 @@
#define TEST_LEN 8192

/* Both limits can be overridden from the compiler command line */
#ifndef TEST_SOURCES
#define TEST_SOURCES 128
#endif
#ifndef RANDOMS
#define RANDOMS 200
#endif

/* Largest matrix dimension used by the tests below */
#define KMAX TEST_SOURCES

typedef unsigned char u8;
/*
 * Multiply two n x n row-major byte matrices: c = a * b.
 * Element products come from gf_mul() and are accumulated with XOR.
 */
void
matrix_mult(u8 *a, u8 *b, u8 *c, int n)
{
        int i, j, k;
        u8 d;

        for (i = 0; i < n; i++) {
                for (j = 0; j < n; j++) {
                        d = 0;
                        for (k = 0; k < n; k++) {
                                d ^= gf_mul(a[n * i + k], b[n * k + j]);
                        }
                        c[i * n + j] = d;
                }
        }
}
/* Print an n x n row-major byte matrix in hex, one row per line, then a blank line. */
void
print_matrix(u8 *a, int n)
{
        int i, j;

        for (i = 0; i < n; i++) {
                for (j = 0; j < n; j++) {
                        printf(" %2x", a[i * n + j]);
                }
                printf("\n");
        }
        printf("\n");
}
/*
 * Check whether the n x n row-major byte matrix a is the identity matrix.
 * Returns 0 when it is, -1 at the first element that differs.
 */
int
is_ident(u8 *a, const int n)
{
        int i, j;
        u8 c;

        for (i = 0; i < n; i++) {
                for (j = 0; j < n; j++) {
                        c = *a++;
                        /* Diagonal elements must be 1: subtract so that 0 means "as expected" */
                        if (i == j)
                                c--;
                        if (c != 0)
                                return -1;
                }
        }
        return 0;
}
/*
 * Invert the n x n matrix in and verify the result.
 *
 * A copy of the input is kept in sav before gf_invert_matrix() runs. The
 * product inv * sav is then written back into in and compared with the
 * identity matrix.
 *
 * Returns 0 on success, -1 when gf_invert_matrix() returns non-zero or the
 * product is not the identity; the matrices involved are printed on failure.
 */
int
inv_test(u8 *in, u8 *inv, u8 *sav, int n)
{
        memcpy(sav, in, n * n);

        if (gf_invert_matrix(in, inv, n)) {
                printf("Given singular matrix\n");
                print_matrix(sav, n);
                return -1;
        }

        matrix_mult(inv, sav, in, n);

        if (is_ident(in, n)) {
                printf("fail\n");
                print_matrix(sav, n);
                print_matrix(inv, n);
                print_matrix(in, n);
                return -1;
        }
#ifdef TEST_VERBOSE
        putchar('.');
#endif

        return 0;
}
int main(int argc, char *argv[]) int
main(int argc, char *argv[])
{ {
int i, k, t; int i, k, t;
u8 *test_mat = NULL, *save_mat = NULL, *invr_mat = NULL; u8 *test_mat = NULL, *save_mat = NULL, *invr_mat = NULL;
int ret = -1; int ret = -1;
u8 test1[] = { 1, 1, 6, u8 test1[] = { 1, 1, 6, 1, 1, 1, 7, 1, 9 };
1, 1, 1,
7, 1, 9
};
u8 test2[] = { 0, 1, 6, u8 test2[] = { 0, 1, 6, 1, 0, 1, 0, 1, 9 };
1, 0, 1,
0, 1, 9
};
u8 test3[] = { 0, 0, 1, u8 test3[] = { 0, 0, 1, 1, 0, 0, 0, 1, 1 };
1, 0, 0,
0, 1, 1
};
u8 test4[] = { 0, 1, 6, 7, u8 test4[] = { 0, 1, 6, 7, 1, 1, 0, 0, 0, 1, 2, 3, 3, 2, 2, 3 }; // = row3+3*row2
1, 1, 0, 0,
0, 1, 2, 3,
3, 2, 2, 3
}; // = row3+3*row2
printf("gf_inverse_test: max=%d ", KMAX); printf("gf_inverse_test: max=%d ", KMAX);
test_mat = malloc(KMAX * KMAX); test_mat = malloc(KMAX * KMAX);
save_mat = malloc(KMAX * KMAX); save_mat = malloc(KMAX * KMAX);
invr_mat = malloc(KMAX * KMAX); invr_mat = malloc(KMAX * KMAX);
if (NULL == test_mat || NULL == save_mat || NULL == invr_mat) if (NULL == test_mat || NULL == save_mat || NULL == invr_mat)
goto exit; goto exit;
// Test with lots of leading 1's // Test with lots of leading 1's
k = 3; k = 3;
memcpy(test_mat, test1, k * k); memcpy(test_mat, test1, k * k);
if (inv_test(test_mat, invr_mat, save_mat, k)) if (inv_test(test_mat, invr_mat, save_mat, k))
goto exit; goto exit;
// Test with leading zeros // Test with leading zeros
k = 3; k = 3;
memcpy(test_mat, test2, k * k); memcpy(test_mat, test2, k * k);
if (inv_test(test_mat, invr_mat, save_mat, k)) if (inv_test(test_mat, invr_mat, save_mat, k))
goto exit; goto exit;
// Test 3 // Test 3
k = 3; k = 3;
memcpy(test_mat, test3, k * k); memcpy(test_mat, test3, k * k);
if (inv_test(test_mat, invr_mat, save_mat, k)) if (inv_test(test_mat, invr_mat, save_mat, k))
goto exit; goto exit;
// Test 4 - try a singular matrix // Test 4 - try a singular matrix
k = 4; k = 4;
memcpy(test_mat, test4, k * k); memcpy(test_mat, test4, k * k);
if (!gf_invert_matrix(test_mat, invr_mat, k)) { if (!gf_invert_matrix(test_mat, invr_mat, k)) {
printf("Fail: didn't catch singular matrix\n"); printf("Fail: didn't catch singular matrix\n");
print_matrix(test4, 4); print_matrix(test4, 4);
goto exit; goto exit;
} }
// Do random test of size KMAX // Do random test of size KMAX
k = KMAX; k = KMAX;
for (i = 0; i < k * k; i++) for (i = 0; i < k * k; i++)
test_mat[i] = save_mat[i] = rand(); test_mat[i] = save_mat[i] = rand();
if (gf_invert_matrix(test_mat, invr_mat, k)) { if (gf_invert_matrix(test_mat, invr_mat, k)) {
printf("rand picked a singular matrix, try again\n"); printf("rand picked a singular matrix, try again\n");
goto exit; goto exit;
} }
matrix_mult(invr_mat, save_mat, test_mat, k); matrix_mult(invr_mat, save_mat, test_mat, k);
if (is_ident(test_mat, k)) { if (is_ident(test_mat, k)) {
printf("fail\n"); printf("fail\n");
print_matrix(save_mat, k); print_matrix(save_mat, k);
print_matrix(invr_mat, k); print_matrix(invr_mat, k);
print_matrix(test_mat, k); print_matrix(test_mat, k);
goto exit; goto exit;
} }
// Do Randoms. Random size and coefficients // Do Randoms. Random size and coefficients
for (t = 0; t < RANDOMS; t++) { for (t = 0; t < RANDOMS; t++) {
k = rand() % KMAX; k = rand() % KMAX;
for (i = 0; i < k * k; i++) for (i = 0; i < k * k; i++)
test_mat[i] = save_mat[i] = rand(); test_mat[i] = save_mat[i] = rand();
if (gf_invert_matrix(test_mat, invr_mat, k)) if (gf_invert_matrix(test_mat, invr_mat, k))
continue; continue;
matrix_mult(invr_mat, save_mat, test_mat, k); matrix_mult(invr_mat, save_mat, test_mat, k);
if (is_ident(test_mat, k)) { if (is_ident(test_mat, k)) {
printf("fail rand k=%d\n", k); printf("fail rand k=%d\n", k);
print_matrix(save_mat, k); print_matrix(save_mat, k);
print_matrix(invr_mat, k); print_matrix(invr_mat, k);
print_matrix(test_mat, k); print_matrix(test_mat, k);
goto exit; goto exit;
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
if (0 == (t % 8)) if (0 == (t % 8))
putchar('.'); putchar('.');
#endif #endif
} }
printf(" Pass\n"); printf(" Pass\n");
ret = 0; ret = 0;
exit: exit:
free(test_mat); free(test_mat);
free(save_mat); free(save_mat);
free(invr_mat); free(invr_mat);
return ret; return ret;
} }

View File

@ -29,26 +29,26 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> // for memset, memcmp #include <string.h> // for memset, memcmp
#include "test.h" #include "test.h"
#include "erasure_code.h" #include "erasure_code.h"
/* Expansions are parenthesized so the macros stay correct inside larger
 * expressions such as x % TEST_LEN. */
#ifndef GT_L3_CACHE
#define GT_L3_CACHE (32 * 1024 * 1024) /* some number > last level cache */
#endif

#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset
#define TEST_SOURCES  10
#define TEST_LEN      (8 * 1024)
#define TEST_TYPE_STR "_warm"
#elif defined(COLD_TEST)
// Uncached test.  Pull from large mem base.
#define TEST_SOURCES  10
#define TEST_LEN      (GT_L3_CACHE / TEST_SOURCES)
#define TEST_TYPE_STR "_cold"
#elif defined(TEST_CUSTOM)
// TEST_SOURCES and TEST_LEN are not defined here in this mode
#define TEST_TYPE_STR "_cus"
#endif

typedef unsigned char u8;
@ -58,105 +58,108 @@ u8 gff[256];
/* Written by mk_gf_field(): gflog[s] = i for each value s it generates */
u8 gflog[256];
/* Full 256 x 256 product table; filled by mk_gf_mul_table(), read by gf_vect_dot_prod_mult() */
u8 gf_mul_table[256 * 256];
void mk_gf_field(void) void
mk_gf_field(void)
{ {
int i; int i;
u8 s = 1; u8 s = 1;
gflog[0] = 0; gflog[0] = 0;
for (i = 0; i < 256; i++) { for (i = 0; i < 256; i++) {
gff[i] = s; gff[i] = s;
gflog[s] = i; gflog[s] = i;
s = (s << 1) ^ ((s & 0x80) ? 0x1d : 0); // mult by GF{2} s = (s << 1) ^ ((s & 0x80) ? 0x1d : 0); // mult by GF{2}
} }
} }
/*
 * Populate a single table with all multiply combinations for a fast,
 * single-table lookup of GF(2^8) multiply at the expense of memory.
 *
 * table must hold 256 * 256 bytes; entry [i * 256 + j] receives gf_mul(i, j).
 */
void
mk_gf_mul_table(u8 *table)
{
        int i, j;

        for (i = 0; i < 256; i++)
                for (j = 0; j < 256; j++)
                        table[i * 256 + j] = gf_mul(i, j);
}
/*
 * Reference dot product: for every byte position i in 0..len-1,
 * dest[i] = XOR over j in 0..vlen-1 of gf_mul(src[j][i], v[j]).
 *
 * src holds vlen source buffers of at least len bytes each; v holds one
 * coefficient per source.
 */
void
gf_vect_dot_prod_ref(int len, int vlen, u8 *v, u8 **src, u8 *dest)
{
        int i, j;
        u8 s;

        for (i = 0; i < len; i++) {
                s = 0;
                for (j = 0; j < vlen; j++)
                        s ^= gf_mul(src[j][i], v[j]);

                dest[i] = s;
        }
}
/*
 * Same dot product as gf_vect_dot_prod_ref(), but each product is read from
 * the global gf_mul_table (index v[j] * 256 + src[j][i]) instead of calling
 * gf_mul(). The table must have been filled with mk_gf_mul_table() first.
 */
void
gf_vect_dot_prod_mult(int len, int vlen, u8 *v, u8 **src, u8 *dest)
{
        int i, j;
        u8 s;

        for (i = 0; i < len; i++) {
                s = 0;
                for (j = 0; j < vlen; j++) {
                        s ^= gf_mul_table[v[j] * 256 + src[j][i]];
                }
                dest[i] = s;
        }
}
int main(void) int
main(void)
{ {
int i, j; int i, j;
u8 vec[TEST_SOURCES], *dest1, *dest2; u8 vec[TEST_SOURCES], *dest1, *dest2;
u8 *matrix[TEST_SOURCES]; u8 *matrix[TEST_SOURCES];
struct perf start; struct perf start;
dest1 = (u8 *) malloc(TEST_LEN); dest1 = (u8 *) malloc(TEST_LEN);
dest2 = (u8 *) malloc(TEST_LEN); dest2 = (u8 *) malloc(TEST_LEN);
if (NULL == dest1 || NULL == dest2) { if (NULL == dest1 || NULL == dest2) {
printf("buffer alloc error\n"); printf("buffer alloc error\n");
return -1; return -1;
} }
memset(dest1, 0xfe, TEST_LEN); memset(dest1, 0xfe, TEST_LEN);
memset(dest2, 0xfe, TEST_LEN); memset(dest2, 0xfe, TEST_LEN);
mk_gf_field(); mk_gf_field();
mk_gf_mul_table(gf_mul_table); mk_gf_mul_table(gf_mul_table);
//generate random vector and matrix/data // generate random vector and matrix/data
for (i = 0; i < TEST_SOURCES; i++) { for (i = 0; i < TEST_SOURCES; i++) {
vec[i] = rand(); vec[i] = rand();
if (!(matrix[i] = malloc(TEST_LEN))) { if (!(matrix[i] = malloc(TEST_LEN))) {
fprintf(stderr, "Error failure\n\n"); fprintf(stderr, "Error failure\n\n");
return -1; return -1;
} }
for (j = 0; j < TEST_LEN; j++) for (j = 0; j < TEST_LEN; j++)
matrix[i][j] = rand(); matrix[i][j] = rand();
}
} BENCHMARK(&start, BENCHMARK_TIME,
gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1));
printf("gf_vect_dot_prod_2tbl" TEST_TYPE_STR ": ");
perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));
BENCHMARK(&start, BENCHMARK_TIME, BENCHMARK(&start, BENCHMARK_TIME,
gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1)); gf_vect_dot_prod_mult(TEST_LEN, TEST_SOURCES, vec, matrix, dest2));
printf("gf_vect_dot_prod_2tbl" TEST_TYPE_STR ": "); printf("gf_vect_dot_prod_1tbl" TEST_TYPE_STR ": ");
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1)); perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));
BENCHMARK(&start, BENCHMARK_TIME, // Compare with reference function
gf_vect_dot_prod_mult(TEST_LEN, TEST_SOURCES, vec, matrix, dest2)); if (0 != memcmp(dest1, dest2, TEST_LEN)) {
printf("gf_vect_dot_prod_1tbl" TEST_TYPE_STR ": "); printf("Error, different results!\n\n");
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1)); return -1;
}
// Compare with reference function printf("Pass functional test\n");
if (0 != memcmp(dest1, dest2, TEST_LEN)) { return 0;
printf("Error, different results!\n\n");
return -1;
}
printf("Pass functional test\n");
return 0;
} }

View File

@ -29,19 +29,19 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> // for memset, memcmp #include <string.h> // for memset, memcmp
#include <assert.h> #include <assert.h>
#include "erasure_code.h" #include "erasure_code.h"
#include "test.h" #include "test.h"
#define TEST_LEN  8192
#define TEST_SIZE (TEST_LEN / 2)

/* Both limits can be overridden from the compiler command line */
#ifndef TEST_SOURCES
#define TEST_SOURCES 250
#endif
#ifndef RANDOMS
#define RANDOMS 20
#endif

/* Upper bound on the total number of buffers (m) in a test case */
#define MMAX TEST_SOURCES
@ -49,244 +49,251 @@
typedef unsigned char u8; typedef unsigned char u8;
/*
 * Print len bytes of buf in hex, with a line break after every 32 bytes and a
 * final newline.
 */
void
dump(unsigned char *buf, int len)
{
        int i;

        for (i = 0; i < len;) {
                printf(" %2x", 0xff & buf[i++]);
                if (i % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}
/*
 * Print the first m bytes of each of the k buffers in s, one buffer per line,
 * followed by a blank line.
 */
void
dump_matrix(unsigned char **s, int k, int m)
{
        int i, j;

        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", s[i][j]);
                }
                printf("\n");
        }
        printf("\n");
}
/*
 * Print a k x m row-major byte matrix stored contiguously in s, one row per
 * line, followed by a blank line.
 */
void
dump_u8xu8(unsigned char *s, int k, int m)
{
        int i, j;

        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", 0xff & s[j + (i * m)]);
                }
                printf("\n");
        }
        printf("\n");
}
int main(int argc, char *argv[]) int
main(int argc, char *argv[])
{ {
int i, j, rtest, m, k, nerrs, r, err; int i, j, rtest, m, k, nerrs, r, err;
void *buf; void *buf;
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES]; u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX]; u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES]; u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
printf("gf_vect_dot_prod_base: %dx%d ", TEST_SOURCES, TEST_LEN); printf("gf_vect_dot_prod_base: %dx%d ", TEST_SOURCES, TEST_LEN);
// Allocate the arrays // Allocate the arrays
for (i = 0; i < TEST_SOURCES; i++) { for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
buffs[i] = buf; buffs[i] = buf;
} }
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
dest = buf; dest = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
dest_ref = buf; dest_ref = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
temp_buff = buf; temp_buff = buf;
// Init // Init
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
memset(buffs[i], 0, TEST_LEN); memset(buffs[i], 0, TEST_LEN);
memset(dest, 0, TEST_LEN); memset(dest, 0, TEST_LEN);
memset(temp_buff, 0, TEST_LEN); memset(temp_buff, 0, TEST_LEN);
memset(dest_ref, 0, TEST_LEN); memset(dest_ref, 0, TEST_LEN);
memset(g, 0, TEST_SOURCES); memset(g, 0, TEST_SOURCES);
// Test erasure code using gf_vect_dot_prod // Test erasure code using gf_vect_dot_prod
// Pick a first test // Pick a first test
m = 9; m = 9;
k = 5; k = 5;
assert(!(m > MMAX || k > KMAX)); assert(!(m > MMAX || k > KMAX));
gf_gen_cauchy1_matrix(a, m, k); gf_gen_cauchy1_matrix(a, m, k);
// Make random data // Make random data
for (i = 0; i < k; i++) for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN; j++) for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
// Make parity vects // Make parity vects
for (i = k; i < m; i++) { for (i = k; i < m; i++) {
for (j = 0; j < k; j++) for (j = 0; j < k; j++)
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]); gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
} }
// Random buffers in erasure // Random buffers in erasure
memset(src_in_err, 0, TEST_SOURCES); memset(src_in_err, 0, TEST_SOURCES);
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
err = 1 & rand(); err = 1 & rand();
src_in_err[i] = err; src_in_err[i] = err;
if (err) if (err)
src_err_list[nerrs++] = i; src_err_list[nerrs++] = i;
} }
// construct b by removing error rows // construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) { for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) { while (src_in_err[r]) {
r++; r++;
continue; continue;
} }
for (j = 0; j < k; j++) for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j]; b[k * i + j] = a[k * r + j];
} }
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
printf("BAD MATRIX\n"); printf("BAD MATRIX\n");
for (i = 0, r = 0; i < k; i++, r++) { for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) { while (src_in_err[r]) {
r++; r++;
continue; continue;
} }
recov[i] = buffs[r]; recov[i] = buffs[r];
} }
// Recover data // Recover data
for (i = 0; i < nerrs; i++) { for (i = 0; i < nerrs; i++) {
for (j = 0; j < k; j++) for (j = 0; j < k; j++)
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]); gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff); gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
printf("recov %d:", src_err_list[i]); printf("recov %d:", src_err_list[i]);
dump(temp_buff, 25); dump(temp_buff, 25);
printf("orig :"); printf("orig :");
dump(buffs[src_err_list[i]], 25); dump(buffs[src_err_list[i]], 25);
return -1; return -1;
} }
} }
// Do more random tests // Do more random tests
for (rtest = 0; rtest < RANDOMS; rtest++) { for (rtest = 0; rtest < RANDOMS; rtest++) {
while ((m = (rand() % MMAX)) < 2) ; while ((m = (rand() % MMAX)) < 2)
while ((k = (rand() % KMAX)) >= m || k < 1) ; ;
while ((k = (rand() % KMAX)) >= m || k < 1)
;
if (m > MMAX || k > KMAX) if (m > MMAX || k > KMAX)
continue; continue;
gf_gen_cauchy1_matrix(a, m, k); gf_gen_cauchy1_matrix(a, m, k);
// Make random data // Make random data
for (i = 0; i < k; i++) for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN; j++) for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
// Make parity vects // Make parity vects
for (i = k; i < m; i++) { for (i = k; i < m; i++) {
for (j = 0; j < k; j++) for (j = 0; j < k; j++)
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]); gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
} }
// Random errors // Random errors
memset(src_in_err, 0, TEST_SOURCES); memset(src_in_err, 0, TEST_SOURCES);
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
err = 1 & rand(); err = 1 & rand();
src_in_err[i] = err; src_in_err[i] = err;
if (err) if (err)
src_err_list[nerrs++] = i; src_err_list[nerrs++] = i;
} }
if (nerrs == 0) { // should have at least one error if (nerrs == 0) { // should have at least one error
while ((err = (rand() % KMAX)) >= k) ; while ((err = (rand() % KMAX)) >= k)
src_err_list[nerrs++] = err; ;
src_in_err[err] = 1; src_err_list[nerrs++] = err;
} src_in_err[err] = 1;
// construct b by removing error rows }
for (i = 0, r = 0; i < k; i++, r++) { // construct b by removing error rows
while (src_in_err[r]) { for (i = 0, r = 0; i < k; i++, r++) {
r++; while (src_in_err[r]) {
continue; r++;
} continue;
for (j = 0; j < k; j++) }
b[k * i + j] = a[k * r + j]; for (j = 0; j < k; j++)
} b[k * i + j] = a[k * r + j];
}
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
printf("BAD MATRIX\n"); printf("BAD MATRIX\n");
for (i = 0, r = 0; i < k; i++, r++) { for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) { while (src_in_err[r]) {
r++; r++;
continue; continue;
} }
recov[i] = buffs[r]; recov[i] = buffs[r];
} }
// Recover data // Recover data
for (i = 0; i < nerrs; i++) { for (i = 0; i < nerrs; i++) {
for (j = 0; j < k; j++) for (j = 0; j < k; j++)
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]); gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff); gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
printf(" - erase list = "); printf(" - erase list = ");
for (i = 0; i < nerrs; i++) for (i = 0; i < nerrs; i++)
printf(" %d", src_err_list[i]); printf(" %d", src_err_list[i]);
printf("\na:\n"); printf("\na:\n");
dump_u8xu8((u8 *) a, m, k); dump_u8xu8((u8 *) a, m, k);
printf("inv b:\n"); printf("inv b:\n");
dump_u8xu8((u8 *) d, k, k); dump_u8xu8((u8 *) d, k, k);
printf("orig data:\n"); printf("orig data:\n");
dump_matrix(buffs, m, 25); dump_matrix(buffs, m, 25);
printf("orig :"); printf("orig :");
dump(buffs[src_err_list[i]], 25); dump(buffs[src_err_list[i]], 25);
printf("recov %d:", src_err_list[i]); printf("recov %d:", src_err_list[i]);
dump(temp_buff, 25); dump(temp_buff, 25);
return -1; return -1;
} }
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
printf("done all: Pass\n"); printf("done all: Pass\n");
return 0; return 0;
} }

View File

@ -29,146 +29,148 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> // for memset, memcmp #include <string.h> // for memset, memcmp
#include "erasure_code.h" #include "erasure_code.h"
#include "test.h" #include "test.h"
#ifndef FUNCTION_UNDER_TEST
#define FUNCTION_UNDER_TEST gf_vect_dot_prod
#endif

/* Two-level stringification so that xstr() expands its argument first */
#define str(s)  #s
#define xstr(s) str(s)

/* Expansions are parenthesized so the macros stay correct inside larger
 * expressions such as x % TEST_LEN. */
#ifndef GT_L3_CACHE
#define GT_L3_CACHE (32 * 1024 * 1024) /* some number > last level cache */
#endif

#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset
#define TEST_SOURCES  10
#define TEST_LEN      (8 * 1024)
#define TEST_TYPE_STR "_warm"
#elif defined(COLD_TEST)
// Uncached test.  Pull from large mem base.
#define TEST_SOURCES  10
// Rounded down to a multiple of 64 bytes
#define TEST_LEN      ((GT_L3_CACHE / TEST_SOURCES) & ~(64 - 1))
#define TEST_TYPE_STR "_cold"
#elif defined(TEST_CUSTOM)
// TEST_SOURCES and TEST_LEN are not defined here in this mode
#define TEST_TYPE_STR "_cus"
#endif

typedef unsigned char u8;
/*
 * Print len bytes of buf in hex, with a line break after every 32 bytes and a
 * final newline.
 */
void
dump(unsigned char *buf, int len)
{
        int i;

        for (i = 0; i < len;) {
                printf(" %2x", 0xff & buf[i++]);
                if (i % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}
/*
 * Print the first m bytes of each of the k buffers in s, one buffer per line,
 * followed by a blank line.
 */
void
dump_matrix(unsigned char **s, int k, int m)
{
        int i, j;

        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", s[i][j]);
                }
                printf("\n");
        }
        printf("\n");
}
void vect_dot_prod_perf(void (*fun_ptr) void
(int, int, unsigned char *, unsigned char **, unsigned char *), vect_dot_prod_perf(void (*fun_ptr)(int, int, unsigned char *, unsigned char **, unsigned char *),
u8 * g, u8 * g_tbls, u8 ** buffs, u8 * dest_ref) u8 *g, u8 *g_tbls, u8 **buffs, u8 *dest_ref)
{ {
int j; int j;
for (j = 0; j < TEST_SOURCES; j++) for (j = 0; j < TEST_SOURCES; j++)
gf_vect_mul_init(g[j], &g_tbls[j * 32]); gf_vect_mul_init(g[j], &g_tbls[j * 32]);
(*fun_ptr) (TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); (*fun_ptr)(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
} }
int main(int argc, char *argv[]) int
main(int argc, char *argv[])
{ {
int i, j; int i, j;
void *buf; void *buf;
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref; u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
u8 *temp_buff, *buffs[TEST_SOURCES]; u8 *temp_buff, *buffs[TEST_SOURCES];
struct perf start; struct perf start;
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN); printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
// Allocate the arrays // Allocate the arrays
for (i = 0; i < TEST_SOURCES; i++) { for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
buffs[i] = buf; buffs[i] = buf;
} }
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
dest = buf; dest = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
dest_ref = buf; dest_ref = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
temp_buff = buf; temp_buff = buf;
// Performance test // Performance test
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++) for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
memset(dest, 0, TEST_LEN); memset(dest, 0, TEST_LEN);
memset(temp_buff, 0, TEST_LEN); memset(temp_buff, 0, TEST_LEN);
memset(dest_ref, 0, TEST_LEN); memset(dest_ref, 0, TEST_LEN);
memset(g, 0, TEST_SOURCES); memset(g, 0, TEST_SOURCES);
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
g[i] = rand(); g[i] = rand();
#ifdef DO_REF_PERF #ifdef DO_REF_PERF
BENCHMARK(&start, BENCHMARK_TIME, BENCHMARK(&start, BENCHMARK_TIME,
vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref) vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref));
); printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": "); perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
#else #else
vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref); vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref);
#endif #endif
BENCHMARK(&start, BENCHMARK_TIME, BENCHMARK(&start, BENCHMARK_TIME,
vect_dot_prod_perf(&FUNCTION_UNDER_TEST, g, g_tbls, buffs, dest)); vect_dot_prod_perf(&FUNCTION_UNDER_TEST, g, g_tbls, buffs, dest));
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": "); printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1)); perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));
if (0 != memcmp(dest_ref, dest, TEST_LEN)) { if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n"); printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
dump_matrix(buffs, 5, TEST_SOURCES); dump_matrix(buffs, 5, TEST_SOURCES);
printf("dprod_base:"); printf("dprod_base:");
dump(dest_ref, 25); dump(dest_ref, 25);
printf("dprod:"); printf("dprod:");
dump(dest, 25); dump(dest, 25);
return -1; return -1;
} }
printf("pass perf check\n"); printf("pass perf check\n");
return 0; return 0;
} }

View File

@ -29,28 +29,28 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> // for memset, memcmp #include <string.h> // for memset, memcmp
#include "erasure_code.h" #include "erasure_code.h"
#include "test.h" #include "test.h"
// Routine exercised by this test; may be overridden on the compiler command line.
#ifndef FUNCTION_UNDER_TEST
#define FUNCTION_UNDER_TEST gf_vect_dot_prod
#endif

// Smallest length, in bytes, used by the size-sweep loops.
#ifndef TEST_MIN_SIZE
#define TEST_MIN_SIZE 32
#endif

// Two-level stringification so that macro arguments are expanded before quoting.
#define str(s)  #s
#define xstr(s) str(s)

// TEST_LEN is the allocation size of every buffer; TEST_SIZE is the upper bound of
// the end-of-buffer size sweep.
#define TEST_LEN  8192
#define TEST_SIZE (TEST_LEN / 2)

// Number of source buffers.
#ifndef TEST_SOURCES
#define TEST_SOURCES 16
#endif

// Iteration count for each randomized test loop.
#ifndef RANDOMS
#define RANDOMS 20
#endif

#define MMAX TEST_SOURCES
@ -58,481 +58,486 @@
#ifdef EC_ALIGNED_ADDR
// Define power of 2 range to check ptr, len alignment
#define PTR_ALIGN_CHK_B 0
#define LEN_ALIGN_CHK_B 0 // 0 for aligned only
#else
// Define power of 2 range to check ptr, len alignment
#define PTR_ALIGN_CHK_B 32
#define LEN_ALIGN_CHK_B 32 // 0 for aligned only
#endif

// Byte type used for all buffers and GF(2^8) coefficients in this test.
typedef unsigned char u8;
// Print the first len bytes of buf in hex, 32 values per output line, and finish
// with a newline.
void
dump(unsigned char *buf, int len)
{
        int i;

        for (i = 0; i < len;) {
                printf(" %2x", 0xff & buf[i++]);
                // i has already been advanced, so the break falls after every 32nd byte
                if (i % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}
// Print k rows of hex values, where row i shows the first m bytes of s[i].
// A blank line follows the last row.
void
dump_matrix(unsigned char **s, int k, int m)
{
        int i, j;

        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", s[i][j]);
                }
                printf("\n");
        }
        printf("\n");
}
// Print a k-row by m-column byte matrix stored contiguously in s (element (i, j) is
// s[i * m + j]) as hex, one row per line, followed by a blank line.
void
dump_u8xu8(unsigned char *s, int k, int m)
{
        int i, j;

        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", 0xff & s[j + (i * m)]);
                }
                printf("\n");
        }
        printf("\n");
}
int main(int argc, char *argv[]) int
main(int argc, char *argv[])
{ {
int i, j, rtest, srcs, m, k, nerrs, r, err; int i, j, rtest, srcs, m, k, nerrs, r, err;
void *buf; void *buf;
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES]; u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX]; u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES]; u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
int align, size; int align, size;
unsigned char *efence_buffs[TEST_SOURCES]; unsigned char *efence_buffs[TEST_SOURCES];
unsigned int offset; unsigned int offset;
u8 *ubuffs[TEST_SOURCES]; u8 *ubuffs[TEST_SOURCES];
u8 *udest_ptr; u8 *udest_ptr;
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
// Allocate the arrays // Allocate the arrays
for (i = 0; i < TEST_SOURCES; i++) { for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
buffs[i] = buf; buffs[i] = buf;
} }
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
dest = buf; dest = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
dest_ref = buf; dest_ref = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
temp_buff = buf; temp_buff = buf;
// Test of all zeros // Test of all zeros
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
memset(buffs[i], 0, TEST_LEN); memset(buffs[i], 0, TEST_LEN);
memset(dest, 0, TEST_LEN); memset(dest, 0, TEST_LEN);
memset(temp_buff, 0, TEST_LEN); memset(temp_buff, 0, TEST_LEN);
memset(dest_ref, 0, TEST_LEN); memset(dest_ref, 0, TEST_LEN);
memset(g, 0, TEST_SOURCES); memset(g, 0, TEST_SOURCES);
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]); gf_vect_mul_init(g[i], &g_tbls[i * 32]);
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
if (0 != memcmp(dest_ref, dest, TEST_LEN)) { if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n"); printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n");
dump_matrix(buffs, 5, TEST_SOURCES); dump_matrix(buffs, 5, TEST_SOURCES);
printf("dprod_base:"); printf("dprod_base:");
dump(dest_ref, 25); dump(dest_ref, 25);
printf("dprod:"); printf("dprod:");
dump(dest, 25); dump(dest, 25);
return -1; return -1;
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
else else
putchar('.'); putchar('.');
#endif #endif
// Rand data test // Rand data test
for (rtest = 0; rtest < RANDOMS; rtest++) { for (rtest = 0; rtest < RANDOMS; rtest++) {
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++) for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
g[i] = rand(); g[i] = rand();
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]); gf_vect_mul_init(g[i], &g_tbls[i * 32]);
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
if (0 != memcmp(dest_ref, dest, TEST_LEN)) { if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n"); printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n");
dump_matrix(buffs, 5, TEST_SOURCES); dump_matrix(buffs, 5, TEST_SOURCES);
printf("dprod_base:"); printf("dprod_base:");
dump(dest_ref, 25); dump(dest_ref, 25);
printf("dprod:"); printf("dprod:");
dump(dest, 25); dump(dest, 25);
return -1; return -1;
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
// Rand data test with varied parameters // Rand data test with varied parameters
for (rtest = 0; rtest < RANDOMS; rtest++) { for (rtest = 0; rtest < RANDOMS; rtest++) {
for (srcs = TEST_SOURCES; srcs > 0; srcs--) { for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
for (i = 0; i < srcs; i++) for (i = 0; i < srcs; i++)
for (j = 0; j < TEST_LEN; j++) for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
for (i = 0; i < srcs; i++) for (i = 0; i < srcs; i++)
g[i] = rand(); g[i] = rand();
for (i = 0; i < srcs; i++) for (i = 0; i < srcs; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]); gf_vect_mul_init(g[i], &g_tbls[i * 32]);
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref); gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest); FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest);
if (0 != memcmp(dest_ref, dest, TEST_LEN)) { if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n"); printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
dump_matrix(buffs, 5, srcs); dump_matrix(buffs, 5, srcs);
printf("dprod_base:"); printf("dprod_base:");
dump(dest_ref, 5); dump(dest_ref, 5);
printf("dprod:"); printf("dprod:");
dump(dest, 5); dump(dest, 5);
return -1; return -1;
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
} }
// Test erasure code using gf_vect_dot_prod // Test erasure code using gf_vect_dot_prod
// Pick a first test // Pick a first test
m = 9; m = 9;
k = 5; k = 5;
if (m > MMAX || k > KMAX) if (m > MMAX || k > KMAX)
return -1; return -1;
gf_gen_rs_matrix(a, m, k); gf_gen_rs_matrix(a, m, k);
// Make random data // Make random data
for (i = 0; i < k; i++) for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN; j++) for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
// Make parity vects // Make parity vects
for (i = k; i < m; i++) { for (i = k; i < m; i++) {
for (j = 0; j < k; j++) for (j = 0; j < k; j++)
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
#ifndef USEREF #ifndef USEREF
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]); FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
#else #else
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]); gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
#endif #endif
} }
// Random buffers in erasure // Random buffers in erasure
memset(src_in_err, 0, TEST_SOURCES); memset(src_in_err, 0, TEST_SOURCES);
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
err = 1 & rand(); err = 1 & rand();
src_in_err[i] = err; src_in_err[i] = err;
if (err) if (err)
src_err_list[nerrs++] = i; src_err_list[nerrs++] = i;
} }
// construct b by removing error rows // construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) { for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) { while (src_in_err[r]) {
r++; r++;
continue; continue;
} }
for (j = 0; j < k; j++) for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j]; b[k * i + j] = a[k * r + j];
} }
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
printf("BAD MATRIX\n"); printf("BAD MATRIX\n");
for (i = 0, r = 0; i < k; i++, r++) { for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) { while (src_in_err[r]) {
r++; r++;
continue; continue;
} }
recov[i] = buffs[r]; recov[i] = buffs[r];
} }
// Recover data // Recover data
for (i = 0; i < nerrs; i++) { for (i = 0; i < nerrs; i++) {
for (j = 0; j < k; j++) for (j = 0; j < k; j++)
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]); gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
#ifndef USEREF #ifndef USEREF
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff); FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
#else #else
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff); gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
#endif #endif
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
printf("recov %d:", src_err_list[i]); printf("recov %d:", src_err_list[i]);
dump(temp_buff, 25); dump(temp_buff, 25);
printf("orig :"); printf("orig :");
dump(buffs[src_err_list[i]], 25); dump(buffs[src_err_list[i]], 25);
return -1; return -1;
} }
} }
// Do more random tests // Do more random tests
for (rtest = 0; rtest < RANDOMS; rtest++) { for (rtest = 0; rtest < RANDOMS; rtest++) {
while ((m = (rand() % MMAX)) < 2) ; while ((m = (rand() % MMAX)) < 2)
while ((k = (rand() % KMAX)) >= m || k < 1) ; ;
while ((k = (rand() % KMAX)) >= m || k < 1)
;
if (m > MMAX || k > KMAX) if (m > MMAX || k > KMAX)
continue; continue;
gf_gen_rs_matrix(a, m, k); gf_gen_rs_matrix(a, m, k);
// Make random data // Make random data
for (i = 0; i < k; i++) for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN; j++) for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
// Make parity vects // Make parity vects
for (i = k; i < m; i++) { for (i = k; i < m; i++) {
for (j = 0; j < k; j++) for (j = 0; j < k; j++)
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
#ifndef USEREF #ifndef USEREF
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]); FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
#else #else
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]); gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
#endif #endif
} }
// Random errors // Random errors
memset(src_in_err, 0, TEST_SOURCES); memset(src_in_err, 0, TEST_SOURCES);
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
err = 1 & rand(); err = 1 & rand();
src_in_err[i] = err; src_in_err[i] = err;
if (err) if (err)
src_err_list[nerrs++] = i; src_err_list[nerrs++] = i;
} }
if (nerrs == 0) { // should have at least one error if (nerrs == 0) { // should have at least one error
while ((err = (rand() % KMAX)) >= k) ; while ((err = (rand() % KMAX)) >= k)
src_err_list[nerrs++] = err; ;
src_in_err[err] = 1; src_err_list[nerrs++] = err;
} src_in_err[err] = 1;
// construct b by removing error rows }
for (i = 0, r = 0; i < k; i++, r++) { // construct b by removing error rows
while (src_in_err[r]) { for (i = 0, r = 0; i < k; i++, r++) {
r++; while (src_in_err[r]) {
continue; r++;
} continue;
for (j = 0; j < k; j++) }
b[k * i + j] = a[k * r + j]; for (j = 0; j < k; j++)
} b[k * i + j] = a[k * r + j];
}
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
printf("BAD MATRIX\n"); printf("BAD MATRIX\n");
for (i = 0, r = 0; i < k; i++, r++) { for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) { while (src_in_err[r]) {
r++; r++;
continue; continue;
} }
recov[i] = buffs[r]; recov[i] = buffs[r];
} }
// Recover data // Recover data
for (i = 0; i < nerrs; i++) { for (i = 0; i < nerrs; i++) {
for (j = 0; j < k; j++) for (j = 0; j < k; j++)
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]); gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
#ifndef USEREF #ifndef USEREF
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff); FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
#else #else
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff); gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
#endif #endif
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
printf(" - erase list = "); printf(" - erase list = ");
for (i = 0; i < nerrs; i++) for (i = 0; i < nerrs; i++)
printf(" %d", src_err_list[i]); printf(" %d", src_err_list[i]);
printf("\na:\n"); printf("\na:\n");
dump_u8xu8((u8 *) a, m, k); dump_u8xu8((u8 *) a, m, k);
printf("inv b:\n"); printf("inv b:\n");
dump_u8xu8((u8 *) d, k, k); dump_u8xu8((u8 *) d, k, k);
printf("orig data:\n"); printf("orig data:\n");
dump_matrix(buffs, m, 25); dump_matrix(buffs, m, 25);
printf("orig :"); printf("orig :");
dump(buffs[src_err_list[i]], 25); dump(buffs[src_err_list[i]], 25);
printf("recov %d:", src_err_list[i]); printf("recov %d:", src_err_list[i]);
dump(temp_buff, 25); dump(temp_buff, 25);
return -1; return -1;
} }
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
// Run tests at end of buffer for Electric Fence // Run tests at end of buffer for Electric Fence
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++) for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
efence_buffs[i] = buffs[i] + TEST_LEN - size; efence_buffs[i] = buffs[i] + TEST_LEN - size;
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
g[i] = rand(); g[i] = rand();
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]); gf_vect_mul_init(g[i], &g_tbls[i * 32]);
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref); gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest); FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
if (0 != memcmp(dest_ref, dest, size)) { if (0 != memcmp(dest_ref, dest, size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n"); printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
dump_matrix(efence_buffs, 5, TEST_SOURCES); dump_matrix(efence_buffs, 5, TEST_SOURCES);
printf("dprod_base:"); printf("dprod_base:");
dump(dest_ref, align); dump(dest_ref, align);
printf("dprod:"); printf("dprod:");
dump(dest, align); dump(dest, align);
return -1; return -1;
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
// Test rand ptr alignment if available // Test rand ptr alignment if available
for (rtest = 0; rtest < RANDOMS; rtest++) { for (rtest = 0; rtest < RANDOMS; rtest++) {
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
srcs = rand() % TEST_SOURCES; srcs = rand() % TEST_SOURCES;
if (srcs == 0) if (srcs == 0)
continue; continue;
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
// Add random offsets // Add random offsets
for (i = 0; i < srcs; i++) for (i = 0; i < srcs; i++)
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset)); udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
memset(dest, 0, TEST_LEN); // zero pad to check write-over memset(dest, 0, TEST_LEN); // zero pad to check write-over
for (i = 0; i < srcs; i++) for (i = 0; i < srcs; i++)
for (j = 0; j < size; j++) for (j = 0; j < size; j++)
ubuffs[i][j] = rand(); ubuffs[i][j] = rand();
for (i = 0; i < srcs; i++) for (i = 0; i < srcs; i++)
g[i] = rand(); g[i] = rand();
for (i = 0; i < srcs; i++) for (i = 0; i < srcs; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]); gf_vect_mul_init(g[i], &g_tbls[i * 32]);
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref); gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr); FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
if (memcmp(dest_ref, udest_ptr, size)) { if (memcmp(dest_ref, udest_ptr, size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n", printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n", srcs);
srcs); dump_matrix(ubuffs, 5, TEST_SOURCES);
dump_matrix(ubuffs, 5, TEST_SOURCES); printf("dprod_base:");
printf("dprod_base:"); dump(dest_ref, 25);
dump(dest_ref, 25); printf("dprod:");
printf("dprod:"); dump(udest_ptr, 25);
dump(udest_ptr, 25); return -1;
return -1; }
} // Confirm that padding around dests is unchanged
// Confirm that padding around dests is unchanged memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff offset = udest_ptr - dest;
offset = udest_ptr - dest;
if (memcmp(dest, dest_ref, offset)) { if (memcmp(dest, dest_ref, offset)) {
printf("Fail rand ualign pad start\n"); printf("Fail rand ualign pad start\n");
return -1; return -1;
} }
if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) { if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
printf("Fail rand ualign pad end\n"); printf("Fail rand ualign pad end\n");
return -1; return -1;
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
// Test all size alignment // Test all size alignment
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
srcs = TEST_SOURCES; srcs = TEST_SOURCES;
for (i = 0; i < srcs; i++) for (i = 0; i < srcs; i++)
for (j = 0; j < size; j++) for (j = 0; j < size; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
for (i = 0; i < srcs; i++) for (i = 0; i < srcs; i++)
g[i] = rand(); g[i] = rand();
for (i = 0; i < srcs; i++) for (i = 0; i < srcs; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]); gf_vect_mul_init(g[i], &g_tbls[i * 32]);
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref); gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest); FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
if (memcmp(dest_ref, dest, size)) { if (memcmp(dest_ref, dest, size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n", printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n", size);
size); dump_matrix(buffs, 5, TEST_SOURCES);
dump_matrix(buffs, 5, TEST_SOURCES); printf("dprod_base:");
printf("dprod_base:"); dump(dest_ref, 25);
dump(dest_ref, 25); printf("dprod:");
printf("dprod:"); dump(dest, 25);
dump(dest, 25); return -1;
return -1; }
} }
}
printf("done all: Pass\n"); printf("done all: Pass\n");
return 0; return 0;
} }

View File

@ -29,503 +29,500 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> // for memset, memcmp #include <string.h> // for memset, memcmp
#include "erasure_code.h" #include "erasure_code.h"
#include "test.h" #include "test.h"
#ifndef ALIGN_SIZE
#define ALIGN_SIZE 32
#endif

// REF_FUNCTION and VECT are only defaulted together with FUNCTION_UNDER_TEST, so a
// build that overrides FUNCTION_UNDER_TEST must define those two as well.
#ifndef FUNCTION_UNDER_TEST
// By default, test multi-binary version
#define FUNCTION_UNDER_TEST gf_vect_mad
#define REF_FUNCTION        gf_vect_dot_prod
#define VECT                1
#endif

// Smallest length, in bytes, used by the size-sweep loops.
#ifndef TEST_MIN_SIZE
#define TEST_MIN_SIZE 64
#endif

// Two-level stringification so that macro arguments are expanded before quoting.
#define str(s)  #s
#define xstr(s) str(s)

#define TEST_LEN      8192
#define TEST_SIZE     (TEST_LEN / 2)
#define TEST_MEM      TEST_SIZE
#define TEST_LOOPS    20000
#define TEST_TYPE_STR ""

// Number of source buffers.
#ifndef TEST_SOURCES
#define TEST_SOURCES 16
#endif

// Iteration count for each randomized test loop.
#ifndef RANDOMS
#define RANDOMS 20
#endif

#ifdef EC_ALIGNED_ADDR
// Define power of 2 range to check ptr, len alignment
#define PTR_ALIGN_CHK_B 0
#define LEN_ALIGN_CHK_B 0 // 0 for aligned only
#else
// Define power of 2 range to check ptr, len alignment
#define PTR_ALIGN_CHK_B ALIGN_SIZE
#define LEN_ALIGN_CHK_B ALIGN_SIZE // 0 for aligned only
#endif

typedef unsigned char u8;

// The final parameter is a single destination buffer when VECT == 1 and an array of
// destination buffers otherwise.
#if (VECT == 1)
#define LAST_ARG *dest
#else
#define LAST_ARG **dest
#endif

extern void
FUNCTION_UNDER_TEST(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                    unsigned char LAST_ARG);
extern void
REF_FUNCTION(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char LAST_ARG);
// Print the first len bytes of buf in hex, 32 values per output line, and finish
// with a newline.
void
dump(unsigned char *buf, int len)
{
        int i;

        for (i = 0; i < len;) {
                printf(" %2x", 0xff & buf[i++]);
                // i has already been advanced, so the break falls after every 32nd byte
                if (i % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}
// Print k rows of hex values, where row i shows the first m bytes of s[i].
// A blank line follows the last row.
void
dump_matrix(unsigned char **s, int k, int m)
{
        int i, j;

        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", s[i][j]);
                }
                printf("\n");
        }
        printf("\n");
}
// Print a k-row by m-column byte matrix stored contiguously in s (element (i, j) is
// s[i * m + j]) as hex, one row per line, followed by a blank line.
void
dump_u8xu8(unsigned char *s, int k, int m)
{
        int i, j;

        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", 0xff & s[j + (i * m)]);
                }
                printf("\n");
        }
        printf("\n");
}
int main(int argc, char *argv[]) int
main(int argc, char *argv[])
{ {
int i, j, rtest, srcs; int i, j, rtest, srcs;
void *buf; void *buf;
u8 gf[6][TEST_SOURCES]; u8 gf[6][TEST_SOURCES];
u8 *g_tbls; u8 *g_tbls;
u8 *dest_ref[VECT]; u8 *dest_ref[VECT];
u8 *dest_ptrs[VECT], *buffs[TEST_SOURCES]; u8 *dest_ptrs[VECT], *buffs[TEST_SOURCES];
int vector = VECT; int vector = VECT;
int align, size; int align, size;
unsigned char *efence_buffs[TEST_SOURCES]; unsigned char *efence_buffs[TEST_SOURCES];
unsigned int offset; unsigned int offset;
u8 *ubuffs[TEST_SOURCES]; u8 *ubuffs[TEST_SOURCES];
u8 *udest_ptrs[VECT]; u8 *udest_ptrs[VECT];
printf("test" xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); printf("test" xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
// Allocate the arrays // Allocate the arrays
for (i = 0; i < TEST_SOURCES; i++) { for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
buffs[i] = buf; buffs[i] = buf;
} }
if (posix_memalign(&buf, 16, 2 * (vector * TEST_SOURCES * 32))) { if (posix_memalign(&buf, 16, 2 * (vector * TEST_SOURCES * 32))) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
g_tbls = buf; g_tbls = buf;
for (i = 0; i < vector; i++) { for (i = 0; i < vector; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
dest_ptrs[i] = buf; dest_ptrs[i] = buf;
memset(dest_ptrs[i], 0, TEST_LEN); memset(dest_ptrs[i], 0, TEST_LEN);
} }
for (i = 0; i < vector; i++) { for (i = 0; i < vector; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) { if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail"); printf("alloc error: Fail");
return -1; return -1;
} }
dest_ref[i] = buf; dest_ref[i] = buf;
memset(dest_ref[i], 0, TEST_LEN); memset(dest_ref[i], 0, TEST_LEN);
} }
// Test of all zeros // Test of all zeros
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
memset(buffs[i], 0, TEST_LEN); memset(buffs[i], 0, TEST_LEN);
switch (vector) { switch (vector) {
case 6: case 6:
memset(gf[5], 0xe6, TEST_SOURCES); memset(gf[5], 0xe6, TEST_SOURCES);
case 5: case 5:
memset(gf[4], 4, TEST_SOURCES); memset(gf[4], 4, TEST_SOURCES);
case 4: case 4:
memset(gf[3], 9, TEST_SOURCES); memset(gf[3], 9, TEST_SOURCES);
case 3: case 3:
memset(gf[2], 7, TEST_SOURCES); memset(gf[2], 7, TEST_SOURCES);
case 2: case 2:
memset(gf[1], 1, TEST_SOURCES); memset(gf[1], 1, TEST_SOURCES);
case 1: case 1:
memset(gf[0], 2, TEST_SOURCES); memset(gf[0], 2, TEST_SOURCES);
break; break;
default: default:
return -1; return -1;
} }
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++) for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
for (j = 0; j < TEST_SOURCES; j++) { for (j = 0; j < TEST_SOURCES; j++) {
gf[i][j] = rand(); gf[i][j] = rand();
gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * TEST_SOURCES) + j * 32]); gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
} }
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES], gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES], buffs,
buffs, dest_ref[i]); dest_ref[i]);
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
memset(dest_ptrs[i], 0, TEST_LEN); memset(dest_ptrs[i], 0, TEST_LEN);
for (i = 0; i < TEST_SOURCES; i++) { for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1) #if (VECT == 1)
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs); FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs);
#else #else
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs); FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
#endif #endif
} }
for (i = 0; i < vector; i++) { for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) { if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i); printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
dump_matrix(buffs, vector, TEST_SOURCES); dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:"); printf("dprod_base:");
dump(dest_ref[i], 25); dump(dest_ref[i], 25);
printf("dprod_dut:"); printf("dprod_dut:");
dump(dest_ptrs[i], 25); dump(dest_ptrs[i], 25);
return -1; return -1;
} }
} }
#if (VECT == 1) #if (VECT == 1)
REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, *dest_ref); REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, *dest_ref);
#else #else
REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref); REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref);
#endif #endif
for (i = 0; i < vector; i++) { for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) { if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i); printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
dump_matrix(buffs, vector, TEST_SOURCES); dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:"); printf("dprod_base:");
dump(dest_ref[i], 25); dump(dest_ref[i], 25);
printf("dprod_dut:"); printf("dprod_dut:");
dump(dest_ptrs[i], 25); dump(dest_ptrs[i], 25);
return -1; return -1;
} }
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
// Rand data test // Rand data test
for (rtest = 0; rtest < RANDOMS; rtest++) { for (rtest = 0; rtest < RANDOMS; rtest++) {
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++) for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
for (j = 0; j < TEST_SOURCES; j++) { for (j = 0; j < TEST_SOURCES; j++) {
gf[i][j] = rand(); gf[i][j] = rand();
gf_vect_mul_init(gf[i][j], gf_vect_mul_init(gf[i][j],
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]); &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
} }
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES,
&g_tbls[i * 32 * TEST_SOURCES], buffs, &g_tbls[i * 32 * TEST_SOURCES], buffs, dest_ref[i]);
dest_ref[i]);
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
memset(dest_ptrs[i], 0, TEST_LEN); memset(dest_ptrs[i], 0, TEST_LEN);
for (i = 0; i < TEST_SOURCES; i++) { for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1) #if (VECT == 1)
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i],
*dest_ptrs); *dest_ptrs);
#else #else
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
dest_ptrs);
#endif #endif
} }
for (i = 0; i < vector; i++) { for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) { if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d %d\n", printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d %d\n", i,
i, rtest); rtest);
dump_matrix(buffs, vector, TEST_SOURCES); dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:"); printf("dprod_base:");
dump(dest_ref[i], 25); dump(dest_ref[i], 25);
printf("dprod_dut:"); printf("dprod_dut:");
dump(dest_ptrs[i], 25); dump(dest_ptrs[i], 25);
return -1; return -1;
} }
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
// Rand data test with varied parameters // Rand data test with varied parameters
for (rtest = 0; rtest < RANDOMS; rtest++) { for (rtest = 0; rtest < RANDOMS; rtest++) {
for (srcs = TEST_SOURCES; srcs > 0; srcs--) { for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
for (i = 0; i < srcs; i++) for (i = 0; i < srcs; i++)
for (j = 0; j < TEST_LEN; j++) for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
for (j = 0; j < srcs; j++) { for (j = 0; j < srcs; j++) {
gf[i][j] = rand(); gf[i][j] = rand();
gf_vect_mul_init(gf[i][j], gf_vect_mul_init(gf[i][j],
&g_tbls[i * (32 * srcs) + j * 32]); &g_tbls[i * (32 * srcs) + j * 32]);
} }
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[i * 32 * srcs], gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[i * 32 * srcs], buffs,
buffs, dest_ref[i]); dest_ref[i]);
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
memset(dest_ptrs[i], 0, TEST_LEN); memset(dest_ptrs[i], 0, TEST_LEN);
for (i = 0; i < srcs; i++) { for (i = 0; i < srcs; i++) {
#if (VECT == 1) #if (VECT == 1)
FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i], FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i],
*dest_ptrs); *dest_ptrs);
#else #else
FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i], FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i], dest_ptrs);
dest_ptrs);
#endif #endif
}
} for (i = 0; i < vector; i++) {
for (i = 0; i < vector; i++) { if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) { printf("Fail rand " xstr(
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) FUNCTION_UNDER_TEST) " test%d srcs=%d\n",
" test%d srcs=%d\n", i, srcs); i, srcs);
dump_matrix(buffs, vector, TEST_SOURCES); dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:"); printf("dprod_base:");
dump(dest_ref[i], 25); dump(dest_ref[i], 25);
printf("dprod_dut:"); printf("dprod_dut:");
dump(dest_ptrs[i], 25); dump(dest_ptrs[i], 25);
return -1; return -1;
} }
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
} }
// Run tests at end of buffer for Electric Fence // Run tests at end of buffer for Electric Fence
align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE; align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++) for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
efence_buffs[i] = buffs[i] + TEST_LEN - size; efence_buffs[i] = buffs[i] + TEST_LEN - size;
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
for (j = 0; j < TEST_SOURCES; j++) { for (j = 0; j < TEST_SOURCES; j++) {
gf[i][j] = rand(); gf[i][j] = rand();
gf_vect_mul_init(gf[i][j], gf_vect_mul_init(gf[i][j],
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]); &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
} }
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(size, TEST_SOURCES, gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES],
&g_tbls[i * 32 * TEST_SOURCES], efence_buffs, efence_buffs, dest_ref[i]);
dest_ref[i]);
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
memset(dest_ptrs[i], 0, size); memset(dest_ptrs[i], 0, size);
for (i = 0; i < TEST_SOURCES; i++) { for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1) #if (VECT == 1)
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i], FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
*dest_ptrs); *dest_ptrs);
#else #else
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i], FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
dest_ptrs); dest_ptrs);
#endif #endif
} }
for (i = 0; i < vector; i++) { for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) { if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d size=%d\n",
" test%d size=%d\n", i, size); i, size);
dump_matrix(buffs, vector, TEST_SOURCES); dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:"); printf("dprod_base:");
dump(dest_ref[i], TEST_MIN_SIZE + align); dump(dest_ref[i], TEST_MIN_SIZE + align);
printf("dprod_dut:"); printf("dprod_dut:");
dump(dest_ptrs[i], TEST_MIN_SIZE + align); dump(dest_ptrs[i], TEST_MIN_SIZE + align);
return -1; return -1;
} }
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
// Test rand ptr alignment if available // Test rand ptr alignment if available
for (rtest = 0; rtest < RANDOMS; rtest++) { for (rtest = 0; rtest < RANDOMS; rtest++) {
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
srcs = rand() % TEST_SOURCES; srcs = rand() % TEST_SOURCES;
if (srcs == 0) if (srcs == 0)
continue; continue;
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
// Add random offsets // Add random offsets
for (i = 0; i < srcs; i++) for (i = 0; i < srcs; i++)
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
for (i = 0; i < vector; i++) { for (i = 0; i < vector; i++) {
udest_ptrs[i] = dest_ptrs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); udest_ptrs[i] = dest_ptrs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
} }
for (i = 0; i < srcs; i++) for (i = 0; i < srcs; i++)
for (j = 0; j < size; j++) for (j = 0; j < size; j++)
ubuffs[i][j] = rand(); ubuffs[i][j] = rand();
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
for (j = 0; j < srcs; j++) { for (j = 0; j < srcs; j++) {
gf[i][j] = rand(); gf[i][j] = rand();
gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * srcs) + j * 32]); gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * srcs) + j * 32]);
} }
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(size, srcs, &g_tbls[i * 32 * srcs], ubuffs, gf_vect_dot_prod_base(size, srcs, &g_tbls[i * 32 * srcs], ubuffs,
dest_ref[i]); dest_ref[i]);
for (i = 0; i < srcs; i++) { for (i = 0; i < srcs; i++) {
#if (VECT == 1) #if (VECT == 1)
FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], *udest_ptrs); FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], *udest_ptrs);
#else #else
FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], udest_ptrs); FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], udest_ptrs);
#endif #endif
} }
for (i = 0; i < vector; i++) { for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], udest_ptrs[i], size)) { if (0 != memcmp(dest_ref[i], udest_ptrs[i], size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) printf("Fail rand " xstr(
" test%d ualign srcs=%d\n", i, srcs); FUNCTION_UNDER_TEST) " test%d ualign srcs=%d\n",
dump_matrix(buffs, vector, TEST_SOURCES); i, srcs);
printf("dprod_base:"); dump_matrix(buffs, vector, TEST_SOURCES);
dump(dest_ref[i], 25); printf("dprod_base:");
printf("dprod_dut:"); dump(dest_ref[i], 25);
dump(udest_ptrs[i], 25); printf("dprod_dut:");
return -1; dump(udest_ptrs[i], 25);
} return -1;
} }
}
// Confirm that padding around dests is unchanged // Confirm that padding around dests is unchanged
memset(dest_ref[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff memset(dest_ref[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
for (i = 0; i < vector; i++) { for (i = 0; i < vector; i++) {
offset = udest_ptrs[i] - dest_ptrs[i]; offset = udest_ptrs[i] - dest_ptrs[i];
if (memcmp(dest_ptrs[i], dest_ref[0], offset)) { if (memcmp(dest_ptrs[i], dest_ref[0], offset)) {
printf("Fail rand ualign pad1 start\n"); printf("Fail rand ualign pad1 start\n");
return -1; return -1;
} }
if (memcmp if (memcmp(dest_ptrs[i] + offset + size, dest_ref[0],
(dest_ptrs[i] + offset + size, dest_ref[0], PTR_ALIGN_CHK_B - offset)) {
PTR_ALIGN_CHK_B - offset)) { printf("Fail rand ualign pad1 end\n");
printf("Fail rand ualign pad1 end\n"); return -1;
return -1; }
} }
}
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
// Test all size alignment // Test all size alignment
align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE; align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
for (i = 0; i < TEST_SOURCES; i++) for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < size; j++) for (j = 0; j < size; j++)
buffs[i][j] = rand(); buffs[i][j] = rand();
for (i = 0; i < vector; i++) { for (i = 0; i < vector; i++) {
for (j = 0; j < TEST_SOURCES; j++) { for (j = 0; j < TEST_SOURCES; j++) {
gf[i][j] = rand(); gf[i][j] = rand();
gf_vect_mul_init(gf[i][j], gf_vect_mul_init(gf[i][j],
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]); &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
} }
memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
} }
for (i = 0; i < vector; i++) for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(size, TEST_SOURCES, gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES],
&g_tbls[i * 32 * TEST_SOURCES], buffs, buffs, dest_ref[i]);
dest_ref[i]);
for (i = 0; i < TEST_SOURCES; i++) { for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1) #if (VECT == 1)
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs);
*dest_ptrs);
#else #else
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
dest_ptrs);
#endif #endif
} }
for (i = 0; i < vector; i++) { for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) { if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) printf("Fail rand " xstr(
" test%d ualign len=%d\n", i, size); FUNCTION_UNDER_TEST) " test%d ualign len=%d\n",
dump_matrix(buffs, vector, TEST_SOURCES); i, size);
printf("dprod_base:"); dump_matrix(buffs, vector, TEST_SOURCES);
dump(dest_ref[i], 25); printf("dprod_base:");
printf("dprod_dut:"); dump(dest_ref[i], 25);
dump(dest_ptrs[i], 25); printf("dprod_dut:");
return -1; dump(dest_ptrs[i], 25);
} return -1;
} }
}
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
}
} printf("Pass\n");
return 0;
printf("Pass\n");
return 0;
} }

View File

@ -29,117 +29,116 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> // for memset #include <string.h> // for memset
#include "erasure_code.h" #include "erasure_code.h"
#define TEST_SIZE 8192 #define TEST_SIZE 8192
#define TEST_MEM TEST_SIZE #define TEST_MEM TEST_SIZE
#define TEST_LOOPS 100000 #define TEST_LOOPS 100000
#define TEST_TYPE_STR "" #define TEST_TYPE_STR ""
typedef unsigned char u8; typedef unsigned char u8;
int main(int argc, char *argv[]) int
main(int argc, char *argv[])
{ {
int i; int i;
u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2; u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
int align, size; int align, size;
unsigned char *efence_buff1; unsigned char *efence_buff1;
unsigned char *efence_buff2; unsigned char *efence_buff2;
printf("gf_vect_mul_base_test:\n"); printf("gf_vect_mul_base_test:\n");
gf_vect_mul_init(a, gf_const_tbl); gf_vect_mul_init(a, gf_const_tbl);
buff1 = (u8 *) malloc(TEST_SIZE); buff1 = (u8 *) malloc(TEST_SIZE);
buff2 = (u8 *) malloc(TEST_SIZE); buff2 = (u8 *) malloc(TEST_SIZE);
buff3 = (u8 *) malloc(TEST_SIZE); buff3 = (u8 *) malloc(TEST_SIZE);
if (NULL == buff1 || NULL == buff2 || NULL == buff3) { if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
printf("buffer alloc error\n"); printf("buffer alloc error\n");
return -1; return -1;
} }
// Fill with rand data // Fill with rand data
for (i = 0; i < TEST_SIZE; i++) for (i = 0; i < TEST_SIZE; i++)
buff1[i] = rand(); buff1[i] = rand();
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) { if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
printf("fail fill with rand data\n"); printf("fail fill with rand data\n");
return 1; return 1;
} }
for (i = 0; i < TEST_SIZE; i++) for (i = 0; i < TEST_SIZE; i++)
if (gf_mul(a, buff1[i]) != buff2[i]) { if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i], printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
gf_mul(2, buff1[i])); gf_mul(2, buff1[i]));
return 1; return 1;
} }
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) { if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) {
printf("fail fill with rand data for buff1\n"); printf("fail fill with rand data for buff1\n");
return -1; return -1;
} }
// Check reference function // Check reference function
for (i = 0; i < TEST_SIZE; i++) for (i = 0; i < TEST_SIZE; i++)
if (buff2[i] != buff3[i]) { if (buff2[i] != buff3[i]) {
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", i, a, buff1[i], buff2[i],
i, a, buff1[i], buff2[i], gf_mul(a, buff1[i])); gf_mul(a, buff1[i]));
return 1; return 1;
} }
for (i = 0; i < TEST_SIZE; i++) for (i = 0; i < TEST_SIZE; i++)
buff1[i] = rand(); buff1[i] = rand();
// Check each possible constant // Check each possible constant
printf("Random tests "); printf("Random tests ");
for (a = 0; a != 255; a++) { for (a = 0; a != 255; a++) {
gf_vect_mul_init(a, gf_const_tbl); gf_vect_mul_init(a, gf_const_tbl);
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) { if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
printf("fail random tests\n"); printf("fail random tests\n");
return 1; return 1;
} }
for (i = 0; i < TEST_SIZE; i++) for (i = 0; i < TEST_SIZE; i++)
if (gf_mul(a, buff1[i]) != buff2[i]) { if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", i, a, buff1[i],
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i])); buff2[i], gf_mul(2, buff1[i]));
return 1; return 1;
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
// Run tests at end of buffer for Electric Fence // Run tests at end of buffer for Electric Fence
align = 32; align = 32;
a = 2; a = 2;
gf_vect_mul_init(a, gf_const_tbl); gf_vect_mul_init(a, gf_const_tbl);
for (size = 0; size < TEST_SIZE; size += align) { for (size = 0; size < TEST_SIZE; size += align) {
// Line up TEST_SIZE from end // Line up TEST_SIZE from end
efence_buff1 = buff1 + size; efence_buff1 = buff1 + size;
efence_buff2 = buff2 + size; efence_buff2 = buff2 + size;
if (gf_vect_mul_base if (gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2) !=
(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2) != 0) { 0) {
printf("fail tests at end of buffer\n"); printf("fail tests at end of buffer\n");
return -1; return -1;
} }
for (i = 0; i < TEST_SIZE - size; i++) for (i = 0; i < TEST_SIZE - size; i++)
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) { if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, efence_buff1[i],
i, efence_buff1[i], efence_buff2[i], gf_mul(2, efence_buff2[i], gf_mul(2, efence_buff1[i]));
efence_buff1 return 1;
[i])); }
return 1;
}
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
printf(" done: Pass\n"); printf(" done: Pass\n");
return 0; return 0;
} }

View File

@ -29,63 +29,65 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> // for memset #include <string.h> // for memset
#include "erasure_code.h" #include "erasure_code.h"
#include "test.h" #include "test.h"
/*
 * Benchmark sizing.  Every arithmetic macro body is parenthesized so that it
 * expands as a single value inside larger expressions such as
 * `x / TEST_LEN` or `(long long) TEST_LEN`.
 */
#ifndef GT_L3_CACHE
#define GT_L3_CACHE (32 * 1024 * 1024) /* some number > last level cache */
#endif

#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset
#define TEST_SOURCES  10
#define TEST_LEN      (8 * 1024)
#define TEST_TYPE_STR "_warm"
#elif defined(COLD_TEST)
// Uncached test.  Pull from large mem base.
#define TEST_SOURCES  10
#define TEST_LEN      (GT_L3_CACHE / 2)
#define TEST_TYPE_STR "_cold"
#elif defined(TEST_CUSTOM)
// NOTE(review): TEST_SOURCES and TEST_LEN are not defined on this branch;
// presumably they are supplied by the build (e.g. -D flags) - confirm.
#define TEST_TYPE_STR "_cus"
#endif

#define TEST_MEM (2 * TEST_LEN)

typedef unsigned char u8;
/*
 * Unit of work timed by the benchmark: build the multiplication table for the
 * constant `a` into gf_const_tbl, then multiply TEST_LEN bytes of buff1 by it,
 * writing the products to buff2.
 */
void
gf_vect_mul_perf(u8 a, u8 *gf_const_tbl, u8 *buff1, u8 *buff2)
{
        /* Table setup is deliberately part of the timed region. */
        gf_vect_mul_init(a, gf_const_tbl);

        gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
}
int main(int argc, char *argv[]) int
main(int argc, char *argv[])
{ {
u8 *buff1, *buff2, gf_const_tbl[64], a = 2; u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
struct perf start; struct perf start;
printf("gf_vect_mul_perf:\n"); printf("gf_vect_mul_perf:\n");
// Allocate large mem region // Allocate large mem region
buff1 = (u8 *) malloc(TEST_LEN); buff1 = (u8 *) malloc(TEST_LEN);
buff2 = (u8 *) malloc(TEST_LEN); buff2 = (u8 *) malloc(TEST_LEN);
if (NULL == buff1 || NULL == buff2) { if (NULL == buff1 || NULL == buff2) {
printf("Failed to allocate %dB\n", TEST_LEN); printf("Failed to allocate %dB\n", TEST_LEN);
return 1; return 1;
} }
memset(buff1, 0, TEST_LEN); memset(buff1, 0, TEST_LEN);
memset(buff2, 0, TEST_LEN); memset(buff2, 0, TEST_LEN);
printf("Start timed tests\n"); printf("Start timed tests\n");
fflush(0); fflush(0);
BENCHMARK(&start, BENCHMARK_TIME, gf_vect_mul_perf(a, gf_const_tbl, buff1, buff2)); BENCHMARK(&start, BENCHMARK_TIME, gf_vect_mul_perf(a, gf_const_tbl, buff1, buff2));
printf("gf_vect_mul" TEST_TYPE_STR ": "); printf("gf_vect_mul" TEST_TYPE_STR ": ");
perf_print(start, (long long)TEST_LEN); perf_print(start, (long long) TEST_LEN);
return 0; return 0;
} }

View File

@ -31,165 +31,164 @@
#include <stdlib.h> #include <stdlib.h>
#include "erasure_code.h" #include "erasure_code.h"
#define TEST_SIZE (128*1024) #define TEST_SIZE (128 * 1024)
typedef unsigned char u8; typedef unsigned char u8;
int main(int argc, char *argv[]) int
main(int argc, char *argv[])
{ {
int i, ret = -1; int i, ret = -1;
u8 *buff1 = NULL, *buff2 = NULL, *buff3 = NULL, gf_const_tbl[64], a = 2; u8 *buff1 = NULL, *buff2 = NULL, *buff3 = NULL, gf_const_tbl[64], a = 2;
int tsize; int tsize;
int align, size; int align, size;
unsigned char *efence_buff1; unsigned char *efence_buff1;
unsigned char *efence_buff2; unsigned char *efence_buff2;
unsigned char *efence_buff3; unsigned char *efence_buff3;
printf("gf_vect_mul_test: "); printf("gf_vect_mul_test: ");
gf_vect_mul_init(a, gf_const_tbl); gf_vect_mul_init(a, gf_const_tbl);
buff1 = (u8 *) malloc(TEST_SIZE); buff1 = (u8 *) malloc(TEST_SIZE);
buff2 = (u8 *) malloc(TEST_SIZE); buff2 = (u8 *) malloc(TEST_SIZE);
buff3 = (u8 *) malloc(TEST_SIZE); buff3 = (u8 *) malloc(TEST_SIZE);
if (NULL == buff1 || NULL == buff2 || NULL == buff3) { if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
printf("buffer alloc error\n"); printf("buffer alloc error\n");
goto exit; goto exit;
} }
// Fill with rand data // Fill with rand data
for (i = 0; i < TEST_SIZE; i++) for (i = 0; i < TEST_SIZE; i++)
buff1[i] = rand(); buff1[i] = rand();
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) { if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
printf("fail creating buff2\n"); printf("fail creating buff2\n");
goto exit; goto exit;
} }
for (i = 0; i < TEST_SIZE; i++) { for (i = 0; i < TEST_SIZE; i++) {
if (gf_mul(a, buff1[i]) != buff2[i]) { if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
buff1[i], buff2[i], gf_mul(2, buff1[i])); gf_mul(2, buff1[i]));
goto exit; goto exit;
} }
} }
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) { if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) {
printf("fail fill with rand data\n"); printf("fail fill with rand data\n");
goto exit; goto exit;
} }
// Check reference function // Check reference function
for (i = 0; i < TEST_SIZE; i++) { for (i = 0; i < TEST_SIZE; i++) {
if (buff2[i] != buff3[i]) { if (buff2[i] != buff3[i]) {
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", i, a, buff1[i], buff2[i],
i, a, buff1[i], buff2[i], gf_mul(a, buff1[i])); gf_mul(a, buff1[i]));
goto exit; goto exit;
} }
} }
for (i = 0; i < TEST_SIZE; i++) for (i = 0; i < TEST_SIZE; i++)
buff1[i] = rand(); buff1[i] = rand();
// Check each possible constant // Check each possible constant
for (a = 0; a != 255; a++) { for (a = 0; a != 255; a++) {
gf_vect_mul_init(a, gf_const_tbl); gf_vect_mul_init(a, gf_const_tbl);
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) { if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
printf("fail creating buff2\n"); printf("fail creating buff2\n");
goto exit; goto exit;
} }
for (i = 0; i < TEST_SIZE; i++) for (i = 0; i < TEST_SIZE; i++)
if (gf_mul(a, buff1[i]) != buff2[i]) { if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", i, a, buff1[i],
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i])); buff2[i], gf_mul(2, buff1[i]));
goto exit; goto exit;
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
// Check buffer len // Check buffer len
for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) { for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) {
a = rand(); a = rand();
gf_vect_mul_init(a, gf_const_tbl); gf_vect_mul_init(a, gf_const_tbl);
if (gf_vect_mul(tsize, gf_const_tbl, buff1, buff2) != 0) { if (gf_vect_mul(tsize, gf_const_tbl, buff1, buff2) != 0) {
printf("fail creating buff2 (len %d)\n", tsize); printf("fail creating buff2 (len %d)\n", tsize);
goto exit; goto exit;
} }
for (i = 0; i < tsize; i++) for (i = 0; i < tsize; i++)
if (gf_mul(a, buff1[i]) != buff2[i]) { if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", i, a, buff1[i],
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i])); buff2[i], gf_mul(2, buff1[i]));
goto exit; goto exit;
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
if (0 == tsize % (32 * 8)) { if (0 == tsize % (32 * 8)) {
putchar('.'); putchar('.');
fflush(0); fflush(0);
} }
#endif #endif
} }
// Run tests at end of buffer for Electric Fence // Run tests at end of buffer for Electric Fence
align = 32; align = 32;
a = 2; a = 2;
gf_vect_mul_init(a, gf_const_tbl); gf_vect_mul_init(a, gf_const_tbl);
for (size = 0; size < TEST_SIZE; size += align) { for (size = 0; size < TEST_SIZE; size += align) {
// Line up TEST_SIZE from end // Line up TEST_SIZE from end
efence_buff1 = buff1 + size; efence_buff1 = buff1 + size;
efence_buff2 = buff2 + size; efence_buff2 = buff2 + size;
efence_buff3 = buff3 + size; efence_buff3 = buff3 + size;
gf_vect_mul(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2); gf_vect_mul(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
for (i = 0; i < TEST_SIZE - size; i++) for (i = 0; i < TEST_SIZE - size; i++)
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) { if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, efence_buff1[i],
i, efence_buff1[i], efence_buff2[i], efence_buff2[i], gf_mul(2, efence_buff1[i]));
gf_mul(2, efence_buff1[i])); goto exit;
goto exit; }
}
if (gf_vect_mul_base if (gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3) !=
(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3) != 0) { 0) {
printf("fail line up TEST_SIZE from end\n"); printf("fail line up TEST_SIZE from end\n");
goto exit; goto exit;
} }
// Check reference function // Check reference function
for (i = 0; i < TEST_SIZE - size; i++) for (i = 0; i < TEST_SIZE - size; i++)
if (efence_buff2[i] != efence_buff3[i]) { if (efence_buff2[i] != efence_buff3[i]) {
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", i, a,
i, a, efence_buff2[i], efence_buff3[i], efence_buff2[i], efence_buff3[i],
gf_mul(2, efence_buff1[i])); gf_mul(2, efence_buff1[i]));
goto exit; goto exit;
} }
#ifdef TEST_VERBOSE #ifdef TEST_VERBOSE
putchar('.'); putchar('.');
#endif #endif
} }
// Test all unsupported sizes up to TEST_SIZE // Test all unsupported sizes up to TEST_SIZE
for (size = 0; size < TEST_SIZE; size++) { for (size = 0; size < TEST_SIZE; size++) {
if (size % align != 0 && gf_vect_mul(size, gf_const_tbl, buff1, buff2) == 0) { if (size % align != 0 && gf_vect_mul(size, gf_const_tbl, buff1, buff2) == 0) {
printf printf("fail expecting nonzero return code for unaligned size param (%d)\n",
("fail expecting nonzero return code for unaligned size param (%d)\n", size);
size); goto exit;
goto exit; }
} }
}
printf(" done: Pass\n"); printf(" done: Pass\n");
fflush(0); fflush(0);
ret = 0; ret = 0;
exit: exit:
free(buff1); free(buff1);
free(buff2); free(buff2);
free(buff3); free(buff3);
return ret; return ret;
} }

View File

@ -1,106 +1,109 @@
#include "erasure_code.h" #include "erasure_code.h"
#include "ec_base_vsx.h" #include "ec_base_vsx.h"
/*
 * Public gf_vect_dot_prod() entry point for this build: forwards every
 * argument unchanged to gf_vect_dot_prod_vsx().
 */
void
gf_vect_dot_prod(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest)
{
        /* No size threshold or fallback here; the VSX routine is always used. */
        gf_vect_dot_prod_vsx(len, vlen, v, src, dest);
}
/*
 * Public gf_vect_mad() entry point for this build: forwards every argument
 * unchanged to gf_vect_mad_vsx().
 */
void
gf_vect_mad(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, unsigned char *dest)
{
        /* No size threshold or fallback here; the VSX routine is always used. */
        gf_vect_mad_vsx(len, vec, vec_i, v, src, dest);
}
/*
 * Encode `dests` output buffers from `srcs` source buffers of `len` bytes.
 *
 * Lengths below 64 bytes are handed to ec_encode_data_base().  Otherwise the
 * outputs are produced six at a time with gf_6vect_dot_prod_vsx(); after each
 * group the table pointer `v` is advanced by 6 * srcs * 32 bytes and `dest`
 * by six entries.  The remaining one to five outputs go to the matching
 * N-vector VSX routine.
 */
void
ec_encode_data(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
               unsigned char **dest)
{
        if (len < 64) {
                ec_encode_data_base(len, srcs, dests, v, src, dest);
                return;
        }

        while (dests >= 6) {
                gf_6vect_dot_prod_vsx(len, srcs, v, src, dest);
                v += 6 * srcs * 32;
                dest += 6;
                dests -= 6;
        }

        /* The loop above leaves dests < 6, so only the tail sizes remain. */
        switch (dests) {
        case 5:
                gf_5vect_dot_prod_vsx(len, srcs, v, src, dest);
                break;
        case 4:
                gf_4vect_dot_prod_vsx(len, srcs, v, src, dest);
                break;
        case 3:
                gf_3vect_dot_prod_vsx(len, srcs, v, src, dest);
                break;
        case 2:
                gf_2vect_dot_prod_vsx(len, srcs, v, src, dest);
                break;
        case 1:
                /* The single-vector routine takes the buffer, not the array. */
                gf_vect_dot_prod_vsx(len, srcs, v, src, *dest);
                break;
        default:
                /* Zero (or a non-positive count): nothing left to encode. */
                break;
        }
}
/*
 * Incremental-encode dispatcher built on the VSX multiply-accumulate kernels.
 *
 * Lengths below 64 are delegated entirely to ec_encode_data_update_base().
 * Otherwise rows are updated six at a time with gf_6vect_mad_vsx(),
 * advancing v by 6 * k * 32 bytes and dest by six entries per pass; the
 * remaining one to five rows go to the kernel of matching width.
 *
 * @param len   Passed through as the length argument of every kernel.
 * @param k     Passed to the kernels as their "vec" argument; also sets the
 *              table stride (32 * k bytes per row).
 * @param rows  Number of entries in dest.
 * @param vec_i Passed to the kernels as their "vec_i" argument.
 * @param v     Table bytes consumed by the kernels.
 * @param data  Source buffer.
 * @param dest  Array of destination pointers (read and written by the kernels).
 */
void
ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v, unsigned char *data,
                      unsigned char **dest)
{
        if (len < 64) {
                ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
                return;
        }

        while (rows >= 6) {
                gf_6vect_mad_vsx(len, k, vec_i, v, data, dest);
                v += 6 * k * 32;
                dest += 6;
                rows -= 6;
        }

        /* The loop above leaves rows < 6, so no six-wide case is needed. */
        switch (rows) {
        case 5:
                gf_5vect_mad_vsx(len, k, vec_i, v, data, dest);
                break;
        case 4:
                gf_4vect_mad_vsx(len, k, vec_i, v, data, dest);
                break;
        case 3:
                gf_3vect_mad_vsx(len, k, vec_i, v, data, dest);
                break;
        case 2:
                gf_2vect_mad_vsx(len, k, vec_i, v, data, dest);
                break;
        case 1:
                /* The single-output kernel takes the buffer, not the array. */
                gf_vect_mad_vsx(len, k, vec_i, v, data, *dest);
                break;
        default:
                /* Nothing left to update. */
                break;
        }
}
/*
 * GF(2^8) vector multiply through the VSX kernel.
 *
 * @returns -1 without touching dest when len is not a multiple of 32,
 *          otherwise 0 after gf_vect_mul_vsx() has run.
 */
int
gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
{
        unsigned char *in = (unsigned char *) src;
        unsigned char *out = (unsigned char *) dest;

        /* Only lengths that are a whole number of 32-byte units are accepted. */
        if (len % 32)
                return -1;

        gf_vect_mul_vsx(len, a, in, out);
        return 0;
}
/*
 * Table initialisation: forwards every argument unchanged to
 * ec_init_tables_base().
 */
void
ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
        /* A void function must not return an expression in C; just call it. */
        ec_init_tables_base(k, rows, a, g_tbls);
}

View File

@ -9,29 +9,37 @@ extern "C" {
#endif #endif
#if defined(__ibmxl__) #if defined(__ibmxl__)
#define EC_vec_xl(a, b) vec_xl_be(a, b) #define EC_vec_xl(a, b) vec_xl_be(a, b)
#define EC_vec_permxor(va, vb, vc) __vpermxor(va, vb, vc) #define EC_vec_permxor(va, vb, vc) __vpermxor(va, vb, vc)
#elif defined __GNUC__ && __GNUC__ >= 8 #elif defined __GNUC__ && __GNUC__ >= 8
#define EC_vec_xl(a, b) vec_xl_be(a, b) #define EC_vec_xl(a, b) vec_xl_be(a, b)
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vc) #define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vc)
#elif defined __GNUC__ && __GNUC__ >= 7 #elif defined __GNUC__ && __GNUC__ >= 7
#if defined _ARCH_PWR9 #if defined _ARCH_PWR9
#define EC_vec_xl(a, b) vec_vsx_ld(a, b) #define EC_vec_xl(a, b) vec_vsx_ld(a, b)
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc)) #define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
#else #else
inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) { inline vector unsigned char
vector unsigned char vc; EC_vec_xl(int off, unsigned char *ptr)
__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0" : "=wa" (vc) : "r" (off), "r" (ptr)); {
return vc; vector unsigned char vc;
__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0"
: "=wa"(vc)
: "r"(off), "r"(ptr));
return vc;
} }
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc)) #define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
#endif #endif
#else #else
#if defined _ARCH_PWR8 #if defined _ARCH_PWR8
inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) { inline vector unsigned char
vector unsigned char vc; EC_vec_xl(int off, unsigned char *ptr)
__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0" : "=wa" (vc) : "r" (off), "r" (ptr)); {
return vc; vector unsigned char vc;
__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0"
: "=wa"(vc)
: "r"(off), "r"(ptr));
return vc;
} }
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc)) #define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
#else #else
@ -57,7 +65,8 @@ inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
* @returns none * @returns none
*/ */
void gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigned char *dest); void
gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigned char *dest);
/** /**
* @brief GF(2^8) vector dot product. VSX version. * @brief GF(2^8) vector dot product. VSX version.
@ -77,8 +86,9 @@ void gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigne
* @returns none * @returns none
*/ */
void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, void
unsigned char **src, unsigned char *dest); gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char *dest);
/** /**
* @brief GF(2^8) vector dot product with two outputs. VSX version. * @brief GF(2^8) vector dot product with two outputs. VSX version.
@ -99,8 +109,9 @@ void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
* @returns none * @returns none
*/ */
void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, void
unsigned char **src, unsigned char **dest); gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
/** /**
* @brief GF(2^8) vector dot product with three outputs. VSX version. * @brief GF(2^8) vector dot product with three outputs. VSX version.
@ -121,8 +132,9 @@ void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
* @returns none * @returns none
*/ */
void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, void
unsigned char **src, unsigned char **dest); gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
/** /**
* @brief GF(2^8) vector dot product with four outputs. VSX version. * @brief GF(2^8) vector dot product with four outputs. VSX version.
@ -143,8 +155,9 @@ void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
* @returns none * @returns none
*/ */
void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, void
unsigned char **src, unsigned char **dest); gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
/** /**
* @brief GF(2^8) vector dot product with five outputs. VSX version. * @brief GF(2^8) vector dot product with five outputs. VSX version.
@ -165,8 +178,9 @@ void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
* @returns none * @returns none
*/ */
void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, void
unsigned char **src, unsigned char **dest); gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
/** /**
* @brief GF(2^8) vector dot product with six outputs. VSX version. * @brief GF(2^8) vector dot product with six outputs. VSX version.
@ -187,8 +201,9 @@ void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
* @returns none * @returns none
*/ */
void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, void
unsigned char **src, unsigned char **dest); gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
/** /**
* @brief GF(2^8) vector multiply accumulate. VSX version. * @brief GF(2^8) vector multiply accumulate. VSX version.
@ -211,8 +226,9 @@ void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
* @returns none * @returns none
*/ */
void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, void
unsigned char *dest); gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest);
/** /**
* @brief GF(2^8) vector multiply with 2 accumulate. VSX version. * @brief GF(2^8) vector multiply with 2 accumulate. VSX version.
* *
@ -234,8 +250,9 @@ void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigne
* @returns none * @returns none
*/ */
void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, void
unsigned char **dest); gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
/** /**
* @brief GF(2^8) vector multiply with 3 accumulate. VSX version. * @brief GF(2^8) vector multiply with 3 accumulate. VSX version.
@ -258,8 +275,9 @@ void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
* @returns none * @returns none
*/ */
void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, void
unsigned char **dest); gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
/** /**
* @brief GF(2^8) vector multiply with 4 accumulate. VSX version. * @brief GF(2^8) vector multiply with 4 accumulate. VSX version.
@ -282,8 +300,9 @@ void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
* @returns none * @returns none
*/ */
void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, void
unsigned char **dest); gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
/** /**
* @brief GF(2^8) vector multiply with 5 accumulate. VSX version. * @brief GF(2^8) vector multiply with 5 accumulate. VSX version.
@ -305,8 +324,9 @@ void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
* @param dest Array of pointers to destination input/outputs. * @param dest Array of pointers to destination input/outputs.
* @returns none * @returns none
*/ */
void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, void
unsigned char **dest); gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
/** /**
* @brief GF(2^8) vector multiply with 6 accumulate. VSX version. * @brief GF(2^8) vector multiply with 6 accumulate. VSX version.
@ -328,8 +348,9 @@ void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
* @param dest Array of pointers to destination input/outputs. * @param dest Array of pointers to destination input/outputs.
* @returns none * @returns none
*/ */
void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, void
unsigned char **dest); gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -1,83 +1,84 @@
#include "ec_base_vsx.h" #include "ec_base_vsx.h"
void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, void
unsigned char **src, unsigned char **dest) gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest)
{ {
unsigned char *s, *t0, *t1; unsigned char *s, *t0, *t1;
vector unsigned char vX1, vX2, vX3, vX4; vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4; vector unsigned char vY1, vY2, vY3, vY4;
vector unsigned char vYD, vYE, vYF, vYG; vector unsigned char vYD, vYE, vYF, vYG;
vector unsigned char vhi0, vlo0, vhi1, vlo1; vector unsigned char vhi0, vlo0, vhi1, vlo1;
int i, j, head; int i, j, head;
if (vlen < 128) { if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]); gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]); gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
for (j = 1; j < vlen; j++) { for (j = 1; j < vlen; j++) {
gf_2vect_mad_vsx(len, vlen, j, gftbls, src[j], dest); gf_2vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
} }
return; return;
} }
t0 = (unsigned char *)dest[0]; t0 = (unsigned char *) dest[0];
t1 = (unsigned char *)dest[1]; t1 = (unsigned char *) dest[1];
head = len % 64; head = len % 64;
if (head != 0) { if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0); gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1); gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
} }
for (i = head; i < len - 63; i += 64) { for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1; vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2; vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3; vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4; vY4 = vY4 ^ vY4;
vYD = vYD ^ vYD; vYD = vYD ^ vYD;
vYE = vYE ^ vYE; vYE = vYE ^ vYE;
vYF = vYF ^ vYF; vYF = vYF ^ vYF;
vYG = vYG ^ vYG; vYG = vYG ^ vYG;
unsigned char *g0 = &gftbls[0 * 32 * vlen]; unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen]; unsigned char *g1 = &gftbls[1 * 32 * vlen];
for (j = 0; j < vlen; j++) { for (j = 0; j < vlen; j++) {
s = (unsigned char *)src[j]; s = (unsigned char *) src[j];
vX1 = vec_xl(0, s + i); vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i); vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i); vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i); vX4 = vec_xl(48, s + i);
vlo0 = EC_vec_xl(0, g0); vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0); vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1); vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1); vhi1 = EC_vec_xl(16, g1);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
g0 += 32; g0 += 32;
g1 += 32; g1 += 32;
} }
vec_xst(vY1, 0, t0 + i); vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i); vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i); vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i); vec_xst(vY4, 16, t1 + i);
vec_xst(vYD, 32, t0 + i); vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i); vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i); vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i); vec_xst(vYG, 48, t1 + i);
} }
return; return;
} }

View File

@ -1,65 +1,66 @@
#include "ec_base_vsx.h" #include "ec_base_vsx.h"
void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, void
unsigned char *src, unsigned char **dest) gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest)
{ {
unsigned char *s, *t0, *t1; unsigned char *s, *t0, *t1;
vector unsigned char vX1, vX2, vX3, vX4; vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4; vector unsigned char vY1, vY2, vY3, vY4;
vector unsigned char vYD, vYE, vYF, vYG; vector unsigned char vYD, vYE, vYF, vYG;
vector unsigned char vhi0, vlo0, vhi1, vlo1; vector unsigned char vhi0, vlo0, vhi1, vlo1;
int i, head; int i, head;
s = (unsigned char *)src; s = (unsigned char *) src;
t0 = (unsigned char *)dest[0]; t0 = (unsigned char *) dest[0];
t1 = (unsigned char *)dest[1]; t1 = (unsigned char *) dest[1];
head = len % 64; head = len % 64;
if (head != 0) { if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0); gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1); gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
} }
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5))); vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5))); vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5))); vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5))); vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
for (i = head; i < len - 63; i += 64) { for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i); vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i); vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i); vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i); vX4 = vec_xl(48, s + i);
vY1 = vec_xl(0, t0 + i); vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i); vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i); vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i); vYE = vec_xl(48, t0 + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY3 = vec_xl(0, t1 + i); vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i); vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i); vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i); vYG = vec_xl(48, t1 + i);
vec_xst(vY1, 0, t0 + i); vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i); vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i); vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i); vec_xst(vYE, 48, t0 + i);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vec_xst(vY3, 0, t1 + i); vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i); vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i); vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i); vec_xst(vYG, 48, t1 + i);
} }
return; return;
} }

View File

@ -1,104 +1,105 @@
#include "ec_base_vsx.h" #include "ec_base_vsx.h"
void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, void
unsigned char **src, unsigned char **dest) gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest)
{ {
unsigned char *s, *t0, *t1, *t2; unsigned char *s, *t0, *t1, *t2;
vector unsigned char vX1, vX2, vX3, vX4; vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6; vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI; vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2; vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
int i, j, head; int i, j, head;
if (vlen < 128) { if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]); gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]); gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]); gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
for (j = 1; j < vlen; j++) { for (j = 1; j < vlen; j++) {
gf_3vect_mad_vsx(len, vlen, j, gftbls, src[j], dest); gf_3vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
} }
return; return;
} }
t0 = (unsigned char *)dest[0]; t0 = (unsigned char *) dest[0];
t1 = (unsigned char *)dest[1]; t1 = (unsigned char *) dest[1];
t2 = (unsigned char *)dest[2]; t2 = (unsigned char *) dest[2];
head = len % 64; head = len % 64;
if (head != 0) { if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0); gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1); gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2); gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
} }
for (i = head; i < len - 63; i += 64) { for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1; vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2; vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3; vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4; vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5; vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6; vY6 = vY6 ^ vY6;
vYD = vYD ^ vYD; vYD = vYD ^ vYD;
vYE = vYE ^ vYE; vYE = vYE ^ vYE;
vYF = vYF ^ vYF; vYF = vYF ^ vYF;
vYG = vYG ^ vYG; vYG = vYG ^ vYG;
vYH = vYH ^ vYH; vYH = vYH ^ vYH;
vYI = vYI ^ vYI; vYI = vYI ^ vYI;
unsigned char *g0 = &gftbls[0 * 32 * vlen]; unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen]; unsigned char *g1 = &gftbls[1 * 32 * vlen];
unsigned char *g2 = &gftbls[2 * 32 * vlen]; unsigned char *g2 = &gftbls[2 * 32 * vlen];
for (j = 0; j < vlen; j++) { for (j = 0; j < vlen; j++) {
s = (unsigned char *)src[j]; s = (unsigned char *) src[j];
vX1 = vec_xl(0, s + i); vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i); vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i); vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i); vX4 = vec_xl(48, s + i);
vlo0 = EC_vec_xl(0, g0); vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0); vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1); vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1); vhi1 = EC_vec_xl(16, g1);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vlo2 = vec_xl(0, g2); vlo2 = vec_xl(0, g2);
vhi2 = vec_xl(16, g2); vhi2 = vec_xl(16, g2);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
g0 += 32; g0 += 32;
g1 += 32; g1 += 32;
g2 += 32; g2 += 32;
} }
vec_xst(vY1, 0, t0 + i); vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i); vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i); vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i); vec_xst(vY4, 16, t1 + i);
vec_xst(vY5, 0, t2 + i); vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i); vec_xst(vY6, 16, t2 + i);
vec_xst(vYD, 32, t0 + i); vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i); vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i); vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i); vec_xst(vYG, 48, t1 + i);
vec_xst(vYH, 32, t2 + i); vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i); vec_xst(vYI, 48, t2 + i);
} }
return; return;
} }

View File

@ -1,84 +1,85 @@
#include "ec_base_vsx.h" #include "ec_base_vsx.h"
void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, void
unsigned char *src, unsigned char **dest) gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest)
{ {
unsigned char *s, *t0, *t1, *t2; unsigned char *s, *t0, *t1, *t2;
vector unsigned char vX1, vX2, vX3, vX4; vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6; vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI; vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2; vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
int i, head; int i, head;
s = (unsigned char *)src; s = (unsigned char *) src;
t0 = (unsigned char *)dest[0]; t0 = (unsigned char *) dest[0];
t1 = (unsigned char *)dest[1]; t1 = (unsigned char *) dest[1];
t2 = (unsigned char *)dest[2]; t2 = (unsigned char *) dest[2];
head = len % 64; head = len % 64;
if (head != 0) { if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0); gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1); gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2); gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
} }
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5))); vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5))); vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5))); vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5))); vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5))); vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5))); vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
for (i = head; i < len - 63; i += 64) { for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i); vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i); vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i); vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i); vX4 = vec_xl(48, s + i);
vY1 = vec_xl(0, t0 + i); vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i); vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i); vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i); vYE = vec_xl(48, t0 + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY3 = vec_xl(0, t1 + i); vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i); vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i); vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i); vYG = vec_xl(48, t1 + i);
vec_xst(vY1, 0, t0 + i); vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i); vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i); vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i); vec_xst(vYE, 48, t0 + i);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY5 = vec_xl(0, t2 + i); vY5 = vec_xl(0, t2 + i);
vY6 = vec_xl(16, t2 + i); vY6 = vec_xl(16, t2 + i);
vYH = vec_xl(32, t2 + i); vYH = vec_xl(32, t2 + i);
vYI = vec_xl(48, t2 + i); vYI = vec_xl(48, t2 + i);
vec_xst(vY3, 0, t1 + i); vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i); vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i); vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i); vec_xst(vYG, 48, t1 + i);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vec_xst(vY5, 0, t2 + i); vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i); vec_xst(vY6, 16, t2 + i);
vec_xst(vYH, 32, t2 + i); vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i); vec_xst(vYI, 48, t2 + i);
} }
return; return;
} }

View File

@ -1,124 +1,125 @@
#include "ec_base_vsx.h" #include "ec_base_vsx.h"
void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, void
unsigned char **src, unsigned char **dest) gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest)
{ {
unsigned char *s, *t0, *t1, *t2, *t3; unsigned char *s, *t0, *t1, *t2, *t3;
vector unsigned char vX1, vX2, vX3, vX4; vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8; vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK; vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3; vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
int i, j, head; int i, j, head;
if (vlen < 128) { if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]); gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]); gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]); gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]); gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *) dest[3]);
for (j = 1; j < vlen; j++) { for (j = 1; j < vlen; j++) {
gf_4vect_mad_vsx(len, vlen, j, gftbls, src[j], dest); gf_4vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
} }
return; return;
} }
t0 = (unsigned char *)dest[0]; t0 = (unsigned char *) dest[0];
t1 = (unsigned char *)dest[1]; t1 = (unsigned char *) dest[1];
t2 = (unsigned char *)dest[2]; t2 = (unsigned char *) dest[2];
t3 = (unsigned char *)dest[3]; t3 = (unsigned char *) dest[3];
head = len % 64; head = len % 64;
if (head != 0) { if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0); gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1); gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2); gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3); gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
} }
for (i = head; i < len - 63; i += 64) { for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1; vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2; vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3; vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4; vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5; vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6; vY6 = vY6 ^ vY6;
vY7 = vY7 ^ vY7; vY7 = vY7 ^ vY7;
vY8 = vY8 ^ vY8; vY8 = vY8 ^ vY8;
vYD = vYD ^ vYD; vYD = vYD ^ vYD;
vYE = vYE ^ vYE; vYE = vYE ^ vYE;
vYF = vYF ^ vYF; vYF = vYF ^ vYF;
vYG = vYG ^ vYG; vYG = vYG ^ vYG;
vYH = vYH ^ vYH; vYH = vYH ^ vYH;
vYI = vYI ^ vYI; vYI = vYI ^ vYI;
vYJ = vYJ ^ vYJ; vYJ = vYJ ^ vYJ;
vYK = vYK ^ vYK; vYK = vYK ^ vYK;
unsigned char *g0 = &gftbls[0 * 32 * vlen]; unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen]; unsigned char *g1 = &gftbls[1 * 32 * vlen];
unsigned char *g2 = &gftbls[2 * 32 * vlen]; unsigned char *g2 = &gftbls[2 * 32 * vlen];
unsigned char *g3 = &gftbls[3 * 32 * vlen]; unsigned char *g3 = &gftbls[3 * 32 * vlen];
for (j = 0; j < vlen; j++) { for (j = 0; j < vlen; j++) {
s = (unsigned char *)src[j]; s = (unsigned char *) src[j];
vX1 = vec_xl(0, s + i); vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i); vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i); vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i); vX4 = vec_xl(48, s + i);
vlo0 = EC_vec_xl(0, g0); vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0); vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1); vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1); vhi1 = EC_vec_xl(16, g1);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vlo2 = vec_xl(0, g2); vlo2 = vec_xl(0, g2);
vhi2 = vec_xl(16, g2); vhi2 = vec_xl(16, g2);
vlo3 = vec_xl(0, g3); vlo3 = vec_xl(0, g3);
vhi3 = vec_xl(16, g3); vhi3 = vec_xl(16, g3);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1); vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2); vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3); vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4); vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
g0 += 32; g0 += 32;
g1 += 32; g1 += 32;
g2 += 32; g2 += 32;
g3 += 32; g3 += 32;
} }
vec_xst(vY1, 0, t0 + i); vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i); vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i); vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i); vec_xst(vY4, 16, t1 + i);
vec_xst(vY5, 0, t2 + i); vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i); vec_xst(vY6, 16, t2 + i);
vec_xst(vY7, 0, t3 + i); vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i); vec_xst(vY8, 16, t3 + i);
vec_xst(vYD, 32, t0 + i); vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i); vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i); vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i); vec_xst(vYG, 48, t1 + i);
vec_xst(vYH, 32, t2 + i); vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i); vec_xst(vYI, 48, t2 + i);
vec_xst(vYJ, 32, t3 + i); vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i); vec_xst(vYK, 48, t3 + i);
} }
return; return;
} }

View File

@ -1,103 +1,104 @@
#include "ec_base_vsx.h" #include "ec_base_vsx.h"
void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, void
unsigned char *src, unsigned char **dest) gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest)
{ {
unsigned char *s, *t0, *t1, *t2, *t3; unsigned char *s, *t0, *t1, *t2, *t3;
vector unsigned char vX1, vX2, vX3, vX4; vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8; vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK; vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3; vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
int i, head; int i, head;
s = (unsigned char *)src; s = (unsigned char *) src;
t0 = (unsigned char *)dest[0]; t0 = (unsigned char *) dest[0];
t1 = (unsigned char *)dest[1]; t1 = (unsigned char *) dest[1];
t2 = (unsigned char *)dest[2]; t2 = (unsigned char *) dest[2];
t3 = (unsigned char *)dest[3]; t3 = (unsigned char *) dest[3];
head = len % 64; head = len % 64;
if (head != 0) { if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0); gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1); gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2); gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3); gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
} }
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5))); vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5))); vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5))); vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5))); vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5))); vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5))); vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5))); vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5))); vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
for (i = head; i < len - 63; i += 64) { for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i); vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i); vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i); vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i); vX4 = vec_xl(48, s + i);
vY1 = vec_xl(0, t0 + i); vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i); vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i); vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i); vYE = vec_xl(48, t0 + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY3 = vec_xl(0, t1 + i); vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i); vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i); vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i); vYG = vec_xl(48, t1 + i);
vec_xst(vY1, 0, t0 + i); vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i); vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i); vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i); vec_xst(vYE, 48, t0 + i);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY5 = vec_xl(0, t2 + i); vY5 = vec_xl(0, t2 + i);
vY6 = vec_xl(16, t2 + i); vY6 = vec_xl(16, t2 + i);
vYH = vec_xl(32, t2 + i); vYH = vec_xl(32, t2 + i);
vYI = vec_xl(48, t2 + i); vYI = vec_xl(48, t2 + i);
vec_xst(vY3, 0, t1 + i); vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i); vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i); vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i); vec_xst(vYG, 48, t1 + i);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY7 = vec_xl(0, t3 + i); vY7 = vec_xl(0, t3 + i);
vY8 = vec_xl(16, t3 + i); vY8 = vec_xl(16, t3 + i);
vYJ = vec_xl(32, t3 + i); vYJ = vec_xl(32, t3 + i);
vYK = vec_xl(48, t3 + i); vYK = vec_xl(48, t3 + i);
vec_xst(vY5, 0, t2 + i); vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i); vec_xst(vY6, 16, t2 + i);
vec_xst(vYH, 32, t2 + i); vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i); vec_xst(vYI, 48, t2 + i);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1); vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2); vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3); vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4); vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vec_xst(vY7, 0, t3 + i); vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i); vec_xst(vY8, 16, t3 + i);
vec_xst(vYJ, 32, t3 + i); vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i); vec_xst(vYK, 48, t3 + i);
} }
return; return;
} }

View File

@ -1,145 +1,146 @@
#include "ec_base_vsx.h" #include "ec_base_vsx.h"
void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, void
unsigned char **src, unsigned char **dest) gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest)
{ {
unsigned char *s, *t0, *t1, *t2, *t3, *t4; unsigned char *s, *t0, *t1, *t2, *t3, *t4;
vector unsigned char vX1, vX2, vX3, vX4; vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA; vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM; vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4; vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
int i, j, head; int i, j, head;
if (vlen < 128) { if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]); gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]); gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]); gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]); gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *) dest[3]);
gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *)dest[4]); gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *) dest[4]);
for (j = 1; j < vlen; j++) { for (j = 1; j < vlen; j++) {
gf_5vect_mad_vsx(len, vlen, j, gftbls, src[j], dest); gf_5vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
} }
return; return;
} }
t0 = (unsigned char *)dest[0]; t0 = (unsigned char *) dest[0];
t1 = (unsigned char *)dest[1]; t1 = (unsigned char *) dest[1];
t2 = (unsigned char *)dest[2]; t2 = (unsigned char *) dest[2];
t3 = (unsigned char *)dest[3]; t3 = (unsigned char *) dest[3];
t4 = (unsigned char *)dest[4]; t4 = (unsigned char *) dest[4];
head = len % 64; head = len % 64;
if (head != 0) { if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0); gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1); gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2); gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3); gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4); gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
} }
for (i = head; i < len - 63; i += 64) { for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1; vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2; vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3; vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4; vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5; vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6; vY6 = vY6 ^ vY6;
vY7 = vY7 ^ vY7; vY7 = vY7 ^ vY7;
vY8 = vY8 ^ vY8; vY8 = vY8 ^ vY8;
vY9 = vY9 ^ vY9; vY9 = vY9 ^ vY9;
vYA = vYA ^ vYA; vYA = vYA ^ vYA;
vYD = vYD ^ vYD; vYD = vYD ^ vYD;
vYE = vYE ^ vYE; vYE = vYE ^ vYE;
vYF = vYF ^ vYF; vYF = vYF ^ vYF;
vYG = vYG ^ vYG; vYG = vYG ^ vYG;
vYH = vYH ^ vYH; vYH = vYH ^ vYH;
vYI = vYI ^ vYI; vYI = vYI ^ vYI;
vYJ = vYJ ^ vYJ; vYJ = vYJ ^ vYJ;
vYK = vYK ^ vYK; vYK = vYK ^ vYK;
vYL = vYL ^ vYL; vYL = vYL ^ vYL;
vYM = vYM ^ vYM; vYM = vYM ^ vYM;
unsigned char *g0 = &gftbls[0 * 32 * vlen]; unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen]; unsigned char *g1 = &gftbls[1 * 32 * vlen];
unsigned char *g2 = &gftbls[2 * 32 * vlen]; unsigned char *g2 = &gftbls[2 * 32 * vlen];
unsigned char *g3 = &gftbls[3 * 32 * vlen]; unsigned char *g3 = &gftbls[3 * 32 * vlen];
unsigned char *g4 = &gftbls[4 * 32 * vlen]; unsigned char *g4 = &gftbls[4 * 32 * vlen];
for (j = 0; j < vlen; j++) { for (j = 0; j < vlen; j++) {
s = (unsigned char *)src[j]; s = (unsigned char *) src[j];
vX1 = vec_xl(0, s + i); vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i); vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i); vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i); vX4 = vec_xl(48, s + i);
vlo0 = EC_vec_xl(0, g0); vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0); vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1); vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1); vhi1 = EC_vec_xl(16, g1);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vlo2 = vec_xl(0, g2); vlo2 = vec_xl(0, g2);
vhi2 = vec_xl(16, g2); vhi2 = vec_xl(16, g2);
vlo3 = vec_xl(0, g3); vlo3 = vec_xl(0, g3);
vhi3 = vec_xl(16, g3); vhi3 = vec_xl(16, g3);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vlo4 = vec_xl(0, g4); vlo4 = vec_xl(0, g4);
vhi4 = vec_xl(16, g4); vhi4 = vec_xl(16, g4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1); vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2); vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3); vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4); vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1); vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2); vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3); vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4); vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
g0 += 32; g0 += 32;
g1 += 32; g1 += 32;
g2 += 32; g2 += 32;
g3 += 32; g3 += 32;
g4 += 32; g4 += 32;
} }
vec_xst(vY1, 0, t0 + i); vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i); vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i); vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i); vec_xst(vY4, 16, t1 + i);
vec_xst(vY5, 0, t2 + i); vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i); vec_xst(vY6, 16, t2 + i);
vec_xst(vY7, 0, t3 + i); vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i); vec_xst(vY8, 16, t3 + i);
vec_xst(vY9, 0, t4 + i); vec_xst(vY9, 0, t4 + i);
vec_xst(vYA, 16, t4 + i); vec_xst(vYA, 16, t4 + i);
vec_xst(vYD, 32, t0 + i); vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i); vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i); vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i); vec_xst(vYG, 48, t1 + i);
vec_xst(vYH, 32, t2 + i); vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i); vec_xst(vYI, 48, t2 + i);
vec_xst(vYJ, 32, t3 + i); vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i); vec_xst(vYK, 48, t3 + i);
vec_xst(vYL, 32, t4 + i); vec_xst(vYL, 32, t4 + i);
vec_xst(vYM, 48, t4 + i); vec_xst(vYM, 48, t4 + i);
} }
return; return;
} }

View File

@ -1,122 +1,123 @@
#include "ec_base_vsx.h" #include "ec_base_vsx.h"
void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, void
unsigned char *src, unsigned char **dest) gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest)
{ {
unsigned char *s, *t0, *t1, *t2, *t3, *t4; unsigned char *s, *t0, *t1, *t2, *t3, *t4;
vector unsigned char vX1, vX2, vX3, vX4; vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA; vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM; vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4; vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
int i, head; int i, head;
s = (unsigned char *)src; s = (unsigned char *) src;
t0 = (unsigned char *)dest[0]; t0 = (unsigned char *) dest[0];
t1 = (unsigned char *)dest[1]; t1 = (unsigned char *) dest[1];
t2 = (unsigned char *)dest[2]; t2 = (unsigned char *) dest[2];
t3 = (unsigned char *)dest[3]; t3 = (unsigned char *) dest[3];
t4 = (unsigned char *)dest[4]; t4 = (unsigned char *) dest[4];
head = len % 64; head = len % 64;
if (head != 0) { if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0); gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1); gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2); gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3); gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4); gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
} }
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5))); vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5))); vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5))); vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5))); vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5))); vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5))); vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5))); vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5))); vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5))); vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5))); vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
for (i = head; i < len - 63; i += 64) { for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i); vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i); vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i); vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i); vX4 = vec_xl(48, s + i);
vY1 = vec_xl(0, t0 + i); vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i); vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i); vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i); vYE = vec_xl(48, t0 + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY3 = vec_xl(0, t1 + i); vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i); vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i); vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i); vYG = vec_xl(48, t1 + i);
vec_xst(vY1, 0, t0 + i); vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i); vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i); vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i); vec_xst(vYE, 48, t0 + i);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY5 = vec_xl(0, t2 + i); vY5 = vec_xl(0, t2 + i);
vY6 = vec_xl(16, t2 + i); vY6 = vec_xl(16, t2 + i);
vYH = vec_xl(32, t2 + i); vYH = vec_xl(32, t2 + i);
vYI = vec_xl(48, t2 + i); vYI = vec_xl(48, t2 + i);
vec_xst(vY3, 0, t1 + i); vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i); vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i); vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i); vec_xst(vYG, 48, t1 + i);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY7 = vec_xl(0, t3 + i); vY7 = vec_xl(0, t3 + i);
vY8 = vec_xl(16, t3 + i); vY8 = vec_xl(16, t3 + i);
vYJ = vec_xl(32, t3 + i); vYJ = vec_xl(32, t3 + i);
vYK = vec_xl(48, t3 + i); vYK = vec_xl(48, t3 + i);
vec_xst(vY5, 0, t2 + i); vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i); vec_xst(vY6, 16, t2 + i);
vec_xst(vYH, 32, t2 + i); vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i); vec_xst(vYI, 48, t2 + i);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1); vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2); vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3); vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4); vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY9 = vec_xl(0, t4 + i); vY9 = vec_xl(0, t4 + i);
vYA = vec_xl(16, t4 + i); vYA = vec_xl(16, t4 + i);
vYL = vec_xl(32, t4 + i); vYL = vec_xl(32, t4 + i);
vYM = vec_xl(48, t4 + i); vYM = vec_xl(48, t4 + i);
vec_xst(vY7, 0, t3 + i); vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i); vec_xst(vY8, 16, t3 + i);
vec_xst(vYJ, 32, t3 + i); vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i); vec_xst(vYK, 48, t3 + i);
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1); vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2); vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3); vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4); vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
vec_xst(vY9, 0, t4 + i); vec_xst(vY9, 0, t4 + i);
vec_xst(vYA, 16, t4 + i); vec_xst(vYA, 16, t4 + i);
vec_xst(vYL, 32, t4 + i); vec_xst(vYL, 32, t4 + i);
vec_xst(vYM, 48, t4 + i); vec_xst(vYM, 48, t4 + i);
} }
return; return;
} }

View File

@ -1,166 +1,167 @@
#include "ec_base_vsx.h" #include "ec_base_vsx.h"
void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, void
unsigned char **src, unsigned char **dest) gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest)
{ {
unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5; unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
vector unsigned char vX1, vX2, vX3, vX4; vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC; vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO; vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2; vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5; vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
int i, j, head; int i, j, head;
if (vlen < 128) { if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]); gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]); gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]); gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]); gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *) dest[3]);
gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *)dest[4]); gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *) dest[4]);
gf_vect_mul_vsx(len, &gftbls[5 * 32 * vlen], src[0], (unsigned char *)dest[5]); gf_vect_mul_vsx(len, &gftbls[5 * 32 * vlen], src[0], (unsigned char *) dest[5]);
for (j = 1; j < vlen; j++) { for (j = 1; j < vlen; j++) {
gf_6vect_mad_vsx(len, vlen, j, gftbls, src[j], dest); gf_6vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
} }
return; return;
} }
t0 = (unsigned char *)dest[0]; t0 = (unsigned char *) dest[0];
t1 = (unsigned char *)dest[1]; t1 = (unsigned char *) dest[1];
t2 = (unsigned char *)dest[2]; t2 = (unsigned char *) dest[2];
t3 = (unsigned char *)dest[3]; t3 = (unsigned char *) dest[3];
t4 = (unsigned char *)dest[4]; t4 = (unsigned char *) dest[4];
t5 = (unsigned char *)dest[5]; t5 = (unsigned char *) dest[5];
head = len % 64; head = len % 64;
if (head != 0) { if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0); gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1); gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2); gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3); gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4); gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
gf_vect_dot_prod_base(head, vlen, &gftbls[5 * 32 * vlen], src, t5); gf_vect_dot_prod_base(head, vlen, &gftbls[5 * 32 * vlen], src, t5);
} }
for (i = head; i < len - 63; i += 64) { for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1; vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2; vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3; vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4; vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5; vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6; vY6 = vY6 ^ vY6;
vY7 = vY7 ^ vY7; vY7 = vY7 ^ vY7;
vY8 = vY8 ^ vY8; vY8 = vY8 ^ vY8;
vY9 = vY9 ^ vY9; vY9 = vY9 ^ vY9;
vYA = vYA ^ vYA; vYA = vYA ^ vYA;
vYB = vYB ^ vYB; vYB = vYB ^ vYB;
vYC = vYC ^ vYC; vYC = vYC ^ vYC;
vYD = vYD ^ vYD; vYD = vYD ^ vYD;
vYE = vYE ^ vYE; vYE = vYE ^ vYE;
vYF = vYF ^ vYF; vYF = vYF ^ vYF;
vYG = vYG ^ vYG; vYG = vYG ^ vYG;
vYH = vYH ^ vYH; vYH = vYH ^ vYH;
vYI = vYI ^ vYI; vYI = vYI ^ vYI;
vYJ = vYJ ^ vYJ; vYJ = vYJ ^ vYJ;
vYK = vYK ^ vYK; vYK = vYK ^ vYK;
vYL = vYL ^ vYL; vYL = vYL ^ vYL;
vYM = vYM ^ vYM; vYM = vYM ^ vYM;
vYN = vYN ^ vYN; vYN = vYN ^ vYN;
vYO = vYO ^ vYO; vYO = vYO ^ vYO;
unsigned char *g0 = &gftbls[0 * 32 * vlen]; unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen]; unsigned char *g1 = &gftbls[1 * 32 * vlen];
unsigned char *g2 = &gftbls[2 * 32 * vlen]; unsigned char *g2 = &gftbls[2 * 32 * vlen];
unsigned char *g3 = &gftbls[3 * 32 * vlen]; unsigned char *g3 = &gftbls[3 * 32 * vlen];
unsigned char *g4 = &gftbls[4 * 32 * vlen]; unsigned char *g4 = &gftbls[4 * 32 * vlen];
unsigned char *g5 = &gftbls[5 * 32 * vlen]; unsigned char *g5 = &gftbls[5 * 32 * vlen];
for (j = 0; j < vlen; j++) { for (j = 0; j < vlen; j++) {
s = (unsigned char *)src[j]; s = (unsigned char *) src[j];
vX1 = vec_xl(0, s + i); vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i); vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i); vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i); vX4 = vec_xl(48, s + i);
vlo0 = EC_vec_xl(0, g0); vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0); vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1); vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1); vhi1 = EC_vec_xl(16, g1);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vlo2 = EC_vec_xl(0, g2); vlo2 = EC_vec_xl(0, g2);
vhi2 = EC_vec_xl(16, g2); vhi2 = EC_vec_xl(16, g2);
vlo3 = EC_vec_xl(0, g3); vlo3 = EC_vec_xl(0, g3);
vhi3 = EC_vec_xl(16, g3); vhi3 = EC_vec_xl(16, g3);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vlo4 = EC_vec_xl(0, g4); vlo4 = EC_vec_xl(0, g4);
vhi4 = EC_vec_xl(16, g4); vhi4 = EC_vec_xl(16, g4);
vlo5 = EC_vec_xl(0, g5); vlo5 = EC_vec_xl(0, g5);
vhi5 = EC_vec_xl(16, g5); vhi5 = EC_vec_xl(16, g5);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1); vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2); vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3); vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4); vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1); vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2); vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3); vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4); vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1); vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2); vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3); vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4); vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);
g0 += 32; g0 += 32;
g1 += 32; g1 += 32;
g2 += 32; g2 += 32;
g3 += 32; g3 += 32;
g4 += 32; g4 += 32;
g5 += 32; g5 += 32;
} }
vec_xst(vY1, 0, t0 + i); vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i); vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i); vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i); vec_xst(vY4, 16, t1 + i);
vec_xst(vY5, 0, t2 + i); vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i); vec_xst(vY6, 16, t2 + i);
vec_xst(vY7, 0, t3 + i); vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i); vec_xst(vY8, 16, t3 + i);
vec_xst(vY9, 0, t4 + i); vec_xst(vY9, 0, t4 + i);
vec_xst(vYA, 16, t4 + i); vec_xst(vYA, 16, t4 + i);
vec_xst(vYB, 0, t5 + i); vec_xst(vYB, 0, t5 + i);
vec_xst(vYC, 16, t5 + i); vec_xst(vYC, 16, t5 + i);
vec_xst(vYD, 32, t0 + i); vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i); vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i); vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i); vec_xst(vYG, 48, t1 + i);
vec_xst(vYH, 32, t2 + i); vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i); vec_xst(vYI, 48, t2 + i);
vec_xst(vYJ, 32, t3 + i); vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i); vec_xst(vYK, 48, t3 + i);
vec_xst(vYL, 32, t4 + i); vec_xst(vYL, 32, t4 + i);
vec_xst(vYM, 48, t4 + i); vec_xst(vYM, 48, t4 + i);
vec_xst(vYN, 32, t5 + i); vec_xst(vYN, 32, t5 + i);
vec_xst(vYO, 48, t5 + i); vec_xst(vYO, 48, t5 + i);
} }
return; return;
} }

View File

@ -1,142 +1,143 @@
#include "ec_base_vsx.h" #include "ec_base_vsx.h"
void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, void
unsigned char *src, unsigned char **dest) gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest)
{ {
unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5; unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
vector unsigned char vX1, vX2, vX3, vX4; vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC; vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO; vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2; vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5; vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
int i, head; int i, head;
s = (unsigned char *)src; s = (unsigned char *) src;
t0 = (unsigned char *)dest[0]; t0 = (unsigned char *) dest[0];
t1 = (unsigned char *)dest[1]; t1 = (unsigned char *) dest[1];
t2 = (unsigned char *)dest[2]; t2 = (unsigned char *) dest[2];
t3 = (unsigned char *)dest[3]; t3 = (unsigned char *) dest[3];
t4 = (unsigned char *)dest[4]; t4 = (unsigned char *) dest[4];
t5 = (unsigned char *)dest[5]; t5 = (unsigned char *) dest[5];
head = len % 64; head = len % 64;
if (head != 0) { if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0); gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1); gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2); gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3); gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4); gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
gf_vect_mad_base(head, vec, vec_i, &gftbls[5 * 32 * vec], src, t5); gf_vect_mad_base(head, vec, vec_i, &gftbls[5 * 32 * vec], src, t5);
} }
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5))); vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5))); vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5))); vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5))); vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5))); vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5))); vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5))); vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5))); vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5))); vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5))); vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
vlo5 = EC_vec_xl(0, gftbls + (((5 * vec) << 5) + (vec_i << 5))); vlo5 = EC_vec_xl(0, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
vhi5 = EC_vec_xl(16, gftbls + (((5 * vec) << 5) + (vec_i << 5))); vhi5 = EC_vec_xl(16, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
for (i = head; i < len - 63; i += 64) { for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i); vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i); vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i); vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i); vX4 = vec_xl(48, s + i);
vY1 = vec_xl(0, t0 + i); vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i); vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i); vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i); vYE = vec_xl(48, t0 + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vec_xst(vY1, 0, t0 + i); vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i); vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i); vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i); vec_xst(vYE, 48, t0 + i);
vY3 = vec_xl(0, t1 + i); vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i); vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i); vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i); vYG = vec_xl(48, t1 + i);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vec_xst(vY3, 0, t1 + i); vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i); vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i); vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i); vec_xst(vYG, 48, t1 + i);
vY5 = vec_xl(0, t2 + i); vY5 = vec_xl(0, t2 + i);
vY6 = vec_xl(16, t2 + i); vY6 = vec_xl(16, t2 + i);
vYH = vec_xl(32, t2 + i); vYH = vec_xl(32, t2 + i);
vYI = vec_xl(48, t2 + i); vYI = vec_xl(48, t2 + i);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY7 = vec_xl(0, t3 + i); vY7 = vec_xl(0, t3 + i);
vY8 = vec_xl(16, t3 + i); vY8 = vec_xl(16, t3 + i);
vYJ = vec_xl(32, t3 + i); vYJ = vec_xl(32, t3 + i);
vYK = vec_xl(48, t3 + i); vYK = vec_xl(48, t3 + i);
vec_xst(vY5, 0, t2 + i); vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i); vec_xst(vY6, 16, t2 + i);
vec_xst(vYH, 32, t2 + i); vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i); vec_xst(vYI, 48, t2 + i);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1); vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2); vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3); vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4); vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY9 = vec_xl(0, t4 + i); vY9 = vec_xl(0, t4 + i);
vYA = vec_xl(16, t4 + i); vYA = vec_xl(16, t4 + i);
vYL = vec_xl(32, t4 + i); vYL = vec_xl(32, t4 + i);
vYM = vec_xl(48, t4 + i); vYM = vec_xl(48, t4 + i);
vec_xst(vY7, 0, t3 + i); vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i); vec_xst(vY8, 16, t3 + i);
vec_xst(vYJ, 32, t3 + i); vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i); vec_xst(vYK, 48, t3 + i);
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1); vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2); vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3); vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4); vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
vYB = vec_xl(0, t5 + i); vYB = vec_xl(0, t5 + i);
vYC = vec_xl(16, t5 + i); vYC = vec_xl(16, t5 + i);
vYN = vec_xl(32, t5 + i); vYN = vec_xl(32, t5 + i);
vYO = vec_xl(48, t5 + i); vYO = vec_xl(48, t5 + i);
vec_xst(vY9, 0, t4 + i); vec_xst(vY9, 0, t4 + i);
vec_xst(vYA, 16, t4 + i); vec_xst(vYA, 16, t4 + i);
vec_xst(vYL, 32, t4 + i); vec_xst(vYL, 32, t4 + i);
vec_xst(vYM, 48, t4 + i); vec_xst(vYM, 48, t4 + i);
vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1); vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2); vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3); vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4); vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);
vec_xst(vYB, 0, t5 + i); vec_xst(vYB, 0, t5 + i);
vec_xst(vYC, 16, t5 + i); vec_xst(vYC, 16, t5 + i);
vec_xst(vYN, 32, t5 + i); vec_xst(vYN, 32, t5 + i);
vec_xst(vYO, 48, t5 + i); vec_xst(vYO, 48, t5 + i);
} }
return; return;
} }

View File

@ -1,85 +1,86 @@
#include "ec_base_vsx.h" #include "ec_base_vsx.h"
void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, void
unsigned char **src, unsigned char *dest) gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char *dest)
{ {
unsigned char *s, *t0; unsigned char *s, *t0;
vector unsigned char vX1, vY1; vector unsigned char vX1, vY1;
vector unsigned char vX2, vY2; vector unsigned char vX2, vY2;
vector unsigned char vX3, vY3; vector unsigned char vX3, vY3;
vector unsigned char vX4, vY4; vector unsigned char vX4, vY4;
vector unsigned char vX5, vY5; vector unsigned char vX5, vY5;
vector unsigned char vX6, vY6; vector unsigned char vX6, vY6;
vector unsigned char vX7, vY7; vector unsigned char vX7, vY7;
vector unsigned char vX8, vY8; vector unsigned char vX8, vY8;
vector unsigned char vhi0, vlo0; vector unsigned char vhi0, vlo0;
int i, j, head; int i, j, head;
if (vlen < 128) { if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest); gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest);
for (j = 1; j < vlen; j++) { for (j = 1; j < vlen; j++) {
gf_vect_mad_vsx(len, vlen, j, gftbls, src[j], dest); gf_vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
} }
return; return;
} }
t0 = (unsigned char *)dest; t0 = (unsigned char *) dest;
head = len % 128; head = len % 128;
if (head != 0) { if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0); gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
} }
for (i = head; i < len - 127; i += 128) { for (i = head; i < len - 127; i += 128) {
vY1 = vY1 ^ vY1; vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2; vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3; vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4; vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5; vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6; vY6 = vY6 ^ vY6;
vY7 = vY7 ^ vY7; vY7 = vY7 ^ vY7;
vY8 = vY8 ^ vY8; vY8 = vY8 ^ vY8;
unsigned char *g0 = &gftbls[0 * 32 * vlen]; unsigned char *g0 = &gftbls[0 * 32 * vlen];
for (j = 0; j < vlen; j++) { for (j = 0; j < vlen; j++) {
s = (unsigned char *)src[j]; s = (unsigned char *) src[j];
vX1 = vec_xl(0, s + i); vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i); vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i); vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i); vX4 = vec_xl(48, s + i);
vlo0 = EC_vec_xl(0, g0); vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0); vhi0 = EC_vec_xl(16, g0);
vX5 = vec_xl(64, s + i); vX5 = vec_xl(64, s + i);
vX6 = vec_xl(80, s + i); vX6 = vec_xl(80, s + i);
vX7 = vec_xl(96, s + i); vX7 = vec_xl(96, s + i);
vX8 = vec_xl(112, s + i); vX8 = vec_xl(112, s + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3); vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4); vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi0, vlo0, vX5); vY5 = vY5 ^ EC_vec_permxor(vhi0, vlo0, vX5);
vY6 = vY6 ^ EC_vec_permxor(vhi0, vlo0, vX6); vY6 = vY6 ^ EC_vec_permxor(vhi0, vlo0, vX6);
vY7 = vY7 ^ EC_vec_permxor(vhi0, vlo0, vX7); vY7 = vY7 ^ EC_vec_permxor(vhi0, vlo0, vX7);
vY8 = vY8 ^ EC_vec_permxor(vhi0, vlo0, vX8); vY8 = vY8 ^ EC_vec_permxor(vhi0, vlo0, vX8);
g0 += 32; g0 += 32;
} }
vec_xst(vY1, 0, t0 + i); vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i); vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 32, t0 + i); vec_xst(vY3, 32, t0 + i);
vec_xst(vY4, 48, t0 + i); vec_xst(vY4, 48, t0 + i);
vec_xst(vY5, 64, t0 + i); vec_xst(vY5, 64, t0 + i);
vec_xst(vY6, 80, t0 + i); vec_xst(vY6, 80, t0 + i);
vec_xst(vY7, 96, t0 + i); vec_xst(vY7, 96, t0 + i);
vec_xst(vY8, 112, t0 + i); vec_xst(vY8, 112, t0 + i);
} }
return; return;
} }

View File

@ -1,48 +1,49 @@
#include "ec_base_vsx.h" #include "ec_base_vsx.h"
void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, void
unsigned char *src, unsigned char *dest) gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest)
{ {
unsigned char *s, *t0; unsigned char *s, *t0;
vector unsigned char vX1, vY1; vector unsigned char vX1, vY1;
vector unsigned char vX2, vY2; vector unsigned char vX2, vY2;
vector unsigned char vX3, vY3; vector unsigned char vX3, vY3;
vector unsigned char vX4, vY4; vector unsigned char vX4, vY4;
vector unsigned char vhi0, vlo0; vector unsigned char vhi0, vlo0;
int i, head; int i, head;
s = (unsigned char *)src; s = (unsigned char *) src;
t0 = (unsigned char *)dest; t0 = (unsigned char *) dest;
head = len % 64; head = len % 64;
if (head != 0) { if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, dest); gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, dest);
} }
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5))); vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5))); vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
for (i = head; i < len - 63; i += 64) { for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i); vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i); vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i); vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i); vX4 = vec_xl(48, s + i);
vY1 = vec_xl(0, t0 + i); vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i); vY2 = vec_xl(16, t0 + i);
vY3 = vec_xl(32, t0 + i); vY3 = vec_xl(32, t0 + i);
vY4 = vec_xl(48, t0 + i); vY4 = vec_xl(48, t0 + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3); vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4); vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
vec_xst(vY1, 0, t0 + i); vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i); vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 32, t0 + i); vec_xst(vY3, 32, t0 + i);
vec_xst(vY4, 48, t0 + i); vec_xst(vY4, 48, t0 + i);
} }
return; return;
} }

View File

@ -3,73 +3,74 @@
/*
 * Byte-at-a-time multiply of src into dest; same as gf_vect_mul_base in
 * "ec_base.h" but without the size restriction.
 *
 * len  - number of bytes to process (nothing is done when len <= 0)
 * a    - table array; only a[1] is read
 * src  - source bytes
 * dest - destination bytes, overwritten with gf_mul(a[1], src[i])
 */
static void
_gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
{
        // 2nd element of table array is ref value used to fill it in
        const unsigned char coeff = a[1];
        int idx;

        for (idx = 0; idx < len; idx++) {
                dest[idx] = gf_mul(coeff, src[idx]);
        }
}
void gf_vect_mul_vsx(int len, unsigned char *gftbl, unsigned char *src, unsigned char *dest) void
gf_vect_mul_vsx(int len, unsigned char *gftbl, unsigned char *src, unsigned char *dest)
{ {
unsigned char *s, *t0; unsigned char *s, *t0;
vector unsigned char vX1, vY1; vector unsigned char vX1, vY1;
vector unsigned char vX2, vY2; vector unsigned char vX2, vY2;
vector unsigned char vX3, vY3; vector unsigned char vX3, vY3;
vector unsigned char vX4, vY4; vector unsigned char vX4, vY4;
vector unsigned char vX5, vY5; vector unsigned char vX5, vY5;
vector unsigned char vX6, vY6; vector unsigned char vX6, vY6;
vector unsigned char vX7, vY7; vector unsigned char vX7, vY7;
vector unsigned char vX8, vY8; vector unsigned char vX8, vY8;
vector unsigned char vhi0, vlo0; vector unsigned char vhi0, vlo0;
int i, head; int i, head;
s = (unsigned char *)src; s = (unsigned char *) src;
t0 = (unsigned char *)dest; t0 = (unsigned char *) dest;
head = len % 128; head = len % 128;
if (head != 0) { if (head != 0) {
_gf_vect_mul_base(head, gftbl, src, dest); _gf_vect_mul_base(head, gftbl, src, dest);
} }
vlo0 = EC_vec_xl(0, gftbl); vlo0 = EC_vec_xl(0, gftbl);
vhi0 = EC_vec_xl(16, gftbl); vhi0 = EC_vec_xl(16, gftbl);
for (i = head; i < len - 127; i += 128) { for (i = head; i < len - 127; i += 128) {
vX1 = vec_xl(0, s + i); vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i); vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i); vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i); vX4 = vec_xl(48, s + i);
vX5 = vec_xl(64, s + i); vX5 = vec_xl(64, s + i);
vX6 = vec_xl(80, s + i); vX6 = vec_xl(80, s + i);
vX7 = vec_xl(96, s + i); vX7 = vec_xl(96, s + i);
vX8 = vec_xl(112, s + i); vX8 = vec_xl(112, s + i);
vY1 = EC_vec_permxor(vhi0, vlo0, vX1); vY1 = EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = EC_vec_permxor(vhi0, vlo0, vX2); vY2 = EC_vec_permxor(vhi0, vlo0, vX2);
vY3 = EC_vec_permxor(vhi0, vlo0, vX3); vY3 = EC_vec_permxor(vhi0, vlo0, vX3);
vY4 = EC_vec_permxor(vhi0, vlo0, vX4); vY4 = EC_vec_permxor(vhi0, vlo0, vX4);
vY5 = EC_vec_permxor(vhi0, vlo0, vX5); vY5 = EC_vec_permxor(vhi0, vlo0, vX5);
vY6 = EC_vec_permxor(vhi0, vlo0, vX6); vY6 = EC_vec_permxor(vhi0, vlo0, vX6);
vY7 = EC_vec_permxor(vhi0, vlo0, vX7); vY7 = EC_vec_permxor(vhi0, vlo0, vX7);
vY8 = EC_vec_permxor(vhi0, vlo0, vX8); vY8 = EC_vec_permxor(vhi0, vlo0, vX8);
vec_xst(vY1, 0, t0 + i); vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i); vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 32, t0 + i); vec_xst(vY3, 32, t0 + i);
vec_xst(vY4, 48, t0 + i); vec_xst(vY4, 48, t0 + i);
vec_xst(vY5, 64, t0 + i); vec_xst(vY5, 64, t0 + i);
vec_xst(vY6, 80, t0 + i); vec_xst(vY6, 80, t0 + i);
vec_xst(vY7, 96, t0 + i); vec_xst(vY7, 96, t0 + i);
vec_xst(vY8, 112, t0 + i); vec_xst(vY8, 112, t0 + i);
} }
return; return;
} }