erasure_code: reformat using new code style

Signed-off-by: Marcel Cornu <marcel.d.cornu@intel.com>
This commit is contained in:
Marcel Cornu 2024-04-19 17:08:53 +01:00 committed by Pablo de Lara
parent 671e67b62d
commit 300260a4d9
37 changed files with 10984 additions and 13068 deletions

View File

@ -31,94 +31,86 @@
/*
 * Dispatcher for gf_vect_dot_prod.
 *
 * Linux: reads the AT_HWCAP auxiliary vector and returns the SVE provider
 * when HWCAP_SVE is set, otherwise the NEON provider when HWCAP_ASIMD is set.
 * Apple: returns the SVE provider when sysctlEnabled(SYSCTL_SVE_KEY) is
 * true, otherwise the NEON provider.
 * Anything else (or no matching capability bit) gets the basic provider.
 */
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
{
#if defined(__linux__)
        unsigned long auxval = getauxval(AT_HWCAP);

        if (auxval & HWCAP_SVE)
                return PROVIDER_INFO(gf_vect_dot_prod_sve);
        if (auxval & HWCAP_ASIMD)
                return PROVIDER_INFO(gf_vect_dot_prod_neon);
#elif defined(__APPLE__)
        if (sysctlEnabled(SYSCTL_SVE_KEY))
                return PROVIDER_INFO(gf_vect_dot_prod_sve);
        return PROVIDER_INFO(gf_vect_dot_prod_neon);
#endif
        return PROVIDER_BASIC(gf_vect_dot_prod);
}
/*
 * Dispatcher for gf_vect_mad.
 *
 * Linux: SVE provider when HWCAP_SVE is set in AT_HWCAP, else NEON provider
 * when HWCAP_ASIMD is set.
 * Apple: SVE provider when sysctlEnabled(SYSCTL_SVE_KEY) is true, else NEON.
 * Falls through to the basic provider otherwise.
 */
DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
{
#if defined(__linux__)
        unsigned long auxval = getauxval(AT_HWCAP);

        if (auxval & HWCAP_SVE)
                return PROVIDER_INFO(gf_vect_mad_sve);
        if (auxval & HWCAP_ASIMD)
                return PROVIDER_INFO(gf_vect_mad_neon);
#elif defined(__APPLE__)
        if (sysctlEnabled(SYSCTL_SVE_KEY))
                return PROVIDER_INFO(gf_vect_mad_sve);
        return PROVIDER_INFO(gf_vect_mad_neon);
#endif
        return PROVIDER_BASIC(gf_vect_mad);
}
/*
 * Dispatcher for ec_encode_data.
 *
 * Linux: SVE provider when HWCAP_SVE is set in AT_HWCAP, else NEON provider
 * when HWCAP_ASIMD is set.
 * Apple: SVE provider when sysctlEnabled(SYSCTL_SVE_KEY) is true, else NEON.
 * Falls through to the basic provider otherwise.
 */
DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
{
#if defined(__linux__)
        unsigned long auxval = getauxval(AT_HWCAP);

        if (auxval & HWCAP_SVE)
                return PROVIDER_INFO(ec_encode_data_sve);
        if (auxval & HWCAP_ASIMD)
                return PROVIDER_INFO(ec_encode_data_neon);
#elif defined(__APPLE__)
        if (sysctlEnabled(SYSCTL_SVE_KEY))
                return PROVIDER_INFO(ec_encode_data_sve);
        return PROVIDER_INFO(ec_encode_data_neon);
#endif
        return PROVIDER_BASIC(ec_encode_data);
}
/*
 * Dispatcher for ec_encode_data_update.
 *
 * Linux: SVE provider when HWCAP_SVE is set in AT_HWCAP, else NEON provider
 * when HWCAP_ASIMD is set.
 * Apple: SVE provider when sysctlEnabled(SYSCTL_SVE_KEY) is true, else NEON.
 * Falls through to the basic provider otherwise.
 */
DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
{
#if defined(__linux__)
        unsigned long auxval = getauxval(AT_HWCAP);

        if (auxval & HWCAP_SVE)
                return PROVIDER_INFO(ec_encode_data_update_sve);
        if (auxval & HWCAP_ASIMD)
                return PROVIDER_INFO(ec_encode_data_update_neon);
#elif defined(__APPLE__)
        if (sysctlEnabled(SYSCTL_SVE_KEY))
                return PROVIDER_INFO(ec_encode_data_update_sve);
        return PROVIDER_INFO(ec_encode_data_update_neon);
#endif
        return PROVIDER_BASIC(ec_encode_data_update);
}
/*
 * Dispatcher for gf_vect_mul.
 *
 * Linux: SVE provider when HWCAP_SVE is set in AT_HWCAP, else NEON provider
 * when HWCAP_ASIMD is set.
 * Apple: SVE provider when sysctlEnabled(SYSCTL_SVE_KEY) is true, else NEON.
 * Falls through to the basic provider otherwise.
 */
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
{
#if defined(__linux__)
        unsigned long auxval = getauxval(AT_HWCAP);

        if (auxval & HWCAP_SVE)
                return PROVIDER_INFO(gf_vect_mul_sve);
        if (auxval & HWCAP_ASIMD)
                return PROVIDER_INFO(gf_vect_mul_neon);
#elif defined(__APPLE__)
        if (sysctlEnabled(SYSCTL_SVE_KEY))
                return PROVIDER_INFO(gf_vect_mul_sve);
        return PROVIDER_INFO(gf_vect_mul_neon);
#endif
        return PROVIDER_BASIC(gf_vect_mul);
}
/* Dispatcher for ec_init_tables: always returns the basic provider. */
DEFINE_INTERFACE_DISPATCHER(ec_init_tables) { return PROVIDER_BASIC(ec_init_tables); }

View File

@ -29,236 +29,265 @@
#include "erasure_code.h"
/*external function*/
/*
 * NEON kernels defined outside this file. The single-destination variants
 * take `unsigned char *dest`; the N-vector variants take an array of
 * destination pointers (`unsigned char **dest`).
 */
extern void
gf_vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char *dest);
extern void
gf_2vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                       unsigned char **dest);
extern void
gf_3vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                       unsigned char **dest);
extern void
gf_4vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                       unsigned char **dest);
extern void
gf_5vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                       unsigned char **dest);
extern void
gf_vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char *dest);
extern void
gf_2vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
extern void
gf_3vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
extern void
gf_4vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
extern void
gf_5vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
extern void
gf_6vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
/*
 * Encode `rows` coding buffers from `k` source buffers using the NEON
 * dot-product kernels.
 *
 * Buffers shorter than 16 bytes are handed to ec_encode_data_base().
 * Otherwise rows are consumed five at a time while more than five remain
 * (each step advances g_tbls by 5 * k * 32 bytes and coding by 5 pointers),
 * and the remaining 1..5 rows are handled by the kernel of matching width.
 */
void
ec_encode_data_neon(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                    unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        while (rows > 5) {
                gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding);
                g_tbls += 5 * k * 32;
                coding += 5;
                rows -= 5;
        }

        switch (rows) {
        case 5:
                gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_neon(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_neon(len, k, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_dot_prod_neon(len, k, g_tbls, data, coding);
                break;
        case 1:
                /* Single-row kernel takes the destination buffer itself. */
                gf_vect_dot_prod_neon(len, k, g_tbls, data, *coding);
                break;
        case 0:
                break;
        default:
                break;
        }
}
/*
 * Update `rows` coding buffers with the contribution of one source buffer
 * (`data`, at source index `vec_i`) using the NEON multiply-add kernels.
 *
 * Buffers shorter than 16 bytes are handed to ec_encode_data_update_base().
 * Otherwise rows are consumed six at a time while more than six remain
 * (each step advances g_tbls by 6 * k * 32 bytes and coding by 6 pointers),
 * and the remaining 1..6 rows are handled by the kernel of matching width.
 */
void
ec_encode_data_update_neon(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                           unsigned char *data, unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        while (rows > 6) {
                gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        switch (rows) {
        case 6:
                gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 1:
                /* Single-row kernel takes the destination buffer itself. */
                gf_vect_mad_neon(len, k, vec_i, g_tbls, data, *coding);
                break;
        case 0:
                break;
        default:
                /* rows <= 0: nothing to do. */
                break;
        }
}
/* SVE */
/*
 * SVE kernels defined outside this file. The single-destination variants
 * take `unsigned char *dest`; the N-vector variants take an array of
 * destination pointers (`unsigned char **dest`).
 */
extern void
gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                     unsigned char *dest);
extern void
gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_8vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                unsigned char *dest);
extern void
gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
extern void
gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
extern void
gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
extern void
gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
extern void
gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
/*
 * Encode `rows` coding buffers from `k` source buffers using the SVE
 * dot-product kernels.
 *
 * Buffers shorter than 16 bytes are handed to ec_encode_data_base().
 * Otherwise rows are consumed six at a time while more than eleven remain
 * (each step advances g_tbls by 6 * k * 32 bytes and coding by 6 pointers).
 * A remainder of 8..11 rows is split into two kernel calls (N + 4); a
 * remainder of 1..7 rows is handled by a single kernel of matching width.
 */
void
ec_encode_data_sve(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                   unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        while (rows > 11) {
                gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        switch (rows) {
        case 11:
                /* 7 + 4 */
                gf_7vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 7 * k * 32;
                coding += 7;
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 10:
                /* 6 + 4 */
                gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 9:
                /* 5 + 4 */
                gf_5vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 5 * k * 32;
                coding += 5;
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 8:
                /* 4 + 4 */
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 4 * k * 32;
                coding += 4;
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 7:
                gf_7vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 6:
                gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 1:
                /* Single-row kernel takes the destination buffer itself. */
                gf_vect_dot_prod_sve(len, k, g_tbls, data, *coding);
                break;
        default:
                break;
        }
}
/*
 * Update `rows` coding buffers with the contribution of one source buffer
 * (`data`, at source index `vec_i`) using the SVE multiply-add kernels.
 *
 * Buffers shorter than 16 bytes are handed to ec_encode_data_update_base().
 * Otherwise rows are consumed six at a time while more than six remain
 * (each step advances g_tbls by 6 * k * 32 bytes and coding by 6 pointers),
 * and the remaining 1..6 rows are handled by the kernel of matching width.
 */
void
ec_encode_data_update_sve(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                          unsigned char *data, unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        while (rows > 6) {
                gf_6vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        switch (rows) {
        case 6:
                gf_6vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 1:
                /* Single-row kernel takes the destination buffer itself. */
                gf_vect_mad_sve(len, k, vec_i, g_tbls, data, *coding);
                break;
        default:
                break;
        }
}

View File

@ -28,322 +28,331 @@
**********************************************************************/
#include <limits.h>
#include <string.h> // for memset
#include <string.h> // for memset
#include <stdint.h>
#include "erasure_code.h"
#include "ec_base.h" // for GF tables
#include "ec_base.h" // for GF tables
/*
 * Expand a rows x k coefficient matrix `a` into multiplication tables.
 *
 * Coefficients are read sequentially from `a`; each one is expanded by
 * gf_vect_mul_init() into the next 32 bytes of `g_tbls`, so `g_tbls`
 * receives rows * k * 32 bytes in total.
 */
void
ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
        int i, j;

        for (i = 0; i < rows; i++) {
                for (j = 0; j < k; j++) {
                        gf_vect_mul_init(*a++, g_tbls);
                        g_tbls += 32;
                }
        }
}
unsigned char gf_mul(unsigned char a, unsigned char b)
unsigned char
gf_mul(unsigned char a, unsigned char b)
{
#ifndef GF_LARGE_TABLES
int i;
int i;
if ((a == 0) || (b == 0))
return 0;
if ((a == 0) || (b == 0))
return 0;
return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ? i - 255 : i];
return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ? i - 255 : i];
#else
return gf_mul_table_base[b * 256 + a];
return gf_mul_table_base[b * 256 + a];
#endif
}
unsigned char gf_inv(unsigned char a)
unsigned char
gf_inv(unsigned char a)
{
#ifndef GF_LARGE_TABLES
if (a == 0)
return 0;
if (a == 0)
return 0;
return gff_base[255 - gflog_base[a]];
return gff_base[255 - gflog_base[a]];
#else
return gf_inv_table_base[a];
return gf_inv_table_base[a];
#endif
}
/*
 * Fill the m x k matrix `a` (row-major, k bytes per row).
 *
 * The first k rows form the identity matrix. Each following row holds
 * successive powers 1, gen, gen^2, ... of a generator that starts at 1 and
 * is multiplied by 2 (via gf_mul) after every row.
 */
void
gf_gen_rs_matrix(unsigned char *a, int m, int k)
{
        int i, j;
        unsigned char p, gen = 1;

        memset(a, 0, k * m);
        for (i = 0; i < k; i++)
                a[k * i + i] = 1;

        for (i = k; i < m; i++) {
                p = 1;
                for (j = 0; j < k; j++) {
                        a[k * i + j] = p;
                        p = gf_mul(p, gen);
                }
                gen = gf_mul(gen, 2);
        }
}
/*
 * Fill the m x k matrix `a` (row-major, k bytes per row).
 *
 * The first k rows form the identity matrix; every element of the remaining
 * rows i = k..m-1 is gf_inv(i ^ j) for column j.
 */
void
gf_gen_cauchy1_matrix(unsigned char *a, int m, int k)
{
        int i, j;
        unsigned char *p;

        // Identity matrix in high position
        memset(a, 0, k * m);
        for (i = 0; i < k; i++)
                a[k * i + i] = 1;

        // For the rest choose 1/(i + j) | i != j
        p = &a[k * k];
        for (i = k; i < m; i++)
                for (j = 0; j < k; j++)
                        *p++ = gf_inv(i ^ j);
}
/*
 * Invert the n x n matrix `in_mat` into `out_mat` by row reduction.
 *
 * `in_mat` is modified in place during elimination. `out_mat` starts as the
 * identity matrix and receives the same row operations.
 *
 * Returns 0 on success, or -1 when no non-zero pivot can be found for some
 * column (the matrix is singular).
 */
int
gf_invert_matrix(unsigned char *in_mat, unsigned char *out_mat, const int n)
{
        int i, j, k;
        unsigned char temp;

        // Set out_mat[] to the identity matrix
        for (i = 0; i < n * n; i++)
                out_mat[i] = 0;

        for (i = 0; i < n; i++)
                out_mat[i * n + i] = 1;

        // Inverse
        for (i = 0; i < n; i++) {
                // Check for 0 in pivot element
                if (in_mat[i * n + i] == 0) {
                        // Find a row with non-zero in current column and swap
                        for (j = i + 1; j < n; j++)
                                if (in_mat[j * n + i])
                                        break;

                        if (j == n) // Couldn't find means it's singular
                                return -1;

                        for (k = 0; k < n; k++) { // Swap rows i,j
                                temp = in_mat[i * n + k];
                                in_mat[i * n + k] = in_mat[j * n + k];
                                in_mat[j * n + k] = temp;

                                temp = out_mat[i * n + k];
                                out_mat[i * n + k] = out_mat[j * n + k];
                                out_mat[j * n + k] = temp;
                        }
                }

                temp = gf_inv(in_mat[i * n + i]); // 1/pivot
                for (j = 0; j < n; j++) {         // Scale row i by 1/pivot
                        in_mat[i * n + j] = gf_mul(in_mat[i * n + j], temp);
                        out_mat[i * n + j] = gf_mul(out_mat[i * n + j], temp);
                }

                // Eliminate column i from every other row
                for (j = 0; j < n; j++) {
                        if (j == i)
                                continue;

                        temp = in_mat[j * n + i];
                        for (k = 0; k < n; k++) {
                                out_mat[j * n + k] ^= gf_mul(temp, out_mat[i * n + k]);
                                in_mat[j * n + k] ^= gf_mul(temp, in_mat[i * n + k]);
                        }
                }
        }
        return 0;
}
// Calculates const table gftbl in GF(2^8) from single input A
// gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, ... , A{f0} }
//
// tbl must have room for 32 bytes: tbl[0..15] holds c times each low-nibble
// value, tbl[16..31] holds c times each high-nibble value. Doubling in the
// field is a left shift with a conditional XOR of 0x1d when the top bit was set.
void
gf_vect_mul_init(unsigned char c, unsigned char *tbl)
{
        unsigned char c2 = (c << 1) ^ ((c & 0x80) ? 0x1d : 0);   // Mult by GF{2}
        unsigned char c4 = (c2 << 1) ^ ((c2 & 0x80) ? 0x1d : 0); // Mult by GF{2}
        unsigned char c8 = (c4 << 1) ^ ((c4 & 0x80) ? 0x1d : 0); // Mult by GF{2}

#if (__WORDSIZE == 64 || _WIN64 || __x86_64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
        unsigned long long v1, v2, v4, v8;
        unsigned long long v10, v20, v40, v80;
        unsigned char c17, c18, c20, c24;
        // Built in a local array and copied out with memcpy, so tbl is never
        // written through an unsigned long long pointer (no alignment or
        // effective-type assumptions about the caller's buffer).
        unsigned long long t[4];

        // Each multiply replicates the byte into the lanes whose index has
        // the corresponding bit set; XOR-ing them yields 8 table bytes at once.
        v1 = c * 0x0100010001000100ull;
        v2 = c2 * 0x0101000001010000ull;
        v4 = c4 * 0x0101010100000000ull;
        v8 = c8 * 0x0101010101010101ull;

        v4 = v1 ^ v2 ^ v4;
        t[0] = v4;
        t[1] = v8 ^ v4;

        c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0);   // Mult by GF{2}
        c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); // Mult by GF{2}
        c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); // Mult by GF{2}
        c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); // Mult by GF{2}

        v10 = c17 * 0x0100010001000100ull;
        v20 = c18 * 0x0101000001010000ull;
        v40 = c20 * 0x0101010100000000ull;
        v80 = c24 * 0x0101010101010101ull;

        v40 = v10 ^ v20 ^ v40;
        t[2] = v40;
        t[3] = v80 ^ v40;

        memcpy(tbl, t, sizeof(t));

#else // 32-bit or other
        unsigned char c3, c5, c6, c7, c9, c10, c11, c12, c13, c14, c15;
        unsigned char c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31;

        c3 = c2 ^ c;
        c5 = c4 ^ c;
        c6 = c4 ^ c2;
        c7 = c4 ^ c3;

        c9 = c8 ^ c;
        c10 = c8 ^ c2;
        c11 = c8 ^ c3;
        c12 = c8 ^ c4;
        c13 = c8 ^ c5;
        c14 = c8 ^ c6;
        c15 = c8 ^ c7;

        tbl[0] = 0;
        tbl[1] = c;
        tbl[2] = c2;
        tbl[3] = c3;
        tbl[4] = c4;
        tbl[5] = c5;
        tbl[6] = c6;
        tbl[7] = c7;
        tbl[8] = c8;
        tbl[9] = c9;
        tbl[10] = c10;
        tbl[11] = c11;
        tbl[12] = c12;
        tbl[13] = c13;
        tbl[14] = c14;
        tbl[15] = c15;

        c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0);   // Mult by GF{2}
        c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); // Mult by GF{2}
        c19 = c18 ^ c17;
        c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); // Mult by GF{2}
        c21 = c20 ^ c17;
        c22 = c20 ^ c18;
        c23 = c20 ^ c19;
        c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); // Mult by GF{2}
        c25 = c24 ^ c17;
        c26 = c24 ^ c18;
        c27 = c24 ^ c19;
        c28 = c24 ^ c20;
        c29 = c24 ^ c21;
        c30 = c24 ^ c22;
        c31 = c24 ^ c23;

        tbl[16] = 0;
        tbl[17] = c17;
        tbl[18] = c18;
        tbl[19] = c19;
        tbl[20] = c20;
        tbl[21] = c21;
        tbl[22] = c22;
        tbl[23] = c23;
        tbl[24] = c24;
        tbl[25] = c25;
        tbl[26] = c26;
        tbl[27] = c27;
        tbl[28] = c28;
        tbl[29] = c29;
        tbl[30] = c30;
        tbl[31] = c31;

#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
}
/*
 * Reference dot product over `vlen` source buffers of `len` bytes each.
 *
 * For every byte position i, dest[i] is the XOR over j of
 * gf_mul(src[j][i], v[j * 32 + 1]); i.e. the coefficient for source j is
 * read from the second byte of its 32-byte table in `v`.
 */
void
gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest)
{
        int i, j;
        unsigned char s;

        for (i = 0; i < len; i++) {
                s = 0;
                for (j = 0; j < vlen; j++)
                        s ^= gf_mul(src[j][i], v[j * 32 + 1]);

                dest[i] = s;
        }
}
/*
 * Reference multiply-accumulate for one source buffer.
 *
 * For every byte position i, XORs gf_mul(src[i], v[vec_i * 32 + 1]) into
 * dest[i]. The `vec` parameter is not used by this implementation.
 */
void
gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
                 unsigned char *dest)
{
        int i;
        unsigned char s;

        for (i = 0; i < len; i++) {
                s = dest[i];
                s ^= gf_mul(src[i], v[vec_i * 32 + 1]);
                dest[i] = s;
        }
}
/*
 * Reference encoder: produce `dests` output buffers from `srcs` input
 * buffers of `len` bytes each.
 *
 * For output l and byte position i, dest[l][i] is the XOR over j of
 * gf_mul(src[j][i], v[j * 32 + l * srcs * 32 + 1]); the coefficient for
 * (output l, source j) is the second byte of that pair's 32-byte table.
 */
void
ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
                    unsigned char **dest)
{
        int i, j, l;
        unsigned char s;

        for (l = 0; l < dests; l++) {
                for (i = 0; i < len; i++) {
                        s = 0;
                        for (j = 0; j < srcs; j++)
                                s ^= gf_mul(src[j][i], v[j * 32 + l * srcs * 32 + 1]);

                        dest[l][i] = s;
                }
        }
}
/*
 * Reference incremental encoder: fold one source buffer into `rows` outputs.
 *
 * For output l and byte position i, XORs
 * gf_mul(data[i], v[vec_i * 32 + l * k * 32 + 1]) into dest[l][i].
 */
void
ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
                           unsigned char *data, unsigned char **dest)
{
        int i, l;
        unsigned char s;

        for (l = 0; l < rows; l++) {
                for (i = 0; i < len; i++) {
                        s = dest[l][i];
                        s ^= gf_mul(data[i], v[vec_i * 32 + l * k * 32 + 1]);

                        dest[l][i] = s;
                }
        }
}
/*
 * Reference vector-by-constant multiply.
 *
 * Multiplies each of the `len` bytes in `src` by the constant stored at
 * a[1] and writes the products to `dest`.
 *
 * Returns 0 on success, or -1 (without touching dest) when len is not a
 * multiple of 32.
 */
int
gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
{
        // 2nd element of table array is ref value used to fill it in
        unsigned char c = a[1];

        // Len must be aligned to 32B
        if ((len % 32) != 0) {
                return -1;
        }

        while (len-- > 0)
                *dest++ = gf_mul(c, *src++);
        return 0;
}

File diff suppressed because it is too large Load Diff

View File

@ -29,37 +29,40 @@
#include "erasure_code.h"
/* Public entry point: forwards unchanged to gf_vect_dot_prod_base(). */
void
gf_vect_dot_prod(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest)
{
        gf_vect_dot_prod_base(len, vlen, v, src, dest);
}
/* Public entry point: forwards unchanged to gf_vect_mad_base(). */
void
gf_vect_mad(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, unsigned char *dest)
{
        gf_vect_mad_base(len, vec, vec_i, v, src, dest);
}
/* Public entry point: forwards unchanged to ec_encode_data_base(). */
void
ec_encode_data(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
               unsigned char **dest)
{
        ec_encode_data_base(len, srcs, dests, v, src, dest);
}
/*
 * Forward all arguments unchanged to ec_encode_data_update_base().
 * The base routine is called exactly once.
 */
void
ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v, unsigned char *data,
                      unsigned char **dest)
{
        ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
}
/*
 * Forward to gf_vect_mul_base(), passing the untyped src/dest pointers as
 * unsigned char pointers, and return that function's result.
 */
int
gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
{
        return gf_vect_mul_base(len, a, (unsigned char *) src, (unsigned char *) dest);
}
/*
 * Forward all arguments unchanged to ec_init_tables_base().
 *
 * This function returns void, so the base routine is called as a plain
 * statement: C does not allow `return <expression>;` in a void function.
 */
void
ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
        ec_init_tables_base(k, rows, a, g_tbls);
}

View File

@ -28,387 +28,423 @@
**********************************************************************/
#include <limits.h>
#include "erasure_code.h"
#include "ec_base.h" /* for GF tables */
#include "ec_base.h" /* for GF tables */
#if __x86_64__ || __i386__ || _M_X64 || _M_IX86
/*
 * Produce `rows` coding buffers from the source buffers in `data` using the
 * SSE dot-product kernels.
 *
 * When len is below 16 the whole request is handed to ec_encode_data_base().
 * Otherwise rows are consumed six at a time by gf_6vect_dot_prod_sse(),
 * advancing g_tbls by 6 * k * 32 bytes and coding by six pointers per pass,
 * and the 0-5 rows left over go to the kernel of matching width.
 */
void
ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                   unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        while (rows >= 6) {
                gf_6vect_dot_prod_sse(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        // Remaining rows (fewer than six)
        switch (rows) {
        case 5:
                gf_5vect_dot_prod_sse(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
                break;
        case 1:
                // Single-row kernel takes the destination buffer itself
                gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
                break;
        case 0:
        default:
                break;
        }
}
/*
 * Produce `rows` coding buffers from the source buffers in `data` using the
 * AVX dot-product kernels.
 *
 * When len is below 16 the whole request is handed to ec_encode_data_base().
 * Otherwise rows are consumed six at a time by gf_6vect_dot_prod_avx(),
 * advancing g_tbls by 6 * k * 32 bytes and coding by six pointers per pass,
 * and the 0-5 rows left over go to the kernel of matching width.
 */
void
ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                   unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        while (rows >= 6) {
                gf_6vect_dot_prod_avx(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        // Remaining rows (fewer than six)
        switch (rows) {
        case 5:
                gf_5vect_dot_prod_avx(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding);
                break;
        case 1:
                // Single-row kernel takes the destination buffer itself
                gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding);
                break;
        case 0:
        default:
                break;
        }
}
/*
 * Produce `rows` coding buffers from the source buffers in `data` using the
 * AVX2 dot-product kernels.
 *
 * When len is below 32 the whole request is handed to ec_encode_data_base().
 * Otherwise rows are consumed six at a time by gf_6vect_dot_prod_avx2(),
 * advancing g_tbls by 6 * k * 32 bytes and coding by six pointers per pass,
 * and the 0-5 rows left over go to the kernel of matching width.
 */
void
ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                    unsigned char **coding)
{
        if (len < 32) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        while (rows >= 6) {
                gf_6vect_dot_prod_avx2(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        // Remaining rows (fewer than six)
        switch (rows) {
        case 5:
                gf_5vect_dot_prod_avx2(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding);
                break;
        case 1:
                // Single-row kernel takes the destination buffer itself
                gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding);
                break;
        case 0:
        default:
                break;
        }
}
#ifdef HAVE_AS_KNOWS_AVX512
extern int gf_vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char *dest);
extern int gf_2vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern int gf_3vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern int gf_4vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern int gf_5vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern int gf_6vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void gf_vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char *dest);
extern void gf_2vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_3vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_4vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_5vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_6vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern int
gf_vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char *dest);
extern int
gf_2vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern int
gf_3vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern int
gf_4vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern int
gf_5vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern int
gf_6vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest);
extern void
gf_2vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_3vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_4vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_5vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_6vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
/*
 * Produce `rows` coding buffers from the source buffers in `data` using the
 * AVX512 dot-product kernels.
 *
 * When len is below 64 the whole request is handed to ec_encode_data_base().
 * Otherwise rows are consumed six at a time by gf_6vect_dot_prod_avx512(),
 * advancing g_tbls by 6 * k * 32 bytes and coding by six pointers per pass,
 * and the 0-5 rows left over go to the kernel of matching width.
 */
void
ec_encode_data_avx512(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                      unsigned char **coding)
{
        if (len < 64) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        while (rows >= 6) {
                gf_6vect_dot_prod_avx512(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        // Remaining rows (fewer than six)
        switch (rows) {
        case 5:
                gf_5vect_dot_prod_avx512(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_avx512(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_avx512(len, k, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_dot_prod_avx512(len, k, g_tbls, data, coding);
                break;
        case 1:
                // Single-row kernel takes the destination buffer itself
                gf_vect_dot_prod_avx512(len, k, g_tbls, data, *coding);
                break;
        case 0:
        default:
                break;
        }
}
/*
 * Update `rows` coding buffers with the contribution of one source buffer
 * (`data`, source index vec_i) using the AVX512 multiply-add kernels.
 *
 * When len is below 64 the whole request is handed to
 * ec_encode_data_update_base(). Otherwise rows are consumed six at a time by
 * gf_6vect_mad_avx512(), advancing g_tbls by 6 * k * 32 bytes and coding by
 * six pointers per pass, and the 0-5 rows left over go to the kernel of
 * matching width. Each coding row is passed to exactly one kernel call.
 */
void
ec_encode_data_update_avx512(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                             unsigned char *data, unsigned char **coding)
{
        if (len < 64) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        while (rows >= 6) {
                gf_6vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        // Remaining rows (fewer than six)
        switch (rows) {
        case 5:
                gf_5vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
                break;
        case 1:
                // Single-row kernel takes the destination buffer itself
                gf_vect_mad_avx512(len, k, vec_i, g_tbls, data, *coding);
                break;
        case 0:
        default:
                break;
        }
}
#if AS_FEATURE_LEVEL >= 10
extern void gf_vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char *dest);
extern void gf_2vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void gf_3vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void gf_4vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void gf_5vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void gf_6vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void
gf_vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char *dest);
extern void
gf_2vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_3vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_4vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_5vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_6vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void gf_vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char *dest);
extern void gf_2vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_3vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_4vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_5vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_6vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void
gf_vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest);
extern void
gf_2vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_3vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_4vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_5vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_6vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void gf_vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char *dest);
extern void gf_2vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void gf_3vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void gf_vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char *dest);
extern void gf_2vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_3vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_4vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_5vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void
gf_vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char *dest);
extern void
gf_2vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_3vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest);
extern void
gf_2vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_3vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_4vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_5vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
void ec_init_tables_gfni(int k, int rows, unsigned char *a, unsigned char *g_tbls)
void
ec_init_tables_gfni(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
int i, j;
int i, j;
uint64_t *g64 = (uint64_t *) g_tbls;
for (i = 0; i < rows; i++)
for (j = 0; j < k; j++)
*(g64++) = gf_table_gfni[*a++];
uint64_t *g64 = (uint64_t *) g_tbls;
for (i = 0; i < rows; i++)
for (j = 0; j < k; j++)
*(g64++) = gf_table_gfni[*a++];
}
/*
 * Produce `rows` coding buffers from the source buffers in `data` using the
 * AVX512 GFNI dot-product kernels.
 *
 * Rows are consumed six at a time by gf_6vect_dot_prod_avx512_gfni(),
 * advancing g_tbls by 6 * k * 8 bytes and coding by six pointers per pass;
 * the 0-5 rows left over go to the kernel of matching width. There is no
 * minimum-length fallback in this function.
 */
void
ec_encode_data_avx512_gfni(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                           unsigned char **coding)
{
        while (rows >= 6) {
                gf_6vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 8;
                coding += 6;
                rows -= 6;
        }

        // Remaining rows (fewer than six)
        switch (rows) {
        case 5:
                gf_5vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
                break;
        case 1:
                // Single-row kernel takes the destination buffer itself
                gf_vect_dot_prod_avx512_gfni(len, k, g_tbls, data, *coding);
                break;
        case 0:
        default:
                break;
        }
}
/*
 * Produce `rows` coding buffers from the source buffers in `data` using the
 * AVX2 GFNI dot-product kernels.
 *
 * Rows are consumed three at a time by gf_3vect_dot_prod_avx2_gfni(),
 * advancing g_tbls by 3 * k * 8 bytes and coding by three pointers per pass;
 * the 0-2 rows left over go to the kernel of matching width. There is no
 * minimum-length fallback in this function.
 */
void
ec_encode_data_avx2_gfni(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                         unsigned char **coding)
{
        while (rows >= 3) {
                gf_3vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
                g_tbls += 3 * k * 8;
                coding += 3;
                rows -= 3;
        }

        // Remaining rows (fewer than three)
        switch (rows) {
        case 2:
                gf_2vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
                break;
        case 1:
                // Single-row kernel takes the destination buffer itself
                gf_vect_dot_prod_avx2_gfni(len, k, g_tbls, data, *coding);
                break;
        case 0:
        default:
                break;
        }
}
/*
 * Update `rows` coding buffers with the contribution of one source buffer
 * (`data`, source index vec_i) using the AVX512 GFNI multiply-add kernels.
 *
 * Rows are consumed six at a time by gf_6vect_mad_avx512_gfni(), advancing
 * g_tbls by 6 * k * 8 bytes and coding by six pointers per pass; the 0-5 rows
 * left over go to the kernel of matching width. Each coding row is passed to
 * exactly one kernel call.
 */
void
ec_encode_data_update_avx512_gfni(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                                  unsigned char *data, unsigned char **coding)
{
        while (rows >= 6) {
                gf_6vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 8;
                coding += 6;
                rows -= 6;
        }

        // Remaining rows (fewer than six)
        switch (rows) {
        case 5:
                gf_5vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
                break;
        case 1:
                // Single-row kernel takes the destination buffer itself
                gf_vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, *coding);
                break;
        case 0:
        default:
                break;
        }
}
/*
 * Update `rows` coding buffers with the contribution of one source buffer
 * (`data`, source index vec_i) using the AVX2 GFNI multiply-add kernels.
 *
 * Rows are consumed five at a time by gf_5vect_mad_avx2_gfni(), advancing
 * g_tbls by 5 * k * 8 bytes and coding by five pointers per pass; the 0-4 rows
 * left over go to the kernel of matching width. Each coding row is passed to
 * exactly one kernel call.
 */
void
ec_encode_data_update_avx2_gfni(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                                unsigned char *data, unsigned char **coding)
{
        while (rows >= 5) {
                gf_5vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 5 * k * 8;
                coding += 5;
                rows -= 5;
        }

        // Remaining rows (fewer than five)
        switch (rows) {
        case 4:
                gf_4vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
                break;
        case 1:
                // Single-row kernel takes the destination buffer itself
                gf_vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, *coding);
                break;
        case 0:
        default:
                break;
        }
}
#endif // AS_FEATURE_LEVEL >= 10
@ -416,119 +452,119 @@ void ec_encode_data_update_avx2_gfni(int len, int k, int rows, int vec_i,
#if __WORDSIZE == 64 || _WIN64 || __x86_64__
/*
 * Update `rows` coding buffers with the contribution of one source buffer
 * (`data`, source index vec_i) using the SSE multiply-add kernels.
 *
 * When len is below 16 the whole request is handed to
 * ec_encode_data_update_base(). Otherwise, while more than six rows remain,
 * gf_6vect_mad_sse() handles six of them and g_tbls / coding advance by
 * 6 * k * 32 bytes / six pointers; the final 0-6 rows go to the kernel of
 * matching width. Each coding row is passed to exactly one kernel call.
 */
void
ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                          unsigned char *data, unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        while (rows > 6) {
                gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        // Remaining rows (six or fewer)
        switch (rows) {
        case 6:
                gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
                break;
        case 1:
                // Single-row kernel takes the destination buffer itself
                gf_vect_mad_sse(len, k, vec_i, g_tbls, data, *coding);
                break;
        case 0:
        default:
                break;
        }
}
/*
 * Update `rows` coding buffers with the contribution of one source buffer
 * (`data`, source index vec_i) using the AVX multiply-add kernels.
 *
 * When len is below 16 the whole request is handed to
 * ec_encode_data_update_base(). Otherwise, while more than six rows remain,
 * gf_6vect_mad_avx() handles six of them and g_tbls / coding advance by
 * 6 * k * 32 bytes / six pointers; the final 0-6 rows go to the kernel of
 * matching width. Each coding row is passed to exactly one kernel call.
 */
void
ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                          unsigned char *data, unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        while (rows > 6) {
                gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        // Remaining rows (six or fewer)
        switch (rows) {
        case 6:
                gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
                break;
        case 1:
                // Single-row kernel takes the destination buffer itself
                gf_vect_mad_avx(len, k, vec_i, g_tbls, data, *coding);
                break;
        case 0:
        default:
                break;
        }
}
/*
 * Update `rows` coding buffers with the contribution of one source buffer
 * (`data`, source index vec_i) using the AVX2 multiply-add kernels.
 *
 * When len is below 32 the whole request is handed to
 * ec_encode_data_update_base(). Otherwise, while more than six rows remain,
 * gf_6vect_mad_avx2() handles six of them and g_tbls / coding advance by
 * 6 * k * 32 bytes / six pointers; the final 0-6 rows go to the kernel of
 * matching width. Each coding row is passed to exactly one kernel call.
 */
void
ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                           unsigned char *data, unsigned char **coding)
{
        if (len < 32) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }

        while (rows > 6) {
                gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        // Remaining rows (six or fewer)
        switch (rows) {
        case 6:
                gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
                break;
        case 1:
                // Single-row kernel takes the destination buffer itself
                gf_vect_mad_avx2(len, k, vec_i, g_tbls, data, *coding);
                break;
        case 0:
        default:
                break;
        }
}
#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__

View File

@ -29,27 +29,27 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include <string.h> // for memset, memcmp
#include <assert.h>
#include "erasure_code.h"
#include "test.h"
#ifndef GT_L3_CACHE
// Parenthesized so the value stays intact inside any larger expression
#define GT_L3_CACHE (32 * 1024 * 1024) /* some number > last level cache */
#endif

#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset
#define TEST_SOURCES  32
// Per-buffer length: 128 KiB split over m buffers, rounded down to a multiple of 64
#define TEST_LEN(m)   ((128 * 1024 / (m)) & ~(64 - 1))
#define TEST_TYPE_STR "_warm"
#elif defined(COLD_TEST)
// Uncached test. Pull from large mem base.
#define TEST_SOURCES  32
// Per-buffer length: GT_L3_CACHE split over m buffers, rounded down to a multiple of 64
#define TEST_LEN(m)   ((GT_L3_CACHE / (m)) & ~(64 - 1))
#define TEST_TYPE_STR "_cold"
#elif defined(TEST_CUSTOM)
// Only the label is set here; TEST_SOURCES and TEST_LEN are not defined in this branch
#define TEST_TYPE_STR "_cus"
#endif
#define MMAX TEST_SOURCES
@ -59,117 +59,120 @@
typedef unsigned char u8;
/*
 * One encode pass for the benchmark: build the tables from the rows of `a`
 * starting at a[k * k] with ec_init_tables_base(), then run
 * ec_encode_data_base() over TEST_LEN(m) bytes, reading buffs[0..k-1] and
 * writing the m - k buffers starting at buffs[k].
 */
void
ec_encode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs)
{
        ec_init_tables_base(k, m - k, &a[k * k], g_tbls);
        ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
}
/*
 * One decode pass for the benchmark.
 *
 * m, k         - total and source buffer counts.
 * a            - matrix read as rows of k bytes.
 * g_tbls       - scratch table storage filled by ec_init_tables_base().
 * buffs        - buffer pointers; entries whose src_in_err flag is zero are
 *                used as the recovery inputs.
 * src_in_err   - per-buffer flag; non-zero entries are skipped.
 * src_err_list - indices of the nerrs buffers to recover.
 * temp_buffs   - nerrs output buffers receiving the recovered data.
 *
 * Returns BAD_MATRIX when gf_invert_matrix() reports failure, otherwise 0.
 */
int
ec_decode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs, u8 *src_in_err, u8 *src_err_list,
               int nerrs, u8 **temp_buffs)
{
        int i, j, r;
        u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
        u8 *recov[TEST_SOURCES];

        // Construct b by removing error rows
        for (i = 0, r = 0; i < k; i++, r++) {
                while (src_in_err[r])
                        r++;
                recov[i] = buffs[r];
                for (j = 0; j < k; j++)
                        b[k * i + j] = a[k * r + j];
        }

        if (gf_invert_matrix(b, d, k) < 0)
                return BAD_MATRIX;

        // Pick the rows of the inverse that correspond to the buffers to recover
        for (i = 0; i < nerrs; i++)
                for (j = 0; j < k; j++)
                        c[k * i + j] = d[k * src_err_list[i] + j];

        // Recover data
        ec_init_tables_base(k, nerrs, c, g_tbls);
        ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);

        return 0;
}
int main(int argc, char *argv[])
int
main(int argc, char *argv[])
{
int i, j, m, k, nerrs, check;
void *buf;
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
u8 a[MMAX * KMAX];
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 src_err_list[TEST_SOURCES];
struct perf start;
int i, j, m, k, nerrs, check;
void *buf;
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
u8 a[MMAX * KMAX];
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 src_err_list[TEST_SOURCES];
struct perf start;
// Pick test parameters
m = 14;
k = 10;
nerrs = 4;
const u8 err_list[] = { 2, 4, 5, 7 };
// Pick test parameters
m = 14;
k = 10;
nerrs = 4;
const u8 err_list[] = { 2, 4, 5, 7 };
printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
// check input parameters
assert(!(m > MMAX || k > KMAX || nerrs > (m - k)));
// check input parameters
assert(!(m > MMAX || k > KMAX || nerrs > (m - k)));
memcpy(src_err_list, err_list, nerrs);
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0; i < nerrs; i++)
src_in_err[src_err_list[i]] = 1;
memcpy(src_err_list, err_list, nerrs);
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0; i < nerrs; i++)
src_in_err[src_err_list[i]] = 1;
// Allocate the arrays
for (i = 0; i < m; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("alloc error: Fail\n");
return -1;
}
buffs[i] = buf;
}
// Allocate the arrays
for (i = 0; i < m; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("alloc error: Fail\n");
return -1;
}
buffs[i] = buf;
}
for (i = 0; i < (m - k); i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("alloc error: Fail\n");
return -1;
}
temp_buffs[i] = buf;
}
for (i = 0; i < (m - k); i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("alloc error: Fail\n");
return -1;
}
temp_buffs[i] = buf;
}
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN(m); j++)
buffs[i][j] = rand();
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN(m); j++)
buffs[i][j] = rand();
gf_gen_rs_matrix(a, m, k);
gf_gen_rs_matrix(a, m, k);
// Start encode test
BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs));
printf("erasure_code_base_encode" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (m));
// Start encode test
BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs));
printf("erasure_code_base_encode" TEST_TYPE_STR ": ");
perf_print(start, (long long) (TEST_LEN(m)) * (m));
// Start decode test
BENCHMARK(&start, BENCHMARK_TIME, check =
ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
temp_buffs));
// Start decode test
BENCHMARK(&start, BENCHMARK_TIME,
check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
temp_buffs));
if (check == BAD_MATRIX) {
printf("BAD MATRIX\n");
return check;
}
if (check == BAD_MATRIX) {
printf("BAD MATRIX\n");
return check;
}
for (i = 0; i < nerrs; i++) {
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
return -1;
}
}
for (i = 0; i < nerrs; i++) {
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
return -1;
}
}
printf("erasure_code_base_decode" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
printf("erasure_code_base_decode" TEST_TYPE_STR ": ");
perf_print(start, (long long) (TEST_LEN(m)) * (k + nerrs));
printf("done all: Pass\n");
return 0;
printf("done all: Pass\n");
return 0;
}

File diff suppressed because it is too large Load Diff

View File

@ -29,29 +29,29 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include <string.h> // for memset, memcmp
#include "erasure_code.h"
#include "test.h"
/*
 * Test sizing configuration.
 *
 * Default (warm): small data set that is looped over many times.
 * COLD_TEST:      data set sized from GT_L3_CACHE.
 * TEST_CUSTOM:    only the type string is defined here; TEST_SOURCES and
 *                 TEST_LEN are not defined in this block.
 *
 * Macro arguments and expression-valued macros are parenthesised so that a
 * call such as TEST_LEN(k + p) divides by the whole sum.
 */
#ifndef GT_L3_CACHE
#define GT_L3_CACHE (32 * 1024 * 1024) /* some number > last level cache */
#endif

#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset
#define TEST_SOURCES  32
#define TEST_LEN(m)   ((128 * 1024 / (m)) & ~(64 - 1))
#define TEST_TYPE_STR "_warm"
#elif defined(COLD_TEST)
// Uncached test. Pull from large mem base.
#define TEST_SOURCES  32
#define TEST_LEN(m)   ((GT_L3_CACHE / (m)) & ~(64 - 1))
#define TEST_TYPE_STR "_cold"
#elif defined(TEST_CUSTOM)
#define TEST_TYPE_STR "_cus"
#endif

#ifndef TEST_SEED
#define TEST_SEED 0x1234
#endif
#define MMAX TEST_SOURCES
@ -61,215 +61,219 @@
typedef unsigned char u8;
/*
 * Print the command-line help text to stderr.
 *
 * app_name: program name inserted into the "Usage:" line.
 */
void
usage(const char *app_name)
{
        fprintf(stderr,
                "Usage: %s [options]\n"
                " -h Help\n"
                " -k <val> Number of source buffers\n"
                " -p <val> Number of parity buffers\n"
                " -e <val> Number of simulated buffers with errors (cannot be higher than p or "
                "k)\n",
                app_name);
}
/*
 * Build the encode tables from the rows of `a` starting at row k, then time
 * ec_encode_data() producing m - k output buffers at &buffs[k] from the k
 * source buffers at buffs[0..k-1].
 *
 * The timing result is stored through `start`; table initialisation is done
 * outside the timed region.
 */
void
ec_encode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs, struct perf *start)
{
        ec_init_tables(k, m - k, &a[k * k], g_tbls);
        BENCHMARK(start, BENCHMARK_TIME,
                  ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]));
}
/*
 * Recover the erased buffers and time the recovery encode.
 *
 * src_in_err[r] is non-zero when buffer r is erased; src_err_list[0..nerrs-1]
 * holds the erased buffer indices. Recovered data is written to
 * temp_buffs[0..nerrs-1], in src_err_list order. The timing result is stored
 * through `start`.
 *
 * Returns 0 on success, or BAD_MATRIX when gf_invert_matrix() fails.
 */
int
ec_decode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs, u8 *src_in_err, u8 *src_err_list,
               int nerrs, u8 **temp_buffs, struct perf *start)
{
        int i, j, r;
        u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
        u8 *recov[TEST_SOURCES];

        // Construct b by removing error rows
        for (i = 0, r = 0; i < k; i++, r++) {
                // skip over erased buffers; r indexes the surviving rows of a
                while (src_in_err[r])
                        r++;
                recov[i] = buffs[r];
                for (j = 0; j < k; j++)
                        b[k * i + j] = a[k * r + j];
        }

        if (gf_invert_matrix(b, d, k) < 0)
                return BAD_MATRIX;

        // c takes, for each erased index, the matching row of the inverse d
        for (i = 0; i < nerrs; i++)
                for (j = 0; j < k; j++)
                        c[k * i + j] = d[k * src_err_list[i] + j];

        // Recover data
        ec_init_tables(k, nerrs, c, g_tbls);
        BENCHMARK(start, BENCHMARK_TIME,
                  ec_encode_data(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs));

        return 0;
}
int main(int argc, char *argv[])
int
main(int argc, char *argv[])
{
int i, j, m, k, p, nerrs, check, ret = -1;
void *buf;
u8 *temp_buffs[TEST_SOURCES] = { NULL };
u8 *buffs[TEST_SOURCES] = { NULL };
u8 a[MMAX * KMAX];
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 src_err_list[TEST_SOURCES];
struct perf start;
int i, j, m, k, p, nerrs, check, ret = -1;
void *buf;
u8 *temp_buffs[TEST_SOURCES] = { NULL };
u8 *buffs[TEST_SOURCES] = { NULL };
u8 a[MMAX * KMAX];
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 src_err_list[TEST_SOURCES];
struct perf start;
/* Set default parameters */
k = 8;
p = 6;
nerrs = 4;
/* Set default parameters */
k = 8;
p = 6;
nerrs = 4;
/* Parse arguments */
for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-k") == 0) {
k = atoi(argv[++i]);
} else if (strcmp(argv[i], "-p") == 0) {
p = atoi(argv[++i]);
} else if (strcmp(argv[i], "-e") == 0) {
nerrs = atoi(argv[++i]);
} else if (strcmp(argv[i], "-h") == 0) {
usage(argv[0]);
return 0;
} else {
usage(argv[0]);
return -1;
}
}
/* Parse arguments */
for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-k") == 0) {
k = atoi(argv[++i]);
} else if (strcmp(argv[i], "-p") == 0) {
p = atoi(argv[++i]);
} else if (strcmp(argv[i], "-e") == 0) {
nerrs = atoi(argv[++i]);
} else if (strcmp(argv[i], "-h") == 0) {
usage(argv[0]);
return 0;
} else {
usage(argv[0]);
return -1;
}
}
if (nerrs > k) {
printf
("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
nerrs, k);
return -1;
}
if (nerrs > k) {
printf("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
nerrs, k);
return -1;
}
if (k <= 0) {
printf("Number of source buffers (%d) must be > 0\n", k);
return -1;
}
if (k <= 0) {
printf("Number of source buffers (%d) must be > 0\n", k);
return -1;
}
if (p <= 0) {
printf("Number of parity buffers (%d) must be > 0\n", p);
return -1;
}
if (p <= 0) {
printf("Number of parity buffers (%d) must be > 0\n", p);
return -1;
}
if (nerrs <= 0) {
printf("Number of errors (%d) must be > 0\n", nerrs);
return -1;
}
if (nerrs <= 0) {
printf("Number of errors (%d) must be > 0\n", nerrs);
return -1;
}
if (nerrs > p) {
printf
("Number of errors (%d) cannot be higher than number of parity buffers (%d)\n",
nerrs, p);
return -1;
}
if (nerrs > p) {
printf("Number of errors (%d) cannot be higher than number of parity buffers "
"(%d)\n",
nerrs, p);
return -1;
}
m = k + p;
m = k + p;
if (m > MMAX) {
printf("Number of total buffers (data and parity) cannot be higher than %d\n",
MMAX);
return -1;
}
if (m > MMAX) {
printf("Number of total buffers (data and parity) cannot be higher than %d\n",
MMAX);
return -1;
}
u8 *err_list = malloc((size_t)nerrs);
if (err_list == NULL) {
printf("Error allocating list of array of error indices\n");
return -1;
}
u8 *err_list = malloc((size_t) nerrs);
if (err_list == NULL) {
printf("Error allocating list of array of error indices\n");
return -1;
}
srand(TEST_SEED);
srand(TEST_SEED);
for (i = 0; i < nerrs;) {
u8 next_err = rand() % k;
for (j = 0; j < i; j++)
if (next_err == err_list[j])
break;
if (j != i)
continue;
err_list[i++] = next_err;
}
for (i = 0; i < nerrs;) {
u8 next_err = rand() % k;
for (j = 0; j < i; j++)
if (next_err == err_list[j])
break;
if (j != i)
continue;
err_list[i++] = next_err;
}
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k,
p, nerrs);
for (i = 0; i < nerrs; i++)
printf("%d ", (int)err_list[i]);
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k, p,
nerrs);
for (i = 0; i < nerrs; i++)
printf("%d ", (int) err_list[i]);
printf("])\n");
printf("])\n");
printf("erasure_code_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
printf("erasure_code_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
memcpy(src_err_list, err_list, nerrs);
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0; i < nerrs; i++)
src_in_err[src_err_list[i]] = 1;
memcpy(src_err_list, err_list, nerrs);
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0; i < nerrs; i++)
src_in_err[src_err_list[i]] = 1;
// Allocate the arrays
for (i = 0; i < m; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n");
goto exit;
}
buffs[i] = buf;
}
// Allocate the arrays
for (i = 0; i < m; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n");
goto exit;
}
buffs[i] = buf;
}
for (i = 0; i < p; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n");
goto exit;
}
temp_buffs[i] = buf;
}
for (i = 0; i < p; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n");
goto exit;
}
temp_buffs[i] = buf;
}
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN(m); j++)
buffs[i][j] = rand();
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN(m); j++)
buffs[i][j] = rand();
gf_gen_rs_matrix(a, m, k);
gf_gen_rs_matrix(a, m, k);
// Start encode test
ec_encode_perf(m, k, a, g_tbls, buffs, &start);
printf("erasure_code_encode" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (m));
// Start encode test
ec_encode_perf(m, k, a, g_tbls, buffs, &start);
printf("erasure_code_encode" TEST_TYPE_STR ": ");
perf_print(start, (long long) (TEST_LEN(m)) * (m));
// Start decode test
check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
temp_buffs, &start);
// Start decode test
check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs, temp_buffs,
&start);
if (check == BAD_MATRIX) {
printf("BAD MATRIX\n");
ret = check;
goto exit;
}
if (check == BAD_MATRIX) {
printf("BAD MATRIX\n");
ret = check;
goto exit;
}
for (i = 0; i < nerrs; i++) {
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
goto exit;
}
}
for (i = 0; i < nerrs; i++) {
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
goto exit;
}
}
printf("erasure_code_decode" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
printf("erasure_code_decode" TEST_TYPE_STR ": ");
perf_print(start, (long long) (TEST_LEN(m)) * (k + nerrs));
printf("done all: Pass\n");
printf("done all: Pass\n");
ret = 0;
ret = 0;
exit:
free(err_list);
for (i = 0; i < TEST_SOURCES; i++) {
free(buffs[i]);
free(temp_buffs[i]);
}
return ret;
exit:
free(err_list);
for (i = 0; i < TEST_SOURCES; i++) {
free(buffs[i]);
free(temp_buffs[i]);
}
return ret;
}

File diff suppressed because it is too large Load Diff

View File

@ -29,43 +29,43 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include <string.h> // for memset, memcmp
#include "erasure_code.h"
#include "test.h"
/*
 * Build-time configuration for the update benchmark.
 *
 * FUNCTION_UNDER_TEST / REF_FUNCTION select the routine being timed and the
 * routine used to produce reference output. VECT is the default for both the
 * parity count and the error count. str()/xstr() stringify a macro after
 * expansion, so xstr(FUNCTION_UNDER_TEST) yields the selected function name.
 *
 * Macro arguments and expression-valued macros are parenthesised so that a
 * call such as TEST_LEN(k + p) divides by the whole sum.
 */

// By default, test multibinary version
#ifndef FUNCTION_UNDER_TEST
#define FUNCTION_UNDER_TEST ec_encode_data_update
#define REF_FUNCTION        ec_encode_data
#endif

// By default, test EC(8+4)
#if (!defined(VECT))
#define VECT 4
#endif

#define str(s)  #s
#define xstr(s) str(s)

#ifndef GT_L3_CACHE
#define GT_L3_CACHE (32 * 1024 * 1024) /* some number > last level cache */
#endif

#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset
#define TEST_SOURCES  32
#define TEST_LEN(m)   ((128 * 1024 / (m)) & ~(64 - 1))
#define TEST_TYPE_STR "_warm"
#elif defined(COLD_TEST)
// Uncached test. Pull from large mem base.
#define TEST_SOURCES  32
#define TEST_LEN(m)   ((GT_L3_CACHE / (m)) & ~(64 - 1))
#define TEST_TYPE_STR "_cold"
#elif defined(TEST_CUSTOM)
#define TEST_TYPE_STR "_cus"
#endif

#ifndef TEST_SEED
#define TEST_SEED 0x1234
#endif
#define MMAX TEST_SOURCES
@ -73,308 +73,316 @@
typedef unsigned char u8;
/*
 * Print the command-line help text to stderr.
 *
 * app_name: program name inserted into the "Usage:" line.
 */
void
usage(const char *app_name)
{
        fprintf(stderr,
                "Usage: %s [options]\n"
                " -h Help\n"
                " -k <val> Number of source buffers\n"
                " -p <val> Number of parity buffers\n"
                " -e <val> Number of simulated buffers with errors (cannot be higher than p or "
                "k)\n",
                app_name);
}
/*
 * Print the first `len` bytes of `buf` to stdout in hex, 32 bytes per line,
 * followed by a final newline. The buffer is not modified.
 */
void
dump(unsigned char *buf, int len)
{
        int i;

        for (i = 0; i < len;) {
                printf(" %2x", 0xff & buf[i++]);
                // i was already advanced, so this breaks after every 32nd byte
                if (i % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}
/*
 * Produce reference parity: initialise the tables from the rows of `a`
 * starting at row k, then call REF_FUNCTION once over all k sources, writing
 * m - k outputs at &buffs[k].
 */
void
encode_update_test_ref(int m, int k, u8 *g_tbls, u8 **buffs, u8 *a)
{
        ec_init_tables(k, m - k, &a[k * k], g_tbls);
        REF_FUNCTION(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
}
/*
 * Produce parity with the update-style routine: initialise the tables, then
 * call FUNCTION_UNDER_TEST once per source buffer i (0..k-1), each call
 * passing source i and the m - k output buffers at &perf_update_buffs[k].
 */
void
encode_update_test(int m, int k, u8 *g_tbls, u8 **perf_update_buffs, u8 *a)
{
        int i;

        // Make parity vects
        ec_init_tables(k, m - k, &a[k * k], g_tbls);
        for (i = 0; i < k; i++) {
                FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, perf_update_buffs[i],
                                    &perf_update_buffs[k]);
        }
}
/*
 * Recover erased buffers using FUNCTION_UNDER_TEST.
 *
 * src_in_err[r] is non-zero when buffer r is erased; src_err_list[0..nerrs-1]
 * holds the erased buffer indices. recov[] is filled with the k surviving
 * buffers taken from update_buffs[]. Output goes to perf_update_buffs[], one
 * FUNCTION_UNDER_TEST call per surviving source.
 *
 * Returns 0 on success, or -1 (after printing "BAD MATRIX") when
 * gf_invert_matrix() fails.
 */
int
decode_test(int m, int k, u8 **update_buffs, u8 **recov, u8 *a, u8 *src_in_err, u8 *src_err_list,
            int nerrs, u8 *g_tbls, u8 **perf_update_buffs)
{
        int i, j, r;
        u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];

        // Construct b by removing error rows
        for (i = 0, r = 0; i < k; i++, r++) {
                // skip over erased buffers; r indexes the surviving rows of a
                while (src_in_err[r])
                        r++;
                recov[i] = update_buffs[r];
                for (j = 0; j < k; j++)
                        b[k * i + j] = a[k * r + j];
        }

        if (gf_invert_matrix(b, d, k) < 0) {
                printf("BAD MATRIX\n");
                return -1;
        }

        // c takes, for each erased index, the matching row of the inverse d
        for (i = 0; i < nerrs; i++)
                for (j = 0; j < k; j++)
                        c[k * i + j] = d[k * src_err_list[i] + j];

        // Recover data
        ec_init_tables(k, nerrs, c, g_tbls);
        for (i = 0; i < k; i++) {
                FUNCTION_UNDER_TEST(TEST_LEN(m), k, nerrs, i, g_tbls, recov[i], perf_update_buffs);
        }
        return 0;
}
int main(int argc, char *argv[])
int
main(int argc, char *argv[])
{
int i, j, check, m, k, p, nerrs, ret = -1;
void *buf;
u8 *temp_buffs[TEST_SOURCES] = { NULL };
u8 *buffs[TEST_SOURCES] = { NULL };
u8 *update_buffs[TEST_SOURCES] = { NULL };
u8 *perf_update_buffs[TEST_SOURCES] = { NULL };
u8 a[MMAX * KMAX];
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
struct perf start;
int i, j, check, m, k, p, nerrs, ret = -1;
void *buf;
u8 *temp_buffs[TEST_SOURCES] = { NULL };
u8 *buffs[TEST_SOURCES] = { NULL };
u8 *update_buffs[TEST_SOURCES] = { NULL };
u8 *perf_update_buffs[TEST_SOURCES] = { NULL };
u8 a[MMAX * KMAX];
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
struct perf start;
/* Set default parameters */
k = 10;
p = VECT;
nerrs = VECT;
/* Set default parameters */
k = 10;
p = VECT;
nerrs = VECT;
/* Parse arguments */
for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-k") == 0) {
k = atoi(argv[++i]);
} else if (strcmp(argv[i], "-p") == 0) {
p = atoi(argv[++i]);
} else if (strcmp(argv[i], "-e") == 0) {
nerrs = atoi(argv[++i]);
} else if (strcmp(argv[i], "-h") == 0) {
usage(argv[0]);
return 0;
} else {
usage(argv[0]);
return -1;
}
}
/* Parse arguments */
for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-k") == 0) {
k = atoi(argv[++i]);
} else if (strcmp(argv[i], "-p") == 0) {
p = atoi(argv[++i]);
} else if (strcmp(argv[i], "-e") == 0) {
nerrs = atoi(argv[++i]);
} else if (strcmp(argv[i], "-h") == 0) {
usage(argv[0]);
return 0;
} else {
usage(argv[0]);
return -1;
}
}
if (nerrs > k) {
printf
("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
nerrs, k);
return -1;
}
if (nerrs > k) {
printf("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
nerrs, k);
return -1;
}
if (k <= 0) {
printf("Number of source buffers (%d) must be > 0\n", k);
return -1;
}
if (k <= 0) {
printf("Number of source buffers (%d) must be > 0\n", k);
return -1;
}
if (p <= 0) {
printf("Number of parity buffers (%d) must be > 0\n", p);
return -1;
}
if (p <= 0) {
printf("Number of parity buffers (%d) must be > 0\n", p);
return -1;
}
if (nerrs > p) {
printf
("Number of errors (%d) cannot be higher than number of parity buffers (%d)\n",
nerrs, p);
return -1;
}
if (nerrs > p) {
printf("Number of errors (%d) cannot be higher than number of parity buffers "
"(%d)\n",
nerrs, p);
return -1;
}
if (nerrs <= 0) {
printf("Number of errors (%d) must be > 0\n", nerrs);
return -1;
}
if (nerrs <= 0) {
printf("Number of errors (%d) must be > 0\n", nerrs);
return -1;
}
m = k + p;
m = k + p;
if (m > MMAX) {
printf("Number of total buffers (data and parity) cannot be higher than %d\n",
MMAX);
return -1;
}
if (m > MMAX) {
printf("Number of total buffers (data and parity) cannot be higher than %d\n",
MMAX);
return -1;
}
u8 *err_list = malloc((size_t)nerrs);
if (err_list == NULL) {
printf("Error allocating list of array of error indices\n");
return -1;
}
u8 *err_list = malloc((size_t) nerrs);
if (err_list == NULL) {
printf("Error allocating list of array of error indices\n");
return -1;
}
srand(TEST_SEED);
srand(TEST_SEED);
for (i = 0; i < nerrs;) {
u8 next_err = rand() % k;
for (j = 0; j < i; j++)
if (next_err == err_list[j])
break;
if (j != i)
continue;
err_list[i++] = next_err;
}
for (i = 0; i < nerrs;) {
u8 next_err = rand() % k;
for (j = 0; j < i; j++)
if (next_err == err_list[j])
break;
if (j != i)
continue;
err_list[i++] = next_err;
}
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k,
p, nerrs);
for (i = 0; i < nerrs; i++)
printf("%d ", err_list[i]);
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k, p,
nerrs);
for (i = 0; i < nerrs; i++)
printf("%d ", err_list[i]);
printf("])\n");
printf("])\n");
printf(xstr(FUNCTION_UNDER_TEST) "_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
printf(xstr(FUNCTION_UNDER_TEST) "_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
memcpy(src_err_list, err_list, nerrs);
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0; i < nerrs; i++)
src_in_err[src_err_list[i]] = 1;
memcpy(src_err_list, err_list, nerrs);
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0; i < nerrs; i++)
src_in_err[src_err_list[i]] = 1;
// Allocate the arrays
for (i = 0; i < m; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n");
goto exit;
}
buffs[i] = buf;
}
// Allocate the arrays
for (i = 0; i < m; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n");
goto exit;
}
buffs[i] = buf;
}
for (i = 0; i < (m - k); i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n");
goto exit;
}
temp_buffs[i] = buf;
memset(temp_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function
}
for (i = 0; i < (m - k); i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n");
goto exit;
}
temp_buffs[i] = buf;
memset(temp_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be
// zero for update function
}
for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n");
goto exit;
}
update_buffs[i] = buf;
memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function
}
for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n");
goto exit;
}
perf_update_buffs[i] = buf;
memset(perf_update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function
}
for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n");
goto exit;
}
update_buffs[i] = buf;
memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be
// zero for update function
}
for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
printf("Error allocating buffers\n");
goto exit;
}
perf_update_buffs[i] = buf;
memset(perf_update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer
// to be zero for update function
}
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN(m); j++) {
buffs[i][j] = rand();
update_buffs[i][j] = buffs[i][j];
}
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN(m); j++) {
buffs[i][j] = rand();
update_buffs[i][j] = buffs[i][j];
}
gf_gen_rs_matrix(a, m, k);
gf_gen_rs_matrix(a, m, k);
encode_update_test_ref(m, k, g_tbls, buffs, a);
encode_update_test(m, k, g_tbls, update_buffs, a);
for (i = 0; i < m - k; i++) {
if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN(m))) {
printf("\nupdate_buffs%d :", i);
dump(update_buffs[k + i], 25);
printf("buffs%d :", i);
dump(buffs[k + i], 25);
goto exit;
}
}
encode_update_test_ref(m, k, g_tbls, buffs, a);
encode_update_test(m, k, g_tbls, update_buffs, a);
for (i = 0; i < m - k; i++) {
if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN(m))) {
printf("\nupdate_buffs%d :", i);
dump(update_buffs[k + i], 25);
printf("buffs%d :", i);
dump(buffs[k + i], 25);
goto exit;
}
}
#ifdef DO_REF_PERF
// Start encode test
BENCHMARK(&start, BENCHMARK_TIME, encode_update_test_ref(m, k, g_tbls, buffs, a));
printf(xstr(REF_FUNCTION) TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (m));
// Start encode test
BENCHMARK(&start, BENCHMARK_TIME, encode_update_test_ref(m, k, g_tbls, buffs, a));
printf(xstr(REF_FUNCTION) TEST_TYPE_STR ": ");
perf_print(start, (long long) (TEST_LEN(m)) * (m));
#endif
// Start encode test
BENCHMARK(&start, BENCHMARK_TIME,
encode_update_test(m, k, g_tbls, perf_update_buffs, a));
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (m));
// Start encode test
BENCHMARK(&start, BENCHMARK_TIME, encode_update_test(m, k, g_tbls, perf_update_buffs, a));
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
perf_print(start, (long long) (TEST_LEN(m)) * (m));
// Start encode test
BENCHMARK(&start, BENCHMARK_TIME,
// Make parity vects
ec_init_tables(k, m - k, &a[k * k], g_tbls);
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
&perf_update_buffs[k]));
printf(xstr(FUNCTION_UNDER_TEST) "_single_src" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1));
// Start encode test
BENCHMARK(&start, BENCHMARK_TIME,
// Make parity vects
ec_init_tables(k, m - k, &a[k * k], g_tbls);
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
&perf_update_buffs[k]));
printf(xstr(FUNCTION_UNDER_TEST) "_single_src" TEST_TYPE_STR ": ");
perf_print(start, (long long) (TEST_LEN(m)) * (m - k + 1));
// Start encode test
BENCHMARK(&start, BENCHMARK_TIME,
// Make parity vects
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
&perf_update_buffs[k]));
printf(xstr(FUNCTION_UNDER_TEST) "_single_src_simple" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1));
// Start encode test
BENCHMARK(&start, BENCHMARK_TIME,
// Make parity vects
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
&perf_update_buffs[k]));
printf(xstr(FUNCTION_UNDER_TEST) "_single_src_simple" TEST_TYPE_STR ": ");
perf_print(start, (long long) (TEST_LEN(m)) * (m - k + 1));
for (i = k; i < m; i++) {
memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function
}
for (i = 0; i < k; i++) {
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, update_buffs[i],
&update_buffs[k]);
}
for (i = k; i < m; i++) {
memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be
// zero for update function
}
for (i = 0; i < k; i++) {
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, update_buffs[i],
&update_buffs[k]);
}
decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list,
nerrs, g_tbls, temp_buffs);
BENCHMARK(&start, BENCHMARK_TIME, check =
decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list,
nerrs, g_tbls, perf_update_buffs));
if (check) {
printf("BAD_MATRIX\n");
ret = check;
goto exit;
}
decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list, nerrs, g_tbls,
temp_buffs);
BENCHMARK(&start, BENCHMARK_TIME,
check = decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list, nerrs,
g_tbls, perf_update_buffs));
if (check) {
printf("BAD_MATRIX\n");
ret = check;
goto exit;
}
for (i = 0; i < nerrs; i++) {
if (0 != memcmp(temp_buffs[i], update_buffs[src_err_list[i]], TEST_LEN(m))) {
printf("Fail error recovery (%d, %d, %d) - \n", m, k, nerrs);
goto exit;
}
}
for (i = 0; i < nerrs; i++) {
if (0 != memcmp(temp_buffs[i], update_buffs[src_err_list[i]], TEST_LEN(m))) {
printf("Fail error recovery (%d, %d, %d) - \n", m, k, nerrs);
goto exit;
}
}
printf(xstr(FUNCTION_UNDER_TEST) "_decode" TEST_TYPE_STR ": ");
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
printf(xstr(FUNCTION_UNDER_TEST) "_decode" TEST_TYPE_STR ": ");
perf_print(start, (long long) (TEST_LEN(m)) * (k + nerrs));
printf("done all: Pass\n");
printf("done all: Pass\n");
ret = 0;
ret = 0;
exit:
free(err_list);
for (i = 0; i < TEST_SOURCES; i++) {
free(buffs[i]);
free(temp_buffs[i]);
free(update_buffs[i]);
free(perf_update_buffs[i]);
}
return ret;
exit:
free(err_list);
for (i = 0; i < TEST_SOURCES; i++) {
free(buffs[i]);
free(temp_buffs[i]);
free(update_buffs[i]);
free(perf_update_buffs[i]);
}
return ret;
}

File diff suppressed because it is too large Load Diff

View File

@ -3,114 +3,117 @@
#include <stdio.h>
#include "erasure_code.h"
/* Limits for the exhaustive sub-matrix check; each macro is defined once. */
#define MAX_CHECK 63 /* Size is limited by using uint64_t to represent subsets */
#define M_MAX     0x20
#define K_MAX     0x10
#define ROWS      M_MAX
#define COLS      K_MAX
/* Return the smaller of two unsigned 64-bit values. */
static inline uint64_t
min(const uint64_t a, const uint64_t b)
{
        if (a <= b)
                return a;
        else
                return b;
}
/*
 * Copy a sub-matrix of `in_matrix` into `out_matrix`.
 *
 * in_matrix is rows x cols, row-major. Bit i of row_indicator selects row i
 * and bit j of col_indicator selects column j. Selected elements are packed
 * in order into out_matrix, which is addressed with a row stride of `dim`.
 */
void
gen_sub_matrix(unsigned char *out_matrix, const uint64_t dim, unsigned char *in_matrix,
               const uint64_t rows, const uint64_t cols, const uint64_t row_indicator,
               const uint64_t col_indicator)
{
        uint64_t i, j, r, s;

        /* r and s count the rows/columns already written to out_matrix */
        for (i = 0, r = 0; i < rows; i++) {
                if (!(row_indicator & ((uint64_t) 1 << i)))
                        continue;

                for (j = 0, s = 0; j < cols; j++) {
                        if (!(col_indicator & ((uint64_t) 1 << j)))
                                continue;
                        out_matrix[dim * r + s] = in_matrix[cols * i + j];
                        s++;
                }
                r++;
        }
}
/*
 * Gosper's Hack: advance *subset to the next larger bit pattern with the same
 * number of set bits.
 *
 * Returns 0 after advancing. When the new pattern has bit `element_count`
 * set, *subset is reset to the lowest `subsize` bits and 1 is returned.
 * An empty *subset has no successor and would otherwise divide by zero, so
 * it is handled the same way as that overflow.
 *
 * The shifts require element_count and subsize to be below 64.
 */
uint64_t
next_subset(uint64_t *subset, uint64_t element_count, uint64_t subsize)
{
        if (*subset == 0) {
                *subset = ((uint64_t) 1 << subsize) - 1;
                return 1;
        }

        uint64_t tmp1 = *subset & -*subset; /* lowest set bit */
        uint64_t tmp2 = *subset + tmp1;

        *subset = (((*subset ^ tmp2) >> 2) / tmp1) | tmp2;
        if (*subset & (((uint64_t) 1 << element_count))) {
                /* Overflow on last subset */
                *subset = ((uint64_t) 1 << subsize) - 1;
                return 1;
        }

        return 0;
}
int are_submatrices_singular(unsigned char *vmatrix, const uint64_t rows, const uint64_t cols)
int
are_submatrices_singular(unsigned char *vmatrix, const uint64_t rows, const uint64_t cols)
{
unsigned char matrix[COLS * COLS];
unsigned char invert_matrix[COLS * COLS];
uint64_t subsize;
unsigned char matrix[COLS * COLS];
unsigned char invert_matrix[COLS * COLS];
uint64_t subsize;
/* Check all square subsize x subsize submatrices of the rows x cols
* vmatrix for singularity*/
for (subsize = 1; subsize <= min(rows, cols); subsize++) {
const uint64_t subset_init = (1ULL << subsize) - 1ULL;
uint64_t col_indicator = subset_init;
do {
uint64_t row_indicator = subset_init;
do {
gen_sub_matrix(matrix, subsize, vmatrix, rows,
cols, row_indicator, col_indicator);
if (gf_invert_matrix(matrix, invert_matrix, (int)subsize))
return 1;
/* Check all square subsize x subsize submatrices of the rows x cols
* vmatrix for singularity*/
for (subsize = 1; subsize <= min(rows, cols); subsize++) {
const uint64_t subset_init = (1ULL << subsize) - 1ULL;
uint64_t col_indicator = subset_init;
do {
uint64_t row_indicator = subset_init;
do {
gen_sub_matrix(matrix, subsize, vmatrix, rows, cols, row_indicator,
col_indicator);
if (gf_invert_matrix(matrix, invert_matrix, (int) subsize))
return 1;
} while (next_subset(&row_indicator, rows, subsize) == 0);
} while (next_subset(&col_indicator, cols, subsize) == 0);
}
} while (next_subset(&row_indicator, rows, subsize) == 0);
} while (next_subset(&col_indicator, cols, subsize) == 0);
}
return 0;
return 0;
}
int main(int argc, char **argv)
int
main(int argc, char **argv)
{
unsigned char vmatrix[(ROWS + COLS) * COLS];
uint64_t rows, cols;
unsigned char vmatrix[(ROWS + COLS) * COLS];
uint64_t rows, cols;
if (K_MAX > MAX_CHECK) {
printf("K_MAX too large for this test\n");
return 0;
}
if (M_MAX > MAX_CHECK) {
printf("M_MAX too large for this test\n");
return 0;
}
if (M_MAX < K_MAX) {
printf("M_MAX must be smaller than K_MAX");
return 0;
}
if (K_MAX > MAX_CHECK) {
printf("K_MAX too large for this test\n");
return 0;
}
if (M_MAX > MAX_CHECK) {
printf("M_MAX too large for this test\n");
return 0;
}
if (M_MAX < K_MAX) {
printf("M_MAX must be smaller than K_MAX");
return 0;
}
printf("Checking gen_rs_matrix for k <= %d and m <= %d.\n", K_MAX, M_MAX);
printf("gen_rs_matrix creates erasure codes for:\n");
printf("Checking gen_rs_matrix for k <= %d and m <= %d.\n", K_MAX, M_MAX);
printf("gen_rs_matrix creates erasure codes for:\n");
for (cols = 1; cols <= K_MAX; cols++) {
for (rows = 1; rows <= M_MAX - cols; rows++) {
gf_gen_rs_matrix(vmatrix, rows + cols, cols);
for (cols = 1; cols <= K_MAX; cols++) {
for (rows = 1; rows <= M_MAX - cols; rows++) {
gf_gen_rs_matrix(vmatrix, rows + cols, cols);
/* Verify the Vandermonde portion of vmatrix contains no
* singular submatrix */
if (are_submatrices_singular(&vmatrix[cols * cols], rows, cols))
break;
}
printf(" k = %2u, m <= %2u \n", (unsigned)cols, (unsigned)(rows + cols - 1));
}
return 0;
/* Verify the Vandermonde portion of vmatrix contains no
* singular submatrix */
if (are_submatrices_singular(&vmatrix[cols * cols], rows, cols))
break;
}
printf(" k = %2u, m <= %2u \n", (unsigned) cols, (unsigned) (rows + cols - 1));
}
return 0;
}

View File

@ -29,7 +29,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include <string.h> // for memset, memcmp
#include <assert.h>
#include "erasure_code.h"
@ -37,202 +37,194 @@
#define TEST_LEN 8192
#ifndef TEST_SOURCES
# define TEST_SOURCES 128
#define TEST_SOURCES 128
#endif
#ifndef RANDOMS
# define RANDOMS 200
#define RANDOMS 200
#endif
#define KMAX TEST_SOURCES
typedef unsigned char u8;
// Multiply two n x n row-major byte matrices: c = a * b.
// Element products use gf_mul() and are accumulated with XOR.
// c must not alias a or b, since c is written while a and b are still read.
void
matrix_mult(u8 *a, u8 *b, u8 *c, int n)
{
        int i, j, k;
        u8 d;

        for (i = 0; i < n; i++) {
                for (j = 0; j < n; j++) {
                        d = 0;
                        for (k = 0; k < n; k++) {
                                d ^= gf_mul(a[n * i + k], b[n * k + j]);
                        }
                        c[i * n + j] = d;
                }
        }
}
// Print the n x n row-major matrix a to stdout as hex bytes, one matrix row
// per line, followed by a blank line.
void
print_matrix(u8 *a, int n)
{
        int i, j;

        for (i = 0; i < n; i++) {
                for (j = 0; j < n; j++) {
                        printf(" %2x", a[i * n + j]);
                }
                printf("\n");
        }
        printf("\n");
}
// Check whether the n x n row-major matrix a is the identity matrix.
// Returns 0 if it is, -1 at the first element that differs.
int
is_ident(u8 *a, const int n)
{
        int i, j;
        u8 c;

        for (i = 0; i < n; i++) {
                for (j = 0; j < n; j++) {
                        c = *a++;
                        // Diagonal elements must be 1: subtract 1 so every
                        // expected element compares against 0
                        if (i == j)
                                c--;
                        if (c != 0)
                                return -1;
                }
        }
        return 0;
}
// Invert the n x n matrix in with gf_invert_matrix() and verify the result.
//
// in  - matrix to invert; overwritten (passed to gf_invert_matrix, then
//       reused to hold the product inv * sav)
// inv - receives the inverse
// sav - receives a copy of the original input
//
// Returns 0 if inv * sav is the identity. Returns -1, after printing the
// matrices involved, if gf_invert_matrix() reports failure or the product is
// not the identity.
int
inv_test(u8 *in, u8 *inv, u8 *sav, int n)
{
        // Keep the original: in is reused below for the product
        memcpy(sav, in, n * n);

        if (gf_invert_matrix(in, inv, n)) {
                printf("Given singular matrix\n");
                print_matrix(sav, n);
                return -1;
        }

        matrix_mult(inv, sav, in, n);

        if (is_ident(in, n)) {
                printf("fail\n");
                print_matrix(sav, n);
                print_matrix(inv, n);
                print_matrix(in, n);
                return -1;
        }
#ifdef TEST_VERBOSE
        putchar('.');
#endif

        return 0;
}
int main(int argc, char *argv[])
int
main(int argc, char *argv[])
{
int i, k, t;
u8 *test_mat = NULL, *save_mat = NULL, *invr_mat = NULL;
int ret = -1;
int i, k, t;
u8 *test_mat = NULL, *save_mat = NULL, *invr_mat = NULL;
int ret = -1;
u8 test1[] = { 1, 1, 6,
1, 1, 1,
7, 1, 9
};
u8 test1[] = { 1, 1, 6, 1, 1, 1, 7, 1, 9 };
u8 test2[] = { 0, 1, 6,
1, 0, 1,
0, 1, 9
};
u8 test2[] = { 0, 1, 6, 1, 0, 1, 0, 1, 9 };
u8 test3[] = { 0, 0, 1,
1, 0, 0,
0, 1, 1
};
u8 test3[] = { 0, 0, 1, 1, 0, 0, 0, 1, 1 };
u8 test4[] = { 0, 1, 6, 7,
1, 1, 0, 0,
0, 1, 2, 3,
3, 2, 2, 3
}; // = row3+3*row2
u8 test4[] = { 0, 1, 6, 7, 1, 1, 0, 0, 0, 1, 2, 3, 3, 2, 2, 3 }; // = row3+3*row2
printf("gf_inverse_test: max=%d ", KMAX);
printf("gf_inverse_test: max=%d ", KMAX);
test_mat = malloc(KMAX * KMAX);
save_mat = malloc(KMAX * KMAX);
invr_mat = malloc(KMAX * KMAX);
test_mat = malloc(KMAX * KMAX);
save_mat = malloc(KMAX * KMAX);
invr_mat = malloc(KMAX * KMAX);
if (NULL == test_mat || NULL == save_mat || NULL == invr_mat)
goto exit;
if (NULL == test_mat || NULL == save_mat || NULL == invr_mat)
goto exit;
// Test with lots of leading 1's
k = 3;
memcpy(test_mat, test1, k * k);
if (inv_test(test_mat, invr_mat, save_mat, k))
goto exit;
// Test with lots of leading 1's
k = 3;
memcpy(test_mat, test1, k * k);
if (inv_test(test_mat, invr_mat, save_mat, k))
goto exit;
// Test with leading zeros
k = 3;
memcpy(test_mat, test2, k * k);
if (inv_test(test_mat, invr_mat, save_mat, k))
goto exit;
// Test with leading zeros
k = 3;
memcpy(test_mat, test2, k * k);
if (inv_test(test_mat, invr_mat, save_mat, k))
goto exit;
// Test 3
k = 3;
memcpy(test_mat, test3, k * k);
if (inv_test(test_mat, invr_mat, save_mat, k))
goto exit;
// Test 3
k = 3;
memcpy(test_mat, test3, k * k);
if (inv_test(test_mat, invr_mat, save_mat, k))
goto exit;
// Test 4 - try a singular matrix
k = 4;
memcpy(test_mat, test4, k * k);
if (!gf_invert_matrix(test_mat, invr_mat, k)) {
printf("Fail: didn't catch singular matrix\n");
print_matrix(test4, 4);
goto exit;
}
// Do random test of size KMAX
k = KMAX;
// Test 4 - try a singular matrix
k = 4;
memcpy(test_mat, test4, k * k);
if (!gf_invert_matrix(test_mat, invr_mat, k)) {
printf("Fail: didn't catch singular matrix\n");
print_matrix(test4, 4);
goto exit;
}
// Do random test of size KMAX
k = KMAX;
for (i = 0; i < k * k; i++)
test_mat[i] = save_mat[i] = rand();
for (i = 0; i < k * k; i++)
test_mat[i] = save_mat[i] = rand();
if (gf_invert_matrix(test_mat, invr_mat, k)) {
printf("rand picked a singular matrix, try again\n");
goto exit;
}
if (gf_invert_matrix(test_mat, invr_mat, k)) {
printf("rand picked a singular matrix, try again\n");
goto exit;
}
matrix_mult(invr_mat, save_mat, test_mat, k);
matrix_mult(invr_mat, save_mat, test_mat, k);
if (is_ident(test_mat, k)) {
printf("fail\n");
print_matrix(save_mat, k);
print_matrix(invr_mat, k);
print_matrix(test_mat, k);
goto exit;
}
// Do Randoms. Random size and coefficients
for (t = 0; t < RANDOMS; t++) {
k = rand() % KMAX;
if (is_ident(test_mat, k)) {
printf("fail\n");
print_matrix(save_mat, k);
print_matrix(invr_mat, k);
print_matrix(test_mat, k);
goto exit;
}
// Do Randoms. Random size and coefficients
for (t = 0; t < RANDOMS; t++) {
k = rand() % KMAX;
for (i = 0; i < k * k; i++)
test_mat[i] = save_mat[i] = rand();
for (i = 0; i < k * k; i++)
test_mat[i] = save_mat[i] = rand();
if (gf_invert_matrix(test_mat, invr_mat, k))
continue;
if (gf_invert_matrix(test_mat, invr_mat, k))
continue;
matrix_mult(invr_mat, save_mat, test_mat, k);
matrix_mult(invr_mat, save_mat, test_mat, k);
if (is_ident(test_mat, k)) {
printf("fail rand k=%d\n", k);
print_matrix(save_mat, k);
print_matrix(invr_mat, k);
print_matrix(test_mat, k);
goto exit;
}
if (is_ident(test_mat, k)) {
printf("fail rand k=%d\n", k);
print_matrix(save_mat, k);
print_matrix(invr_mat, k);
print_matrix(test_mat, k);
goto exit;
}
#ifdef TEST_VERBOSE
if (0 == (t % 8))
putchar('.');
if (0 == (t % 8))
putchar('.');
#endif
}
}
printf(" Pass\n");
printf(" Pass\n");
ret = 0;
ret = 0;
exit:
free(test_mat);
free(save_mat);
free(invr_mat);
exit:
free(test_mat);
free(save_mat);
free(invr_mat);
return ret;
return ret;
}

View File

@ -29,26 +29,26 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include <string.h> // for memset, memcmp
#include "test.h"
#include "erasure_code.h"
// Benchmark sizing. Values that expand to an expression are parenthesized so
// they stay a single operand wherever they are used (e.g. x % TEST_LEN).
#ifndef GT_L3_CACHE
#define GT_L3_CACHE (32 * 1024 * 1024) /* some number > last level cache */
#endif

#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset
#define TEST_SOURCES  10
#define TEST_LEN      (8 * 1024)
#define TEST_TYPE_STR "_warm"
#elif defined(COLD_TEST)
// Uncached test. Pull from large mem base.
#define TEST_SOURCES  10
#define TEST_LEN      (GT_L3_CACHE / TEST_SOURCES)
#define TEST_TYPE_STR "_cold"
#elif defined(TEST_CUSTOM)
// Custom test: only the label is set in this block
#define TEST_TYPE_STR "_cus"
#endif
typedef unsigned char u8;
@ -58,105 +58,108 @@ u8 gff[256];
u8 gflog[256];
u8 gf_mul_table[256 * 256];
void mk_gf_field(void)
void
mk_gf_field(void)
{
int i;
u8 s = 1;
gflog[0] = 0;
int i;
u8 s = 1;
gflog[0] = 0;
for (i = 0; i < 256; i++) {
gff[i] = s;
gflog[s] = i;
s = (s << 1) ^ ((s & 0x80) ? 0x1d : 0); // mult by GF{2}
}
for (i = 0; i < 256; i++) {
gff[i] = s;
gflog[s] = i;
s = (s << 1) ^ ((s & 0x80) ? 0x1d : 0); // mult by GF{2}
}
}
// Populate a single table with all multiply combinations for a fast,
// single-table lookup of GF(2^8) multiply at the expense of memory.
// table must hold 256 * 256 bytes; entry [i * 256 + j] is gf_mul(i, j).
void
mk_gf_mul_table(u8 *table)
{
        int i, j;

        for (i = 0; i < 256; i++)
                for (j = 0; j < 256; j++)
                        table[i * 256 + j] = gf_mul(i, j);
}
// Reference dot product: for each byte position i in [0, len),
// dest[i] is the XOR over j in [0, vlen) of gf_mul(src[j][i], v[j]).
//
// len  - number of bytes in each source buffer and in dest
// vlen - number of source buffers and of coefficients in v
void
gf_vect_dot_prod_ref(int len, int vlen, u8 *v, u8 **src, u8 *dest)
{
        int i, j;
        u8 s;

        for (i = 0; i < len; i++) {
                s = 0;
                for (j = 0; j < vlen; j++)
                        s ^= gf_mul(src[j][i], v[j]);

                dest[i] = s;
        }
}
// Table-driven dot product: same accumulation as gf_vect_dot_prod_ref, but
// each product is read from the global gf_mul_table at index
// v[j] * 256 + src[j][i]. The table must have been filled beforehand (main
// does so with mk_gf_mul_table).
//
// len  - number of bytes in each source buffer and in dest
// vlen - number of source buffers and of coefficients in v
void
gf_vect_dot_prod_mult(int len, int vlen, u8 *v, u8 **src, u8 *dest)
{
        int i, j;
        u8 s;

        for (i = 0; i < len; i++) {
                s = 0;
                for (j = 0; j < vlen; j++) {
                        s ^= gf_mul_table[v[j] * 256 + src[j][i]];
                }
                dest[i] = s;
        }
}
int main(void)
int
main(void)
{
int i, j;
u8 vec[TEST_SOURCES], *dest1, *dest2;
u8 *matrix[TEST_SOURCES];
struct perf start;
int i, j;
u8 vec[TEST_SOURCES], *dest1, *dest2;
u8 *matrix[TEST_SOURCES];
struct perf start;
dest1 = (u8 *) malloc(TEST_LEN);
dest2 = (u8 *) malloc(TEST_LEN);
dest1 = (u8 *) malloc(TEST_LEN);
dest2 = (u8 *) malloc(TEST_LEN);
if (NULL == dest1 || NULL == dest2) {
printf("buffer alloc error\n");
return -1;
}
memset(dest1, 0xfe, TEST_LEN);
memset(dest2, 0xfe, TEST_LEN);
if (NULL == dest1 || NULL == dest2) {
printf("buffer alloc error\n");
return -1;
}
memset(dest1, 0xfe, TEST_LEN);
memset(dest2, 0xfe, TEST_LEN);
mk_gf_field();
mk_gf_mul_table(gf_mul_table);
mk_gf_field();
mk_gf_mul_table(gf_mul_table);
//generate random vector and matrix/data
for (i = 0; i < TEST_SOURCES; i++) {
vec[i] = rand();
// generate random vector and matrix/data
for (i = 0; i < TEST_SOURCES; i++) {
vec[i] = rand();
if (!(matrix[i] = malloc(TEST_LEN))) {
fprintf(stderr, "Error failure\n\n");
return -1;
}
for (j = 0; j < TEST_LEN; j++)
matrix[i][j] = rand();
if (!(matrix[i] = malloc(TEST_LEN))) {
fprintf(stderr, "Error failure\n\n");
return -1;
}
for (j = 0; j < TEST_LEN; j++)
matrix[i][j] = rand();
}
}
BENCHMARK(&start, BENCHMARK_TIME,
gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1));
printf("gf_vect_dot_prod_2tbl" TEST_TYPE_STR ": ");
perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));
BENCHMARK(&start, BENCHMARK_TIME,
gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1));
printf("gf_vect_dot_prod_2tbl" TEST_TYPE_STR ": ");
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
BENCHMARK(&start, BENCHMARK_TIME,
gf_vect_dot_prod_mult(TEST_LEN, TEST_SOURCES, vec, matrix, dest2));
printf("gf_vect_dot_prod_1tbl" TEST_TYPE_STR ": ");
perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));
BENCHMARK(&start, BENCHMARK_TIME,
gf_vect_dot_prod_mult(TEST_LEN, TEST_SOURCES, vec, matrix, dest2));
printf("gf_vect_dot_prod_1tbl" TEST_TYPE_STR ": ");
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
// Compare with reference function
if (0 != memcmp(dest1, dest2, TEST_LEN)) {
printf("Error, different results!\n\n");
return -1;
}
// Compare with reference function
if (0 != memcmp(dest1, dest2, TEST_LEN)) {
printf("Error, different results!\n\n");
return -1;
}
printf("Pass functional test\n");
return 0;
printf("Pass functional test\n");
return 0;
}

View File

@ -29,19 +29,19 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include <string.h> // for memset, memcmp
#include <assert.h>
#include "erasure_code.h"
#include "test.h"
#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN/2)
#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN / 2)
#ifndef TEST_SOURCES
# define TEST_SOURCES 250
#define TEST_SOURCES 250
#endif
#ifndef RANDOMS
# define RANDOMS 20
#define RANDOMS 20
#endif
#define MMAX TEST_SOURCES
@ -49,244 +49,251 @@
typedef unsigned char u8;
// Print the first len bytes of buf to stdout as hex values, with a line
// break after every 32 bytes and a final newline.
void
dump(unsigned char *buf, int len)
{
        int i;

        for (i = 0; i < len;) {
                printf(" %2x", 0xff & buf[i++]);
                // i was already advanced, so this breaks after bytes 32, 64, ...
                if (i % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}
// Print a matrix given as an array of row pointers: the first m bytes of
// each of the first k rows of s, as hex, one row per line, followed by a
// blank line.
void
dump_matrix(unsigned char **s, int k, int m)
{
        int i, j;

        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", s[i][j]);
                }
                printf("\n");
        }
        printf("\n");
}
// Print a k x m matrix stored contiguously in row-major order in s, as hex,
// one row per line, followed by a blank line.
void
dump_u8xu8(unsigned char *s, int k, int m)
{
        int i, j;

        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", 0xff & s[j + (i * m)]);
                }
                printf("\n");
        }
        printf("\n");
}
int main(int argc, char *argv[])
int
main(int argc, char *argv[])
{
int i, j, rtest, m, k, nerrs, r, err;
void *buf;
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
int i, j, rtest, m, k, nerrs, r, err;
void *buf;
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
printf("gf_vect_dot_prod_base: %dx%d ", TEST_SOURCES, TEST_LEN);
printf("gf_vect_dot_prod_base: %dx%d ", TEST_SOURCES, TEST_LEN);
// Allocate the arrays
for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
buffs[i] = buf;
}
// Allocate the arrays
for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
buffs[i] = buf;
}
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest_ref = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest_ref = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
temp_buff = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
temp_buff = buf;
// Init
for (i = 0; i < TEST_SOURCES; i++)
memset(buffs[i], 0, TEST_LEN);
// Init
for (i = 0; i < TEST_SOURCES; i++)
memset(buffs[i], 0, TEST_LEN);
memset(dest, 0, TEST_LEN);
memset(temp_buff, 0, TEST_LEN);
memset(dest_ref, 0, TEST_LEN);
memset(g, 0, TEST_SOURCES);
memset(dest, 0, TEST_LEN);
memset(temp_buff, 0, TEST_LEN);
memset(dest_ref, 0, TEST_LEN);
memset(g, 0, TEST_SOURCES);
// Test erasure code using gf_vect_dot_prod
// Pick a first test
m = 9;
k = 5;
assert(!(m > MMAX || k > KMAX));
// Test erasure code using gf_vect_dot_prod
// Pick a first test
m = 9;
k = 5;
assert(!(m > MMAX || k > KMAX));
gf_gen_cauchy1_matrix(a, m, k);
gf_gen_cauchy1_matrix(a, m, k);
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Make parity vects
for (i = k; i < m; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
// Make parity vects
for (i = k; i < m; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
}
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
}
// Random buffers in erasure
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
err = 1 & rand();
src_in_err[i] = err;
if (err)
src_err_list[nerrs++] = i;
}
// Random buffers in erasure
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
err = 1 & rand();
src_in_err[i] = err;
if (err)
src_err_list[nerrs++] = i;
}
// construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j];
}
// construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j];
}
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
printf("BAD MATRIX\n");
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
printf("BAD MATRIX\n");
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
recov[i] = buffs[r];
}
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
recov[i] = buffs[r];
}
// Recover data
for (i = 0; i < nerrs; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
// Recover data
for (i = 0; i < nerrs; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
printf("recov %d:", src_err_list[i]);
dump(temp_buff, 25);
printf("orig :");
dump(buffs[src_err_list[i]], 25);
return -1;
}
}
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
printf("recov %d:", src_err_list[i]);
dump(temp_buff, 25);
printf("orig :");
dump(buffs[src_err_list[i]], 25);
return -1;
}
}
// Do more random tests
// Do more random tests
for (rtest = 0; rtest < RANDOMS; rtest++) {
while ((m = (rand() % MMAX)) < 2) ;
while ((k = (rand() % KMAX)) >= m || k < 1) ;
for (rtest = 0; rtest < RANDOMS; rtest++) {
while ((m = (rand() % MMAX)) < 2)
;
while ((k = (rand() % KMAX)) >= m || k < 1)
;
if (m > MMAX || k > KMAX)
continue;
if (m > MMAX || k > KMAX)
continue;
gf_gen_cauchy1_matrix(a, m, k);
gf_gen_cauchy1_matrix(a, m, k);
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Make parity vects
for (i = k; i < m; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
// Make parity vects
for (i = k; i < m; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
}
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
}
// Random errors
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
err = 1 & rand();
src_in_err[i] = err;
if (err)
src_err_list[nerrs++] = i;
}
if (nerrs == 0) { // should have at least one error
while ((err = (rand() % KMAX)) >= k) ;
src_err_list[nerrs++] = err;
src_in_err[err] = 1;
}
// construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j];
}
// Random errors
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
err = 1 & rand();
src_in_err[i] = err;
if (err)
src_err_list[nerrs++] = i;
}
if (nerrs == 0) { // should have at least one error
while ((err = (rand() % KMAX)) >= k)
;
src_err_list[nerrs++] = err;
src_in_err[err] = 1;
}
// construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j];
}
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
printf("BAD MATRIX\n");
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
printf("BAD MATRIX\n");
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
recov[i] = buffs[r];
}
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
recov[i] = buffs[r];
}
// Recover data
for (i = 0; i < nerrs; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
// Recover data
for (i = 0; i < nerrs; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
printf(" - erase list = ");
for (i = 0; i < nerrs; i++)
printf(" %d", src_err_list[i]);
printf("\na:\n");
dump_u8xu8((u8 *) a, m, k);
printf("inv b:\n");
dump_u8xu8((u8 *) d, k, k);
printf("orig data:\n");
dump_matrix(buffs, m, 25);
printf("orig :");
dump(buffs[src_err_list[i]], 25);
printf("recov %d:", src_err_list[i]);
dump(temp_buff, 25);
return -1;
}
}
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
printf(" - erase list = ");
for (i = 0; i < nerrs; i++)
printf(" %d", src_err_list[i]);
printf("\na:\n");
dump_u8xu8((u8 *) a, m, k);
printf("inv b:\n");
dump_u8xu8((u8 *) d, k, k);
printf("orig data:\n");
dump_matrix(buffs, m, 25);
printf("orig :");
dump(buffs[src_err_list[i]], 25);
printf("recov %d:", src_err_list[i]);
dump(temp_buff, 25);
return -1;
}
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
printf("done all: Pass\n");
return 0;
printf("done all: Pass\n");
return 0;
}

View File

@ -29,146 +29,148 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include <string.h> // for memset, memcmp
#include "erasure_code.h"
#include "test.h"
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_vect_dot_prod
#define FUNCTION_UNDER_TEST gf_vect_dot_prod
#endif
#define str(s) #s
#define str(s) #s
#define xstr(s) str(s)
// Benchmark sizing. Values that expand to an expression are parenthesized so
// they stay a single operand wherever they are used (e.g. x % TEST_LEN).
#ifndef GT_L3_CACHE
#define GT_L3_CACHE (32 * 1024 * 1024) /* some number > last level cache */
#endif

#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset
#define TEST_SOURCES  10
#define TEST_LEN      (8 * 1024)
#define TEST_TYPE_STR "_warm"
#elif defined(COLD_TEST)
// Uncached test. Pull from large mem base.
// The length is rounded down to a multiple of 64 bytes.
#define TEST_SOURCES  10
#define TEST_LEN      ((GT_L3_CACHE / TEST_SOURCES) & ~(64 - 1))
#define TEST_TYPE_STR "_cold"
#elif defined(TEST_CUSTOM)
// Custom test: only the label is set in this block
#define TEST_TYPE_STR "_cus"
#endif
typedef unsigned char u8;
// Print the first len bytes of buf to stdout as hex values, with a line
// break after every 32 bytes and a final newline.
void
dump(unsigned char *buf, int len)
{
        int i;

        for (i = 0; i < len;) {
                printf(" %2x", 0xff & buf[i++]);
                // i was already advanced, so this breaks after bytes 32, 64, ...
                if (i % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}
// Print a matrix given as an array of row pointers: the first m bytes of
// each of the first k rows of s, as hex, one row per line, followed by a
// blank line.
void
dump_matrix(unsigned char **s, int k, int m)
{
        int i, j;

        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", s[i][j]);
                }
                printf("\n");
        }
        printf("\n");
}
// One benchmark iteration: expand each of the TEST_SOURCES coefficients in g
// into its 32-byte slot of g_tbls with gf_vect_mul_init(), then call fun_ptr
// on the TEST_SOURCES buffers in buffs, writing TEST_LEN bytes to dest_ref.
//
// fun_ptr  - dot-product implementation to run
// g        - TEST_SOURCES coefficients
// g_tbls   - scratch area of TEST_SOURCES * 32 bytes, overwritten
// buffs    - TEST_SOURCES source buffers of TEST_LEN bytes each
// dest_ref - output buffer of TEST_LEN bytes
void
vect_dot_prod_perf(void (*fun_ptr)(int, int, unsigned char *, unsigned char **, unsigned char *),
                   u8 *g, u8 *g_tbls, u8 **buffs, u8 *dest_ref)
{
        int j;

        for (j = 0; j < TEST_SOURCES; j++)
                gf_vect_mul_init(g[j], &g_tbls[j * 32]);

        (*fun_ptr)(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
}
int main(int argc, char *argv[])
int
main(int argc, char *argv[])
{
int i, j;
void *buf;
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
u8 *temp_buff, *buffs[TEST_SOURCES];
struct perf start;
int i, j;
void *buf;
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
u8 *temp_buff, *buffs[TEST_SOURCES];
struct perf start;
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
// Allocate the arrays
for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
buffs[i] = buf;
}
// Allocate the arrays
for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
buffs[i] = buf;
}
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest_ref = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest_ref = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
temp_buff = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
temp_buff = buf;
// Performance test
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Performance test
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
memset(dest, 0, TEST_LEN);
memset(temp_buff, 0, TEST_LEN);
memset(dest_ref, 0, TEST_LEN);
memset(g, 0, TEST_SOURCES);
memset(dest, 0, TEST_LEN);
memset(temp_buff, 0, TEST_LEN);
memset(dest_ref, 0, TEST_LEN);
memset(g, 0, TEST_SOURCES);
for (i = 0; i < TEST_SOURCES; i++)
g[i] = rand();
for (i = 0; i < TEST_SOURCES; i++)
g[i] = rand();
#ifdef DO_REF_PERF
BENCHMARK(&start, BENCHMARK_TIME,
vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref)
);
printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
BENCHMARK(&start, BENCHMARK_TIME,
vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref));
printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));
#else
vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref);
vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref);
#endif
BENCHMARK(&start, BENCHMARK_TIME,
vect_dot_prod_perf(&FUNCTION_UNDER_TEST, g, g_tbls, buffs, dest));
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
BENCHMARK(&start, BENCHMARK_TIME,
vect_dot_prod_perf(&FUNCTION_UNDER_TEST, g, g_tbls, buffs, dest));
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
dump_matrix(buffs, 5, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref, 25);
printf("dprod:");
dump(dest, 25);
return -1;
}
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
dump_matrix(buffs, 5, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref, 25);
printf("dprod:");
dump(dest, 25);
return -1;
}
printf("pass perf check\n");
return 0;
printf("pass perf check\n");
return 0;
}

View File

@ -29,28 +29,28 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include <string.h> // for memset, memcmp
#include "erasure_code.h"
#include "test.h"
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_vect_dot_prod
#define FUNCTION_UNDER_TEST gf_vect_dot_prod
#endif
#ifndef TEST_MIN_SIZE
# define TEST_MIN_SIZE 32
#define TEST_MIN_SIZE 32
#endif
#define str(s) #s
#define str(s) #s
#define xstr(s) str(s)
#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN/2)
#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN / 2)
#ifndef TEST_SOURCES
# define TEST_SOURCES 16
#define TEST_SOURCES 16
#endif
#ifndef RANDOMS
# define RANDOMS 20
#define RANDOMS 20
#endif
#define MMAX TEST_SOURCES
@ -58,481 +58,486 @@
#ifdef EC_ALIGNED_ADDR
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
#define PTR_ALIGN_CHK_B 0
#define LEN_ALIGN_CHK_B 0 // 0 for aligned only
#else
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B 32
# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
#define PTR_ALIGN_CHK_B 32
#define LEN_ALIGN_CHK_B 32 // 0 for aligned only
#endif
typedef unsigned char u8;
// Print the first len bytes of buf to stdout as hex values, with a line
// break after every 32 bytes and a final newline.
void
dump(unsigned char *buf, int len)
{
        int i;

        for (i = 0; i < len;) {
                printf(" %2x", 0xff & buf[i++]);
                // i was already advanced, so this breaks after bytes 32, 64, ...
                if (i % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}
// Print a matrix given as an array of row pointers: the first m bytes of
// each of the first k rows of s, as hex, one row per line, followed by a
// blank line.
void
dump_matrix(unsigned char **s, int k, int m)
{
        int i, j;

        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", s[i][j]);
                }
                printf("\n");
        }
        printf("\n");
}
// Print a k x m byte matrix stored contiguously in row-major order
// (element (row, col) is s[col + row * m]), one row per output line.
void
dump_u8xu8(unsigned char *s, int k, int m)
{
        int row, col;

        for (row = 0; row < k; row++) {
                col = 0;
                while (col < m) {
                        printf(" %2x", 0xff & s[col + (row * m)]);
                        col++;
                }
                printf("\n");
        }
        printf("\n");
}
int main(int argc, char *argv[])
int
main(int argc, char *argv[])
{
int i, j, rtest, srcs, m, k, nerrs, r, err;
void *buf;
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
int i, j, rtest, srcs, m, k, nerrs, r, err;
void *buf;
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
int align, size;
unsigned char *efence_buffs[TEST_SOURCES];
unsigned int offset;
u8 *ubuffs[TEST_SOURCES];
u8 *udest_ptr;
int align, size;
unsigned char *efence_buffs[TEST_SOURCES];
unsigned int offset;
u8 *ubuffs[TEST_SOURCES];
u8 *udest_ptr;
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
// Allocate the arrays
for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
buffs[i] = buf;
}
// Allocate the arrays
for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
buffs[i] = buf;
}
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest_ref = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest_ref = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
temp_buff = buf;
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
temp_buff = buf;
// Test of all zeros
for (i = 0; i < TEST_SOURCES; i++)
memset(buffs[i], 0, TEST_LEN);
// Test of all zeros
for (i = 0; i < TEST_SOURCES; i++)
memset(buffs[i], 0, TEST_LEN);
memset(dest, 0, TEST_LEN);
memset(temp_buff, 0, TEST_LEN);
memset(dest_ref, 0, TEST_LEN);
memset(g, 0, TEST_SOURCES);
memset(dest, 0, TEST_LEN);
memset(temp_buff, 0, TEST_LEN);
memset(dest_ref, 0, TEST_LEN);
memset(g, 0, TEST_SOURCES);
for (i = 0; i < TEST_SOURCES; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
for (i = 0; i < TEST_SOURCES; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n");
dump_matrix(buffs, 5, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref, 25);
printf("dprod:");
dump(dest, 25);
return -1;
}
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n");
dump_matrix(buffs, 5, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref, 25);
printf("dprod:");
dump(dest, 25);
return -1;
}
#ifdef TEST_VERBOSE
else
putchar('.');
else
putchar('.');
#endif
// Rand data test
for (rtest = 0; rtest < RANDOMS; rtest++) {
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Rand data test
for (rtest = 0; rtest < RANDOMS; rtest++) {
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
for (i = 0; i < TEST_SOURCES; i++)
g[i] = rand();
for (i = 0; i < TEST_SOURCES; i++)
g[i] = rand();
for (i = 0; i < TEST_SOURCES; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
for (i = 0; i < TEST_SOURCES; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n");
dump_matrix(buffs, 5, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref, 25);
printf("dprod:");
dump(dest, 25);
return -1;
}
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n");
dump_matrix(buffs, 5, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref, 25);
printf("dprod:");
dump(dest, 25);
return -1;
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
// Rand data test with varied parameters
for (rtest = 0; rtest < RANDOMS; rtest++) {
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
for (i = 0; i < srcs; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Rand data test with varied parameters
for (rtest = 0; rtest < RANDOMS; rtest++) {
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
for (i = 0; i < srcs; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
for (i = 0; i < srcs; i++)
g[i] = rand();
for (i = 0; i < srcs; i++)
g[i] = rand();
for (i = 0; i < srcs; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
for (i = 0; i < srcs; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest);
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest);
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
dump_matrix(buffs, 5, srcs);
printf("dprod_base:");
dump(dest_ref, 5);
printf("dprod:");
dump(dest, 5);
return -1;
}
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
dump_matrix(buffs, 5, srcs);
printf("dprod_base:");
dump(dest_ref, 5);
printf("dprod:");
dump(dest, 5);
return -1;
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
}
}
// Test erasure code using gf_vect_dot_prod
// Test erasure code using gf_vect_dot_prod
// Pick a first test
m = 9;
k = 5;
if (m > MMAX || k > KMAX)
return -1;
// Pick a first test
m = 9;
k = 5;
if (m > MMAX || k > KMAX)
return -1;
gf_gen_rs_matrix(a, m, k);
gf_gen_rs_matrix(a, m, k);
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Make parity vects
for (i = k; i < m; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
// Make parity vects
for (i = k; i < m; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
#ifndef USEREF
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
#else
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
#endif
}
}
// Random buffers in erasure
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
err = 1 & rand();
src_in_err[i] = err;
if (err)
src_err_list[nerrs++] = i;
}
// Random buffers in erasure
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
err = 1 & rand();
src_in_err[i] = err;
if (err)
src_err_list[nerrs++] = i;
}
// construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j];
}
// construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j];
}
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
printf("BAD MATRIX\n");
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
printf("BAD MATRIX\n");
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
recov[i] = buffs[r];
}
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
recov[i] = buffs[r];
}
// Recover data
for (i = 0; i < nerrs; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
// Recover data
for (i = 0; i < nerrs; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
#ifndef USEREF
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
#else
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
#endif
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
printf("recov %d:", src_err_list[i]);
dump(temp_buff, 25);
printf("orig :");
dump(buffs[src_err_list[i]], 25);
return -1;
}
}
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
printf("recov %d:", src_err_list[i]);
dump(temp_buff, 25);
printf("orig :");
dump(buffs[src_err_list[i]], 25);
return -1;
}
}
// Do more random tests
// Do more random tests
for (rtest = 0; rtest < RANDOMS; rtest++) {
while ((m = (rand() % MMAX)) < 2) ;
while ((k = (rand() % KMAX)) >= m || k < 1) ;
for (rtest = 0; rtest < RANDOMS; rtest++) {
while ((m = (rand() % MMAX)) < 2)
;
while ((k = (rand() % KMAX)) >= m || k < 1)
;
if (m > MMAX || k > KMAX)
continue;
if (m > MMAX || k > KMAX)
continue;
gf_gen_rs_matrix(a, m, k);
gf_gen_rs_matrix(a, m, k);
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Make parity vects
for (i = k; i < m; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
// Make parity vects
for (i = k; i < m; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
#ifndef USEREF
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
#else
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
#endif
}
}
// Random errors
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
err = 1 & rand();
src_in_err[i] = err;
if (err)
src_err_list[nerrs++] = i;
}
if (nerrs == 0) { // should have at least one error
while ((err = (rand() % KMAX)) >= k) ;
src_err_list[nerrs++] = err;
src_in_err[err] = 1;
}
// construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j];
}
// Random errors
memset(src_in_err, 0, TEST_SOURCES);
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
err = 1 & rand();
src_in_err[i] = err;
if (err)
src_err_list[nerrs++] = i;
}
if (nerrs == 0) { // should have at least one error
while ((err = (rand() % KMAX)) >= k)
;
src_err_list[nerrs++] = err;
src_in_err[err] = 1;
}
// construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j];
}
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
printf("BAD MATRIX\n");
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
printf("BAD MATRIX\n");
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
recov[i] = buffs[r];
}
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r]) {
r++;
continue;
}
recov[i] = buffs[r];
}
// Recover data
for (i = 0; i < nerrs; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
// Recover data
for (i = 0; i < nerrs; i++) {
for (j = 0; j < k; j++)
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
#ifndef USEREF
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
#else
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
#endif
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
printf(" - erase list = ");
for (i = 0; i < nerrs; i++)
printf(" %d", src_err_list[i]);
printf("\na:\n");
dump_u8xu8((u8 *) a, m, k);
printf("inv b:\n");
dump_u8xu8((u8 *) d, k, k);
printf("orig data:\n");
dump_matrix(buffs, m, 25);
printf("orig :");
dump(buffs[src_err_list[i]], 25);
printf("recov %d:", src_err_list[i]);
dump(temp_buff, 25);
return -1;
}
}
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
printf(" - erase list = ");
for (i = 0; i < nerrs; i++)
printf(" %d", src_err_list[i]);
printf("\na:\n");
dump_u8xu8((u8 *) a, m, k);
printf("inv b:\n");
dump_u8xu8((u8 *) d, k, k);
printf("orig data:\n");
dump_matrix(buffs, m, 25);
printf("orig :");
dump(buffs[src_err_list[i]], 25);
printf("recov %d:", src_err_list[i]);
dump(temp_buff, 25);
return -1;
}
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
// Run tests at end of buffer for Electric Fence
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Run tests at end of buffer for Electric Fence
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
efence_buffs[i] = buffs[i] + TEST_LEN - size;
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
efence_buffs[i] = buffs[i] + TEST_LEN - size;
for (i = 0; i < TEST_SOURCES; i++)
g[i] = rand();
for (i = 0; i < TEST_SOURCES; i++)
g[i] = rand();
for (i = 0; i < TEST_SOURCES; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
for (i = 0; i < TEST_SOURCES; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
if (0 != memcmp(dest_ref, dest, size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
dump_matrix(efence_buffs, 5, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref, align);
printf("dprod:");
dump(dest, align);
return -1;
}
if (0 != memcmp(dest_ref, dest, size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
dump_matrix(efence_buffs, 5, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref, align);
printf("dprod:");
dump(dest, align);
return -1;
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
// Test rand ptr alignment if available
// Test rand ptr alignment if available
for (rtest = 0; rtest < RANDOMS; rtest++) {
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
srcs = rand() % TEST_SOURCES;
if (srcs == 0)
continue;
for (rtest = 0; rtest < RANDOMS; rtest++) {
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
srcs = rand() % TEST_SOURCES;
if (srcs == 0)
continue;
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
// Add random offsets
for (i = 0; i < srcs; i++)
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
// Add random offsets
for (i = 0; i < srcs; i++)
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
memset(dest, 0, TEST_LEN); // zero pad to check write-over
memset(dest, 0, TEST_LEN); // zero pad to check write-over
for (i = 0; i < srcs; i++)
for (j = 0; j < size; j++)
ubuffs[i][j] = rand();
for (i = 0; i < srcs; i++)
for (j = 0; j < size; j++)
ubuffs[i][j] = rand();
for (i = 0; i < srcs; i++)
g[i] = rand();
for (i = 0; i < srcs; i++)
g[i] = rand();
for (i = 0; i < srcs; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
for (i = 0; i < srcs; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
if (memcmp(dest_ref, udest_ptr, size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n",
srcs);
dump_matrix(ubuffs, 5, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref, 25);
printf("dprod:");
dump(udest_ptr, 25);
return -1;
}
// Confirm that padding around dests is unchanged
memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
offset = udest_ptr - dest;
if (memcmp(dest_ref, udest_ptr, size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n", srcs);
dump_matrix(ubuffs, 5, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref, 25);
printf("dprod:");
dump(udest_ptr, 25);
return -1;
}
// Confirm that padding around dests is unchanged
memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
offset = udest_ptr - dest;
if (memcmp(dest, dest_ref, offset)) {
printf("Fail rand ualign pad start\n");
return -1;
}
if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
printf("Fail rand ualign pad end\n");
return -1;
}
if (memcmp(dest, dest_ref, offset)) {
printf("Fail rand ualign pad start\n");
return -1;
}
if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
printf("Fail rand ualign pad end\n");
return -1;
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
// Test all size alignment
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
// Test all size alignment
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
srcs = TEST_SOURCES;
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
srcs = TEST_SOURCES;
for (i = 0; i < srcs; i++)
for (j = 0; j < size; j++)
buffs[i][j] = rand();
for (i = 0; i < srcs; i++)
for (j = 0; j < size; j++)
buffs[i][j] = rand();
for (i = 0; i < srcs; i++)
g[i] = rand();
for (i = 0; i < srcs; i++)
g[i] = rand();
for (i = 0; i < srcs; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
for (i = 0; i < srcs; i++)
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
if (memcmp(dest_ref, dest, size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n",
size);
dump_matrix(buffs, 5, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref, 25);
printf("dprod:");
dump(dest, 25);
return -1;
}
}
if (memcmp(dest_ref, dest, size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n", size);
dump_matrix(buffs, 5, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref, 25);
printf("dprod:");
dump(dest, 25);
return -1;
}
}
printf("done all: Pass\n");
return 0;
printf("done all: Pass\n");
return 0;
}

View File

@ -29,503 +29,500 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include <string.h> // for memset, memcmp
#include "erasure_code.h"
#include "test.h"
#ifndef ALIGN_SIZE
# define ALIGN_SIZE 32
#define ALIGN_SIZE 32
#endif
#ifndef FUNCTION_UNDER_TEST
//By default, test multi-binary version
# define FUNCTION_UNDER_TEST gf_vect_mad
# define REF_FUNCTION gf_vect_dot_prod
# define VECT 1
// By default, test multi-binary version
#define FUNCTION_UNDER_TEST gf_vect_mad
#define REF_FUNCTION gf_vect_dot_prod
#define VECT 1
#endif
#ifndef TEST_MIN_SIZE
# define TEST_MIN_SIZE 64
#define TEST_MIN_SIZE 64
#endif
#define str(s) #s
#define str(s) #s
#define xstr(s) str(s)
#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN/2)
#define TEST_MEM TEST_SIZE
#define TEST_LOOPS 20000
#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN / 2)
#define TEST_MEM TEST_SIZE
#define TEST_LOOPS 20000
#define TEST_TYPE_STR ""
#ifndef TEST_SOURCES
# define TEST_SOURCES 16
#define TEST_SOURCES 16
#endif
#ifndef RANDOMS
# define RANDOMS 20
#define RANDOMS 20
#endif
#ifdef EC_ALIGNED_ADDR
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
#define PTR_ALIGN_CHK_B 0
#define LEN_ALIGN_CHK_B 0 // 0 for aligned only
#else
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B ALIGN_SIZE
# define LEN_ALIGN_CHK_B ALIGN_SIZE // 0 for aligned only
#define PTR_ALIGN_CHK_B ALIGN_SIZE
#define LEN_ALIGN_CHK_B ALIGN_SIZE // 0 for aligned only
#endif
#define str(s) #s
#define str(s) #s
#define xstr(s) str(s)
typedef unsigned char u8;
#if (VECT == 1)
# define LAST_ARG *dest
#define LAST_ARG *dest
#else
# define LAST_ARG **dest
#define LAST_ARG **dest
#endif
extern void FUNCTION_UNDER_TEST(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char LAST_ARG);
extern void REF_FUNCTION(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char LAST_ARG);
extern void
FUNCTION_UNDER_TEST(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char LAST_ARG);
extern void
REF_FUNCTION(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char LAST_ARG);
// Hex-dump len bytes from buf to stdout: each byte is printed with " %2x",
// a line break follows every 32nd byte, and a final newline ends the dump.
void
dump(unsigned char *buf, int len)
{
        int count;

        for (count = 1; count <= len; count++) {
                printf(" %2x", 0xff & buf[count - 1]);
                // count is the number of bytes printed so far
                if (count % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}
// Hex-dump the leading m bytes of each of the k buffers referenced by s.
// Each buffer occupies one output line; an empty line terminates the dump.
void
dump_matrix(unsigned char **s, int k, int m)
{
        int vec, idx;

        for (vec = 0; vec < k; vec++) {
                idx = 0;
                while (idx < m) {
                        printf(" %2x", s[vec][idx]);
                        idx++;
                }
                printf("\n");
        }
        printf("\n");
}
// Hex-dump a flat byte array as k rows of m values, where row i starts at
// index i * m. An empty line terminates the dump.
void
dump_u8xu8(unsigned char *s, int k, int m)
{
        int vec, idx;

        for (vec = 0; vec < k; vec++) {
                idx = 0;
                while (idx < m) {
                        printf(" %2x", 0xff & s[idx + (vec * m)]);
                        idx++;
                }
                printf("\n");
        }
        printf("\n");
}
int main(int argc, char *argv[])
int
main(int argc, char *argv[])
{
int i, j, rtest, srcs;
void *buf;
u8 gf[6][TEST_SOURCES];
u8 *g_tbls;
u8 *dest_ref[VECT];
u8 *dest_ptrs[VECT], *buffs[TEST_SOURCES];
int vector = VECT;
int i, j, rtest, srcs;
void *buf;
u8 gf[6][TEST_SOURCES];
u8 *g_tbls;
u8 *dest_ref[VECT];
u8 *dest_ptrs[VECT], *buffs[TEST_SOURCES];
int vector = VECT;
int align, size;
unsigned char *efence_buffs[TEST_SOURCES];
unsigned int offset;
u8 *ubuffs[TEST_SOURCES];
u8 *udest_ptrs[VECT];
printf("test" xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
int align, size;
unsigned char *efence_buffs[TEST_SOURCES];
unsigned int offset;
u8 *ubuffs[TEST_SOURCES];
u8 *udest_ptrs[VECT];
printf("test" xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
// Allocate the arrays
for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
buffs[i] = buf;
}
// Allocate the arrays
for (i = 0; i < TEST_SOURCES; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
buffs[i] = buf;
}
if (posix_memalign(&buf, 16, 2 * (vector * TEST_SOURCES * 32))) {
printf("alloc error: Fail");
return -1;
}
g_tbls = buf;
if (posix_memalign(&buf, 16, 2 * (vector * TEST_SOURCES * 32))) {
printf("alloc error: Fail");
return -1;
}
g_tbls = buf;
for (i = 0; i < vector; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest_ptrs[i] = buf;
memset(dest_ptrs[i], 0, TEST_LEN);
}
for (i = 0; i < vector; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest_ptrs[i] = buf;
memset(dest_ptrs[i], 0, TEST_LEN);
}
for (i = 0; i < vector; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest_ref[i] = buf;
memset(dest_ref[i], 0, TEST_LEN);
}
for (i = 0; i < vector; i++) {
if (posix_memalign(&buf, 64, TEST_LEN)) {
printf("alloc error: Fail");
return -1;
}
dest_ref[i] = buf;
memset(dest_ref[i], 0, TEST_LEN);
}
// Test of all zeros
for (i = 0; i < TEST_SOURCES; i++)
memset(buffs[i], 0, TEST_LEN);
// Test of all zeros
for (i = 0; i < TEST_SOURCES; i++)
memset(buffs[i], 0, TEST_LEN);
switch (vector) {
case 6:
memset(gf[5], 0xe6, TEST_SOURCES);
case 5:
memset(gf[4], 4, TEST_SOURCES);
case 4:
memset(gf[3], 9, TEST_SOURCES);
case 3:
memset(gf[2], 7, TEST_SOURCES);
case 2:
memset(gf[1], 1, TEST_SOURCES);
case 1:
memset(gf[0], 2, TEST_SOURCES);
break;
default:
return -1;
}
switch (vector) {
case 6:
memset(gf[5], 0xe6, TEST_SOURCES);
case 5:
memset(gf[4], 4, TEST_SOURCES);
case 4:
memset(gf[3], 9, TEST_SOURCES);
case 3:
memset(gf[2], 7, TEST_SOURCES);
case 2:
memset(gf[1], 1, TEST_SOURCES);
case 1:
memset(gf[0], 2, TEST_SOURCES);
break;
default:
return -1;
}
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
for (i = 0; i < vector; i++)
for (j = 0; j < TEST_SOURCES; j++) {
gf[i][j] = rand();
gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
}
for (i = 0; i < vector; i++)
for (j = 0; j < TEST_SOURCES; j++) {
gf[i][j] = rand();
gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
}
for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES],
buffs, dest_ref[i]);
for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES], buffs,
dest_ref[i]);
for (i = 0; i < vector; i++)
memset(dest_ptrs[i], 0, TEST_LEN);
for (i = 0; i < TEST_SOURCES; i++) {
for (i = 0; i < vector; i++)
memset(dest_ptrs[i], 0, TEST_LEN);
for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1)
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs);
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs);
#else
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
#endif
}
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], 25);
printf("dprod_dut:");
dump(dest_ptrs[i], 25);
return -1;
}
}
}
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], 25);
printf("dprod_dut:");
dump(dest_ptrs[i], 25);
return -1;
}
}
#if (VECT == 1)
REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, *dest_ref);
REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, *dest_ref);
#else
REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref);
REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref);
#endif
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], 25);
printf("dprod_dut:");
dump(dest_ptrs[i], 25);
return -1;
}
}
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], 25);
printf("dprod_dut:");
dump(dest_ptrs[i], 25);
return -1;
}
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
// Rand data test
// Rand data test
for (rtest = 0; rtest < RANDOMS; rtest++) {
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
for (rtest = 0; rtest < RANDOMS; rtest++) {
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
for (i = 0; i < vector; i++)
for (j = 0; j < TEST_SOURCES; j++) {
gf[i][j] = rand();
gf_vect_mul_init(gf[i][j],
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
}
for (i = 0; i < vector; i++)
for (j = 0; j < TEST_SOURCES; j++) {
gf[i][j] = rand();
gf_vect_mul_init(gf[i][j],
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
}
for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES,
&g_tbls[i * 32 * TEST_SOURCES], buffs,
dest_ref[i]);
for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES,
&g_tbls[i * 32 * TEST_SOURCES], buffs, dest_ref[i]);
for (i = 0; i < vector; i++)
memset(dest_ptrs[i], 0, TEST_LEN);
for (i = 0; i < TEST_SOURCES; i++) {
for (i = 0; i < vector; i++)
memset(dest_ptrs[i], 0, TEST_LEN);
for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1)
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i],
*dest_ptrs);
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i],
*dest_ptrs);
#else
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i],
dest_ptrs);
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
#endif
}
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d %d\n",
i, rtest);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], 25);
printf("dprod_dut:");
dump(dest_ptrs[i], 25);
return -1;
}
}
}
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d %d\n", i,
rtest);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], 25);
printf("dprod_dut:");
dump(dest_ptrs[i], 25);
return -1;
}
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
// Rand data test with varied parameters
for (rtest = 0; rtest < RANDOMS; rtest++) {
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
for (i = 0; i < srcs; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Rand data test with varied parameters
for (rtest = 0; rtest < RANDOMS; rtest++) {
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
for (i = 0; i < srcs; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
for (i = 0; i < vector; i++)
for (j = 0; j < srcs; j++) {
gf[i][j] = rand();
gf_vect_mul_init(gf[i][j],
&g_tbls[i * (32 * srcs) + j * 32]);
}
for (i = 0; i < vector; i++)
for (j = 0; j < srcs; j++) {
gf[i][j] = rand();
gf_vect_mul_init(gf[i][j],
&g_tbls[i * (32 * srcs) + j * 32]);
}
for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[i * 32 * srcs],
buffs, dest_ref[i]);
for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[i * 32 * srcs], buffs,
dest_ref[i]);
for (i = 0; i < vector; i++)
memset(dest_ptrs[i], 0, TEST_LEN);
for (i = 0; i < srcs; i++) {
for (i = 0; i < vector; i++)
memset(dest_ptrs[i], 0, TEST_LEN);
for (i = 0; i < srcs; i++) {
#if (VECT == 1)
FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i],
*dest_ptrs);
FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i],
*dest_ptrs);
#else
FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i],
dest_ptrs);
FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i], dest_ptrs);
#endif
}
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
" test%d srcs=%d\n", i, srcs);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], 25);
printf("dprod_dut:");
dump(dest_ptrs[i], 25);
return -1;
}
}
}
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
printf("Fail rand " xstr(
FUNCTION_UNDER_TEST) " test%d srcs=%d\n",
i, srcs);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], 25);
printf("dprod_dut:");
dump(dest_ptrs[i], 25);
return -1;
}
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
}
}
// Run tests at end of buffer for Electric Fence
align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// Run tests at end of buffer for Electric Fence
align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
efence_buffs[i] = buffs[i] + TEST_LEN - size;
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
efence_buffs[i] = buffs[i] + TEST_LEN - size;
for (i = 0; i < vector; i++)
for (j = 0; j < TEST_SOURCES; j++) {
gf[i][j] = rand();
gf_vect_mul_init(gf[i][j],
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
}
for (i = 0; i < vector; i++)
for (j = 0; j < TEST_SOURCES; j++) {
gf[i][j] = rand();
gf_vect_mul_init(gf[i][j],
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
}
for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(size, TEST_SOURCES,
&g_tbls[i * 32 * TEST_SOURCES], efence_buffs,
dest_ref[i]);
for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES],
efence_buffs, dest_ref[i]);
for (i = 0; i < vector; i++)
memset(dest_ptrs[i], 0, size);
for (i = 0; i < TEST_SOURCES; i++) {
for (i = 0; i < vector; i++)
memset(dest_ptrs[i], 0, size);
for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1)
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
*dest_ptrs);
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
*dest_ptrs);
#else
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
dest_ptrs);
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
dest_ptrs);
#endif
}
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
" test%d size=%d\n", i, size);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], TEST_MIN_SIZE + align);
printf("dprod_dut:");
dump(dest_ptrs[i], TEST_MIN_SIZE + align);
return -1;
}
}
}
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d size=%d\n",
i, size);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], TEST_MIN_SIZE + align);
printf("dprod_dut:");
dump(dest_ptrs[i], TEST_MIN_SIZE + align);
return -1;
}
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
// Test rand ptr alignment if available
// Test rand ptr alignment if available
for (rtest = 0; rtest < RANDOMS; rtest++) {
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
srcs = rand() % TEST_SOURCES;
if (srcs == 0)
continue;
for (rtest = 0; rtest < RANDOMS; rtest++) {
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
srcs = rand() % TEST_SOURCES;
if (srcs == 0)
continue;
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
// Add random offsets
for (i = 0; i < srcs; i++)
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
// Add random offsets
for (i = 0; i < srcs; i++)
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
for (i = 0; i < vector; i++) {
udest_ptrs[i] = dest_ptrs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
}
for (i = 0; i < vector; i++) {
udest_ptrs[i] = dest_ptrs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
}
for (i = 0; i < srcs; i++)
for (j = 0; j < size; j++)
ubuffs[i][j] = rand();
for (i = 0; i < srcs; i++)
for (j = 0; j < size; j++)
ubuffs[i][j] = rand();
for (i = 0; i < vector; i++)
for (j = 0; j < srcs; j++) {
gf[i][j] = rand();
gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * srcs) + j * 32]);
}
for (i = 0; i < vector; i++)
for (j = 0; j < srcs; j++) {
gf[i][j] = rand();
gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * srcs) + j * 32]);
}
for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(size, srcs, &g_tbls[i * 32 * srcs], ubuffs,
dest_ref[i]);
for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(size, srcs, &g_tbls[i * 32 * srcs], ubuffs,
dest_ref[i]);
for (i = 0; i < srcs; i++) {
for (i = 0; i < srcs; i++) {
#if (VECT == 1)
FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], *udest_ptrs);
FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], *udest_ptrs);
#else
FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], udest_ptrs);
FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], udest_ptrs);
#endif
}
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], udest_ptrs[i], size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
" test%d ualign srcs=%d\n", i, srcs);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], 25);
printf("dprod_dut:");
dump(udest_ptrs[i], 25);
return -1;
}
}
}
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], udest_ptrs[i], size)) {
printf("Fail rand " xstr(
FUNCTION_UNDER_TEST) " test%d ualign srcs=%d\n",
i, srcs);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], 25);
printf("dprod_dut:");
dump(udest_ptrs[i], 25);
return -1;
}
}
// Confirm that padding around dests is unchanged
memset(dest_ref[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
// Confirm that padding around dests is unchanged
memset(dest_ref[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
for (i = 0; i < vector; i++) {
offset = udest_ptrs[i] - dest_ptrs[i];
if (memcmp(dest_ptrs[i], dest_ref[0], offset)) {
printf("Fail rand ualign pad1 start\n");
return -1;
}
if (memcmp
(dest_ptrs[i] + offset + size, dest_ref[0],
PTR_ALIGN_CHK_B - offset)) {
printf("Fail rand ualign pad1 end\n");
return -1;
}
}
for (i = 0; i < vector; i++) {
offset = udest_ptrs[i] - dest_ptrs[i];
if (memcmp(dest_ptrs[i], dest_ref[0], offset)) {
printf("Fail rand ualign pad1 start\n");
return -1;
}
if (memcmp(dest_ptrs[i] + offset + size, dest_ref[0],
PTR_ALIGN_CHK_B - offset)) {
printf("Fail rand ualign pad1 end\n");
return -1;
}
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
// Test all size alignment
align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
// Test all size alignment
align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < size; j++)
buffs[i][j] = rand();
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
for (i = 0; i < TEST_SOURCES; i++)
for (j = 0; j < size; j++)
buffs[i][j] = rand();
for (i = 0; i < vector; i++) {
for (j = 0; j < TEST_SOURCES; j++) {
gf[i][j] = rand();
gf_vect_mul_init(gf[i][j],
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
}
memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
}
for (i = 0; i < vector; i++) {
for (j = 0; j < TEST_SOURCES; j++) {
gf[i][j] = rand();
gf_vect_mul_init(gf[i][j],
&g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
}
memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
}
for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(size, TEST_SOURCES,
&g_tbls[i * 32 * TEST_SOURCES], buffs,
dest_ref[i]);
for (i = 0; i < vector; i++)
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES],
buffs, dest_ref[i]);
for (i = 0; i < TEST_SOURCES; i++) {
for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1)
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i],
*dest_ptrs);
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs);
#else
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i],
dest_ptrs);
FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
#endif
}
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
" test%d ualign len=%d\n", i, size);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], 25);
printf("dprod_dut:");
dump(dest_ptrs[i], 25);
return -1;
}
}
}
for (i = 0; i < vector; i++) {
if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
printf("Fail rand " xstr(
FUNCTION_UNDER_TEST) " test%d ualign len=%d\n",
i, size);
dump_matrix(buffs, vector, TEST_SOURCES);
printf("dprod_base:");
dump(dest_ref[i], 25);
printf("dprod_dut:");
dump(dest_ptrs[i], 25);
return -1;
}
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
printf("Pass\n");
return 0;
printf("Pass\n");
return 0;
}

View File

@ -29,117 +29,116 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset
#include <string.h> // for memset
#include "erasure_code.h"
#define TEST_SIZE 8192
#define TEST_MEM TEST_SIZE
#define TEST_LOOPS 100000
#define TEST_SIZE 8192
#define TEST_MEM TEST_SIZE
#define TEST_LOOPS 100000
#define TEST_TYPE_STR ""
typedef unsigned char u8;
int main(int argc, char *argv[])
int
main(int argc, char *argv[])
{
int i;
u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
int align, size;
unsigned char *efence_buff1;
unsigned char *efence_buff2;
int i;
u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
int align, size;
unsigned char *efence_buff1;
unsigned char *efence_buff2;
printf("gf_vect_mul_base_test:\n");
printf("gf_vect_mul_base_test:\n");
gf_vect_mul_init(a, gf_const_tbl);
gf_vect_mul_init(a, gf_const_tbl);
buff1 = (u8 *) malloc(TEST_SIZE);
buff2 = (u8 *) malloc(TEST_SIZE);
buff3 = (u8 *) malloc(TEST_SIZE);
buff1 = (u8 *) malloc(TEST_SIZE);
buff2 = (u8 *) malloc(TEST_SIZE);
buff3 = (u8 *) malloc(TEST_SIZE);
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
printf("buffer alloc error\n");
return -1;
}
// Fill with rand data
for (i = 0; i < TEST_SIZE; i++)
buff1[i] = rand();
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
printf("buffer alloc error\n");
return -1;
}
// Fill with rand data
for (i = 0; i < TEST_SIZE; i++)
buff1[i] = rand();
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
printf("fail fill with rand data\n");
return 1;
}
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
printf("fail fill with rand data\n");
return 1;
}
for (i = 0; i < TEST_SIZE; i++)
if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
gf_mul(2, buff1[i]));
return 1;
}
for (i = 0; i < TEST_SIZE; i++)
if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
gf_mul(2, buff1[i]));
return 1;
}
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) {
printf("fail fill with rand data for buff1\n");
return -1;
}
// Check reference function
for (i = 0; i < TEST_SIZE; i++)
if (buff2[i] != buff3[i]) {
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
return 1;
}
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) {
printf("fail fill with rand data for buff1\n");
return -1;
}
// Check reference function
for (i = 0; i < TEST_SIZE; i++)
if (buff2[i] != buff3[i]) {
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", i, a, buff1[i], buff2[i],
gf_mul(a, buff1[i]));
return 1;
}
for (i = 0; i < TEST_SIZE; i++)
buff1[i] = rand();
for (i = 0; i < TEST_SIZE; i++)
buff1[i] = rand();
// Check each possible constant
printf("Random tests ");
for (a = 0; a != 255; a++) {
gf_vect_mul_init(a, gf_const_tbl);
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
printf("fail random tests\n");
return 1;
}
// Check each possible constant
printf("Random tests ");
for (a = 0; a != 255; a++) {
gf_vect_mul_init(a, gf_const_tbl);
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
printf("fail random tests\n");
return 1;
}
for (i = 0; i < TEST_SIZE; i++)
if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
return 1;
}
for (i = 0; i < TEST_SIZE; i++)
if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", i, a, buff1[i],
buff2[i], gf_mul(2, buff1[i]));
return 1;
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
// Run tests at end of buffer for Electric Fence
align = 32;
a = 2;
// Run tests at end of buffer for Electric Fence
align = 32;
a = 2;
gf_vect_mul_init(a, gf_const_tbl);
for (size = 0; size < TEST_SIZE; size += align) {
// Line up TEST_SIZE from end
efence_buff1 = buff1 + size;
efence_buff2 = buff2 + size;
gf_vect_mul_init(a, gf_const_tbl);
for (size = 0; size < TEST_SIZE; size += align) {
// Line up TEST_SIZE from end
efence_buff1 = buff1 + size;
efence_buff2 = buff2 + size;
if (gf_vect_mul_base
(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2) != 0) {
printf("fail tests at end of buffer\n");
return -1;
}
if (gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2) !=
0) {
printf("fail tests at end of buffer\n");
return -1;
}
for (i = 0; i < TEST_SIZE - size; i++)
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
i, efence_buff1[i], efence_buff2[i], gf_mul(2,
efence_buff1
[i]));
return 1;
}
for (i = 0; i < TEST_SIZE - size; i++)
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, efence_buff1[i],
efence_buff2[i], gf_mul(2, efence_buff1[i]));
return 1;
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
printf(" done: Pass\n");
return 0;
printf(" done: Pass\n");
return 0;
}

View File

@ -29,63 +29,65 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset
#include <string.h> // for memset
#include "erasure_code.h"
#include "test.h"
/*
 * Benchmark sizing.
 *
 * Every value-like macro is fully parenthesized so that it expands as a
 * single operand wherever it is used (e.g. next to '/' or '%').
 */
#ifndef GT_L3_CACHE
#define GT_L3_CACHE (32 * 1024 * 1024) /* some number > last level cache */
#endif

#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset
#define TEST_SOURCES  10
#define TEST_LEN      (8 * 1024)
#define TEST_TYPE_STR "_warm"
#elif defined(COLD_TEST)
// Uncached test. Pull from large mem base.
#define TEST_SOURCES  10
#define TEST_LEN      (GT_L3_CACHE / 2)
#define TEST_TYPE_STR "_cold"
#elif defined(TEST_CUSTOM)
// Custom test: TEST_SOURCES and TEST_LEN are not defined in this block
#define TEST_TYPE_STR "_cus"
#endif

#define TEST_MEM (2 * TEST_LEN)
typedef unsigned char u8;
/*
 * One benchmark iteration: build the multiply table for constant a into
 * gf_const_tbl, then run gf_vect_mul() over TEST_LEN bytes from buff1 into
 * buff2. The return value of gf_vect_mul() is not inspected here.
 */
void
gf_vect_mul_perf(u8 a, u8 *gf_const_tbl, u8 *buff1, u8 *buff2)
{
        /* Table setup is part of the timed work */
        gf_vect_mul_init(a, gf_const_tbl);

        gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
}
int main(int argc, char *argv[])
int
main(int argc, char *argv[])
{
u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
struct perf start;
u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
struct perf start;
printf("gf_vect_mul_perf:\n");
printf("gf_vect_mul_perf:\n");
// Allocate large mem region
buff1 = (u8 *) malloc(TEST_LEN);
buff2 = (u8 *) malloc(TEST_LEN);
if (NULL == buff1 || NULL == buff2) {
printf("Failed to allocate %dB\n", TEST_LEN);
return 1;
}
// Allocate large mem region
buff1 = (u8 *) malloc(TEST_LEN);
buff2 = (u8 *) malloc(TEST_LEN);
if (NULL == buff1 || NULL == buff2) {
printf("Failed to allocate %dB\n", TEST_LEN);
return 1;
}
memset(buff1, 0, TEST_LEN);
memset(buff2, 0, TEST_LEN);
memset(buff1, 0, TEST_LEN);
memset(buff2, 0, TEST_LEN);
printf("Start timed tests\n");
fflush(0);
printf("Start timed tests\n");
fflush(0);
BENCHMARK(&start, BENCHMARK_TIME, gf_vect_mul_perf(a, gf_const_tbl, buff1, buff2));
BENCHMARK(&start, BENCHMARK_TIME, gf_vect_mul_perf(a, gf_const_tbl, buff1, buff2));
printf("gf_vect_mul" TEST_TYPE_STR ": ");
perf_print(start, (long long)TEST_LEN);
printf("gf_vect_mul" TEST_TYPE_STR ": ");
perf_print(start, (long long) TEST_LEN);
return 0;
return 0;
}

View File

@ -31,165 +31,164 @@
#include <stdlib.h>
#include "erasure_code.h"
#define TEST_SIZE (128*1024)
#define TEST_SIZE (128 * 1024)
typedef unsigned char u8;
int main(int argc, char *argv[])
int
main(int argc, char *argv[])
{
int i, ret = -1;
u8 *buff1 = NULL, *buff2 = NULL, *buff3 = NULL, gf_const_tbl[64], a = 2;
int tsize;
int align, size;
unsigned char *efence_buff1;
unsigned char *efence_buff2;
unsigned char *efence_buff3;
int i, ret = -1;
u8 *buff1 = NULL, *buff2 = NULL, *buff3 = NULL, gf_const_tbl[64], a = 2;
int tsize;
int align, size;
unsigned char *efence_buff1;
unsigned char *efence_buff2;
unsigned char *efence_buff3;
printf("gf_vect_mul_test: ");
printf("gf_vect_mul_test: ");
gf_vect_mul_init(a, gf_const_tbl);
gf_vect_mul_init(a, gf_const_tbl);
buff1 = (u8 *) malloc(TEST_SIZE);
buff2 = (u8 *) malloc(TEST_SIZE);
buff3 = (u8 *) malloc(TEST_SIZE);
buff1 = (u8 *) malloc(TEST_SIZE);
buff2 = (u8 *) malloc(TEST_SIZE);
buff3 = (u8 *) malloc(TEST_SIZE);
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
printf("buffer alloc error\n");
goto exit;
}
// Fill with rand data
for (i = 0; i < TEST_SIZE; i++)
buff1[i] = rand();
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
printf("buffer alloc error\n");
goto exit;
}
// Fill with rand data
for (i = 0; i < TEST_SIZE; i++)
buff1[i] = rand();
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
printf("fail creating buff2\n");
goto exit;
}
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
printf("fail creating buff2\n");
goto exit;
}
for (i = 0; i < TEST_SIZE; i++) {
if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i,
buff1[i], buff2[i], gf_mul(2, buff1[i]));
goto exit;
}
}
for (i = 0; i < TEST_SIZE; i++) {
if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
gf_mul(2, buff1[i]));
goto exit;
}
}
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) {
printf("fail fill with rand data\n");
goto exit;
}
// Check reference function
for (i = 0; i < TEST_SIZE; i++) {
if (buff2[i] != buff3[i]) {
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
goto exit;
}
}
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) {
printf("fail fill with rand data\n");
goto exit;
}
// Check reference function
for (i = 0; i < TEST_SIZE; i++) {
if (buff2[i] != buff3[i]) {
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", i, a, buff1[i], buff2[i],
gf_mul(a, buff1[i]));
goto exit;
}
}
for (i = 0; i < TEST_SIZE; i++)
buff1[i] = rand();
for (i = 0; i < TEST_SIZE; i++)
buff1[i] = rand();
// Check each possible constant
for (a = 0; a != 255; a++) {
gf_vect_mul_init(a, gf_const_tbl);
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
printf("fail creating buff2\n");
goto exit;
}
// Check each possible constant
for (a = 0; a != 255; a++) {
gf_vect_mul_init(a, gf_const_tbl);
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
printf("fail creating buff2\n");
goto exit;
}
for (i = 0; i < TEST_SIZE; i++)
if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
goto exit;
}
for (i = 0; i < TEST_SIZE; i++)
if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", i, a, buff1[i],
buff2[i], gf_mul(2, buff1[i]));
goto exit;
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
// Check buffer len
for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) {
a = rand();
gf_vect_mul_init(a, gf_const_tbl);
if (gf_vect_mul(tsize, gf_const_tbl, buff1, buff2) != 0) {
printf("fail creating buff2 (len %d)\n", tsize);
goto exit;
}
// Check buffer len
for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) {
a = rand();
gf_vect_mul_init(a, gf_const_tbl);
if (gf_vect_mul(tsize, gf_const_tbl, buff1, buff2) != 0) {
printf("fail creating buff2 (len %d)\n", tsize);
goto exit;
}
for (i = 0; i < tsize; i++)
if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
goto exit;
}
for (i = 0; i < tsize; i++)
if (gf_mul(a, buff1[i]) != buff2[i]) {
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", i, a, buff1[i],
buff2[i], gf_mul(2, buff1[i]));
goto exit;
}
#ifdef TEST_VERBOSE
if (0 == tsize % (32 * 8)) {
putchar('.');
fflush(0);
}
if (0 == tsize % (32 * 8)) {
putchar('.');
fflush(0);
}
#endif
}
}
// Run tests at end of buffer for Electric Fence
align = 32;
a = 2;
// Run tests at end of buffer for Electric Fence
align = 32;
a = 2;
gf_vect_mul_init(a, gf_const_tbl);
for (size = 0; size < TEST_SIZE; size += align) {
// Line up TEST_SIZE from end
efence_buff1 = buff1 + size;
efence_buff2 = buff2 + size;
efence_buff3 = buff3 + size;
gf_vect_mul_init(a, gf_const_tbl);
for (size = 0; size < TEST_SIZE; size += align) {
// Line up TEST_SIZE from end
efence_buff1 = buff1 + size;
efence_buff2 = buff2 + size;
efence_buff3 = buff3 + size;
gf_vect_mul(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
gf_vect_mul(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
for (i = 0; i < TEST_SIZE - size; i++)
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
i, efence_buff1[i], efence_buff2[i],
gf_mul(2, efence_buff1[i]));
goto exit;
}
for (i = 0; i < TEST_SIZE - size; i++)
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, efence_buff1[i],
efence_buff2[i], gf_mul(2, efence_buff1[i]));
goto exit;
}
if (gf_vect_mul_base
(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3) != 0) {
printf("fail line up TEST_SIZE from end\n");
goto exit;
}
// Check reference function
for (i = 0; i < TEST_SIZE - size; i++)
if (efence_buff2[i] != efence_buff3[i]) {
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
i, a, efence_buff2[i], efence_buff3[i],
gf_mul(2, efence_buff1[i]));
goto exit;
}
if (gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3) !=
0) {
printf("fail line up TEST_SIZE from end\n");
goto exit;
}
// Check reference function
for (i = 0; i < TEST_SIZE - size; i++)
if (efence_buff2[i] != efence_buff3[i]) {
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", i, a,
efence_buff2[i], efence_buff3[i],
gf_mul(2, efence_buff1[i]));
goto exit;
}
#ifdef TEST_VERBOSE
putchar('.');
putchar('.');
#endif
}
}
// Test all unsupported sizes up to TEST_SIZE
for (size = 0; size < TEST_SIZE; size++) {
if (size % align != 0 && gf_vect_mul(size, gf_const_tbl, buff1, buff2) == 0) {
printf
("fail expecting nonzero return code for unaligned size param (%d)\n",
size);
goto exit;
}
}
// Test all unsupported sizes up to TEST_SIZE
for (size = 0; size < TEST_SIZE; size++) {
if (size % align != 0 && gf_vect_mul(size, gf_const_tbl, buff1, buff2) == 0) {
printf("fail expecting nonzero return code for unaligned size param (%d)\n",
size);
goto exit;
}
}
printf(" done: Pass\n");
fflush(0);
printf(" done: Pass\n");
fflush(0);
ret = 0;
exit:
ret = 0;
exit:
free(buff1);
free(buff2);
free(buff3);
free(buff1);
free(buff2);
free(buff3);
return ret;
return ret;
}

View File

@ -1,106 +1,109 @@
#include "erasure_code.h"
#include "ec_base_vsx.h"
/*
 * gf_vect_dot_prod entry point for this build: passes all arguments through
 * unchanged to gf_vect_dot_prod_vsx().
 */
void
gf_vect_dot_prod(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest)
{
        /* No argument checking or fallback here; the VSX routine does the work */
        gf_vect_dot_prod_vsx(len, vlen, v, src, dest);
}
/*
 * gf_vect_mad entry point for this build: passes all arguments through
 * unchanged to gf_vect_mad_vsx().
 */
void
gf_vect_mad(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, unsigned char *dest)
{
        /* No argument checking or fallback here; the VSX routine does the work */
        gf_vect_mad_vsx(len, vec, vec_i, v, src, dest);
}
/*
 * Encode dispatcher for the VSX build.
 *
 * Lengths below 64 are handed to ec_encode_data_base(). Otherwise outputs
 * are produced six at a time with gf_6vect_dot_prod_vsx(), advancing the
 * table pointer by 6 * srcs * 32 bytes and the destination array by six
 * entries per pass, and the remaining 1..5 outputs are handled by the
 * matching N-vector dot-product routine.
 */
void
ec_encode_data(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
               unsigned char **dest)
{
        if (len < 64) {
                ec_encode_data_base(len, srcs, dests, v, src, dest);
                return;
        }

        while (dests >= 6) {
                gf_6vect_dot_prod_vsx(len, srcs, v, src, dest);
                v += 6 * srcs * 32;
                dest += 6;
                dests -= 6;
        }

        /* The loop above only exits with dests < 6, so there is no case 6 */
        switch (dests) {
        case 5:
                gf_5vect_dot_prod_vsx(len, srcs, v, src, dest);
                break;
        case 4:
                gf_4vect_dot_prod_vsx(len, srcs, v, src, dest);
                break;
        case 3:
                gf_3vect_dot_prod_vsx(len, srcs, v, src, dest);
                break;
        case 2:
                gf_2vect_dot_prod_vsx(len, srcs, v, src, dest);
                break;
        case 1:
                /* Single-output routine takes the destination buffer itself */
                gf_vect_dot_prod_vsx(len, srcs, v, src, *dest);
                break;
        default:
                /* Nothing left to encode (zero or negative count) */
                break;
        }
}
/*
 * Encode-update dispatcher for the VSX build.
 *
 * Lengths below 64 are handed to ec_encode_data_update_base(). Otherwise
 * rows are processed six at a time with gf_6vect_mad_vsx(), advancing the
 * table pointer by 6 * k * 32 bytes and the destination array by six
 * entries per pass, and the remaining 1..5 rows are handled by the
 * matching N-vector multiply-accumulate routine.
 */
void
ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v, unsigned char *data,
                      unsigned char **dest)
{
        if (len < 64) {
                ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
                return;
        }

        while (rows >= 6) {
                gf_6vect_mad_vsx(len, k, vec_i, v, data, dest);
                v += 6 * k * 32;
                dest += 6;
                rows -= 6;
        }

        /* The loop above only exits with rows < 6, so there is no case 6 */
        switch (rows) {
        case 5:
                gf_5vect_mad_vsx(len, k, vec_i, v, data, dest);
                break;
        case 4:
                gf_4vect_mad_vsx(len, k, vec_i, v, data, dest);
                break;
        case 3:
                gf_3vect_mad_vsx(len, k, vec_i, v, data, dest);
                break;
        case 2:
                gf_2vect_mad_vsx(len, k, vec_i, v, data, dest);
                break;
        case 1:
                /* Single-output routine takes the destination buffer itself */
                gf_vect_mad_vsx(len, k, vec_i, v, data, *dest);
                break;
        default:
                /* Nothing left to update (zero or negative count) */
                break;
        }
}
/*
 * gf_vect_mul entry point for this build.
 *
 * Returns -1 without touching the buffers when len is not a multiple of 32;
 * otherwise forwards to gf_vect_mul_vsx() and returns 0.
 */
int
gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
{
        unsigned char *in = (unsigned char *) src;
        unsigned char *out = (unsigned char *) dest;

        /* Size must be aligned to 32 bytes */
        if ((len % 32) == 0) {
                gf_vect_mul_vsx(len, a, in, out);
                return 0;
        }

        return -1;
}
/*
 * ec_init_tables entry point for this build: forwards all arguments
 * unchanged to ec_init_tables_base().
 */
void
ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
        /* Plain call: a void function must not use "return <expression>;" */
        ec_init_tables_base(k, rows, a, g_tbls);
}

View File

@ -9,29 +9,37 @@ extern "C" {
#endif
#if defined(__ibmxl__)
#define EC_vec_xl(a, b) vec_xl_be(a, b)
#define EC_vec_xl(a, b) vec_xl_be(a, b)
#define EC_vec_permxor(va, vb, vc) __vpermxor(va, vb, vc)
#elif defined __GNUC__ && __GNUC__ >= 8
#define EC_vec_xl(a, b) vec_xl_be(a, b)
#define EC_vec_xl(a, b) vec_xl_be(a, b)
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vc)
#elif defined __GNUC__ && __GNUC__ >= 7
#if defined _ARCH_PWR9
#define EC_vec_xl(a, b) vec_vsx_ld(a, b)
#define EC_vec_xl(a, b) vec_vsx_ld(a, b)
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
#else
/*
 * Inline-asm replacement for the vector load used on this compiler branch:
 * issues lxvd2x with off and ptr as its two address operands, then xxswapd
 * on the loaded register, and returns the result.
 *
 * Declared static inline so that each translation unit including this
 * header has its own definition; a plain "inline" definition provides no
 * external definition to link against when a call is not inlined.
 *
 * NOTE(review): the asm lists no memory input or "memory" clobber for the
 * bytes it reads through ptr - confirm that is intentional.
 */
static inline vector unsigned char
EC_vec_xl(int off, unsigned char *ptr)
{
        vector unsigned char vc;
        __asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0"
                             : "=wa"(vc)
                             : "r"(off), "r"(ptr));
        return vc;
}
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
#endif
#else
#if defined _ARCH_PWR8
/*
 * Inline-asm replacement for the vector load used on the _ARCH_PWR8 branch:
 * issues lxvd2x with off and ptr as its two address operands, then xxswapd
 * on the loaded register, and returns the result.
 *
 * Declared static inline so that each translation unit including this
 * header has its own definition; a plain "inline" definition provides no
 * external definition to link against when a call is not inlined.
 *
 * NOTE(review): the asm lists no memory input or "memory" clobber for the
 * bytes it reads through ptr - confirm that is intentional.
 */
static inline vector unsigned char
EC_vec_xl(int off, unsigned char *ptr)
{
        vector unsigned char vc;
        __asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0"
                             : "=wa"(vc)
                             : "r"(off), "r"(ptr));
        return vc;
}
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
#else
@ -57,7 +65,8 @@ inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
* @returns none
*/
void gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigned char *dest);
void
gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigned char *dest);
/**
* @brief GF(2^8) vector dot product. VSX version.
@ -77,8 +86,9 @@ void gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigne
* @returns none
*/
void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char *dest);
void
gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char *dest);
/**
* @brief GF(2^8) vector dot product with two outputs. VSX version.
@ -99,8 +109,9 @@ void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
* @returns none
*/
void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
void
gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
/**
* @brief GF(2^8) vector dot product with three outputs. VSX version.
@ -121,8 +132,9 @@ void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
* @returns none
*/
void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
void
gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
/**
* @brief GF(2^8) vector dot product with four outputs. VSX version.
@ -143,8 +155,9 @@ void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
* @returns none
*/
void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
void
gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
/**
* @brief GF(2^8) vector dot product with five outputs. VSX version.
@ -165,8 +178,9 @@ void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
* @returns none
*/
void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
void
gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
/**
* @brief GF(2^8) vector dot product with six outputs. VSX version.
@ -187,8 +201,9 @@ void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
* @returns none
*/
void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
void
gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest);
/**
* @brief GF(2^8) vector multiply accumulate. VSX version.
@ -211,8 +226,9 @@ void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
* @returns none
*/
void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest);
void
gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest);
/**
* @brief GF(2^8) vector multiply with 2 accumulate. VSX version.
*
@ -234,8 +250,9 @@ void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigne
* @returns none
*/
void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
void
gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
/**
* @brief GF(2^8) vector multiply with 3 accumulate. VSX version.
@ -258,8 +275,9 @@ void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
* @returns none
*/
void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
void
gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
/**
* @brief GF(2^8) vector multiply with 4 accumulate. VSX version.
@ -282,8 +300,9 @@ void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
* @returns none
*/
void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
void
gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
/**
* @brief GF(2^8) vector multiply with 5 accumulate. VSX version.
@ -305,8 +324,9 @@ void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
* @param dest Array of pointers to destination input/outputs.
* @returns none
*/
void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
void
gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
/**
* @brief GF(2^8) vector multiply with 6 accumulate. VSX version.
@ -328,8 +348,9 @@ void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
* @param dest Array of pointers to destination input/outputs.
* @returns none
*/
void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
void
gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
#ifdef __cplusplus
}

View File

@ -1,83 +1,84 @@
#include "ec_base_vsx.h"
void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest)
void
gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest)
{
unsigned char *s, *t0, *t1;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4;
vector unsigned char vYD, vYE, vYF, vYG;
vector unsigned char vhi0, vlo0, vhi1, vlo1;
int i, j, head;
unsigned char *s, *t0, *t1;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4;
vector unsigned char vYD, vYE, vYF, vYG;
vector unsigned char vhi0, vlo0, vhi1, vlo1;
int i, j, head;
if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
for (j = 1; j < vlen; j++) {
gf_2vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
}
return;
}
for (j = 1; j < vlen; j++) {
gf_2vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
}
return;
}
t0 = (unsigned char *)dest[0];
t1 = (unsigned char *)dest[1];
t0 = (unsigned char *) dest[0];
t1 = (unsigned char *) dest[1];
head = len % 64;
if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
}
head = len % 64;
if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
}
for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4;
for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4;
vYD = vYD ^ vYD;
vYE = vYE ^ vYE;
vYF = vYF ^ vYF;
vYG = vYG ^ vYG;
vYD = vYD ^ vYD;
vYE = vYE ^ vYE;
vYF = vYF ^ vYF;
vYG = vYG ^ vYG;
unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen];
unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen];
for (j = 0; j < vlen; j++) {
s = (unsigned char *)src[j];
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
for (j = 0; j < vlen; j++) {
s = (unsigned char *) src[j];
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1);
vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
g0 += 32;
g1 += 32;
}
g0 += 32;
g1 += 32;
}
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
}
return;
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
}
return;
}

View File

@ -1,65 +1,66 @@
#include "ec_base_vsx.h"
void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest)
void
gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest)
{
unsigned char *s, *t0, *t1;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4;
vector unsigned char vYD, vYE, vYF, vYG;
vector unsigned char vhi0, vlo0, vhi1, vlo1;
int i, head;
unsigned char *s, *t0, *t1;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4;
vector unsigned char vYD, vYE, vYF, vYG;
vector unsigned char vhi0, vlo0, vhi1, vlo1;
int i, head;
s = (unsigned char *)src;
t0 = (unsigned char *)dest[0];
t1 = (unsigned char *)dest[1];
s = (unsigned char *) src;
t0 = (unsigned char *) dest[0];
t1 = (unsigned char *) dest[1];
head = len % 64;
if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
}
head = len % 64;
if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
}
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i);
vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i);
vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
}
return;
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
}
return;
}

View File

@ -1,104 +1,105 @@
#include "ec_base_vsx.h"
void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest)
void
gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest)
{
unsigned char *s, *t0, *t1, *t2;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
int i, j, head;
unsigned char *s, *t0, *t1, *t2;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
int i, j, head;
if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
for (j = 1; j < vlen; j++) {
gf_3vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
}
return;
}
for (j = 1; j < vlen; j++) {
gf_3vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
}
return;
}
t0 = (unsigned char *)dest[0];
t1 = (unsigned char *)dest[1];
t2 = (unsigned char *)dest[2];
t0 = (unsigned char *) dest[0];
t1 = (unsigned char *) dest[1];
t2 = (unsigned char *) dest[2];
head = len % 64;
if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
}
head = len % 64;
if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
}
for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6;
for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6;
vYD = vYD ^ vYD;
vYE = vYE ^ vYE;
vYF = vYF ^ vYF;
vYG = vYG ^ vYG;
vYH = vYH ^ vYH;
vYI = vYI ^ vYI;
vYD = vYD ^ vYD;
vYE = vYE ^ vYE;
vYF = vYF ^ vYF;
vYG = vYG ^ vYG;
vYH = vYH ^ vYH;
vYI = vYI ^ vYI;
unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen];
unsigned char *g2 = &gftbls[2 * 32 * vlen];
unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen];
unsigned char *g2 = &gftbls[2 * 32 * vlen];
for (j = 0; j < vlen; j++) {
s = (unsigned char *)src[j];
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
for (j = 0; j < vlen; j++) {
s = (unsigned char *) src[j];
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1);
vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vlo2 = vec_xl(0, g2);
vhi2 = vec_xl(16, g2);
vlo2 = vec_xl(0, g2);
vhi2 = vec_xl(16, g2);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
g0 += 32;
g1 += 32;
g2 += 32;
}
g0 += 32;
g1 += 32;
g2 += 32;
}
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
}
return;
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
}
return;
}

View File

@ -1,84 +1,85 @@
#include "ec_base_vsx.h"
void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest)
void
gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest)
{
unsigned char *s, *t0, *t1, *t2;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
int i, head;
unsigned char *s, *t0, *t1, *t2;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
int i, head;
s = (unsigned char *)src;
t0 = (unsigned char *)dest[0];
t1 = (unsigned char *)dest[1];
t2 = (unsigned char *)dest[2];
s = (unsigned char *) src;
t0 = (unsigned char *) dest[0];
t1 = (unsigned char *) dest[1];
t2 = (unsigned char *) dest[2];
head = len % 64;
if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
}
head = len % 64;
if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
}
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i);
vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i);
vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY5 = vec_xl(0, t2 + i);
vY6 = vec_xl(16, t2 + i);
vYH = vec_xl(32, t2 + i);
vYI = vec_xl(48, t2 + i);
vY5 = vec_xl(0, t2 + i);
vY6 = vec_xl(16, t2 + i);
vYH = vec_xl(32, t2 + i);
vYI = vec_xl(48, t2 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
}
return;
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
}
return;
}

View File

@ -1,124 +1,125 @@
#include "ec_base_vsx.h"
void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest)
void
gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest)
{
unsigned char *s, *t0, *t1, *t2, *t3;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
int i, j, head;
unsigned char *s, *t0, *t1, *t2, *t3;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
int i, j, head;
if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]);
if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *) dest[3]);
for (j = 1; j < vlen; j++) {
gf_4vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
}
return;
}
for (j = 1; j < vlen; j++) {
gf_4vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
}
return;
}
t0 = (unsigned char *)dest[0];
t1 = (unsigned char *)dest[1];
t2 = (unsigned char *)dest[2];
t3 = (unsigned char *)dest[3];
t0 = (unsigned char *) dest[0];
t1 = (unsigned char *) dest[1];
t2 = (unsigned char *) dest[2];
t3 = (unsigned char *) dest[3];
head = len % 64;
if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
}
head = len % 64;
if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
}
for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6;
vY7 = vY7 ^ vY7;
vY8 = vY8 ^ vY8;
for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6;
vY7 = vY7 ^ vY7;
vY8 = vY8 ^ vY8;
vYD = vYD ^ vYD;
vYE = vYE ^ vYE;
vYF = vYF ^ vYF;
vYG = vYG ^ vYG;
vYH = vYH ^ vYH;
vYI = vYI ^ vYI;
vYJ = vYJ ^ vYJ;
vYK = vYK ^ vYK;
vYD = vYD ^ vYD;
vYE = vYE ^ vYE;
vYF = vYF ^ vYF;
vYG = vYG ^ vYG;
vYH = vYH ^ vYH;
vYI = vYI ^ vYI;
vYJ = vYJ ^ vYJ;
vYK = vYK ^ vYK;
unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen];
unsigned char *g2 = &gftbls[2 * 32 * vlen];
unsigned char *g3 = &gftbls[3 * 32 * vlen];
unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen];
unsigned char *g2 = &gftbls[2 * 32 * vlen];
unsigned char *g3 = &gftbls[3 * 32 * vlen];
for (j = 0; j < vlen; j++) {
s = (unsigned char *)src[j];
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
for (j = 0; j < vlen; j++) {
s = (unsigned char *) src[j];
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1);
vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vlo2 = vec_xl(0, g2);
vhi2 = vec_xl(16, g2);
vlo3 = vec_xl(0, g3);
vhi3 = vec_xl(16, g3);
vlo2 = vec_xl(0, g2);
vhi2 = vec_xl(16, g2);
vlo3 = vec_xl(0, g3);
vhi3 = vec_xl(16, g3);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
g0 += 32;
g1 += 32;
g2 += 32;
g3 += 32;
}
g0 += 32;
g1 += 32;
g2 += 32;
g3 += 32;
}
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i);
}
return;
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i);
}
return;
}

View File

@ -1,103 +1,104 @@
#include "ec_base_vsx.h"
void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest)
void
gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest)
{
unsigned char *s, *t0, *t1, *t2, *t3;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
int i, head;
unsigned char *s, *t0, *t1, *t2, *t3;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
int i, head;
s = (unsigned char *)src;
t0 = (unsigned char *)dest[0];
t1 = (unsigned char *)dest[1];
t2 = (unsigned char *)dest[2];
t3 = (unsigned char *)dest[3];
s = (unsigned char *) src;
t0 = (unsigned char *) dest[0];
t1 = (unsigned char *) dest[1];
t2 = (unsigned char *) dest[2];
t3 = (unsigned char *) dest[3];
head = len % 64;
if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
}
head = len % 64;
if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
}
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i);
vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i);
vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY5 = vec_xl(0, t2 + i);
vY6 = vec_xl(16, t2 + i);
vYH = vec_xl(32, t2 + i);
vYI = vec_xl(48, t2 + i);
vY5 = vec_xl(0, t2 + i);
vY6 = vec_xl(16, t2 + i);
vYH = vec_xl(32, t2 + i);
vYI = vec_xl(48, t2 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY7 = vec_xl(0, t3 + i);
vY8 = vec_xl(16, t3 + i);
vYJ = vec_xl(32, t3 + i);
vYK = vec_xl(48, t3 + i);
vY7 = vec_xl(0, t3 + i);
vY8 = vec_xl(16, t3 + i);
vYJ = vec_xl(32, t3 + i);
vYK = vec_xl(48, t3 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i);
vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i);
}
return;
vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i);
vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i);
}
return;
}

View File

@ -1,145 +1,146 @@
#include "ec_base_vsx.h"
void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest)
void
gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest)
{
unsigned char *s, *t0, *t1, *t2, *t3, *t4;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
int i, j, head;
unsigned char *s, *t0, *t1, *t2, *t3, *t4;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
int i, j, head;
if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]);
gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *)dest[4]);
if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *) dest[3]);
gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *) dest[4]);
for (j = 1; j < vlen; j++) {
gf_5vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
}
return;
}
for (j = 1; j < vlen; j++) {
gf_5vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
}
return;
}
t0 = (unsigned char *)dest[0];
t1 = (unsigned char *)dest[1];
t2 = (unsigned char *)dest[2];
t3 = (unsigned char *)dest[3];
t4 = (unsigned char *)dest[4];
t0 = (unsigned char *) dest[0];
t1 = (unsigned char *) dest[1];
t2 = (unsigned char *) dest[2];
t3 = (unsigned char *) dest[3];
t4 = (unsigned char *) dest[4];
head = len % 64;
if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
}
head = len % 64;
if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
}
for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6;
vY7 = vY7 ^ vY7;
vY8 = vY8 ^ vY8;
vY9 = vY9 ^ vY9;
vYA = vYA ^ vYA;
for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6;
vY7 = vY7 ^ vY7;
vY8 = vY8 ^ vY8;
vY9 = vY9 ^ vY9;
vYA = vYA ^ vYA;
vYD = vYD ^ vYD;
vYE = vYE ^ vYE;
vYF = vYF ^ vYF;
vYG = vYG ^ vYG;
vYH = vYH ^ vYH;
vYI = vYI ^ vYI;
vYJ = vYJ ^ vYJ;
vYK = vYK ^ vYK;
vYL = vYL ^ vYL;
vYM = vYM ^ vYM;
vYD = vYD ^ vYD;
vYE = vYE ^ vYE;
vYF = vYF ^ vYF;
vYG = vYG ^ vYG;
vYH = vYH ^ vYH;
vYI = vYI ^ vYI;
vYJ = vYJ ^ vYJ;
vYK = vYK ^ vYK;
vYL = vYL ^ vYL;
vYM = vYM ^ vYM;
unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen];
unsigned char *g2 = &gftbls[2 * 32 * vlen];
unsigned char *g3 = &gftbls[3 * 32 * vlen];
unsigned char *g4 = &gftbls[4 * 32 * vlen];
unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen];
unsigned char *g2 = &gftbls[2 * 32 * vlen];
unsigned char *g3 = &gftbls[3 * 32 * vlen];
unsigned char *g4 = &gftbls[4 * 32 * vlen];
for (j = 0; j < vlen; j++) {
s = (unsigned char *)src[j];
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
for (j = 0; j < vlen; j++) {
s = (unsigned char *) src[j];
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1);
vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vlo2 = vec_xl(0, g2);
vhi2 = vec_xl(16, g2);
vlo3 = vec_xl(0, g3);
vhi3 = vec_xl(16, g3);
vlo2 = vec_xl(0, g2);
vhi2 = vec_xl(16, g2);
vlo3 = vec_xl(0, g3);
vhi3 = vec_xl(16, g3);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vlo4 = vec_xl(0, g4);
vhi4 = vec_xl(16, g4);
vlo4 = vec_xl(0, g4);
vhi4 = vec_xl(16, g4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
g0 += 32;
g1 += 32;
g2 += 32;
g3 += 32;
g4 += 32;
}
g0 += 32;
g1 += 32;
g2 += 32;
g3 += 32;
g4 += 32;
}
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i);
vec_xst(vY9, 0, t4 + i);
vec_xst(vYA, 16, t4 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i);
vec_xst(vY9, 0, t4 + i);
vec_xst(vYA, 16, t4 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i);
vec_xst(vYL, 32, t4 + i);
vec_xst(vYM, 48, t4 + i);
}
return;
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i);
vec_xst(vYL, 32, t4 + i);
vec_xst(vYM, 48, t4 + i);
}
return;
}

View File

@ -1,122 +1,123 @@
#include "ec_base_vsx.h"
void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest)
void
gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest)
{
unsigned char *s, *t0, *t1, *t2, *t3, *t4;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
int i, head;
unsigned char *s, *t0, *t1, *t2, *t3, *t4;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
int i, head;
s = (unsigned char *)src;
t0 = (unsigned char *)dest[0];
t1 = (unsigned char *)dest[1];
t2 = (unsigned char *)dest[2];
t3 = (unsigned char *)dest[3];
t4 = (unsigned char *)dest[4];
s = (unsigned char *) src;
t0 = (unsigned char *) dest[0];
t1 = (unsigned char *) dest[1];
t2 = (unsigned char *) dest[2];
t3 = (unsigned char *) dest[3];
t4 = (unsigned char *) dest[4];
head = len % 64;
if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
}
head = len % 64;
if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
}
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i);
vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i);
vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY5 = vec_xl(0, t2 + i);
vY6 = vec_xl(16, t2 + i);
vYH = vec_xl(32, t2 + i);
vYI = vec_xl(48, t2 + i);
vY5 = vec_xl(0, t2 + i);
vY6 = vec_xl(16, t2 + i);
vYH = vec_xl(32, t2 + i);
vYI = vec_xl(48, t2 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY7 = vec_xl(0, t3 + i);
vY8 = vec_xl(16, t3 + i);
vYJ = vec_xl(32, t3 + i);
vYK = vec_xl(48, t3 + i);
vY7 = vec_xl(0, t3 + i);
vY8 = vec_xl(16, t3 + i);
vYJ = vec_xl(32, t3 + i);
vYK = vec_xl(48, t3 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY9 = vec_xl(0, t4 + i);
vYA = vec_xl(16, t4 + i);
vYL = vec_xl(32, t4 + i);
vYM = vec_xl(48, t4 + i);
vY9 = vec_xl(0, t4 + i);
vYA = vec_xl(16, t4 + i);
vYL = vec_xl(32, t4 + i);
vYM = vec_xl(48, t4 + i);
vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i);
vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i);
vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i);
vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i);
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
vec_xst(vY9, 0, t4 + i);
vec_xst(vYA, 16, t4 + i);
vec_xst(vYL, 32, t4 + i);
vec_xst(vYM, 48, t4 + i);
}
return;
vec_xst(vY9, 0, t4 + i);
vec_xst(vYA, 16, t4 + i);
vec_xst(vYL, 32, t4 + i);
vec_xst(vYM, 48, t4 + i);
}
return;
}

View File

@ -1,166 +1,167 @@
#include "ec_base_vsx.h"
void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest)
void
gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char **dest)
{
unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
int i, j, head;
unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
int i, j, head;
if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]);
gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *)dest[4]);
gf_vect_mul_vsx(len, &gftbls[5 * 32 * vlen], src[0], (unsigned char *)dest[5]);
if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *) dest[3]);
gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *) dest[4]);
gf_vect_mul_vsx(len, &gftbls[5 * 32 * vlen], src[0], (unsigned char *) dest[5]);
for (j = 1; j < vlen; j++) {
gf_6vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
}
return;
}
for (j = 1; j < vlen; j++) {
gf_6vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
}
return;
}
t0 = (unsigned char *)dest[0];
t1 = (unsigned char *)dest[1];
t2 = (unsigned char *)dest[2];
t3 = (unsigned char *)dest[3];
t4 = (unsigned char *)dest[4];
t5 = (unsigned char *)dest[5];
t0 = (unsigned char *) dest[0];
t1 = (unsigned char *) dest[1];
t2 = (unsigned char *) dest[2];
t3 = (unsigned char *) dest[3];
t4 = (unsigned char *) dest[4];
t5 = (unsigned char *) dest[5];
head = len % 64;
if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
gf_vect_dot_prod_base(head, vlen, &gftbls[5 * 32 * vlen], src, t5);
}
head = len % 64;
if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
gf_vect_dot_prod_base(head, vlen, &gftbls[5 * 32 * vlen], src, t5);
}
for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6;
vY7 = vY7 ^ vY7;
vY8 = vY8 ^ vY8;
vY9 = vY9 ^ vY9;
vYA = vYA ^ vYA;
vYB = vYB ^ vYB;
vYC = vYC ^ vYC;
for (i = head; i < len - 63; i += 64) {
vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6;
vY7 = vY7 ^ vY7;
vY8 = vY8 ^ vY8;
vY9 = vY9 ^ vY9;
vYA = vYA ^ vYA;
vYB = vYB ^ vYB;
vYC = vYC ^ vYC;
vYD = vYD ^ vYD;
vYE = vYE ^ vYE;
vYF = vYF ^ vYF;
vYG = vYG ^ vYG;
vYH = vYH ^ vYH;
vYI = vYI ^ vYI;
vYJ = vYJ ^ vYJ;
vYK = vYK ^ vYK;
vYL = vYL ^ vYL;
vYM = vYM ^ vYM;
vYN = vYN ^ vYN;
vYO = vYO ^ vYO;
vYD = vYD ^ vYD;
vYE = vYE ^ vYE;
vYF = vYF ^ vYF;
vYG = vYG ^ vYG;
vYH = vYH ^ vYH;
vYI = vYI ^ vYI;
vYJ = vYJ ^ vYJ;
vYK = vYK ^ vYK;
vYL = vYL ^ vYL;
vYM = vYM ^ vYM;
vYN = vYN ^ vYN;
vYO = vYO ^ vYO;
unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen];
unsigned char *g2 = &gftbls[2 * 32 * vlen];
unsigned char *g3 = &gftbls[3 * 32 * vlen];
unsigned char *g4 = &gftbls[4 * 32 * vlen];
unsigned char *g5 = &gftbls[5 * 32 * vlen];
unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g1 = &gftbls[1 * 32 * vlen];
unsigned char *g2 = &gftbls[2 * 32 * vlen];
unsigned char *g3 = &gftbls[3 * 32 * vlen];
unsigned char *g4 = &gftbls[4 * 32 * vlen];
unsigned char *g5 = &gftbls[5 * 32 * vlen];
for (j = 0; j < vlen; j++) {
s = (unsigned char *)src[j];
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
for (j = 0; j < vlen; j++) {
s = (unsigned char *) src[j];
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1);
vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0);
vlo1 = EC_vec_xl(0, g1);
vhi1 = EC_vec_xl(16, g1);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vlo2 = EC_vec_xl(0, g2);
vhi2 = EC_vec_xl(16, g2);
vlo3 = EC_vec_xl(0, g3);
vhi3 = EC_vec_xl(16, g3);
vlo2 = EC_vec_xl(0, g2);
vhi2 = EC_vec_xl(16, g2);
vlo3 = EC_vec_xl(0, g3);
vhi3 = EC_vec_xl(16, g3);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vlo4 = EC_vec_xl(0, g4);
vhi4 = EC_vec_xl(16, g4);
vlo5 = EC_vec_xl(0, g5);
vhi5 = EC_vec_xl(16, g5);
vlo4 = EC_vec_xl(0, g4);
vhi4 = EC_vec_xl(16, g4);
vlo5 = EC_vec_xl(0, g5);
vhi5 = EC_vec_xl(16, g5);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);
vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);
g0 += 32;
g1 += 32;
g2 += 32;
g3 += 32;
g4 += 32;
g5 += 32;
}
g0 += 32;
g1 += 32;
g2 += 32;
g3 += 32;
g4 += 32;
g5 += 32;
}
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i);
vec_xst(vY9, 0, t4 + i);
vec_xst(vYA, 16, t4 + i);
vec_xst(vYB, 0, t5 + i);
vec_xst(vYC, 16, t5 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i);
vec_xst(vY9, 0, t4 + i);
vec_xst(vYA, 16, t4 + i);
vec_xst(vYB, 0, t5 + i);
vec_xst(vYC, 16, t5 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i);
vec_xst(vYL, 32, t4 + i);
vec_xst(vYM, 48, t4 + i);
vec_xst(vYN, 32, t5 + i);
vec_xst(vYO, 48, t5 + i);
}
return;
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i);
vec_xst(vYL, 32, t4 + i);
vec_xst(vYM, 48, t4 + i);
vec_xst(vYN, 32, t5 + i);
vec_xst(vYO, 48, t5 + i);
}
return;
}

View File

@ -1,142 +1,143 @@
#include "ec_base_vsx.h"
void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest)
void
gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest)
{
unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
int i, head;
unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
vector unsigned char vX1, vX2, vX3, vX4;
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
int i, head;
s = (unsigned char *)src;
t0 = (unsigned char *)dest[0];
t1 = (unsigned char *)dest[1];
t2 = (unsigned char *)dest[2];
t3 = (unsigned char *)dest[3];
t4 = (unsigned char *)dest[4];
t5 = (unsigned char *)dest[5];
s = (unsigned char *) src;
t0 = (unsigned char *) dest[0];
t1 = (unsigned char *) dest[1];
t2 = (unsigned char *) dest[2];
t3 = (unsigned char *) dest[3];
t4 = (unsigned char *) dest[4];
t5 = (unsigned char *) dest[5];
head = len % 64;
if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
gf_vect_mad_base(head, vec, vec_i, &gftbls[5 * 32 * vec], src, t5);
}
head = len % 64;
if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
gf_vect_mad_base(head, vec, vec_i, &gftbls[5 * 32 * vec], src, t5);
}
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
vlo5 = EC_vec_xl(0, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
vhi5 = EC_vec_xl(16, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
vlo5 = EC_vec_xl(0, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
vhi5 = EC_vec_xl(16, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i);
vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i);
vYD = vec_xl(32, t0 + i);
vYE = vec_xl(48, t0 + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vYD, 32, t0 + i);
vec_xst(vYE, 48, t0 + i);
vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i);
vY3 = vec_xl(0, t1 + i);
vY4 = vec_xl(16, t1 + i);
vYF = vec_xl(32, t1 + i);
vYG = vec_xl(48, t1 + i);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vec_xst(vY3, 0, t1 + i);
vec_xst(vY4, 16, t1 + i);
vec_xst(vYF, 32, t1 + i);
vec_xst(vYG, 48, t1 + i);
vY5 = vec_xl(0, t2 + i);
vY6 = vec_xl(16, t2 + i);
vYH = vec_xl(32, t2 + i);
vYI = vec_xl(48, t2 + i);
vY5 = vec_xl(0, t2 + i);
vY6 = vec_xl(16, t2 + i);
vYH = vec_xl(32, t2 + i);
vYI = vec_xl(48, t2 + i);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
vY7 = vec_xl(0, t3 + i);
vY8 = vec_xl(16, t3 + i);
vYJ = vec_xl(32, t3 + i);
vYK = vec_xl(48, t3 + i);
vY7 = vec_xl(0, t3 + i);
vY8 = vec_xl(16, t3 + i);
vYJ = vec_xl(32, t3 + i);
vYK = vec_xl(48, t3 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
vec_xst(vY5, 0, t2 + i);
vec_xst(vY6, 16, t2 + i);
vec_xst(vYH, 32, t2 + i);
vec_xst(vYI, 48, t2 + i);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
vY9 = vec_xl(0, t4 + i);
vYA = vec_xl(16, t4 + i);
vYL = vec_xl(32, t4 + i);
vYM = vec_xl(48, t4 + i);
vY9 = vec_xl(0, t4 + i);
vYA = vec_xl(16, t4 + i);
vYL = vec_xl(32, t4 + i);
vYM = vec_xl(48, t4 + i);
vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i);
vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i);
vec_xst(vY7, 0, t3 + i);
vec_xst(vY8, 16, t3 + i);
vec_xst(vYJ, 32, t3 + i);
vec_xst(vYK, 48, t3 + i);
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
vYB = vec_xl(0, t5 + i);
vYC = vec_xl(16, t5 + i);
vYN = vec_xl(32, t5 + i);
vYO = vec_xl(48, t5 + i);
vYB = vec_xl(0, t5 + i);
vYC = vec_xl(16, t5 + i);
vYN = vec_xl(32, t5 + i);
vYO = vec_xl(48, t5 + i);
vec_xst(vY9, 0, t4 + i);
vec_xst(vYA, 16, t4 + i);
vec_xst(vYL, 32, t4 + i);
vec_xst(vYM, 48, t4 + i);
vec_xst(vY9, 0, t4 + i);
vec_xst(vYA, 16, t4 + i);
vec_xst(vYL, 32, t4 + i);
vec_xst(vYM, 48, t4 + i);
vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);
vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);
vec_xst(vYB, 0, t5 + i);
vec_xst(vYC, 16, t5 + i);
vec_xst(vYN, 32, t5 + i);
vec_xst(vYO, 48, t5 + i);
}
return;
vec_xst(vYB, 0, t5 + i);
vec_xst(vYC, 16, t5 + i);
vec_xst(vYN, 32, t5 + i);
vec_xst(vYO, 48, t5 + i);
}
return;
}

View File

@ -1,85 +1,86 @@
#include "ec_base_vsx.h"
void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char *dest)
void
gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
unsigned char *dest)
{
unsigned char *s, *t0;
vector unsigned char vX1, vY1;
vector unsigned char vX2, vY2;
vector unsigned char vX3, vY3;
vector unsigned char vX4, vY4;
vector unsigned char vX5, vY5;
vector unsigned char vX6, vY6;
vector unsigned char vX7, vY7;
vector unsigned char vX8, vY8;
vector unsigned char vhi0, vlo0;
int i, j, head;
unsigned char *s, *t0;
vector unsigned char vX1, vY1;
vector unsigned char vX2, vY2;
vector unsigned char vX3, vY3;
vector unsigned char vX4, vY4;
vector unsigned char vX5, vY5;
vector unsigned char vX6, vY6;
vector unsigned char vX7, vY7;
vector unsigned char vX8, vY8;
vector unsigned char vhi0, vlo0;
int i, j, head;
if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest);
if (vlen < 128) {
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest);
for (j = 1; j < vlen; j++) {
gf_vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
}
return;
}
for (j = 1; j < vlen; j++) {
gf_vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
}
return;
}
t0 = (unsigned char *)dest;
t0 = (unsigned char *) dest;
head = len % 128;
if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
}
head = len % 128;
if (head != 0) {
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
}
for (i = head; i < len - 127; i += 128) {
vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4;
for (i = head; i < len - 127; i += 128) {
vY1 = vY1 ^ vY1;
vY2 = vY2 ^ vY2;
vY3 = vY3 ^ vY3;
vY4 = vY4 ^ vY4;
vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6;
vY7 = vY7 ^ vY7;
vY8 = vY8 ^ vY8;
vY5 = vY5 ^ vY5;
vY6 = vY6 ^ vY6;
vY7 = vY7 ^ vY7;
vY8 = vY8 ^ vY8;
unsigned char *g0 = &gftbls[0 * 32 * vlen];
unsigned char *g0 = &gftbls[0 * 32 * vlen];
for (j = 0; j < vlen; j++) {
s = (unsigned char *)src[j];
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
for (j = 0; j < vlen; j++) {
s = (unsigned char *) src[j];
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0);
vlo0 = EC_vec_xl(0, g0);
vhi0 = EC_vec_xl(16, g0);
vX5 = vec_xl(64, s + i);
vX6 = vec_xl(80, s + i);
vX7 = vec_xl(96, s + i);
vX8 = vec_xl(112, s + i);
vX5 = vec_xl(64, s + i);
vX6 = vec_xl(80, s + i);
vX7 = vec_xl(96, s + i);
vX8 = vec_xl(112, s + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY5 = vY5 ^ EC_vec_permxor(vhi0, vlo0, vX5);
vY6 = vY6 ^ EC_vec_permxor(vhi0, vlo0, vX6);
vY7 = vY7 ^ EC_vec_permxor(vhi0, vlo0, vX7);
vY8 = vY8 ^ EC_vec_permxor(vhi0, vlo0, vX8);
vY5 = vY5 ^ EC_vec_permxor(vhi0, vlo0, vX5);
vY6 = vY6 ^ EC_vec_permxor(vhi0, vlo0, vX6);
vY7 = vY7 ^ EC_vec_permxor(vhi0, vlo0, vX7);
vY8 = vY8 ^ EC_vec_permxor(vhi0, vlo0, vX8);
g0 += 32;
}
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 32, t0 + i);
vec_xst(vY4, 48, t0 + i);
g0 += 32;
}
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 32, t0 + i);
vec_xst(vY4, 48, t0 + i);
vec_xst(vY5, 64, t0 + i);
vec_xst(vY6, 80, t0 + i);
vec_xst(vY7, 96, t0 + i);
vec_xst(vY8, 112, t0 + i);
}
return;
vec_xst(vY5, 64, t0 + i);
vec_xst(vY6, 80, t0 + i);
vec_xst(vY7, 96, t0 + i);
vec_xst(vY8, 112, t0 + i);
}
return;
}

View File

@ -1,48 +1,49 @@
#include "ec_base_vsx.h"
void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char *dest)
void
gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest)
{
unsigned char *s, *t0;
vector unsigned char vX1, vY1;
vector unsigned char vX2, vY2;
vector unsigned char vX3, vY3;
vector unsigned char vX4, vY4;
vector unsigned char vhi0, vlo0;
int i, head;
unsigned char *s, *t0;
vector unsigned char vX1, vY1;
vector unsigned char vX2, vY2;
vector unsigned char vX3, vY3;
vector unsigned char vX4, vY4;
vector unsigned char vhi0, vlo0;
int i, head;
s = (unsigned char *)src;
t0 = (unsigned char *)dest;
s = (unsigned char *) src;
t0 = (unsigned char *) dest;
head = len % 64;
if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, dest);
}
head = len % 64;
if (head != 0) {
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, dest);
}
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
for (i = head; i < len - 63; i += 64) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i);
vY3 = vec_xl(32, t0 + i);
vY4 = vec_xl(48, t0 + i);
vY1 = vec_xl(0, t0 + i);
vY2 = vec_xl(16, t0 + i);
vY3 = vec_xl(32, t0 + i);
vY4 = vec_xl(48, t0 + i);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 32, t0 + i);
vec_xst(vY4, 48, t0 + i);
}
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 32, t0 + i);
vec_xst(vY4, 48, t0 + i);
}
return;
return;
}

View File

@ -3,73 +3,74 @@
/*
 * Same as gf_vect_mul_base in "ec_base.h" but without the size restriction.
 *
 * Writes dest[k] = gf_mul(a[1], src[k]) for k in [0, len).  Nothing is
 * written when len <= 0.
 */
static void
_gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
{
        // 2nd element of table array is ref value used to fill it in
        const unsigned char coeff = a[1];
        int k;

        for (k = 0; k < len; k++)
                dest[k] = gf_mul(coeff, src[k]);
}
void gf_vect_mul_vsx(int len, unsigned char *gftbl, unsigned char *src, unsigned char *dest)
void
gf_vect_mul_vsx(int len, unsigned char *gftbl, unsigned char *src, unsigned char *dest)
{
unsigned char *s, *t0;
vector unsigned char vX1, vY1;
vector unsigned char vX2, vY2;
vector unsigned char vX3, vY3;
vector unsigned char vX4, vY4;
vector unsigned char vX5, vY5;
vector unsigned char vX6, vY6;
vector unsigned char vX7, vY7;
vector unsigned char vX8, vY8;
vector unsigned char vhi0, vlo0;
int i, head;
unsigned char *s, *t0;
vector unsigned char vX1, vY1;
vector unsigned char vX2, vY2;
vector unsigned char vX3, vY3;
vector unsigned char vX4, vY4;
vector unsigned char vX5, vY5;
vector unsigned char vX6, vY6;
vector unsigned char vX7, vY7;
vector unsigned char vX8, vY8;
vector unsigned char vhi0, vlo0;
int i, head;
s = (unsigned char *)src;
t0 = (unsigned char *)dest;
s = (unsigned char *) src;
t0 = (unsigned char *) dest;
head = len % 128;
if (head != 0) {
_gf_vect_mul_base(head, gftbl, src, dest);
}
head = len % 128;
if (head != 0) {
_gf_vect_mul_base(head, gftbl, src, dest);
}
vlo0 = EC_vec_xl(0, gftbl);
vhi0 = EC_vec_xl(16, gftbl);
vlo0 = EC_vec_xl(0, gftbl);
vhi0 = EC_vec_xl(16, gftbl);
for (i = head; i < len - 127; i += 128) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
for (i = head; i < len - 127; i += 128) {
vX1 = vec_xl(0, s + i);
vX2 = vec_xl(16, s + i);
vX3 = vec_xl(32, s + i);
vX4 = vec_xl(48, s + i);
vX5 = vec_xl(64, s + i);
vX6 = vec_xl(80, s + i);
vX7 = vec_xl(96, s + i);
vX8 = vec_xl(112, s + i);
vX5 = vec_xl(64, s + i);
vX6 = vec_xl(80, s + i);
vX7 = vec_xl(96, s + i);
vX8 = vec_xl(112, s + i);
vY1 = EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = EC_vec_permxor(vhi0, vlo0, vX2);
vY3 = EC_vec_permxor(vhi0, vlo0, vX3);
vY4 = EC_vec_permxor(vhi0, vlo0, vX4);
vY1 = EC_vec_permxor(vhi0, vlo0, vX1);
vY2 = EC_vec_permxor(vhi0, vlo0, vX2);
vY3 = EC_vec_permxor(vhi0, vlo0, vX3);
vY4 = EC_vec_permxor(vhi0, vlo0, vX4);
vY5 = EC_vec_permxor(vhi0, vlo0, vX5);
vY6 = EC_vec_permxor(vhi0, vlo0, vX6);
vY7 = EC_vec_permxor(vhi0, vlo0, vX7);
vY8 = EC_vec_permxor(vhi0, vlo0, vX8);
vY5 = EC_vec_permxor(vhi0, vlo0, vX5);
vY6 = EC_vec_permxor(vhi0, vlo0, vX6);
vY7 = EC_vec_permxor(vhi0, vlo0, vX7);
vY8 = EC_vec_permxor(vhi0, vlo0, vX8);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 32, t0 + i);
vec_xst(vY4, 48, t0 + i);
vec_xst(vY1, 0, t0 + i);
vec_xst(vY2, 16, t0 + i);
vec_xst(vY3, 32, t0 + i);
vec_xst(vY4, 48, t0 + i);
vec_xst(vY5, 64, t0 + i);
vec_xst(vY6, 80, t0 + i);
vec_xst(vY7, 96, t0 + i);
vec_xst(vY8, 112, t0 + i);
}
vec_xst(vY5, 64, t0 + i);
vec_xst(vY6, 80, t0 + i);
vec_xst(vY7, 96, t0 + i);
vec_xst(vY8, 112, t0 + i);
}
return;
return;
}