Mirror of https://github.com/intel/isa-l.git (synced 2024-12-12 09:23:50 +01:00)

commit 300260a4d9 (parent 671e67b62d)

    erasure_code: reformat using new code style

    Signed-off-by: Marcel Cornu <marcel.d.cornu@intel.com>
@@ -31,94 +31,86 @@
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
{
#if defined(__linux__)
        unsigned long auxval = getauxval(AT_HWCAP);

        if (auxval & HWCAP_SVE)
                return PROVIDER_INFO(gf_vect_dot_prod_sve);
        if (auxval & HWCAP_ASIMD)
                return PROVIDER_INFO(gf_vect_dot_prod_neon);
#elif defined(__APPLE__)
        if (sysctlEnabled(SYSCTL_SVE_KEY))
                return PROVIDER_INFO(gf_vect_dot_prod_sve);
        return PROVIDER_INFO(gf_vect_dot_prod_neon);
#endif
        return PROVIDER_BASIC(gf_vect_dot_prod);
}

DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
{
#if defined(__linux__)
        unsigned long auxval = getauxval(AT_HWCAP);

        if (auxval & HWCAP_SVE)
                return PROVIDER_INFO(gf_vect_mad_sve);
        if (auxval & HWCAP_ASIMD)
                return PROVIDER_INFO(gf_vect_mad_neon);
#elif defined(__APPLE__)
        if (sysctlEnabled(SYSCTL_SVE_KEY))
                return PROVIDER_INFO(gf_vect_mad_sve);
        return PROVIDER_INFO(gf_vect_mad_neon);
#endif
        return PROVIDER_BASIC(gf_vect_mad);
}

DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
{
#if defined(__linux__)
        unsigned long auxval = getauxval(AT_HWCAP);

        if (auxval & HWCAP_SVE)
                return PROVIDER_INFO(ec_encode_data_sve);
        if (auxval & HWCAP_ASIMD)
                return PROVIDER_INFO(ec_encode_data_neon);
#elif defined(__APPLE__)
        if (sysctlEnabled(SYSCTL_SVE_KEY))
                return PROVIDER_INFO(ec_encode_data_sve);
        return PROVIDER_INFO(ec_encode_data_neon);
#endif
        return PROVIDER_BASIC(ec_encode_data);
}

DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
{
#if defined(__linux__)
        unsigned long auxval = getauxval(AT_HWCAP);

        if (auxval & HWCAP_SVE)
                return PROVIDER_INFO(ec_encode_data_update_sve);
        if (auxval & HWCAP_ASIMD)
                return PROVIDER_INFO(ec_encode_data_update_neon);
#elif defined(__APPLE__)
        if (sysctlEnabled(SYSCTL_SVE_KEY))
                return PROVIDER_INFO(ec_encode_data_update_sve);
        return PROVIDER_INFO(ec_encode_data_update_neon);
#endif
        return PROVIDER_BASIC(ec_encode_data_update);
}

DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
{
#if defined(__linux__)
        unsigned long auxval = getauxval(AT_HWCAP);

        if (auxval & HWCAP_SVE)
                return PROVIDER_INFO(gf_vect_mul_sve);
        if (auxval & HWCAP_ASIMD)
                return PROVIDER_INFO(gf_vect_mul_neon);
#elif defined(__APPLE__)
        if (sysctlEnabled(SYSCTL_SVE_KEY))
                return PROVIDER_INFO(gf_vect_mul_sve);
        return PROVIDER_INFO(gf_vect_mul_neon);
#endif
        return PROVIDER_BASIC(gf_vect_mul);
}

DEFINE_INTERFACE_DISPATCHER(ec_init_tables) { return PROVIDER_BASIC(ec_init_tables); }
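The dispatchers above select an SVE, NEON, or base implementation at run time from the Linux HWCAP auxiliary vector (or an Apple sysctl key). As context for readers unfamiliar with the mechanism, here is a minimal, self-contained sketch of the same HWCAP probe outside the PROVIDER_* macros. It assumes a Linux/aarch64 toolchain where <sys/auxv.h> and <asm/hwcap.h> supply getauxval, AT_HWCAP, HWCAP_SVE and HWCAP_ASIMD; it is illustrative only and not part of the commit.

/* Minimal sketch of the HWCAP-based selection used by the dispatchers above.
 * Assumes Linux on aarch64; prints which gf_vect_dot_prod variant would be chosen. */
#include <stdio.h>
#include <sys/auxv.h>  /* getauxval, AT_HWCAP */
#include <asm/hwcap.h> /* HWCAP_SVE, HWCAP_ASIMD */

int
main(void)
{
        unsigned long auxval = getauxval(AT_HWCAP);

        if (auxval & HWCAP_SVE)
                printf("would dispatch to gf_vect_dot_prod_sve\n");
        else if (auxval & HWCAP_ASIMD)
                printf("would dispatch to gf_vect_dot_prod_neon\n");
        else
                printf("would fall back to the base implementation\n");
        return 0;
}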
@@ -29,236 +29,265 @@
#include "erasure_code.h"

/*external function*/
extern void
gf_vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char *dest);
extern void
gf_2vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                       unsigned char **dest);
extern void
gf_3vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                       unsigned char **dest);
extern void
gf_4vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                       unsigned char **dest);
extern void
gf_5vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                       unsigned char **dest);
extern void
gf_vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char *dest);
extern void
gf_2vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
extern void
gf_3vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
extern void
gf_4vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
extern void
gf_5vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);
extern void
gf_6vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                  unsigned char **dest);

void
ec_encode_data_neon(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                    unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        while (rows > 5) {
                gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding);
                g_tbls += 5 * k * 32;
                coding += 5;
                rows -= 5;
        }
        switch (rows) {
        case 5:
                gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_neon(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_neon(len, k, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_dot_prod_neon(len, k, g_tbls, data, coding);
                break;
        case 1:
                gf_vect_dot_prod_neon(len, k, g_tbls, data, *coding);
                break;
        case 0:
                break;
        default:
                break;
        }
}

void
ec_encode_data_update_neon(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                           unsigned char *data, unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }
        while (rows > 6) {
                gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }
        switch (rows) {
        case 6:
                gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
                break;
        case 1:
                gf_vect_mad_neon(len, k, vec_i, g_tbls, data, *coding);
                break;
        case 0:
                break;
        }
}

/* SVE */
extern void
gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                     unsigned char *dest);
extern void
gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_8vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest);
extern void
gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                unsigned char *dest);
extern void
gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
extern void
gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
extern void
gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
extern void
gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);
extern void
gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest);

void
ec_encode_data_sve(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
                   unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_base(len, k, rows, g_tbls, data, coding);
                return;
        }

        while (rows > 11) {
                gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }

        switch (rows) {
        case 11:
                /* 7 + 4 */
                gf_7vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 7 * k * 32;
                coding += 7;
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 10:
                /* 6 + 4 */
                gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 9:
                /* 5 + 4 */
                gf_5vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 5 * k * 32;
                coding += 5;
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 8:
                /* 4 + 4 */
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                g_tbls += 4 * k * 32;
                coding += 4;
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 7:
                gf_7vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 6:
                gf_6vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_dot_prod_sve(len, k, g_tbls, data, coding);
                break;
        case 1:
                gf_vect_dot_prod_sve(len, k, g_tbls, data, *coding);
                break;
        default:
                break;
        }
}

void
ec_encode_data_update_sve(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
                          unsigned char *data, unsigned char **coding)
{
        if (len < 16) {
                ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
                return;
        }
        while (rows > 6) {
                gf_6vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                g_tbls += 6 * k * 32;
                coding += 6;
                rows -= 6;
        }
        switch (rows) {
        case 6:
                gf_6vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 5:
                gf_5vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 4:
                gf_4vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 3:
                gf_3vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 2:
                gf_2vect_mad_sve(len, k, vec_i, g_tbls, data, coding);
                break;
        case 1:
                gf_vect_mad_sve(len, k, vec_i, g_tbls, data, *coding);
                break;
        default:
                break;
        }
}
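The encode loops above walk the coding outputs in chunks (5 or 6 at a time for NEON, up to 7+4 for SVE) and advance g_tbls by N * k * 32 bytes per chunk: each (output row, source) pair owns one 32-byte multiplication table, laid out row-major by output. A small sketch of that offset arithmetic follows; the helper name is hypothetical and only illustrates the stride seen in the code above.

/* Illustrative only: the offset arithmetic behind the "g_tbls += N * k * 32" steps above.
 * For k sources, the 32-byte table for (output row r, source j) starts at (r * k + j) * 32. */
#include <stddef.h>

static size_t
gftbl_offset(int k, int row, int src)
{
        return ((size_t) row * (size_t) k + (size_t) src) * 32;
}
/* e.g. after a chunk of 6 rows, the next chunk starts at gftbl_offset(k, 6, 0) == 6 * k * 32 */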
@@ -28,322 +28,331 @@
 **********************************************************************/

#include <limits.h>
#include <string.h> // for memset
#include <stdint.h>

#include "erasure_code.h"
#include "ec_base.h" // for GF tables

void
ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
        int i, j;

        for (i = 0; i < rows; i++) {
                for (j = 0; j < k; j++) {
                        gf_vect_mul_init(*a++, g_tbls);
                        g_tbls += 32;
                }
        }
}

unsigned char
gf_mul(unsigned char a, unsigned char b)
{
#ifndef GF_LARGE_TABLES
        int i;

        if ((a == 0) || (b == 0))
                return 0;

        return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ? i - 255 : i];
#else
        return gf_mul_table_base[b * 256 + a];
#endif
}

unsigned char
gf_inv(unsigned char a)
{
#ifndef GF_LARGE_TABLES
        if (a == 0)
                return 0;

        return gff_base[255 - gflog_base[a]];
#else
        return gf_inv_table_base[a];
#endif
}

void
gf_gen_rs_matrix(unsigned char *a, int m, int k)
{
        int i, j;
        unsigned char p, gen = 1;

        memset(a, 0, k * m);
        for (i = 0; i < k; i++)
                a[k * i + i] = 1;

        for (i = k; i < m; i++) {
                p = 1;
                for (j = 0; j < k; j++) {
                        a[k * i + j] = p;
                        p = gf_mul(p, gen);
                }
                gen = gf_mul(gen, 2);
        }
}

void
gf_gen_cauchy1_matrix(unsigned char *a, int m, int k)
{
        int i, j;
        unsigned char *p;

        // Identity matrix in high position
        memset(a, 0, k * m);
        for (i = 0; i < k; i++)
                a[k * i + i] = 1;

        // For the rest choose 1/(i + j) | i != j
        p = &a[k * k];
        for (i = k; i < m; i++)
                for (j = 0; j < k; j++)
                        *p++ = gf_inv(i ^ j);
}

int
gf_invert_matrix(unsigned char *in_mat, unsigned char *out_mat, const int n)
{
        int i, j, k;
        unsigned char temp;

        // Set out_mat[] to the identity matrix
        for (i = 0; i < n * n; i++) // memset(out_mat, 0, n*n)
                out_mat[i] = 0;

        for (i = 0; i < n; i++)
                out_mat[i * n + i] = 1;

        // Inverse
        for (i = 0; i < n; i++) {
                // Check for 0 in pivot element
                if (in_mat[i * n + i] == 0) {
                        // Find a row with non-zero in current column and swap
                        for (j = i + 1; j < n; j++)
                                if (in_mat[j * n + i])
                                        break;

                        if (j == n) // Couldn't find means it's singular
                                return -1;

                        for (k = 0; k < n; k++) { // Swap rows i,j
                                temp = in_mat[i * n + k];
                                in_mat[i * n + k] = in_mat[j * n + k];
                                in_mat[j * n + k] = temp;

                                temp = out_mat[i * n + k];
                                out_mat[i * n + k] = out_mat[j * n + k];
                                out_mat[j * n + k] = temp;
                        }
                }

                temp = gf_inv(in_mat[i * n + i]); // 1/pivot
                for (j = 0; j < n; j++) {         // Scale row i by 1/pivot
                        in_mat[i * n + j] = gf_mul(in_mat[i * n + j], temp);
                        out_mat[i * n + j] = gf_mul(out_mat[i * n + j], temp);
                }

                for (j = 0; j < n; j++) {
                        if (j == i)
                                continue;

                        temp = in_mat[j * n + i];
                        for (k = 0; k < n; k++) {
                                out_mat[j * n + k] ^= gf_mul(temp, out_mat[i * n + k]);
                                in_mat[j * n + k] ^= gf_mul(temp, in_mat[i * n + k]);
                        }
                }
        }
        return 0;
}

// Calculates const table gftbl in GF(2^8) from single input A
// gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, ... , A{f0} }

void
gf_vect_mul_init(unsigned char c, unsigned char *tbl)
{
        unsigned char c2 = (c << 1) ^ ((c & 0x80) ? 0x1d : 0);   // Mult by GF{2}
        unsigned char c4 = (c2 << 1) ^ ((c2 & 0x80) ? 0x1d : 0); // Mult by GF{2}
        unsigned char c8 = (c4 << 1) ^ ((c4 & 0x80) ? 0x1d : 0); // Mult by GF{2}

#if (__WORDSIZE == 64 || _WIN64 || __x86_64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
        unsigned long long v1, v2, v4, v8, *t;
        unsigned long long v10, v20, v40, v80;
        unsigned char c17, c18, c20, c24;

        t = (unsigned long long *) tbl;

        v1 = c * 0x0100010001000100ull;
        v2 = c2 * 0x0101000001010000ull;
        v4 = c4 * 0x0101010100000000ull;
        v8 = c8 * 0x0101010101010101ull;

        v4 = v1 ^ v2 ^ v4;
        t[0] = v4;
        t[1] = v8 ^ v4;

        c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0);   // Mult by GF{2}
        c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); // Mult by GF{2}
        c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); // Mult by GF{2}
        c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); // Mult by GF{2}

        v10 = c17 * 0x0100010001000100ull;
        v20 = c18 * 0x0101000001010000ull;
        v40 = c20 * 0x0101010100000000ull;
        v80 = c24 * 0x0101010101010101ull;

        v40 = v10 ^ v20 ^ v40;
        t[2] = v40;
        t[3] = v80 ^ v40;

#else // 32-bit or other
        unsigned char c3, c5, c6, c7, c9, c10, c11, c12, c13, c14, c15;
        unsigned char c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31;

        c3 = c2 ^ c;
        c5 = c4 ^ c;
        c6 = c4 ^ c2;
        c7 = c4 ^ c3;

        c9 = c8 ^ c;
        c10 = c8 ^ c2;
        c11 = c8 ^ c3;
        c12 = c8 ^ c4;
        c13 = c8 ^ c5;
        c14 = c8 ^ c6;
        c15 = c8 ^ c7;

        tbl[0] = 0;
        tbl[1] = c;
        tbl[2] = c2;
        tbl[3] = c3;
        tbl[4] = c4;
        tbl[5] = c5;
        tbl[6] = c6;
        tbl[7] = c7;
        tbl[8] = c8;
        tbl[9] = c9;
        tbl[10] = c10;
        tbl[11] = c11;
        tbl[12] = c12;
        tbl[13] = c13;
        tbl[14] = c14;
        tbl[15] = c15;

        c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0);   // Mult by GF{2}
        c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); // Mult by GF{2}
        c19 = c18 ^ c17;
        c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); // Mult by GF{2}
        c21 = c20 ^ c17;
        c22 = c20 ^ c18;
        c23 = c20 ^ c19;
        c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); // Mult by GF{2}
        c25 = c24 ^ c17;
        c26 = c24 ^ c18;
        c27 = c24 ^ c19;
        c28 = c24 ^ c20;
        c29 = c24 ^ c21;
        c30 = c24 ^ c22;
        c31 = c24 ^ c23;

        tbl[16] = 0;
        tbl[17] = c17;
        tbl[18] = c18;
        tbl[19] = c19;
        tbl[20] = c20;
        tbl[21] = c21;
        tbl[22] = c22;
        tbl[23] = c23;
        tbl[24] = c24;
        tbl[25] = c25;
        tbl[26] = c26;
        tbl[27] = c27;
        tbl[28] = c28;
        tbl[29] = c29;
        tbl[30] = c30;
        tbl[31] = c31;

#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
}

void
gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest)
{
        int i, j;
        unsigned char s;
        for (i = 0; i < len; i++) {
                s = 0;
                for (j = 0; j < vlen; j++)
                        s ^= gf_mul(src[j][i], v[j * 32 + 1]);

                dest[i] = s;
        }
}

void
gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
                 unsigned char *dest)
{
        int i;
        unsigned char s;
        for (i = 0; i < len; i++) {
                s = dest[i];
                s ^= gf_mul(src[i], v[vec_i * 32 + 1]);
                dest[i] = s;
        }
}

void
ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
                    unsigned char **dest)
{
        int i, j, l;
        unsigned char s;

        for (l = 0; l < dests; l++) {
                for (i = 0; i < len; i++) {
                        s = 0;
                        for (j = 0; j < srcs; j++)
                                s ^= gf_mul(src[j][i], v[j * 32 + l * srcs * 32 + 1]);

                        dest[l][i] = s;
                }
        }
}

void
ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
                           unsigned char *data, unsigned char **dest)
{
        int i, l;
        unsigned char s;

        for (l = 0; l < rows; l++) {
                for (i = 0; i < len; i++) {
                        s = dest[l][i];
                        s ^= gf_mul(data[i], v[vec_i * 32 + l * k * 32 + 1]);

                        dest[l][i] = s;
                }
        }
}

int
gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
{
        // 2nd element of table array is ref value used to fill it in
        unsigned char c = a[1];

        // Len must be aligned to 32B
        if ((len % 32) != 0) {
                return -1;
        }

        while (len-- > 0)
                *dest++ = gf_mul(c, *src++);
        return 0;
}
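gf_mul above multiplies in GF(2^8) with log/antilog tables over the field polynomial whose reduction constant 0x1d appears in gf_vect_mul_init. As a self-contained illustration of that arithmetic, the sketch below builds its own log/exp tables and checks one product; the table names are local to the example and it does not use the library's gff_base/gflog_base tables.

/* Self-contained sketch of GF(2^8) log/antilog multiplication over x^8+x^4+x^3+x^2+1
 * (reduction constant 0x1d, as in gf_vect_mul_init above). This mirrors the approach
 * of gf_mul but is not the library code itself. */
#include <stdio.h>

static unsigned char gf_exp[512];
static unsigned char gf_log[256];

static void
build_tables(void)
{
        unsigned int x = 1;
        for (int i = 0; i < 255; i++) {
                gf_exp[i] = (unsigned char) x;
                gf_log[x] = (unsigned char) i;
                x <<= 1;
                if (x & 0x100)
                        x ^= 0x11d; /* reduce by the field polynomial */
        }
        for (int i = 255; i < 512; i++) /* duplicate so log sums need no wrap check */
                gf_exp[i] = gf_exp[i - 255];
}

static unsigned char
gf_mul_example(unsigned char a, unsigned char b)
{
        if (a == 0 || b == 0)
                return 0;
        return gf_exp[gf_log[a] + gf_log[b]];
}

int
main(void)
{
        build_tables();
        /* 0x02 * 0x80 overflows x^8 and is reduced: expect 0x1d */
        printf("0x02 * 0x80 = 0x%02x\n", gf_mul_example(0x02, 0x80));
        return 0;
}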
erasure_code/ec_base.h (11184 lines): file diff suppressed because it is too large.
@@ -29,37 +29,40 @@

#include "erasure_code.h"

void
gf_vect_dot_prod(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest)
{
        gf_vect_dot_prod_base(len, vlen, v, src, dest);
}

void
gf_vect_mad(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, unsigned char *dest)
{
        gf_vect_mad_base(len, vec, vec_i, v, src, dest);
}

void
ec_encode_data(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
               unsigned char **dest)
{
        ec_encode_data_base(len, srcs, dests, v, src, dest);
}

void
ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v, unsigned char *data,
                      unsigned char **dest)
{
        ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
}

int
gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
{
        return gf_vect_mul_base(len, a, (unsigned char *) src, (unsigned char *) dest);
}

void
ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
        return ec_init_tables_base(k, rows, a, g_tbls);
}
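The aliases above are the public entry points (ec_init_tables, ec_encode_data, and so on). The sketch below shows a typical caller of that API, based only on the signatures visible in this diff: generate an encode matrix, expand its parity rows into 32-byte-per-entry tables with ec_init_tables, then produce the parity buffers with ec_encode_data. The K/P/LEN values and the helper name are illustrative assumptions, not values taken from the commit.

/* Illustrative caller of the public API shown above (minimal sketch; allocation,
 * error handling and alignment concerns omitted). K data fragments, P parity
 * fragments, LEN bytes per fragment. */
#include "erasure_code.h"

#define K 4      /* data fragments (assumed for the example) */
#define P 2      /* parity fragments (assumed for the example) */
#define LEN 4096 /* bytes per fragment (assumed for the example) */

void
encode_example(unsigned char **data, unsigned char **parity)
{
        unsigned char encode_matrix[(K + P) * K];
        unsigned char g_tbls[K * P * 32];

        /* Cauchy-based encode matrix: identity on top, 1/(i ^ j) below (see ec_base.c) */
        gf_gen_cauchy1_matrix(encode_matrix, K + P, K);

        /* Expand the P parity rows of the matrix into multiplication tables */
        ec_init_tables(K, P, &encode_matrix[K * K], g_tbls);

        /* Compute the P parity fragments from the K data fragments */
        ec_encode_data(LEN, K, P, g_tbls, data, parity);
}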
@ -28,387 +28,423 @@
|
|||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include "erasure_code.h"
|
#include "erasure_code.h"
|
||||||
#include "ec_base.h" /* for GF tables */
|
#include "ec_base.h" /* for GF tables */
|
||||||
|
|
||||||
#if __x86_64__ || __i386__ || _M_X64 || _M_IX86
|
#if __x86_64__ || __i386__ || _M_X64 || _M_IX86
|
||||||
void ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
void
|
||||||
unsigned char **coding)
|
ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||||
|
unsigned char **coding)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (len < 16) {
|
if (len < 16) {
|
||||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (rows >= 6) {
|
|
||||||
gf_6vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
|
||||||
g_tbls += 6 * k * 32;
|
|
||||||
coding += 6;
|
|
||||||
rows -= 6;
|
|
||||||
}
|
|
||||||
switch (rows) {
|
|
||||||
case 5:
|
|
||||||
gf_5vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
|
|
||||||
break;
|
|
||||||
case 0:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
while (rows >= 6) {
|
||||||
|
gf_6vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||||
|
g_tbls += 6 * k * 32;
|
||||||
|
coding += 6;
|
||||||
|
rows -= 6;
|
||||||
|
}
|
||||||
|
switch (rows) {
|
||||||
|
case 5:
|
||||||
|
gf_5vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
|
||||||
|
break;
|
||||||
|
case 0:
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
void
|
||||||
unsigned char **coding)
|
ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||||
|
unsigned char **coding)
|
||||||
{
|
{
|
||||||
if (len < 16) {
|
if (len < 16) {
|
||||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (rows >= 6) {
|
|
||||||
gf_6vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
|
||||||
g_tbls += 6 * k * 32;
|
|
||||||
coding += 6;
|
|
||||||
rows -= 6;
|
|
||||||
}
|
|
||||||
switch (rows) {
|
|
||||||
case 5:
|
|
||||||
gf_5vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding);
|
|
||||||
break;
|
|
||||||
case 0:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
while (rows >= 6) {
|
||||||
|
gf_6vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||||
|
g_tbls += 6 * k * 32;
|
||||||
|
coding += 6;
|
||||||
|
rows -= 6;
|
||||||
|
}
|
||||||
|
switch (rows) {
|
||||||
|
case 5:
|
||||||
|
gf_5vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding);
|
||||||
|
break;
|
||||||
|
case 0:
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
void
|
||||||
unsigned char **coding)
|
ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||||
|
unsigned char **coding)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (len < 32) {
|
if (len < 32) {
|
||||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (rows >= 6) {
|
|
||||||
gf_6vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
|
||||||
g_tbls += 6 * k * 32;
|
|
||||||
coding += 6;
|
|
||||||
rows -= 6;
|
|
||||||
}
|
|
||||||
switch (rows) {
|
|
||||||
case 5:
|
|
||||||
gf_5vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding);
|
|
||||||
break;
|
|
||||||
case 0:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
while (rows >= 6) {
|
||||||
|
gf_6vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||||
|
g_tbls += 6 * k * 32;
|
||||||
|
coding += 6;
|
||||||
|
rows -= 6;
|
||||||
|
}
|
||||||
|
switch (rows) {
|
||||||
|
case 5:
|
||||||
|
gf_5vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding);
|
||||||
|
break;
|
||||||
|
case 0:
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_AS_KNOWS_AVX512
|
#ifdef HAVE_AS_KNOWS_AVX512
|
||||||
|
|
||||||
extern int gf_vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
extern int
|
||||||
unsigned char *dest);
|
gf_vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||||
extern int gf_2vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
|
unsigned char *dest);
|
||||||
unsigned char **data, unsigned char **coding);
|
extern int
|
||||||
extern int gf_3vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
|
gf_2vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||||
unsigned char **data, unsigned char **coding);
|
unsigned char **coding);
|
||||||
extern int gf_4vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
|
extern int
|
||||||
unsigned char **data, unsigned char **coding);
|
gf_3vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||||
extern int gf_5vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
|
unsigned char **coding);
|
||||||
unsigned char **data, unsigned char **coding);
|
extern int
|
||||||
extern int gf_6vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
|
gf_4vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||||
unsigned char **data, unsigned char **coding);
|
unsigned char **coding);
|
||||||
extern void gf_vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
|
extern int
|
||||||
unsigned char *src, unsigned char *dest);
|
gf_5vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||||
extern void gf_2vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
|
unsigned char **coding);
|
||||||
unsigned char *src, unsigned char **dest);
|
extern int
|
||||||
extern void gf_3vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
|
gf_6vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
|
||||||
unsigned char *src, unsigned char **dest);
|
unsigned char **coding);
|
||||||
extern void gf_4vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
|
extern void
|
||||||
unsigned char *src, unsigned char **dest);
|
gf_vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
extern void gf_5vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
|
unsigned char *dest);
|
||||||
unsigned char *src, unsigned char **dest);
|
extern void
|
||||||
extern void gf_6vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
|
gf_2vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
unsigned char *src, unsigned char **dest);
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_3vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_4vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_5vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
extern void
|
||||||
|
gf_6vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
|
||||||
void ec_encode_data_avx512(int len, int k, int rows, unsigned char *g_tbls,
|
void
|
||||||
unsigned char **data, unsigned char **coding)
|
ec_encode_data_avx512(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||||
|
unsigned char **coding)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (len < 64) {
|
if (len < 64) {
|
||||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (rows >= 6) {
|
while (rows >= 6) {
|
||||||
gf_6vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
gf_6vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||||
g_tbls += 6 * k * 32;
|
g_tbls += 6 * k * 32;
|
||||||
coding += 6;
|
coding += 6;
|
||||||
rows -= 6;
|
rows -= 6;
|
||||||
}
|
}
|
||||||
switch (rows) {
|
switch (rows) {
|
||||||
case 5:
|
case 5:
|
||||||
gf_5vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
gf_5vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
gf_4vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
gf_4vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
gf_3vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
gf_3vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
gf_2vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
gf_2vect_dot_prod_avx512(len, k, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
gf_vect_dot_prod_avx512(len, k, g_tbls, data, *coding);
|
gf_vect_dot_prod_avx512(len, k, g_tbls, data, *coding);
|
||||||
break;
|
break;
|
||||||
case 0:
|
case 0:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ec_encode_data_update_avx512(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
void
|
||||||
unsigned char *data, unsigned char **coding)
|
ec_encode_data_update_avx512(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||||
|
unsigned char *data, unsigned char **coding)
|
||||||
{
|
{
|
||||||
if (len < 64) {
|
if (len < 64) {
|
||||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (rows >= 6) {
|
while (rows >= 6) {
|
||||||
gf_6vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
gf_6vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||||
g_tbls += 6 * k * 32;
|
g_tbls += 6 * k * 32;
|
||||||
coding += 6;
|
coding += 6;
|
||||||
rows -= 6;
|
rows -= 6;
|
||||||
}
|
}
|
||||||
switch (rows) {
|
switch (rows) {
|
||||||
case 5:
|
case 5:
|
||||||
gf_5vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
gf_5vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
gf_4vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
gf_4vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
gf_3vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
gf_3vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
gf_2vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
gf_2vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
gf_vect_mad_avx512(len, k, vec_i, g_tbls, data, *coding);
|
gf_vect_mad_avx512(len, k, vec_i, g_tbls, data, *coding);
|
||||||
break;
|
break;
|
||||||
case 0:
|
case 0:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if AS_FEATURE_LEVEL >= 10
|
#if AS_FEATURE_LEVEL >= 10
|
||||||
|
|
||||||
extern void gf_vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char *dest);
extern void gf_2vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void gf_3vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void gf_4vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void gf_5vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void gf_6vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);

extern void gf_vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char *dest);
extern void gf_2vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_3vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_4vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_5vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_6vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);

extern void gf_vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char *dest);
extern void gf_2vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void gf_3vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding);
extern void gf_vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char *dest);
extern void gf_2vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_3vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_4vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
extern void gf_5vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);

extern void
gf_vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char *dest);
extern void
gf_2vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_3vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_4vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_5vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_6vect_dot_prod_avx512_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);

extern void
gf_vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest);
extern void
gf_2vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_3vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_4vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_5vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_6vect_mad_avx512_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);

extern void
gf_vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char *dest);
extern void
gf_2vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_3vect_dot_prod_avx2_gfni(int len, int k, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding);
extern void
gf_vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char *dest);
extern void
gf_2vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_3vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_4vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
extern void
gf_5vect_mad_avx2_gfni(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
unsigned char **dest);
|
|
||||||
void ec_init_tables_gfni(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
int i, j;

uint64_t *g64 = (uint64_t *) g_tbls;

for (i = 0; i < rows; i++)
for (j = 0; j < k; j++)
*(g64++) = gf_table_gfni[*a++];
}

void
ec_init_tables_gfni(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
int i, j;

uint64_t *g64 = (uint64_t *) g_tbls;

for (i = 0; i < rows; i++)
for (j = 0; j < k; j++)
*(g64++) = gf_table_gfni[*a++];
}
||||||
|
|
||||||
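ec_init_tables_gfni above emits one 64-bit GFNI constant per matrix coefficient, which is why the GFNI loops that follow advance g_tbls by k * 8 bytes per parity row, while the table-lookup kernels earlier advance by k * 32. A small sketch of that size arithmetic (helper names are illustrative, not isa-l APIs):

#include <stddef.h>
#include <stdint.h>

/* Bytes of expanded tables consumed for 'rows' parity rows over 'k' sources. */
static size_t gfni_tbls_bytes(int k, int rows)
{
        return (size_t) rows * (size_t) k * sizeof(uint64_t); /* 8 bytes per coefficient */
}

static size_t lookup_tbls_bytes(int k, int rows)
{
        return (size_t) rows * (size_t) k * 32; /* 32-byte multiply table per coefficient */
}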
void ec_encode_data_avx512_gfni(int len, int k, int rows, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding)
void
ec_encode_data_avx512_gfni(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding)
{
|
{
|
||||||
|
|
||||||
while (rows >= 6) {
|
while (rows >= 6) {
|
||||||
gf_6vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
gf_6vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||||
g_tbls += 6 * k * 8;
|
g_tbls += 6 * k * 8;
|
||||||
coding += 6;
|
coding += 6;
|
||||||
rows -= 6;
|
rows -= 6;
|
||||||
}
|
}
|
||||||
switch (rows) {
|
switch (rows) {
|
||||||
case 5:
|
case 5:
|
||||||
gf_5vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
gf_5vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
gf_4vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
gf_4vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
gf_3vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
gf_3vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
gf_2vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
gf_2vect_dot_prod_avx512_gfni(len, k, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
gf_vect_dot_prod_avx512_gfni(len, k, g_tbls, data, *coding);
|
gf_vect_dot_prod_avx512_gfni(len, k, g_tbls, data, *coding);
|
||||||
break;
|
break;
|
||||||
case 0:
|
case 0:
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ec_encode_data_avx2_gfni(int len, int k, int rows, unsigned char *g_tbls,
unsigned char **data, unsigned char **coding)
void
ec_encode_data_avx2_gfni(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
unsigned char **coding)
{
|
{
|
||||||
while (rows >= 3) {
|
while (rows >= 3) {
|
||||||
gf_3vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
|
gf_3vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
|
||||||
g_tbls += 3 * k * 8;
|
g_tbls += 3 * k * 8;
|
||||||
coding += 3;
|
coding += 3;
|
||||||
rows -= 3;
|
rows -= 3;
|
||||||
}
|
}
|
||||||
switch (rows) {
|
switch (rows) {
|
||||||
case 2:
|
case 2:
|
||||||
gf_2vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
|
gf_2vect_dot_prod_avx2_gfni(len, k, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
gf_vect_dot_prod_avx2_gfni(len, k, g_tbls, data, *coding);
|
gf_vect_dot_prod_avx2_gfni(len, k, g_tbls, data, *coding);
|
||||||
break;
|
break;
|
||||||
case 0:
|
case 0:
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ec_encode_data_update_avx512_gfni(int len, int k, int rows, int vec_i,
unsigned char *g_tbls, unsigned char *data,
unsigned char **coding)
void
ec_encode_data_update_avx512_gfni(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding)
{
|
{
|
||||||
while (rows >= 6) {
|
while (rows >= 6) {
|
||||||
gf_6vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
gf_6vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||||
g_tbls += 6 * k * 8;
|
g_tbls += 6 * k * 8;
|
||||||
coding += 6;
|
coding += 6;
|
||||||
rows -= 6;
|
rows -= 6;
|
||||||
}
|
}
|
||||||
switch (rows) {
|
switch (rows) {
|
||||||
case 5:
|
case 5:
|
||||||
gf_5vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
gf_5vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
gf_4vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
gf_4vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
gf_3vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
gf_3vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
gf_2vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
gf_2vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
gf_vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, *coding);
|
gf_vect_mad_avx512_gfni(len, k, vec_i, g_tbls, data, *coding);
|
||||||
break;
|
break;
|
||||||
case 0:
|
case 0:
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ec_encode_data_update_avx2_gfni(int len, int k, int rows, int vec_i,
unsigned char *g_tbls, unsigned char *data,
unsigned char **coding)
void
ec_encode_data_update_avx2_gfni(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding)
{
|
{
|
||||||
while (rows >= 5) {
|
while (rows >= 5) {
|
||||||
gf_5vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
gf_5vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||||
g_tbls += 5 * k * 8;
|
g_tbls += 5 * k * 8;
|
||||||
coding += 5;
|
coding += 5;
|
||||||
rows -= 5;
|
rows -= 5;
|
||||||
}
|
}
|
||||||
switch (rows) {
|
switch (rows) {
|
||||||
case 4:
|
case 4:
|
||||||
gf_4vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
gf_4vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
gf_3vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
gf_3vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
gf_2vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
gf_2vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
gf_vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, *coding);
|
gf_vect_mad_avx2_gfni(len, k, vec_i, g_tbls, data, *coding);
|
||||||
break;
|
break;
|
||||||
case 0:
|
case 0:
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // AS_FEATURE_LEVEL >= 10
|
#endif // AS_FEATURE_LEVEL >= 10
|
||||||
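Each *_update variant above folds a single source buffer (index vec_i) into previously computed parity, so a caller that receives sources one at a time invokes the update function once per source against zero-initialized parity buffers. A hedged usage sketch, assuming len, k, p, a g_tbls already filled by ec_init_tables(), k source buffers, and p zeroed parity buffers:

#include "erasure_code.h"

/* Incremental encode with the update API: parity[] must start zeroed and
 * g_tbls must already hold the tables produced by ec_init_tables(). */
static void encode_incrementally(int len, int k, int p, unsigned char *g_tbls,
                                 unsigned char **data, unsigned char **parity)
{
        for (int i = 0; i < k; i++)
                /* Adds source i's contribution to all p parity buffers. */
                ec_encode_data_update(len, k, p, i, g_tbls, data[i], parity);
}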
@ -416,119 +452,119 @@ void ec_encode_data_update_avx2_gfni(int len, int k, int rows, int vec_i,
|
|||||||
|
|
||||||
#if __WORDSIZE == 64 || _WIN64 || __x86_64__
|
#if __WORDSIZE == 64 || _WIN64 || __x86_64__
|
||||||
|
|
||||||
void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding)
void
ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding)
{
|
{
|
||||||
if (len < 16) {
|
if (len < 16) {
|
||||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (rows > 6) {
|
|
||||||
gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
|
||||||
g_tbls += 6 * k * 32;
|
|
||||||
coding += 6;
|
|
||||||
rows -= 6;
|
|
||||||
}
|
|
||||||
switch (rows) {
|
|
||||||
case 6:
|
|
||||||
gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 5:
|
|
||||||
gf_5vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
gf_4vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
gf_3vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
gf_2vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
gf_vect_mad_sse(len, k, vec_i, g_tbls, data, *coding);
|
|
||||||
break;
|
|
||||||
case 0:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
while (rows > 6) {
|
||||||
|
gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||||
|
g_tbls += 6 * k * 32;
|
||||||
|
coding += 6;
|
||||||
|
rows -= 6;
|
||||||
|
}
|
||||||
|
switch (rows) {
|
||||||
|
case 6:
|
||||||
|
gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
gf_5vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
gf_4vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
gf_3vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
gf_2vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
gf_vect_mad_sse(len, k, vec_i, g_tbls, data, *coding);
|
||||||
|
break;
|
||||||
|
case 0:
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding)
void
ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding)
{
|
{
|
||||||
if (len < 16) {
|
if (len < 16) {
|
||||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
while (rows > 6) {
|
while (rows > 6) {
|
||||||
gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||||
g_tbls += 6 * k * 32;
|
g_tbls += 6 * k * 32;
|
||||||
coding += 6;
|
coding += 6;
|
||||||
rows -= 6;
|
rows -= 6;
|
||||||
}
|
}
|
||||||
switch (rows) {
|
switch (rows) {
|
||||||
case 6:
|
case 6:
|
||||||
gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
gf_5vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
gf_5vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
gf_4vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
gf_4vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
gf_3vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
gf_3vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
gf_2vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
gf_2vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
gf_vect_mad_avx(len, k, vec_i, g_tbls, data, *coding);
|
gf_vect_mad_avx(len, k, vec_i, g_tbls, data, *coding);
|
||||||
break;
|
break;
|
||||||
case 0:
|
case 0:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding)
void
ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
unsigned char *data, unsigned char **coding)
{
|
{
|
||||||
if (len < 32) {
|
if (len < 32) {
|
||||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
while (rows > 6) {
|
while (rows > 6) {
|
||||||
gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||||
g_tbls += 6 * k * 32;
|
g_tbls += 6 * k * 32;
|
||||||
coding += 6;
|
coding += 6;
|
||||||
rows -= 6;
|
rows -= 6;
|
||||||
}
|
}
|
||||||
switch (rows) {
|
switch (rows) {
|
||||||
case 6:
|
case 6:
|
||||||
gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
gf_5vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
gf_5vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
gf_4vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
gf_4vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
gf_3vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
gf_3vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
gf_2vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
gf_2vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
gf_vect_mad_avx2(len, k, vec_i, g_tbls, data, *coding);
|
gf_vect_mad_avx2(len, k, vec_i, g_tbls, data, *coding);
|
||||||
break;
|
break;
|
||||||
case 0:
|
case 0:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
|
#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
|
||||||
|
@ -29,27 +29,27 @@
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h> // for memset, memcmp
|
#include <string.h> // for memset, memcmp
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "erasure_code.h"
|
#include "erasure_code.h"
|
||||||
#include "test.h"
|
#include "test.h"
|
||||||
|
|
||||||
#ifndef GT_L3_CACHE
|
#ifndef GT_L3_CACHE
|
||||||
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
#define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
|
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
|
||||||
// Cached test, loop many times over small dataset
|
// Cached test, loop many times over small dataset
|
||||||
# define TEST_SOURCES 32
|
#define TEST_SOURCES 32
|
||||||
# define TEST_LEN(m) ((128*1024 / m) & ~(64-1))
|
#define TEST_LEN(m) ((128 * 1024 / m) & ~(64 - 1))
|
||||||
# define TEST_TYPE_STR "_warm"
|
#define TEST_TYPE_STR "_warm"
|
||||||
#elif defined (COLD_TEST)
|
#elif defined(COLD_TEST)
|
||||||
// Uncached test. Pull from large mem base.
|
// Uncached test. Pull from large mem base.
|
||||||
# define TEST_SOURCES 32
|
#define TEST_SOURCES 32
|
||||||
# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1))
|
#define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64 - 1))
|
||||||
# define TEST_TYPE_STR "_cold"
|
#define TEST_TYPE_STR "_cold"
|
||||||
#elif defined (TEST_CUSTOM)
|
#elif defined(TEST_CUSTOM)
|
||||||
# define TEST_TYPE_STR "_cus"
|
#define TEST_TYPE_STR "_cus"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define MMAX TEST_SOURCES
|
#define MMAX TEST_SOURCES
|
||||||
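TEST_LEN(m) in both configurations divides a fixed working-set budget by the total buffer count m and rounds the result down to a multiple of 64 bytes; for the cached ("warm") case and the m = 14 used below, that is (128 * 1024 / 14) & ~63 = 9344 bytes per buffer. A tiny check of that arithmetic:

#include <stdio.h>

#define TEST_LEN(m) ((128 * 1024 / m) & ~(64 - 1)) /* warm-test definition from above */

int main(void)
{
        printf("%d\n", TEST_LEN(14)); /* prints 9344 */
        return 0;
}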
@ -59,117 +59,120 @@
|
|||||||
|
|
||||||
typedef unsigned char u8;
|
typedef unsigned char u8;
|
||||||
|
|
||||||
void ec_encode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs)
void
ec_encode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs)
{
|
{
|
||||||
ec_init_tables_base(k, m - k, &a[k * k], g_tbls);
|
ec_init_tables_base(k, m - k, &a[k * k], g_tbls);
|
||||||
ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
}
|
}
|
||||||
|
|
||||||
int ec_decode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, u8 * src_in_err,
u8 * src_err_list, int nerrs, u8 ** temp_buffs)
int
ec_decode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs, u8 *src_in_err, u8 *src_err_list,
int nerrs, u8 **temp_buffs)
{
|
{
|
||||||
int i, j, r;
|
int i, j, r;
|
||||||
u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||||
u8 *recov[TEST_SOURCES];
|
u8 *recov[TEST_SOURCES];
|
||||||
|
|
||||||
// Construct b by removing error rows
|
// Construct b by removing error rows
|
||||||
for (i = 0, r = 0; i < k; i++, r++) {
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
while (src_in_err[r])
|
while (src_in_err[r])
|
||||||
r++;
|
r++;
|
||||||
recov[i] = buffs[r];
|
recov[i] = buffs[r];
|
||||||
for (j = 0; j < k; j++)
|
for (j = 0; j < k; j++)
|
||||||
b[k * i + j] = a[k * r + j];
|
b[k * i + j] = a[k * r + j];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gf_invert_matrix(b, d, k) < 0)
|
if (gf_invert_matrix(b, d, k) < 0)
|
||||||
return BAD_MATRIX;
|
return BAD_MATRIX;
|
||||||
|
|
||||||
for (i = 0; i < nerrs; i++)
|
for (i = 0; i < nerrs; i++)
|
||||||
for (j = 0; j < k; j++)
|
for (j = 0; j < k; j++)
|
||||||
c[k * i + j] = d[k * src_err_list[i] + j];
|
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||||
|
|
||||||
// Recover data
|
// Recover data
|
||||||
ec_init_tables_base(k, nerrs, c, g_tbls);
|
ec_init_tables_base(k, nerrs, c, g_tbls);
|
||||||
ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
|
ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
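ec_decode_perf above follows the usual isa-l recovery recipe: gather the k surviving rows of the encode matrix into b, invert b into d with gf_invert_matrix, copy the rows of d that correspond to the lost sources into c, and re-encode the surviving fragments with tables built from c. A condensed sketch of those table-building steps; make_decode_tables() is an illustrative wrapper, not an isa-l API, and the 32-entry limits mirror TEST_SOURCES above:

#include <string.h>
#include "erasure_code.h"

/* 'a' is the m x k encode matrix, survived[] holds k surviving row indices,
 * lost[] holds the nerrs lost source indices, g_tbls receives decode tables. */
static int make_decode_tables(int k, int nerrs, const unsigned char *a,
                              const unsigned char *survived, const unsigned char *lost,
                              unsigned char *g_tbls)
{
        unsigned char b[32 * 32], c[32 * 32], d[32 * 32];
        int i;

        for (i = 0; i < k; i++)                /* b = surviving rows of a */
                memcpy(&b[k * i], &a[k * survived[i]], k);
        if (gf_invert_matrix(b, d, k) < 0)     /* d = inverse of b */
                return -1;
        for (i = 0; i < nerrs; i++)            /* c = rows of d for the lost sources */
                memcpy(&c[k * i], &d[k * lost[i]], k);
        ec_init_tables(k, nerrs, c, g_tbls);   /* tables to re-encode the survivors */
        return 0;
}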
int main(int argc, char *argv[])
int
main(int argc, char *argv[])
{
|
{
|
||||||
int i, j, m, k, nerrs, check;
|
int i, j, m, k, nerrs, check;
|
||||||
void *buf;
|
void *buf;
|
||||||
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||||
u8 a[MMAX * KMAX];
|
u8 a[MMAX * KMAX];
|
||||||
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||||
u8 src_err_list[TEST_SOURCES];
|
u8 src_err_list[TEST_SOURCES];
|
||||||
struct perf start;
|
struct perf start;
|
||||||
|
|
||||||
// Pick test parameters
|
// Pick test parameters
|
||||||
m = 14;
|
m = 14;
|
||||||
k = 10;
|
k = 10;
|
||||||
nerrs = 4;
|
nerrs = 4;
|
||||||
const u8 err_list[] = { 2, 4, 5, 7 };
|
const u8 err_list[] = { 2, 4, 5, 7 };
|
||||||
|
|
||||||
printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||||
|
|
||||||
// check input parameters
|
// check input parameters
|
||||||
assert(!(m > MMAX || k > KMAX || nerrs > (m - k)));
|
assert(!(m > MMAX || k > KMAX || nerrs > (m - k)));
|
||||||
|
|
||||||
memcpy(src_err_list, err_list, nerrs);
|
memcpy(src_err_list, err_list, nerrs);
|
||||||
memset(src_in_err, 0, TEST_SOURCES);
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
for (i = 0; i < nerrs; i++)
|
for (i = 0; i < nerrs; i++)
|
||||||
src_in_err[src_err_list[i]] = 1;
|
src_in_err[src_err_list[i]] = 1;
|
||||||
|
|
||||||
// Allocate the arrays
|
// Allocate the arrays
|
||||||
for (i = 0; i < m; i++) {
|
for (i = 0; i < m; i++) {
|
||||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||||
printf("alloc error: Fail\n");
|
printf("alloc error: Fail\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
buffs[i] = buf;
|
buffs[i] = buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (m - k); i++) {
|
for (i = 0; i < (m - k); i++) {
|
||||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||||
printf("alloc error: Fail\n");
|
printf("alloc error: Fail\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
temp_buffs[i] = buf;
|
temp_buffs[i] = buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make random data
|
// Make random data
|
||||||
for (i = 0; i < k; i++)
|
for (i = 0; i < k; i++)
|
||||||
for (j = 0; j < TEST_LEN(m); j++)
|
for (j = 0; j < TEST_LEN(m); j++)
|
||||||
buffs[i][j] = rand();
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
gf_gen_rs_matrix(a, m, k);
|
gf_gen_rs_matrix(a, m, k);
|
||||||
|
|
||||||
// Start encode test
|
// Start encode test
|
||||||
BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs));
|
BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs));
|
||||||
printf("erasure_code_base_encode" TEST_TYPE_STR ": ");
|
printf("erasure_code_base_encode" TEST_TYPE_STR ": ");
|
||||||
perf_print(start, (long long)(TEST_LEN(m)) * (m));
|
perf_print(start, (long long) (TEST_LEN(m)) * (m));
|
||||||
|
|
||||||
// Start decode test
|
// Start decode test
|
||||||
BENCHMARK(&start, BENCHMARK_TIME, check =
|
BENCHMARK(&start, BENCHMARK_TIME,
|
||||||
ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
|
check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
|
||||||
temp_buffs));
|
temp_buffs));
|
||||||
|
|
||||||
if (check == BAD_MATRIX) {
|
if (check == BAD_MATRIX) {
|
||||||
printf("BAD MATRIX\n");
|
printf("BAD MATRIX\n");
|
||||||
return check;
|
return check;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < nerrs; i++) {
|
for (i = 0; i < nerrs; i++) {
|
||||||
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("erasure_code_base_decode" TEST_TYPE_STR ": ");
|
printf("erasure_code_base_decode" TEST_TYPE_STR ": ");
|
||||||
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
|
perf_print(start, (long long) (TEST_LEN(m)) * (k + nerrs));
|
||||||
|
|
||||||
printf("done all: Pass\n");
|
printf("done all: Pass\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
@ -29,29 +29,29 @@
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h> // for memset, memcmp
|
#include <string.h> // for memset, memcmp
|
||||||
#include "erasure_code.h"
|
#include "erasure_code.h"
|
||||||
#include "test.h"
|
#include "test.h"
|
||||||
|
|
||||||
#ifndef GT_L3_CACHE
|
#ifndef GT_L3_CACHE
|
||||||
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
#define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
|
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
|
||||||
// Cached test, loop many times over small dataset
|
// Cached test, loop many times over small dataset
|
||||||
# define TEST_SOURCES 32
|
#define TEST_SOURCES 32
|
||||||
# define TEST_LEN(m) ((128*1024 / m) & ~(64-1))
|
#define TEST_LEN(m) ((128 * 1024 / m) & ~(64 - 1))
|
||||||
# define TEST_TYPE_STR "_warm"
|
#define TEST_TYPE_STR "_warm"
|
||||||
#elif defined (COLD_TEST)
|
#elif defined(COLD_TEST)
|
||||||
// Uncached test. Pull from large mem base.
|
// Uncached test. Pull from large mem base.
|
||||||
# define TEST_SOURCES 32
|
#define TEST_SOURCES 32
|
||||||
# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1))
|
#define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64 - 1))
|
||||||
# define TEST_TYPE_STR "_cold"
|
#define TEST_TYPE_STR "_cold"
|
||||||
#elif defined (TEST_CUSTOM)
|
#elif defined(TEST_CUSTOM)
|
||||||
# define TEST_TYPE_STR "_cus"
|
#define TEST_TYPE_STR "_cus"
|
||||||
#endif
|
#endif
|
||||||
#ifndef TEST_SEED
|
#ifndef TEST_SEED
|
||||||
# define TEST_SEED 0x1234
|
#define TEST_SEED 0x1234
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define MMAX TEST_SOURCES
|
#define MMAX TEST_SOURCES
|
||||||
@ -61,215 +61,219 @@
|
|||||||
|
|
||||||
typedef unsigned char u8;
|
typedef unsigned char u8;
|
||||||
|
|
||||||
void usage(const char *app_name)
|
void
|
||||||
|
usage(const char *app_name)
|
||||||
{
|
{
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"Usage: %s [options]\n"
|
"Usage: %s [options]\n"
|
||||||
" -h Help\n"
|
" -h Help\n"
|
||||||
" -k <val> Number of source buffers\n"
|
" -k <val> Number of source buffers\n"
|
||||||
" -p <val> Number of parity buffers\n"
|
" -p <val> Number of parity buffers\n"
|
||||||
" -e <val> Number of simulated buffers with errors (cannot be higher than p or k)\n",
|
" -e <val> Number of simulated buffers with errors (cannot be higher than p or "
|
||||||
app_name);
|
"k)\n",
|
||||||
|
app_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ec_encode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, struct perf *start)
|
void
|
||||||
|
ec_encode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs, struct perf *start)
|
||||||
{
|
{
|
||||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||||
BENCHMARK(start, BENCHMARK_TIME,
|
BENCHMARK(start, BENCHMARK_TIME,
|
||||||
ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]));
|
ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]));
|
||||||
}
|
}
|
||||||
|
|
||||||
int ec_decode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, u8 * src_in_err,
|
int
|
||||||
u8 * src_err_list, int nerrs, u8 ** temp_buffs, struct perf *start)
|
ec_decode_perf(int m, int k, u8 *a, u8 *g_tbls, u8 **buffs, u8 *src_in_err, u8 *src_err_list,
|
||||||
|
int nerrs, u8 **temp_buffs, struct perf *start)
|
||||||
{
|
{
|
||||||
int i, j, r;
|
int i, j, r;
|
||||||
u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||||
u8 *recov[TEST_SOURCES];
|
u8 *recov[TEST_SOURCES];
|
||||||
|
|
||||||
// Construct b by removing error rows
|
// Construct b by removing error rows
|
||||||
for (i = 0, r = 0; i < k; i++, r++) {
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
while (src_in_err[r])
|
while (src_in_err[r])
|
||||||
r++;
|
r++;
|
||||||
recov[i] = buffs[r];
|
recov[i] = buffs[r];
|
||||||
for (j = 0; j < k; j++)
|
for (j = 0; j < k; j++)
|
||||||
b[k * i + j] = a[k * r + j];
|
b[k * i + j] = a[k * r + j];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gf_invert_matrix(b, d, k) < 0)
|
if (gf_invert_matrix(b, d, k) < 0)
|
||||||
return BAD_MATRIX;
|
return BAD_MATRIX;
|
||||||
|
|
||||||
for (i = 0; i < nerrs; i++)
|
for (i = 0; i < nerrs; i++)
|
||||||
for (j = 0; j < k; j++)
|
for (j = 0; j < k; j++)
|
||||||
c[k * i + j] = d[k * src_err_list[i] + j];
|
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||||
|
|
||||||
// Recover data
|
// Recover data
|
||||||
ec_init_tables(k, nerrs, c, g_tbls);
|
ec_init_tables(k, nerrs, c, g_tbls);
|
||||||
BENCHMARK(start, BENCHMARK_TIME,
|
BENCHMARK(start, BENCHMARK_TIME,
|
||||||
ec_encode_data(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs));
|
ec_encode_data(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int
|
||||||
|
main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
int i, j, m, k, p, nerrs, check, ret = -1;
|
int i, j, m, k, p, nerrs, check, ret = -1;
|
||||||
void *buf;
|
void *buf;
|
||||||
u8 *temp_buffs[TEST_SOURCES] = { NULL };
|
u8 *temp_buffs[TEST_SOURCES] = { NULL };
|
||||||
u8 *buffs[TEST_SOURCES] = { NULL };
|
u8 *buffs[TEST_SOURCES] = { NULL };
|
||||||
u8 a[MMAX * KMAX];
|
u8 a[MMAX * KMAX];
|
||||||
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||||
u8 src_err_list[TEST_SOURCES];
|
u8 src_err_list[TEST_SOURCES];
|
||||||
struct perf start;
|
struct perf start;
|
||||||
|
|
||||||
/* Set default parameters */
|
/* Set default parameters */
|
||||||
k = 8;
|
k = 8;
|
||||||
p = 6;
|
p = 6;
|
||||||
nerrs = 4;
|
nerrs = 4;
|
||||||
|
|
||||||
/* Parse arguments */
|
/* Parse arguments */
|
||||||
for (i = 1; i < argc; i++) {
|
for (i = 1; i < argc; i++) {
|
||||||
if (strcmp(argv[i], "-k") == 0) {
|
if (strcmp(argv[i], "-k") == 0) {
|
||||||
k = atoi(argv[++i]);
|
k = atoi(argv[++i]);
|
||||||
} else if (strcmp(argv[i], "-p") == 0) {
|
} else if (strcmp(argv[i], "-p") == 0) {
|
||||||
p = atoi(argv[++i]);
|
p = atoi(argv[++i]);
|
||||||
} else if (strcmp(argv[i], "-e") == 0) {
|
} else if (strcmp(argv[i], "-e") == 0) {
|
||||||
nerrs = atoi(argv[++i]);
|
nerrs = atoi(argv[++i]);
|
||||||
} else if (strcmp(argv[i], "-h") == 0) {
|
} else if (strcmp(argv[i], "-h") == 0) {
|
||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nerrs > k) {
printf
("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
nerrs, k);
return -1;
}

if (nerrs > k) {
printf("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
nerrs, k);
return -1;
}
|
|
||||||
if (k <= 0) {
|
if (k <= 0) {
|
||||||
printf("Number of source buffers (%d) must be > 0\n", k);
|
printf("Number of source buffers (%d) must be > 0\n", k);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p <= 0) {
|
if (p <= 0) {
|
||||||
printf("Number of parity buffers (%d) must be > 0\n", p);
|
printf("Number of parity buffers (%d) must be > 0\n", p);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nerrs <= 0) {
|
if (nerrs <= 0) {
|
||||||
printf("Number of errors (%d) must be > 0\n", nerrs);
|
printf("Number of errors (%d) must be > 0\n", nerrs);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nerrs > p) {
|
if (nerrs > p) {
|
||||||
printf
|
printf("Number of errors (%d) cannot be higher than number of parity buffers "
|
||||||
("Number of errors (%d) cannot be higher than number of parity buffers (%d)\n",
|
"(%d)\n",
|
||||||
nerrs, p);
|
nerrs, p);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
m = k + p;
|
m = k + p;
|
||||||
|
|
||||||
if (m > MMAX) {
|
if (m > MMAX) {
|
||||||
printf("Number of total buffers (data and parity) cannot be higher than %d\n",
|
printf("Number of total buffers (data and parity) cannot be higher than %d\n",
|
||||||
MMAX);
|
MMAX);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 *err_list = malloc((size_t)nerrs);
|
u8 *err_list = malloc((size_t) nerrs);
|
||||||
if (err_list == NULL) {
|
if (err_list == NULL) {
|
||||||
printf("Error allocating list of array of error indices\n");
|
printf("Error allocating list of array of error indices\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
srand(TEST_SEED);
|
srand(TEST_SEED);
|
||||||
|
|
||||||
for (i = 0; i < nerrs;) {
|
for (i = 0; i < nerrs;) {
|
||||||
u8 next_err = rand() % k;
|
u8 next_err = rand() % k;
|
||||||
for (j = 0; j < i; j++)
|
for (j = 0; j < i; j++)
|
||||||
if (next_err == err_list[j])
|
if (next_err == err_list[j])
|
||||||
break;
|
break;
|
||||||
if (j != i)
|
if (j != i)
|
||||||
continue;
|
continue;
|
||||||
err_list[i++] = next_err;
|
err_list[i++] = next_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k,
|
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k, p,
|
||||||
p, nerrs);
|
nerrs);
|
||||||
for (i = 0; i < nerrs; i++)
|
for (i = 0; i < nerrs; i++)
|
||||||
printf("%d ", (int)err_list[i]);
|
printf("%d ", (int) err_list[i]);
|
||||||
|
|
||||||
printf("])\n");
|
printf("])\n");
|
||||||
|
|
||||||
printf("erasure_code_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
printf("erasure_code_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||||
|
|
||||||
memcpy(src_err_list, err_list, nerrs);
|
memcpy(src_err_list, err_list, nerrs);
|
||||||
memset(src_in_err, 0, TEST_SOURCES);
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
for (i = 0; i < nerrs; i++)
|
for (i = 0; i < nerrs; i++)
|
||||||
src_in_err[src_err_list[i]] = 1;
|
src_in_err[src_err_list[i]] = 1;
|
||||||
|
|
||||||
// Allocate the arrays
|
// Allocate the arrays
|
||||||
for (i = 0; i < m; i++) {
|
for (i = 0; i < m; i++) {
|
||||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||||
printf("Error allocating buffers\n");
|
printf("Error allocating buffers\n");
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
buffs[i] = buf;
|
buffs[i] = buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < p; i++) {
|
for (i = 0; i < p; i++) {
|
||||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||||
printf("Error allocating buffers\n");
|
printf("Error allocating buffers\n");
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
temp_buffs[i] = buf;
|
temp_buffs[i] = buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make random data
|
// Make random data
|
||||||
for (i = 0; i < k; i++)
|
for (i = 0; i < k; i++)
|
||||||
for (j = 0; j < TEST_LEN(m); j++)
|
for (j = 0; j < TEST_LEN(m); j++)
|
||||||
buffs[i][j] = rand();
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
gf_gen_rs_matrix(a, m, k);
|
gf_gen_rs_matrix(a, m, k);
|
||||||
|
|
||||||
// Start encode test
|
// Start encode test
|
||||||
ec_encode_perf(m, k, a, g_tbls, buffs, &start);
|
ec_encode_perf(m, k, a, g_tbls, buffs, &start);
|
||||||
printf("erasure_code_encode" TEST_TYPE_STR ": ");
|
printf("erasure_code_encode" TEST_TYPE_STR ": ");
|
||||||
perf_print(start, (long long)(TEST_LEN(m)) * (m));
|
perf_print(start, (long long) (TEST_LEN(m)) * (m));
|
||||||
|
|
||||||
// Start decode test
|
// Start decode test
|
||||||
check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
|
check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs, temp_buffs,
|
||||||
temp_buffs, &start);
|
&start);
|
||||||
|
|
||||||
if (check == BAD_MATRIX) {
|
if (check == BAD_MATRIX) {
|
||||||
printf("BAD MATRIX\n");
|
printf("BAD MATRIX\n");
|
||||||
ret = check;
|
ret = check;
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < nerrs; i++) {
|
for (i = 0; i < nerrs; i++) {
|
||||||
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("erasure_code_decode" TEST_TYPE_STR ": ");
|
printf("erasure_code_decode" TEST_TYPE_STR ": ");
|
||||||
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
|
perf_print(start, (long long) (TEST_LEN(m)) * (k + nerrs));
|
||||||
|
|
||||||
printf("done all: Pass\n");
|
printf("done all: Pass\n");
|
||||||
|
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
free(err_list);
|
free(err_list);
|
||||||
for (i = 0; i < TEST_SOURCES; i++) {
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
free(buffs[i]);
|
free(buffs[i]);
|
||||||
free(temp_buffs[i]);
|
free(temp_buffs[i]);
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
@ -29,43 +29,43 @@
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h> // for memset, memcmp
|
#include <string.h> // for memset, memcmp
|
||||||
#include "erasure_code.h"
|
#include "erasure_code.h"
|
||||||
#include "test.h"
|
#include "test.h"
|
||||||
|
|
||||||
//By default, test multibinary version
|
// By default, test multibinary version
|
||||||
#ifndef FUNCTION_UNDER_TEST
|
#ifndef FUNCTION_UNDER_TEST
|
||||||
# define FUNCTION_UNDER_TEST ec_encode_data_update
|
#define FUNCTION_UNDER_TEST ec_encode_data_update
|
||||||
# define REF_FUNCTION ec_encode_data
|
#define REF_FUNCTION ec_encode_data
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//By default, test EC(8+4)
|
// By default, test EC(8+4)
|
||||||
#if (!defined(VECT))
|
#if (!defined(VECT))
|
||||||
# define VECT 4
|
#define VECT 4
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define str(s) #s
|
#define str(s) #s
|
||||||
#define xstr(s) str(s)
|
#define xstr(s) str(s)
|
||||||
|
|
||||||
#ifndef GT_L3_CACHE
|
#ifndef GT_L3_CACHE
|
||||||
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
#define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
|
#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
|
||||||
// Cached test, loop many times over small dataset
|
// Cached test, loop many times over small dataset
|
||||||
# define TEST_SOURCES 32
|
#define TEST_SOURCES 32
|
||||||
# define TEST_LEN(m) ((128*1024 / m) & ~(64-1))
|
#define TEST_LEN(m) ((128 * 1024 / m) & ~(64 - 1))
|
||||||
# define TEST_TYPE_STR "_warm"
|
#define TEST_TYPE_STR "_warm"
|
||||||
#elif defined (COLD_TEST)
|
#elif defined(COLD_TEST)
|
||||||
// Uncached test. Pull from large mem base.
|
// Uncached test. Pull from large mem base.
|
||||||
# define TEST_SOURCES 32
|
#define TEST_SOURCES 32
|
||||||
# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1))
|
#define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64 - 1))
|
||||||
# define TEST_TYPE_STR "_cold"
|
#define TEST_TYPE_STR "_cold"
|
||||||
#elif defined (TEST_CUSTOM)
|
#elif defined(TEST_CUSTOM)
|
||||||
# define TEST_TYPE_STR "_cus"
|
#define TEST_TYPE_STR "_cus"
|
||||||
#endif
|
#endif
|
||||||
#ifndef TEST_SEED
|
#ifndef TEST_SEED
|
||||||
# define TEST_SEED 0x1234
|
#define TEST_SEED 0x1234
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define MMAX TEST_SOURCES
|
#define MMAX TEST_SOURCES
|
||||||
@ -73,308 +73,316 @@
|
|||||||
|
|
||||||
typedef unsigned char u8;
|
typedef unsigned char u8;
|
||||||
|
|
||||||
void usage(const char *app_name)
|
void
|
||||||
|
usage(const char *app_name)
|
||||||
{
|
{
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"Usage: %s [options]\n"
|
"Usage: %s [options]\n"
|
||||||
" -h Help\n"
|
" -h Help\n"
|
||||||
" -k <val> Number of source buffers\n"
|
" -k <val> Number of source buffers\n"
|
||||||
" -p <val> Number of parity buffers\n"
|
" -p <val> Number of parity buffers\n"
|
||||||
" -e <val> Number of simulated buffers with errors (cannot be higher than p or k)\n",
|
" -e <val> Number of simulated buffers with errors (cannot be higher than p or "
|
||||||
app_name);
|
"k)\n",
|
||||||
|
app_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
void dump(unsigned char *buf, int len)
|
void
|
||||||
|
dump(unsigned char *buf, int len)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < len;) {
|
for (i = 0; i < len;) {
|
||||||
printf(" %2x", 0xff & buf[i++]);
|
printf(" %2x", 0xff & buf[i++]);
|
||||||
if (i % 32 == 0)
|
if (i % 32 == 0)
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void encode_update_test_ref(int m, int k, u8 * g_tbls, u8 ** buffs, u8 * a)
|
void
|
||||||
|
encode_update_test_ref(int m, int k, u8 *g_tbls, u8 **buffs, u8 *a)
|
||||||
{
|
{
|
||||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||||
REF_FUNCTION(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
REF_FUNCTION(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void encode_update_test(int m, int k, u8 * g_tbls, u8 ** perf_update_buffs, u8 * a)
|
void
|
||||||
|
encode_update_test(int m, int k, u8 *g_tbls, u8 **perf_update_buffs, u8 *a)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
// Make parity vects
|
// Make parity vects
|
||||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||||
for (i = 0; i < k; i++) {
|
for (i = 0; i < k; i++) {
|
||||||
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls,
|
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, perf_update_buffs[i],
|
||||||
perf_update_buffs[i], &perf_update_buffs[k]);
|
&perf_update_buffs[k]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_test(int m, int k, u8 ** update_buffs, u8 ** recov, u8 * a, u8 * src_in_err,
|
int
|
||||||
u8 * src_err_list, int nerrs, u8 * g_tbls, u8 ** perf_update_buffs)
|
decode_test(int m, int k, u8 **update_buffs, u8 **recov, u8 *a, u8 *src_in_err, u8 *src_err_list,
|
||||||
|
int nerrs, u8 *g_tbls, u8 **perf_update_buffs)
|
||||||
{
|
{
|
||||||
int i, j, r;
|
int i, j, r;
|
||||||
u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||||
// Construct b by removing error rows
|
// Construct b by removing error rows
|
||||||
for (i = 0, r = 0; i < k; i++, r++) {
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
while (src_in_err[r])
|
while (src_in_err[r])
|
||||||
r++;
|
r++;
|
||||||
recov[i] = update_buffs[r];
|
recov[i] = update_buffs[r];
|
||||||
for (j = 0; j < k; j++)
|
for (j = 0; j < k; j++)
|
||||||
b[k * i + j] = a[k * r + j];
|
b[k * i + j] = a[k * r + j];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gf_invert_matrix(b, d, k) < 0) {
|
if (gf_invert_matrix(b, d, k) < 0) {
|
||||||
printf("BAD MATRIX\n");
|
printf("BAD MATRIX\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < nerrs; i++)
|
for (i = 0; i < nerrs; i++)
|
||||||
for (j = 0; j < k; j++)
|
for (j = 0; j < k; j++)
|
||||||
c[k * i + j] = d[k * src_err_list[i] + j];
|
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||||
|
|
||||||
// Recover data
|
// Recover data
|
||||||
ec_init_tables(k, nerrs, c, g_tbls);
|
ec_init_tables(k, nerrs, c, g_tbls);
|
||||||
for (i = 0; i < k; i++) {
|
for (i = 0; i < k; i++) {
|
||||||
FUNCTION_UNDER_TEST(TEST_LEN(m), k, nerrs, i, g_tbls, recov[i],
|
FUNCTION_UNDER_TEST(TEST_LEN(m), k, nerrs, i, g_tbls, recov[i], perf_update_buffs);
|
||||||
perf_update_buffs);
|
}
|
||||||
}
|
return 0;
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int
|
||||||
|
main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
int i, j, check, m, k, p, nerrs, ret = -1;
|
int i, j, check, m, k, p, nerrs, ret = -1;
|
||||||
void *buf;
|
void *buf;
|
||||||
u8 *temp_buffs[TEST_SOURCES] = { NULL };
|
u8 *temp_buffs[TEST_SOURCES] = { NULL };
|
||||||
u8 *buffs[TEST_SOURCES] = { NULL };
|
u8 *buffs[TEST_SOURCES] = { NULL };
|
||||||
u8 *update_buffs[TEST_SOURCES] = { NULL };
|
u8 *update_buffs[TEST_SOURCES] = { NULL };
|
||||||
u8 *perf_update_buffs[TEST_SOURCES] = { NULL };
|
u8 *perf_update_buffs[TEST_SOURCES] = { NULL };
|
||||||
u8 a[MMAX * KMAX];
|
u8 a[MMAX * KMAX];
|
||||||
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||||
struct perf start;
|
struct perf start;
|
||||||
|
|
||||||
/* Set default parameters */
|
/* Set default parameters */
|
||||||
k = 10;
|
k = 10;
|
||||||
p = VECT;
|
p = VECT;
|
||||||
nerrs = VECT;
|
nerrs = VECT;
|
||||||
|
|
||||||
/* Parse arguments */
|
/* Parse arguments */
|
||||||
for (i = 1; i < argc; i++) {
|
for (i = 1; i < argc; i++) {
|
||||||
if (strcmp(argv[i], "-k") == 0) {
|
if (strcmp(argv[i], "-k") == 0) {
|
||||||
k = atoi(argv[++i]);
|
k = atoi(argv[++i]);
|
||||||
} else if (strcmp(argv[i], "-p") == 0) {
|
} else if (strcmp(argv[i], "-p") == 0) {
|
||||||
p = atoi(argv[++i]);
|
p = atoi(argv[++i]);
|
||||||
} else if (strcmp(argv[i], "-e") == 0) {
|
} else if (strcmp(argv[i], "-e") == 0) {
|
||||||
nerrs = atoi(argv[++i]);
|
nerrs = atoi(argv[++i]);
|
||||||
} else if (strcmp(argv[i], "-h") == 0) {
|
} else if (strcmp(argv[i], "-h") == 0) {
|
||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nerrs > k) {
|
if (nerrs > k) {
|
||||||
printf
|
printf("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
|
||||||
("Number of errors (%d) cannot be higher than number of data buffers (%d)\n",
|
nerrs, k);
|
||||||
nerrs, k);
|
return -1;
|
||||||
return -1;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (k <= 0) {
|
if (k <= 0) {
|
||||||
printf("Number of source buffers (%d) must be > 0\n", k);
|
printf("Number of source buffers (%d) must be > 0\n", k);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p <= 0) {
|
if (p <= 0) {
|
||||||
printf("Number of parity buffers (%d) must be > 0\n", p);
|
printf("Number of parity buffers (%d) must be > 0\n", p);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nerrs > p) {
|
if (nerrs > p) {
|
||||||
printf
|
printf("Number of errors (%d) cannot be higher than number of parity buffers "
|
||||||
("Number of errors (%d) cannot be higher than number of parity buffers (%d)\n",
|
"(%d)\n",
|
||||||
nerrs, p);
|
nerrs, p);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nerrs <= 0) {
|
if (nerrs <= 0) {
|
||||||
printf("Number of errors (%d) must be > 0\n", nerrs);
|
printf("Number of errors (%d) must be > 0\n", nerrs);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
m = k + p;
|
m = k + p;
|
||||||
|
|
||||||
if (m > MMAX) {
|
if (m > MMAX) {
|
||||||
printf("Number of total buffers (data and parity) cannot be higher than %d\n",
|
printf("Number of total buffers (data and parity) cannot be higher than %d\n",
|
||||||
MMAX);
|
MMAX);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 *err_list = malloc((size_t)nerrs);
|
u8 *err_list = malloc((size_t) nerrs);
|
||||||
if (err_list == NULL) {
|
if (err_list == NULL) {
|
||||||
printf("Error allocating list of array of error indices\n");
|
printf("Error allocating list of array of error indices\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
srand(TEST_SEED);
|
srand(TEST_SEED);
|
||||||
|
|
||||||
for (i = 0; i < nerrs;) {
|
for (i = 0; i < nerrs;) {
|
||||||
u8 next_err = rand() % k;
|
u8 next_err = rand() % k;
|
||||||
for (j = 0; j < i; j++)
|
for (j = 0; j < i; j++)
|
||||||
if (next_err == err_list[j])
|
if (next_err == err_list[j])
|
||||||
break;
|
break;
|
||||||
if (j != i)
|
if (j != i)
|
||||||
continue;
|
continue;
|
||||||
err_list[i++] = next_err;
|
err_list[i++] = next_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k,
|
printf("Testing with %u data buffers and %u parity buffers (num errors = %u, in [ ", k, p,
|
||||||
p, nerrs);
|
nerrs);
|
||||||
for (i = 0; i < nerrs; i++)
|
for (i = 0; i < nerrs; i++)
|
||||||
printf("%d ", err_list[i]);
|
printf("%d ", err_list[i]);
|
||||||
|
|
||||||
printf("])\n");
|
printf("])\n");
|
||||||
|
|
||||||
printf(xstr(FUNCTION_UNDER_TEST) "_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
printf(xstr(FUNCTION_UNDER_TEST) "_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||||
|
|
||||||
memcpy(src_err_list, err_list, nerrs);
|
memcpy(src_err_list, err_list, nerrs);
|
||||||
memset(src_in_err, 0, TEST_SOURCES);
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
for (i = 0; i < nerrs; i++)
|
for (i = 0; i < nerrs; i++)
|
||||||
src_in_err[src_err_list[i]] = 1;
                src_in_err[src_err_list[i]] = 1;

        // Allocate the arrays
        for (i = 0; i < m; i++) {
                if (posix_memalign(&buf, 64, TEST_LEN(m))) {
                        printf("Error allocating buffers\n");
                        goto exit;
                }
                buffs[i] = buf;
        }

        for (i = 0; i < (m - k); i++) {
                if (posix_memalign(&buf, 64, TEST_LEN(m))) {
                        printf("Error allocating buffers\n");
                        goto exit;
                }
                temp_buffs[i] = buf;
                memset(temp_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be
                                                       // zero for update function
        }

        for (i = 0; i < TEST_SOURCES; i++) {
                if (posix_memalign(&buf, 64, TEST_LEN(m))) {
                        printf("Error allocating buffers\n");
                        goto exit;
                }
                update_buffs[i] = buf;
                memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be
                                                         // zero for update function
        }

        for (i = 0; i < TEST_SOURCES; i++) {
                if (posix_memalign(&buf, 64, TEST_LEN(m))) {
                        printf("Error allocating buffers\n");
                        goto exit;
                }
                perf_update_buffs[i] = buf;
                memset(perf_update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer
                                                              // to be zero for update function
        }

        // Make random data
        for (i = 0; i < k; i++)
                for (j = 0; j < TEST_LEN(m); j++) {
                        buffs[i][j] = rand();
                        update_buffs[i][j] = buffs[i][j];
                }

        gf_gen_rs_matrix(a, m, k);

        encode_update_test_ref(m, k, g_tbls, buffs, a);
        encode_update_test(m, k, g_tbls, update_buffs, a);
        for (i = 0; i < m - k; i++) {
                if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN(m))) {
                        printf("\nupdate_buffs%d :", i);
                        dump(update_buffs[k + i], 25);
                        printf("buffs%d :", i);
                        dump(buffs[k + i], 25);
                        goto exit;
                }
        }

#ifdef DO_REF_PERF
        // Start encode test
        BENCHMARK(&start, BENCHMARK_TIME, encode_update_test_ref(m, k, g_tbls, buffs, a));
        printf(xstr(REF_FUNCTION) TEST_TYPE_STR ": ");
        perf_print(start, (long long) (TEST_LEN(m)) * (m));
#endif

        // Start encode test
        BENCHMARK(&start, BENCHMARK_TIME, encode_update_test(m, k, g_tbls, perf_update_buffs, a));
        printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
        perf_print(start, (long long) (TEST_LEN(m)) * (m));

        // Start encode test
        BENCHMARK(&start, BENCHMARK_TIME,
                  // Make parity vects
                  ec_init_tables(k, m - k, &a[k * k], g_tbls);
                  FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
                                      &perf_update_buffs[k]));
        printf(xstr(FUNCTION_UNDER_TEST) "_single_src" TEST_TYPE_STR ": ");
        perf_print(start, (long long) (TEST_LEN(m)) * (m - k + 1));

        // Start encode test
        BENCHMARK(&start, BENCHMARK_TIME,
                  // Make parity vects
                  FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
                                      &perf_update_buffs[k]));
        printf(xstr(FUNCTION_UNDER_TEST) "_single_src_simple" TEST_TYPE_STR ": ");
        perf_print(start, (long long) (TEST_LEN(m)) * (m - k + 1));

        for (i = k; i < m; i++) {
                memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be
                                                         // zero for update function
        }
        for (i = 0; i < k; i++) {
                FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, update_buffs[i],
                                    &update_buffs[k]);
        }

        decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list, nerrs, g_tbls,
                    temp_buffs);
        BENCHMARK(&start, BENCHMARK_TIME,
                  check = decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list, nerrs,
                                      g_tbls, perf_update_buffs));
        if (check) {
                printf("BAD_MATRIX\n");
                ret = check;
                goto exit;
        }

        for (i = 0; i < nerrs; i++) {
                if (0 != memcmp(temp_buffs[i], update_buffs[src_err_list[i]], TEST_LEN(m))) {
                        printf("Fail error recovery (%d, %d, %d) - \n", m, k, nerrs);
                        goto exit;
                }
        }

        printf(xstr(FUNCTION_UNDER_TEST) "_decode" TEST_TYPE_STR ": ");
        perf_print(start, (long long) (TEST_LEN(m)) * (k + nerrs));

        printf("done all: Pass\n");

        ret = 0;

exit:
        free(err_list);
        for (i = 0; i < TEST_SOURCES; i++) {
                free(buffs[i]);
                free(temp_buffs[i]);
                free(update_buffs[i]);
                free(perf_update_buffs[i]);
        }
        return ret;
}
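The perf loop above builds parity incrementally: one call per source buffer through the update-style encoder (here behind the FUNCTION_UNDER_TEST macro, which this perf test is expected to map to the library's ec_encode_data_update()). A minimal sketch of that pattern, assuming m, k <= 32 and caller-owned, pre-allocated buffers; encode_incremental is an illustrative helper, not part of the library:

#include <stdlib.h>
#include <string.h>

#include "erasure_code.h"

/* Feed k data buffers of length len through the update-form encoder one
 * source at a time, accumulating m - k parity buffers. Sketch only: assumes
 * m, k <= 32 and that parity[] points at caller-allocated buffers. */
static void
encode_incremental(int len, int m, int k, unsigned char **data, unsigned char **parity)
{
        unsigned char a[32 * 32];           // encode matrix (m x k)
        unsigned char g_tbls[32 * 32 * 32]; // expanded coefficients, 32 bytes each
        int i;

        gf_gen_rs_matrix(a, m, k);
        ec_init_tables(k, m - k, &a[k * k], g_tbls);

        // Update-form destinations must start zeroed, as in the test above
        for (i = 0; i < m - k; i++)
                memset(parity[i], 0, len);

        // Each call XORs source i's contribution into every parity buffer
        for (i = 0; i < k; i++)
                ec_encode_data_update(len, k, m - k, i, g_tbls, data[i], parity);
}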
File diff suppressed because it is too large
@ -3,114 +3,117 @@
#include <stdio.h>
#include "erasure_code.h"

#define MAX_CHECK 63 /* Size is limited by using uint64_t to represent subsets */
#define M_MAX 0x20
#define K_MAX 0x10
#define ROWS  M_MAX
#define COLS  K_MAX

static inline uint64_t
min(const uint64_t a, const uint64_t b)
{
        if (a <= b)
                return a;
        else
                return b;
}

void
gen_sub_matrix(unsigned char *out_matrix, const uint64_t dim, unsigned char *in_matrix,
               const uint64_t rows, const uint64_t cols, const uint64_t row_indicator,
               const uint64_t col_indicator)
{
        uint64_t i, j, r, s;

        for (i = 0, r = 0; i < rows; i++) {
                if (!(row_indicator & ((uint64_t) 1 << i)))
                        continue;

                for (j = 0, s = 0; j < cols; j++) {
                        if (!(col_indicator & ((uint64_t) 1 << j)))
                                continue;
                        out_matrix[dim * r + s] = in_matrix[cols * i + j];
                        s++;
                }
                r++;
        }
}

/* Gosper's Hack */
uint64_t
next_subset(uint64_t *subset, uint64_t element_count, uint64_t subsize)
{
        uint64_t tmp1 = *subset & -*subset;
        uint64_t tmp2 = *subset + tmp1;
        *subset = (((*subset ^ tmp2) >> 2) / tmp1) | tmp2;
        if (*subset & (((uint64_t) 1 << element_count))) {
                /* Overflow on last subset */
                *subset = ((uint64_t) 1 << subsize) - 1;
                return 1;
        }

        return 0;
}

int
are_submatrices_singular(unsigned char *vmatrix, const uint64_t rows, const uint64_t cols)
{
        unsigned char matrix[COLS * COLS];
        unsigned char invert_matrix[COLS * COLS];
        uint64_t subsize;

        /* Check all square subsize x subsize submatrices of the rows x cols
         * vmatrix for singularity*/
        for (subsize = 1; subsize <= min(rows, cols); subsize++) {
                const uint64_t subset_init = (1ULL << subsize) - 1ULL;
                uint64_t col_indicator = subset_init;
                do {
                        uint64_t row_indicator = subset_init;
                        do {
                                gen_sub_matrix(matrix, subsize, vmatrix, rows, cols, row_indicator,
                                               col_indicator);
                                if (gf_invert_matrix(matrix, invert_matrix, (int) subsize))
                                        return 1;

                        } while (next_subset(&row_indicator, rows, subsize) == 0);
                } while (next_subset(&col_indicator, cols, subsize) == 0);
        }

        return 0;
}

int
main(int argc, char **argv)
{
        unsigned char vmatrix[(ROWS + COLS) * COLS];
        uint64_t rows, cols;

        if (K_MAX > MAX_CHECK) {
                printf("K_MAX too large for this test\n");
                return 0;
        }
        if (M_MAX > MAX_CHECK) {
                printf("M_MAX too large for this test\n");
                return 0;
        }
        if (M_MAX < K_MAX) {
                printf("M_MAX must be smaller than K_MAX");
                return 0;
        }

        printf("Checking gen_rs_matrix for k <= %d and m <= %d.\n", K_MAX, M_MAX);
        printf("gen_rs_matrix creates erasure codes for:\n");

        for (cols = 1; cols <= K_MAX; cols++) {
                for (rows = 1; rows <= M_MAX - cols; rows++) {
                        gf_gen_rs_matrix(vmatrix, rows + cols, cols);

                        /* Verify the Vandermonde portion of vmatrix contains no
                         * singular submatrix */
                        if (are_submatrices_singular(&vmatrix[cols * cols], rows, cols))
                                break;
                }
                printf(" k = %2u, m <= %2u \n", (unsigned) cols, (unsigned) (rows + cols - 1));
        }

        return 0;
}
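are_submatrices_singular() enumerates every square submatrix by advancing a row mask and a column mask with Gosper's Hack: each step turns a bitmask into the next-larger mask with the same number of set bits. A small standalone sketch of that step, printing the six 2-element subsets of a 4-element set:

#include <stdio.h>
#include <stdint.h>

/* Enumerate all size-2 subsets of a 4-element set with the same Gosper's
 * Hack step used by next_subset() above. */
int
main(void)
{
        uint64_t subset = (1ULL << 2) - 1; /* start at 0b0011 */
        const uint64_t element_count = 4;

        do {
                printf("0x%llx\n", (unsigned long long) subset);

                uint64_t tmp1 = subset & -subset;      /* lowest set bit */
                uint64_t tmp2 = subset + tmp1;         /* ripple it upward */
                subset = (((subset ^ tmp2) >> 2) / tmp1) | tmp2;
        } while (!(subset & (1ULL << element_count))); /* stop on overflow */

        return 0;
}

Running it prints 0x3, 0x5, 0x6, 0x9, 0xa, 0xc, i.e. every way to pick two of four columns or rows.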
@ -29,7 +29,7 @@

#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include <assert.h>

#include "erasure_code.h"
@ -37,202 +37,194 @@
#define TEST_LEN 8192

#ifndef TEST_SOURCES
#define TEST_SOURCES 128
#endif
#ifndef RANDOMS
#define RANDOMS 200
#endif

#define KMAX TEST_SOURCES

typedef unsigned char u8;

void
matrix_mult(u8 *a, u8 *b, u8 *c, int n)
{
        int i, j, k;
        u8 d;

        for (i = 0; i < n; i++) {
                for (j = 0; j < n; j++) {
                        d = 0;
                        for (k = 0; k < n; k++) {
                                d ^= gf_mul(a[n * i + k], b[n * k + j]);
                        }
                        c[i * n + j] = d;
                }
        }
}

void
print_matrix(u8 *a, int n)
{
        int i, j;

        for (i = 0; i < n; i++) {
                for (j = 0; j < n; j++) {
                        printf(" %2x", a[i * n + j]);
                }
                printf("\n");
        }
        printf("\n");
}

int
is_ident(u8 *a, const int n)
{
        int i, j;
        u8 c;
        for (i = 0; i < n; i++) {
                for (j = 0; j < n; j++) {
                        c = *a++;
                        if (i == j)
                                c--;
                        if (c != 0)
                                return -1;
                }
        }
        return 0;
}

int
inv_test(u8 *in, u8 *inv, u8 *sav, int n)
{
        memcpy(sav, in, n * n);

        if (gf_invert_matrix(in, inv, n)) {
                printf("Given singular matrix\n");
                print_matrix(sav, n);
                return -1;
        }

        matrix_mult(inv, sav, in, n);

        if (is_ident(in, n)) {
                printf("fail\n");
                print_matrix(sav, n);
                print_matrix(inv, n);
                print_matrix(in, n);
                return -1;
        }
#ifdef TEST_VERBOSE
        putchar('.');
#endif

        return 0;
}

int
main(int argc, char *argv[])
{
        int i, k, t;
        u8 *test_mat = NULL, *save_mat = NULL, *invr_mat = NULL;
        int ret = -1;

        u8 test1[] = { 1, 1, 6, 1, 1, 1, 7, 1, 9 };

        u8 test2[] = { 0, 1, 6, 1, 0, 1, 0, 1, 9 };

        u8 test3[] = { 0, 0, 1, 1, 0, 0, 0, 1, 1 };

        u8 test4[] = { 0, 1, 6, 7, 1, 1, 0, 0, 0, 1, 2, 3, 3, 2, 2, 3 }; // = row3+3*row2

        printf("gf_inverse_test: max=%d ", KMAX);

        test_mat = malloc(KMAX * KMAX);
        save_mat = malloc(KMAX * KMAX);
        invr_mat = malloc(KMAX * KMAX);

        if (NULL == test_mat || NULL == save_mat || NULL == invr_mat)
                goto exit;

        // Test with lots of leading 1's
        k = 3;
        memcpy(test_mat, test1, k * k);
        if (inv_test(test_mat, invr_mat, save_mat, k))
                goto exit;

        // Test with leading zeros
        k = 3;
        memcpy(test_mat, test2, k * k);
        if (inv_test(test_mat, invr_mat, save_mat, k))
                goto exit;

        // Test 3
        k = 3;
        memcpy(test_mat, test3, k * k);
        if (inv_test(test_mat, invr_mat, save_mat, k))
                goto exit;

        // Test 4 - try a singular matrix
        k = 4;
        memcpy(test_mat, test4, k * k);
        if (!gf_invert_matrix(test_mat, invr_mat, k)) {
                printf("Fail: didn't catch singular matrix\n");
                print_matrix(test4, 4);
                goto exit;
        }
        // Do random test of size KMAX
        k = KMAX;

        for (i = 0; i < k * k; i++)
                test_mat[i] = save_mat[i] = rand();

        if (gf_invert_matrix(test_mat, invr_mat, k)) {
                printf("rand picked a singular matrix, try again\n");
                goto exit;
        }

        matrix_mult(invr_mat, save_mat, test_mat, k);

        if (is_ident(test_mat, k)) {
                printf("fail\n");
                print_matrix(save_mat, k);
                print_matrix(invr_mat, k);
                print_matrix(test_mat, k);
                goto exit;
        }
        // Do Randoms. Random size and coefficients
        for (t = 0; t < RANDOMS; t++) {
                k = rand() % KMAX;

                for (i = 0; i < k * k; i++)
                        test_mat[i] = save_mat[i] = rand();

                if (gf_invert_matrix(test_mat, invr_mat, k))
                        continue;

                matrix_mult(invr_mat, save_mat, test_mat, k);

                if (is_ident(test_mat, k)) {
                        printf("fail rand k=%d\n", k);
                        print_matrix(save_mat, k);
                        print_matrix(invr_mat, k);
                        print_matrix(test_mat, k);
                        goto exit;
                }
#ifdef TEST_VERBOSE
                if (0 == (t % 8))
                        putchar('.');
#endif
        }

        printf(" Pass\n");

        ret = 0;

exit:
        free(test_mat);
        free(save_mat);
        free(invr_mat);

        return ret;
}
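Every check in this test follows the same pattern: invert, multiply back over GF(2^8), and compare against the identity. A short usage sketch of gf_invert_matrix() on the test1 matrix from this file, assuming the program is linked against the library; it only prints the inverse it computes:

#include <stdio.h>
#include "erasure_code.h"

/* Invert the 3x3 test1 matrix from the test above and print the result.
 * gf_invert_matrix() returns 0 on success and non-zero for a singular input;
 * note that it modifies the input matrix in place. */
int
main(void)
{
        unsigned char in[9] = { 1, 1, 6, 1, 1, 1, 7, 1, 9 };
        unsigned char inv[9];
        int i;

        if (gf_invert_matrix(in, inv, 3)) {
                printf("singular\n");
                return -1;
        }
        for (i = 0; i < 9; i++)
                printf(" %2x%s", inv[i], (i % 3 == 2) ? "\n" : "");
        return 0;
}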
@ -29,26 +29,26 @@

#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include "test.h"
#include "erasure_code.h"

#ifndef GT_L3_CACHE
#define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
#endif

#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset
#define TEST_SOURCES 10
#define TEST_LEN 8 * 1024
#define TEST_TYPE_STR "_warm"
#elif defined(COLD_TEST)
// Uncached test. Pull from large mem base.
#define TEST_SOURCES 10
#define TEST_LEN (GT_L3_CACHE / TEST_SOURCES)
#define TEST_TYPE_STR "_cold"
#elif defined(TEST_CUSTOM)
#define TEST_TYPE_STR "_cus"
#endif

typedef unsigned char u8;
@ -58,105 +58,108 @@ u8 gff[256];
u8 gflog[256];
u8 gf_mul_table[256 * 256];

void
mk_gf_field(void)
{
        int i;
        u8 s = 1;
        gflog[0] = 0;

        for (i = 0; i < 256; i++) {
                gff[i] = s;
                gflog[s] = i;
                s = (s << 1) ^ ((s & 0x80) ? 0x1d : 0); // mult by GF{2}
        }
}

void
mk_gf_mul_table(u8 *table)
{
        // Populate a single table with all multiply combinations for a fast,
        // single-table lookup of GF(2^8) multiply at the expense of memory.
        int i, j;
        for (i = 0; i < 256; i++)
                for (j = 0; j < 256; j++)
                        table[i * 256 + j] = gf_mul(i, j);
}

void
gf_vect_dot_prod_ref(int len, int vlen, u8 *v, u8 **src, u8 *dest)
{
        int i, j;
        u8 s;
        for (i = 0; i < len; i++) {
                s = 0;
                for (j = 0; j < vlen; j++)
                        s ^= gf_mul(src[j][i], v[j]);

                dest[i] = s;
        }
}

void
gf_vect_dot_prod_mult(int len, int vlen, u8 *v, u8 **src, u8 *dest)
{
        int i, j;
        u8 s;
        for (i = 0; i < len; i++) {
                s = 0;
                for (j = 0; j < vlen; j++) {
                        s ^= gf_mul_table[v[j] * 256 + src[j][i]];
                }
                dest[i] = s;
        }
}

int
main(void)
{
        int i, j;
        u8 vec[TEST_SOURCES], *dest1, *dest2;
        u8 *matrix[TEST_SOURCES];
        struct perf start;

        dest1 = (u8 *) malloc(TEST_LEN);
        dest2 = (u8 *) malloc(TEST_LEN);

        if (NULL == dest1 || NULL == dest2) {
                printf("buffer alloc error\n");
                return -1;
        }
        memset(dest1, 0xfe, TEST_LEN);
        memset(dest2, 0xfe, TEST_LEN);

        mk_gf_field();
        mk_gf_mul_table(gf_mul_table);

        // generate random vector and matrix/data
        for (i = 0; i < TEST_SOURCES; i++) {
                vec[i] = rand();

                if (!(matrix[i] = malloc(TEST_LEN))) {
                        fprintf(stderr, "Error failure\n\n");
                        return -1;
                }
                for (j = 0; j < TEST_LEN; j++)
                        matrix[i][j] = rand();
        }

        BENCHMARK(&start, BENCHMARK_TIME,
                  gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1));
        printf("gf_vect_dot_prod_2tbl" TEST_TYPE_STR ": ");
        perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));

        BENCHMARK(&start, BENCHMARK_TIME,
                  gf_vect_dot_prod_mult(TEST_LEN, TEST_SOURCES, vec, matrix, dest2));
        printf("gf_vect_dot_prod_1tbl" TEST_TYPE_STR ": ");
        perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));

        // Compare with reference function
        if (0 != memcmp(dest1, dest2, TEST_LEN)) {
                printf("Error, different results!\n\n");
                return -1;
        }

        printf("Pass functional test\n");
        return 0;
}
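The two benchmarks above compare a two-table multiply (log plus antilog lookups, the "2tbl" path inside gf_mul) against the flat 256x256 product table built by mk_gf_mul_table() (one lookup per byte, at the cost of 64 KB). A standalone sketch of both lookup strategies for a single product, using the same field polynomial (0x11d) as mk_gf_field(); the table and helper names here are illustrative only:

#include <stdio.h>

/* Build log/antilog tables for GF(2^8) with polynomial 0x11d, plus a
 * flattened 256x256 product table, then compute one product both ways. */
static unsigned char gff[256], gflog[256], flat[256 * 256];

static void
build_tables(void)
{
        int i, j;
        unsigned char s = 1;

        for (i = 0; i < 256; i++) {
                gff[i] = s;
                gflog[s] = i;
                s = (s << 1) ^ ((s & 0x80) ? 0x1d : 0); /* multiply by alpha mod 0x11d */
        }
        for (i = 0; i < 256; i++)
                for (j = 0; j < 256; j++)
                        flat[i * 256 + j] = (i && j) ? gff[(gflog[i] + gflog[j]) % 255] : 0;
}

int
main(void)
{
        build_tables();
        /* 0x57 * 0x13 in GF(2^8): two lookups vs. one */
        printf("2tbl: %02x\n", gff[(gflog[0x57] + gflog[0x13]) % 255]);
        printf("1tbl: %02x\n", flat[0x57 * 256 + 0x13]);
        return 0;
}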
@ -29,19 +29,19 @@

#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include <assert.h>
#include "erasure_code.h"
#include "test.h"

#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN / 2)

#ifndef TEST_SOURCES
#define TEST_SOURCES 250
#endif
#ifndef RANDOMS
#define RANDOMS 20
#endif

#define MMAX TEST_SOURCES
@ -49,244 +49,251 @@

typedef unsigned char u8;

void
dump(unsigned char *buf, int len)
{
        int i;
        for (i = 0; i < len;) {
                printf(" %2x", 0xff & buf[i++]);
                if (i % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}

void
dump_matrix(unsigned char **s, int k, int m)
{
        int i, j;
        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", s[i][j]);
                }
                printf("\n");
        }
        printf("\n");
}

void
dump_u8xu8(unsigned char *s, int k, int m)
{
        int i, j;
        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", 0xff & s[j + (i * m)]);
                }
                printf("\n");
        }
        printf("\n");
}

int
main(int argc, char *argv[])
{
        int i, j, rtest, m, k, nerrs, r, err;
        void *buf;
        u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
        u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
        u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
        u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];

        printf("gf_vect_dot_prod_base: %dx%d ", TEST_SOURCES, TEST_LEN);

        // Allocate the arrays
        for (i = 0; i < TEST_SOURCES; i++) {
                if (posix_memalign(&buf, 64, TEST_LEN)) {
                        printf("alloc error: Fail");
                        return -1;
                }
                buffs[i] = buf;
        }

        if (posix_memalign(&buf, 64, TEST_LEN)) {
                printf("alloc error: Fail");
                return -1;
        }
        dest = buf;

        if (posix_memalign(&buf, 64, TEST_LEN)) {
                printf("alloc error: Fail");
                return -1;
        }
        dest_ref = buf;

        if (posix_memalign(&buf, 64, TEST_LEN)) {
                printf("alloc error: Fail");
                return -1;
        }
        temp_buff = buf;

        // Init
        for (i = 0; i < TEST_SOURCES; i++)
                memset(buffs[i], 0, TEST_LEN);

        memset(dest, 0, TEST_LEN);
        memset(temp_buff, 0, TEST_LEN);
        memset(dest_ref, 0, TEST_LEN);
        memset(g, 0, TEST_SOURCES);

        // Test erasure code using gf_vect_dot_prod
        // Pick a first test
        m = 9;
        k = 5;
        assert(!(m > MMAX || k > KMAX));

        gf_gen_cauchy1_matrix(a, m, k);

        // Make random data
        for (i = 0; i < k; i++)
                for (j = 0; j < TEST_LEN; j++)
                        buffs[i][j] = rand();

        // Make parity vects
        for (i = k; i < m; i++) {
                for (j = 0; j < k; j++)
                        gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);

                gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
        }

        // Random buffers in erasure
        memset(src_in_err, 0, TEST_SOURCES);
        for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
                err = 1 & rand();
                src_in_err[i] = err;
                if (err)
                        src_err_list[nerrs++] = i;
        }

        // construct b by removing error rows
        for (i = 0, r = 0; i < k; i++, r++) {
                while (src_in_err[r]) {
                        r++;
                        continue;
                }
                for (j = 0; j < k; j++)
                        b[k * i + j] = a[k * r + j];
        }

        if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
                printf("BAD MATRIX\n");

        for (i = 0, r = 0; i < k; i++, r++) {
                while (src_in_err[r]) {
                        r++;
                        continue;
                }
                recov[i] = buffs[r];
        }

        // Recover data
        for (i = 0; i < nerrs; i++) {
                for (j = 0; j < k; j++)
                        gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);

                gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);

                if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
                        printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
                        printf("recov %d:", src_err_list[i]);
                        dump(temp_buff, 25);
                        printf("orig :");
                        dump(buffs[src_err_list[i]], 25);
                        return -1;
                }
        }

        // Do more random tests

        for (rtest = 0; rtest < RANDOMS; rtest++) {
                while ((m = (rand() % MMAX)) < 2)
                        ;
                while ((k = (rand() % KMAX)) >= m || k < 1)
                        ;

                if (m > MMAX || k > KMAX)
                        continue;

                gf_gen_cauchy1_matrix(a, m, k);

                // Make random data
                for (i = 0; i < k; i++)
                        for (j = 0; j < TEST_LEN; j++)
                                buffs[i][j] = rand();

                // Make parity vects
                for (i = k; i < m; i++) {
                        for (j = 0; j < k; j++)
                                gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);

                        gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
                }

                // Random errors
                memset(src_in_err, 0, TEST_SOURCES);
                for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
                        err = 1 & rand();
                        src_in_err[i] = err;
                        if (err)
                                src_err_list[nerrs++] = i;
                }
                if (nerrs == 0) { // should have at least one error
                        while ((err = (rand() % KMAX)) >= k)
                                ;
                        src_err_list[nerrs++] = err;
                        src_in_err[err] = 1;
                }
                // construct b by removing error rows
                for (i = 0, r = 0; i < k; i++, r++) {
                        while (src_in_err[r]) {
                                r++;
                                continue;
                        }
                        for (j = 0; j < k; j++)
                                b[k * i + j] = a[k * r + j];
                }

                if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
                        printf("BAD MATRIX\n");

                for (i = 0, r = 0; i < k; i++, r++) {
                        while (src_in_err[r]) {
                                r++;
                                continue;
                        }
                        recov[i] = buffs[r];
                }

                // Recover data
                for (i = 0; i < nerrs; i++) {
                        for (j = 0; j < k; j++)
                                gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);

                        gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);

                        if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
                                printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
                                printf(" - erase list = ");
                                for (i = 0; i < nerrs; i++)
                                        printf(" %d", src_err_list[i]);
                                printf("\na:\n");
                                dump_u8xu8((u8 *) a, m, k);
                                printf("inv b:\n");
                                dump_u8xu8((u8 *) d, k, k);
                                printf("orig data:\n");
                                dump_matrix(buffs, m, 25);
                                printf("orig :");
                                dump(buffs[src_err_list[i]], 25);
                                printf("recov %d:", src_err_list[i]);
                                dump(temp_buff, 25);
                                return -1;
                        }
                }
#ifdef TEST_VERBOSE
                putchar('.');
#endif
        }

        printf("done all: Pass\n");
        return 0;
}
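Each recovery pass above is a single GF(2^8) dot product: row e of the inverted survivor matrix weights the k surviving buffers, and the field sum reproduces the erased source byte by byte. The same computation written out with scalar gf_mul(), as a sketch only (recover_one is an illustrative helper, not a library function):

#include "erasure_code.h"

/* d_row points at row e of the k x k inverted survivor matrix; recov[] are
 * the k surviving buffers; out receives the reconstructed erased buffer. */
static void
recover_one(int len, int k, unsigned char *d_row, unsigned char **recov, unsigned char *out)
{
        int i, j;

        for (i = 0; i < len; i++) {
                unsigned char s = 0;

                // XOR is addition in GF(2^8); gf_mul is the field multiply
                for (j = 0; j < k; j++)
                        s ^= gf_mul(d_row[j], recov[j][i]);
                out[i] = s;
        }
}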
@ -29,146 +29,148 @@

#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include "erasure_code.h"
#include "test.h"

#ifndef FUNCTION_UNDER_TEST
#define FUNCTION_UNDER_TEST gf_vect_dot_prod
#endif

#define str(s) #s
#define xstr(s) str(s)

#ifndef GT_L3_CACHE
#define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
#endif

#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset
#define TEST_SOURCES 10
#define TEST_LEN 8 * 1024
#define TEST_TYPE_STR "_warm"
#elif defined(COLD_TEST)
// Uncached test. Pull from large mem base.
#define TEST_SOURCES 10
#define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64 - 1))
#define TEST_TYPE_STR "_cold"
#elif defined(TEST_CUSTOM)
#define TEST_TYPE_STR "_cus"
#endif

typedef unsigned char u8;

void
dump(unsigned char *buf, int len)
{
        int i;
        for (i = 0; i < len;) {
                printf(" %2x", 0xff & buf[i++]);
                if (i % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}

void
dump_matrix(unsigned char **s, int k, int m)
{
        int i, j;
        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", s[i][j]);
                }
                printf("\n");
        }
        printf("\n");
}

void
vect_dot_prod_perf(void (*fun_ptr)(int, int, unsigned char *, unsigned char **, unsigned char *),
                   u8 *g, u8 *g_tbls, u8 **buffs, u8 *dest_ref)
{
        int j;
        for (j = 0; j < TEST_SOURCES; j++)
                gf_vect_mul_init(g[j], &g_tbls[j * 32]);

        (*fun_ptr)(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
}

int
main(int argc, char *argv[])
{
        int i, j;
        void *buf;
        u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
        u8 *temp_buff, *buffs[TEST_SOURCES];
        struct perf start;

        printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);

        // Allocate the arrays
        for (i = 0; i < TEST_SOURCES; i++) {
                if (posix_memalign(&buf, 64, TEST_LEN)) {
                        printf("alloc error: Fail");
                        return -1;
                }
                buffs[i] = buf;
        }

        if (posix_memalign(&buf, 64, TEST_LEN)) {
                printf("alloc error: Fail");
                return -1;
        }
        dest = buf;

        if (posix_memalign(&buf, 64, TEST_LEN)) {
                printf("alloc error: Fail");
                return -1;
        }
        dest_ref = buf;

        if (posix_memalign(&buf, 64, TEST_LEN)) {
                printf("alloc error: Fail");
                return -1;
        }
        temp_buff = buf;

        // Performance test
        for (i = 0; i < TEST_SOURCES; i++)
                for (j = 0; j < TEST_LEN; j++)
                        buffs[i][j] = rand();

        memset(dest, 0, TEST_LEN);
        memset(temp_buff, 0, TEST_LEN);
        memset(dest_ref, 0, TEST_LEN);
        memset(g, 0, TEST_SOURCES);

        for (i = 0; i < TEST_SOURCES; i++)
                g[i] = rand();

#ifdef DO_REF_PERF
        BENCHMARK(&start, BENCHMARK_TIME,
                  vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref));
        printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
        perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));
#else
        vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref);
#endif

        BENCHMARK(&start, BENCHMARK_TIME,
                  vect_dot_prod_perf(&FUNCTION_UNDER_TEST, g, g_tbls, buffs, dest));
        printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
        perf_print(start, (long long) TEST_LEN * (TEST_SOURCES + 1));

        if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
                printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
                dump_matrix(buffs, 5, TEST_SOURCES);
                printf("dprod_base:");
                dump(dest_ref, 25);
                printf("dprod:");
                dump(dest, 25);
                return -1;
        }

        printf("pass perf check\n");
        return 0;
}
@ -29,28 +29,28 @@

#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include "erasure_code.h"
#include "test.h"

#ifndef FUNCTION_UNDER_TEST
#define FUNCTION_UNDER_TEST gf_vect_dot_prod
#endif
#ifndef TEST_MIN_SIZE
#define TEST_MIN_SIZE 32
#endif

#define str(s) #s
#define xstr(s) str(s)

#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN / 2)

#ifndef TEST_SOURCES
#define TEST_SOURCES 16
#endif
#ifndef RANDOMS
#define RANDOMS 20
#endif

#define MMAX TEST_SOURCES
@ -58,481 +58,486 @@

#ifdef EC_ALIGNED_ADDR
// Define power of 2 range to check ptr, len alignment
#define PTR_ALIGN_CHK_B 0
#define LEN_ALIGN_CHK_B 0 // 0 for aligned only
#else
// Define power of 2 range to check ptr, len alignment
#define PTR_ALIGN_CHK_B 32
#define LEN_ALIGN_CHK_B 32 // 0 for aligned only
#endif

typedef unsigned char u8;

void
dump(unsigned char *buf, int len)
{
        int i;
        for (i = 0; i < len;) {
                printf(" %2x", 0xff & buf[i++]);
                if (i % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}

void
dump_matrix(unsigned char **s, int k, int m)
{
        int i, j;
        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", s[i][j]);
                }
                printf("\n");
        }
        printf("\n");
}

void
dump_u8xu8(unsigned char *s, int k, int m)
{
        int i, j;
        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", 0xff & s[j + (i * m)]);
                }
                printf("\n");
        }
        printf("\n");
}

int
main(int argc, char *argv[])
{
        int i, j, rtest, srcs, m, k, nerrs, r, err;
        void *buf;
        u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
        u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
        u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
        u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];

        int align, size;
        unsigned char *efence_buffs[TEST_SOURCES];
        unsigned int offset;
        u8 *ubuffs[TEST_SOURCES];
        u8 *udest_ptr;

        printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);

        // Allocate the arrays
        for (i = 0; i < TEST_SOURCES; i++) {
                if (posix_memalign(&buf, 64, TEST_LEN)) {
                        printf("alloc error: Fail");
                        return -1;
                }
                buffs[i] = buf;
        }

        if (posix_memalign(&buf, 64, TEST_LEN)) {
                printf("alloc error: Fail");
                return -1;
        }
        dest = buf;

        if (posix_memalign(&buf, 64, TEST_LEN)) {
                printf("alloc error: Fail");
                return -1;
        }
        dest_ref = buf;

        if (posix_memalign(&buf, 64, TEST_LEN)) {
                printf("alloc error: Fail");
                return -1;
        }
        temp_buff = buf;

        // Test of all zeros
        for (i = 0; i < TEST_SOURCES; i++)
                memset(buffs[i], 0, TEST_LEN);

        memset(dest, 0, TEST_LEN);
        memset(temp_buff, 0, TEST_LEN);
        memset(dest_ref, 0, TEST_LEN);
        memset(g, 0, TEST_SOURCES);

        for (i = 0; i < TEST_SOURCES; i++)
                gf_vect_mul_init(g[i], &g_tbls[i * 32]);

        gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);

        FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);

        if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
                printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n");
                dump_matrix(buffs, 5, TEST_SOURCES);
                printf("dprod_base:");
                dump(dest_ref, 25);
                printf("dprod:");
                dump(dest, 25);
                return -1;
        }
#ifdef TEST_VERBOSE
        else
                putchar('.');
#endif

        // Rand data test
        for (rtest = 0; rtest < RANDOMS; rtest++) {
                for (i = 0; i < TEST_SOURCES; i++)
                        for (j = 0; j < TEST_LEN; j++)
                                buffs[i][j] = rand();

                for (i = 0; i < TEST_SOURCES; i++)
                        g[i] = rand();

                for (i = 0; i < TEST_SOURCES; i++)
                        gf_vect_mul_init(g[i], &g_tbls[i * 32]);

                gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
                FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);

                if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
                        printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n");
                        dump_matrix(buffs, 5, TEST_SOURCES);
                        printf("dprod_base:");
                        dump(dest_ref, 25);
                        printf("dprod:");
                        dump(dest, 25);
                        return -1;
                }

#ifdef TEST_VERBOSE
                putchar('.');
#endif
        }

        // Rand data test with varied parameters
        for (rtest = 0; rtest < RANDOMS; rtest++) {
                for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
                        for (i = 0; i < srcs; i++)
                                for (j = 0; j < TEST_LEN; j++)
                                        buffs[i][j] = rand();

                        for (i = 0; i < srcs; i++)
                                g[i] = rand();

                        for (i = 0; i < srcs; i++)
                                gf_vect_mul_init(g[i], &g_tbls[i * 32]);

                        gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
                        FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest);
|
||||||
|
|
||||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
|
||||||
dump_matrix(buffs, 5, srcs);
|
dump_matrix(buffs, 5, srcs);
|
||||||
printf("dprod_base:");
|
printf("dprod_base:");
|
||||||
dump(dest_ref, 5);
|
dump(dest_ref, 5);
|
||||||
printf("dprod:");
|
printf("dprod:");
|
||||||
dump(dest, 5);
|
dump(dest, 5);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TEST_VERBOSE
|
#ifdef TEST_VERBOSE
|
||||||
putchar('.');
|
putchar('.');
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test erasure code using gf_vect_dot_prod
|
// Test erasure code using gf_vect_dot_prod
|
||||||
|
|
||||||
// Pick a first test
|
// Pick a first test
|
||||||
m = 9;
|
m = 9;
|
||||||
k = 5;
|
k = 5;
|
||||||
if (m > MMAX || k > KMAX)
|
if (m > MMAX || k > KMAX)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
gf_gen_rs_matrix(a, m, k);
|
gf_gen_rs_matrix(a, m, k);
|
||||||
|
|
||||||
// Make random data
|
// Make random data
|
||||||
for (i = 0; i < k; i++)
|
for (i = 0; i < k; i++)
|
||||||
for (j = 0; j < TEST_LEN; j++)
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
buffs[i][j] = rand();
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
// Make parity vects
|
// Make parity vects
|
||||||
for (i = k; i < m; i++) {
|
for (i = k; i < m; i++) {
|
||||||
for (j = 0; j < k; j++)
|
for (j = 0; j < k; j++)
|
||||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||||
#ifndef USEREF
|
#ifndef USEREF
|
||||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||||
#else
|
#else
|
||||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Random buffers in erasure
|
// Random buffers in erasure
|
||||||
memset(src_in_err, 0, TEST_SOURCES);
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||||
err = 1 & rand();
|
err = 1 & rand();
|
||||||
src_in_err[i] = err;
|
src_in_err[i] = err;
|
||||||
if (err)
|
if (err)
|
||||||
src_err_list[nerrs++] = i;
|
src_err_list[nerrs++] = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
// construct b by removing error rows
|
// construct b by removing error rows
|
||||||
for (i = 0, r = 0; i < k; i++, r++) {
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
while (src_in_err[r]) {
|
while (src_in_err[r]) {
|
||||||
r++;
|
r++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for (j = 0; j < k; j++)
|
for (j = 0; j < k; j++)
|
||||||
b[k * i + j] = a[k * r + j];
|
b[k * i + j] = a[k * r + j];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||||
printf("BAD MATRIX\n");
|
printf("BAD MATRIX\n");
|
||||||
|
|
||||||
for (i = 0, r = 0; i < k; i++, r++) {
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
while (src_in_err[r]) {
|
while (src_in_err[r]) {
|
||||||
r++;
|
r++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
recov[i] = buffs[r];
|
recov[i] = buffs[r];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Recover data
|
// Recover data
|
||||||
for (i = 0; i < nerrs; i++) {
|
for (i = 0; i < nerrs; i++) {
|
||||||
for (j = 0; j < k; j++)
|
for (j = 0; j < k; j++)
|
||||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||||
#ifndef USEREF
|
#ifndef USEREF
|
||||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||||
#else
|
#else
|
||||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||||
printf("recov %d:", src_err_list[i]);
|
printf("recov %d:", src_err_list[i]);
|
||||||
dump(temp_buff, 25);
|
dump(temp_buff, 25);
|
||||||
printf("orig :");
|
printf("orig :");
|
||||||
dump(buffs[src_err_list[i]], 25);
|
dump(buffs[src_err_list[i]], 25);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do more random tests
|
// Do more random tests
|
||||||
|
|
||||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
while ((m = (rand() % MMAX)) < 2) ;
|
while ((m = (rand() % MMAX)) < 2)
|
||||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1)
|
||||||
|
;
|
||||||
|
|
||||||
if (m > MMAX || k > KMAX)
|
if (m > MMAX || k > KMAX)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
gf_gen_rs_matrix(a, m, k);
|
gf_gen_rs_matrix(a, m, k);
|
||||||
|
|
||||||
// Make random data
|
// Make random data
|
||||||
for (i = 0; i < k; i++)
|
for (i = 0; i < k; i++)
|
||||||
for (j = 0; j < TEST_LEN; j++)
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
buffs[i][j] = rand();
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
// Make parity vects
|
// Make parity vects
|
||||||
for (i = k; i < m; i++) {
|
for (i = k; i < m; i++) {
|
||||||
for (j = 0; j < k; j++)
|
for (j = 0; j < k; j++)
|
||||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||||
#ifndef USEREF
|
#ifndef USEREF
|
||||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||||
#else
|
#else
|
||||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Random errors
|
// Random errors
|
||||||
memset(src_in_err, 0, TEST_SOURCES);
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||||
err = 1 & rand();
|
err = 1 & rand();
|
||||||
src_in_err[i] = err;
|
src_in_err[i] = err;
|
||||||
if (err)
|
if (err)
|
||||||
src_err_list[nerrs++] = i;
|
src_err_list[nerrs++] = i;
|
||||||
}
|
}
|
||||||
if (nerrs == 0) { // should have at least one error
|
if (nerrs == 0) { // should have at least one error
|
||||||
while ((err = (rand() % KMAX)) >= k) ;
|
while ((err = (rand() % KMAX)) >= k)
|
||||||
src_err_list[nerrs++] = err;
|
;
|
||||||
src_in_err[err] = 1;
|
src_err_list[nerrs++] = err;
|
||||||
}
|
src_in_err[err] = 1;
|
||||||
// construct b by removing error rows
|
}
|
||||||
for (i = 0, r = 0; i < k; i++, r++) {
|
// construct b by removing error rows
|
||||||
while (src_in_err[r]) {
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
r++;
|
while (src_in_err[r]) {
|
||||||
continue;
|
r++;
|
||||||
}
|
continue;
|
||||||
for (j = 0; j < k; j++)
|
}
|
||||||
b[k * i + j] = a[k * r + j];
|
for (j = 0; j < k; j++)
|
||||||
}
|
b[k * i + j] = a[k * r + j];
|
||||||
|
}
|
||||||
|
|
||||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||||
printf("BAD MATRIX\n");
|
printf("BAD MATRIX\n");
|
||||||
|
|
||||||
for (i = 0, r = 0; i < k; i++, r++) {
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
while (src_in_err[r]) {
|
while (src_in_err[r]) {
|
||||||
r++;
|
r++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
recov[i] = buffs[r];
|
recov[i] = buffs[r];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Recover data
|
// Recover data
|
||||||
for (i = 0; i < nerrs; i++) {
|
for (i = 0; i < nerrs; i++) {
|
||||||
for (j = 0; j < k; j++)
|
for (j = 0; j < k; j++)
|
||||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||||
#ifndef USEREF
|
#ifndef USEREF
|
||||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||||
#else
|
#else
|
||||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||||
#endif
|
#endif
|
||||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
printf(" - erase list = ");
|
printf(" - erase list = ");
|
||||||
for (i = 0; i < nerrs; i++)
|
for (i = 0; i < nerrs; i++)
|
||||||
printf(" %d", src_err_list[i]);
|
printf(" %d", src_err_list[i]);
|
||||||
printf("\na:\n");
|
printf("\na:\n");
|
||||||
dump_u8xu8((u8 *) a, m, k);
|
dump_u8xu8((u8 *) a, m, k);
|
||||||
printf("inv b:\n");
|
printf("inv b:\n");
|
||||||
dump_u8xu8((u8 *) d, k, k);
|
dump_u8xu8((u8 *) d, k, k);
|
||||||
printf("orig data:\n");
|
printf("orig data:\n");
|
||||||
dump_matrix(buffs, m, 25);
|
dump_matrix(buffs, m, 25);
|
||||||
printf("orig :");
|
printf("orig :");
|
||||||
dump(buffs[src_err_list[i]], 25);
|
dump(buffs[src_err_list[i]], 25);
|
||||||
printf("recov %d:", src_err_list[i]);
|
printf("recov %d:", src_err_list[i]);
|
||||||
dump(temp_buff, 25);
|
dump(temp_buff, 25);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef TEST_VERBOSE
|
#ifdef TEST_VERBOSE
|
||||||
putchar('.');
|
putchar('.');
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run tests at end of buffer for Electric Fence
|
// Run tests at end of buffer for Electric Fence
|
||||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||||
for (i = 0; i < TEST_SOURCES; i++)
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
for (j = 0; j < TEST_LEN; j++)
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
buffs[i][j] = rand();
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||||
|
|
||||||
for (i = 0; i < TEST_SOURCES; i++)
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
g[i] = rand();
|
g[i] = rand();
|
||||||
|
|
||||||
for (i = 0; i < TEST_SOURCES; i++)
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
|
||||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
|
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
|
||||||
|
|
||||||
if (0 != memcmp(dest_ref, dest, size)) {
|
if (0 != memcmp(dest_ref, dest, size)) {
|
||||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
|
||||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
printf("dprod_base:");
|
printf("dprod_base:");
|
||||||
dump(dest_ref, align);
|
dump(dest_ref, align);
|
||||||
printf("dprod:");
|
printf("dprod:");
|
||||||
dump(dest, align);
|
dump(dest, align);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TEST_VERBOSE
|
#ifdef TEST_VERBOSE
|
||||||
putchar('.');
|
putchar('.');
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test rand ptr alignment if available
|
// Test rand ptr alignment if available
|
||||||
|
|
||||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||||
srcs = rand() % TEST_SOURCES;
|
srcs = rand() % TEST_SOURCES;
|
||||||
if (srcs == 0)
|
if (srcs == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||||
// Add random offsets
|
// Add random offsets
|
||||||
for (i = 0; i < srcs; i++)
|
for (i = 0; i < srcs; i++)
|
||||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
|
udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
memset(dest, 0, TEST_LEN); // zero pad to check write-over
|
memset(dest, 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
|
||||||
for (i = 0; i < srcs; i++)
|
for (i = 0; i < srcs; i++)
|
||||||
for (j = 0; j < size; j++)
|
for (j = 0; j < size; j++)
|
||||||
ubuffs[i][j] = rand();
|
ubuffs[i][j] = rand();
|
||||||
|
|
||||||
for (i = 0; i < srcs; i++)
|
for (i = 0; i < srcs; i++)
|
||||||
g[i] = rand();
|
g[i] = rand();
|
||||||
|
|
||||||
for (i = 0; i < srcs; i++)
|
for (i = 0; i < srcs; i++)
|
||||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
|
||||||
|
|
||||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
|
||||||
|
|
||||||
if (memcmp(dest_ref, udest_ptr, size)) {
|
if (memcmp(dest_ref, udest_ptr, size)) {
|
||||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n",
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n", srcs);
|
||||||
srcs);
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
printf("dprod_base:");
|
||||||
printf("dprod_base:");
|
dump(dest_ref, 25);
|
||||||
dump(dest_ref, 25);
|
printf("dprod:");
|
||||||
printf("dprod:");
|
dump(udest_ptr, 25);
|
||||||
dump(udest_ptr, 25);
|
return -1;
|
||||||
return -1;
|
}
|
||||||
}
|
// Confirm that padding around dests is unchanged
|
||||||
// Confirm that padding around dests is unchanged
|
memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||||
memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
offset = udest_ptr - dest;
|
||||||
offset = udest_ptr - dest;
|
|
||||||
|
|
||||||
if (memcmp(dest, dest_ref, offset)) {
|
if (memcmp(dest, dest_ref, offset)) {
|
||||||
printf("Fail rand ualign pad start\n");
|
printf("Fail rand ualign pad start\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
|
if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
|
||||||
printf("Fail rand ualign pad end\n");
|
printf("Fail rand ualign pad end\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TEST_VERBOSE
|
#ifdef TEST_VERBOSE
|
||||||
putchar('.');
|
putchar('.');
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test all size alignment
|
// Test all size alignment
|
||||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
|
||||||
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||||
srcs = TEST_SOURCES;
|
srcs = TEST_SOURCES;
|
||||||
|
|
||||||
for (i = 0; i < srcs; i++)
|
for (i = 0; i < srcs; i++)
|
||||||
for (j = 0; j < size; j++)
|
for (j = 0; j < size; j++)
|
||||||
buffs[i][j] = rand();
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
for (i = 0; i < srcs; i++)
|
for (i = 0; i < srcs; i++)
|
||||||
g[i] = rand();
|
g[i] = rand();
|
||||||
|
|
||||||
for (i = 0; i < srcs; i++)
|
for (i = 0; i < srcs; i++)
|
||||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
|
||||||
|
|
||||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
|
||||||
|
|
||||||
if (memcmp(dest_ref, dest, size)) {
|
if (memcmp(dest_ref, dest, size)) {
|
||||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n",
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n", size);
|
||||||
size);
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
printf("dprod_base:");
|
||||||
printf("dprod_base:");
|
dump(dest_ref, 25);
|
||||||
dump(dest_ref, 25);
|
printf("dprod:");
|
||||||
printf("dprod:");
|
dump(dest, 25);
|
||||||
dump(dest, 25);
|
return -1;
|
||||||
return -1;
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
printf("done all: Pass\n");
|
printf("done all: Pass\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
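A side note on the test banner above (a sketch, not part of this diff): the printf relies on two-level stringification, because "#" alone would print the literal macro name rather than its expansion. The function name in the sketch is a hypothetical placeholder, not a value taken from the build.

#include <stdio.h>

#define str(s)  #s
#define xstr(s) str(s)
#define FUNCTION_UNDER_TEST gf_vect_dot_prod_example /* hypothetical name, for illustration only */

int
main(void)
{
        // str() stringifies the token as written; xstr() expands it first, then stringifies.
        printf(str(FUNCTION_UNDER_TEST) "\n");  // prints: FUNCTION_UNDER_TEST
        printf(xstr(FUNCTION_UNDER_TEST) "\n"); // prints: gf_vect_dot_prod_example
        return 0;
}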
@@ -29,503 +29,500 @@

#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset, memcmp
#include "erasure_code.h"
#include "test.h"

#ifndef ALIGN_SIZE
#define ALIGN_SIZE 32
#endif

#ifndef FUNCTION_UNDER_TEST
// By default, test multi-binary version
#define FUNCTION_UNDER_TEST gf_vect_mad
#define REF_FUNCTION gf_vect_dot_prod
#define VECT 1
#endif

#ifndef TEST_MIN_SIZE
#define TEST_MIN_SIZE 64
#endif

#define str(s) #s
#define xstr(s) str(s)

#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN / 2)
#define TEST_MEM TEST_SIZE
#define TEST_LOOPS 20000
#define TEST_TYPE_STR ""

#ifndef TEST_SOURCES
#define TEST_SOURCES 16
#endif
#ifndef RANDOMS
#define RANDOMS 20
#endif

#ifdef EC_ALIGNED_ADDR
// Define power of 2 range to check ptr, len alignment
#define PTR_ALIGN_CHK_B 0
#define LEN_ALIGN_CHK_B 0 // 0 for aligned only
#else
// Define power of 2 range to check ptr, len alignment
#define PTR_ALIGN_CHK_B ALIGN_SIZE
#define LEN_ALIGN_CHK_B ALIGN_SIZE // 0 for aligned only
#endif

#define str(s) #s
#define xstr(s) str(s)

typedef unsigned char u8;

#if (VECT == 1)
#define LAST_ARG *dest
#else
#define LAST_ARG **dest
#endif

extern void
FUNCTION_UNDER_TEST(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                    unsigned char LAST_ARG);
extern void
REF_FUNCTION(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char LAST_ARG);

void
dump(unsigned char *buf, int len)
{
        int i;
        for (i = 0; i < len;) {
                printf(" %2x", 0xff & buf[i++]);
                if (i % 32 == 0)
                        printf("\n");
        }
        printf("\n");
}

void
dump_matrix(unsigned char **s, int k, int m)
{
        int i, j;
        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", s[i][j]);
                }
                printf("\n");
        }
        printf("\n");
}

void
dump_u8xu8(unsigned char *s, int k, int m)
{
        int i, j;
        for (i = 0; i < k; i++) {
                for (j = 0; j < m; j++) {
                        printf(" %2x", 0xff & s[j + (i * m)]);
                }
                printf("\n");
        }
        printf("\n");
}

int
main(int argc, char *argv[])
{
        int i, j, rtest, srcs;
        void *buf;
        u8 gf[6][TEST_SOURCES];
        u8 *g_tbls;
        u8 *dest_ref[VECT];
        u8 *dest_ptrs[VECT], *buffs[TEST_SOURCES];
        int vector = VECT;

        int align, size;
        unsigned char *efence_buffs[TEST_SOURCES];
        unsigned int offset;
        u8 *ubuffs[TEST_SOURCES];
        u8 *udest_ptrs[VECT];
        printf("test" xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);

        // Allocate the arrays
        for (i = 0; i < TEST_SOURCES; i++) {
                if (posix_memalign(&buf, 64, TEST_LEN)) {
                        printf("alloc error: Fail");
                        return -1;
                }
                buffs[i] = buf;
        }

        if (posix_memalign(&buf, 16, 2 * (vector * TEST_SOURCES * 32))) {
                printf("alloc error: Fail");
                return -1;
        }
        g_tbls = buf;

        for (i = 0; i < vector; i++) {
                if (posix_memalign(&buf, 64, TEST_LEN)) {
                        printf("alloc error: Fail");
                        return -1;
                }
                dest_ptrs[i] = buf;
                memset(dest_ptrs[i], 0, TEST_LEN);
        }

        for (i = 0; i < vector; i++) {
                if (posix_memalign(&buf, 64, TEST_LEN)) {
                        printf("alloc error: Fail");
                        return -1;
                }
                dest_ref[i] = buf;
                memset(dest_ref[i], 0, TEST_LEN);
        }

        // Test of all zeros
        for (i = 0; i < TEST_SOURCES; i++)
                memset(buffs[i], 0, TEST_LEN);

        switch (vector) {
        case 6:
                memset(gf[5], 0xe6, TEST_SOURCES);
        case 5:
                memset(gf[4], 4, TEST_SOURCES);
        case 4:
                memset(gf[3], 9, TEST_SOURCES);
        case 3:
                memset(gf[2], 7, TEST_SOURCES);
        case 2:
                memset(gf[1], 1, TEST_SOURCES);
        case 1:
                memset(gf[0], 2, TEST_SOURCES);
                break;
        default:
                return -1;
        }

        for (i = 0; i < TEST_SOURCES; i++)
                for (j = 0; j < TEST_LEN; j++)
                        buffs[i][j] = rand();

        for (i = 0; i < vector; i++)
                for (j = 0; j < TEST_SOURCES; j++) {
                        gf[i][j] = rand();
                        gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
                }

        for (i = 0; i < vector; i++)
                gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES], buffs,
                                      dest_ref[i]);

        for (i = 0; i < vector; i++)
                memset(dest_ptrs[i], 0, TEST_LEN);
        for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1)
                FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs);
#else
                FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
#endif
        }
        for (i = 0; i < vector; i++) {
                if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
                        printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
                        dump_matrix(buffs, vector, TEST_SOURCES);
                        printf("dprod_base:");
                        dump(dest_ref[i], 25);
                        printf("dprod_dut:");
                        dump(dest_ptrs[i], 25);
                        return -1;
                }
        }

#if (VECT == 1)
        REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, *dest_ref);
#else
        REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref);
#endif
        for (i = 0; i < vector; i++) {
                if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
                        printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
                        dump_matrix(buffs, vector, TEST_SOURCES);
                        printf("dprod_base:");
                        dump(dest_ref[i], 25);
                        printf("dprod_dut:");
                        dump(dest_ptrs[i], 25);
                        return -1;
                }
        }

#ifdef TEST_VERBOSE
        putchar('.');
#endif

        // Rand data test

        for (rtest = 0; rtest < RANDOMS; rtest++) {
                for (i = 0; i < TEST_SOURCES; i++)
                        for (j = 0; j < TEST_LEN; j++)
                                buffs[i][j] = rand();

                for (i = 0; i < vector; i++)
                        for (j = 0; j < TEST_SOURCES; j++) {
                                gf[i][j] = rand();
                                gf_vect_mul_init(gf[i][j],
                                                 &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
                        }

                for (i = 0; i < vector; i++)
                        gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES,
                                              &g_tbls[i * 32 * TEST_SOURCES], buffs, dest_ref[i]);

                for (i = 0; i < vector; i++)
                        memset(dest_ptrs[i], 0, TEST_LEN);
                for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1)
                        FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i],
                                            *dest_ptrs);
#else
                        FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
#endif
                }
                for (i = 0; i < vector; i++) {
                        if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
                                printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d %d\n", i,
                                       rtest);
                                dump_matrix(buffs, vector, TEST_SOURCES);
                                printf("dprod_base:");
                                dump(dest_ref[i], 25);
                                printf("dprod_dut:");
                                dump(dest_ptrs[i], 25);
                                return -1;
                        }
                }

#ifdef TEST_VERBOSE
                putchar('.');
#endif
        }

        // Rand data test with varied parameters
        for (rtest = 0; rtest < RANDOMS; rtest++) {
                for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
                        for (i = 0; i < srcs; i++)
                                for (j = 0; j < TEST_LEN; j++)
                                        buffs[i][j] = rand();

                        for (i = 0; i < vector; i++)
                                for (j = 0; j < srcs; j++) {
                                        gf[i][j] = rand();
                                        gf_vect_mul_init(gf[i][j],
                                                         &g_tbls[i * (32 * srcs) + j * 32]);
                                }

                        for (i = 0; i < vector; i++)
                                gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[i * 32 * srcs], buffs,
                                                      dest_ref[i]);

                        for (i = 0; i < vector; i++)
                                memset(dest_ptrs[i], 0, TEST_LEN);
                        for (i = 0; i < srcs; i++) {
#if (VECT == 1)
                                FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i],
                                                    *dest_ptrs);
#else
                                FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i], dest_ptrs);
#endif
                        }
                        for (i = 0; i < vector; i++) {
                                if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
                                        printf("Fail rand " xstr(
                                                       FUNCTION_UNDER_TEST) " test%d srcs=%d\n",
                                               i, srcs);
                                        dump_matrix(buffs, vector, TEST_SOURCES);
                                        printf("dprod_base:");
                                        dump(dest_ref[i], 25);
                                        printf("dprod_dut:");
                                        dump(dest_ptrs[i], 25);
                                        return -1;
                                }
                        }

#ifdef TEST_VERBOSE
                        putchar('.');
#endif
                }
        }

        // Run tests at end of buffer for Electric Fence
        align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
        for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
                for (i = 0; i < TEST_SOURCES; i++)
                        for (j = 0; j < TEST_LEN; j++)
                                buffs[i][j] = rand();

                for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
                        efence_buffs[i] = buffs[i] + TEST_LEN - size;

                for (i = 0; i < vector; i++)
                        for (j = 0; j < TEST_SOURCES; j++) {
                                gf[i][j] = rand();
                                gf_vect_mul_init(gf[i][j],
                                                 &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
                        }

                for (i = 0; i < vector; i++)
                        gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES],
                                              efence_buffs, dest_ref[i]);

                for (i = 0; i < vector; i++)
                        memset(dest_ptrs[i], 0, size);
                for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1)
                        FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
                                            *dest_ptrs);
#else
                        FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
                                            dest_ptrs);
#endif
                }
                for (i = 0; i < vector; i++) {
                        if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
                                printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d size=%d\n",
                                       i, size);
                                dump_matrix(buffs, vector, TEST_SOURCES);
                                printf("dprod_base:");
                                dump(dest_ref[i], TEST_MIN_SIZE + align);
                                printf("dprod_dut:");
                                dump(dest_ptrs[i], TEST_MIN_SIZE + align);
                                return -1;
                        }
                }

#ifdef TEST_VERBOSE
                putchar('.');
#endif
        }

        // Test rand ptr alignment if available

        for (rtest = 0; rtest < RANDOMS; rtest++) {
                size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
                srcs = rand() % TEST_SOURCES;
                if (srcs == 0)
                        continue;

                offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
                // Add random offsets
                for (i = 0; i < srcs; i++)
                        ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));

                for (i = 0; i < vector; i++) {
                        udest_ptrs[i] = dest_ptrs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
                        memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
                }

                for (i = 0; i < srcs; i++)
                        for (j = 0; j < size; j++)
                                ubuffs[i][j] = rand();

                for (i = 0; i < vector; i++)
                        for (j = 0; j < srcs; j++) {
                                gf[i][j] = rand();
                                gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * srcs) + j * 32]);
                        }

                for (i = 0; i < vector; i++)
                        gf_vect_dot_prod_base(size, srcs, &g_tbls[i * 32 * srcs], ubuffs,
                                              dest_ref[i]);

                for (i = 0; i < srcs; i++) {
#if (VECT == 1)
                        FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], *udest_ptrs);
#else
                        FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], udest_ptrs);
#endif
                }
                for (i = 0; i < vector; i++) {
                        if (0 != memcmp(dest_ref[i], udest_ptrs[i], size)) {
                                printf("Fail rand " xstr(
                                               FUNCTION_UNDER_TEST) " test%d ualign srcs=%d\n",
                                       i, srcs);
                                dump_matrix(buffs, vector, TEST_SOURCES);
                                printf("dprod_base:");
                                dump(dest_ref[i], 25);
                                printf("dprod_dut:");
                                dump(udest_ptrs[i], 25);
                                return -1;
                        }
                }

                // Confirm that padding around dests is unchanged
                memset(dest_ref[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff

                for (i = 0; i < vector; i++) {
                        offset = udest_ptrs[i] - dest_ptrs[i];
                        if (memcmp(dest_ptrs[i], dest_ref[0], offset)) {
                                printf("Fail rand ualign pad1 start\n");
                                return -1;
                        }
                        if (memcmp(dest_ptrs[i] + offset + size, dest_ref[0],
                                   PTR_ALIGN_CHK_B - offset)) {
                                printf("Fail rand ualign pad1 end\n");
                                return -1;
                        }
                }

#ifdef TEST_VERBOSE
                putchar('.');
#endif
        }

        // Test all size alignment
        align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;

        for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
                for (i = 0; i < TEST_SOURCES; i++)
                        for (j = 0; j < size; j++)
                                buffs[i][j] = rand();

                for (i = 0; i < vector; i++) {
                        for (j = 0; j < TEST_SOURCES; j++) {
                                gf[i][j] = rand();
                                gf_vect_mul_init(gf[i][j],
                                                 &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
                        }
                        memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
                }

                for (i = 0; i < vector; i++)
                        gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES],
                                              buffs, dest_ref[i]);

                for (i = 0; i < TEST_SOURCES; i++) {
#if (VECT == 1)
                        FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs);
#else
                        FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
#endif
                }
                for (i = 0; i < vector; i++) {
                        if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
                                printf("Fail rand " xstr(
                                               FUNCTION_UNDER_TEST) " test%d ualign len=%d\n",
                                       i, size);
                                dump_matrix(buffs, vector, TEST_SOURCES);
                                printf("dprod_base:");
                                dump(dest_ref[i], 25);
                                printf("dprod_dut:");
                                dump(dest_ptrs[i], 25);
                                return -1;
                        }
                }

#ifdef TEST_VERBOSE
                putchar('.');
#endif
        }

        printf("Pass\n");

        return 0;
}
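For context on what this test exercises, here is a minimal sketch (not part of the commit) of the property being checked: calling the multiply-accumulate form once per source, starting from a zeroed destination, should match the one-shot dot product over the same coefficients. The helper name, the 16-source table bound, and the buffer handling are assumptions for illustration only.

#include <string.h>
#include "erasure_code.h"

// Sketch: accumulate k sources into dest with gf_vect_mad, then compare
// against gf_vect_dot_prod over the same coefficients and sources.
// Returns 0 when the two agree.
static int
check_mad_vs_dot_prod(int len, int k, unsigned char *gf, unsigned char **srcs,
                      unsigned char *dest, unsigned char *ref)
{
        unsigned char g_tbls[32 * 16]; // room for up to 16 sources (assumed bound)
        int i;

        for (i = 0; i < k; i++)
                gf_vect_mul_init(gf[i], &g_tbls[32 * i]);

        memset(dest, 0, len); // mad accumulates into dest, so start from zero
        for (i = 0; i < k; i++)
                gf_vect_mad(len, k, i, g_tbls, srcs[i], dest);

        gf_vect_dot_prod(len, k, g_tbls, srcs, ref);
        return memcmp(dest, ref, len);
}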
@@ -29,117 +29,116 @@

#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset
#include "erasure_code.h"

#define TEST_SIZE 8192
#define TEST_MEM TEST_SIZE
#define TEST_LOOPS 100000
#define TEST_TYPE_STR ""

typedef unsigned char u8;

int
main(int argc, char *argv[])
{
        int i;
        u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
        int align, size;
        unsigned char *efence_buff1;
        unsigned char *efence_buff2;

        printf("gf_vect_mul_base_test:\n");

        gf_vect_mul_init(a, gf_const_tbl);

        buff1 = (u8 *) malloc(TEST_SIZE);
        buff2 = (u8 *) malloc(TEST_SIZE);
        buff3 = (u8 *) malloc(TEST_SIZE);

        if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
                printf("buffer alloc error\n");
                return -1;
        }
        // Fill with rand data
        for (i = 0; i < TEST_SIZE; i++)
                buff1[i] = rand();

        if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
                printf("fail fill with rand data\n");
                return 1;
        }

        for (i = 0; i < TEST_SIZE; i++)
                if (gf_mul(a, buff1[i]) != buff2[i]) {
                        printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
                               gf_mul(2, buff1[i]));
                        return 1;
                }

        if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) {
                printf("fail fill with rand data for buff1\n");
                return -1;
        }
        // Check reference function
        for (i = 0; i < TEST_SIZE; i++)
                if (buff2[i] != buff3[i]) {
                        printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", i, a, buff1[i], buff2[i],
                               gf_mul(a, buff1[i]));
                        return 1;
                }

        for (i = 0; i < TEST_SIZE; i++)
                buff1[i] = rand();

        // Check each possible constant
        printf("Random tests ");
        for (a = 0; a != 255; a++) {
                gf_vect_mul_init(a, gf_const_tbl);
                if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
                        printf("fail random tests\n");
                        return 1;
                }

                for (i = 0; i < TEST_SIZE; i++)
                        if (gf_mul(a, buff1[i]) != buff2[i]) {
                                printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", i, a, buff1[i],
                                       buff2[i], gf_mul(2, buff1[i]));
                                return 1;
                        }
#ifdef TEST_VERBOSE
                putchar('.');
#endif
        }

        // Run tests at end of buffer for Electric Fence
        align = 32;
        a = 2;

        gf_vect_mul_init(a, gf_const_tbl);
        for (size = 0; size < TEST_SIZE; size += align) {
                // Line up TEST_SIZE from end
                efence_buff1 = buff1 + size;
                efence_buff2 = buff2 + size;

                if (gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2) !=
                    0) {
                        printf("fail tests at end of buffer\n");
                        return -1;
                }

                for (i = 0; i < TEST_SIZE - size; i++)
                        if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
                                printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, efence_buff1[i],
                                       efence_buff2[i], gf_mul(2, efence_buff1[i]));
                                return 1;
                        }

#ifdef TEST_VERBOSE
                putchar('.');
#endif
        }

        printf(" done: Pass\n");
        return 0;
}
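A small usage sketch of the invariant this test checks (illustrative only; the helper name and buffer handling are assumed, not taken from the file): after gf_vect_mul_init(a, tbl), gf_vect_mul_base(len, tbl, src, dest) should leave dest[i] equal to gf_mul(a, src[i]) for every byte.

#include <stdlib.h>
#include "erasure_code.h"

// Sketch: verify scalar/vector agreement for one GF(2^8) constant.
// Returns 0 on agreement, nonzero otherwise.
static int
check_const_mul(unsigned char a, int len)
{
        unsigned char tbl[64];
        unsigned char *src = malloc(len), *dst = malloc(len);
        int i, fail = 0;

        if (src == NULL || dst == NULL)
                return -1;
        for (i = 0; i < len; i++)
                src[i] = rand();

        gf_vect_mul_init(a, tbl);
        if (gf_vect_mul_base(len, tbl, src, dst) != 0)
                fail = 1;
        for (i = 0; !fail && i < len; i++)
                if (dst[i] != gf_mul(a, src[i]))
                        fail = 1;

        free(src);
        free(dst);
        return fail;
}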
@@ -29,63 +29,65 @@

#include <stdio.h>
#include <stdlib.h>
#include <string.h> // for memset
#include "erasure_code.h"
#include "test.h"

#ifndef GT_L3_CACHE
#define GT_L3_CACHE 32 * 1024 * 1024 /* some number > last level cache */
#endif

#if !defined(COLD_TEST) && !defined(TEST_CUSTOM)
// Cached test, loop many times over small dataset
#define TEST_SOURCES 10
#define TEST_LEN 8 * 1024
#define TEST_TYPE_STR "_warm"
#elif defined(COLD_TEST)
// Uncached test. Pull from large mem base.
#define TEST_SOURCES 10
#define TEST_LEN (GT_L3_CACHE / 2)
#define TEST_TYPE_STR "_cold"
#elif defined(TEST_CUSTOM)
#define TEST_TYPE_STR "_cus"
#endif

#define TEST_MEM (2 * TEST_LEN)

typedef unsigned char u8;

void
gf_vect_mul_perf(u8 a, u8 *gf_const_tbl, u8 *buff1, u8 *buff2)
{
        gf_vect_mul_init(a, gf_const_tbl);
        gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
}

int
main(int argc, char *argv[])
{
        u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
        struct perf start;

        printf("gf_vect_mul_perf:\n");

        // Allocate large mem region
        buff1 = (u8 *) malloc(TEST_LEN);
        buff2 = (u8 *) malloc(TEST_LEN);
        if (NULL == buff1 || NULL == buff2) {
                printf("Failed to allocate %dB\n", TEST_LEN);
                return 1;
        }

        memset(buff1, 0, TEST_LEN);
        memset(buff2, 0, TEST_LEN);

        printf("Start timed tests\n");
        fflush(0);

        BENCHMARK(&start, BENCHMARK_TIME, gf_vect_mul_perf(a, gf_const_tbl, buff1, buff2));

        printf("gf_vect_mul" TEST_TYPE_STR ": ");
        perf_print(start, (long long) TEST_LEN);

        return 0;
}
@ -31,165 +31,164 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "erasure_code.h"
|
#include "erasure_code.h"
|
||||||
|
|
||||||
#define TEST_SIZE (128*1024)
|
#define TEST_SIZE (128 * 1024)
|
||||||
|
|
||||||
typedef unsigned char u8;
|
typedef unsigned char u8;
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int
|
||||||
|
main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
int i, ret = -1;
|
int i, ret = -1;
|
||||||
u8 *buff1 = NULL, *buff2 = NULL, *buff3 = NULL, gf_const_tbl[64], a = 2;
|
u8 *buff1 = NULL, *buff2 = NULL, *buff3 = NULL, gf_const_tbl[64], a = 2;
|
||||||
int tsize;
|
int tsize;
|
||||||
int align, size;
|
int align, size;
|
||||||
unsigned char *efence_buff1;
|
unsigned char *efence_buff1;
|
||||||
unsigned char *efence_buff2;
|
unsigned char *efence_buff2;
|
||||||
unsigned char *efence_buff3;
|
unsigned char *efence_buff3;
|
||||||
|
|
||||||
printf("gf_vect_mul_test: ");
|
printf("gf_vect_mul_test: ");
|
||||||
|
|
||||||
gf_vect_mul_init(a, gf_const_tbl);
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
|
|
||||||
buff1 = (u8 *) malloc(TEST_SIZE);
|
buff1 = (u8 *) malloc(TEST_SIZE);
|
||||||
buff2 = (u8 *) malloc(TEST_SIZE);
|
buff2 = (u8 *) malloc(TEST_SIZE);
|
||||||
buff3 = (u8 *) malloc(TEST_SIZE);
|
buff3 = (u8 *) malloc(TEST_SIZE);
|
||||||
|
|
||||||
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
|
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
|
||||||
printf("buffer alloc error\n");
|
printf("buffer alloc error\n");
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
// Fill with rand data
|
// Fill with rand data
|
||||||
for (i = 0; i < TEST_SIZE; i++)
|
for (i = 0; i < TEST_SIZE; i++)
|
||||||
buff1[i] = rand();
|
buff1[i] = rand();
|
||||||
|
|
||||||
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
|
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
|
||||||
printf("fail creating buff2\n");
|
printf("fail creating buff2\n");
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < TEST_SIZE; i++) {
|
for (i = 0; i < TEST_SIZE; i++) {
|
||||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i,
|
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
|
||||||
buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
gf_mul(2, buff1[i]));
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) {
|
if (gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3) != 0) {
|
||||||
printf("fail fill with rand data\n");
|
printf("fail fill with rand data\n");
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
// Check reference function
|
// Check reference function
|
||||||
for (i = 0; i < TEST_SIZE; i++) {
|
for (i = 0; i < TEST_SIZE; i++) {
|
||||||
if (buff2[i] != buff3[i]) {
|
if (buff2[i] != buff3[i]) {
|
||||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", i, a, buff1[i], buff2[i],
|
||||||
i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
|
gf_mul(a, buff1[i]));
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < TEST_SIZE; i++)
|
for (i = 0; i < TEST_SIZE; i++)
|
||||||
buff1[i] = rand();
|
buff1[i] = rand();
|
||||||
|
|
||||||
// Check each possible constant
|
// Check each possible constant
|
||||||
for (a = 0; a != 255; a++) {
|
for (a = 0; a != 255; a++) {
|
||||||
gf_vect_mul_init(a, gf_const_tbl);
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
|
if (gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2) != 0) {
|
||||||
printf("fail creating buff2\n");
|
printf("fail creating buff2\n");
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < TEST_SIZE; i++)
|
for (i = 0; i < TEST_SIZE; i++)
|
||||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||||
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
|
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", i, a, buff1[i],
|
||||||
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
buff2[i], gf_mul(2, buff1[i]));
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
#ifdef TEST_VERBOSE
|
#ifdef TEST_VERBOSE
|
||||||
putchar('.');
|
putchar('.');
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check buffer len
|
// Check buffer len
|
||||||
for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) {
|
for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) {
|
||||||
a = rand();
|
a = rand();
|
||||||
gf_vect_mul_init(a, gf_const_tbl);
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
if (gf_vect_mul(tsize, gf_const_tbl, buff1, buff2) != 0) {
|
if (gf_vect_mul(tsize, gf_const_tbl, buff1, buff2) != 0) {
|
||||||
printf("fail creating buff2 (len %d)\n", tsize);
|
printf("fail creating buff2 (len %d)\n", tsize);
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < tsize; i++)
|
for (i = 0; i < tsize; i++)
|
||||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||||
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
|
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", i, a, buff1[i],
|
||||||
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
buff2[i], gf_mul(2, buff1[i]));
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
#ifdef TEST_VERBOSE
|
#ifdef TEST_VERBOSE
|
||||||
if (0 == tsize % (32 * 8)) {
|
if (0 == tsize % (32 * 8)) {
|
||||||
putchar('.');
|
putchar('.');
|
||||||
fflush(0);
|
fflush(0);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run tests at end of buffer for Electric Fence
|
// Run tests at end of buffer for Electric Fence
|
||||||
align = 32;
|
align = 32;
|
||||||
a = 2;
|
a = 2;
|
||||||
|
|
||||||
gf_vect_mul_init(a, gf_const_tbl);
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
for (size = 0; size < TEST_SIZE; size += align) {
|
for (size = 0; size < TEST_SIZE; size += align) {
|
||||||
// Line up TEST_SIZE from end
|
// Line up TEST_SIZE from end
|
||||||
efence_buff1 = buff1 + size;
|
efence_buff1 = buff1 + size;
|
||||||
efence_buff2 = buff2 + size;
|
efence_buff2 = buff2 + size;
|
||||||
efence_buff3 = buff3 + size;
|
efence_buff3 = buff3 + size;
|
||||||
|
|
||||||
gf_vect_mul(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
|
gf_vect_mul(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
|
||||||
|
|
||||||
for (i = 0; i < TEST_SIZE - size; i++)
|
for (i = 0; i < TEST_SIZE - size; i++)
|
||||||
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
|
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
|
||||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
|
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, efence_buff1[i],
|
||||||
i, efence_buff1[i], efence_buff2[i],
|
efence_buff2[i], gf_mul(2, efence_buff1[i]));
|
||||||
gf_mul(2, efence_buff1[i]));
|
goto exit;
|
||||||
goto exit;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (gf_vect_mul_base
|
if (gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3) !=
|
||||||
(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3) != 0) {
|
0) {
|
||||||
printf("fail line up TEST_SIZE from end\n");
|
printf("fail line up TEST_SIZE from end\n");
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
// Check reference function
|
// Check reference function
|
||||||
for (i = 0; i < TEST_SIZE - size; i++)
|
for (i = 0; i < TEST_SIZE - size; i++)
|
||||||
if (efence_buff2[i] != efence_buff3[i]) {
|
if (efence_buff2[i] != efence_buff3[i]) {
|
||||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", i, a,
|
||||||
i, a, efence_buff2[i], efence_buff3[i],
|
efence_buff2[i], efence_buff3[i],
|
||||||
gf_mul(2, efence_buff1[i]));
|
gf_mul(2, efence_buff1[i]));
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
#ifdef TEST_VERBOSE
|
#ifdef TEST_VERBOSE
|
||||||
putchar('.');
|
putchar('.');
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test all unsupported sizes up to TEST_SIZE
|
// Test all unsupported sizes up to TEST_SIZE
|
||||||
for (size = 0; size < TEST_SIZE; size++) {
|
for (size = 0; size < TEST_SIZE; size++) {
|
||||||
if (size % align != 0 && gf_vect_mul(size, gf_const_tbl, buff1, buff2) == 0) {
|
if (size % align != 0 && gf_vect_mul(size, gf_const_tbl, buff1, buff2) == 0) {
|
||||||
printf
|
printf("fail expecting nonzero return code for unaligned size param (%d)\n",
|
||||||
("fail expecting nonzero return code for unaligned size param (%d)\n",
|
size);
|
||||||
size);
|
goto exit;
|
||||||
goto exit;
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
printf(" done: Pass\n");
|
printf(" done: Pass\n");
|
||||||
fflush(0);
|
fflush(0);
|
||||||
|
|
||||||
ret = 0;
|
ret = 0;
|
||||||
exit:
|
exit:
|
||||||
|
|
||||||
free(buff1);
|
free(buff1);
|
||||||
free(buff2);
|
free(buff2);
|
||||||
free(buff3);
|
free(buff3);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
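
Taken together, the test above pins down a small contract: gf_vect_mul_init()
expands one constant into a 64-byte lookup table, gf_vect_mul() applies it to a
buffer whose length is a multiple of 32 bytes, and unaligned lengths are
rejected with a non-zero return. Below is a usage sketch against the
erasure_code.h declarations visible in this patch; the buffer size and the
constant 0x1d are arbitrary choices for illustration.

#include <stdio.h>
#include <stdlib.h>
#include "erasure_code.h"

int main(void)
{
        unsigned char gf_const_tbl[64], a = 0x1d;   /* any GF(2^8) constant */
        int len = 8 * 1024;                         /* must be a multiple of 32 */
        unsigned char *in = malloc(len);
        unsigned char *out = malloc(len);

        if (in == NULL || out == NULL)
                return 1;
        for (int i = 0; i < len; i++)
                in[i] = rand();

        gf_vect_mul_init(a, gf_const_tbl);          /* expand a into the 64-byte table */
        if (gf_vect_mul(len, gf_const_tbl, in, out) != 0)
                return 1;                           /* non-zero for unsupported lengths */

        /* every output byte is the GF(2^8) product a * in[i] */
        printf("0x%x * 0x%x = 0x%x\n", a, in[0], out[0]);

        free(in);
        free(out);
        return 0;
}
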
@ -1,106 +1,109 @@
|
|||||||
#include "erasure_code.h"
|
#include "erasure_code.h"
|
||||||
#include "ec_base_vsx.h"
|
#include "ec_base_vsx.h"
|
||||||
|
|
||||||
void gf_vect_dot_prod(int len, int vlen, unsigned char *v,
|
void
|
||||||
unsigned char **src, unsigned char *dest)
|
gf_vect_dot_prod(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest)
|
||||||
{
|
{
|
||||||
gf_vect_dot_prod_vsx(len, vlen, v, src, dest);
|
gf_vect_dot_prod_vsx(len, vlen, v, src, dest);
|
||||||
}
|
}
|
||||||
|
|
||||||
void gf_vect_mad(int len, int vec, int vec_i, unsigned char *v,
|
void
|
||||||
unsigned char *src, unsigned char *dest)
|
gf_vect_mad(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, unsigned char *dest)
|
||||||
{
|
{
|
||||||
gf_vect_mad_vsx(len, vec, vec_i, v, src, dest);
|
gf_vect_mad_vsx(len, vec, vec_i, v, src, dest);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ec_encode_data(int len, int srcs, int dests, unsigned char *v,
|
void
|
||||||
unsigned char **src, unsigned char **dest)
|
ec_encode_data(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
|
||||||
|
unsigned char **dest)
|
||||||
{
|
{
|
||||||
if (len < 64) {
|
if (len < 64) {
|
||||||
ec_encode_data_base(len, srcs, dests, v, src, dest);
|
ec_encode_data_base(len, srcs, dests, v, src, dest);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (dests >= 6) {
|
while (dests >= 6) {
|
||||||
gf_6vect_dot_prod_vsx(len, srcs, v, src, dest);
|
gf_6vect_dot_prod_vsx(len, srcs, v, src, dest);
|
||||||
v += 6 * srcs * 32;
|
v += 6 * srcs * 32;
|
||||||
dest += 6;
|
dest += 6;
|
||||||
dests -= 6;
|
dests -= 6;
|
||||||
}
|
}
|
||||||
switch (dests) {
|
switch (dests) {
|
||||||
case 6:
|
case 6:
|
||||||
gf_6vect_dot_prod_vsx(len, srcs, v, src, dest);
|
gf_6vect_dot_prod_vsx(len, srcs, v, src, dest);
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
gf_5vect_dot_prod_vsx(len, srcs, v, src, dest);
|
gf_5vect_dot_prod_vsx(len, srcs, v, src, dest);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
gf_4vect_dot_prod_vsx(len, srcs, v, src, dest);
|
gf_4vect_dot_prod_vsx(len, srcs, v, src, dest);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
gf_3vect_dot_prod_vsx(len, srcs, v, src, dest);
|
gf_3vect_dot_prod_vsx(len, srcs, v, src, dest);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
gf_2vect_dot_prod_vsx(len, srcs, v, src, dest);
|
gf_2vect_dot_prod_vsx(len, srcs, v, src, dest);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
gf_vect_dot_prod_vsx(len, srcs, v, src, *dest);
|
gf_vect_dot_prod_vsx(len, srcs, v, src, *dest);
|
||||||
break;
|
break;
|
||||||
case 0:
|
case 0:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v,
|
void
|
||||||
unsigned char *data, unsigned char **dest)
|
ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v, unsigned char *data,
|
||||||
|
unsigned char **dest)
|
||||||
{
|
{
|
||||||
if (len < 64) {
|
if (len < 64) {
|
||||||
ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
|
ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (rows >= 6) {
|
while (rows >= 6) {
|
||||||
gf_6vect_mad_vsx(len, k, vec_i, v, data, dest);
|
gf_6vect_mad_vsx(len, k, vec_i, v, data, dest);
|
||||||
v += 6 * k * 32;
|
v += 6 * k * 32;
|
||||||
dest += 6;
|
dest += 6;
|
||||||
rows -= 6;
|
rows -= 6;
|
||||||
}
|
}
|
||||||
switch (rows) {
|
switch (rows) {
|
||||||
case 6:
|
case 6:
|
||||||
gf_6vect_mad_vsx(len, k, vec_i, v, data, dest);
|
gf_6vect_mad_vsx(len, k, vec_i, v, data, dest);
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
gf_5vect_mad_vsx(len, k, vec_i, v, data, dest);
|
gf_5vect_mad_vsx(len, k, vec_i, v, data, dest);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
gf_4vect_mad_vsx(len, k, vec_i, v, data, dest);
|
gf_4vect_mad_vsx(len, k, vec_i, v, data, dest);
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
gf_3vect_mad_vsx(len, k, vec_i, v, data, dest);
|
gf_3vect_mad_vsx(len, k, vec_i, v, data, dest);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
gf_2vect_mad_vsx(len, k, vec_i, v, data, dest);
|
gf_2vect_mad_vsx(len, k, vec_i, v, data, dest);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
gf_vect_mad_vsx(len, k, vec_i, v, data, *dest);
|
gf_vect_mad_vsx(len, k, vec_i, v, data, *dest);
|
||||||
break;
|
break;
|
||||||
case 0:
|
case 0:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
|
int
|
||||||
|
gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
|
||||||
{
|
{
|
||||||
/* Size must be aligned to 32 bytes */
|
/* Size must be aligned to 32 bytes */
|
||||||
if ((len % 32) != 0)
|
if ((len % 32) != 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
gf_vect_mul_vsx(len, a, (unsigned char *)src, (unsigned char *)dest);
|
gf_vect_mul_vsx(len, a, (unsigned char *) src, (unsigned char *) dest);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
|
void
|
||||||
|
ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
|
||||||
{
|
{
|
||||||
return ec_init_tables_base(k, rows, a, g_tbls);
|
return ec_init_tables_base(k, rows, a, g_tbls);
|
||||||
}
|
}
|
||||||
|
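
The ec_encode_data() wrapper above is the only entry point a caller needs; the
six-destinations-at-a-time loop and the trailing switch are internal dispatch.
Below is a minimal encode sketch built on the signatures visible in this file.
The matrix helper gf_gen_cauchy1_matrix() and the fragment counts are assumed
from the public erasure_code.h API and chosen for illustration; they are not
part of this hunk.

#include <stdlib.h>
#include "erasure_code.h"

#define K 10                    /* data fragments */
#define P 4                     /* parity fragments */
#define LEN (16 * 1024)         /* bytes per fragment */

int encode_example(unsigned char **data, unsigned char **parity)
{
        unsigned char *encode_matrix = malloc((K + P) * K);
        unsigned char *g_tbls = malloc(K * P * 32);

        if (encode_matrix == NULL || g_tbls == NULL) {
                free(encode_matrix);
                free(g_tbls);
                return -1;
        }

        /* (K+P) x K generator matrix; rows K..K+P-1 produce the parity */
        gf_gen_cauchy1_matrix(encode_matrix, K + P, K);

        /* expand the P parity rows into 32 bytes of table per coefficient */
        ec_init_tables(K, P, &encode_matrix[K * K], g_tbls);

        /* one call fills all P parity buffers from the K data buffers */
        ec_encode_data(LEN, K, P, g_tbls, data, parity);

        free(encode_matrix);
        free(g_tbls);
        return 0;
}

The update variant above it, ec_encode_data_update(), takes the same tables but
folds in one source fragment per call; a sketch of that path appears after the
_mad kernels later in this patch.
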
@ -9,29 +9,37 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__ibmxl__)
|
#if defined(__ibmxl__)
|
||||||
#define EC_vec_xl(a, b) vec_xl_be(a, b)
|
#define EC_vec_xl(a, b) vec_xl_be(a, b)
|
||||||
#define EC_vec_permxor(va, vb, vc) __vpermxor(va, vb, vc)
|
#define EC_vec_permxor(va, vb, vc) __vpermxor(va, vb, vc)
|
||||||
#elif defined __GNUC__ && __GNUC__ >= 8
|
#elif defined __GNUC__ && __GNUC__ >= 8
|
||||||
#define EC_vec_xl(a, b) vec_xl_be(a, b)
|
#define EC_vec_xl(a, b) vec_xl_be(a, b)
|
||||||
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vc)
|
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vc)
|
||||||
#elif defined __GNUC__ && __GNUC__ >= 7
|
#elif defined __GNUC__ && __GNUC__ >= 7
|
||||||
#if defined _ARCH_PWR9
|
#if defined _ARCH_PWR9
|
||||||
#define EC_vec_xl(a, b) vec_vsx_ld(a, b)
|
#define EC_vec_xl(a, b) vec_vsx_ld(a, b)
|
||||||
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
|
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
|
||||||
#else
|
#else
|
||||||
inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
|
inline vector unsigned char
|
||||||
vector unsigned char vc;
|
EC_vec_xl(int off, unsigned char *ptr)
|
||||||
__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0" : "=wa" (vc) : "r" (off), "r" (ptr));
|
{
|
||||||
return vc;
|
vector unsigned char vc;
|
||||||
|
__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0"
|
||||||
|
: "=wa"(vc)
|
||||||
|
: "r"(off), "r"(ptr));
|
||||||
|
return vc;
|
||||||
}
|
}
|
||||||
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
|
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#if defined _ARCH_PWR8
|
#if defined _ARCH_PWR8
|
||||||
inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
|
inline vector unsigned char
|
||||||
vector unsigned char vc;
|
EC_vec_xl(int off, unsigned char *ptr)
|
||||||
__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0" : "=wa" (vc) : "r" (off), "r" (ptr));
|
{
|
||||||
return vc;
|
vector unsigned char vc;
|
||||||
|
__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0"
|
||||||
|
: "=wa"(vc)
|
||||||
|
: "r"(off), "r"(ptr));
|
||||||
|
return vc;
|
||||||
}
|
}
|
||||||
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
|
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
|
||||||
#else
|
#else
|
||||||
@ -57,7 +65,8 @@ inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
|
|||||||
* @returns none
|
* @returns none
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigned char *dest);
|
void
|
||||||
|
gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigned char *dest);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief GF(2^8) vector dot product. VSX version.
|
* @brief GF(2^8) vector dot product. VSX version.
|
||||||
@ -77,8 +86,9 @@ void gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigne
|
|||||||
* @returns none
|
* @returns none
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
void
|
||||||
unsigned char **src, unsigned char *dest);
|
gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char *dest);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief GF(2^8) vector dot product with two outputs. VSX version.
|
* @brief GF(2^8) vector dot product with two outputs. VSX version.
|
||||||
@ -99,8 +109,9 @@ void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
|||||||
* @returns none
|
* @returns none
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
void
|
||||||
unsigned char **src, unsigned char **dest);
|
gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief GF(2^8) vector dot product with three outputs. VSX version.
|
* @brief GF(2^8) vector dot product with three outputs. VSX version.
|
||||||
@ -121,8 +132,9 @@ void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
|||||||
* @returns none
|
* @returns none
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
void
|
||||||
unsigned char **src, unsigned char **dest);
|
gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief GF(2^8) vector dot product with four outputs. VSX version.
|
* @brief GF(2^8) vector dot product with four outputs. VSX version.
|
||||||
@ -143,8 +155,9 @@ void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
|||||||
* @returns none
|
* @returns none
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
void
|
||||||
unsigned char **src, unsigned char **dest);
|
gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief GF(2^8) vector dot product with five outputs. VSX version.
|
* @brief GF(2^8) vector dot product with five outputs. VSX version.
|
||||||
@ -165,8 +178,9 @@ void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
|||||||
* @returns none
|
* @returns none
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
void
|
||||||
unsigned char **src, unsigned char **dest);
|
gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief GF(2^8) vector dot product with six outputs. VSX version.
|
* @brief GF(2^8) vector dot product with six outputs. VSX version.
|
||||||
@ -187,8 +201,9 @@ void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
|||||||
* @returns none
|
* @returns none
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
void
|
||||||
unsigned char **src, unsigned char **dest);
|
gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief GF(2^8) vector multiply accumulate. VSX version.
|
* @brief GF(2^8) vector multiply accumulate. VSX version.
|
||||||
@ -211,8 +226,9 @@ void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
|||||||
* @returns none
|
* @returns none
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
void
|
||||||
unsigned char *dest);
|
gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char *dest);
|
||||||
/**
|
/**
|
||||||
* @brief GF(2^8) vector multiply with 2 accumulate. VSX version.
|
* @brief GF(2^8) vector multiply with 2 accumulate. VSX version.
|
||||||
*
|
*
|
||||||
@ -234,8 +250,9 @@ void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigne
|
|||||||
* @returns none
|
* @returns none
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
void
|
||||||
unsigned char **dest);
|
gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief GF(2^8) vector multiply with 3 accumulate. VSX version.
|
* @brief GF(2^8) vector multiply with 3 accumulate. VSX version.
|
||||||
@ -258,8 +275,9 @@ void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
|
|||||||
* @returns none
|
* @returns none
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
void
|
||||||
unsigned char **dest);
|
gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief GF(2^8) vector multiply with 4 accumulate. VSX version.
|
* @brief GF(2^8) vector multiply with 4 accumulate. VSX version.
|
||||||
@ -282,8 +300,9 @@ void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
|
|||||||
* @returns none
|
* @returns none
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
void
|
||||||
unsigned char **dest);
|
gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief GF(2^8) vector multiply with 5 accumulate. VSX version.
|
* @brief GF(2^8) vector multiply with 5 accumulate. VSX version.
|
||||||
@ -305,8 +324,9 @@ void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
|
|||||||
* @param dest Array of pointers to destination input/outputs.
|
* @param dest Array of pointers to destination input/outputs.
|
||||||
* @returns none
|
* @returns none
|
||||||
*/
|
*/
|
||||||
void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
void
|
||||||
unsigned char **dest);
|
gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief GF(2^8) vector multiply with 6 accumulate. VSX version.
|
* @brief GF(2^8) vector multiply with 6 accumulate. VSX version.
|
||||||
@ -328,8 +348,9 @@ void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsign
|
|||||||
* @param dest Array of pointers to destination input/outputs.
|
* @param dest Array of pointers to destination input/outputs.
|
||||||
* @returns none
|
* @returns none
|
||||||
*/
|
*/
|
||||||
void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
void
|
||||||
unsigned char **dest);
|
gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
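
A recurring pattern in the VSX sources that follow is a pair of 16-byte loads,
vlo from offset 0 and vhi from offset 16 of each 32-byte slice of gftbls,
combined through EC_vec_permxor(). The scalar model below spells out what that
combination is taken to compute here: the slice is assumed to hold the products
of one coefficient with every low nibble (bytes 0-15) and every high nibble
(bytes 16-31), so one lookup per nibble plus an xor gives the full GF(2^8)
product. The layout is an assumption inferred from how the tables are indexed,
and gf_mul_by_table() is an illustrative helper, not part of the library.

/* Scalar model of the split-nibble multiply behind EC_vec_permxor().
 * Assumed layout of one 32-byte table for coefficient c:
 *   tbl[0..15]  = c * 0x00, c * 0x01, ..., c * 0x0f   (low-nibble products)
 *   tbl[16..31] = c * 0x00, c * 0x10, ..., c * 0xf0   (high-nibble products) */
static unsigned char gf_mul_by_table(const unsigned char tbl[32], unsigned char x)
{
        return tbl[x & 0x0f] ^ tbl[16 + (x >> 4)];
}
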
@ -1,83 +1,84 @@
|
|||||||
#include "ec_base_vsx.h"
|
#include "ec_base_vsx.h"
|
||||||
|
|
||||||
void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
void
|
||||||
unsigned char **src, unsigned char **dest)
|
gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest)
|
||||||
{
|
{
|
||||||
unsigned char *s, *t0, *t1;
|
unsigned char *s, *t0, *t1;
|
||||||
vector unsigned char vX1, vX2, vX3, vX4;
|
vector unsigned char vX1, vX2, vX3, vX4;
|
||||||
vector unsigned char vY1, vY2, vY3, vY4;
|
vector unsigned char vY1, vY2, vY3, vY4;
|
||||||
vector unsigned char vYD, vYE, vYF, vYG;
|
vector unsigned char vYD, vYE, vYF, vYG;
|
||||||
vector unsigned char vhi0, vlo0, vhi1, vlo1;
|
vector unsigned char vhi0, vlo0, vhi1, vlo1;
|
||||||
int i, j, head;
|
int i, j, head;
|
||||||
|
|
||||||
if (vlen < 128) {
|
if (vlen < 128) {
|
||||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
|
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
|
||||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
|
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
|
||||||
|
|
||||||
for (j = 1; j < vlen; j++) {
|
for (j = 1; j < vlen; j++) {
|
||||||
gf_2vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
gf_2vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
t0 = (unsigned char *)dest[0];
|
t0 = (unsigned char *) dest[0];
|
||||||
t1 = (unsigned char *)dest[1];
|
t1 = (unsigned char *) dest[1];
|
||||||
|
|
||||||
head = len % 64;
|
head = len % 64;
|
||||||
if (head != 0) {
|
if (head != 0) {
|
||||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = head; i < len - 63; i += 64) {
|
for (i = head; i < len - 63; i += 64) {
|
||||||
vY1 = vY1 ^ vY1;
|
vY1 = vY1 ^ vY1;
|
||||||
vY2 = vY2 ^ vY2;
|
vY2 = vY2 ^ vY2;
|
||||||
vY3 = vY3 ^ vY3;
|
vY3 = vY3 ^ vY3;
|
||||||
vY4 = vY4 ^ vY4;
|
vY4 = vY4 ^ vY4;
|
||||||
|
|
||||||
vYD = vYD ^ vYD;
|
vYD = vYD ^ vYD;
|
||||||
vYE = vYE ^ vYE;
|
vYE = vYE ^ vYE;
|
||||||
vYF = vYF ^ vYF;
|
vYF = vYF ^ vYF;
|
||||||
vYG = vYG ^ vYG;
|
vYG = vYG ^ vYG;
|
||||||
|
|
||||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||||
|
|
||||||
for (j = 0; j < vlen; j++) {
|
for (j = 0; j < vlen; j++) {
|
||||||
s = (unsigned char *)src[j];
|
s = (unsigned char *) src[j];
|
||||||
vX1 = vec_xl(0, s + i);
|
vX1 = vec_xl(0, s + i);
|
||||||
vX2 = vec_xl(16, s + i);
|
vX2 = vec_xl(16, s + i);
|
||||||
vX3 = vec_xl(32, s + i);
|
vX3 = vec_xl(32, s + i);
|
||||||
vX4 = vec_xl(48, s + i);
|
vX4 = vec_xl(48, s + i);
|
||||||
|
|
||||||
vlo0 = EC_vec_xl(0, g0);
|
vlo0 = EC_vec_xl(0, g0);
|
||||||
vhi0 = EC_vec_xl(16, g0);
|
vhi0 = EC_vec_xl(16, g0);
|
||||||
vlo1 = EC_vec_xl(0, g1);
|
vlo1 = EC_vec_xl(0, g1);
|
||||||
vhi1 = EC_vec_xl(16, g1);
|
vhi1 = EC_vec_xl(16, g1);
|
||||||
|
|
||||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||||
|
|
||||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||||
|
|
||||||
g0 += 32;
|
g0 += 32;
|
||||||
g1 += 32;
|
g1 += 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
vec_xst(vY1, 0, t0 + i);
|
vec_xst(vY1, 0, t0 + i);
|
||||||
vec_xst(vY2, 16, t0 + i);
|
vec_xst(vY2, 16, t0 + i);
|
||||||
vec_xst(vY3, 0, t1 + i);
|
vec_xst(vY3, 0, t1 + i);
|
||||||
vec_xst(vY4, 16, t1 + i);
|
vec_xst(vY4, 16, t1 + i);
|
||||||
|
|
||||||
vec_xst(vYD, 32, t0 + i);
|
vec_xst(vYD, 32, t0 + i);
|
||||||
vec_xst(vYE, 48, t0 + i);
|
vec_xst(vYE, 48, t0 + i);
|
||||||
vec_xst(vYF, 32, t1 + i);
|
vec_xst(vYF, 32, t1 + i);
|
||||||
vec_xst(vYG, 48, t1 + i);
|
vec_xst(vYG, 48, t1 + i);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
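
The non-multiple-of-64 head above is handed to gf_vect_dot_prod_base(), which
produces the same answer byte by byte. As a reminder of what that answer is,
here is a scalar reference for the single-output case, reusing the illustrative
gf_mul_by_table() helper sketched before these VSX files, with the same
32-byte-per-source table stride used above.

/* dest[i] = xor over j of (coefficient for source j) * src[j][i] in GF(2^8). */
static void gf_vect_dot_prod_ref(int len, int vlen, const unsigned char *gftbls,
                                 unsigned char **src, unsigned char *dest)
{
        for (int i = 0; i < len; i++) {
                unsigned char acc = 0;

                for (int j = 0; j < vlen; j++)
                        acc ^= gf_mul_by_table(&gftbls[j * 32], src[j][i]);
                dest[i] = acc;
        }
}

The two-output kernel above runs two such accumulations in one pass over the
sources, with the second coefficient row starting 32 * vlen bytes into gftbls.
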
@ -1,65 +1,66 @@
|
|||||||
#include "ec_base_vsx.h"
|
#include "ec_base_vsx.h"
|
||||||
|
|
||||||
void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
|
void
|
||||||
unsigned char *src, unsigned char **dest)
|
gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest)
|
||||||
{
|
{
|
||||||
unsigned char *s, *t0, *t1;
|
unsigned char *s, *t0, *t1;
|
||||||
vector unsigned char vX1, vX2, vX3, vX4;
|
vector unsigned char vX1, vX2, vX3, vX4;
|
||||||
vector unsigned char vY1, vY2, vY3, vY4;
|
vector unsigned char vY1, vY2, vY3, vY4;
|
||||||
vector unsigned char vYD, vYE, vYF, vYG;
|
vector unsigned char vYD, vYE, vYF, vYG;
|
||||||
vector unsigned char vhi0, vlo0, vhi1, vlo1;
|
vector unsigned char vhi0, vlo0, vhi1, vlo1;
|
||||||
int i, head;
|
int i, head;
|
||||||
|
|
||||||
s = (unsigned char *)src;
|
s = (unsigned char *) src;
|
||||||
t0 = (unsigned char *)dest[0];
|
t0 = (unsigned char *) dest[0];
|
||||||
t1 = (unsigned char *)dest[1];
|
t1 = (unsigned char *) dest[1];
|
||||||
|
|
||||||
head = len % 64;
|
head = len % 64;
|
||||||
if (head != 0) {
|
if (head != 0) {
|
||||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||||
}
|
}
|
||||||
|
|
||||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||||
|
|
||||||
for (i = head; i < len - 63; i += 64) {
|
for (i = head; i < len - 63; i += 64) {
|
||||||
vX1 = vec_xl(0, s + i);
|
vX1 = vec_xl(0, s + i);
|
||||||
vX2 = vec_xl(16, s + i);
|
vX2 = vec_xl(16, s + i);
|
||||||
vX3 = vec_xl(32, s + i);
|
vX3 = vec_xl(32, s + i);
|
||||||
vX4 = vec_xl(48, s + i);
|
vX4 = vec_xl(48, s + i);
|
||||||
|
|
||||||
vY1 = vec_xl(0, t0 + i);
|
vY1 = vec_xl(0, t0 + i);
|
||||||
vY2 = vec_xl(16, t0 + i);
|
vY2 = vec_xl(16, t0 + i);
|
||||||
vYD = vec_xl(32, t0 + i);
|
vYD = vec_xl(32, t0 + i);
|
||||||
vYE = vec_xl(48, t0 + i);
|
vYE = vec_xl(48, t0 + i);
|
||||||
|
|
||||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||||
|
|
||||||
vY3 = vec_xl(0, t1 + i);
|
vY3 = vec_xl(0, t1 + i);
|
||||||
vY4 = vec_xl(16, t1 + i);
|
vY4 = vec_xl(16, t1 + i);
|
||||||
vYF = vec_xl(32, t1 + i);
|
vYF = vec_xl(32, t1 + i);
|
||||||
vYG = vec_xl(48, t1 + i);
|
vYG = vec_xl(48, t1 + i);
|
||||||
|
|
||||||
vec_xst(vY1, 0, t0 + i);
|
vec_xst(vY1, 0, t0 + i);
|
||||||
vec_xst(vY2, 16, t0 + i);
|
vec_xst(vY2, 16, t0 + i);
|
||||||
vec_xst(vYD, 32, t0 + i);
|
vec_xst(vYD, 32, t0 + i);
|
||||||
vec_xst(vYE, 48, t0 + i);
|
vec_xst(vYE, 48, t0 + i);
|
||||||
|
|
||||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||||
|
|
||||||
vec_xst(vY3, 0, t1 + i);
|
vec_xst(vY3, 0, t1 + i);
|
||||||
vec_xst(vY4, 16, t1 + i);
|
vec_xst(vY4, 16, t1 + i);
|
||||||
vec_xst(vYF, 32, t1 + i);
|
vec_xst(vYF, 32, t1 + i);
|
||||||
vec_xst(vYG, 48, t1 + i);
|
vec_xst(vYG, 48, t1 + i);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
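
The _mad kernels differ from the dot-product kernels in one respect: they read
the destinations, xor in the contribution of a single source, and write the
result back, so output accumulates across calls. A scalar sketch of the
single-destination case, again reusing the illustrative gf_mul_by_table()
helper; the vec_i * 32 table offset mirrors the (vec_i << 5) indexing above.

/* dest[i] ^= (coefficient for source vec_i) * src[i] in GF(2^8).
 * vec, the total number of sources, only sets the per-destination table
 * stride of vec * 32 bytes and is unused for a single destination. */
static void gf_vect_mad_ref(int len, int vec, int vec_i, const unsigned char *gftbls,
                            const unsigned char *src, unsigned char *dest)
{
        const unsigned char *tbl = &gftbls[vec_i * 32];

        (void) vec;
        for (int i = 0; i < len; i++)
                dest[i] ^= gf_mul_by_table(tbl, src[i]);
}
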
@ -1,104 +1,105 @@
|
|||||||
#include "ec_base_vsx.h"
|
#include "ec_base_vsx.h"
|
||||||
|
|
||||||
void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
void
|
||||||
unsigned char **src, unsigned char **dest)
|
gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest)
|
||||||
{
|
{
|
||||||
unsigned char *s, *t0, *t1, *t2;
|
unsigned char *s, *t0, *t1, *t2;
|
||||||
vector unsigned char vX1, vX2, vX3, vX4;
|
vector unsigned char vX1, vX2, vX3, vX4;
|
||||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
|
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
|
||||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
|
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
|
||||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
||||||
int i, j, head;
|
int i, j, head;
|
||||||
|
|
||||||
if (vlen < 128) {
|
if (vlen < 128) {
|
||||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
|
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
|
||||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
|
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
|
||||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
|
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
|
||||||
|
|
||||||
for (j = 1; j < vlen; j++) {
|
for (j = 1; j < vlen; j++) {
|
||||||
gf_3vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
gf_3vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
t0 = (unsigned char *)dest[0];
|
t0 = (unsigned char *) dest[0];
|
||||||
t1 = (unsigned char *)dest[1];
|
t1 = (unsigned char *) dest[1];
|
||||||
t2 = (unsigned char *)dest[2];
|
t2 = (unsigned char *) dest[2];
|
||||||
|
|
||||||
head = len % 64;
|
head = len % 64;
|
||||||
if (head != 0) {
|
if (head != 0) {
|
||||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||||
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = head; i < len - 63; i += 64) {
|
for (i = head; i < len - 63; i += 64) {
|
||||||
vY1 = vY1 ^ vY1;
|
vY1 = vY1 ^ vY1;
|
||||||
vY2 = vY2 ^ vY2;
|
vY2 = vY2 ^ vY2;
|
||||||
vY3 = vY3 ^ vY3;
|
vY3 = vY3 ^ vY3;
|
||||||
vY4 = vY4 ^ vY4;
|
vY4 = vY4 ^ vY4;
|
||||||
vY5 = vY5 ^ vY5;
|
vY5 = vY5 ^ vY5;
|
||||||
vY6 = vY6 ^ vY6;
|
vY6 = vY6 ^ vY6;
|
||||||
|
|
||||||
vYD = vYD ^ vYD;
|
vYD = vYD ^ vYD;
|
||||||
vYE = vYE ^ vYE;
|
vYE = vYE ^ vYE;
|
||||||
vYF = vYF ^ vYF;
|
vYF = vYF ^ vYF;
|
||||||
vYG = vYG ^ vYG;
|
vYG = vYG ^ vYG;
|
||||||
vYH = vYH ^ vYH;
|
vYH = vYH ^ vYH;
|
||||||
vYI = vYI ^ vYI;
|
vYI = vYI ^ vYI;
|
||||||
|
|
||||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
||||||
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
unsigned char *g1 = &gftbls[1 * 32 * vlen];
|
||||||
unsigned char *g2 = &gftbls[2 * 32 * vlen];
|
unsigned char *g2 = &gftbls[2 * 32 * vlen];
|
||||||
|
|
||||||
for (j = 0; j < vlen; j++) {
|
for (j = 0; j < vlen; j++) {
|
||||||
s = (unsigned char *)src[j];
|
s = (unsigned char *) src[j];
|
||||||
vX1 = vec_xl(0, s + i);
|
vX1 = vec_xl(0, s + i);
|
||||||
vX2 = vec_xl(16, s + i);
|
vX2 = vec_xl(16, s + i);
|
||||||
vX3 = vec_xl(32, s + i);
|
vX3 = vec_xl(32, s + i);
|
||||||
vX4 = vec_xl(48, s + i);
|
vX4 = vec_xl(48, s + i);
|
||||||
|
|
||||||
vlo0 = EC_vec_xl(0, g0);
|
vlo0 = EC_vec_xl(0, g0);
|
||||||
vhi0 = EC_vec_xl(16, g0);
|
vhi0 = EC_vec_xl(16, g0);
|
||||||
vlo1 = EC_vec_xl(0, g1);
|
vlo1 = EC_vec_xl(0, g1);
|
||||||
vhi1 = EC_vec_xl(16, g1);
|
vhi1 = EC_vec_xl(16, g1);
|
||||||
|
|
||||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||||
|
|
||||||
vlo2 = vec_xl(0, g2);
|
vlo2 = vec_xl(0, g2);
|
||||||
vhi2 = vec_xl(16, g2);
|
vhi2 = vec_xl(16, g2);
|
||||||
|
|
||||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||||
|
|
||||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||||
|
|
||||||
g0 += 32;
|
g0 += 32;
|
||||||
g1 += 32;
|
g1 += 32;
|
||||||
g2 += 32;
|
g2 += 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
vec_xst(vY1, 0, t0 + i);
|
vec_xst(vY1, 0, t0 + i);
|
||||||
vec_xst(vY2, 16, t0 + i);
|
vec_xst(vY2, 16, t0 + i);
|
||||||
vec_xst(vY3, 0, t1 + i);
|
vec_xst(vY3, 0, t1 + i);
|
||||||
vec_xst(vY4, 16, t1 + i);
|
vec_xst(vY4, 16, t1 + i);
|
||||||
vec_xst(vY5, 0, t2 + i);
|
vec_xst(vY5, 0, t2 + i);
|
||||||
vec_xst(vY6, 16, t2 + i);
|
vec_xst(vY6, 16, t2 + i);
|
||||||
|
|
||||||
vec_xst(vYD, 32, t0 + i);
|
vec_xst(vYD, 32, t0 + i);
|
||||||
vec_xst(vYE, 48, t0 + i);
|
vec_xst(vYE, 48, t0 + i);
|
||||||
vec_xst(vYF, 32, t1 + i);
|
vec_xst(vYF, 32, t1 + i);
|
||||||
vec_xst(vYG, 48, t1 + i);
|
vec_xst(vYG, 48, t1 + i);
|
||||||
vec_xst(vYH, 32, t2 + i);
|
vec_xst(vYH, 32, t2 + i);
|
||||||
vec_xst(vYI, 48, t2 + i);
|
vec_xst(vYI, 48, t2 + i);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1,84 +1,85 @@
|
|||||||
#include "ec_base_vsx.h"
|
#include "ec_base_vsx.h"
|
||||||
|
|
||||||
void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
|
void
|
||||||
unsigned char *src, unsigned char **dest)
|
gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||||
|
unsigned char **dest)
|
||||||
{
|
{
|
||||||
unsigned char *s, *t0, *t1, *t2;
|
unsigned char *s, *t0, *t1, *t2;
|
||||||
vector unsigned char vX1, vX2, vX3, vX4;
|
vector unsigned char vX1, vX2, vX3, vX4;
|
||||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
|
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
|
||||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
|
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
|
||||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
|
||||||
int i, head;
|
int i, head;
|
||||||
|
|
||||||
s = (unsigned char *)src;
|
s = (unsigned char *) src;
|
||||||
t0 = (unsigned char *)dest[0];
|
t0 = (unsigned char *) dest[0];
|
||||||
t1 = (unsigned char *)dest[1];
|
t1 = (unsigned char *) dest[1];
|
||||||
t2 = (unsigned char *)dest[2];
|
t2 = (unsigned char *) dest[2];
|
||||||
|
|
||||||
head = len % 64;
|
head = len % 64;
|
||||||
if (head != 0) {
|
if (head != 0) {
|
||||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
|
||||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
|
||||||
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
|
gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
|
||||||
}
|
}
|
||||||
|
|
||||||
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||||
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
|
||||||
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||||
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
|
||||||
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||||
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
|
||||||
|
|
||||||
for (i = head; i < len - 63; i += 64) {
|
for (i = head; i < len - 63; i += 64) {
|
||||||
vX1 = vec_xl(0, s + i);
|
vX1 = vec_xl(0, s + i);
|
||||||
vX2 = vec_xl(16, s + i);
|
vX2 = vec_xl(16, s + i);
|
||||||
vX3 = vec_xl(32, s + i);
|
vX3 = vec_xl(32, s + i);
|
||||||
vX4 = vec_xl(48, s + i);
|
vX4 = vec_xl(48, s + i);
|
||||||
|
|
||||||
vY1 = vec_xl(0, t0 + i);
|
vY1 = vec_xl(0, t0 + i);
|
||||||
vY2 = vec_xl(16, t0 + i);
|
vY2 = vec_xl(16, t0 + i);
|
||||||
vYD = vec_xl(32, t0 + i);
|
vYD = vec_xl(32, t0 + i);
|
||||||
vYE = vec_xl(48, t0 + i);
|
vYE = vec_xl(48, t0 + i);
|
||||||
|
|
||||||
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
|
||||||
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
|
||||||
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
|
||||||
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
|
||||||
|
|
||||||
vY3 = vec_xl(0, t1 + i);
|
vY3 = vec_xl(0, t1 + i);
|
||||||
vY4 = vec_xl(16, t1 + i);
|
vY4 = vec_xl(16, t1 + i);
|
||||||
vYF = vec_xl(32, t1 + i);
|
vYF = vec_xl(32, t1 + i);
|
||||||
vYG = vec_xl(48, t1 + i);
|
vYG = vec_xl(48, t1 + i);
|
||||||
|
|
||||||
vec_xst(vY1, 0, t0 + i);
|
vec_xst(vY1, 0, t0 + i);
|
||||||
vec_xst(vY2, 16, t0 + i);
|
vec_xst(vY2, 16, t0 + i);
|
||||||
vec_xst(vYD, 32, t0 + i);
|
vec_xst(vYD, 32, t0 + i);
|
||||||
vec_xst(vYE, 48, t0 + i);
|
vec_xst(vYE, 48, t0 + i);
|
||||||
|
|
||||||
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
|
||||||
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
|
||||||
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
|
||||||
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
|
||||||
|
|
||||||
vY5 = vec_xl(0, t2 + i);
|
vY5 = vec_xl(0, t2 + i);
|
||||||
vY6 = vec_xl(16, t2 + i);
|
vY6 = vec_xl(16, t2 + i);
|
||||||
vYH = vec_xl(32, t2 + i);
|
vYH = vec_xl(32, t2 + i);
|
||||||
vYI = vec_xl(48, t2 + i);
|
vYI = vec_xl(48, t2 + i);
|
||||||
|
|
||||||
vec_xst(vY3, 0, t1 + i);
|
vec_xst(vY3, 0, t1 + i);
|
||||||
vec_xst(vY4, 16, t1 + i);
|
vec_xst(vY4, 16, t1 + i);
|
||||||
vec_xst(vYF, 32, t1 + i);
|
vec_xst(vYF, 32, t1 + i);
|
||||||
vec_xst(vYG, 48, t1 + i);
|
vec_xst(vYG, 48, t1 + i);
|
||||||
|
|
||||||
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
|
||||||
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
|
||||||
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
|
||||||
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
|
||||||
|
|
||||||
vec_xst(vY5, 0, t2 + i);
|
vec_xst(vY5, 0, t2 + i);
|
||||||
vec_xst(vY6, 16, t2 + i);
|
vec_xst(vY6, 16, t2 + i);
|
||||||
vec_xst(vYH, 32, t2 + i);
|
vec_xst(vYH, 32, t2 + i);
|
||||||
vec_xst(vYI, 48, t2 + i);
|
vec_xst(vYI, 48, t2 + i);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
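
These _mad kernels are what the ec_encode_data_update() dispatcher earlier in
this patch fans out to, one source fragment at a time. The sketch below shows
that incremental path end to end; it assumes g_tbls came from ec_init_tables()
as in the earlier encode sketch, and that accumulation starts from zeroed
parity so the result matches a one-shot ec_encode_data() call.

#include <string.h>
#include "erasure_code.h"

/* Build p parity fragments by folding in one data fragment per call. */
void encode_by_updates(int len, int k, int p, unsigned char *g_tbls,
                       unsigned char **data, unsigned char **parity)
{
        /* the update path accumulates with xor, so start from zeroed parity */
        for (int n = 0; n < p; n++)
                memset(parity[n], 0, len);

        for (int i = 0; i < k; i++)
                ec_encode_data_update(len, k, p, i, g_tbls, data[i], parity);
}
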
@ -1,124 +1,125 @@
|
|||||||
#include "ec_base_vsx.h"
|
#include "ec_base_vsx.h"
|
||||||
|
|
||||||
void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
|
void
|
||||||
unsigned char **src, unsigned char **dest)
|
gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
|
||||||
|
unsigned char **dest)
|
||||||
{
|
{
|
||||||
unsigned char *s, *t0, *t1, *t2, *t3;
|
unsigned char *s, *t0, *t1, *t2, *t3;
|
||||||
vector unsigned char vX1, vX2, vX3, vX4;
|
vector unsigned char vX1, vX2, vX3, vX4;
|
||||||
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
|
vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
|
||||||
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
|
vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
|
||||||
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
|
vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
|
||||||
int i, j, head;
|
int i, j, head;
|
||||||
|
|
||||||
if (vlen < 128) {
|
if (vlen < 128) {
|
||||||
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
|
gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
|
||||||
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
|
gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
|
||||||
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
|
gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
|
||||||
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]);
|
gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *) dest[3]);
|
||||||
|
|
||||||
for (j = 1; j < vlen; j++) {
|
for (j = 1; j < vlen; j++) {
|
||||||
gf_4vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
gf_4vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
t0 = (unsigned char *)dest[0];
|
t0 = (unsigned char *) dest[0];
|
||||||
t1 = (unsigned char *)dest[1];
|
t1 = (unsigned char *) dest[1];
|
||||||
t2 = (unsigned char *)dest[2];
|
t2 = (unsigned char *) dest[2];
|
||||||
t3 = (unsigned char *)dest[3];
|
t3 = (unsigned char *) dest[3];
|
||||||
|
|
||||||
head = len % 64;
|
head = len % 64;
|
||||||
if (head != 0) {
|
if (head != 0) {
|
||||||
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
|
||||||
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
|
||||||
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
|
||||||
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
|
gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = head; i < len - 63; i += 64) {
|
for (i = head; i < len - 63; i += 64) {
|
||||||
vY1 = vY1 ^ vY1;
|
vY1 = vY1 ^ vY1;
|
||||||
vY2 = vY2 ^ vY2;
|
vY2 = vY2 ^ vY2;
|
||||||
vY3 = vY3 ^ vY3;
|
vY3 = vY3 ^ vY3;
|
||||||
vY4 = vY4 ^ vY4;
|
vY4 = vY4 ^ vY4;
|
||||||
vY5 = vY5 ^ vY5;
|
vY5 = vY5 ^ vY5;
|
||||||
vY6 = vY6 ^ vY6;
|
vY6 = vY6 ^ vY6;
|
||||||
vY7 = vY7 ^ vY7;
|
vY7 = vY7 ^ vY7;
|
||||||
vY8 = vY8 ^ vY8;
|
vY8 = vY8 ^ vY8;
|
||||||
|
|
||||||
vYD = vYD ^ vYD;
|
vYD = vYD ^ vYD;
|
||||||
vYE = vYE ^ vYE;
|
vYE = vYE ^ vYE;
|
||||||
vYF = vYF ^ vYF;
|
vYF = vYF ^ vYF;
|
||||||
vYG = vYG ^ vYG;
|
vYG = vYG ^ vYG;
|
||||||
vYH = vYH ^ vYH;
|
vYH = vYH ^ vYH;
|
||||||
vYI = vYI ^ vYI;
|
vYI = vYI ^ vYI;
|
||||||
vYJ = vYJ ^ vYJ;
|
vYJ = vYJ ^ vYJ;
|
||||||
vYK = vYK ^ vYK;
|
vYK = vYK ^ vYK;
|
||||||
|
|
||||||
unsigned char *g0 = &gftbls[0 * 32 * vlen];
|
                unsigned char *g0 = &gftbls[0 * 32 * vlen];
                unsigned char *g1 = &gftbls[1 * 32 * vlen];
                unsigned char *g2 = &gftbls[2 * 32 * vlen];
                unsigned char *g3 = &gftbls[3 * 32 * vlen];

                for (j = 0; j < vlen; j++) {
                        s = (unsigned char *) src[j];
                        vX1 = vec_xl(0, s + i);
                        vX2 = vec_xl(16, s + i);
                        vX3 = vec_xl(32, s + i);
                        vX4 = vec_xl(48, s + i);

                        vlo0 = EC_vec_xl(0, g0);
                        vhi0 = EC_vec_xl(16, g0);
                        vlo1 = EC_vec_xl(0, g1);
                        vhi1 = EC_vec_xl(16, g1);

                        vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
                        vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
                        vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
                        vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);

                        vlo2 = vec_xl(0, g2);
                        vhi2 = vec_xl(16, g2);
                        vlo3 = vec_xl(0, g3);
                        vhi3 = vec_xl(16, g3);

                        vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
                        vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
                        vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
                        vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);

                        vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
                        vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
                        vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
                        vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);

                        vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
                        vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
                        vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
                        vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);

                        g0 += 32;
                        g1 += 32;
                        g2 += 32;
                        g3 += 32;
                }

                vec_xst(vY1, 0, t0 + i);
                vec_xst(vY2, 16, t0 + i);
                vec_xst(vY3, 0, t1 + i);
                vec_xst(vY4, 16, t1 + i);
                vec_xst(vY5, 0, t2 + i);
                vec_xst(vY6, 16, t2 + i);
                vec_xst(vY7, 0, t3 + i);
                vec_xst(vY8, 16, t3 + i);

                vec_xst(vYD, 32, t0 + i);
                vec_xst(vYE, 48, t0 + i);
                vec_xst(vYF, 32, t1 + i);
                vec_xst(vYG, 48, t1 + i);
                vec_xst(vYH, 32, t2 + i);
                vec_xst(vYI, 48, t2 + i);
                vec_xst(vYJ, 32, t3 + i);
                vec_xst(vYK, 48, t3 + i);
        }
        return;
}
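For reference, the EC_vec_permxor() calls in these kernels implement the usual nibble-table GF(2^8) multiply: each 32-byte block of gftbls holds the products of one coefficient with every low nibble (first 16 bytes) and every high nibble (next 16 bytes), and the permute-plus-XOR combines the two partial products per byte. A minimal scalar sketch of that lookup, assuming this 32-byte layout; the helper name is illustrative and not part of isa-l:

/* Scalar equivalent of one permxor lane, assuming tbl[0..15] = c * (low nibble)
 * and tbl[16..31] = c * (high nibble << 4), as stored per coefficient in gftbls.
 * Illustrative helper only.
 */
static unsigned char
gf_mul_nibble_tbl(const unsigned char *tbl, unsigned char x)
{
        /* c * x = c * (x & 0xf) XOR c * (x & 0xf0), since XOR is GF(2^8) addition */
        return tbl[x & 0xf] ^ tbl[16 + (x >> 4)];
}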

@@ -1,103 +1,104 @@
#include "ec_base_vsx.h"

void
gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest)
{
        unsigned char *s, *t0, *t1, *t2, *t3;
        vector unsigned char vX1, vX2, vX3, vX4;
        vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
        vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
        vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
        int i, head;

        s = (unsigned char *) src;
        t0 = (unsigned char *) dest[0];
        t1 = (unsigned char *) dest[1];
        t2 = (unsigned char *) dest[2];
        t3 = (unsigned char *) dest[3];

        head = len % 64;
        if (head != 0) {
                gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
                gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
                gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
                gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
        }

        vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
        vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
        vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
        vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
        vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
        vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
        vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
        vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));

        for (i = head; i < len - 63; i += 64) {
                vX1 = vec_xl(0, s + i);
                vX2 = vec_xl(16, s + i);
                vX3 = vec_xl(32, s + i);
                vX4 = vec_xl(48, s + i);

                vY1 = vec_xl(0, t0 + i);
                vY2 = vec_xl(16, t0 + i);
                vYD = vec_xl(32, t0 + i);
                vYE = vec_xl(48, t0 + i);

                vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
                vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
                vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
                vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);

                vY3 = vec_xl(0, t1 + i);
                vY4 = vec_xl(16, t1 + i);
                vYF = vec_xl(32, t1 + i);
                vYG = vec_xl(48, t1 + i);

                vec_xst(vY1, 0, t0 + i);
                vec_xst(vY2, 16, t0 + i);
                vec_xst(vYD, 32, t0 + i);
                vec_xst(vYE, 48, t0 + i);

                vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
                vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
                vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
                vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);

                vY5 = vec_xl(0, t2 + i);
                vY6 = vec_xl(16, t2 + i);
                vYH = vec_xl(32, t2 + i);
                vYI = vec_xl(48, t2 + i);

                vec_xst(vY3, 0, t1 + i);
                vec_xst(vY4, 16, t1 + i);
                vec_xst(vYF, 32, t1 + i);
                vec_xst(vYG, 48, t1 + i);

                vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
                vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
                vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
                vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);

                vY7 = vec_xl(0, t3 + i);
                vY8 = vec_xl(16, t3 + i);
                vYJ = vec_xl(32, t3 + i);
                vYK = vec_xl(48, t3 + i);

                vec_xst(vY5, 0, t2 + i);
                vec_xst(vY6, 16, t2 + i);
                vec_xst(vYH, 32, t2 + i);
                vec_xst(vYI, 48, t2 + i);

                vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
                vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
                vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
                vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);

                vec_xst(vY7, 0, t3 + i);
                vec_xst(vY8, 16, t3 + i);
                vec_xst(vYJ, 32, t3 + i);
                vec_xst(vYK, 48, t3 + i);
        }
        return;
}

@@ -1,145 +1,146 @@
#include "ec_base_vsx.h"

void
gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest)
{
        unsigned char *s, *t0, *t1, *t2, *t3, *t4;
        vector unsigned char vX1, vX2, vX3, vX4;
        vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
        vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
        vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
        int i, j, head;

        if (vlen < 128) {
                gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
                gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
                gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
                gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *) dest[3]);
                gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *) dest[4]);

                for (j = 1; j < vlen; j++) {
                        gf_5vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
                }
                return;
        }

        t0 = (unsigned char *) dest[0];
        t1 = (unsigned char *) dest[1];
        t2 = (unsigned char *) dest[2];
        t3 = (unsigned char *) dest[3];
        t4 = (unsigned char *) dest[4];

        head = len % 64;
        if (head != 0) {
                gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
                gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
                gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
                gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
                gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
        }

        for (i = head; i < len - 63; i += 64) {
                vY1 = vY1 ^ vY1;
                vY2 = vY2 ^ vY2;
                vY3 = vY3 ^ vY3;
                vY4 = vY4 ^ vY4;
                vY5 = vY5 ^ vY5;
                vY6 = vY6 ^ vY6;
                vY7 = vY7 ^ vY7;
                vY8 = vY8 ^ vY8;
                vY9 = vY9 ^ vY9;
                vYA = vYA ^ vYA;

                vYD = vYD ^ vYD;
                vYE = vYE ^ vYE;
                vYF = vYF ^ vYF;
                vYG = vYG ^ vYG;
                vYH = vYH ^ vYH;
                vYI = vYI ^ vYI;
                vYJ = vYJ ^ vYJ;
                vYK = vYK ^ vYK;
                vYL = vYL ^ vYL;
                vYM = vYM ^ vYM;

                unsigned char *g0 = &gftbls[0 * 32 * vlen];
                unsigned char *g1 = &gftbls[1 * 32 * vlen];
                unsigned char *g2 = &gftbls[2 * 32 * vlen];
                unsigned char *g3 = &gftbls[3 * 32 * vlen];
                unsigned char *g4 = &gftbls[4 * 32 * vlen];

                for (j = 0; j < vlen; j++) {
                        s = (unsigned char *) src[j];
                        vX1 = vec_xl(0, s + i);
                        vX2 = vec_xl(16, s + i);
                        vX3 = vec_xl(32, s + i);
                        vX4 = vec_xl(48, s + i);

                        vlo0 = EC_vec_xl(0, g0);
                        vhi0 = EC_vec_xl(16, g0);
                        vlo1 = EC_vec_xl(0, g1);
                        vhi1 = EC_vec_xl(16, g1);

                        vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
                        vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
                        vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
                        vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);

                        vlo2 = vec_xl(0, g2);
                        vhi2 = vec_xl(16, g2);
                        vlo3 = vec_xl(0, g3);
                        vhi3 = vec_xl(16, g3);

                        vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
                        vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
                        vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
                        vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);

                        vlo4 = vec_xl(0, g4);
                        vhi4 = vec_xl(16, g4);

                        vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
                        vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
                        vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
                        vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);

                        vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
                        vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
                        vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
                        vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);

                        vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
                        vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
                        vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
                        vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);

                        g0 += 32;
                        g1 += 32;
                        g2 += 32;
                        g3 += 32;
                        g4 += 32;
                }

                vec_xst(vY1, 0, t0 + i);
                vec_xst(vY2, 16, t0 + i);
                vec_xst(vY3, 0, t1 + i);
                vec_xst(vY4, 16, t1 + i);
                vec_xst(vY5, 0, t2 + i);
                vec_xst(vY6, 16, t2 + i);
                vec_xst(vY7, 0, t3 + i);
                vec_xst(vY8, 16, t3 + i);
                vec_xst(vY9, 0, t4 + i);
                vec_xst(vYA, 16, t4 + i);

                vec_xst(vYD, 32, t0 + i);
                vec_xst(vYE, 48, t0 + i);
                vec_xst(vYF, 32, t1 + i);
                vec_xst(vYG, 48, t1 + i);
                vec_xst(vYH, 32, t2 + i);
                vec_xst(vYI, 48, t2 + i);
                vec_xst(vYJ, 32, t3 + i);
                vec_xst(vYK, 48, t3 + i);
                vec_xst(vYL, 32, t4 + i);
                vec_xst(vYM, 48, t4 + i);
        }
        return;
}

@@ -1,122 +1,123 @@
#include "ec_base_vsx.h"

void
gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest)
{
        unsigned char *s, *t0, *t1, *t2, *t3, *t4;
        vector unsigned char vX1, vX2, vX3, vX4;
        vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
        vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
        vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
        int i, head;

        s = (unsigned char *) src;
        t0 = (unsigned char *) dest[0];
        t1 = (unsigned char *) dest[1];
        t2 = (unsigned char *) dest[2];
        t3 = (unsigned char *) dest[3];
        t4 = (unsigned char *) dest[4];

        head = len % 64;
        if (head != 0) {
                gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
                gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
                gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
                gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
                gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
        }

        vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
        vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
        vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
        vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
        vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
        vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
        vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
        vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
        vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
        vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));

        for (i = head; i < len - 63; i += 64) {
                vX1 = vec_xl(0, s + i);
                vX2 = vec_xl(16, s + i);
                vX3 = vec_xl(32, s + i);
                vX4 = vec_xl(48, s + i);

                vY1 = vec_xl(0, t0 + i);
                vY2 = vec_xl(16, t0 + i);
                vYD = vec_xl(32, t0 + i);
                vYE = vec_xl(48, t0 + i);

                vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
                vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
                vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
                vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);

                vY3 = vec_xl(0, t1 + i);
                vY4 = vec_xl(16, t1 + i);
                vYF = vec_xl(32, t1 + i);
                vYG = vec_xl(48, t1 + i);

                vec_xst(vY1, 0, t0 + i);
                vec_xst(vY2, 16, t0 + i);
                vec_xst(vYD, 32, t0 + i);
                vec_xst(vYE, 48, t0 + i);

                vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
                vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
                vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
                vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);

                vY5 = vec_xl(0, t2 + i);
                vY6 = vec_xl(16, t2 + i);
                vYH = vec_xl(32, t2 + i);
                vYI = vec_xl(48, t2 + i);

                vec_xst(vY3, 0, t1 + i);
                vec_xst(vY4, 16, t1 + i);
                vec_xst(vYF, 32, t1 + i);
                vec_xst(vYG, 48, t1 + i);

                vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
                vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
                vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
                vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);

                vY7 = vec_xl(0, t3 + i);
                vY8 = vec_xl(16, t3 + i);
                vYJ = vec_xl(32, t3 + i);
                vYK = vec_xl(48, t3 + i);

                vec_xst(vY5, 0, t2 + i);
                vec_xst(vY6, 16, t2 + i);
                vec_xst(vYH, 32, t2 + i);
                vec_xst(vYI, 48, t2 + i);

                vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
                vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
                vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
                vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);

                vY9 = vec_xl(0, t4 + i);
                vYA = vec_xl(16, t4 + i);
                vYL = vec_xl(32, t4 + i);
                vYM = vec_xl(48, t4 + i);

                vec_xst(vY7, 0, t3 + i);
                vec_xst(vY8, 16, t3 + i);
                vec_xst(vYJ, 32, t3 + i);
                vec_xst(vYK, 48, t3 + i);

                vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
                vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
                vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
                vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);

                vec_xst(vY9, 0, t4 + i);
                vec_xst(vYA, 16, t4 + i);
                vec_xst(vYL, 32, t4 + i);
                vec_xst(vYM, 48, t4 + i);
        }
        return;
}

@@ -1,166 +1,167 @@
#include "ec_base_vsx.h"

void
gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                      unsigned char **dest)
{
        unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
        vector unsigned char vX1, vX2, vX3, vX4;
        vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
        vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
        vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
        vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
        int i, j, head;

        if (vlen < 128) {
                gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest[0]);
                gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *) dest[1]);
                gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *) dest[2]);
                gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *) dest[3]);
                gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *) dest[4]);
                gf_vect_mul_vsx(len, &gftbls[5 * 32 * vlen], src[0], (unsigned char *) dest[5]);

                for (j = 1; j < vlen; j++) {
                        gf_6vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
                }
                return;
        }

        t0 = (unsigned char *) dest[0];
        t1 = (unsigned char *) dest[1];
        t2 = (unsigned char *) dest[2];
        t3 = (unsigned char *) dest[3];
        t4 = (unsigned char *) dest[4];
        t5 = (unsigned char *) dest[5];

        head = len % 64;
        if (head != 0) {
                gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
                gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
                gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
                gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
                gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
                gf_vect_dot_prod_base(head, vlen, &gftbls[5 * 32 * vlen], src, t5);
        }

        for (i = head; i < len - 63; i += 64) {
                vY1 = vY1 ^ vY1;
                vY2 = vY2 ^ vY2;
                vY3 = vY3 ^ vY3;
                vY4 = vY4 ^ vY4;
                vY5 = vY5 ^ vY5;
                vY6 = vY6 ^ vY6;
                vY7 = vY7 ^ vY7;
                vY8 = vY8 ^ vY8;
                vY9 = vY9 ^ vY9;
                vYA = vYA ^ vYA;
                vYB = vYB ^ vYB;
                vYC = vYC ^ vYC;

                vYD = vYD ^ vYD;
                vYE = vYE ^ vYE;
                vYF = vYF ^ vYF;
                vYG = vYG ^ vYG;
                vYH = vYH ^ vYH;
                vYI = vYI ^ vYI;
                vYJ = vYJ ^ vYJ;
                vYK = vYK ^ vYK;
                vYL = vYL ^ vYL;
                vYM = vYM ^ vYM;
                vYN = vYN ^ vYN;
                vYO = vYO ^ vYO;

                unsigned char *g0 = &gftbls[0 * 32 * vlen];
                unsigned char *g1 = &gftbls[1 * 32 * vlen];
                unsigned char *g2 = &gftbls[2 * 32 * vlen];
                unsigned char *g3 = &gftbls[3 * 32 * vlen];
                unsigned char *g4 = &gftbls[4 * 32 * vlen];
                unsigned char *g5 = &gftbls[5 * 32 * vlen];

                for (j = 0; j < vlen; j++) {
                        s = (unsigned char *) src[j];
                        vX1 = vec_xl(0, s + i);
                        vX2 = vec_xl(16, s + i);
                        vX3 = vec_xl(32, s + i);
                        vX4 = vec_xl(48, s + i);

                        vlo0 = EC_vec_xl(0, g0);
                        vhi0 = EC_vec_xl(16, g0);
                        vlo1 = EC_vec_xl(0, g1);
                        vhi1 = EC_vec_xl(16, g1);

                        vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
                        vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
                        vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
                        vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);

                        vlo2 = EC_vec_xl(0, g2);
                        vhi2 = EC_vec_xl(16, g2);
                        vlo3 = EC_vec_xl(0, g3);
                        vhi3 = EC_vec_xl(16, g3);

                        vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
                        vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
                        vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
                        vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);

                        vlo4 = EC_vec_xl(0, g4);
                        vhi4 = EC_vec_xl(16, g4);
                        vlo5 = EC_vec_xl(0, g5);
                        vhi5 = EC_vec_xl(16, g5);

                        vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
                        vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
                        vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
                        vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);

                        vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
                        vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
                        vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
                        vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);

                        vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
                        vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
                        vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
                        vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);

                        vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
                        vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
                        vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
                        vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);

                        g0 += 32;
                        g1 += 32;
                        g2 += 32;
                        g3 += 32;
                        g4 += 32;
                        g5 += 32;
                }

                vec_xst(vY1, 0, t0 + i);
                vec_xst(vY2, 16, t0 + i);
                vec_xst(vY3, 0, t1 + i);
                vec_xst(vY4, 16, t1 + i);
                vec_xst(vY5, 0, t2 + i);
                vec_xst(vY6, 16, t2 + i);
                vec_xst(vY7, 0, t3 + i);
                vec_xst(vY8, 16, t3 + i);
                vec_xst(vY9, 0, t4 + i);
                vec_xst(vYA, 16, t4 + i);
                vec_xst(vYB, 0, t5 + i);
                vec_xst(vYC, 16, t5 + i);

                vec_xst(vYD, 32, t0 + i);
                vec_xst(vYE, 48, t0 + i);
                vec_xst(vYF, 32, t1 + i);
                vec_xst(vYG, 48, t1 + i);
                vec_xst(vYH, 32, t2 + i);
                vec_xst(vYI, 48, t2 + i);
                vec_xst(vYJ, 32, t3 + i);
                vec_xst(vYK, 48, t3 + i);
                vec_xst(vYL, 32, t4 + i);
                vec_xst(vYM, 48, t4 + i);
                vec_xst(vYN, 32, t5 + i);
                vec_xst(vYO, 48, t5 + i);
        }
        return;
}

@@ -1,142 +1,143 @@
#include "ec_base_vsx.h"

void
gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                 unsigned char **dest)
{
        unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
        vector unsigned char vX1, vX2, vX3, vX4;
        vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
        vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
        vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
        vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
        int i, head;

        s = (unsigned char *) src;
        t0 = (unsigned char *) dest[0];
        t1 = (unsigned char *) dest[1];
        t2 = (unsigned char *) dest[2];
        t3 = (unsigned char *) dest[3];
        t4 = (unsigned char *) dest[4];
        t5 = (unsigned char *) dest[5];

        head = len % 64;
        if (head != 0) {
                gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
                gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
                gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
                gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
                gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
                gf_vect_mad_base(head, vec, vec_i, &gftbls[5 * 32 * vec], src, t5);
        }

        vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
        vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
        vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
        vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
        vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
        vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
        vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
        vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
        vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
        vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
        vlo5 = EC_vec_xl(0, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
        vhi5 = EC_vec_xl(16, gftbls + (((5 * vec) << 5) + (vec_i << 5)));

        for (i = head; i < len - 63; i += 64) {
                vX1 = vec_xl(0, s + i);
                vX2 = vec_xl(16, s + i);
                vX3 = vec_xl(32, s + i);
                vX4 = vec_xl(48, s + i);

                vY1 = vec_xl(0, t0 + i);
                vY2 = vec_xl(16, t0 + i);
                vYD = vec_xl(32, t0 + i);
                vYE = vec_xl(48, t0 + i);

                vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
                vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
                vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
                vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);

                vec_xst(vY1, 0, t0 + i);
                vec_xst(vY2, 16, t0 + i);
                vec_xst(vYD, 32, t0 + i);
                vec_xst(vYE, 48, t0 + i);

                vY3 = vec_xl(0, t1 + i);
                vY4 = vec_xl(16, t1 + i);
                vYF = vec_xl(32, t1 + i);
                vYG = vec_xl(48, t1 + i);

                vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
                vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
                vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
                vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);

                vec_xst(vY3, 0, t1 + i);
                vec_xst(vY4, 16, t1 + i);
                vec_xst(vYF, 32, t1 + i);
                vec_xst(vYG, 48, t1 + i);

                vY5 = vec_xl(0, t2 + i);
                vY6 = vec_xl(16, t2 + i);
                vYH = vec_xl(32, t2 + i);
                vYI = vec_xl(48, t2 + i);

                vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
                vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
                vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
                vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);

                vY7 = vec_xl(0, t3 + i);
                vY8 = vec_xl(16, t3 + i);
                vYJ = vec_xl(32, t3 + i);
                vYK = vec_xl(48, t3 + i);

                vec_xst(vY5, 0, t2 + i);
                vec_xst(vY6, 16, t2 + i);
                vec_xst(vYH, 32, t2 + i);
                vec_xst(vYI, 48, t2 + i);

                vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
                vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
                vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
                vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);

                vY9 = vec_xl(0, t4 + i);
                vYA = vec_xl(16, t4 + i);
                vYL = vec_xl(32, t4 + i);
                vYM = vec_xl(48, t4 + i);

                vec_xst(vY7, 0, t3 + i);
                vec_xst(vY8, 16, t3 + i);
                vec_xst(vYJ, 32, t3 + i);
                vec_xst(vYK, 48, t3 + i);

                vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
                vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
                vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
                vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);

                vYB = vec_xl(0, t5 + i);
                vYC = vec_xl(16, t5 + i);
                vYN = vec_xl(32, t5 + i);
                vYO = vec_xl(48, t5 + i);

                vec_xst(vY9, 0, t4 + i);
                vec_xst(vYA, 16, t4 + i);
                vec_xst(vYL, 32, t4 + i);
                vec_xst(vYM, 48, t4 + i);

                vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
                vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
                vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
                vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);

                vec_xst(vYB, 0, t5 + i);
                vec_xst(vYC, 16, t5 + i);
                vec_xst(vYN, 32, t5 + i);
                vec_xst(vYO, 48, t5 + i);
        }
        return;
}

@@ -1,85 +1,86 @@
#include "ec_base_vsx.h"

void
gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                     unsigned char *dest)
{
        unsigned char *s, *t0;
        vector unsigned char vX1, vY1;
        vector unsigned char vX2, vY2;
        vector unsigned char vX3, vY3;
        vector unsigned char vX4, vY4;
        vector unsigned char vX5, vY5;
        vector unsigned char vX6, vY6;
        vector unsigned char vX7, vY7;
        vector unsigned char vX8, vY8;
        vector unsigned char vhi0, vlo0;
        int i, j, head;

        if (vlen < 128) {
                gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *) dest);

                for (j = 1; j < vlen; j++) {
                        gf_vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
                }
                return;
        }

        t0 = (unsigned char *) dest;

        head = len % 128;
        if (head != 0) {
                gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
        }

        for (i = head; i < len - 127; i += 128) {
                vY1 = vY1 ^ vY1;
                vY2 = vY2 ^ vY2;
                vY3 = vY3 ^ vY3;
                vY4 = vY4 ^ vY4;

                vY5 = vY5 ^ vY5;
                vY6 = vY6 ^ vY6;
                vY7 = vY7 ^ vY7;
                vY8 = vY8 ^ vY8;

                unsigned char *g0 = &gftbls[0 * 32 * vlen];

                for (j = 0; j < vlen; j++) {
                        s = (unsigned char *) src[j];
                        vX1 = vec_xl(0, s + i);
                        vX2 = vec_xl(16, s + i);
                        vX3 = vec_xl(32, s + i);
                        vX4 = vec_xl(48, s + i);

                        vlo0 = EC_vec_xl(0, g0);
                        vhi0 = EC_vec_xl(16, g0);

                        vX5 = vec_xl(64, s + i);
                        vX6 = vec_xl(80, s + i);
                        vX7 = vec_xl(96, s + i);
                        vX8 = vec_xl(112, s + i);

                        vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
                        vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
                        vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
                        vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);

                        vY5 = vY5 ^ EC_vec_permxor(vhi0, vlo0, vX5);
                        vY6 = vY6 ^ EC_vec_permxor(vhi0, vlo0, vX6);
                        vY7 = vY7 ^ EC_vec_permxor(vhi0, vlo0, vX7);
                        vY8 = vY8 ^ EC_vec_permxor(vhi0, vlo0, vX8);

                        g0 += 32;
                }
                vec_xst(vY1, 0, t0 + i);
                vec_xst(vY2, 16, t0 + i);
                vec_xst(vY3, 32, t0 + i);
                vec_xst(vY4, 48, t0 + i);

                vec_xst(vY5, 64, t0 + i);
                vec_xst(vY6, 80, t0 + i);
                vec_xst(vY7, 96, t0 + i);
                vec_xst(vY8, 112, t0 + i);
        }
        return;
}
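As a cross-check of what the single-destination kernel above computes, here is a plain scalar reference under the same table layout (32 bytes of gftbls per source, with byte 1 of each block holding the raw coefficient, as noted in the _gf_vect_mul_base() comment below); gf_mul() is the byte multiply from ec_base.h, and the function name here is illustrative only:

/* dest[i] = XOR over j of (coef_j * src[j][i]) in GF(2^8) -- scalar sketch. */
static void
gf_vect_dot_prod_ref(int len, int vlen, unsigned char *gftbls, unsigned char **src,
                     unsigned char *dest)
{
        int i, j;

        for (i = 0; i < len; i++) {
                unsigned char p = 0;

                for (j = 0; j < vlen; j++)
                        p ^= gf_mul(gftbls[j * 32 + 1], src[j][i]);
                dest[i] = p;
        }
}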

@@ -1,48 +1,49 @@
#include "ec_base_vsx.h"

void
gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
                unsigned char *dest)
{
        unsigned char *s, *t0;
        vector unsigned char vX1, vY1;
        vector unsigned char vX2, vY2;
        vector unsigned char vX3, vY3;
        vector unsigned char vX4, vY4;
        vector unsigned char vhi0, vlo0;
        int i, head;

        s = (unsigned char *) src;
        t0 = (unsigned char *) dest;

        head = len % 64;
        if (head != 0) {
                gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, dest);
        }

        vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
        vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));

        for (i = head; i < len - 63; i += 64) {
                vX1 = vec_xl(0, s + i);
                vX2 = vec_xl(16, s + i);
                vX3 = vec_xl(32, s + i);
                vX4 = vec_xl(48, s + i);

                vY1 = vec_xl(0, t0 + i);
                vY2 = vec_xl(16, t0 + i);
                vY3 = vec_xl(32, t0 + i);
                vY4 = vec_xl(48, t0 + i);

                vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
                vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
                vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
                vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);

                vec_xst(vY1, 0, t0 + i);
                vec_xst(vY2, 16, t0 + i);
                vec_xst(vY3, 32, t0 + i);
                vec_xst(vY4, 48, t0 + i);
        }

        return;
}

@@ -3,73 +3,74 @@
/*
 * Same as gf_vect_mul_base in "ec_base.h" but without the size restriction.
 */
static void
_gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
{
        // 2nd element of table array is ref value used to fill it in
        unsigned char c = a[1];

        while (len-- > 0)
                *dest++ = gf_mul(c, *src++);
        return;
}

void
gf_vect_mul_vsx(int len, unsigned char *gftbl, unsigned char *src, unsigned char *dest)
{
        unsigned char *s, *t0;
        vector unsigned char vX1, vY1;
        vector unsigned char vX2, vY2;
        vector unsigned char vX3, vY3;
        vector unsigned char vX4, vY4;
        vector unsigned char vX5, vY5;
        vector unsigned char vX6, vY6;
        vector unsigned char vX7, vY7;
        vector unsigned char vX8, vY8;
        vector unsigned char vhi0, vlo0;
        int i, head;

        s = (unsigned char *) src;
        t0 = (unsigned char *) dest;

        head = len % 128;
        if (head != 0) {
                _gf_vect_mul_base(head, gftbl, src, dest);
        }

        vlo0 = EC_vec_xl(0, gftbl);
        vhi0 = EC_vec_xl(16, gftbl);

        for (i = head; i < len - 127; i += 128) {
                vX1 = vec_xl(0, s + i);
                vX2 = vec_xl(16, s + i);
                vX3 = vec_xl(32, s + i);
                vX4 = vec_xl(48, s + i);

                vX5 = vec_xl(64, s + i);
                vX6 = vec_xl(80, s + i);
                vX7 = vec_xl(96, s + i);
                vX8 = vec_xl(112, s + i);

                vY1 = EC_vec_permxor(vhi0, vlo0, vX1);
                vY2 = EC_vec_permxor(vhi0, vlo0, vX2);
                vY3 = EC_vec_permxor(vhi0, vlo0, vX3);
                vY4 = EC_vec_permxor(vhi0, vlo0, vX4);

                vY5 = EC_vec_permxor(vhi0, vlo0, vX5);
                vY6 = EC_vec_permxor(vhi0, vlo0, vX6);
                vY7 = EC_vec_permxor(vhi0, vlo0, vX7);
                vY8 = EC_vec_permxor(vhi0, vlo0, vX8);

                vec_xst(vY1, 0, t0 + i);
                vec_xst(vY2, 16, t0 + i);
                vec_xst(vY3, 32, t0 + i);
                vec_xst(vY4, 48, t0 + i);

                vec_xst(vY5, 64, t0 + i);
                vec_xst(vY6, 80, t0 + i);
                vec_xst(vY7, 96, t0 + i);
                vec_xst(vY8, 112, t0 + i);
        }

        return;
}
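A minimal usage sketch for the constant-multiply kernel above, assuming the 32-byte table is produced with gf_vect_mul_init() from isa-l's public erasure_code.h and that the gf_vect_mul_vsx() prototype is visible (for example via ec_base_vsx.h); the wrapper name is illustrative only:

#include "erasure_code.h" /* gf_vect_mul_init() -- assumed public header */
#include "ec_base_vsx.h"  /* gf_vect_mul_vsx() prototype -- assumed */

/* dest = c * src over GF(2^8), byte by byte; any length works, since the
 * kernel handles the head that is not a multiple of 128 bytes with
 * _gf_vect_mul_base().
 */
static void
mul_buffer_by_const(int len, unsigned char c, unsigned char *src, unsigned char *dest)
{
        unsigned char gftbl[32];

        gf_vect_mul_init(c, gftbl); /* expand c into the low/high nibble tables */
        gf_vect_mul_vsx(len, gftbl, src, dest);
}

The same 32-byte-per-coefficient blocks are what the gf_Nvect_mad_vsx() and gf_Nvect_dot_prod_vsx() kernels above index through gftbls.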