all: Revamp performance testing to be time based

Change-Id: I6260d28e4adc974d8db0a1c770e3eb922d87f8e4
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
Roy Oursler 2019-01-22 15:38:26 -07:00
parent bde3fc5ff1
commit 699bb5bd3f
23 changed files with 735 additions and 785 deletions

View File

@ -39,13 +39,11 @@
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_LEN 8*1024
# define TEST_LOOPS 4000000
# define TEST_TYPE_STR "_warm"
#else
// Uncached test. Pull from large mem base.
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN (2 * GT_L3_CACHE)
# define TEST_LOOPS 100
# define TEST_TYPE_STR "_cold"
#endif
@ -57,10 +55,9 @@
int main(int argc, char *argv[])
{
int i;
void *src, *dst;
uint16_t crc;
struct perf start, stop;
struct perf start;
printf("crc16_t10dif_copy_perf:\n");
@ -77,15 +74,10 @@ int main(int argc, char *argv[])
fflush(0);
memset(src, 0, TEST_LEN);
crc = crc16_t10dif_copy(TEST_SEED, dst, src, TEST_LEN);
perf_start(&start);
for (i = 0; i < TEST_LOOPS; i++) {
crc = crc16_t10dif_copy(TEST_SEED, dst, src, TEST_LEN);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME, crc =
crc16_t10dif_copy(TEST_SEED, dst, src, TEST_LEN));
printf("crc16_t10dif_copy" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_LEN * i);
perf_print(start, (long long)TEST_LEN);
printf("finish 0x%x\n", crc);
return 0;

View File

@ -40,14 +40,12 @@
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define NBLOCKS 100
# define TEST_LOOPS 1000000
# define TEST_TYPE_STR "_warm"
#else
// Uncached test. Pull from large mem base.
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN (2 * GT_L3_CACHE)
# define NBLOCKS (TEST_LEN / BLKSIZE)
# define TEST_LOOPS 100
# define TEST_TYPE_STR "_cold"
#endif
@ -66,13 +64,25 @@ struct blk_ext {
uint16_t crc;
};
// One pass of the copy-and-insert benchmark over all NBLOCKS blocks.
//
// For every block, crc16_t10dif_copy() is called with the extended block's
// data as destination and the plain block's data as source; the CRC it
// returns is written to *crc and then stored in the extended block.
//
// blks, blks_ext:  first elements of the source / destination block arrays.
// blkp, blkp_ext:  kept for interface compatibility; their incoming values
//                  are never read.
// crc:             rewritten once per block, so it ends up holding the CRC
//                  of the last block (left untouched if the loop runs zero
//                  times).
void crc16_t10dif_copy_perf(struct blk *blks, struct blk *blkp, struct blk_ext *blks_ext,
			    struct blk_ext *blkp_ext, uint16_t * crc)
{
	int n;

	(void)blkp;
	(void)blkp_ext;

	for (n = 0; n < NBLOCKS; n++) {
		struct blk *in = &blks[n];
		struct blk_ext *out = &blks_ext[n];

		*crc = crc16_t10dif_copy(TEST_SEED, out->data, in->data, sizeof(in->data));
		out->crc = *crc;
	}
}
int main(int argc, char *argv[])
{
int i, j;
uint16_t crc;
struct blk *blks, *blkp;
struct blk_ext *blks_ext, *blkp_ext;
struct perf start, stop;
struct perf start;
printf("crc16_t10dif_streaming_insert_perf:\n");
@ -95,19 +105,11 @@ int main(int argc, char *argv[])
fflush(0);
// Copy and insert test
perf_start(&start);
for (j = 0; j < TEST_LOOPS; j++) {
for (i = 0, blkp = blks, blkp_ext = blks_ext; i < NBLOCKS; i++) {
crc = crc16_t10dif_copy(TEST_SEED, blkp_ext->data, blkp->data,
sizeof(blks->data));
blkp_ext->crc = crc;
blkp++;
blkp_ext++;
}
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME,
crc16_t10dif_copy_perf(blks, blkp, blks_ext, blkp_ext, &crc));
printf("crc16_t10pi_op_copy_insert" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)sizeof(blks->data) * NBLOCKS * TEST_LOOPS);
perf_print(start, (long long)sizeof(blks->data) * NBLOCKS);
printf("finish 0x%x\n", crc);
return 0;

View File

@ -39,13 +39,11 @@
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_LEN 8*1024
# define TEST_LOOPS 4000000
# define TEST_TYPE_STR "_warm"
#else
// Uncached test. Pull from large mem base.
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN (2 * GT_L3_CACHE)
# define TEST_LOOPS 100
# define TEST_TYPE_STR "_cold"
#endif
@ -57,10 +55,9 @@
int main(int argc, char *argv[])
{
int i;
void *buf;
uint16_t crc;
struct perf start, stop;
struct perf start;
printf("crc16_t10dif_perf:\n");
@ -73,14 +70,9 @@ int main(int argc, char *argv[])
fflush(0);
memset(buf, 0, TEST_LEN);
crc = crc16_t10dif(TEST_SEED, buf, TEST_LEN);
perf_start(&start);
for (i = 0; i < TEST_LOOPS; i++) {
crc = crc16_t10dif(TEST_SEED, buf, TEST_LEN);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME, crc = crc16_t10dif(TEST_SEED, buf, TEST_LEN));
printf("crc16_t10dif" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_LEN * i);
perf_print(start, (long long)TEST_LEN);
printf("finish 0x%x\n", crc);
return 0;

View File

@ -39,13 +39,11 @@
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_LEN 8*1024
# define TEST_LOOPS 400000
# define TEST_TYPE_STR "_warm"
#else
// Uncached test. Pull from large mem base.
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN (2 * GT_L3_CACHE)
# define TEST_LOOPS 100
# define TEST_TYPE_STR "_cold"
#endif
@ -57,10 +55,9 @@
int main(int argc, char *argv[])
{
int i;
void *buf;
uint32_t crc;
struct perf start, stop;
struct perf start;
printf("crc32_gzip_refl_perf:\n");
@ -73,14 +70,9 @@ int main(int argc, char *argv[])
fflush(0);
memset(buf, 0, TEST_LEN);
crc = crc32_gzip_refl(TEST_SEED, buf, TEST_LEN);
perf_start(&start);
for (i = 0; i < TEST_LOOPS; i++) {
crc = crc32_gzip_refl(TEST_SEED, buf, TEST_LEN);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME, crc = crc32_gzip_refl(TEST_SEED, buf, TEST_LEN));
printf("crc32_gzip_refl" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_LEN * i);
perf_print(start, (long long)TEST_LEN);
printf("finish 0x%x\n", crc);
@ -88,14 +80,10 @@ int main(int argc, char *argv[])
printf("Start timed tests\n");
fflush(0);
crc = crc32_gzip_refl_base(TEST_SEED, buf, TEST_LEN);
perf_start(&start);
for (i = 0; i < (TEST_LOOPS / 100 + 1); i++) {
crc = crc32_gzip_refl_base(TEST_SEED, buf, TEST_LEN);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME, crc =
crc32_gzip_refl_base(TEST_SEED, buf, TEST_LEN));
printf("crc32_gzip_refl_base" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_LEN * i);
perf_print(start, (long long)TEST_LEN);
printf("finish 0x%x\n", crc);

View File

@ -39,13 +39,11 @@
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_LEN 8*1024
# define TEST_LOOPS 400000
# define TEST_TYPE_STR "_warm"
#else
// Uncached test. Pull from large mem base.
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN (2 * GT_L3_CACHE)
# define TEST_LOOPS 100
# define TEST_TYPE_STR "_cold"
#endif
@ -57,10 +55,9 @@
int main(int argc, char *argv[])
{
int i;
void *buf;
uint32_t crc;
struct perf start, stop;
struct perf start;
printf("crc32_ieee_perf:\n");
@ -73,14 +70,9 @@ int main(int argc, char *argv[])
fflush(0);
memset(buf, 0, TEST_LEN);
crc = crc32_ieee(TEST_SEED, buf, TEST_LEN);
perf_start(&start);
for (i = 0; i < TEST_LOOPS; i++) {
crc = crc32_ieee(TEST_SEED, buf, TEST_LEN);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME, crc = crc32_ieee(TEST_SEED, buf, TEST_LEN));
printf("crc32_ieee" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_LEN * i);
perf_print(start, (long long)TEST_LEN);
printf("finish 0x%x\n", crc);
return 0;

View File

@ -39,13 +39,11 @@
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_LEN 8*1024
# define TEST_LOOPS 1000000
# define TEST_TYPE_STR "_warm"
#else
// Uncached test. Pull from large mem base.
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN (2 * GT_L3_CACHE)
# define TEST_LOOPS 500
# define TEST_TYPE_STR "_cold"
#endif
@ -57,10 +55,9 @@
int main(int argc, char *argv[])
{
int i;
void *buf;
uint32_t crc;
struct perf start, stop;
struct perf start;
printf("crc32_iscsi_perf:\n");
@ -73,14 +70,9 @@ int main(int argc, char *argv[])
fflush(0);
memset(buf, 0, TEST_LEN);
crc = crc32_iscsi(buf, TEST_LEN, TEST_SEED);
perf_start(&start);
for (i = 0; i < TEST_LOOPS; i++) {
crc = crc32_iscsi(buf, TEST_LEN, TEST_SEED);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME, crc = crc32_iscsi(buf, TEST_LEN, TEST_SEED));
printf("crc32_iscsi" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_LEN * i);
perf_print(start, (long long)TEST_LEN);
printf("finish 0x%x\n", crc);
return 0;

View File

@ -39,13 +39,11 @@
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_LEN 8*1024
# define TEST_LOOPS 400000
# define TEST_TYPE_STR "_warm"
#else
// Uncached test. Pull from large mem base.
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN (2 * GT_L3_CACHE)
# define TEST_LOOPS 100
# define TEST_TYPE_STR "_cold"
#endif
@ -74,10 +72,10 @@ func_case_t test_funcs[] = {
int main(int argc, char *argv[])
{
int i, j;
int j;
void *buf;
uint64_t crc;
struct perf start, stop;
struct perf start;
func_case_t *test_func;
if (posix_memalign(&buf, 1024, TEST_LEN)) {
@ -93,14 +91,10 @@ int main(int argc, char *argv[])
printf("Start timed tests\n");
fflush(0);
crc = test_func->crc64_func_call(TEST_SEED, buf, TEST_LEN);
perf_start(&start);
for (i = 0; i < TEST_LOOPS; i++) {
crc = test_func->crc64_func_call(TEST_SEED, buf, TEST_LEN);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME, crc =
test_func->crc64_func_call(TEST_SEED, buf, TEST_LEN));
printf("%s" TEST_TYPE_STR ": ", test_func->note);
perf_print(stop, start, (long long)TEST_LEN * i);
perf_print(start, (long long)TEST_LEN);
printf("finish 0x%lx\n", crc);
}

View File

@ -38,7 +38,6 @@
// Cached test, loop many times over small dataset
# define TEST_SOURCES 32
# define TEST_LEN(m) ((128*1024 / m) & ~(64-1))
# define TEST_LOOPS(m) (100*m)
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
@ -46,30 +45,64 @@
# define TEST_SOURCES 32
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1))
# define TEST_LOOPS(m) (10)
# define TEST_TYPE_STR "_cold"
# else
# define TEST_TYPE_STR "_cus"
# ifndef TEST_LOOPS
# define TEST_LOOPS(m) 1000
# endif
# endif
#endif
#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES
#define BAD_MATRIX -1
typedef unsigned char u8;
// One timed encode pass: re-initialise g_tbls from the rows of `a` that start
// at a[k * k], then make the m - k parity vectors buffs[k..] from the k
// source vectors at the front of buffs, using the base encoder.
void ec_encode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs)
{
	const int parity_cnt = m - k;
	u8 *parity_rows = a + k * k;
	u8 **parity_bufs = buffs + k;

	ec_init_tables(k, parity_cnt, parity_rows, g_tbls);
	ec_encode_data_base(TEST_LEN(m), k, parity_cnt, g_tbls, buffs, parity_bufs);
}
// One timed decode pass using the base encoder.
//
// Steps: gather the first k sources whose src_in_err flag is zero (their
// buffers and their rows of `a`), invert that k x k matrix, pick the rows of
// the inverse named by src_err_list, and encode with those rows to write the
// nerrs recovered vectors into temp_buffs.
//
// Returns BAD_MATRIX when gf_invert_matrix() returns a negative value,
// otherwise 0. g_tbls is overwritten.
int ec_decode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, u8 * src_in_err,
		   u8 * src_err_list, int nerrs, u8 ** temp_buffs)
{
	u8 kept_rows[MMAX * KMAX];
	u8 inverse[MMAX * KMAX];
	u8 decode_rows[MMAX * KMAX];
	u8 *recov[TEST_SOURCES];
	int row, col;
	int src = 0;

	// Construct kept_rows by removing error rows
	for (row = 0; row < k; row++) {
		// Skip over every source flagged as being in error
		while (src_in_err[src])
			src++;

		recov[row] = buffs[src];
		for (col = 0; col < k; col++)
			kept_rows[k * row + col] = a[k * src + col];

		src++;
	}

	if (gf_invert_matrix(kept_rows, inverse, k) < 0)
		return BAD_MATRIX;

	// One decode row per erased source, taken from the inverse
	for (row = 0; row < nerrs; row++) {
		const int lost = src_err_list[row];

		for (col = 0; col < k; col++)
			decode_rows[k * row + col] = inverse[k * lost + col];
	}

	// Recover data
	ec_init_tables(k, nerrs, decode_rows, g_tbls);
	ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);

	return 0;
}
int main(int argc, char *argv[])
{
int i, j, rtest, m, k, nerrs, r;
int i, j, m, k, nerrs, check;
void *buf;
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
u8 a[MMAX * KMAX], b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
u8 a[MMAX * KMAX];
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
struct perf start, stop;
u8 src_err_list[TEST_SOURCES];
struct perf start;
// Pick test parameters
m = 14;
@ -112,46 +145,21 @@ int main(int argc, char *argv[])
buffs[i][j] = rand();
gf_gen_rs_matrix(a, m, k);
ec_init_tables(k, m - k, &a[k * k], g_tbls);
ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
// Start encode test
perf_start(&start);
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
// Make parity vects
ec_init_tables(k, m - k, &a[k * k], g_tbls);
ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs));
printf("erasure_code_base_encode" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)(TEST_LEN(m)) * (m) * rtest);
perf_print(start, (long long)(TEST_LEN(m)) * (m));
// Start decode test
perf_start(&start);
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
// Construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r])
r++;
recov[i] = buffs[r];
for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j];
}
BENCHMARK(&start, BENCHMARK_TIME, check =
ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
temp_buffs));
if (gf_invert_matrix(b, d, k) < 0) {
printf("BAD MATRIX\n");
return -1;
}
for (i = 0; i < nerrs; i++)
for (j = 0; j < k; j++)
c[k * i + j] = d[k * src_err_list[i] + j];
// Recover data
ec_init_tables(k, nerrs, c, g_tbls);
ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
if (check == BAD_MATRIX) {
printf("BAD MATRIX\n");
return check;
}
perf_stop(&stop);
for (i = 0; i < nerrs; i++) {
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
@ -161,7 +169,7 @@ int main(int argc, char *argv[])
}
printf("erasure_code_base_decode" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)(TEST_LEN(m)) * (k + nerrs) * rtest);
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
printf("done all: Pass\n");
return 0;

View File

@ -38,7 +38,6 @@
// Cached test, loop many times over small dataset
# define TEST_SOURCES 32
# define TEST_LEN(m) ((128*1024 / m) & ~(64-1))
# define TEST_LOOPS(m) (10000*m)
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
@ -46,30 +45,64 @@
# define TEST_SOURCES 32
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1))
# define TEST_LOOPS(m) (50*m)
# define TEST_TYPE_STR "_cold"
# else
# define TEST_TYPE_STR "_cus"
# ifndef TEST_LOOPS
# define TEST_LOOPS(m) 1000
# endif
# endif
#endif
#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES
#define BAD_MATRIX -1
typedef unsigned char u8;
// One timed encode pass: re-initialise g_tbls from the rows of `a` that start
// at a[k * k], then make the m - k parity vectors buffs[k..] from the k
// source vectors at the front of buffs.
void ec_encode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs)
{
	const int parity_cnt = m - k;
	u8 *parity_rows = a + k * k;
	u8 **parity_bufs = buffs + k;

	ec_init_tables(k, parity_cnt, parity_rows, g_tbls);
	ec_encode_data(TEST_LEN(m), k, parity_cnt, g_tbls, buffs, parity_bufs);
}
// One timed decode pass.
//
// Steps: gather the first k sources whose src_in_err flag is zero (their
// buffers and their rows of `a`), invert that k x k matrix, pick the rows of
// the inverse named by src_err_list, and encode with those rows to write the
// nerrs recovered vectors into temp_buffs.
//
// Returns BAD_MATRIX when gf_invert_matrix() returns a negative value,
// otherwise 0. g_tbls is overwritten.
int ec_decode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, u8 * src_in_err,
		   u8 * src_err_list, int nerrs, u8 ** temp_buffs)
{
	u8 kept_rows[MMAX * KMAX];
	u8 inverse[MMAX * KMAX];
	u8 decode_rows[MMAX * KMAX];
	u8 *recov[TEST_SOURCES];
	int row, col;
	int src = 0;

	// Construct kept_rows by removing error rows
	for (row = 0; row < k; row++) {
		// Skip over every source flagged as being in error
		while (src_in_err[src])
			src++;

		recov[row] = buffs[src];
		for (col = 0; col < k; col++)
			kept_rows[k * row + col] = a[k * src + col];

		src++;
	}

	if (gf_invert_matrix(kept_rows, inverse, k) < 0)
		return BAD_MATRIX;

	// One decode row per erased source, taken from the inverse
	for (row = 0; row < nerrs; row++) {
		const int lost = src_err_list[row];

		for (col = 0; col < k; col++)
			decode_rows[k * row + col] = inverse[k * lost + col];
	}

	// Recover data
	ec_init_tables(k, nerrs, decode_rows, g_tbls);
	ec_encode_data(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);

	return 0;
}
int main(int argc, char *argv[])
{
int i, j, rtest, m, k, nerrs, r;
int i, j, m, k, nerrs, check;
void *buf;
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
u8 a[MMAX * KMAX], b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
u8 a[MMAX * KMAX];
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
struct perf start, stop;
u8 src_err_list[TEST_SOURCES];
struct perf start;
// Pick test parameters
m = 14;
@ -112,46 +145,21 @@ int main(int argc, char *argv[])
buffs[i][j] = rand();
gf_gen_rs_matrix(a, m, k);
ec_init_tables(k, m - k, &a[k * k], g_tbls);
ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
// Start encode test
perf_start(&start);
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
// Make parity vects
ec_init_tables(k, m - k, &a[k * k], g_tbls);
ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs));
printf("erasure_code_encode" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)(TEST_LEN(m)) * (m) * rtest);
perf_print(start, (long long)(TEST_LEN(m)) * (m));
// Start decode test
perf_start(&start);
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
// Construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r])
r++;
recov[i] = buffs[r];
for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j];
}
BENCHMARK(&start, BENCHMARK_TIME, check =
ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
temp_buffs));
if (gf_invert_matrix(b, d, k) < 0) {
printf("BAD MATRIX\n");
return -1;
}
for (i = 0; i < nerrs; i++)
for (j = 0; j < k; j++)
c[k * i + j] = d[k * src_err_list[i] + j];
// Recover data
ec_init_tables(k, nerrs, c, g_tbls);
ec_encode_data(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
if (check == BAD_MATRIX) {
printf("BAD MATRIX\n");
return check;
}
perf_stop(&stop);
for (i = 0; i < nerrs; i++) {
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
@ -161,7 +169,7 @@ int main(int argc, char *argv[])
}
printf("erasure_code_decode" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)(TEST_LEN(m)) * (k + nerrs) * rtest);
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
printf("done all: Pass\n");
return 0;

View File

@ -53,7 +53,6 @@
// Cached test, loop many times over small dataset
# define TEST_SOURCES 32
# define TEST_LEN(m) ((128*1024 / m) & ~(64-1))
# define TEST_LOOPS(m) (10000*m)
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
@ -61,13 +60,9 @@
# define TEST_SOURCES 32
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1))
# define TEST_LOOPS(m) (50*m)
# define TEST_TYPE_STR "_cold"
# else
# define TEST_TYPE_STR "_cus"
# ifndef TEST_LOOPS
# define TEST_LOOPS(m) 1000
# endif
# endif
#endif
@ -87,17 +82,65 @@ void dump(unsigned char *buf, int len)
printf("\n");
}
// Reference encode pass: re-initialise g_tbls from the rows of `a` starting
// at a[k * k], then call REF_FUNCTION once over all k sources to produce the
// m - k vectors at buffs[k..].
void encode_update_test_ref(int m, int k, u8 * g_tbls, u8 ** buffs, u8 * a)
{
	const int parity_cnt = m - k;
	u8 *parity_rows = a + k * k;
	u8 **parity_bufs = buffs + k;

	ec_init_tables(k, parity_cnt, parity_rows, g_tbls);
	REF_FUNCTION(TEST_LEN(m), k, parity_cnt, g_tbls, buffs, parity_bufs);
}
// Update-style encode pass: re-initialise g_tbls from the rows of `a`
// starting at a[k * k], then call FUNCTION_UNDER_TEST once per source vector
// (index 0 .. k-1), each call targeting the m - k vectors at
// perf_update_buffs[k..].
//
// The destination vectors are not cleared here; their starting contents are
// whatever the caller left in them.
void encode_update_test(int m, int k, u8 * g_tbls, u8 ** perf_update_buffs, u8 * a)
{
	const int parity_cnt = m - k;
	u8 **parity_bufs = perf_update_buffs + k;
	int src = 0;

	// Make parity vects
	ec_init_tables(k, parity_cnt, a + k * k, g_tbls);

	while (src < k) {
		FUNCTION_UNDER_TEST(TEST_LEN(m), k, parity_cnt, src, g_tbls,
				    perf_update_buffs[src], parity_bufs);
		src++;
	}
}
// Decode pass built on the update-style function.
//
// Fills recov[0..k-1] with the first k entries of update_buffs whose
// src_in_err flag is zero, inverts the matching rows of `a`, selects the rows
// of the inverse named by src_err_list, and then calls FUNCTION_UNDER_TEST
// once per recovered source with perf_update_buffs as the destination.
//
// Prints "BAD MATRIX" and returns -1 when gf_invert_matrix() returns a
// negative value; returns 0 otherwise. g_tbls and recov are overwritten.
int decode_test(int m, int k, u8 ** update_buffs, u8 ** recov, u8 * a, u8 * src_in_err,
		u8 * src_err_list, int nerrs, u8 * g_tbls, u8 ** perf_update_buffs)
{
	u8 kept_rows[MMAX * KMAX];
	u8 inverse[MMAX * KMAX];
	u8 decode_rows[MMAX * KMAX];
	int row, col;
	int src = 0;

	// Construct kept_rows by removing error rows
	for (row = 0; row < k; row++) {
		// Skip over every source flagged as being in error
		while (src_in_err[src])
			src++;

		recov[row] = update_buffs[src];
		for (col = 0; col < k; col++)
			kept_rows[k * row + col] = a[k * src + col];

		src++;
	}

	if (gf_invert_matrix(kept_rows, inverse, k) < 0) {
		printf("BAD MATRIX\n");
		return -1;
	}

	// One decode row per erased source, taken from the inverse
	for (row = 0; row < nerrs; row++) {
		const int lost = src_err_list[row];

		for (col = 0; col < k; col++)
			decode_rows[k * row + col] = inverse[k * lost + col];
	}

	// Recover data
	ec_init_tables(k, nerrs, decode_rows, g_tbls);
	for (row = 0; row < k; row++)
		FUNCTION_UNDER_TEST(TEST_LEN(m), k, nerrs, row, g_tbls, recov[row],
				    perf_update_buffs);

	return 0;
}
int main(int argc, char *argv[])
{
int i, j, rtest, m, k, nerrs, r;
int i, j, check, m, k, nerrs;
void *buf;
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
u8 *update_buffs[TEST_SOURCES];
u8 *perf_update_buffs[TEST_SOURCES];
u8 a[MMAX * KMAX], b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
u8 a[MMAX * KMAX];
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
struct perf start, stop;
struct perf start;
// Pick test parameters
k = 10;
@ -160,13 +203,9 @@ int main(int argc, char *argv[])
}
gf_gen_rs_matrix(a, m, k);
ec_init_tables(k, m - k, &a[k * k], g_tbls);
REF_FUNCTION(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
for (i = 0; i < k; i++) {
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, update_buffs[i],
&update_buffs[k]);
}
encode_update_test_ref(m, k, g_tbls, buffs, a);
encode_update_test(m, k, g_tbls, update_buffs, a);
for (i = 0; i < m - k; i++) {
if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN(m))) {
printf("\nupdate_buffs%d :", i);
@ -178,58 +217,34 @@ int main(int argc, char *argv[])
}
#ifdef DO_REF_PERF
REF_FUNCTION(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
// Start encode test
perf_start(&start);
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
// Make parity vects
ec_init_tables(k, m - k, &a[k * k], g_tbls);
REF_FUNCTION(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME, encode_update_test_ref(m, k, g_tbls, buffs, a));
printf(xstr(REF_FUNCTION) TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)(TEST_LEN(m)) * (m) * rtest);
perf_print(start, (long long)(TEST_LEN(m)) * (m));
#endif
for (i = 0; i < k; i++) {
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, perf_update_buffs[i],
&perf_update_buffs[k]);
}
// Start encode test
perf_start(&start);
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
// Make parity vects
ec_init_tables(k, m - k, &a[k * k], g_tbls);
for (i = 0; i < k; i++) {
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls,
perf_update_buffs[i], &perf_update_buffs[k]);
}
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME,
encode_update_test(m, k, g_tbls, perf_update_buffs, a));
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)(TEST_LEN(m)) * (m) * rtest);
perf_print(start, (long long)(TEST_LEN(m)) * (m));
// Start encode test
perf_start(&start);
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
// Make parity vects
ec_init_tables(k, m - k, &a[k * k], g_tbls);
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
&perf_update_buffs[k]);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME,
// Make parity vects
ec_init_tables(k, m - k, &a[k * k], g_tbls);
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
&perf_update_buffs[k]));
printf(xstr(FUNCTION_UNDER_TEST) "_single_src" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)(TEST_LEN(m)) * (m - k + 1) * rtest);
perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1));
// Start encode test
perf_start(&start);
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
// Make parity vects
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
&perf_update_buffs[k]);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME,
// Make parity vects
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
&perf_update_buffs[k]));
printf(xstr(FUNCTION_UNDER_TEST) "_single_src_simple" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)(TEST_LEN(m)) * (m - k + 1) * rtest);
perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1));
for (i = k; i < m; i++) {
memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function
@ -238,68 +253,26 @@ int main(int argc, char *argv[])
FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, update_buffs[i],
&update_buffs[k]);
}
// Construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r])
r++;
recov[i] = update_buffs[r];
for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j];
}
if (gf_invert_matrix(b, d, k) < 0) {
printf("BAD MATRIX\n");
decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list,
nerrs, g_tbls, temp_buffs);
BENCHMARK(&start, BENCHMARK_TIME, check =
decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list,
nerrs, g_tbls, perf_update_buffs));
if (check) {
printf("BAD_MATRIX\n");
return -1;
}
for (i = 0; i < nerrs; i++)
for (j = 0; j < k; j++)
c[k * i + j] = d[k * src_err_list[i] + j];
// Recover data
ec_init_tables(k, nerrs, c, g_tbls);
for (i = 0; i < k; i++) {
FUNCTION_UNDER_TEST(TEST_LEN(m), k, nerrs, i, g_tbls, recov[i], temp_buffs);
}
// Start decode test
perf_start(&start);
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
// Construct b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r])
r++;
recov[i] = update_buffs[r];
for (j = 0; j < k; j++)
b[k * i + j] = a[k * r + j];
}
if (gf_invert_matrix(b, d, k) < 0) {
printf("BAD MATRIX\n");
return -1;
}
for (i = 0; i < nerrs; i++)
for (j = 0; j < k; j++)
c[k * i + j] = d[k * src_err_list[i] + j];
// Recover data
ec_init_tables(k, nerrs, c, g_tbls);
for (i = 0; i < k; i++) {
FUNCTION_UNDER_TEST(TEST_LEN(m), k, nerrs, i, g_tbls, recov[i],
perf_update_buffs);
}
}
perf_stop(&stop);
for (i = 0; i < nerrs; i++) {
if (0 != memcmp(temp_buffs[i], update_buffs[src_err_list[i]], TEST_LEN(m))) {
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
printf("Fail error recovery (%d, %d, %d) - \n", m, k, nerrs);
return -1;
}
}
printf(xstr(FUNCTION_UNDER_TEST) "_decode" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)(TEST_LEN(m)) * (k + nerrs) * rtest);
perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
printf("done all: Pass\n");
return 0;

View File

@ -38,7 +38,6 @@
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN 8*1024
# define TEST_LOOPS 4000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
@ -46,13 +45,9 @@
# define TEST_SOURCES 10
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN GT_L3_CACHE / TEST_SOURCES
# define TEST_LOOPS 10
# define TEST_TYPE_STR "_cold"
# else
# define TEST_TYPE_STR "_cus"
# ifndef TEST_LOOPS
# define TEST_LOOPS 1000
# endif
# endif
#endif
@ -99,12 +94,26 @@ void gf_vect_dot_prod_ref(int len, int vlen, u8 * v, u8 ** src, u8 * dest)
}
}
// Dot product computed with the single lookup table gf_mul_table.
//
// For every byte position below len, XORs together the table entries at
// index v[j] * 256 + src[j][pos] for j = 0 .. vlen-1 and stores the result
// in dest[pos].
void gf_vect_dot_prod_mult(int len, int vlen, u8 * v, u8 ** src, u8 * dest)
{
	int pos, term;

	for (pos = 0; pos < len; pos++) {
		u8 acc = 0;

		for (term = 0; term < vlen; term++) {
			const int lut_index = v[term] * 256 + src[term][pos];

			acc ^= gf_mul_table[lut_index];
		}

		dest[pos] = acc;
	}
}
int main(void)
{
int i, j, k;
u8 s, vec[TEST_SOURCES], dest1[TEST_LEN], dest2[TEST_LEN];
int i, j;
u8 vec[TEST_SOURCES], dest1[TEST_LEN], dest2[TEST_LEN];
u8 *matrix[TEST_SOURCES];
struct perf start, stop;
struct perf start;
mk_gf_field();
mk_gf_mul_table(gf_mul_table);
@ -122,38 +131,15 @@ int main(void)
}
gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1);
perf_start(&start);
for (i = 0; i < TEST_LOOPS; i++)
gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1);
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME,
gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1));
printf("gf_vect_dot_prod_2tbl" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
// Warm up mult tables
for (i = 0; i < TEST_LEN; i++) {
s = 0;
for (j = 0; j < TEST_SOURCES; j++) {
s ^= gf_mul_table[vec[j] * 256 + matrix[j][i]];
}
dest2[i] = s;
}
perf_start(&start);
for (k = 0; k < TEST_LOOPS; k++) {
for (i = 0; i < TEST_LEN; i++) {
s = 0;
for (j = 0; j < TEST_SOURCES; j++) {
s ^= gf_mul_table[vec[j] * 256 + matrix[j][i]];
}
dest2[i] = s;
}
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME,
gf_vect_dot_prod_mult(TEST_LEN, TEST_SOURCES, vec, matrix, dest2));
printf("gf_vect_dot_prod_1tbl" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * k);
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
// Compare with reference function
if (0 != memcmp(dest1, dest2, TEST_LEN)) {

View File

@ -45,7 +45,6 @@
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN 8*1024
# define TEST_LOOPS 40000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
@ -53,13 +52,9 @@
# define TEST_SOURCES 10
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
# define TEST_LOOPS 100
# define TEST_TYPE_STR "_cold"
# else
# define TEST_TYPE_STR "_cus"
# ifndef TEST_LOOPS
# define TEST_LOOPS 1000
# endif
# endif
#endif
@ -88,13 +83,24 @@ void dump_matrix(unsigned char **s, int k, int m)
printf("\n");
}
// One benchmark pass for a dot-product implementation.
//
// Rebuilds the 32-byte table slice of g_tbls for each of the TEST_SOURCES
// coefficients in g, then calls fun_ptr over all sources in buffs with
// dest_ref as the output vector.
void vect_dot_prod_perf(void (*fun_ptr)
			 (int, int, unsigned char *, unsigned char **, unsigned char *),
			u8 * g, u8 * g_tbls, u8 ** buffs, u8 * dest_ref)
{
	int src = 0;

	while (src < TEST_SOURCES) {
		gf_vect_mul_init(g[src], g_tbls + src * 32);
		src++;
	}

	fun_ptr(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref);
}
int main(int argc, char *argv[])
{
int i, j;
void *buf;
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
u8 *temp_buff, *buffs[TEST_SOURCES];
struct perf start, stop;
struct perf start;
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
@ -138,36 +144,20 @@ int main(int argc, char *argv[])
for (i = 0; i < TEST_SOURCES; i++)
g[i] = rand();
for (j = 0; j < TEST_SOURCES; j++)
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
#ifdef DO_REF_PERF
perf_start(&start);
for (i = 0; i < TEST_LOOPS; i++) {
for (j = 0; j < TEST_SOURCES; j++)
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME,
vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref)
);
printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
#else
vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref);
#endif
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
perf_start(&start);
for (i = 0; i < TEST_LOOPS; i++) {
for (j = 0; j < TEST_SOURCES; j++)
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME,
vect_dot_prod_perf(&FUNCTION_UNDER_TEST, g, g_tbls, buffs, dest));
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");

View File

@ -37,7 +37,6 @@
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_LEN 8*1024
# define TEST_LOOPS 4000000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
@ -45,13 +44,9 @@
# define TEST_SOURCES 10
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN GT_L3_CACHE / 2
# define TEST_LOOPS 1000
# define TEST_TYPE_STR "_cold"
# else
# define TEST_TYPE_STR "_cus"
# ifndef TEST_LOOPS
# define TEST_LOOPS 1000
# endif
# endif
#endif
@ -59,16 +54,19 @@
typedef unsigned char u8;
// One benchmark pass: rebuild the constant table for multiplier `a` in
// gf_const_tbl, then run gf_vect_mul() over TEST_LEN bytes, passing buff1 and
// buff2 through as its third and fourth arguments.
void gf_vect_mul_perf(u8 a, u8 * gf_const_tbl, u8 * buff1, u8 * buff2)
{
	const int len = TEST_LEN;

	gf_vect_mul_init(a, gf_const_tbl);
	gf_vect_mul(len, gf_const_tbl, buff1, buff2);
}
int main(int argc, char *argv[])
{
int i;
u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
struct perf start, stop;
struct perf start;
printf("gf_vect_mul_perf:\n");
gf_vect_mul_init(a, gf_const_tbl);
// Allocate large mem region
buff1 = (u8 *) malloc(TEST_LEN);
buff2 = (u8 *) malloc(TEST_LEN);
@ -80,20 +78,13 @@ int main(int argc, char *argv[])
memset(buff1, 0, TEST_LEN);
memset(buff2, 0, TEST_LEN);
gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
printf("Start timed tests\n");
fflush(0);
gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
perf_start(&start);
for (i = 0; i < TEST_LOOPS; i++) {
gf_vect_mul_init(a, gf_const_tbl);
gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME, gf_vect_mul_perf(a, gf_const_tbl, buff1, buff2));
printf("gf_vect_mul" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_LEN * i);
perf_print(start, (long long)TEST_LEN);
return 0;
}

View File

@ -334,16 +334,12 @@ int main(int argc, char *argv[])
ec_encode_data(len / 2, k2, p2, g_tbls, frag_ptrs, parity_ptrs);
if (benchmark) {
struct perf start, stop;
unsigned long long iterations = (1ull << 32) / (m * len);
perf_start(&start);
for (i = 0; i < iterations; i++) {
ec_encode_data(len / 2, k2, p2, g_tbls, frag_ptrs,
parity_ptrs);
}
perf_stop(&stop);
struct perf start;
BENCHMARK(&start, BENCHMARK_TIME,
ec_encode_data(len / 2, k2, p2, g_tbls, frag_ptrs,
parity_ptrs));
printf("ec_piggyback_encode_std: ");
perf_print(stop, start, iterations * m2 * len / 2);
perf_print(start, m2 * len / 2);
}
} else {
// Sparse matrix optimization - use fact that input matrix is sparse
@ -380,18 +376,14 @@ int main(int argc, char *argv[])
&parity_ptrs[p]);
if (benchmark) {
struct perf start, stop;
unsigned long long iterations = (1ull << 32) / (m * len);
perf_start(&start);
for (i = 0; i < iterations; i++) {
ec_encode_data(len / 2, k, p, g_tbls_faster, frag_ptrs,
parity_ptrs);
ec_encode_data(len / 2, k2, p, &g_tbls[k2 * p * 32], frag_ptrs,
&parity_ptrs[p]);
}
perf_stop(&stop);
struct perf start;
BENCHMARK(&start, BENCHMARK_TIME,
ec_encode_data(len / 2, k, p, g_tbls_faster, frag_ptrs,
parity_ptrs);
ec_encode_data(len / 2, k2, p, &g_tbls[k2 * p * 32],
frag_ptrs, &parity_ptrs[p]));
printf("ec_piggyback_encode_sparse: ");
perf_print(stop, start, iterations * m2 * len / 2);
perf_print(start, m2 * len / 2);
}
}
@ -429,16 +421,12 @@ int main(int argc, char *argv[])
ec_encode_data(len / 2, k2, nerrs2, g_tbls, recover_srcs, recover_outp);
if (benchmark) {
struct perf start, stop;
unsigned long long iterations = (1ull << 32) / (k * len);
perf_start(&start);
for (i = 0; i < iterations; i++) {
ec_encode_data(len / 2, k2, nerrs2, g_tbls, recover_srcs,
recover_outp);
}
perf_stop(&stop);
struct perf start;
BENCHMARK(&start, BENCHMARK_TIME,
ec_encode_data(len / 2, k2, nerrs2, g_tbls, recover_srcs,
recover_outp));
printf("ec_piggyback_decode: ");
perf_print(stop, start, iterations * (k2 + nerrs2) * len / 2);
perf_print(start, (k2 + nerrs2) * len / 2);
}
// Check that recovered buffers are the same as original
printf(" check recovery of block {");

View File

@ -7,7 +7,6 @@
#include "test.h"
#define DICT_LEN 32*1024
#define ITERATIONS 100000
extern void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, int dict_len);
@ -21,7 +20,7 @@ void create_rand_data(uint8_t * data, uint32_t size)
int main(int argc, char *argv[])
{
int i, iterations = ITERATIONS;
int time = BENCHMARK_TIME;
struct isal_zstream stream;
uint8_t dict[DICT_LEN];
uint32_t dict_len = DICT_LEN;
@ -29,18 +28,12 @@ int main(int argc, char *argv[])
stream.level = 0;
create_rand_data(dict, dict_len);
struct perf start, stop;
perf_start(&start);
for (i = 0; i < iterations; i++) {
isal_deflate_hash(&stream, dict, dict_len);
}
perf_stop(&stop);
struct perf start;
BENCHMARK(&start, time, isal_deflate_hash(&stream, dict, dict_len));
printf("igzip_build_hash_table_perf:\n");
printf(" in_size=%u iter=%d ", dict_len, i);
perf_print(stop, start, (long long)dict_len * i);
printf(" in_size=%u ", dict_len);
perf_print(start, (long long)dict_len);
return 0;
}

View File

@ -37,10 +37,6 @@
#include "test.h"
#define BUF_SIZE 1024
#define MIN_TEST_LOOPS 10
#ifndef RUN_MEM_SIZE
# define RUN_MEM_SIZE 500000000
#endif
int level_size_buf[10] = {
#ifdef ISAL_DEF_LVL0_DEFAULT
@ -95,8 +91,6 @@ int level_size_buf[10] = {
#endif
};
struct isal_zstream stream;
int usage(void)
{
fprintf(stderr,
@ -104,7 +98,7 @@ int usage(void)
" -h help\n"
" -X use compression level X with 0 <= X <= 1\n"
" -b <size> input buffer size, 0 buffers all the input\n"
" -i <iter> number of iterations (at least 1)\n"
" -i <time> time in seconds to benchmark (at least 1)\n"
" -o <file> output file for compresed data\n"
" -d <file> dictionary file used by compression\n"
" -w <size> log base 2 size of history window, between 8 and 15\n");
@ -112,17 +106,54 @@ int usage(void)
exit(0);
}
/*
 * Run one complete compression pass of inbuf[0 .. infile_size) into outbuf.
 * This is the unit of work that main() hands to BENCHMARK().
 *
 * The stream is re-initialised with isal_deflate_init() and all settings are
 * re-applied on every call, so the function can be invoked repeatedly on the
 * same stream object.
 *
 * stream            stream object to (re)use
 * inbuf/infile_size input data and its length in bytes
 * inbuf_size        maximum number of bytes offered to isal_deflate() per
 *                   call; 0 means "offer the whole input at once", matching
 *                   the -b option handling in main()
 * outbuf/outbuf_size destination buffer and its capacity
 * level, level_buf, level_size, hist_bits
 *                   copied verbatim into the corresponding stream fields
 * dictbuf/dictfile_size
 *                   optional dictionary; skipped when dictbuf is NULL
 * hufftables_custom optional Huffman tables; when NULL the tables left in
 *                   place by isal_deflate_init() are used
 *
 * On return the caller inspects stream->avail_in (non-zero means the input
 * could not be consumed completely) and stream->total_out.
 */
void deflate_perf(struct isal_zstream *stream, uint8_t * inbuf, size_t infile_size,
		  size_t inbuf_size, uint8_t * outbuf, size_t outbuf_size, int level,
		  uint8_t * level_buf, int level_size, uint32_t hist_bits, uint8_t * dictbuf,
		  size_t dictfile_size, struct isal_hufftables *hufftables_custom)
{
	/* Tracked as size_t so large inputs are not truncated to int and no
	 * signed/unsigned mixing happens in the loop below. */
	size_t remaining = infile_size;

	isal_deflate_init(stream);
	if (dictbuf != NULL)
		isal_deflate_set_dict(stream, dictbuf, dictfile_size);

	stream->end_of_stream = 0;
	stream->flush = NO_FLUSH;
	stream->level = level;
	stream->level_buf = level_buf;
	stream->level_buf_size = level_size;
	stream->next_out = outbuf;
	stream->avail_out = outbuf_size;
	stream->next_in = inbuf;
	if (hufftables_custom != NULL)
		stream->hufftables = hufftables_custom;
	stream->hist_bits = hist_bits;

	/* A zero block size would never make progress; treat it as "no
	 * blocking" instead of looping forever. */
	if (inbuf_size == 0)
		inbuf_size = infile_size;

	while (remaining > 0) {
		size_t chunk = remaining < inbuf_size ? remaining : inbuf_size;

		/* avail_in is a 32-bit field: clamp rather than silently
		 * truncate, so the bookkeeping stays in step with what was
		 * actually offered to the compressor. */
		if (chunk > UINT32_MAX)
			chunk = UINT32_MAX;

		stream->avail_in = (uint32_t) chunk;
		remaining -= chunk;
		if (remaining == 0)
			stream->end_of_stream = 1;	/* final block of this pass */

		isal_deflate(stream);

		/* Leftover input means the output buffer filled up; stop and
		 * let the caller report it via stream->avail_in. */
		if (stream->avail_in != 0)
			break;
	}
}
int main(int argc, char *argv[])
{
FILE *in = NULL, *out = NULL, *dict = NULL;
unsigned char *inbuf, *outbuf, *level_buf = NULL, *dictbuf = NULL;
int i, c, iterations = 0, inbuf_size = 0;
uint64_t infile_size, outbuf_size, dictfile_size;
int c, time = BENCHMARK_TIME, inbuf_size = 0;
size_t infile_size, outbuf_size, dictfile_size;
struct isal_huff_histogram histogram;
struct isal_hufftables hufftables_custom;
int level = 0, level_size = 0, avail_in;
int level = 0, level_size = 0;
char *in_file_name = NULL, *out_file_name = NULL, *dict_file_name = NULL;
uint32_t hist_bits = 0;
struct isal_zstream stream;
while ((c = getopt(argc, argv, "h0123456789i:b:o:d:w:")) != -1) {
if (c >= '0' && c <= '9') {
@ -143,8 +174,8 @@ int main(int argc, char *argv[])
dict_file_name = optarg;
break;
case 'i':
iterations = atoi(optarg);
if (iterations < 1)
time = atoi(optarg);
if (time < 1)
usage();
break;
case 'b':
@ -210,12 +241,6 @@ int main(int argc, char *argv[])
dictfile_size = (dict_file_name != NULL) ? get_filesize(dict) : 0;
if (iterations == 0) {
iterations = infile_size ? RUN_MEM_SIZE / infile_size : MIN_TEST_LOOPS;
if (iterations < MIN_TEST_LOOPS)
iterations = MIN_TEST_LOOPS;
}
inbuf = malloc(infile_size);
if (inbuf == NULL) {
fprintf(stderr, "Can't allocate input buffer memory\n");
@ -245,7 +270,7 @@ int main(int argc, char *argv[])
inbuf_size = inbuf_size ? inbuf_size : infile_size;
printf("igzip_file_perf: %s %d iterations\n", in_file_name, iterations);
printf("igzip_file_perf: %s\n", in_file_name);
/* Read complete input file into buffer */
stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in);
@ -260,47 +285,18 @@ int main(int argc, char *argv[])
exit(0);
}
struct perf start, stop;
perf_start(&start);
for (i = 0; i < iterations; i++) {
isal_deflate_init(&stream);
if (dict_file_name != NULL)
isal_deflate_set_dict(&stream, dictbuf, dictfile_size);
stream.end_of_stream = 0;
stream.flush = NO_FLUSH;
stream.level = level;
stream.level_buf = level_buf;
stream.level_buf_size = level_size;
stream.next_out = outbuf;
stream.avail_out = outbuf_size;
stream.next_in = inbuf;
stream.hist_bits = hist_bits;
avail_in = infile_size;
while (avail_in > 0) {
stream.avail_in = avail_in >= inbuf_size ? inbuf_size : avail_in;
avail_in -= inbuf_size;
if (avail_in <= 0)
stream.end_of_stream = 1;
isal_deflate(&stream);
if (stream.avail_in != 0)
break;
}
}
perf_stop(&stop);
struct perf start;
BENCHMARK(&start, time,
deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size,
level, level_buf, level_size, hist_bits, dictbuf,
dictfile_size, NULL));
if (stream.avail_in != 0) {
fprintf(stderr, "Could not compress all of inbuf\n");
exit(0);
}
printf(" file %s - in_size=%lu out_size=%d iter=%d ratio=%3.1f%%",
in_file_name, infile_size, stream.total_out, i,
printf(" file %s - in_size=%lu out_size=%d ratio=%3.1f%%",
in_file_name, infile_size, stream.total_out,
100.0 * stream.total_out / infile_size);
if (level == 0) {
@ -309,31 +305,9 @@ int main(int argc, char *argv[])
isal_update_histogram(inbuf, infile_size, &histogram);
isal_create_hufftables(&hufftables_custom, &histogram);
isal_deflate_init(&stream);
stream.end_of_stream = 0;
stream.flush = NO_FLUSH;
stream.level = level;
stream.level_buf = level_buf;
stream.level_buf_size = level_size;
stream.next_out = outbuf;
stream.avail_out = outbuf_size;
stream.next_in = inbuf;
stream.hufftables = &hufftables_custom;
stream.hist_bits = hist_bits;
avail_in = infile_size;
while (avail_in > 0) {
stream.avail_in = avail_in >= inbuf_size ? inbuf_size : avail_in;
avail_in -= inbuf_size;
if (avail_in <= 0)
stream.end_of_stream = 1;
isal_deflate(&stream);
if (stream.avail_in != 0)
break;
}
deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size,
level, level_buf, level_size, hist_bits, dictbuf,
dictfile_size, &hufftables_custom);
printf(" ratio_custom=%3.1f%%", 100.0 * stream.total_out / infile_size);
}
@ -345,7 +319,7 @@ int main(int argc, char *argv[])
}
printf("igzip_file: ");
perf_print(stop, start, (long long)infile_size * i);
perf_print(start, (long long)infile_size);
if (argc > 2 && out) {
printf("writing %s\n", out_file_name);

View File

@ -69,7 +69,7 @@ int main(int argc, char *argv[])
{
FILE *in;
unsigned char *inbuf, *outbuf;
int i, iterations, avail_in;
int iterations, avail_in;
uint64_t infile_size, outbuf_size;
struct isal_huff_histogram histogram1, histogram2;
@ -120,16 +120,12 @@ int main(int argc, char *argv[])
exit(0);
}
struct perf start, stop;
perf_start(&start);
for (i = 0; i < iterations; i++)
isal_update_histogram(inbuf, infile_size, &histogram1);
perf_stop(&stop);
printf(" file %s - in_size=%lu iter=%d\n", argv[1], infile_size, i);
struct perf start;
BENCHMARK(&start, BENCHMARK_TIME,
isal_update_histogram(inbuf, infile_size, &histogram1));
printf(" file %s - in_size=%lu\n", argv[1], infile_size);
printf("igzip_file: ");
perf_print(stop, start, (long long)infile_size * i);
perf_print(start, (long long)infile_size);
fclose(in);
fflush(0);

View File

@ -38,10 +38,6 @@
#include <zlib.h>
#define BUF_SIZE 1024
#define MIN_TEST_LOOPS 1
#ifndef RUN_MEM_SIZE
# define RUN_MEM_SIZE 200000000
#endif
#define OPTARGS "hl:f:z:i:d:stub:y:"
@ -127,21 +123,20 @@ struct perf_info {
char *file_name;
size_t file_size;
size_t deflate_size;
int32_t deflate_iter;
int32_t inflate_iter;
uint32_t inblock_size;
uint32_t flush_type;
int32_t deflate_time;
int32_t inflate_time;
struct compress_strategy strategy;
uint32_t inflate_mode;
struct perf start;
struct perf stop;
};
void init_perf_info(struct perf_info *info)
{
memset(info, 0, sizeof(*info));
info->inflate_iter = UNSET;
info->deflate_iter = UNSET;
info->deflate_time = BENCHMARK_TIME;
info->inflate_time = BENCHMARK_TIME;
}
int usage(void)
@ -153,8 +148,8 @@ int usage(void)
" -l <level> isa-l stateless deflate level to test\n"
" -f <level> isa-l stateful deflate level to test\n"
" -z <level> zlib deflate level to test\n"
" -d <iter> number of iterations for deflate (at least 1)\n"
" -i <iter> number of iterations for inflate (at least 1)\n"
" -d <time> approx time in seconds for deflate (at least 0)\n"
" -i <time> approx time in seconds for inflate (at least 0)\n"
" -s performance test isa-l stateful inflate\n"
" -t performance test isa-l stateless inflate\n"
" -u performance test zlib inflate\n"
@ -166,10 +161,8 @@ int usage(void)
void print_perf_info_line(struct perf_info *info)
{
printf("igzip_perf-> compress level: %d compress_iterations: %d "
"decompress_iterations: %d flush_type: %d block_size: %d\n",
info->strategy.level, info->deflate_iter, info->inflate_iter,
info->flush_type, info->inblock_size);
printf("igzip_perf-> compress level: %d flush_type: %d block_size: %d\n",
info->strategy.level, info->flush_type, info->inblock_size);
}
void print_file_line(struct perf_info *info)
@ -188,7 +181,7 @@ void print_deflate_perf_line(struct perf_info *info)
else if (info->strategy.mode == ZLIB)
printf(" zlib_deflate-> ");
perf_print(info->stop, info->start, info->file_size * info->deflate_iter);
perf_print(info->start, info->file_size);
}
void print_inflate_perf_line(struct perf_info *info)
@ -200,7 +193,7 @@ void print_inflate_perf_line(struct perf_info *info)
else if (info->inflate_mode == ZLIB)
printf(" zlib_inflate-> ");
perf_print(info->stop, info->start, info->file_size * info->inflate_iter);
perf_print(info->start, info->file_size);
}
int isal_deflate_round(struct isal_zstream *stream, uint8_t * outbuf, uint32_t outbuf_size,
@ -386,12 +379,12 @@ int zlib_inflate_round(z_stream * gstream, uint8_t * inbuf,
}
int isal_deflate_perf(uint8_t * outbuf, uint64_t * outbuf_size, uint8_t * inbuf,
uint64_t inbuf_size, int level, int flush_type, int iterations,
struct perf *start, struct perf *stop)
uint64_t inbuf_size, int level, int flush_type, int time,
struct perf *start)
{
struct isal_zstream stream;
uint8_t *level_buf = NULL;
int i, check;
int check;
if (level_size_buf[level] > 0) {
level_buf = malloc(level_size_buf[level]);
@ -399,31 +392,20 @@ int isal_deflate_perf(uint8_t * outbuf, uint64_t * outbuf_size, uint8_t * inbuf,
return 1;
}
check = isal_deflate_round(&stream, outbuf, *outbuf_size, inbuf,
inbuf_size, level, level_buf, level_size_buf[level],
flush_type);
perf_start(start);
for (i = 0; i < iterations && check == 0; i++)
check = isal_deflate_round(&stream, outbuf, *outbuf_size, inbuf,
inbuf_size, level, level_buf,
level_size_buf[level], flush_type);
perf_stop(stop);
BENCHMARK(start, time, check = isal_deflate_round(&stream, outbuf, *outbuf_size, inbuf,
inbuf_size, level, level_buf,
level_size_buf[level], flush_type));
*outbuf_size = stream.total_out;
return check;
}
int isal_deflate_stateful_perf(uint8_t * outbuf, uint64_t * outbuf_size, uint8_t * inbuf,
uint64_t inbuf_size, int level, int flush_type,
uint64_t in_block_size, int iterations, struct perf *start,
struct perf *stop)
uint64_t in_block_size, int time, struct perf *start)
{
struct isal_zstream stream;
uint8_t *level_buf = NULL;
int i, check;
int check;
if (in_block_size == 0)
in_block_size = inbuf_size;
@ -434,30 +416,21 @@ int isal_deflate_stateful_perf(uint8_t * outbuf, uint64_t * outbuf_size, uint8_t
return 1;
}
check = isal_deflate_stateful_round(&stream, outbuf, *outbuf_size, inbuf,
inbuf_size, in_block_size, level, level_buf,
level_size_buf[level], flush_type);
perf_start(start);
for (i = 0; i < iterations && check == 0; i++)
check = isal_deflate_stateful_round(&stream, outbuf, *outbuf_size,
inbuf, inbuf_size, in_block_size, level,
level_buf, level_size_buf[level],
flush_type);
perf_stop(stop);
BENCHMARK(start, time, check =
isal_deflate_stateful_round(&stream, outbuf, *outbuf_size, inbuf, inbuf_size,
in_block_size, level, level_buf,
level_size_buf[level], flush_type));
*outbuf_size = stream.total_out;
return 0;
return check;
}
int zlib_deflate_perf(uint8_t * outbuf, uint64_t * outbuf_size, uint8_t * inbuf,
uint64_t inbuf_size, int level, int flush_type,
uint64_t in_block_size, int iterations, struct perf *start,
struct perf *stop)
uint64_t in_block_size, int time, struct perf *start)
{
int i, check;
int check;
z_stream gstream;
int flush_translator[] = { Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FULL_FLUSH };
@ -475,16 +448,9 @@ int zlib_deflate_perf(uint8_t * outbuf, uint64_t * outbuf_size, uint8_t * inbuf,
if (0 != deflateInit2(&gstream, level, Z_DEFLATED, -15, 9, Z_DEFAULT_STRATEGY))
return 1;
check = zlib_deflate_round(&gstream, outbuf, *outbuf_size, inbuf,
inbuf_size, in_block_size, level, flush_type);
perf_start(start);
for (i = 0; i < iterations && check == 0; i++)
check = zlib_deflate_round(&gstream, outbuf, *outbuf_size, inbuf,
inbuf_size, in_block_size, level, flush_type);
perf_stop(stop);
BENCHMARK(start, time, check =
zlib_deflate_round(&gstream, outbuf, *outbuf_size, inbuf, inbuf_size,
in_block_size, level, flush_type));
*outbuf_size = gstream.total_out;
deflateEnd(&gstream);
@ -494,33 +460,28 @@ int zlib_deflate_perf(uint8_t * outbuf, uint64_t * outbuf_size, uint8_t * inbuf,
int isal_inflate_perf(uint8_t * inbuf, uint64_t inbuf_size, uint8_t * outbuf,
uint64_t outbuf_size, uint8_t * filebuf, uint64_t file_size,
int iterations, struct perf *start, struct perf *stop)
int time, struct perf *start)
{
struct inflate_state state;
int i, check;
int check;
/* Check that data decompresses */
check = isal_inflate_round(&state, inbuf, inbuf_size, outbuf, outbuf_size);
if (check || state.total_out != file_size || memcmp(outbuf, filebuf, file_size))
return 1;
perf_start(start);
for (i = 0; i < iterations && check == 0; i++)
check = isal_inflate_round(&state, inbuf, inbuf_size, outbuf, outbuf_size);
perf_stop(stop);
BENCHMARK(start, time,
isal_inflate_round(&state, inbuf, inbuf_size, outbuf, outbuf_size));
return check;
}
int isal_inflate_stateful_perf(uint8_t * inbuf, uint64_t inbuf_size, uint8_t * outbuf,
uint64_t outbuf_size, uint8_t * filebuf, uint64_t file_size,
uint64_t in_block_size, int iterations, struct perf *start,
struct perf *stop)
uint64_t in_block_size, int time, struct perf *start)
{
struct inflate_state state;
int i, check;
int check;
if (in_block_size == 0)
in_block_size = inbuf_size;
@ -529,13 +490,9 @@ int isal_inflate_stateful_perf(uint8_t * inbuf, uint64_t inbuf_size, uint8_t * o
if (check || state.total_out != file_size || memcmp(outbuf, filebuf, file_size))
return 1;
perf_start(start);
for (i = 0; i < iterations && check == 0; i++)
check = isal_inflate_stateful_round(&state, inbuf, inbuf_size, in_block_size,
outbuf, outbuf_size);
perf_stop(stop);
BENCHMARK(start, time,
isal_inflate_stateful_round(&state, inbuf, inbuf_size, in_block_size, outbuf,
outbuf_size));
return 0;
@ -543,9 +500,9 @@ int isal_inflate_stateful_perf(uint8_t * inbuf, uint64_t inbuf_size, uint8_t * o
int zlib_inflate_perf(uint8_t * inbuf, uint64_t inbuf_size, uint8_t * outbuf,
uint64_t outbuf_size, uint8_t * filebuf, uint64_t file_size,
int iterations, struct perf *start, struct perf *stop)
int time, struct perf *start)
{
int i, check;
int check;
z_stream gstream;
gstream.next_in = inbuf;
@ -560,12 +517,9 @@ int zlib_inflate_perf(uint8_t * inbuf, uint64_t inbuf_size, uint8_t * outbuf,
if (check || gstream.total_out != file_size || memcmp(outbuf, filebuf, file_size))
return 1;
perf_start(start);
BENCHMARK(start, time,
zlib_inflate_round(&gstream, inbuf, inbuf_size, outbuf, outbuf_size));
for (i = 0; i < iterations && check == 0; i++)
check = zlib_inflate_round(&gstream, inbuf, inbuf_size, outbuf, outbuf_size);
perf_stop(stop);
inflateEnd(&gstream);
return 0;
}
@ -636,13 +590,13 @@ int main(int argc, char *argv[])
compression_queue_size++;
break;
case 'i':
info.inflate_iter = atoi(optarg);
if (info.inflate_iter < 0)
info.inflate_time = atoi(optarg);
if (info.inflate_time < 0)
usage();
break;
case 'd':
info.deflate_iter = atoi(optarg);
if (info.deflate_iter < 1)
info.deflate_time = atoi(optarg);
if (info.deflate_time < 0)
usage();
break;
case 's':
@ -700,20 +654,6 @@ int main(int argc, char *argv[])
}
decompbuf_size = info.file_size;
if (info.inflate_iter == UNSET) {
info.inflate_iter =
info.file_size ? RUN_MEM_SIZE / info.file_size : MIN_TEST_LOOPS;
if (info.inflate_iter < MIN_TEST_LOOPS)
info.inflate_iter = MIN_TEST_LOOPS;
}
decompbuf_size = info.file_size;
if (info.deflate_iter == UNSET) {
info.deflate_iter =
info.file_size ? RUN_MEM_SIZE / info.file_size : MIN_TEST_LOOPS;
if (info.deflate_iter < MIN_TEST_LOOPS)
info.deflate_iter = MIN_TEST_LOOPS;
}
if (compression_queue_size == 0) {
if (info.inblock_size == 0)
@ -770,21 +710,20 @@ int main(int argc, char *argv[])
if (info.strategy.mode == ISAL_STATELESS)
ret = isal_deflate_perf(compressbuf, &info.deflate_size, filebuf,
info.file_size, compression_queue[i].level,
info.flush_type, info.deflate_iter,
&info.start, &info.stop);
info.flush_type, info.deflate_time,
&info.start);
else if (info.strategy.mode == ISAL_STATEFUL)
ret =
isal_deflate_stateful_perf(compressbuf, &info.deflate_size,
filebuf, info.file_size,
compression_queue[i].level,
info.flush_type, info.inblock_size,
info.deflate_iter, &info.start,
&info.stop);
info.deflate_time, &info.start);
else if (info.strategy.mode == ZLIB)
ret = zlib_deflate_perf(compressbuf, &info.deflate_size, filebuf,
info.file_size, compression_queue[i].level,
info.flush_type, info.inblock_size,
info.deflate_iter, &info.start, &info.stop);
info.deflate_time, &info.start);
if (ret) {
printf(" Error in compression\n");
@ -796,14 +735,14 @@ int main(int argc, char *argv[])
print_deflate_perf_line(&info);
printf("\n");
if (info.inflate_iter == 0)
if (info.inflate_time == 0)
continue;
if (inflate_strat.stateless) {
info.inflate_mode = ISAL_STATELESS;
ret = isal_inflate_perf(compressbuf, info.deflate_size, decompbuf,
decompbuf_size, filebuf, info.file_size,
info.inflate_iter, &info.start, &info.stop);
info.inflate_time, &info.start);
if (ret)
printf(" Error in isal stateless inflate\n");
else
@ -816,8 +755,7 @@ int main(int argc, char *argv[])
isal_inflate_stateful_perf(compressbuf, info.deflate_size,
decompbuf, decompbuf_size, filebuf,
info.file_size, info.inblock_size,
info.inflate_iter, &info.start,
&info.stop);
info.inflate_time, &info.start);
if (ret)
printf(" Error in isal stateful inflate\n");
@ -829,7 +767,7 @@ int main(int argc, char *argv[])
info.inflate_mode = ZLIB;
ret = zlib_inflate_perf(compressbuf, info.deflate_size, decompbuf,
decompbuf_size, filebuf, info.file_size,
info.inflate_iter, &info.start, &info.stop);
info.inflate_time, &info.start);
if (ret)
printf(" Error in zlib inflate\n");
else

View File

@ -50,7 +50,6 @@ int usage(void)
"Usage: igzip_semi_dynamic [options] <infile>\n"
" -h help\n"
" -v (don't) validate output by inflate and compare\n"
" -i <iter> iterations\n"
" -t <type> 1:stateless 0:(default)stateful\n"
" -c <size> chunk size default=%d\n"
" -s <size> sample size default=%d\n"
@ -90,6 +89,88 @@ int str_to_i(char *s)
return i;
}
void semi_dyn_stateless_perf(struct isal_zstream *stream, uint8_t * inbuf,
uint64_t infile_size, uint8_t * outbuf, uint64_t outbuf_size,
int segment_size, int hist_size)
{
struct isal_huff_histogram histogram;
struct isal_hufftables hufftable;
isal_deflate_stateless_init(stream);
stream->end_of_stream = 0;
stream->flush = FULL_FLUSH;
stream->next_in = inbuf;
stream->next_out = outbuf;
int remaining = infile_size;
int chunk_size = segment_size;
while (remaining > 0) {
// Generate custom hufftables on sample
memset(&histogram, 0, sizeof(struct isal_huff_histogram));
if (remaining < segment_size * 2) {
chunk_size = remaining;
stream->end_of_stream = 1;
}
int hist_rem = (hist_size > chunk_size) ? chunk_size : hist_size;
isal_update_histogram(stream->next_in, hist_rem, &histogram);
if (hist_rem == chunk_size)
isal_create_hufftables_subset(&hufftable, &histogram);
else
isal_create_hufftables(&hufftable, &histogram);
// Compress with custom table
stream->avail_in = chunk_size;
stream->avail_out = chunk_size + 8 * (1 + (chunk_size >> 16));
stream->hufftables = &hufftable;
remaining -= chunk_size;
isal_deflate_stateless(stream);
if (stream->avail_in != 0)
break;
}
}
void semi_dyn_stateful_perf(struct isal_zstream *stream, uint8_t * inbuf,
uint64_t infile_size, uint8_t * outbuf, uint64_t outbuf_size,
int segment_size, int hist_size)
{
struct isal_huff_histogram histogram;
struct isal_hufftables hufftable;
isal_deflate_init(stream);
stream->end_of_stream = 0;
stream->flush = SYNC_FLUSH;
stream->next_in = inbuf;
stream->next_out = outbuf;
stream->avail_out = outbuf_size;
int remaining = infile_size;
int chunk_size = segment_size;
while (remaining > 0) {
// Generate custom hufftables on sample
memset(&histogram, 0, sizeof(struct isal_huff_histogram));
if (remaining < segment_size * 2) {
chunk_size = remaining;
stream->end_of_stream = 1;
}
int hist_rem = (hist_size > chunk_size) ? chunk_size : hist_size;
isal_update_histogram(stream->next_in, hist_rem, &histogram);
if (hist_rem == chunk_size)
isal_create_hufftables_subset(&hufftable, &histogram);
else
isal_create_hufftables(&hufftable, &histogram);
// Compress with custom table
stream->avail_in = chunk_size;
stream->hufftables = &hufftable;
remaining -= chunk_size;
isal_deflate(stream);
if (stream->internal_state.state != ZSTATE_NEW_HDR)
break;
}
}
int main(int argc, char *argv[])
{
FILE *in = stdin, *out = NULL;
@ -99,14 +180,12 @@ int main(int argc, char *argv[])
int segment_size = DEFAULT_SEG_SIZE;
int sample_size = DEFAULT_SAMPLE_SIZE;
int check_output = 1;
int iterations = 0, do_stateless = 0, do_stateful = 1;
int do_stateless = 0, do_stateful = 1;
int ret = 0;
char *out_file_name = NULL;
struct isal_zstream stream;
struct isal_huff_histogram histogram;
struct isal_hufftables hufftable;
while ((c = getopt(argc, argv, "vht:c:s:o:i:")) != -1) {
while ((c = getopt(argc, argv, "vht:c:s:o:")) != -1) {
switch (c) {
case 'v':
check_output ^= 1;
@ -126,9 +205,6 @@ int main(int argc, char *argv[])
case 'o':
out_file_name = optarg;
break;
case 'i':
iterations = str_to_i(optarg);
break;
case 'h':
default:
usage();
@ -165,12 +241,6 @@ int main(int argc, char *argv[])
usage();
}
if (iterations == 0) {
iterations = RUN_MEM_SIZE / infile_size;
if (iterations < MIN_TEST_LOOPS)
iterations = MIN_TEST_LOOPS;
}
outbuf_size = infile_size * 1.30 > MIN_BUF_SIZE ? infile_size * 1.30 : MIN_BUF_SIZE;
if (NULL == (inbuf = malloc(infile_size))) {
@ -186,7 +256,7 @@ int main(int argc, char *argv[])
printf("semi-dynamic sample=%d segment=%d %s\n", hist_size, segment_size,
do_stateful ? "stateful" : "stateless");
printf("igzip_file_perf: %s %d iterations\n", argv[optind], iterations);
printf("igzip_file_perf: %s\n", argv[optind]);
// Read complete input file into buffer
stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in);
@ -195,88 +265,19 @@ int main(int argc, char *argv[])
exit(0);
}
struct perf start, stop;
struct perf start;
if (do_stateful) {
perf_start(&start);
for (i = 0; i < iterations; i++) {
isal_deflate_init(&stream);
stream.end_of_stream = 0;
stream.flush = SYNC_FLUSH;
stream.next_in = inbuf;
stream.next_out = outbuf;
stream.avail_out = outbuf_size;
int remaining = infile_size;
int chunk_size = segment_size;
while (remaining > 0) {
// Generate custom hufftables on sample
memset(&histogram, 0, sizeof(struct isal_huff_histogram));
if (remaining < segment_size * 2) {
chunk_size = remaining;
stream.end_of_stream = 1;
}
int hist_rem =
(hist_size > chunk_size) ? chunk_size : hist_size;
isal_update_histogram(stream.next_in, hist_rem, &histogram);
if (hist_rem == chunk_size)
isal_create_hufftables_subset(&hufftable, &histogram);
else
isal_create_hufftables(&hufftable, &histogram);
// Compress with custom table
stream.avail_in = chunk_size;
stream.hufftables = &hufftable;
remaining -= chunk_size;
isal_deflate(&stream);
if (stream.internal_state.state != ZSTATE_NEW_HDR)
break;
}
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME,
semi_dyn_stateful_perf(&stream, inbuf, infile_size, outbuf,
outbuf_size, segment_size, hist_size)
);
}
if (do_stateless) {
perf_start(&start);
for (i = 0; i < iterations; i++) {
isal_deflate_stateless_init(&stream);
stream.end_of_stream = 0;
stream.flush = FULL_FLUSH;
stream.next_in = inbuf;
stream.next_out = outbuf;
int remaining = infile_size;
int chunk_size = segment_size;
while (remaining > 0) {
// Generate custom hufftables on sample
memset(&histogram, 0, sizeof(struct isal_huff_histogram));
if (remaining < segment_size * 2) {
chunk_size = remaining;
stream.end_of_stream = 1;
}
int hist_rem =
(hist_size > chunk_size) ? chunk_size : hist_size;
isal_update_histogram(stream.next_in, hist_rem, &histogram);
if (hist_rem == chunk_size)
isal_create_hufftables_subset(&hufftable, &histogram);
else
isal_create_hufftables(&hufftable, &histogram);
// Compress with custom table
stream.avail_in = chunk_size;
stream.avail_out = chunk_size + 8 * (1 + (chunk_size >> 16));
stream.hufftables = &hufftable;
remaining -= chunk_size;
isal_deflate_stateless(&stream);
if (stream.avail_in != 0)
break;
}
}
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME,
semi_dyn_stateless_perf(&stream, inbuf, infile_size, outbuf,
outbuf_size, segment_size, hist_size));
}
if (stream.avail_in != 0) {
@ -288,7 +289,7 @@ int main(int argc, char *argv[])
infile_size, stream.total_out, i, 100.0 * stream.total_out / infile_size);
printf("igzip_file: ");
perf_print(stop, start, (long long)infile_size * i);
perf_print(start, (long long)infile_size);
if (out != NULL) {
printf("writing %s\n", out_file_name);

View File

@ -34,70 +34,245 @@
extern "C" {
#endif
// Use sys/time.h functions for time
#if defined (__unix__) || (__APPLE__) || (__MINGW32__)
# include <sys/time.h>
#endif
#ifdef _MSC_VER
# define inline __inline
# include <time.h>
# include <Windows.h>
#endif
#include <stdio.h>
#include <stdint.h>
struct perf{
struct timeval tv;
#ifdef _MSC_VER
# define inline __inline
#endif
/* Decide whether to use benchmark time as an approximation or a minimum. Fewer
 * calls to the timer are required for the approximation case. */
#define BENCHMARK_MIN_TIME 0
#define BENCHMARK_APPROX_TIME 1
#ifndef BENCHMARK_TYPE
#define BENCHMARK_TYPE BENCHMARK_MIN_TIME
#endif
#ifdef USE_RDTSC
/* The use of rdtsc is nuanced. On many processors it corresponds to a
 * standardized clock source. To obtain a meaningful result it may be
 * necessary to fix the CPU clock to match the rdtsc tick rate.
 */
# include <inttypes.h>
# include <x86intrin.h>
# define USE_CYCLES
#else
# include <time.h>
#define USE_SECONDS
#endif
#ifdef USE_RDTSC
#ifndef BENCHMARK_TIME
# define BENCHMARK_TIME 6
#endif
# define GHZ 1000000000
# define UNIT_SCALE (GHZ)
# define CALLIBRATE_TIME (UNIT_SCALE / 2)
/*
 * Current timestamp for the USE_RDTSC backend, in counter ticks as returned
 * by __rdtscp().
 */
static inline long long get_time(void)
{
	unsigned int aux;	/* out-parameter required by __rdtscp(); value ignored */
	long long ticks = __rdtscp(&aux);

	return ticks;
}
/* Timer resolution for the USE_RDTSC backend: always reported as one tick. */
static inline long long get_res(void)
{
	const long long one_tick = 1;

	return one_tick;
}
#else
#ifndef BENCHMARK_TIME
# define BENCHMARK_TIME 3
#endif
#ifdef _MSC_VER
#define UNIT_SCALE get_res()
#define CALLIBRATE_TIME (UNIT_SCALE / 4)
static inline long long get_time(void) {
long long ret = 0;
QueryPerformanceCounter(&ret);
return ret;
}
static inline long long get_res(void) {
long long ret = 0;
QueryPerformanceFrequency(&ret);
return ret;
}
#else
# define NANO_SCALE 1000000000
# define UNIT_SCALE NANO_SCALE
# define CALLIBRATE_TIME (UNIT_SCALE / 4)
#ifdef __FreeBSD__
# define CLOCK_ID CLOCK_MONOTONIC_PRECISE
#else
# define CLOCK_ID CLOCK_MONOTONIC
#endif
/*
 * Current timestamp for the clock_gettime() backend: the CLOCK_ID clock
 * flattened into a single nanosecond count (tv_sec * NANO_SCALE + tv_nsec).
 * The return value of clock_gettime() is not checked.
 */
static inline long long get_time(void)
{
	struct timespec now;

	clock_gettime(CLOCK_ID, &now);
	return (long long)now.tv_sec * NANO_SCALE + now.tv_nsec;
}
/*
 * Resolution of the CLOCK_ID clock as reported by clock_getres(), flattened
 * into nanoseconds (tv_sec * NANO_SCALE + tv_nsec). The return value of
 * clock_getres() is not checked.
 */
static inline long long get_res(void)
{
	struct timespec res;

	clock_getres(CLOCK_ID, &res);
	return (long long)res.tv_sec * NANO_SCALE + res.tv_nsec;
}
#endif
#endif
/*
 * State of one benchmark measurement. All time values are in the units
 * returned by get_time().
 */
struct perf {
	long long start;	/* timestamp taken by perf_continue()/perf_pause() */
	long long stop;		/* timestamp taken by the latest perf_pause() */
	long long run_total;	/* accumulated (stop - start) over all timed spans */
	long long iterations;	/* call count maintained by the BENCHMARK macros */
};
#if defined (__unix__) || (__APPLE__) || (__MINGW32__)
static inline int perf_start(struct perf *p)
{
return gettimeofday(&(p->tv), 0);
}
static inline int perf_stop(struct perf *p)
{
return gettimeofday(&(p->tv), 0);
/*
 * Reset the timing fields of *p to zero.
 *
 * The iterations field is not touched: BENCHMARK() stores an iteration count
 * in it before calling perf_start(), which goes through this function.
 */
static inline void perf_init(struct perf *p)
{
	p->start = p->stop = p->run_total = 0;
}
static inline void perf_print(struct perf stop, struct perf start, long long dsize)
{
long long secs = stop.tv.tv_sec - start.tv.tv_sec;
long long usecs = secs * 1000000 + stop.tv.tv_usec - start.tv.tv_usec;
/*
 * Begin (or resume) a timed span: record the current time as its start.
 * run_total is left as it is, so time accumulated earlier is kept.
 */
static inline void perf_continue(struct perf *p)
{
	const long long now = get_time();

	p->start = now;
}
/*
 * End the current timed span: add the time since p->start to run_total and
 * record the current time in both stop and start.
 */
static inline void perf_pause(struct perf *p)
{
	const long long now = get_time();

	p->stop = now;
	p->run_total = p->run_total + now - p->start;
	/* Moving start forward means a second pause without an intervening
	 * perf_continue() only adds the time since this one. */
	p->start = now;
}
/*
 * Start a fresh measurement: clear the timing fields, then open the first
 * timed span. Any previously accumulated run_total is discarded.
 */
static inline void perf_start(struct perf *p)
{
	perf_init(p);		/* zero start/stop/run_total */
	perf_continue(p);	/* timestamp the beginning of the span */
}
/* Finish a measurement. Currently identical to perf_pause(). */
static inline void perf_stop(struct perf *p)
{
	perf_pause(p);
}
/*
 * Accumulated run time of *p divided by UNIT_SCALE, as a double.
 * With the clock backends this is printed by perf_print() as seconds; with
 * USE_RDTSC it is printed as "GC" (UNIT_SCALE is 10^9 ticks there).
 */
static inline double get_time_elapsed(struct perf *p)
{
	const double total = (double)p->run_total;

	return total / UNIT_SCALE;
}
/* Accumulated run time of *p in raw get_time() units (no scaling applied). */
static inline long long get_base_elapsed(struct perf *p)
{
	const long long elapsed = p->run_total;

	return elapsed;
}
/*
 * Estimate how many iterations are needed to run for `total` time units.
 *
 * p      measurement whose accumulated run time covers `runs` iterations
 * runs   number of iterations that produced p's run time
 * total  target duration, in the same units as get_base_elapsed()
 *
 * Returns ceil(total * runs / elapsed). If nothing has been measured yet
 * (elapsed <= 0) the timer resolution from get_res() stands in for the
 * elapsed time.
 *
 * The result is clamped to the largest positive int: the return type is int
 * and the calling macros count iterations with an int loop variable, so an
 * unclamped 64-bit estimate would be truncated to a meaningless (possibly
 * negative) value. A non-positive resolution is treated as 1 to avoid a
 * division by zero.
 */
static inline int estimate_perf_iterations(struct perf *p, unsigned long long runs,
					   unsigned long long total)
{
	/* Largest positive int, spelled without <limits.h>; assumes int and
	 * unsigned int have the same width and no padding bits. */
	const unsigned long long max_iterations = (unsigned int)-1 / 2;
	const long long elapsed = get_base_elapsed(p);
	unsigned long long divisor;
	unsigned long long estimate;

	if (elapsed > 0) {
		divisor = elapsed;
	} else {
		const long long resolution = get_res();

		divisor = resolution > 0 ? resolution : 1;
	}

	total = total * runs;
	estimate = (total + divisor - 1) / divisor;	/* round up */

	if (estimate > max_iterations)
		return (int)max_iterations;
	return (int)estimate;
}
/*
 * CALLIBRATE(PERF, FUNC_CALL)
 *
 * Find an iteration count for which FUNC_CALL runs for at least
 * CALLIBRATE_TIME.
 *
 * FUNC_CALL is first timed once. While the measured time is still below
 * CALLIBRATE_TIME, a new iteration count is estimated from the last
 * measurement (aiming at 2 * CALLIBRATE_TIME) and that whole batch is timed
 * again from scratch (perf_start() discards the previous total).
 *
 * On exit (PERF)->iterations holds the size of the last batch and PERF's
 * accumulated run time is the time that batch took; PERFORMANCE_TEST reads
 * both.
 *
 * PERF must be a struct perf *. FUNC_CALL is pasted textually and evaluated
 * many times. The expansion is a bare { } block and declares its loop counter
 * inside for(), so it needs C99 or later.
 * NOTE(review): the loop counter is an int compared against the unsigned long
 * long _iter.
 */
#define CALLIBRATE(PERF, FUNC_CALL) { \
unsigned long long _iter = 1; \
perf_start(PERF); \
FUNC_CALL; \
perf_pause(PERF); \
\
while (get_base_elapsed(PERF) < CALLIBRATE_TIME) { \
_iter = estimate_perf_iterations(PERF, _iter, \
2 * CALLIBRATE_TIME); \
perf_start(PERF); \
for (int _i = 0; _i < _iter; _i++) { \
FUNC_CALL; \
} \
perf_stop(PERF); \
} \
(PERF)->iterations=_iter; \
}
/*
 * PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL)
 *
 * Run FUNC_CALL repeatedly for roughly RUN_TIME * UNIT_SCALE timer units and
 * leave the result in PERF (accumulated run time plus total iterations).
 *
 * Expects PERF to hold a prior measurement, as left by CALLIBRATE: its
 * iterations field and accumulated run time are used to estimate how many
 * calls fill the requested time. That batch is then timed as a fresh
 * measurement.
 *
 * If the batch finished short of the target and BENCHMARK_TYPE is
 * BENCHMARK_MIN_TIME, one additional batch is run, sized for the remaining
 * time plus a margin of UNIT_SCALE / 16, and its time is added to the same
 * measurement. Only a single top-up batch is run.
 *
 * PERF must be a struct perf *. FUNC_CALL is pasted textually and evaluated
 * many times. RUN_TIME is used unparenthesised as an initializer.
 * NOTE(review): the loop counters are int compared against the unsigned long
 * long _iter.
 */
#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL) { \
unsigned long long _iter = (PERF)->iterations; \
unsigned long long _run_total = RUN_TIME; \
_run_total *= UNIT_SCALE; \
_iter = estimate_perf_iterations(PERF, _iter, _run_total);\
(PERF)->iterations = 0; \
perf_start(PERF); \
for (int _i = 0; _i < _iter; _i++) { \
FUNC_CALL; \
} \
perf_pause(PERF); \
(PERF)->iterations += _iter; \
\
if(get_base_elapsed(PERF) < _run_total && \
BENCHMARK_TYPE == BENCHMARK_MIN_TIME) { \
_iter = estimate_perf_iterations(PERF, _iter, \
_run_total - get_base_elapsed(PERF) + \
(UNIT_SCALE / 16)); \
perf_continue(PERF); \
for (int _i = 0; _i < _iter; _i++) { \
FUNC_CALL; \
} \
perf_pause(PERF); \
(PERF)->iterations += _iter; \
} \
}
/*
 * BENCHMARK(PERF, RUN_TIME, FUNC_CALL)
 *
 * Measure FUNC_CALL into *PERF.
 *
 * When RUN_TIME is not greater than zero, FUNC_CALL is timed exactly once
 * and (PERF)->iterations is set to 1. Otherwise the call is calibrated
 * with CALLIBRATE and then measured with PERFORMANCE_TEST for RUN_TIME.
 */
#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL) { \
	if ((RUN_TIME) <= 0) { \
		(PERF)->iterations = 1; \
		perf_start(PERF); \
		FUNC_CALL; \
		perf_stop(PERF); \
	} else { \
		CALLIBRATE(PERF, FUNC_CALL); \
		PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL); \
	} \
}
#ifdef USE_CYCLES
/*
 * Print a one-line summary of measurement p in tick units.
 *
 * unit_count is the amount of data handled by one iteration; the total
 * is p.iterations * unit_count. The runtime is always printed; the
 * bandwidth part is skipped when the total is zero, which also avoids
 * dividing by zero in the ticks/byte figure.
 */
static inline void perf_print(struct perf p, long long unit_count) {
	const long long ticks = get_base_elapsed(&p);
	const long long units = p.iterations * unit_count;

	printf("runtime = %10lld ticks", ticks);
	if (units == 0) {
		printf("\n");
		return;
	}

	printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte",
	       units / 1000000, get_time_elapsed(&p), ticks / (double)units);
	printf("\n");
}
#else
static inline void perf_print(struct perf p, double unit_count) {
long long total_units = p.iterations * unit_count;
long long usecs = (long long)(get_time_elapsed(&p) * 1000000);
printf("runtime = %10lld usecs", usecs);
if (dsize != 0) {
#if 1 // not bug in printf for 32-bit
printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s\n", dsize/(1024*1024),
((double) usecs)/1000000, ((double) dsize) / (double)usecs);
#else
printf(", bandwidth %lld MB ", dsize/(1024*1024));
printf("in %.4f sec ",(double)usecs/1000000);
printf("= %.2f MB/s\n", (double)dsize/usecs);
#endif
if (total_units != 0) {
printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s",
total_units / (1000000), get_time_elapsed(&p),
((double)total_units) / (1000000 * get_time_elapsed(&p)));
}
else
printf("\n");
printf("\n");
}
#endif
static inline uint64_t get_filesize(FILE *fp)
{
static inline uint64_t get_filesize(FILE * fp) {
uint64_t file_size;
fpos_t pos, pos_curr;
fgetpos(fp, &pos_curr); /* Save current position */
fgetpos(fp, &pos_curr); /* Save current position */
#if defined(_WIN32) || defined(_WIN64)
_fseeki64(fp, 0, SEEK_END);
#else
fseeko(fp, 0, SEEK_END);
#endif
fgetpos(fp, &pos);
file_size = *(uint64_t *)&pos;
fsetpos(fp, &pos_curr); /* Restore position */
file_size = *(uint64_t *) & pos;
fsetpos(fp, &pos_curr); /* Restore position */
return file_size;
}

View File

@ -29,20 +29,19 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mem_routines.h"
#include "test.h"
#include "types.h"
#define TEST_LEN 8*1024
#define TEST_LOOPS 10000000
#define TEST_TYPE_STR "_warm"
int main(int argc, char *argv[])
{
int i;
int val = 0;
void *buf;
struct perf start, stop;
struct perf start;
printf("Test mem_zero_detect_perf %d bytes\n", TEST_LEN);
@ -50,17 +49,12 @@ int main(int argc, char *argv[])
printf("alloc error: Fail");
return -1;
}
// Warm up
isal_zero_detect(buf, TEST_LEN);
perf_start(&start);
memset(buf, 0, TEST_LEN);
BENCHMARK(&start, BENCHMARK_TIME, val |= isal_zero_detect(buf, TEST_LEN));
for (i = 0; i < TEST_LOOPS; i++)
val |= isal_zero_detect(buf, TEST_LEN);
perf_stop(&stop);
printf("mem_zero_detect_perf" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_LEN * i);
perf_print(start, (long long)TEST_LEN);
return 0;
}

View File

@ -40,7 +40,6 @@
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN 8*1024
# define TEST_LOOPS 800000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
@ -48,13 +47,9 @@
# define TEST_SOURCES 10
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
# define TEST_LOOPS 1000
# define TEST_TYPE_STR "_cold"
# else
# define TEST_TYPE_STR "_cus"
# ifndef TEST_LOOPS
# define TEST_LOOPS 1000
# endif
# endif
#endif
@ -64,7 +59,7 @@ int main(int argc, char *argv[])
{
int i;
void *buffs[TEST_SOURCES + 2];
struct perf start, stop;
struct perf start;
printf("Test pq_gen_perf %d sources X %d bytes\n", TEST_SOURCES, TEST_LEN);
@ -85,13 +80,9 @@ int main(int argc, char *argv[])
memset(buffs[i], 0, TEST_LEN);
// Warm up
pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs);
perf_start(&start);
for (i = 0; i < TEST_LOOPS; i++)
pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs);
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME, pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs));
printf("pq_gen" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_MEM * i);
perf_print(start, (long long)TEST_MEM);
return 0;
}

View File

@ -40,14 +40,12 @@
// Loop many times over same
# define TEST_SOURCES 10
# define TEST_LEN 8*1024
# define TEST_LOOPS 2000000
# define TEST_TYPE_STR "_warm"
#else
// Uncached test. Pull from large mem base.
# define TEST_SOURCES 10
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
# define TEST_LEN GT_L3_CACHE / TEST_SOURCES
# define TEST_LOOPS 1000
# define TEST_TYPE_STR "_cold"
#endif
@ -58,7 +56,7 @@ int main(int argc, char *argv[])
int i, ret, fail = 0;
void **buffs;
void *buff;
struct perf start, stop;
struct perf start;
printf("Test xor_gen_perf\n");
@ -84,15 +82,9 @@ int main(int argc, char *argv[])
for (i = 0; i < TEST_SOURCES + 1; i++)
memset(buffs[i], 0, TEST_LEN);
// Warm up
xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs);
perf_start(&start);
for (i = 0; i < TEST_LOOPS; i++)
xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs);
perf_stop(&stop);
BENCHMARK(&start, BENCHMARK_TIME, xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs));
printf("xor_gen" TEST_TYPE_STR ": ");
perf_print(stop, start, (long long)TEST_MEM * i);
perf_print(start, (long long)TEST_MEM);
return fail;
}