From aff65552268ffd2bcc4dbfb0b1f185c02627bbb3 Mon Sep 17 00:00:00 2001 From: Roy Oursler Date: Fri, 9 Jun 2017 14:00:01 -0700 Subject: [PATCH] igzip: Optimized deflate_hash Optimize deflate hash by unrolling crc calculations. Change-Id: Ief882910619a2cc3b052416d30499f6226e47419 Signed-off-by: Roy Oursler --- igzip/Makefile.am | 1 + igzip/igzip_build_hash_table_perf.c | 44 +++++++++++++++ igzip/igzip_deflate_hash.asm | 84 ++++++++++++++++++++++++++--- 3 files changed, 121 insertions(+), 8 deletions(-) create mode 100644 igzip/igzip_build_hash_table_perf.c diff --git a/igzip/Makefile.am b/igzip/Makefile.am index ad43c45..09d9187 100644 --- a/igzip/Makefile.am +++ b/igzip/Makefile.am @@ -74,6 +74,7 @@ perf_tests += igzip/igzip_perf other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf igzip/igzip_hist_perf other_tests += igzip/igzip_semi_dyn_file_perf +other_tests += igzip/igzip_build_hash_table_perf other_src += igzip/bitbuf2.asm \ igzip/data_struct2.asm \ diff --git a/igzip/igzip_build_hash_table_perf.c b/igzip/igzip_build_hash_table_perf.c new file mode 100644 index 0000000..003f576 --- /dev/null +++ b/igzip/igzip_build_hash_table_perf.c @@ -0,0 +1,44 @@ +#include /* NOTE(review): this and the next four #include directives carry no header name in this copy; presumably the angle-bracket names were stripped in extraction - restore them before building */ +#include +#include +#include +#include +#include "igzip_lib.h" +#include "test.h" + +#define DICT_LEN 32*1024 /* NOTE(review): expansion is unparenthesized; it is only used as an array size and as an initializer below */ +#define ITERATIONS 100000 /* number of timed isal_deflate_hash calls made by main */ + +extern void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, int dict_len); +/* Fill data[0 .. size-1] with bytes computed as rand() % 256. rand() is never seeded in this file. */ +void create_rand_data(uint8_t * data, uint32_t size) +{ + int i; /* NOTE(review): int index compared against the uint32_t size in the loop condition below */ + for (i = 0; i < size; i++) { + data[i] = rand() % 256; + } +} +/* Perf driver: fills dict with random bytes, then makes ITERATIONS back-to-back calls to isal_deflate_hash on it between perf_start and perf_stop and reports the result through perf_print. argc and argv are unused, and the function ends without an explicit return statement. */ +int main(int argc, char *argv[]) +{ + int i, iterations = ITERATIONS; + struct isal_zstream stream; + uint8_t dict[DICT_LEN]; + uint32_t dict_len = DICT_LEN; + + stream.level = 0; /* NOTE(review): level is the only field of stream assigned before the timed calls - confirm isal_deflate_hash needs nothing else initialized */ + create_rand_data(dict, dict_len); + + struct perf start, stop; + perf_start(&start); + + for (i = 0; i < iterations; i++) { + isal_deflate_hash(&stream, dict, dict_len); + } + + perf_stop(&stop); + +
printf("igzip_build_hash_table_perf:\n"); + printf(" in_size=%u iter=%d ", dict_len, i); + perf_print(stop, start, (long long)dict_len * i); /* i equals iterations once the loop has finished, so the last argument is dict_len multiplied by the number of calls made */ +} diff --git a/igzip/igzip_deflate_hash.asm b/igzip/igzip_deflate_hash.asm index 162014e..a78db79 100644 --- a/igzip/igzip_deflate_hash.asm +++ b/igzip/igzip_deflate_hash.asm @@ -4,16 +4,21 @@ %include "huffman.asm" %include "reg_sizes.asm" -%define DICT_SLOP 4 +%define DICT_SLOP 8 +%define DICT_END_SLOP 4 %ifidn __OUTPUT_FORMAT__, win64 %define arg1 rcx %define arg2 rdx %define arg3 r8 +%define swap1 rdi +%define swap2 rsi %else %define arg1 rdi %define arg2 rsi %define arg3 rdx +%define swap1 r8 +%define swap2 rcx %endif %define stream arg1 @@ -23,14 +28,36 @@ %define dict_len arg3 %define f_i arg3 -%define data r9 +%define f_i_tmp swap1 -%define hash r10 +%define hash swap2 -%define f_i_end r11 +%define hash2 r9 + +%define hash3 r10 + +%define hash4 r11 + +%define f_i_end rax + +%macro FUNC_SAVE 0 +%ifidn __OUTPUT_FORMAT__, win64 + push rsi + push rdi +%endif +%endm + +%macro FUNC_RESTORE 0 +%ifidn __OUTPUT_FORMAT__, win64 + pop rdi + pop rsi +%endif +%endm global isal_deflate_hash_lvl0_01 isal_deflate_hash_lvl0_01: + FUNC_SAVE + %ifnidn (arg1, stream) mov stream, arg1 %endif @@ -46,16 +73,57 @@ isal_deflate_hash_lvl0_01: sub f_i_end, DICT_SLOP cmp f_i, f_i_end - jg end + jg end_main main_loop: - mov data %+ d, [f_i + dict_offset] - compute_hash hash, data + lea f_i_tmp, [f_i + 2] + + xor hash, hash + crc32 hash %+ d, dword [f_i + dict_offset] + + xor hash2, hash2 + crc32 hash2 %+ d, dword [f_i + dict_offset + 1] + + xor hash3, hash3 + crc32 hash3 %+ d, dword [f_i_tmp + dict_offset] + + xor hash4, hash4 + crc32 hash4 %+ d, dword [f_i_tmp + dict_offset + 1] + + and hash, HASH_MASK + and hash2, HASH_MASK + and hash3, HASH_MASK + and hash4, HASH_MASK + + mov [stream + _internal_state_head + 2 * hash], f_i %+ w + add f_i, 1 + + mov [stream + _internal_state_head + 2 * hash2], f_i %+ w + add f_i, 3 + + mov [stream + 
_internal_state_head + 2 * hash3], f_i_tmp %+ w + add f_i_tmp, 1 + + mov [stream + _internal_state_head + 2 * hash4], f_i_tmp %+ w + + cmp f_i, f_i_end + jle main_loop + +end_main: + add f_i_end, DICT_SLOP - DICT_END_SLOP + cmp f_i, f_i_end + jg end + +end_loop: + xor hash, hash + crc32 hash %+ d, dword [f_i + dict_offset] + and hash, HASH_MASK mov [stream + _internal_state_head + 2 * hash], f_i %+ w add f_i, 1 cmp f_i, f_i_end - jle main_loop + jle end_loop end: + FUNC_RESTORE ret