igzip: Optimized deflate_hash

Optimize the deflate hash routine by unrolling the CRC calculations.

Change-Id: Ief882910619a2cc3b052416d30499f6226e47419
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
Roy Oursler 2017-06-09 14:00:01 -07:00 committed by Greg Tucker
parent cf936f0d84
commit aff6555226
3 changed files with 121 additions and 8 deletions

View File

@ -74,6 +74,7 @@ perf_tests += igzip/igzip_perf
other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf igzip/igzip_hist_perf
other_tests += igzip/igzip_semi_dyn_file_perf
other_tests += igzip/igzip_build_hash_table_perf
other_src += igzip/bitbuf2.asm \
igzip/data_struct2.asm \

View File

@ -0,0 +1,44 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <getopt.h>
#include "igzip_lib.h"
#include "test.h"
#define DICT_LEN 32*1024
#define ITERATIONS 100000
extern void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, int dict_len);
/*
 * Fill a buffer with pseudo-random bytes.
 *
 * data: destination buffer; must have room for at least size bytes.
 * size: number of bytes to write; 0 leaves the buffer untouched.
 *
 * Every byte is rand() % 256, so the output is fully determined by the
 * current srand() seed.
 */
void create_rand_data(uint8_t * data, uint32_t size)
{
	/* Index has the same type as size: no signed/unsigned comparison and
	 * no signed overflow when size exceeds INT_MAX. */
	uint32_t i;

	for (i = 0; i < size; i++)
		data[i] = rand() % 256;
}
/*
 * Performance test for isal_deflate_hash().
 *
 * Fills a DICT_LEN-byte dictionary with pseudo-random data, runs
 * isal_deflate_hash() over it ITERATIONS times at level 0, and prints the
 * measured result via perf_print(). Command-line arguments are ignored.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
	int i, iterations = ITERATIONS;
	struct isal_zstream stream;
	uint8_t dict[DICT_LEN];
	uint32_t dict_len = DICT_LEN;
	struct perf start, stop;

	/* Zero the whole stream so no field is handed to the library in an
	 * indeterminate state; only the level is set explicitly. */
	memset(&stream, 0, sizeof(stream));
	stream.level = 0;

	create_rand_data(dict, dict_len);

	perf_start(&start);
	for (i = 0; i < iterations; i++) {
		isal_deflate_hash(&stream, dict, dict_len);
	}
	perf_stop(&stop);

	printf("igzip_build_hash_table_perf:\n");
	printf(" in_size=%u iter=%d ", dict_len, i);
	/* Total bytes processed = dictionary size * completed iterations. */
	perf_print(stop, start, (long long)dict_len * i);

	return 0;
}

View File

@ -4,16 +4,21 @@
%include "huffman.asm"
%include "reg_sizes.asm"
%define DICT_SLOP 4
%define DICT_SLOP 8
%define DICT_END_SLOP 4
; Give the first three integer argument registers and two scratch registers
; format-independent names (arg1..arg3, swap1, swap2), chosen by the
; assembler output format.
%ifidn __OUTPUT_FORMAT__, win64
; win64 output: arg1..arg3 map to rcx, rdx, r8
%define arg1 rcx
%define arg2 rdx
%define arg3 r8
; scratch registers on win64 are rdi and rsi; FUNC_SAVE/FUNC_RESTORE
; push and pop exactly these two registers for this output format
%define swap1 rdi
%define swap2 rsi
%else
; any other output format: arg1..arg3 map to rdi, rsi, rdx
%define arg1 rdi
%define arg2 rsi
%define arg3 rdx
; scratch registers here are r8 and rcx; FUNC_SAVE/FUNC_RESTORE emit
; nothing for these formats
%define swap1 r8
%define swap2 rcx
%endif
%define stream arg1
@ -23,14 +28,36 @@
%define dict_len arg3
%define f_i arg3
%define data r9
%define f_i_tmp swap1
%define hash r10
%define hash swap2
%define f_i_end r11
%define hash2 r9
%define hash3 r10
%define hash4 r11
%define f_i_end rax
; FUNC_SAVE: prologue macro, takes no parameters.
; For win64 output it pushes rsi and then rdi, the registers this file
; aliases as swap2 and swap1 on that target (callee-saved under the
; Microsoft x64 convention). For any other output format it expands to
; nothing. Pair with FUNC_RESTORE, which pops in the reverse order.
%macro FUNC_SAVE 0
%ifidn __OUTPUT_FORMAT__, win64
push rsi
push rdi
%endif
%endm
; FUNC_RESTORE: epilogue macro, takes no parameters.
; For win64 output it pops rdi and then rsi, the reverse of the push order
; in FUNC_SAVE, so both registers get back their saved values. For any
; other output format it expands to nothing.
%macro FUNC_RESTORE 0
%ifidn __OUTPUT_FORMAT__, win64
pop rdi
pop rsi
%endif
%endm
global isal_deflate_hash_lvl0_01
isal_deflate_hash_lvl0_01:
FUNC_SAVE
%ifnidn (arg1, stream)
mov stream, arg1
%endif
@ -46,16 +73,57 @@ isal_deflate_hash_lvl0_01:
sub f_i_end, DICT_SLOP
cmp f_i, f_i_end
jg end
jg end_main
main_loop:
mov data %+ d, [f_i + dict_offset]
compute_hash hash, data
lea f_i_tmp, [f_i + 2]
xor hash, hash
crc32 hash %+ d, dword [f_i + dict_offset]
xor hash2, hash2
crc32 hash2 %+ d, dword [f_i + dict_offset + 1]
xor hash3, hash3
crc32 hash3 %+ d, dword [f_i_tmp + dict_offset]
xor hash4, hash4
crc32 hash4 %+ d, dword [f_i_tmp + dict_offset + 1]
and hash, HASH_MASK
and hash2, HASH_MASK
and hash3, HASH_MASK
and hash4, HASH_MASK
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
add f_i, 1
mov [stream + _internal_state_head + 2 * hash2], f_i %+ w
add f_i, 3
mov [stream + _internal_state_head + 2 * hash3], f_i_tmp %+ w
add f_i_tmp, 1
mov [stream + _internal_state_head + 2 * hash4], f_i_tmp %+ w
cmp f_i, f_i_end
jle main_loop
end_main:
add f_i_end, DICT_SLOP - DICT_END_SLOP
cmp f_i, f_i_end
jg end
end_loop:
xor hash, hash
crc32 hash %+ d, dword [f_i + dict_offset]
and hash, HASH_MASK
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
add f_i, 1
cmp f_i, f_i_end
jle main_loop
jle end_loop
end:
FUNC_RESTORE
ret