mirror of
https://github.com/intel/isa-l.git
synced 2025-02-25 07:41:06 +01:00
igzip: Optimized deflate_hash
Optimize deflate hash by unrolling crc calculations. Change-Id: Ief882910619a2cc3b052416d30499f6226e47419 Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
parent
cf936f0d84
commit
aff6555226
@ -74,6 +74,7 @@ perf_tests += igzip/igzip_perf
|
||||
|
||||
other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf igzip/igzip_hist_perf
|
||||
other_tests += igzip/igzip_semi_dyn_file_perf
|
||||
other_tests += igzip/igzip_build_hash_table_perf
|
||||
|
||||
other_src += igzip/bitbuf2.asm \
|
||||
igzip/data_struct2.asm \
|
||||
|
44
igzip/igzip_build_hash_table_perf.c
Normal file
44
igzip/igzip_build_hash_table_perf.c
Normal file
@ -0,0 +1,44 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <getopt.h>
|
||||
#include "igzip_lib.h"
|
||||
#include "test.h"
|
||||
|
||||
/* Size in bytes of the buffer that gets hashed on every call (32 KiB).
 * Parenthesized so the macro expands safely inside larger expressions
 * such as `x / DICT_LEN` or `x % DICT_LEN`. */
#define DICT_LEN (32 * 1024)
/* Number of times the hash routine is invoked inside the timed region. */
#define ITERATIONS 100000
|
||||
|
||||
extern void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, int dict_len);
|
||||
|
||||
/*
 * Fill `data[0 .. size-1]` with pseudo-random bytes drawn from rand().
 *
 * data: destination buffer; must hold at least `size` bytes.
 * size: number of bytes to write; 0 leaves the buffer untouched.
 *
 * The sequence depends on the current rand() state, so callers that need
 * reproducible data should call srand() first.
 */
void create_rand_data(uint8_t * data, uint32_t size)
{
	/* Index has the same unsigned type as `size`, so the comparison is
	 * not mixed-sign and cannot overflow for sizes above INT_MAX. */
	uint32_t i;

	for (i = 0; i < size; i++) {
		data[i] = rand() % 256;
	}
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, iterations = ITERATIONS;
|
||||
struct isal_zstream stream;
|
||||
uint8_t dict[DICT_LEN];
|
||||
uint32_t dict_len = DICT_LEN;
|
||||
|
||||
stream.level = 0;
|
||||
create_rand_data(dict, dict_len);
|
||||
|
||||
struct perf start, stop;
|
||||
perf_start(&start);
|
||||
|
||||
for (i = 0; i < iterations; i++) {
|
||||
isal_deflate_hash(&stream, dict, dict_len);
|
||||
}
|
||||
|
||||
perf_stop(&stop);
|
||||
|
||||
printf("igzip_build_hash_table_perf:\n");
|
||||
printf(" in_size=%u iter=%d ", dict_len, i);
|
||||
perf_print(stop, start, (long long)dict_len * i);
|
||||
}
|
@ -4,16 +4,21 @@
|
||||
%include "huffman.asm"
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
%define DICT_SLOP 4
|
||||
%define DICT_SLOP 8
|
||||
%define DICT_END_SLOP 4
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg1 rcx
|
||||
%define arg2 rdx
|
||||
%define arg3 r8
|
||||
%define swap1 rdi
|
||||
%define swap2 rsi
|
||||
%else
|
||||
%define arg1 rdi
|
||||
%define arg2 rsi
|
||||
%define arg3 rdx
|
||||
%define swap1 r8
|
||||
%define swap2 rcx
|
||||
%endif
|
||||
|
||||
%define stream arg1
|
||||
@ -23,14 +28,36 @@
|
||||
%define dict_len arg3
|
||||
%define f_i arg3
|
||||
|
||||
%define data r9
|
||||
%define f_i_tmp swap1
|
||||
|
||||
%define hash r10
|
||||
%define hash swap2
|
||||
|
||||
%define f_i_end r11
|
||||
%define hash2 r9
|
||||
|
||||
%define hash3 r10
|
||||
|
||||
%define hash4 r11
|
||||
|
||||
%define f_i_end rax
|
||||
|
||||
; FUNC_SAVE: function-entry register save; takes no macro arguments.
; Emits code only when assembling for win64, where swap1/swap2 are defined
; as rdi/rsi and both are callee-saved under the Microsoft x64 ABI.
; For any other output format the macro expands to nothing.
%macro FUNC_SAVE 0
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
push rsi ; rsi is swap2 on win64
|
||||
push rdi ; rdi is swap1 on win64
|
||||
%endif
|
||||
%endm
|
||||
|
||||
; FUNC_RESTORE: function-exit counterpart of FUNC_SAVE; takes no macro arguments.
; On win64 it pops rdi and then rsi, the reverse of the order FUNC_SAVE
; pushed them. For any other output format the macro expands to nothing.
%macro FUNC_RESTORE 0
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
pop rdi ; pushed last by FUNC_SAVE, so popped first
|
||||
pop rsi
|
||||
%endif
|
||||
%endm
|
||||
|
||||
global isal_deflate_hash_lvl0_01
|
||||
isal_deflate_hash_lvl0_01:
|
||||
FUNC_SAVE
|
||||
|
||||
%ifnidn (arg1, stream)
|
||||
mov stream, arg1
|
||||
%endif
|
||||
@ -46,16 +73,57 @@ isal_deflate_hash_lvl0_01:
|
||||
|
||||
sub f_i_end, DICT_SLOP
|
||||
cmp f_i, f_i_end
|
||||
jg end
|
||||
jg end_main
|
||||
|
||||
main_loop:
|
||||
mov data %+ d, [f_i + dict_offset]
|
||||
compute_hash hash, data
|
||||
lea f_i_tmp, [f_i + 2]
|
||||
|
||||
xor hash, hash
|
||||
crc32 hash %+ d, dword [f_i + dict_offset]
|
||||
|
||||
xor hash2, hash2
|
||||
crc32 hash2 %+ d, dword [f_i + dict_offset + 1]
|
||||
|
||||
xor hash3, hash3
|
||||
crc32 hash3 %+ d, dword [f_i_tmp + dict_offset]
|
||||
|
||||
xor hash4, hash4
|
||||
crc32 hash4 %+ d, dword [f_i_tmp + dict_offset + 1]
|
||||
|
||||
and hash, HASH_MASK
|
||||
and hash2, HASH_MASK
|
||||
and hash3, HASH_MASK
|
||||
and hash4, HASH_MASK
|
||||
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
add f_i, 1
|
||||
|
||||
mov [stream + _internal_state_head + 2 * hash2], f_i %+ w
|
||||
add f_i, 3
|
||||
|
||||
mov [stream + _internal_state_head + 2 * hash3], f_i_tmp %+ w
|
||||
add f_i_tmp, 1
|
||||
|
||||
mov [stream + _internal_state_head + 2 * hash4], f_i_tmp %+ w
|
||||
|
||||
cmp f_i, f_i_end
|
||||
jle main_loop
|
||||
|
||||
end_main:
|
||||
add f_i_end, DICT_SLOP - DICT_END_SLOP
|
||||
cmp f_i, f_i_end
|
||||
jg end
|
||||
|
||||
end_loop:
|
||||
xor hash, hash
|
||||
crc32 hash %+ d, dword [f_i + dict_offset]
|
||||
|
||||
and hash, HASH_MASK
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
|
||||
add f_i, 1
|
||||
cmp f_i, f_i_end
|
||||
jle main_loop
|
||||
jle end_loop
|
||||
end:
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
Loading…
x
Reference in New Issue
Block a user