igzip: Fixup level 3 first byte handling

Change-Id: Id9f59934d43b09af3c2ec722f5a825aa9b02e2dc
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
Roy Oursler 2018-08-17 14:31:50 -04:00
parent cd7b70dd41
commit c1876a1221
4 changed files with 126 additions and 20 deletions

View File

@ -29,6 +29,7 @@
%include "options.asm" %include "options.asm"
%include "lz0a_const.asm" %include "lz0a_const.asm"
%include "stdmac.asm"
; Macros for doing Huffman Encoding ; Macros for doing Huffman Encoding

View File

@ -30,6 +30,11 @@
%include "reg_sizes.asm" %include "reg_sizes.asm"
%include "lz0a_const.asm" %include "lz0a_const.asm"
%include "data_struct2.asm" %include "data_struct2.asm"
%include "huffman.asm"
%define USE_HSWNI
%define ARCH 04
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg1 rcx %define arg1 rcx
@ -53,7 +58,7 @@
%define f_i rax %define f_i rax
%define file_start rbp %define file_start rbp
%define tmp r9 %define tmp r9
%define encode_size r10 %define tmp2 r10
%define prev_len r11 %define prev_len r11
%define prev_dist r12 %define prev_dist r12
%define f_i_orig r13 %define f_i_orig r13
@ -169,15 +174,13 @@ func(gen_icf_map_lh1_04)
jge end_main jge end_main
;; Prep for main loop ;; Prep for main loop
mov tmp, stream
mov level_buf, [stream + _level_buf] mov level_buf, [stream + _level_buf]
sub f_i_end, LA sub f_i_end, LA
vmovdqu yincrement, [increment] vmovdqu yincrement, [increment]
vpbroadcastd yones, [ones] vpbroadcastd yones, [ones]
vmovdqu ydatas_perm2, [datas_perm2] vmovdqu ydatas_perm2, [datas_perm2]
xor prev_len, prev_len
xor prev_dist, prev_dist
;; Process first byte ;; Process first byte
vpbroadcastd yhash_prod, [hash_prod] vpbroadcastd yhash_prod, [hash_prod]
vpbroadcastd yhash_mask, [hash_mask] vpbroadcastd yhash_mask, [hash_mask]
@ -186,6 +189,52 @@ func(gen_icf_map_lh1_04)
vpmaddwd yhashes, yhashes, yhash_prod vpmaddwd yhashes, yhashes, yhash_prod
vpand yhashes, yhashes, yhash_mask vpand yhashes, yhashes, yhash_mask
vmovd hash %+ d, yhashes %+ x vmovd hash %+ d, yhashes %+ x
cmp byte [tmp + _internal_state_has_hist], IGZIP_NO_HIST
jne .has_hist
;; No history, the byte is a literal
xor prev_len, prev_len
xor prev_dist, prev_dist
mov byte [tmp + _internal_state_has_hist], IGZIP_HIST
jmp .byte_processed
.has_hist:
;; History exists, need to set prev_len and prev_dist accordingly
lea next_in, [f_i + file_start]
;; Determine match lookback distance
xor tmp, tmp
mov tmp %+ w, f_i %+ w
dec tmp
sub tmp %+ w, word [hash_table + HASH_BYTES * hash]
and tmp %+ d, [dist_mask]
neg tmp
;; Check first 8 bytes of match
mov prev_len, [next_in]
xor prev_len, [next_in + tmp - 1]
neg tmp
;; Set prev_dist
%ifidn arg1, rcx
mov tmp2, rcx
%endif
;; The third register argument is unused on Haswell and later, so tmp is
;; passed for it as well; this line will not work on earlier architectures.
get_dist_icf_code tmp, prev_dist, tmp
%ifidn arg1, rcx
mov rcx, tmp2
%endif
;; Set prev_len
xor tmp2, tmp2
tzcnt prev_len, prev_len
shr prev_len, 3
cmp prev_len, MIN_DEF_MATCH
cmovl prev_len, tmp2
.byte_processed:
mov word [hash_table + HASH_BYTES * hash], f_i %+ w mov word [hash_table + HASH_BYTES * hash], f_i %+ w
add f_i, 1 add f_i, 1
@ -472,13 +521,14 @@ loop1_end:
vpextrd tmp %+ d, ydists2 %+ x, 3 vpextrd tmp %+ d, ydists2 %+ x, 3
add tmp %+ d, f_i %+ d add tmp %+ d, f_i %+ d
vpbroadcastd yhash_prod, [hash_prod] vpbroadcastd yhash_prod %+ x, [hash_prod]
vpbroadcastd yhash_mask, [hash_mask] vpbroadcastd yhash_mask %+ x, [hash_mask]
vmovd yhashes %+ x, dword [f_i + file_start + VECT_SIZE - 1] vmovd yhashes %+ x, dword [f_i + file_start + VECT_SIZE - 1]
vpmaddwd yhashes, yhashes, yhash_prod vpmaddwd yhashes %+ x, yhashes %+ x, yhash_prod %+ x
vpmaddwd yhashes, yhashes, yhash_prod vpmaddwd yhashes %+ x, yhashes %+ x, yhash_prod %+ x
vpand yhashes, yhashes, yhash_mask vpand yhashes %+ x, yhashes %+ x, yhash_mask %+ x
vmovd hash %+ d, yhashes %+ x
mov word [hash_table + HASH_BYTES * hash], tmp %+ w mov word [hash_table + HASH_BYTES * hash], tmp %+ w

View File

@ -30,6 +30,11 @@
%include "reg_sizes.asm" %include "reg_sizes.asm"
%include "lz0a_const.asm" %include "lz0a_const.asm"
%include "data_struct2.asm" %include "data_struct2.asm"
%include "huffman.asm"
%define USE_HSWNI
%define ARCH 06
%ifdef HAVE_AS_KNOWS_AVX512 %ifdef HAVE_AS_KNOWS_AVX512
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -54,7 +59,7 @@
%define f_i rax %define f_i rax
%define file_start rbp %define file_start rbp
%define tmp r9 %define tmp r9
%define encode_size r10 %define tmp2 r10
%define prev_len r11 %define prev_len r11
%define prev_dist r12 %define prev_dist r12
%define f_i_orig r13 %define f_i_orig r13
@ -169,6 +174,7 @@ func(gen_icf_map_lh1_06)
jge end_main jge end_main
;; Prep for main loop ;; Prep for main loop
mov tmp, stream
mov level_buf, [stream + _level_buf] mov level_buf, [stream + _level_buf]
sub f_i_end, LA sub f_i_end, LA
vmovdqu64 zdatas_perm, [datas_perm] vmovdqu64 zdatas_perm, [datas_perm]
@ -191,15 +197,60 @@ func(gen_icf_map_lh1_06)
kmovq k1, [k_mask_1] kmovq k1, [k_mask_1]
kmovq k2, [k_mask_2] kmovq k2, [k_mask_2]
xor prev_len, prev_len
xor prev_dist, prev_dist
;; Process first byte ;; Process first byte
vmovd zhashes %+ x, dword [f_i + file_start] vmovd zhashes %+ x, dword [f_i + file_start]
vpmaddwd zhashes, zhashes, zhash_prod vpmaddwd zhashes, zhashes, zhash_prod
vpmaddwd zhashes, zhashes, zhash_prod vpmaddwd zhashes, zhashes, zhash_prod
vpandd zhashes, zhashes, zhash_mask vpandd zhashes, zhashes, zhash_mask
vmovd hash %+ d, zhashes %+ x vmovd hash %+ d, zhashes %+ x
cmp byte [tmp + _internal_state_has_hist], IGZIP_NO_HIST
jne .has_hist
;; No history, the byte is a literal
xor prev_len, prev_len
xor prev_dist, prev_dist
mov byte [tmp + _internal_state_has_hist], IGZIP_HIST
jmp .byte_processed
.has_hist:
;; History exists, need to set prev_len and prev_dist accordingly
lea next_in, [f_i + file_start]
;; Determine match lookback distance
xor tmp, tmp
mov tmp %+ w, f_i %+ w
dec tmp
sub tmp %+ w, word [hash_table + HASH_BYTES * hash]
vmovd tmp2 %+ d, zdist_mask %+ x
and tmp %+ d, tmp2 %+ d
neg tmp
;; Check first 8 bytes of match
mov prev_len, [next_in]
xor prev_len, [next_in + tmp - 1]
neg tmp
;; Set prev_dist
%ifidn arg1, rcx
mov tmp2, rcx
%endif
;; The third register argument is unused on Haswell and later, so tmp is
;; passed for it as well; this line will not work on earlier architectures.
get_dist_icf_code tmp, prev_dist, tmp
%ifidn arg1, rcx
mov rcx, tmp2
%endif
;; Set prev_len
xor tmp2, tmp2
tzcnt prev_len, prev_len
shr prev_len, 3
cmp prev_len, MIN_DEF_MATCH
cmovl prev_len, tmp2
.byte_processed:
mov word [hash_table + HASH_BYTES * hash], f_i %+ w mov word [hash_table + HASH_BYTES * hash], f_i %+ w
add f_i, 1 add f_i, 1
@ -383,6 +434,7 @@ loop1_end:
vpmaddwd zhashes %+ x, zhashes %+ x, zhash_prod %+ x vpmaddwd zhashes %+ x, zhashes %+ x, zhash_prod %+ x
vpmaddwd zhashes %+ x, zhashes %+ x, zhash_prod %+ x vpmaddwd zhashes %+ x, zhashes %+ x, zhash_prod %+ x
vpandd zhashes %+ x, zhashes %+ x, zhash_mask %+ x vpandd zhashes %+ x, zhashes %+ x, zhash_mask %+ x
vmovd hash %+ d, zhashes %+ x
mov word [hash_table + HASH_BYTES * hash], tmp %+ w mov word [hash_table + HASH_BYTES * hash], tmp %+ w

View File

@ -81,15 +81,18 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
if (input_size < ISAL_LOOK_AHEAD) if (input_size < ISAL_LOOK_AHEAD)
return 0; return 0;
matches_icf_lookup->lit_len = *next_in; if (stream->internal_state.has_hist == IGZIP_NO_HIST) {
matches_icf_lookup->lit_dist = 0x1e; matches_icf_lookup->lit_len = *next_in;
matches_icf_lookup->dist_extra = 0; matches_icf_lookup->lit_dist = 0x1e;
matches_icf_lookup->dist_extra = 0;
hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK; hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK;
hash_table[hash] = (uint64_t) (next_in - file_start); hash_table[hash] = (uint64_t) (next_in - file_start);
next_in++; next_in++;
matches_icf_lookup++; matches_icf_lookup++;
stream->internal_state.has_hist = IGZIP_HIST;
}
while (next_in < end_in - ISAL_LOOK_AHEAD) { while (next_in < end_in - ISAL_LOOK_AHEAD) {
hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK; hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK;