mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 09:23:50 +01:00
igzip: Fixup level 3 first byte handling
Change-Id: Id9f59934d43b09af3c2ec722f5a825aa9b02e2dc
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
parent
cd7b70dd41
commit
c1876a1221
@ -29,6 +29,7 @@
|
|||||||
|
|
||||||
%include "options.asm"
|
%include "options.asm"
|
||||||
%include "lz0a_const.asm"
|
%include "lz0a_const.asm"
|
||||||
|
%include "stdmac.asm"
|
||||||
|
|
||||||
; Macros for doing Huffman Encoding
|
; Macros for doing Huffman Encoding
|
||||||
|
|
||||||
|
@ -30,6 +30,11 @@
|
|||||||
%include "reg_sizes.asm"
|
%include "reg_sizes.asm"
|
||||||
%include "lz0a_const.asm"
|
%include "lz0a_const.asm"
|
||||||
%include "data_struct2.asm"
|
%include "data_struct2.asm"
|
||||||
|
%include "huffman.asm"
|
||||||
|
|
||||||
|
|
||||||
|
%define USE_HSWNI
|
||||||
|
%define ARCH 04
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg1 rcx
|
%define arg1 rcx
|
||||||
@ -53,7 +58,7 @@
|
|||||||
%define f_i rax
|
%define f_i rax
|
||||||
%define file_start rbp
|
%define file_start rbp
|
||||||
%define tmp r9
|
%define tmp r9
|
||||||
%define encode_size r10
|
%define tmp2 r10
|
||||||
%define prev_len r11
|
%define prev_len r11
|
||||||
%define prev_dist r12
|
%define prev_dist r12
|
||||||
%define f_i_orig r13
|
%define f_i_orig r13
|
||||||
@ -169,15 +174,13 @@ func(gen_icf_map_lh1_04)
|
|||||||
jge end_main
|
jge end_main
|
||||||
|
|
||||||
;; Prep for main loop
|
;; Prep for main loop
|
||||||
|
mov tmp, stream
|
||||||
mov level_buf, [stream + _level_buf]
|
mov level_buf, [stream + _level_buf]
|
||||||
sub f_i_end, LA
|
sub f_i_end, LA
|
||||||
vmovdqu yincrement, [increment]
|
vmovdqu yincrement, [increment]
|
||||||
vpbroadcastd yones, [ones]
|
vpbroadcastd yones, [ones]
|
||||||
vmovdqu ydatas_perm2, [datas_perm2]
|
vmovdqu ydatas_perm2, [datas_perm2]
|
||||||
|
|
||||||
xor prev_len, prev_len
|
|
||||||
xor prev_dist, prev_dist
|
|
||||||
|
|
||||||
;; Process first byte
|
;; Process first byte
|
||||||
vpbroadcastd yhash_prod, [hash_prod]
|
vpbroadcastd yhash_prod, [hash_prod]
|
||||||
vpbroadcastd yhash_mask, [hash_mask]
|
vpbroadcastd yhash_mask, [hash_mask]
|
||||||
@ -186,6 +189,52 @@ func(gen_icf_map_lh1_04)
|
|||||||
vpmaddwd yhashes, yhashes, yhash_prod
|
vpmaddwd yhashes, yhashes, yhash_prod
|
||||||
vpand yhashes, yhashes, yhash_mask
|
vpand yhashes, yhashes, yhash_mask
|
||||||
vmovd hash %+ d, yhashes %+ x
|
vmovd hash %+ d, yhashes %+ x
|
||||||
|
cmp byte [tmp + _internal_state_has_hist], IGZIP_NO_HIST
|
||||||
|
jne .has_hist
|
||||||
|
;; No history, the byte is a literal
|
||||||
|
xor prev_len, prev_len
|
||||||
|
xor prev_dist, prev_dist
|
||||||
|
mov byte [tmp + _internal_state_has_hist], IGZIP_HIST
|
||||||
|
jmp .byte_processed
|
||||||
|
|
||||||
|
.has_hist:
|
||||||
|
;; History exists, need to set prev_len and prev_dist accordingly
|
||||||
|
lea next_in, [f_i + file_start]
|
||||||
|
|
||||||
|
;; Determine match lookback distance
|
||||||
|
xor tmp, tmp
|
||||||
|
mov tmp %+ w, f_i %+ w
|
||||||
|
dec tmp
|
||||||
|
sub tmp %+ w, word [hash_table + HASH_BYTES * hash]
|
||||||
|
|
||||||
|
and tmp %+ d, [dist_mask]
|
||||||
|
neg tmp
|
||||||
|
|
||||||
|
;; Check first 8 bytes of match
|
||||||
|
mov prev_len, [next_in]
|
||||||
|
xor prev_len, [next_in + tmp - 1]
|
||||||
|
neg tmp
|
||||||
|
|
||||||
|
;; Set prev_dist
|
||||||
|
%ifidn arg1, rcx
|
||||||
|
mov tmp2, rcx
|
||||||
|
%endif
|
||||||
|
;; The third register is unused on Haswell and later,
|
||||||
|
;; so this line will not work on pre-Haswell architectures
|
||||||
|
get_dist_icf_code tmp, prev_dist, tmp
|
||||||
|
|
||||||
|
%ifidn arg1, rcx
|
||||||
|
mov rcx, tmp2
|
||||||
|
%endif
|
||||||
|
|
||||||
|
;; Set prev_len
|
||||||
|
xor tmp2, tmp2
|
||||||
|
tzcnt prev_len, prev_len
|
||||||
|
shr prev_len, 3
|
||||||
|
cmp prev_len, MIN_DEF_MATCH
|
||||||
|
cmovl prev_len, tmp2
|
||||||
|
|
||||||
|
.byte_processed:
|
||||||
mov word [hash_table + HASH_BYTES * hash], f_i %+ w
|
mov word [hash_table + HASH_BYTES * hash], f_i %+ w
|
||||||
|
|
||||||
add f_i, 1
|
add f_i, 1
|
||||||
@ -472,13 +521,14 @@ loop1_end:
|
|||||||
vpextrd tmp %+ d, ydists2 %+ x, 3
|
vpextrd tmp %+ d, ydists2 %+ x, 3
|
||||||
add tmp %+ d, f_i %+ d
|
add tmp %+ d, f_i %+ d
|
||||||
|
|
||||||
vpbroadcastd yhash_prod, [hash_prod]
|
vpbroadcastd yhash_prod %+ x, [hash_prod]
|
||||||
vpbroadcastd yhash_mask, [hash_mask]
|
vpbroadcastd yhash_mask %+ x, [hash_mask]
|
||||||
|
|
||||||
vmovd yhashes %+ x, dword [f_i + file_start + VECT_SIZE - 1]
|
vmovd yhashes %+ x, dword [f_i + file_start + VECT_SIZE - 1]
|
||||||
vpmaddwd yhashes, yhashes, yhash_prod
|
vpmaddwd yhashes %+ x, yhashes %+ x, yhash_prod %+ x
|
||||||
vpmaddwd yhashes, yhashes, yhash_prod
|
vpmaddwd yhashes %+ x, yhashes %+ x, yhash_prod %+ x
|
||||||
vpand yhashes, yhashes, yhash_mask
|
vpand yhashes %+ x, yhashes %+ x, yhash_mask %+ x
|
||||||
|
vmovd hash %+ d, yhashes %+ x
|
||||||
|
|
||||||
mov word [hash_table + HASH_BYTES * hash], tmp %+ w
|
mov word [hash_table + HASH_BYTES * hash], tmp %+ w
|
||||||
|
|
||||||
|
@ -30,6 +30,11 @@
|
|||||||
%include "reg_sizes.asm"
|
%include "reg_sizes.asm"
|
||||||
%include "lz0a_const.asm"
|
%include "lz0a_const.asm"
|
||||||
%include "data_struct2.asm"
|
%include "data_struct2.asm"
|
||||||
|
%include "huffman.asm"
|
||||||
|
|
||||||
|
|
||||||
|
%define USE_HSWNI
|
||||||
|
%define ARCH 06
|
||||||
|
|
||||||
%ifdef HAVE_AS_KNOWS_AVX512
|
%ifdef HAVE_AS_KNOWS_AVX512
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
@ -54,7 +59,7 @@
|
|||||||
%define f_i rax
|
%define f_i rax
|
||||||
%define file_start rbp
|
%define file_start rbp
|
||||||
%define tmp r9
|
%define tmp r9
|
||||||
%define encode_size r10
|
%define tmp2 r10
|
||||||
%define prev_len r11
|
%define prev_len r11
|
||||||
%define prev_dist r12
|
%define prev_dist r12
|
||||||
%define f_i_orig r13
|
%define f_i_orig r13
|
||||||
@ -169,6 +174,7 @@ func(gen_icf_map_lh1_06)
|
|||||||
jge end_main
|
jge end_main
|
||||||
|
|
||||||
;; Prep for main loop
|
;; Prep for main loop
|
||||||
|
mov tmp, stream
|
||||||
mov level_buf, [stream + _level_buf]
|
mov level_buf, [stream + _level_buf]
|
||||||
sub f_i_end, LA
|
sub f_i_end, LA
|
||||||
vmovdqu64 zdatas_perm, [datas_perm]
|
vmovdqu64 zdatas_perm, [datas_perm]
|
||||||
@ -191,15 +197,60 @@ func(gen_icf_map_lh1_06)
|
|||||||
kmovq k1, [k_mask_1]
|
kmovq k1, [k_mask_1]
|
||||||
kmovq k2, [k_mask_2]
|
kmovq k2, [k_mask_2]
|
||||||
|
|
||||||
xor prev_len, prev_len
|
|
||||||
xor prev_dist, prev_dist
|
|
||||||
|
|
||||||
;; Process first byte
|
;; Process first byte
|
||||||
vmovd zhashes %+ x, dword [f_i + file_start]
|
vmovd zhashes %+ x, dword [f_i + file_start]
|
||||||
vpmaddwd zhashes, zhashes, zhash_prod
|
vpmaddwd zhashes, zhashes, zhash_prod
|
||||||
vpmaddwd zhashes, zhashes, zhash_prod
|
vpmaddwd zhashes, zhashes, zhash_prod
|
||||||
vpandd zhashes, zhashes, zhash_mask
|
vpandd zhashes, zhashes, zhash_mask
|
||||||
vmovd hash %+ d, zhashes %+ x
|
vmovd hash %+ d, zhashes %+ x
|
||||||
|
|
||||||
|
cmp byte [tmp + _internal_state_has_hist], IGZIP_NO_HIST
|
||||||
|
jne .has_hist
|
||||||
|
;; No history, the byte is a literal
|
||||||
|
xor prev_len, prev_len
|
||||||
|
xor prev_dist, prev_dist
|
||||||
|
mov byte [tmp + _internal_state_has_hist], IGZIP_HIST
|
||||||
|
jmp .byte_processed
|
||||||
|
|
||||||
|
.has_hist:
|
||||||
|
;; History exists, need to set prev_len and prev_dist accordingly
|
||||||
|
lea next_in, [f_i + file_start]
|
||||||
|
|
||||||
|
;; Determine match lookback distance
|
||||||
|
xor tmp, tmp
|
||||||
|
mov tmp %+ w, f_i %+ w
|
||||||
|
dec tmp
|
||||||
|
sub tmp %+ w, word [hash_table + HASH_BYTES * hash]
|
||||||
|
|
||||||
|
vmovd tmp2 %+ d, zdist_mask %+ x
|
||||||
|
and tmp %+ d, tmp2 %+ d
|
||||||
|
neg tmp
|
||||||
|
|
||||||
|
;; Check first 8 bytes of match
|
||||||
|
mov prev_len, [next_in]
|
||||||
|
xor prev_len, [next_in + tmp - 1]
|
||||||
|
neg tmp
|
||||||
|
|
||||||
|
;; Set prev_dist
|
||||||
|
%ifidn arg1, rcx
|
||||||
|
mov tmp2, rcx
|
||||||
|
%endif
|
||||||
|
;; The third register is unused on Haswell and later,
|
||||||
|
;; so this line will not work on pre-Haswell architectures
|
||||||
|
get_dist_icf_code tmp, prev_dist, tmp
|
||||||
|
|
||||||
|
%ifidn arg1, rcx
|
||||||
|
mov rcx, tmp2
|
||||||
|
%endif
|
||||||
|
|
||||||
|
;; Set prev_len
|
||||||
|
xor tmp2, tmp2
|
||||||
|
tzcnt prev_len, prev_len
|
||||||
|
shr prev_len, 3
|
||||||
|
cmp prev_len, MIN_DEF_MATCH
|
||||||
|
cmovl prev_len, tmp2
|
||||||
|
|
||||||
|
.byte_processed:
|
||||||
mov word [hash_table + HASH_BYTES * hash], f_i %+ w
|
mov word [hash_table + HASH_BYTES * hash], f_i %+ w
|
||||||
|
|
||||||
add f_i, 1
|
add f_i, 1
|
||||||
@ -383,6 +434,7 @@ loop1_end:
|
|||||||
vpmaddwd zhashes %+ x, zhashes %+ x, zhash_prod %+ x
|
vpmaddwd zhashes %+ x, zhashes %+ x, zhash_prod %+ x
|
||||||
vpmaddwd zhashes %+ x, zhashes %+ x, zhash_prod %+ x
|
vpmaddwd zhashes %+ x, zhashes %+ x, zhash_prod %+ x
|
||||||
vpandd zhashes %+ x, zhashes %+ x, zhash_mask %+ x
|
vpandd zhashes %+ x, zhashes %+ x, zhash_mask %+ x
|
||||||
|
vmovd hash %+ d, zhashes %+ x
|
||||||
|
|
||||||
mov word [hash_table + HASH_BYTES * hash], tmp %+ w
|
mov word [hash_table + HASH_BYTES * hash], tmp %+ w
|
||||||
|
|
||||||
|
@ -81,15 +81,18 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
|
|||||||
if (input_size < ISAL_LOOK_AHEAD)
|
if (input_size < ISAL_LOOK_AHEAD)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
matches_icf_lookup->lit_len = *next_in;
|
if (stream->internal_state.has_hist == IGZIP_NO_HIST) {
|
||||||
matches_icf_lookup->lit_dist = 0x1e;
|
matches_icf_lookup->lit_len = *next_in;
|
||||||
matches_icf_lookup->dist_extra = 0;
|
matches_icf_lookup->lit_dist = 0x1e;
|
||||||
|
matches_icf_lookup->dist_extra = 0;
|
||||||
|
|
||||||
hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK;
|
hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK;
|
||||||
hash_table[hash] = (uint64_t) (next_in - file_start);
|
hash_table[hash] = (uint64_t) (next_in - file_start);
|
||||||
|
|
||||||
next_in++;
|
next_in++;
|
||||||
matches_icf_lookup++;
|
matches_icf_lookup++;
|
||||||
|
stream->internal_state.has_hist = IGZIP_HIST;
|
||||||
|
}
|
||||||
|
|
||||||
while (next_in < end_in - ISAL_LOOK_AHEAD) {
|
while (next_in < end_in - ISAL_LOOK_AHEAD) {
|
||||||
hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK;
|
hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK;
|
||||||
|
Loading…
Reference in New Issue
Block a user