From c1876a12216f73bcb169524d32be29831579740c Mon Sep 17 00:00:00 2001 From: Roy Oursler Date: Fri, 17 Aug 2018 14:31:50 -0400 Subject: [PATCH] igzip: Fixup level 3 first byte handling Change-Id: Id9f59934d43b09af3c2ec722f5a825aa9b02e2dc Signed-off-by: Roy Oursler --- igzip/huffman.asm | 1 + igzip/igzip_gen_icf_map_lh1_04.asm | 68 ++++++++++++++++++++++++++---- igzip/igzip_gen_icf_map_lh1_06.asm | 60 ++++++++++++++++++++++++-- igzip/igzip_icf_body.c | 17 +++++--- 4 files changed, 126 insertions(+), 20 deletions(-) diff --git a/igzip/huffman.asm b/igzip/huffman.asm index daf9eda..9056b5e 100644 --- a/igzip/huffman.asm +++ b/igzip/huffman.asm @@ -29,6 +29,7 @@ %include "options.asm" %include "lz0a_const.asm" +%include "stdmac.asm" ; Macros for doing Huffman Encoding diff --git a/igzip/igzip_gen_icf_map_lh1_04.asm b/igzip/igzip_gen_icf_map_lh1_04.asm index 112c2a2..bf80e5f 100644 --- a/igzip/igzip_gen_icf_map_lh1_04.asm +++ b/igzip/igzip_gen_icf_map_lh1_04.asm @@ -30,6 +30,11 @@ %include "reg_sizes.asm" %include "lz0a_const.asm" %include "data_struct2.asm" +%include "huffman.asm" + + +%define USE_HSWNI +%define ARCH 04 %ifidn __OUTPUT_FORMAT__, win64 %define arg1 rcx @@ -53,7 +58,7 @@ %define f_i rax %define file_start rbp %define tmp r9 -%define encode_size r10 +%define tmp2 r10 %define prev_len r11 %define prev_dist r12 %define f_i_orig r13 @@ -169,15 +174,13 @@ func(gen_icf_map_lh1_04) jge end_main ;; Prep for main loop + mov tmp, stream mov level_buf, [stream + _level_buf] sub f_i_end, LA vmovdqu yincrement, [increment] vpbroadcastd yones, [ones] vmovdqu ydatas_perm2, [datas_perm2] - xor prev_len, prev_len - xor prev_dist, prev_dist - ;; Process first byte vpbroadcastd yhash_prod, [hash_prod] vpbroadcastd yhash_mask, [hash_mask] @@ -186,6 +189,52 @@ func(gen_icf_map_lh1_04) vpmaddwd yhashes, yhashes, yhash_prod vpand yhashes, yhashes, yhash_mask vmovd hash %+ d, yhashes %+ x + cmp byte [tmp + _internal_state_has_hist], IGZIP_NO_HIST + jne .has_hist + ;; No history, the byte is a literal + xor prev_len, prev_len + xor prev_dist, prev_dist + mov byte [tmp + _internal_state_has_hist], IGZIP_HIST + jmp .byte_processed + +.has_hist: + ;; History exists, need to set prev_len and prev_dist accordingly + lea next_in, [f_i + file_start] + + ;; Determine match lookback distance + xor tmp, tmp + mov tmp %+ w, f_i %+ w + dec tmp + sub tmp %+ w, word [hash_table + HASH_BYTES * hash] + + and tmp %+ d, [dist_mask] + neg tmp + + ;; Check first 8 bytes of match + mov prev_len, [next_in] + xor prev_len, [next_in + tmp - 1] + neg tmp + + ;; Set prev_dist +%ifidn arg1, rcx + mov tmp2, rcx +%endif + ;; The third register is unused on Haswell and later, + ;; This line will not work on previous architectures + get_dist_icf_code tmp, prev_dist, tmp + +%ifidn arg1, rcx + mov rcx, tmp2 +%endif + + ;; Set prev_len + xor tmp2, tmp2 + tzcnt prev_len, prev_len + shr prev_len, 3 + cmp prev_len, MIN_DEF_MATCH + cmovl prev_len, tmp2 + +.byte_processed: mov word [hash_table + HASH_BYTES * hash], f_i %+ w add f_i, 1 @@ -472,13 +521,14 @@ loop1_end: vpextrd tmp %+ d, ydists2 %+ x, 3 add tmp %+ d, f_i %+ d - vpbroadcastd yhash_prod, [hash_prod] - vpbroadcastd yhash_mask, [hash_mask] + vpbroadcastd yhash_prod %+ x, [hash_prod] + vpbroadcastd yhash_mask %+ x, [hash_mask] vmovd yhashes %+ x, dword [f_i + file_start + VECT_SIZE - 1] - vpmaddwd yhashes, yhashes, yhash_prod - vpmaddwd yhashes, yhashes, yhash_prod - vpand yhashes, yhashes, yhash_mask + vpmaddwd yhashes %+ x, yhashes %+ x, yhash_prod %+ x + vpmaddwd yhashes %+ x, yhashes %+ x, yhash_prod %+ x + vpand yhashes %+ x, yhashes %+ x, yhash_mask %+ x + vmovd hash %+ d, yhashes %+ x mov word [hash_table + HASH_BYTES * hash], tmp %+ w diff --git a/igzip/igzip_gen_icf_map_lh1_06.asm b/igzip/igzip_gen_icf_map_lh1_06.asm index cf43cf3..5f0e000 100644 --- a/igzip/igzip_gen_icf_map_lh1_06.asm +++ b/igzip/igzip_gen_icf_map_lh1_06.asm @@ -30,6 +30,11 @@ %include "reg_sizes.asm" %include "lz0a_const.asm" %include "data_struct2.asm" +%include "huffman.asm" + + +%define USE_HSWNI +%define ARCH 06 %ifdef HAVE_AS_KNOWS_AVX512 %ifidn __OUTPUT_FORMAT__, win64 @@ -54,7 +59,7 @@ %define f_i rax %define file_start rbp %define tmp r9 -%define encode_size r10 +%define tmp2 r10 %define prev_len r11 %define prev_dist r12 %define f_i_orig r13 @@ -169,6 +174,7 @@ func(gen_icf_map_lh1_06) jge end_main ;; Prep for main loop + mov tmp, stream mov level_buf, [stream + _level_buf] sub f_i_end, LA vmovdqu64 zdatas_perm, [datas_perm] @@ -191,15 +197,60 @@ func(gen_icf_map_lh1_06) kmovq k1, [k_mask_1] kmovq k2, [k_mask_2] - xor prev_len, prev_len - xor prev_dist, prev_dist - ;; Process first byte vmovd zhashes %+ x, dword [f_i + file_start] vpmaddwd zhashes, zhashes, zhash_prod vpmaddwd zhashes, zhashes, zhash_prod vpandd zhashes, zhashes, zhash_mask vmovd hash %+ d, zhashes %+ x + + cmp byte [tmp + _internal_state_has_hist], IGZIP_NO_HIST + jne .has_hist + ;; No history, the byte is a literal + xor prev_len, prev_len + xor prev_dist, prev_dist + mov byte [tmp + _internal_state_has_hist], IGZIP_HIST + jmp .byte_processed + +.has_hist: + ;; History exists, need to set prev_len and prev_dist accordingly + lea next_in, [f_i + file_start] + + ;; Determine match lookback distance + xor tmp, tmp + mov tmp %+ w, f_i %+ w + dec tmp + sub tmp %+ w, word [hash_table + HASH_BYTES * hash] + + vmovd tmp2 %+ d, zdist_mask %+ x + and tmp %+ d, tmp2 %+ d + neg tmp + + ;; Check first 8 bytes of match + mov prev_len, [next_in] + xor prev_len, [next_in + tmp - 1] + neg tmp + + ;; Set prev_dist +%ifidn arg1, rcx + mov tmp2, rcx +%endif + ;; The third register is unused on Haswell and later, + ;; This line will not work on previous architectures + get_dist_icf_code tmp, prev_dist, tmp + +%ifidn arg1, rcx + mov rcx, tmp2 +%endif + + ;; Set prev_len + xor tmp2, tmp2 + tzcnt prev_len, prev_len + shr prev_len, 3 + cmp prev_len, MIN_DEF_MATCH + cmovl prev_len, tmp2 + +.byte_processed: mov word [hash_table + HASH_BYTES * hash], f_i %+ w add f_i, 1 @@ -383,6 +434,7 @@ loop1_end: vpmaddwd zhashes %+ x, zhashes %+ x, zhash_prod %+ x vpmaddwd zhashes %+ x, zhashes %+ x, zhash_prod %+ x vpandd zhashes %+ x, zhashes %+ x, zhash_mask %+ x + vmovd hash %+ d, zhashes %+ x mov word [hash_table + HASH_BYTES * hash], tmp %+ w diff --git a/igzip/igzip_icf_body.c b/igzip/igzip_icf_body.c index 0b6e317..ad79ec8 100644 --- a/igzip/igzip_icf_body.c +++ b/igzip/igzip_icf_body.c @@ -81,15 +81,18 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream, if (input_size < ISAL_LOOK_AHEAD) return 0; - matches_icf_lookup->lit_len = *next_in; - matches_icf_lookup->lit_dist = 0x1e; - matches_icf_lookup->dist_extra = 0; + if (stream->internal_state.has_hist == IGZIP_NO_HIST) { + matches_icf_lookup->lit_len = *next_in; + matches_icf_lookup->lit_dist = 0x1e; + matches_icf_lookup->dist_extra = 0; - hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK; - hash_table[hash] = (uint64_t) (next_in - file_start); + hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK; + hash_table[hash] = (uint64_t) (next_in - file_start); - next_in++; - matches_icf_lookup++; + next_in++; + matches_icf_lookup++; + stream->internal_state.has_hist = IGZIP_HIST; + } while (next_in < end_in - ISAL_LOOK_AHEAD) { hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK;