From 03bef684a4e9ed4441fb7f18eaae53cef973eb26 Mon Sep 17 00:00:00 2001 From: Roy Oursler Date: Wed, 6 Jun 2018 16:33:19 -0700 Subject: [PATCH] igzip: Setup for variable hash mask Change-Id: I3be94dbc40c2e02dcff4f89e5a9df8ed1f744f02 Signed-off-by: Roy Oursler --- igzip/bitbuf2.asm | 5 +- igzip/data_struct2.asm | 4 +- igzip/igzip.c | 33 ++++- igzip/igzip_base.c | 10 +- igzip/igzip_base_aliases.c | 6 +- igzip/igzip_body.asm | 67 ++++----- igzip/igzip_finish.asm | 29 ++-- igzip/igzip_gen_icf_map_lh1_04.asm | 19 +-- igzip/igzip_gen_icf_map_lh1_06.asm | 4 +- igzip/igzip_icf_base.c | 221 ++--------------------------- igzip/igzip_icf_body.c | 5 +- igzip/igzip_icf_body_h1_gr_bt.asm | 63 ++++---- igzip/igzip_icf_finish.asm | 43 +++--- igzip/igzip_multibinary.asm | 11 +- igzip/stdmac.asm | 12 ++ include/igzip_lib.h | 3 +- 16 files changed, 184 insertions(+), 351 deletions(-) diff --git a/igzip/bitbuf2.asm b/igzip/bitbuf2.asm index 38a4f89..f40db78 100644 --- a/igzip/bitbuf2.asm +++ b/igzip/bitbuf2.asm @@ -32,14 +32,13 @@ ; Assumes m_out_buf is a register ; Clobbers RCX ; code is clobbered -; write_bits_always m_bits, m_bit_count, code, count, m_out_buf, tmp1 -%macro write_bits 6 +; write_bits m_bits, m_bit_count, code, count, m_out_buf +%macro write_bits 5 %define %%m_bits %1 %define %%m_bit_count %2 %define %%code %3 %define %%count %4 %define %%m_out_buf %5 -%define %%tmp1 %6 %ifdef USE_HSWNI shlx %%code, %%code, %%m_bit_count diff --git a/igzip/data_struct2.asm b/igzip/data_struct2.asm index a49f3f2..233e264 100644 --- a/igzip/data_struct2.asm +++ b/igzip/data_struct2.asm @@ -160,9 +160,10 @@ FIELD _total_in_start,4, 4 FIELD _block_next, 4, 4 FIELD _block_end, 4, 4 FIELD _dist_mask, 4, 4 +FIELD _hash_mask, 4, 4 +FIELD _state, 4, 4 FIELD _bitbuf, _BitBuf2_size, _BitBuf2_align FIELD _crc, 4, 4 -FIELD _state, 4, 4 FIELD _has_wrap_hdr, 1, 1 FIELD _has_eob_hdr, 1, 1 FIELD _has_eob, 1, 1 @@ -218,6 +219,7 @@ _internal_state_b_bytes_valid equ _internal_state+_b_bytes_valid
_internal_state_b_bytes_processed equ _internal_state+_b_bytes_processed _internal_state_crc equ _internal_state+_crc _internal_state_dist_mask equ _internal_state+_dist_mask +_internal_state_hash_mask equ _internal_state+_hash_mask _internal_state_bitbuf equ _internal_state+_bitbuf _internal_state_state equ _internal_state+_state _internal_state_count equ _internal_state+_count diff --git a/igzip/igzip.c b/igzip/igzip.c index 6894bac..5682b10 100644 --- a/igzip/igzip.c +++ b/igzip/igzip.c @@ -923,6 +923,25 @@ static void inline set_dist_mask(struct isal_zstream *stream) } +static void inline set_hash_mask(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + /* Cache the hash-table index mask for stream->level in state->hash_mask. */ + switch (stream->level) { + case 3: + state->hash_mask = LVL3_HASH_MASK; + break; + case 2: + state->hash_mask = LVL2_HASH_MASK; + break; + case 1: + state->hash_mask = LVL1_HASH_MASK; + break; + default: /* level 0 and any other level: matches the default (lvl0) branch of isal_deflate_hash() */ + state->hash_mask = LVL0_HASH_MASK; + } +} + void isal_deflate_init(struct isal_zstream *stream) { struct isal_zstate *state = &stream->internal_state; @@ -1036,26 +1055,28 @@ void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, uint32_t dic /* Reset history to prevent out of bounds matches this works because * dictionary must set at least 1 element in the history */ struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint32_t hash_mask = stream->internal_state.hash_mask; + switch (stream->level) { case 3: memset(level_buf->lvl3.hash_table, -1, sizeof(level_buf->lvl3.hash_table)); - isal_deflate_hash_lvl3(level_buf->lvl3.hash_table, LVL3_HASH_MASK, + isal_deflate_hash_lvl3(level_buf->lvl3.hash_table, hash_mask, stream->total_in, dict, dict_len); break; case 2: memset(level_buf->lvl2.hash_table, -1, sizeof(level_buf->lvl2.hash_table)); - isal_deflate_hash_lvl2(level_buf->lvl2.hash_table, LVL2_HASH_MASK, + isal_deflate_hash_lvl2(level_buf->lvl2.hash_table, hash_mask, stream->total_in, dict, dict_len); break; case 1:
memset(level_buf->lvl1.hash_table, -1, sizeof(level_buf->lvl1.hash_table)); - isal_deflate_hash_lvl1(level_buf->lvl1.hash_table, LVL1_HASH_MASK, + isal_deflate_hash_lvl1(level_buf->lvl1.hash_table, hash_mask, stream->total_in, dict, dict_len); break; default: memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head)); - isal_deflate_hash_lvl0(stream->internal_state.head, LVL0_HASH_MASK, + isal_deflate_hash_lvl0(stream->internal_state.head, hash_mask, stream->total_in, dict, dict_len); } @@ -1128,6 +1149,8 @@ int isal_deflate_stateless(struct isal_zstream *stream) return level_check; } + set_hash_mask(stream); + if (avail_in == 0) stored_len = TYPE0_BLK_HDR_LEN; else @@ -1263,6 +1286,7 @@ int isal_deflate(struct isal_zstream *stream) if (state->has_hist == IGZIP_NO_HIST) { set_dist_mask(stream); + set_hash_mask(stream); stream->total_in -= buffered_size; reset_match_history(stream); stream->total_in += buffered_size; @@ -1270,6 +1294,7 @@ int isal_deflate(struct isal_zstream *stream) } else if (state->has_hist == IGZIP_DICT_HIST) { set_dist_mask(stream); + set_hash_mask(stream); isal_deflate_hash(stream, state->buffer, state->b_bytes_processed); } diff --git a/igzip/igzip_base.c b/igzip/igzip_base.c index dcfb939..02a54af 100644 --- a/igzip/igzip_base.c +++ b/igzip/igzip_base.c @@ -37,6 +37,7 @@ void isal_deflate_body_base(struct isal_zstream *stream) uint16_t *last_seen = state->head; uint8_t *file_start = stream->next_in - stream->total_in; uint32_t hist_size = state->dist_mask; + uint32_t hash_mask = state->hash_mask; if (stream->avail_in == 0) { if (stream->end_of_stream || stream->flush != NO_FLUSH) @@ -58,7 +59,7 @@ void isal_deflate_body_base(struct isal_zstream *stream) } literal = *(uint32_t *) next_in; - hash = compute_hash(literal) & LVL0_HASH_MASK; + hash = compute_hash(literal) & hash_mask; dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; last_seen[hash] = (uint64_t) (next_in - file_start); @@ -79,7 +80,7 @@ void 
isal_deflate_body_base(struct isal_zstream *stream) for (; next_hash < end; next_hash++) { literal = *(uint32_t *) next_hash; - hash = compute_hash(literal) & LVL0_HASH_MASK; + hash = compute_hash(literal) & hash_mask; last_seen[hash] = (uint64_t) (next_hash - file_start); } @@ -124,6 +125,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream) uint16_t *last_seen = state->head; uint8_t *file_start = stream->next_in - stream->total_in; uint32_t hist_size = state->dist_mask; + uint32_t hash_mask = state->hash_mask; set_buf(&state->bitbuf, stream->next_out, stream->avail_out); @@ -139,7 +141,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream) } literal = *(uint32_t *) next_in; - hash = compute_hash(literal) & LVL0_HASH_MASK; + hash = compute_hash(literal) & hash_mask; dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; last_seen[hash] = (uint64_t) (next_in - file_start); @@ -158,7 +160,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream) for (; next_hash < end - 3; next_hash++) { literal = *(uint32_t *) next_hash; - hash = compute_hash(literal) & LVL0_HASH_MASK; + hash = compute_hash(literal) & hash_mask; last_seen[hash] = (uint64_t) (next_hash - file_start); } diff --git a/igzip/igzip_base_aliases.c b/igzip/igzip_base_aliases.c index 1afead8..3c584d8 100644 --- a/igzip/igzip_base_aliases.c +++ b/igzip/igzip_base_aliases.c @@ -34,10 +34,8 @@ void isal_deflate_body_base(struct isal_zstream *stream); void isal_deflate_finish_base(struct isal_zstream *stream); -void isal_deflate_icf_body_hash8k_base(struct isal_zstream *stream); void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream); void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream); -void isal_deflate_icf_finish_hash8k_base(struct isal_zstream *stream); void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream); void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream); void isal_update_histogram_base(uint8_t * start_stream, 
int length, @@ -68,7 +66,7 @@ void isal_deflate_finish(struct isal_zstream *stream) void isal_deflate_icf_body_lvl1(struct isal_zstream *stream) { - isal_deflate_icf_body_hash8k_base(stream); + isal_deflate_icf_body_hash_hist_base(stream); } void isal_deflate_icf_body_lvl2(struct isal_zstream *stream) @@ -83,7 +81,7 @@ void isal_deflate_icf_body_lvl3(struct isal_zstream *stream) void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream) { - isal_deflate_icf_finish_hash8k_base(stream); + isal_deflate_icf_finish_hash_hist_base(stream); } void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream) diff --git a/igzip/igzip_body.asm b/igzip/igzip_body.asm index 10741b6..ea9fe04 100644 --- a/igzip/igzip_body.asm +++ b/igzip/igzip_body.asm @@ -52,6 +52,7 @@ %define tmp4 rbx %define dist rbx %define code2 rbx +%define hmask1 rbx %define hash rdx %define len rdx @@ -172,7 +173,8 @@ isal_deflate_body_ %+ ARCH %+ : mov stream, rcx mov byte [stream + _internal_state_has_eob], 0 - MOVDQU xmask, [mask] + MOVD xmask, [stream + _internal_state_hash_mask] + PSHUFD xmask, xmask, 0 ; state->bitbuf.set_buf(stream->next_out, stream->avail_out); mov m_out_buf, [stream + _next_out] @@ -203,6 +205,7 @@ isal_deflate_body_ %+ ARCH %+ : cmp f_end_i, f_i jle .input_end + MOVD hmask1 %+ d, xmask ; for (f_i = f_start_i; f_i < f_end_i; f_i++) { MOVDQU xdata, [file_start + f_i] mov curr_data, [file_start + f_i] @@ -214,8 +217,8 @@ isal_deflate_body_ %+ ARCH %+ : shr tmp3, 8 compute_hash hash2, tmp3 - and hash, LVL0_HASH_MASK - and hash2, LVL0_HASH_MASK + and hash %+ d, hmask1 %+ d + and hash2 %+ d, hmask1 %+ d cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST je .write_first_byte @@ -315,6 +318,8 @@ isal_deflate_body_ %+ ARCH %+ : %endif get_len_code len2, code, rcx, hufftables ;; rcx is code_len + MOVD hmask1 %+ d, xmask + SHLX code4, code4, rcx or code4, code add code_len2, rcx @@ -322,12 +327,13 @@ isal_deflate_body_ %+ ARCH %+ : add f_i, len2 neg len2 + SHLX code4, code4, 
code_len3 + MOVQ tmp5, xdata shr tmp5, 24 - compute_hash tmp4, tmp5 - and tmp4, LVL0_HASH_MASK + compute_hash hash2, tmp5 + and hash2 %+ d, hmask1 %+ d - SHLX code4, code4, code_len3 or code4, code3 add code_len2, code_len3 @@ -336,23 +342,23 @@ isal_deflate_body_ %+ ARCH %+ : MOVDQU xdata, [file_start + f_i] mov curr_data, [file_start + f_i] - mov curr_data2, curr_data MOVD hash %+ d, xhash - PEXTRD hash2 %+ d, xhash, 1 + PEXTRD tmp6 %+ d, xhash, 1 mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w compute_hash hash, curr_data add tmp3,1 - mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w + mov [stream + _internal_state_head + 2 * tmp6], tmp3 %+ w add tmp3, 1 - mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w + mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w - write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf, tmp4 + write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf mov f_end_i, [rsp + f_end_i_mem_offset] + mov curr_data2, curr_data shr curr_data2, 8 compute_hash hash2, curr_data2 @@ -362,16 +368,16 @@ isal_deflate_body_ %+ ARCH %+ : cmp tmp3, f_i jae .loop3_done mov tmp6, [file_start + tmp3] - compute_hash tmp4, tmp6 - and tmp4 %+ d, LVL0_HASH_MASK + compute_hash tmp1, tmp6 + and tmp1 %+ d, hmask1 %+ d ; state->head[hash] = k; - mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w + mov [stream + _internal_state_head + 2 * tmp1], tmp3 %+ w jmp .loop3 .loop3_done: %endif - ; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK; - and hash %+ d, LVL0_HASH_MASK - and hash2 %+ d, LVL0_HASH_MASK + ; hash = compute_hash(state->file_start + f_i) & hash_mask; + and hash %+ d, hmask1 %+ d + and hash2 %+ d, hmask1 %+ d ; continue cmp f_i, f_end_i @@ -400,8 +406,8 @@ isal_deflate_body_ %+ ARCH %+ : ; code2 <<= code_len ; code2 |= code ; code_len2 += code_len - SHLX code2, code2, rcx - or code2, code + SHLX code4, code2, rcx + or code4, code add code_len2, rcx ;; Setup for updateing hash @@ -414,14 +420,15 
@@ isal_deflate_body_ %+ ARCH %+ : add tmp3,1 mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w + MOVD hmask1 %+ d, xmask MOVDQU xdata, [file_start + f_i] mov curr_data, [file_start + f_i] - mov curr_data2, curr_data compute_hash hash, curr_data - write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp7 + write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf mov f_end_i, [rsp + f_end_i_mem_offset] + mov curr_data2, curr_data shr curr_data2, 8 compute_hash hash2, curr_data2 @@ -431,16 +438,16 @@ isal_deflate_body_ %+ ARCH %+ : cmp tmp3, f_i jae .loop4_done mov tmp6, [file_start + tmp3] - compute_hash tmp4, tmp6 - and tmp4, LVL0_HASH_MASK - mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w + compute_hash tmp1, tmp6 + and tmp1 %+ d, hmask1 %+ d + mov [stream + _internal_state_head + 2 * tmp1], tmp3 %+ w jmp .loop4 .loop4_done: %endif - ; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK; - and hash %+ d, LVL0_HASH_MASK - and hash2 %+ d, LVL0_HASH_MASK + ; hash = compute_hash(state->file_start + f_i) & hash_mask; + and hash %+ d, hmask1 %+ d + and hash2 %+ d, hmask1 %+ d ; continue cmp f_i, f_end_i @@ -455,7 +462,7 @@ isal_deflate_body_ %+ ARCH %+ : MOVD hash %+ d, xhash - write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3 + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf PEXTRD hash2 %+ d, xhash, 1 @@ -564,7 +571,3 @@ isal_deflate_body_ %+ ARCH %+ : %xdefine COMPARE_TYPE1 COMPARE_TYPE2 %endif %endrep - -section .data - align 16 -mask: dd LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK diff --git a/igzip/igzip_finish.asm b/igzip/igzip_finish.asm index e90f646..36823e1 100644 --- a/igzip/igzip_finish.asm +++ b/igzip/igzip_finish.asm @@ -60,12 +60,14 @@ %define f_i rdi %define code_len2 rbp +%define hmask1 rbp %define m_out_buf r8 %define m_bits r9 %define dist r10 +%define hmask2 r10 %define m_bit_count r11 @@ -131,9 +133,9 @@ skip_SLOP: cmp m_out_buf, [stream + 
_internal_state_bitbuf_m_out_end] ja end_loop_2 - + mov hmask1 %+ d, dword [stream + _internal_state_hash_mask] compute_hash hash, curr_data - and hash %+ d, LVL0_HASH_MASK + and hash %+ d, hmask1 %+ d mov [stream + _internal_state_head + 2 * hash], f_i %+ w mov byte [stream + _internal_state_has_hist], IGZIP_HIST jmp encode_literal @@ -142,15 +144,15 @@ skip_write_first_byte: loop2: mov tmp3 %+ d, dword [stream + _internal_state_dist_mask] - + mov hmask1 %+ d, dword [stream + _internal_state_hash_mask] ; if (state->bitbuf.is_full()) { cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] ja end_loop_2 - ; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK; + ; hash = compute_hash(state->file_start + f_i) & hash_mask; mov curr_data %+ d, [file_start + f_i] compute_hash hash, curr_data - and hash %+ d, LVL0_HASH_MASK + and hash %+ d, hmask1 %+ d ; f_index = state->head[hash]; movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash] @@ -198,6 +200,7 @@ loop2: ; get_len_code(len, &code, &code_len); get_len_code len, code, rcx, hufftables ;; rcx is code_len + mov hmask2 %+ d, dword [stream + _internal_state_hash_mask] ; code2 <<= code_len ; code2 |= code ; code_len2 += code_len @@ -213,24 +216,24 @@ loop2: ; only update hash twice - ; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK; + ; hash = compute_hash(state->file_start + k) & hash_mask; mov tmp6 %+ d, dword [file_start + tmp3] compute_hash hash, tmp6 - and hash %+ d, LVL0_HASH_MASK + and hash %+ d, hmask2 %+ d ; state->head[hash] = k; mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w add tmp3, 1 - ; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK; + ; hash = compute_hash(state->file_start + k) & hash_mask; mov tmp6 %+ d, dword [file_start + tmp3] compute_hash hash, tmp6 - and hash %+ d, LVL0_HASH_MASK + and hash %+ d, hmask2 %+ d ; state->head[hash] = k; mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w skip_hash_update: - write_bits m_bits, 
m_bit_count, code2, code_len2, m_out_buf, tmp5 + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf ; continue cmp f_i, [rsp + f_end_i_mem_offset] @@ -242,7 +245,7 @@ encode_literal: movzx tmp5, byte [file_start + f_i] get_lit_code tmp5, code2, code_len2, hufftables - write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5 + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf ; continue add f_i, 1 @@ -263,7 +266,7 @@ final_bytes: ja not_end movzx tmp5, byte [file_start + f_i] get_lit_code tmp5, code2, code_len2, hufftables - write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3 + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf inc f_i cmp f_i, [rsp + f_end_i_mem_offset] @@ -276,7 +279,7 @@ write_eob: ; get_lit_code(256, &code2, &code_len2); get_lit_code 256, code2, code_len2, hufftables - write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp1 + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf mov byte [stream + _internal_state_has_eob], 1 cmp word [stream + _end_of_stream], 1 diff --git a/igzip/igzip_gen_icf_map_lh1_04.asm b/igzip/igzip_gen_icf_map_lh1_04.asm index f4371ec..301ad62 100644 --- a/igzip/igzip_gen_icf_map_lh1_04.asm +++ b/igzip/igzip_gen_icf_map_lh1_04.asm @@ -100,8 +100,8 @@ %define ydist_mask ymm15 %ifidn __OUTPUT_FORMAT__, win64 -%define stack_size 10*16 + 6 * 8 + 8 -%define local_storage_offset (stack_size - 8) +%define stack_size 10*16 + 6 * 8 + 3 * 8 +%define local_storage_offset (stack_size - 16) %define func(x) proc_frame x %macro FUNC_SAVE 0 @@ -144,7 +144,7 @@ add rsp, stack_size %endm %else -%define stack_size 8 +%define stack_size 16 %define local_storage_offset 0 %define func(x) x: @@ -164,6 +164,7 @@ %endif %define dist_mask_offset local_storage_offset +%define hash_mask_offset local_storage_offset + 8 %define VECT_SIZE 8 %define HASH_BYTES 2 @@ -184,6 +185,8 @@ func(gen_icf_map_lh1_04) ;; Prep for main loop mov tmp %+ d, dword [stream + _internal_state_dist_mask] 
mov [rsp + dist_mask_offset], tmp + mov tmp %+ d, dword [stream + _internal_state_hash_mask] + mov [rsp + hash_mask_offset], tmp mov tmp, stream mov level_buf, [stream + _level_buf] sub f_i_end, LA @@ -193,7 +196,7 @@ func(gen_icf_map_lh1_04) ;; Process first byte vpbroadcastd yhash_prod, [hash_prod] - vpbroadcastd yhash_mask, [hash_mask] + vpbroadcastd yhash_mask, [rsp + hash_mask_offset] vmovd yhashes %+ x, dword [f_i + file_start] vpmaddwd yhashes, yhashes, yhash_prod vpmaddwd yhashes, yhashes, yhash_prod @@ -299,7 +302,7 @@ func(gen_icf_map_lh1_04) ;; Compute hash for next loop vpbroadcastd yhash_prod, [hash_prod] - vpbroadcastd yhash_mask, [hash_mask] + vpbroadcastd yhash_mask, [rsp + hash_mask_offset] vmovdqu datas, [f_i + file_start + VECT_SIZE] vpermq yhashes, datas, 0x44 vpshufb yhashes, yhashes, [datas_shuf] @@ -362,7 +365,7 @@ loop1: ;; Compute hash for next loop vpbroadcastd yhash_prod, [hash_prod] - vpbroadcastd yhash_mask, [hash_mask] + vpbroadcastd yhash_mask, [rsp + hash_mask_offset] vpermq yhashes, datas_lookup, 0x44 vpshufb yhashes, yhashes, [datas_shuf] vpmaddwd yhashes, yhashes, yhash_prod @@ -532,7 +535,7 @@ loop1_end: add tmp %+ d, f_i %+ d vpbroadcastd yhash_prod %+ x, [hash_prod] - vpbroadcastd yhash_mask %+ x, [hash_mask] + vpbroadcastd yhash_mask %+ x, [rsp + hash_mask_offset] vmovd yhashes %+ x, dword [f_i + file_start + VECT_SIZE - 1] vpmaddwd yhashes %+ x, yhashes %+ x, yhash_prod %+ x @@ -722,8 +725,6 @@ hash_prod: dw PROD1, PROD2 null_dist_syms: dd LIT -hash_mask: - dd HASH_MAP_HASH_MASK twofiftyfour: dd 0xfe shortest_matches: diff --git a/igzip/igzip_gen_icf_map_lh1_06.asm b/igzip/igzip_gen_icf_map_lh1_06.asm index aefee09..fc01700 100644 --- a/igzip/igzip_gen_icf_map_lh1_06.asm +++ b/igzip/igzip_gen_icf_map_lh1_06.asm @@ -175,13 +175,13 @@ func(gen_icf_map_lh1_06) ;; Prep for main loop vpbroadcastd zdist_mask, dword [stream + _internal_state_dist_mask] + vpbroadcastd zhash_mask, dword [stream + _internal_state_hash_mask] mov tmp, 
stream mov level_buf, [stream + _level_buf] sub f_i_end, LA vmovdqu64 zdatas_perm, [datas_perm] vbroadcasti32x8 zdatas_shuf, [datas_shuf] vpbroadcastd zhash_prod, [hash_prod] - vpbroadcastd zhash_mask, [hash_mask] vmovdqu64 zincrement, [increment] vmovdqu64 zqword_shuf, [qword_shuf] vbroadcasti64x2 zdatas_perm2, [datas_perm2] @@ -569,8 +569,6 @@ thirty: dd 0x1e twofiftyfour: dd 0xfe -hash_mask: - dd HASH_MAP_HASH_MASK lit_len_mask: dd LIT_LEN_MASK shortest_matches: diff --git a/igzip/igzip_icf_base.c b/igzip/igzip_icf_base.c index 4a46957..fde43c5 100644 --- a/igzip/igzip_icf_base.c +++ b/igzip/igzip_icf_base.c @@ -32,102 +32,6 @@ static inline void update_state(struct isal_zstream *stream, uint8_t * start_in, level_buf->icf_buf_avail_out = end_out - next_out; } -void isal_deflate_icf_body_hash8k_base(struct isal_zstream *stream) -{ - uint32_t literal, hash; - uint8_t *start_in, *next_in, *end_in, *end, *next_hash; - struct deflate_icf *start_out, *next_out, *end_out; - uint16_t match_length; - uint32_t dist; - uint32_t code, code2, extra_bits; - struct isal_zstate *state = &stream->internal_state; - struct level_buf *level_buf = (struct level_buf *)stream->level_buf; - uint16_t *last_seen = level_buf->hash8k.hash_table; - uint8_t *file_start = stream->next_in - stream->total_in; - uint32_t hist_size = state->dist_mask; - - if (stream->avail_in == 0) { - if (stream->end_of_stream || stream->flush != NO_FLUSH) - state->state = ZSTATE_FLUSH_READ_BUFFER; - return; - } - - start_in = stream->next_in; - end_in = start_in + stream->avail_in; - next_in = start_in; - - start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next; - end_out = - start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out / - sizeof(struct deflate_icf); - next_out = start_out; - - while (next_in + ISAL_LOOK_AHEAD < end_in) { - - if (next_out >= end_out) { - state->state = ZSTATE_CREATE_HDR; - update_state(stream, start_in, next_in, end_in, start_out, next_out, - end_out); - 
return; - } - - literal = *(uint32_t *) next_in; - hash = compute_hash(literal) & HASH8K_HASH_MASK; - dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; - last_seen[hash] = (uint64_t) (next_in - file_start); - - /* The -1 are to handle the case when dist = 0 */ - if (dist - 1 < hist_size) { - assert(dist != 0); - - match_length = compare258(next_in - dist, next_in, 258); - - if (match_length >= SHORTEST_MATCH) { - next_hash = next_in; -#ifdef ISAL_LIMIT_HASH_UPDATE - end = next_hash + 3; -#else - end = next_hash + match_length; -#endif - next_hash++; - - for (; next_hash < end; next_hash++) { - literal = *(uint32_t *) next_hash; - hash = compute_hash(literal) & HASH8K_HASH_MASK; - last_seen[hash] = (uint64_t) (next_hash - file_start); - } - - get_len_icf_code(match_length, &code); - get_dist_icf_code(dist, &code2, &extra_bits); - - level_buf->hist.ll_hist[code]++; - level_buf->hist.d_hist[code2]++; - - write_deflate_icf(next_out, code, code2, extra_bits); - next_out++; - next_in += match_length; - - continue; - } - } - - get_lit_icf_code(literal & 0xFF, &code); - level_buf->hist.ll_hist[code]++; - write_deflate_icf(next_out, code, NULL_DIST_SYM, 0); - next_out++; - next_in++; - } - - update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out); - - assert(stream->avail_in <= ISAL_LOOK_AHEAD); - if (stream->end_of_stream || stream->flush != NO_FLUSH) - state->state = ZSTATE_FLUSH_READ_BUFFER; - - return; - -} - void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream) { uint32_t literal, hash; @@ -141,6 +45,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream) uint16_t *last_seen = level_buf->hash_hist.hash_table; uint8_t *file_start = stream->next_in - stream->total_in; uint32_t hist_size = state->dist_mask; + uint32_t hash_mask = state->hash_mask; if (stream->avail_in == 0) { if (stream->end_of_stream || stream->flush != NO_FLUSH) @@ -168,7 +73,7 @@ void isal_deflate_icf_body_hash_hist_base(struct 
isal_zstream *stream) } literal = *(uint32_t *) next_in; - hash = compute_hash(literal) & HASH_HIST_HASH_MASK; + hash = compute_hash(literal) & hash_mask; dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; last_seen[hash] = (uint64_t) (next_in - file_start); @@ -189,7 +94,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream) for (; next_hash < end; next_hash++) { literal = *(uint32_t *) next_hash; - hash = compute_hash(literal) & HASH_HIST_HASH_MASK; + hash = compute_hash(literal) & hash_mask; last_seen[hash] = (uint64_t) (next_hash - file_start); } @@ -224,116 +129,6 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream) } -void isal_deflate_icf_finish_hash8k_base(struct isal_zstream *stream) -{ - uint32_t literal = 0, hash; - uint8_t *start_in, *next_in, *end_in, *end, *next_hash; - struct deflate_icf *start_out, *next_out, *end_out; - uint16_t match_length; - uint32_t dist; - uint32_t code, code2, extra_bits; - struct isal_zstate *state = &stream->internal_state; - struct level_buf *level_buf = (struct level_buf *)stream->level_buf; - uint16_t *last_seen = level_buf->hash8k.hash_table; - uint8_t *file_start = stream->next_in - stream->total_in; - uint32_t hist_size = state->dist_mask; - - start_in = stream->next_in; - end_in = start_in + stream->avail_in; - next_in = start_in; - - start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next; - end_out = start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out / - sizeof(struct deflate_icf); - next_out = start_out; - - if (stream->avail_in == 0) { - if (stream->end_of_stream || stream->flush != NO_FLUSH) - state->state = ZSTATE_CREATE_HDR; - return; - } - - while (next_in + 3 < end_in) { - if (next_out >= end_out) { - state->state = ZSTATE_CREATE_HDR; - update_state(stream, start_in, next_in, end_in, start_out, next_out, - end_out); - return; - } - - literal = *(uint32_t *) next_in; - hash = compute_hash(literal) & HASH8K_HASH_MASK; - dist = 
(next_in - file_start - last_seen[hash]) & 0xFFFF; - last_seen[hash] = (uint64_t) (next_in - file_start); - - if (dist - 1 < hist_size) { /* The -1 are to handle the case when dist = 0 */ - match_length = compare258(next_in - dist, next_in, end_in - next_in); - - if (match_length >= SHORTEST_MATCH) { - next_hash = next_in; -#ifdef ISAL_LIMIT_HASH_UPDATE - end = next_hash + 3; -#else - end = next_hash + match_length; -#endif - next_hash++; - - for (; next_hash < end - 3; next_hash++) { - literal = *(uint32_t *) next_hash; - hash = compute_hash(literal) & HASH8K_HASH_MASK; - last_seen[hash] = (uint64_t) (next_hash - file_start); - } - - get_len_icf_code(match_length, &code); - get_dist_icf_code(dist, &code2, &extra_bits); - - level_buf->hist.ll_hist[code]++; - level_buf->hist.d_hist[code2]++; - - write_deflate_icf(next_out, code, code2, extra_bits); - - next_out++; - next_in += match_length; - - continue; - } - } - - get_lit_icf_code(literal & 0xFF, &code); - level_buf->hist.ll_hist[code]++; - write_deflate_icf(next_out, code, NULL_DIST_SYM, 0); - next_out++; - next_in++; - - } - - while (next_in < end_in) { - if (next_out >= end_out) { - state->state = ZSTATE_CREATE_HDR; - update_state(stream, start_in, next_in, end_in, start_out, next_out, - end_out); - return; - } - - literal = *next_in; - get_lit_icf_code(literal & 0xFF, &code); - level_buf->hist.ll_hist[code]++; - write_deflate_icf(next_out, code, NULL_DIST_SYM, 0); - next_out++; - next_in++; - - } - - if (next_in == end_in) { - if (stream->end_of_stream || stream->flush != NO_FLUSH) - state->state = ZSTATE_CREATE_HDR; - } - - update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out); - - return; -} - void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream) { uint32_t literal = 0, hash; @@ -347,6 +142,7 @@ void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream) uint16_t *last_seen = level_buf->hash_hist.hash_table; uint8_t *file_start = stream->next_in - 
stream->total_in; uint32_t hist_size = state->dist_mask; + uint32_t hash_mask = state->hash_mask; start_in = stream->next_in; end_in = start_in + stream->avail_in; @@ -372,7 +168,7 @@ void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream) } literal = *(uint32_t *) next_in; - hash = compute_hash(literal) & HASH_HIST_HASH_MASK; + hash = compute_hash(literal) & hash_mask; dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; last_seen[hash] = (uint64_t) (next_in - file_start); @@ -390,7 +186,7 @@ void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream) for (; next_hash < end - 3; next_hash++) { literal = *(uint32_t *) next_hash; - hash = compute_hash(literal) & HASH_HIST_HASH_MASK; + hash = compute_hash(literal) & hash_mask; last_seen[hash] = (uint64_t) (next_hash - file_start); } @@ -457,6 +253,7 @@ void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream) uint16_t *last_seen = level_buf->hash_map.hash_table; uint8_t *file_start = stream->next_in - stream->total_in; uint32_t hist_size = state->dist_mask; + uint32_t hash_mask = state->hash_mask; start_in = stream->next_in; end_in = start_in + stream->avail_in; @@ -481,7 +278,7 @@ void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream) } literal = *(uint32_t *) next_in; - hash = compute_hash_mad(literal) & HASH_MAP_HASH_MASK; + hash = compute_hash_mad(literal) & hash_mask; dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; last_seen[hash] = (uint64_t) (next_in - file_start); @@ -499,7 +296,7 @@ void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream) for (; next_hash < end - 3; next_hash++) { literal = *(uint32_t *) next_hash; - hash = compute_hash_mad(literal) & HASH_MAP_HASH_MASK; + hash = compute_hash_mad(literal) & hash_mask; last_seen[hash] = (uint64_t) (next_hash - file_start); } diff --git a/igzip/igzip_icf_body.c b/igzip/igzip_icf_body.c index 0943c64..28e7b1f 100644 --- a/igzip/igzip_icf_body.c +++ 
b/igzip/igzip_icf_body.c @@ -78,6 +78,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream, struct level_buf *level_buf = (struct level_buf *)stream->level_buf; uint16_t *hash_table = level_buf->hash_map.hash_table; uint32_t hist_size = stream->internal_state.dist_mask; + uint32_t hash_mask = stream->internal_state.hash_mask; if (input_size < ISAL_LOOK_AHEAD) return 0; @@ -87,7 +88,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream, matches_icf_lookup->lit_dist = 0x1e; matches_icf_lookup->dist_extra = 0; - hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK; + hash = compute_hash(*(uint32_t *) next_in) & hash_mask; hash_table[hash] = (uint64_t) (next_in - file_start); next_in++; @@ -96,7 +97,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream, } while (next_in < end_in - ISAL_LOOK_AHEAD) { - hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK; + hash = compute_hash(*(uint32_t *) next_in) & hash_mask; dist = (next_in - file_start - hash_table[hash]); dist = ((dist - 1) & hist_size) + 1; hash_table[hash] = (uint64_t) (next_in - file_start); diff --git a/igzip/igzip_icf_body_h1_gr_bt.asm b/igzip/igzip_icf_body_h1_gr_bt.asm index 69ee73c..fb5cbac 100644 --- a/igzip/igzip_icf_body_h1_gr_bt.asm +++ b/igzip/igzip_icf_body_h1_gr_bt.asm @@ -63,9 +63,11 @@ global %1 %define dist rbx %define dist_code2 rbx %define lit_code2 rbx +%define hmask2 rbx %define dist2 r12 %define dist_code r12 +%define hmask3 r12 %define tmp1 rsi %define lit_code rsi @@ -73,6 +75,7 @@ global %1 %define curr_data2 r8 %define len2 r8 %define tmp4 r8 +%define hmask1 r8 %define len rdx %define len_code rdx @@ -104,9 +107,10 @@ global %1 m_out_end equ 0 ; local variable (8 bytes) m_out_start equ 8 dist_mask_offset equ 16 -f_end_i_mem_offset equ 24 -stream_offset equ 32 -gpr_save_mem_offset equ 40 ; gpr save area (8*8 bytes) +hash_mask_offset equ 24 +f_end_i_mem_offset equ 32 +stream_offset equ 40 +gpr_save_mem_offset equ 48 ; gpr save area (8*8 bytes) 
xmm_save_mem_offset equ gpr_save_mem_offset + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned) stack_size equ 7*8 + 8*8 + 4*16 @@ -125,14 +129,10 @@ stack_size equ 7*8 + 8*8 + 4*16 %xdefine COMPARE_TYPE2 3 %endif -%rep 3 ;; Defines to generate functions for different levels -%xdefine HASH_MASK HASH8K_HASH_MASK -%xdefine HASH_MASK1 HASH_HIST_HASH_MASK -%xdefine METHOD hash8k -%xdefine METHOD1 hash_hist +%xdefine METHOD hash_hist -%rep 2 +%rep 3 %if ARCH == 04 %define USE_HSWNI %endif @@ -183,8 +183,11 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : mov [rsp + stream_offset], stream mov byte [stream + _internal_state_has_eob], 0 + mov tmp1 %+ d, dword[stream + _internal_state_dist_mask] mov [rsp + dist_mask_offset], tmp1 + mov tmp1 %+ d, dword[stream + _internal_state_hash_mask] + mov [rsp + hash_mask_offset], tmp1 ; state->bitbuf.set_buf(stream->next_out, stream->avail_out); mov level_buf, [stream + _level_buf] @@ -208,6 +211,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : ; file_length -= LA; sub file_length, LA ; if (file_length <= 0) continue; + mov hmask1 %+ d, [rsp + hash_mask_offset] cmp file_length, f_i jle .input_end @@ -223,8 +227,8 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : shr tmp1, 8 compute_hash hash2, tmp1 - and hash, HASH_MASK - and hash2, HASH_MASK + and hash, hmask1 + and hash2, hmask1 cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST je .write_first_byte @@ -234,6 +238,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : .loop2: mov tmp3 %+ d, [rsp + dist_mask_offset] + mov hmask1 %+ d, [rsp + hash_mask_offset] ; if (state->bitbuf.is_full()) { cmp m_out_buf, [rsp + m_out_end] ja .output_end @@ -253,7 +258,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : mov tmp2, curr_data shr curr_data, 16 compute_hash hash, curr_data - and hash %+ d, HASH_MASK + and hash %+ d, hmask1 %+ d mov dist2 %+ w, f_i %+ w dec dist2 @@ -266,7 +271,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : shr tmp2, 24 
compute_hash hash2, tmp2 - and hash2 %+ d, HASH_MASK + and hash2 %+ d, hmask1 %+ d and dist2 %+ d, tmp3 %+ d neg dist2 @@ -308,6 +313,8 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : get_dist_icf_code dist2, dist_code2, tmp1 + mov hmask3 %+ d, dword [rsp + hash_mask_offset] + ;; Setup for updating hash lea tmp3, [f_i + 1] ; tmp3 <= k @@ -317,7 +324,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : shr curr_data, 24 compute_hash hash3, curr_data - and hash3, HASH_MASK + and hash3 %+ d, hmask3 %+ d mov curr_data, tmp1 shr tmp1, 8 @@ -349,9 +356,9 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : and dist_code2, 0x1F inc dword [dist_hist + HIST_ELEM_SIZE*dist_code2] - ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; - and hash %+ d, HASH_MASK - and hash2 %+ d, HASH_MASK + ; hash = compute_hash(state->file_start + f_i) & hash_mask; + and hash %+ d, hmask3 %+ d + and hash2 %+ d, hmask3 %+ d ; continue cmp f_i, file_length @@ -373,6 +380,8 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : ; get_dist_code(dist, &code2, &code_len2); get_dist_icf_code dist, dist_code, tmp1 + mov hmask2 %+ d, [rsp + hash_mask_offset] + add file_start, f_i MOVDQU xdata, [file_start + len] mov curr_data2, [file_start + len] @@ -401,9 +410,9 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : and dist_code, 0x1F inc dword [dist_hist + HIST_ELEM_SIZE*dist_code] - ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; - and hash %+ d, HASH_MASK - and hash2 %+ d, HASH_MASK + ; hash = compute_hash(state->file_start + f_i) & hash_mask; + and hash %+ d, hmask2 %+ d + and hash2 %+ d, hmask2 %+ d ; continue cmp f_i, file_length @@ -494,6 +503,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : jmp .len_dist_lit_huffman .write_first_byte: + mov hmask1 %+ d, [rsp + hash_mask_offset] cmp m_out_buf, [rsp + m_out_end] ja .output_end @@ -515,26 +525,17 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : MOVDQU xdata, [file_start + f_i + 1] add f_i, 1 mov 
curr_data, [file_start + f_i] - and hash %+ d, HASH_MASK - and hash2 %+ d, HASH_MASK + and hash %+ d, hmask1 %+ d + and hash2 %+ d, hmask1 %+ d cmp f_i, file_length jl .loop2 jmp .input_end - %ifdef USE_HSWNI %undef USE_HSWNI %endif -;; Shift defines over in order to iterate over all versions -%undef HASH_MASK -%xdefine HASH_MASK HASH_MASK1 - -%undef METHOD -%xdefine METHOD METHOD1 -%endrep - ;; Shift defines over in order to iterate over all versions %undef ARCH %xdefine ARCH ARCH1 diff --git a/igzip/igzip_icf_finish.asm b/igzip/igzip_icf_finish.asm index 5588487..ccff445 100644 --- a/igzip/igzip_icf_finish.asm +++ b/igzip/igzip_icf_finish.asm @@ -60,12 +60,14 @@ %define f_i rdi %define code_len2 rbp +%define hmask1 rbp %define m_out_buf r8 %define level_buf r9 -%define dist r10 +%define dist r10 +%define hmask2 r10 %define code2 r12 %define f_end_i r12 @@ -87,14 +89,11 @@ f_end_i_mem_offset equ 0 ; local variable (8 bytes) m_out_end equ 8 m_out_start equ 16 dist_mask_offset equ 24 -stack_size equ 32 +hash_mask_offset equ 32 +stack_size equ 5*8 -%xdefine HASH_MASK HASH8K_HASH_MASK -%xdefine HASH_MASK1 HASH_HIST_HASH_MASK -%xdefine METHOD hash8k -%xdefine METHOD1 hash_hist +%xdefine METHOD hash_hist -%rep 2 ; void isal_deflate_icf_finish ( isal_zstream *stream ) ; arg 1: rcx: addr of stream global isal_deflate_icf_finish_ %+ METHOD %+ _01 @@ -109,7 +108,8 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01: %endif ; state->bitbuf.set_buf(stream->next_out, stream->avail_out); - mov tmp2, [stream + _internal_state_dist_mask] + mov tmp2 %+ d, dword [stream + _internal_state_dist_mask] + mov tmp3 %+ d, dword [stream + _internal_state_hash_mask] mov level_buf, [stream + _level_buf] mov m_out_buf, [level_buf + _icf_buf_next] mov [rsp + m_out_start], m_out_buf @@ -118,6 +118,7 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01: sub tmp1, 4 mov [rsp + dist_mask_offset], tmp2 + mov [rsp + hash_mask_offset], tmp3 mov [rsp + m_out_end], tmp1 mov hufftables, [stream + _hufftables] @@ 
-144,8 +145,9 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01: cmp m_out_buf, [rsp + m_out_end] ja .end_loop_2 + mov hmask1 %+ d, [rsp + hash_mask_offset] compute_hash hash, curr_data - and hash %+ d, HASH_MASK + and hash %+ d, hmask1 %+ d mov [hash_table + 2 * hash], f_i %+ w mov byte [stream + _internal_state_has_hist], IGZIP_HIST jmp .encode_literal @@ -154,14 +156,15 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01: .loop2: mov tmp3 %+ d, [rsp + dist_mask_offset] + mov hmask1 %+ d, [rsp + hash_mask_offset] ; if (state->bitbuf.is_full()) { cmp m_out_buf, [rsp + m_out_end] ja .end_loop_2 - ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; + ; hash = compute_hash(state->file_start + f_i) & hash_mask; mov curr_data %+ d, [file_start + f_i] compute_hash hash, curr_data - and hash %+ d, HASH_MASK + and hash %+ d, hmask1 %+ d ; f_index = state->head[hash]; movzx f_index %+ d, word [hash_table + 2 * hash] @@ -209,6 +212,8 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01: ;; get_len_code lea code, [len + 254] + mov hmask2 %+ d, [rsp + hash_mask_offset] + or code2, code inc dword [lit_len_hist + HIST_ELEM_SIZE*code] @@ -220,19 +225,19 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01: ; only update hash twice - ; hash = compute_hash(state->file_start + k) & HASH_MASK; + ; hash = compute_hash(state->file_start + k) & hash_mask; mov tmp6 %+ d, dword [file_start + tmp3] compute_hash hash, tmp6 - and hash %+ d, HASH_MASK + and hash %+ d, hmask2 %+ d ; state->head[hash] = k; mov [hash_table + 2 * hash], tmp3 %+ w add tmp3, 1 - ; hash = compute_hash(state->file_start + k) & HASH_MASK; + ; hash = compute_hash(state->file_start + k) & hash_mask; mov tmp6 %+ d, dword [file_start + tmp3] compute_hash hash, tmp6 - and hash %+ d, HASH_MASK + and hash %+ d, hmask2 %+ d ; state->head[hash] = k; mov [hash_table + 2 * hash], tmp3 %+ w @@ -312,14 +317,6 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01: POP_ALL ret -;; Shift defines over in order to iterate over all versions -%undef HASH_MASK 
-%xdefine HASH_MASK HASH_MASK1 - -%undef METHOD -%xdefine METHOD METHOD1 -%endrep - section .data align 4 c258: dq 258 diff --git a/igzip/igzip_multibinary.asm b/igzip/igzip_multibinary.asm index 738e5af..bd864d6 100644 --- a/igzip/igzip_multibinary.asm +++ b/igzip/igzip_multibinary.asm @@ -39,13 +39,6 @@ extern isal_deflate_body_04 extern isal_deflate_finish_base extern isal_deflate_finish_01 -extern isal_deflate_icf_body_hash8k_base -extern isal_deflate_icf_body_hash8k_01 -extern isal_deflate_icf_body_hash8k_02 -extern isal_deflate_icf_body_hash8k_04 -extern isal_deflate_icf_finish_hash8k_base -extern isal_deflate_icf_finish_hash8k_01 - extern isal_deflate_icf_body_hash_hist_base extern isal_deflate_icf_body_hash_hist_01 extern isal_deflate_icf_body_hash_hist_02 @@ -99,7 +92,7 @@ mbin_interface isal_deflate_finish mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01 mbin_interface isal_deflate_icf_body_lvl1 -mbin_dispatch_init5 isal_deflate_icf_body_lvl1, isal_deflate_icf_body_hash8k_base, isal_deflate_icf_body_hash8k_01, isal_deflate_icf_body_hash8k_02, isal_deflate_icf_body_hash8k_04 +mbin_dispatch_init5 isal_deflate_icf_body_lvl1, isal_deflate_icf_body_hash_hist_base, isal_deflate_icf_body_hash_hist_01, isal_deflate_icf_body_hash_hist_02, isal_deflate_icf_body_hash_hist_04 mbin_interface isal_deflate_icf_body_lvl2 mbin_dispatch_init5 isal_deflate_icf_body_lvl2, isal_deflate_icf_body_hash_hist_base, isal_deflate_icf_body_hash_hist_01, isal_deflate_icf_body_hash_hist_02, isal_deflate_icf_body_hash_hist_04 @@ -108,7 +101,7 @@ mbin_interface isal_deflate_icf_body_lvl3 mbin_dispatch_init5 isal_deflate_icf_body_lvl3, icf_body_hash1_fillgreedy_lazy, icf_body_hash1_fillgreedy_lazy, icf_body_hash1_fillgreedy_lazy, icf_body_lazyhash1_fillgreedy_greedy mbin_interface isal_deflate_icf_finish_lvl1 -mbin_dispatch_init5 isal_deflate_icf_finish_lvl1, isal_deflate_icf_finish_hash8k_base, 
isal_deflate_icf_finish_hash8k_01, isal_deflate_icf_finish_hash8k_01, isal_deflate_icf_finish_hash8k_01 +mbin_dispatch_init5 isal_deflate_icf_finish_lvl1, isal_deflate_icf_finish_hash_hist_base, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01 mbin_interface isal_deflate_icf_finish_lvl2 mbin_dispatch_init5 isal_deflate_icf_finish_lvl2, isal_deflate_icf_finish_hash_hist_base, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01 diff --git a/igzip/stdmac.asm b/igzip/stdmac.asm index 44741da..22f6cee 100644 --- a/igzip/stdmac.asm +++ b/igzip/stdmac.asm @@ -386,4 +386,16 @@ ssc: pxor %%dest, %%src2 %endif %endm + +%macro PSHUFD 3 +%define %%dest %1 +%define %%src1 %2 +%define %%imm8 %3 +%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04)) + vpshufd %%dest, %%src1, %%imm8 +%else + pshufd %%dest, %%src1, %%imm8 +%endif +%endm + %endif ;; ifndef STDMAC_ASM diff --git a/include/igzip_lib.h b/include/igzip_lib.h index 1b31dd8..c2d4b27 100644 --- a/include/igzip_lib.h +++ b/include/igzip_lib.h @@ -315,9 +315,10 @@ struct isal_zstate { uint32_t block_next; //!< Start of current deflate block in the input uint32_t block_end; //!< End of current deflate block in the input uint32_t dist_mask; //!< Distance mask used. + uint32_t hash_mask; + enum isal_zstate_state state; //!< Current state in processing the data stream struct BitBuf2 bitbuf; //!< Bit Buffer uint32_t crc; //!< Current crc - enum isal_zstate_state state; //!< Current state in processing the data stream uint8_t has_wrap_hdr; //!< keeps track of wrapper header uint8_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set) uint8_t has_eob; //!< keeps track of eob on the last deflate block