igzip: Setup for variable hash mask

Change-Id: I3be94dbc40c2e02dcff4f89e5a9df8ed1f744f02
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
Roy Oursler 2018-06-06 16:33:19 -07:00
parent 6317ce2b78
commit 03bef684a4
16 changed files with 184 additions and 351 deletions

View File

@ -32,14 +32,13 @@
; Assumes m_out_buf is a register
; Clobbers RCX
; code is clobbered
; write_bits_always m_bits, m_bit_count, code, count, m_out_buf, tmp1
%macro write_bits 6
; write_bits_always m_bits, m_bit_count, code, count, m_out_buf
%macro write_bits 5
%define %%m_bits %1
%define %%m_bit_count %2
%define %%code %3
%define %%count %4
%define %%m_out_buf %5
%define %%tmp1 %6
%ifdef USE_HSWNI
shlx %%code, %%code, %%m_bit_count

View File

@ -160,9 +160,10 @@ FIELD _total_in_start,4, 4
FIELD _block_next, 4, 4
FIELD _block_end, 4, 4
FIELD _dist_mask, 4, 4
FIELD _hash_mask, 4, 4
FIELD _state, 4, 4
FIELD _bitbuf, _BitBuf2_size, _BitBuf2_align
FIELD _crc, 4, 4
FIELD _state, 4, 4
FIELD _has_wrap_hdr, 1, 1
FIELD _has_eob_hdr, 1, 1
FIELD _has_eob, 1, 1
@ -218,6 +219,7 @@ _internal_state_b_bytes_valid equ _internal_state+_b_bytes_valid
_internal_state_b_bytes_processed equ _internal_state+_b_bytes_processed
_internal_state_crc equ _internal_state+_crc
_internal_state_dist_mask equ _internal_state+_dist_mask
_internal_state_hash_mask equ _internal_state+_hash_mask
_internal_state_bitbuf equ _internal_state+_bitbuf
_internal_state_state equ _internal_state+_state
_internal_state_count equ _internal_state+_count

View File

@ -923,6 +923,25 @@ static void inline set_dist_mask(struct isal_zstream *stream)
}
/*
 * Select the hash mask that matches the stream's compression level and store
 * it in the stream's internal state.
 *
 * Levels 1-3 each use their own LVLn_HASH_MASK. Every other level value falls
 * back to LVL0_HASH_MASK, mirroring isal_deflate_hash(), whose default branch
 * hashes into internal_state.head for any level other than 1-3. Without this
 * fallback an unexpected level would leave hash_mask at whatever value it
 * held before the call.
 */
static void inline set_hash_mask(struct isal_zstream *stream)
{
	struct isal_zstate *state = &stream->internal_state;

	switch (stream->level) {
	case 3:
		state->hash_mask = LVL3_HASH_MASK;
		break;
	case 2:
		state->hash_mask = LVL2_HASH_MASK;
		break;
	case 1:
		state->hash_mask = LVL1_HASH_MASK;
		break;
	case 0:
	default:
		/* Level 0 and any unrecognised level share the level-0 table. */
		state->hash_mask = LVL0_HASH_MASK;
		break;
	}
}
void isal_deflate_init(struct isal_zstream *stream)
{
struct isal_zstate *state = &stream->internal_state;
@ -1036,26 +1055,28 @@ void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, uint32_t dic
/* Reset history to prevent out-of-bounds matches; this works because the
 * dictionary must set at least 1 element in the history */
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
uint32_t hash_mask = stream->internal_state.hash_mask;
switch (stream->level) {
case 3:
memset(level_buf->lvl3.hash_table, -1, sizeof(level_buf->lvl3.hash_table));
isal_deflate_hash_lvl3(level_buf->lvl3.hash_table, LVL3_HASH_MASK,
isal_deflate_hash_lvl3(level_buf->lvl3.hash_table, hash_mask,
stream->total_in, dict, dict_len);
break;
case 2:
memset(level_buf->lvl2.hash_table, -1, sizeof(level_buf->lvl2.hash_table));
isal_deflate_hash_lvl2(level_buf->lvl2.hash_table, LVL2_HASH_MASK,
isal_deflate_hash_lvl2(level_buf->lvl2.hash_table, hash_mask,
stream->total_in, dict, dict_len);
break;
case 1:
memset(level_buf->lvl1.hash_table, -1, sizeof(level_buf->lvl1.hash_table));
isal_deflate_hash_lvl1(level_buf->lvl1.hash_table, LVL1_HASH_MASK,
isal_deflate_hash_lvl1(level_buf->lvl1.hash_table, hash_mask,
stream->total_in, dict, dict_len);
break;
default:
memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head));
isal_deflate_hash_lvl0(stream->internal_state.head, LVL0_HASH_MASK,
isal_deflate_hash_lvl0(stream->internal_state.head, hash_mask,
stream->total_in, dict, dict_len);
}
@ -1128,6 +1149,8 @@ int isal_deflate_stateless(struct isal_zstream *stream)
return level_check;
}
set_hash_mask(stream);
if (avail_in == 0)
stored_len = TYPE0_BLK_HDR_LEN;
else
@ -1263,6 +1286,7 @@ int isal_deflate(struct isal_zstream *stream)
if (state->has_hist == IGZIP_NO_HIST) {
set_dist_mask(stream);
set_hash_mask(stream);
stream->total_in -= buffered_size;
reset_match_history(stream);
stream->total_in += buffered_size;
@ -1270,6 +1294,7 @@ int isal_deflate(struct isal_zstream *stream)
} else if (state->has_hist == IGZIP_DICT_HIST) {
set_dist_mask(stream);
set_hash_mask(stream);
isal_deflate_hash(stream, state->buffer, state->b_bytes_processed);
}

View File

@ -37,6 +37,7 @@ void isal_deflate_body_base(struct isal_zstream *stream)
uint16_t *last_seen = state->head;
uint8_t *file_start = stream->next_in - stream->total_in;
uint32_t hist_size = state->dist_mask;
uint32_t hash_mask = state->hash_mask;
if (stream->avail_in == 0) {
if (stream->end_of_stream || stream->flush != NO_FLUSH)
@ -58,7 +59,7 @@ void isal_deflate_body_base(struct isal_zstream *stream)
}
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & LVL0_HASH_MASK;
hash = compute_hash(literal) & hash_mask;
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - file_start);
@ -79,7 +80,7 @@ void isal_deflate_body_base(struct isal_zstream *stream)
for (; next_hash < end; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & LVL0_HASH_MASK;
hash = compute_hash(literal) & hash_mask;
last_seen[hash] = (uint64_t) (next_hash - file_start);
}
@ -124,6 +125,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream)
uint16_t *last_seen = state->head;
uint8_t *file_start = stream->next_in - stream->total_in;
uint32_t hist_size = state->dist_mask;
uint32_t hash_mask = state->hash_mask;
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
@ -139,7 +141,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream)
}
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & LVL0_HASH_MASK;
hash = compute_hash(literal) & hash_mask;
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - file_start);
@ -158,7 +160,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream)
for (; next_hash < end - 3; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & LVL0_HASH_MASK;
hash = compute_hash(literal) & hash_mask;
last_seen[hash] =
(uint64_t) (next_hash - file_start);
}

View File

@ -34,10 +34,8 @@
void isal_deflate_body_base(struct isal_zstream *stream);
void isal_deflate_finish_base(struct isal_zstream *stream);
void isal_deflate_icf_body_hash8k_base(struct isal_zstream *stream);
void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream);
void isal_deflate_icf_finish_hash8k_base(struct isal_zstream *stream);
void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream);
void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream);
void isal_update_histogram_base(uint8_t * start_stream, int length,
@ -68,7 +66,7 @@ void isal_deflate_finish(struct isal_zstream *stream)
void isal_deflate_icf_body_lvl1(struct isal_zstream *stream)
{
isal_deflate_icf_body_hash8k_base(stream);
isal_deflate_icf_body_hash_hist_base(stream);
}
void isal_deflate_icf_body_lvl2(struct isal_zstream *stream)
@ -83,7 +81,7 @@ void isal_deflate_icf_body_lvl3(struct isal_zstream *stream)
void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream)
{
isal_deflate_icf_finish_hash8k_base(stream);
isal_deflate_icf_finish_hash_hist_base(stream);
}
void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream)

View File

@ -52,6 +52,7 @@
%define tmp4 rbx
%define dist rbx
%define code2 rbx
%define hmask1 rbx
%define hash rdx
%define len rdx
@ -172,7 +173,8 @@ isal_deflate_body_ %+ ARCH %+ :
mov stream, rcx
mov byte [stream + _internal_state_has_eob], 0
MOVDQU xmask, [mask]
MOVD xmask, [stream + _internal_state_hash_mask]
PSHUFD xmask, xmask, 0
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
mov m_out_buf, [stream + _next_out]
@ -203,6 +205,7 @@ isal_deflate_body_ %+ ARCH %+ :
cmp f_end_i, f_i
jle .input_end
MOVD hmask1 %+ d, xmask
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
MOVDQU xdata, [file_start + f_i]
mov curr_data, [file_start + f_i]
@ -214,8 +217,8 @@ isal_deflate_body_ %+ ARCH %+ :
shr tmp3, 8
compute_hash hash2, tmp3
and hash, LVL0_HASH_MASK
and hash2, LVL0_HASH_MASK
and hash %+ d, hmask1 %+ d
and hash2 %+ d, hmask1 %+ d
cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
je .write_first_byte
@ -315,6 +318,8 @@ isal_deflate_body_ %+ ARCH %+ :
%endif
get_len_code len2, code, rcx, hufftables ;; rcx is code_len
MOVD hmask1 %+ d, xmask
SHLX code4, code4, rcx
or code4, code
add code_len2, rcx
@ -322,12 +327,13 @@ isal_deflate_body_ %+ ARCH %+ :
add f_i, len2
neg len2
SHLX code4, code4, code_len3
MOVQ tmp5, xdata
shr tmp5, 24
compute_hash tmp4, tmp5
and tmp4, LVL0_HASH_MASK
compute_hash hash2, tmp5
and hash2 %+ d, hmask1 %+ d
SHLX code4, code4, code_len3
or code4, code3
add code_len2, code_len3
@ -336,23 +342,23 @@ isal_deflate_body_ %+ ARCH %+ :
MOVDQU xdata, [file_start + f_i]
mov curr_data, [file_start + f_i]
mov curr_data2, curr_data
MOVD hash %+ d, xhash
PEXTRD hash2 %+ d, xhash, 1
PEXTRD tmp6 %+ d, xhash, 1
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
compute_hash hash, curr_data
add tmp3,1
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
mov [stream + _internal_state_head + 2 * tmp6], tmp3 %+ w
add tmp3, 1
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf, tmp4
write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf
mov f_end_i, [rsp + f_end_i_mem_offset]
mov curr_data2, curr_data
shr curr_data2, 8
compute_hash hash2, curr_data2
@ -362,16 +368,16 @@ isal_deflate_body_ %+ ARCH %+ :
cmp tmp3, f_i
jae .loop3_done
mov tmp6, [file_start + tmp3]
compute_hash tmp4, tmp6
and tmp4 %+ d, LVL0_HASH_MASK
compute_hash tmp1, tmp6
and tmp1 %+ d, hmask1 %+ d
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
mov [stream + _internal_state_head + 2 * tmp1], tmp3 %+ w
jmp .loop3
.loop3_done:
%endif
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
and hash %+ d, LVL0_HASH_MASK
and hash2 %+ d, LVL0_HASH_MASK
; hash = compute_hash(state->file_start + f_i) & hash_mask;
and hash %+ d, hmask1 %+ d
and hash2 %+ d, hmask1 %+ d
; continue
cmp f_i, f_end_i
@ -400,8 +406,8 @@ isal_deflate_body_ %+ ARCH %+ :
; code2 <<= code_len
; code2 |= code
; code_len2 += code_len
SHLX code2, code2, rcx
or code2, code
SHLX code4, code2, rcx
or code4, code
add code_len2, rcx
;; Setup for updating hash
@ -414,14 +420,15 @@ isal_deflate_body_ %+ ARCH %+ :
add tmp3,1
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
MOVD hmask1 %+ d, xmask
MOVDQU xdata, [file_start + f_i]
mov curr_data, [file_start + f_i]
mov curr_data2, curr_data
compute_hash hash, curr_data
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp7
write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf
mov f_end_i, [rsp + f_end_i_mem_offset]
mov curr_data2, curr_data
shr curr_data2, 8
compute_hash hash2, curr_data2
@ -431,16 +438,16 @@ isal_deflate_body_ %+ ARCH %+ :
cmp tmp3, f_i
jae .loop4_done
mov tmp6, [file_start + tmp3]
compute_hash tmp4, tmp6
and tmp4, LVL0_HASH_MASK
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
compute_hash tmp1, tmp6
and tmp1 %+ d, hmask1 %+ d
mov [stream + _internal_state_head + 2 * tmp1], tmp3 %+ w
jmp .loop4
.loop4_done:
%endif
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
and hash %+ d, LVL0_HASH_MASK
and hash2 %+ d, LVL0_HASH_MASK
; hash = compute_hash(state->file_start + f_i) & hash_mask;
and hash %+ d, hmask1 %+ d
and hash2 %+ d, hmask1 %+ d
; continue
cmp f_i, f_end_i
@ -455,7 +462,7 @@ isal_deflate_body_ %+ ARCH %+ :
MOVD hash %+ d, xhash
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
PEXTRD hash2 %+ d, xhash, 1
@ -564,7 +571,3 @@ isal_deflate_body_ %+ ARCH %+ :
%xdefine COMPARE_TYPE1 COMPARE_TYPE2
%endif
%endrep
section .data
align 16
mask: dd LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK

View File

@ -60,12 +60,14 @@
%define f_i rdi
%define code_len2 rbp
%define hmask1 rbp
%define m_out_buf r8
%define m_bits r9
%define dist r10
%define hmask2 r10
%define m_bit_count r11
@ -131,9 +133,9 @@ skip_SLOP:
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end_loop_2
mov hmask1 %+ d, dword [stream + _internal_state_hash_mask]
compute_hash hash, curr_data
and hash %+ d, LVL0_HASH_MASK
and hash %+ d, hmask1 %+ d
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
mov byte [stream + _internal_state_has_hist], IGZIP_HIST
jmp encode_literal
@ -142,15 +144,15 @@ skip_write_first_byte:
loop2:
mov tmp3 %+ d, dword [stream + _internal_state_dist_mask]
mov hmask1 %+ d, dword [stream + _internal_state_hash_mask]
; if (state->bitbuf.is_full()) {
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end_loop_2
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
; hash = compute_hash(state->file_start + f_i) & hash_mask;
mov curr_data %+ d, [file_start + f_i]
compute_hash hash, curr_data
and hash %+ d, LVL0_HASH_MASK
and hash %+ d, hmask1 %+ d
; f_index = state->head[hash];
movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
@ -198,6 +200,7 @@ loop2:
; get_len_code(len, &code, &code_len);
get_len_code len, code, rcx, hufftables ;; rcx is code_len
mov hmask2 %+ d, dword [stream + _internal_state_hash_mask]
; code2 <<= code_len
; code2 |= code
; code_len2 += code_len
@ -213,24 +216,24 @@ loop2:
; only update hash twice
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
; hash = compute_hash(state->file_start + k) & hash_mask;
mov tmp6 %+ d, dword [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, LVL0_HASH_MASK
and hash %+ d, hmask2 %+ d
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
add tmp3, 1
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
; hash = compute_hash(state->file_start + k) & hash_mask;
mov tmp6 %+ d, dword [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, LVL0_HASH_MASK
and hash %+ d, hmask2 %+ d
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
skip_hash_update:
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
; continue
cmp f_i, [rsp + f_end_i_mem_offset]
@ -242,7 +245,7 @@ encode_literal:
movzx tmp5, byte [file_start + f_i]
get_lit_code tmp5, code2, code_len2, hufftables
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
; continue
add f_i, 1
@ -263,7 +266,7 @@ final_bytes:
ja not_end
movzx tmp5, byte [file_start + f_i]
get_lit_code tmp5, code2, code_len2, hufftables
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
inc f_i
cmp f_i, [rsp + f_end_i_mem_offset]
@ -276,7 +279,7 @@ write_eob:
; get_lit_code(256, &code2, &code_len2);
get_lit_code 256, code2, code_len2, hufftables
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp1
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
mov byte [stream + _internal_state_has_eob], 1
cmp word [stream + _end_of_stream], 1

View File

@ -100,8 +100,8 @@
%define ydist_mask ymm15
%ifidn __OUTPUT_FORMAT__, win64
%define stack_size 10*16 + 6 * 8 + 8
%define local_storage_offset (stack_size - 8)
%define stack_size 10*16 + 6 * 8 + 3 * 8
%define local_storage_offset (stack_size - 16)
%define func(x) proc_frame x
%macro FUNC_SAVE 0
@ -144,7 +144,7 @@
add rsp, stack_size
%endm
%else
%define stack_size 8
%define stack_size 16
%define local_storage_offset 0
%define func(x) x:
@ -164,6 +164,7 @@
%endif
%define dist_mask_offset local_storage_offset
%define hash_mask_offset local_storage_offset + 8
%define VECT_SIZE 8
%define HASH_BYTES 2
@ -184,6 +185,8 @@ func(gen_icf_map_lh1_04)
;; Prep for main loop
mov tmp %+ d, dword [stream + _internal_state_dist_mask]
mov [rsp + dist_mask_offset], tmp
mov tmp %+ d, dword [stream + _internal_state_hash_mask]
mov [rsp + hash_mask_offset], tmp
mov tmp, stream
mov level_buf, [stream + _level_buf]
sub f_i_end, LA
@ -193,7 +196,7 @@ func(gen_icf_map_lh1_04)
;; Process first byte
vpbroadcastd yhash_prod, [hash_prod]
vpbroadcastd yhash_mask, [hash_mask]
vpbroadcastd yhash_mask, [rsp + hash_mask_offset]
vmovd yhashes %+ x, dword [f_i + file_start]
vpmaddwd yhashes, yhashes, yhash_prod
vpmaddwd yhashes, yhashes, yhash_prod
@ -299,7 +302,7 @@ func(gen_icf_map_lh1_04)
;; Compute hash for next loop
vpbroadcastd yhash_prod, [hash_prod]
vpbroadcastd yhash_mask, [hash_mask]
vpbroadcastd yhash_mask, [rsp + hash_mask_offset]
vmovdqu datas, [f_i + file_start + VECT_SIZE]
vpermq yhashes, datas, 0x44
vpshufb yhashes, yhashes, [datas_shuf]
@ -362,7 +365,7 @@ loop1:
;; Compute hash for next loop
vpbroadcastd yhash_prod, [hash_prod]
vpbroadcastd yhash_mask, [hash_mask]
vpbroadcastd yhash_mask, [rsp + hash_mask_offset]
vpermq yhashes, datas_lookup, 0x44
vpshufb yhashes, yhashes, [datas_shuf]
vpmaddwd yhashes, yhashes, yhash_prod
@ -532,7 +535,7 @@ loop1_end:
add tmp %+ d, f_i %+ d
vpbroadcastd yhash_prod %+ x, [hash_prod]
vpbroadcastd yhash_mask %+ x, [hash_mask]
vpbroadcastd yhash_mask %+ x, [rsp + hash_mask_offset]
vmovd yhashes %+ x, dword [f_i + file_start + VECT_SIZE - 1]
vpmaddwd yhashes %+ x, yhashes %+ x, yhash_prod %+ x
@ -722,8 +725,6 @@ hash_prod:
dw PROD1, PROD2
null_dist_syms:
dd LIT
hash_mask:
dd HASH_MAP_HASH_MASK
twofiftyfour:
dd 0xfe
shortest_matches:

View File

@ -175,13 +175,13 @@ func(gen_icf_map_lh1_06)
;; Prep for main loop
vpbroadcastd zdist_mask, dword [stream + _internal_state_dist_mask]
vpbroadcastd zhash_mask, dword [stream + _internal_state_hash_mask]
mov tmp, stream
mov level_buf, [stream + _level_buf]
sub f_i_end, LA
vmovdqu64 zdatas_perm, [datas_perm]
vbroadcasti32x8 zdatas_shuf, [datas_shuf]
vpbroadcastd zhash_prod, [hash_prod]
vpbroadcastd zhash_mask, [hash_mask]
vmovdqu64 zincrement, [increment]
vmovdqu64 zqword_shuf, [qword_shuf]
vbroadcasti64x2 zdatas_perm2, [datas_perm2]
@ -569,8 +569,6 @@ thirty:
dd 0x1e
twofiftyfour:
dd 0xfe
hash_mask:
dd HASH_MAP_HASH_MASK
lit_len_mask:
dd LIT_LEN_MASK
shortest_matches:

View File

@ -32,102 +32,6 @@ static inline void update_state(struct isal_zstream *stream, uint8_t * start_in,
level_buf->icf_buf_avail_out = end_out - next_out;
}
void isal_deflate_icf_body_hash8k_base(struct isal_zstream *stream)
{
uint32_t literal, hash;
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
struct deflate_icf *start_out, *next_out, *end_out;
uint16_t match_length;
uint32_t dist;
uint32_t code, code2, extra_bits;
struct isal_zstate *state = &stream->internal_state;
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
uint16_t *last_seen = level_buf->hash8k.hash_table;
uint8_t *file_start = stream->next_in - stream->total_in;
uint32_t hist_size = state->dist_mask;
if (stream->avail_in == 0) {
if (stream->end_of_stream || stream->flush != NO_FLUSH)
state->state = ZSTATE_FLUSH_READ_BUFFER;
return;
}
start_in = stream->next_in;
end_in = start_in + stream->avail_in;
next_in = start_in;
start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next;
end_out =
start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out /
sizeof(struct deflate_icf);
next_out = start_out;
while (next_in + ISAL_LOOK_AHEAD < end_in) {
if (next_out >= end_out) {
state->state = ZSTATE_CREATE_HDR;
update_state(stream, start_in, next_in, end_in, start_out, next_out,
end_out);
return;
}
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & HASH8K_HASH_MASK;
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - file_start);
/* The -1 are to handle the case when dist = 0 */
if (dist - 1 < hist_size) {
assert(dist != 0);
match_length = compare258(next_in - dist, next_in, 258);
if (match_length >= SHORTEST_MATCH) {
next_hash = next_in;
#ifdef ISAL_LIMIT_HASH_UPDATE
end = next_hash + 3;
#else
end = next_hash + match_length;
#endif
next_hash++;
for (; next_hash < end; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH8K_HASH_MASK;
last_seen[hash] = (uint64_t) (next_hash - file_start);
}
get_len_icf_code(match_length, &code);
get_dist_icf_code(dist, &code2, &extra_bits);
level_buf->hist.ll_hist[code]++;
level_buf->hist.d_hist[code2]++;
write_deflate_icf(next_out, code, code2, extra_bits);
next_out++;
next_in += match_length;
continue;
}
}
get_lit_icf_code(literal & 0xFF, &code);
level_buf->hist.ll_hist[code]++;
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
next_out++;
next_in++;
}
update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
assert(stream->avail_in <= ISAL_LOOK_AHEAD);
if (stream->end_of_stream || stream->flush != NO_FLUSH)
state->state = ZSTATE_FLUSH_READ_BUFFER;
return;
}
void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
{
uint32_t literal, hash;
@ -141,6 +45,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
uint16_t *last_seen = level_buf->hash_hist.hash_table;
uint8_t *file_start = stream->next_in - stream->total_in;
uint32_t hist_size = state->dist_mask;
uint32_t hash_mask = state->hash_mask;
if (stream->avail_in == 0) {
if (stream->end_of_stream || stream->flush != NO_FLUSH)
@ -168,7 +73,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
}
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & HASH_HIST_HASH_MASK;
hash = compute_hash(literal) & hash_mask;
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - file_start);
@ -189,7 +94,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
for (; next_hash < end; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH_HIST_HASH_MASK;
hash = compute_hash(literal) & hash_mask;
last_seen[hash] = (uint64_t) (next_hash - file_start);
}
@ -224,116 +129,6 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
}
void isal_deflate_icf_finish_hash8k_base(struct isal_zstream *stream)
{
uint32_t literal = 0, hash;
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
struct deflate_icf *start_out, *next_out, *end_out;
uint16_t match_length;
uint32_t dist;
uint32_t code, code2, extra_bits;
struct isal_zstate *state = &stream->internal_state;
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
uint16_t *last_seen = level_buf->hash8k.hash_table;
uint8_t *file_start = stream->next_in - stream->total_in;
uint32_t hist_size = state->dist_mask;
start_in = stream->next_in;
end_in = start_in + stream->avail_in;
next_in = start_in;
start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next;
end_out = start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out /
sizeof(struct deflate_icf);
next_out = start_out;
if (stream->avail_in == 0) {
if (stream->end_of_stream || stream->flush != NO_FLUSH)
state->state = ZSTATE_CREATE_HDR;
return;
}
while (next_in + 3 < end_in) {
if (next_out >= end_out) {
state->state = ZSTATE_CREATE_HDR;
update_state(stream, start_in, next_in, end_in, start_out, next_out,
end_out);
return;
}
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & HASH8K_HASH_MASK;
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - file_start);
if (dist - 1 < hist_size) { /* The -1 are to handle the case when dist = 0 */
match_length = compare258(next_in - dist, next_in, end_in - next_in);
if (match_length >= SHORTEST_MATCH) {
next_hash = next_in;
#ifdef ISAL_LIMIT_HASH_UPDATE
end = next_hash + 3;
#else
end = next_hash + match_length;
#endif
next_hash++;
for (; next_hash < end - 3; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH8K_HASH_MASK;
last_seen[hash] = (uint64_t) (next_hash - file_start);
}
get_len_icf_code(match_length, &code);
get_dist_icf_code(dist, &code2, &extra_bits);
level_buf->hist.ll_hist[code]++;
level_buf->hist.d_hist[code2]++;
write_deflate_icf(next_out, code, code2, extra_bits);
next_out++;
next_in += match_length;
continue;
}
}
get_lit_icf_code(literal & 0xFF, &code);
level_buf->hist.ll_hist[code]++;
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
next_out++;
next_in++;
}
while (next_in < end_in) {
if (next_out >= end_out) {
state->state = ZSTATE_CREATE_HDR;
update_state(stream, start_in, next_in, end_in, start_out, next_out,
end_out);
return;
}
literal = *next_in;
get_lit_icf_code(literal & 0xFF, &code);
level_buf->hist.ll_hist[code]++;
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
next_out++;
next_in++;
}
if (next_in == end_in) {
if (stream->end_of_stream || stream->flush != NO_FLUSH)
state->state = ZSTATE_CREATE_HDR;
}
update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
return;
}
void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
{
uint32_t literal = 0, hash;
@ -347,6 +142,7 @@ void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
uint16_t *last_seen = level_buf->hash_hist.hash_table;
uint8_t *file_start = stream->next_in - stream->total_in;
uint32_t hist_size = state->dist_mask;
uint32_t hash_mask = state->hash_mask;
start_in = stream->next_in;
end_in = start_in + stream->avail_in;
@ -372,7 +168,7 @@ void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
}
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & HASH_HIST_HASH_MASK;
hash = compute_hash(literal) & hash_mask;
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - file_start);
@ -390,7 +186,7 @@ void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
for (; next_hash < end - 3; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH_HIST_HASH_MASK;
hash = compute_hash(literal) & hash_mask;
last_seen[hash] = (uint64_t) (next_hash - file_start);
}
@ -457,6 +253,7 @@ void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream)
uint16_t *last_seen = level_buf->hash_map.hash_table;
uint8_t *file_start = stream->next_in - stream->total_in;
uint32_t hist_size = state->dist_mask;
uint32_t hash_mask = state->hash_mask;
start_in = stream->next_in;
end_in = start_in + stream->avail_in;
@ -481,7 +278,7 @@ void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream)
}
literal = *(uint32_t *) next_in;
hash = compute_hash_mad(literal) & HASH_MAP_HASH_MASK;
hash = compute_hash_mad(literal) & hash_mask;
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - file_start);
@ -499,7 +296,7 @@ void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream)
for (; next_hash < end - 3; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash_mad(literal) & HASH_MAP_HASH_MASK;
hash = compute_hash_mad(literal) & hash_mask;
last_seen[hash] = (uint64_t) (next_hash - file_start);
}

View File

@ -78,6 +78,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
uint16_t *hash_table = level_buf->hash_map.hash_table;
uint32_t hist_size = stream->internal_state.dist_mask;
uint32_t hash_mask = stream->internal_state.hash_mask;
if (input_size < ISAL_LOOK_AHEAD)
return 0;
@ -87,7 +88,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
matches_icf_lookup->lit_dist = 0x1e;
matches_icf_lookup->dist_extra = 0;
hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK;
hash = compute_hash(*(uint32_t *) next_in) & hash_mask;
hash_table[hash] = (uint64_t) (next_in - file_start);
next_in++;
@ -96,7 +97,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
}
while (next_in < end_in - ISAL_LOOK_AHEAD) {
hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK;
hash = compute_hash(*(uint32_t *) next_in) & hash_mask;
dist = (next_in - file_start - hash_table[hash]);
dist = ((dist - 1) & hist_size) + 1;
hash_table[hash] = (uint64_t) (next_in - file_start);

View File

@ -63,9 +63,11 @@ global %1
%define dist rbx
%define dist_code2 rbx
%define lit_code2 rbx
%define hmask2 rbx
%define dist2 r12
%define dist_code r12
%define hmask3 r12
%define tmp1 rsi
%define lit_code rsi
@ -73,6 +75,7 @@ global %1
%define curr_data2 r8
%define len2 r8
%define tmp4 r8
%define hmask1 r8
%define len rdx
%define len_code rdx
@ -104,9 +107,10 @@ global %1
m_out_end equ 0 ; local variable (8 bytes)
m_out_start equ 8
dist_mask_offset equ 16
f_end_i_mem_offset equ 24
stream_offset equ 32
gpr_save_mem_offset equ 40 ; gpr save area (8*8 bytes)
hash_mask_offset equ 24
f_end_i_mem_offset equ 32
stream_offset equ 40
gpr_save_mem_offset equ 48 ; gpr save area (8*8 bytes)
xmm_save_mem_offset equ gpr_save_mem_offset + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
stack_size equ 7*8 + 8*8 + 4*16
@ -125,14 +129,10 @@ stack_size equ 7*8 + 8*8 + 4*16
%xdefine COMPARE_TYPE2 3
%endif
%rep 3
;; Defines to generate functions for different levels
%xdefine HASH_MASK HASH8K_HASH_MASK
%xdefine HASH_MASK1 HASH_HIST_HASH_MASK
%xdefine METHOD hash8k
%xdefine METHOD1 hash_hist
%xdefine METHOD hash_hist
%rep 2
%rep 3
%if ARCH == 04
%define USE_HSWNI
%endif
@ -183,8 +183,11 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
mov [rsp + stream_offset], stream
mov byte [stream + _internal_state_has_eob], 0
mov tmp1 %+ d, dword[stream + _internal_state_dist_mask]
mov [rsp + dist_mask_offset], tmp1
mov tmp1 %+ d, dword[stream + _internal_state_hash_mask]
mov [rsp + hash_mask_offset], tmp1
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
mov level_buf, [stream + _level_buf]
@ -208,6 +211,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
; file_length -= LA;
sub file_length, LA
; if (file_length <= 0) continue;
mov hmask1 %+ d, [rsp + hash_mask_offset]
cmp file_length, f_i
jle .input_end
@ -223,8 +227,8 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
shr tmp1, 8
compute_hash hash2, tmp1
and hash, HASH_MASK
and hash2, HASH_MASK
and hash, hmask1
and hash2, hmask1
cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
je .write_first_byte
@ -234,6 +238,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
.loop2:
mov tmp3 %+ d, [rsp + dist_mask_offset]
mov hmask1 %+ d, [rsp + hash_mask_offset]
; if (state->bitbuf.is_full()) {
cmp m_out_buf, [rsp + m_out_end]
ja .output_end
@ -253,7 +258,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
mov tmp2, curr_data
shr curr_data, 16
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
and hash %+ d, hmask1 %+ d
mov dist2 %+ w, f_i %+ w
dec dist2
@ -266,7 +271,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
shr tmp2, 24
compute_hash hash2, tmp2
and hash2 %+ d, HASH_MASK
and hash2 %+ d, hmask1 %+ d
and dist2 %+ d, tmp3 %+ d
neg dist2
@ -308,6 +313,8 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
get_dist_icf_code dist2, dist_code2, tmp1
mov hmask3 %+ d, dword [rsp + hash_mask_offset]
;; Setup for updating hash
lea tmp3, [f_i + 1] ; tmp3 <= k
@ -317,7 +324,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
shr curr_data, 24
compute_hash hash3, curr_data
and hash3, HASH_MASK
and hash3 %+ d, hmask3 %+ d
mov curr_data, tmp1
shr tmp1, 8
@ -349,9 +356,9 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
and dist_code2, 0x1F
inc dword [dist_hist + HIST_ELEM_SIZE*dist_code2]
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
; hash = compute_hash(state->file_start + f_i) & hash_mask;
and hash %+ d, hmask3 %+ d
and hash2 %+ d, hmask3 %+ d
; continue
cmp f_i, file_length
@ -373,6 +380,8 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
; get_dist_code(dist, &code2, &code_len2);
get_dist_icf_code dist, dist_code, tmp1
mov hmask2 %+ d, [rsp + hash_mask_offset]
add file_start, f_i
MOVDQU xdata, [file_start + len]
mov curr_data2, [file_start + len]
@ -401,9 +410,9 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
and dist_code, 0x1F
inc dword [dist_hist + HIST_ELEM_SIZE*dist_code]
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
; hash = compute_hash(state->file_start + f_i) & hash_mask;
and hash %+ d, hmask2 %+ d
and hash2 %+ d, hmask2 %+ d
; continue
cmp f_i, file_length
@ -494,6 +503,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
jmp .len_dist_lit_huffman
.write_first_byte:
mov hmask1 %+ d, [rsp + hash_mask_offset]
cmp m_out_buf, [rsp + m_out_end]
ja .output_end
@ -515,26 +525,17 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
MOVDQU xdata, [file_start + f_i + 1]
add f_i, 1
mov curr_data, [file_start + f_i]
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
and hash %+ d, hmask1 %+ d
and hash2 %+ d, hmask1 %+ d
cmp f_i, file_length
jl .loop2
jmp .input_end
%ifdef USE_HSWNI
%undef USE_HSWNI
%endif
;; Shift defines over in order to iterate over all versions
%undef HASH_MASK
%xdefine HASH_MASK HASH_MASK1
%undef METHOD
%xdefine METHOD METHOD1
%endrep
;; Shift defines over in order to iterate over all versions
%undef ARCH
%xdefine ARCH ARCH1

View File

@ -60,12 +60,14 @@
%define f_i rdi
%define code_len2 rbp
; hmask1 holds the hash mask reloaded from [rsp + hash_mask_offset]; it aliases rbp, the same register as code_len2
%define hmask1 rbp
%define m_out_buf r8
%define level_buf r9
%define dist r10
%define dist r10
; hmask2 is a second register for the hash mask; it aliases r10, the same register as dist
%define hmask2 r10
%define code2 r12
%define f_end_i r12
@ -87,14 +89,11 @@ f_end_i_mem_offset equ 0 ; local variable (8 bytes)
m_out_end equ 8
m_out_start equ 16
dist_mask_offset equ 24
stack_size equ 32
hash_mask_offset equ 32 ; local variable (8 bytes): stack copy of the stream's internal hash_mask
stack_size equ 5*8 ; five 8-byte slots, offsets 0 through 32
%xdefine HASH_MASK HASH8K_HASH_MASK
%xdefine HASH_MASK1 HASH_HIST_HASH_MASK
%xdefine METHOD hash8k
%xdefine METHOD1 hash_hist
%xdefine METHOD hash_hist
%rep 2
; void isal_deflate_icf_finish ( isal_zstream *stream )
; arg 1: rcx: addr of stream
global isal_deflate_icf_finish_ %+ METHOD %+ _01
@ -109,7 +108,8 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
%endif
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
mov tmp2, [stream + _internal_state_dist_mask]
mov tmp2 %+ d, dword [stream + _internal_state_dist_mask]
mov tmp3 %+ d, dword [stream + _internal_state_hash_mask]
mov level_buf, [stream + _level_buf]
mov m_out_buf, [level_buf + _icf_buf_next]
mov [rsp + m_out_start], m_out_buf
@ -118,6 +118,7 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
sub tmp1, 4
mov [rsp + dist_mask_offset], tmp2
mov [rsp + hash_mask_offset], tmp3
mov [rsp + m_out_end], tmp1
mov hufftables, [stream + _hufftables]
@ -144,8 +145,9 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
cmp m_out_buf, [rsp + m_out_end]
ja .end_loop_2
mov hmask1 %+ d, [rsp + hash_mask_offset]
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
and hash %+ d, hmask1 %+ d
mov [hash_table + 2 * hash], f_i %+ w
mov byte [stream + _internal_state_has_hist], IGZIP_HIST
jmp .encode_literal
@ -154,14 +156,15 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
.loop2:
mov tmp3 %+ d, [rsp + dist_mask_offset]
mov hmask1 %+ d, [rsp + hash_mask_offset]
; if (state->bitbuf.is_full()) {
cmp m_out_buf, [rsp + m_out_end]
ja .end_loop_2
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
; hash = compute_hash(state->file_start + f_i) & hash_mask;
mov curr_data %+ d, [file_start + f_i]
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
and hash %+ d, hmask1 %+ d
; f_index = state->head[hash];
movzx f_index %+ d, word [hash_table + 2 * hash]
@ -209,6 +212,8 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
;; get_len_code
lea code, [len + 254]
mov hmask2 %+ d, [rsp + hash_mask_offset]
or code2, code
inc dword [lit_len_hist + HIST_ELEM_SIZE*code]
@ -220,19 +225,19 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
; only update hash twice
; hash = compute_hash(state->file_start + k) & HASH_MASK;
; hash = compute_hash(state->file_start + k) & hash_mask;
mov tmp6 %+ d, dword [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
and hash %+ d, hmask2 %+ d
; state->head[hash] = k;
mov [hash_table + 2 * hash], tmp3 %+ w
add tmp3, 1
; hash = compute_hash(state->file_start + k) & HASH_MASK;
; hash = compute_hash(state->file_start + k) & hash_mask;
mov tmp6 %+ d, dword [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
and hash %+ d, hmask2 %+ d
; state->head[hash] = k;
mov [hash_table + 2 * hash], tmp3 %+ w
@ -312,14 +317,6 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
POP_ALL
ret
;; Shift defines over in order to iterate over all versions
%undef HASH_MASK
%xdefine HASH_MASK HASH_MASK1
%undef METHOD
%xdefine METHOD METHOD1
%endrep
section .data
align 4
c258: dq 258

View File

@ -39,13 +39,6 @@ extern isal_deflate_body_04
extern isal_deflate_finish_base
extern isal_deflate_finish_01
extern isal_deflate_icf_body_hash8k_base
extern isal_deflate_icf_body_hash8k_01
extern isal_deflate_icf_body_hash8k_02
extern isal_deflate_icf_body_hash8k_04
extern isal_deflate_icf_finish_hash8k_base
extern isal_deflate_icf_finish_hash8k_01
extern isal_deflate_icf_body_hash_hist_base
extern isal_deflate_icf_body_hash_hist_01
extern isal_deflate_icf_body_hash_hist_02
@ -99,7 +92,7 @@ mbin_interface isal_deflate_finish
mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01
mbin_interface isal_deflate_icf_body_lvl1
mbin_dispatch_init5 isal_deflate_icf_body_lvl1, isal_deflate_icf_body_hash8k_base, isal_deflate_icf_body_hash8k_01, isal_deflate_icf_body_hash8k_02, isal_deflate_icf_body_hash8k_04
mbin_dispatch_init5 isal_deflate_icf_body_lvl1, isal_deflate_icf_body_hash_hist_base, isal_deflate_icf_body_hash_hist_01, isal_deflate_icf_body_hash_hist_02, isal_deflate_icf_body_hash_hist_04
mbin_interface isal_deflate_icf_body_lvl2
mbin_dispatch_init5 isal_deflate_icf_body_lvl2, isal_deflate_icf_body_hash_hist_base, isal_deflate_icf_body_hash_hist_01, isal_deflate_icf_body_hash_hist_02, isal_deflate_icf_body_hash_hist_04
@ -108,7 +101,7 @@ mbin_interface isal_deflate_icf_body_lvl3
mbin_dispatch_init5 isal_deflate_icf_body_lvl3, icf_body_hash1_fillgreedy_lazy, icf_body_hash1_fillgreedy_lazy, icf_body_hash1_fillgreedy_lazy, icf_body_lazyhash1_fillgreedy_greedy
mbin_interface isal_deflate_icf_finish_lvl1
mbin_dispatch_init5 isal_deflate_icf_finish_lvl1, isal_deflate_icf_finish_hash8k_base, isal_deflate_icf_finish_hash8k_01, isal_deflate_icf_finish_hash8k_01, isal_deflate_icf_finish_hash8k_01
mbin_dispatch_init5 isal_deflate_icf_finish_lvl1, isal_deflate_icf_finish_hash_hist_base, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01
mbin_interface isal_deflate_icf_finish_lvl2
mbin_dispatch_init5 isal_deflate_icf_finish_lvl2, isal_deflate_icf_finish_hash_hist_base, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01

View File

@ -386,4 +386,16 @@ ssc:
pxor %%dest, %%src2
%endif
%endm
;; PSHUFD dest, src1, imm8
;; Emits a packed-doubleword shuffle of src1 into dest under control of imm8.
;; The VEX-encoded vpshufd is used when ARCH is 02, 03 or 04; every other
;; ARCH value gets the legacy pshufd encoding.
%macro PSHUFD 3
%if ((ARCH != 02) && (ARCH != 03) && (ARCH != 04))
	pshufd	%1, %2, %3
%else
	vpshufd	%1, %2, %3
%endif
%endm
%endif ;; ifndef STDMAC_ASM

View File

@ -315,9 +315,10 @@ struct isal_zstate {
uint32_t block_next; //!< Start of current deflate block in the input
uint32_t block_end; //!< End of current deflate block in the input
uint32_t dist_mask; //!< Distance mask used.
uint32_t hash_mask; //!< Hash mask used; ANDed with computed hash values before indexing the hash table.
enum isal_zstate_state state; //!< Current state in processing the data stream
struct BitBuf2 bitbuf; //!< Bit Buffer
uint32_t crc; //!< Current crc
enum isal_zstate_state state; //!< Current state in processing the data stream
uint8_t has_wrap_hdr; //!< keeps track of wrapper header
uint8_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set)
uint8_t has_eob; //!< keeps track of eob on the last deflate block