mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 09:23:50 +01:00
igzip: Setup for variable hash mask
Change-Id: I3be94dbc40c2e02dcff4f89e5a9df8ed1f744f02 Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
parent
6317ce2b78
commit
03bef684a4
@ -32,14 +32,13 @@
|
||||
; Assumes m_out_buf is a register
|
||||
; Clobbers RCX
|
||||
; code is clobbered
|
||||
; write_bits_always m_bits, m_bit_count, code, count, m_out_buf, tmp1
|
||||
%macro write_bits 6
|
||||
; write_bits m_bits, m_bit_count, code, count, m_out_buf
|
||||
%macro write_bits 5
|
||||
%define %%m_bits %1
|
||||
%define %%m_bit_count %2
|
||||
%define %%code %3
|
||||
%define %%count %4
|
||||
%define %%m_out_buf %5
|
||||
%define %%tmp1 %6
|
||||
|
||||
%ifdef USE_HSWNI
|
||||
shlx %%code, %%code, %%m_bit_count
|
||||
|
@ -160,9 +160,10 @@ FIELD _total_in_start,4, 4
|
||||
FIELD _block_next, 4, 4
|
||||
FIELD _block_end, 4, 4
|
||||
FIELD _dist_mask, 4, 4
|
||||
FIELD _hash_mask, 4, 4
|
||||
FIELD _state, 4, 4
|
||||
FIELD _bitbuf, _BitBuf2_size, _BitBuf2_align
|
||||
FIELD _crc, 4, 4
|
||||
FIELD _state, 4, 4
|
||||
FIELD _has_wrap_hdr, 1, 1
|
||||
FIELD _has_eob_hdr, 1, 1
|
||||
FIELD _has_eob, 1, 1
|
||||
@ -218,6 +219,7 @@ _internal_state_b_bytes_valid equ _internal_state+_b_bytes_valid
|
||||
_internal_state_b_bytes_processed equ _internal_state+_b_bytes_processed
|
||||
_internal_state_crc equ _internal_state+_crc
|
||||
_internal_state_dist_mask equ _internal_state+_dist_mask
|
||||
_internal_state_hash_mask equ _internal_state+_hash_mask
|
||||
_internal_state_bitbuf equ _internal_state+_bitbuf
|
||||
_internal_state_state equ _internal_state+_state
|
||||
_internal_state_count equ _internal_state+_count
|
||||
|
@ -923,6 +923,25 @@ static void inline set_dist_mask(struct isal_zstream *stream)
|
||||
|
||||
}
|
||||
|
||||
static void inline set_hash_mask(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
|
||||
switch (stream->level) {
|
||||
case 3:
|
||||
state->hash_mask = LVL3_HASH_MASK;
|
||||
break;
|
||||
case 2:
|
||||
state->hash_mask = LVL2_HASH_MASK;
|
||||
break;
|
||||
case 1:
|
||||
state->hash_mask = LVL1_HASH_MASK;
|
||||
break;
|
||||
case 0:
|
||||
state->hash_mask = LVL0_HASH_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
void isal_deflate_init(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
@ -1036,26 +1055,28 @@ void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, uint32_t dic
|
||||
/* Reset history to prevent out-of-bounds matches; this works because the
|
||||
* dictionary must set at least 1 element in the history */
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
uint32_t hash_mask = stream->internal_state.hash_mask;
|
||||
|
||||
switch (stream->level) {
|
||||
case 3:
|
||||
memset(level_buf->lvl3.hash_table, -1, sizeof(level_buf->lvl3.hash_table));
|
||||
isal_deflate_hash_lvl3(level_buf->lvl3.hash_table, LVL3_HASH_MASK,
|
||||
isal_deflate_hash_lvl3(level_buf->lvl3.hash_table, hash_mask,
|
||||
stream->total_in, dict, dict_len);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
memset(level_buf->lvl2.hash_table, -1, sizeof(level_buf->lvl2.hash_table));
|
||||
isal_deflate_hash_lvl2(level_buf->lvl2.hash_table, LVL2_HASH_MASK,
|
||||
isal_deflate_hash_lvl2(level_buf->lvl2.hash_table, hash_mask,
|
||||
stream->total_in, dict, dict_len);
|
||||
break;
|
||||
case 1:
|
||||
memset(level_buf->lvl1.hash_table, -1, sizeof(level_buf->lvl1.hash_table));
|
||||
isal_deflate_hash_lvl1(level_buf->lvl1.hash_table, LVL1_HASH_MASK,
|
||||
isal_deflate_hash_lvl1(level_buf->lvl1.hash_table, hash_mask,
|
||||
stream->total_in, dict, dict_len);
|
||||
break;
|
||||
default:
|
||||
memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head));
|
||||
isal_deflate_hash_lvl0(stream->internal_state.head, LVL0_HASH_MASK,
|
||||
isal_deflate_hash_lvl0(stream->internal_state.head, hash_mask,
|
||||
stream->total_in, dict, dict_len);
|
||||
}
|
||||
|
||||
@ -1128,6 +1149,8 @@ int isal_deflate_stateless(struct isal_zstream *stream)
|
||||
return level_check;
|
||||
}
|
||||
|
||||
set_hash_mask(stream);
|
||||
|
||||
if (avail_in == 0)
|
||||
stored_len = TYPE0_BLK_HDR_LEN;
|
||||
else
|
||||
@ -1263,6 +1286,7 @@ int isal_deflate(struct isal_zstream *stream)
|
||||
|
||||
if (state->has_hist == IGZIP_NO_HIST) {
|
||||
set_dist_mask(stream);
|
||||
set_hash_mask(stream);
|
||||
stream->total_in -= buffered_size;
|
||||
reset_match_history(stream);
|
||||
stream->total_in += buffered_size;
|
||||
@ -1270,6 +1294,7 @@ int isal_deflate(struct isal_zstream *stream)
|
||||
|
||||
} else if (state->has_hist == IGZIP_DICT_HIST) {
|
||||
set_dist_mask(stream);
|
||||
set_hash_mask(stream);
|
||||
isal_deflate_hash(stream, state->buffer, state->b_bytes_processed);
|
||||
}
|
||||
|
||||
|
@ -37,6 +37,7 @@ void isal_deflate_body_base(struct isal_zstream *stream)
|
||||
uint16_t *last_seen = state->head;
|
||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||
uint32_t hist_size = state->dist_mask;
|
||||
uint32_t hash_mask = state->hash_mask;
|
||||
|
||||
if (stream->avail_in == 0) {
|
||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||
@ -58,7 +59,7 @@ void isal_deflate_body_base(struct isal_zstream *stream)
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
hash = compute_hash(literal) & hash_mask;
|
||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
@ -79,7 +80,7 @@ void isal_deflate_body_base(struct isal_zstream *stream)
|
||||
|
||||
for (; next_hash < end; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
hash = compute_hash(literal) & hash_mask;
|
||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||
}
|
||||
|
||||
@ -124,6 +125,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream)
|
||||
uint16_t *last_seen = state->head;
|
||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||
uint32_t hist_size = state->dist_mask;
|
||||
uint32_t hash_mask = state->hash_mask;
|
||||
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
|
||||
@ -139,7 +141,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream)
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
hash = compute_hash(literal) & hash_mask;
|
||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
@ -158,7 +160,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream)
|
||||
|
||||
for (; next_hash < end - 3; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
hash = compute_hash(literal) & hash_mask;
|
||||
last_seen[hash] =
|
||||
(uint64_t) (next_hash - file_start);
|
||||
}
|
||||
|
@ -34,10 +34,8 @@
|
||||
|
||||
void isal_deflate_body_base(struct isal_zstream *stream);
|
||||
void isal_deflate_finish_base(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_body_hash8k_base(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
|
||||
void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_finish_hash8k_base(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream);
|
||||
void isal_update_histogram_base(uint8_t * start_stream, int length,
|
||||
@ -68,7 +66,7 @@ void isal_deflate_finish(struct isal_zstream *stream)
|
||||
|
||||
void isal_deflate_icf_body_lvl1(struct isal_zstream *stream)
|
||||
{
|
||||
isal_deflate_icf_body_hash8k_base(stream);
|
||||
isal_deflate_icf_body_hash_hist_base(stream);
|
||||
}
|
||||
|
||||
void isal_deflate_icf_body_lvl2(struct isal_zstream *stream)
|
||||
@ -83,7 +81,7 @@ void isal_deflate_icf_body_lvl3(struct isal_zstream *stream)
|
||||
|
||||
void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream)
|
||||
{
|
||||
isal_deflate_icf_finish_hash8k_base(stream);
|
||||
isal_deflate_icf_finish_hash_hist_base(stream);
|
||||
}
|
||||
|
||||
void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream)
|
||||
|
@ -52,6 +52,7 @@
|
||||
%define tmp4 rbx
|
||||
%define dist rbx
|
||||
%define code2 rbx
|
||||
%define hmask1 rbx
|
||||
|
||||
%define hash rdx
|
||||
%define len rdx
|
||||
@ -172,7 +173,8 @@ isal_deflate_body_ %+ ARCH %+ :
|
||||
mov stream, rcx
|
||||
mov byte [stream + _internal_state_has_eob], 0
|
||||
|
||||
MOVDQU xmask, [mask]
|
||||
MOVD xmask, [stream + _internal_state_hash_mask]
|
||||
PSHUFD xmask, xmask, 0
|
||||
|
||||
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
|
||||
mov m_out_buf, [stream + _next_out]
|
||||
@ -203,6 +205,7 @@ isal_deflate_body_ %+ ARCH %+ :
|
||||
cmp f_end_i, f_i
|
||||
jle .input_end
|
||||
|
||||
MOVD hmask1 %+ d, xmask
|
||||
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
|
||||
MOVDQU xdata, [file_start + f_i]
|
||||
mov curr_data, [file_start + f_i]
|
||||
@ -214,8 +217,8 @@ isal_deflate_body_ %+ ARCH %+ :
|
||||
shr tmp3, 8
|
||||
compute_hash hash2, tmp3
|
||||
|
||||
and hash, LVL0_HASH_MASK
|
||||
and hash2, LVL0_HASH_MASK
|
||||
and hash %+ d, hmask1 %+ d
|
||||
and hash2 %+ d, hmask1 %+ d
|
||||
|
||||
cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
|
||||
je .write_first_byte
|
||||
@ -315,6 +318,8 @@ isal_deflate_body_ %+ ARCH %+ :
|
||||
%endif
|
||||
get_len_code len2, code, rcx, hufftables ;; rcx is code_len
|
||||
|
||||
MOVD hmask1 %+ d, xmask
|
||||
|
||||
SHLX code4, code4, rcx
|
||||
or code4, code
|
||||
add code_len2, rcx
|
||||
@ -322,12 +327,13 @@ isal_deflate_body_ %+ ARCH %+ :
|
||||
add f_i, len2
|
||||
neg len2
|
||||
|
||||
SHLX code4, code4, code_len3
|
||||
|
||||
MOVQ tmp5, xdata
|
||||
shr tmp5, 24
|
||||
compute_hash tmp4, tmp5
|
||||
and tmp4, LVL0_HASH_MASK
|
||||
compute_hash hash2, tmp5
|
||||
and hash2 %+ d, hmask1 %+ d
|
||||
|
||||
SHLX code4, code4, code_len3
|
||||
or code4, code3
|
||||
add code_len2, code_len3
|
||||
|
||||
@ -336,23 +342,23 @@ isal_deflate_body_ %+ ARCH %+ :
|
||||
|
||||
MOVDQU xdata, [file_start + f_i]
|
||||
mov curr_data, [file_start + f_i]
|
||||
mov curr_data2, curr_data
|
||||
|
||||
MOVD hash %+ d, xhash
|
||||
PEXTRD hash2 %+ d, xhash, 1
|
||||
PEXTRD tmp6 %+ d, xhash, 1
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
compute_hash hash, curr_data
|
||||
|
||||
add tmp3,1
|
||||
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
||||
mov [stream + _internal_state_head + 2 * tmp6], tmp3 %+ w
|
||||
|
||||
add tmp3, 1
|
||||
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
|
||||
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
||||
|
||||
write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf, tmp4
|
||||
write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf
|
||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||
|
||||
mov curr_data2, curr_data
|
||||
shr curr_data2, 8
|
||||
compute_hash hash2, curr_data2
|
||||
|
||||
@ -362,16 +368,16 @@ isal_deflate_body_ %+ ARCH %+ :
|
||||
cmp tmp3, f_i
|
||||
jae .loop3_done
|
||||
mov tmp6, [file_start + tmp3]
|
||||
compute_hash tmp4, tmp6
|
||||
and tmp4 %+ d, LVL0_HASH_MASK
|
||||
compute_hash tmp1, tmp6
|
||||
and tmp1 %+ d, hmask1 %+ d
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
|
||||
mov [stream + _internal_state_head + 2 * tmp1], tmp3 %+ w
|
||||
jmp .loop3
|
||||
.loop3_done:
|
||||
%endif
|
||||
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
and hash2 %+ d, LVL0_HASH_MASK
|
||||
; hash = compute_hash(state->file_start + f_i) & hash_mask;
|
||||
and hash %+ d, hmask1 %+ d
|
||||
and hash2 %+ d, hmask1 %+ d
|
||||
|
||||
; continue
|
||||
cmp f_i, f_end_i
|
||||
@ -400,8 +406,8 @@ isal_deflate_body_ %+ ARCH %+ :
|
||||
; code2 <<= code_len
|
||||
; code2 |= code
|
||||
; code_len2 += code_len
|
||||
SHLX code2, code2, rcx
|
||||
or code2, code
|
||||
SHLX code4, code2, rcx
|
||||
or code4, code
|
||||
add code_len2, rcx
|
||||
|
||||
;; Setup for updating hash
|
||||
@ -414,14 +420,15 @@ isal_deflate_body_ %+ ARCH %+ :
|
||||
add tmp3,1
|
||||
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
||||
|
||||
MOVD hmask1 %+ d, xmask
|
||||
MOVDQU xdata, [file_start + f_i]
|
||||
mov curr_data, [file_start + f_i]
|
||||
mov curr_data2, curr_data
|
||||
compute_hash hash, curr_data
|
||||
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp7
|
||||
write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf
|
||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||
|
||||
mov curr_data2, curr_data
|
||||
shr curr_data2, 8
|
||||
compute_hash hash2, curr_data2
|
||||
|
||||
@ -431,16 +438,16 @@ isal_deflate_body_ %+ ARCH %+ :
|
||||
cmp tmp3, f_i
|
||||
jae .loop4_done
|
||||
mov tmp6, [file_start + tmp3]
|
||||
compute_hash tmp4, tmp6
|
||||
and tmp4, LVL0_HASH_MASK
|
||||
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
|
||||
compute_hash tmp1, tmp6
|
||||
and tmp1 %+ d, hmask1 %+ d
|
||||
mov [stream + _internal_state_head + 2 * tmp1], tmp3 %+ w
|
||||
jmp .loop4
|
||||
.loop4_done:
|
||||
%endif
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
and hash2 %+ d, LVL0_HASH_MASK
|
||||
; hash = compute_hash(state->file_start + f_i) & hash_mask;
|
||||
and hash %+ d, hmask1 %+ d
|
||||
and hash2 %+ d, hmask1 %+ d
|
||||
|
||||
; continue
|
||||
cmp f_i, f_end_i
|
||||
@ -455,7 +462,7 @@ isal_deflate_body_ %+ ARCH %+ :
|
||||
|
||||
MOVD hash %+ d, xhash
|
||||
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
|
||||
|
||||
PEXTRD hash2 %+ d, xhash, 1
|
||||
|
||||
@ -564,7 +571,3 @@ isal_deflate_body_ %+ ARCH %+ :
|
||||
%xdefine COMPARE_TYPE1 COMPARE_TYPE2
|
||||
%endif
|
||||
%endrep
|
||||
|
||||
section .data
|
||||
align 16
|
||||
mask: dd LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK
|
||||
|
@ -60,12 +60,14 @@
|
||||
%define f_i rdi
|
||||
|
||||
%define code_len2 rbp
|
||||
%define hmask1 rbp
|
||||
|
||||
%define m_out_buf r8
|
||||
|
||||
%define m_bits r9
|
||||
|
||||
%define dist r10
|
||||
%define hmask2 r10
|
||||
|
||||
%define m_bit_count r11
|
||||
|
||||
@ -131,9 +133,9 @@ skip_SLOP:
|
||||
|
||||
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||
ja end_loop_2
|
||||
|
||||
mov hmask1 %+ d, dword [stream + _internal_state_hash_mask]
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
and hash %+ d, hmask1 %+ d
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
mov byte [stream + _internal_state_has_hist], IGZIP_HIST
|
||||
jmp encode_literal
|
||||
@ -142,15 +144,15 @@ skip_write_first_byte:
|
||||
|
||||
loop2:
|
||||
mov tmp3 %+ d, dword [stream + _internal_state_dist_mask]
|
||||
|
||||
mov hmask1 %+ d, dword [stream + _internal_state_hash_mask]
|
||||
; if (state->bitbuf.is_full()) {
|
||||
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||
ja end_loop_2
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
|
||||
; hash = compute_hash(state->file_start + f_i) & hash_mask;
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
and hash %+ d, hmask1 %+ d
|
||||
|
||||
; f_index = state->head[hash];
|
||||
movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
|
||||
@ -198,6 +200,7 @@ loop2:
|
||||
; get_len_code(len, &code, &code_len);
|
||||
get_len_code len, code, rcx, hufftables ;; rcx is code_len
|
||||
|
||||
mov hmask2 %+ d, dword [stream + _internal_state_hash_mask]
|
||||
; code2 <<= code_len
|
||||
; code2 |= code
|
||||
; code_len2 += code_len
|
||||
@ -213,24 +216,24 @@ loop2:
|
||||
|
||||
; only update hash twice
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
|
||||
; hash = compute_hash(state->file_start + k) & hash_mask;
|
||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||
compute_hash hash, tmp6
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
and hash %+ d, hmask2 %+ d
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
add tmp3, 1
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
|
||||
; hash = compute_hash(state->file_start + k) & hash_mask;
|
||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||
compute_hash hash, tmp6
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
and hash %+ d, hmask2 %+ d
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
skip_hash_update:
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
|
||||
|
||||
; continue
|
||||
cmp f_i, [rsp + f_end_i_mem_offset]
|
||||
@ -242,7 +245,7 @@ encode_literal:
|
||||
movzx tmp5, byte [file_start + f_i]
|
||||
get_lit_code tmp5, code2, code_len2, hufftables
|
||||
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
|
||||
|
||||
; continue
|
||||
add f_i, 1
|
||||
@ -263,7 +266,7 @@ final_bytes:
|
||||
ja not_end
|
||||
movzx tmp5, byte [file_start + f_i]
|
||||
get_lit_code tmp5, code2, code_len2, hufftables
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
|
||||
|
||||
inc f_i
|
||||
cmp f_i, [rsp + f_end_i_mem_offset]
|
||||
@ -276,7 +279,7 @@ write_eob:
|
||||
; get_lit_code(256, &code2, &code_len2);
|
||||
get_lit_code 256, code2, code_len2, hufftables
|
||||
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp1
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
|
||||
|
||||
mov byte [stream + _internal_state_has_eob], 1
|
||||
cmp word [stream + _end_of_stream], 1
|
||||
|
@ -100,8 +100,8 @@
|
||||
%define ydist_mask ymm15
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define stack_size 10*16 + 6 * 8 + 8
|
||||
%define local_storage_offset (stack_size - 8)
|
||||
%define stack_size 10*16 + 6 * 8 + 3 * 8
|
||||
%define local_storage_offset (stack_size - 16)
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
@ -144,7 +144,7 @@
|
||||
add rsp, stack_size
|
||||
%endm
|
||||
%else
|
||||
%define stack_size 8
|
||||
%define stack_size 16
|
||||
%define local_storage_offset 0
|
||||
|
||||
%define func(x) x:
|
||||
@ -164,6 +164,7 @@
|
||||
%endif
|
||||
|
||||
%define dist_mask_offset local_storage_offset
|
||||
%define hash_mask_offset local_storage_offset + 8
|
||||
|
||||
%define VECT_SIZE 8
|
||||
%define HASH_BYTES 2
|
||||
@ -184,6 +185,8 @@ func(gen_icf_map_lh1_04)
|
||||
;; Prep for main loop
|
||||
mov tmp %+ d, dword [stream + _internal_state_dist_mask]
|
||||
mov [rsp + dist_mask_offset], tmp
|
||||
mov tmp %+ d, dword [stream + _internal_state_hash_mask]
|
||||
mov [rsp + hash_mask_offset], tmp
|
||||
mov tmp, stream
|
||||
mov level_buf, [stream + _level_buf]
|
||||
sub f_i_end, LA
|
||||
@ -193,7 +196,7 @@ func(gen_icf_map_lh1_04)
|
||||
|
||||
;; Process first byte
|
||||
vpbroadcastd yhash_prod, [hash_prod]
|
||||
vpbroadcastd yhash_mask, [hash_mask]
|
||||
vpbroadcastd yhash_mask, [rsp + hash_mask_offset]
|
||||
vmovd yhashes %+ x, dword [f_i + file_start]
|
||||
vpmaddwd yhashes, yhashes, yhash_prod
|
||||
vpmaddwd yhashes, yhashes, yhash_prod
|
||||
@ -299,7 +302,7 @@ func(gen_icf_map_lh1_04)
|
||||
|
||||
;; Compute hash for next loop
|
||||
vpbroadcastd yhash_prod, [hash_prod]
|
||||
vpbroadcastd yhash_mask, [hash_mask]
|
||||
vpbroadcastd yhash_mask, [rsp + hash_mask_offset]
|
||||
vmovdqu datas, [f_i + file_start + VECT_SIZE]
|
||||
vpermq yhashes, datas, 0x44
|
||||
vpshufb yhashes, yhashes, [datas_shuf]
|
||||
@ -362,7 +365,7 @@ loop1:
|
||||
|
||||
;; Compute hash for next loop
|
||||
vpbroadcastd yhash_prod, [hash_prod]
|
||||
vpbroadcastd yhash_mask, [hash_mask]
|
||||
vpbroadcastd yhash_mask, [rsp + hash_mask_offset]
|
||||
vpermq yhashes, datas_lookup, 0x44
|
||||
vpshufb yhashes, yhashes, [datas_shuf]
|
||||
vpmaddwd yhashes, yhashes, yhash_prod
|
||||
@ -532,7 +535,7 @@ loop1_end:
|
||||
add tmp %+ d, f_i %+ d
|
||||
|
||||
vpbroadcastd yhash_prod %+ x, [hash_prod]
|
||||
vpbroadcastd yhash_mask %+ x, [hash_mask]
|
||||
vpbroadcastd yhash_mask %+ x, [rsp + hash_mask_offset]
|
||||
|
||||
vmovd yhashes %+ x, dword [f_i + file_start + VECT_SIZE - 1]
|
||||
vpmaddwd yhashes %+ x, yhashes %+ x, yhash_prod %+ x
|
||||
@ -722,8 +725,6 @@ hash_prod:
|
||||
dw PROD1, PROD2
|
||||
null_dist_syms:
|
||||
dd LIT
|
||||
hash_mask:
|
||||
dd HASH_MAP_HASH_MASK
|
||||
twofiftyfour:
|
||||
dd 0xfe
|
||||
shortest_matches:
|
||||
|
@ -175,13 +175,13 @@ func(gen_icf_map_lh1_06)
|
||||
|
||||
;; Prep for main loop
|
||||
vpbroadcastd zdist_mask, dword [stream + _internal_state_dist_mask]
|
||||
vpbroadcastd zhash_mask, dword [stream + _internal_state_hash_mask]
|
||||
mov tmp, stream
|
||||
mov level_buf, [stream + _level_buf]
|
||||
sub f_i_end, LA
|
||||
vmovdqu64 zdatas_perm, [datas_perm]
|
||||
vbroadcasti32x8 zdatas_shuf, [datas_shuf]
|
||||
vpbroadcastd zhash_prod, [hash_prod]
|
||||
vpbroadcastd zhash_mask, [hash_mask]
|
||||
vmovdqu64 zincrement, [increment]
|
||||
vmovdqu64 zqword_shuf, [qword_shuf]
|
||||
vbroadcasti64x2 zdatas_perm2, [datas_perm2]
|
||||
@ -569,8 +569,6 @@ thirty:
|
||||
dd 0x1e
|
||||
twofiftyfour:
|
||||
dd 0xfe
|
||||
hash_mask:
|
||||
dd HASH_MAP_HASH_MASK
|
||||
lit_len_mask:
|
||||
dd LIT_LEN_MASK
|
||||
shortest_matches:
|
||||
|
@ -32,102 +32,6 @@ static inline void update_state(struct isal_zstream *stream, uint8_t * start_in,
|
||||
level_buf->icf_buf_avail_out = end_out - next_out;
|
||||
}
|
||||
|
||||
void isal_deflate_icf_body_hash8k_base(struct isal_zstream *stream)
|
||||
{
|
||||
uint32_t literal, hash;
|
||||
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
|
||||
struct deflate_icf *start_out, *next_out, *end_out;
|
||||
uint16_t match_length;
|
||||
uint32_t dist;
|
||||
uint32_t code, code2, extra_bits;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
uint16_t *last_seen = level_buf->hash8k.hash_table;
|
||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||
uint32_t hist_size = state->dist_mask;
|
||||
|
||||
if (stream->avail_in == 0) {
|
||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||
state->state = ZSTATE_FLUSH_READ_BUFFER;
|
||||
return;
|
||||
}
|
||||
|
||||
start_in = stream->next_in;
|
||||
end_in = start_in + stream->avail_in;
|
||||
next_in = start_in;
|
||||
|
||||
start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next;
|
||||
end_out =
|
||||
start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out /
|
||||
sizeof(struct deflate_icf);
|
||||
next_out = start_out;
|
||||
|
||||
while (next_in + ISAL_LOOK_AHEAD < end_in) {
|
||||
|
||||
if (next_out >= end_out) {
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
update_state(stream, start_in, next_in, end_in, start_out, next_out,
|
||||
end_out);
|
||||
return;
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & HASH8K_HASH_MASK;
|
||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
/* The -1 are to handle the case when dist = 0 */
|
||||
if (dist - 1 < hist_size) {
|
||||
assert(dist != 0);
|
||||
|
||||
match_length = compare258(next_in - dist, next_in, 258);
|
||||
|
||||
if (match_length >= SHORTEST_MATCH) {
|
||||
next_hash = next_in;
|
||||
#ifdef ISAL_LIMIT_HASH_UPDATE
|
||||
end = next_hash + 3;
|
||||
#else
|
||||
end = next_hash + match_length;
|
||||
#endif
|
||||
next_hash++;
|
||||
|
||||
for (; next_hash < end; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH8K_HASH_MASK;
|
||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||
}
|
||||
|
||||
get_len_icf_code(match_length, &code);
|
||||
get_dist_icf_code(dist, &code2, &extra_bits);
|
||||
|
||||
level_buf->hist.ll_hist[code]++;
|
||||
level_buf->hist.d_hist[code2]++;
|
||||
|
||||
write_deflate_icf(next_out, code, code2, extra_bits);
|
||||
next_out++;
|
||||
next_in += match_length;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
get_lit_icf_code(literal & 0xFF, &code);
|
||||
level_buf->hist.ll_hist[code]++;
|
||||
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
|
||||
next_out++;
|
||||
next_in++;
|
||||
}
|
||||
|
||||
update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
|
||||
|
||||
assert(stream->avail_in <= ISAL_LOOK_AHEAD);
|
||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||
state->state = ZSTATE_FLUSH_READ_BUFFER;
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
|
||||
{
|
||||
uint32_t literal, hash;
|
||||
@ -141,6 +45,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
|
||||
uint16_t *last_seen = level_buf->hash_hist.hash_table;
|
||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||
uint32_t hist_size = state->dist_mask;
|
||||
uint32_t hash_mask = state->hash_mask;
|
||||
|
||||
if (stream->avail_in == 0) {
|
||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||
@ -168,7 +73,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & HASH_HIST_HASH_MASK;
|
||||
hash = compute_hash(literal) & hash_mask;
|
||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
@ -189,7 +94,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
|
||||
|
||||
for (; next_hash < end; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH_HIST_HASH_MASK;
|
||||
hash = compute_hash(literal) & hash_mask;
|
||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||
}
|
||||
|
||||
@ -224,116 +129,6 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
|
||||
|
||||
}
|
||||
|
||||
void isal_deflate_icf_finish_hash8k_base(struct isal_zstream *stream)
|
||||
{
|
||||
uint32_t literal = 0, hash;
|
||||
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
|
||||
struct deflate_icf *start_out, *next_out, *end_out;
|
||||
uint16_t match_length;
|
||||
uint32_t dist;
|
||||
uint32_t code, code2, extra_bits;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
uint16_t *last_seen = level_buf->hash8k.hash_table;
|
||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||
uint32_t hist_size = state->dist_mask;
|
||||
|
||||
start_in = stream->next_in;
|
||||
end_in = start_in + stream->avail_in;
|
||||
next_in = start_in;
|
||||
|
||||
start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next;
|
||||
end_out = start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out /
|
||||
sizeof(struct deflate_icf);
|
||||
next_out = start_out;
|
||||
|
||||
if (stream->avail_in == 0) {
|
||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
return;
|
||||
}
|
||||
|
||||
while (next_in + 3 < end_in) {
|
||||
if (next_out >= end_out) {
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
update_state(stream, start_in, next_in, end_in, start_out, next_out,
|
||||
end_out);
|
||||
return;
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & HASH8K_HASH_MASK;
|
||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
if (dist - 1 < hist_size) { /* The -1 are to handle the case when dist = 0 */
|
||||
match_length = compare258(next_in - dist, next_in, end_in - next_in);
|
||||
|
||||
if (match_length >= SHORTEST_MATCH) {
|
||||
next_hash = next_in;
|
||||
#ifdef ISAL_LIMIT_HASH_UPDATE
|
||||
end = next_hash + 3;
|
||||
#else
|
||||
end = next_hash + match_length;
|
||||
#endif
|
||||
next_hash++;
|
||||
|
||||
for (; next_hash < end - 3; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH8K_HASH_MASK;
|
||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||
}
|
||||
|
||||
get_len_icf_code(match_length, &code);
|
||||
get_dist_icf_code(dist, &code2, &extra_bits);
|
||||
|
||||
level_buf->hist.ll_hist[code]++;
|
||||
level_buf->hist.d_hist[code2]++;
|
||||
|
||||
write_deflate_icf(next_out, code, code2, extra_bits);
|
||||
|
||||
next_out++;
|
||||
next_in += match_length;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
get_lit_icf_code(literal & 0xFF, &code);
|
||||
level_buf->hist.ll_hist[code]++;
|
||||
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
|
||||
next_out++;
|
||||
next_in++;
|
||||
|
||||
}
|
||||
|
||||
while (next_in < end_in) {
|
||||
if (next_out >= end_out) {
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
update_state(stream, start_in, next_in, end_in, start_out, next_out,
|
||||
end_out);
|
||||
return;
|
||||
}
|
||||
|
||||
literal = *next_in;
|
||||
get_lit_icf_code(literal & 0xFF, &code);
|
||||
level_buf->hist.ll_hist[code]++;
|
||||
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
|
||||
next_out++;
|
||||
next_in++;
|
||||
|
||||
}
|
||||
|
||||
if (next_in == end_in) {
|
||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
}
|
||||
|
||||
update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
|
||||
{
|
||||
uint32_t literal = 0, hash;
|
||||
@ -347,6 +142,7 @@ void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
|
||||
uint16_t *last_seen = level_buf->hash_hist.hash_table;
|
||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||
uint32_t hist_size = state->dist_mask;
|
||||
uint32_t hash_mask = state->hash_mask;
|
||||
|
||||
start_in = stream->next_in;
|
||||
end_in = start_in + stream->avail_in;
|
||||
@ -372,7 +168,7 @@ void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & HASH_HIST_HASH_MASK;
|
||||
hash = compute_hash(literal) & hash_mask;
|
||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
@ -390,7 +186,7 @@ void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
|
||||
|
||||
for (; next_hash < end - 3; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH_HIST_HASH_MASK;
|
||||
hash = compute_hash(literal) & hash_mask;
|
||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||
}
|
||||
|
||||
@ -457,6 +253,7 @@ void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream)
|
||||
uint16_t *last_seen = level_buf->hash_map.hash_table;
|
||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||
uint32_t hist_size = state->dist_mask;
|
||||
uint32_t hash_mask = state->hash_mask;
|
||||
|
||||
start_in = stream->next_in;
|
||||
end_in = start_in + stream->avail_in;
|
||||
@ -481,7 +278,7 @@ void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream)
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash_mad(literal) & HASH_MAP_HASH_MASK;
|
||||
hash = compute_hash_mad(literal) & hash_mask;
|
||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
@ -499,7 +296,7 @@ void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream)
|
||||
|
||||
for (; next_hash < end - 3; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash_mad(literal) & HASH_MAP_HASH_MASK;
|
||||
hash = compute_hash_mad(literal) & hash_mask;
|
||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||
}
|
||||
|
||||
|
@ -78,6 +78,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
uint16_t *hash_table = level_buf->hash_map.hash_table;
|
||||
uint32_t hist_size = stream->internal_state.dist_mask;
|
||||
uint32_t hash_mask = stream->internal_state.hash_mask;
|
||||
|
||||
if (input_size < ISAL_LOOK_AHEAD)
|
||||
return 0;
|
||||
@ -87,7 +88,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
|
||||
matches_icf_lookup->lit_dist = 0x1e;
|
||||
matches_icf_lookup->dist_extra = 0;
|
||||
|
||||
hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK;
|
||||
hash = compute_hash(*(uint32_t *) next_in) & hash_mask;
|
||||
hash_table[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
next_in++;
|
||||
@ -96,7 +97,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
|
||||
}
|
||||
|
||||
while (next_in < end_in - ISAL_LOOK_AHEAD) {
|
||||
hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK;
|
||||
hash = compute_hash(*(uint32_t *) next_in) & hash_mask;
|
||||
dist = (next_in - file_start - hash_table[hash]);
|
||||
dist = ((dist - 1) & hist_size) + 1;
|
||||
hash_table[hash] = (uint64_t) (next_in - file_start);
|
||||
|
@ -63,9 +63,11 @@ global %1
|
||||
%define dist rbx
|
||||
%define dist_code2 rbx
|
||||
%define lit_code2 rbx
|
||||
%define hmask2 rbx
|
||||
|
||||
%define dist2 r12
|
||||
%define dist_code r12
|
||||
%define hmask3 r12
|
||||
|
||||
%define tmp1 rsi
|
||||
%define lit_code rsi
|
||||
@ -73,6 +75,7 @@ global %1
|
||||
%define curr_data2 r8
|
||||
%define len2 r8
|
||||
%define tmp4 r8
|
||||
%define hmask1 r8
|
||||
|
||||
%define len rdx
|
||||
%define len_code rdx
|
||||
@ -104,9 +107,10 @@ global %1
|
||||
m_out_end equ 0 ; local variable (8 bytes)
|
||||
m_out_start equ 8
|
||||
dist_mask_offset equ 16
|
||||
f_end_i_mem_offset equ 24
|
||||
stream_offset equ 32
|
||||
gpr_save_mem_offset equ 40 ; gpr save area (8*8 bytes)
|
||||
hash_mask_offset equ 24
|
||||
f_end_i_mem_offset equ 32
|
||||
stream_offset equ 40
|
||||
gpr_save_mem_offset equ 48 ; gpr save area (8*8 bytes)
|
||||
xmm_save_mem_offset equ gpr_save_mem_offset + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
|
||||
stack_size equ 7*8 + 8*8 + 4*16
|
||||
|
||||
@ -125,14 +129,10 @@ stack_size equ 7*8 + 8*8 + 4*16
|
||||
%xdefine COMPARE_TYPE2 3
|
||||
%endif
|
||||
|
||||
%rep 3
|
||||
;; Defines to generate functions for different levels
|
||||
%xdefine HASH_MASK HASH8K_HASH_MASK
|
||||
%xdefine HASH_MASK1 HASH_HIST_HASH_MASK
|
||||
%xdefine METHOD hash8k
|
||||
%xdefine METHOD1 hash_hist
|
||||
%xdefine METHOD hash_hist
|
||||
|
||||
%rep 2
|
||||
%rep 3
|
||||
%if ARCH == 04
|
||||
%define USE_HSWNI
|
||||
%endif
|
||||
@ -183,8 +183,11 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
||||
mov [rsp + stream_offset], stream
|
||||
|
||||
mov byte [stream + _internal_state_has_eob], 0
|
||||
|
||||
mov tmp1 %+ d, dword[stream + _internal_state_dist_mask]
|
||||
mov [rsp + dist_mask_offset], tmp1
|
||||
mov tmp1 %+ d, dword[stream + _internal_state_hash_mask]
|
||||
mov [rsp + hash_mask_offset], tmp1
|
||||
|
||||
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
|
||||
mov level_buf, [stream + _level_buf]
|
||||
@ -208,6 +211,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
||||
; file_length -= LA;
|
||||
sub file_length, LA
|
||||
; if (file_length <= 0) continue;
|
||||
mov hmask1 %+ d, [rsp + hash_mask_offset]
|
||||
|
||||
cmp file_length, f_i
|
||||
jle .input_end
|
||||
@ -223,8 +227,8 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
||||
shr tmp1, 8
|
||||
compute_hash hash2, tmp1
|
||||
|
||||
and hash, HASH_MASK
|
||||
and hash2, HASH_MASK
|
||||
and hash, hmask1
|
||||
and hash2, hmask1
|
||||
|
||||
cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
|
||||
je .write_first_byte
|
||||
@ -234,6 +238,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
||||
|
||||
.loop2:
|
||||
mov tmp3 %+ d, [rsp + dist_mask_offset]
|
||||
mov hmask1 %+ d, [rsp + hash_mask_offset]
|
||||
; if (state->bitbuf.is_full()) {
|
||||
cmp m_out_buf, [rsp + m_out_end]
|
||||
ja .output_end
|
||||
@ -253,7 +258,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
||||
mov tmp2, curr_data
|
||||
shr curr_data, 16
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, hmask1 %+ d
|
||||
|
||||
mov dist2 %+ w, f_i %+ w
|
||||
dec dist2
|
||||
@ -266,7 +271,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
||||
|
||||
shr tmp2, 24
|
||||
compute_hash hash2, tmp2
|
||||
and hash2 %+ d, HASH_MASK
|
||||
and hash2 %+ d, hmask1 %+ d
|
||||
|
||||
and dist2 %+ d, tmp3 %+ d
|
||||
neg dist2
|
||||
@ -308,6 +313,8 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
||||
|
||||
get_dist_icf_code dist2, dist_code2, tmp1
|
||||
|
||||
mov hmask3 %+ d, dword [rsp + hash_mask_offset]
|
||||
|
||||
;; Setup for updating hash
|
||||
lea tmp3, [f_i + 1] ; tmp3 <= k
|
||||
|
||||
@ -317,7 +324,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
||||
|
||||
shr curr_data, 24
|
||||
compute_hash hash3, curr_data
|
||||
and hash3, HASH_MASK
|
||||
and hash3 %+ d, hmask3 %+ d
|
||||
|
||||
mov curr_data, tmp1
|
||||
shr tmp1, 8
|
||||
@ -349,9 +356,9 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
||||
and dist_code2, 0x1F
|
||||
inc dword [dist_hist + HIST_ELEM_SIZE*dist_code2]
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
; hash = compute_hash(state->file_start + f_i) & hash_mask;
|
||||
and hash %+ d, hmask3 %+ d
|
||||
and hash2 %+ d, hmask3 %+ d
|
||||
|
||||
; continue
|
||||
cmp f_i, file_length
|
||||
@ -373,6 +380,8 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
||||
; get_dist_code(dist, &code2, &code_len2);
|
||||
get_dist_icf_code dist, dist_code, tmp1
|
||||
|
||||
mov hmask2 %+ d, [rsp + hash_mask_offset]
|
||||
|
||||
add file_start, f_i
|
||||
MOVDQU xdata, [file_start + len]
|
||||
mov curr_data2, [file_start + len]
|
||||
@ -401,9 +410,9 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
||||
and dist_code, 0x1F
|
||||
inc dword [dist_hist + HIST_ELEM_SIZE*dist_code]
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
; hash = compute_hash(state->file_start + f_i) & hash_mask;
|
||||
and hash %+ d, hmask2 %+ d
|
||||
and hash2 %+ d, hmask2 %+ d
|
||||
|
||||
; continue
|
||||
cmp f_i, file_length
|
||||
@ -494,6 +503,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
||||
jmp .len_dist_lit_huffman
|
||||
|
||||
.write_first_byte:
|
||||
mov hmask1 %+ d, [rsp + hash_mask_offset]
|
||||
cmp m_out_buf, [rsp + m_out_end]
|
||||
ja .output_end
|
||||
|
||||
@ -515,26 +525,17 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
||||
MOVDQU xdata, [file_start + f_i + 1]
|
||||
add f_i, 1
|
||||
mov curr_data, [file_start + f_i]
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
and hash %+ d, hmask1 %+ d
|
||||
and hash2 %+ d, hmask1 %+ d
|
||||
|
||||
cmp f_i, file_length
|
||||
jl .loop2
|
||||
jmp .input_end
|
||||
|
||||
|
||||
%ifdef USE_HSWNI
|
||||
%undef USE_HSWNI
|
||||
%endif
|
||||
|
||||
;; Shift defines over in order to iterate over all versions
|
||||
%undef HASH_MASK
|
||||
%xdefine HASH_MASK HASH_MASK1
|
||||
|
||||
%undef METHOD
|
||||
%xdefine METHOD METHOD1
|
||||
%endrep
|
||||
|
||||
;; Shift defines over in order to iterate over all versions
|
||||
%undef ARCH
|
||||
%xdefine ARCH ARCH1
|
||||
|
@ -60,12 +60,14 @@
|
||||
%define f_i rdi
|
||||
|
||||
%define code_len2 rbp
|
||||
%define hmask1 rbp
|
||||
|
||||
%define m_out_buf r8
|
||||
|
||||
%define level_buf r9
|
||||
|
||||
%define dist r10
|
||||
%define dist r10
|
||||
%define hmask2 r10
|
||||
|
||||
%define code2 r12
|
||||
%define f_end_i r12
|
||||
@ -87,14 +89,11 @@ f_end_i_mem_offset equ 0 ; local variable (8 bytes)
|
||||
m_out_end equ 8
|
||||
m_out_start equ 16
|
||||
dist_mask_offset equ 24
|
||||
stack_size equ 32
|
||||
hash_mask_offset equ 32
|
||||
stack_size equ 5*8
|
||||
|
||||
%xdefine HASH_MASK HASH8K_HASH_MASK
|
||||
%xdefine HASH_MASK1 HASH_HIST_HASH_MASK
|
||||
%xdefine METHOD hash8k
|
||||
%xdefine METHOD1 hash_hist
|
||||
%xdefine METHOD hash_hist
|
||||
|
||||
%rep 2
|
||||
; void isal_deflate_icf_finish ( isal_zstream *stream )
|
||||
; arg 1: rcx: addr of stream
|
||||
global isal_deflate_icf_finish_ %+ METHOD %+ _01
|
||||
@ -109,7 +108,8 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
||||
%endif
|
||||
|
||||
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
|
||||
mov tmp2, [stream + _internal_state_dist_mask]
|
||||
mov tmp2 %+ d, dword [stream + _internal_state_dist_mask]
|
||||
mov tmp3 %+ d, dword [stream + _internal_state_hash_mask]
|
||||
mov level_buf, [stream + _level_buf]
|
||||
mov m_out_buf, [level_buf + _icf_buf_next]
|
||||
mov [rsp + m_out_start], m_out_buf
|
||||
@ -118,6 +118,7 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
||||
sub tmp1, 4
|
||||
|
||||
mov [rsp + dist_mask_offset], tmp2
|
||||
mov [rsp + hash_mask_offset], tmp3
|
||||
mov [rsp + m_out_end], tmp1
|
||||
|
||||
mov hufftables, [stream + _hufftables]
|
||||
@ -144,8 +145,9 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
||||
cmp m_out_buf, [rsp + m_out_end]
|
||||
ja .end_loop_2
|
||||
|
||||
mov hmask1 %+ d, [rsp + hash_mask_offset]
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, hmask1 %+ d
|
||||
mov [hash_table + 2 * hash], f_i %+ w
|
||||
mov byte [stream + _internal_state_has_hist], IGZIP_HIST
|
||||
jmp .encode_literal
|
||||
@ -154,14 +156,15 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
||||
|
||||
.loop2:
|
||||
mov tmp3 %+ d, [rsp + dist_mask_offset]
|
||||
mov hmask1 %+ d, [rsp + hash_mask_offset]
|
||||
; if (state->bitbuf.is_full()) {
|
||||
cmp m_out_buf, [rsp + m_out_end]
|
||||
ja .end_loop_2
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
; hash = compute_hash(state->file_start + f_i) & hash_mask;
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, hmask1 %+ d
|
||||
|
||||
; f_index = state->head[hash];
|
||||
movzx f_index %+ d, word [hash_table + 2 * hash]
|
||||
@ -209,6 +212,8 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
||||
;; get_len_code
|
||||
lea code, [len + 254]
|
||||
|
||||
mov hmask2 %+ d, [rsp + hash_mask_offset]
|
||||
|
||||
or code2, code
|
||||
inc dword [lit_len_hist + HIST_ELEM_SIZE*code]
|
||||
|
||||
@ -220,19 +225,19 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
||||
|
||||
; only update hash twice
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
; hash = compute_hash(state->file_start + k) & hash_mask;
|
||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||
compute_hash hash, tmp6
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, hmask2 %+ d
|
||||
; state->head[hash] = k;
|
||||
mov [hash_table + 2 * hash], tmp3 %+ w
|
||||
|
||||
add tmp3, 1
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
; hash = compute_hash(state->file_start + k) & hash_mask;
|
||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||
compute_hash hash, tmp6
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, hmask2 %+ d
|
||||
; state->head[hash] = k;
|
||||
mov [hash_table + 2 * hash], tmp3 %+ w
|
||||
|
||||
@ -312,14 +317,6 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
||||
POP_ALL
|
||||
ret
|
||||
|
||||
;; Shift defines over in order to iterate over all versions
|
||||
%undef HASH_MASK
|
||||
%xdefine HASH_MASK HASH_MASK1
|
||||
|
||||
%undef METHOD
|
||||
%xdefine METHOD METHOD1
|
||||
%endrep
|
||||
|
||||
section .data
|
||||
align 4
|
||||
c258: dq 258
|
||||
|
@ -39,13 +39,6 @@ extern isal_deflate_body_04
|
||||
extern isal_deflate_finish_base
|
||||
extern isal_deflate_finish_01
|
||||
|
||||
extern isal_deflate_icf_body_hash8k_base
|
||||
extern isal_deflate_icf_body_hash8k_01
|
||||
extern isal_deflate_icf_body_hash8k_02
|
||||
extern isal_deflate_icf_body_hash8k_04
|
||||
extern isal_deflate_icf_finish_hash8k_base
|
||||
extern isal_deflate_icf_finish_hash8k_01
|
||||
|
||||
extern isal_deflate_icf_body_hash_hist_base
|
||||
extern isal_deflate_icf_body_hash_hist_01
|
||||
extern isal_deflate_icf_body_hash_hist_02
|
||||
@ -99,7 +92,7 @@ mbin_interface isal_deflate_finish
|
||||
mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01
|
||||
|
||||
mbin_interface isal_deflate_icf_body_lvl1
|
||||
mbin_dispatch_init5 isal_deflate_icf_body_lvl1, isal_deflate_icf_body_hash8k_base, isal_deflate_icf_body_hash8k_01, isal_deflate_icf_body_hash8k_02, isal_deflate_icf_body_hash8k_04
|
||||
mbin_dispatch_init5 isal_deflate_icf_body_lvl1, isal_deflate_icf_body_hash_hist_base, isal_deflate_icf_body_hash_hist_01, isal_deflate_icf_body_hash_hist_02, isal_deflate_icf_body_hash_hist_04
|
||||
|
||||
mbin_interface isal_deflate_icf_body_lvl2
|
||||
mbin_dispatch_init5 isal_deflate_icf_body_lvl2, isal_deflate_icf_body_hash_hist_base, isal_deflate_icf_body_hash_hist_01, isal_deflate_icf_body_hash_hist_02, isal_deflate_icf_body_hash_hist_04
|
||||
@ -108,7 +101,7 @@ mbin_interface isal_deflate_icf_body_lvl3
|
||||
mbin_dispatch_init5 isal_deflate_icf_body_lvl3, icf_body_hash1_fillgreedy_lazy, icf_body_hash1_fillgreedy_lazy, icf_body_hash1_fillgreedy_lazy, icf_body_lazyhash1_fillgreedy_greedy
|
||||
|
||||
mbin_interface isal_deflate_icf_finish_lvl1
|
||||
mbin_dispatch_init5 isal_deflate_icf_finish_lvl1, isal_deflate_icf_finish_hash8k_base, isal_deflate_icf_finish_hash8k_01, isal_deflate_icf_finish_hash8k_01, isal_deflate_icf_finish_hash8k_01
|
||||
mbin_dispatch_init5 isal_deflate_icf_finish_lvl1, isal_deflate_icf_finish_hash_hist_base, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01
|
||||
|
||||
mbin_interface isal_deflate_icf_finish_lvl2
|
||||
mbin_dispatch_init5 isal_deflate_icf_finish_lvl2, isal_deflate_icf_finish_hash_hist_base, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01
|
||||
|
@ -386,4 +386,16 @@ ssc:
|
||||
pxor %%dest, %%src2
|
||||
%endif
|
||||
%endm
|
||||
|
||||
%macro PSHUFD 3
|
||||
%define %%dest %1
|
||||
%define %%src1 %2
|
||||
%define %%imm8 %3
|
||||
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
|
||||
vpshufd %%dest, %%src1, %%imm8
|
||||
%else
|
||||
pshufd %%dest, %%src1, %%imm8
|
||||
%endif
|
||||
%endm
|
||||
|
||||
%endif ;; ifndef STDMAC_ASM
|
||||
|
@ -315,9 +315,10 @@ struct isal_zstate {
|
||||
uint32_t block_next; //!< Start of current deflate block in the input
|
||||
uint32_t block_end; //!< End of current deflate block in the input
|
||||
uint32_t dist_mask; //!< Distance mask used.
|
||||
uint32_t hash_mask;
|
||||
enum isal_zstate_state state; //!< Current state in processing the data stream
|
||||
struct BitBuf2 bitbuf; //!< Bit Buffer
|
||||
uint32_t crc; //!< Current crc
|
||||
enum isal_zstate_state state; //!< Current state in processing the data stream
|
||||
uint8_t has_wrap_hdr; //!< keeps track of wrapper header
|
||||
uint8_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set)
|
||||
uint8_t has_eob; //!< keeps track of eob on the last deflate block
|
||||
|
Loading…
Reference in New Issue
Block a user