mirror of
https://github.com/intel/isa-l.git
synced 2024-12-13 09:52:56 +01:00
igzip: Setup for variable hash mask
Change-Id: I3be94dbc40c2e02dcff4f89e5a9df8ed1f744f02 Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
parent
6317ce2b78
commit
03bef684a4
@ -32,14 +32,13 @@
|
|||||||
; Assumes m_out_buf is a register
|
; Assumes m_out_buf is a register
|
||||||
; Clobbers RCX
|
; Clobbers RCX
|
||||||
; code is clobbered
|
; code is clobbered
|
||||||
; write_bits m_bits, m_bit_count, code, count, m_out_buf, tmp1
|
; write_bits m_bits, m_bit_count, code, count, m_out_buf
|
||||||
%macro write_bits 6
|
%macro write_bits 5
|
||||||
%define %%m_bits %1
|
%define %%m_bits %1
|
||||||
%define %%m_bit_count %2
|
%define %%m_bit_count %2
|
||||||
%define %%code %3
|
%define %%code %3
|
||||||
%define %%count %4
|
%define %%count %4
|
||||||
%define %%m_out_buf %5
|
%define %%m_out_buf %5
|
||||||
%define %%tmp1 %6
|
|
||||||
|
|
||||||
%ifdef USE_HSWNI
|
%ifdef USE_HSWNI
|
||||||
shlx %%code, %%code, %%m_bit_count
|
shlx %%code, %%code, %%m_bit_count
|
||||||
|
@ -160,9 +160,10 @@ FIELD _total_in_start,4, 4
|
|||||||
FIELD _block_next, 4, 4
|
FIELD _block_next, 4, 4
|
||||||
FIELD _block_end, 4, 4
|
FIELD _block_end, 4, 4
|
||||||
FIELD _dist_mask, 4, 4
|
FIELD _dist_mask, 4, 4
|
||||||
|
FIELD _hash_mask, 4, 4
|
||||||
|
FIELD _state, 4, 4
|
||||||
FIELD _bitbuf, _BitBuf2_size, _BitBuf2_align
|
FIELD _bitbuf, _BitBuf2_size, _BitBuf2_align
|
||||||
FIELD _crc, 4, 4
|
FIELD _crc, 4, 4
|
||||||
FIELD _state, 4, 4
|
|
||||||
FIELD _has_wrap_hdr, 1, 1
|
FIELD _has_wrap_hdr, 1, 1
|
||||||
FIELD _has_eob_hdr, 1, 1
|
FIELD _has_eob_hdr, 1, 1
|
||||||
FIELD _has_eob, 1, 1
|
FIELD _has_eob, 1, 1
|
||||||
@ -218,6 +219,7 @@ _internal_state_b_bytes_valid equ _internal_state+_b_bytes_valid
|
|||||||
_internal_state_b_bytes_processed equ _internal_state+_b_bytes_processed
|
_internal_state_b_bytes_processed equ _internal_state+_b_bytes_processed
|
||||||
_internal_state_crc equ _internal_state+_crc
|
_internal_state_crc equ _internal_state+_crc
|
||||||
_internal_state_dist_mask equ _internal_state+_dist_mask
|
_internal_state_dist_mask equ _internal_state+_dist_mask
|
||||||
|
_internal_state_hash_mask equ _internal_state+_hash_mask
|
||||||
_internal_state_bitbuf equ _internal_state+_bitbuf
|
_internal_state_bitbuf equ _internal_state+_bitbuf
|
||||||
_internal_state_state equ _internal_state+_state
|
_internal_state_state equ _internal_state+_state
|
||||||
_internal_state_count equ _internal_state+_count
|
_internal_state_count equ _internal_state+_count
|
||||||
|
@ -923,6 +923,25 @@ static void inline set_dist_mask(struct isal_zstream *stream)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void inline set_hash_mask(struct isal_zstream *stream)
|
||||||
|
{
|
||||||
|
struct isal_zstate *state = &stream->internal_state;
|
||||||
|
|
||||||
|
switch (stream->level) {
|
||||||
|
case 3:
|
||||||
|
state->hash_mask = LVL3_HASH_MASK;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
state->hash_mask = LVL2_HASH_MASK;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
state->hash_mask = LVL1_HASH_MASK;
|
||||||
|
break;
|
||||||
|
case 0:
|
||||||
|
state->hash_mask = LVL0_HASH_MASK;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void isal_deflate_init(struct isal_zstream *stream)
|
void isal_deflate_init(struct isal_zstream *stream)
|
||||||
{
|
{
|
||||||
struct isal_zstate *state = &stream->internal_state;
|
struct isal_zstate *state = &stream->internal_state;
|
||||||
@ -1036,26 +1055,28 @@ void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, uint32_t dic
|
|||||||
/* Reset history to prevent out-of-bounds matches; this works because
|
/* Reset history to prevent out-of-bounds matches; this works because
|
||||||
* the dictionary must set at least 1 element in the history */
|
* the dictionary must set at least 1 element in the history */
|
||||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||||
|
uint32_t hash_mask = stream->internal_state.hash_mask;
|
||||||
|
|
||||||
switch (stream->level) {
|
switch (stream->level) {
|
||||||
case 3:
|
case 3:
|
||||||
memset(level_buf->lvl3.hash_table, -1, sizeof(level_buf->lvl3.hash_table));
|
memset(level_buf->lvl3.hash_table, -1, sizeof(level_buf->lvl3.hash_table));
|
||||||
isal_deflate_hash_lvl3(level_buf->lvl3.hash_table, LVL3_HASH_MASK,
|
isal_deflate_hash_lvl3(level_buf->lvl3.hash_table, hash_mask,
|
||||||
stream->total_in, dict, dict_len);
|
stream->total_in, dict, dict_len);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
memset(level_buf->lvl2.hash_table, -1, sizeof(level_buf->lvl2.hash_table));
|
memset(level_buf->lvl2.hash_table, -1, sizeof(level_buf->lvl2.hash_table));
|
||||||
isal_deflate_hash_lvl2(level_buf->lvl2.hash_table, LVL2_HASH_MASK,
|
isal_deflate_hash_lvl2(level_buf->lvl2.hash_table, hash_mask,
|
||||||
stream->total_in, dict, dict_len);
|
stream->total_in, dict, dict_len);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
memset(level_buf->lvl1.hash_table, -1, sizeof(level_buf->lvl1.hash_table));
|
memset(level_buf->lvl1.hash_table, -1, sizeof(level_buf->lvl1.hash_table));
|
||||||
isal_deflate_hash_lvl1(level_buf->lvl1.hash_table, LVL1_HASH_MASK,
|
isal_deflate_hash_lvl1(level_buf->lvl1.hash_table, hash_mask,
|
||||||
stream->total_in, dict, dict_len);
|
stream->total_in, dict, dict_len);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head));
|
memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head));
|
||||||
isal_deflate_hash_lvl0(stream->internal_state.head, LVL0_HASH_MASK,
|
isal_deflate_hash_lvl0(stream->internal_state.head, hash_mask,
|
||||||
stream->total_in, dict, dict_len);
|
stream->total_in, dict, dict_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1128,6 +1149,8 @@ int isal_deflate_stateless(struct isal_zstream *stream)
|
|||||||
return level_check;
|
return level_check;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
set_hash_mask(stream);
|
||||||
|
|
||||||
if (avail_in == 0)
|
if (avail_in == 0)
|
||||||
stored_len = TYPE0_BLK_HDR_LEN;
|
stored_len = TYPE0_BLK_HDR_LEN;
|
||||||
else
|
else
|
||||||
@ -1263,6 +1286,7 @@ int isal_deflate(struct isal_zstream *stream)
|
|||||||
|
|
||||||
if (state->has_hist == IGZIP_NO_HIST) {
|
if (state->has_hist == IGZIP_NO_HIST) {
|
||||||
set_dist_mask(stream);
|
set_dist_mask(stream);
|
||||||
|
set_hash_mask(stream);
|
||||||
stream->total_in -= buffered_size;
|
stream->total_in -= buffered_size;
|
||||||
reset_match_history(stream);
|
reset_match_history(stream);
|
||||||
stream->total_in += buffered_size;
|
stream->total_in += buffered_size;
|
||||||
@ -1270,6 +1294,7 @@ int isal_deflate(struct isal_zstream *stream)
|
|||||||
|
|
||||||
} else if (state->has_hist == IGZIP_DICT_HIST) {
|
} else if (state->has_hist == IGZIP_DICT_HIST) {
|
||||||
set_dist_mask(stream);
|
set_dist_mask(stream);
|
||||||
|
set_hash_mask(stream);
|
||||||
isal_deflate_hash(stream, state->buffer, state->b_bytes_processed);
|
isal_deflate_hash(stream, state->buffer, state->b_bytes_processed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,6 +37,7 @@ void isal_deflate_body_base(struct isal_zstream *stream)
|
|||||||
uint16_t *last_seen = state->head;
|
uint16_t *last_seen = state->head;
|
||||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||||
uint32_t hist_size = state->dist_mask;
|
uint32_t hist_size = state->dist_mask;
|
||||||
|
uint32_t hash_mask = state->hash_mask;
|
||||||
|
|
||||||
if (stream->avail_in == 0) {
|
if (stream->avail_in == 0) {
|
||||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||||
@ -58,7 +59,7 @@ void isal_deflate_body_base(struct isal_zstream *stream)
|
|||||||
}
|
}
|
||||||
|
|
||||||
literal = *(uint32_t *) next_in;
|
literal = *(uint32_t *) next_in;
|
||||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
hash = compute_hash(literal) & hash_mask;
|
||||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||||
|
|
||||||
@ -79,7 +80,7 @@ void isal_deflate_body_base(struct isal_zstream *stream)
|
|||||||
|
|
||||||
for (; next_hash < end; next_hash++) {
|
for (; next_hash < end; next_hash++) {
|
||||||
literal = *(uint32_t *) next_hash;
|
literal = *(uint32_t *) next_hash;
|
||||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
hash = compute_hash(literal) & hash_mask;
|
||||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -124,6 +125,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream)
|
|||||||
uint16_t *last_seen = state->head;
|
uint16_t *last_seen = state->head;
|
||||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||||
uint32_t hist_size = state->dist_mask;
|
uint32_t hist_size = state->dist_mask;
|
||||||
|
uint32_t hash_mask = state->hash_mask;
|
||||||
|
|
||||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||||
|
|
||||||
@ -139,7 +141,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream)
|
|||||||
}
|
}
|
||||||
|
|
||||||
literal = *(uint32_t *) next_in;
|
literal = *(uint32_t *) next_in;
|
||||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
hash = compute_hash(literal) & hash_mask;
|
||||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||||
|
|
||||||
@ -158,7 +160,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream)
|
|||||||
|
|
||||||
for (; next_hash < end - 3; next_hash++) {
|
for (; next_hash < end - 3; next_hash++) {
|
||||||
literal = *(uint32_t *) next_hash;
|
literal = *(uint32_t *) next_hash;
|
||||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
hash = compute_hash(literal) & hash_mask;
|
||||||
last_seen[hash] =
|
last_seen[hash] =
|
||||||
(uint64_t) (next_hash - file_start);
|
(uint64_t) (next_hash - file_start);
|
||||||
}
|
}
|
||||||
|
@ -34,10 +34,8 @@
|
|||||||
|
|
||||||
void isal_deflate_body_base(struct isal_zstream *stream);
|
void isal_deflate_body_base(struct isal_zstream *stream);
|
||||||
void isal_deflate_finish_base(struct isal_zstream *stream);
|
void isal_deflate_finish_base(struct isal_zstream *stream);
|
||||||
void isal_deflate_icf_body_hash8k_base(struct isal_zstream *stream);
|
|
||||||
void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
|
void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
|
||||||
void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream);
|
void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream);
|
||||||
void isal_deflate_icf_finish_hash8k_base(struct isal_zstream *stream);
|
|
||||||
void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream);
|
void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream);
|
||||||
void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream);
|
void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream);
|
||||||
void isal_update_histogram_base(uint8_t * start_stream, int length,
|
void isal_update_histogram_base(uint8_t * start_stream, int length,
|
||||||
@ -68,7 +66,7 @@ void isal_deflate_finish(struct isal_zstream *stream)
|
|||||||
|
|
||||||
void isal_deflate_icf_body_lvl1(struct isal_zstream *stream)
|
void isal_deflate_icf_body_lvl1(struct isal_zstream *stream)
|
||||||
{
|
{
|
||||||
isal_deflate_icf_body_hash8k_base(stream);
|
isal_deflate_icf_body_hash_hist_base(stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void isal_deflate_icf_body_lvl2(struct isal_zstream *stream)
|
void isal_deflate_icf_body_lvl2(struct isal_zstream *stream)
|
||||||
@ -83,7 +81,7 @@ void isal_deflate_icf_body_lvl3(struct isal_zstream *stream)
|
|||||||
|
|
||||||
void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream)
|
void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream)
|
||||||
{
|
{
|
||||||
isal_deflate_icf_finish_hash8k_base(stream);
|
isal_deflate_icf_finish_hash_hist_base(stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream)
|
void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream)
|
||||||
|
@ -52,6 +52,7 @@
|
|||||||
%define tmp4 rbx
|
%define tmp4 rbx
|
||||||
%define dist rbx
|
%define dist rbx
|
||||||
%define code2 rbx
|
%define code2 rbx
|
||||||
|
%define hmask1 rbx
|
||||||
|
|
||||||
%define hash rdx
|
%define hash rdx
|
||||||
%define len rdx
|
%define len rdx
|
||||||
@ -172,7 +173,8 @@ isal_deflate_body_ %+ ARCH %+ :
|
|||||||
mov stream, rcx
|
mov stream, rcx
|
||||||
mov byte [stream + _internal_state_has_eob], 0
|
mov byte [stream + _internal_state_has_eob], 0
|
||||||
|
|
||||||
MOVDQU xmask, [mask]
|
MOVD xmask, [stream + _internal_state_hash_mask]
|
||||||
|
PSHUFD xmask, xmask, 0
|
||||||
|
|
||||||
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
|
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
|
||||||
mov m_out_buf, [stream + _next_out]
|
mov m_out_buf, [stream + _next_out]
|
||||||
@ -203,6 +205,7 @@ isal_deflate_body_ %+ ARCH %+ :
|
|||||||
cmp f_end_i, f_i
|
cmp f_end_i, f_i
|
||||||
jle .input_end
|
jle .input_end
|
||||||
|
|
||||||
|
MOVD hmask1 %+ d, xmask
|
||||||
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
|
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
|
||||||
MOVDQU xdata, [file_start + f_i]
|
MOVDQU xdata, [file_start + f_i]
|
||||||
mov curr_data, [file_start + f_i]
|
mov curr_data, [file_start + f_i]
|
||||||
@ -214,8 +217,8 @@ isal_deflate_body_ %+ ARCH %+ :
|
|||||||
shr tmp3, 8
|
shr tmp3, 8
|
||||||
compute_hash hash2, tmp3
|
compute_hash hash2, tmp3
|
||||||
|
|
||||||
and hash, LVL0_HASH_MASK
|
and hash %+ d, hmask1 %+ d
|
||||||
and hash2, LVL0_HASH_MASK
|
and hash2 %+ d, hmask1 %+ d
|
||||||
|
|
||||||
cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
|
cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
|
||||||
je .write_first_byte
|
je .write_first_byte
|
||||||
@ -315,6 +318,8 @@ isal_deflate_body_ %+ ARCH %+ :
|
|||||||
%endif
|
%endif
|
||||||
get_len_code len2, code, rcx, hufftables ;; rcx is code_len
|
get_len_code len2, code, rcx, hufftables ;; rcx is code_len
|
||||||
|
|
||||||
|
MOVD hmask1 %+ d, xmask
|
||||||
|
|
||||||
SHLX code4, code4, rcx
|
SHLX code4, code4, rcx
|
||||||
or code4, code
|
or code4, code
|
||||||
add code_len2, rcx
|
add code_len2, rcx
|
||||||
@ -322,12 +327,13 @@ isal_deflate_body_ %+ ARCH %+ :
|
|||||||
add f_i, len2
|
add f_i, len2
|
||||||
neg len2
|
neg len2
|
||||||
|
|
||||||
|
SHLX code4, code4, code_len3
|
||||||
|
|
||||||
MOVQ tmp5, xdata
|
MOVQ tmp5, xdata
|
||||||
shr tmp5, 24
|
shr tmp5, 24
|
||||||
compute_hash tmp4, tmp5
|
compute_hash hash2, tmp5
|
||||||
and tmp4, LVL0_HASH_MASK
|
and hash2 %+ d, hmask1 %+ d
|
||||||
|
|
||||||
SHLX code4, code4, code_len3
|
|
||||||
or code4, code3
|
or code4, code3
|
||||||
add code_len2, code_len3
|
add code_len2, code_len3
|
||||||
|
|
||||||
@ -336,23 +342,23 @@ isal_deflate_body_ %+ ARCH %+ :
|
|||||||
|
|
||||||
MOVDQU xdata, [file_start + f_i]
|
MOVDQU xdata, [file_start + f_i]
|
||||||
mov curr_data, [file_start + f_i]
|
mov curr_data, [file_start + f_i]
|
||||||
mov curr_data2, curr_data
|
|
||||||
|
|
||||||
MOVD hash %+ d, xhash
|
MOVD hash %+ d, xhash
|
||||||
PEXTRD hash2 %+ d, xhash, 1
|
PEXTRD tmp6 %+ d, xhash, 1
|
||||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||||
|
|
||||||
compute_hash hash, curr_data
|
compute_hash hash, curr_data
|
||||||
|
|
||||||
add tmp3,1
|
add tmp3,1
|
||||||
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
mov [stream + _internal_state_head + 2 * tmp6], tmp3 %+ w
|
||||||
|
|
||||||
add tmp3, 1
|
add tmp3, 1
|
||||||
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
|
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
||||||
|
|
||||||
write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf, tmp4
|
write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf
|
||||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||||
|
|
||||||
|
mov curr_data2, curr_data
|
||||||
shr curr_data2, 8
|
shr curr_data2, 8
|
||||||
compute_hash hash2, curr_data2
|
compute_hash hash2, curr_data2
|
||||||
|
|
||||||
@ -362,16 +368,16 @@ isal_deflate_body_ %+ ARCH %+ :
|
|||||||
cmp tmp3, f_i
|
cmp tmp3, f_i
|
||||||
jae .loop3_done
|
jae .loop3_done
|
||||||
mov tmp6, [file_start + tmp3]
|
mov tmp6, [file_start + tmp3]
|
||||||
compute_hash tmp4, tmp6
|
compute_hash tmp1, tmp6
|
||||||
and tmp4 %+ d, LVL0_HASH_MASK
|
and tmp1 %+ d, hmask1 %+ d
|
||||||
; state->head[hash] = k;
|
; state->head[hash] = k;
|
||||||
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
|
mov [stream + _internal_state_head + 2 * tmp1], tmp3 %+ w
|
||||||
jmp .loop3
|
jmp .loop3
|
||||||
.loop3_done:
|
.loop3_done:
|
||||||
%endif
|
%endif
|
||||||
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
|
; hash = compute_hash(state->file_start + f_i) & hash_mask;
|
||||||
and hash %+ d, LVL0_HASH_MASK
|
and hash %+ d, hmask1 %+ d
|
||||||
and hash2 %+ d, LVL0_HASH_MASK
|
and hash2 %+ d, hmask1 %+ d
|
||||||
|
|
||||||
; continue
|
; continue
|
||||||
cmp f_i, f_end_i
|
cmp f_i, f_end_i
|
||||||
@ -400,8 +406,8 @@ isal_deflate_body_ %+ ARCH %+ :
|
|||||||
; code2 <<= code_len
|
; code2 <<= code_len
|
||||||
; code2 |= code
|
; code2 |= code
|
||||||
; code_len2 += code_len
|
; code_len2 += code_len
|
||||||
SHLX code2, code2, rcx
|
SHLX code4, code2, rcx
|
||||||
or code2, code
|
or code4, code
|
||||||
add code_len2, rcx
|
add code_len2, rcx
|
||||||
|
|
||||||
;; Setup for updating hash
|
;; Setup for updating hash
|
||||||
@ -414,14 +420,15 @@ isal_deflate_body_ %+ ARCH %+ :
|
|||||||
add tmp3,1
|
add tmp3,1
|
||||||
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
||||||
|
|
||||||
|
MOVD hmask1 %+ d, xmask
|
||||||
MOVDQU xdata, [file_start + f_i]
|
MOVDQU xdata, [file_start + f_i]
|
||||||
mov curr_data, [file_start + f_i]
|
mov curr_data, [file_start + f_i]
|
||||||
mov curr_data2, curr_data
|
|
||||||
compute_hash hash, curr_data
|
compute_hash hash, curr_data
|
||||||
|
|
||||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp7
|
write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf
|
||||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||||
|
|
||||||
|
mov curr_data2, curr_data
|
||||||
shr curr_data2, 8
|
shr curr_data2, 8
|
||||||
compute_hash hash2, curr_data2
|
compute_hash hash2, curr_data2
|
||||||
|
|
||||||
@ -431,16 +438,16 @@ isal_deflate_body_ %+ ARCH %+ :
|
|||||||
cmp tmp3, f_i
|
cmp tmp3, f_i
|
||||||
jae .loop4_done
|
jae .loop4_done
|
||||||
mov tmp6, [file_start + tmp3]
|
mov tmp6, [file_start + tmp3]
|
||||||
compute_hash tmp4, tmp6
|
compute_hash tmp1, tmp6
|
||||||
and tmp4, LVL0_HASH_MASK
|
and tmp1 %+ d, hmask1 %+ d
|
||||||
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
|
mov [stream + _internal_state_head + 2 * tmp1], tmp3 %+ w
|
||||||
jmp .loop4
|
jmp .loop4
|
||||||
.loop4_done:
|
.loop4_done:
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
|
; hash = compute_hash(state->file_start + f_i) & hash_mask;
|
||||||
and hash %+ d, LVL0_HASH_MASK
|
and hash %+ d, hmask1 %+ d
|
||||||
and hash2 %+ d, LVL0_HASH_MASK
|
and hash2 %+ d, hmask1 %+ d
|
||||||
|
|
||||||
; continue
|
; continue
|
||||||
cmp f_i, f_end_i
|
cmp f_i, f_end_i
|
||||||
@ -455,7 +462,7 @@ isal_deflate_body_ %+ ARCH %+ :
|
|||||||
|
|
||||||
MOVD hash %+ d, xhash
|
MOVD hash %+ d, xhash
|
||||||
|
|
||||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
|
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
|
||||||
|
|
||||||
PEXTRD hash2 %+ d, xhash, 1
|
PEXTRD hash2 %+ d, xhash, 1
|
||||||
|
|
||||||
@ -564,7 +571,3 @@ isal_deflate_body_ %+ ARCH %+ :
|
|||||||
%xdefine COMPARE_TYPE1 COMPARE_TYPE2
|
%xdefine COMPARE_TYPE1 COMPARE_TYPE2
|
||||||
%endif
|
%endif
|
||||||
%endrep
|
%endrep
|
||||||
|
|
||||||
section .data
|
|
||||||
align 16
|
|
||||||
mask: dd LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK
|
|
||||||
|
@ -60,12 +60,14 @@
|
|||||||
%define f_i rdi
|
%define f_i rdi
|
||||||
|
|
||||||
%define code_len2 rbp
|
%define code_len2 rbp
|
||||||
|
%define hmask1 rbp
|
||||||
|
|
||||||
%define m_out_buf r8
|
%define m_out_buf r8
|
||||||
|
|
||||||
%define m_bits r9
|
%define m_bits r9
|
||||||
|
|
||||||
%define dist r10
|
%define dist r10
|
||||||
|
%define hmask2 r10
|
||||||
|
|
||||||
%define m_bit_count r11
|
%define m_bit_count r11
|
||||||
|
|
||||||
@ -131,9 +133,9 @@ skip_SLOP:
|
|||||||
|
|
||||||
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||||
ja end_loop_2
|
ja end_loop_2
|
||||||
|
mov hmask1 %+ d, dword [stream + _internal_state_hash_mask]
|
||||||
compute_hash hash, curr_data
|
compute_hash hash, curr_data
|
||||||
and hash %+ d, LVL0_HASH_MASK
|
and hash %+ d, hmask1 %+ d
|
||||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||||
mov byte [stream + _internal_state_has_hist], IGZIP_HIST
|
mov byte [stream + _internal_state_has_hist], IGZIP_HIST
|
||||||
jmp encode_literal
|
jmp encode_literal
|
||||||
@ -142,15 +144,15 @@ skip_write_first_byte:
|
|||||||
|
|
||||||
loop2:
|
loop2:
|
||||||
mov tmp3 %+ d, dword [stream + _internal_state_dist_mask]
|
mov tmp3 %+ d, dword [stream + _internal_state_dist_mask]
|
||||||
|
mov hmask1 %+ d, dword [stream + _internal_state_hash_mask]
|
||||||
; if (state->bitbuf.is_full()) {
|
; if (state->bitbuf.is_full()) {
|
||||||
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||||
ja end_loop_2
|
ja end_loop_2
|
||||||
|
|
||||||
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
|
; hash = compute_hash(state->file_start + f_i) & hash_mask;
|
||||||
mov curr_data %+ d, [file_start + f_i]
|
mov curr_data %+ d, [file_start + f_i]
|
||||||
compute_hash hash, curr_data
|
compute_hash hash, curr_data
|
||||||
and hash %+ d, LVL0_HASH_MASK
|
and hash %+ d, hmask1 %+ d
|
||||||
|
|
||||||
; f_index = state->head[hash];
|
; f_index = state->head[hash];
|
||||||
movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
|
movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
|
||||||
@ -198,6 +200,7 @@ loop2:
|
|||||||
; get_len_code(len, &code, &code_len);
|
; get_len_code(len, &code, &code_len);
|
||||||
get_len_code len, code, rcx, hufftables ;; rcx is code_len
|
get_len_code len, code, rcx, hufftables ;; rcx is code_len
|
||||||
|
|
||||||
|
mov hmask2 %+ d, dword [stream + _internal_state_hash_mask]
|
||||||
; code2 <<= code_len
|
; code2 <<= code_len
|
||||||
; code2 |= code
|
; code2 |= code
|
||||||
; code_len2 += code_len
|
; code_len2 += code_len
|
||||||
@ -213,24 +216,24 @@ loop2:
|
|||||||
|
|
||||||
; only update hash twice
|
; only update hash twice
|
||||||
|
|
||||||
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
|
; hash = compute_hash(state->file_start + k) & hash_mask;
|
||||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||||
compute_hash hash, tmp6
|
compute_hash hash, tmp6
|
||||||
and hash %+ d, LVL0_HASH_MASK
|
and hash %+ d, hmask2 %+ d
|
||||||
; state->head[hash] = k;
|
; state->head[hash] = k;
|
||||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||||
|
|
||||||
add tmp3, 1
|
add tmp3, 1
|
||||||
|
|
||||||
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
|
; hash = compute_hash(state->file_start + k) & hash_mask;
|
||||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||||
compute_hash hash, tmp6
|
compute_hash hash, tmp6
|
||||||
and hash %+ d, LVL0_HASH_MASK
|
and hash %+ d, hmask2 %+ d
|
||||||
; state->head[hash] = k;
|
; state->head[hash] = k;
|
||||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||||
|
|
||||||
skip_hash_update:
|
skip_hash_update:
|
||||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
|
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
|
||||||
|
|
||||||
; continue
|
; continue
|
||||||
cmp f_i, [rsp + f_end_i_mem_offset]
|
cmp f_i, [rsp + f_end_i_mem_offset]
|
||||||
@ -242,7 +245,7 @@ encode_literal:
|
|||||||
movzx tmp5, byte [file_start + f_i]
|
movzx tmp5, byte [file_start + f_i]
|
||||||
get_lit_code tmp5, code2, code_len2, hufftables
|
get_lit_code tmp5, code2, code_len2, hufftables
|
||||||
|
|
||||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
|
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
|
||||||
|
|
||||||
; continue
|
; continue
|
||||||
add f_i, 1
|
add f_i, 1
|
||||||
@ -263,7 +266,7 @@ final_bytes:
|
|||||||
ja not_end
|
ja not_end
|
||||||
movzx tmp5, byte [file_start + f_i]
|
movzx tmp5, byte [file_start + f_i]
|
||||||
get_lit_code tmp5, code2, code_len2, hufftables
|
get_lit_code tmp5, code2, code_len2, hufftables
|
||||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
|
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
|
||||||
|
|
||||||
inc f_i
|
inc f_i
|
||||||
cmp f_i, [rsp + f_end_i_mem_offset]
|
cmp f_i, [rsp + f_end_i_mem_offset]
|
||||||
@ -276,7 +279,7 @@ write_eob:
|
|||||||
; get_lit_code(256, &code2, &code_len2);
|
; get_lit_code(256, &code2, &code_len2);
|
||||||
get_lit_code 256, code2, code_len2, hufftables
|
get_lit_code 256, code2, code_len2, hufftables
|
||||||
|
|
||||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp1
|
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
|
||||||
|
|
||||||
mov byte [stream + _internal_state_has_eob], 1
|
mov byte [stream + _internal_state_has_eob], 1
|
||||||
cmp word [stream + _end_of_stream], 1
|
cmp word [stream + _end_of_stream], 1
|
||||||
|
@ -100,8 +100,8 @@
|
|||||||
%define ydist_mask ymm15
|
%define ydist_mask ymm15
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define stack_size 10*16 + 6 * 8 + 8
|
%define stack_size 10*16 + 6 * 8 + 3 * 8
|
||||||
%define local_storage_offset (stack_size - 8)
|
%define local_storage_offset (stack_size - 16)
|
||||||
%define func(x) proc_frame x
|
%define func(x) proc_frame x
|
||||||
|
|
||||||
%macro FUNC_SAVE 0
|
%macro FUNC_SAVE 0
|
||||||
@ -144,7 +144,7 @@
|
|||||||
add rsp, stack_size
|
add rsp, stack_size
|
||||||
%endm
|
%endm
|
||||||
%else
|
%else
|
||||||
%define stack_size 8
|
%define stack_size 16
|
||||||
%define local_storage_offset 0
|
%define local_storage_offset 0
|
||||||
|
|
||||||
%define func(x) x:
|
%define func(x) x:
|
||||||
@ -164,6 +164,7 @@
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
%define dist_mask_offset local_storage_offset
|
%define dist_mask_offset local_storage_offset
|
||||||
|
%define hash_mask_offset local_storage_offset + 8
|
||||||
|
|
||||||
%define VECT_SIZE 8
|
%define VECT_SIZE 8
|
||||||
%define HASH_BYTES 2
|
%define HASH_BYTES 2
|
||||||
@ -184,6 +185,8 @@ func(gen_icf_map_lh1_04)
|
|||||||
;; Prep for main loop
|
;; Prep for main loop
|
||||||
mov tmp %+ d, dword [stream + _internal_state_dist_mask]
|
mov tmp %+ d, dword [stream + _internal_state_dist_mask]
|
||||||
mov [rsp + dist_mask_offset], tmp
|
mov [rsp + dist_mask_offset], tmp
|
||||||
|
mov tmp %+ d, dword [stream + _internal_state_hash_mask]
|
||||||
|
mov [rsp + hash_mask_offset], tmp
|
||||||
mov tmp, stream
|
mov tmp, stream
|
||||||
mov level_buf, [stream + _level_buf]
|
mov level_buf, [stream + _level_buf]
|
||||||
sub f_i_end, LA
|
sub f_i_end, LA
|
||||||
@ -193,7 +196,7 @@ func(gen_icf_map_lh1_04)
|
|||||||
|
|
||||||
;; Process first byte
|
;; Process first byte
|
||||||
vpbroadcastd yhash_prod, [hash_prod]
|
vpbroadcastd yhash_prod, [hash_prod]
|
||||||
vpbroadcastd yhash_mask, [hash_mask]
|
vpbroadcastd yhash_mask, [rsp + hash_mask_offset]
|
||||||
vmovd yhashes %+ x, dword [f_i + file_start]
|
vmovd yhashes %+ x, dword [f_i + file_start]
|
||||||
vpmaddwd yhashes, yhashes, yhash_prod
|
vpmaddwd yhashes, yhashes, yhash_prod
|
||||||
vpmaddwd yhashes, yhashes, yhash_prod
|
vpmaddwd yhashes, yhashes, yhash_prod
|
||||||
@ -299,7 +302,7 @@ func(gen_icf_map_lh1_04)
|
|||||||
|
|
||||||
;; Compute hash for next loop
|
;; Compute hash for next loop
|
||||||
vpbroadcastd yhash_prod, [hash_prod]
|
vpbroadcastd yhash_prod, [hash_prod]
|
||||||
vpbroadcastd yhash_mask, [hash_mask]
|
vpbroadcastd yhash_mask, [rsp + hash_mask_offset]
|
||||||
vmovdqu datas, [f_i + file_start + VECT_SIZE]
|
vmovdqu datas, [f_i + file_start + VECT_SIZE]
|
||||||
vpermq yhashes, datas, 0x44
|
vpermq yhashes, datas, 0x44
|
||||||
vpshufb yhashes, yhashes, [datas_shuf]
|
vpshufb yhashes, yhashes, [datas_shuf]
|
||||||
@ -362,7 +365,7 @@ loop1:
|
|||||||
|
|
||||||
;; Compute hash for next loop
|
;; Compute hash for next loop
|
||||||
vpbroadcastd yhash_prod, [hash_prod]
|
vpbroadcastd yhash_prod, [hash_prod]
|
||||||
vpbroadcastd yhash_mask, [hash_mask]
|
vpbroadcastd yhash_mask, [rsp + hash_mask_offset]
|
||||||
vpermq yhashes, datas_lookup, 0x44
|
vpermq yhashes, datas_lookup, 0x44
|
||||||
vpshufb yhashes, yhashes, [datas_shuf]
|
vpshufb yhashes, yhashes, [datas_shuf]
|
||||||
vpmaddwd yhashes, yhashes, yhash_prod
|
vpmaddwd yhashes, yhashes, yhash_prod
|
||||||
@ -532,7 +535,7 @@ loop1_end:
|
|||||||
add tmp %+ d, f_i %+ d
|
add tmp %+ d, f_i %+ d
|
||||||
|
|
||||||
vpbroadcastd yhash_prod %+ x, [hash_prod]
|
vpbroadcastd yhash_prod %+ x, [hash_prod]
|
||||||
vpbroadcastd yhash_mask %+ x, [hash_mask]
|
vpbroadcastd yhash_mask %+ x, [rsp + hash_mask_offset]
|
||||||
|
|
||||||
vmovd yhashes %+ x, dword [f_i + file_start + VECT_SIZE - 1]
|
vmovd yhashes %+ x, dword [f_i + file_start + VECT_SIZE - 1]
|
||||||
vpmaddwd yhashes %+ x, yhashes %+ x, yhash_prod %+ x
|
vpmaddwd yhashes %+ x, yhashes %+ x, yhash_prod %+ x
|
||||||
@ -722,8 +725,6 @@ hash_prod:
|
|||||||
dw PROD1, PROD2
|
dw PROD1, PROD2
|
||||||
null_dist_syms:
|
null_dist_syms:
|
||||||
dd LIT
|
dd LIT
|
||||||
hash_mask:
|
|
||||||
dd HASH_MAP_HASH_MASK
|
|
||||||
twofiftyfour:
|
twofiftyfour:
|
||||||
dd 0xfe
|
dd 0xfe
|
||||||
shortest_matches:
|
shortest_matches:
|
||||||
|
@ -175,13 +175,13 @@ func(gen_icf_map_lh1_06)
|
|||||||
|
|
||||||
;; Prep for main loop
|
;; Prep for main loop
|
||||||
vpbroadcastd zdist_mask, dword [stream + _internal_state_dist_mask]
|
vpbroadcastd zdist_mask, dword [stream + _internal_state_dist_mask]
|
||||||
|
vpbroadcastd zhash_mask, dword [stream + _internal_state_hash_mask]
|
||||||
mov tmp, stream
|
mov tmp, stream
|
||||||
mov level_buf, [stream + _level_buf]
|
mov level_buf, [stream + _level_buf]
|
||||||
sub f_i_end, LA
|
sub f_i_end, LA
|
||||||
vmovdqu64 zdatas_perm, [datas_perm]
|
vmovdqu64 zdatas_perm, [datas_perm]
|
||||||
vbroadcasti32x8 zdatas_shuf, [datas_shuf]
|
vbroadcasti32x8 zdatas_shuf, [datas_shuf]
|
||||||
vpbroadcastd zhash_prod, [hash_prod]
|
vpbroadcastd zhash_prod, [hash_prod]
|
||||||
vpbroadcastd zhash_mask, [hash_mask]
|
|
||||||
vmovdqu64 zincrement, [increment]
|
vmovdqu64 zincrement, [increment]
|
||||||
vmovdqu64 zqword_shuf, [qword_shuf]
|
vmovdqu64 zqword_shuf, [qword_shuf]
|
||||||
vbroadcasti64x2 zdatas_perm2, [datas_perm2]
|
vbroadcasti64x2 zdatas_perm2, [datas_perm2]
|
||||||
@ -569,8 +569,6 @@ thirty:
|
|||||||
dd 0x1e
|
dd 0x1e
|
||||||
twofiftyfour:
|
twofiftyfour:
|
||||||
dd 0xfe
|
dd 0xfe
|
||||||
hash_mask:
|
|
||||||
dd HASH_MAP_HASH_MASK
|
|
||||||
lit_len_mask:
|
lit_len_mask:
|
||||||
dd LIT_LEN_MASK
|
dd LIT_LEN_MASK
|
||||||
shortest_matches:
|
shortest_matches:
|
||||||
|
@ -32,102 +32,6 @@ static inline void update_state(struct isal_zstream *stream, uint8_t * start_in,
|
|||||||
level_buf->icf_buf_avail_out = end_out - next_out;
|
level_buf->icf_buf_avail_out = end_out - next_out;
|
||||||
}
|
}
|
||||||
|
|
||||||
void isal_deflate_icf_body_hash8k_base(struct isal_zstream *stream)
|
|
||||||
{
|
|
||||||
uint32_t literal, hash;
|
|
||||||
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
|
|
||||||
struct deflate_icf *start_out, *next_out, *end_out;
|
|
||||||
uint16_t match_length;
|
|
||||||
uint32_t dist;
|
|
||||||
uint32_t code, code2, extra_bits;
|
|
||||||
struct isal_zstate *state = &stream->internal_state;
|
|
||||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
|
||||||
uint16_t *last_seen = level_buf->hash8k.hash_table;
|
|
||||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
|
||||||
uint32_t hist_size = state->dist_mask;
|
|
||||||
|
|
||||||
if (stream->avail_in == 0) {
|
|
||||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
|
||||||
state->state = ZSTATE_FLUSH_READ_BUFFER;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
start_in = stream->next_in;
|
|
||||||
end_in = start_in + stream->avail_in;
|
|
||||||
next_in = start_in;
|
|
||||||
|
|
||||||
start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next;
|
|
||||||
end_out =
|
|
||||||
start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out /
|
|
||||||
sizeof(struct deflate_icf);
|
|
||||||
next_out = start_out;
|
|
||||||
|
|
||||||
while (next_in + ISAL_LOOK_AHEAD < end_in) {
|
|
||||||
|
|
||||||
if (next_out >= end_out) {
|
|
||||||
state->state = ZSTATE_CREATE_HDR;
|
|
||||||
update_state(stream, start_in, next_in, end_in, start_out, next_out,
|
|
||||||
end_out);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
literal = *(uint32_t *) next_in;
|
|
||||||
hash = compute_hash(literal) & HASH8K_HASH_MASK;
|
|
||||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
|
||||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
|
||||||
|
|
||||||
/* The -1 are to handle the case when dist = 0 */
|
|
||||||
if (dist - 1 < hist_size) {
|
|
||||||
assert(dist != 0);
|
|
||||||
|
|
||||||
match_length = compare258(next_in - dist, next_in, 258);
|
|
||||||
|
|
||||||
if (match_length >= SHORTEST_MATCH) {
|
|
||||||
next_hash = next_in;
|
|
||||||
#ifdef ISAL_LIMIT_HASH_UPDATE
|
|
||||||
end = next_hash + 3;
|
|
||||||
#else
|
|
||||||
end = next_hash + match_length;
|
|
||||||
#endif
|
|
||||||
next_hash++;
|
|
||||||
|
|
||||||
for (; next_hash < end; next_hash++) {
|
|
||||||
literal = *(uint32_t *) next_hash;
|
|
||||||
hash = compute_hash(literal) & HASH8K_HASH_MASK;
|
|
||||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
|
||||||
}
|
|
||||||
|
|
||||||
get_len_icf_code(match_length, &code);
|
|
||||||
get_dist_icf_code(dist, &code2, &extra_bits);
|
|
||||||
|
|
||||||
level_buf->hist.ll_hist[code]++;
|
|
||||||
level_buf->hist.d_hist[code2]++;
|
|
||||||
|
|
||||||
write_deflate_icf(next_out, code, code2, extra_bits);
|
|
||||||
next_out++;
|
|
||||||
next_in += match_length;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
get_lit_icf_code(literal & 0xFF, &code);
|
|
||||||
level_buf->hist.ll_hist[code]++;
|
|
||||||
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
|
|
||||||
next_out++;
|
|
||||||
next_in++;
|
|
||||||
}
|
|
||||||
|
|
||||||
update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
|
|
||||||
|
|
||||||
assert(stream->avail_in <= ISAL_LOOK_AHEAD);
|
|
||||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
|
||||||
state->state = ZSTATE_FLUSH_READ_BUFFER;
|
|
||||||
|
|
||||||
return;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
|
void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
|
||||||
{
|
{
|
||||||
uint32_t literal, hash;
|
uint32_t literal, hash;
|
||||||
@ -141,6 +45,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
|
|||||||
uint16_t *last_seen = level_buf->hash_hist.hash_table;
|
uint16_t *last_seen = level_buf->hash_hist.hash_table;
|
||||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||||
uint32_t hist_size = state->dist_mask;
|
uint32_t hist_size = state->dist_mask;
|
||||||
|
uint32_t hash_mask = state->hash_mask;
|
||||||
|
|
||||||
if (stream->avail_in == 0) {
|
if (stream->avail_in == 0) {
|
||||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||||
@ -168,7 +73,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
|
|||||||
}
|
}
|
||||||
|
|
||||||
literal = *(uint32_t *) next_in;
|
literal = *(uint32_t *) next_in;
|
||||||
hash = compute_hash(literal) & HASH_HIST_HASH_MASK;
|
hash = compute_hash(literal) & hash_mask;
|
||||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||||
|
|
||||||
@ -189,7 +94,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
|
|||||||
|
|
||||||
for (; next_hash < end; next_hash++) {
|
for (; next_hash < end; next_hash++) {
|
||||||
literal = *(uint32_t *) next_hash;
|
literal = *(uint32_t *) next_hash;
|
||||||
hash = compute_hash(literal) & HASH_HIST_HASH_MASK;
|
hash = compute_hash(literal) & hash_mask;
|
||||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -224,116 +129,6 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void isal_deflate_icf_finish_hash8k_base(struct isal_zstream *stream)
|
|
||||||
{
|
|
||||||
uint32_t literal = 0, hash;
|
|
||||||
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
|
|
||||||
struct deflate_icf *start_out, *next_out, *end_out;
|
|
||||||
uint16_t match_length;
|
|
||||||
uint32_t dist;
|
|
||||||
uint32_t code, code2, extra_bits;
|
|
||||||
struct isal_zstate *state = &stream->internal_state;
|
|
||||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
|
||||||
uint16_t *last_seen = level_buf->hash8k.hash_table;
|
|
||||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
|
||||||
uint32_t hist_size = state->dist_mask;
|
|
||||||
|
|
||||||
start_in = stream->next_in;
|
|
||||||
end_in = start_in + stream->avail_in;
|
|
||||||
next_in = start_in;
|
|
||||||
|
|
||||||
start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next;
|
|
||||||
end_out = start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out /
|
|
||||||
sizeof(struct deflate_icf);
|
|
||||||
next_out = start_out;
|
|
||||||
|
|
||||||
if (stream->avail_in == 0) {
|
|
||||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
|
||||||
state->state = ZSTATE_CREATE_HDR;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (next_in + 3 < end_in) {
|
|
||||||
if (next_out >= end_out) {
|
|
||||||
state->state = ZSTATE_CREATE_HDR;
|
|
||||||
update_state(stream, start_in, next_in, end_in, start_out, next_out,
|
|
||||||
end_out);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
literal = *(uint32_t *) next_in;
|
|
||||||
hash = compute_hash(literal) & HASH8K_HASH_MASK;
|
|
||||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
|
||||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
|
||||||
|
|
||||||
if (dist - 1 < hist_size) { /* The -1 are to handle the case when dist = 0 */
|
|
||||||
match_length = compare258(next_in - dist, next_in, end_in - next_in);
|
|
||||||
|
|
||||||
if (match_length >= SHORTEST_MATCH) {
|
|
||||||
next_hash = next_in;
|
|
||||||
#ifdef ISAL_LIMIT_HASH_UPDATE
|
|
||||||
end = next_hash + 3;
|
|
||||||
#else
|
|
||||||
end = next_hash + match_length;
|
|
||||||
#endif
|
|
||||||
next_hash++;
|
|
||||||
|
|
||||||
for (; next_hash < end - 3; next_hash++) {
|
|
||||||
literal = *(uint32_t *) next_hash;
|
|
||||||
hash = compute_hash(literal) & HASH8K_HASH_MASK;
|
|
||||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
|
||||||
}
|
|
||||||
|
|
||||||
get_len_icf_code(match_length, &code);
|
|
||||||
get_dist_icf_code(dist, &code2, &extra_bits);
|
|
||||||
|
|
||||||
level_buf->hist.ll_hist[code]++;
|
|
||||||
level_buf->hist.d_hist[code2]++;
|
|
||||||
|
|
||||||
write_deflate_icf(next_out, code, code2, extra_bits);
|
|
||||||
|
|
||||||
next_out++;
|
|
||||||
next_in += match_length;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
get_lit_icf_code(literal & 0xFF, &code);
|
|
||||||
level_buf->hist.ll_hist[code]++;
|
|
||||||
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
|
|
||||||
next_out++;
|
|
||||||
next_in++;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
while (next_in < end_in) {
|
|
||||||
if (next_out >= end_out) {
|
|
||||||
state->state = ZSTATE_CREATE_HDR;
|
|
||||||
update_state(stream, start_in, next_in, end_in, start_out, next_out,
|
|
||||||
end_out);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
literal = *next_in;
|
|
||||||
get_lit_icf_code(literal & 0xFF, &code);
|
|
||||||
level_buf->hist.ll_hist[code]++;
|
|
||||||
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
|
|
||||||
next_out++;
|
|
||||||
next_in++;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if (next_in == end_in) {
|
|
||||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
|
||||||
state->state = ZSTATE_CREATE_HDR;
|
|
||||||
}
|
|
||||||
|
|
||||||
update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
|
void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
|
||||||
{
|
{
|
||||||
uint32_t literal = 0, hash;
|
uint32_t literal = 0, hash;
|
||||||
@ -347,6 +142,7 @@ void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
|
|||||||
uint16_t *last_seen = level_buf->hash_hist.hash_table;
|
uint16_t *last_seen = level_buf->hash_hist.hash_table;
|
||||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||||
uint32_t hist_size = state->dist_mask;
|
uint32_t hist_size = state->dist_mask;
|
||||||
|
uint32_t hash_mask = state->hash_mask;
|
||||||
|
|
||||||
start_in = stream->next_in;
|
start_in = stream->next_in;
|
||||||
end_in = start_in + stream->avail_in;
|
end_in = start_in + stream->avail_in;
|
||||||
@ -372,7 +168,7 @@ void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
|
|||||||
}
|
}
|
||||||
|
|
||||||
literal = *(uint32_t *) next_in;
|
literal = *(uint32_t *) next_in;
|
||||||
hash = compute_hash(literal) & HASH_HIST_HASH_MASK;
|
hash = compute_hash(literal) & hash_mask;
|
||||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||||
|
|
||||||
@ -390,7 +186,7 @@ void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
|
|||||||
|
|
||||||
for (; next_hash < end - 3; next_hash++) {
|
for (; next_hash < end - 3; next_hash++) {
|
||||||
literal = *(uint32_t *) next_hash;
|
literal = *(uint32_t *) next_hash;
|
||||||
hash = compute_hash(literal) & HASH_HIST_HASH_MASK;
|
hash = compute_hash(literal) & hash_mask;
|
||||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -457,6 +253,7 @@ void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream)
|
|||||||
uint16_t *last_seen = level_buf->hash_map.hash_table;
|
uint16_t *last_seen = level_buf->hash_map.hash_table;
|
||||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||||
uint32_t hist_size = state->dist_mask;
|
uint32_t hist_size = state->dist_mask;
|
||||||
|
uint32_t hash_mask = state->hash_mask;
|
||||||
|
|
||||||
start_in = stream->next_in;
|
start_in = stream->next_in;
|
||||||
end_in = start_in + stream->avail_in;
|
end_in = start_in + stream->avail_in;
|
||||||
@ -481,7 +278,7 @@ void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream)
|
|||||||
}
|
}
|
||||||
|
|
||||||
literal = *(uint32_t *) next_in;
|
literal = *(uint32_t *) next_in;
|
||||||
hash = compute_hash_mad(literal) & HASH_MAP_HASH_MASK;
|
hash = compute_hash_mad(literal) & hash_mask;
|
||||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||||
|
|
||||||
@ -499,7 +296,7 @@ void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream)
|
|||||||
|
|
||||||
for (; next_hash < end - 3; next_hash++) {
|
for (; next_hash < end - 3; next_hash++) {
|
||||||
literal = *(uint32_t *) next_hash;
|
literal = *(uint32_t *) next_hash;
|
||||||
hash = compute_hash_mad(literal) & HASH_MAP_HASH_MASK;
|
hash = compute_hash_mad(literal) & hash_mask;
|
||||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -78,6 +78,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
|
|||||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||||
uint16_t *hash_table = level_buf->hash_map.hash_table;
|
uint16_t *hash_table = level_buf->hash_map.hash_table;
|
||||||
uint32_t hist_size = stream->internal_state.dist_mask;
|
uint32_t hist_size = stream->internal_state.dist_mask;
|
||||||
|
uint32_t hash_mask = stream->internal_state.hash_mask;
|
||||||
|
|
||||||
if (input_size < ISAL_LOOK_AHEAD)
|
if (input_size < ISAL_LOOK_AHEAD)
|
||||||
return 0;
|
return 0;
|
||||||
@ -87,7 +88,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
|
|||||||
matches_icf_lookup->lit_dist = 0x1e;
|
matches_icf_lookup->lit_dist = 0x1e;
|
||||||
matches_icf_lookup->dist_extra = 0;
|
matches_icf_lookup->dist_extra = 0;
|
||||||
|
|
||||||
hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK;
|
hash = compute_hash(*(uint32_t *) next_in) & hash_mask;
|
||||||
hash_table[hash] = (uint64_t) (next_in - file_start);
|
hash_table[hash] = (uint64_t) (next_in - file_start);
|
||||||
|
|
||||||
next_in++;
|
next_in++;
|
||||||
@ -96,7 +97,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
|
|||||||
}
|
}
|
||||||
|
|
||||||
while (next_in < end_in - ISAL_LOOK_AHEAD) {
|
while (next_in < end_in - ISAL_LOOK_AHEAD) {
|
||||||
hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK;
|
hash = compute_hash(*(uint32_t *) next_in) & hash_mask;
|
||||||
dist = (next_in - file_start - hash_table[hash]);
|
dist = (next_in - file_start - hash_table[hash]);
|
||||||
dist = ((dist - 1) & hist_size) + 1;
|
dist = ((dist - 1) & hist_size) + 1;
|
||||||
hash_table[hash] = (uint64_t) (next_in - file_start);
|
hash_table[hash] = (uint64_t) (next_in - file_start);
|
||||||
|
@ -63,9 +63,11 @@ global %1
|
|||||||
%define dist rbx
|
%define dist rbx
|
||||||
%define dist_code2 rbx
|
%define dist_code2 rbx
|
||||||
%define lit_code2 rbx
|
%define lit_code2 rbx
|
||||||
|
%define hmask2 rbx
|
||||||
|
|
||||||
%define dist2 r12
|
%define dist2 r12
|
||||||
%define dist_code r12
|
%define dist_code r12
|
||||||
|
%define hmask3 r12
|
||||||
|
|
||||||
%define tmp1 rsi
|
%define tmp1 rsi
|
||||||
%define lit_code rsi
|
%define lit_code rsi
|
||||||
@ -73,6 +75,7 @@ global %1
|
|||||||
%define curr_data2 r8
|
%define curr_data2 r8
|
||||||
%define len2 r8
|
%define len2 r8
|
||||||
%define tmp4 r8
|
%define tmp4 r8
|
||||||
|
%define hmask1 r8
|
||||||
|
|
||||||
%define len rdx
|
%define len rdx
|
||||||
%define len_code rdx
|
%define len_code rdx
|
||||||
@ -104,9 +107,10 @@ global %1
|
|||||||
m_out_end equ 0 ; local variable (8 bytes)
|
m_out_end equ 0 ; local variable (8 bytes)
|
||||||
m_out_start equ 8
|
m_out_start equ 8
|
||||||
dist_mask_offset equ 16
|
dist_mask_offset equ 16
|
||||||
f_end_i_mem_offset equ 24
|
hash_mask_offset equ 24
|
||||||
stream_offset equ 32
|
f_end_i_mem_offset equ 32
|
||||||
gpr_save_mem_offset equ 40 ; gpr save area (8*8 bytes)
|
stream_offset equ 40
|
||||||
|
gpr_save_mem_offset equ 48 ; gpr save area (8*8 bytes)
|
||||||
xmm_save_mem_offset equ gpr_save_mem_offset + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
|
xmm_save_mem_offset equ gpr_save_mem_offset + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
|
||||||
stack_size equ 7*8 + 8*8 + 4*16
|
stack_size equ 7*8 + 8*8 + 4*16
|
||||||
|
|
||||||
@ -125,14 +129,10 @@ stack_size equ 7*8 + 8*8 + 4*16
|
|||||||
%xdefine COMPARE_TYPE2 3
|
%xdefine COMPARE_TYPE2 3
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%rep 3
|
|
||||||
;; Defines to generate functions for different levels
|
;; Defines to generate functions for different levels
|
||||||
%xdefine HASH_MASK HASH8K_HASH_MASK
|
%xdefine METHOD hash_hist
|
||||||
%xdefine HASH_MASK1 HASH_HIST_HASH_MASK
|
|
||||||
%xdefine METHOD hash8k
|
|
||||||
%xdefine METHOD1 hash_hist
|
|
||||||
|
|
||||||
%rep 2
|
%rep 3
|
||||||
%if ARCH == 04
|
%if ARCH == 04
|
||||||
%define USE_HSWNI
|
%define USE_HSWNI
|
||||||
%endif
|
%endif
|
||||||
@ -183,8 +183,11 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
|||||||
mov [rsp + stream_offset], stream
|
mov [rsp + stream_offset], stream
|
||||||
|
|
||||||
mov byte [stream + _internal_state_has_eob], 0
|
mov byte [stream + _internal_state_has_eob], 0
|
||||||
|
|
||||||
mov tmp1 %+ d, dword[stream + _internal_state_dist_mask]
|
mov tmp1 %+ d, dword[stream + _internal_state_dist_mask]
|
||||||
mov [rsp + dist_mask_offset], tmp1
|
mov [rsp + dist_mask_offset], tmp1
|
||||||
|
mov tmp1 %+ d, dword[stream + _internal_state_hash_mask]
|
||||||
|
mov [rsp + hash_mask_offset], tmp1
|
||||||
|
|
||||||
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
|
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
|
||||||
mov level_buf, [stream + _level_buf]
|
mov level_buf, [stream + _level_buf]
|
||||||
@ -208,6 +211,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
|||||||
; file_length -= LA;
|
; file_length -= LA;
|
||||||
sub file_length, LA
|
sub file_length, LA
|
||||||
; if (file_length <= 0) continue;
|
; if (file_length <= 0) continue;
|
||||||
|
mov hmask1 %+ d, [rsp + hash_mask_offset]
|
||||||
|
|
||||||
cmp file_length, f_i
|
cmp file_length, f_i
|
||||||
jle .input_end
|
jle .input_end
|
||||||
@ -223,8 +227,8 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
|||||||
shr tmp1, 8
|
shr tmp1, 8
|
||||||
compute_hash hash2, tmp1
|
compute_hash hash2, tmp1
|
||||||
|
|
||||||
and hash, HASH_MASK
|
and hash, hmask1
|
||||||
and hash2, HASH_MASK
|
and hash2, hmask1
|
||||||
|
|
||||||
cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
|
cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
|
||||||
je .write_first_byte
|
je .write_first_byte
|
||||||
@ -234,6 +238,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
|||||||
|
|
||||||
.loop2:
|
.loop2:
|
||||||
mov tmp3 %+ d, [rsp + dist_mask_offset]
|
mov tmp3 %+ d, [rsp + dist_mask_offset]
|
||||||
|
mov hmask1 %+ d, [rsp + hash_mask_offset]
|
||||||
; if (state->bitbuf.is_full()) {
|
; if (state->bitbuf.is_full()) {
|
||||||
cmp m_out_buf, [rsp + m_out_end]
|
cmp m_out_buf, [rsp + m_out_end]
|
||||||
ja .output_end
|
ja .output_end
|
||||||
@ -253,7 +258,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
|||||||
mov tmp2, curr_data
|
mov tmp2, curr_data
|
||||||
shr curr_data, 16
|
shr curr_data, 16
|
||||||
compute_hash hash, curr_data
|
compute_hash hash, curr_data
|
||||||
and hash %+ d, HASH_MASK
|
and hash %+ d, hmask1 %+ d
|
||||||
|
|
||||||
mov dist2 %+ w, f_i %+ w
|
mov dist2 %+ w, f_i %+ w
|
||||||
dec dist2
|
dec dist2
|
||||||
@ -266,7 +271,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
|||||||
|
|
||||||
shr tmp2, 24
|
shr tmp2, 24
|
||||||
compute_hash hash2, tmp2
|
compute_hash hash2, tmp2
|
||||||
and hash2 %+ d, HASH_MASK
|
and hash2 %+ d, hmask1 %+ d
|
||||||
|
|
||||||
and dist2 %+ d, tmp3 %+ d
|
and dist2 %+ d, tmp3 %+ d
|
||||||
neg dist2
|
neg dist2
|
||||||
@ -308,6 +313,8 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
|||||||
|
|
||||||
get_dist_icf_code dist2, dist_code2, tmp1
|
get_dist_icf_code dist2, dist_code2, tmp1
|
||||||
|
|
||||||
|
mov hmask3 %+ d, dword [rsp + hash_mask_offset]
|
||||||
|
|
||||||
;; Setup for updating hash
|
;; Setup for updating hash
|
||||||
lea tmp3, [f_i + 1] ; tmp3 <= k
|
lea tmp3, [f_i + 1] ; tmp3 <= k
|
||||||
|
|
||||||
@ -317,7 +324,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
|||||||
|
|
||||||
shr curr_data, 24
|
shr curr_data, 24
|
||||||
compute_hash hash3, curr_data
|
compute_hash hash3, curr_data
|
||||||
and hash3, HASH_MASK
|
and hash3 %+ d, hmask3 %+ d
|
||||||
|
|
||||||
mov curr_data, tmp1
|
mov curr_data, tmp1
|
||||||
shr tmp1, 8
|
shr tmp1, 8
|
||||||
@ -349,9 +356,9 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
|||||||
and dist_code2, 0x1F
|
and dist_code2, 0x1F
|
||||||
inc dword [dist_hist + HIST_ELEM_SIZE*dist_code2]
|
inc dword [dist_hist + HIST_ELEM_SIZE*dist_code2]
|
||||||
|
|
||||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
; hash = compute_hash(state->file_start + f_i) & hash_mask;
|
||||||
and hash %+ d, HASH_MASK
|
and hash %+ d, hmask3 %+ d
|
||||||
and hash2 %+ d, HASH_MASK
|
and hash2 %+ d, hmask3 %+ d
|
||||||
|
|
||||||
; continue
|
; continue
|
||||||
cmp f_i, file_length
|
cmp f_i, file_length
|
||||||
@ -373,6 +380,8 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
|||||||
; get_dist_code(dist, &code2, &code_len2);
|
; get_dist_code(dist, &code2, &code_len2);
|
||||||
get_dist_icf_code dist, dist_code, tmp1
|
get_dist_icf_code dist, dist_code, tmp1
|
||||||
|
|
||||||
|
mov hmask2 %+ d, [rsp + hash_mask_offset]
|
||||||
|
|
||||||
add file_start, f_i
|
add file_start, f_i
|
||||||
MOVDQU xdata, [file_start + len]
|
MOVDQU xdata, [file_start + len]
|
||||||
mov curr_data2, [file_start + len]
|
mov curr_data2, [file_start + len]
|
||||||
@ -401,9 +410,9 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
|||||||
and dist_code, 0x1F
|
and dist_code, 0x1F
|
||||||
inc dword [dist_hist + HIST_ELEM_SIZE*dist_code]
|
inc dword [dist_hist + HIST_ELEM_SIZE*dist_code]
|
||||||
|
|
||||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
; hash = compute_hash(state->file_start + f_i) & hash_mask;
|
||||||
and hash %+ d, HASH_MASK
|
and hash %+ d, hmask2 %+ d
|
||||||
and hash2 %+ d, HASH_MASK
|
and hash2 %+ d, hmask2 %+ d
|
||||||
|
|
||||||
; continue
|
; continue
|
||||||
cmp f_i, file_length
|
cmp f_i, file_length
|
||||||
@ -494,6 +503,7 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
|||||||
jmp .len_dist_lit_huffman
|
jmp .len_dist_lit_huffman
|
||||||
|
|
||||||
.write_first_byte:
|
.write_first_byte:
|
||||||
|
mov hmask1 %+ d, [rsp + hash_mask_offset]
|
||||||
cmp m_out_buf, [rsp + m_out_end]
|
cmp m_out_buf, [rsp + m_out_end]
|
||||||
ja .output_end
|
ja .output_end
|
||||||
|
|
||||||
@ -515,26 +525,17 @@ isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
|
|||||||
MOVDQU xdata, [file_start + f_i + 1]
|
MOVDQU xdata, [file_start + f_i + 1]
|
||||||
add f_i, 1
|
add f_i, 1
|
||||||
mov curr_data, [file_start + f_i]
|
mov curr_data, [file_start + f_i]
|
||||||
and hash %+ d, HASH_MASK
|
and hash %+ d, hmask1 %+ d
|
||||||
and hash2 %+ d, HASH_MASK
|
and hash2 %+ d, hmask1 %+ d
|
||||||
|
|
||||||
cmp f_i, file_length
|
cmp f_i, file_length
|
||||||
jl .loop2
|
jl .loop2
|
||||||
jmp .input_end
|
jmp .input_end
|
||||||
|
|
||||||
|
|
||||||
%ifdef USE_HSWNI
|
%ifdef USE_HSWNI
|
||||||
%undef USE_HSWNI
|
%undef USE_HSWNI
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
;; Shift defines over in order to iterate over all versions
|
|
||||||
%undef HASH_MASK
|
|
||||||
%xdefine HASH_MASK HASH_MASK1
|
|
||||||
|
|
||||||
%undef METHOD
|
|
||||||
%xdefine METHOD METHOD1
|
|
||||||
%endrep
|
|
||||||
|
|
||||||
;; Shift defines over in order to iterate over all versions
|
;; Shift defines over in order to iterate over all versions
|
||||||
%undef ARCH
|
%undef ARCH
|
||||||
%xdefine ARCH ARCH1
|
%xdefine ARCH ARCH1
|
||||||
|
@ -60,12 +60,14 @@
|
|||||||
%define f_i rdi
|
%define f_i rdi
|
||||||
|
|
||||||
%define code_len2 rbp
|
%define code_len2 rbp
|
||||||
|
%define hmask1 rbp
|
||||||
|
|
||||||
%define m_out_buf r8
|
%define m_out_buf r8
|
||||||
|
|
||||||
%define level_buf r9
|
%define level_buf r9
|
||||||
|
|
||||||
%define dist r10
|
%define dist r10
|
||||||
|
%define hmask2 r10
|
||||||
|
|
||||||
%define code2 r12
|
%define code2 r12
|
||||||
%define f_end_i r12
|
%define f_end_i r12
|
||||||
@ -87,14 +89,11 @@ f_end_i_mem_offset equ 0 ; local variable (8 bytes)
|
|||||||
m_out_end equ 8
|
m_out_end equ 8
|
||||||
m_out_start equ 16
|
m_out_start equ 16
|
||||||
dist_mask_offset equ 24
|
dist_mask_offset equ 24
|
||||||
stack_size equ 32
|
hash_mask_offset equ 32
|
||||||
|
stack_size equ 5*8
|
||||||
|
|
||||||
%xdefine HASH_MASK HASH8K_HASH_MASK
|
%xdefine METHOD hash_hist
|
||||||
%xdefine HASH_MASK1 HASH_HIST_HASH_MASK
|
|
||||||
%xdefine METHOD hash8k
|
|
||||||
%xdefine METHOD1 hash_hist
|
|
||||||
|
|
||||||
%rep 2
|
|
||||||
; void isal_deflate_icf_finish ( isal_zstream *stream )
|
; void isal_deflate_icf_finish ( isal_zstream *stream )
|
||||||
; arg 1: rcx: addr of stream
|
; arg 1: rcx: addr of stream
|
||||||
global isal_deflate_icf_finish_ %+ METHOD %+ _01
|
global isal_deflate_icf_finish_ %+ METHOD %+ _01
|
||||||
@ -109,7 +108,8 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
|
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
|
||||||
mov tmp2, [stream + _internal_state_dist_mask]
|
mov tmp2 %+ d, dword [stream + _internal_state_dist_mask]
|
||||||
|
mov tmp3 %+ d, dword [stream + _internal_state_hash_mask]
|
||||||
mov level_buf, [stream + _level_buf]
|
mov level_buf, [stream + _level_buf]
|
||||||
mov m_out_buf, [level_buf + _icf_buf_next]
|
mov m_out_buf, [level_buf + _icf_buf_next]
|
||||||
mov [rsp + m_out_start], m_out_buf
|
mov [rsp + m_out_start], m_out_buf
|
||||||
@ -118,6 +118,7 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
|||||||
sub tmp1, 4
|
sub tmp1, 4
|
||||||
|
|
||||||
mov [rsp + dist_mask_offset], tmp2
|
mov [rsp + dist_mask_offset], tmp2
|
||||||
|
mov [rsp + hash_mask_offset], tmp3
|
||||||
mov [rsp + m_out_end], tmp1
|
mov [rsp + m_out_end], tmp1
|
||||||
|
|
||||||
mov hufftables, [stream + _hufftables]
|
mov hufftables, [stream + _hufftables]
|
||||||
@ -144,8 +145,9 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
|||||||
cmp m_out_buf, [rsp + m_out_end]
|
cmp m_out_buf, [rsp + m_out_end]
|
||||||
ja .end_loop_2
|
ja .end_loop_2
|
||||||
|
|
||||||
|
mov hmask1 %+ d, [rsp + hash_mask_offset]
|
||||||
compute_hash hash, curr_data
|
compute_hash hash, curr_data
|
||||||
and hash %+ d, HASH_MASK
|
and hash %+ d, hmask1 %+ d
|
||||||
mov [hash_table + 2 * hash], f_i %+ w
|
mov [hash_table + 2 * hash], f_i %+ w
|
||||||
mov byte [stream + _internal_state_has_hist], IGZIP_HIST
|
mov byte [stream + _internal_state_has_hist], IGZIP_HIST
|
||||||
jmp .encode_literal
|
jmp .encode_literal
|
||||||
@ -154,14 +156,15 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
|||||||
|
|
||||||
.loop2:
|
.loop2:
|
||||||
mov tmp3 %+ d, [rsp + dist_mask_offset]
|
mov tmp3 %+ d, [rsp + dist_mask_offset]
|
||||||
|
mov hmask1 %+ d, [rsp + hash_mask_offset]
|
||||||
; if (state->bitbuf.is_full()) {
|
; if (state->bitbuf.is_full()) {
|
||||||
cmp m_out_buf, [rsp + m_out_end]
|
cmp m_out_buf, [rsp + m_out_end]
|
||||||
ja .end_loop_2
|
ja .end_loop_2
|
||||||
|
|
||||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
; hash = compute_hash(state->file_start + f_i) & hash_mask;
|
||||||
mov curr_data %+ d, [file_start + f_i]
|
mov curr_data %+ d, [file_start + f_i]
|
||||||
compute_hash hash, curr_data
|
compute_hash hash, curr_data
|
||||||
and hash %+ d, HASH_MASK
|
and hash %+ d, hmask1 %+ d
|
||||||
|
|
||||||
; f_index = state->head[hash];
|
; f_index = state->head[hash];
|
||||||
movzx f_index %+ d, word [hash_table + 2 * hash]
|
movzx f_index %+ d, word [hash_table + 2 * hash]
|
||||||
@ -209,6 +212,8 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
|||||||
;; get_len_code
|
;; get_len_code
|
||||||
lea code, [len + 254]
|
lea code, [len + 254]
|
||||||
|
|
||||||
|
mov hmask2 %+ d, [rsp + hash_mask_offset]
|
||||||
|
|
||||||
or code2, code
|
or code2, code
|
||||||
inc dword [lit_len_hist + HIST_ELEM_SIZE*code]
|
inc dword [lit_len_hist + HIST_ELEM_SIZE*code]
|
||||||
|
|
||||||
@ -220,19 +225,19 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
|||||||
|
|
||||||
; only update hash twice
|
; only update hash twice
|
||||||
|
|
||||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
; hash = compute_hash(state->file_start + k) & hash_mask;
|
||||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||||
compute_hash hash, tmp6
|
compute_hash hash, tmp6
|
||||||
and hash %+ d, HASH_MASK
|
and hash %+ d, hmask2 %+ d
|
||||||
; state->head[hash] = k;
|
; state->head[hash] = k;
|
||||||
mov [hash_table + 2 * hash], tmp3 %+ w
|
mov [hash_table + 2 * hash], tmp3 %+ w
|
||||||
|
|
||||||
add tmp3, 1
|
add tmp3, 1
|
||||||
|
|
||||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
; hash = compute_hash(state->file_start + k) & hash_mask;
|
||||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||||
compute_hash hash, tmp6
|
compute_hash hash, tmp6
|
||||||
and hash %+ d, HASH_MASK
|
and hash %+ d, hmask2 %+ d
|
||||||
; state->head[hash] = k;
|
; state->head[hash] = k;
|
||||||
mov [hash_table + 2 * hash], tmp3 %+ w
|
mov [hash_table + 2 * hash], tmp3 %+ w
|
||||||
|
|
||||||
@ -312,14 +317,6 @@ isal_deflate_icf_finish_ %+ METHOD %+ _01:
|
|||||||
POP_ALL
|
POP_ALL
|
||||||
ret
|
ret
|
||||||
|
|
||||||
;; Shift defines over in order to iterate over all versions
|
|
||||||
%undef HASH_MASK
|
|
||||||
%xdefine HASH_MASK HASH_MASK1
|
|
||||||
|
|
||||||
%undef METHOD
|
|
||||||
%xdefine METHOD METHOD1
|
|
||||||
%endrep
|
|
||||||
|
|
||||||
section .data
|
section .data
|
||||||
align 4
|
align 4
|
||||||
c258: dq 258
|
c258: dq 258
|
||||||
|
@ -39,13 +39,6 @@ extern isal_deflate_body_04
|
|||||||
extern isal_deflate_finish_base
|
extern isal_deflate_finish_base
|
||||||
extern isal_deflate_finish_01
|
extern isal_deflate_finish_01
|
||||||
|
|
||||||
extern isal_deflate_icf_body_hash8k_base
|
|
||||||
extern isal_deflate_icf_body_hash8k_01
|
|
||||||
extern isal_deflate_icf_body_hash8k_02
|
|
||||||
extern isal_deflate_icf_body_hash8k_04
|
|
||||||
extern isal_deflate_icf_finish_hash8k_base
|
|
||||||
extern isal_deflate_icf_finish_hash8k_01
|
|
||||||
|
|
||||||
extern isal_deflate_icf_body_hash_hist_base
|
extern isal_deflate_icf_body_hash_hist_base
|
||||||
extern isal_deflate_icf_body_hash_hist_01
|
extern isal_deflate_icf_body_hash_hist_01
|
||||||
extern isal_deflate_icf_body_hash_hist_02
|
extern isal_deflate_icf_body_hash_hist_02
|
||||||
@ -99,7 +92,7 @@ mbin_interface isal_deflate_finish
|
|||||||
mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01
|
mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01
|
||||||
|
|
||||||
mbin_interface isal_deflate_icf_body_lvl1
|
mbin_interface isal_deflate_icf_body_lvl1
|
||||||
mbin_dispatch_init5 isal_deflate_icf_body_lvl1, isal_deflate_icf_body_hash8k_base, isal_deflate_icf_body_hash8k_01, isal_deflate_icf_body_hash8k_02, isal_deflate_icf_body_hash8k_04
|
mbin_dispatch_init5 isal_deflate_icf_body_lvl1, isal_deflate_icf_body_hash_hist_base, isal_deflate_icf_body_hash_hist_01, isal_deflate_icf_body_hash_hist_02, isal_deflate_icf_body_hash_hist_04
|
||||||
|
|
||||||
mbin_interface isal_deflate_icf_body_lvl2
|
mbin_interface isal_deflate_icf_body_lvl2
|
||||||
mbin_dispatch_init5 isal_deflate_icf_body_lvl2, isal_deflate_icf_body_hash_hist_base, isal_deflate_icf_body_hash_hist_01, isal_deflate_icf_body_hash_hist_02, isal_deflate_icf_body_hash_hist_04
|
mbin_dispatch_init5 isal_deflate_icf_body_lvl2, isal_deflate_icf_body_hash_hist_base, isal_deflate_icf_body_hash_hist_01, isal_deflate_icf_body_hash_hist_02, isal_deflate_icf_body_hash_hist_04
|
||||||
@ -108,7 +101,7 @@ mbin_interface isal_deflate_icf_body_lvl3
|
|||||||
mbin_dispatch_init5 isal_deflate_icf_body_lvl3, icf_body_hash1_fillgreedy_lazy, icf_body_hash1_fillgreedy_lazy, icf_body_hash1_fillgreedy_lazy, icf_body_lazyhash1_fillgreedy_greedy
|
mbin_dispatch_init5 isal_deflate_icf_body_lvl3, icf_body_hash1_fillgreedy_lazy, icf_body_hash1_fillgreedy_lazy, icf_body_hash1_fillgreedy_lazy, icf_body_lazyhash1_fillgreedy_greedy
|
||||||
|
|
||||||
mbin_interface isal_deflate_icf_finish_lvl1
|
mbin_interface isal_deflate_icf_finish_lvl1
|
||||||
mbin_dispatch_init5 isal_deflate_icf_finish_lvl1, isal_deflate_icf_finish_hash8k_base, isal_deflate_icf_finish_hash8k_01, isal_deflate_icf_finish_hash8k_01, isal_deflate_icf_finish_hash8k_01
|
mbin_dispatch_init5 isal_deflate_icf_finish_lvl1, isal_deflate_icf_finish_hash_hist_base, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01
|
||||||
|
|
||||||
mbin_interface isal_deflate_icf_finish_lvl2
|
mbin_interface isal_deflate_icf_finish_lvl2
|
||||||
mbin_dispatch_init5 isal_deflate_icf_finish_lvl2, isal_deflate_icf_finish_hash_hist_base, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01
|
mbin_dispatch_init5 isal_deflate_icf_finish_lvl2, isal_deflate_icf_finish_hash_hist_base, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01
|
||||||
|
@ -386,4 +386,16 @@ ssc:
|
|||||||
pxor %%dest, %%src2
|
pxor %%dest, %%src2
|
||||||
%endif
|
%endif
|
||||||
%endm
|
%endm
|
||||||
|
|
||||||
|
%macro PSHUFD 3
|
||||||
|
%define %%dest %1
|
||||||
|
%define %%src1 %2
|
||||||
|
%define %%imm8 %3
|
||||||
|
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
|
||||||
|
vpshufd %%dest, %%src1, %%imm8
|
||||||
|
%else
|
||||||
|
pshufd %%dest, %%src1, %%imm8
|
||||||
|
%endif
|
||||||
|
%endm
|
||||||
|
|
||||||
%endif ;; ifndef STDMAC_ASM
|
%endif ;; ifndef STDMAC_ASM
|
||||||
|
@ -315,9 +315,10 @@ struct isal_zstate {
|
|||||||
uint32_t block_next; //!< Start of current deflate block in the input
|
uint32_t block_next; //!< Start of current deflate block in the input
|
||||||
uint32_t block_end; //!< End of current deflate block in the input
|
uint32_t block_end; //!< End of current deflate block in the input
|
||||||
uint32_t dist_mask; //!< Distance mask used.
|
uint32_t dist_mask; //!< Distance mask used.
|
||||||
|
uint32_t hash_mask;
|
||||||
|
enum isal_zstate_state state; //!< Current state in processing the data stream
|
||||||
struct BitBuf2 bitbuf; //!< Bit Buffer
|
struct BitBuf2 bitbuf; //!< Bit Buffer
|
||||||
uint32_t crc; //!< Current crc
|
uint32_t crc; //!< Current crc
|
||||||
enum isal_zstate_state state; //!< Current state in processing the data stream
|
|
||||||
uint8_t has_wrap_hdr; //!< keeps track of wrapper header
|
uint8_t has_wrap_hdr; //!< keeps track of wrapper header
|
||||||
uint8_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set)
|
uint8_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set)
|
||||||
uint8_t has_eob; //!< keeps track of eob on the last deflate block
|
uint8_t has_eob; //!< keeps track of eob on the last deflate block
|
||||||
|
Loading…
Reference in New Issue
Block a user