igzip: Optimize igzip

Remove conditional move and reorder operations to increase speed.

Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
Reviewed-by: Greg Tucker <greg.b.tucker@intel.com>
This commit is contained in:
Roy Oursler 2016-07-28 15:29:34 -07:00 committed by Greg Tucker
parent d06e14b937
commit 22bd9eef65
4 changed files with 54 additions and 55 deletions

View File

@ -152,6 +152,9 @@ void sync_flush(struct isal_zstream *stream)
if (stream->flush == FULL_FLUSH) {
/* Clear match history so there are no cross-block
 * length/distance pairs */
state->file_start -= state->b_bytes_processed;
state->b_bytes_valid -= state->b_bytes_processed;
state->b_bytes_processed = 0;
reset_match_history(stream);
}
}
@ -392,10 +395,7 @@ static int isal_deflate_int_stateless(struct isal_zstream *stream, uint8_t * nex
if (stream->avail_out < 8)
return STATELESS_OVERFLOW;
stream->internal_state.file_start = (uint8_t *) & stream->internal_state.buffer;
stream->internal_state.b_bytes_processed = 0;
reset_match_history(stream);
memset(stream->internal_state.head, 0, sizeof(stream->internal_state.head));
isal_deflate_body_stateless(stream);
if (!stream->internal_state.has_eob)
@ -496,7 +496,7 @@ static inline void reset_match_history(struct isal_zstream *stream)
for (i = 0; i < sizeof(state->head) / 2; i++) {
head[i] =
(uint16_t) (state->b_bytes_processed + state->buffer - state->file_start -
(IGZIP_D + 1));
IGZIP_D);
}
}
@ -529,7 +529,7 @@ void isal_deflate_init_01(struct isal_zstream *stream)
memset(state->crc, 0, sizeof(state->crc));
*state->crc = 0x9db42487;
reset_match_history(stream);
memset(state->head, 0, sizeof(state->head));
return;
}

View File

@ -233,10 +233,8 @@ skip_SLOP:
mov blen %+ d, [stream + _avail_in]
mov dword [rsp + empty_buffer_flag], 0
cmp dword [stream + _flush], _FULL_FLUSH
sete byte [rsp + empty_buffer_flag]
cmp dword [stream + _internal_state_b_bytes_processed], 0
sete byte [rsp + empty_buffer_flag + 1]
sete byte [rsp + empty_buffer_flag]
; while (blen != 0)
MARK __Compute_X_ %+ ARCH
@ -374,9 +372,9 @@ loop2:
lea tmp1, [file_start + f_i]
mov dist %+ w, f_i %+ w
dec dist
sub dist %+ w, word [stream + _internal_state_head + 2 * hash]
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
dec dist
inc f_i
@ -386,22 +384,18 @@ loop2:
compute_hash tmp6, tmp5
mov dist2 %+ w, f_i %+ w
dec dist2
sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2]
mov [stream + _internal_state_head + 2 * hash2], f_i %+ w
dec dist2
; if ((dist-1) < (D-1)) {
cmp dist %+ d, (D-1)
cmovae dist, tmp3
add dist, 1
and dist %+ d, (D-1)
neg dist
shr tmp8, 8
compute_hash tmp2, tmp8
cmp dist2 %+ d, (D-1)
cmovae dist2, tmp3
add dist2, 1
and dist2 %+ d, (D-1)
neg dist2
MARK __compare_ %+ ARCH
@ -409,7 +403,7 @@ MARK __compare_ %+ ARCH
MOVQ len, xdata
mov curr_data, len
PSRLDQ xdata, 1
xor len, [tmp1 + dist]
xor len, [tmp1 + dist - 1]
jz compare_loop
MOVD xhash, tmp6 %+ d
@ -418,7 +412,7 @@ MARK __compare_ %+ ARCH
;; Check for len/dist match (>7) with second literal
MOVQ len2, xdata
xor len2, [tmp1 + dist2 + 1]
xor len2, [tmp1 + dist2]
jz compare_loop2
;; Speculatively load the code for the first literal
@ -434,7 +428,7 @@ MARK __compare_ %+ ARCH
and curr_data, 0xff
get_lit_code curr_data, code2, code_len2, hufftables
shl code2, cl
SHLX code2, code2, rcx
or code2, code3
add code_len2, rcx
@ -450,6 +444,7 @@ len_dist_lit_huffman_pre:
len_dist_lit_huffman:
neg dist2
add dist2, 1
%ifndef LONGER_HUFFTABLE
mov tmp4, dist2
get_dist_code tmp4, code4, code_len2, hufftables ;; clobbers dist, rcx
@ -462,21 +457,21 @@ len_dist_lit_huffman:
or code4, code
add code_len2, rcx
mov rcx, code_len3
add f_i, len2
neg len2
MOVQ tmp5, xdata
shr tmp5, 24
compute_hash tmp4, tmp5
and tmp4, HASH_MASK
SHLX code4, code4, rcx
SHLX code4, code4, code_len3
or code4, code3
add code_len2, rcx
add code_len2, code_len3
;; Setup for updating hash
lea tmp3, [f_i + 1] ; tmp3 <= k
lea tmp3, [f_i + len2 + 1] ; tmp3 <= k
add f_i, len2
MOVDQU xdata, [file_start + f_i]
mov curr_data, [file_start + f_i]
mov curr_data2, curr_data
@ -529,6 +524,7 @@ len_dist_huffman_pre:
len_dist_huffman:
dec f_i
neg dist
add dist, 1
; get_dist_code(dist, &code2, &code_len2);
%ifndef LONGER_HUFFTABLE
@ -702,7 +698,7 @@ compare_loop:
MOVD xhash, tmp6 %+ d
PINSRD xhash, tmp2 %+ d, 1
PAND xhash, xhash, xmask
lea tmp2, [tmp1 + dist]
lea tmp2, [tmp1 + dist - 1]
%if (COMPARE_TYPE == 1)
compare250 tmp1, tmp2, len, tmp3
%elif (COMPARE_TYPE == 2)
@ -716,8 +712,8 @@ compare_loop:
jmp len_dist_huffman
compare_loop2:
add tmp1, 1
lea tmp2, [tmp1 + dist2]
add tmp1, 1
%if (COMPARE_TYPE == 1)
compare250 tmp1, tmp2, len2, tmp3
%elif (COMPARE_TYPE == 2)

View File

@ -129,6 +129,19 @@ skip_SLOP:
mov tmp1, [file_start + f_i]
cmp dword [stream + _internal_state_b_bytes_processed], 0
jne skip_write_first_byte
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end_loop_2
compute_hash hash, tmp1
and hash %+ d, HASH_MASK
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
jmp encode_literal
skip_write_first_byte:
loop2:
; if (state->bitbuf.is_full()) {
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
@ -185,11 +198,7 @@ loop2:
; code2 <<= code_len
; code2 |= code
; code_len2 += code_len
%ifdef USE_HSWNI
shlx code2, code2, rcx
%else
shl code2, cl
%endif
SHLX code2, code2, rcx
or code2, code
add code_len2, rcx

View File

@ -230,9 +230,9 @@ loop2:
lea tmp1, [file_start + f_i]
mov dist %+ w, f_i %+ w
dec dist
sub dist %+ w, word [stream + _internal_state_head + 2 * hash]
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
dec dist
inc f_i
@ -242,22 +242,18 @@ loop2:
compute_hash tmp6, tmp5
mov dist2 %+ w, f_i %+ w
dec dist2
sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2]
mov [stream + _internal_state_head + 2 * hash2], f_i %+ w
dec dist2
; if ((dist-1) < (D-1)) {
cmp dist %+ d, (D-1)
cmovae dist, tmp3
add dist, 1
and dist %+ d, (D-1)
neg dist
shr tmp8, 8
compute_hash tmp2, tmp8
cmp dist2 %+ d, (D-1)
cmovae dist2, tmp3
add dist2, 1
and dist2 %+ d, (D-1)
neg dist2
MARK __stateless_compare_ %+ ARCH
@ -265,7 +261,7 @@ MARK __stateless_compare_ %+ ARCH
MOVQ len, xdata
mov curr_data, len
PSRLDQ xdata, 1
xor len, [tmp1 + dist]
xor len, [tmp1 + dist - 1]
jz compare_loop
MOVD xhash, tmp6 %+ d
@ -274,7 +270,7 @@ MARK __stateless_compare_ %+ ARCH
;; Check for len/dist match (>7) with second literal
MOVQ len2, xdata
xor len2, [tmp1 + dist2 + 1]
xor len2, [tmp1 + dist2]
jz compare_loop2
;; Speculatively load the code for the first literal
@ -290,7 +286,7 @@ MARK __stateless_compare_ %+ ARCH
and curr_data, 0xff
get_lit_code curr_data, code2, code_len2, hufftables
shl code2, cl
SHLX code2, code2, rcx
or code2, code3
add code_len2, rcx
@ -306,6 +302,7 @@ len_dist_lit_huffman_pre:
len_dist_lit_huffman:
neg dist2
add dist2, 1
%ifndef LONGER_HUFFTABLE
mov tmp4, dist2
get_dist_code tmp4, code4, code_len2, hufftables ;; clobbers dist, rcx
@ -318,21 +315,21 @@ len_dist_lit_huffman:
or code4, code
add code_len2, rcx
mov rcx, code_len3
add f_i, len2
neg len2
MOVQ tmp5, xdata
shr tmp5, 24
compute_hash tmp4, tmp5
and tmp4, HASH_MASK
SHLX code4, code4, rcx
SHLX code4, code4, code_len3
or code4, code3
add code_len2, rcx
add code_len2, code_len3
;; Setup for updating hash
lea tmp3, [f_i + 1] ; tmp3 <= k
lea tmp3, [f_i + len2 + 1] ; tmp3 <= k
add f_i, len2
MOVDQU xdata, [file_start + f_i]
mov curr_data, [file_start + f_i]
mov curr_data2, curr_data
@ -386,6 +383,7 @@ len_dist_huffman_pre:
len_dist_huffman:
dec f_i
neg dist
add dist, 1
; get_dist_code(dist, &code2, &code_len2);
%ifndef LONGER_HUFFTABLE
@ -517,11 +515,7 @@ loop2_finish:
get_len_code len, code, rcx, hufftables ;; rcx is code_len
;; Combine length and distance code for writing it out
%ifdef USE_HSWNI
shlx code2, code2, rcx
%else
shl code2, cl
%endif
SHLX code2, code2, rcx
or code2, code
add code_len2, rcx
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
@ -608,7 +602,7 @@ compare_loop:
MOVD xhash, tmp6 %+ d
PINSRD xhash, tmp2 %+ d, 1
PAND xhash, xhash, xmask
lea tmp2, [tmp1 + dist]
lea tmp2, [tmp1 + dist - 1]
%if (COMPARE_TYPE == 1)
compare250 tmp1, tmp2, len, tmp3
%elif (COMPARE_TYPE == 2)
@ -622,8 +616,8 @@ compare_loop:
jmp len_dist_huffman
compare_loop2:
add tmp1, 1
lea tmp2, [tmp1 + dist2]
add tmp1, 1
%if (COMPARE_TYPE == 1)
compare250 tmp1, tmp2, len2, tmp3
%elif (COMPARE_TYPE == 2)