diff --git a/igzip/data_struct2.asm b/igzip/data_struct2.asm index b5cd659..abca975 100644 --- a/igzip/data_struct2.asm +++ b/igzip/data_struct2.asm @@ -82,7 +82,7 @@ FIELD _tmp_out_start, 4, 4 FIELD _tmp_out_end, 4, 4 FIELD _has_eob, 4, 4 FIELD _has_eob_hdr, 4, 4 -FIELD _left_over, 4, 4 +FIELD _has_hist, 4, 4 FIELD _buffer, BSIZE, 32 FIELD _head, IGZIP_HASH_SIZE*2, 16 @@ -129,7 +129,7 @@ _internal_state_tmp_out_start equ _internal_state+_tmp_out_start _internal_state_tmp_out_end equ _internal_state+_tmp_out_end _internal_state_has_eob equ _internal_state+_has_eob _internal_state_has_eob_hdr equ _internal_state+_has_eob_hdr -_internal_state_left_over equ _internal_state+_left_over +_internal_state_has_hist equ _internal_state+_has_hist _internal_state_buffer equ _internal_state+_buffer _internal_state_head equ _internal_state+_head _internal_state_bitbuf_m_bits equ _internal_state+_bitbuf_m_bits diff --git a/igzip/igzip.c b/igzip/igzip.c index 88afd23..ad12d2b 100644 --- a/igzip/igzip.c +++ b/igzip/igzip.c @@ -526,6 +526,8 @@ static inline void reset_match_history(struct isal_zstream *stream) uint16_t *head = stream->internal_state.head; int i = 0; + state->has_hist = 0; + if (stream->total_in == 0) memset(stream->internal_state.head, 0, sizeof(stream->internal_state.head)); else { @@ -549,7 +551,7 @@ void isal_deflate_init(struct isal_zstream *stream) state->b_bytes_processed = 0; state->has_eob = 0; state->has_eob_hdr = 0; - state->left_over = 0; + state->has_hist = 0; state->state = ZSTATE_NEW_HDR; state->count = 0; diff --git a/igzip/igzip_body.asm b/igzip/igzip_body.asm index e325ee2..e588af6 100644 --- a/igzip/igzip_body.asm +++ b/igzip/igzip_body.asm @@ -199,31 +199,23 @@ skip1: ; for (f_i = f_start_i; f_i < f_end_i; f_i++) { MARK __body_compute_hash_ %+ ARCH + MOVDQU xdata, [file_start + f_i] mov curr_data, [file_start + f_i] + mov tmp3, curr_data + mov tmp6, curr_data - cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] - ja output_end - - ;; Encode first byte in the stream as a literal compute_hash hash, curr_data - and hash %+ d, HASH_MASK - mov [stream + _internal_state_head + 2 * hash], f_i %+ w - and curr_data, 0xff - get_lit_code curr_data, code2, code_len2, hufftables - mov tmp3, [file_start + f_i + 1] - mov tmp6, tmp3 - compute_hash hash, tmp3 + shr tmp3, 8 + compute_hash hash2, tmp3 - shr tmp6, 8 - compute_hash hash2, tmp6 + and hash, HASH_MASK + and hash2, HASH_MASK - MOVD xhash, hash %+ d - PINSRD xhash, hash2 %+ d, 1 - PAND xhash, xhash, xmask - - jmp write_lit_bits + cmp dword [stream + _internal_state_has_hist], 0 + je write_first_byte + jmp loop2 align 16 loop2: @@ -548,6 +540,27 @@ compare_loop2: get_lit_code curr_data, code3, code_len3, hufftables jmp len_dist_lit_huffman +MARK __write_first_byte_ %+ ARCH +write_first_byte: + cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] + ja output_end + + mov dword [stream + _internal_state_has_hist], 1 + + mov [stream + _internal_state_head + 2 * hash], f_i %+ w + + mov hash, hash2 + shr tmp6, 16 + compute_hash hash2, tmp6 + + MOVD xhash, hash %+ d + PINSRD xhash, hash2 %+ d, 1 + PAND xhash, xhash, xmask + + and curr_data, 0xff + get_lit_code curr_data, code2, code_len2, hufftables + jmp write_lit_bits + section .data align 16 mask: dd HASH_MASK, HASH_MASK, HASH_MASK, HASH_MASK diff --git a/igzip/igzip_finish.asm b/igzip/igzip_finish.asm index 9b93e25..e8d5e36 100644 --- a/igzip/igzip_finish.asm +++ b/igzip/igzip_finish.asm @@ -126,7 +126,7 @@ skip_SLOP: mov curr_data %+ d, [file_start + f_i] - cmp dword [stream + _internal_state_b_bytes_processed], 0 ;TODO fixz + cmp dword [stream + _internal_state_has_hist], 0 jne skip_write_first_byte cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] @@ -135,6 +135,7 @@ skip_SLOP: compute_hash hash, curr_data and hash %+ d, HASH_MASK mov [stream + _internal_state_head + 2 * hash], f_i %+ w + mov dword [stream + _internal_state_has_hist], 1 jmp encode_literal skip_write_first_byte: diff --git a/include/igzip_lib.h b/include/igzip_lib.h index 147f931..781d026 100644 --- a/include/igzip_lib.h +++ b/include/igzip_lib.h @@ -240,7 +240,7 @@ struct isal_zstate { uint32_t tmp_out_end; //!< temporary variable uint32_t has_eob; //!< keeps track of eob on the last deflate block uint32_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set) - uint32_t left_over; //!< keeps track of overflow bytes + uint32_t has_hist; //!< flag to track if there is match history DECLARE_ALIGNED(uint8_t buffer[2 * IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD], 32); //!< Internal buffer DECLARE_ALIGNED(uint16_t head[IGZIP_HASH_SIZE], 16); //!< Hash array