igzip: Modify igzip to ignore matches which are shorter than 4

Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
Reviewed-by: Greg Tucker <greg.b.tucker@intel.com>
This commit is contained in:
Roy Oursler
2016-06-13 11:20:26 -07:00
committed by Greg Tucker
parent 45311ea249
commit bbc886cf01
7 changed files with 25 additions and 99 deletions

View File

@@ -202,7 +202,6 @@
%define %%result %1d ; 32-bit reg %define %%result %1d ; 32-bit reg
%define %%data %2d ; 32-bit reg (low byte not clobbered) %define %%data %2d ; 32-bit reg (low byte not clobbered)
and %%data, 0x00FFFFFF
xor %%result, %%result xor %%result, %%result
crc32 %%result, %%data crc32 %%result, %%data
%endm %endm

View File

@@ -138,7 +138,6 @@ static inline void get_lit_code(struct isal_hufftables *hufftables, uint32_t lit
*/ */
static inline uint32_t compute_hash(uint32_t data) static inline uint32_t compute_hash(uint32_t data)
{ {
data &= 0x00FFFFFF;
#ifdef __SSE4_2__ #ifdef __SSE4_2__
return _mm_crc32_u32(0, data); return _mm_crc32_u32(0, data);

View File

@@ -334,7 +334,7 @@ skip_move_zero:
jge end_loop_2 jge end_loop_2
MARK __misc_compute_hash_lookup_ %+ ARCH MARK __misc_compute_hash_lookup_ %+ ARCH
mov curr_data %+ d, [file_start + f_i] mov curr_data, [file_start + f_i]
cmp dword [rsp + empty_buffer_flag], 0 cmp dword [rsp + empty_buffer_flag], 0
jne write_first_byte jne write_first_byte
@@ -348,8 +348,7 @@ MARK __misc_compute_hash_lookup_ %+ ARCH
loop2: loop2:
shr curr_data2, 8 shr curr_data2, 8
xor hash2 %+ d, hash2 %+ d compute_hash hash2, curr_data2
crc32 hash2 %+ d, curr_data2 %+ d
; hash = compute_hash(state->file_start + f_i) & HASH_MASK; ; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
and hash %+ d, HASH_MASK and hash %+ d, HASH_MASK
@@ -407,11 +406,6 @@ MARK __compare_ %+ ARCH
xor len, [tmp2] xor len, [tmp2]
jz compare_loop jz compare_loop
%ifdef USE_HSWNI
blsmsk tmp3, len
or tmp3, 0xFFFFFF
%endif
lea tmp1, [file_start + f_i] lea tmp1, [file_start + f_i]
mov tmp2, tmp1 mov tmp2, tmp1
sub tmp2, dist2 sub tmp2, dist2
@@ -424,27 +418,12 @@ MARK __compare_ %+ ARCH
xor len2, [tmp2] xor len2, [tmp2]
jz compare_loop2 jz compare_loop2
%ifdef USE_HSWNI
;; Check for len/dist match for first literal
test tmp3, len2
jz len_dist_lit_huffman_pre
cmp tmp3, 0xFFFFFF
je encode_2_literals
jmp len_dist_huffman_pre
MARK __len_dist_lit_huffman_ %+ ARCH
len_dist_lit_huffman_pre:
movzx tmp1, curr_data %+ b
get_lit_code tmp1, code3, code_len3, hufftables
%else
;; Speculatively load the code for the first literal ;; Speculatively load the code for the first literal
movzx tmp1, curr_data %+ b movzx tmp1, curr_data %+ b
get_lit_code tmp1, code3, rcx, hufftables get_lit_code tmp1, code3, rcx, hufftables
;; Check for len/dist match for first literal ;; Check for len/dist match for first literal
test len, 0xFFFFFF test len %+ d, 0xFFFFFFFF
jz len_dist_huffman_pre jz len_dist_huffman_pre
;; Speculatively load the code for the second literal ;; Speculatively load the code for the second literal
@@ -457,13 +436,12 @@ len_dist_lit_huffman_pre:
add code_len2, rcx add code_len2, rcx
;; Check for len/dist match for second literal ;; Check for len/dist match for second literal
test len2, 0xFFFFFF test len2 %+ d, 0xFFFFFFFF
jnz write_lit_bits jnz write_lit_bits
MARK __len_dist_lit_huffman_ %+ ARCH MARK __len_dist_lit_huffman_ %+ ARCH
len_dist_lit_huffman_pre: len_dist_lit_huffman_pre:
mov code_len3, rcx mov code_len3, rcx
%endif
bsf len2, len2 bsf len2, len2
shr len2, 3 shr len2, 3
@@ -500,7 +478,7 @@ len_dist_lit_huffman:
add f_i, len2 add f_i, len2
; hash = compute_hash(state->file_start + k) & HASH_MASK; ; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp5 %+ d, [file_start + tmp3] mov tmp5, [file_start + tmp3]
mov tmp7, tmp5 mov tmp7, tmp5
shr tmp7, 8 shr tmp7, 8
@@ -546,11 +524,11 @@ len_dist_huffman:
;; Setup for updating hash ;; Setup for updating hash
lea tmp3, [f_i + 2] ; tmp3 <= k lea tmp3, [f_i + 2] ; tmp3 <= k
add f_i, len add f_i, len
mov tmp7 %+ d, [file_start + tmp3] mov tmp7, [file_start + tmp3]
MARK __update_hash_for_symbol_ %+ ARCH MARK __update_hash_for_symbol_ %+ ARCH
update_hash_for_symbol: update_hash_for_symbol:
mov curr_data %+ d, [file_start + f_i] mov curr_data, [file_start + f_i]
mov curr_data2, curr_data mov curr_data2, curr_data
compute_hash hash, curr_data compute_hash hash, curr_data
%ifdef LIMIT_HASH_UPDATE %ifdef LIMIT_HASH_UPDATE
@@ -565,7 +543,7 @@ update_hash_for_symbol:
%else %else
loop3: loop3:
; hash = compute_hash(state->file_start + k) & HASH_MASK; ; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp7 %+ d, [file_start + tmp3] mov tmp7, [file_start + tmp3]
compute_hash hash2, tmp7 compute_hash hash2, tmp7
and hash2 %+ d, HASH_MASK and hash2 %+ d, HASH_MASK
; state->head[hash] = k; ; state->head[hash] = k;
@@ -587,24 +565,10 @@ MARK __write_len_dist_bits_ %+ ARCH
MARK __write_lit_bits_ %+ ARCH MARK __write_lit_bits_ %+ ARCH
%ifdef USE_HSWNI
encode_2_literals:
movzx tmp1, curr_data %+ b
get_lit_code tmp1, code3, rcx, hufftables
shr curr_data, 8
and curr_data, 0xff
get_lit_code curr_data, code2, code_len2, hufftables
;; Calculate code associated with both literals
shlx code2, code2, rcx
or code2, code3
add code_len2, rcx
%endif
write_lit_bits: write_lit_bits:
mov f_end_i, [rsp + f_end_i_mem_offset] mov f_end_i, [rsp + f_end_i_mem_offset]
add f_i, 1 add f_i, 1
mov curr_data %+ d, [file_start + f_i] mov curr_data, [file_start + f_i]
mov curr_data2, curr_data mov curr_data2, curr_data
compute_hash hash, curr_data compute_hash hash, curr_data

View File

@@ -127,7 +127,7 @@ skip_SLOP:
cmp f_i, f_end_i cmp f_i, f_end_i
jge end_loop_2 jge end_loop_2
mov tmp1 %+ d, [file_start + f_i] mov tmp1, [file_start + f_i]
loop2: loop2:
; if (state->bitbuf.is_full()) { ; if (state->bitbuf.is_full()) {
@@ -200,7 +200,7 @@ loop2:
; only update hash twice ; only update hash twice
; hash = compute_hash(state->file_start + k) & HASH_MASK; ; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp6 %+ d, [file_start + tmp3] mov tmp6, [file_start + tmp3]
compute_hash hash, tmp6 compute_hash hash, tmp6
and hash %+ d, HASH_MASK and hash %+ d, HASH_MASK
; state->head[hash] = k; ; state->head[hash] = k;
@@ -209,7 +209,7 @@ loop2:
add tmp3, 1 add tmp3, 1
; hash = compute_hash(state->file_start + k) & HASH_MASK; ; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp6 %+ d, [file_start + tmp3] mov tmp6, [file_start + tmp3]
compute_hash hash, tmp6 compute_hash hash, tmp6
and hash %+ d, HASH_MASK and hash %+ d, HASH_MASK
; state->head[hash] = k; ; state->head[hash] = k;
@@ -218,7 +218,7 @@ loop2:
%else %else
loop3: loop3:
; hash = compute_hash(state->file_start + k) & HASH_MASK; ; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp6 %+ d, [file_start + tmp3] mov tmp6, [file_start + tmp3]
compute_hash hash, tmp6 compute_hash hash, tmp6
and hash %+ d, HASH_MASK and hash %+ d, HASH_MASK
; state->head[hash] = k; ; state->head[hash] = k;

View File

@@ -185,7 +185,7 @@ skip_SLOP:
; for (f_i = f_start_i; f_i < f_end_i; f_i++) { ; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
MARK __stateless_compute_hash_ %+ ARCH MARK __stateless_compute_hash_ %+ ARCH
mov curr_data %+ d, [file_start + f_i] mov curr_data, [file_start + f_i]
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end ja end
@@ -202,8 +202,7 @@ MARK __stateless_compute_hash_ %+ ARCH
loop2: loop2:
shr curr_data2, 8 shr curr_data2, 8
xor hash2 %+ d, hash2 %+ d compute_hash hash2, curr_data2
crc32 hash2 %+ d, curr_data2 %+ d
; hash = compute_hash(state->file_start + f_i) & HASH_MASK; ; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
and hash %+ d, HASH_MASK and hash %+ d, HASH_MASK
@@ -261,11 +260,6 @@ MARK __stateless_compare_ %+ ARCH
xor len, [tmp2] xor len, [tmp2]
jz compare_loop jz compare_loop
%ifdef USE_HSWNI
blsmsk tmp3, len
or tmp3, 0xFFFFFF
%endif
lea tmp1, [file_start + f_i] lea tmp1, [file_start + f_i]
mov tmp2, tmp1 mov tmp2, tmp1
sub tmp2, dist2 sub tmp2, dist2
@@ -278,27 +272,12 @@ MARK __stateless_compare_ %+ ARCH
xor len2, [tmp2] xor len2, [tmp2]
jz compare_loop2 jz compare_loop2
%ifdef USE_HSWNI
;; Check for len/dist match for first literal
test tmp3, len2
jz len_dist_lit_huffman_pre
cmp tmp3, 0xFFFFFF
je encode_2_literals
jmp len_dist_huffman_pre
MARK __stateless_len_dist_lit_huffman_ %+ ARCH
len_dist_lit_huffman_pre:
movzx tmp1, curr_data %+ b
get_lit_code tmp1, code3, code_len3, hufftables
%else
;; Speculatively load the code for the first literal ;; Speculatively load the code for the first literal
movzx tmp1, curr_data %+ b movzx tmp1, curr_data %+ b
get_lit_code tmp1, code3, rcx, hufftables get_lit_code tmp1, code3, rcx, hufftables
;; Check for len/dist match for first literal ;; Check for len/dist match for first literal
test len, 0xFFFFFF test len %+ d, 0xFFFFFFFF
jz len_dist_huffman_pre jz len_dist_huffman_pre
;; Speculatively load the code for the second literal ;; Speculatively load the code for the second literal
@@ -311,13 +290,12 @@ len_dist_lit_huffman_pre:
add code_len2, rcx add code_len2, rcx
;; Check for len/dist match for second literal ;; Check for len/dist match for second literal
test len2, 0xFFFFFF test len2 %+ d, 0xFFFFFFFF
jnz write_lit_bits jnz write_lit_bits
MARK __stateless_len_dist_lit_huffman_ %+ ARCH MARK __stateless_len_dist_lit_huffman_ %+ ARCH
len_dist_lit_huffman_pre: len_dist_lit_huffman_pre:
mov code_len3, rcx mov code_len3, rcx
%endif
bsf len2, len2 bsf len2, len2
shr len2, 3 shr len2, 3
@@ -355,7 +333,7 @@ len_dist_lit_huffman:
add f_i, len2 add f_i, len2
; hash = compute_hash(state->file_start + k) & HASH_MASK; ; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp5 %+ d, [file_start + tmp3] mov tmp5, [file_start + tmp3]
mov tmp7, tmp5 mov tmp7, tmp5
shr tmp7, 8 shr tmp7, 8
@@ -402,11 +380,11 @@ len_dist_huffman:
;; Setup for updating hash ;; Setup for updating hash
lea tmp3, [f_i + 2] ; tmp3 <= k lea tmp3, [f_i + 2] ; tmp3 <= k
add f_i, len add f_i, len
mov tmp7 %+ d, [file_start + tmp3] mov tmp7, [file_start + tmp3]
MARK __stateless_update_hash_for_symbol_ %+ ARCH MARK __stateless_update_hash_for_symbol_ %+ ARCH
update_hash_for_symbol: update_hash_for_symbol:
mov curr_data %+ d, [file_start + f_i] mov curr_data, [file_start + f_i]
mov curr_data2, curr_data mov curr_data2, curr_data
compute_hash hash, curr_data compute_hash hash, curr_data
%ifdef LIMIT_HASH_UPDATE %ifdef LIMIT_HASH_UPDATE
@@ -421,7 +399,7 @@ update_hash_for_symbol:
%else %else
loop3: loop3:
; hash = compute_hash(state->file_start + k) & HASH_MASK; ; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp7 %+ d, [file_start + tmp3] mov tmp7, [file_start + tmp3]
compute_hash hash2, tmp7 compute_hash hash2, tmp7
and hash2 %+ d, HASH_MASK and hash2 %+ d, HASH_MASK
; state->head[hash] = k; ; state->head[hash] = k;
@@ -443,24 +421,10 @@ MARK __stateless_write_len_dist_bits_ %+ ARCH
MARK __stateless_write_lit_bits_ %+ ARCH MARK __stateless_write_lit_bits_ %+ ARCH
%ifdef USE_HSWNI
encode_2_literals:
movzx tmp1, curr_data %+ b
get_lit_code tmp1, code3, rcx, hufftables
shr curr_data, 8
and curr_data, 0xff
get_lit_code curr_data, code2, code_len2, hufftables
;; Calculate code associated with both literals
shlx code2, code2, rcx
or code2, code3
add code_len2, rcx
%endif
write_lit_bits: write_lit_bits:
mov f_end_i, [rsp + f_end_i_mem_offset] mov f_end_i, [rsp + f_end_i_mem_offset]
add f_i, 1 add f_i, 1
mov curr_data %+ d, [file_start + f_i] mov curr_data, [file_start + f_i]
mov curr_data2, curr_data mov curr_data2, curr_data
compute_hash hash, curr_data compute_hash hash, curr_data
@@ -483,7 +447,7 @@ loop2_finish:
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end ja end
mov curr_data %+ d, [file_start + f_i] mov curr_data, [file_start + f_i]
compute_hash hash, curr_data compute_hash hash, curr_data
and hash %+ d, HASH_MASK and hash %+ d, HASH_MASK

View File

@@ -39,6 +39,6 @@
%assign HASH_SIZE D %assign HASH_SIZE D
%assign HASH_MASK (HASH_SIZE - 1) %assign HASH_MASK (HASH_SIZE - 1)
%assign SHORTEST_MATCH 3 %assign SHORTEST_MATCH 4
%assign SLOP 8 %assign SLOP 8

View File

@@ -124,7 +124,7 @@ extern "C" {
#define HASH_SIZE IGZIP_D #define HASH_SIZE IGZIP_D
#define HASH_MASK (HASH_SIZE - 1) #define HASH_MASK (HASH_SIZE - 1)
#define SHORTEST_MATCH 3 #define SHORTEST_MATCH 4
#define IGZIP_MAX_DEF_HDR_SIZE 328 #define IGZIP_MAX_DEF_HDR_SIZE 328