igzip: Modify set_long_icf to handle small end_in

Change-Id: I24c3420df5d9e84d27fe28eff96155e5fcd51760
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
Roy Oursler 2018-10-02 14:01:57 -07:00
parent ba1a000680
commit 1fdc5941a3
3 changed files with 65 additions and 43 deletions

View File

@ -4,7 +4,7 @@
#include "igzip_level_buf_structs.h"
extern uint64_t gen_icf_map_lh1(struct isal_zstream *, struct deflate_icf *, uint32_t);
extern void set_long_icf_fg(uint8_t *, uint8_t *, struct deflate_icf *, struct level_buf *);
extern void set_long_icf_fg(uint8_t *, uint64_t, uint64_t, struct deflate_icf *);
extern void isal_deflate_icf_body_lvl1(struct isal_zstream *);
extern void isal_deflate_icf_body_lvl2(struct isal_zstream *);
extern void isal_deflate_icf_body_lvl3(struct isal_zstream *);
@ -24,9 +24,11 @@ static inline void write_deflate_icf(struct deflate_icf *icf, uint32_t lit_len,
| (extra_bits << (LIT_LEN_BIT_COUNT + DIST_LIT_BIT_COUNT));
}
void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in,
struct deflate_icf *match_lookup, struct level_buf *level_buf)
void set_long_icf_fg_base(uint8_t * next_in, uint64_t processed, uint64_t input_size,
struct deflate_icf *match_lookup)
{
uint8_t *end_processed = next_in + processed;
uint8_t *end_in = next_in + input_size;
uint32_t dist_code, dist_extra, dist, len;
uint32_t match_len;
uint32_t dist_start[] = {
@ -36,15 +38,17 @@ void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in,
0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
};
while (next_in < end_in - ISAL_LOOK_AHEAD) {
if (end_in > end_processed + ISAL_LOOK_AHEAD)
end_in = end_processed + ISAL_LOOK_AHEAD;
while (next_in < end_processed) {
dist_code = match_lookup->lit_dist;
dist_extra = match_lookup->dist_extra;
dist = dist_start[dist_code] + dist_extra;
len = match_lookup->lit_len;
if (len >= 8 + LEN_OFFSET) {
match_len = compare(next_in - dist + 8, next_in + 8,
end_in - next_in + ISAL_DEF_MAX_MATCH) +
LEN_OFFSET + 8;
match_len = compare((next_in + 8) - dist, next_in + 8,
end_in - (next_in + 8)) + LEN_OFFSET + 8;
while (match_len > match_lookup->lit_len
&& match_len >= LEN_OFFSET + SHORTEST_MATCH) {
@ -251,8 +255,7 @@ void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream)
processed = gen_icf_map_h1_base(stream, matches_icf_lookup, input_size);
set_long_icf_fg(stream->next_in, stream->next_in + processed,
matches_icf_lookup, level_buf);
set_long_icf_fg(stream->next_in, processed, input_size, matches_icf_lookup);
stream->next_in += processed;
stream->avail_in -= processed;
@ -291,8 +294,7 @@ void icf_body_lazyhash1_fillgreedy_greedy(struct isal_zstream *stream)
processed = gen_icf_map_lh1(stream, matches_icf_lookup, input_size);
set_long_icf_fg(stream->next_in, stream->next_in + processed,
matches_icf_lookup, level_buf);
set_long_icf_fg(stream->next_in, processed, input_size, matches_icf_lookup);
stream->next_in += processed;
stream->avail_in -= processed;

View File

@ -39,28 +39,30 @@ default rel
%define arg1 rcx
%define arg2 rdx
%define arg3 r8
%define dist_code rsi
%define tmp3 rsi
%define arg4 r9
%define len rdi
%define tmp2 rdi
%define dist rsi
%else
%define arg1 rdi
%define arg2 rsi
%define arg3 rdx
%define dist_code rcx
%define tmp3 rcx
%define arg4 rcx
%define len r8
%define tmp2 r8
%define dist r9
%endif
%define next_in arg1
%define end_in arg2
%define match_lookup arg3
%define end_processed arg2
%define end_in arg3
%define match_lookup arg4
%define match_in rax
%define dist r9
%define match_offset r10
%define tmp1 r11
%define end_in_orig r12
%define end_processed_orig r12
%define dist_code r13
%define tmp3 r13
%define ymatch_lookup ymm0
%define ymatch_lookup2 ymm1
@ -97,6 +99,7 @@ default rel
save_reg rsi, 10*16 + 0*8
save_reg rdi, 10*16 + 1*8
save_reg r12, 10*16 + 2*8
save_reg r13, 10*16 + 3*8
end_prolog
%endm
@ -115,15 +118,18 @@ default rel
mov rsi, [rsp + 10*16 + 0*8]
mov rdi, [rsp + 10*16 + 1*8]
mov r12, [rsp + 10*16 + 2*8]
mov r13, [rsp + 10*16 + 3*8]
add rsp, stack_size
%endm
%else
%define func(x) x:
; FUNC_SAVE: push r12 and r13 so the function body may overwrite them.
; In this file r12 is aliased as end_processed_orig and r13 as dist_code/tmp3.
; NOTE(review): this is the %else variant of the prologue, presumably the
; non-win64 (System V) build where r12-r15 are callee-saved -- confirm against
; the enclosing %ifidn, which is not visible here.
%macro FUNC_SAVE 0
push r12
push r13
%endm
; FUNC_RESTORE: pop r13 and then r12, i.e. the reverse of the order in which
; FUNC_SAVE pushed them, so each register gets its own saved value back.
%macro FUNC_RESTORE 0
pop r13
pop r12
%endm
%endif
@ -133,8 +139,13 @@ global set_long_icf_fg_04
func(set_long_icf_fg_04)
FUNC_SAVE
mov end_in_orig, end_in
sub end_in, VECT_SIZE - 1
lea end_in, [next_in + arg3]
add end_processed, next_in
mov end_processed_orig, end_processed
lea tmp1, [end_processed + LA_STATELESS]
cmp end_in, tmp1
cmovg end_in, tmp1
sub end_processed, VECT_SIZE - 1
vmovdqu ylong_lens, [long_len]
vmovdqu ylens_mask, [len_mask]
vmovdqu ydists_mask, [dists_mask]
@ -148,7 +159,7 @@ func(set_long_icf_fg_04)
vmovdqu ymatch_lookup2, ymatch_lookup
vmovdqu ymatch_lookup, [match_lookup + ICF_CODE_BYTES * VECT_SIZE]
cmp next_in, end_in
cmp next_in, end_processed
jae .end_fill
.finish_entry:
@ -185,7 +196,6 @@ func(set_long_icf_fg_04)
mov len, 8
mov tmp3, end_in
sub tmp3, next_in
add tmp3, 258
compare_y next_in, match_in, len, tmp3, tmp1, ytmp1, ytmp2
@ -194,7 +204,7 @@ func(set_long_icf_fg_04)
vpsubd ylens1, ylens1, [increment]
vpaddd ylens1, ylens1, [twofiftyfour]
mov tmp3, end_in
mov tmp3, end_processed
sub tmp3, next_in
cmp len, tmp3
cmovg len, tmp3
@ -235,11 +245,11 @@ func(set_long_icf_fg_04)
jmp .update_match_lookup
.end_fill:
mov end_in, end_in_orig
cmp next_in, end_in
mov end_processed, end_processed_orig
cmp next_in, end_processed
jge .finish
mov tmp1, end_in
mov tmp1, end_processed
sub tmp1, next_in
vmovd ytmp1 %+ x, tmp1 %+ d
vpbroadcastd ytmp1, ytmp1 %+ x

View File

@ -38,26 +38,28 @@
%define arg1 rcx
%define arg2 rdx
%define arg3 r8
%define dist_code rsi
%define tmp2 rsi
%define arg4 r9
%define len rdi
%define dist rsi
%else
%define arg1 rdi
%define arg2 rsi
%define arg3 rdx
%define dist_code rcx
%define tmp2 rcx
%define arg4 rcx
%define len r8
%define dist r9
%endif
%define next_in arg1
%define end_in arg2
%define match_lookup arg3
%define end_processed arg2
%define end_in arg3
%define match_lookup arg4
%define match_in rax
%define dist r9
%define match_offset r10
%define tmp1 r11
%define end_in_orig r12
%define end_processed_orig r12
%define dist_code r13
%define tmp2 r13
%define zmatch_lookup zmm0
%define zmatch_lookup2 zmm1
@ -106,6 +108,7 @@
save_reg rsi, 8*16 + 0*8
save_reg rdi, 8*16 + 1*8
save_reg r12, 8*16 + 2*8
save_reg r13, 8*16 + 3*8
end_prolog
%endm
@ -122,15 +125,18 @@
mov rsi, [rsp + 8*16 + 0*8]
mov rdi, [rsp + 8*16 + 1*8]
mov r12, [rsp + 8*16 + 2*8]
mov r13, [rsp + 8*16 + 3*8]
add rsp, stack_size
%endm
%else
%define func(x) x:
; FUNC_SAVE: push r12 and r13 so the function body may overwrite them.
; In this file r12 is aliased as end_processed_orig and r13 as dist_code/tmp2.
; NOTE(review): this is the %else variant of the prologue, presumably the
; non-win64 (System V) build where r12-r15 are callee-saved -- confirm against
; the enclosing %ifidn, which is not visible here.
%macro FUNC_SAVE 0
push r12
push r13
%endm
; FUNC_RESTORE: pop r13 and then r12, i.e. the reverse of the order in which
; FUNC_SAVE pushed them, so each register gets its own saved value back.
%macro FUNC_RESTORE 0
pop r13
pop r12
%endm
%endif
@ -140,8 +146,13 @@ global set_long_icf_fg_06
func(set_long_icf_fg_06)
FUNC_SAVE
mov end_in_orig, end_in
sub end_in, 15
lea end_in, [next_in + arg3]
add end_processed, next_in
mov end_processed_orig, end_processed
lea tmp1, [end_processed + LA_STATELESS]
cmp end_in, tmp1
cmovg end_in, tmp1
sub end_processed, 15
vpbroadcastd zlong_lens, [long_len]
vpbroadcastd zlong_lens2, [long_len2]
vpbroadcastd zlens_mask, [len_mask]
@ -164,7 +175,7 @@ func(set_long_icf_fg_06)
vmovdqu32 zmatch_lookup2, zmatch_lookup
vmovdqu32 zmatch_lookup, [match_lookup + ICF_CODE_BYTES * VECT_SIZE]
cmp next_in, end_in
cmp next_in, end_processed
jae .end_fill
.finish_entry:
@ -237,7 +248,6 @@ func(set_long_icf_fg_06)
mov len, 16
mov tmp2, end_in
sub tmp2, next_in
add tmp2, 258
compare_z next_in, match_in, len, tmp2, tmp1, k3, ztmp1, ztmp2
@ -245,7 +255,7 @@ func(set_long_icf_fg_06)
vpsubd zlens1, zlens1, zincrement
vpaddd zlens1, zlens1, ztwofiftyfour
mov tmp2, end_in
mov tmp2, end_processed
sub tmp2, next_in
cmp len, tmp2
cmovg len, tmp2
@ -283,11 +293,11 @@ func(set_long_icf_fg_06)
jmp .update_match_lookup
.end_fill:
mov end_in, end_in_orig
cmp next_in, end_in
mov end_processed, end_processed_orig
cmp next_in, end_processed
jge .finish
mov tmp1, end_in
mov tmp1, end_processed
sub tmp1, next_in
vpbroadcastd ztmp1, tmp1 %+ d
vpcmpd k3, ztmp1, zincrement, 6