mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 09:23:50 +01:00
igzip: Modify set_long_icf to handle small end_in

Change-Id: I24c3420df5d9e84d27fe28eff96155e5fcd51760
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
parent
ba1a000680
commit
1fdc5941a3
@ -4,7 +4,7 @@
|
||||
#include "igzip_level_buf_structs.h"
|
||||
|
||||
extern uint64_t gen_icf_map_lh1(struct isal_zstream *, struct deflate_icf *, uint32_t);
|
||||
extern void set_long_icf_fg(uint8_t *, uint8_t *, struct deflate_icf *, struct level_buf *);
|
||||
extern void set_long_icf_fg(uint8_t *, uint64_t, uint64_t, struct deflate_icf *);
|
||||
extern void isal_deflate_icf_body_lvl1(struct isal_zstream *);
|
||||
extern void isal_deflate_icf_body_lvl2(struct isal_zstream *);
|
||||
extern void isal_deflate_icf_body_lvl3(struct isal_zstream *);
|
||||
@ -24,9 +24,11 @@ static inline void write_deflate_icf(struct deflate_icf *icf, uint32_t lit_len,
|
||||
| (extra_bits << (LIT_LEN_BIT_COUNT + DIST_LIT_BIT_COUNT));
|
||||
}
|
||||
|
||||
void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in,
|
||||
struct deflate_icf *match_lookup, struct level_buf *level_buf)
|
||||
void set_long_icf_fg_base(uint8_t * next_in, uint64_t processed, uint64_t input_size,
|
||||
struct deflate_icf *match_lookup)
|
||||
{
|
||||
uint8_t *end_processed = next_in + processed;
|
||||
uint8_t *end_in = next_in + input_size;
|
||||
uint32_t dist_code, dist_extra, dist, len;
|
||||
uint32_t match_len;
|
||||
uint32_t dist_start[] = {
|
||||
@ -36,15 +38,17 @@ void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in,
|
||||
0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
|
||||
};
|
||||
|
||||
while (next_in < end_in - ISAL_LOOK_AHEAD) {
|
||||
if (end_in > end_processed + ISAL_LOOK_AHEAD)
|
||||
end_in = end_processed + ISAL_LOOK_AHEAD;
|
||||
|
||||
while (next_in < end_processed) {
|
||||
dist_code = match_lookup->lit_dist;
|
||||
dist_extra = match_lookup->dist_extra;
|
||||
dist = dist_start[dist_code] + dist_extra;
|
||||
len = match_lookup->lit_len;
|
||||
if (len >= 8 + LEN_OFFSET) {
|
||||
match_len = compare(next_in - dist + 8, next_in + 8,
|
||||
end_in - next_in + ISAL_DEF_MAX_MATCH) +
|
||||
LEN_OFFSET + 8;
|
||||
match_len = compare((next_in + 8) - dist, next_in + 8,
|
||||
end_in - (next_in + 8)) + LEN_OFFSET + 8;
|
||||
|
||||
while (match_len > match_lookup->lit_len
|
||||
&& match_len >= LEN_OFFSET + SHORTEST_MATCH) {
|
||||
@ -251,8 +255,7 @@ void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream)
|
||||
|
||||
processed = gen_icf_map_h1_base(stream, matches_icf_lookup, input_size);
|
||||
|
||||
set_long_icf_fg(stream->next_in, stream->next_in + processed,
|
||||
matches_icf_lookup, level_buf);
|
||||
set_long_icf_fg(stream->next_in, processed, input_size, matches_icf_lookup);
|
||||
|
||||
stream->next_in += processed;
|
||||
stream->avail_in -= processed;
|
||||
@ -291,8 +294,7 @@ void icf_body_lazyhash1_fillgreedy_greedy(struct isal_zstream *stream)
|
||||
|
||||
processed = gen_icf_map_lh1(stream, matches_icf_lookup, input_size);
|
||||
|
||||
set_long_icf_fg(stream->next_in, stream->next_in + processed,
|
||||
matches_icf_lookup, level_buf);
|
||||
set_long_icf_fg(stream->next_in, processed, input_size, matches_icf_lookup);
|
||||
|
||||
stream->next_in += processed;
|
||||
stream->avail_in -= processed;
|
||||
|
@ -39,28 +39,30 @@ default rel
|
||||
%define arg1 rcx
|
||||
%define arg2 rdx
|
||||
%define arg3 r8
|
||||
%define dist_code rsi
|
||||
%define tmp3 rsi
|
||||
%define arg4 r9
|
||||
%define len rdi
|
||||
%define tmp2 rdi
|
||||
%define dist rsi
|
||||
%else
|
||||
%define arg1 rdi
|
||||
%define arg2 rsi
|
||||
%define arg3 rdx
|
||||
%define dist_code rcx
|
||||
%define tmp3 rcx
|
||||
%define arg4 rcx
|
||||
%define len r8
|
||||
%define tmp2 r8
|
||||
%define dist r9
|
||||
%endif
|
||||
|
||||
%define next_in arg1
|
||||
%define end_in arg2
|
||||
%define match_lookup arg3
|
||||
%define end_processed arg2
|
||||
%define end_in arg3
|
||||
%define match_lookup arg4
|
||||
%define match_in rax
|
||||
%define dist r9
|
||||
%define match_offset r10
|
||||
%define tmp1 r11
|
||||
%define end_in_orig r12
|
||||
%define end_processed_orig r12
|
||||
%define dist_code r13
|
||||
%define tmp3 r13
|
||||
|
||||
%define ymatch_lookup ymm0
|
||||
%define ymatch_lookup2 ymm1
|
||||
@ -97,6 +99,7 @@ default rel
|
||||
save_reg rsi, 10*16 + 0*8
|
||||
save_reg rdi, 10*16 + 1*8
|
||||
save_reg r12, 10*16 + 2*8
|
||||
save_reg r13, 10*16 + 3*8
|
||||
end_prolog
|
||||
%endm
|
||||
|
||||
@ -115,15 +118,18 @@ default rel
|
||||
mov rsi, [rsp + 10*16 + 0*8]
|
||||
mov rdi, [rsp + 10*16 + 1*8]
|
||||
mov r12, [rsp + 10*16 + 2*8]
|
||||
mov r13, [rsp + 10*16 + 3*8]
|
||||
add rsp, stack_size
|
||||
%endm
|
||||
%else
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endm
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endm
|
||||
%endif
|
||||
@ -133,8 +139,13 @@ global set_long_icf_fg_04
|
||||
func(set_long_icf_fg_04)
|
||||
FUNC_SAVE
|
||||
|
||||
mov end_in_orig, end_in
|
||||
sub end_in, VECT_SIZE - 1
|
||||
lea end_in, [next_in + arg3]
|
||||
add end_processed, next_in
|
||||
mov end_processed_orig, end_processed
|
||||
lea tmp1, [end_processed + LA_STATELESS]
|
||||
cmp end_in, tmp1
|
||||
cmovg end_in, tmp1
|
||||
sub end_processed, VECT_SIZE - 1
|
||||
vmovdqu ylong_lens, [long_len]
|
||||
vmovdqu ylens_mask, [len_mask]
|
||||
vmovdqu ydists_mask, [dists_mask]
|
||||
@ -148,7 +159,7 @@ func(set_long_icf_fg_04)
|
||||
vmovdqu ymatch_lookup2, ymatch_lookup
|
||||
vmovdqu ymatch_lookup, [match_lookup + ICF_CODE_BYTES * VECT_SIZE]
|
||||
|
||||
cmp next_in, end_in
|
||||
cmp next_in, end_processed
|
||||
jae .end_fill
|
||||
|
||||
.finish_entry:
|
||||
@ -185,7 +196,6 @@ func(set_long_icf_fg_04)
|
||||
mov len, 8
|
||||
mov tmp3, end_in
|
||||
sub tmp3, next_in
|
||||
add tmp3, 258
|
||||
|
||||
compare_y next_in, match_in, len, tmp3, tmp1, ytmp1, ytmp2
|
||||
|
||||
@ -194,7 +204,7 @@ func(set_long_icf_fg_04)
|
||||
vpsubd ylens1, ylens1, [increment]
|
||||
vpaddd ylens1, ylens1, [twofiftyfour]
|
||||
|
||||
mov tmp3, end_in
|
||||
mov tmp3, end_processed
|
||||
sub tmp3, next_in
|
||||
cmp len, tmp3
|
||||
cmovg len, tmp3
|
||||
@ -235,11 +245,11 @@ func(set_long_icf_fg_04)
|
||||
jmp .update_match_lookup
|
||||
|
||||
.end_fill:
|
||||
mov end_in, end_in_orig
|
||||
cmp next_in, end_in
|
||||
mov end_processed, end_processed_orig
|
||||
cmp next_in, end_processed
|
||||
jge .finish
|
||||
|
||||
mov tmp1, end_in
|
||||
mov tmp1, end_processed
|
||||
sub tmp1, next_in
|
||||
vmovd ytmp1 %+ x, tmp1 %+ d
|
||||
vpbroadcastd ytmp1, ytmp1 %+ x
|
||||
|
@ -38,26 +38,28 @@
|
||||
%define arg1 rcx
|
||||
%define arg2 rdx
|
||||
%define arg3 r8
|
||||
%define dist_code rsi
|
||||
%define tmp2 rsi
|
||||
%define arg4 r9
|
||||
%define len rdi
|
||||
%define dist rsi
|
||||
%else
|
||||
%define arg1 rdi
|
||||
%define arg2 rsi
|
||||
%define arg3 rdx
|
||||
%define dist_code rcx
|
||||
%define tmp2 rcx
|
||||
%define arg4 rcx
|
||||
%define len r8
|
||||
%define dist r9
|
||||
%endif
|
||||
|
||||
%define next_in arg1
|
||||
%define end_in arg2
|
||||
%define match_lookup arg3
|
||||
%define end_processed arg2
|
||||
%define end_in arg3
|
||||
%define match_lookup arg4
|
||||
%define match_in rax
|
||||
%define dist r9
|
||||
%define match_offset r10
|
||||
%define tmp1 r11
|
||||
%define end_in_orig r12
|
||||
%define end_processed_orig r12
|
||||
%define dist_code r13
|
||||
%define tmp2 r13
|
||||
|
||||
%define zmatch_lookup zmm0
|
||||
%define zmatch_lookup2 zmm1
|
||||
@ -106,6 +108,7 @@
|
||||
save_reg rsi, 8*16 + 0*8
|
||||
save_reg rdi, 8*16 + 1*8
|
||||
save_reg r12, 8*16 + 2*8
|
||||
save_reg r13, 8*16 + 3*8
|
||||
end_prolog
|
||||
%endm
|
||||
|
||||
@ -122,15 +125,18 @@
|
||||
mov rsi, [rsp + 8*16 + 0*8]
|
||||
mov rdi, [rsp + 8*16 + 1*8]
|
||||
mov r12, [rsp + 8*16 + 2*8]
|
||||
mov r13, [rsp + 8*16 + 3*8]
|
||||
add rsp, stack_size
|
||||
%endm
|
||||
%else
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endm
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endm
|
||||
%endif
|
||||
@ -140,8 +146,13 @@ global set_long_icf_fg_06
|
||||
func(set_long_icf_fg_06)
|
||||
FUNC_SAVE
|
||||
|
||||
mov end_in_orig, end_in
|
||||
sub end_in, 15
|
||||
lea end_in, [next_in + arg3]
|
||||
add end_processed, next_in
|
||||
mov end_processed_orig, end_processed
|
||||
lea tmp1, [end_processed + LA_STATELESS]
|
||||
cmp end_in, tmp1
|
||||
cmovg end_in, tmp1
|
||||
sub end_processed, 15
|
||||
vpbroadcastd zlong_lens, [long_len]
|
||||
vpbroadcastd zlong_lens2, [long_len2]
|
||||
vpbroadcastd zlens_mask, [len_mask]
|
||||
@ -164,7 +175,7 @@ func(set_long_icf_fg_06)
|
||||
vmovdqu32 zmatch_lookup2, zmatch_lookup
|
||||
vmovdqu32 zmatch_lookup, [match_lookup + ICF_CODE_BYTES * VECT_SIZE]
|
||||
|
||||
cmp next_in, end_in
|
||||
cmp next_in, end_processed
|
||||
jae .end_fill
|
||||
|
||||
.finish_entry:
|
||||
@ -237,7 +248,6 @@ func(set_long_icf_fg_06)
|
||||
mov len, 16
|
||||
mov tmp2, end_in
|
||||
sub tmp2, next_in
|
||||
add tmp2, 258
|
||||
|
||||
compare_z next_in, match_in, len, tmp2, tmp1, k3, ztmp1, ztmp2
|
||||
|
||||
@ -245,7 +255,7 @@ func(set_long_icf_fg_06)
|
||||
vpsubd zlens1, zlens1, zincrement
|
||||
vpaddd zlens1, zlens1, ztwofiftyfour
|
||||
|
||||
mov tmp2, end_in
|
||||
mov tmp2, end_processed
|
||||
sub tmp2, next_in
|
||||
cmp len, tmp2
|
||||
cmovg len, tmp2
|
||||
@ -283,11 +293,11 @@ func(set_long_icf_fg_06)
|
||||
jmp .update_match_lookup
|
||||
|
||||
.end_fill:
|
||||
mov end_in, end_in_orig
|
||||
cmp next_in, end_in
|
||||
mov end_processed, end_processed_orig
|
||||
cmp next_in, end_processed
|
||||
jge .finish
|
||||
|
||||
mov tmp1, end_in
|
||||
mov tmp1, end_processed
|
||||
sub tmp1, next_in
|
||||
vpbroadcastd ztmp1, tmp1 %+ d
|
||||
vpcmpd k3, ztmp1, zincrement, 6
|
||||
|
Loading…
Reference in New Issue
Block a user