igzip: Modify set_long_icf to handle small end_in

Change-Id: I24c3420df5d9e84d27fe28eff96155e5fcd51760
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
2024-12-12 09:23:50 +01:00 · 2018-10-02 14:01:57 -07:00 · 2018-10-02 14:01:57 -07:00 · 1fdc5941a3
commit 1fdc5941a3
parent ba1a000680
3 changed files with 65 additions and 43 deletions
--- a/igzip/igzip_icf_body.c
+++ b/igzip/igzip_icf_body.c
@ -4,7 +4,7 @@
 #include "igzip_level_buf_structs.h"

 extern uint64_t gen_icf_map_lh1(struct isal_zstream *, struct deflate_icf *, uint32_t);
-extern void set_long_icf_fg(uint8_t *, uint8_t *, struct deflate_icf *, struct level_buf *);
+extern void set_long_icf_fg(uint8_t *, uint64_t, uint64_t, struct deflate_icf *);
 extern void isal_deflate_icf_body_lvl1(struct isal_zstream *);
 extern void isal_deflate_icf_body_lvl2(struct isal_zstream *);
 extern void isal_deflate_icf_body_lvl3(struct isal_zstream *);
@ -24,9 +24,11 @@ static inline void write_deflate_icf(struct deflate_icf *icf, uint32_t lit_len,
 	    | (extra_bits << (LIT_LEN_BIT_COUNT + DIST_LIT_BIT_COUNT));
 }

-void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in,
-			  struct deflate_icf *match_lookup, struct level_buf *level_buf)
+void set_long_icf_fg_base(uint8_t * next_in, uint64_t processed, uint64_t input_size,
+			  struct deflate_icf *match_lookup)
 {
+	uint8_t *end_processed = next_in + processed;
+	uint8_t *end_in = next_in + input_size;
 	uint32_t dist_code, dist_extra, dist, len;
 	uint32_t match_len;
 	uint32_t dist_start[] = {
@ -36,15 +38,17 @@ void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in,
 		0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
 	};

-	while (next_in < end_in - ISAL_LOOK_AHEAD) {
+	if (end_in > end_processed + ISAL_LOOK_AHEAD)
+		end_in = end_processed + ISAL_LOOK_AHEAD;
+
+	while (next_in < end_processed) {
 		dist_code = match_lookup->lit_dist;
 		dist_extra = match_lookup->dist_extra;
 		dist = dist_start[dist_code] + dist_extra;
 		len = match_lookup->lit_len;
 		if (len >= 8 + LEN_OFFSET) {
-			match_len = compare(next_in - dist + 8, next_in + 8,
-					    end_in - next_in + ISAL_DEF_MAX_MATCH) +
-			    LEN_OFFSET + 8;
+			match_len = compare((next_in + 8) - dist, next_in + 8,
+					    end_in - (next_in + 8)) + LEN_OFFSET + 8;

 			while (match_len > match_lookup->lit_len
 			       && match_len >= LEN_OFFSET + SHORTEST_MATCH) {
@ -251,8 +255,7 @@ void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream)

 		processed = gen_icf_map_h1_base(stream, matches_icf_lookup, input_size);

-		set_long_icf_fg(stream->next_in, stream->next_in + processed,
-				matches_icf_lookup, level_buf);
+		set_long_icf_fg(stream->next_in, processed, input_size, matches_icf_lookup);

 		stream->next_in += processed;
 		stream->avail_in -= processed;
@ -291,8 +294,7 @@ void icf_body_lazyhash1_fillgreedy_greedy(struct isal_zstream *stream)

 		processed = gen_icf_map_lh1(stream, matches_icf_lookup, input_size);

-		set_long_icf_fg(stream->next_in, stream->next_in + processed,
-				matches_icf_lookup, level_buf);
+		set_long_icf_fg(stream->next_in, processed, input_size, matches_icf_lookup);

 		stream->next_in += processed;
 		stream->avail_in -= processed;
--- a/igzip/igzip_set_long_icf_fg_04.asm
+++ b/igzip/igzip_set_long_icf_fg_04.asm
@ -39,28 +39,30 @@ default rel
 %define arg1 rcx
 %define arg2 rdx
 %define arg3 r8
-%define dist_code rsi
-%define tmp3 rsi
+%define arg4 r9
 %define len rdi
 %define tmp2 rdi
+%define dist rsi
 %else
 %define arg1 rdi
 %define arg2 rsi
 %define arg3 rdx
-%define dist_code rcx
-%define tmp3 rcx
+%define arg4 rcx
 %define len r8
 %define tmp2 r8
+%define dist r9
 %endif

 %define next_in arg1
-%define end_in arg2
-%define match_lookup arg3
+%define end_processed arg2
+%define end_in arg3
+%define match_lookup arg4
 %define match_in rax
-%define dist r9
 %define match_offset r10
 %define tmp1 r11
-%define end_in_orig r12
+%define end_processed_orig r12
+%define dist_code r13
+%define tmp3 r13

 %define ymatch_lookup ymm0
 %define ymatch_lookup2 ymm1
@ -97,6 +99,7 @@ default rel
 	save_reg	rsi, 10*16 + 0*8
 	save_reg	rdi, 10*16 + 1*8
 	save_reg	r12, 10*16 + 2*8
+	save_reg	r13, 10*16 + 3*8
 	end_prolog
 %endm

@ -115,15 +118,18 @@ default rel
 	mov	rsi, [rsp + 10*16 + 0*8]
 	mov	rdi, [rsp + 10*16 + 1*8]
 	mov	r12, [rsp + 10*16 + 2*8]
+	mov	r13, [rsp + 10*16 + 3*8]
 	add	rsp, stack_size
 %endm
 %else
 %define func(x) x:
 %macro FUNC_SAVE 0
 	push r12
+	push r13
 %endm

 %macro FUNC_RESTORE 0
+	pop r13
 	pop r12
 %endm
 %endif
@ -133,8 +139,13 @@ global set_long_icf_fg_04
 func(set_long_icf_fg_04)
 	FUNC_SAVE

-	mov	end_in_orig, end_in
-	sub	end_in, VECT_SIZE - 1
+	lea	end_in, [next_in + arg3]
+	add	end_processed, next_in
+	mov	end_processed_orig, end_processed
+	lea	tmp1, [end_processed + LA_STATELESS]
+	cmp	end_in, tmp1
+	cmovg	end_in, tmp1
+	sub	end_processed, VECT_SIZE - 1
 	vmovdqu ylong_lens, [long_len]
 	vmovdqu ylens_mask, [len_mask]
 	vmovdqu ydists_mask, [dists_mask]
@ -148,7 +159,7 @@ func(set_long_icf_fg_04)
 	vmovdqu ymatch_lookup2, ymatch_lookup
 	vmovdqu ymatch_lookup, [match_lookup + ICF_CODE_BYTES * VECT_SIZE]

-	cmp	next_in, end_in
+	cmp	next_in, end_processed
 	jae	.end_fill

 .finish_entry:
@ -185,7 +196,6 @@ func(set_long_icf_fg_04)
 	mov	len, 8
 	mov	tmp3, end_in
 	sub	tmp3, next_in
-	add	tmp3, 258

 	compare_y next_in, match_in, len, tmp3, tmp1, ytmp1, ytmp2

@ -194,7 +204,7 @@ func(set_long_icf_fg_04)
 	vpsubd	ylens1, ylens1, [increment]
 	vpaddd	ylens1, ylens1, [twofiftyfour]

-	mov	tmp3, end_in
+	mov	tmp3, end_processed
 	sub	tmp3, next_in
 	cmp	len, tmp3
 	cmovg	len, tmp3
@ -235,11 +245,11 @@ func(set_long_icf_fg_04)
 	jmp	.update_match_lookup

 .end_fill:
-	mov	end_in, end_in_orig
-	cmp	next_in, end_in
+	mov	end_processed, end_processed_orig
+	cmp	next_in, end_processed
 	jge	.finish

-	mov	tmp1, end_in
+	mov	tmp1, end_processed
 	sub	tmp1, next_in
 	vmovd	ytmp1 %+ x, tmp1 %+ d
 	vpbroadcastd ytmp1, ytmp1 %+ x
--- a/igzip/igzip_set_long_icf_fg_06.asm
+++ b/igzip/igzip_set_long_icf_fg_06.asm
@ -38,26 +38,28 @@
 %define arg1 rcx
 %define arg2 rdx
 %define arg3 r8
-%define dist_code rsi
-%define tmp2 rsi
+%define arg4 r9
 %define len rdi
+%define dist rsi
 %else
 %define arg1 rdi
 %define arg2 rsi
 %define arg3 rdx
-%define dist_code rcx
-%define tmp2 rcx
+%define arg4 rcx
 %define len r8
+%define dist r9
 %endif

 %define next_in arg1
-%define end_in arg2
-%define match_lookup arg3
+%define end_processed arg2
+%define end_in arg3
+%define match_lookup arg4
 %define match_in rax
-%define dist r9
 %define match_offset r10
 %define tmp1 r11
-%define end_in_orig r12
+%define end_processed_orig r12
+%define dist_code r13
+%define tmp2 r13

 %define zmatch_lookup zmm0
 %define zmatch_lookup2 zmm1
@ -106,6 +108,7 @@
 	save_reg	rsi, 8*16 + 0*8
 	save_reg	rdi, 8*16 + 1*8
 	save_reg	r12, 8*16 + 2*8
+	save_reg	r13, 8*16 + 3*8
 	end_prolog
 %endm

@ -122,15 +125,18 @@
 	mov	rsi, [rsp + 8*16 + 0*8]
 	mov	rdi, [rsp + 8*16 + 1*8]
 	mov	r12, [rsp + 8*16 + 2*8]
+	mov	r13, [rsp + 8*16 + 3*8]
 	add	rsp, stack_size
 %endm
 %else
 %define func(x) x:
 %macro FUNC_SAVE 0
 	push	r12
+	push	r13
 %endm

 %macro FUNC_RESTORE 0
+	pop	r13
 	pop	r12
 %endm
 %endif
@ -140,8 +146,13 @@ global set_long_icf_fg_06
 func(set_long_icf_fg_06)
 	FUNC_SAVE

-	mov	end_in_orig, end_in
-	sub	end_in, 15
+	lea	end_in, [next_in + arg3]
+	add	end_processed, next_in
+	mov	end_processed_orig, end_processed
+	lea	tmp1, [end_processed + LA_STATELESS]
+	cmp	end_in, tmp1
+	cmovg	end_in, tmp1
+	sub	end_processed, 15
 	vpbroadcastd zlong_lens, [long_len]
 	vpbroadcastd zlong_lens2, [long_len2]
 	vpbroadcastd zlens_mask, [len_mask]
@ -164,7 +175,7 @@ func(set_long_icf_fg_06)
 	vmovdqu32 zmatch_lookup2, zmatch_lookup
 	vmovdqu32 zmatch_lookup, [match_lookup + ICF_CODE_BYTES * VECT_SIZE]

-	cmp	next_in, end_in
+	cmp	next_in, end_processed
 	jae	.end_fill

 .finish_entry:
@ -237,7 +248,6 @@ func(set_long_icf_fg_06)
 	mov	len, 16
 	mov	tmp2, end_in
 	sub	tmp2, next_in
-	add	tmp2, 258

 	compare_z next_in, match_in, len, tmp2, tmp1, k3, ztmp1, ztmp2

@ -245,7 +255,7 @@ func(set_long_icf_fg_06)
 	vpsubd	zlens1, zlens1, zincrement
 	vpaddd	zlens1, zlens1, ztwofiftyfour

-	mov	tmp2, end_in
+	mov	tmp2, end_processed
 	sub	tmp2, next_in
 	cmp	len, tmp2
 	cmovg	len, tmp2
@ -283,11 +293,11 @@ func(set_long_icf_fg_06)
 	jmp	.update_match_lookup

 .end_fill:
-	mov	end_in, end_in_orig
-	cmp	next_in, end_in
+	mov	end_processed, end_processed_orig
+	cmp	next_in, end_processed
 	jge	.finish

-	mov	tmp1, end_in
+	mov	tmp1, end_processed
 	sub	tmp1, next_in
 	vpbroadcastd ztmp1, tmp1 %+ d
 	vpcmpd k3, ztmp1, zincrement, 6