mirror of
https://github.com/intel/isa-l.git
synced 2024-12-13 09:52:56 +01:00
igzip: Implement set_long_icf to compare more than 258
Change-Id: Ia8813d176da6bfcd3c6ef441eca1c59ac99db7f2 Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
parent
bdb6289bbe
commit
ba1a000680
@ -2,6 +2,7 @@
|
||||
#define ENCODE_DF_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "igzip_lib.h"
|
||||
#include "huff_codes.h"
|
||||
|
||||
/* Deflate Intermediate Compression Format */
|
||||
@ -12,8 +13,9 @@
|
||||
#define ICF_DIST_OFFSET LIT_LEN_BIT_COUNT
|
||||
#define NULL_DIST_SYM 30
|
||||
|
||||
#define LEN_START 257
|
||||
#define LEN_OFFSET (LEN_START - 3)
|
||||
#define LEN_START ISAL_DEF_LIT_SYMBOLS
|
||||
#define LEN_OFFSET (LEN_START - ISAL_DEF_MIN_MATCH)
|
||||
#define LEN_MAX (LEN_OFFSET + ISAL_DEF_MAX_MATCH)
|
||||
#define LIT_START (NULL_DIST_SYM + 1)
|
||||
#define ICF_CODE_LEN 32
|
||||
|
||||
|
@ -282,3 +282,61 @@ static inline int compare258(uint8_t * str1, uint8_t * str2, uint32_t max_length
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
 * @brief Counts the leading bytes on which str1 and str2 agree.
 *
 * The bulk of the input is checked eight bytes at a time by XOR-ing 64-bit
 * words; a non-zero XOR marks a mismatch inside the word and tzbytecnt()
 * converts it into a byte offset. The remaining (max_length % 8) bytes are
 * then checked one at a time.
 *
 * @param str1: First input string.
 * @param str2: Second input string.
 * @param max_length: Number of bytes to examine (length of the smaller string).
 * @returns Number of matching leading bytes, at most max_length.
 */
static inline int compare(uint8_t * str1, uint8_t * str2, uint32_t max_length)
{
	const uint32_t word_bytes = max_length & ~0x7;	/* portion handled 8 bytes at a time */
	uint32_t matched = 0;
	uint32_t tail;
	uint64_t diff;

	/* Word-at-a-time phase: stop at the first 64-bit word that differs. */
	while (matched < word_bytes) {
		diff = *(uint64_t *) str1 ^ *(uint64_t *) str2;
		if (diff != 0)
			return matched + tzbytecnt(diff);

		str1 += 8;
		str2 += 8;
		matched += 8;
	}

	/* Byte-at-a-time phase for the final max_length % 8 bytes. */
	for (tail = max_length % 8; tail != 0; tail--) {
		if (*str1++ != *str2++)
			return matched;
		matched++;
	}

	return matched;
}
|
||||
|
@ -278,7 +278,78 @@
|
||||
jmp %%end
|
||||
|
||||
%%miscompare_vect:
|
||||
bsf %%tmp, %%tmp
|
||||
tzcnt %%tmp, %%tmp
|
||||
add %%result, %%tmp
|
||||
%%end:
|
||||
%endm
|
||||
|
||||
;; compare_z: compares the bytes at [src1 + result] and [src2 + result],
;; 64 bytes at a time, advancing result until a differing byte is found or
;; result reaches result_max.
|
||||
;; compare_z src1, src2, result, result_max, tmp, ktmp, ztmp1, ztmp2
|
||||
;; On exit result holds the updated match length.
;; Clobbers result_max, tmp, ktmp, ztmp1 and ztmp2
|
||||
%macro compare_z 8
|
||||
%define %%src1 %1
|
||||
%define %%src2 %2
|
||||
%define %%result %3 ; Accumulator for match_length; also the current byte offset into src1/src2
|
||||
%define %%result_max %4
|
||||
%define %%tmp %5 ; scratch GPR exchanged with %%ktmp via kmovq (NOTE(review): comment previously said "16-bit register"; kmovq takes a 64-bit GPR - confirm)
|
||||
%define %%ktmp %6
|
||||
%define %%ztmp %7
|
||||
%define %%ztmp2 %8
|
||||
|
||||
;; result_max = limit - 128: the unrolled loop below only runs while
;; result <= limit - 128 (signed compare), i.e. two full 64-byte blocks remain.
sub %%result_max, 128
|
||||
cmp %%result, %%result_max
|
||||
jg %%_by_64
|
||||
|
||||
;; Main loop: two 64-byte block compares per iteration.
%%loop1:
|
||||
vmovdqu8 %%ztmp, [%%src1 + %%result]
|
||||
vmovdqu8 %%ztmp2, [%%src2 + %%result]
|
||||
;; ktmp gets one bit per byte position that differs
vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
|
||||
;; non-zero mask => at least one byte differs in this block
ktestq %%ktmp, %%ktmp
|
||||
jnz %%miscompare
|
||||
add %%result, 64
|
||||
|
||||
vmovdqu8 %%ztmp, [%%src1 + %%result]
|
||||
vmovdqu8 %%ztmp2, [%%src2 + %%result]
|
||||
vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
|
||||
ktestq %%ktmp, %%ktmp
|
||||
jnz %%miscompare
|
||||
add %%result, 64
|
||||
|
||||
cmp %%result, %%result_max
|
||||
jle %%loop1
|
||||
|
||||
;; result_max = limit - 64: compare one more full block if at least 64 bytes remain.
%%_by_64:
|
||||
add %%result_max, 64
|
||||
cmp %%result, %%result_max
|
||||
jg %%_less_than_64
|
||||
|
||||
vmovdqu8 %%ztmp, [%%src1 + %%result]
|
||||
vmovdqu8 %%ztmp2, [%%src2 + %%result]
|
||||
vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
|
||||
ktestq %%ktmp, %%ktmp
|
||||
jnz %%miscompare
|
||||
add %%result, 64
|
||||
|
||||
;; result_max is restored to the original limit, then turned into the number
;; of bytes still to compare (limit - result); nothing left => done.
%%_less_than_64:
|
||||
add %%result_max, 64
|
||||
sub %%result_max, %%result
|
||||
jle %%end
|
||||
|
||||
;; Build a mask with the low result_max bits set (one bit per remaining byte).
mov %%tmp, -1
|
||||
bzhi %%tmp, %%tmp, %%result_max
|
||||
kmovq %%ktmp, %%tmp
|
||||
|
||||
;; Masked, zeroing loads: bytes outside the mask are zero in both registers,
;; so they cannot show up as differences in the compare below.
vmovdqu8 %%ztmp {%%ktmp}{z}, [%%src1 + %%result]
|
||||
vmovdqu8 %%ztmp2 {%%ktmp}{z}, [%%src2 + %%result]
|
||||
vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
|
||||
ktestq %%ktmp, %%ktmp
|
||||
jnz %%miscompare
|
||||
;; every remaining byte matched: count all of them
add %%result, %%result_max
|
||||
|
||||
jmp %%end
|
||||
;; The lowest set bit of the mismatch mask is the offset of the first
;; differing byte within the current block; add it to the running length.
%%miscompare:
|
||||
kmovq %%tmp, %%ktmp
|
||||
tzcnt %%tmp, %%tmp
|
||||
add %%result, %%tmp
|
||||
%%end:
|
||||
%endm
|
||||
|
@ -42,13 +42,15 @@ void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in,
|
||||
dist = dist_start[dist_code] + dist_extra;
|
||||
len = match_lookup->lit_len;
|
||||
if (len >= 8 + LEN_OFFSET) {
|
||||
match_len =
|
||||
compare258(next_in - dist + 8, next_in + 8, 250) + LEN_OFFSET + 8;
|
||||
match_len = compare(next_in - dist + 8, next_in + 8,
|
||||
end_in - next_in + ISAL_DEF_MAX_MATCH) +
|
||||
LEN_OFFSET + 8;
|
||||
|
||||
while (match_len > match_lookup->lit_len
|
||||
&& match_len >= LEN_OFFSET + SHORTEST_MATCH) {
|
||||
write_deflate_icf(match_lookup, match_len, dist_code,
|
||||
dist_extra);
|
||||
write_deflate_icf(match_lookup,
|
||||
match_len > LEN_MAX ? LEN_MAX : match_len,
|
||||
dist_code, dist_extra);
|
||||
match_lookup++;
|
||||
next_in++;
|
||||
match_len--;
|
||||
|
@ -40,6 +40,7 @@ default rel
|
||||
%define arg2 rdx
|
||||
%define arg3 r8
|
||||
%define dist_code rsi
|
||||
%define tmp3 rsi
|
||||
%define len rdi
|
||||
%define tmp2 rdi
|
||||
%else
|
||||
@ -47,6 +48,7 @@ default rel
|
||||
%define arg2 rsi
|
||||
%define arg3 rdx
|
||||
%define dist_code rcx
|
||||
%define tmp3 rcx
|
||||
%define len r8
|
||||
%define tmp2 r8
|
||||
%endif
|
||||
@ -63,14 +65,14 @@ default rel
|
||||
%define ymatch_lookup ymm0
|
||||
%define ymatch_lookup2 ymm1
|
||||
%define ylens ymm2
|
||||
%define ydists ymm3
|
||||
%define ycmp2 ymm3
|
||||
%define ylens1 ymm4
|
||||
%define ylens2 ymm5
|
||||
%define ycmp ymm6
|
||||
%define ytmp1 ymm7
|
||||
%define ytmp2 ymm8
|
||||
%define yvect_size ymm9
|
||||
%define ytwofiftyfour ymm10
|
||||
%define ymax_len ymm10
|
||||
%define ytwofiftysix ymm11
|
||||
%define ynlen_mask ymm12
|
||||
%define ydists_mask ymm13
|
||||
@ -138,7 +140,7 @@ func(set_long_icf_fg_04)
|
||||
vmovdqu ydists_mask, [dists_mask]
|
||||
vmovdqu ynlen_mask, [nlen_mask]
|
||||
vmovdqu yvect_size, [vect_size]
|
||||
vmovdqu ytwofiftyfour, [twofiftyfour]
|
||||
vmovdqu ymax_len, [max_len]
|
||||
vmovdqu ytwofiftysix, [twofiftysix]
|
||||
vmovdqu ymatch_lookup, [match_lookup]
|
||||
|
||||
@ -180,27 +182,23 @@ func(set_long_icf_fg_04)
|
||||
mov match_in, next_in
|
||||
sub match_in, dist
|
||||
|
||||
mov len, 2
|
||||
%rep 7
|
||||
vmovdqu ytmp1, [next_in + len]
|
||||
vmovdqu ytmp2, [match_in + len]
|
||||
vpcmpeqb ycmp, ytmp1, [match_in + len]
|
||||
vpmovmskb tmp1, ycmp
|
||||
cmp tmp1 %+ d, 0xffffffff
|
||||
jne .miscompare
|
||||
mov len, 8
|
||||
mov tmp3, end_in
|
||||
sub tmp3, next_in
|
||||
add tmp3, 258
|
||||
|
||||
add len, 32
|
||||
%endrep
|
||||
compare_y next_in, match_in, len, tmp3, tmp1, ytmp1, ytmp2
|
||||
|
||||
vmovdqu ytmp1, [next_in + len]
|
||||
vmovdqu ytmp2, [match_in + len]
|
||||
vpcmpeqb ycmp, ytmp1, [match_in + len]
|
||||
vpmovmskb tmp1, ycmp
|
||||
vmovd ylens1 %+ x, len %+ d
|
||||
vpbroadcastd ylens1, ylens1 %+ x
|
||||
vpsubd ylens1, ylens1, [increment]
|
||||
vpaddd ylens1, ylens1, [twofiftyfour]
|
||||
|
||||
mov tmp3, end_in
|
||||
sub tmp3, next_in
|
||||
cmp len, tmp3
|
||||
cmovg len, tmp3
|
||||
|
||||
.miscompare:
|
||||
not tmp1 %+ d
|
||||
tzcnt tmp1 %+ d, tmp1 %+ d
|
||||
add len, tmp1
|
||||
add next_in, len
|
||||
lea match_lookup, [match_lookup + ICF_CODE_BYTES * len]
|
||||
vmovdqu ymatch_lookup, [match_lookup]
|
||||
@ -208,10 +206,6 @@ func(set_long_icf_fg_04)
|
||||
vpbroadcastd ymatch_lookup2, ymatch_lookup2 %+ x
|
||||
vpand ymatch_lookup2, ymatch_lookup2, ynlen_mask
|
||||
|
||||
vmovd ylens1 %+ x, len %+ d
|
||||
vpbroadcastd ylens1, ylens1 %+ x
|
||||
vpsubd ylens1, ylens1, [increment]
|
||||
vpaddd ylens1, ylens1, ytwofiftyfour
|
||||
neg len
|
||||
|
||||
.update_match_lookup:
|
||||
@ -222,7 +216,12 @@ func(set_long_icf_fg_04)
|
||||
vpand ycmp, ycmp, ytmp1
|
||||
vpmovmskb tmp1, ycmp
|
||||
|
||||
vpaddd ylens2, ylens1, ymatch_lookup2
|
||||
vpcmpgtd ycmp2, ylens1, ymax_len
|
||||
vpandn ylens, ycmp2, ylens1
|
||||
vpand ycmp2, ymax_len, ycmp2
|
||||
vpor ylens, ycmp2
|
||||
|
||||
vpaddd ylens2, ylens, ymatch_lookup2
|
||||
vpand ylens2, ylens2, ycmp
|
||||
|
||||
vpmaskmovd [match_lookup + ICF_CODE_BYTES * len], ycmp, ylens2
|
||||
@ -281,3 +280,6 @@ twofiftysix:
|
||||
nlen_mask:
|
||||
dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
|
||||
dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
|
||||
max_len:
|
||||
dd 0xfe + 0x102, 0xfe + 0x102, 0xfe + 0x102, 0xfe + 0x102
|
||||
dd 0xfe + 0x102, 0xfe + 0x102, 0xfe + 0x102, 0xfe + 0x102
|
||||
|
@ -39,12 +39,14 @@
|
||||
%define arg2 rdx
|
||||
%define arg3 r8
|
||||
%define dist_code rsi
|
||||
%define tmp2 rsi
|
||||
%define len rdi
|
||||
%else
|
||||
%define arg1 rdi
|
||||
%define arg2 rsi
|
||||
%define arg3 rdx
|
||||
%define dist_code rcx
|
||||
%define tmp2 rcx
|
||||
%define len r8
|
||||
%endif
|
||||
|
||||
@ -71,7 +73,8 @@
|
||||
%define datas zmm11
|
||||
%define ztmp1 zmm12
|
||||
%define ztmp2 zmm13
|
||||
%define zvect_size zmm17
|
||||
%define zvect_size zmm16
|
||||
%define zmax_len zmm17
|
||||
%define ztwofiftyfour zmm18
|
||||
%define ztwofiftysix zmm19
|
||||
%define ztwosixtytwo zmm20
|
||||
@ -151,6 +154,7 @@ func(set_long_icf_fg_06)
|
||||
vbroadcasti64x2 zbswap, [bswap_shuf]
|
||||
vpbroadcastd znlen_mask, [nlen_mask]
|
||||
vpbroadcastd zvect_size, [vect_size]
|
||||
vpbroadcastd zmax_len, [max_len]
|
||||
vpbroadcastd ztwofiftyfour, [twofiftyfour]
|
||||
vpbroadcastd ztwofiftysix, [twofiftysix]
|
||||
vpbroadcastd ztwosixtytwo, [twosixtytwo]
|
||||
@ -230,25 +234,22 @@ func(set_long_icf_fg_06)
|
||||
mov match_in, next_in
|
||||
sub match_in, dist
|
||||
|
||||
mov len, 2
|
||||
%rep 3
|
||||
vmovdqu8 ztmp1, [next_in + len]
|
||||
vmovdqu8 ztmp2, [match_in + len]
|
||||
vpcmpb k3, ztmp1, [match_in + len], NEQ
|
||||
ktestq k3, k3
|
||||
jnz .miscompare
|
||||
mov len, 16
|
||||
mov tmp2, end_in
|
||||
sub tmp2, next_in
|
||||
add tmp2, 258
|
||||
|
||||
add len, 64
|
||||
%endrep
|
||||
compare_z next_in, match_in, len, tmp2, tmp1, k3, ztmp1, ztmp2
|
||||
|
||||
vmovdqu8 ztmp1, [next_in + len]
|
||||
vmovdqu8 ztmp2, [match_in + len]
|
||||
vpcmpb k3, ztmp1, ztmp2, 4
|
||||
vpbroadcastd zlens1, len %+ d
|
||||
vpsubd zlens1, zlens1, zincrement
|
||||
vpaddd zlens1, zlens1, ztwofiftyfour
|
||||
|
||||
mov tmp2, end_in
|
||||
sub tmp2, next_in
|
||||
cmp len, tmp2
|
||||
cmovg len, tmp2
|
||||
|
||||
.miscompare:
|
||||
kmovq tmp1, k3
|
||||
tzcnt tmp1, tmp1
|
||||
add len, tmp1
|
||||
add next_in, len
|
||||
lea match_lookup, [match_lookup + ICF_CODE_BYTES * len]
|
||||
vmovdqu32 zmatch_lookup, [match_lookup]
|
||||
@ -256,9 +257,6 @@ func(set_long_icf_fg_06)
|
||||
vpbroadcastd zmatch_lookup2, zmatch_lookup2 %+ x
|
||||
vpandd zmatch_lookup2, zmatch_lookup2, znlen_mask
|
||||
|
||||
vpbroadcastd zlens1, len %+ d
|
||||
vpsubd zlens1, zlens1, zincrement
|
||||
vpaddd zlens1, zlens1, ztwofiftyfour
|
||||
neg len
|
||||
|
||||
.update_match_lookup:
|
||||
@ -267,7 +265,11 @@ func(set_long_icf_fg_06)
|
||||
vpcmpgtd k4, zlens1, ztwofiftysix
|
||||
kandw k3, k3, k4
|
||||
|
||||
vpaddd zlens2 {k3}{z}, zlens1, zmatch_lookup2
|
||||
vpcmpgtd k4, zlens1, zmax_len
|
||||
vmovdqu32 zlens, zlens1
|
||||
vmovdqu32 zlens {k4}, zmax_len
|
||||
|
||||
vpaddd zlens2 {k3}{z}, zlens, zmatch_lookup2
|
||||
|
||||
vmovdqu32 [match_lookup + ICF_CODE_BYTES * len] {k3}, zlens2
|
||||
|
||||
@ -340,6 +342,8 @@ long_len:
|
||||
dd 0x105
|
||||
long_len2:
|
||||
dd 0x7
|
||||
max_len:
|
||||
dd 0xfe + 0x102
|
||||
vect_size:
|
||||
dd VECT_SIZE
|
||||
twofiftyfour:
|
||||
|
@ -85,13 +85,16 @@ extern "C" {
|
||||
#define ISAL_DEF_MAX_CODE_LEN 15
|
||||
#define ISAL_DEF_HIST_SIZE (32*IGZIP_K)
|
||||
#define ISAL_DEF_MAX_HIST_BITS 15
|
||||
#define ISAL_DEF_MAX_MATCH 258
|
||||
#define ISAL_DEF_MIN_MATCH 3
|
||||
|
||||
#define ISAL_DEF_LIT_SYMBOLS 257
|
||||
#define ISAL_DEF_LEN_SYMBOLS 29
|
||||
#define ISAL_DEF_DIST_SYMBOLS 30
|
||||
#define ISAL_DEF_LIT_LEN_SYMBOLS (ISAL_DEF_LIT_SYMBOLS + ISAL_DEF_LEN_SYMBOLS)
|
||||
|
||||
#define ISAL_LOOK_AHEAD (18 * 16) /* Max repeat length, rounded up to 32 byte boundary */
|
||||
/* Max repeat length, rounded up to 32 byte boundary */
|
||||
#define ISAL_LOOK_AHEAD ((ISAL_DEF_MAX_MATCH + 31) & ~31)
|
||||
|
||||
/******************************************************************************/
|
||||
/* Deflate Implementation Specific Defines */
|
||||
|
Loading…
Reference in New Issue
Block a user