igzip: Implement set_long_icf to compare more than 258 bytes

Change-Id: Ia8813d176da6bfcd3c6ef441eca1c59ac99db7f2
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
Roy Oursler 2018-09-27 13:20:13 -07:00
parent bdb6289bbe
commit ba1a000680
7 changed files with 197 additions and 55 deletions

View File

@ -2,6 +2,7 @@
#define ENCODE_DF_H #define ENCODE_DF_H
#include <stdint.h> #include <stdint.h>
#include "igzip_lib.h"
#include "huff_codes.h" #include "huff_codes.h"
/* Deflate Intermediate Compression Format */ /* Deflate Intermediate Compression Format */
@ -12,8 +13,9 @@
#define ICF_DIST_OFFSET LIT_LEN_BIT_COUNT #define ICF_DIST_OFFSET LIT_LEN_BIT_COUNT
#define NULL_DIST_SYM 30 #define NULL_DIST_SYM 30
#define LEN_START 257 #define LEN_START ISAL_DEF_LIT_SYMBOLS
#define LEN_OFFSET (LEN_START - 3) #define LEN_OFFSET (LEN_START - ISAL_DEF_MIN_MATCH)
#define LEN_MAX (LEN_OFFSET + ISAL_DEF_MAX_MATCH)
#define LIT_START (NULL_DIST_SYM + 1) #define LIT_START (NULL_DIST_SYM + 1)
#define ICF_CODE_LEN 32 #define ICF_CODE_LEN 32

View File

@ -282,3 +282,61 @@ static inline int compare258(uint8_t * str1, uint8_t * str2, uint32_t max_length
return count; return count;
} }
/**
 * @brief Counts the leading bytes that are identical in str1 and str2.
 * @param str1: First input string.
 * @param str2: Second input string.
 * @param max_length: Number of bytes to examine; must not exceed the length
 * of the shorter string.
 * @returns Number of bytes that match before the first difference, or
 * max_length when no difference is found.
 */
static inline int compare(uint8_t * str1, uint8_t * str2, uint32_t max_length)
{
	uint32_t matched = 0;
	uint32_t tail = max_length % 8;
	uint64_t word_bytes = max_length & ~0x7;
	uint64_t diff;

	/* Bulk phase: XOR eight bytes at a time; a non-zero result means the
	 * words differ, and tzbytecnt() (defined elsewhere) supplies the
	 * offset added for the position inside the word. */
	while (matched < word_bytes) {
		diff = *(uint64_t *) str1 ^ *(uint64_t *) str2;
		if (diff != 0)
			return matched + tzbytecnt(diff);
		str1 += 8;
		str2 += 8;
		matched += 8;
	}

	/* Tail phase: the remaining max_length % 8 bytes, one at a time. */
	for (; tail != 0; tail--) {
		if (*str1++ != *str2++)
			break;
		matched++;
	}

	return matched;
}

View File

@ -278,7 +278,78 @@
jmp %%end jmp %%end
%%miscompare_vect: %%miscompare_vect:
bsf %%tmp, %%tmp tzcnt %%tmp, %%tmp
add %%result, %%tmp
%%end:
%endm
;; Compares src1 and src2 64 bytes at a time and finds the first differing byte.
;; compare_z src1, src2, result, result_max, tmp, ktmp, ztmp1, ztmp2
;; result is both input and output: on entry it is the byte offset at which
;; comparison starts; on exit it is the offset of the first differing byte, or
;; (assuming result <= result_max on entry) the original result_max when every
;; byte below result_max matched. Bound checks use signed comparisons.
;; Clobbers result_max, tmp, ktmp, ztmp and ztmp2
%macro compare_z 8
%define %%src1 %1
%define %%src2 %2
%define %%result %3 ; Accumulator for match_length
%define %%result_max %4
%define %%tmp %5 ; 64-bit scratch register (used with kmovq, bzhi and tzcnt)
%define %%ktmp %6
%define %%ztmp %7
%define %%ztmp2 %8
;; Bias the bound by 128 so the unrolled loop only runs while at least
;; 128 bytes remain below the original result_max.
sub %%result_max, 128
cmp %%result, %%result_max
jg %%_by_64
;; Main loop: two full 64-byte compares per iteration.
%%loop1:
vmovdqu8 %%ztmp, [%%src1 + %%result]
vmovdqu8 %%ztmp2, [%%src2 + %%result]
vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
ktestq %%ktmp, %%ktmp
jnz %%miscompare
add %%result, 64
vmovdqu8 %%ztmp, [%%src1 + %%result]
vmovdqu8 %%ztmp2, [%%src2 + %%result]
vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
ktestq %%ktmp, %%ktmp
jnz %%miscompare
add %%result, 64
cmp %%result, %%result_max
jle %%loop1
;; result_max becomes (original - 64): do one more full 64-byte compare
;; only if at least 64 bytes remain.
%%_by_64:
add %%result_max, 64
cmp %%result, %%result_max
jg %%_less_than_64
vmovdqu8 %%ztmp, [%%src1 + %%result]
vmovdqu8 %%ztmp2, [%%src2 + %%result]
vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
ktestq %%ktmp, %%ktmp
jnz %%miscompare
add %%result, 64
;; result_max is restored to its original value and then turned into the
;; number of bytes still to compare (original result_max - result).
%%_less_than_64:
add %%result_max, 64
sub %%result_max, %%result
jle %%end
;; Build a mask with the low result_max bits set and use it for zero-masked
;; loads, so bytes beyond the bound are zero in both vectors and compare equal.
mov %%tmp, -1
bzhi %%tmp, %%tmp, %%result_max
kmovq %%ktmp, %%tmp
vmovdqu8 %%ztmp {%%ktmp}{z}, [%%src1 + %%result]
vmovdqu8 %%ztmp2 {%%ktmp}{z}, [%%src2 + %%result]
vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
ktestq %%ktmp, %%ktmp
jnz %%miscompare
;; No difference in the tail: advance result by the remaining byte count.
add %%result, %%result_max
jmp %%end
;; ktmp has one bit per differing byte; the lowest set bit is the index of the
;; first difference within the current 64-byte chunk.
%%miscompare:
kmovq %%tmp, %%ktmp
tzcnt %%tmp, %%tmp
add %%result, %%tmp add %%result, %%tmp
%%end: %%end:
%endm %endm

View File

@ -42,13 +42,15 @@ void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in,
dist = dist_start[dist_code] + dist_extra; dist = dist_start[dist_code] + dist_extra;
len = match_lookup->lit_len; len = match_lookup->lit_len;
if (len >= 8 + LEN_OFFSET) { if (len >= 8 + LEN_OFFSET) {
match_len = match_len = compare(next_in - dist + 8, next_in + 8,
compare258(next_in - dist + 8, next_in + 8, 250) + LEN_OFFSET + 8; end_in - next_in + ISAL_DEF_MAX_MATCH) +
LEN_OFFSET + 8;
while (match_len > match_lookup->lit_len while (match_len > match_lookup->lit_len
&& match_len >= LEN_OFFSET + SHORTEST_MATCH) { && match_len >= LEN_OFFSET + SHORTEST_MATCH) {
write_deflate_icf(match_lookup, match_len, dist_code, write_deflate_icf(match_lookup,
dist_extra); match_len > LEN_MAX ? LEN_MAX : match_len,
dist_code, dist_extra);
match_lookup++; match_lookup++;
next_in++; next_in++;
match_len--; match_len--;

View File

@ -40,6 +40,7 @@ default rel
%define arg2 rdx %define arg2 rdx
%define arg3 r8 %define arg3 r8
%define dist_code rsi %define dist_code rsi
%define tmp3 rsi
%define len rdi %define len rdi
%define tmp2 rdi %define tmp2 rdi
%else %else
@ -47,6 +48,7 @@ default rel
%define arg2 rsi %define arg2 rsi
%define arg3 rdx %define arg3 rdx
%define dist_code rcx %define dist_code rcx
%define tmp3 rcx
%define len r8 %define len r8
%define tmp2 r8 %define tmp2 r8
%endif %endif
@ -63,14 +65,14 @@ default rel
%define ymatch_lookup ymm0 %define ymatch_lookup ymm0
%define ymatch_lookup2 ymm1 %define ymatch_lookup2 ymm1
%define ylens ymm2 %define ylens ymm2
%define ydists ymm3 %define ycmp2 ymm3
%define ylens1 ymm4 %define ylens1 ymm4
%define ylens2 ymm5 %define ylens2 ymm5
%define ycmp ymm6 %define ycmp ymm6
%define ytmp1 ymm7 %define ytmp1 ymm7
%define ytmp2 ymm8 %define ytmp2 ymm8
%define yvect_size ymm9 %define yvect_size ymm9
%define ytwofiftyfour ymm10 %define ymax_len ymm10
%define ytwofiftysix ymm11 %define ytwofiftysix ymm11
%define ynlen_mask ymm12 %define ynlen_mask ymm12
%define ydists_mask ymm13 %define ydists_mask ymm13
@ -138,7 +140,7 @@ func(set_long_icf_fg_04)
vmovdqu ydists_mask, [dists_mask] vmovdqu ydists_mask, [dists_mask]
vmovdqu ynlen_mask, [nlen_mask] vmovdqu ynlen_mask, [nlen_mask]
vmovdqu yvect_size, [vect_size] vmovdqu yvect_size, [vect_size]
vmovdqu ytwofiftyfour, [twofiftyfour] vmovdqu ymax_len, [max_len]
vmovdqu ytwofiftysix, [twofiftysix] vmovdqu ytwofiftysix, [twofiftysix]
vmovdqu ymatch_lookup, [match_lookup] vmovdqu ymatch_lookup, [match_lookup]
@ -180,27 +182,23 @@ func(set_long_icf_fg_04)
mov match_in, next_in mov match_in, next_in
sub match_in, dist sub match_in, dist
mov len, 2 mov len, 8
%rep 7 mov tmp3, end_in
vmovdqu ytmp1, [next_in + len] sub tmp3, next_in
vmovdqu ytmp2, [match_in + len] add tmp3, 258
vpcmpeqb ycmp, ytmp1, [match_in + len]
vpmovmskb tmp1, ycmp
cmp tmp1 %+ d, 0xffffffff
jne .miscompare
add len, 32 compare_y next_in, match_in, len, tmp3, tmp1, ytmp1, ytmp2
%endrep
vmovdqu ytmp1, [next_in + len] vmovd ylens1 %+ x, len %+ d
vmovdqu ytmp2, [match_in + len] vpbroadcastd ylens1, ylens1 %+ x
vpcmpeqb ycmp, ytmp1, [match_in + len] vpsubd ylens1, ylens1, [increment]
vpmovmskb tmp1, ycmp vpaddd ylens1, ylens1, [twofiftyfour]
mov tmp3, end_in
sub tmp3, next_in
cmp len, tmp3
cmovg len, tmp3
.miscompare:
not tmp1 %+ d
tzcnt tmp1 %+ d, tmp1 %+ d
add len, tmp1
add next_in, len add next_in, len
lea match_lookup, [match_lookup + ICF_CODE_BYTES * len] lea match_lookup, [match_lookup + ICF_CODE_BYTES * len]
vmovdqu ymatch_lookup, [match_lookup] vmovdqu ymatch_lookup, [match_lookup]
@ -208,10 +206,6 @@ func(set_long_icf_fg_04)
vpbroadcastd ymatch_lookup2, ymatch_lookup2 %+ x vpbroadcastd ymatch_lookup2, ymatch_lookup2 %+ x
vpand ymatch_lookup2, ymatch_lookup2, ynlen_mask vpand ymatch_lookup2, ymatch_lookup2, ynlen_mask
vmovd ylens1 %+ x, len %+ d
vpbroadcastd ylens1, ylens1 %+ x
vpsubd ylens1, ylens1, [increment]
vpaddd ylens1, ylens1, ytwofiftyfour
neg len neg len
.update_match_lookup: .update_match_lookup:
@ -222,7 +216,12 @@ func(set_long_icf_fg_04)
vpand ycmp, ycmp, ytmp1 vpand ycmp, ycmp, ytmp1
vpmovmskb tmp1, ycmp vpmovmskb tmp1, ycmp
vpaddd ylens2, ylens1, ymatch_lookup2 vpcmpgtd ycmp2, ylens1, ymax_len
vpandn ylens, ycmp2, ylens1
vpand ycmp2, ymax_len, ycmp2
vpor ylens, ycmp2
vpaddd ylens2, ylens, ymatch_lookup2
vpand ylens2, ylens2, ycmp vpand ylens2, ylens2, ycmp
vpmaskmovd [match_lookup + ICF_CODE_BYTES * len], ycmp, ylens2 vpmaskmovd [match_lookup + ICF_CODE_BYTES * len], ycmp, ylens2
@ -281,3 +280,6 @@ twofiftysix:
nlen_mask: nlen_mask:
dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00 dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00 dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
max_len:
dd 0xfe + 0x102, 0xfe + 0x102, 0xfe + 0x102, 0xfe + 0x102
dd 0xfe + 0x102, 0xfe + 0x102, 0xfe + 0x102, 0xfe + 0x102

View File

@ -39,12 +39,14 @@
%define arg2 rdx %define arg2 rdx
%define arg3 r8 %define arg3 r8
%define dist_code rsi %define dist_code rsi
%define tmp2 rsi
%define len rdi %define len rdi
%else %else
%define arg1 rdi %define arg1 rdi
%define arg2 rsi %define arg2 rsi
%define arg3 rdx %define arg3 rdx
%define dist_code rcx %define dist_code rcx
%define tmp2 rcx
%define len r8 %define len r8
%endif %endif
@ -71,7 +73,8 @@
%define datas zmm11 %define datas zmm11
%define ztmp1 zmm12 %define ztmp1 zmm12
%define ztmp2 zmm13 %define ztmp2 zmm13
%define zvect_size zmm17 %define zvect_size zmm16
%define zmax_len zmm17
%define ztwofiftyfour zmm18 %define ztwofiftyfour zmm18
%define ztwofiftysix zmm19 %define ztwofiftysix zmm19
%define ztwosixtytwo zmm20 %define ztwosixtytwo zmm20
@ -151,6 +154,7 @@ func(set_long_icf_fg_06)
vbroadcasti64x2 zbswap, [bswap_shuf] vbroadcasti64x2 zbswap, [bswap_shuf]
vpbroadcastd znlen_mask, [nlen_mask] vpbroadcastd znlen_mask, [nlen_mask]
vpbroadcastd zvect_size, [vect_size] vpbroadcastd zvect_size, [vect_size]
vpbroadcastd zmax_len, [max_len]
vpbroadcastd ztwofiftyfour, [twofiftyfour] vpbroadcastd ztwofiftyfour, [twofiftyfour]
vpbroadcastd ztwofiftysix, [twofiftysix] vpbroadcastd ztwofiftysix, [twofiftysix]
vpbroadcastd ztwosixtytwo, [twosixtytwo] vpbroadcastd ztwosixtytwo, [twosixtytwo]
@ -230,25 +234,22 @@ func(set_long_icf_fg_06)
mov match_in, next_in mov match_in, next_in
sub match_in, dist sub match_in, dist
mov len, 2 mov len, 16
%rep 3 mov tmp2, end_in
vmovdqu8 ztmp1, [next_in + len] sub tmp2, next_in
vmovdqu8 ztmp2, [match_in + len] add tmp2, 258
vpcmpb k3, ztmp1, [match_in + len], NEQ
ktestq k3, k3
jnz .miscompare
add len, 64 compare_z next_in, match_in, len, tmp2, tmp1, k3, ztmp1, ztmp2
%endrep
vmovdqu8 ztmp1, [next_in + len] vpbroadcastd zlens1, len %+ d
vmovdqu8 ztmp2, [match_in + len] vpsubd zlens1, zlens1, zincrement
vpcmpb k3, ztmp1, ztmp2, 4 vpaddd zlens1, zlens1, ztwofiftyfour
mov tmp2, end_in
sub tmp2, next_in
cmp len, tmp2
cmovg len, tmp2
.miscompare:
kmovq tmp1, k3
tzcnt tmp1, tmp1
add len, tmp1
add next_in, len add next_in, len
lea match_lookup, [match_lookup + ICF_CODE_BYTES * len] lea match_lookup, [match_lookup + ICF_CODE_BYTES * len]
vmovdqu32 zmatch_lookup, [match_lookup] vmovdqu32 zmatch_lookup, [match_lookup]
@ -256,9 +257,6 @@ func(set_long_icf_fg_06)
vpbroadcastd zmatch_lookup2, zmatch_lookup2 %+ x vpbroadcastd zmatch_lookup2, zmatch_lookup2 %+ x
vpandd zmatch_lookup2, zmatch_lookup2, znlen_mask vpandd zmatch_lookup2, zmatch_lookup2, znlen_mask
vpbroadcastd zlens1, len %+ d
vpsubd zlens1, zlens1, zincrement
vpaddd zlens1, zlens1, ztwofiftyfour
neg len neg len
.update_match_lookup: .update_match_lookup:
@ -267,7 +265,11 @@ func(set_long_icf_fg_06)
vpcmpgtd k4, zlens1, ztwofiftysix vpcmpgtd k4, zlens1, ztwofiftysix
kandw k3, k3, k4 kandw k3, k3, k4
vpaddd zlens2 {k3}{z}, zlens1, zmatch_lookup2 vpcmpgtd k4, zlens1, zmax_len
vmovdqu32 zlens, zlens1
vmovdqu32 zlens {k4}, zmax_len
vpaddd zlens2 {k3}{z}, zlens, zmatch_lookup2
vmovdqu32 [match_lookup + ICF_CODE_BYTES * len] {k3}, zlens2 vmovdqu32 [match_lookup + ICF_CODE_BYTES * len] {k3}, zlens2
@ -340,6 +342,8 @@ long_len:
dd 0x105 dd 0x105
long_len2: long_len2:
dd 0x7 dd 0x7
max_len:
dd 0xfe + 0x102
vect_size: vect_size:
dd VECT_SIZE dd VECT_SIZE
twofiftyfour: twofiftyfour:

View File

@ -85,13 +85,16 @@ extern "C" {
#define ISAL_DEF_MAX_CODE_LEN 15 #define ISAL_DEF_MAX_CODE_LEN 15
#define ISAL_DEF_HIST_SIZE (32*IGZIP_K) #define ISAL_DEF_HIST_SIZE (32*IGZIP_K)
#define ISAL_DEF_MAX_HIST_BITS 15 #define ISAL_DEF_MAX_HIST_BITS 15
#define ISAL_DEF_MAX_MATCH 258
#define ISAL_DEF_MIN_MATCH 3
#define ISAL_DEF_LIT_SYMBOLS 257 #define ISAL_DEF_LIT_SYMBOLS 257
#define ISAL_DEF_LEN_SYMBOLS 29 #define ISAL_DEF_LEN_SYMBOLS 29
#define ISAL_DEF_DIST_SYMBOLS 30 #define ISAL_DEF_DIST_SYMBOLS 30
#define ISAL_DEF_LIT_LEN_SYMBOLS (ISAL_DEF_LIT_SYMBOLS + ISAL_DEF_LEN_SYMBOLS) #define ISAL_DEF_LIT_LEN_SYMBOLS (ISAL_DEF_LIT_SYMBOLS + ISAL_DEF_LEN_SYMBOLS)
#define ISAL_LOOK_AHEAD (18 * 16) /* Max repeat length, rounded up to 32 byte boundary */ /* Max repeat length, rounded up to 32 byte boundary */
#define ISAL_LOOK_AHEAD ((ISAL_DEF_MAX_MATCH + 31) & ~31)
/******************************************************************************/ /******************************************************************************/
/* Deflate Implementation Specific Defines */ /* Deflate Implementation Specific Defines */