igzip: Implement optimized level 2 compression

Change-Id: I8cf5bcd56f290d17205ac36dc2828c8acfc66947
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
Roy Oursler 2017-06-09 17:03:41 -07:00
parent 3c62216aa1
commit 4ae2d1be29
30 changed files with 1952 additions and 217 deletions

View File

@ -120,9 +120,9 @@ objs = \
bin\encode_df.obj \
bin\encode_df_04.obj \
bin\proc_heap.obj \
bin\igzip_icf_body_01.obj \
bin\igzip_icf_body_02.obj \
bin\igzip_icf_body_04.obj \
bin\igzip_icf_body_h1_gr_bt_01.obj \
bin\igzip_icf_body_h1_gr_bt_02.obj \
bin\igzip_icf_body_h1_gr_bt_04.obj \
bin\igzip_icf_finish.obj \
bin\igzip_icf_base.obj \
bin\igzip_inflate.obj \
@ -135,7 +135,10 @@ objs = \
bin\crc32_gzip_refl_by8.obj \
bin\adler32_sse.obj \
bin\adler32_avx2_4.obj \
bin\igzip_deflate_hash.obj
bin\igzip_deflate_hash.obj \
bin\igzip_gen_icf_map_lh1_06.obj \
bin\igzip_set_long_icf_fg_06.obj \
bin\igzip_icf_body.obj
INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/
LINKFLAGS = /nologo

View File

@ -33,19 +33,19 @@ lsrc += igzip/igzip.c \
igzip/igzip_icf_base.c \
igzip/crc32_gzip_base.c \
igzip/flatten_ll.c \
igzip/encode_df.c
igzip/encode_df.c \
igzip/igzip_icf_body.c
lsrc_base_aliases += igzip/igzip_base_aliases.c igzip/proc_heap_base.c
lsrc_x86_32 += igzip/igzip_base_aliases.c igzip/proc_heap_base.c
lsrc_x86_64 += \
igzip/igzip_body_01.asm \
lsrc_x86_64 += igzip/igzip_body_01.asm \
igzip/igzip_body_02.asm \
igzip/igzip_body_04.asm \
igzip/igzip_finish.asm \
igzip/igzip_icf_body_01.asm \
igzip/igzip_icf_body_02.asm \
igzip/igzip_icf_body_04.asm \
igzip/igzip_icf_body_h1_gr_bt_01.asm \
igzip/igzip_icf_body_h1_gr_bt_02.asm \
igzip/igzip_icf_body_h1_gr_bt_04.asm \
igzip/igzip_icf_finish.asm \
igzip/rfc1951_lookup.asm \
igzip/crc32_gzip.asm igzip/detect_repeated_char.asm \
@ -60,7 +60,9 @@ lsrc_x86_64 += \
igzip/encode_df_04.asm \
igzip/encode_df_06.asm \
igzip/proc_heap.asm \
igzip/igzip_deflate_hash.asm
igzip/igzip_deflate_hash.asm \
igzip/igzip_gen_icf_map_lh1_06.asm \
igzip/igzip_set_long_icf_fg_06.asm
src_include += -I $(srcdir)/igzip
extern_hdrs += include/igzip_lib.h
@ -80,7 +82,7 @@ other_src += igzip/bitbuf2.asm \
igzip/data_struct2.asm \
igzip/inflate_data_structs.asm \
igzip/igzip_body.asm \
igzip/igzip_icf_body.asm \
igzip/igzip_icf_body_h1_gr_bt.asm \
igzip/igzip_finish.asm \
igzip/lz0a_const.asm \
igzip/options.asm \
@ -124,3 +126,6 @@ igzip_inflate_test: LDLIBS += -lz
igzip_igzip_inflate_test_LDADD = libisal.la
igzip_igzip_inflate_test_LDFLAGS = -lz
igzip_igzip_hist_perf_LDADD = libisal.la
igzip_fuzz_inflate: LDLIBS += -lz
igzip_igzip_fuzz_inflate_LDADD = libisal.la
igzip_igzip_fuzz_inflate_LDFLAGS = -lz

View File

@ -96,19 +96,42 @@ FIELD _lit_len_table, 513 * HUFF_CODE_SIZE, HUFF_CODE_SIZE
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
START_FIELDS ;; lvl2_buf
;; name size align
FIELD _hash_table, 2 * IGZIP_LVL2_HASH_SIZE, 2
FIELD _matches_next, 8, 8
FIELD _matches_end, 8, 8
FIELD _matches, 4*4*1024, 4
FIELD _overflow, 4*LA, 4
%assign _lvl2_buf_size _FIELD_OFFSET
%assign _lvl2_buf_align _STRUCT_ALIGN
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define DEF_MAX_HDR_SIZE 328
START_FIELDS ;; level_2_buf
START_FIELDS ;; level_buf
;; name size align
FIELD _encode_tables, _hufftables_icf_size, _hufftables_icf_align
FIELD _deflate_hdr_buf_used, 8, 8
FIELD _deflate_hdr_buf, DEF_MAX_HDR_SIZE, 1
FIELD _deflate_hdr_count, 4, 4
FIELD _deflate_hdr_extra_bits,4, 4
FIELD _deflate_hdr, DEF_MAX_HDR_SIZE, 1
FIELD _icf_buf_next, 8, 8
FIELD _icf_buf_avail_out, 8, 8
FIELD _icf_buf_start, 0, 0
FIELD _icf_buf_start, 8, 8
FIELD _lvl_extra, _lvl2_buf_size, _lvl2_buf_align
%assign _level_2_buf_size _FIELD_OFFSET
%assign _level_2_buf_align _STRUCT_ALIGN
%assign _level_buf_base_size _FIELD_OFFSET
%assign _level_buf_base_align _STRUCT_ALIGN
_lvl2_hash_table equ _lvl_extra + _hash_table
_lvl2_matches_next equ _lvl_extra + _matches_next
_lvl2_matches_end equ _lvl_extra + _matches_end
_lvl2_matches equ _lvl_extra + _matches
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -127,6 +150,7 @@ FIELD _has_wrap_hdr, 1, 1
FIELD _has_eob_hdr, 1, 1
FIELD _has_eob, 1, 1
FIELD _has_hist, 1, 1
FIELD _has_level_buf_init, 2, 2
FIELD _hist, _isal_mod_hist_size, _isal_mod_hist_align
FIELD _count, 4, 4
FIELD _tmp_out_buff, 16, 1
@ -135,8 +159,7 @@ FIELD _tmp_out_end, 4, 4
FIELD _b_bytes_valid, 4, 4
FIELD _b_bytes_processed, 4, 4
FIELD _buffer, BSIZE, 1
FIELD _head, IGZIP_HASH_SIZE*2, 2
FIELD _head, IGZIP_LVL0_HASH_SIZE*2, 2
%assign _isal_zstate_size _FIELD_OFFSET
%assign _isal_zstate_align _STRUCT_ALIGN
@ -189,6 +212,7 @@ _internal_state_has_wrap_hdr equ _internal_state+_has_wrap_hdr
_internal_state_has_eob equ _internal_state+_has_eob
_internal_state_has_eob_hdr equ _internal_state+_has_eob_hdr
_internal_state_has_hist equ _internal_state+_has_hist
_internal_state_has_level_buf_init equ _internal_state+_has_level_buf_init
_internal_state_buffer equ _internal_state+_buffer
_internal_state_head equ _internal_state+_head
_internal_state_bitbuf_m_bits equ _internal_state+_bitbuf_m_bits

View File

@ -6,14 +6,21 @@
/* Deflate Intermediate Compression Format */
#define LIT_LEN_BIT_COUNT 10
#define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1)
#define DIST_LIT_BIT_COUNT 9
#define DIST_LIT_MASK ((1 << DIST_LIT_BIT_COUNT) - 1)
#define ICF_DIST_OFFSET LIT_LEN_BIT_COUNT
#define NULL_DIST_SYM 30
#define LEN_START 257
#define LEN_OFFSET (LEN_START - 3)
#define LIT_START (NULL_DIST_SYM + 1)
#define ICF_CODE_LEN 32
struct deflate_icf {
uint32_t lit_len:LIT_LEN_BIT_COUNT;
uint32_t lit_dist:DIST_LIT_BIT_COUNT;
uint32_t dist_extra:32 - DIST_LIT_BIT_COUNT - ICF_DIST_OFFSET;
uint32_t dist_extra:ICF_CODE_LEN - DIST_LIT_BIT_COUNT - ICF_DIST_OFFSET;
};
struct deflate_icf *encode_deflate_icf(struct deflate_icf *next_in, struct deflate_icf *end_in,

View File

@ -684,7 +684,7 @@ void isal_update_histogram_base(uint8_t * start_stream, int length,
memset(last_seen, 0, sizeof(histogram->hash_table)); /* Initialize last_seen to be 0. */
for (current = start_stream; current < end_stream - 3; current++) {
literal = *(uint32_t *) current;
hash = compute_hash(literal) & HASH_MASK;
hash = compute_hash(literal) & LVL0_HASH_MASK;
seen = last_seen[hash];
last_seen[hash] = (current - start_stream) & 0xFFFF;
dist = (current - start_stream - seen) & 0xFFFF;
@ -704,7 +704,7 @@ void isal_update_histogram_base(uint8_t * start_stream, int length,
next_hash++;
for (; next_hash < end; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH_MASK;
hash = compute_hash(literal) & LVL0_HASH_MASK;
last_seen[hash] = (next_hash - start_stream) & 0xFFFF;
}
@ -718,7 +718,7 @@ void isal_update_histogram_base(uint8_t * start_stream, int length,
lit_len_histogram[literal & 0xFF] += 1;
}
literal = literal >> 8;
hash = compute_hash(literal) & HASH_MASK;
hash = compute_hash(literal) & LVL0_HASH_MASK;
seen = last_seen[hash];
last_seen[hash] = (current - start_stream) & 0xFFFF;
dist = (current - start_stream - seen) & 0xFFFF;

View File

@ -76,7 +76,8 @@
#define INVALID_DIST_HUFFCODE 1
#define INVALID_HUFFCODE 1
#define HASH_MASK (IGZIP_HASH_SIZE - 1)
#define LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1)
#define LVL2_HASH_MASK (IGZIP_LVL2_HASH_SIZE - 1)
#define SHORTEST_MATCH 4
#define LENGTH_BITS 5

View File

@ -55,9 +55,15 @@ static inline uint32_t tzcnt(uint64_t val)
{
uint32_t cnt;
#ifdef __x86_64__
#ifdef __BMI__
cnt = __tzcnt_u64(val);
cnt = cnt / 8;
#elifdef __x86_64__
cnt = __builtin_ctzll(val) / 8;//__tzcnt_u64(val);
cnt = __bsfq(val);
if(val == 0)
cnt = 64;
cnt = cnt / 8;
#else
for(cnt = 8; val > 0; val <<= 8)
@ -178,12 +184,41 @@ static inline uint32_t compute_hash(uint32_t data)
return _mm_crc32_u32(0, data);
#else
uint64_t hash;
/* Use multiplication to create a hash, 0xB2D06057 is a prime number */
return ((uint64_t)data * 0xB2D06057) >> 16;
hash = data;
hash *= 0xB2D06057;
hash >>= 16;
hash *= 0xB2D06057;
hash >>= 16;
return hash;
#endif /* __SSE4_2__ */
}
#define PROD1 0xFFFFE84B
#define PROD2 0xFFFF97B1
/*
 * Multiply-add hash: two identical mixing rounds. Each round splits the
 * 32-bit value into signed 16-bit halves and recombines them as
 * PROD1 * low + PROD2 * high, with the arithmetic wrapping modulo 2^32.
 */
static inline uint32_t compute_hash_mad(uint32_t data)
{
	int round;

	for (round = 0; round < 2; round++) {
		/* The halves are deliberately signed, so they sign-extend
		 * when promoted for the multiplications below. */
		int16_t lo = (int16_t) data;
		int16_t hi = (int16_t) (data >> 16);

		data = PROD1 * lo + PROD2 * hi;
	}

	return data;
}
/*
 * Hash a 64-bit value by hashing its upper and lower 32-bit halves
 * separately with compute_hash() and XOR-ing the two results.
 */
static inline uint32_t compute_long_hash(uint64_t data)
{
	uint32_t upper_hash = compute_hash(data >> 32);
	uint32_t lower_hash = compute_hash(data);

	return upper_hash ^ lower_hash;
}
/**
* @brief Returns how long str1 and str2 have the same symbols.

View File

@ -41,8 +41,6 @@
#define NON_EMPTY_BLOCK_SIZE 6
#define MAX_SYNC_FLUSH_SIZE NON_EMPTY_BLOCK_SIZE + MAX_WRITE_BITS_SIZE
#define MAX_TOKENS (16 * 1024)
#include "huffman.h"
#include "bitbuf2.h"
#include "igzip_lib.h"
@ -67,6 +65,7 @@
#endif
extern void isal_deflate_hash_lvl0(struct isal_zstream *stream, uint8_t * dict, int dict_len);
extern void isal_deflate_hash_lvl2(struct isal_zstream *stream, uint8_t * dict, int dict_len);
extern const uint8_t gzip_hdr[];
extern const uint32_t gzip_hdr_bytes;
extern const uint32_t gzip_trl_bytes;
@ -93,7 +92,8 @@ void isal_deflate_body(struct isal_zstream *stream);
void isal_deflate_finish(struct isal_zstream *stream);
void isal_deflate_icf_body(struct isal_zstream *stream);
void isal_deflate_icf_finish(struct isal_zstream *stream);
void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream);
void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream);
/*****************************************************************/
/* Forward declarations */
@ -217,7 +217,7 @@ static void flush_write_buffer(struct isal_zstream *stream)
static void flush_icf_block(struct isal_zstream *stream)
{
struct isal_zstate *state = &stream->internal_state;
struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
struct BitBuf2 *write_buf = &state->bitbuf;
struct deflate_icf *icf_buf_encoded_next;
@ -243,27 +243,92 @@ static void flush_icf_block(struct isal_zstream *stream)
}
}
/*
 * Validate the level and level buffer supplied in the stream.
 *
 * Level 0 needs no level buffer and always passes. For any other level a
 * NULL level_buf is reported first as ISAL_INVALID_LEVEL_BUF. Levels 1 and 2
 * then require level_buf_size to reach their ISAL_DEF_LVLn_MIN; a buffer that
 * is too small, or any other level value, yields ISAL_INVALID_LEVEL.
 *
 * Returns 0 when the requirements are met.
 */
static int check_level_req(struct isal_zstream *stream)
{
	int too_small;

	if (stream->level == 0)
		return 0;

	if (stream->level_buf == NULL)
		return ISAL_INVALID_LEVEL_BUF;

	if (stream->level == 1)
		too_small = stream->level_buf_size < ISAL_DEF_LVL1_MIN;
	else if (stream->level == 2)
		too_small = stream->level_buf_size < ISAL_DEF_LVL2_MIN;
	else
		return ISAL_INVALID_LEVEL;

	return too_small ? ISAL_INVALID_LEVEL : 0;
}
/*
 * Prepare the level specific part of the level buffer and return the number
 * of bytes it occupies: the common struct level_buf header (without the
 * MAX_LVL_BUF_SIZE reservation) plus, for level 2, struct lvl2_buf.
 *
 * On the first call after has_level_buf_init was cleared, level 2 resets its
 * match queue so matches_next and matches_end both point at matches. The
 * flag is set on every call, for every level.
 */
static int init_lvlX_buf(struct isal_zstream *stream)
{
	struct isal_zstate *zstate = &stream->internal_state;
	struct level_buf *lvl_buf = (struct level_buf *)stream->level_buf;
	int used = sizeof(struct level_buf) - MAX_LVL_BUF_SIZE;

	if (stream->level == 2) {
		if (!zstate->has_level_buf_init) {
			lvl_buf->lvl2.matches_next = lvl_buf->lvl2.matches;
			lvl_buf->lvl2.matches_end = lvl_buf->lvl2.matches;
		}
		used += sizeof(struct lvl2_buf);
	}

	zstate->has_level_buf_init = 1;
	return used;
}
static void init_new_icf_block(struct isal_zstream *stream)
{
struct isal_zstate *state = &stream->internal_state;
struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
int level_struct_size;
if (stream->level_buf_size >=
sizeof(struct level_2_buf) + 100 * sizeof(struct deflate_icf)) {
state->block_next = state->block_end;
level_buf->icf_buf_next = level_buf->icf_buf_start;
level_buf->icf_buf_avail_out =
stream->level_buf_size - sizeof(struct level_2_buf) -
sizeof(struct deflate_icf);
memset(&state->hist, 0, sizeof(struct isal_mod_hist));
state->state = ZSTATE_BODY;
level_struct_size = init_lvlX_buf(stream);
state->block_next = state->block_end;
level_buf->icf_buf_start =
(struct deflate_icf *)(stream->level_buf + level_struct_size);
level_buf->icf_buf_next = level_buf->icf_buf_start;
level_buf->icf_buf_avail_out =
stream->level_buf_size - level_struct_size - sizeof(struct deflate_icf);
memset(&state->hist, 0, sizeof(struct isal_mod_hist));
state->state = ZSTATE_BODY;
}
/*
 * Report whether all pending input has been consumed.
 *
 * Returns 0 while stream->avail_in is non-zero. Once the input is drained,
 * level 2 additionally requires its match queue to be exhausted
 * (matches_next has reached matches_end); every other level returns 1.
 */
static int are_buffers_empty(struct isal_zstream *stream)
{
	struct level_buf *level_buf = (struct level_buf *)stream->level_buf;

	if (stream->avail_in)
		return 0;

	if (stream->level == 2)
		return level_buf->lvl2.matches_next >= level_buf->lvl2.matches_end;

	return 1;
}
static void create_icf_block_hdr(struct isal_zstream *stream)
{
struct isal_zstate *state = &stream->internal_state;
struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
struct BitBuf2 *write_buf = &state->bitbuf;
struct BitBuf2 write_buf_tmp;
uint32_t out_size = stream->avail_out;
@ -286,7 +351,7 @@ static void create_icf_block_hdr(struct isal_zstream *stream)
level_buf->icf_buf_next->dist_extra = 0;
level_buf->icf_buf_next++;
state->has_eob_hdr = (stream->end_of_stream && !stream->avail_in) ? 1 : 0;
state->has_eob_hdr = (stream->end_of_stream && are_buffers_empty(stream)) ? 1 : 0;
if (end_out - stream->next_out >= ISAL_DEF_MAX_HDR_SIZE) {
/* Assumes ISAL_DEF_MAX_HDR_SIZE is large enough to contain a
@ -366,11 +431,22 @@ static void isal_deflate_pass(struct isal_zstream *stream)
write_trailer(stream);
}
/*
 * Run the ICF finish routine matching the stream's level: level 2 has its
 * own routine, every other level uses the level 1 routine.
 */
static void isal_deflate_icf_finish(struct isal_zstream *stream)
{
	if (stream->level == 2)
		isal_deflate_icf_finish_lvl2(stream);
	else
		isal_deflate_icf_finish_lvl1(stream);
}
static void isal_deflate_icf_pass(struct isal_zstream *stream)
{
uint8_t *start_in = stream->next_in;
struct isal_zstate *state = &stream->internal_state;
struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
do {
if (state->state == ZSTATE_NEW_HDR)
@ -631,21 +707,14 @@ static int isal_deflate_int_stateless(struct isal_zstream *stream)
isal_deflate_pass(stream);
} else if (stream->level == 1) {
if (stream->level_buf == NULL || stream->level_buf_size < ISAL_DEF_LVL1_MIN) {
/* Default to internal buffer if invalid size is supplied */
stream->level_buf = state->buffer;
stream->level_buf_size = sizeof(state->buffer);
}
} else if (stream->level <= ISAL_DEF_MAX_LEVEL) {
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR)
reset_match_history(stream);
state->count = 0;
isal_deflate_icf_pass(stream);
} else
return ISAL_INVALID_LEVEL;
}
if (state->state == ZSTATE_END
|| (state->state == ZSTATE_NEW_HDR && stream->flush == FULL_FLUSH))
@ -745,7 +814,7 @@ static uint32_t write_stored_block(struct isal_zstream *stream)
if (state->block_next == state->block_end) {
state->state = state->has_eob_hdr ? ZSTATE_TRL : ZSTATE_NEW_HDR;
if (stream->flush == FULL_FLUSH && state->state == ZSTATE_NEW_HDR
&& stream->avail_in == 0) {
&& are_buffers_empty(stream)) {
/* Clear match history so there are no cross
* block length distance pairs */
reset_match_history(stream);
@ -759,16 +828,28 @@ static uint32_t write_stored_block(struct isal_zstream *stream)
static inline void reset_match_history(struct isal_zstream *stream)
{
struct isal_zstate *state = &stream->internal_state;
uint16_t *head = stream->internal_state.head;
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
uint16_t *hash_table;
uint32_t hash_table_size;
int i = 0;
switch (stream->level) {
case 2:
hash_table = level_buf->lvl2.hash_table;
hash_table_size = sizeof(level_buf->lvl2.hash_table);
break;
default:
hash_table = state->head;
hash_table_size = sizeof(state->head);
}
state->has_hist = IGZIP_NO_HIST;
if ((stream->total_in & 0xFFFF) == 0)
memset(stream->internal_state.head, 0, sizeof(stream->internal_state.head));
memset(hash_table, 0, hash_table_size);
else {
for (i = 0; i < sizeof(state->head) / 2; i++) {
head[i] = (uint16_t) (stream->total_in);
for (i = 0; i < hash_table_size / 2; i++) {
hash_table[i] = (uint16_t) (stream->total_in);
}
}
}
@ -796,6 +877,7 @@ void isal_deflate_init(struct isal_zstream *stream)
state->has_eob = 0;
state->has_eob_hdr = 0;
state->has_hist = IGZIP_NO_HIST;
state->has_level_buf_init = 0;
state->state = ZSTATE_NEW_HDR;
state->count = 0;
@ -823,6 +905,7 @@ void isal_deflate_reset(struct isal_zstream *stream)
state->total_in_start = 0;
state->has_wrap_hdr = 0;
state->has_eob = 0;
state->has_level_buf_init = 0;
state->has_eob_hdr = 0;
state->has_hist = IGZIP_NO_HIST;
state->state = ZSTATE_NEW_HDR;
@ -880,7 +963,18 @@ void isal_deflate_stateless_init(struct isal_zstream *stream)
/**
 * Seed the match history of the stream's current level from a dictionary.
 *
 * The hash table belonging to the level is first filled with 0xFF bytes and
 * then populated by the level specific hashing routine. Level 2 uses the
 * hash table inside the level buffer; every other level uses
 * internal_state.head. has_hist is set to IGZIP_HIST afterwards.
 *
 * @param stream   compression stream; for level 2, stream->level_buf must
 *                 point to a valid level buffer.
 * @param dict     dictionary bytes to hash.
 * @param dict_len number of bytes in dict.
 */
void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len)
{
	struct level_buf *level_buf = (struct level_buf *)stream->level_buf;

	/* Reset history to prevent out of bounds matches; this works because
	 * the dictionary must set at least 1 element in the history */
	switch (stream->level) {
	case 2:
		memset(level_buf->lvl2.hash_table, -1, sizeof(level_buf->lvl2.hash_table));
		isal_deflate_hash_lvl2(stream, dict, dict_len);
		/* Do not fall through: the level 0/1 table is a separate
		 * structure that level 2 does not need rebuilt here. */
		break;
	default:
		memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head));
		isal_deflate_hash_lvl0(stream, dict, dict_len);
		break;
	}

	stream->internal_state.has_hist = IGZIP_HIST;
}
@ -903,10 +997,6 @@ int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t * dict, uint32_t
state->b_bytes_processed = dict_len;
state->b_bytes_valid = dict_len;
/* Reset history to prevent out of bounds matches this works because
* dictionary must set at least 1 element in the history */
memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head));
state->has_hist = IGZIP_DICT_HIST;
return COMP_OK;
@ -925,6 +1015,7 @@ int isal_deflate_stateless(struct isal_zstream *stream)
const uint32_t gzip_flag = stream->gzip_flag;
const uint32_t has_wrap_hdr = state->has_wrap_hdr;
int level_check;
uint32_t stored_len;
/* Final block has already been written */
@ -935,6 +1026,7 @@ int isal_deflate_stateless(struct isal_zstream *stream)
init(&state->bitbuf);
state->state = ZSTATE_NEW_HDR;
state->crc = 0;
state->has_level_buf_init = 0;
if (stream->flush == NO_FLUSH)
stream->end_of_stream = 1;
@ -942,8 +1034,15 @@ int isal_deflate_stateless(struct isal_zstream *stream)
if (stream->flush != NO_FLUSH && stream->flush != FULL_FLUSH)
return INVALID_FLUSH;
if (stream->level != 0 && stream->level != 1)
return ISAL_INVALID_LEVEL;
level_check = check_level_req(stream);
if (level_check) {
if (stream->level == 1 && stream->level_buf == NULL) {
/* Default to internal buffer if invalid size is supplied */
stream->level_buf = state->buffer;
stream->level_buf_size = sizeof(state->buffer);
} else
return level_check;
}
if (avail_in == 0)
stored_len = TYPE0_BLK_HDR_LEN;
@ -1045,7 +1144,7 @@ int isal_deflate(struct isal_zstream *stream)
struct isal_zstate *state = &stream->internal_state;
int ret = COMP_OK;
uint8_t *next_in;
uint32_t avail_in, avail_in_start, total_start, hist_size;
uint32_t avail_in, avail_in_start, total_start, hist_size, future_size;
uint32_t flush_type = stream->flush;
uint32_t end_of_stream = stream->end_of_stream;
uint32_t size = 0;
@ -1055,6 +1154,10 @@ int isal_deflate(struct isal_zstream *stream)
if (stream->flush >= 3)
return INVALID_FLUSH;
ret = check_level_req(stream);
if (ret)
return ret;
next_in = stream->next_in;
avail_in = stream->avail_in;
total_start = stream->total_in;
@ -1124,19 +1227,18 @@ int isal_deflate(struct isal_zstream *stream)
avail_in_start = stream->avail_in;
state->total_in_start = total_start;
isal_deflate_int(stream);
hist_size = hist_add(stream, hist_size, avail_in_start - stream->avail_in);
future_size = stream->avail_in;
if (future_size > ISAL_LOOK_AHEAD)
future_size = ISAL_LOOK_AHEAD;
memmove(state->buffer, stream->next_in - hist_size, hist_size);
memmove(state->buffer, stream->next_in - hist_size, hist_size + future_size);
state->b_bytes_processed = hist_size;
state->b_bytes_valid = hist_size;
if (stream->avail_in <= ISAL_LOOK_AHEAD) {
memmove(state->buffer + hist_size, stream->next_in, stream->avail_in);
state->b_bytes_valid += stream->avail_in;
stream->next_in += stream->avail_in;
stream->total_in += stream->avail_in;
stream->avail_in -= stream->avail_in;
}
state->b_bytes_valid = hist_size + future_size;
stream->next_in += future_size;
stream->total_in += future_size;
stream->avail_in -= future_size;
}
return ret;

View File

@ -12,6 +12,9 @@ static inline void update_state(struct isal_zstream *stream, uint8_t * start_in,
struct isal_zstate *state = &stream->internal_state;
uint32_t bytes_written;
if (next_in - start_in > 0)
state->has_hist = IGZIP_HIST;
stream->next_in = next_in;
stream->total_in += next_in - start_in;
stream->avail_in = end_in - next_in;
@ -54,7 +57,7 @@ void isal_deflate_body_base(struct isal_zstream *stream)
}
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & HASH_MASK;
hash = compute_hash(literal) & LVL0_HASH_MASK;
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - file_start);
@ -75,7 +78,7 @@ void isal_deflate_body_base(struct isal_zstream *stream)
for (; next_hash < end; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH_MASK;
hash = compute_hash(literal) & LVL0_HASH_MASK;
last_seen[hash] = (uint64_t) (next_hash - file_start);
}
@ -134,7 +137,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream)
}
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & HASH_MASK;
hash = compute_hash(literal) & LVL0_HASH_MASK;
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - file_start);
@ -153,7 +156,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream)
for (; next_hash < end - 3; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH_MASK;
hash = compute_hash(literal) & LVL0_HASH_MASK;
last_seen[hash] =
(uint64_t) (next_hash - file_start);
}
@ -222,7 +225,7 @@ void isal_deflate_hash_lvl0_base(struct isal_zstream *stream, uint8_t * dict,
while (next_in <= end_in) {
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & HASH_MASK;
hash = compute_hash(literal) & LVL0_HASH_MASK;
last_seen[hash] = lookup_val;
lookup_val++;
next_in++;

View File

@ -30,11 +30,14 @@
#include <stdint.h>
#include "igzip_lib.h"
#include "encode_df.h"
#include "igzip_level_buf_structs.h"
void isal_deflate_body_base(struct isal_zstream *stream);
void isal_deflate_finish_base(struct isal_zstream *stream);
void isal_deflate_icf_body_base(struct isal_zstream *stream);
void isal_deflate_icf_finish_base(struct isal_zstream *stream);
void isal_deflate_icf_body_lvl1_base(struct isal_zstream *stream);
void isal_deflate_icf_finish_lvl1_base(struct isal_zstream *stream);
void isal_deflate_icf_finish_lvl2_base(struct isal_zstream *stream);
void isal_update_histogram_base(uint8_t * start_stream, int length,
struct isal_huff_histogram *histogram);
struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
@ -45,6 +48,12 @@ uint32_t adler32_base(uint32_t init, const unsigned char *buf, uint64_t len);
int decode_huffman_code_block_stateless_base(struct inflate_state *s);
void isal_deflate_hash_lvl0_base(struct isal_zstream *stream, uint8_t * dict,
uint32_t dict_len);
void isal_deflate_hash_lvl2_base(struct isal_zstream *stream, uint8_t * dict,
uint32_t dict_len);
void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in,
struct deflate_icf *match_lookup, struct level_buf *level_buf);
void gen_icf_map_h1_base(struct isal_zstream *stream,
struct deflate_icf *matches_icf_lookup, uint64_t input_size);
void isal_deflate_body(struct isal_zstream *stream)
{
@ -61,9 +70,19 @@ void isal_deflate_icf_body(struct isal_zstream *stream)
isal_deflate_icf_body_base(stream);
}
void isal_deflate_icf_finish(struct isal_zstream *stream)
void isal_deflate_icf_body_lvl1(struct isal_zstream *stream)
{
isal_deflate_icf_finish_base(stream);
isal_deflate_icf_body_lvl1_base(stream);
}
/* Alias for the level 1 ICF finish routine: forwards the stream unchanged
 * to isal_deflate_icf_finish_lvl1_base(). */
void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream)
{
isal_deflate_icf_finish_lvl1_base(stream);
}
/* Alias for the level 2 ICF finish routine: forwards the stream unchanged
 * to isal_deflate_icf_finish_lvl2_base(). */
void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream)
{
isal_deflate_icf_finish_lvl2_base(stream);
}
void isal_update_histogram(uint8_t * start_stream, int length,
@ -96,5 +115,22 @@ int decode_huffman_code_block_stateless(struct inflate_state *s)
void isal_deflate_hash_lvl0(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len)
{
return isal_deflate_hash_lvl0_base(stream, dict, dict_len);
isal_deflate_hash_lvl0_base(stream, dict, dict_len);
}
/* Alias for the level 2 dictionary hashing routine: forwards all arguments
 * unchanged to isal_deflate_hash_lvl2_base(). */
void isal_deflate_hash_lvl2(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len)
{
isal_deflate_hash_lvl2_base(stream, dict, dict_len);
}
/* Alias for set_long_icf_fg: forwards all four arguments unchanged to
 * set_long_icf_fg_base(). */
void set_long_icf_fg(uint8_t * next_in, uint8_t * end_in,
struct deflate_icf *match_lookup, struct level_buf *level_buf)
{
set_long_icf_fg_base(next_in, end_in, match_lookup, level_buf);
}
/* Alias for gen_icf_map_lh1: forwards all arguments unchanged to the base
 * implementation, which is named gen_icf_map_h1_base (no "l" in "h1"). */
void gen_icf_map_lh1(struct isal_zstream *stream,
struct deflate_icf *matches_icf_lookup, uint64_t input_size)
{
gen_icf_map_h1_base(stream, matches_icf_lookup, input_size);
}

View File

@ -209,8 +209,8 @@ MARK __body_compute_hash_ %+ ARCH
shr tmp3, 8
compute_hash hash2, tmp3
and hash, HASH_MASK
and hash2, HASH_MASK
and hash, LVL0_HASH_MASK
and hash2, LVL0_HASH_MASK
cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
je write_first_byte
@ -321,7 +321,7 @@ len_dist_lit_huffman:
MOVQ tmp5, xdata
shr tmp5, 24
compute_hash tmp4, tmp5
and tmp4, HASH_MASK
and tmp4, LVL0_HASH_MASK
SHLX code4, code4, code_len3
or code4, code3
@ -359,15 +359,15 @@ loop3:
jae loop3_done
mov tmp6, [file_start + tmp3]
compute_hash tmp4, tmp6
and tmp4 %+ d, HASH_MASK
and tmp4 %+ d, LVL0_HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
jmp loop3
loop3_done:
%endif
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
and hash %+ d, LVL0_HASH_MASK
and hash2 %+ d, LVL0_HASH_MASK
; continue
cmp f_i, f_end_i
@ -429,15 +429,15 @@ loop4:
jae loop4_done
mov tmp6, [file_start + tmp3]
compute_hash tmp4, tmp6
and tmp4, HASH_MASK
and tmp4, LVL0_HASH_MASK
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
jmp loop4
loop4_done:
%endif
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
and hash %+ d, LVL0_HASH_MASK
and hash2 %+ d, LVL0_HASH_MASK
; continue
cmp f_i, f_end_i
@ -563,5 +563,5 @@ write_first_byte:
section .data
align 16
mask: dd HASH_MASK, HASH_MASK, HASH_MASK, HASH_MASK
mask: dd LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK
const_D: dq D

View File

@ -90,10 +90,10 @@ main_loop:
xor hash4, hash4
crc32 hash4 %+ d, dword [f_i_tmp + dict_offset + 1]
and hash, HASH_MASK
and hash2, HASH_MASK
and hash3, HASH_MASK
and hash4, HASH_MASK
and hash, LVL0_HASH_MASK
and hash2, LVL0_HASH_MASK
and hash3, LVL0_HASH_MASK
and hash4, LVL0_HASH_MASK
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
add f_i, 1
@ -118,7 +118,7 @@ end_loop:
xor hash, hash
crc32 hash %+ d, dword [f_i + dict_offset]
and hash, HASH_MASK
and hash, LVL0_HASH_MASK
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
add f_i, 1

View File

@ -42,6 +42,59 @@
# define RUN_MEM_SIZE 500000000
#endif
/*
 * Level-buffer size to use for each compression level, indexed by level
 * (0 through 9). Entry n is ISAL_DEF_LVLn_DEFAULT when that macro is defined
 * at compile time and 0 otherwise.
 */
int level_size_buf[10] = {
#ifdef ISAL_DEF_LVL0_DEFAULT
ISAL_DEF_LVL0_DEFAULT,
#else
0,
#endif
#ifdef ISAL_DEF_LVL1_DEFAULT
ISAL_DEF_LVL1_DEFAULT,
#else
0,
#endif
#ifdef ISAL_DEF_LVL2_DEFAULT
ISAL_DEF_LVL2_DEFAULT,
#else
0,
#endif
#ifdef ISAL_DEF_LVL3_DEFAULT
ISAL_DEF_LVL3_DEFAULT,
#else
0,
#endif
#ifdef ISAL_DEF_LVL4_DEFAULT
ISAL_DEF_LVL4_DEFAULT,
#else
0,
#endif
#ifdef ISAL_DEF_LVL5_DEFAULT
ISAL_DEF_LVL5_DEFAULT,
#else
0,
#endif
#ifdef ISAL_DEF_LVL6_DEFAULT
ISAL_DEF_LVL6_DEFAULT,
#else
0,
#endif
#ifdef ISAL_DEF_LVL7_DEFAULT
ISAL_DEF_LVL7_DEFAULT,
#else
0,
#endif
#ifdef ISAL_DEF_LVL8_DEFAULT
ISAL_DEF_LVL8_DEFAULT,
#else
0,
#endif
#ifdef ISAL_DEF_LVL9_DEFAULT
ISAL_DEF_LVL9_DEFAULT,
#else
0,
#endif
};
struct isal_zstream stream;
int usage(void)
@ -69,7 +122,17 @@ int main(int argc, char *argv[])
int level = 0, level_size = 0, avail_in;
char *in_file_name = NULL, *out_file_name = NULL, *dict_file_name = NULL;
while ((c = getopt(argc, argv, "h01i:b:o:d:")) != -1) {
while ((c = getopt(argc, argv, "h0123456789i:b:o:d:")) != -1) {
if (c >= '0' && c <= '9') {
if (c > '0' + ISAL_DEF_MAX_LEVEL)
usage();
else {
level = c - '0';
level_size = level_size_buf[level];
}
continue;
}
switch (c) {
case 'o':
out_file_name = optarg;
@ -85,12 +148,6 @@ int main(int argc, char *argv[])
case 'b':
inbuf_size = atoi(optarg);
break;
case '1':
level = 1;
level_size = ISAL_DEF_LVL1_LARGE;
break;
case '0':
break;
case 'h':
default:
usage();

View File

@ -133,7 +133,7 @@ skip_SLOP:
ja end_loop_2
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
mov byte [stream + _internal_state_has_hist], IGZIP_HIST
jmp encode_literal
@ -145,10 +145,10 @@ loop2:
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end_loop_2
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
mov curr_data %+ d, [file_start + f_i]
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
; f_index = state->head[hash];
movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
@ -211,19 +211,19 @@ loop2:
; only update hash twice
; hash = compute_hash(state->file_start + k) & HASH_MASK;
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
mov tmp6 %+ d, dword [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
add tmp3, 1
; hash = compute_hash(state->file_start + k) & HASH_MASK;
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
mov tmp6 %+ d, dword [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w

View File

@ -0,0 +1,500 @@
%include "reg_sizes.asm"
%include "lz0a_const.asm"
%include "data_struct2.asm"
%ifdef HAVE_AS_KNOWS_AVX512
%ifidn __OUTPUT_FORMAT__, win64
%define arg1 rcx
%define arg2 rdx
%define arg3 r8
%define hash rsi
%define next_in rdi
%else
%define arg1 rdi
%define arg2 rsi
%define arg3 rdx
%define hash r8
%define next_in rcx
%endif
%define stream arg1
%define level_buf arg1
%define matches_next arg2
%define f_i_end arg3
%define f_i rax
%define file_start rbp
%define next_byte r9
%define encode_size r10
%define prev_len r11
%define prev_dist r12
%define hash_table level_buf + _lvl2_hash_table
%define datas zmm0
%define datas_lookup zmm1
%define zhashes zmm2
%define zdists zmm3
%define zdists_lookup zmm4
%define zscatter zmm5
%define zdists2 zmm6
%define zlens1 zmm7
%define zlens2 zmm8
%define zlookup zmm9
%define zlookup2 zmm10
%define match_lookups zmm11
%define zindex zmm12
%define zdist_extra zmm13
%define zdists_tmp zmm14
%define znull_dist_syms zmm15
%define zcode zmm16
%define zthirty zmm17
%define zdist_mask zmm18
%define zshortest_matches zmm19
%define zrot_left zmm20
%define zdatas_perm zmm21
%define zdatas_perm2 zmm22
%define zdatas_perm3 zmm23
%define zdatas_shuf zmm24
%define zhash_prod zmm25
%define zhash_mask zmm26
%define zincrement zmm27
%define zqword_shuf zmm28
%define zones zmm29
%define ztwofiftyfour zmm30
%define zbswap zmm31
%ifidn __OUTPUT_FORMAT__, win64
%define stack_size 10*16 + 4 * 8 + 8
;; Win64 prologue.
;; Allocates stack_size bytes and saves the registers this routine modifies
;; that the Microsoft x64 calling convention treats as nonvolatile:
;; xmm6-xmm15, rsi, rdi, rbp and r12.
;; Frame layout: ten 16-byte xmm slots at [rsp + 0*16 .. rsp + 9*16], then
;; four 8-byte general-purpose slots starting at offset 10*16.
;; alloc_stack / save_reg / end_prolog are not defined in this file
;; (presumably provided by reg_sizes.asm -- confirm).
%macro FUNC_SAVE 0
alloc_stack stack_size
vmovdqa [rsp + 0*16], xmm6
vmovdqa [rsp + 1*16], xmm7
vmovdqa [rsp + 2*16], xmm8
vmovdqa [rsp + 3*16], xmm9
vmovdqa [rsp + 4*16], xmm10
vmovdqa [rsp + 5*16], xmm11
vmovdqa [rsp + 6*16], xmm12
vmovdqa [rsp + 7*16], xmm13
; NOTE(review): this slot alone uses the unaligned store while FUNC_RESTORE
; reloads it with vmovdqa; harmless, but inconsistent with its neighbours.
vmovdqu [rsp + 8*16], xmm14
vmovdqa [rsp + 9*16], xmm15
save_reg rsi, 10*16 + 0*8
save_reg rdi, 10*16 + 1*8
save_reg rbp, 10*16 + 2*8
save_reg r12, 10*16 + 3*8
end_prolog
%endm
;; Win64 epilogue: mirror image of FUNC_SAVE.
;; Reloads xmm6-xmm15 from the ten 16-byte slots at the bottom of the frame,
;; reloads rsi/rdi/rbp/r12 from the four 8-byte slots that start at offset
;; 10*16 (the offsets FUNC_SAVE passes to save_reg), then releases the frame.
%macro FUNC_RESTORE 0
vmovdqa xmm6, [rsp + 0*16]
vmovdqa xmm7, [rsp + 1*16]
vmovdqa xmm8, [rsp + 2*16]
vmovdqa xmm9, [rsp + 3*16]
vmovdqa xmm10, [rsp + 4*16]
vmovdqa xmm11, [rsp + 5*16]
vmovdqa xmm12, [rsp + 6*16]
vmovdqa xmm13, [rsp + 7*16]
vmovdqa xmm14, [rsp + 8*16]
vmovdqa xmm15, [rsp + 9*16]
; General-purpose registers must be loaded FROM their stack slots; the
; destination is the register and the source is an rsp-relative address.
mov rsi, [rsp + 10*16 + 0*8]
mov rdi, [rsp + 10*16 + 1*8]
mov rbp, [rsp + 10*16 + 2*8]
mov r12, [rsp + 10*16 + 3*8]
add rsp, stack_size
%endm
%else
;; Non-Windows prologue: push the two registers this routine uses as
;; file_start (rbp) and prev_dist (r12) so they can be restored on exit.
%macro FUNC_SAVE 0
push rbp
push r12
%endm
;; Non-Windows epilogue: pop in the reverse order of FUNC_SAVE's pushes.
%macro FUNC_RESTORE 0
pop r12
pop rbp
%endm
%endif
%define VECT_SIZE 16
%define HASH_BYTES 2
;; gen_icf_map_lh1_06
;;   arg1 (stream)       : stream pointer; the same register is reused as
;;                         level_buf once the stream fields have been read
;;   arg2 (matches_next) : output table, written VECT_SIZE codes at a time
;;   arg3 (f_i_end)      : byte count on entry, turned into an end index below
;;
;; Processes VECT_SIZE (16) input positions per iteration:
;;   1. hash the 4 bytes at each position and gather the previous index stored
;;      for that hash from the 16-bit hash table, scattering the new index back
;;   2. gather 8 bytes from the candidate match location and compare them with
;;      the 8 bytes at the current position to get a match length
;;   3. build the distance code / extra-bits field for each candidate
;;   4. compare each position's length with the following position's length and
;;      emit either the match or a literal code
;; The emitted vector lags the hashed vector by one position (see the rotate by
;; zrot_left), with prev_len/prev_dist carrying lane 15 into the next iteration.
global gen_icf_map_lh1_06
gen_icf_map_lh1_06:
FUNC_SAVE
;; file_start = next_in - total_in, so [file_start + f_i] is the byte whose
;; stream index is f_i.
mov file_start, [stream + _next_in]
mov f_i %+ d, dword [stream + _total_in]
sub file_start, f_i
;; Convert the byte count in f_i_end to an end index; exit if nothing to do.
add f_i_end, f_i
cmp f_i, f_i_end
jge end_main
;; Prep for main loop
;; stream and level_buf alias arg1: after this load only level_buf fields are
;; reachable through that register.
mov level_buf, [stream + _level_buf]
;; Stop LA bytes short of the end of the supplied input.
sub f_i_end, LA
;; Load loop-invariant constants (tables are in the .data section below).
vmovdqu64 zdatas_perm, [datas_perm]
vmovdqu64 zdatas_shuf, [datas_shuf]
vmovdqu64 zhash_prod, [hash_prod]
vmovdqu64 zhash_mask, [hash_mask]
vmovdqu64 zincrement, [increment]
vmovdqu64 zqword_shuf, [qword_shuf]
vmovdqu64 zdatas_perm2, [datas_perm2]
vmovdqu64 zdatas_perm3, [datas_perm3]
vmovdqu64 zones, [ones]
vmovdqu64 zbswap, [bswap_shuf]
vmovdqu64 zthirty, [thirty]
vmovdqu64 zrot_left, [drot_left]
vmovdqu64 zdist_mask, [dist_mask]
vmovdqu64 zshortest_matches, [shortest_matches]
vmovdqu64 ztwofiftyfour, [twofiftyfour]
vmovdqu64 znull_dist_syms, [null_dist_syms]
;; k0 = 0 so that "knotq k6, k0" produces an all-ones mask.
;; k1 = 0xaaaa...: selects the odd (upper) 16-bit word of every dword.
;; k2 is loaded here but not referenced again in this routine.
kxorq k0, k0, k0
kmovq k1, [k_mask_1]
kmovq k2, [k_mask_2]
;; No previous position yet: carried length and distance start at zero.
xor prev_len, prev_len
xor prev_dist, prev_dist
;; Process first byte
;; Scalar version of the vector hash: two vpmaddwd rounds with hash_prod, then
;; mask. Only the hash table is updated here; no code is stored for this byte
;; at this point.
vmovd zhashes %+ x, dword [f_i + file_start]
vpmaddwd zhashes, zhashes, zhash_prod
vpmaddwd zhashes, zhashes, zhash_prod
vpandd zhashes, zhashes, zhash_mask
vmovd hash %+ d, zhashes %+ x
mov word [hash_table + HASH_BYTES * hash], f_i %+ w
add f_i, 1
cmp f_i, f_i_end
jg end_main
;;hash
;; Build 16 overlapping 4-byte windows (byte offsets 0..15 from f_i) out of a
;; 32-byte load, then hash each window.
vmovdqu64 datas %+ y, [f_i + file_start]
vpermq zhashes, zdatas_perm, datas
vpshufb zhashes, zhashes, zdatas_shuf
vpmaddwd zhashes, zhashes, zhash_prod
vpmaddwd zhashes, zhashes, zhash_prod
vpandd zhashes, zhashes, zhash_mask
;; zlookup / zlookup2: the 8 bytes starting at offsets 0..7 and 8..15, kept for
;; the XOR comparison against the gathered candidate bytes.
vpermq zlookup, zdatas_perm2, datas
vpshufb zlookup, zlookup, zqword_shuf
vpermq zlookup2, zdatas_perm3, datas
vpshufb zlookup2, zlookup2, zqword_shuf
;;gather/scatter hashes
;; Gather/scatter consume their mask register, so k6 is rebuilt before each.
knotq k6, k0
vpgatherdd zdists_lookup {k6}, [hash_table + HASH_BYTES * zhashes]
;; zindex lane i = f_i + i
vpbroadcastd zindex, f_i %+ d
vpaddd zindex, zindex, zincrement
;; Table entries are HASH_BYTES (2) wide but each lane accesses a dword: take
;; the new index in the low word and put back the gathered upper word so the
;; adjacent table entry is rewritten with the value that was just read.
vpblendmw zscatter {k1}, zindex, zdists_lookup
knotq k6, k0
vpscatterdd [hash_table + HASH_BYTES * zhashes] {k6}, zscatter
;; Compute hash for next loop
vmovdqu64 datas %+ y, [f_i + file_start + VECT_SIZE]
vpermq zhashes, zdatas_perm, datas
vpshufb zhashes, zhashes, zdatas_shuf
vpmaddwd zhashes, zhashes, zhash_prod
vpmaddwd zhashes, zhashes, zhash_prod
vpandd zhashes, zhashes, zhash_mask
vmovdqu64 datas_lookup %+ y, [f_i + file_start + 2 * VECT_SIZE]
;; The loop body consumes a full vector per pass; leave the last vector to
;; loop1_end.
sub f_i_end, VECT_SIZE
cmp f_i, f_i_end
jg loop1_end
loop1:
;; next_in addresses lane 0 of the vector whose lookups are in zdists_lookup.
lea next_in, [f_i + file_start]
;; Calculate look back dists
;; dist = ((index - lookup - 1) & (D-1)) + 1, then zdists = lane - dist, i.e.
;; the (negative) offset of the candidate relative to next_in.
vpaddd zdists, zdists_lookup, zones
vpsubd zdists, zindex, zdists
vpandd zdists, zdists, zdist_mask
vpaddd zdists, zdists, zones
vpsubd zdists, zincrement, zdists
;;gather/scatter hashes
;; Advance to the next vector and update the hash table for it; its lookups
;; are consumed on the following pass.
add f_i, VECT_SIZE
kxnorq k6, k6, k6
kxnorq k7, k7, k7
vpgatherdd zdists_lookup {k6}, [hash_table + HASH_BYTES * zhashes]
vpbroadcastd zindex, f_i %+ d
vpaddd zindex, zindex, zincrement
vpblendmw zscatter {k1}, zindex, zdists_lookup
vpscatterdd [hash_table + HASH_BYTES * zhashes] {k7}, zscatter
;; Compute hash for next loop
vpermq zhashes, zdatas_perm, datas_lookup
vpshufb zhashes, zhashes, zdatas_shuf
vpmaddwd zhashes, zhashes, zhash_prod
vpmaddwd zhashes, zhashes, zhash_prod
vpandd zhashes, zhashes, zhash_mask
;;lookup old codes
;; Fetch 8 bytes from each candidate location (low 8 lanes, then high 8).
vextracti32x8 zdists2 %+ y, zdists, 1
kxnorq k6, k6, k6
kxnorq k7, k7, k7
vpgatherdq zlens1 {k6}, [next_in + zdists %+ y]
vpgatherdq zlens2 {k7}, [next_in + zdists2 %+ y]
;; Calculate dist_icf_code
;; Recover (dist - 1) from the offset, then split it into a distance symbol
;; and extra bits: extra-bit count = 30 - lzcnt(dist - 1) for dist - 1 > 1,
;; otherwise 0 (k5 zero-masks those lanes).
vpaddd zdists, zdists, zones
vpsubd zdists, zincrement, zdists
vpcmpgtd k5, zdists, zones
vplzcntd zdist_extra, zdists
vpsubd zdist_extra {k5}{z}, zthirty, zdist_extra
vpsllvd zcode, zones, zdist_extra
vpsubd zcode, zcode, zones
vpandd zcode {k5}{z}, zdists, zcode
vpsrlvd zdists, zdists, zdist_extra
vpslld zdist_extra, zdist_extra, 1
vpaddd zdists, zdists, zdist_extra
vpslld zcode, zcode, EXTRA_BITS_OFFSET - DIST_OFFSET
vpaddd zdists, zdists, zcode
;; Setup zdists for combining with zlens
vpslld zdists, zdists, DIST_OFFSET
;; xor current data with lookback dist
;; Equal bytes become zero bytes.
vpxorq zlens1, zlens1, zlookup
vpxorq zlens2, zlens2, zlookup2
;; Setup registers for next loop
vpermq zlookup, zdatas_perm2, datas
vpshufb zlookup, zlookup, zqword_shuf
vpermq zlookup2, zdatas_perm3, datas
vpshufb zlookup2, zlookup2, zqword_shuf
;; Compute match length
;; Byte-swap so the first input byte is most significant, count leading zero
;; bits, narrow to dwords and divide by 8: number of equal leading bytes (0-8).
vpshufb zlens1, zlens1, zbswap
vpshufb zlens2, zlens2, zbswap
vplzcntq zlens1, zlens1
vplzcntq zlens2, zlens2
vpmovqd zlens1 %+ y, zlens1
vpmovqd zlens2 %+ y, zlens2
vinserti32x8 zlens1, zlens2 %+ y, 1
vpsrld zlens1, zlens1, 3
;; Preload for next loops
vmovdqu64 datas, datas_lookup
vmovdqu64 datas_lookup %+ y, [f_i + file_start + 2 * VECT_SIZE]
;; Zero out matches which should not be taken
;; k3 = 1 (lane 0 only). Rotate lengths and distance codes up by one lane so
;; lane i holds the values of the previous position; lane 0 receives the value
;; carried in prev_len/prev_dist and the old lane 15 becomes the new carry.
kshiftrw k3, k1, 15
vpermd zlens2, zrot_left, zlens1
vpermd zdists, zrot_left, zdists
vmovd zdists_tmp %+ x, prev_len %+ d
vmovd prev_len %+ d, zlens2 %+ x
vmovdqu32 zlens2 {k3}, zdists_tmp
vmovd zdists_tmp %+ x, prev_dist %+ d
vmovd prev_dist %+ d, zdists %+ x
vmovdqu32 zdists {k3}, zdists_tmp
;; k3 := lanes emitted as literals: previous-position length not greater than
;;       MIN_DEF_MATCH, or the current position has a strictly longer match.
;; k4 := complement of k3, lanes emitted as matches.
vpcmpgtd k3, zlens2, zshortest_matches
vpcmpgtd k4, zlens1, zlens2
knotq k3, k3
korq k3, k3, k4
knotq k4, k3
vmovdqu32 zlens1 {k4}{z}, zlens2
;; Update zdists to match zlens1
;; Match lanes: distance field + length + 0xfe.
;; Literal lanes: zero-extended input byte + null_dist_syms entry.
;; f_i has already advanced by VECT_SIZE, hence the - VECT_SIZE - 1 to reach
;; the byte one position before the start of this pass.
vpaddd zdists, zdists, zlens1
vpaddd zdists, zdists, ztwofiftyfour
vpmovzxbd zdists {k3}, [f_i + file_start - VECT_SIZE - 1]
vpaddd zdists {k3}, zdists, znull_dist_syms
;;Store zdists
vmovdqu64 [matches_next], zdists
add matches_next, ICF_CODE_BYTES * VECT_SIZE
cmp f_i, f_i_end
jle loop1
loop1_end:
;; Final vector: same steps as the loop body, minus the hash-table update and
;; the preloads for a following pass. f_i is not advanced here, so the literal
;; load below uses - 1 instead of - VECT_SIZE - 1.
lea next_in, [f_i + file_start]
;; Calculate look back dists
vpaddd zdists, zdists_lookup, zones
vpsubd zdists, zindex, zdists
vpandd zdists, zdists, zdist_mask
vpaddd zdists, zdists, zones
vpsubd zdists, zincrement, zdists
;;lookup old codes
vextracti32x8 zdists2 %+ y, zdists, 1
kxnorq k6, k6, k6
kxnorq k7, k7, k7
vpgatherdq zlens1 {k6}, [next_in + zdists %+ y]
vpgatherdq zlens2 {k7}, [next_in + zdists2 %+ y]
;; Calculate dist_icf_code
vpaddd zdists, zdists, zones
vpsubd zdists, zincrement, zdists
vpcmpgtd k5, zdists, zones
vplzcntd zdist_extra, zdists
vpsubd zdist_extra {k5}{z}, zthirty, zdist_extra
vpsllvd zcode, zones, zdist_extra
vpsubd zcode, zcode, zones
vpandd zcode {k5}{z}, zdists, zcode
vpsrlvd zdists, zdists, zdist_extra
vpslld zdist_extra, zdist_extra, 1
vpaddd zdists, zdists, zdist_extra
vpslld zcode, zcode, EXTRA_BITS_OFFSET - DIST_OFFSET
vpaddd zdists, zdists, zcode
;; Setup zdists for combining with zlens
vpslld zdists, zdists, DIST_OFFSET
;; xor current data with lookback dist
vpxorq zlens1, zlens1, zlookup
vpxorq zlens2, zlens2, zlookup2
;; Compute match length
vpshufb zlens1, zlens1, zbswap
vpshufb zlens2, zlens2, zbswap
vplzcntq zlens1, zlens1
vplzcntq zlens2, zlens2
vpmovqd zlens1 %+ y, zlens1
vpmovqd zlens2 %+ y, zlens2
vinserti32x8 zlens1, zlens2 %+ y, 1
vpsrld zlens1, zlens1, 3
;; Zero out matches which should not be taken
kshiftrw k3, k1, 15
vpermd zlens2, zrot_left, zlens1
vpermd zdists, zrot_left, zdists
vmovd zdists_tmp %+ x, prev_len %+ d
vmovd prev_len %+ d, zlens2 %+ x
vmovdqu32 zlens2 {k3}, zdists_tmp
vmovd zdists_tmp %+ x, prev_dist %+ d
vmovd prev_dist %+ d, zdists %+ x
vmovdqu32 zdists {k3}, zdists_tmp
vpcmpgtd k3, zlens2, zshortest_matches
vpcmpgtd k4, zlens1, zlens2
knotq k3, k3
korq k3, k3, k4
knotq k4, k3
vmovdqu32 zlens1 {k4}{z}, zlens2
;; Update zdists to match zlens1
vpaddd zdists, zdists, zlens1
vpaddd zdists, zdists, ztwofiftyfour
vpmovzxbd zdists {k3}, [f_i + file_start - 1]
vpaddd zdists {k3}, zdists, znull_dist_syms
;;Store zdists
vmovdqu64 [matches_next], zdists
end_main:
FUNC_RESTORE
ret
section .data
align 64
datas_perm:
dq 0x0, 0x1, 0x0, 0x1, 0x1, 0x2, 0x1, 0x2
datas_perm2:
dq 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1
datas_perm3:
dq 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2
drot_left:
dd 0xf, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6
dd 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe
datas_shuf:
db 0x0, 0x1, 0x2, 0x3
db 0x1, 0x2, 0x3, 0x4
db 0x2, 0x3, 0x4, 0x5
db 0x3, 0x4, 0x5, 0x6
db 0x4, 0x5, 0x6, 0x7
db 0x5, 0x6, 0x7, 0x8
db 0x6, 0x7, 0x8, 0x9
db 0x7, 0x8, 0x9, 0xa
db 0x0, 0x1, 0x2, 0x3
db 0x1, 0x2, 0x3, 0x4
db 0x2, 0x3, 0x4, 0x5
db 0x3, 0x4, 0x5, 0x6
db 0x4, 0x5, 0x6, 0x7
db 0x5, 0x6, 0x7, 0x8
db 0x6, 0x7, 0x8, 0x9
db 0x7, 0x8, 0x9, 0xa
bswap_shuf:
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
qword_shuf:
db 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
db 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8
db 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9
db 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa
db 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb
db 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc
db 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd
db 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe
db 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
%define PROD1 0xE84B
%define PROD2 0x97B1
hash_prod:
dw PROD1, PROD2, PROD1, PROD2, PROD1, PROD2, PROD1, PROD2
dw PROD1, PROD2, PROD1, PROD2, PROD1, PROD2, PROD1, PROD2
dw PROD1, PROD2, PROD1, PROD2, PROD1, PROD2, PROD1, PROD2
dw PROD1, PROD2, PROD1, PROD2, PROD1, PROD2, PROD1, PROD2
null_dist_syms:
dd LIT, LIT, LIT, LIT, LIT, LIT, LIT, LIT
dd LIT, LIT, LIT, LIT, LIT, LIT, LIT, LIT
increment:
dd 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
dd 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
ones:
dd 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1
dd 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1
thirty:
dd 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e
dd 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e
twofiftyfour:
dd 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe
dd 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe
dist_mask:
dd D-1, D-1, D-1, D-1, D-1, D-1, D-1, D-1
dd D-1, D-1, D-1, D-1, D-1, D-1, D-1, D-1
hash_mask:
dd LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK
dd LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK
dd LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK
dd LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK
lit_len_mask:
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
shortest_matches:
dd MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH
dd MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH
dd MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH
dd MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH
k_mask_1:
dq 0xaaaaaaaaaaaaaaaa
k_mask_2:
dq 0x7fff
%endif

View File

@ -26,11 +26,11 @@ static inline void update_state(struct isal_zstream *stream, uint8_t * start_in,
stream->internal_state.block_end = stream->total_in;
stream->avail_in = end_in - next_in;
((struct level_2_buf *)stream->level_buf)->icf_buf_next = next_out;
((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out = end_out - next_out;
((struct level_buf *)stream->level_buf)->icf_buf_next = next_out;
((struct level_buf *)stream->level_buf)->icf_buf_avail_out = end_out - next_out;
}
void isal_deflate_icf_body_base(struct isal_zstream *stream)
void isal_deflate_icf_body_lvl1_base(struct isal_zstream *stream)
{
uint32_t literal, hash;
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
@ -52,9 +52,9 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream)
end_in = start_in + stream->avail_in;
next_in = start_in;
start_out = ((struct level_2_buf *)stream->level_buf)->icf_buf_next;
start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next;
end_out =
start_out + ((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out /
start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out /
sizeof(struct deflate_icf);
next_out = start_out;
@ -68,7 +68,7 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream)
}
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & HASH_MASK;
hash = compute_hash(literal) & LVL0_HASH_MASK;
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - file_start);
@ -89,7 +89,7 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream)
for (; next_hash < end; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH_MASK;
hash = compute_hash(literal) & LVL0_HASH_MASK;
last_seen[hash] = (uint64_t) (next_hash - file_start);
}
@ -124,7 +124,7 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream)
}
void isal_deflate_icf_finish_base(struct isal_zstream *stream)
void isal_deflate_icf_finish_lvl1_base(struct isal_zstream *stream)
{
uint32_t literal = 0, hash;
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
@ -140,8 +140,8 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream)
end_in = start_in + stream->avail_in;
next_in = start_in;
start_out = ((struct level_2_buf *)stream->level_buf)->icf_buf_next;
end_out = start_out + ((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out /
start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next;
end_out = start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out /
sizeof(struct deflate_icf);
next_out = start_out;
@ -160,7 +160,7 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream)
}
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & HASH_MASK;
hash = compute_hash(literal) & LVL0_HASH_MASK;
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - file_start);
@ -178,7 +178,7 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream)
for (; next_hash < end - 3; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH_MASK;
hash = compute_hash(literal) & LVL0_HASH_MASK;
last_seen[hash] = (uint64_t) (next_hash - file_start);
}
@ -231,3 +231,131 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream)
return;
}
void isal_deflate_icf_finish_lvl2_base(struct isal_zstream *stream)
{
uint32_t literal = 0, hash;
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
struct deflate_icf *start_out, *next_out, *end_out;
uint16_t match_length;
uint32_t dist;
uint32_t code, code2, extra_bits;
struct isal_zstate *state = &stream->internal_state;
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
uint16_t *last_seen = level_buf->lvl2.hash_table;
uint8_t *file_start = stream->next_in - stream->total_in;
start_in = stream->next_in;
end_in = start_in + stream->avail_in;
next_in = start_in;
start_out = level_buf->icf_buf_next;
end_out = start_out + level_buf->icf_buf_avail_out / sizeof(struct deflate_icf);
next_out = start_out;
if (stream->avail_in == 0) {
if (stream->end_of_stream || stream->flush != NO_FLUSH)
state->state = ZSTATE_CREATE_HDR;
return;
}
while (next_in + 3 < end_in) {
if (next_out >= end_out) {
state->state = ZSTATE_CREATE_HDR;
update_state(stream, start_in, next_in, end_in, start_out, next_out,
end_out);
return;
}
literal = *(uint32_t *) next_in;
hash = compute_hash_mad(literal) & LVL2_HASH_MASK;
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - file_start);
if (dist - 1 < IGZIP_HIST_SIZE - 1) { /* The -1 are to handle the case when dist = 0 */
match_length = compare258(next_in - dist, next_in, end_in - next_in);
if (match_length >= SHORTEST_MATCH) {
next_hash = next_in;
#ifdef ISAL_LIMIT_HASH_UPDATE
end = next_hash + 3;
#else
end = next_hash + match_length;
#endif
next_hash++;
for (; next_hash < end - 3; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash_mad(literal) & LVL2_HASH_MASK;
last_seen[hash] = (uint64_t) (next_hash - file_start);
}
get_len_icf_code(match_length, &code);
get_dist_icf_code(dist, &code2, &extra_bits);
state->hist.ll_hist[code]++;
state->hist.d_hist[code2]++;
write_deflate_icf(next_out, code, code2, extra_bits);
next_out++;
next_in += match_length;
continue;
}
}
get_lit_icf_code(literal & 0xFF, &code);
state->hist.ll_hist[code]++;
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
next_out++;
next_in++;
}
while (next_in < end_in) {
if (next_out >= end_out) {
state->state = ZSTATE_CREATE_HDR;
update_state(stream, start_in, next_in, end_in, start_out, next_out,
end_out);
return;
}
literal = *next_in;
get_lit_icf_code(literal & 0xFF, &code);
state->hist.ll_hist[code]++;
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
next_out++;
next_in++;
}
if (next_in == end_in) {
if (stream->end_of_stream || stream->flush != NO_FLUSH)
state->state = ZSTATE_CREATE_HDR;
}
update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
return;
}
/*
 * Seeds the level-2 hash table from a dictionary.
 *
 * stream   - stream whose level_buf holds the level-2 hash table; total_in is
 *            read to derive the index assigned to the first dictionary byte
 *            (total_in - dict_len, truncated to 16 bits).
 * dict     - dictionary bytes.
 * dict_len - number of bytes in dict.
 *
 * Every position that still has SHORTEST_MATCH bytes after it is hashed with
 * compute_hash_mad and recorded in the table. A dictionary shorter than
 * SHORTEST_MATCH contributes nothing; it is rejected up front because
 * "dict + dict_len - SHORTEST_MATCH" would otherwise point before the buffer
 * (and, for a null/empty dictionary, wrap so that the loop dereferences it).
 */
void isal_deflate_hash_lvl2_base(struct isal_zstream *stream, uint8_t * dict,
				 uint32_t dict_len)
{
	struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
	uint16_t *last_seen = level_buf->lvl2.hash_table;
	uint16_t lookup_val = stream->total_in - dict_len;
	uint32_t literal, hash;
	uint32_t pos, count;

	if (dict_len < SHORTEST_MATCH)
		return;

	/* Same positions as the original loop: dict .. dict + dict_len - SHORTEST_MATCH */
	count = dict_len - SHORTEST_MATCH + 1;

	for (pos = 0; pos < count; pos++) {
		literal = *(uint32_t *) (dict + pos);
		hash = compute_hash_mad(literal) & LVL2_HASH_MASK;
		last_seen[hash] = lookup_val;
		lookup_val++;
	}
}

342
igzip/igzip_icf_body.c Normal file
View File

@ -0,0 +1,342 @@
#include "igzip_lib.h"
#include "huffman.h"
#include "encode_df.h"
#include "igzip_level_buf_structs.h"
extern void gen_icf_map_lh1(struct isal_zstream *, struct deflate_icf *, uint32_t);
extern void set_long_icf_fg(uint8_t *, uint8_t *, struct deflate_icf *, struct level_buf *);
extern void isal_deflate_icf_body_lvl1(struct isal_zstream *);
/*
*************************************************************
* Helper functions
************************************************************
*/
/*
 * Stores one ICF code as a single 32-bit write.
 * Bit layout, least significant first: lit_len (LIT_LEN_BIT_COUNT bits),
 * then lit_dist (DIST_LIT_BIT_COUNT bits), then the distance extra bits.
 */
static inline void write_deflate_icf(struct deflate_icf *icf, uint32_t lit_len,
				     uint32_t lit_dist, uint32_t extra_bits)
{
	uint32_t dist_field = lit_dist << LIT_LEN_BIT_COUNT;
	uint32_t extra_field = extra_bits << (LIT_LEN_BIT_COUNT + DIST_LIT_BIT_COUNT);

	*(uint32_t *) icf = lit_len | dist_field | extra_field;
}
/*
 * Hashes every position in [next_in, end_in - ISAL_LOOK_AHEAD) into the
 * level-2 hash table.
 *
 * For position i (relative to next_in), last_seen[i] receives the table entry
 * that was stored for that hash before this call replaced it with the
 * position's own offset from the start of the stream data
 * (stream->next_in - stream->total_in).
 *
 * The table is hashed with compute_hash_mad, the function the other level-2
 * code that touches lvl2.hash_table uses; a different hash here would make
 * the entries written by one path unfindable by the others.
 */
void hash_section(struct isal_zstream *stream, uint8_t * next_in, uint8_t * end_in,
		  uint16_t * last_seen)
{
	uint32_t index, hash_input, hash;
	uint8_t *file_start = stream->next_in - stream->total_in;
	struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
	uint16_t *hash_table = level_buf->lvl2.hash_table;

	/* Compute Hashes */
	for (index = 0; index < end_in - next_in - ISAL_LOOK_AHEAD; index++) {
		hash_input = *(uint32_t *) (next_in + index);
		hash = compute_hash_mad(hash_input) & LVL2_HASH_MASK;
		last_seen[index] = hash_table[hash];
		hash_table[hash] = (uint64_t) (next_in + index - file_start);
	}

	return;
}
/*
 * Extends matches in a match-lookup table beyond the 8 bytes the table
 * generator compares.
 *
 * For every entry whose lit_len is at least 8 + LEN_OFFSET, the match is
 * re-measured from byte 8 onward with compare258. The extended length is then
 * written to this entry and, decremented by one each step, to the following
 * entries, for as long as it exceeds the length already stored there and
 * stays at or above LEN_OFFSET + SHORTEST_MATCH.
 *
 * next_in / end_in - input range the table describes; processing stops
 *                    ISAL_LOOK_AHEAD bytes before end_in.
 * match_lookup     - table entry corresponding to next_in.
 * level_buf        - not used by this implementation.
 */
void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in,
			  struct deflate_icf *match_lookup, struct level_buf *level_buf)
{
	/* Base distance for each distance symbol. */
	uint32_t dist_start[] = {
		0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d,
		0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1,
		0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01,
		0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
	};
	uint8_t *stop = end_in - ISAL_LOOK_AHEAD;
	uint32_t sym, extra, back, stored_len, full_len;

	for (; next_in < stop; match_lookup++, next_in++) {
		sym = match_lookup->lit_dist;
		extra = match_lookup->dist_extra;
		back = dist_start[sym] + extra;
		stored_len = match_lookup->lit_len;

		if (stored_len < 8 + LEN_OFFSET)
			continue;

		full_len = compare258(next_in - back + 8, next_in + 8, 250) + LEN_OFFSET + 8;

		while (full_len > match_lookup->lit_len
		       && full_len >= LEN_OFFSET + SHORTEST_MATCH) {
			write_deflate_icf(match_lookup, full_len, sym, extra);
			match_lookup++;
			next_in++;
			full_len--;
		}
	}
}
/*
*************************************************************
* Methods for generating one pass match lookup table
************************************************************
*/
/*
 * Builds a one-entry-per-input-byte table of ICF codes for the next
 * input_size bytes of the stream.
 *
 * stream             - supplies next_in/total_in and the level-2 hash table.
 * matches_icf_lookup - output table; entry i describes input byte i.
 * input_size         - number of input bytes available; only the first
 *                      input_size - ISAL_LOOK_AHEAD positions get an entry.
 *                      Nothing is written if input_size < ISAL_LOOK_AHEAD.
 *
 * The first byte is always recorded as a literal. For every later position
 * the 4-byte hash selects a candidate earlier position; 8 bytes at both
 * places are compared, and if at least SHORTEST_MATCH of them agree the entry
 * becomes a length/distance code, otherwise a literal.
 *
 * Hashing uses compute_hash_mad: the level-2 hash table is also written by
 * isal_deflate_hash_lvl2_base and isal_deflate_icf_finish_lvl2_base, which
 * use that function, so all of them must agree for inserted positions to be
 * found again.
 */
void gen_icf_map_h1_base(struct isal_zstream *stream,
			 struct deflate_icf *matches_icf_lookup, uint64_t input_size)
{
	uint32_t dist, len, extra_bits;
	uint8_t *next_in = stream->next_in, *end_in = stream->next_in + input_size;
	uint8_t *file_start = stream->next_in - stream->total_in;
	uint32_t hash;
	uint64_t next_bytes, match_bytes;
	uint64_t match;
	struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
	uint16_t *hash_table = level_buf->lvl2.hash_table;

	if (input_size < ISAL_LOOK_AHEAD)
		return;

	/* First byte: literal with the "no distance" symbol (0x1e). */
	matches_icf_lookup->lit_len = *next_in;
	matches_icf_lookup->lit_dist = 0x1e;
	matches_icf_lookup->dist_extra = 0;

	hash = compute_hash_mad(*(uint32_t *) next_in) & LVL2_HASH_MASK;
	hash_table[hash] = (uint64_t) (next_in - file_start);

	next_in++;
	matches_icf_lookup++;

	while (next_in < end_in - ISAL_LOOK_AHEAD) {
		hash = compute_hash_mad(*(uint32_t *) next_in) & LVL2_HASH_MASK;
		dist = (next_in - file_start - hash_table[hash]);
		/* Fold into 1..IGZIP_HIST_SIZE so a distance of 0 is never produced. */
		dist = ((dist - 1) & (IGZIP_HIST_SIZE - 1)) + 1;
		hash_table[hash] = (uint64_t) (next_in - file_start);

		match_bytes = *(uint64_t *) (next_in - dist);
		next_bytes = *(uint64_t *) next_in;
		match = next_bytes ^ match_bytes;

		/* tzcnt is defined elsewhere; used here as the count of equal
		 * leading bytes (compared against SHORTEST_MATCH) -- confirm units. */
		len = tzcnt(match);

		if (len >= SHORTEST_MATCH) {
			len += LEN_OFFSET;
			/* dist is reused to receive the distance symbol. */
			get_dist_icf_code(dist, &dist, &extra_bits);
			write_deflate_icf(matches_icf_lookup, len, dist, extra_bits);
		} else {
			write_deflate_icf(matches_icf_lookup, *next_in, 0x1e, 0);
		}

		next_in++;
		matches_icf_lookup++;
	}
}
/*
*************************************************************
* One pass methods for parsing provided match lookup table
************************************************************
*/
/*
 * Walks a per-byte match table and copies the selected codes into the
 * level buffer's ICF output, updating the literal/length and distance
 * histograms as it goes.
 *
 * stream       - stream whose level_buf receives the output.
 * matches_next - first table entry to consume.
 * matches_end  - one past the last valid table entry.
 *
 * Returns the table position reached. This can be past matches_end when the
 * last match taken extends beyond the table; in that case (and only if the
 * table was non-empty) the stream's next_in/avail_in/total_in are advanced by
 * the overshoot. state->block_end is advanced by the number of table entries
 * consumed and icf_buf_avail_out is recomputed.
 */
struct deflate_icf *compress_icf_map_g(struct isal_zstream *stream,
				       struct deflate_icf *matches_next,
				       struct deflate_icf *matches_end)
{
	uint32_t lit_len, lit_len2, dist;
	uint64_t code;
	struct isal_zstate *state = &stream->internal_state;
	struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
	struct deflate_icf *matches_start = matches_next;
	struct deflate_icf *icf_buf_end =
	    level_buf->icf_buf_next +
	    level_buf->icf_buf_avail_out / sizeof(struct deflate_icf);

	/* Main loop: look at two table entries per iteration (one 64-bit load),
	 * which requires two readable entries and room for two output codes. */
	while (matches_next < matches_end - 1 && level_buf->icf_buf_next < icf_buf_end - 1) {
		code = *(uint64_t *) matches_next;
		lit_len = code & LIT_LEN_MASK;
		lit_len2 = (code >> ICF_CODE_LEN) & LIT_LEN_MASK;
		state->hist.ll_hist[lit_len]++;

		if (lit_len >= LEN_START) {
			/* First entry is a match: emit it and skip the bytes it covers. */
			*(uint32_t *) level_buf->icf_buf_next = code;
			level_buf->icf_buf_next++;

			dist = (code >> ICF_DIST_OFFSET) & DIST_LIT_MASK;
			state->hist.d_hist[dist]++;
			lit_len -= LEN_OFFSET;
			matches_next += lit_len;

		} else if (lit_len2 >= LEN_START) {
			/* Literal followed by a match: emit both with one 64-bit store. */
			*(uint64_t *) level_buf->icf_buf_next = code;
			level_buf->icf_buf_next += 2;

			state->hist.ll_hist[lit_len2]++;

			dist = (code >> (ICF_CODE_LEN + ICF_DIST_OFFSET)) & DIST_LIT_MASK;
			state->hist.d_hist[dist]++;
			/* +1 for the literal consumed ahead of the match. */
			lit_len2 -= LEN_OFFSET - 1;
			matches_next += lit_len2;

		} else {
			/* Two literals: pack them into one code, the second one stored
			 * in the distance field offset by LIT_START. */
			code = ((lit_len2 + LIT_START) << ICF_DIST_OFFSET) | lit_len;
			*(uint32_t *) level_buf->icf_buf_next = code;
			level_buf->icf_buf_next++;

			state->hist.ll_hist[lit_len2]++;

			matches_next += 2;
		}
	}

	/* Tail loop: one entry at a time for whatever the paired loop left. */
	while (matches_next < matches_end && level_buf->icf_buf_next < icf_buf_end) {
		code = *(uint32_t *) matches_next;
		lit_len = code & LIT_LEN_MASK;
		*(uint32_t *) level_buf->icf_buf_next = code;
		level_buf->icf_buf_next++;

		state->hist.ll_hist[lit_len]++;
		if (lit_len >= LEN_START) {
			/* Same field extraction as the main loop, using the named
			 * layout constants instead of literal shift/mask values. */
			dist = (code >> ICF_DIST_OFFSET) & DIST_LIT_MASK;
			state->hist.d_hist[dist]++;
			lit_len -= LEN_OFFSET;
			matches_next += lit_len;
		} else {
			matches_next++;
		}
	}

	level_buf->icf_buf_avail_out =
	    (icf_buf_end - level_buf->icf_buf_next) * sizeof(struct deflate_icf);

	state->block_end += matches_next - matches_start;
	/* A match taken near the end of the table may cover input beyond it;
	 * account for those extra bytes as consumed. */
	if (matches_next > matches_end && matches_start < matches_end) {
		stream->next_in += matches_next - matches_end;
		stream->avail_in -= matches_next - matches_end;
		stream->total_in += matches_next - matches_end;
	}

	return matches_next;

}
/*
*************************************************************
* Compression functions combining different methods
************************************************************
*/
/*
 * Chooses the next compressor state after a body pass:
 *  - ZSTATE_CREATE_HDR when the ICF output buffer has no space left;
 *  - otherwise ZSTATE_FLUSH_READ_BUFFER when no more than ISAL_LOOK_AHEAD
 *    bytes of input remain and the stream is ending or being flushed;
 *  - otherwise the state is left unchanged.
 */
static inline void icf_body_next_state(struct isal_zstream *stream)
{
	struct isal_zstate *state = &stream->internal_state;
	struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
	int finishing;

	if (level_buf->icf_buf_avail_out <= 0) {
		state->state = ZSTATE_CREATE_HDR;
		return;
	}

	finishing = (stream->end_of_stream || stream->flush != NO_FLUSH);
	if (stream->avail_in <= ISAL_LOOK_AHEAD && finishing)
		state->state = ZSTATE_FLUSH_READ_BUFFER;
}
/*
 * Level-2 body pass using the C match-table generator.
 *
 * First drains any entries left in the match table from a previous call.
 * Then, while the table is exhausted and more than ISAL_LOOK_AHEAD bytes of
 * input remain: builds a new table for up to MATCH_BUF_SIZE bytes
 * (gen_icf_map_h1_base), extends long matches (set_long_icf_fg), advances the
 * stream past all but the last ISAL_LOOK_AHEAD of those bytes, and drains the
 * table into the ICF buffer. The table cursor and end are saved back into the
 * level buffer and the next state is selected.
 */
void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream)
{
	struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
	struct deflate_icf *table = level_buf->lvl2.matches;
	struct deflate_icf *cursor = level_buf->lvl2.matches_next;
	struct deflate_icf *table_end = level_buf->lvl2.matches_end;
	uint32_t input_size, consumed;

	cursor = compress_icf_map_g(stream, cursor, table_end);

	while (cursor >= table_end) {
		input_size = MATCH_BUF_SIZE;
		if (input_size > stream->avail_in)
			input_size = stream->avail_in;

		if (input_size <= ISAL_LOOK_AHEAD)
			break;

		gen_icf_map_h1_base(stream, table, input_size);

		set_long_icf_fg(stream->next_in, stream->next_in + input_size, table,
				level_buf);

		/* The final ISAL_LOOK_AHEAD bytes have no table entry yet. */
		consumed = input_size - ISAL_LOOK_AHEAD;
		stream->next_in += consumed;
		stream->avail_in -= consumed;
		stream->total_in += consumed;

		table_end = table + consumed;
		cursor = compress_icf_map_g(stream, table, table_end);
	}

	level_buf->lvl2.matches_next = cursor;
	level_buf->lvl2.matches_end = table_end;

	icf_body_next_state(stream);
}
/*
 * Level-2 body pass using the externally provided match-table generator
 * gen_icf_map_lh1.
 *
 * First drains any entries left in the match table from a previous call.
 * Then, while the table is exhausted and more than ISAL_LOOK_AHEAD bytes of
 * input remain: builds a new table for up to MATCH_BUF_SIZE bytes, extends
 * long matches (set_long_icf_fg), advances the stream past all but the last
 * ISAL_LOOK_AHEAD of those bytes, and drains the table into the ICF buffer.
 * The table cursor and end are saved back into the level buffer and the next
 * state is selected.
 */
void icf_body_lazyhash1_fillgreedy_greedy(struct isal_zstream *stream)
{
	struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
	struct deflate_icf *table = level_buf->lvl2.matches;
	struct deflate_icf *cursor = level_buf->lvl2.matches_next;
	struct deflate_icf *table_end = level_buf->lvl2.matches_end;
	uint32_t input_size, consumed;

	cursor = compress_icf_map_g(stream, cursor, table_end);

	while (cursor >= table_end) {
		input_size = MATCH_BUF_SIZE;
		if (input_size > stream->avail_in)
			input_size = stream->avail_in;

		if (input_size <= ISAL_LOOK_AHEAD)
			break;

		gen_icf_map_lh1(stream, table, input_size);

		set_long_icf_fg(stream->next_in, stream->next_in + input_size, table,
				level_buf);

		/* The final ISAL_LOOK_AHEAD bytes have no table entry yet. */
		consumed = input_size - ISAL_LOOK_AHEAD;
		stream->next_in += consumed;
		stream->avail_in -= consumed;
		stream->total_in += consumed;

		table_end = table + consumed;
		cursor = compress_icf_map_g(stream, table, table_end);
	}

	level_buf->lvl2.matches_next = cursor;
	level_buf->lvl2.matches_end = table_end;

	icf_body_next_state(stream);
}
/*
 * Body-pass entry point (base variant): level 2 uses the hash1/fill-greedy
 * table-driven pass; every other level falls back to the level-1 body.
 */
void isal_deflate_icf_body_base(struct isal_zstream *stream)
{
	if (stream->level == 2)
		icf_body_hash1_fillgreedy_lazy(stream);
	else
		isal_deflate_icf_body_lvl1(stream);
}
/*
 * Body-pass entry point (_06 variant): level 2 uses the pass built on
 * gen_icf_map_lh1; every other level falls back to the level-1 body.
 */
void isal_deflate_icf_body_06(struct isal_zstream *stream)
{
	if (stream->level == 2)
		icf_body_lazyhash1_fillgreedy_greedy(stream);
	else
		isal_deflate_icf_body_lvl1(stream);
}

View File

@ -110,8 +110,8 @@ stack_size equ 3*8 + 8*8 + 4*16
; void isal_deflate_icf_body ( isal_zstream *stream )
; arg 1: rcx: addr of stream
global isal_deflate_icf_body_ %+ ARCH
isal_deflate_icf_body_ %+ ARCH %+ :
global isal_deflate_icf_body_lvl1_ %+ ARCH
isal_deflate_icf_body_lvl1_ %+ ARCH %+ :
%ifidn __OUTPUT_FORMAT__, elf64
mov rcx, rdi
%endif
@ -190,8 +190,8 @@ MARK __body_compute_hash_ %+ ARCH
shr tmp3, 8
compute_hash hash2, tmp3
and hash, HASH_MASK
and hash2, HASH_MASK
and hash, LVL0_HASH_MASK
and hash2, LVL0_HASH_MASK
cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
je write_first_byte
@ -220,7 +220,7 @@ loop2:
mov tmp2, curr_data
shr curr_data, 16
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
mov dist2 %+ w, f_i %+ w
dec dist2
@ -233,7 +233,7 @@ loop2:
shr tmp2, 24
compute_hash hash2, tmp2
and hash2 %+ d, HASH_MASK
and hash2 %+ d, LVL0_HASH_MASK
and dist2 %+ d, (D-1)
neg dist2
@ -286,7 +286,7 @@ len_dist_lit_huffman:
shr curr_data, 24
compute_hash hash3, curr_data
and hash3, HASH_MASK
and hash3, LVL0_HASH_MASK
mov curr_data, tmp1
shr tmp1, 8
@ -318,9 +318,9 @@ len_dist_lit_huffman:
and dist_code2, 0x1F
inc word [stream + _internal_state_hist_dist + HIST_ELEM_SIZE*dist_code2]
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
and hash %+ d, LVL0_HASH_MASK
and hash2 %+ d, LVL0_HASH_MASK
; continue
cmp f_i, file_length
@ -371,9 +371,9 @@ len_dist_huffman:
and dist_code, 0x1F
inc word [stream + _internal_state_hist_dist + HIST_ELEM_SIZE*dist_code]
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
and hash %+ d, LVL0_HASH_MASK
and hash2 %+ d, LVL0_HASH_MASK
; continue
cmp f_i, file_length
@ -501,8 +501,8 @@ write_first_byte:
MOVDQU xdata, [file_start + f_i + 1]
add f_i, 1
mov curr_data, [file_start + f_i]
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
and hash2 %+ d, LVL0_HASH_MASK
cmp f_i, file_length
jl loop2
@ -510,5 +510,5 @@ write_first_byte:
section .data
align 16
mask: dd HASH_MASK, HASH_MASK, HASH_MASK, HASH_MASK
mask: dd LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK
const_D: dq D

View File

@ -4,4 +4,4 @@
%define COMPARE_TYPE 2
%endif
%include "igzip_icf_body.asm"
%include "igzip_icf_body_h1_gr_bt.asm"

View File

@ -4,4 +4,4 @@
%define COMPARE_TYPE 2
%endif
%include "igzip_icf_body.asm"
%include "igzip_icf_body_h1_gr_bt.asm"

View File

@ -5,4 +5,4 @@
%define COMPARE_TYPE 3
%endif
%include "igzip_icf_body.asm"
%include "igzip_icf_body_h1_gr_bt.asm"

View File

@ -83,8 +83,8 @@ m_out_start equ 16
stack_size equ 32
; void isal_deflate_icf_finish ( isal_zstream *stream )
; arg 1: rcx: addr of stream
global isal_deflate_icf_finish_01
isal_deflate_icf_finish_01:
global isal_deflate_icf_finish_lvl1_01
isal_deflate_icf_finish_lvl1_01:
PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
sub rsp, stack_size
@ -129,7 +129,7 @@ isal_deflate_icf_finish_01:
ja end_loop_2
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
mov byte [stream + _internal_state_has_hist], IGZIP_HIST
jmp encode_literal
@ -141,10 +141,10 @@ loop2:
cmp m_out_buf, [rsp + m_out_end]
ja end_loop_2
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
mov curr_data %+ d, [file_start + f_i]
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
; f_index = state->head[hash];
movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
@ -203,19 +203,19 @@ loop2:
; only update hash twice
; hash = compute_hash(state->file_start + k) & HASH_MASK;
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
mov tmp6 %+ d, dword [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
add tmp3, 1
; hash = compute_hash(state->file_start + k) & HASH_MASK;
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
mov tmp6 %+ d, dword [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w

View File

@ -1,16 +1,31 @@
#ifndef IGZIP_LEVEL_BUF_STRUCTS_H
#define IGZIP_LEVEL_BUF_STRUCTS_H
#include "igzip_lib.h"
#include "huff_codes.h"
#include "encode_df.h"
struct level_2_buf {
/* Number of struct deflate_icf entries in lvl2_buf.matches. */
#define MATCH_BUF_SIZE (4 * 1024)
/*
 * Level 2 working state; embedded in struct level_buf as the member lvl2.
 */
struct lvl2_buf{
/* Hash table with IGZIP_LVL2_HASH_SIZE 16-bit entries. */
uint16_t hash_table[IGZIP_LVL2_HASH_SIZE];
/* NOTE(review): presumably the read/write cursor into matches[] and the end
 * of the valid entries -- confirm against the code that uses them. */
struct deflate_icf *matches_next;
struct deflate_icf *matches_end;
/* Buffer of MATCH_BUF_SIZE match entries. */
struct deflate_icf matches[MATCH_BUF_SIZE];
/* ISAL_LOOK_AHEAD further entries laid out directly after matches[].
 * NOTE(review): presumably slack for writes that run past matches[] -- confirm. */
struct deflate_icf overflow[ISAL_LOOK_AHEAD];
};
/* Size of the per-level buffer; lvl2_buf is the only one declared in this header. */
#define MAX_LVL_BUF_SIZE sizeof(struct lvl2_buf)
struct level_buf {
struct hufftables_icf encode_tables;
uint32_t deflate_hdr_count;
uint32_t deflate_hdr_extra_bits;
uint8_t deflate_hdr[ISAL_DEF_MAX_HDR_SIZE];
struct deflate_icf *icf_buf_next;
uint64_t icf_buf_avail_out;
struct deflate_icf icf_buf_start[];
struct deflate_icf *icf_buf_start;
struct lvl2_buf lvl2;
};
#endif

View File

@ -39,23 +39,29 @@ extern isal_deflate_body_04
extern isal_deflate_finish_base
extern isal_deflate_finish_01
extern isal_deflate_icf_body_base
extern isal_deflate_icf_body_01
extern isal_deflate_icf_body_02
extern isal_deflate_icf_body_04
extern isal_deflate_icf_finish_base
extern isal_deflate_icf_finish_01
extern isal_deflate_icf_body_lvl1_base
extern isal_deflate_icf_body_lvl1_01
extern isal_deflate_icf_body_lvl1_02
extern isal_deflate_icf_body_lvl1_04
extern isal_deflate_icf_finish_lvl1_base
extern isal_deflate_icf_finish_lvl1_01
extern isal_deflate_icf_finish_lvl2_base
extern isal_update_histogram_base
extern isal_update_histogram_01
extern isal_update_histogram_04
extern gen_icf_map_h1_base
extern encode_deflate_icf_base
extern encode_deflate_icf_04
extern set_long_icf_fg_base
%ifdef HAVE_AS_KNOWS_AVX512
extern encode_deflate_icf_06
extern set_long_icf_fg_06
extern gen_icf_map_lh1_06
%endif
extern crc32_gzip_base
@ -68,6 +74,11 @@ extern adler32_sse
extern isal_deflate_hash_lvl0_base
extern isal_deflate_hash_lvl0_01
extern isal_deflate_hash_lvl2_base
extern isal_deflate_icf_body_base
extern isal_deflate_icf_body_06
section .text
%include "multibinary.asm"
@ -77,10 +88,14 @@ mbin_dispatch_init5 isal_deflate_body, isal_deflate_body_base, isal_deflate_body
mbin_interface isal_deflate_finish
mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01
mbin_interface isal_deflate_icf_body
mbin_dispatch_init5 isal_deflate_icf_body, isal_deflate_icf_body_base, isal_deflate_icf_body_01, isal_deflate_icf_body_02, isal_deflate_icf_body_04
mbin_interface isal_deflate_icf_finish
mbin_dispatch_init5 isal_deflate_icf_finish, isal_deflate_icf_finish_base, isal_deflate_icf_finish_01, isal_deflate_icf_finish_01, isal_deflate_icf_finish_01
mbin_interface isal_deflate_icf_body_lvl1
mbin_dispatch_init5 isal_deflate_icf_body_lvl1, isal_deflate_icf_body_lvl1_base, isal_deflate_icf_body_lvl1_01, isal_deflate_icf_body_lvl1_02, isal_deflate_icf_body_lvl1_04
mbin_interface isal_deflate_icf_finish_lvl1
mbin_dispatch_init5 isal_deflate_icf_finish_lvl1, isal_deflate_icf_finish_lvl1_base, isal_deflate_icf_finish_lvl1_01, isal_deflate_icf_finish_lvl1_01, isal_deflate_icf_finish_lvl1_01
mbin_interface isal_deflate_icf_finish_lvl2
mbin_dispatch_init5 isal_deflate_icf_finish_lvl2, isal_deflate_icf_finish_lvl2_base, isal_deflate_icf_finish_lvl2_base, isal_deflate_icf_finish_lvl2_base, isal_deflate_icf_finish_lvl2_base
mbin_interface isal_update_histogram
mbin_dispatch_init5 isal_update_histogram, isal_update_histogram_base, isal_update_histogram_01, isal_update_histogram_01, isal_update_histogram_04
@ -88,9 +103,21 @@ mbin_dispatch_init5 isal_update_histogram, isal_update_histogram_base, isal_upda
%ifdef HAVE_AS_KNOWS_AVX512
mbin_interface encode_deflate_icf
mbin_dispatch_init6 encode_deflate_icf, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_04, encode_deflate_icf_06
mbin_interface set_long_icf_fg
mbin_dispatch_init6 set_long_icf_fg, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_06
mbin_interface gen_icf_map_lh1
mbin_dispatch_init6 gen_icf_map_lh1, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_lh1_06
%else
mbin_interface encode_deflate_icf
mbin_dispatch_init5 encode_deflate_icf, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_04
mbin_interface set_long_icf_fg
mbin_dispatch_init5 set_long_icf_fg, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_base
mbin_interface gen_icf_map_lh1
mbin_dispatch_init5 gen_icf_map_lh1, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_h1_base
%endif
mbin_interface crc32_gzip
@ -101,3 +128,14 @@ mbin_dispatch_init5 isal_adler32, adler32_base, adler32_sse, adler32_sse, adler3
mbin_interface isal_deflate_hash_lvl0
mbin_dispatch_init5 isal_deflate_hash_lvl0, isal_deflate_hash_lvl0_base, isal_deflate_hash_lvl0_01, isal_deflate_hash_lvl0_01, isal_deflate_hash_lvl0_01
mbin_interface isal_deflate_hash_lvl2
mbin_dispatch_init5 isal_deflate_hash_lvl2, isal_deflate_hash_lvl2_base, isal_deflate_hash_lvl2_base, isal_deflate_hash_lvl2_base, isal_deflate_hash_lvl2_base
%ifdef HAVE_AS_KNOWS_AVX512
mbin_interface isal_deflate_icf_body
mbin_dispatch_init6 isal_deflate_icf_body, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_06
%else
mbin_interface isal_deflate_icf_body
mbin_dispatch_init5 isal_deflate_icf_body, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_base
%endif

View File

@ -242,6 +242,26 @@ int get_rand_data_length(void)
return rand() & max_mask;
}
/* Pick a random compression level in [ISAL_DEF_MIN_LEVEL, ISAL_DEF_MAX_LEVEL]. */
int get_rand_level(void)
{
	const int level_count = ISAL_DEF_MAX_LEVEL - ISAL_DEF_MIN_LEVEL + 1;
	const int offset = rand() % level_count;

	return ISAL_DEF_MIN_LEVEL + offset;
}
/*
 * Pick a random level buffer size for the given compression level: a random
 * amount below IBUF_SIZE on top of the level's minimum buffer size. Level 2
 * uses ISAL_DEF_LVL2_MIN; every other level uses ISAL_DEF_LVL1_MIN.
 */
int get_rand_level_buf_size(int level)
{
	if (level == 2)
		return rand() % IBUF_SIZE + ISAL_DEF_LVL2_MIN;

	return rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
}
void print_error(int error_code)
{
switch (error_code) {
@ -963,7 +983,7 @@ int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed
stream.level = level;
if (level >= 1) {
level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
level_buf_size = get_rand_level_buf_size(stream.level);
level_buf = malloc(level_buf_size);
create_rand_repeat_data(level_buf, level_buf_size);
stream.level_buf = level_buf;
@ -1147,7 +1167,7 @@ int compress_single_pass(uint8_t * data, uint32_t data_size, uint8_t * compresse
stream.level = level;
if (level >= 1) {
level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
level_buf_size = get_rand_level_buf_size(stream.level);
level_buf = malloc(level_buf_size);
create_rand_repeat_data(level_buf, level_buf_size);
stream.level_buf = level_buf;
@ -1218,21 +1238,28 @@ int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_
stream.gzip_flag = gzip_flag;
stream.level = level;
if (level >= 1) {
if (level == 1) {
/* This is to test case where level buf uses already existing
* internal buffers */
level_buf_size = rand() % IBUF_SIZE;
/* printf("level_buf_size = %d\n", level_buf_size); */
if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
level_buf = malloc(level_buf_size);
create_rand_repeat_data(level_buf, level_buf_size);
stream.level_buf = level_buf;
stream.level_buf_size = level_buf_size;
}
} else if (level > 1) {
level_buf_size = get_rand_level_buf_size(level);
level_buf = malloc(level_buf_size);
create_rand_repeat_data(level_buf, level_buf_size);
stream.level_buf = level_buf;
stream.level_buf_size = level_buf_size;
}
if (reset_test_flag) {
if (reset_test_flag)
isal_deflate_reset(&stream);
/* printf("post reset level_buf_size = %d\n", level_buf_size); */
}
ret = isal_deflate_stateless(&stream);
if (level_buf != NULL)
@ -1311,14 +1338,23 @@ int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t *
stream.next_out = compressed_buf;
stream.level = level;
if (level >= 1) {
if (level == 1) {
/* This is to test case where level_buf uses already existing
* internal buffers */
level_buf_size = rand() % IBUF_SIZE;
if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
level_buf = malloc(level_buf_size);
create_rand_repeat_data(level_buf, level_buf_size);
stream.level_buf = level_buf;
stream.level_buf_size = level_buf_size;
}
} else if (level > 1) {
level_buf_size = get_rand_level_buf_size(level);
level_buf = malloc(level_buf_size);
create_rand_repeat_data(level_buf, level_buf_size);
stream.level_buf = level_buf;
stream.level_buf_size = level_buf_size;
}
if (reset_test_flag)
@ -1361,6 +1397,7 @@ int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t *
set_random_hufftable(&stream);
ret = isal_deflate_stateless(&stream);
assert(stream.internal_state.bitbuf.m_bit_count == 0);
assert(compressed_buf == stream.next_out - stream.total_out);
@ -1448,7 +1485,7 @@ int compress_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed
stream.level = level;
if (level >= 1) {
level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
level_buf_size = get_rand_level_buf_size(stream.level);
if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
level_buf = malloc(level_buf_size);
create_rand_repeat_data(level_buf, level_buf_size);
@ -1626,7 +1663,7 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
uint8_t *in_buf = NULL;
gzip_flag = rand() % 5;
level = rand() % 2;
level = get_rand_level();
if (in_size != 0) {
in_buf = malloc(in_size);
@ -1866,7 +1903,7 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
}
gzip_flag = rand() % 5;
level = rand() % 2;
level = get_rand_level();
z_size = z_size_max;
@ -2072,7 +2109,7 @@ int test_flush(uint8_t * in_buf, uint32_t in_size)
uint8_t *z_buf = NULL;
gzip_flag = rand() % 5;
level = rand() % 2;
level = get_rand_level();
z_size = 2 * in_size + 2 * hdr_bytes + 8;
if (gzip_flag == IGZIP_GZIP)
@ -2142,7 +2179,7 @@ int test_full_flush(uint8_t * in_buf, uint32_t in_size)
uint8_t *z_buf = NULL;
gzip_flag = rand() % 5;
level = rand() % 2;
level = get_rand_level();
z_size = 2 * in_size + MAX_LOOPS * (hdr_bytes + 5);
if (gzip_flag == IGZIP_GZIP)

View File

@ -0,0 +1,317 @@
%include "reg_sizes.asm"
%include "lz0a_const.asm"
%include "data_struct2.asm"
%include "igzip_compare_types.asm"
%define NEQ 4
%ifdef HAVE_AS_KNOWS_AVX512
;; Calling-convention specific general purpose register assignment.
;; arg1-arg3 are the registers the three function arguments arrive in;
;; dist_code and len are scratch registers. On win64 these two map to rsi and
;; rdi, which FUNC_SAVE stores on the stack. dist_code is not referenced by
;; the code in this file.
%ifidn __OUTPUT_FORMAT__, win64
%define arg1 rcx
%define arg2 rdx
%define arg3 r8
%define dist_code rsi
%define len rdi
%else
%define arg1 rdi
%define arg2 rsi
%define arg3 rdx
%define dist_code rcx
%define len r8
%endif
;; Named views of the three arguments.
%define next_in arg1
%define end_in arg2
%define match_lookup arg3
;; Scratch general purpose registers, identical for both calling conventions.
%define match_in rax
%define dist r9
%define match_offset r10
%define tmp1 r11
;; Working vectors (zmm0-zmm13).
%define zmatch_lookup zmm0
%define zmatch_lookup2 zmm1
%define zlens zmm2
%define zdist_codes zmm3
%define zdist_extras zmm4
%define zdists zmm5
%define zdists2 zmm6
%define zlens1 zmm7
%define zlens2 zmm8
%define zlookup zmm9
%define zlookup2 zmm10
%define datas zmm11
%define ztmp1 zmm12
%define ztmp2 zmm13
;; Constant vectors (zmm17-zmm31), each loaded once from the .data tables at
;; the top of set_long_icf_fg_06.
%define zvect_size zmm17
%define ztwofiftyfour zmm18
%define ztwofiftysix zmm19
%define ztwosixtytwo zmm20
%define znlen_mask zmm21
%define zbswap zmm22
%define zqword_shuf zmm23
%define zdatas_perm3 zmm24
%define zdatas_perm2 zmm25
%define zincrement zmm26
%define zdists_mask zmm27
%define zdists_start zmm28
%define zlong_lens2 zmm29
%define zlong_lens zmm30
%define zlens_mask zmm31
;; Prologue / epilogue helpers.
;; win64: xmm6-xmm13, rsi and rdi are used by this file and are callee-saved
;; under the Windows x64 calling convention, so they are spilled to the stack
;; in FUNC_SAVE and reloaded in FUNC_RESTORE. Other output formats need no
;; save/restore and get empty macros.
%ifidn __OUTPUT_FORMAT__, win64
;; 8 xmm slots (16 bytes each) + 2 general purpose slots + 8 bytes of padding.
;; NOTE(review): the padding presumably keeps rsp 16-byte aligned for the
;; vmovdqa accesses below -- confirm.
%define stack_size 8*16 + 2 * 8 + 8
%macro FUNC_SAVE 0
alloc_stack stack_size
vmovdqa [rsp + 0*16], xmm6
vmovdqa [rsp + 1*16], xmm7
vmovdqa [rsp + 2*16], xmm8
vmovdqa [rsp + 3*16], xmm9
vmovdqa [rsp + 4*16], xmm10
vmovdqa [rsp + 5*16], xmm11
vmovdqa [rsp + 6*16], xmm12
vmovdqa [rsp + 7*16], xmm13
;; save_reg is defined outside this file; it is assumed to store the
;; register at [rsp + offset], which FUNC_RESTORE mirrors.
save_reg rsi, 8*16 + 0*8
save_reg rdi, 8*16 + 1*8
end_prolog
%endm
%macro FUNC_RESTORE 0
vmovdqa xmm6, [rsp + 0*16]
vmovdqa xmm7, [rsp + 1*16]
vmovdqa xmm8, [rsp + 2*16]
vmovdqa xmm9, [rsp + 3*16]
vmovdqa xmm10, [rsp + 4*16]
vmovdqa xmm11, [rsp + 5*16]
vmovdqa xmm12, [rsp + 6*16]
vmovdqa xmm13, [rsp + 7*16]
;; Reload rsi/rdi from the slots written by FUNC_SAVE. The destination must
;; be the register; an immediate destination does not assemble.
mov rsi, [rsp + 8*16 + 0*8]
mov rdi, [rsp + 8*16 + 1*8]
add rsp, stack_size
%endm
%else
%macro FUNC_SAVE 0
%endm
%macro FUNC_RESTORE 0
%endm
%endif
;; Number of 32-bit match_lookup entries handled per vector step.
%define VECT_SIZE 16
;; set_long_icf_fg_06 (AVX-512 version)
;;   arg1: next_in      - current position in the input
;;   arg2: end_in       - end of the input
;;   arg3: match_lookup - array of ICF_CODE_BYTES-byte entries that is advanced
;;                        in step with next_in (one entry per input byte)
;; Walks match_lookup VECT_SIZE entries at a time looking for entries whose
;; length field (the bits under LIT_LEN_MASK) is greater than 0x105. For those
;; entries the match is re-measured against the input, and the entry plus the
;; entries that follow it inside the match are rewritten with longer lengths.
;; NOTE(review): the entry encoding is defined outside this file; the field
;; descriptions below are read off the masks and shifts the code applies.
global set_long_icf_fg_06
set_long_icf_fg_06:
FUNC_SAVE
;; The scan stops LA + 15 bytes before the end of the input.
;; NOTE(review): presumably so the vector loads and long compares below stay
;; inside the input buffer -- confirm.
sub end_in, LA + 15
;; Load the constant vectors once; they stay in zmm17-zmm31 for the whole call.
vmovdqu32 zlong_lens, [long_len]
vmovdqu32 zlong_lens2, [long_len2]
vmovdqu32 zlens_mask, [len_mask]
vmovdqu16 zdists_start, [dist_start]
vmovdqu32 zdists_mask, [dists_mask]
vmovdqu32 zincrement, [increment]
vmovdqu64 zdatas_perm2, [datas_perm2]
vmovdqu64 zdatas_perm3, [datas_perm3]
vmovdqu64 zqword_shuf, [qword_shuf]
vmovdqu64 zbswap, [bswap_shuf]
vmovdqu64 znlen_mask, [nlen_mask]
vmovdqu64 zvect_size, [vect_size]
vmovdqu64 ztwofiftyfour, [twofiftyfour]
vmovdqu64 ztwofiftysix, [twofiftysix]
vmovdqu64 ztwosixtytwo, [twosixtytwo]
;; Preload the first VECT_SIZE entries.
vmovdqu32 zmatch_lookup, [match_lookup]
fill_loop: ; Tahiti is a magical place
;; zmatch_lookup2 = entries for this step, zmatch_lookup = preload for the next.
vmovdqu32 zmatch_lookup2, zmatch_lookup
vmovdqu32 zmatch_lookup, [match_lookup + ICF_CODE_BYTES * VECT_SIZE]
cmp next_in, end_in
jae end_fill
;; k3 = lanes whose length field is greater than 0x105 (long_len).
vpandd zlens, zmatch_lookup2, zlens_mask
vpcmpgtd k3, zlens, zlong_lens
;; Speculatively increment
add next_in, VECT_SIZE
add match_lookup, ICF_CODE_BYTES * VECT_SIZE
ktestw k3, k3
jz fill_loop
;; Use the bits above DIST_OFFSET as an index into the dist_start table.
vpsrld zdist_codes, zmatch_lookup2, DIST_OFFSET
vpmovdw zdists %+ y, zdist_codes ; Relies on perm working mod 32
vpermw zdists, zdists, zdists_start
vpmovzxwd zdists, zdists %+ y
;; zdists = lane index - (bits above EXTRA_BITS_OFFSET) - dist_start value.
vpsrld zdist_extras, zmatch_lookup2, EXTRA_BITS_OFFSET
vpsubd zdist_extras, zincrement, zdist_extras
vpsubd zdists, zdist_extras, zdists
vextracti32x8 zdists2 %+ y, zdists, 1
;; k6 / k7 = low / high 8 lanes of k3, one qword gather per half.
kmovb k6, k3
kshiftrw k7, k3, 8
;; Gather 8 bytes per selected lane from next_in + zdists - 8
;; (next_in has already been advanced by VECT_SIZE above).
vpgatherdq zlens1 {k6}, [next_in + zdists %+ y - 8]
vpgatherdq zlens2 {k7}, [next_in + zdists2 %+ y - 8]
;; From the 32 input bytes at next_in - 8, build for every lane i the 8-byte
;; window starting at byte i (lanes 0-7 in zlookup, lanes 8-15 in zlookup2).
vmovdqu8 datas %+ y, [next_in - 8]
vpermq zlookup, zdatas_perm2, datas
vpshufb zlookup, zlookup, zqword_shuf
vpermq zlookup2, zdatas_perm3, datas
vpshufb zlookup2, zlookup2, zqword_shuf
;; XOR the gathered bytes with the input windows; after the per-qword byte
;; reversal the leading-zero count is 8 * (number of leading equal bytes).
vpxorq zlens1, zlens1, zlookup
vpxorq zlens2, zlens2, zlookup2
vpshufb zlens1, zlens1, zbswap
vpshufb zlens2, zlens2, zbswap
vplzcntq zlens1, zlens1
vplzcntq zlens2, zlens2
;; Narrow to dwords, merge both halves and convert bits to bytes (0..8);
;; lanes outside k3 are zeroed.
vpmovqd zlens1 %+ y, zlens1
vpmovqd zlens2 %+ y, zlens2
vinserti32x8 zlens1, zlens2 %+ y, 1
vpsrld zlens1 {k3}{z}, zlens1, 3
;; New entry = old entry with the length field cleared + 0x106 + bytes matched,
;; written back only for the lanes in k3.
vpandd zmatch_lookup2 {k3}{z}, zmatch_lookup2, znlen_mask
vpaddd zmatch_lookup2 {k3}{z}, zmatch_lookup2, ztwosixtytwo
vpaddd zmatch_lookup2 {k3}{z}, zmatch_lookup2, zlens1
vmovdqu32 [match_lookup - ICF_CODE_BYTES * VECT_SIZE] {k3}, zmatch_lookup2
;; k3 = lanes where all 8 compared bytes were equal (count > 7); only those
;; go on to the long compare below.
vpcmpgtd k3, zlens1, zlong_lens2
ktestw k3, k3
jz fill_loop
;; lane index - zdists gives back (extra bits + dist_start value). Pack the
;; selected lanes to the front so element 0 belongs to the first selected lane.
vpsubd zdists, zincrement, zdists
vpcompressd zdists2 {k3}, zdists
vpcompressd zmatch_lookup2 {k3}, zmatch_lookup2
;; match_offset = index of the first selected lane, dist = its distance value.
kmovq match_offset, k3
tzcnt match_offset, match_offset
vmovd dist %+ d, zdists2 %+ x
;; Undo the speculative increment and step to the selected lane.
lea next_in, [next_in + match_offset - VECT_SIZE]
lea match_lookup, [match_lookup + ICF_CODE_BYTES * (match_offset - VECT_SIZE)]
mov match_in, next_in
sub match_in, dist
;; Compare next_in against match_in 64 bytes at a time, starting at offset 2:
;; three rounds with an early exit, then one final round.
mov len, 2
%rep 3
vmovdqu8 ztmp1, [next_in + len]
vmovdqu8 ztmp2, [match_in + len]
vpcmpb k3, ztmp1, [match_in + len], NEQ
ktestq k3, k3
jnz miscompare
add len, 64
%endrep
vmovdqu8 ztmp1, [next_in + len]
vmovdqu8 ztmp2, [match_in + len]
vpcmpb k3, ztmp1, ztmp2, 4
miscompare:
;; len += bit index of the first set bit of the last compare mask.
kmovq tmp1, k3
tzcnt tmp1, tmp1
add len, tmp1
;; Skip past the match and preload the entries at the new position.
add next_in, len
lea match_lookup, [match_lookup + ICF_CODE_BYTES * len]
vmovdqu32 zmatch_lookup, [match_lookup]
;; Broadcast the entry of the matched lane with its length field cleared.
vpbroadcastd zmatch_lookup2, zmatch_lookup2 %+ x
vpandd zmatch_lookup2, zmatch_lookup2, znlen_mask
;; zlens1 = len - lane index + 0xfe: the length value for the entry at each
;; successive position from the start of the match.
vpbroadcastd zlens1, len %+ d
vpsubd zlens1, zlens1, zincrement
vpaddd zlens1, zlens1, ztwofiftyfour
;; With len negated, [match_lookup + ICF_CODE_BYTES * len] addresses the entry
;; at the start of the match.
neg len
update_match_lookup:
;; Rewrite only the lanes whose new length value is greater than both the
;; stored length field and 0x100.
vpandd zlens2, zlens_mask, [match_lookup + ICF_CODE_BYTES * len]
vpcmpgtd k3, zlens1, zlens2
vpcmpgtd k4, zlens1, ztwofiftysix
kandw k3, k3, k4
vpaddd zlens2 {k3}{z}, zlens1, zmatch_lookup2
vmovdqu32 [match_lookup + ICF_CODE_BYTES * len] {k3}, zlens2
;; If any lane was left unwritten, go back to scanning; otherwise advance
;; VECT_SIZE entries with every length value reduced by VECT_SIZE.
knotw k3, k3
ktestw k3, k3
jnz fill_loop
add len, VECT_SIZE
vpsubd zlens1, zlens1, zvect_size
jmp update_match_lookup
end_fill:
FUNC_RESTORE
ret
;; Constant tables for set_long_icf_fg_06. Each table is loaded into a zmm
;; register once at function entry.
section .data
align 64
;; Word table indexed by vpermw with the value taken from above DIST_OFFSET
;; in each entry: indices 0-29 hold distance start values, 30 and 31 are zero.
dist_start:
dw 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
dw 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
dw 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
dw 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
;; LIT_LEN_MASK in every dword lane: isolates the length field of an entry.
len_mask:
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
;; LIT_DIST_MASK in every dword lane. Loaded into zdists_mask, which the
;; function body does not use afterwards.
dists_mask:
dd LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK
dd LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK
dd LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK
dd LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK
;; Threshold for the first filter: length fields greater than 0x105 are processed.
long_len:
dd 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105
dd 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105
;; Threshold for the second filter: more than 7 equal bytes in the 8-byte compare.
long_len2:
dd 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7
dd 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7
;; Lane index 0..15.
increment:
dd 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
dd 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
;; vpermq indices: replicate source qwords 0 and 1 into every 128-bit lane.
datas_perm2:
dq 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1
;; vpermq indices: replicate source qwords 1 and 2 into every 128-bit lane.
datas_perm3:
dq 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2
;; vpshufb control that reverses the byte order inside every qword.
bswap_shuf:
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
;; vpshufb control: row r selects bytes r..r+7, giving qword r a window that
;; starts r bytes into its 128-bit lane. The table has nine rows (72 bytes);
;; the 64-byte load into zqword_shuf covers the first eight.
qword_shuf:
db 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
db 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8
db 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9
db 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa
db 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb
db 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc
db 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd
db 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe
db 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
;; VECT_SIZE in every dword lane: subtracted from the length values per step.
vect_size:
dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE
dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE
dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE
dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE
;; 0xfe: added to (len - lane index) to form the new length value.
twofiftyfour:
dd 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe
dd 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe
;; 0x100: new length values must be greater than this to be written.
twofiftysix:
dd 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100
dd 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100
;; 0x106: base length value the 0..8 extra matched bytes are added to.
twosixtytwo:
dd 0x106, 0x106, 0x106, 0x106, 0x106, 0x106, 0x106, 0x106
dd 0x106, 0x106, 0x106, 0x106, 0x106, 0x106, 0x106, 0x106
;; Clears the low 10 bits (the length field) of an entry.
nlen_mask:
dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
%endif

View File

@ -42,6 +42,59 @@
# define RUN_MEM_SIZE 500000000
#endif
/*
 * Level buffer size to use for each compression level 0-9, indexed by level.
 * A level whose ISAL_DEF_LVLn_DEFAULT macro is not defined gets size 0:
 * array elements without an initializer are zero-initialized.
 */
int level_size_buf[10] = {
#ifdef ISAL_DEF_LVL0_DEFAULT
	[0] = ISAL_DEF_LVL0_DEFAULT,
#else
	[0] = 0,
#endif
#ifdef ISAL_DEF_LVL1_DEFAULT
	[1] = ISAL_DEF_LVL1_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL2_DEFAULT
	[2] = ISAL_DEF_LVL2_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL3_DEFAULT
	[3] = ISAL_DEF_LVL3_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL4_DEFAULT
	[4] = ISAL_DEF_LVL4_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL5_DEFAULT
	[5] = ISAL_DEF_LVL5_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL6_DEFAULT
	[6] = ISAL_DEF_LVL6_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL7_DEFAULT
	[7] = ISAL_DEF_LVL7_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL8_DEFAULT
	[8] = ISAL_DEF_LVL8_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL9_DEFAULT
	[9] = ISAL_DEF_LVL9_DEFAULT,
#endif
};
struct isal_zstream stream;
int usage(void)
@ -49,7 +102,7 @@ int usage(void)
fprintf(stderr,
"Usage: igzip_stateless_file_perf [options] <infile>\n"
" -h help\n"
" -X use compression level X with 0 <= X <= 1\n"
" -X use compression level X with 0 <= X <= 2\n"
" -i <iter> number of iterations (at least 1)\n"
" -o <file> output file for compresed data\n");
exit(0);
@ -66,7 +119,17 @@ int main(int argc, char *argv[])
int level = 0, level_size = 0;
char *in_file_name = NULL, *out_file_name = NULL;
while ((c = getopt(argc, argv, "h01i:o:")) != -1) {
while ((c = getopt(argc, argv, "h0123456789i:o:")) != -1) {
if (c >= '0' && c <= '9') {
if (c > '0' + ISAL_DEF_MAX_LEVEL)
usage();
else {
level = c - '0';
level_size = level_size_buf[level];
}
continue;
}
switch (c) {
case 'o':
out_file_name = optarg;
@ -76,12 +139,6 @@ int main(int argc, char *argv[])
if (iterations < 1)
usage();
break;
case '1':
level = 1;
level_size = ISAL_DEF_LVL1_LARGE;
break;
case '0':
break;
case 'h':
default:
usage();

View File

@ -247,7 +247,7 @@ isal_update_histogram_ %+ ARCH %+ :
;; Init hash_table
PXOR vtmp0, vtmp0, vtmp0
mov rcx, (IGZIP_HASH_SIZE - V_LENGTH)
mov rcx, (IGZIP_LVL0_HASH_SIZE - V_LENGTH)
init_hash_table:
MOVDQU [histogram + _hash_offset + 2 * rcx], vtmp0
MOVDQU [histogram + _hash_offset + 2 * (rcx + V_LENGTH / 2)], vtmp0
@ -262,7 +262,7 @@ init_hash_table:
;; Load first literal into histogram
mov curr_data, [file_start + f_i]
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
mov [histogram + _hash_offset + 2 * hash], f_i %+ w
and curr_data, 0xff
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
@ -276,8 +276,8 @@ init_hash_table:
shr curr_data2, 8
compute_hash hash2, curr_data2
and hash2 %+ d, HASH_MASK
and hash, HASH_MASK
and hash2 %+ d, LVL0_HASH_MASK
and hash, LVL0_HASH_MASK
loop2:
xor dist, dist
xor dist2, dist2
@ -324,8 +324,8 @@ loop2:
xor len, [tmp1 + dist - 1]
jz compare_loop
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
and hash2 %+ d, LVL0_HASH_MASK
MOVQ len2, xdata
xor len2, [tmp1 + dist2]
@ -370,7 +370,7 @@ len_dist_lit_huffman:
mov tmp1, curr_data
compute_hash hash, curr_data
and hash3, HASH_MASK
and hash3, LVL0_HASH_MASK
mov [histogram + _hash_offset + 2 * hash3], tmp3 %+ w
dist_to_dist_code2 dist_code2, dist2
@ -383,8 +383,8 @@ len_dist_lit_huffman:
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code]
inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code2]
and hash2 %+ d, HASH_MASK
and hash, HASH_MASK
and hash2 %+ d, LVL0_HASH_MASK
and hash, LVL0_HASH_MASK
cmp f_i, file_length
jl loop2
@ -418,8 +418,8 @@ len_dist_huffman:
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code]
inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code]
and hash2 %+ d, HASH_MASK
and hash, HASH_MASK
and hash2 %+ d, LVL0_HASH_MASK
and hash, LVL0_HASH_MASK
cmp f_i, file_length
jl loop2
@ -442,7 +442,7 @@ end_loop_2:
loop2_finish:
mov curr_data %+ d, dword [file_start + f_i]
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
;; Calculate possible distance for length/dist pair.
xor dist, dist
@ -513,8 +513,8 @@ exit_ret:
ret
compare_loop:
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
and hash %+ d, LVL0_HASH_MASK
and hash2 %+ d, LVL0_HASH_MASK
lea tmp2, [tmp1 + dist - 1]
%if (COMPARE_TYPE == 1)
compare250 tmp1, tmp2, len, tmp3

View File

@ -38,19 +38,29 @@
%define LAST_BYTES_COUNT 3 ;; Bytes to prevent reading out of array bounds
%define LA_STATELESS 258 ;; No round up since no data is copied to a buffer
%ifndef IGZIP_HASH_SIZE
%assign IGZIP_HASH_SIZE (8 * K)
%ifndef IGZIP_LVL0_HASH_SIZE
%assign IGZIP_LVL0_HASH_SIZE (8 * K)
%endif
%assign HASH_MASK (IGZIP_HASH_SIZE - 1)
%ifndef IGZIP_LVL2_HASH_SIZE
%assign IGZIP_LVL2_HASH_SIZE IGZIP_HIST_SIZE
%endif
%assign LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1)
%assign LVL2_HASH_MASK (IGZIP_LVL2_HASH_SIZE - 1)
%assign MIN_DEF_MATCH 3 ; Minimum length of a match in deflate
%assign SHORTEST_MATCH 4
%assign SLOP 8
%define ICF_CODE_BYTES 4
%define LIT_LEN_BIT_COUNT 10
%define DIST_LIT_BIT_COUNT 9
%define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1)
%define LIT_DIST_MASK ((1 << DIST_LIT_BIT_COUNT) - 1)
%define DIST_OFFSET LIT_LEN_BIT_COUNT
%define EXTRA_BITS_OFFSET (DIST_OFFSET + DIST_LIT_BIT_COUNT)
%define LIT (0x1E << DIST_OFFSET)

View File

@ -115,8 +115,12 @@ extern "C" {
#define ISAL_LIMIT_HASH_UPDATE
#ifndef IGZIP_HASH_SIZE
#define IGZIP_HASH_SIZE (8 * IGZIP_K)
#ifndef IGZIP_LVL0_HASH_SIZE
#define IGZIP_LVL0_HASH_SIZE (8 * IGZIP_K)
#endif
#ifndef IGZIP_LVL2_HASH_SIZE
#define IGZIP_LVL2_HASH_SIZE IGZIP_HIST_SIZE
#endif
#ifdef LONGER_HUFFTABLE
@ -155,8 +159,10 @@ enum {IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS};
#define INVALID_PARAM -8
#define STATELESS_OVERFLOW -1
#define ISAL_INVALID_OPERATION -9
#define ISAL_INVALID_LEVEL -4 /* Invalid Compression level set */
#define ISAL_INVALID_STATE -3
#define ISAL_INVALID_LEVEL -4 /* Invalid Compression level set */
#define ISAL_INVALID_LEVEL_BUF -5 /* Invalid buffer specified for the compression level */
/**
* @enum isal_zstate_state
* @brief Compression State please note ZSTATE_TRL only applies for GZIP compression
@ -235,7 +241,7 @@ enum isal_block_state {
struct isal_huff_histogram {
uint64_t lit_len_histogram[ISAL_DEF_LIT_LEN_SYMBOLS]; //!< Histogram of Literal/Len symbols seen
uint64_t dist_histogram[ISAL_DEF_DIST_SYMBOLS]; //!< Histogram of Distance Symbols seen
uint16_t hash_table[IGZIP_HASH_SIZE]; //!< Tmp space used as a hash table
uint16_t hash_table[IGZIP_LVL0_HASH_SIZE]; //!< Tmp space used as a hash table
};
struct isal_mod_hist {
@ -244,12 +250,15 @@ struct isal_mod_hist {
};
#define ISAL_DEF_MIN_LEVEL 0
#define ISAL_DEF_MAX_LEVEL 1
#define ISAL_DEF_MAX_LEVEL 2
/* Defines used to set level data sizes */
/* has to be at least sizeof(struct level_buf) + sizeof(struct lvlX_buf) */
#define ISAL_DEF_LVL0_REQ 0
#define ISAL_DEF_LVL1_REQ 4 * IGZIP_K /* has to be at least sizeof(struct level_2_buf) */
/* Required base sizes of the level 1 and level 2 buffers, and the size of one
 * token for each level. The *_REQ bodies are parenthesized so they keep their
 * value inside larger expressions such as 2 * ISAL_DEF_LVL2_REQ or
 * x % ISAL_DEF_LVL1_REQ. */
#define ISAL_DEF_LVL1_REQ (4 * IGZIP_K)
#define ISAL_DEF_LVL1_TOKEN_SIZE 4
#define ISAL_DEF_LVL2_REQ (4 * IGZIP_K + 4 * 4 * IGZIP_K + 2 * IGZIP_LVL2_HASH_SIZE)
#define ISAL_DEF_LVL2_TOKEN_SIZE 4
/* Data sizes for level specific data options */
#define ISAL_DEF_LVL0_MIN ISAL_DEF_LVL0_REQ
@ -266,6 +275,13 @@ struct isal_mod_hist {
#define ISAL_DEF_LVL1_EXTRA_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 128 * IGZIP_K)
#define ISAL_DEF_LVL1_DEFAULT ISAL_DEF_LVL1_LARGE
#define ISAL_DEF_LVL2_MIN (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 1 * IGZIP_K)
#define ISAL_DEF_LVL2_SMALL (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 16 * IGZIP_K)
#define ISAL_DEF_LVL2_MEDIUM (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 32 * IGZIP_K)
#define ISAL_DEF_LVL2_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 64 * IGZIP_K)
#define ISAL_DEF_LVL2_EXTRA_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 128 * IGZIP_K)
#define ISAL_DEF_LVL2_DEFAULT ISAL_DEF_LVL2_LARGE
#define IGZIP_NO_HIST 0
#define IGZIP_HIST 1
#define IGZIP_DICT_HIST 2
@ -296,6 +312,7 @@ struct isal_zstate {
uint8_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set)
uint8_t has_eob; //!< keeps track of eob on the last deflate block
uint8_t has_hist; //!< flag to track if there is match history
uint16_t has_level_buf_init; //!< flag to track if user supplied memory has been initialized.
struct isal_mod_hist hist;
uint32_t count; //!< used for partial header/trailer writes
uint8_t tmp_out_buff[16]; //!< temporary array
@ -306,8 +323,7 @@ struct isal_zstate {
uint8_t buffer[2 * IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD]; //!< Internal buffer
/* Stream should be setup such that the head is cache aligned*/
uint16_t head[IGZIP_HASH_SIZE]; //!< Hash array
uint16_t head[IGZIP_LVL0_HASH_SIZE]; //!< Hash array
};
/** @brief Holds the huffman tree used to huffman encode the input stream **/
@ -593,7 +609,8 @@ int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t *dict, uint32_t d
* @param stream Structure holding state information on the compression streams.
* @return COMP_OK (if everything is ok),
* INVALID_FLUSH (if an invalid FLUSH is selected),
* ISAL_INVALID_LEVEL (if an invalid compression level is selected).
* ISAL_INVALID_LEVEL (if an invalid compression level is selected),
* ISAL_INVALID_LEVEL_BUF (if the level buffer is not large enough).
*/
int isal_deflate(struct isal_zstream *stream);
@ -622,6 +639,7 @@ int isal_deflate(struct isal_zstream *stream);
* @return COMP_OK (if everything is ok),
* INVALID_FLUSH (if an invalid FLUSH is selected),
* ISAL_INVALID_LEVEL (if an invalid compression level is selected),
* ISAL_INVALID_LEVEL_BUF (if the level buffer is not large enough),
* STATELESS_OVERFLOW (if output buffer will not fit output).
*/
int isal_deflate_stateless(struct isal_zstream *stream);