mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 09:23:50 +01:00
igzip: Implement optimized level 2 compression
Change-Id: I8cf5bcd56f290d17205ac36dc2828c8acfc66947 Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
parent
3c62216aa1
commit
4ae2d1be29
@ -120,9 +120,9 @@ objs = \
|
||||
bin\encode_df.obj \
|
||||
bin\encode_df_04.obj \
|
||||
bin\proc_heap.obj \
|
||||
bin\igzip_icf_body_01.obj \
|
||||
bin\igzip_icf_body_02.obj \
|
||||
bin\igzip_icf_body_04.obj \
|
||||
bin\igzip_icf_body_h1_gr_bt_01.obj \
|
||||
bin\igzip_icf_body_h1_gr_bt_02.obj \
|
||||
bin\igzip_icf_body_h1_gr_bt_04.obj \
|
||||
bin\igzip_icf_finish.obj \
|
||||
bin\igzip_icf_base.obj \
|
||||
bin\igzip_inflate.obj \
|
||||
@ -135,7 +135,10 @@ objs = \
|
||||
bin\crc32_gzip_refl_by8.obj \
|
||||
bin\adler32_sse.obj \
|
||||
bin\adler32_avx2_4.obj \
|
||||
bin\igzip_deflate_hash.obj
|
||||
bin\igzip_deflate_hash.obj \
|
||||
bin\igzip_gen_icf_map_lh1_06.obj \
|
||||
bin\igzip_set_long_icf_fg_06.obj \
|
||||
bin\igzip_icf_body.obj
|
||||
|
||||
INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/
|
||||
LINKFLAGS = /nologo
|
||||
|
@ -33,19 +33,19 @@ lsrc += igzip/igzip.c \
|
||||
igzip/igzip_icf_base.c \
|
||||
igzip/crc32_gzip_base.c \
|
||||
igzip/flatten_ll.c \
|
||||
igzip/encode_df.c
|
||||
igzip/encode_df.c \
|
||||
igzip/igzip_icf_body.c
|
||||
|
||||
lsrc_base_aliases += igzip/igzip_base_aliases.c igzip/proc_heap_base.c
|
||||
lsrc_x86_32 += igzip/igzip_base_aliases.c igzip/proc_heap_base.c
|
||||
|
||||
lsrc_x86_64 += \
|
||||
igzip/igzip_body_01.asm \
|
||||
lsrc_x86_64 += igzip/igzip_body_01.asm \
|
||||
igzip/igzip_body_02.asm \
|
||||
igzip/igzip_body_04.asm \
|
||||
igzip/igzip_finish.asm \
|
||||
igzip/igzip_icf_body_01.asm \
|
||||
igzip/igzip_icf_body_02.asm \
|
||||
igzip/igzip_icf_body_04.asm \
|
||||
igzip/igzip_icf_body_h1_gr_bt_01.asm \
|
||||
igzip/igzip_icf_body_h1_gr_bt_02.asm \
|
||||
igzip/igzip_icf_body_h1_gr_bt_04.asm \
|
||||
igzip/igzip_icf_finish.asm \
|
||||
igzip/rfc1951_lookup.asm \
|
||||
igzip/crc32_gzip.asm igzip/detect_repeated_char.asm \
|
||||
@ -60,7 +60,9 @@ lsrc_x86_64 += \
|
||||
igzip/encode_df_04.asm \
|
||||
igzip/encode_df_06.asm \
|
||||
igzip/proc_heap.asm \
|
||||
igzip/igzip_deflate_hash.asm
|
||||
igzip/igzip_deflate_hash.asm \
|
||||
igzip/igzip_gen_icf_map_lh1_06.asm \
|
||||
igzip/igzip_set_long_icf_fg_06.asm
|
||||
|
||||
src_include += -I $(srcdir)/igzip
|
||||
extern_hdrs += include/igzip_lib.h
|
||||
@ -80,7 +82,7 @@ other_src += igzip/bitbuf2.asm \
|
||||
igzip/data_struct2.asm \
|
||||
igzip/inflate_data_structs.asm \
|
||||
igzip/igzip_body.asm \
|
||||
igzip/igzip_icf_body.asm \
|
||||
igzip/igzip_icf_body_h1_gr_bt.asm \
|
||||
igzip/igzip_finish.asm \
|
||||
igzip/lz0a_const.asm \
|
||||
igzip/options.asm \
|
||||
@ -124,3 +126,6 @@ igzip_inflate_test: LDLIBS += -lz
|
||||
igzip_igzip_inflate_test_LDADD = libisal.la
|
||||
igzip_igzip_inflate_test_LDFLAGS = -lz
|
||||
igzip_igzip_hist_perf_LDADD = libisal.la
|
||||
igzip_fuzz_inflate: LDLIBS += -lz
|
||||
igzip_igzip_fuzz_inflate_LDADD = libisal.la
|
||||
igzip_igzip_fuzz_inflate_LDFLAGS = -lz
|
||||
|
@ -96,19 +96,42 @@ FIELD _lit_len_table, 513 * HUFF_CODE_SIZE, HUFF_CODE_SIZE
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
START_FIELDS ;; lvl2_buf
|
||||
|
||||
;; name size align
|
||||
FIELD _hash_table, 2 * IGZIP_LVL2_HASH_SIZE, 2
|
||||
FIELD _matches_next, 8, 8
|
||||
FIELD _matches_end, 8, 8
|
||||
FIELD _matches, 4*4*1024, 4
|
||||
FIELD _overflow, 4*LA, 4
|
||||
|
||||
%assign _lvl2_buf_size _FIELD_OFFSET
|
||||
%assign _lvl2_buf_align _STRUCT_ALIGN
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define DEF_MAX_HDR_SIZE 328
|
||||
START_FIELDS ;; level_2_buf
|
||||
START_FIELDS ;; level_buf
|
||||
|
||||
;; name size align
|
||||
FIELD _encode_tables, _hufftables_icf_size, _hufftables_icf_align
|
||||
FIELD _deflate_hdr_buf_used, 8, 8
|
||||
FIELD _deflate_hdr_buf, DEF_MAX_HDR_SIZE, 1
|
||||
FIELD _deflate_hdr_count, 4, 4
|
||||
FIELD _deflate_hdr_extra_bits,4, 4
|
||||
FIELD _deflate_hdr, DEF_MAX_HDR_SIZE, 1
|
||||
FIELD _icf_buf_next, 8, 8
|
||||
FIELD _icf_buf_avail_out, 8, 8
|
||||
FIELD _icf_buf_start, 0, 0
|
||||
FIELD _icf_buf_start, 8, 8
|
||||
FIELD _lvl_extra, _lvl2_buf_size, _lvl2_buf_align
|
||||
|
||||
%assign _level_2_buf_size _FIELD_OFFSET
|
||||
%assign _level_2_buf_align _STRUCT_ALIGN
|
||||
%assign _level_buf_base_size _FIELD_OFFSET
|
||||
%assign _level_buf_base_align _STRUCT_ALIGN
|
||||
|
||||
_lvl2_hash_table equ _lvl_extra + _hash_table
|
||||
_lvl2_matches_next equ _lvl_extra + _matches_next
|
||||
_lvl2_matches_end equ _lvl_extra + _matches_end
|
||||
_lvl2_matches equ _lvl_extra + _matches
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
@ -127,6 +150,7 @@ FIELD _has_wrap_hdr, 1, 1
|
||||
FIELD _has_eob_hdr, 1, 1
|
||||
FIELD _has_eob, 1, 1
|
||||
FIELD _has_hist, 1, 1
|
||||
FIELD _has_level_buf_init, 2, 2
|
||||
FIELD _hist, _isal_mod_hist_size, _isal_mod_hist_align
|
||||
FIELD _count, 4, 4
|
||||
FIELD _tmp_out_buff, 16, 1
|
||||
@ -135,8 +159,7 @@ FIELD _tmp_out_end, 4, 4
|
||||
FIELD _b_bytes_valid, 4, 4
|
||||
FIELD _b_bytes_processed, 4, 4
|
||||
FIELD _buffer, BSIZE, 1
|
||||
FIELD _head, IGZIP_HASH_SIZE*2, 2
|
||||
|
||||
FIELD _head, IGZIP_LVL0_HASH_SIZE*2, 2
|
||||
%assign _isal_zstate_size _FIELD_OFFSET
|
||||
%assign _isal_zstate_align _STRUCT_ALIGN
|
||||
|
||||
@ -189,6 +212,7 @@ _internal_state_has_wrap_hdr equ _internal_state+_has_wrap_hdr
|
||||
_internal_state_has_eob equ _internal_state+_has_eob
|
||||
_internal_state_has_eob_hdr equ _internal_state+_has_eob_hdr
|
||||
_internal_state_has_hist equ _internal_state+_has_hist
|
||||
_internal_state_has_level_buf_init equ _internal_state+_has_level_buf_init
|
||||
_internal_state_buffer equ _internal_state+_buffer
|
||||
_internal_state_head equ _internal_state+_head
|
||||
_internal_state_bitbuf_m_bits equ _internal_state+_bitbuf_m_bits
|
||||
|
@ -6,14 +6,21 @@
|
||||
|
||||
/* Deflate Intermediate Compression Format */
|
||||
#define LIT_LEN_BIT_COUNT 10
|
||||
#define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1)
|
||||
#define DIST_LIT_BIT_COUNT 9
|
||||
#define DIST_LIT_MASK ((1 << DIST_LIT_BIT_COUNT) - 1)
|
||||
#define ICF_DIST_OFFSET LIT_LEN_BIT_COUNT
|
||||
#define NULL_DIST_SYM 30
|
||||
|
||||
#define LEN_START 257
|
||||
#define LEN_OFFSET (LEN_START - 3)
|
||||
#define LIT_START (NULL_DIST_SYM + 1)
|
||||
#define ICF_CODE_LEN 32
|
||||
|
||||
struct deflate_icf {
|
||||
uint32_t lit_len:LIT_LEN_BIT_COUNT;
|
||||
uint32_t lit_dist:DIST_LIT_BIT_COUNT;
|
||||
uint32_t dist_extra:32 - DIST_LIT_BIT_COUNT - ICF_DIST_OFFSET;
|
||||
uint32_t dist_extra:ICF_CODE_LEN - DIST_LIT_BIT_COUNT - ICF_DIST_OFFSET;
|
||||
};
|
||||
|
||||
struct deflate_icf *encode_deflate_icf(struct deflate_icf *next_in, struct deflate_icf *end_in,
|
||||
|
@ -684,7 +684,7 @@ void isal_update_histogram_base(uint8_t * start_stream, int length,
|
||||
memset(last_seen, 0, sizeof(histogram->hash_table)); /* Initialize last_seen to be 0. */
|
||||
for (current = start_stream; current < end_stream - 3; current++) {
|
||||
literal = *(uint32_t *) current;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
seen = last_seen[hash];
|
||||
last_seen[hash] = (current - start_stream) & 0xFFFF;
|
||||
dist = (current - start_stream - seen) & 0xFFFF;
|
||||
@ -704,7 +704,7 @@ void isal_update_histogram_base(uint8_t * start_stream, int length,
|
||||
next_hash++;
|
||||
for (; next_hash < end; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
last_seen[hash] = (next_hash - start_stream) & 0xFFFF;
|
||||
}
|
||||
|
||||
@ -718,7 +718,7 @@ void isal_update_histogram_base(uint8_t * start_stream, int length,
|
||||
lit_len_histogram[literal & 0xFF] += 1;
|
||||
}
|
||||
literal = literal >> 8;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
seen = last_seen[hash];
|
||||
last_seen[hash] = (current - start_stream) & 0xFFFF;
|
||||
dist = (current - start_stream - seen) & 0xFFFF;
|
||||
|
@ -76,7 +76,8 @@
|
||||
#define INVALID_DIST_HUFFCODE 1
|
||||
#define INVALID_HUFFCODE 1
|
||||
|
||||
#define HASH_MASK (IGZIP_HASH_SIZE - 1)
|
||||
#define LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1)
|
||||
#define LVL2_HASH_MASK (IGZIP_LVL2_HASH_SIZE - 1)
|
||||
#define SHORTEST_MATCH 4
|
||||
|
||||
#define LENGTH_BITS 5
|
||||
|
@ -55,9 +55,15 @@ static inline uint32_t tzcnt(uint64_t val)
|
||||
{
|
||||
uint32_t cnt;
|
||||
|
||||
#ifdef __x86_64__
|
||||
#ifdef __BMI__
|
||||
cnt = __tzcnt_u64(val);
|
||||
cnt = cnt / 8;
|
||||
#elifdef __x86_64__
|
||||
|
||||
cnt = __builtin_ctzll(val) / 8;//__tzcnt_u64(val);
|
||||
cnt = __bsfq(val);
|
||||
if(val == 0)
|
||||
cnt = 64;
|
||||
cnt = cnt / 8;
|
||||
|
||||
#else
|
||||
for(cnt = 8; val > 0; val <<= 8)
|
||||
@ -178,12 +184,41 @@ static inline uint32_t compute_hash(uint32_t data)
|
||||
return _mm_crc32_u32(0, data);
|
||||
|
||||
#else
|
||||
uint64_t hash;
|
||||
/* Use multiplication to create a hash, 0xB2D06057 is a prime number */
|
||||
return ((uint64_t)data * 0xB2D06057) >> 16;
|
||||
hash = data;
|
||||
hash *= 0xB2D06057;
|
||||
hash >>= 16;
|
||||
hash *= 0xB2D06057;
|
||||
hash >>= 16;
|
||||
|
||||
return hash;
|
||||
|
||||
#endif /* __SSE4_2__ */
|
||||
}
|
||||
|
||||
#define PROD1 0xFFFFE84B
#define PROD2 0xFFFF97B1

/* Sign-extend the low 16 bits of v to 32 bits, using only unsigned arithmetic. */
static inline uint32_t hash_mad_sext16(uint32_t v)
{
	v &= 0xFFFF;
	return (v & 0x8000) ? (v | 0xFFFF0000u) : v;
}

/**
 * @brief Hash a 32-bit value with two rounds of multiply-add on its 16-bit halves.
 *
 * Each round treats the low and high halves of data as signed 16-bit values,
 * multiplies them by PROD1 and PROD2 respectively and sums the products
 * modulo 2^32.
 *
 * The halves are sign-extended explicitly in unsigned arithmetic instead of
 * being narrowed to int16_t, so the result does not rely on the
 * implementation-defined conversion of out-of-range values to a signed type.
 *
 * @param data value to hash
 * @returns 32-bit hash of data
 */
static inline uint32_t compute_hash_mad(uint32_t data)
{
	int round;

	for (round = 0; round < 2; round++) {
		uint32_t low = hash_mad_sext16(data);
		uint32_t high = hash_mad_sext16(data >> 16);

		data = PROD1 * low + PROD2 * high;
	}

	return data;
}
|
||||
|
||||
/**
 * @brief Hash a 64-bit value by combining the hashes of its two 32-bit halves.
 *
 * compute_hash() takes a 32-bit argument, so passing data directly hashes
 * only its low 32 bits; the high half is hashed separately and the two
 * results are XORed together.
 *
 * @param data value to hash
 * @returns XOR of compute_hash(high half) and compute_hash(low half)
 */
static inline uint32_t compute_long_hash(uint64_t data)
{
	uint32_t upper_hash = compute_hash(data >> 32);
	uint32_t lower_hash = compute_hash(data);

	return upper_hash ^ lower_hash;
}
|
||||
|
||||
/**
|
||||
* @brief Returns how long str1 and str2 have the same symbols.
|
||||
|
200
igzip/igzip.c
200
igzip/igzip.c
@ -41,8 +41,6 @@
|
||||
#define NON_EMPTY_BLOCK_SIZE 6
|
||||
#define MAX_SYNC_FLUSH_SIZE NON_EMPTY_BLOCK_SIZE + MAX_WRITE_BITS_SIZE
|
||||
|
||||
#define MAX_TOKENS (16 * 1024)
|
||||
|
||||
#include "huffman.h"
|
||||
#include "bitbuf2.h"
|
||||
#include "igzip_lib.h"
|
||||
@ -67,6 +65,7 @@
|
||||
#endif
|
||||
|
||||
/* Level-specific dictionary hashing routines. dict_len is declared uint32_t
 * so these prototypes agree with the definitions of the same functions. */
extern void isal_deflate_hash_lvl0(struct isal_zstream *stream, uint8_t * dict,
				   uint32_t dict_len);
extern void isal_deflate_hash_lvl2(struct isal_zstream *stream, uint8_t * dict,
				   uint32_t dict_len);
|
||||
extern const uint8_t gzip_hdr[];
|
||||
extern const uint32_t gzip_hdr_bytes;
|
||||
extern const uint32_t gzip_trl_bytes;
|
||||
@ -93,7 +92,8 @@ void isal_deflate_body(struct isal_zstream *stream);
|
||||
void isal_deflate_finish(struct isal_zstream *stream);
|
||||
|
||||
void isal_deflate_icf_body(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_finish(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream);
|
||||
/*****************************************************************/
|
||||
|
||||
/* Forward declarations */
|
||||
@ -217,7 +217,7 @@ static void flush_write_buffer(struct isal_zstream *stream)
|
||||
static void flush_icf_block(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
struct BitBuf2 *write_buf = &state->bitbuf;
|
||||
struct deflate_icf *icf_buf_encoded_next;
|
||||
|
||||
@ -243,27 +243,92 @@ static void flush_icf_block(struct isal_zstream *stream)
|
||||
}
|
||||
}
|
||||
|
||||
static int check_level_req(struct isal_zstream *stream)
|
||||
{
|
||||
if (stream->level == 0)
|
||||
return 0;
|
||||
|
||||
if (stream->level_buf == NULL)
|
||||
return ISAL_INVALID_LEVEL_BUF;
|
||||
|
||||
switch (stream->level) {
|
||||
case 2:
|
||||
if (stream->level_buf_size < ISAL_DEF_LVL2_MIN)
|
||||
return ISAL_INVALID_LEVEL;
|
||||
break;
|
||||
case 1:
|
||||
if (stream->level_buf_size < ISAL_DEF_LVL1_MIN)
|
||||
return ISAL_INVALID_LEVEL;
|
||||
break;
|
||||
default:
|
||||
return ISAL_INVALID_LEVEL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* returns the size of the level specific buffer */
|
||||
static int init_lvlX_buf(struct isal_zstream *stream)
|
||||
{
|
||||
int level_struct_size;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
|
||||
level_struct_size = sizeof(struct level_buf) - MAX_LVL_BUF_SIZE;
|
||||
|
||||
switch (stream->level) {
|
||||
case 2:
|
||||
if (!state->has_level_buf_init) {
|
||||
level_buf->lvl2.matches_next = level_buf->lvl2.matches;
|
||||
level_buf->lvl2.matches_end = level_buf->lvl2.matches;
|
||||
}
|
||||
|
||||
level_struct_size += sizeof(struct lvl2_buf);
|
||||
break;
|
||||
}
|
||||
|
||||
state->has_level_buf_init = 1;
|
||||
return level_struct_size;
|
||||
|
||||
}
|
||||
|
||||
static void init_new_icf_block(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
int level_struct_size;
|
||||
|
||||
if (stream->level_buf_size >=
|
||||
sizeof(struct level_2_buf) + 100 * sizeof(struct deflate_icf)) {
|
||||
state->block_next = state->block_end;
|
||||
level_buf->icf_buf_next = level_buf->icf_buf_start;
|
||||
level_buf->icf_buf_avail_out =
|
||||
stream->level_buf_size - sizeof(struct level_2_buf) -
|
||||
sizeof(struct deflate_icf);
|
||||
memset(&state->hist, 0, sizeof(struct isal_mod_hist));
|
||||
state->state = ZSTATE_BODY;
|
||||
level_struct_size = init_lvlX_buf(stream);
|
||||
|
||||
state->block_next = state->block_end;
|
||||
level_buf->icf_buf_start =
|
||||
(struct deflate_icf *)(stream->level_buf + level_struct_size);
|
||||
|
||||
level_buf->icf_buf_next = level_buf->icf_buf_start;
|
||||
level_buf->icf_buf_avail_out =
|
||||
stream->level_buf_size - level_struct_size - sizeof(struct deflate_icf);
|
||||
|
||||
memset(&state->hist, 0, sizeof(struct isal_mod_hist));
|
||||
state->state = ZSTATE_BODY;
|
||||
}
|
||||
|
||||
static int are_buffers_empty(struct isal_zstream *stream)
|
||||
{
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
|
||||
switch (stream->level) {
|
||||
case 2:
|
||||
return (!stream->avail_in
|
||||
&& level_buf->lvl2.matches_next >= level_buf->lvl2.matches_end);
|
||||
default:
|
||||
return !stream->avail_in;
|
||||
}
|
||||
}
|
||||
|
||||
static void create_icf_block_hdr(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
struct BitBuf2 *write_buf = &state->bitbuf;
|
||||
struct BitBuf2 write_buf_tmp;
|
||||
uint32_t out_size = stream->avail_out;
|
||||
@ -286,7 +351,7 @@ static void create_icf_block_hdr(struct isal_zstream *stream)
|
||||
level_buf->icf_buf_next->dist_extra = 0;
|
||||
level_buf->icf_buf_next++;
|
||||
|
||||
state->has_eob_hdr = (stream->end_of_stream && !stream->avail_in) ? 1 : 0;
|
||||
state->has_eob_hdr = (stream->end_of_stream && are_buffers_empty(stream)) ? 1 : 0;
|
||||
|
||||
if (end_out - stream->next_out >= ISAL_DEF_MAX_HDR_SIZE) {
|
||||
/* Assumes ISAL_DEF_MAX_HDR_SIZE is large enough to contain a
|
||||
@ -366,11 +431,22 @@ static void isal_deflate_pass(struct isal_zstream *stream)
|
||||
write_trailer(stream);
|
||||
}
|
||||
|
||||
static void isal_deflate_icf_finish(struct isal_zstream *stream)
|
||||
{
|
||||
switch (stream->level) {
|
||||
case 2:
|
||||
isal_deflate_icf_finish_lvl2(stream);
|
||||
break;
|
||||
default:
|
||||
isal_deflate_icf_finish_lvl1(stream);
|
||||
}
|
||||
}
|
||||
|
||||
static void isal_deflate_icf_pass(struct isal_zstream *stream)
|
||||
{
|
||||
uint8_t *start_in = stream->next_in;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
|
||||
do {
|
||||
if (state->state == ZSTATE_NEW_HDR)
|
||||
@ -631,21 +707,14 @@ static int isal_deflate_int_stateless(struct isal_zstream *stream)
|
||||
|
||||
isal_deflate_pass(stream);
|
||||
|
||||
} else if (stream->level == 1) {
|
||||
if (stream->level_buf == NULL || stream->level_buf_size < ISAL_DEF_LVL1_MIN) {
|
||||
/* Default to internal buffer if invalid size is supplied */
|
||||
stream->level_buf = state->buffer;
|
||||
stream->level_buf_size = sizeof(state->buffer);
|
||||
}
|
||||
|
||||
} else if (stream->level <= ISAL_DEF_MAX_LEVEL) {
|
||||
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR)
|
||||
reset_match_history(stream);
|
||||
|
||||
state->count = 0;
|
||||
isal_deflate_icf_pass(stream);
|
||||
|
||||
} else
|
||||
return ISAL_INVALID_LEVEL;
|
||||
}
|
||||
|
||||
if (state->state == ZSTATE_END
|
||||
|| (state->state == ZSTATE_NEW_HDR && stream->flush == FULL_FLUSH))
|
||||
@ -745,7 +814,7 @@ static uint32_t write_stored_block(struct isal_zstream *stream)
|
||||
if (state->block_next == state->block_end) {
|
||||
state->state = state->has_eob_hdr ? ZSTATE_TRL : ZSTATE_NEW_HDR;
|
||||
if (stream->flush == FULL_FLUSH && state->state == ZSTATE_NEW_HDR
|
||||
&& stream->avail_in == 0) {
|
||||
&& are_buffers_empty(stream)) {
|
||||
/* Clear match history so there are no cross
|
||||
* block length distance pairs */
|
||||
reset_match_history(stream);
|
||||
@ -759,16 +828,28 @@ static uint32_t write_stored_block(struct isal_zstream *stream)
|
||||
static inline void reset_match_history(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
uint16_t *head = stream->internal_state.head;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
uint16_t *hash_table;
|
||||
uint32_t hash_table_size;
|
||||
int i = 0;
|
||||
|
||||
switch (stream->level) {
|
||||
case 2:
|
||||
hash_table = level_buf->lvl2.hash_table;
|
||||
hash_table_size = sizeof(level_buf->lvl2.hash_table);
|
||||
break;
|
||||
default:
|
||||
hash_table = state->head;
|
||||
hash_table_size = sizeof(state->head);
|
||||
}
|
||||
|
||||
state->has_hist = IGZIP_NO_HIST;
|
||||
|
||||
if ((stream->total_in & 0xFFFF) == 0)
|
||||
memset(stream->internal_state.head, 0, sizeof(stream->internal_state.head));
|
||||
memset(hash_table, 0, hash_table_size);
|
||||
else {
|
||||
for (i = 0; i < sizeof(state->head) / 2; i++) {
|
||||
head[i] = (uint16_t) (stream->total_in);
|
||||
for (i = 0; i < hash_table_size / 2; i++) {
|
||||
hash_table[i] = (uint16_t) (stream->total_in);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -796,6 +877,7 @@ void isal_deflate_init(struct isal_zstream *stream)
|
||||
state->has_eob = 0;
|
||||
state->has_eob_hdr = 0;
|
||||
state->has_hist = IGZIP_NO_HIST;
|
||||
state->has_level_buf_init = 0;
|
||||
state->state = ZSTATE_NEW_HDR;
|
||||
state->count = 0;
|
||||
|
||||
@ -823,6 +905,7 @@ void isal_deflate_reset(struct isal_zstream *stream)
|
||||
state->total_in_start = 0;
|
||||
state->has_wrap_hdr = 0;
|
||||
state->has_eob = 0;
|
||||
state->has_level_buf_init = 0;
|
||||
state->has_eob_hdr = 0;
|
||||
state->has_hist = IGZIP_NO_HIST;
|
||||
state->state = ZSTATE_NEW_HDR;
|
||||
@ -880,7 +963,18 @@ void isal_deflate_stateless_init(struct isal_zstream *stream)
|
||||
|
||||
void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len)
|
||||
{
|
||||
isal_deflate_hash_lvl0(stream, dict, dict_len);
|
||||
/* Reset history to prevent out of bounds matches this works because
|
||||
* dictionary must set at least 1 element in the history */
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
switch (stream->level) {
|
||||
case 2:
|
||||
memset(level_buf->lvl2.hash_table, -1, sizeof(level_buf->lvl2.hash_table));
|
||||
isal_deflate_hash_lvl2(stream, dict, dict_len);
|
||||
default:
|
||||
memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head));
|
||||
isal_deflate_hash_lvl0(stream, dict, dict_len);
|
||||
}
|
||||
|
||||
stream->internal_state.has_hist = IGZIP_HIST;
|
||||
}
|
||||
|
||||
@ -903,10 +997,6 @@ int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t * dict, uint32_t
|
||||
state->b_bytes_processed = dict_len;
|
||||
state->b_bytes_valid = dict_len;
|
||||
|
||||
/* Reset history to prevent out of bounds matches this works because
|
||||
* dictionary must set at least 1 element in the history */
|
||||
memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head));
|
||||
|
||||
state->has_hist = IGZIP_DICT_HIST;
|
||||
|
||||
return COMP_OK;
|
||||
@ -925,6 +1015,7 @@ int isal_deflate_stateless(struct isal_zstream *stream)
|
||||
const uint32_t gzip_flag = stream->gzip_flag;
|
||||
const uint32_t has_wrap_hdr = state->has_wrap_hdr;
|
||||
|
||||
int level_check;
|
||||
uint32_t stored_len;
|
||||
|
||||
/* Final block has already been written */
|
||||
@ -935,6 +1026,7 @@ int isal_deflate_stateless(struct isal_zstream *stream)
|
||||
init(&state->bitbuf);
|
||||
state->state = ZSTATE_NEW_HDR;
|
||||
state->crc = 0;
|
||||
state->has_level_buf_init = 0;
|
||||
|
||||
if (stream->flush == NO_FLUSH)
|
||||
stream->end_of_stream = 1;
|
||||
@ -942,8 +1034,15 @@ int isal_deflate_stateless(struct isal_zstream *stream)
|
||||
if (stream->flush != NO_FLUSH && stream->flush != FULL_FLUSH)
|
||||
return INVALID_FLUSH;
|
||||
|
||||
if (stream->level != 0 && stream->level != 1)
|
||||
return ISAL_INVALID_LEVEL;
|
||||
level_check = check_level_req(stream);
|
||||
if (level_check) {
|
||||
if (stream->level == 1 && stream->level_buf == NULL) {
|
||||
/* Default to internal buffer if invalid size is supplied */
|
||||
stream->level_buf = state->buffer;
|
||||
stream->level_buf_size = sizeof(state->buffer);
|
||||
} else
|
||||
return level_check;
|
||||
}
|
||||
|
||||
if (avail_in == 0)
|
||||
stored_len = TYPE0_BLK_HDR_LEN;
|
||||
@ -1045,7 +1144,7 @@ int isal_deflate(struct isal_zstream *stream)
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
int ret = COMP_OK;
|
||||
uint8_t *next_in;
|
||||
uint32_t avail_in, avail_in_start, total_start, hist_size;
|
||||
uint32_t avail_in, avail_in_start, total_start, hist_size, future_size;
|
||||
uint32_t flush_type = stream->flush;
|
||||
uint32_t end_of_stream = stream->end_of_stream;
|
||||
uint32_t size = 0;
|
||||
@ -1055,6 +1154,10 @@ int isal_deflate(struct isal_zstream *stream)
|
||||
if (stream->flush >= 3)
|
||||
return INVALID_FLUSH;
|
||||
|
||||
ret = check_level_req(stream);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
next_in = stream->next_in;
|
||||
avail_in = stream->avail_in;
|
||||
total_start = stream->total_in;
|
||||
@ -1124,19 +1227,18 @@ int isal_deflate(struct isal_zstream *stream)
|
||||
avail_in_start = stream->avail_in;
|
||||
state->total_in_start = total_start;
|
||||
isal_deflate_int(stream);
|
||||
|
||||
hist_size = hist_add(stream, hist_size, avail_in_start - stream->avail_in);
|
||||
future_size = stream->avail_in;
|
||||
if (future_size > ISAL_LOOK_AHEAD)
|
||||
future_size = ISAL_LOOK_AHEAD;
|
||||
|
||||
memmove(state->buffer, stream->next_in - hist_size, hist_size);
|
||||
memmove(state->buffer, stream->next_in - hist_size, hist_size + future_size);
|
||||
state->b_bytes_processed = hist_size;
|
||||
state->b_bytes_valid = hist_size;
|
||||
|
||||
if (stream->avail_in <= ISAL_LOOK_AHEAD) {
|
||||
memmove(state->buffer + hist_size, stream->next_in, stream->avail_in);
|
||||
state->b_bytes_valid += stream->avail_in;
|
||||
stream->next_in += stream->avail_in;
|
||||
stream->total_in += stream->avail_in;
|
||||
stream->avail_in -= stream->avail_in;
|
||||
}
|
||||
state->b_bytes_valid = hist_size + future_size;
|
||||
stream->next_in += future_size;
|
||||
stream->total_in += future_size;
|
||||
stream->avail_in -= future_size;
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -12,6 +12,9 @@ static inline void update_state(struct isal_zstream *stream, uint8_t * start_in,
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
uint32_t bytes_written;
|
||||
|
||||
if (next_in - start_in > 0)
|
||||
state->has_hist = IGZIP_HIST;
|
||||
|
||||
stream->next_in = next_in;
|
||||
stream->total_in += next_in - start_in;
|
||||
stream->avail_in = end_in - next_in;
|
||||
@ -54,7 +57,7 @@ void isal_deflate_body_base(struct isal_zstream *stream)
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
@ -75,7 +78,7 @@ void isal_deflate_body_base(struct isal_zstream *stream)
|
||||
|
||||
for (; next_hash < end; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||
}
|
||||
|
||||
@ -134,7 +137,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream)
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
@ -153,7 +156,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream)
|
||||
|
||||
for (; next_hash < end - 3; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
last_seen[hash] =
|
||||
(uint64_t) (next_hash - file_start);
|
||||
}
|
||||
@ -222,7 +225,7 @@ void isal_deflate_hash_lvl0_base(struct isal_zstream *stream, uint8_t * dict,
|
||||
|
||||
while (next_in <= end_in) {
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
last_seen[hash] = lookup_val;
|
||||
lookup_val++;
|
||||
next_in++;
|
||||
|
@ -30,11 +30,14 @@
|
||||
#include <stdint.h>
|
||||
#include "igzip_lib.h"
|
||||
#include "encode_df.h"
|
||||
#include "igzip_level_buf_structs.h"
|
||||
|
||||
void isal_deflate_body_base(struct isal_zstream *stream);
|
||||
void isal_deflate_finish_base(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_body_base(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_finish_base(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_body_lvl1_base(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_finish_lvl1_base(struct isal_zstream *stream);
|
||||
void isal_deflate_icf_finish_lvl2_base(struct isal_zstream *stream);
|
||||
void isal_update_histogram_base(uint8_t * start_stream, int length,
|
||||
struct isal_huff_histogram *histogram);
|
||||
struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
|
||||
@ -45,6 +48,12 @@ uint32_t adler32_base(uint32_t init, const unsigned char *buf, uint64_t len);
|
||||
int decode_huffman_code_block_stateless_base(struct inflate_state *s);
|
||||
void isal_deflate_hash_lvl0_base(struct isal_zstream *stream, uint8_t * dict,
|
||||
uint32_t dict_len);
|
||||
void isal_deflate_hash_lvl2_base(struct isal_zstream *stream, uint8_t * dict,
|
||||
uint32_t dict_len);
|
||||
void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in,
|
||||
struct deflate_icf *match_lookup, struct level_buf *level_buf);
|
||||
void gen_icf_map_h1_base(struct isal_zstream *stream,
|
||||
struct deflate_icf *matches_icf_lookup, uint64_t input_size);
|
||||
|
||||
void isal_deflate_body(struct isal_zstream *stream)
|
||||
{
|
||||
@ -61,9 +70,19 @@ void isal_deflate_icf_body(struct isal_zstream *stream)
|
||||
isal_deflate_icf_body_base(stream);
|
||||
}
|
||||
|
||||
/* Base alias: level 1 ICF body processing forwards to the base implementation. */
void isal_deflate_icf_body_lvl1(struct isal_zstream *stream)
{
	isal_deflate_icf_body_lvl1_base(stream);
}

/* Base alias: level 1 ICF finish processing forwards to the base implementation. */
void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream)
{
	isal_deflate_icf_finish_lvl1_base(stream);
}

/* Base alias: level 2 ICF finish processing forwards to the base implementation. */
void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream)
{
	isal_deflate_icf_finish_lvl2_base(stream);
}
|
||||
|
||||
void isal_update_histogram(uint8_t * start_stream, int length,
|
||||
@ -96,5 +115,22 @@ int decode_huffman_code_block_stateless(struct inflate_state *s)
|
||||
|
||||
/*
 * Base alias: level 0 dictionary hashing forwards to the base implementation.
 * The call is a plain statement; a void function must not return an expression.
 */
void isal_deflate_hash_lvl0(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len)
{
	isal_deflate_hash_lvl0_base(stream, dict, dict_len);
}
|
||||
|
||||
/* Base alias: level 2 dictionary hashing forwards to the base implementation. */
void isal_deflate_hash_lvl2(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len)
{
	isal_deflate_hash_lvl2_base(stream, dict, dict_len);
}
|
||||
|
||||
/* Base alias: forwards all arguments unchanged to set_long_icf_fg_base(). */
void set_long_icf_fg(uint8_t * next_in, uint8_t * end_in,
		     struct deflate_icf *match_lookup, struct level_buf *level_buf)
{
	set_long_icf_fg_base(next_in, end_in, match_lookup, level_buf);
}
|
||||
|
||||
/* Base alias: forwards all arguments unchanged to gen_icf_map_h1_base(). */
void gen_icf_map_lh1(struct isal_zstream *stream,
		     struct deflate_icf *matches_icf_lookup, uint64_t input_size)
{
	gen_icf_map_h1_base(stream, matches_icf_lookup, input_size);
}
|
||||
|
@ -209,8 +209,8 @@ MARK __body_compute_hash_ %+ ARCH
|
||||
shr tmp3, 8
|
||||
compute_hash hash2, tmp3
|
||||
|
||||
and hash, HASH_MASK
|
||||
and hash2, HASH_MASK
|
||||
and hash, LVL0_HASH_MASK
|
||||
and hash2, LVL0_HASH_MASK
|
||||
|
||||
cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
|
||||
je write_first_byte
|
||||
@ -321,7 +321,7 @@ len_dist_lit_huffman:
|
||||
MOVQ tmp5, xdata
|
||||
shr tmp5, 24
|
||||
compute_hash tmp4, tmp5
|
||||
and tmp4, HASH_MASK
|
||||
and tmp4, LVL0_HASH_MASK
|
||||
|
||||
SHLX code4, code4, code_len3
|
||||
or code4, code3
|
||||
@ -359,15 +359,15 @@ loop3:
|
||||
jae loop3_done
|
||||
mov tmp6, [file_start + tmp3]
|
||||
compute_hash tmp4, tmp6
|
||||
and tmp4 %+ d, HASH_MASK
|
||||
and tmp4 %+ d, LVL0_HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
|
||||
jmp loop3
|
||||
loop3_done:
|
||||
%endif
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
and hash2 %+ d, LVL0_HASH_MASK
|
||||
|
||||
; continue
|
||||
cmp f_i, f_end_i
|
||||
@ -429,15 +429,15 @@ loop4:
|
||||
jae loop4_done
|
||||
mov tmp6, [file_start + tmp3]
|
||||
compute_hash tmp4, tmp6
|
||||
and tmp4, HASH_MASK
|
||||
and tmp4, LVL0_HASH_MASK
|
||||
mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w
|
||||
jmp loop4
|
||||
loop4_done:
|
||||
%endif
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
and hash2 %+ d, LVL0_HASH_MASK
|
||||
|
||||
; continue
|
||||
cmp f_i, f_end_i
|
||||
@ -563,5 +563,5 @@ write_first_byte:
|
||||
|
||||
section .data
|
||||
align 16
|
||||
mask: dd HASH_MASK, HASH_MASK, HASH_MASK, HASH_MASK
|
||||
mask: dd LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK
|
||||
const_D: dq D
|
||||
|
@ -90,10 +90,10 @@ main_loop:
|
||||
xor hash4, hash4
|
||||
crc32 hash4 %+ d, dword [f_i_tmp + dict_offset + 1]
|
||||
|
||||
and hash, HASH_MASK
|
||||
and hash2, HASH_MASK
|
||||
and hash3, HASH_MASK
|
||||
and hash4, HASH_MASK
|
||||
and hash, LVL0_HASH_MASK
|
||||
and hash2, LVL0_HASH_MASK
|
||||
and hash3, LVL0_HASH_MASK
|
||||
and hash4, LVL0_HASH_MASK
|
||||
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
add f_i, 1
|
||||
@ -118,7 +118,7 @@ end_loop:
|
||||
xor hash, hash
|
||||
crc32 hash %+ d, dword [f_i + dict_offset]
|
||||
|
||||
and hash, HASH_MASK
|
||||
and hash, LVL0_HASH_MASK
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
|
||||
add f_i, 1
|
||||
|
@ -42,6 +42,59 @@
|
||||
# define RUN_MEM_SIZE 500000000
|
||||
#endif
|
||||
|
||||
/*
 * Level-buffer size to use for each compression level, indexed by the level
 * number (0-9).  Slot n holds ISAL_DEF_LVL<n>_DEFAULT when that macro is
 * defined and 0 otherwise.
 */
int level_size_buf[10] = {
#if defined(ISAL_DEF_LVL0_DEFAULT)
	[0] = ISAL_DEF_LVL0_DEFAULT,
#else
	[0] = 0,
#endif
#if defined(ISAL_DEF_LVL1_DEFAULT)
	[1] = ISAL_DEF_LVL1_DEFAULT,
#else
	[1] = 0,
#endif
#if defined(ISAL_DEF_LVL2_DEFAULT)
	[2] = ISAL_DEF_LVL2_DEFAULT,
#else
	[2] = 0,
#endif
#if defined(ISAL_DEF_LVL3_DEFAULT)
	[3] = ISAL_DEF_LVL3_DEFAULT,
#else
	[3] = 0,
#endif
#if defined(ISAL_DEF_LVL4_DEFAULT)
	[4] = ISAL_DEF_LVL4_DEFAULT,
#else
	[4] = 0,
#endif
#if defined(ISAL_DEF_LVL5_DEFAULT)
	[5] = ISAL_DEF_LVL5_DEFAULT,
#else
	[5] = 0,
#endif
#if defined(ISAL_DEF_LVL6_DEFAULT)
	[6] = ISAL_DEF_LVL6_DEFAULT,
#else
	[6] = 0,
#endif
#if defined(ISAL_DEF_LVL7_DEFAULT)
	[7] = ISAL_DEF_LVL7_DEFAULT,
#else
	[7] = 0,
#endif
#if defined(ISAL_DEF_LVL8_DEFAULT)
	[8] = ISAL_DEF_LVL8_DEFAULT,
#else
	[8] = 0,
#endif
#if defined(ISAL_DEF_LVL9_DEFAULT)
	[9] = ISAL_DEF_LVL9_DEFAULT,
#else
	[9] = 0,
#endif
};
|
||||
|
||||
struct isal_zstream stream;
|
||||
|
||||
int usage(void)
|
||||
@ -69,7 +122,17 @@ int main(int argc, char *argv[])
|
||||
int level = 0, level_size = 0, avail_in;
|
||||
char *in_file_name = NULL, *out_file_name = NULL, *dict_file_name = NULL;
|
||||
|
||||
while ((c = getopt(argc, argv, "h01i:b:o:d:")) != -1) {
|
||||
while ((c = getopt(argc, argv, "h0123456789i:b:o:d:")) != -1) {
|
||||
if (c >= '0' && c <= '9') {
|
||||
if (c > '0' + ISAL_DEF_MAX_LEVEL)
|
||||
usage();
|
||||
else {
|
||||
level = c - '0';
|
||||
level_size = level_size_buf[level];
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
case 'o':
|
||||
out_file_name = optarg;
|
||||
@ -85,12 +148,6 @@ int main(int argc, char *argv[])
|
||||
case 'b':
|
||||
inbuf_size = atoi(optarg);
|
||||
break;
|
||||
case '1':
|
||||
level = 1;
|
||||
level_size = ISAL_DEF_LVL1_LARGE;
|
||||
break;
|
||||
case '0':
|
||||
break;
|
||||
case 'h':
|
||||
default:
|
||||
usage();
|
||||
|
@ -133,7 +133,7 @@ skip_SLOP:
|
||||
ja end_loop_2
|
||||
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
mov byte [stream + _internal_state_has_hist], IGZIP_HIST
|
||||
jmp encode_literal
|
||||
@ -145,10 +145,10 @@ loop2:
|
||||
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||
ja end_loop_2
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
|
||||
; f_index = state->head[hash];
|
||||
movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
|
||||
@ -211,19 +211,19 @@ loop2:
|
||||
|
||||
; only update hash twice
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
|
||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||
compute_hash hash, tmp6
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
add tmp3, 1
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
|
||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||
compute_hash hash, tmp6
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
|
500
igzip/igzip_gen_icf_map_lh1_06.asm
Normal file
500
igzip/igzip_gen_icf_map_lh1_06.asm
Normal file
@ -0,0 +1,500 @@
|
||||
%include "reg_sizes.asm"
|
||||
%include "lz0a_const.asm"
|
||||
%include "data_struct2.asm"
|
||||
|
||||
%ifdef HAVE_AS_KNOWS_AVX512
|
||||
; Integer argument registers depend on the output format: win64 uses
; rcx/rdx/r8, every other format uses rdi/rsi/rdx. hash and next_in are
; given registers that do not collide with the three argument registers.
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg1 rcx
|
||||
%define arg2 rdx
|
||||
%define arg3 r8
|
||||
%define hash rsi
|
||||
%define next_in rdi
|
||||
%else
|
||||
%define arg1 rdi
|
||||
%define arg2 rsi
|
||||
%define arg3 rdx
|
||||
%define hash r8
|
||||
%define next_in rcx
|
||||
%endif
|
||||
|
||||
; stream and level_buf both name arg1: the function loads level_buf from
; [stream + _level_buf], after which the stream pointer is no longer held.
%define stream arg1
|
||||
%define level_buf arg1
|
||||
%define matches_next arg2
|
||||
%define f_i_end arg3
|
||||
|
||||
; Scratch registers. next_byte and encode_size are not referenced by the
; function below.
%define f_i rax
|
||||
%define file_start rbp
|
||||
%define next_byte r9
|
||||
%define encode_size r10
|
||||
%define prev_len r11
|
||||
%define prev_dist r12
|
||||
|
||||
; Address expression of the level-2 hash table inside the level buffer.
%define hash_table level_buf + _lvl2_hash_table
|
||||
|
||||
; Symbolic names for all 32 zmm registers. zmm0-zmm16 carry per-iteration
; working values; zmm17-zmm31 are loaded once from the constant tables in
; the .data section before the main loop and kept for its whole duration.
%define datas zmm0
|
||||
%define datas_lookup zmm1
|
||||
%define zhashes zmm2
|
||||
%define zdists zmm3
|
||||
%define zdists_lookup zmm4
|
||||
%define zscatter zmm5
|
||||
%define zdists2 zmm6
|
||||
%define zlens1 zmm7
|
||||
%define zlens2 zmm8
|
||||
%define zlookup zmm9
|
||||
%define zlookup2 zmm10
|
||||
%define match_lookups zmm11
|
||||
%define zindex zmm12
|
||||
%define zdist_extra zmm13
|
||||
%define zdists_tmp zmm14
|
||||
%define znull_dist_syms zmm15
|
||||
%define zcode zmm16
|
||||
%define zthirty zmm17
|
||||
%define zdist_mask zmm18
|
||||
%define zshortest_matches zmm19
|
||||
%define zrot_left zmm20
|
||||
%define zdatas_perm zmm21
|
||||
%define zdatas_perm2 zmm22
|
||||
%define zdatas_perm3 zmm23
|
||||
%define zdatas_shuf zmm24
|
||||
%define zhash_prod zmm25
|
||||
%define zhash_mask zmm26
|
||||
%define zincrement zmm27
|
||||
%define zqword_shuf zmm28
|
||||
%define zones zmm29
|
||||
%define ztwofiftyfour zmm30
|
||||
%define zbswap zmm31
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
; Win64 build: the function uses xmm6-xmm15 (through the zmm6-zmm15 aliases)
; and rsi, rdi, rbp, r12, all of which the Microsoft x64 calling convention
; treats as callee-saved, so they are spilled to a stack frame of stack_size
; bytes: ten 16-byte xmm slots, four 8-byte GPR slots and 8 bytes of padding.
%define stack_size  10*16 + 4 * 8 + 8

; Prologue: allocate the frame and save every callee-saved register used.
; NOTE(review): save_reg is assumed to store the register at [rsp + offset],
; matching the explicit loads in FUNC_RESTORE -- confirm against its definition.
%macro FUNC_SAVE 0
	alloc_stack	stack_size
	vmovdqa	[rsp + 0*16], xmm6
	vmovdqa	[rsp + 1*16], xmm7
	vmovdqa	[rsp + 2*16], xmm8
	vmovdqa	[rsp + 3*16], xmm9
	vmovdqa	[rsp + 4*16], xmm10
	vmovdqa	[rsp + 5*16], xmm11
	vmovdqa	[rsp + 6*16], xmm12
	vmovdqa	[rsp + 7*16], xmm13
	vmovdqu	[rsp + 8*16], xmm14
	vmovdqa	[rsp + 9*16], xmm15
	save_reg	rsi, 10*16 + 0*8
	save_reg	rdi, 10*16 + 1*8
	save_reg	rbp, 10*16 + 2*8
	save_reg	r12, 10*16 + 3*8
	end_prolog
%endm

; Epilogue: reload everything FUNC_SAVE stored, then release the frame.
%macro FUNC_RESTORE 0
	vmovdqa	xmm6, [rsp + 0*16]
	vmovdqa	xmm7, [rsp + 1*16]
	vmovdqa	xmm8, [rsp + 2*16]
	vmovdqa	xmm9, [rsp + 3*16]
	vmovdqa	xmm10, [rsp + 4*16]
	vmovdqa	xmm11, [rsp + 5*16]
	vmovdqa	xmm12, [rsp + 6*16]
	vmovdqa	xmm13, [rsp + 7*16]
	vmovdqa	xmm14, [rsp + 8*16]
	vmovdqa	xmm15, [rsp + 9*16]

	; The destination is the register and the source is its stack slot;
	; the slots are the ones written by save_reg in FUNC_SAVE.
	mov	rsi, [rsp + 10*16 + 0*8]
	mov	rdi, [rsp + 10*16 + 1*8]
	mov	rbp, [rsp + 10*16 + 2*8]
	mov	r12, [rsp + 10*16 + 3*8]
	add	rsp, stack_size
%endm
%else
; Other output formats: only rbp and r12 are saved, using push/pop.
%macro FUNC_SAVE 0
	push	rbp
	push	r12
%endm

; Pops in the reverse order of the pushes in FUNC_SAVE.
%macro FUNC_RESTORE 0
	pop	r12
	pop	rbp
%endm
%endif
|
||||
|
||||
; Input positions handled per main-loop step, and the size in bytes of one
; hash-table slot.
%define VECT_SIZE 16
|
||||
%define HASH_BYTES 2
|
||||
|
||||
; gen_icf_map_lh1_06(arg1, arg2, arg3)
;   arg1 = stream        stream structure; replaced below by its level buffer
;   arg2 = matches_next  output cursor; each step stores 16 dword lanes
;   arg3 = f_i_end       input size on entry; converted to an end index below
; This block is the set-up: it handles the first byte, hashes and inserts the
; first 16 positions, and pre-computes the hashes for the following 16.
global gen_icf_map_lh1_06
|
||||
gen_icf_map_lh1_06:
|
||||
FUNC_SAVE
|
||||
|
||||
; file_start = next_in - total_in, so [file_start + f_i] addresses the byte
; at stream offset f_i. f_i starts at the 32-bit total_in.
mov file_start, [stream + _next_in]
|
||||
mov f_i %+ d, dword [stream + _total_in]
|
||||
|
||||
sub file_start, f_i
|
||||
; f_i_end = total_in + input size; nothing to do when that is not past f_i.
add f_i_end, f_i
|
||||
cmp f_i, f_i_end
|
||||
jge end_main
|
||||
|
||||
;; Prep for main loop
|
||||
; Overwrites arg1: from here on it holds the level buffer, not the stream.
mov level_buf, [stream + _level_buf]
|
||||
; Stop LA bytes short of the end of the input.
sub f_i_end, LA
|
||||
; Load the loop-invariant constant vectors and masks.
vmovdqu64 zdatas_perm, [datas_perm]
|
||||
vmovdqu64 zdatas_shuf, [datas_shuf]
|
||||
vmovdqu64 zhash_prod, [hash_prod]
|
||||
vmovdqu64 zhash_mask, [hash_mask]
|
||||
vmovdqu64 zincrement, [increment]
|
||||
vmovdqu64 zqword_shuf, [qword_shuf]
|
||||
vmovdqu64 zdatas_perm2, [datas_perm2]
|
||||
vmovdqu64 zdatas_perm3, [datas_perm3]
|
||||
vmovdqu64 zones, [ones]
|
||||
vmovdqu64 zbswap, [bswap_shuf]
|
||||
vmovdqu64 zthirty, [thirty]
|
||||
vmovdqu64 zrot_left, [drot_left]
|
||||
vmovdqu64 zdist_mask, [dist_mask]
|
||||
vmovdqu64 zshortest_matches, [shortest_matches]
|
||||
vmovdqu64 ztwofiftyfour, [twofiftyfour]
|
||||
vmovdqu64 znull_dist_syms, [null_dist_syms]
|
||||
; k0 = all zeros (its complement is used as an all-ones mask below).
kxorq k0, k0, k0
|
||||
kmovq k1, [k_mask_1]
|
||||
kmovq k2, [k_mask_2]
|
||||
|
||||
; Length and distance carried from the last lane of one step into lane 0 of
; the next; both start at zero.
xor prev_len, prev_len
|
||||
xor prev_dist, prev_dist
|
||||
|
||||
;; Process first byte
|
||||
; Scalar insert of position f_i: hash its 4 bytes (two vpmaddwd rounds
; against hash_prod, then AND with hash_mask) and store the low 16 bits of
; f_i into that hash-table slot.
vmovd zhashes %+ x, dword [f_i + file_start]
|
||||
vpmaddwd zhashes, zhashes, zhash_prod
|
||||
vpmaddwd zhashes, zhashes, zhash_prod
|
||||
vpandd zhashes, zhashes, zhash_mask
|
||||
vmovd hash %+ d, zhashes %+ x
|
||||
mov word [hash_table + HASH_BYTES * hash], f_i %+ w
|
||||
|
||||
add f_i, 1
|
||||
cmp f_i, f_i_end
|
||||
jg end_main
|
||||
|
||||
;;hash
|
||||
; Load 32 input bytes, then use vpermq + vpshufb so that dword lane i holds
; the 4 bytes starting at byte offset i (i = 0..15), and hash all 16 lanes.
vmovdqu64 datas %+ y, [f_i + file_start]
|
||||
vpermq zhashes, zdatas_perm, datas
|
||||
vpshufb zhashes, zhashes, zdatas_shuf
|
||||
vpmaddwd zhashes, zhashes, zhash_prod
|
||||
vpmaddwd zhashes, zhashes, zhash_prod
|
||||
vpandd zhashes, zhashes, zhash_mask
|
||||
|
||||
; zlookup / zlookup2: qword lane j holds the 8 input bytes starting at
; offset j (zlookup) and at offset 8 + j (zlookup2). They are later XORed
; against the bytes found at the candidate match positions.
vpermq zlookup, zdatas_perm2, datas
|
||||
vpshufb zlookup, zlookup, zqword_shuf
|
||||
vpermq zlookup2, zdatas_perm3, datas
|
||||
vpshufb zlookup2, zlookup2, zqword_shuf
|
||||
|
||||
;;gather/scatter hashes
|
||||
; The table slots are 2 bytes wide but gather/scatter move dwords, so each
; access covers the addressed slot plus the slot after it.
knotq k6, k0
|
||||
vpgatherdd zdists_lookup {k6}, [hash_table + HASH_BYTES * zhashes]
|
||||
|
||||
; zindex = stream index of each lane. The word blend takes the words
; selected by k1 (the odd words) from the gathered value and the remaining
; words from zindex, so the scatter writes the new index into the addressed
; slot and writes the neighbouring slot back as it was gathered.
vpbroadcastd zindex, f_i %+ d
|
||||
vpaddd zindex, zindex, zincrement
|
||||
vpblendmw zscatter {k1}, zindex, zdists_lookup
|
||||
|
||||
knotq k6, k0
|
||||
vpscatterdd [hash_table + HASH_BYTES * zhashes] {k6}, zscatter
|
||||
|
||||
;; Compute hash for next loop
|
||||
; Same hashing sequence for the 16 positions starting at f_i + VECT_SIZE.
vmovdqu64 datas %+ y, [f_i + file_start + VECT_SIZE]
|
||||
vpermq zhashes, zdatas_perm, datas
|
||||
vpshufb zhashes, zhashes, zdatas_shuf
|
||||
vpmaddwd zhashes, zhashes, zhash_prod
|
||||
vpmaddwd zhashes, zhashes, zhash_prod
|
||||
vpandd zhashes, zhashes, zhash_mask
|
||||
|
||||
; Preload the 32 bytes two vectors ahead.
vmovdqu64 datas_lookup %+ y, [f_i + file_start + 2 * VECT_SIZE]
|
||||
|
||||
; Reserve one more vector of slack; skip the main loop when there is not
; room for a full step.
sub f_i_end, VECT_SIZE
|
||||
cmp f_i, f_i_end
|
||||
jg loop1_end
|
||||
|
||||
; Main loop: one pass handles VECT_SIZE input positions. It consumes the
; hash-table values gathered by the previous pass (zdists_lookup, zindex,
; zlookup, zlookup2) while inserting and hashing ahead for the next one.
loop1:
|
||||
lea next_in, [f_i + file_start]
|
||||
|
||||
;; Calculate look back dists
|
||||
; dist = ((index - gathered - 1) & dist_mask) + 1, then
; zdists = lane - dist: a signed byte offset from next_in to the candidate
; match of each lane.
vpaddd zdists, zdists_lookup, zones
|
||||
vpsubd zdists, zindex, zdists
|
||||
vpandd zdists, zdists, zdist_mask
|
||||
vpaddd zdists, zdists, zones
|
||||
vpsubd zdists, zincrement, zdists
|
||||
|
||||
;;gather/scatter hashes
|
||||
; f_i now names the next vector; next_in still points at the current one.
add f_i, VECT_SIZE
|
||||
|
||||
; All-ones masks for the gather and the scatter.
kxnorq k6, k6, k6
|
||||
kxnorq k7, k7, k7
|
||||
vpgatherdd zdists_lookup {k6}, [hash_table + HASH_BYTES * zhashes]
|
||||
|
||||
; New index goes into the addressed slot; the words selected by k1 keep the
; gathered contents of the neighbouring slot.
vpbroadcastd zindex, f_i %+ d
|
||||
vpaddd zindex, zindex, zincrement
|
||||
vpblendmw zscatter {k1}, zindex, zdists_lookup
|
||||
|
||||
vpscatterdd [hash_table + HASH_BYTES * zhashes] {k7}, zscatter
|
||||
|
||||
;; Compute hash for next loop
|
||||
vpermq zhashes, zdatas_perm, datas_lookup
|
||||
vpshufb zhashes, zhashes, zdatas_shuf
|
||||
vpmaddwd zhashes, zhashes, zhash_prod
|
||||
vpmaddwd zhashes, zhashes, zhash_prod
|
||||
vpandd zhashes, zhashes, zhash_mask
|
||||
|
||||
;;lookup old codes
|
||||
; Fetch 8 bytes at every candidate position: lanes 0-7 through zdists,
; lanes 8-15 through zdists2 (the upper half of zdists).
vextracti32x8 zdists2 %+ y, zdists, 1
|
||||
kxnorq k6, k6, k6
|
||||
kxnorq k7, k7, k7
|
||||
vpgatherdq zlens1 {k6}, [next_in + zdists %+ y]
|
||||
vpgatherdq zlens2 {k7}, [next_in + zdists2 %+ y]
|
||||
|
||||
;; Calculate dist_icf_code
|
||||
; zincrement - (zdists + 1) recovers (dist - 1) per lane. For lanes where
; that exceeds 1 (k5): extra = 30 - lzcnt(dist - 1) and
; zcode = (dist - 1) & ((1 << extra) - 1); other lanes get extra = 0 and
; zcode = 0. The symbol is ((dist - 1) >> extra) + 2 * extra, and zcode is
; added in shifted left by EXTRA_BITS_OFFSET - DIST_OFFSET.
vpaddd zdists, zdists, zones
|
||||
vpsubd zdists, zincrement, zdists
|
||||
vpcmpgtd k5, zdists, zones
|
||||
vplzcntd zdist_extra, zdists
|
||||
vpsubd zdist_extra {k5}{z}, zthirty, zdist_extra
|
||||
vpsllvd zcode, zones, zdist_extra
|
||||
vpsubd zcode, zcode, zones
|
||||
vpandd zcode {k5}{z}, zdists, zcode
|
||||
vpsrlvd zdists, zdists, zdist_extra
|
||||
vpslld zdist_extra, zdist_extra, 1
|
||||
vpaddd zdists, zdists, zdist_extra
|
||||
vpslld zcode, zcode, EXTRA_BITS_OFFSET - DIST_OFFSET
|
||||
vpaddd zdists, zdists, zcode
|
||||
|
||||
;; Setup zdists for combining with zlens
|
||||
vpslld zdists, zdists, DIST_OFFSET
|
||||
|
||||
;; xor current data with lookback dist
|
||||
; Bytes that match between the current and the candidate position become 0.
vpxorq zlens1, zlens1, zlookup
|
||||
vpxorq zlens2, zlens2, zlookup2
|
||||
|
||||
;; Setup registers for next loop
|
||||
; datas still holds the vector that starts at the new f_i.
vpermq zlookup, zdatas_perm2, datas
|
||||
vpshufb zlookup, zlookup, zqword_shuf
|
||||
vpermq zlookup2, zdatas_perm3, datas
|
||||
vpshufb zlookup2, zlookup2, zqword_shuf
|
||||
|
||||
;; Compute match length
|
||||
; Byte-reverse each qword so the first input byte is most significant; the
; leading-zero count is then 8 * (number of matching leading bytes). Narrow
; to dwords, merge both halves into zlens1 and divide by 8 (range 0..8).
vpshufb zlens1, zlens1, zbswap
|
||||
vpshufb zlens2, zlens2, zbswap
|
||||
vplzcntq zlens1, zlens1
|
||||
vplzcntq zlens2, zlens2
|
||||
vpmovqd zlens1 %+ y, zlens1
|
||||
vpmovqd zlens2 %+ y, zlens2
|
||||
vinserti32x8 zlens1, zlens2 %+ y, 1
|
||||
vpsrld zlens1, zlens1, 3
|
||||
|
||||
;; Preload for next loops
|
||||
vmovdqu64 datas, datas_lookup
|
||||
vmovdqu64 datas_lookup %+ y, [f_i + file_start + 2 * VECT_SIZE]
|
||||
|
||||
;; Zero out matches which should not be taken
|
||||
; k3 = k1 >> 15 over 16 bits; with k1 = 0xaaaa... only bit 0 (lane 0) is set.
; zrot_left moves lane i-1 into lane i (lane 15 wraps to lane 0), so zlens2
; and zdists now describe the position one before each lane.
kshiftrw k3, k1, 15
|
||||
vpermd zlens2, zrot_left, zlens1
|
||||
vpermd zdists, zrot_left, zdists
|
||||
|
||||
; Lane 0 takes the length carried over from the previous pass; this pass's
; last-lane length is saved in prev_len for the next one.
vmovd zdists_tmp %+ x, prev_len %+ d
|
||||
vmovd prev_len %+ d, zlens2 %+ x
|
||||
vmovdqu32 zlens2 {k3}, zdists_tmp
|
||||
|
||||
; Same hand-over for the distance code.
vmovd zdists_tmp %+ x, prev_dist %+ d
|
||||
vmovd prev_dist %+ d, zdists %+ x
|
||||
vmovdqu32 zdists {k3}, zdists_tmp
|
||||
|
||||
; k3 = rotated length > shortest_matches; k4 = this lane's length > rotated.
vpcmpgtd k3, zlens2, zshortest_matches
|
||||
vpcmpgtd k4, zlens1, zlens2
|
||||
|
||||
; After these three ops k3 marks lanes emitted as literals (rotated length
; not above the minimum, or the following position matches longer) and k4
; marks lanes emitted as matches. Lanes outside k4 get length 0.
knotq k3, k3
|
||||
korq k3, k3, k4
|
||||
knotq k4, k3
|
||||
vmovdqu32 zlens1 {k4}{z}, zlens2
|
||||
|
||||
;; Update zdists to match zlens1
|
||||
; Match lanes: distance fields + length + 254. Literal lanes are replaced by
; the zero-extended input byte (taken from one byte before the vector just
; processed) plus the null_dist_syms constant.
vpaddd zdists, zdists, zlens1
|
||||
vpaddd zdists, zdists, ztwofiftyfour
|
||||
vpmovzxbd zdists {k3}, [f_i + file_start - VECT_SIZE - 1]
|
||||
vpaddd zdists {k3}, zdists, znull_dist_syms
|
||||
|
||||
;;Store zdists
|
||||
vmovdqu64 [matches_next], zdists
|
||||
add matches_next, ICF_CODE_BYTES * VECT_SIZE
|
||||
|
||||
cmp f_i, f_i_end
|
||||
jle loop1
|
||||
|
||||
; Final pass: same computation as one loop1 iteration for the last set of
; gathered values, without inserting further hash-table entries, without
; advancing f_i and without preloading more input.
loop1_end:
|
||||
lea next_in, [f_i + file_start]
|
||||
|
||||
;; Calculate look back dists
|
||||
; zdists = lane - (((index - gathered - 1) & dist_mask) + 1)
vpaddd zdists, zdists_lookup, zones
|
||||
vpsubd zdists, zindex, zdists
|
||||
vpandd zdists, zdists, zdist_mask
|
||||
vpaddd zdists, zdists, zones
|
||||
vpsubd zdists, zincrement, zdists
|
||||
|
||||
;;lookup old codes
|
||||
; 8 bytes at every candidate position, low and high 8 lanes separately.
vextracti32x8 zdists2 %+ y, zdists, 1
|
||||
kxnorq k6, k6, k6
|
||||
kxnorq k7, k7, k7
|
||||
vpgatherdq zlens1 {k6}, [next_in + zdists %+ y]
|
||||
vpgatherdq zlens2 {k7}, [next_in + zdists2 %+ y]
|
||||
|
||||
;; Calculate dist_icf_code
|
||||
; Recover (dist - 1), then build symbol and extra bits exactly as in loop1.
vpaddd zdists, zdists, zones
|
||||
vpsubd zdists, zincrement, zdists
|
||||
vpcmpgtd k5, zdists, zones
|
||||
vplzcntd zdist_extra, zdists
|
||||
vpsubd zdist_extra {k5}{z}, zthirty, zdist_extra
|
||||
vpsllvd zcode, zones, zdist_extra
|
||||
vpsubd zcode, zcode, zones
|
||||
vpandd zcode {k5}{z}, zdists, zcode
|
||||
vpsrlvd zdists, zdists, zdist_extra
|
||||
vpslld zdist_extra, zdist_extra, 1
|
||||
vpaddd zdists, zdists, zdist_extra
|
||||
vpslld zcode, zcode, EXTRA_BITS_OFFSET - DIST_OFFSET
|
||||
vpaddd zdists, zdists, zcode
|
||||
|
||||
;; Setup zdists for combining with zlens
|
||||
vpslld zdists, zdists, DIST_OFFSET
|
||||
|
||||
;; xor current data with lookback dist
|
||||
vpxorq zlens1, zlens1, zlookup
|
||||
vpxorq zlens2, zlens2, zlookup2
|
||||
|
||||
;; Compute match length
|
||||
; Leading-zero count of the byte-reversed XOR, divided by 8 = matching bytes.
vpshufb zlens1, zlens1, zbswap
|
||||
vpshufb zlens2, zlens2, zbswap
|
||||
vplzcntq zlens1, zlens1
|
||||
vplzcntq zlens2, zlens2
|
||||
vpmovqd zlens1 %+ y, zlens1
|
||||
vpmovqd zlens2 %+ y, zlens2
|
||||
vinserti32x8 zlens1, zlens2 %+ y, 1
|
||||
vpsrld zlens1, zlens1, 3
|
||||
|
||||
;; Zero out matches which should not be taken
|
||||
; Rotate lengths and distances by one lane; lane 0 receives the values
; carried in prev_len / prev_dist.
kshiftrw k3, k1, 15
|
||||
vpermd zlens2, zrot_left, zlens1
|
||||
vpermd zdists, zrot_left, zdists
|
||||
|
||||
vmovd zdists_tmp %+ x, prev_len %+ d
|
||||
vmovd prev_len %+ d, zlens2 %+ x
|
||||
vmovdqu32 zlens2 {k3}, zdists_tmp
|
||||
|
||||
vmovd zdists_tmp %+ x, prev_dist %+ d
|
||||
vmovd prev_dist %+ d, zdists %+ x
|
||||
vmovdqu32 zdists {k3}, zdists_tmp
|
||||
|
||||
vpcmpgtd k3, zlens2, zshortest_matches
|
||||
vpcmpgtd k4, zlens1, zlens2
|
||||
|
||||
; k3 = literal lanes, k4 = match lanes (see the identical sequence in loop1).
knotq k3, k3
|
||||
korq k3, k3, k4
|
||||
knotq k4, k3
|
||||
vmovdqu32 zlens1 {k4}{z}, zlens2
|
||||
|
||||
;; Update zdists to match zlens1
|
||||
; f_i was not advanced in this pass, so the literal bytes start at f_i - 1.
vpaddd zdists, zdists, zlens1
|
||||
vpaddd zdists, zdists, ztwofiftyfour
|
||||
vpmovzxbd zdists {k3}, [f_i + file_start - 1]
|
||||
vpaddd zdists {k3}, zdists, znull_dist_syms
|
||||
|
||||
;;Store zdists
|
||||
vmovdqu64 [matches_next], zdists
|
||||
|
||||
; Common exit: restore callee-saved registers and return.
end_main:
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
; Constant tables for gen_icf_map_lh1_06. Unless noted, each table is 64
; bytes and is loaded whole into one zmm register before the main loop.
section .data
|
||||
align 64
|
||||
; vpermq indices used before hashing: 128-bit lanes 0-1 receive source
; qwords {0,1}, lanes 2-3 receive source qwords {1,2}.
datas_perm:
|
||||
dq 0x0, 0x1, 0x0, 0x1, 0x1, 0x2, 0x1, 0x2
|
||||
; vpermq indices for zlookup: every 128-bit lane receives source qwords {0,1}.
datas_perm2:
|
||||
dq 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1
|
||||
; vpermq indices for zlookup2: every 128-bit lane receives source qwords {1,2}.
datas_perm3:
|
||||
dq 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2
|
||||
; vpermd indices that move dword lane i-1 into lane i; lane 15 wraps to lane 0.
drot_left:
|
||||
dd 0xf, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6
|
||||
dd 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe
|
||||
; vpshufb pattern: each dword takes 4 consecutive bytes, starting one byte
; later than the previous dword. Combined with datas_perm this gives dword
; lane i the 4 input bytes at offset i.
datas_shuf:
|
||||
db 0x0, 0x1, 0x2, 0x3
|
||||
db 0x1, 0x2, 0x3, 0x4
|
||||
db 0x2, 0x3, 0x4, 0x5
|
||||
db 0x3, 0x4, 0x5, 0x6
|
||||
db 0x4, 0x5, 0x6, 0x7
|
||||
db 0x5, 0x6, 0x7, 0x8
|
||||
db 0x6, 0x7, 0x8, 0x9
|
||||
db 0x7, 0x8, 0x9, 0xa
|
||||
db 0x0, 0x1, 0x2, 0x3
|
||||
db 0x1, 0x2, 0x3, 0x4
|
||||
db 0x2, 0x3, 0x4, 0x5
|
||||
db 0x3, 0x4, 0x5, 0x6
|
||||
db 0x4, 0x5, 0x6, 0x7
|
||||
db 0x5, 0x6, 0x7, 0x8
|
||||
db 0x6, 0x7, 0x8, 0x9
|
||||
db 0x7, 0x8, 0x9, 0xa
|
||||
; vpshufb pattern that reverses the byte order inside every qword.
bswap_shuf:
|
||||
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
|
||||
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
|
||||
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
|
||||
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
|
||||
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
|
||||
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
|
||||
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
|
||||
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
|
||||
|
||||
; vpshufb pattern: qword j takes the 8 bytes starting at offset j. The table
; has nine 8-byte rows (72 bytes); a 64-byte load covers the first eight.
qword_shuf:
|
||||
db 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
|
||||
db 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8
|
||||
db 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9
|
||||
db 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa
|
||||
db 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb
|
||||
db 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc
|
||||
db 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd
|
||||
db 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe
|
||||
db 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
|
||||
|
||||
; 16-bit multipliers for the two vpmaddwd hashing rounds.
%define PROD1 0xE84B
|
||||
%define PROD2 0x97B1
|
||||
|
||||
hash_prod:
|
||||
dw PROD1, PROD2, PROD1, PROD2, PROD1, PROD2, PROD1, PROD2
|
||||
dw PROD1, PROD2, PROD1, PROD2, PROD1, PROD2, PROD1, PROD2
|
||||
dw PROD1, PROD2, PROD1, PROD2, PROD1, PROD2, PROD1, PROD2
|
||||
dw PROD1, PROD2, PROD1, PROD2, PROD1, PROD2, PROD1, PROD2
|
||||
; Added to the raw input byte of every lane that is emitted as a literal.
null_dist_syms:
|
||||
dd LIT, LIT, LIT, LIT, LIT, LIT, LIT, LIT
|
||||
dd LIT, LIT, LIT, LIT, LIT, LIT, LIT, LIT
|
||||
; Lane numbers 0..15.
increment:
|
||||
dd 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
|
||||
dd 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
|
||||
ones:
|
||||
dd 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1
|
||||
dd 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1
|
||||
; 30: minuend for "30 - lzcnt" in the distance-code computation.
thirty:
|
||||
dd 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e
|
||||
dd 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e
|
||||
; 254: added to the match length of every lane emitted as a match.
twofiftyfour:
|
||||
dd 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe
|
||||
dd 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe
|
||||
dist_mask:
|
||||
dd D-1, D-1, D-1, D-1, D-1, D-1, D-1, D-1
|
||||
dd D-1, D-1, D-1, D-1, D-1, D-1, D-1, D-1
|
||||
hash_mask:
|
||||
dd LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK
|
||||
dd LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK
|
||||
dd LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK
|
||||
dd LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK, LVL2_HASH_MASK
|
||||
; Not loaded by the function in this file.
lit_len_mask:
|
||||
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
|
||||
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
|
||||
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
|
||||
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
|
||||
; A candidate length must be strictly greater than this to count as a match.
shortest_matches:
|
||||
dd MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH
|
||||
dd MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH
|
||||
dd MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH
|
||||
dd MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH
|
||||
|
||||
; Word-granular blend mask with every odd bit set (loaded into k1).
k_mask_1:
|
||||
dq 0xaaaaaaaaaaaaaaaa
|
||||
; Loaded into k2; k2 is not used afterwards by the function in this file.
k_mask_2:
|
||||
dq 0x7fff
|
||||
%endif
|
@ -26,11 +26,11 @@ static inline void update_state(struct isal_zstream *stream, uint8_t * start_in,
|
||||
stream->internal_state.block_end = stream->total_in;
|
||||
stream->avail_in = end_in - next_in;
|
||||
|
||||
((struct level_2_buf *)stream->level_buf)->icf_buf_next = next_out;
|
||||
((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out = end_out - next_out;
|
||||
((struct level_buf *)stream->level_buf)->icf_buf_next = next_out;
|
||||
((struct level_buf *)stream->level_buf)->icf_buf_avail_out = end_out - next_out;
|
||||
}
|
||||
|
||||
void isal_deflate_icf_body_base(struct isal_zstream *stream)
|
||||
void isal_deflate_icf_body_lvl1_base(struct isal_zstream *stream)
|
||||
{
|
||||
uint32_t literal, hash;
|
||||
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
|
||||
@ -52,9 +52,9 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream)
|
||||
end_in = start_in + stream->avail_in;
|
||||
next_in = start_in;
|
||||
|
||||
start_out = ((struct level_2_buf *)stream->level_buf)->icf_buf_next;
|
||||
start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next;
|
||||
end_out =
|
||||
start_out + ((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out /
|
||||
start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out /
|
||||
sizeof(struct deflate_icf);
|
||||
next_out = start_out;
|
||||
|
||||
@ -68,7 +68,7 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream)
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
@ -89,7 +89,7 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream)
|
||||
|
||||
for (; next_hash < end; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||
}
|
||||
|
||||
@ -124,7 +124,7 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream)
|
||||
|
||||
}
|
||||
|
||||
void isal_deflate_icf_finish_base(struct isal_zstream *stream)
|
||||
void isal_deflate_icf_finish_lvl1_base(struct isal_zstream *stream)
|
||||
{
|
||||
uint32_t literal = 0, hash;
|
||||
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
|
||||
@ -140,8 +140,8 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream)
|
||||
end_in = start_in + stream->avail_in;
|
||||
next_in = start_in;
|
||||
|
||||
start_out = ((struct level_2_buf *)stream->level_buf)->icf_buf_next;
|
||||
end_out = start_out + ((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out /
|
||||
start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next;
|
||||
end_out = start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out /
|
||||
sizeof(struct deflate_icf);
|
||||
next_out = start_out;
|
||||
|
||||
@ -160,7 +160,7 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream)
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
@ -178,7 +178,7 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream)
|
||||
|
||||
for (; next_hash < end - 3; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
hash = compute_hash(literal) & LVL0_HASH_MASK;
|
||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||
}
|
||||
|
||||
@ -231,3 +231,131 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream)
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void isal_deflate_icf_finish_lvl2_base(struct isal_zstream *stream)
|
||||
{
|
||||
uint32_t literal = 0, hash;
|
||||
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
|
||||
struct deflate_icf *start_out, *next_out, *end_out;
|
||||
uint16_t match_length;
|
||||
uint32_t dist;
|
||||
uint32_t code, code2, extra_bits;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
uint16_t *last_seen = level_buf->lvl2.hash_table;
|
||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||
|
||||
start_in = stream->next_in;
|
||||
end_in = start_in + stream->avail_in;
|
||||
next_in = start_in;
|
||||
|
||||
start_out = level_buf->icf_buf_next;
|
||||
end_out = start_out + level_buf->icf_buf_avail_out / sizeof(struct deflate_icf);
|
||||
next_out = start_out;
|
||||
|
||||
if (stream->avail_in == 0) {
|
||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
return;
|
||||
}
|
||||
|
||||
while (next_in + 3 < end_in) {
|
||||
if (next_out >= end_out) {
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
update_state(stream, start_in, next_in, end_in, start_out, next_out,
|
||||
end_out);
|
||||
return;
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash_mad(literal) & LVL2_HASH_MASK;
|
||||
dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
if (dist - 1 < IGZIP_HIST_SIZE - 1) { /* The -1 are to handle the case when dist = 0 */
|
||||
match_length = compare258(next_in - dist, next_in, end_in - next_in);
|
||||
|
||||
if (match_length >= SHORTEST_MATCH) {
|
||||
next_hash = next_in;
|
||||
#ifdef ISAL_LIMIT_HASH_UPDATE
|
||||
end = next_hash + 3;
|
||||
#else
|
||||
end = next_hash + match_length;
|
||||
#endif
|
||||
next_hash++;
|
||||
|
||||
for (; next_hash < end - 3; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash_mad(literal) & LVL2_HASH_MASK;
|
||||
last_seen[hash] = (uint64_t) (next_hash - file_start);
|
||||
}
|
||||
|
||||
get_len_icf_code(match_length, &code);
|
||||
get_dist_icf_code(dist, &code2, &extra_bits);
|
||||
|
||||
state->hist.ll_hist[code]++;
|
||||
state->hist.d_hist[code2]++;
|
||||
|
||||
write_deflate_icf(next_out, code, code2, extra_bits);
|
||||
|
||||
next_out++;
|
||||
next_in += match_length;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
get_lit_icf_code(literal & 0xFF, &code);
|
||||
state->hist.ll_hist[code]++;
|
||||
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
|
||||
next_out++;
|
||||
next_in++;
|
||||
|
||||
}
|
||||
|
||||
while (next_in < end_in) {
|
||||
if (next_out >= end_out) {
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
update_state(stream, start_in, next_in, end_in, start_out, next_out,
|
||||
end_out);
|
||||
return;
|
||||
}
|
||||
|
||||
literal = *next_in;
|
||||
get_lit_icf_code(literal & 0xFF, &code);
|
||||
state->hist.ll_hist[code]++;
|
||||
write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
|
||||
next_out++;
|
||||
next_in++;
|
||||
|
||||
}
|
||||
|
||||
if (next_in == end_in) {
|
||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
}
|
||||
|
||||
update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void isal_deflate_hash_lvl2_base(struct isal_zstream *stream, uint8_t * dict,
|
||||
uint32_t dict_len)
|
||||
{
|
||||
uint8_t *next_in = dict;
|
||||
uint8_t *end_in = dict + dict_len - SHORTEST_MATCH;
|
||||
uint32_t literal;
|
||||
uint32_t hash;
|
||||
uint16_t lookup_val = stream->total_in - dict_len;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
uint16_t *last_seen = level_buf->lvl2.hash_table;
|
||||
|
||||
while (next_in <= end_in) {
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash_mad(literal) & LVL2_HASH_MASK;
|
||||
last_seen[hash] = lookup_val;
|
||||
lookup_val++;
|
||||
next_in++;
|
||||
}
|
||||
}
|
||||
|
342
igzip/igzip_icf_body.c
Normal file
342
igzip/igzip_icf_body.c
Normal file
@ -0,0 +1,342 @@
|
||||
#include "igzip_lib.h"
|
||||
#include "huffman.h"
|
||||
#include "encode_df.h"
|
||||
#include "igzip_level_buf_structs.h"
|
||||
|
||||
/*
 * Level-2 building blocks that are defined outside this file.  The size
 * argument of gen_icf_map_lh1 is 64 bits wide, matching its definition.
 */
extern void gen_icf_map_lh1(struct isal_zstream *, struct deflate_icf *, uint64_t);
extern void set_long_icf_fg(uint8_t *, uint8_t *, struct deflate_icf *, struct level_buf *);
extern void isal_deflate_icf_body_lvl1(struct isal_zstream *);
|
||||
/*
|
||||
*************************************************************
|
||||
* Helper functions
|
||||
************************************************************
|
||||
*/
|
||||
static inline void write_deflate_icf(struct deflate_icf *icf, uint32_t lit_len,
|
||||
uint32_t lit_dist, uint32_t extra_bits)
|
||||
{
|
||||
/* icf->lit_len = lit_len; */
|
||||
/* icf->lit_dist = lit_dist; */
|
||||
/* icf->dist_extra = extra_bits; */
|
||||
|
||||
*(uint32_t *) icf = lit_len | (lit_dist << LIT_LEN_BIT_COUNT)
|
||||
| (extra_bits << (LIT_LEN_BIT_COUNT + DIST_LIT_BIT_COUNT));
|
||||
}
|
||||
|
||||
void hash_section(struct isal_zstream *stream, uint8_t * next_in, uint8_t * end_in,
|
||||
uint16_t * last_seen)
|
||||
{
|
||||
uint32_t index, hash_input, hash;
|
||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
uint16_t *hash_table = level_buf->lvl2.hash_table;
|
||||
|
||||
/* Compute Hashes */
|
||||
for (index = 0; index < end_in - next_in - ISAL_LOOK_AHEAD; index++) {
|
||||
hash_input = *(uint32_t *) (next_in + index);
|
||||
hash = compute_hash(hash_input) & LVL2_HASH_MASK;
|
||||
last_seen[index] = hash_table[hash];
|
||||
hash_table[hash] = (uint64_t) (next_in + index - file_start);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in,
|
||||
struct deflate_icf *match_lookup, struct level_buf *level_buf)
|
||||
{
|
||||
uint32_t dist_code, dist_extra, dist, len;
|
||||
uint32_t match_len;
|
||||
uint32_t dist_start[] = {
|
||||
0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d,
|
||||
0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1,
|
||||
0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01,
|
||||
0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
|
||||
};
|
||||
|
||||
while (next_in < end_in - ISAL_LOOK_AHEAD) {
|
||||
dist_code = match_lookup->lit_dist;
|
||||
dist_extra = match_lookup->dist_extra;
|
||||
dist = dist_start[dist_code] + dist_extra;
|
||||
len = match_lookup->lit_len;
|
||||
if (len >= 8 + LEN_OFFSET) {
|
||||
match_len =
|
||||
compare258(next_in - dist + 8, next_in + 8, 250) + LEN_OFFSET + 8;
|
||||
|
||||
while (match_len > match_lookup->lit_len
|
||||
&& match_len >= LEN_OFFSET + SHORTEST_MATCH) {
|
||||
write_deflate_icf(match_lookup, match_len, dist_code,
|
||||
dist_extra);
|
||||
match_lookup++;
|
||||
next_in++;
|
||||
match_len--;
|
||||
}
|
||||
}
|
||||
|
||||
match_lookup++;
|
||||
next_in++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
*************************************************************
|
||||
* Methods for generating one pass match lookup table
|
||||
************************************************************
|
||||
*/
|
||||
void gen_icf_map_h1_base(struct isal_zstream *stream,
|
||||
struct deflate_icf *matches_icf_lookup, uint64_t input_size)
|
||||
{
|
||||
|
||||
uint32_t dist, len, extra_bits;
|
||||
uint8_t *next_in = stream->next_in, *end_in = stream->next_in + input_size;
|
||||
uint8_t *file_start = stream->next_in - stream->total_in;
|
||||
uint32_t hash;
|
||||
uint64_t next_bytes, match_bytes;
|
||||
uint64_t match;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
uint16_t *hash_table = level_buf->lvl2.hash_table;
|
||||
|
||||
if (input_size < ISAL_LOOK_AHEAD)
|
||||
return;
|
||||
|
||||
matches_icf_lookup->lit_len = *next_in;
|
||||
matches_icf_lookup->lit_dist = 0x1e;
|
||||
matches_icf_lookup->dist_extra = 0;
|
||||
|
||||
hash = compute_hash(*(uint32_t *) next_in) & LVL2_HASH_MASK;
|
||||
hash_table[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
next_in++;
|
||||
matches_icf_lookup++;
|
||||
|
||||
while (next_in < end_in - ISAL_LOOK_AHEAD) {
|
||||
hash = compute_hash(*(uint32_t *) next_in) & LVL2_HASH_MASK;
|
||||
dist = (next_in - file_start - hash_table[hash]);
|
||||
dist = ((dist - 1) & (IGZIP_HIST_SIZE - 1)) + 1;
|
||||
hash_table[hash] = (uint64_t) (next_in - file_start);
|
||||
|
||||
match_bytes = *(uint64_t *) (next_in - dist);
|
||||
next_bytes = *(uint64_t *) next_in;
|
||||
match = next_bytes ^ match_bytes;
|
||||
|
||||
len = tzcnt(match);
|
||||
|
||||
if (len >= SHORTEST_MATCH) {
|
||||
len += LEN_OFFSET;
|
||||
get_dist_icf_code(dist, &dist, &extra_bits);
|
||||
write_deflate_icf(matches_icf_lookup, len, dist, extra_bits);
|
||||
} else {
|
||||
write_deflate_icf(matches_icf_lookup, *next_in, 0x1e, 0);
|
||||
}
|
||||
|
||||
next_in++;
|
||||
matches_icf_lookup++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
*************************************************************
|
||||
* One pass methods for parsing provided match lookup table
|
||||
************************************************************
|
||||
*/
|
||||
struct deflate_icf *compress_icf_map_g(struct isal_zstream *stream,
|
||||
struct deflate_icf *matches_next,
|
||||
struct deflate_icf *matches_end)
|
||||
{
|
||||
uint32_t lit_len, lit_len2, dist;
|
||||
uint64_t code;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
struct deflate_icf *matches_start = matches_next;
|
||||
struct deflate_icf *icf_buf_end =
|
||||
level_buf->icf_buf_next +
|
||||
level_buf->icf_buf_avail_out / sizeof(struct deflate_icf);
|
||||
|
||||
while (matches_next < matches_end - 1 && level_buf->icf_buf_next < icf_buf_end - 1) {
|
||||
code = *(uint64_t *) matches_next;
|
||||
lit_len = code & LIT_LEN_MASK;
|
||||
lit_len2 = (code >> ICF_CODE_LEN) & LIT_LEN_MASK;
|
||||
state->hist.ll_hist[lit_len]++;
|
||||
|
||||
if (lit_len >= LEN_START) {
|
||||
*(uint32_t *) level_buf->icf_buf_next = code;
|
||||
level_buf->icf_buf_next++;
|
||||
|
||||
dist = (code >> ICF_DIST_OFFSET) & DIST_LIT_MASK;
|
||||
state->hist.d_hist[dist]++;
|
||||
lit_len -= LEN_OFFSET;
|
||||
matches_next += lit_len;
|
||||
|
||||
} else if (lit_len2 >= LEN_START) {
|
||||
*(uint64_t *) level_buf->icf_buf_next = code;
|
||||
level_buf->icf_buf_next += 2;
|
||||
|
||||
state->hist.ll_hist[lit_len2]++;
|
||||
|
||||
dist = (code >> (ICF_CODE_LEN + ICF_DIST_OFFSET)) & DIST_LIT_MASK;
|
||||
state->hist.d_hist[dist]++;
|
||||
lit_len2 -= LEN_OFFSET - 1;
|
||||
matches_next += lit_len2;
|
||||
|
||||
} else {
|
||||
code = ((lit_len2 + LIT_START) << ICF_DIST_OFFSET) | lit_len;
|
||||
*(uint32_t *) level_buf->icf_buf_next = code;
|
||||
level_buf->icf_buf_next++;
|
||||
|
||||
state->hist.ll_hist[lit_len2]++;
|
||||
|
||||
matches_next += 2;
|
||||
}
|
||||
}
|
||||
|
||||
while (matches_next < matches_end && level_buf->icf_buf_next < icf_buf_end) {
|
||||
code = *(uint32_t *) matches_next;
|
||||
lit_len = code & LIT_LEN_MASK;
|
||||
*(uint32_t *) level_buf->icf_buf_next = code;
|
||||
level_buf->icf_buf_next++;
|
||||
|
||||
state->hist.ll_hist[lit_len]++;
|
||||
if (lit_len >= LEN_START) {
|
||||
dist = (code >> 10) & 0x1ff;
|
||||
state->hist.d_hist[dist]++;
|
||||
lit_len -= LEN_OFFSET;
|
||||
matches_next += lit_len;
|
||||
} else {
|
||||
matches_next++;
|
||||
}
|
||||
}
|
||||
|
||||
level_buf->icf_buf_avail_out =
|
||||
(icf_buf_end - level_buf->icf_buf_next) * sizeof(struct deflate_icf);
|
||||
|
||||
state->block_end += matches_next - matches_start;
|
||||
if (matches_next > matches_end && matches_start < matches_end) {
|
||||
stream->next_in += matches_next - matches_end;
|
||||
stream->avail_in -= matches_next - matches_end;
|
||||
stream->total_in += matches_next - matches_end;
|
||||
}
|
||||
|
||||
return matches_next;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
*************************************************************
|
||||
* Compression functions combining different methods
|
||||
************************************************************
|
||||
*/
|
||||
static inline void icf_body_next_state(struct isal_zstream *stream)
|
||||
{
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
|
||||
if (level_buf->icf_buf_avail_out <= 0)
|
||||
state->state = ZSTATE_CREATE_HDR;
|
||||
|
||||
else if (stream->avail_in <= ISAL_LOOK_AHEAD
|
||||
&& (stream->end_of_stream || stream->flush != NO_FLUSH))
|
||||
state->state = ZSTATE_FLUSH_READ_BUFFER;
|
||||
}
|
||||
|
||||
void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream)
|
||||
{
|
||||
struct deflate_icf *matches_icf, *matches_next_icf, *matches_end_icf;
|
||||
struct deflate_icf *matches_icf_lookup;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
uint32_t input_size;
|
||||
|
||||
matches_icf = level_buf->lvl2.matches;
|
||||
matches_icf_lookup = matches_icf;
|
||||
matches_next_icf = level_buf->lvl2.matches_next;
|
||||
matches_end_icf = level_buf->lvl2.matches_end;
|
||||
|
||||
matches_next_icf = compress_icf_map_g(stream, matches_next_icf, matches_end_icf);
|
||||
|
||||
while (matches_next_icf >= matches_end_icf) {
|
||||
input_size = MATCH_BUF_SIZE;
|
||||
input_size = (input_size > stream->avail_in) ? stream->avail_in : input_size;
|
||||
|
||||
if (input_size <= ISAL_LOOK_AHEAD)
|
||||
break;
|
||||
|
||||
gen_icf_map_h1_base(stream, matches_icf_lookup, input_size);
|
||||
|
||||
set_long_icf_fg(stream->next_in, stream->next_in + input_size,
|
||||
matches_icf_lookup, level_buf);
|
||||
|
||||
stream->next_in += input_size - ISAL_LOOK_AHEAD;
|
||||
stream->avail_in -= input_size - ISAL_LOOK_AHEAD;
|
||||
stream->total_in += input_size - ISAL_LOOK_AHEAD;
|
||||
|
||||
matches_end_icf = matches_icf + input_size - ISAL_LOOK_AHEAD;
|
||||
matches_next_icf = compress_icf_map_g(stream, matches_icf, matches_end_icf);
|
||||
}
|
||||
|
||||
level_buf->lvl2.matches_next = matches_next_icf;
|
||||
level_buf->lvl2.matches_end = matches_end_icf;
|
||||
|
||||
icf_body_next_state(stream);
|
||||
}
|
||||
|
||||
void icf_body_lazyhash1_fillgreedy_greedy(struct isal_zstream *stream)
|
||||
{
|
||||
struct deflate_icf *matches_icf, *matches_next_icf, *matches_end_icf;
|
||||
struct deflate_icf *matches_icf_lookup;
|
||||
struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
|
||||
uint32_t input_size;
|
||||
|
||||
matches_icf = level_buf->lvl2.matches;
|
||||
matches_icf_lookup = matches_icf;
|
||||
matches_next_icf = level_buf->lvl2.matches_next;
|
||||
matches_end_icf = level_buf->lvl2.matches_end;
|
||||
|
||||
matches_next_icf = compress_icf_map_g(stream, matches_next_icf, matches_end_icf);
|
||||
|
||||
while (matches_next_icf >= matches_end_icf) {
|
||||
input_size = MATCH_BUF_SIZE;
|
||||
input_size = (input_size > stream->avail_in) ? stream->avail_in : input_size;
|
||||
|
||||
if (input_size <= ISAL_LOOK_AHEAD)
|
||||
break;
|
||||
|
||||
gen_icf_map_lh1(stream, matches_icf_lookup, input_size);
|
||||
|
||||
set_long_icf_fg(stream->next_in, stream->next_in + input_size,
|
||||
matches_icf_lookup, level_buf);
|
||||
|
||||
stream->next_in += input_size - ISAL_LOOK_AHEAD;
|
||||
stream->avail_in -= input_size - ISAL_LOOK_AHEAD;
|
||||
stream->total_in += input_size - ISAL_LOOK_AHEAD;
|
||||
|
||||
matches_end_icf = matches_icf + input_size - ISAL_LOOK_AHEAD;
|
||||
matches_next_icf = compress_icf_map_g(stream, matches_icf, matches_end_icf);
|
||||
}
|
||||
|
||||
level_buf->lvl2.matches_next = matches_next_icf;
|
||||
level_buf->lvl2.matches_end = matches_end_icf;
|
||||
|
||||
icf_body_next_state(stream);
|
||||
}
|
||||
|
||||
void isal_deflate_icf_body_base(struct isal_zstream *stream)
|
||||
{
|
||||
switch (stream->level) {
|
||||
case 2:
|
||||
icf_body_hash1_fillgreedy_lazy(stream);
|
||||
break;
|
||||
case 1:
|
||||
default:
|
||||
isal_deflate_icf_body_lvl1(stream);
|
||||
}
|
||||
}
|
||||
|
||||
void isal_deflate_icf_body_06(struct isal_zstream *stream)
|
||||
{
|
||||
switch (stream->level) {
|
||||
case 2:
|
||||
icf_body_lazyhash1_fillgreedy_greedy(stream);
|
||||
break;
|
||||
case 1:
|
||||
default:
|
||||
isal_deflate_icf_body_lvl1(stream);
|
||||
}
|
||||
}
|
@ -110,8 +110,8 @@ stack_size equ 3*8 + 8*8 + 4*16
|
||||
|
||||
; void isal_deflate_icf_body ( isal_zstream *stream )
|
||||
; arg 1: rcx: addr of stream
|
||||
global isal_deflate_icf_body_ %+ ARCH
|
||||
isal_deflate_icf_body_ %+ ARCH %+ :
|
||||
global isal_deflate_icf_body_lvl1_ %+ ARCH
|
||||
isal_deflate_icf_body_lvl1_ %+ ARCH %+ :
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
mov rcx, rdi
|
||||
%endif
|
||||
@ -190,8 +190,8 @@ MARK __body_compute_hash_ %+ ARCH
|
||||
shr tmp3, 8
|
||||
compute_hash hash2, tmp3
|
||||
|
||||
and hash, HASH_MASK
|
||||
and hash2, HASH_MASK
|
||||
and hash, LVL0_HASH_MASK
|
||||
and hash2, LVL0_HASH_MASK
|
||||
|
||||
cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
|
||||
je write_first_byte
|
||||
@ -220,7 +220,7 @@ loop2:
|
||||
mov tmp2, curr_data
|
||||
shr curr_data, 16
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
|
||||
mov dist2 %+ w, f_i %+ w
|
||||
dec dist2
|
||||
@ -233,7 +233,7 @@ loop2:
|
||||
|
||||
shr tmp2, 24
|
||||
compute_hash hash2, tmp2
|
||||
and hash2 %+ d, HASH_MASK
|
||||
and hash2 %+ d, LVL0_HASH_MASK
|
||||
|
||||
and dist2 %+ d, (D-1)
|
||||
neg dist2
|
||||
@ -286,7 +286,7 @@ len_dist_lit_huffman:
|
||||
|
||||
shr curr_data, 24
|
||||
compute_hash hash3, curr_data
|
||||
and hash3, HASH_MASK
|
||||
and hash3, LVL0_HASH_MASK
|
||||
|
||||
mov curr_data, tmp1
|
||||
shr tmp1, 8
|
||||
@ -318,9 +318,9 @@ len_dist_lit_huffman:
|
||||
and dist_code2, 0x1F
|
||||
inc word [stream + _internal_state_hist_dist + HIST_ELEM_SIZE*dist_code2]
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
and hash2 %+ d, LVL0_HASH_MASK
|
||||
|
||||
; continue
|
||||
cmp f_i, file_length
|
||||
@ -371,9 +371,9 @@ len_dist_huffman:
|
||||
and dist_code, 0x1F
|
||||
inc word [stream + _internal_state_hist_dist + HIST_ELEM_SIZE*dist_code]
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
and hash2 %+ d, LVL0_HASH_MASK
|
||||
|
||||
; continue
|
||||
cmp f_i, file_length
|
||||
@ -501,8 +501,8 @@ write_first_byte:
|
||||
MOVDQU xdata, [file_start + f_i + 1]
|
||||
add f_i, 1
|
||||
mov curr_data, [file_start + f_i]
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
and hash2 %+ d, LVL0_HASH_MASK
|
||||
|
||||
cmp f_i, file_length
|
||||
jl loop2
|
||||
@ -510,5 +510,5 @@ write_first_byte:
|
||||
|
||||
section .data
|
||||
align 16
|
||||
mask: dd HASH_MASK, HASH_MASK, HASH_MASK, HASH_MASK
|
||||
mask: dd LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK
|
||||
const_D: dq D
|
@ -4,4 +4,4 @@
|
||||
%define COMPARE_TYPE 2
|
||||
%endif
|
||||
|
||||
%include "igzip_icf_body.asm"
|
||||
%include "igzip_icf_body_h1_gr_bt.asm"
|
@ -4,4 +4,4 @@
|
||||
%define COMPARE_TYPE 2
|
||||
%endif
|
||||
|
||||
%include "igzip_icf_body.asm"
|
||||
%include "igzip_icf_body_h1_gr_bt.asm"
|
@ -5,4 +5,4 @@
|
||||
%define COMPARE_TYPE 3
|
||||
%endif
|
||||
|
||||
%include "igzip_icf_body.asm"
|
||||
%include "igzip_icf_body_h1_gr_bt.asm"
|
@ -83,8 +83,8 @@ m_out_start equ 16
|
||||
stack_size equ 32
|
||||
; void isal_deflate_icf_finish ( isal_zstream *stream )
|
||||
; arg 1: rcx: addr of stream
|
||||
global isal_deflate_icf_finish_01
|
||||
isal_deflate_icf_finish_01:
|
||||
global isal_deflate_icf_finish_lvl1_01
|
||||
isal_deflate_icf_finish_lvl1_01:
|
||||
PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
|
||||
sub rsp, stack_size
|
||||
|
||||
@ -129,7 +129,7 @@ isal_deflate_icf_finish_01:
|
||||
ja end_loop_2
|
||||
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
mov byte [stream + _internal_state_has_hist], IGZIP_HIST
|
||||
jmp encode_literal
|
||||
@ -141,10 +141,10 @@ loop2:
|
||||
cmp m_out_buf, [rsp + m_out_end]
|
||||
ja end_loop_2
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK;
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
|
||||
; f_index = state->head[hash];
|
||||
movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
|
||||
@ -203,19 +203,19 @@ loop2:
|
||||
|
||||
; only update hash twice
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
|
||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||
compute_hash hash, tmp6
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
add tmp3, 1
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK;
|
||||
mov tmp6 %+ d, dword [file_start + tmp3]
|
||||
compute_hash hash, tmp6
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
|
@ -1,16 +1,31 @@
|
||||
#ifndef IGZIP_LEVEL_BUF_STRUCTS_H
|
||||
#define IGZIP_LEVEL_BUF_STRUCTS_H
|
||||
|
||||
#include "igzip_lib.h"
|
||||
#include "huff_codes.h"
|
||||
#include "encode_df.h"
|
||||
|
||||
struct level_2_buf {
|
||||
#define MATCH_BUF_SIZE (4 * 1024)
|
||||
|
||||
struct lvl2_buf{
|
||||
uint16_t hash_table[IGZIP_LVL2_HASH_SIZE];
|
||||
struct deflate_icf *matches_next;
|
||||
struct deflate_icf *matches_end;
|
||||
struct deflate_icf matches[MATCH_BUF_SIZE];
|
||||
struct deflate_icf overflow[ISAL_LOOK_AHEAD];
|
||||
};
|
||||
|
||||
#define MAX_LVL_BUF_SIZE sizeof(struct lvl2_buf)
|
||||
|
||||
struct level_buf {
|
||||
struct hufftables_icf encode_tables;
|
||||
uint32_t deflate_hdr_count;
|
||||
uint32_t deflate_hdr_extra_bits;
|
||||
uint8_t deflate_hdr[ISAL_DEF_MAX_HDR_SIZE];
|
||||
struct deflate_icf *icf_buf_next;
|
||||
uint64_t icf_buf_avail_out;
|
||||
struct deflate_icf icf_buf_start[];
|
||||
struct deflate_icf *icf_buf_start;
|
||||
struct lvl2_buf lvl2;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -39,23 +39,29 @@ extern isal_deflate_body_04
|
||||
extern isal_deflate_finish_base
|
||||
extern isal_deflate_finish_01
|
||||
|
||||
|
||||
extern isal_deflate_icf_body_base
|
||||
extern isal_deflate_icf_body_01
|
||||
extern isal_deflate_icf_body_02
|
||||
extern isal_deflate_icf_body_04
|
||||
extern isal_deflate_icf_finish_base
|
||||
extern isal_deflate_icf_finish_01
|
||||
extern isal_deflate_icf_body_lvl1_base
|
||||
extern isal_deflate_icf_body_lvl1_01
|
||||
extern isal_deflate_icf_body_lvl1_02
|
||||
extern isal_deflate_icf_body_lvl1_04
|
||||
extern isal_deflate_icf_finish_lvl1_base
|
||||
extern isal_deflate_icf_finish_lvl1_01
|
||||
extern isal_deflate_icf_finish_lvl2_base
|
||||
|
||||
extern isal_update_histogram_base
|
||||
extern isal_update_histogram_01
|
||||
extern isal_update_histogram_04
|
||||
|
||||
extern gen_icf_map_h1_base
|
||||
|
||||
extern encode_deflate_icf_base
|
||||
extern encode_deflate_icf_04
|
||||
|
||||
extern set_long_icf_fg_base
|
||||
|
||||
%ifdef HAVE_AS_KNOWS_AVX512
|
||||
extern encode_deflate_icf_06
|
||||
extern set_long_icf_fg_06
|
||||
extern gen_icf_map_lh1_06
|
||||
%endif
|
||||
|
||||
extern crc32_gzip_base
|
||||
@ -68,6 +74,11 @@ extern adler32_sse
|
||||
extern isal_deflate_hash_lvl0_base
|
||||
extern isal_deflate_hash_lvl0_01
|
||||
|
||||
extern isal_deflate_hash_lvl2_base
|
||||
|
||||
extern isal_deflate_icf_body_base
|
||||
extern isal_deflate_icf_body_06
|
||||
|
||||
section .text
|
||||
|
||||
%include "multibinary.asm"
|
||||
@ -77,10 +88,14 @@ mbin_dispatch_init5 isal_deflate_body, isal_deflate_body_base, isal_deflate_body
|
||||
mbin_interface isal_deflate_finish
|
||||
mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01
|
||||
|
||||
mbin_interface isal_deflate_icf_body
|
||||
mbin_dispatch_init5 isal_deflate_icf_body, isal_deflate_icf_body_base, isal_deflate_icf_body_01, isal_deflate_icf_body_02, isal_deflate_icf_body_04
|
||||
mbin_interface isal_deflate_icf_finish
|
||||
mbin_dispatch_init5 isal_deflate_icf_finish, isal_deflate_icf_finish_base, isal_deflate_icf_finish_01, isal_deflate_icf_finish_01, isal_deflate_icf_finish_01
|
||||
mbin_interface isal_deflate_icf_body_lvl1
|
||||
mbin_dispatch_init5 isal_deflate_icf_body_lvl1, isal_deflate_icf_body_lvl1_base, isal_deflate_icf_body_lvl1_01, isal_deflate_icf_body_lvl1_02, isal_deflate_icf_body_lvl1_04
|
||||
|
||||
mbin_interface isal_deflate_icf_finish_lvl1
|
||||
mbin_dispatch_init5 isal_deflate_icf_finish_lvl1, isal_deflate_icf_finish_lvl1_base, isal_deflate_icf_finish_lvl1_01, isal_deflate_icf_finish_lvl1_01, isal_deflate_icf_finish_lvl1_01
|
||||
|
||||
mbin_interface isal_deflate_icf_finish_lvl2
|
||||
mbin_dispatch_init5 isal_deflate_icf_finish_lvl2, isal_deflate_icf_finish_lvl2_base, isal_deflate_icf_finish_lvl2_base, isal_deflate_icf_finish_lvl2_base, isal_deflate_icf_finish_lvl2_base
|
||||
|
||||
mbin_interface isal_update_histogram
|
||||
mbin_dispatch_init5 isal_update_histogram, isal_update_histogram_base, isal_update_histogram_01, isal_update_histogram_01, isal_update_histogram_04
|
||||
@ -88,9 +103,21 @@ mbin_dispatch_init5 isal_update_histogram, isal_update_histogram_base, isal_upda
|
||||
%ifdef HAVE_AS_KNOWS_AVX512
|
||||
mbin_interface encode_deflate_icf
|
||||
mbin_dispatch_init6 encode_deflate_icf, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_04, encode_deflate_icf_06
|
||||
|
||||
mbin_interface set_long_icf_fg
|
||||
mbin_dispatch_init6 set_long_icf_fg, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_06
|
||||
|
||||
mbin_interface gen_icf_map_lh1
|
||||
mbin_dispatch_init6 gen_icf_map_lh1, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_lh1_06
|
||||
%else
|
||||
mbin_interface encode_deflate_icf
|
||||
mbin_dispatch_init5 encode_deflate_icf, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_04
|
||||
|
||||
mbin_interface set_long_icf_fg
|
||||
mbin_dispatch_init5 set_long_icf_fg, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_base
|
||||
|
||||
mbin_interface gen_icf_map_lh1
|
||||
mbin_dispatch_init5 gen_icf_map_lh1, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_h1_base
|
||||
%endif
|
||||
|
||||
mbin_interface crc32_gzip
|
||||
@ -101,3 +128,14 @@ mbin_dispatch_init5 isal_adler32, adler32_base, adler32_sse, adler32_sse, adler3
|
||||
|
||||
mbin_interface isal_deflate_hash_lvl0
|
||||
mbin_dispatch_init5 isal_deflate_hash_lvl0, isal_deflate_hash_lvl0_base, isal_deflate_hash_lvl0_01, isal_deflate_hash_lvl0_01, isal_deflate_hash_lvl0_01
|
||||
|
||||
mbin_interface isal_deflate_hash_lvl2
|
||||
mbin_dispatch_init5 isal_deflate_hash_lvl2, isal_deflate_hash_lvl2_base, isal_deflate_hash_lvl2_base, isal_deflate_hash_lvl2_base, isal_deflate_hash_lvl2_base
|
||||
|
||||
%ifdef HAVE_AS_KNOWS_AVX512
|
||||
mbin_interface isal_deflate_icf_body
|
||||
mbin_dispatch_init6 isal_deflate_icf_body, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_06
|
||||
%else
|
||||
mbin_interface isal_deflate_icf_body
|
||||
mbin_dispatch_init5 isal_deflate_icf_body, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_base
|
||||
%endif
|
||||
|
@ -242,6 +242,26 @@ int get_rand_data_length(void)
|
||||
return rand() & max_mask;
|
||||
}
|
||||
|
||||
int get_rand_level(void)
|
||||
{
|
||||
return ISAL_DEF_MIN_LEVEL + rand() % (ISAL_DEF_MAX_LEVEL - ISAL_DEF_MIN_LEVEL + 1);
|
||||
|
||||
}
|
||||
|
||||
int get_rand_level_buf_size(int level)
|
||||
{
|
||||
int size;
|
||||
switch (level) {
|
||||
case 2:
|
||||
size = rand() % IBUF_SIZE + ISAL_DEF_LVL2_MIN;
|
||||
break;
|
||||
case 1:
|
||||
default:
|
||||
size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
void print_error(int error_code)
|
||||
{
|
||||
switch (error_code) {
|
||||
@ -963,7 +983,7 @@ int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed
|
||||
stream.level = level;
|
||||
|
||||
if (level >= 1) {
|
||||
level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
|
||||
level_buf_size = get_rand_level_buf_size(stream.level);
|
||||
level_buf = malloc(level_buf_size);
|
||||
create_rand_repeat_data(level_buf, level_buf_size);
|
||||
stream.level_buf = level_buf;
|
||||
@ -1147,7 +1167,7 @@ int compress_single_pass(uint8_t * data, uint32_t data_size, uint8_t * compresse
|
||||
stream.level = level;
|
||||
|
||||
if (level >= 1) {
|
||||
level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
|
||||
level_buf_size = get_rand_level_buf_size(stream.level);
|
||||
level_buf = malloc(level_buf_size);
|
||||
create_rand_repeat_data(level_buf, level_buf_size);
|
||||
stream.level_buf = level_buf;
|
||||
@ -1218,21 +1238,28 @@ int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_
|
||||
stream.gzip_flag = gzip_flag;
|
||||
stream.level = level;
|
||||
|
||||
if (level >= 1) {
|
||||
if (level == 1) {
|
||||
/* This is to test case where level buf uses already existing
|
||||
* internal buffers */
|
||||
level_buf_size = rand() % IBUF_SIZE;
|
||||
/* printf("level_buf_size = %d\n", level_buf_size); */
|
||||
|
||||
if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
|
||||
level_buf = malloc(level_buf_size);
|
||||
create_rand_repeat_data(level_buf, level_buf_size);
|
||||
stream.level_buf = level_buf;
|
||||
stream.level_buf_size = level_buf_size;
|
||||
}
|
||||
} else if (level > 1) {
|
||||
level_buf_size = get_rand_level_buf_size(level);
|
||||
level_buf = malloc(level_buf_size);
|
||||
create_rand_repeat_data(level_buf, level_buf_size);
|
||||
stream.level_buf = level_buf;
|
||||
stream.level_buf_size = level_buf_size;
|
||||
}
|
||||
|
||||
if (reset_test_flag) {
|
||||
if (reset_test_flag)
|
||||
isal_deflate_reset(&stream);
|
||||
/* printf("post reset level_buf_size = %d\n", level_buf_size); */
|
||||
}
|
||||
|
||||
ret = isal_deflate_stateless(&stream);
|
||||
|
||||
if (level_buf != NULL)
|
||||
@ -1311,14 +1338,23 @@ int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t *
|
||||
stream.next_out = compressed_buf;
|
||||
stream.level = level;
|
||||
|
||||
if (level >= 1) {
|
||||
if (level == 1) {
|
||||
/* This is to test case where level_buf uses already existing
|
||||
* internal buffers */
|
||||
level_buf_size = rand() % IBUF_SIZE;
|
||||
|
||||
if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
|
||||
level_buf = malloc(level_buf_size);
|
||||
create_rand_repeat_data(level_buf, level_buf_size);
|
||||
stream.level_buf = level_buf;
|
||||
stream.level_buf_size = level_buf_size;
|
||||
}
|
||||
} else if (level > 1) {
|
||||
level_buf_size = get_rand_level_buf_size(level);
|
||||
level_buf = malloc(level_buf_size);
|
||||
create_rand_repeat_data(level_buf, level_buf_size);
|
||||
stream.level_buf = level_buf;
|
||||
stream.level_buf_size = level_buf_size;
|
||||
}
|
||||
|
||||
if (reset_test_flag)
|
||||
@ -1361,6 +1397,7 @@ int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t *
|
||||
set_random_hufftable(&stream);
|
||||
|
||||
ret = isal_deflate_stateless(&stream);
|
||||
|
||||
assert(stream.internal_state.bitbuf.m_bit_count == 0);
|
||||
|
||||
assert(compressed_buf == stream.next_out - stream.total_out);
|
||||
@ -1448,7 +1485,7 @@ int compress_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed
|
||||
stream.level = level;
|
||||
|
||||
if (level >= 1) {
|
||||
level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
|
||||
level_buf_size = get_rand_level_buf_size(stream.level);
|
||||
if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
|
||||
level_buf = malloc(level_buf_size);
|
||||
create_rand_repeat_data(level_buf, level_buf_size);
|
||||
@ -1626,7 +1663,7 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_
|
||||
uint8_t *in_buf = NULL;
|
||||
|
||||
gzip_flag = rand() % 5;
|
||||
level = rand() % 2;
|
||||
level = get_rand_level();
|
||||
|
||||
if (in_size != 0) {
|
||||
in_buf = malloc(in_size);
|
||||
@ -1866,7 +1903,7 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
|
||||
}
|
||||
|
||||
gzip_flag = rand() % 5;
|
||||
level = rand() % 2;
|
||||
level = get_rand_level();
|
||||
|
||||
z_size = z_size_max;
|
||||
|
||||
@ -2072,7 +2109,7 @@ int test_flush(uint8_t * in_buf, uint32_t in_size)
|
||||
uint8_t *z_buf = NULL;
|
||||
|
||||
gzip_flag = rand() % 5;
|
||||
level = rand() % 2;
|
||||
level = get_rand_level();
|
||||
|
||||
z_size = 2 * in_size + 2 * hdr_bytes + 8;
|
||||
if (gzip_flag == IGZIP_GZIP)
|
||||
@ -2142,7 +2179,7 @@ int test_full_flush(uint8_t * in_buf, uint32_t in_size)
|
||||
uint8_t *z_buf = NULL;
|
||||
|
||||
gzip_flag = rand() % 5;
|
||||
level = rand() % 2;
|
||||
level = get_rand_level();
|
||||
z_size = 2 * in_size + MAX_LOOPS * (hdr_bytes + 5);
|
||||
|
||||
if (gzip_flag == IGZIP_GZIP)
|
||||
|
317
igzip/igzip_set_long_icf_fg_06.asm
Normal file
317
igzip/igzip_set_long_icf_fg_06.asm
Normal file
@ -0,0 +1,317 @@
|
||||
%include "reg_sizes.asm"
|
||||
%include "lz0a_const.asm"
|
||||
%include "data_struct2.asm"
|
||||
%include "igzip_compare_types.asm"
|
||||
%define NEQ 4
|
||||
|
||||
%ifdef HAVE_AS_KNOWS_AVX512
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg1 rcx
|
||||
%define arg2 rdx
|
||||
%define arg3 r8
|
||||
%define dist_code rsi
|
||||
%define len rdi
|
||||
%else
|
||||
%define arg1 rdi
|
||||
%define arg2 rsi
|
||||
%define arg3 rdx
|
||||
%define dist_code rcx
|
||||
%define len r8
|
||||
%endif
|
||||
|
||||
%define next_in arg1
|
||||
%define end_in arg2
|
||||
%define match_lookup arg3
|
||||
%define match_in rax
|
||||
%define dist r9
|
||||
%define match_offset r10
|
||||
%define tmp1 r11
|
||||
|
||||
%define zmatch_lookup zmm0
|
||||
%define zmatch_lookup2 zmm1
|
||||
%define zlens zmm2
|
||||
%define zdist_codes zmm3
|
||||
%define zdist_extras zmm4
|
||||
%define zdists zmm5
|
||||
%define zdists2 zmm6
|
||||
%define zlens1 zmm7
|
||||
%define zlens2 zmm8
|
||||
%define zlookup zmm9
|
||||
%define zlookup2 zmm10
|
||||
%define datas zmm11
|
||||
%define ztmp1 zmm12
|
||||
%define ztmp2 zmm13
|
||||
%define zvect_size zmm17
|
||||
%define ztwofiftyfour zmm18
|
||||
%define ztwofiftysix zmm19
|
||||
%define ztwosixtytwo zmm20
|
||||
%define znlen_mask zmm21
|
||||
%define zbswap zmm22
|
||||
%define zqword_shuf zmm23
|
||||
%define zdatas_perm3 zmm24
|
||||
%define zdatas_perm2 zmm25
|
||||
%define zincrement zmm26
|
||||
%define zdists_mask zmm27
|
||||
%define zdists_start zmm28
|
||||
%define zlong_lens2 zmm29
|
||||
%define zlong_lens zmm30
|
||||
%define zlens_mask zmm31
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define stack_size 8*16 + 2 * 8 + 8
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
vmovdqa [rsp + 0*16], xmm6
|
||||
vmovdqa [rsp + 1*16], xmm7
|
||||
vmovdqa [rsp + 2*16], xmm8
|
||||
vmovdqa [rsp + 3*16], xmm9
|
||||
vmovdqa [rsp + 4*16], xmm10
|
||||
vmovdqa [rsp + 5*16], xmm11
|
||||
vmovdqa [rsp + 6*16], xmm12
|
||||
vmovdqa [rsp + 7*16], xmm13
|
||||
save_reg rsi, 8*16 + 0*8
|
||||
save_reg rdi, 8*16 + 1*8
|
||||
end_prolog
|
||||
%endm
|
||||
|
||||
;; Win64 epilogue helper, the inverse of FUNC_SAVE.
;; Reloads xmm6-xmm13 from the eight 16-byte slots at [rsp + 0*16 .. 7*16],
;; reloads rsi and rdi from the two 8-byte slots that follow them, and then
;; releases the stack_size-byte frame.
;; NOTE(review): the GPR offsets mirror the ones FUNC_SAVE passes to save_reg;
;; this assumes save_reg stores the register at [rsp + offset] - confirm
;; against the save_reg definition.
%macro FUNC_RESTORE 0
	vmovdqa	xmm6, [rsp + 0*16]
	vmovdqa	xmm7, [rsp + 1*16]
	vmovdqa	xmm8, [rsp + 2*16]
	vmovdqa	xmm9, [rsp + 3*16]
	vmovdqa	xmm10, [rsp + 4*16]
	vmovdqa	xmm11, [rsp + 5*16]
	vmovdqa	xmm12, [rsp + 6*16]
	vmovdqa	xmm13, [rsp + 7*16]

	;; The register is the destination and the stack slot the source; the
	;; previous form had a bare immediate as destination and no rsp base.
	mov	rsi, [rsp + 8*16 + 0*8]
	mov	rdi, [rsp + 8*16 + 1*8]
	add	rsp, stack_size
%endm
|
||||
%else
|
||||
%macro FUNC_SAVE 0
|
||||
%endm
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
%endm
|
||||
%endif
|
||||
%define VECT_SIZE 16
|
||||
|
||||
;; set_long_icf_fg_06 (AVX-512 build only, see HAVE_AS_KNOWS_AVX512 above)
;;
;; Arguments, per the arg1..arg3 %defines:
;;   next_in      - current position in the input data
;;   end_in       - presumably the end of the input data (confirm with caller)
;;   match_lookup - array of ICF_CODE_BYTES-wide codes, one per input byte,
;;                  whose first element corresponds to next_in
;;
;; Each code keeps a length field in its low bits (selected by len_mask), a
;; distance code starting at bit DIST_OFFSET and distance extra bits starting
;; at bit EXTRA_BITS_OFFSET. Length fields are stored as match length + 0xfe.
;;
;; The routine scans VECT_SIZE codes per iteration. Codes whose length field
;; exceeds 0x105 get their length extended by up to 8 more bytes. When all 8
;; extra bytes match, the first such match in the vector is compared further
;; (up to 2 + 4*64 bytes) and the codes it covers are rewritten with the
;; corresponding remaining lengths.
;;
;; Clobbers match_in, dist, match_offset, tmp1, dist_code/len, k3, k4, k6, k7
;; and the zmm registers named in the %defines above.
global set_long_icf_fg_06
set_long_icf_fg_06:
	FUNC_SAVE

	;; Stop LA + 15 bytes before end_in.
	;; NOTE(review): presumably keeps the vector/look-ahead reads in bounds.
	sub	end_in, LA + 15

	;; Load all loop-invariant constants once.
	vmovdqu32 zlong_lens, [long_len]
	vmovdqu32 zlong_lens2, [long_len2]
	vmovdqu32 zlens_mask, [len_mask]
	vmovdqu16 zdists_start, [dist_start]
	vmovdqu32 zdists_mask, [dists_mask]
	vmovdqu32 zincrement, [increment]
	vmovdqu64 zdatas_perm2, [datas_perm2]
	vmovdqu64 zdatas_perm3, [datas_perm3]
	vmovdqu64 zqword_shuf, [qword_shuf]
	vmovdqu64 zbswap, [bswap_shuf]
	vmovdqu64 znlen_mask, [nlen_mask]
	vmovdqu64 zvect_size, [vect_size]
	vmovdqu64 ztwofiftyfour, [twofiftyfour]
	vmovdqu64 ztwofiftysix, [twofiftysix]
	vmovdqu64 ztwosixtytwo, [twosixtytwo]
	vmovdqu32 zmatch_lookup, [match_lookup]

fill_loop:
	;; zmatch_lookup2 = the VECT_SIZE codes handled this iteration;
	;; zmatch_lookup is preloaded with the following VECT_SIZE codes.
	vmovdqu32 zmatch_lookup2, zmatch_lookup
	vmovdqu32 zmatch_lookup, [match_lookup + ICF_CODE_BYTES * VECT_SIZE]

	cmp	next_in, end_in
	jae	end_fill

	;; k3 = lanes whose length field is greater than 0x105 (long_len).
	vpandd	zlens, zmatch_lookup2, zlens_mask
	vpcmpgtd k3, zlens, zlong_lens

	;; Speculatively increment
	add	next_in, VECT_SIZE
	add	match_lookup, ICF_CODE_BYTES * VECT_SIZE

	ktestw	k3, k3
	jz	fill_loop

	;; Translate each distance code through the dist_start table.
	vpsrld	zdist_codes, zmatch_lookup2, DIST_OFFSET
	vpmovdw	zdists %+ y, zdist_codes ; Relies on perm working mod 32
	vpermw	zdists, zdists, zdists_start
	vpmovzxwd zdists, zdists %+ y

	;; zdist_extras = lane index - extra bits
	vpsrld	zdist_extras, zmatch_lookup2, EXTRA_BITS_OFFSET
	vpsubd	zdist_extras, zincrement, zdist_extras

	;; zdists = lane index - (dist_start entry + extra bits), i.e. the offset
	;; of each lane's match source relative to the start of this vector.
	vpsubd	zdists, zdist_extras, zdists
	vextracti32x8 zdists2 %+ y, zdists, 1
	kmovb	k6, k3
	kshiftrw k7, k3, 8

	;; next_in has already been advanced by VECT_SIZE, so the -8 displacement
	;; gathers the 8 bytes located 8..15 bytes past each lane's match source.
	vpgatherdq zlens1 {k6}, [next_in + zdists %+ y - 8]
	vpgatherdq zlens2 {k7}, [next_in + zdists2 %+ y - 8]

	;; Build the matching 8-byte windows of the input itself: qword i of
	;; zlookup/zlookup2 holds the input bytes 8..15 past lane i.
	vmovdqu8 datas %+ y, [next_in - 8]
	vpermq	zlookup, zdatas_perm2, datas
	vpshufb	zlookup, zlookup, zqword_shuf
	vpermq	zlookup2, zdatas_perm3, datas
	vpshufb	zlookup2, zlookup2, zqword_shuf

	;; XOR leaves zero bytes wherever input and match source agree.
	vpxorq	zlens1, zlens1, zlookup
	vpxorq	zlens2, zlens2, zlookup2

	;; Byte-swap so the first byte in memory is most significant, then the
	;; leading-zero bit count / 8 is the number of equal leading bytes (0..8).
	vpshufb	zlens1, zlens1, zbswap
	vpshufb	zlens2, zlens2, zbswap
	vplzcntq zlens1, zlens1
	vplzcntq zlens2, zlens2
	vpmovqd	zlens1 %+ y, zlens1
	vpmovqd	zlens2 %+ y, zlens2
	vinserti32x8 zlens1, zlens2 %+ y, 1
	vpsrld	zlens1 {k3}{z}, zlens1, 3

	;; New length field for the selected lanes = 0x106 + extra equal bytes;
	;; the remaining bits of the code are kept (nlen_mask).
	vpandd	zmatch_lookup2 {k3}{z}, zmatch_lookup2, znlen_mask
	vpaddd	zmatch_lookup2 {k3}{z}, zmatch_lookup2, ztwosixtytwo
	vpaddd	zmatch_lookup2 {k3}{z}, zmatch_lookup2, zlens1

	vmovdqu32 [match_lookup - ICF_CODE_BYTES * VECT_SIZE] {k3}, zmatch_lookup2

	;; k3 = lanes where all 8 extra bytes matched (count > 7).
	vpcmpgtd k3, zlens1, zlong_lens2
	ktestw	k3, k3
	jz	fill_loop

	;; zdists = lane index - zdists, turning the offsets back into distances.
	vpsubd	zdists, zincrement, zdists

	;; Bring the first fully matching lane down to element 0.
	vpcompressd zdists2 {k3}, zdists
	vpcompressd zmatch_lookup2 {k3}, zmatch_lookup2
	kmovq	match_offset, k3
	tzcnt	match_offset, match_offset

	;; Rewind next_in/match_lookup to that lane and locate its match source.
	vmovd	dist %+ d, zdists2 %+ x
	lea	next_in, [next_in + match_offset - VECT_SIZE]
	lea	match_lookup, [match_lookup + ICF_CODE_BYTES * (match_offset - VECT_SIZE)]
	mov	match_in, next_in
	sub	match_in, dist

	;; Compare 64 bytes at a time starting at offset 2, four blocks at most.
	;; The second operand is read straight from memory, so no separate load
	;; of the match data is needed.
	mov	len, 2
%rep 3
	vmovdqu8 ztmp1, [next_in + len]
	vpcmpb	k3, ztmp1, [match_in + len], NEQ
	ktestq	k3, k3
	jnz	miscompare

	add	len, 64
%endrep

	vmovdqu8 ztmp1, [next_in + len]
	vpcmpb	k3, ztmp1, [match_in + len], NEQ

miscompare:
	;; len += index of the first differing byte (tzcnt yields 64 when the
	;; whole block matched).
	kmovq	tmp1, k3
	tzcnt	tmp1, tmp1
	add	len, tmp1

	;; Skip past the match and preload the codes that follow it.
	add	next_in, len
	lea	match_lookup, [match_lookup + ICF_CODE_BYTES * len]
	vmovdqu32 zmatch_lookup, [match_lookup]

	;; Every lane gets the matched code with its length field cleared.
	vpbroadcastd zmatch_lookup2, zmatch_lookup2 %+ x
	vpandd	zmatch_lookup2, zmatch_lookup2, znlen_mask

	;; zlens1 lane i = (len - i) + 0xfe: the length field for a match that
	;; starts i bytes after the start of the long match.
	vpbroadcastd zlens1, len %+ d
	vpsubd	zlens1, zlens1, zincrement
	vpaddd	zlens1, zlens1, ztwofiftyfour

	;; From here len is a negative index back to the start of the match.
	neg	len

update_match_lookup:
	;; Overwrite a code only when the new length field is larger than the
	;; stored one and still above 0x100.
	vpandd	zlens2, zlens_mask, [match_lookup + ICF_CODE_BYTES * len]
	vpcmpgtd k3, zlens1, zlens2
	vpcmpgtd k4, zlens1, ztwofiftysix
	kandw	k3, k3, k4

	vpaddd	zlens2 {k3}{z}, zlens1, zmatch_lookup2

	vmovdqu32 [match_lookup + ICF_CODE_BYTES * len] {k3}, zlens2

	;; Resume scanning as soon as any lane was left untouched.
	knotw	k3, k3
	ktestw	k3, k3
	jnz	fill_loop

	;; Otherwise move on to the next VECT_SIZE codes of the match.
	add	len, VECT_SIZE
	vpsubd	zlens1, zlens1, zvect_size

	jmp	update_match_lookup

end_fill:
	FUNC_RESTORE
	ret
|
||||
|
||||
section .data
|
||||
align 64
|
||||
dist_start:
|
||||
dw 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
|
||||
dw 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
|
||||
dw 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
|
||||
dw 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
|
||||
len_mask:
|
||||
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
|
||||
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
|
||||
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
|
||||
dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
|
||||
dists_mask:
|
||||
dd LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK
|
||||
dd LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK
|
||||
dd LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK
|
||||
dd LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK
|
||||
long_len:
|
||||
dd 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105
|
||||
dd 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105
|
||||
long_len2:
|
||||
dd 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7
|
||||
dd 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7
|
||||
|
||||
increment:
|
||||
dd 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
|
||||
dd 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
|
||||
datas_perm2:
|
||||
dq 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1
|
||||
datas_perm3:
|
||||
dq 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2
|
||||
bswap_shuf:
|
||||
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
|
||||
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
|
||||
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
|
||||
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
|
||||
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
|
||||
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
|
||||
db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
|
||||
db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
|
||||
qword_shuf:
|
||||
db 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
|
||||
db 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8
|
||||
db 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9
|
||||
db 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa
|
||||
db 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb
|
||||
db 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc
|
||||
db 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd
|
||||
db 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe
|
||||
db 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
|
||||
vect_size:
|
||||
dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE
|
||||
dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE
|
||||
dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE
|
||||
dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE
|
||||
twofiftyfour:
|
||||
dd 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe
|
||||
dd 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe
|
||||
twofiftysix:
|
||||
dd 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100
|
||||
dd 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100
|
||||
twosixtytwo:
|
||||
dd 0x106, 0x106, 0x106, 0x106, 0x106, 0x106, 0x106, 0x106
|
||||
dd 0x106, 0x106, 0x106, 0x106, 0x106, 0x106, 0x106, 0x106
|
||||
nlen_mask:
|
||||
dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
|
||||
dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
|
||||
dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
|
||||
dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
|
||||
%endif
|
@ -42,6 +42,59 @@
|
||||
# define RUN_MEM_SIZE 500000000
|
||||
#endif
|
||||
|
||||
/*
 * Level-buffer size used for each compression level, indexed by level (0-9).
 * A slot is filled from ISAL_DEF_LVLn_DEFAULT when that macro exists; slots
 * without one keep the zero that an omitted initializer leaves behind.
 */
int level_size_buf[10] = {
#ifdef ISAL_DEF_LVL0_DEFAULT
	[0] = ISAL_DEF_LVL0_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL1_DEFAULT
	[1] = ISAL_DEF_LVL1_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL2_DEFAULT
	[2] = ISAL_DEF_LVL2_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL3_DEFAULT
	[3] = ISAL_DEF_LVL3_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL4_DEFAULT
	[4] = ISAL_DEF_LVL4_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL5_DEFAULT
	[5] = ISAL_DEF_LVL5_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL6_DEFAULT
	[6] = ISAL_DEF_LVL6_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL7_DEFAULT
	[7] = ISAL_DEF_LVL7_DEFAULT,
#endif
#ifdef ISAL_DEF_LVL8_DEFAULT
	[8] = ISAL_DEF_LVL8_DEFAULT,
#endif
	/* The last slot is always spelled out so the initializer list is never empty. */
#ifdef ISAL_DEF_LVL9_DEFAULT
	[9] = ISAL_DEF_LVL9_DEFAULT,
#else
	[9] = 0,
#endif
};
|
||||
|
||||
struct isal_zstream stream;
|
||||
|
||||
int usage(void)
|
||||
@ -49,7 +102,7 @@ int usage(void)
|
||||
fprintf(stderr,
|
||||
"Usage: igzip_stateless_file_perf [options] <infile>\n"
|
||||
" -h help\n"
|
||||
" -X use compression level X with 0 <= X <= 1\n"
|
||||
" -X use compression level X with 0 <= X <= 2\n"
|
||||
" -i <iter> number of iterations (at least 1)\n"
|
||||
" -o <file> output file for compresed data\n");
|
||||
exit(0);
|
||||
@ -66,7 +119,17 @@ int main(int argc, char *argv[])
|
||||
int level = 0, level_size = 0;
|
||||
char *in_file_name = NULL, *out_file_name = NULL;
|
||||
|
||||
while ((c = getopt(argc, argv, "h01i:o:")) != -1) {
|
||||
while ((c = getopt(argc, argv, "h0123456789i:o:")) != -1) {
|
||||
if (c >= '0' && c <= '9') {
|
||||
if (c > '0' + ISAL_DEF_MAX_LEVEL)
|
||||
usage();
|
||||
else {
|
||||
level = c - '0';
|
||||
level_size = level_size_buf[level];
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
case 'o':
|
||||
out_file_name = optarg;
|
||||
@ -76,12 +139,6 @@ int main(int argc, char *argv[])
|
||||
if (iterations < 1)
|
||||
usage();
|
||||
break;
|
||||
case '1':
|
||||
level = 1;
|
||||
level_size = ISAL_DEF_LVL1_LARGE;
|
||||
break;
|
||||
case '0':
|
||||
break;
|
||||
case 'h':
|
||||
default:
|
||||
usage();
|
||||
|
@ -247,7 +247,7 @@ isal_update_histogram_ %+ ARCH %+ :
|
||||
|
||||
;; Init hash_table
|
||||
PXOR vtmp0, vtmp0, vtmp0
|
||||
mov rcx, (IGZIP_HASH_SIZE - V_LENGTH)
|
||||
mov rcx, (IGZIP_LVL0_HASH_SIZE - V_LENGTH)
|
||||
init_hash_table:
|
||||
MOVDQU [histogram + _hash_offset + 2 * rcx], vtmp0
|
||||
MOVDQU [histogram + _hash_offset + 2 * (rcx + V_LENGTH / 2)], vtmp0
|
||||
@ -262,7 +262,7 @@ init_hash_table:
|
||||
;; Load first literal into histogram
|
||||
mov curr_data, [file_start + f_i]
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
mov [histogram + _hash_offset + 2 * hash], f_i %+ w
|
||||
and curr_data, 0xff
|
||||
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
|
||||
@ -276,8 +276,8 @@ init_hash_table:
|
||||
shr curr_data2, 8
|
||||
compute_hash hash2, curr_data2
|
||||
|
||||
and hash2 %+ d, HASH_MASK
|
||||
and hash, HASH_MASK
|
||||
and hash2 %+ d, LVL0_HASH_MASK
|
||||
and hash, LVL0_HASH_MASK
|
||||
loop2:
|
||||
xor dist, dist
|
||||
xor dist2, dist2
|
||||
@ -324,8 +324,8 @@ loop2:
|
||||
xor len, [tmp1 + dist - 1]
|
||||
jz compare_loop
|
||||
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
and hash2 %+ d, LVL0_HASH_MASK
|
||||
|
||||
MOVQ len2, xdata
|
||||
xor len2, [tmp1 + dist2]
|
||||
@ -370,7 +370,7 @@ len_dist_lit_huffman:
|
||||
mov tmp1, curr_data
|
||||
compute_hash hash, curr_data
|
||||
|
||||
and hash3, HASH_MASK
|
||||
and hash3, LVL0_HASH_MASK
|
||||
mov [histogram + _hash_offset + 2 * hash3], tmp3 %+ w
|
||||
|
||||
dist_to_dist_code2 dist_code2, dist2
|
||||
@ -383,8 +383,8 @@ len_dist_lit_huffman:
|
||||
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code]
|
||||
inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code2]
|
||||
|
||||
and hash2 %+ d, HASH_MASK
|
||||
and hash, HASH_MASK
|
||||
and hash2 %+ d, LVL0_HASH_MASK
|
||||
and hash, LVL0_HASH_MASK
|
||||
|
||||
cmp f_i, file_length
|
||||
jl loop2
|
||||
@ -418,8 +418,8 @@ len_dist_huffman:
|
||||
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code]
|
||||
inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code]
|
||||
|
||||
and hash2 %+ d, HASH_MASK
|
||||
and hash, HASH_MASK
|
||||
and hash2 %+ d, LVL0_HASH_MASK
|
||||
and hash, LVL0_HASH_MASK
|
||||
|
||||
cmp f_i, file_length
|
||||
jl loop2
|
||||
@ -442,7 +442,7 @@ end_loop_2:
|
||||
loop2_finish:
|
||||
mov curr_data %+ d, dword [file_start + f_i]
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
|
||||
;; Calculate possible distance for length/dist pair.
|
||||
xor dist, dist
|
||||
@ -513,8 +513,8 @@ exit_ret:
|
||||
ret
|
||||
|
||||
compare_loop:
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
and hash %+ d, LVL0_HASH_MASK
|
||||
and hash2 %+ d, LVL0_HASH_MASK
|
||||
lea tmp2, [tmp1 + dist - 1]
|
||||
%if (COMPARE_TYPE == 1)
|
||||
compare250 tmp1, tmp2, len, tmp3
|
||||
|
@ -38,19 +38,29 @@
|
||||
%define LAST_BYTES_COUNT 3 ;; Bytes to prevent reading out of array bounds
|
||||
%define LA_STATELESS 258 ;; No round up since no data is copied to a buffer
|
||||
|
||||
%ifndef IGZIP_HASH_SIZE
|
||||
%assign IGZIP_HASH_SIZE (8 * K)
|
||||
%ifndef IGZIP_LVL0_HASH_SIZE
|
||||
%assign IGZIP_LVL0_HASH_SIZE (8 * K)
|
||||
%endif
|
||||
|
||||
%assign HASH_MASK (IGZIP_HASH_SIZE - 1)
|
||||
%ifndef IGZIP_LVL2_HASH_SIZE
|
||||
%assign IGZIP_LVL2_HASH_SIZE IGZIP_HIST_SIZE
|
||||
%endif
|
||||
|
||||
%assign LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1)
|
||||
%assign LVL2_HASH_MASK (IGZIP_LVL2_HASH_SIZE - 1)
|
||||
|
||||
%assign MIN_DEF_MATCH 3 ; Minimum length of a match in deflate
|
||||
%assign SHORTEST_MATCH 4
|
||||
|
||||
%assign SLOP 8
|
||||
|
||||
%define ICF_CODE_BYTES 4
|
||||
%define LIT_LEN_BIT_COUNT 10
|
||||
%define DIST_LIT_BIT_COUNT 9
|
||||
|
||||
%define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1)
|
||||
%define LIT_DIST_MASK ((1 << DIST_LIT_BIT_COUNT) - 1)
|
||||
|
||||
%define DIST_OFFSET LIT_LEN_BIT_COUNT
|
||||
%define EXTRA_BITS_OFFSET (DIST_OFFSET + DIST_LIT_BIT_COUNT)
|
||||
%define LIT (0x1E << DIST_OFFSET)
|
||||
|
@ -115,8 +115,12 @@ extern "C" {
|
||||
|
||||
#define ISAL_LIMIT_HASH_UPDATE
|
||||
|
||||
#ifndef IGZIP_HASH_SIZE
|
||||
#define IGZIP_HASH_SIZE (8 * IGZIP_K)
|
||||
#ifndef IGZIP_LVL0_HASH_SIZE
|
||||
#define IGZIP_LVL0_HASH_SIZE (8 * IGZIP_K)
|
||||
#endif
|
||||
|
||||
#ifndef IGZIP_LVL2_HASH_SIZE
|
||||
#define IGZIP_LVL2_HASH_SIZE IGZIP_HIST_SIZE
|
||||
#endif
|
||||
|
||||
#ifdef LONGER_HUFFTABLE
|
||||
@ -155,8 +159,10 @@ enum {IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS};
|
||||
#define INVALID_PARAM -8
|
||||
#define STATELESS_OVERFLOW -1
|
||||
#define ISAL_INVALID_OPERATION -9
|
||||
#define ISAL_INVALID_LEVEL -4 /* Invalid Compression level set */
|
||||
#define ISAL_INVALID_STATE -3
|
||||
#define ISAL_INVALID_LEVEL -4 /* Invalid Compression level set */
|
||||
#define ISAL_INVALID_LEVEL_BUF -5 /* Invalid buffer specified for the compression level */
|
||||
|
||||
/**
|
||||
* @enum isal_zstate_state
|
||||
* @brief Compression state. Please note that ZSTATE_TRL only applies to GZIP compression
|
||||
@ -235,7 +241,7 @@ enum isal_block_state {
|
||||
struct isal_huff_histogram {
|
||||
uint64_t lit_len_histogram[ISAL_DEF_LIT_LEN_SYMBOLS]; //!< Histogram of Literal/Len symbols seen
|
||||
uint64_t dist_histogram[ISAL_DEF_DIST_SYMBOLS]; //!< Histogram of Distance Symbols seen
|
||||
uint16_t hash_table[IGZIP_HASH_SIZE]; //!< Tmp space used as a hash table
|
||||
uint16_t hash_table[IGZIP_LVL0_HASH_SIZE]; //!< Tmp space used as a hash table
|
||||
};
|
||||
|
||||
struct isal_mod_hist {
|
||||
@ -244,12 +250,15 @@ struct isal_mod_hist {
|
||||
};
|
||||
|
||||
#define ISAL_DEF_MIN_LEVEL 0
|
||||
#define ISAL_DEF_MAX_LEVEL 1
|
||||
#define ISAL_DEF_MAX_LEVEL 2
|
||||
|
||||
/* Defines used to set level data sizes */
|
||||
/* has to be at least sizeof(struct level_buf) + sizeof(struct lvlX_buf) */
|
||||
#define ISAL_DEF_LVL0_REQ 0
|
||||
#define ISAL_DEF_LVL1_REQ 4 * IGZIP_K /* has to be at least sizeof(struct level_2_buf) */
|
||||
#define ISAL_DEF_LVL1_REQ (4 * IGZIP_K) /* bytes required by level 1; parenthesized so it expands as a single value */
|
||||
#define ISAL_DEF_LVL1_TOKEN_SIZE 4
|
||||
#define ISAL_DEF_LVL2_REQ (4 * IGZIP_K + 4 * 4 * IGZIP_K + 2 * IGZIP_LVL2_HASH_SIZE) /* bytes required by level 2; parenthesized so the sum expands as a single value */
|
||||
#define ISAL_DEF_LVL2_TOKEN_SIZE 4
|
||||
|
||||
/* Data sizes for level specific data options */
|
||||
#define ISAL_DEF_LVL0_MIN ISAL_DEF_LVL0_REQ
|
||||
@ -266,6 +275,13 @@ struct isal_mod_hist {
|
||||
#define ISAL_DEF_LVL1_EXTRA_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 128 * IGZIP_K)
|
||||
#define ISAL_DEF_LVL1_DEFAULT ISAL_DEF_LVL1_LARGE
|
||||
|
||||
#define ISAL_DEF_LVL2_MIN (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 1 * IGZIP_K)
|
||||
#define ISAL_DEF_LVL2_SMALL (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 16 * IGZIP_K)
|
||||
#define ISAL_DEF_LVL2_MEDIUM (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 32 * IGZIP_K)
|
||||
#define ISAL_DEF_LVL2_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 64 * IGZIP_K)
|
||||
#define ISAL_DEF_LVL2_EXTRA_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 128 * IGZIP_K)
|
||||
#define ISAL_DEF_LVL2_DEFAULT ISAL_DEF_LVL2_LARGE
|
||||
|
||||
#define IGZIP_NO_HIST 0
|
||||
#define IGZIP_HIST 1
|
||||
#define IGZIP_DICT_HIST 2
|
||||
@ -296,6 +312,7 @@ struct isal_zstate {
|
||||
uint8_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set)
|
||||
uint8_t has_eob; //!< keeps track of eob on the last deflate block
|
||||
uint8_t has_hist; //!< flag to track if there is match history
|
||||
uint16_t has_level_buf_init; //!< flag to track if user supplied memory has been initialized.
|
||||
struct isal_mod_hist hist;
|
||||
uint32_t count; //!< used for partial header/trailer writes
|
||||
uint8_t tmp_out_buff[16]; //!< temporary array
|
||||
@ -306,8 +323,7 @@ struct isal_zstate {
|
||||
uint8_t buffer[2 * IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD]; //!< Internal buffer
|
||||
|
||||
/* Stream should be setup such that the head is cache aligned*/
|
||||
uint16_t head[IGZIP_HASH_SIZE]; //!< Hash array
|
||||
|
||||
uint16_t head[IGZIP_LVL0_HASH_SIZE]; //!< Hash array
|
||||
};
|
||||
|
||||
/** @brief Holds the huffman tree used to huffman encode the input stream **/
|
||||
@ -593,7 +609,8 @@ int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t *dict, uint32_t d
|
||||
* @param stream Structure holding state information on the compression streams.
|
||||
* @return COMP_OK (if everything is ok),
|
||||
* INVALID_FLUSH (if an invalid FLUSH is selected),
|
||||
* ISAL_INVALID_LEVEL (if an invalid compression level is selected).
|
||||
* ISAL_INVALID_LEVEL (if an invalid compression level is selected),
|
||||
* ISAL_INVALID_LEVEL_BUF (if the level buffer is not large enough).
|
||||
*/
|
||||
int isal_deflate(struct isal_zstream *stream);
|
||||
|
||||
@ -622,6 +639,7 @@ int isal_deflate(struct isal_zstream *stream);
|
||||
* @return COMP_OK (if everything is ok),
|
||||
* INVALID_FLUSH (if an invalid FLUSH is selected),
|
||||
* ISAL_INVALID_LEVEL (if an invalid compression level is selected),
|
||||
* ISAL_INVALID_LEVEL_BUF (if the level buffer is not large enough),
|
||||
* STATELESS_OVERFLOW (if output buffer will not fit output).
|
||||
*/
|
||||
int isal_deflate_stateless(struct isal_zstream *stream);
|
||||
|
Loading…
Reference in New Issue
Block a user