diff --git a/Makefile.nmake b/Makefile.nmake index 3a0359a..af5abe1 100644 --- a/Makefile.nmake +++ b/Makefile.nmake @@ -120,9 +120,7 @@ objs = \ bin\encode_df.obj \ bin\encode_df_04.obj \ bin\proc_heap.obj \ - bin\igzip_icf_body_h1_gr_bt_01.obj \ - bin\igzip_icf_body_h1_gr_bt_02.obj \ - bin\igzip_icf_body_h1_gr_bt_04.obj \ + bin\igzip_icf_body_h1_gr_bt.obj \ bin\igzip_icf_finish.obj \ bin\igzip_icf_base.obj \ bin\igzip_inflate.obj \ diff --git a/igzip/Makefile.am b/igzip/Makefile.am index 4e2b944..d917251 100644 --- a/igzip/Makefile.am +++ b/igzip/Makefile.am @@ -43,9 +43,7 @@ lsrc_x86_64 += igzip/igzip_body_01.asm \ igzip/igzip_body_02.asm \ igzip/igzip_body_04.asm \ igzip/igzip_finish.asm \ - igzip/igzip_icf_body_h1_gr_bt_01.asm \ - igzip/igzip_icf_body_h1_gr_bt_02.asm \ - igzip/igzip_icf_body_h1_gr_bt_04.asm \ + igzip/igzip_icf_body_h1_gr_bt.asm \ igzip/igzip_icf_finish.asm \ igzip/rfc1951_lookup.asm \ igzip/crc32_gzip.asm igzip/detect_repeated_char.asm \ @@ -82,7 +80,6 @@ other_src += igzip/bitbuf2.asm \ igzip/data_struct2.asm \ igzip/inflate_data_structs.asm \ igzip/igzip_body.asm \ - igzip/igzip_icf_body_h1_gr_bt.asm \ igzip/igzip_finish.asm \ igzip/lz0a_const.asm \ igzip/options.asm \ diff --git a/igzip/huff_codes.h b/igzip/huff_codes.h index e6123c8..d773c6c 100644 --- a/igzip/huff_codes.h +++ b/igzip/huff_codes.h @@ -77,11 +77,13 @@ #define INVALID_HUFFCODE 1 #define HASH8K_HASH_MASK (IGZIP_HASH8K_HASH_SIZE - 1) +#define HASH_HIST_HASH_MASK (IGZIP_HASH_HIST_SIZE - 1) #define HASH_MAP_HASH_MASK (IGZIP_HASH_MAP_HASH_SIZE - 1) #define LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1) #define LVL1_HASH_MASK (IGZIP_LVL1_HASH_SIZE - 1) #define LVL2_HASH_MASK (IGZIP_LVL2_HASH_SIZE - 1) +#define LVL3_HASH_MASK (IGZIP_LVL3_HASH_SIZE - 1) #define SHORTEST_MATCH 4 #define LENGTH_BITS 5 diff --git a/igzip/igzip.c b/igzip/igzip.c index b54ed22..401e0b7 100644 --- a/igzip/igzip.c +++ b/igzip/igzip.c @@ -67,6 +67,7 @@ extern void isal_deflate_hash_lvl0(uint16_t *, uint32_t, 
uint32_t, uint8_t *, uint32_t); extern void isal_deflate_hash_lvl1(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t); extern void isal_deflate_hash_lvl2(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t); +extern void isal_deflate_hash_lvl3(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t); extern const uint8_t gzip_hdr[]; extern const uint32_t gzip_hdr_bytes; extern const uint32_t gzip_trl_bytes; @@ -95,6 +96,7 @@ void isal_deflate_finish(struct isal_zstream *stream); void isal_deflate_icf_body(struct isal_zstream *stream); void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream); void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream); +void isal_deflate_icf_finish_lvl3(struct isal_zstream *stream); /*****************************************************************/ /* Forward declarations */ @@ -253,6 +255,11 @@ static int check_level_req(struct isal_zstream *stream) return ISAL_INVALID_LEVEL_BUF; switch (stream->level) { + case 3: + if (stream->level_buf_size < ISAL_DEF_LVL3_MIN) + return ISAL_INVALID_LEVEL; + break; + case 2: if (stream->level_buf_size < ISAL_DEF_LVL2_MIN) return ISAL_INVALID_LEVEL; @@ -276,6 +283,14 @@ static int init_hash8k_buf(struct isal_zstream *stream) return sizeof(struct level_buf) - MAX_LVL_BUF_SIZE + sizeof(level_buf->hash8k); } +static int init_hash_hist_buf(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + state->has_level_buf_init = 1; + return sizeof(struct level_buf) - MAX_LVL_BUF_SIZE + sizeof(level_buf->hash_hist); +} + static int init_hash_map_buf(struct isal_zstream *stream) { struct isal_zstate *state = &stream->internal_state; @@ -294,8 +309,10 @@ static int init_hash_map_buf(struct isal_zstream *stream) static int init_lvlX_buf(struct isal_zstream *stream) { switch (stream->level) { - case 2: + case 3: return init_hash_map_buf(stream); + case 2: + return init_hash_hist_buf(stream); default: 
return init_hash8k_buf(stream); } @@ -339,8 +356,10 @@ static int are_buffers_empty(struct isal_zstream *stream) { switch (stream->level) { - case 2: + case 3: return are_buffers_empty_hash_map(stream); + case 2: + return are_buffers_empty_hashX(stream); default: return are_buffers_empty_hashX(stream); } @@ -455,6 +474,9 @@ static void isal_deflate_pass(struct isal_zstream *stream) static void isal_deflate_icf_finish(struct isal_zstream *stream) { switch (stream->level) { + case 3: + isal_deflate_icf_finish_lvl3(stream); + break; case 2: isal_deflate_icf_finish_lvl2(stream); break; @@ -855,6 +877,11 @@ static inline void reset_match_history(struct isal_zstream *stream) int i = 0; switch (stream->level) { + case 3: + hash_table = level_buf->lvl3.hash_table; + hash_table_size = sizeof(level_buf->lvl3.hash_table); + break; + case 2: hash_table = level_buf->lvl2.hash_table; hash_table_size = sizeof(level_buf->lvl2.hash_table); @@ -992,14 +1019,22 @@ void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, uint32_t dic * dictionary must set at least 1 element in the history */ struct level_buf *level_buf = (struct level_buf *)stream->level_buf; switch (stream->level) { + case 3: + memset(level_buf->lvl3.hash_table, -1, sizeof(level_buf->lvl3.hash_table)); + isal_deflate_hash_lvl3(level_buf->lvl3.hash_table, LVL3_HASH_MASK, + stream->total_in, dict, dict_len); + break; + case 2: memset(level_buf->lvl2.hash_table, -1, sizeof(level_buf->lvl2.hash_table)); isal_deflate_hash_lvl2(level_buf->lvl2.hash_table, LVL2_HASH_MASK, stream->total_in, dict, dict_len); + break; case 1: memset(level_buf->lvl1.hash_table, -1, sizeof(level_buf->lvl1.hash_table)); isal_deflate_hash_lvl1(level_buf->lvl1.hash_table, LVL1_HASH_MASK, stream->total_in, dict, dict_len); + break; default: memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head)); isal_deflate_hash_lvl0(stream->internal_state.head, LVL0_HASH_MASK, diff --git a/igzip/igzip_base_aliases.c 
b/igzip/igzip_base_aliases.c index 170f1b9..daf918a 100644 --- a/igzip/igzip_base_aliases.c +++ b/igzip/igzip_base_aliases.c @@ -36,7 +36,9 @@ void isal_deflate_body_base(struct isal_zstream *stream); void isal_deflate_finish_base(struct isal_zstream *stream); void isal_deflate_icf_body_base(struct isal_zstream *stream); void isal_deflate_icf_body_hash8k_base(struct isal_zstream *stream); +void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream); void isal_deflate_icf_finish_hash8k_base(struct isal_zstream *stream); +void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream); void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream); void isal_update_histogram_base(uint8_t * start_stream, int length, struct isal_huff_histogram *histogram); @@ -74,12 +76,22 @@ void isal_deflate_icf_body_lvl1(struct isal_zstream *stream) isal_deflate_icf_body_hash8k_base(stream); } +void isal_deflate_icf_body_lvl2(struct isal_zstream *stream) +{ + isal_deflate_icf_body_hash_hist_base(stream); +} + void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream) { isal_deflate_icf_finish_hash8k_base(stream); } void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream) +{ + isal_deflate_icf_finish_hash_hist_base(stream); +} + +void isal_deflate_icf_finish_lvl3(struct isal_zstream *stream) { isal_deflate_icf_finish_hash_map_base(stream); } @@ -130,6 +142,12 @@ void isal_deflate_hash_lvl2(uint16_t * hash_table, uint32_t hash_mask, isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len); } +void isal_deflate_hash_lvl3(uint16_t * hash_table, uint32_t hash_mask, + uint32_t current_index, uint8_t * dict, uint32_t dict_len) +{ + isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len); +} + void set_long_icf_fg(uint8_t * next_in, uint8_t * end_in, struct deflate_icf *match_lookup, struct level_buf *level_buf) { diff --git a/igzip/igzip_icf_base.c b/igzip/igzip_icf_base.c index 14f7b7e..3c3954d 100644 --- 
a/igzip/igzip_icf_base.c +++ b/igzip/igzip_icf_base.c @@ -127,6 +127,101 @@ void isal_deflate_icf_body_hash8k_base(struct isal_zstream *stream) } +void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream) +{ + uint32_t literal, hash; + uint8_t *start_in, *next_in, *end_in, *end, *next_hash; + struct deflate_icf *start_out, *next_out, *end_out; + uint16_t match_length; + uint32_t dist; + uint32_t code, code2, extra_bits; + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint16_t *last_seen = level_buf->hash_hist.hash_table; + uint8_t *file_start = stream->next_in - stream->total_in; + + if (stream->avail_in == 0) { + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_FLUSH_READ_BUFFER; + return; + } + + start_in = stream->next_in; + end_in = start_in + stream->avail_in; + next_in = start_in; + + start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next; + end_out = + start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out / + sizeof(struct deflate_icf); + next_out = start_out; + + while (next_in + ISAL_LOOK_AHEAD < end_in) { + + if (next_out >= end_out) { + state->state = ZSTATE_CREATE_HDR; + update_state(stream, start_in, next_in, end_in, start_out, next_out, + end_out); + return; + } + + literal = *(uint32_t *) next_in; + hash = compute_hash(literal) & HASH_HIST_HASH_MASK; + dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) (next_in - file_start); + + /* The -1 are to handle the case when dist = 0 */ + if (dist - 1 < IGZIP_HIST_SIZE - 1) { + assert(dist != 0); + + match_length = compare258(next_in - dist, next_in, 258); + + if (match_length >= SHORTEST_MATCH) { + next_hash = next_in; +#ifdef ISAL_LIMIT_HASH_UPDATE + end = next_hash + 3; +#else + end = next_hash + match_length; +#endif + next_hash++; + + for (; next_hash < end; next_hash++) { + literal = *(uint32_t *) next_hash; + hash 
= compute_hash(literal) & HASH_HIST_HASH_MASK; + last_seen[hash] = (uint64_t) (next_hash - file_start); + } + + get_len_icf_code(match_length, &code); + get_dist_icf_code(dist, &code2, &extra_bits); + + level_buf->hist.ll_hist[code]++; + level_buf->hist.d_hist[code2]++; + + write_deflate_icf(next_out, code, code2, extra_bits); + next_out++; + next_in += match_length; + + continue; + } + } + + get_lit_icf_code(literal & 0xFF, &code); + level_buf->hist.ll_hist[code]++; + write_deflate_icf(next_out, code, NULL_DIST_SYM, 0); + next_out++; + next_in++; + } + + update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out); + + assert(stream->avail_in <= ISAL_LOOK_AHEAD); + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_FLUSH_READ_BUFFER; + + return; + +} + void isal_deflate_icf_finish_hash8k_base(struct isal_zstream *stream) { uint32_t literal = 0, hash; @@ -236,6 +331,115 @@ void isal_deflate_icf_finish_hash8k_base(struct isal_zstream *stream) return; } +void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream) +{ + uint32_t literal = 0, hash; + uint8_t *start_in, *next_in, *end_in, *end, *next_hash; + struct deflate_icf *start_out, *next_out, *end_out; + uint16_t match_length; + uint32_t dist; + uint32_t code, code2, extra_bits; + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint16_t *last_seen = level_buf->hash_hist.hash_table; + uint8_t *file_start = stream->next_in - stream->total_in; + + start_in = stream->next_in; + end_in = start_in + stream->avail_in; + next_in = start_in; + + start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next; + end_out = start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out / + sizeof(struct deflate_icf); + next_out = start_out; + + if (stream->avail_in == 0) { + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_CREATE_HDR; + return; + } + 
+ while (next_in + 3 < end_in) { + if (next_out >= end_out) { + state->state = ZSTATE_CREATE_HDR; + update_state(stream, start_in, next_in, end_in, start_out, next_out, + end_out); + return; + } + + literal = *(uint32_t *) next_in; + hash = compute_hash(literal) & HASH_HIST_HASH_MASK; + dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) (next_in - file_start); + + if (dist - 1 < IGZIP_HIST_SIZE - 1) { /* The -1 are to handle the case when dist = 0 */ + match_length = compare258(next_in - dist, next_in, end_in - next_in); + + if (match_length >= SHORTEST_MATCH) { + next_hash = next_in; +#ifdef ISAL_LIMIT_HASH_UPDATE + end = next_hash + 3; +#else + end = next_hash + match_length; +#endif + next_hash++; + + for (; next_hash < end - 3; next_hash++) { + literal = *(uint32_t *) next_hash; + hash = compute_hash(literal) & HASH_HIST_HASH_MASK; + last_seen[hash] = (uint64_t) (next_hash - file_start); + } + + get_len_icf_code(match_length, &code); + get_dist_icf_code(dist, &code2, &extra_bits); + + level_buf->hist.ll_hist[code]++; + level_buf->hist.d_hist[code2]++; + + write_deflate_icf(next_out, code, code2, extra_bits); + + next_out++; + next_in += match_length; + + continue; + } + } + + get_lit_icf_code(literal & 0xFF, &code); + level_buf->hist.ll_hist[code]++; + write_deflate_icf(next_out, code, NULL_DIST_SYM, 0); + next_out++; + next_in++; + + } + + while (next_in < end_in) { + if (next_out >= end_out) { + state->state = ZSTATE_CREATE_HDR; + update_state(stream, start_in, next_in, end_in, start_out, next_out, + end_out); + return; + } + + literal = *next_in; + get_lit_icf_code(literal & 0xFF, &code); + level_buf->hist.ll_hist[code]++; + write_deflate_icf(next_out, code, NULL_DIST_SYM, 0); + next_out++; + next_in++; + + } + + if (next_in == end_in) { + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_CREATE_HDR; + } + + update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out); + + 
return; +} + void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream) { uint32_t literal = 0, hash; diff --git a/igzip/igzip_icf_body.c b/igzip/igzip_icf_body.c index add8055..3e6f5a7 100644 --- a/igzip/igzip_icf_body.c +++ b/igzip/igzip_icf_body.c @@ -6,6 +6,7 @@ extern void gen_icf_map_lh1(struct isal_zstream *, struct deflate_icf *, uint32_t); extern void set_long_icf_fg(uint8_t *, uint8_t *, struct deflate_icf *, struct level_buf *); extern void isal_deflate_icf_body_lvl1(struct isal_zstream *); +extern void isal_deflate_icf_body_lvl2(struct isal_zstream *); /* ************************************************************* * Helper functions @@ -320,9 +321,12 @@ void icf_body_lazyhash1_fillgreedy_greedy(struct isal_zstream *stream) void isal_deflate_icf_body_base(struct isal_zstream *stream) { switch (stream->level) { - case 2: + case 3: icf_body_hash1_fillgreedy_lazy(stream); break; + case 2: + isal_deflate_icf_body_lvl2(stream); + break; case 1: default: isal_deflate_icf_body_lvl1(stream); @@ -332,9 +336,12 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream) void isal_deflate_icf_body_06(struct isal_zstream *stream) { switch (stream->level) { - case 2: + case 3: icf_body_lazyhash1_fillgreedy_greedy(stream); break; + case 2: + isal_deflate_icf_body_lvl2(stream); + break; case 1: default: isal_deflate_icf_body_lvl1(stream); diff --git a/igzip/igzip_icf_body_h1_gr_bt.asm b/igzip/igzip_icf_body_h1_gr_bt.asm index 2fbf74c..75b9c5c 100644 --- a/igzip/igzip_icf_body_h1_gr_bt.asm +++ b/igzip/igzip_icf_body_h1_gr_bt.asm @@ -112,17 +112,38 @@ stack_size equ 5*8 + 8*8 + 4*16 ;;; 8 because stack address is odd multiple of 8 after a function call and ;;; we want it aligned to 16 bytes -; void isal_deflate_icf_body ( isal_zstream *stream ) +;; Defines to generate functions for different architecture +%xdefine ARCH 01 +%xdefine ARCH1 02 +%xdefine ARCH2 04 + +%ifndef COMPARE_TYPE +%xdefine COMPARE_TYPE_NOT_DEF +%xdefine COMPARE_TYPE 1 +%xdefine 
COMPARE_TYPE1 2 +%xdefine COMPARE_TYPE2 3 +%endif + +%rep 3 +;; Defines to generate functions for different levels +%xdefine HASH_MASK HASH8K_HASH_MASK +%xdefine HASH_MASK1 HASH_HIST_HASH_MASK +%xdefine METHOD hash8k +%xdefine METHOD1 hash_hist + +%rep 2 +; void isal_deflate_icf_body ( isal_zstream *stream ) +; we make 6 different versions of this function ; arg 1: rcx: addr of stream -global isal_deflate_icf_body_hash8k_ %+ ARCH -isal_deflate_icf_body_hash8k_ %+ ARCH %+ : +global isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH +isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : %ifidn __OUTPUT_FORMAT__, elf64 mov rcx, rdi %endif ;; do nothing if (avail_in == 0) cmp dword [rcx + _avail_in], 0 - jne skip1 + jne .skip1 ;; Set stream's next state mov rdx, ZSTATE_FLUSH_READ_BUFFER @@ -133,7 +154,7 @@ isal_deflate_icf_body_hash8k_ %+ ARCH %+ : cmovne rax, rdx mov dword [rcx + _internal_state_state], eax ret -skip1: +.skip1: %ifdef ALIGN_STACK push rbp @@ -182,10 +203,9 @@ skip1: ; if (file_length <= 0) continue; cmp file_length, f_i - jle input_end + jle .input_end ; for (f_i = f_start_i; f_i < file_length; f_i++) { -MARK __body_compute_hash_ %+ ARCH MOVDQU xdata, [file_start + f_i] mov curr_data, [file_start + f_i] mov tmp1, curr_data @@ -196,19 +216,19 @@ MARK __body_compute_hash_ %+ ARCH shr tmp1, 8 compute_hash hash2, tmp1 - and hash, HASH8K_HASH_MASK - and hash2, HASH8K_HASH_MASK + and hash, HASH_MASK + and hash2, HASH_MASK cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST - je write_first_byte + je .write_first_byte - jmp loop2 + jmp .loop2 align 16 -loop2: +.loop2: ; if (state->bitbuf.is_full()) { cmp m_out_buf, [rsp + m_out_end] - ja output_end + ja .output_end xor dist, dist xor dist2, dist2 @@ -226,7 +246,7 @@ loop2: mov tmp2, curr_data shr curr_data, 16 compute_hash hash, curr_data - and hash %+ d, HASH8K_HASH_MASK + and hash %+ d, HASH_MASK mov dist2 %+ w, f_i %+ w dec dist2 @@ -239,43 +259,41 @@ loop2: shr tmp2, 24 compute_hash hash2, tmp2 - and 
hash2 %+ d, HASH8K_HASH_MASK + and hash2 %+ d, HASH_MASK and dist2 %+ d, (D-1) neg dist2 -MARK __body_compare_ %+ ARCH ;; Check for long len/dist match (>7) with first literal MOVQ len, xdata mov curr_data, len PSRLDQ xdata, 1 xor len, [tmp1 + dist - 1] - jz compare_loop + jz .compare_loop ;; Check for len/dist match (>7) with second literal MOVQ len2, xdata xor len2, [tmp1 + dist2] - jz compare_loop2 + jz .compare_loop2 movzx lit_code, curr_data %+ b shr curr_data, 8 ;; Check for len/dist match for first literal test len %+ d, 0xFFFFFFFF - jz len_dist_huffman_pre + jz .len_dist_huffman_pre inc word [lit_len_hist + HIST_ELEM_SIZE*lit_code] movzx lit_code2, curr_data %+ b ;; Check for len/dist match for second literal test len2 %+ d, 0xFFFFFFFF - jnz write_lit_bits + jnz .write_lit_bits -MARK __body_len_dist_lit_huffman_ %+ ARCH -len_dist_lit_huffman_pre: +.len_dist_lit_huffman_pre: bsf len2, len2 shr len2, 3 -len_dist_lit_huffman: +.len_dist_lit_huffman: or lit_code, LIT movnti dword [m_out_buf], lit_code %+ d @@ -292,7 +310,7 @@ len_dist_lit_huffman: shr curr_data, 24 compute_hash hash3, curr_data - and hash3, HASH8K_HASH_MASK + and hash3, HASH_MASK mov curr_data, tmp1 shr tmp1, 8 @@ -324,22 +342,21 @@ len_dist_lit_huffman: and dist_code2, 0x1F inc word [dist_hist + HIST_ELEM_SIZE*dist_code2] - ; hash = compute_hash(state->file_start + f_i) & HASH8K_HASH_MASK; - and hash %+ d, HASH8K_HASH_MASK - and hash2 %+ d, HASH8K_HASH_MASK + ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; + and hash %+ d, HASH_MASK + and hash2 %+ d, HASH_MASK ; continue cmp f_i, file_length - jl loop2 - jmp input_end + jl .loop2 + jmp .input_end ;; encode as dist/len -MARK __body_len_dist_huffman_ %+ ARCH -len_dist_huffman_pre: +.len_dist_huffman_pre: bsf len, len shr len, 3 -len_dist_huffman: +.len_dist_huffman: dec f_i ;; Setup for updateing hash lea tmp3, [f_i + 2] ; tmp3 <= k @@ -377,17 +394,16 @@ len_dist_huffman: and dist_code, 0x1F inc word [dist_hist + 
HIST_ELEM_SIZE*dist_code] - ; hash = compute_hash(state->file_start + f_i) & HASH8K_HASH_MASK; - and hash %+ d, HASH8K_HASH_MASK - and hash2 %+ d, HASH8K_HASH_MASK + ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; + and hash %+ d, HASH_MASK + and hash2 %+ d, HASH_MASK ; continue cmp f_i, file_length - jl loop2 - jmp input_end + jl .loop2 + jmp .input_end -MARK __body_write_lit_bits_ %+ ARCH -write_lit_bits: +.write_lit_bits: MOVDQU xdata, [file_start + f_i + 1] add f_i, 1 MOVQ curr_data, xdata @@ -402,9 +418,9 @@ write_lit_bits: ; continue cmp f_i, file_length - jl loop2 + jl .loop2 -input_end: +.input_end: mov stream, [rsp + stream_offset] mov tmp1, ZSTATE_FLUSH_READ_BUFFER mov tmp2, ZSTATE_BODY @@ -414,13 +430,13 @@ input_end: cmovne tmp2, tmp1 mov dword [stream + _internal_state_state], tmp2 %+ d - jmp end + jmp .end -output_end: +.output_end: mov stream, [rsp + stream_offset] mov dword [stream + _internal_state_state], ZSTATE_CREATE_HDR -end: +.end: ;; update input buffer add file_length, LA mov [stream + _total_in], f_i %+ d @@ -452,8 +468,7 @@ end: %endif ret -MARK __body_compare_loops_ %+ ARCH -compare_loop: +.compare_loop: lea tmp2, [tmp1 + dist - 1] %if (COMPARE_TYPE == 1) compare250 tmp1, tmp2, len, tmp3 @@ -465,9 +480,9 @@ compare_loop: %error Unknown Compare type COMPARE_TYPE % error %endif - jmp len_dist_huffman + jmp .len_dist_huffman -compare_loop2: +.compare_loop2: lea tmp2, [tmp1 + dist2] add tmp1, 1 %if (COMPARE_TYPE == 1) @@ -483,12 +498,11 @@ compare_loop2: movzx lit_code, curr_data %+ b shr curr_data, 8 inc word [lit_len_hist + HIST_ELEM_SIZE*lit_code] - jmp len_dist_lit_huffman + jmp .len_dist_lit_huffman -MARK __write_first_byte_ %+ ARCH -write_first_byte: +.write_first_byte: cmp m_out_buf, [rsp + m_out_end] - ja output_end + ja .output_end mov byte [stream + _internal_state_has_hist], IGZIP_HIST @@ -508,9 +522,32 @@ write_first_byte: MOVDQU xdata, [file_start + f_i + 1] add f_i, 1 mov curr_data, [file_start + f_i] - and hash %+ 
d, HASH8K_HASH_MASK - and hash2 %+ d, HASH8K_HASH_MASK + and hash %+ d, HASH_MASK + and hash2 %+ d, HASH_MASK cmp f_i, file_length - jl loop2 - jmp input_end + jl .loop2 + jmp .input_end + + +;; Shift defines over in order to iterate over all versions +%undef HASH_MASK +%xdefine HASH_MASK HASH_MASK1 + +%undef METHOD +%xdefine METHOD METHOD1 +%endrep + +;; Shift defines over in order to iterate over all versions +%undef ARCH +%xdefine ARCH ARCH1 +%undef ARCH1 +%xdefine ARCH1 ARCH2 + +%ifdef COMPARE_TYPE_NOT_DEF +%undef COMPARE_TYPE +%xdefine COMPARE_TYPE COMPARE_TYPE1 +%undef COMPARE_TYPE1 +%xdefine COMPARE_TYPE1 COMPARE_TYPE2 +%endif +%endrep diff --git a/igzip/igzip_icf_body_h1_gr_bt_01.asm b/igzip/igzip_icf_body_h1_gr_bt_01.asm deleted file mode 100644 index fda5633..0000000 --- a/igzip/igzip_icf_body_h1_gr_bt_01.asm +++ /dev/null @@ -1,7 +0,0 @@ -%define ARCH 01 - -%ifndef COMPARE_TYPE -%define COMPARE_TYPE 2 -%endif - -%include "igzip_icf_body_h1_gr_bt.asm" diff --git a/igzip/igzip_icf_body_h1_gr_bt_02.asm b/igzip/igzip_icf_body_h1_gr_bt_02.asm deleted file mode 100644 index f266ea3..0000000 --- a/igzip/igzip_icf_body_h1_gr_bt_02.asm +++ /dev/null @@ -1,7 +0,0 @@ -%define ARCH 02 - -%ifndef COMPARE_TYPE -%define COMPARE_TYPE 2 -%endif - -%include "igzip_icf_body_h1_gr_bt.asm" diff --git a/igzip/igzip_icf_body_h1_gr_bt_04.asm b/igzip/igzip_icf_body_h1_gr_bt_04.asm deleted file mode 100644 index c35c3a7..0000000 --- a/igzip/igzip_icf_body_h1_gr_bt_04.asm +++ /dev/null @@ -1,8 +0,0 @@ -%define ARCH 04 -%define USE_HSWNI - -%ifndef COMPARE_TYPE -%define COMPARE_TYPE 3 -%endif - -%include "igzip_icf_body_h1_gr_bt.asm" diff --git a/igzip/igzip_icf_finish.asm b/igzip/igzip_icf_finish.asm index bbdb432..2d13ca7 100644 --- a/igzip/igzip_icf_finish.asm +++ b/igzip/igzip_icf_finish.asm @@ -87,10 +87,17 @@ f_end_i_mem_offset equ 0 ; local variable (8 bytes) m_out_end equ 8 m_out_start equ 16 stack_size equ 32 + +%xdefine HASH_MASK HASH8K_HASH_MASK +%xdefine HASH_MASK1 
HASH_HIST_HASH_MASK +%xdefine METHOD hash8k +%xdefine METHOD1 hash_hist + +%rep 2 ; void isal_deflate_icf_finish ( isal_zstream *stream ) ; arg 1: rcx: addr of stream -global isal_deflate_icf_finish_hash8k_01 -isal_deflate_icf_finish_hash8k_01: +global isal_deflate_icf_finish_ %+ METHOD %+ _01 +isal_deflate_icf_finish_ %+ METHOD %+ _01: PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 sub rsp, stack_size @@ -124,33 +131,33 @@ isal_deflate_icf_finish_hash8k_01: mov [rsp + f_end_i_mem_offset], f_end_i ; for (f_i = f_start_i; f_i < f_end_i; f_i++) { cmp f_i, f_end_i - jge end_loop_2 + jge .end_loop_2 mov curr_data %+ d, [file_start + f_i] cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST - jne skip_write_first_byte + jne .skip_write_first_byte cmp m_out_buf, [rsp + m_out_end] - ja end_loop_2 + ja .end_loop_2 compute_hash hash, curr_data - and hash %+ d, HASH8K_HASH_MASK + and hash %+ d, HASH_MASK mov [hash_table + 2 * hash], f_i %+ w mov byte [stream + _internal_state_has_hist], IGZIP_HIST - jmp encode_literal + jmp .encode_literal -skip_write_first_byte: +.skip_write_first_byte: -loop2: +.loop2: ; if (state->bitbuf.is_full()) { cmp m_out_buf, [rsp + m_out_end] - ja end_loop_2 + ja .end_loop_2 - ; hash = compute_hash(state->file_start + f_i) & HASH8K_HASH_MASK; + ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; mov curr_data %+ d, [file_start + f_i] compute_hash hash, curr_data - and hash %+ d, HASH8K_HASH_MASK + and hash %+ d, HASH_MASK ; f_index = state->head[hash]; movzx f_index %+ d, word [hash_table + 2 * hash] @@ -167,7 +174,7 @@ loop2: mov tmp1 %+ d, dist %+ d sub tmp1 %+ d, 1 cmp tmp1 %+ d, (D-1) - jae encode_literal + jae .encode_literal ; len = f_end_i - f_i; mov tmp4, [rsp + f_end_i_mem_offset] @@ -187,7 +194,7 @@ loop2: ; if (len >= SHORTEST_MATCH) { cmp len, SHORTEST_MATCH - jb encode_literal + jb .encode_literal ;; encode as dist/len @@ -205,37 +212,37 @@ loop2: lea tmp3, [f_i + 1] ; tmp3 <= k add f_i, len cmp f_i, [rsp + 
f_end_i_mem_offset] - jae skip_hash_update + jae .skip_hash_update ; only update hash twice - ; hash = compute_hash(state->file_start + k) & HASH8K_HASH_MASK; + ; hash = compute_hash(state->file_start + k) & HASH_MASK; mov tmp6 %+ d, dword [file_start + tmp3] compute_hash hash, tmp6 - and hash %+ d, HASH8K_HASH_MASK + and hash %+ d, HASH_MASK ; state->head[hash] = k; mov [hash_table + 2 * hash], tmp3 %+ w add tmp3, 1 - ; hash = compute_hash(state->file_start + k) & HASH8K_HASH_MASK; + ; hash = compute_hash(state->file_start + k) & HASH_MASK; mov tmp6 %+ d, dword [file_start + tmp3] compute_hash hash, tmp6 - and hash %+ d, HASH8K_HASH_MASK + and hash %+ d, HASH_MASK ; state->head[hash] = k; mov [hash_table + 2 * hash], tmp3 %+ w -skip_hash_update: +.skip_hash_update: write_dword code2, m_out_buf shr code2, DIST_OFFSET and code2, 0x1F inc word [dist_hist + HIST_ELEM_SIZE*code2] ; continue cmp f_i, [rsp + f_end_i_mem_offset] - jl loop2 - jmp end_loop_2 + jl .loop2 + jmp .end_loop_2 -encode_literal: +.encode_literal: ; get_lit_code(state->file_start[f_i], &code2, &code_len2); movzx tmp5, byte [file_start + f_i] inc word [lit_len_hist + HIST_ELEM_SIZE*tmp5] @@ -244,20 +251,20 @@ encode_literal: ; continue add f_i, 1 cmp f_i, [rsp + f_end_i_mem_offset] - jl loop2 + jl .loop2 -end_loop_2: +.end_loop_2: mov f_end_i, [rsp + f_end_i_mem_offset] add f_end_i, LAST_BYTES_COUNT mov [rsp + f_end_i_mem_offset], f_end_i ; if ((f_i >= f_end_i) && ! 
state->bitbuf.is_full()) { cmp f_i, f_end_i - jge input_end + jge .input_end xor tmp5, tmp5 -final_bytes: +.final_bytes: cmp m_out_buf, [rsp + m_out_end] - ja out_end + ja .out_end movzx tmp5, byte [file_start + f_i] inc word [lit_len_hist + HIST_ELEM_SIZE*tmp5] @@ -266,18 +273,18 @@ final_bytes: inc f_i cmp f_i, [rsp + f_end_i_mem_offset] - jl final_bytes + jl .final_bytes -input_end: +.input_end: cmp word [stream + _end_of_stream], 0 - jne out_end + jne .out_end cmp word [stream + _flush], _NO_FLUSH - jne out_end - jmp end + jne .out_end + jmp .end -out_end: +.out_end: mov dword [stream + _internal_state_state], ZSTATE_CREATE_HDR -end: +.end: ;; Update input buffer mov f_end_i, [rsp + f_end_i_mem_offset] mov [stream + _total_in], f_i %+ d @@ -301,6 +308,14 @@ end: POP_ALL ret +;; Shift defines over in order to iterate over all versions +%undef HASH_MASK +%xdefine HASH_MASK HASH_MASK1 + +%undef METHOD +%xdefine METHOD METHOD1 +%endrep + section .data align 4 c258: dq 258 diff --git a/igzip/igzip_level_buf_structs.h b/igzip/igzip_level_buf_structs.h index ba8a948..5c195e3 100644 --- a/igzip/igzip_level_buf_structs.h +++ b/igzip/igzip_level_buf_structs.h @@ -7,10 +7,14 @@ #define MATCH_BUF_SIZE (4 * 1024) -struct hash8k_buf1 { +struct hash8k_buf { uint16_t hash_table[IGZIP_HASH8K_HASH_SIZE]; }; +struct hash_hist_buf { + uint16_t hash_table[IGZIP_HASH_HIST_SIZE]; +}; + struct hash_map_buf { uint16_t hash_table[IGZIP_HASH_MAP_HASH_SIZE]; struct deflate_icf *matches_next; @@ -31,12 +35,13 @@ struct level_buf { uint64_t icf_buf_avail_out; struct deflate_icf *icf_buf_start; union { - struct hash8k_buf1 hash8k; + struct hash8k_buf hash8k; + struct hash_hist_buf hash_hist; struct hash_map_buf hash_map; - struct hash8k_buf1 lvl1; - struct hash_map_buf lvl2; - + struct hash8k_buf lvl1; + struct hash_hist_buf lvl2; + struct hash_map_buf lvl3; }; }; diff --git a/igzip/igzip_multibinary.asm b/igzip/igzip_multibinary.asm index 2fdea47..a006f41 100644 --- 
a/igzip/igzip_multibinary.asm +++ b/igzip/igzip_multibinary.asm @@ -45,6 +45,14 @@ extern isal_deflate_icf_body_hash8k_02 extern isal_deflate_icf_body_hash8k_04 extern isal_deflate_icf_finish_hash8k_base extern isal_deflate_icf_finish_hash8k_01 + +extern isal_deflate_icf_body_hash_hist_base +extern isal_deflate_icf_body_hash_hist_01 +extern isal_deflate_icf_body_hash_hist_02 +extern isal_deflate_icf_body_hash_hist_04 +extern isal_deflate_icf_finish_hash_hist_base +extern isal_deflate_icf_finish_hash_hist_01 + extern isal_deflate_icf_finish_hash_map_base extern isal_update_histogram_base @@ -91,11 +99,17 @@ mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_ mbin_interface isal_deflate_icf_body_lvl1 mbin_dispatch_init5 isal_deflate_icf_body_lvl1, isal_deflate_icf_body_hash8k_base, isal_deflate_icf_body_hash8k_01, isal_deflate_icf_body_hash8k_02, isal_deflate_icf_body_hash8k_04 +mbin_interface isal_deflate_icf_body_lvl2 +mbin_dispatch_init5 isal_deflate_icf_body_lvl2, isal_deflate_icf_body_hash_hist_base, isal_deflate_icf_body_hash_hist_01, isal_deflate_icf_body_hash_hist_02, isal_deflate_icf_body_hash_hist_04 + mbin_interface isal_deflate_icf_finish_lvl1 mbin_dispatch_init5 isal_deflate_icf_finish_lvl1, isal_deflate_icf_finish_hash8k_base, isal_deflate_icf_finish_hash8k_01, isal_deflate_icf_finish_hash8k_01, isal_deflate_icf_finish_hash8k_01 mbin_interface isal_deflate_icf_finish_lvl2 -mbin_dispatch_init5 isal_deflate_icf_finish_lvl2, isal_deflate_icf_finish_hash_map_base, isal_deflate_icf_finish_hash_map_base, isal_deflate_icf_finish_hash_map_base, isal_deflate_icf_finish_hash_map_base +mbin_dispatch_init5 isal_deflate_icf_finish_lvl2, isal_deflate_icf_finish_hash_hist_base, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01 + +mbin_interface isal_deflate_icf_finish_lvl3 +mbin_dispatch_init5 isal_deflate_icf_finish_lvl3, isal_deflate_icf_finish_hash_map_base, 
isal_deflate_icf_finish_hash_map_base, isal_deflate_icf_finish_hash_map_base, isal_deflate_icf_finish_hash_map_base mbin_interface isal_update_histogram mbin_dispatch_init5 isal_update_histogram, isal_update_histogram_base, isal_update_histogram_01, isal_update_histogram_01, isal_update_histogram_04 @@ -133,7 +147,10 @@ mbin_interface isal_deflate_hash_lvl1 mbin_dispatch_init5 isal_deflate_hash_lvl1, isal_deflate_hash_base, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01 mbin_interface isal_deflate_hash_lvl2 -mbin_dispatch_init5 isal_deflate_hash_lvl2, isal_deflate_hash_base, isal_deflate_hash_base, isal_deflate_hash_base, isal_deflate_hash_mad_base +mbin_dispatch_init5 isal_deflate_hash_lvl2, isal_deflate_hash_base, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01 + +mbin_interface isal_deflate_hash_lvl3 +mbin_dispatch_init5 isal_deflate_hash_lvl3, isal_deflate_hash_base, isal_deflate_hash_base, isal_deflate_hash_base, isal_deflate_hash_mad_base %ifdef HAVE_AS_KNOWS_AVX512 mbin_interface isal_deflate_icf_body diff --git a/igzip/igzip_rand_test.c b/igzip/igzip_rand_test.c index 6d67d8a..5a537e9 100644 --- a/igzip/igzip_rand_test.c +++ b/igzip/igzip_rand_test.c @@ -254,6 +254,9 @@ int get_rand_level_buf_size(int level) { int size; switch (level) { + case 3: + size = rand() % IBUF_SIZE + ISAL_DEF_LVL3_MIN; + break; case 2: size = rand() % IBUF_SIZE + ISAL_DEF_LVL2_MIN; break; @@ -1282,8 +1285,11 @@ int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_ return COMPRESS_OUT_BUFFER_OVERFLOW; else if (ret == INVALID_FLUSH) return INVALID_FLUSH_ERROR; - else + else { + printf("Return due to ret = %d with level = %d or %d\n", ret, level, + stream.level); return COMPRESS_GENERAL_ERROR; + } } if (!stream.end_of_stream) { diff --git a/igzip/lz0a_const.asm b/igzip/lz0a_const.asm index a0c23da..deb6d23 100644 --- a/igzip/lz0a_const.asm +++ b/igzip/lz0a_const.asm @@ -40,11 +40,13 @@ %assign 
IGZIP_LVL0_HASH_SIZE (8 * K) %assign IGZIP_HASH8K_HASH_SIZE (8 * K) +%assign IGZIP_HASH_HIST_HASH_SIZE IGZIP_HIST_SIZE %assign IGZIP_HASH_MAP_HASH_SIZE IGZIP_HIST_SIZE -%assign LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1) -%assign HASH8K_HASH_MASK (IGZIP_HASH8K_HASH_SIZE - 1) -%assign HASH_MAP_HASH_MASK (IGZIP_HASH_MAP_HASH_SIZE - 1) +%xdefine LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1) +%xdefine HASH8K_HASH_MASK (IGZIP_HASH8K_HASH_SIZE - 1) +%xdefine HASH_HIST_HASH_MASK (IGZIP_HASH_HIST_HASH_SIZE - 1) +%xdefine HASH_MAP_HASH_MASK (IGZIP_HASH_MAP_HASH_SIZE - 1) %assign MIN_DEF_MATCH 3 ; Minimum length of a match in deflate %assign SHORTEST_MATCH 4 diff --git a/include/igzip_lib.h b/include/igzip_lib.h index 5825845..123817a 100644 --- a/include/igzip_lib.h +++ b/include/igzip_lib.h @@ -116,11 +116,13 @@ extern "C" { #define ISAL_LIMIT_HASH_UPDATE #define IGZIP_HASH8K_HASH_SIZE (8 * IGZIP_K) +#define IGZIP_HASH_HIST_SIZE IGZIP_HIST_SIZE #define IGZIP_HASH_MAP_HASH_SIZE IGZIP_HIST_SIZE #define IGZIP_LVL0_HASH_SIZE (8 * IGZIP_K) #define IGZIP_LVL1_HASH_SIZE IGZIP_HASH8K_HASH_SIZE -#define IGZIP_LVL2_HASH_SIZE IGZIP_HASH_MAP_HASH_SIZE +#define IGZIP_LVL2_HASH_SIZE IGZIP_HASH_HIST_SIZE +#define IGZIP_LVL3_HASH_SIZE IGZIP_HASH_MAP_HASH_SIZE #ifdef LONGER_HUFFTABLE enum {IGZIP_DIST_TABLE_SIZE = 8*1024}; @@ -249,15 +251,17 @@ struct isal_mod_hist { }; #define ISAL_DEF_MIN_LEVEL 0 -#define ISAL_DEF_MAX_LEVEL 2 +#define ISAL_DEF_MAX_LEVEL 3 /* Defines used set level data sizes */ /* has to be at least sizeof(struct level_buf) + sizeof(struct lvlX_buf */ #define ISAL_DEF_LVL0_REQ 0 #define ISAL_DEF_LVL1_REQ (4 * IGZIP_K + 2 * IGZIP_LVL1_HASH_SIZE) #define ISAL_DEF_LVL1_TOKEN_SIZE 4 -#define ISAL_DEF_LVL2_REQ 4 * IGZIP_K + 4 * 4 * IGZIP_K + 2 * IGZIP_LVL2_HASH_SIZE +#define ISAL_DEF_LVL2_REQ (4 * IGZIP_K + 2 * IGZIP_LVL2_HASH_SIZE) #define ISAL_DEF_LVL2_TOKEN_SIZE 4 +#define ISAL_DEF_LVL3_REQ (4 * IGZIP_K + 4 * 4 * IGZIP_K + 2 * IGZIP_LVL3_HASH_SIZE) /* parenthesized like the LVL1/LVL2 macros so the sum expands safely inside larger expressions */ +#define ISAL_DEF_LVL3_TOKEN_SIZE
4 /* Data sizes for level specific data options */ #define ISAL_DEF_LVL0_MIN ISAL_DEF_LVL0_REQ @@ -281,6 +285,13 @@ struct isal_mod_hist { #define ISAL_DEF_LVL2_EXTRA_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 128 * IGZIP_K) #define ISAL_DEF_LVL2_DEFAULT ISAL_DEF_LVL2_LARGE +#define ISAL_DEF_LVL3_MIN (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 1 * IGZIP_K) +#define ISAL_DEF_LVL3_SMALL (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 16 * IGZIP_K) +#define ISAL_DEF_LVL3_MEDIUM (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 32 * IGZIP_K) +#define ISAL_DEF_LVL3_LARGE (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 64 * IGZIP_K) +#define ISAL_DEF_LVL3_EXTRA_LARGE (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 128 * IGZIP_K) +#define ISAL_DEF_LVL3_DEFAULT ISAL_DEF_LVL3_LARGE + #define IGZIP_NO_HIST 0 #define IGZIP_HIST 1 #define IGZIP_DICT_HIST 2