igzip: Swap length code lookup with length lookup

Change-Id: I9f1c3ea5353f2c2fa98bab1d0cb1eb3c7b7397f6
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
Roy Oursler 2018-03-29 12:41:23 -07:00 committed by Greg Tucker
parent ec6169ac3b
commit 2de5a0fd88
4 changed files with 153 additions and 108 deletions

View File

@ -27,15 +27,16 @@ extern rfc1951_lookup_table
%define LARGE_SHORT_SYM_LEN 25
%define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1)
%define LARGE_LONG_SYM_LEN 9
%define LARGE_LONG_SYM_LEN 10
%define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1)
%define LARGE_SHORT_CODE_LEN_OFFSET 28
%define LARGE_LONG_CODE_LEN_OFFSET 9
%define LARGE_FLAG_BIT_OFFSET 27
%define LARGE_LONG_CODE_LEN_OFFSET 10
%define LARGE_FLAG_BIT_OFFSET 25
%define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET)
%define LARGE_SYM_COUNT_OFFSET 25
%define LARGE_SYM_COUNT_OFFSET 26
%define LARGE_SYM_COUNT_LEN 2
%define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1)
%define LARGE_SHORT_MAX_LEN_OFFSET 26
%define SMALL_SHORT_SYM_LEN 9
%define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1)
@ -309,19 +310,21 @@ stack_size equ 4 * 8 + 8 * 8
test %%next_sym, LARGE_FLAG_BIT
jz %%end
shl rcx, LARGE_SYM_COUNT_LEN
or rcx, %%next_sym_num
;; Save length associated with symbol
mov %%next_bits, %%read_in
shr %%next_bits, %%lookup_size
;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first %%lookup_size bits.
;; Extract the bits beyond the first %%lookup_size bits.
CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size
and %%next_sym, LARGE_SHORT_SYM_MASK
add %%next_sym, %%next_bits
lea %%next_sym, [%%state + LARGE_LONG_CODE_SIZE * %%next_sym]
;; Lookup actual next symbol
movzx %%next_sym, word [%%next_sym + %%state_offset + LARGE_SHORT_CODE_SIZE * (1 << %%lookup_size)]
movzx %%next_sym, word [%%state + LARGE_LONG_CODE_SIZE * %%next_sym + %%state_offset + LARGE_SHORT_CODE_SIZE * (1 << %%lookup_size)]
mov %%next_sym_num, 1
;; Save length associated with symbol
@ -480,7 +483,7 @@ loop_block:
;; Decode next symbol and reload the read_in buffer
decode_next_lit_len state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, next_sym_num, tmp1
;; Specutively write next_sym2 if it is a literal
;; Speculatively write next_sym if it is a literal
mov [next_out], next_sym
add next_out, next_sym_num
lea next_sym2, [8 * next_sym_num - 8]
@ -496,8 +499,7 @@ loop_block:
or read_in, tmp1
;; Speculatively load data associated with length symbol
movzx rcx, byte [rfc_lookup + _len_extra_bit_count + next_sym2 - 257]
movzx repeat_length, word [rfc_lookup + _len_start + 2 * (next_sym2 - 257)]
lea repeat_length, [next_sym2 - 254]
;; Test for end of block symbol
cmp next_sym2, 256
@ -514,9 +516,8 @@ loop_block:
lea read_in_length, [read_in_length + 8 * tmp1]
;; Speculatively load next dist code
SHRX read_in_2, read_in, rcx
mov next_bits2, (1 << ISAL_DECODE_SHORT_BITS) - 1
and next_bits2, read_in_2
and next_bits2, read_in
movzx next_sym3, word [state + _dist_huff_code + SMALL_SHORT_CODE_SIZE * next_bits2]
;; Check if next_sym2 is a literal, length, or end of block symbol
@ -524,29 +525,20 @@ loop_block:
jl loop_block
decode_len_dist:
;; Find length for length/dist pair
mov next_bits, read_in
BZHI next_bits, next_bits, rcx, tmp4
add repeat_length, next_bits
;; Update read_in for the length extra bits which were read in
sub read_in_length, rcx
;; Decode distance code
decode_next_dist state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in_2, read_in_length, next_sym3, rcx, tmp2
mov look_back_dist2 %+ d, [rfc_lookup + _dist_start + 4 * next_sym3]
;; Load distance code extra bits
mov next_bits, read_in_2
;; Determine next_out after the copy is finished
lea next_out, [next_out + repeat_length - 1]
;; Decode distance code
decode_next_dist state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym3, rcx, tmp2
mov look_back_dist2 %+ d, [rfc_lookup + _dist_start + 4 * next_sym3]
; ;; Load distance code extra bits
mov next_bits, read_in
;; Calculate the look back distance
BZHI next_bits, next_bits, rcx, tmp4
SHRX read_in, read_in_2, rcx
SHRX read_in, read_in, rcx
;; Setup next_sym, read_in, and read_in_length for next loop
mov read_in_2, (1 << ISAL_DECODE_LONG_BITS) - 1
@ -569,8 +561,7 @@ decode_len_dist:
;; Set tmp2 to be the minimum of COPY_SIZE and repeat_length
;; This is to decrease use of small_byte_copy branch
xor tmp2, tmp2
or tmp2, COPY_SIZE
mov tmp2, COPY_SIZE
cmp tmp2, repeat_length
cmovg tmp2, repeat_length
@ -634,20 +625,7 @@ multi_symbol_start:
je end_symbol
decode_len_dist_2:
;; Load length exta bits
mov next_bits, read_in
movzx repeat_length, word [rfc_lookup + _len_start + 2 * (next_sym - 257)]
movzx rcx, byte [rfc_lookup + _len_extra_bit_count + next_sym - 257]
;; Calculate repeat length
BZHI next_bits, next_bits, rcx, tmp1
add repeat_length, next_bits
;; Update read_in for the length extra bits which were read in
SHRX read_in, read_in, rcx
sub read_in_length, rcx
lea repeat_length, [next_sym - 254]
;; Decode distance code
decode_next_dist_with_load state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym, rcx, tmp1

View File

@ -36,15 +36,16 @@ extern int decode_huffman_code_block_stateless(struct inflate_state *);
#define LARGE_SHORT_SYM_LEN 25
#define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1)
#define LARGE_LONG_SYM_LEN 9
#define LARGE_LONG_SYM_LEN 10
#define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1)
#define LARGE_SHORT_CODE_LEN_OFFSET 28
#define LARGE_LONG_CODE_LEN_OFFSET 9
#define LARGE_FLAG_BIT_OFFSET 27
#define LARGE_LONG_CODE_LEN_OFFSET 10
#define LARGE_FLAG_BIT_OFFSET 25
#define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET)
#define LARGE_SYM_COUNT_OFFSET 25
#define LARGE_SYM_COUNT_OFFSET 26
#define LARGE_SYM_COUNT_LEN 2
#define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1)
#define LARGE_SHORT_MAX_LEN_OFFSET 26
#define SMALL_SHORT_SYM_LEN 9
#define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1)
@ -62,7 +63,15 @@ extern int decode_huffman_code_block_stateless(struct inflate_state *);
#define DIST_SYM_EXTRA_LEN 4
#define DIST_SYM_EXTRA_MASK ((1 << DIST_SYM_EXTRA_LEN) - 1)
#define INVALID_SYMBOL 0x1FF
/* Upper bound used when sizing literal/length code-length tables; the long
 * code scratch list is sized as 1 << (MAX_LIT_LEN_CODE_LEN - ISAL_DECODE_LONG_BITS). */
#define MAX_LIT_LEN_CODE_LEN 21
/* Number of entries in the literal/length count arrays (lit_count,
 * count_total_tmp). Parenthesized so the expansion behaves as a single operand
 * inside larger expressions such as MAX_LIT_LEN_COUNT * 2. */
#define MAX_LIT_LEN_COUNT (MAX_LIT_LEN_CODE_LEN + 2)
/* Number of entries in the expanded literal/length tables (lit_code,
 * long_code_list, code_list are sized from it). */
#define LIT_LEN_ELEMS 513
#define INVALID_SYMBOL 0x1FFF
#define INVALID_CODE 0xFFFFFF
#define MIN_DEF_MATCH 3
/* structure contain lookup data based on RFC 1951 */
struct rfc1951_tables {
uint8_t dist_extra_bit_count[32];
@ -97,7 +106,7 @@ static struct rfc1951_tables rfc_lookup_table = {
0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a,
0x000b, 0x000d, 0x000f, 0x0011, 0x0013, 0x0017, 0x001b, 0x001f,
0x0023, 0x002b, 0x0033, 0x003b, 0x0043, 0x0053, 0x0063, 0x0073,
0x0083, 0x00a3, 0x00c3, 0x00e3, 0x0102, 0x0000, 0x0000, 0x0000}
0x0083, 0x00a3, 0x00c3, 0x00e3, 0x0102, 0x0103, 0x0000, 0x0000}
};
struct slver {
@ -216,20 +225,85 @@ static uint64_t inline inflate_in_read_bits(struct inflate_state *state, uint8_t
return ret;
}
/* Fills in the code field of every entry in huff_code_table from the entry's
 * length field.
 *
 * huff_code_table: entries whose length field is already set; the code field
 *                  of each of the table_length entries is overwritten.
 * table_length:    number of entries to process.
 * count:           read at indices 1 .. MAX_HUFF_TREE_DEPTH - 1; used as the
 *                  number of codes of each length when computing the first
 *                  code value for the next length.
 *
 * Entries are visited in ascending index order and each one takes the next
 * unused code value for its length, passed through bit_reverse2 (presumably a
 * reversal of the low `length` bits -- confirm against its definition). */
static void inline set_codes(struct huff_code *huff_code_table, int table_length,
			     uint16_t * count)
{
	uint16_t code_base[MAX_HUFF_TREE_DEPTH + 1];
	struct huff_code *entry;
	int depth;
	int sym;

	/* First code value for each length; lengths 0 and 1 both start at 0 */
	code_base[0] = 0;
	code_base[1] = 0;
	depth = 2;
	while (depth < MAX_HUFF_TREE_DEPTH + 1) {
		code_base[depth] = (code_base[depth - 1] + count[depth - 1]) << 1;
		depth++;
	}

	/* Hand out consecutive code values within each length */
	for (sym = 0; sym < table_length; sym++) {
		entry = &huff_code_table[sym];
		entry->code = bit_reverse2(code_base[entry->length], entry->length);
		code_base[entry->length] += 1;
	}
}
/* Expands the length symbols of a literal/length huffman table in place so
 * that every individual repeat length gets its own entry, and updates the
 * code length histogram to match.
 *
 * lit_len_huff: table indexed by symbol. Entries ISAL_DEF_LIT_SYMBOLS ..
 *               LIT_LEN - 1 are read as length symbols; expanded entries are
 *               written at index (ISAL_DEF_LIT_SYMBOLS - MIN_DEF_MATCH) + len
 *               for every len covered by rfc_lookup_table.len_start, so the
 *               array must be large enough to hold the largest such index.
 * count_total:  on entry, a per-code-length count; on return, running totals
 *               where count_total[i] is the sum of the adjusted counts for
 *               lengths 1 .. i - 1 (entries 0 and 1 are set to 0). Must hold
 *               MAX_LIT_LEN_COUNT entries.
 */
static void inline expand_lit_len_huffcode(struct huff_code *lit_len_huff,
uint16_t * count_total)
{
int huff_index = LIT_LEN - 1;
int len_sym, len_start, len_end, extra_count, len;
uint16_t count_prev, count_current;
uint32_t code, code_len;
/* Base pointer chosen so that expand_start[len] is the entry for repeat length len */
struct huff_code *expand_start = &lit_len_huff[ISAL_DEF_LIT_SYMBOLS - MIN_DEF_MATCH];
/* Walk the length symbols from highest to lowest.
 * NOTE(review): the descending order looks required because the expanded
 * entries are written into the same array that is still being read --
 * confirm before reordering. */
for (; huff_index >= ISAL_DEF_LIT_SYMBOLS; huff_index--) {
len_sym = huff_index - ISAL_DEF_LIT_SYMBOLS;
/* Repeat lengths covered by this symbol are [len_start, len_end); the end
 * comes from the next table entry, so len_start needs one entry past the
 * last length symbol */
len_start = rfc_lookup_table.len_start[len_sym];
len_end = rfc_lookup_table.len_start[len_sym + 1];
extra_count = rfc_lookup_table.len_extra_bit_count[len_sym];
/* Read the symbol's code and length before any expanded entry is written */
code = lit_len_huff[huff_index].code;
code_len = lit_len_huff[huff_index].length;
if (code_len == 0) {
/* Symbol has no code: clear every expanded entry in its range */
for (len = len_start; len < len_end; len++) {
expand_start[len].code_and_extra = 0;
expand_start[len].length = 0;
}
} else {
/* One code of length code_len is replaced by (len_end - len_start) codes
 * of length code_len + extra_count */
count_total[code_len]--;
count_total[code_len + extra_count] += len_end - len_start;
for (len = len_start; len < len_end; len++) {
/* Offset within the range is placed directly above the code's bits */
expand_start[len].code_and_extra =
code | ((len - len_start) << code_len);
expand_start[len].length = code_len + extra_count;
}
}
}
/* Turn the per-length counts into running totals, shifted by one slot:
 * count_total[i] becomes the sum of the counts for lengths 1 .. i - 1.
 * count_prev carries the not-yet-added count of the previous length. */
count_prev = count_total[1];
count_total[0] = 0;
count_total[1] = 0;
for (int i = 2; i < MAX_LIT_LEN_COUNT; i++) {
count_current = count_total[i];
count_total[i] = count_total[i - 1] + count_prev;
count_prev = count_current;
}
}
/* Sets result to the inflate_huff_code corresponding to the huffcode defined by
* the lengths in huff_code_table,where count is a histogram of the appearance
* of each code length */
static void inline make_inflate_huff_code_lit_len(struct inflate_huff_code_large *result,
struct huff_code *huff_code_table,
uint32_t table_length, uint16_t * count,
uint32_t max_symbol)
static void make_inflate_huff_code_lit_len(struct inflate_huff_code_large *result,
struct huff_code *huff_code_table,
uint32_t table_length, uint16_t * count_total)
{
int i, j, k;
uint16_t code = 0;
uint16_t next_code[MAX_HUFF_TREE_DEPTH + 1];
uint16_t long_code_list[LIT_LEN];
uint16_t long_code_list[LIT_LEN_ELEMS];
uint32_t long_code_length = 0;
uint16_t temp_code_list[1 << (15 - ISAL_DECODE_LONG_BITS)];
uint16_t temp_code_list[1 << (MAX_LIT_LEN_CODE_LEN - ISAL_DECODE_LONG_BITS)];
uint32_t temp_code_length;
uint32_t long_code_lookup_length = 0;
uint32_t max_length;
@ -237,9 +311,9 @@ static void inline make_inflate_huff_code_lit_len(struct inflate_huff_code_large
uint32_t code_length;
uint16_t long_bits;
uint16_t min_increment;
uint32_t code_list[LIT_LEN + 2]; /* The +2 is for the extra codes in the static header */
uint32_t code_list[LIT_LEN_ELEMS + 2]; /* The +2 is for the extra codes in the static header */
uint32_t code_list_len;
uint32_t count_total[17], count_total_tmp[17];
uint16_t count_total_tmp[MAX_LIT_LEN_COUNT];
uint32_t insert_index;
uint32_t last_length, min_length;
uint32_t copy_size;
@ -247,13 +321,12 @@ static void inline make_inflate_huff_code_lit_len(struct inflate_huff_code_large
int index1, index2, index3, sym1, sym2, sym3;
uint32_t sym1_code, sym2_code, sym3_code, sym1_len, sym2_len, sym3_len;
count_total[0] = 0;
count_total[1] = 0;
for (i = 2; i < 17; i++)
count_total[i] = count_total[i - 1] + count[i - 1];
memcpy(count_total_tmp, count_total, sizeof(count_total));
uint32_t max_symbol = LIT_LEN_ELEMS;
memcpy(count_total_tmp, count_total, sizeof(count_total_tmp));
code_list_len = count_total[MAX_LIT_LEN_COUNT - 1];
code_list_len = count_total[16];
if (code_list_len == 0) {
memset(result->short_code_lookup, 0, sizeof(result->short_code_lookup));
return;
@ -268,11 +341,6 @@ static void inline make_inflate_huff_code_lit_len(struct inflate_huff_code_large
}
}
/* Setup for calculating huffman codes */
next_code[0] = code;
for (i = 1; i < MAX_HUFF_TREE_DEPTH + 1; i++)
next_code[i] = (next_code[i - 1] + count[i - 1]) << 1;
/* Calculate code corresponding to a given symbol */
for (k = 0; k < code_list_len; k++) {
i = code_list[k];
@ -281,13 +349,6 @@ static void inline make_inflate_huff_code_lit_len(struct inflate_huff_code_large
long_code_list[long_code_length] = i;
long_code_length++;
}
/* Store codes as zero for invalid codes used in static header construction */
huff_code_table[i].code =
bit_reverse2(next_code[huff_code_table[i].length],
huff_code_table[i].length);
next_code[huff_code_table[i].length] += 1;
}
/* Determine the length of the first code */
@ -396,8 +457,8 @@ static void inline make_inflate_huff_code_lit_len(struct inflate_huff_code_large
sym3 = code_list[index3];
sym3_code = huff_code_table[sym3].code;
/* Check that sym3 is an existing symbol */
if (sym3 >= max_symbol)
/* Check that sym3 is a writable existing symbol */
if (sym3 >= max_symbol - 1)
break;
code = sym1_code | (sym2_code << sym1_len) |
@ -419,12 +480,12 @@ static void inline make_inflate_huff_code_lit_len(struct inflate_huff_code_large
/*Set the look up table to point to a hint where the symbol can be found
* in the list of long codes and add the current symbol to the list of
* long codes. */
if (huff_code_table[long_code_list[i]].code == 0xFFFF)
if (huff_code_table[long_code_list[i]].code_and_extra == INVALID_CODE)
continue;
max_length = huff_code_table[long_code_list[i]].length;
first_bits =
huff_code_table[long_code_list[i]].code
huff_code_table[long_code_list[i]].code_and_extra
& ((1 << ISAL_DECODE_LONG_BITS) - 1);
temp_code_list[0] = long_code_list[i];
@ -441,12 +502,14 @@ static void inline make_inflate_huff_code_lit_len(struct inflate_huff_code_large
}
memset(&result->long_code_lookup[long_code_lookup_length], 0x00,
2 * (1 << (max_length - ISAL_DECODE_LONG_BITS)));
sizeof(*result->long_code_lookup) *
(1 << (max_length - ISAL_DECODE_LONG_BITS)));
for (j = 0; j < temp_code_length; j++) {
code_length = huff_code_table[temp_code_list[j]].length;
long_bits =
huff_code_table[temp_code_list[j]].code >> ISAL_DECODE_LONG_BITS;
huff_code_table[temp_code_list[j]].code_and_extra >>
ISAL_DECODE_LONG_BITS;
min_increment = 1 << (code_length - ISAL_DECODE_LONG_BITS);
for (; long_bits < (1 << (max_length - ISAL_DECODE_LONG_BITS));
long_bits += min_increment) {
@ -454,12 +517,12 @@ static void inline make_inflate_huff_code_lit_len(struct inflate_huff_code_large
temp_code_list[j] |
(code_length << LARGE_LONG_CODE_LEN_OFFSET);
}
huff_code_table[temp_code_list[j]].code = 0xFFFF;
huff_code_table[temp_code_list[j]].code_and_extra = INVALID_CODE;
}
result->short_code_lookup[first_bits] = long_code_lookup_length |
(max_length << LARGE_SHORT_CODE_LEN_OFFSET) | LARGE_FLAG_BIT;
(max_length << LARGE_SHORT_MAX_LEN_OFFSET) | LARGE_FLAG_BIT;
long_code_lookup_length += 1 << (max_length - ISAL_DECODE_LONG_BITS);
}
}
@ -791,13 +854,14 @@ static int inline setup_static_header(struct inflate_state *state)
* regenerating the table. */
int i;
struct huff_code lit_code[LIT_LEN + 2];
struct huff_code lit_code[LIT_LEN_ELEMS];
struct huff_code dist_code[DIST_LEN + 2];
/* These tables are based on the static huffman tree described in RFC
* 1951 */
uint16_t lit_count[16] = {
0, 0, 0, 0, 0, 0, 0, 24, 152, 112, 0, 0, 0, 0, 0, 0
uint16_t lit_count[MAX_LIT_LEN_COUNT] = {
0, 0, 0, 0, 0, 0, 0, 24,
152, 112, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0
};
uint16_t dist_count[16] = {
0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
@ -820,8 +884,12 @@ static int inline setup_static_header(struct inflate_state *state)
for (i = 0; i < DIST_LEN + 2; i++)
dist_code[i].length = 5;
make_inflate_huff_code_lit_len(&state->lit_huff_code, lit_code, LIT_LEN + 2, lit_count,
LIT_LEN);
set_codes(lit_code, LIT_LEN + 2, lit_count);
lit_count[8] -= 2;
expand_lit_len_huffcode(lit_code, lit_count);
make_inflate_huff_code_lit_len(&state->lit_huff_code, lit_code, LIT_LEN_ELEMS,
lit_count);
make_inflate_huff_code_dist(&state->dist_huff_code, dist_code, DIST_LEN + 2,
dist_count, DIST_LEN);
@ -836,7 +904,7 @@ static void inline decode_next_lit_len(uint32_t * next_lits, uint32_t * sym_coun
struct inflate_state *state,
struct inflate_huff_code_large *huff_code)
{
uint16_t next_bits;
uint32_t next_bits;
uint32_t next_sym;
uint32_t bit_count;
uint32_t bit_mask;
@ -870,7 +938,7 @@ static void inline decode_next_lit_len(uint32_t * next_lits, uint32_t * sym_coun
} else {
/* If a symbol is not found, do a lookup in the long code
* list starting from the hint in next_sym */
bit_mask = (next_sym - LARGE_FLAG_BIT) >> LARGE_SHORT_CODE_LEN_OFFSET;
bit_mask = next_sym >> LARGE_SHORT_MAX_LEN_OFFSET;
bit_mask = (1 << bit_mask) - 1;
next_bits = state->read_in & bit_mask;
next_sym =
@ -994,11 +1062,11 @@ static int inline setup_dynamic_header(struct inflate_state *state)
{
int i, j;
struct huff_code code_huff[CODE_LEN_CODES];
struct huff_code lit_and_dist_huff[LIT_LEN + DIST_LEN];
struct huff_code lit_and_dist_huff[LIT_LEN_ELEMS];
struct huff_code *previous = NULL, *current, *end;
struct inflate_huff_code_small inflate_code_huff;
uint8_t hclen, hdist, hlit;
uint16_t code_count[16], lit_count[16], dist_count[16];
uint16_t code_count[16], lit_count[MAX_LIT_LEN_COUNT], dist_count[16];
uint16_t *count;
uint16_t symbol;
@ -1147,11 +1215,14 @@ static int inline setup_dynamic_header(struct inflate_state *state)
if (state->read_in_length < 0)
return ISAL_END_INPUT;
make_inflate_huff_code_lit_len(&state->lit_huff_code, lit_and_dist_huff, LIT_LEN,
lit_count, LIT_LEN);
make_inflate_huff_code_dist(&state->dist_huff_code, &lit_and_dist_huff[LIT_LEN],
DIST_LEN, dist_count, DIST_LEN);
set_codes(lit_and_dist_huff, LIT_LEN, lit_count);
expand_lit_len_huffcode(lit_and_dist_huff, lit_count);
make_inflate_huff_code_lit_len(&state->lit_huff_code, lit_and_dist_huff, LIT_LEN_ELEMS,
lit_count);
state->block_state = ISAL_BLOCK_CODED;
return 0;
@ -1408,18 +1479,14 @@ int decode_huffman_code_block_stateless_base(struct inflate_state *state)
state->block_state = state->bfinal ?
ISAL_BLOCK_INPUT_DONE : ISAL_BLOCK_NEW_HDR;
} else if (next_lit < 286) {
} else if (next_lit < LIT_LEN_ELEMS) {
/* Else if the next symbol is a repeat
* length, read in the length extra
* bits, the distance code, the distance
* extra bits. Then write out the
* corresponding data and update the
* state data accordingly*/
repeat_length =
rfc_lookup_table.len_start[next_lit - 257] +
inflate_in_read_bits(state,
rfc_lookup_table.len_extra_bit_count
[next_lit - 257]);
repeat_length = next_lit - 254;
next_dist = decode_next_dist(state, &state->dist_huff_code);
if (next_dist >= DIST_LEN)

View File

@ -48,7 +48,7 @@
%endm
;; See inflate_huff_code structure declaration in igzip_lib.h calculation explanation
%define L_REM (15 - ISAL_DECODE_LONG_BITS)
%define L_REM (21 - ISAL_DECODE_LONG_BITS)
%define S_REM (15 - ISAL_DECODE_SHORT_BITS)
%define L_DUP ((1 << L_REM) - (L_REM + 1))

View File

@ -428,7 +428,7 @@ struct isal_zstream {
*/
/* In the following defines, L stands for LARGE and S for SMALL */
#define ISAL_L_REM (15 - ISAL_DECODE_LONG_BITS)
#define ISAL_L_REM (21 - ISAL_DECODE_LONG_BITS)
#define ISAL_S_REM (15 - ISAL_DECODE_SHORT_BITS)
#define ISAL_L_DUP ((1 << ISAL_L_REM) - (ISAL_L_REM + 1))