igzip: Increase size of large short code lookup

Change-Id: I05a564d1759ae417a966f3a12621799db0edf80a
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
Roy Oursler
2018-02-22 12:16:52 -07:00
committed by Greg Tucker
parent fbeb7c83c4
commit 7a1dc55c27
4 changed files with 99 additions and 25 deletions

View File

@@ -23,6 +23,14 @@ default rel
extern rfc1951_lookup_table
%define LARGE_CODE_LENGTH_OFFSET 9
%define LARGE_FLAG_BIT_OFFSET 15
%define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET)
%define SMALL_CODE_LENGTH_OFFSET 9
%define SMALL_FLAG_BIT_OFFSET 15
%define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET)
;; rax
%define tmp3 rax
%define read_in_2 rax
@@ -254,7 +262,7 @@ stack_size equ 3 * 8 + 8 * 8
;; Decode next symbol
;; Clobber rcx
%macro decode_next 7
%macro decode_next_large 7
%define %%state %1 ; State structure associated with compressed stream
%define %%lookup_size %2 ; Number of bits used for small lookup
%define %%state_offset %3 ; Type of huff code, should be either LIT or DIST
@@ -268,25 +276,25 @@ stack_size equ 3 * 8 + 8 * 8
shr %%next_bits, %%lookup_size
mov rcx, %%next_sym
shr rcx, 9
shr rcx, LARGE_CODE_LENGTH_OFFSET
jz invalid_symbol
;; Check if symbol or hint was looked up
and %%next_sym, 0x81FF
cmp %%next_sym, 0x8000
cmp %%next_sym, LARGE_FLAG_BIT
jl %%end
;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first %%lookup_size bits.
lea %%next_sym, [%%state + LONG_CODE_SIZE * %%next_sym]
lea %%next_sym, [%%state + LARGE_LONG_CODE_SIZE * %%next_sym]
CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size
;; Lookup actual next symbol
movzx %%next_sym, word [%%next_sym + %%state_offset + LONG_CODE_SIZE * %%next_bits + SHORT_CODE_SIZE * (1 << %%lookup_size) - LONG_CODE_SIZE * 0x8000]
movzx %%next_sym, word [%%next_sym + %%state_offset + LARGE_LONG_CODE_SIZE * %%next_bits + LARGE_SHORT_CODE_SIZE * (1 << %%lookup_size) - LARGE_LONG_CODE_SIZE * LARGE_FLAG_BIT]
;; Save length associated with symbol
mov rcx, %%next_sym
shr rcx, 9
shr rcx, LARGE_CODE_LENGTH_OFFSET
jz invalid_symbol
and %%next_sym, 0x1FF
@@ -298,7 +306,7 @@ stack_size equ 3 * 8 + 8 * 8
;; Decode next symbol
;; Clobber rcx
%macro decode_next_with_load 7
%macro decode_next_large_with_load 7
%define %%state %1 ; State structure associated with compressed stream
%define %%lookup_size %2 ; Number of bits used for small lookup
%define %%state_offset %3
@@ -310,11 +318,73 @@ stack_size equ 3 * 8 + 8 * 8
;; Lookup possible next symbol
mov %%next_bits, %%read_in
and %%next_bits, (1 << %%lookup_size) - 1
movzx %%next_sym, word [%%state + %%state_offset + SHORT_CODE_SIZE * %%next_bits]
mov %%next_sym %+ d, dword [%%state + %%state_offset + LARGE_SHORT_CODE_SIZE * %%next_bits]
decode_next %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_bits
decode_next_large %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_bits
%endm
;; Decode next symbol
;; Clobber rcx
%macro decode_next_small 7
%define %%state %1 ; State structure associated with compressed stream
%define %%lookup_size %2 ; Number of bits used for small lookup
%define %%state_offset %3 ; Type of huff code, should be either LIT or DIST
%define %%read_in %4 ; Bits read in from compressed stream
%define %%read_in_length %5 ; Number of valid bits in read_in
%define %%next_sym %6 ; Returned symobl
%define %%next_bits %7
;; Save length associated with symbol
mov %%next_bits, %%read_in
shr %%next_bits, %%lookup_size
mov rcx, %%next_sym
shr rcx, SMALL_CODE_LENGTH_OFFSET
jz invalid_symbol
;; Check if symbol or hint was looked up
and %%next_sym, 0x81FF
cmp %%next_sym, SMALL_FLAG_BIT
jl %%end
;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first %%lookup_size bits.
lea %%next_sym, [%%state + SMALL_LONG_CODE_SIZE * %%next_sym]
CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size
;; Lookup actual next symbol
movzx %%next_sym, word [%%next_sym + %%state_offset + SMALL_LONG_CODE_SIZE * %%next_bits + SMALL_SHORT_CODE_SIZE * (1 << %%lookup_size) - SMALL_LONG_CODE_SIZE * SMALL_FLAG_BIT]
;; Save length associated with symbol
mov rcx, %%next_sym
shr rcx, SMALL_CODE_LENGTH_OFFSET
jz invalid_symbol
and %%next_sym, 0x1FF
%%end:
;; Updated read_in to reflect the bits which were decoded
SHRX %%read_in, %%read_in, rcx
sub %%read_in_length, rcx
%endm
;; Decode next symbol
;; Clobber rcx
%macro decode_next_small_with_load 7
%define %%state %1 ; State structure associated with compressed stream
%define %%lookup_size %2 ; Number of bits used for small lookup
%define %%state_offset %3
%define %%read_in %4 ; Bits read in from compressed stream
%define %%read_in_length %5 ; Number of valid bits in read_in
%define %%next_sym %6 ; Returned symobl
%define %%next_bits %7
;; Lookup possible next symbol
mov %%next_bits, %%read_in
and %%next_bits, (1 << %%lookup_size) - 1
movzx %%next_sym, word [%%state + %%state_offset + SMALL_SHORT_CODE_SIZE * %%next_bits]
decode_next_small %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_bits
%endm
global decode_huffman_code_block_stateless_ %+ ARCH
decode_huffman_code_block_stateless_ %+ ARCH %+ :
@@ -357,7 +427,7 @@ decode_huffman_code_block_stateless_ %+ ARCH %+ :
skip_load:
mov tmp3, read_in
and tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1
movzx next_sym, word [state + _lit_huff_code + SHORT_CODE_SIZE * tmp3]
mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3]
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Main Loop
@@ -370,7 +440,7 @@ loop_block:
jg end_loop_block_pre
;; Decode next symbol and reload the read_in buffer
decode_next state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, tmp1
decode_next_large state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, tmp1
;; Save next_sym in next_sym2 so next_sym can be preloaded
mov next_sym2, next_sym
@@ -393,7 +463,7 @@ loop_block:
je end_symbol_pre
;; Specutively load next_sym for next loop if a literal was decoded
movzx next_sym, word [state + _lit_huff_code + SHORT_CODE_SIZE * tmp3]
mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3]
;; Finish updating read_in_length for read_in
mov tmp1, 64
@@ -406,7 +476,7 @@ loop_block:
SHRX read_in_2, read_in, rcx
mov next_bits2, read_in_2
and next_bits2, (1 << ISAL_DECODE_SHORT_BITS) - 1
movzx next_sym3, word [state + _dist_huff_code + SHORT_CODE_SIZE * next_bits2]
movzx next_sym3, word [state + _dist_huff_code + SMALL_SHORT_CODE_SIZE * next_bits2]
;; Specutively write next_sym2 if it is a literal
mov [next_out], next_sym2
@@ -427,7 +497,7 @@ decode_len_dist:
sub read_in_length, rcx
;; Decode distance code
decode_next state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in_2, read_in_length, next_sym3, tmp2
decode_next_small state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in_2, read_in_length, next_sym3, tmp2
movzx rcx, byte [rfc_lookup + _dist_extra_bit_count + next_sym3]
mov look_back_dist2 %+ d, [rfc_lookup + _dist_start + 4 * next_sym3]
@@ -446,7 +516,7 @@ decode_len_dist:
;; Setup next_sym, read_in, and read_in_length for next loop
mov read_in, read_in_2
and read_in_2, (1 << ISAL_DECODE_LONG_BITS) - 1
movzx next_sym, word [state + _lit_huff_code + SHORT_CODE_SIZE * read_in_2]
mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * read_in_2]
sub read_in_length, rcx
;; Copy distance in len/dist pair
@@ -512,7 +582,7 @@ end_loop_block:
mov [rsp + read_in_mem_offset], read_in
mov [rsp + read_in_length_mem_offset], read_in_length
decode_next_with_load state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, tmp1
decode_next_large_with_load state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, tmp1
;; Check that enough input was available to decode symbol
cmp read_in_length, 0
@@ -538,7 +608,7 @@ decode_len_dist_2:
sub read_in_length, rcx
;; Decode distance code
decode_next_with_load state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym, tmp1
decode_next_small_with_load state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym, tmp1
;; Load distance code extra bits
mov next_bits, read_in

View File

@@ -214,7 +214,7 @@ static void inline make_inflate_huff_code_large(struct inflate_huff_code_large *
uint32_t insert_index;
uint32_t last_length;
uint32_t copy_size;
uint16_t *short_code_lookup = result->short_code_lookup;
uint32_t *short_code_lookup = result->short_code_lookup;
count_total[0] = 0;
count_total[1] = 0;

View File

@@ -66,8 +66,12 @@
%define MAX_LONG_CODE_LARGE (L_SIZE + (-L_SIZE & 0xf))
%define MAX_LONG_CODE_SMALL (S_SIZE + (-S_SIZE & 0xf))
%define SHORT_CODE_SIZE 2
%define LONG_CODE_SIZE 2
%define LARGE_SHORT_CODE_SIZE 4
%define LARGE_LONG_CODE_SIZE 2
%define SMALL_SHORT_CODE_SIZE 2
%define SMALL_LONG_CODE_SIZE 2
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -75,8 +79,8 @@
START_FIELDS ;; inflate huff code
;; name size align
FIELD _short_code_lookup_large, SHORT_CODE_SIZE * (1 << (ISAL_DECODE_LONG_BITS)), LONG_CODE_SIZE
FIELD _long_code_lookup_large, LONG_CODE_SIZE * MAX_LONG_CODE_LARGE, SHORT_CODE_SIZE
FIELD _short_code_lookup_large, LARGE_SHORT_CODE_SIZE * (1 << (ISAL_DECODE_LONG_BITS)), LARGE_LONG_CODE_SIZE
FIELD _long_code_lookup_large, LARGE_LONG_CODE_SIZE * MAX_LONG_CODE_LARGE, LARGE_SHORT_CODE_SIZE
%assign _inflate_huff_code_large_size _FIELD_OFFSET
%assign _inflate_huff_code_large_align _STRUCT_ALIGN
@@ -88,8 +92,8 @@ FIELD _long_code_lookup_large, LONG_CODE_SIZE * MAX_LONG_CODE_LARGE, SHORT_CO
START_FIELDS ;; inflate huff code
;; name size align
FIELD _short_code_lookup_small, SHORT_CODE_SIZE * (1 << (ISAL_DECODE_SHORT_BITS)), LONG_CODE_SIZE
FIELD _long_code_lookup_small, LONG_CODE_SIZE * MAX_LONG_CODE_SMALL, SHORT_CODE_SIZE
FIELD _short_code_lookup_small, SMALL_SHORT_CODE_SIZE * (1 << (ISAL_DECODE_SHORT_BITS)), SMALL_LONG_CODE_SIZE
FIELD _long_code_lookup_small, SMALL_LONG_CODE_SIZE * MAX_LONG_CODE_SMALL, SMALL_SHORT_CODE_SIZE
%assign _inflate_huff_code_small_size _FIELD_OFFSET
%assign _inflate_huff_code_small_align _STRUCT_ALIGN

View File

@@ -445,7 +445,7 @@ struct isal_zstream {
/* Large lookup table for decoding huffman codes */
struct inflate_huff_code_large {
uint16_t short_code_lookup[1 << (ISAL_DECODE_LONG_BITS)];
uint32_t short_code_lookup[1 << (ISAL_DECODE_LONG_BITS)];
uint16_t long_code_lookup[ISAL_HUFF_CODE_LARGE_LONG_ALIGNED];
};