From 7a1dc55c27dba33fa0b03adfdca32bbaba207645 Mon Sep 17 00:00:00 2001 From: Roy Oursler Date: Thu, 22 Feb 2018 12:16:52 -0700 Subject: [PATCH] igzip: Increase size of large short code lookup Change-Id: I05a564d1759ae417a966f3a12621799db0edf80a Signed-off-by: Roy Oursler --- igzip/igzip_decode_block_stateless.asm | 104 +++++++++++++++++++++---- igzip/igzip_inflate.c | 2 +- igzip/inflate_data_structs.asm | 16 ++-- include/igzip_lib.h | 2 +- 4 files changed, 99 insertions(+), 25 deletions(-) diff --git a/igzip/igzip_decode_block_stateless.asm b/igzip/igzip_decode_block_stateless.asm index 130a300..ef6b272 100644 --- a/igzip/igzip_decode_block_stateless.asm +++ b/igzip/igzip_decode_block_stateless.asm @@ -23,6 +23,14 @@ default rel extern rfc1951_lookup_table + +%define LARGE_CODE_LENGTH_OFFSET 9 +%define LARGE_FLAG_BIT_OFFSET 15 +%define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET) +%define SMALL_CODE_LENGTH_OFFSET 9 +%define SMALL_FLAG_BIT_OFFSET 15 +%define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET) + ;; rax %define tmp3 rax %define read_in_2 rax @@ -254,7 +262,7 @@ stack_size equ 3 * 8 + 8 * 8 ;; Decode next symbol ;; Clobber rcx -%macro decode_next 7 +%macro decode_next_large 7 %define %%state %1 ; State structure associated with compressed stream %define %%lookup_size %2 ; Number of bits used for small lookup %define %%state_offset %3 ; Type of huff code, should be either LIT or DIST @@ -268,25 +276,25 @@ stack_size equ 3 * 8 + 8 * 8 shr %%next_bits, %%lookup_size mov rcx, %%next_sym - shr rcx, 9 + shr rcx, LARGE_CODE_LENGTH_OFFSET jz invalid_symbol ;; Check if symbol or hint was looked up and %%next_sym, 0x81FF - cmp %%next_sym, 0x8000 + cmp %%next_sym, LARGE_FLAG_BIT jl %%end ;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first %%lookup_size bits. - lea %%next_sym, [%%state + LONG_CODE_SIZE * %%next_sym] + lea %%next_sym, [%%state + LARGE_LONG_CODE_SIZE * %%next_sym] CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size ;; Lookup actual next symbol - movzx %%next_sym, word [%%next_sym + %%state_offset + LONG_CODE_SIZE * %%next_bits + SHORT_CODE_SIZE * (1 << %%lookup_size) - LONG_CODE_SIZE * 0x8000] + movzx %%next_sym, word [%%next_sym + %%state_offset + LARGE_LONG_CODE_SIZE * %%next_bits + LARGE_SHORT_CODE_SIZE * (1 << %%lookup_size) - LARGE_LONG_CODE_SIZE * LARGE_FLAG_BIT] ;; Save length associated with symbol mov rcx, %%next_sym - shr rcx, 9 + shr rcx, LARGE_CODE_LENGTH_OFFSET jz invalid_symbol and %%next_sym, 0x1FF @@ -298,7 +306,7 @@ stack_size equ 3 * 8 + 8 * 8 ;; Decode next symbol ;; Clobber rcx -%macro decode_next_with_load 7 +%macro decode_next_large_with_load 7 %define %%state %1 ; State structure associated with compressed stream %define %%lookup_size %2 ; Number of bits used for small lookup %define %%state_offset %3 @@ -310,11 +318,73 @@ stack_size equ 3 * 8 + 8 * 8 ;; Lookup possible next symbol mov %%next_bits, %%read_in and %%next_bits, (1 << %%lookup_size) - 1 - movzx %%next_sym, word [%%state + %%state_offset + SHORT_CODE_SIZE * %%next_bits] + mov %%next_sym %+ d, dword [%%state + %%state_offset + LARGE_SHORT_CODE_SIZE * %%next_bits] - decode_next %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_bits + decode_next_large %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_bits %endm +;; Decode next symbol +;; Clobber rcx +%macro decode_next_small 7 +%define %%state %1 ; State structure associated with compressed stream +%define %%lookup_size %2 ; Number of bits used for small lookup +%define %%state_offset %3 ; Type of huff code, should be either LIT or DIST +%define %%read_in %4 ; Bits read in from compressed stream +%define %%read_in_length %5 ; Number of valid bits in read_in +%define %%next_sym %6 ; Returned symobl +%define %%next_bits %7 + + ;; Save length associated with symbol + mov %%next_bits, %%read_in + shr %%next_bits, %%lookup_size + + mov rcx, %%next_sym + shr rcx, SMALL_CODE_LENGTH_OFFSET + jz invalid_symbol + + ;; Check if symbol or hint was looked up + and %%next_sym, 0x81FF + cmp %%next_sym, SMALL_FLAG_BIT + jl %%end + + ;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first %%lookup_size bits. + lea %%next_sym, [%%state + SMALL_LONG_CODE_SIZE * %%next_sym] + + CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size + + ;; Lookup actual next symbol + movzx %%next_sym, word [%%next_sym + %%state_offset + SMALL_LONG_CODE_SIZE * %%next_bits + SMALL_SHORT_CODE_SIZE * (1 << %%lookup_size) - SMALL_LONG_CODE_SIZE * SMALL_FLAG_BIT] + + ;; Save length associated with symbol + mov rcx, %%next_sym + shr rcx, SMALL_CODE_LENGTH_OFFSET + jz invalid_symbol + and %%next_sym, 0x1FF + +%%end: + ;; Updated read_in to reflect the bits which were decoded + SHRX %%read_in, %%read_in, rcx + sub %%read_in_length, rcx +%endm + +;; Decode next symbol +;; Clobber rcx +%macro decode_next_small_with_load 7 +%define %%state %1 ; State structure associated with compressed stream +%define %%lookup_size %2 ; Number of bits used for small lookup +%define %%state_offset %3 +%define %%read_in %4 ; Bits read in from compressed stream +%define %%read_in_length %5 ; Number of valid bits in read_in +%define %%next_sym %6 ; Returned symobl +%define %%next_bits %7 + + ;; Lookup possible next symbol + mov %%next_bits, %%read_in + and %%next_bits, (1 << %%lookup_size) - 1 + movzx %%next_sym, word [%%state + %%state_offset + SMALL_SHORT_CODE_SIZE * %%next_bits] + + decode_next_small %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_bits +%endm global decode_huffman_code_block_stateless_ %+ ARCH decode_huffman_code_block_stateless_ %+ ARCH %+ : @@ -357,7 +427,7 @@ decode_huffman_code_block_stateless_ %+ ARCH %+ : skip_load: mov tmp3, read_in and tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1 - movzx next_sym, word [state + _lit_huff_code + SHORT_CODE_SIZE * tmp3] + mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3] ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Main Loop @@ -370,7 +440,7 @@ loop_block: jg end_loop_block_pre ;; Decode next symbol and reload the read_in buffer - decode_next state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, tmp1 + decode_next_large state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, tmp1 ;; Save next_sym in next_sym2 so next_sym can be preloaded mov next_sym2, next_sym @@ -393,7 +463,7 @@ loop_block: je end_symbol_pre ;; Specutively load next_sym for next loop if a literal was decoded - movzx next_sym, word [state + _lit_huff_code + SHORT_CODE_SIZE * tmp3] + mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3] ;; Finish updating read_in_length for read_in mov tmp1, 64 @@ -406,7 +476,7 @@ loop_block: SHRX read_in_2, read_in, rcx mov next_bits2, read_in_2 and next_bits2, (1 << ISAL_DECODE_SHORT_BITS) - 1 - movzx next_sym3, word [state + _dist_huff_code + SHORT_CODE_SIZE * next_bits2] + movzx next_sym3, word [state + _dist_huff_code + SMALL_SHORT_CODE_SIZE * next_bits2] ;; Specutively write next_sym2 if it is a literal mov [next_out], next_sym2 @@ -427,7 +497,7 @@ decode_len_dist: sub read_in_length, rcx ;; Decode distance code - decode_next state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in_2, read_in_length, next_sym3, tmp2 + decode_next_small state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in_2, read_in_length, next_sym3, tmp2 movzx rcx, byte [rfc_lookup + _dist_extra_bit_count + next_sym3] mov look_back_dist2 %+ d, [rfc_lookup + _dist_start + 4 * next_sym3] @@ -446,7 +516,7 @@ decode_len_dist: ;; Setup next_sym, read_in, and read_in_length for next loop mov read_in, read_in_2 and read_in_2, (1 << ISAL_DECODE_LONG_BITS) - 1 - movzx next_sym, word [state + _lit_huff_code + SHORT_CODE_SIZE * read_in_2] + mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * read_in_2] sub read_in_length, rcx ;; Copy distance in len/dist pair @@ -512,7 +582,7 @@ end_loop_block: mov [rsp + read_in_mem_offset], read_in mov [rsp + read_in_length_mem_offset], read_in_length - decode_next_with_load state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, tmp1 + decode_next_large_with_load state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, tmp1 ;; Check that enough input was available to decode symbol cmp read_in_length, 0 @@ -538,7 +608,7 @@ decode_len_dist_2: sub read_in_length, rcx ;; Decode distance code - decode_next_with_load state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym, tmp1 + decode_next_small_with_load state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym, tmp1 ;; Load distance code extra bits mov next_bits, read_in diff --git a/igzip/igzip_inflate.c b/igzip/igzip_inflate.c index d8b9e13..163a3dc 100644 --- a/igzip/igzip_inflate.c +++ b/igzip/igzip_inflate.c @@ -214,7 +214,7 @@ static void inline make_inflate_huff_code_large(struct inflate_huff_code_large * uint32_t insert_index; uint32_t last_length; uint32_t copy_size; - uint16_t *short_code_lookup = result->short_code_lookup; + uint32_t *short_code_lookup = result->short_code_lookup; count_total[0] = 0; count_total[1] = 0; diff --git a/igzip/inflate_data_structs.asm b/igzip/inflate_data_structs.asm index a547bca..abfe2b8 100644 --- a/igzip/inflate_data_structs.asm +++ b/igzip/inflate_data_structs.asm @@ -66,8 +66,12 @@ %define MAX_LONG_CODE_LARGE (L_SIZE + (-L_SIZE & 0xf)) %define MAX_LONG_CODE_SMALL (S_SIZE + (-S_SIZE & 0xf)) -%define SHORT_CODE_SIZE 2 -%define LONG_CODE_SIZE 2 +%define LARGE_SHORT_CODE_SIZE 4 +%define LARGE_LONG_CODE_SIZE 2 + +%define SMALL_SHORT_CODE_SIZE 2 +%define SMALL_LONG_CODE_SIZE 2 + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -75,8 +79,8 @@ START_FIELDS ;; inflate huff code ;; name size align -FIELD _short_code_lookup_large, SHORT_CODE_SIZE * (1 << (ISAL_DECODE_LONG_BITS)), LONG_CODE_SIZE -FIELD _long_code_lookup_large, LONG_CODE_SIZE * MAX_LONG_CODE_LARGE, SHORT_CODE_SIZE +FIELD _short_code_lookup_large, LARGE_SHORT_CODE_SIZE * (1 << (ISAL_DECODE_LONG_BITS)), LARGE_LONG_CODE_SIZE +FIELD _long_code_lookup_large, LARGE_LONG_CODE_SIZE * MAX_LONG_CODE_LARGE, LARGE_SHORT_CODE_SIZE %assign _inflate_huff_code_large_size _FIELD_OFFSET %assign _inflate_huff_code_large_align _STRUCT_ALIGN @@ -88,8 +92,8 @@ FIELD _long_code_lookup_large, LONG_CODE_SIZE * MAX_LONG_CODE_LARGE, SHORT_CO START_FIELDS ;; inflate huff code ;; name size align -FIELD _short_code_lookup_small, SHORT_CODE_SIZE * (1 << (ISAL_DECODE_SHORT_BITS)), LONG_CODE_SIZE -FIELD _long_code_lookup_small, LONG_CODE_SIZE * MAX_LONG_CODE_SMALL, SHORT_CODE_SIZE +FIELD _short_code_lookup_small, SMALL_SHORT_CODE_SIZE * (1 << (ISAL_DECODE_SHORT_BITS)), SMALL_LONG_CODE_SIZE +FIELD _long_code_lookup_small, SMALL_LONG_CODE_SIZE * MAX_LONG_CODE_SMALL, SMALL_SHORT_CODE_SIZE %assign _inflate_huff_code_small_size _FIELD_OFFSET %assign _inflate_huff_code_small_align _STRUCT_ALIGN diff --git a/include/igzip_lib.h b/include/igzip_lib.h index eb9cc17..0c7c4c1 100644 --- a/include/igzip_lib.h +++ b/include/igzip_lib.h @@ -445,7 +445,7 @@ struct isal_zstream { /* Large lookup table for decoding huffman codes */ struct inflate_huff_code_large { - uint16_t short_code_lookup[1 << (ISAL_DECODE_LONG_BITS)]; + uint32_t short_code_lookup[1 << (ISAL_DECODE_LONG_BITS)]; uint16_t long_code_lookup[ISAL_HUFF_CODE_LARGE_LONG_ALIGNED]; };