igzip: Some general cleanup of the decode_block assembly

Change-Id: Ie30955fcb47ffc9b23f0c50f520cbd9973b2b315
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
This commit is contained in:
Roy Oursler 2018-02-22 10:59:11 -07:00 committed by Greg Tucker
parent 9edac4799d
commit fbeb7c83c4
2 changed files with 76 additions and 97 deletions

View File

@ -12,25 +12,6 @@ default rel
%define ISAL_DECODE_LONG_BITS 12 %define ISAL_DECODE_LONG_BITS 12
%define ISAL_DECODE_SHORT_BITS 10 %define ISAL_DECODE_SHORT_BITS 10
;; See inflate_huff_code structure declaration in igzip_lib.h calculation explanation
%define L_REM (15 - ISAL_DECODE_LONG_BITS)
%define S_REM (15 - ISAL_DECODE_SHORT_BITS)
%define L_DUP ((1 << L_REM) - (L_REM + 1))
%define S_DUP ((1 << S_REM) - (S_REM + 1))
%define L_UNUSED ((1 << L_REM) - (1 << ((L_REM)/2)) - (1 << ((L_REM + 1)/2)) + 1)
%define S_UNUSED ((1 << S_REM) - (1 << ((S_REM)/2)) - (1 << ((S_REM + 1)/2)) + 1)
%define L_SIZE (286 + L_DUP + L_UNUSED)
%define S_SIZE (30 + S_DUP + S_UNUSED)
%define HUFF_CODE_LARGE_LONG_ALIGNED (L_SIZE + (-L_SIZE & 0xf))
%define HUFF_CODE_SMALL_LONG_ALIGNED (S_SIZE + (-S_SIZE & 0xf))
%define MAX_LONG_CODE_LARGE (L_SIZE + (-L_SIZE & 0xf))
%define MAX_LONG_CODE_SMALL (S_SIZE + (-S_SIZE & 0xf))
%define COPY_SIZE 16 %define COPY_SIZE 16
%define COPY_LEN_MAX 258 %define COPY_LEN_MAX 258
@ -248,78 +229,43 @@ stack_size equ 3 * 8 + 8 * 8
%%end: %%end:
%endm %endm
;; Decode next symbol ;; Clears all bits at index %%bit_count and above in %%next_bits
;; Clobber rcx ;; May clobber rcx and %%bit_count
%macro decode_next 8 %macro CLEAR_HIGH_BITS 3
%define %%state %1 ; State structure associated with compressed stream %define %%next_bits %1
%define %%lookup_size %2 ; Number of bits used for small lookup %define %%bit_count %2
%define %%state_offset %3 %define %%lookup_size %3
%define %%read_in %4 ; Bits read in from compressed stream
%define %%read_in_length %5 ; Number of valid bits in read_in
%define %%next_sym %6 ; Returned symobl
%define %%next_bits %7
%define %%next_bits2 %8
;; Lookup possible next symbol sub %%bit_count, 0x40 + %%lookup_size
mov %%next_bits, %%read_in ;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first DECODE_LOOKUP_SIZE bits.
and %%next_bits, (1 << %%lookup_size) - 1
movzx %%next_sym, word [%%state + %%state_offset + 2 * %%next_bits]
;; Save length associated with symbol
mov rcx, %%next_sym
shr rcx, 9
jz invalid_symbol
;; Check if symbol or hint was looked up
and %%next_sym, 0x81FF
cmp %%next_sym, 0x8000
jl %%end
;; Decode next_sym using hint
mov %%next_bits2, %%read_in
;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first DECODE_LOOKUP_SIZE bits.
%ifdef USE_HSWNI %ifdef USE_HSWNI
and rcx, 0x1F and %%bit_count, 0x1F
bzhi %%next_bits2, %%next_bits2, rcx bzhi %%next_bits, %%next_bits, %%bit_count
%else %else
neg rcx %ifnidn %%bit_count, rcx
shl %%next_bits2, cl mov rcx, %%bit_count
shr %%next_bits2, cl %endif
neg rcx
shl %%next_bits, cl
shr %%next_bits, cl
%endif %endif
shr %%next_bits2, %%lookup_size
add %%next_bits2, %%next_sym
;; Lookup actual next symbol
movzx %%next_sym, word [%%state + %%state_offset + 2 * %%next_bits2 + 2 *((1 << %%lookup_size) - 0x8000)]
;; Save length associated with symbol
mov rcx, %%next_sym
shr rcx, 9
jz invalid_symbol
and %%next_sym, 0x1FF
%%end:
;; Updated read_in to reflect the bits which were decoded
sub %%read_in_length, rcx
SHRX %%read_in, %%read_in, rcx
%endm %endm
;; Decode next symbol ;; Decode next symbol
;; Clobber rcx ;; Clobber rcx
%macro decode_next2 7 %macro decode_next 7
%define %%state %1 ; State structure associated with compressed stream %define %%state %1 ; State structure associated with compressed stream
%define %%lookup_size %2 ; Number of bits used for small lookup %define %%lookup_size %2 ; Number of bits used for small lookup
%define %%state_offset %3 ; Type of huff code, should be either LIT or DIST %define %%state_offset %3 ; Type of huff code, should be either LIT or DIST
%define %%read_in %4 ; Bits read in from compressed stream %define %%read_in %4 ; Bits read in from compressed stream
%define %%read_in_length %5 ; Number of valid bits in read_in %define %%read_in_length %5 ; Number of valid bits in read_in
%define %%next_sym %6 ; Returned symobl %define %%next_sym %6 ; Returned symobl
%define %%next_bits2 %7 %define %%next_bits %7
;; Save length associated with symbol ;; Save length associated with symbol
mov %%next_bits2, %%read_in mov %%next_bits, %%read_in
shr %%next_bits2, %%lookup_size shr %%next_bits, %%lookup_size
mov rcx, %%next_sym mov rcx, %%next_sym
shr rcx, 9 shr rcx, 9
@ -331,20 +277,12 @@ stack_size equ 3 * 8 + 8 * 8
jl %%end jl %%end
;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first %%lookup_size bits. ;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first %%lookup_size bits.
lea %%next_sym, [%%state + 2 * %%next_sym] lea %%next_sym, [%%state + LONG_CODE_SIZE * %%next_sym]
sub rcx, 0x40 + %%lookup_size
%ifdef USE_HSWNI CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size
bzhi %%next_bits2, %%next_bits2, rcx
%else
;; Decode next_sym using hint
neg rcx
shl %%next_bits2, cl
shr %%next_bits2, cl
%endif
;; Lookup actual next symbol ;; Lookup actual next symbol
movzx %%next_sym, word [%%next_sym + %%state_offset + 2 * %%next_bits2 + 2 * ((1 << %%lookup_size) - 0x8000)] movzx %%next_sym, word [%%next_sym + %%state_offset + LONG_CODE_SIZE * %%next_bits + SHORT_CODE_SIZE * (1 << %%lookup_size) - LONG_CODE_SIZE * 0x8000]
;; Save length associated with symbol ;; Save length associated with symbol
mov rcx, %%next_sym mov rcx, %%next_sym
@ -358,6 +296,26 @@ stack_size equ 3 * 8 + 8 * 8
sub %%read_in_length, rcx sub %%read_in_length, rcx
%endm %endm
;; Decode the next symbol, first performing the short-table load that
;; decode_next expects to have been done before it is invoked.
;; Clobbers rcx (through decode_next) and the scratch register passed as %7.
%macro decode_next_with_load 7
%define %%stream_state	%1	; State structure associated with compressed stream
%define %%short_bits	%2	; Number of bits used to index the short lookup
%define %%table_offset	%3	; Offset of the huff code within the state; call sites pass _lit_huff_code or _dist_huff_code
%define %%bit_buf	%4	; Bits read in from compressed stream
%define %%bit_buf_len	%5	; Number of valid bits in the bit buffer
%define %%symbol	%6	; Returned symbol
%define %%scratch	%7	; Scratch register, used for the short lookup index

	;; Index the short lookup with the low %%short_bits bits of the bit buffer
	mov	%%scratch, %%bit_buf
	and	%%scratch, (1 << %%short_bits) - 1
	movzx	%%symbol, word [%%stream_state + %%table_offset + SHORT_CODE_SIZE * %%scratch]

	;; Finish the decode starting from the entry that was just loaded
	decode_next %%stream_state, %%short_bits, %%table_offset, %%bit_buf, %%bit_buf_len, %%symbol, %%scratch
%endm
global decode_huffman_code_block_stateless_ %+ ARCH global decode_huffman_code_block_stateless_ %+ ARCH
decode_huffman_code_block_stateless_ %+ ARCH %+ : decode_huffman_code_block_stateless_ %+ ARCH %+ :
@ -399,7 +357,7 @@ decode_huffman_code_block_stateless_ %+ ARCH %+ :
skip_load: skip_load:
mov tmp3, read_in mov tmp3, read_in
and tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1 and tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1
movzx next_sym, word [state + _lit_huff_code + 2 * tmp3] movzx next_sym, word [state + _lit_huff_code + SHORT_CODE_SIZE * tmp3]
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Main Loop ; Main Loop
@ -412,7 +370,7 @@ loop_block:
jg end_loop_block_pre jg end_loop_block_pre
;; Decode next symbol and reload the read_in buffer ;; Decode next symbol and reload the read_in buffer
decode_next2 state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, tmp1 decode_next state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, tmp1
;; Save next_sym in next_sym2 so next_sym can be preloaded ;; Save next_sym in next_sym2 so next_sym can be preloaded
mov next_sym2, next_sym mov next_sym2, next_sym
@ -435,7 +393,7 @@ loop_block:
je end_symbol_pre je end_symbol_pre
;; Speculatively load next_sym for next loop if a literal was decoded ;; Speculatively load next_sym for next loop if a literal was decoded
movzx next_sym, word [state + _lit_huff_code + 2 * tmp3] movzx next_sym, word [state + _lit_huff_code + SHORT_CODE_SIZE * tmp3]
;; Finish updating read_in_length for read_in ;; Finish updating read_in_length for read_in
mov tmp1, 64 mov tmp1, 64
@ -448,7 +406,7 @@ loop_block:
SHRX read_in_2, read_in, rcx SHRX read_in_2, read_in, rcx
mov next_bits2, read_in_2 mov next_bits2, read_in_2
and next_bits2, (1 << ISAL_DECODE_SHORT_BITS) - 1 and next_bits2, (1 << ISAL_DECODE_SHORT_BITS) - 1
movzx next_sym3, word [state + _dist_huff_code + 2 * next_bits2] movzx next_sym3, word [state + _dist_huff_code + SHORT_CODE_SIZE * next_bits2]
;; Speculatively write next_sym2 if it is a literal ;; Speculatively write next_sym2 if it is a literal
mov [next_out], next_sym2 mov [next_out], next_sym2
@ -469,7 +427,7 @@ decode_len_dist:
sub read_in_length, rcx sub read_in_length, rcx
;; Decode distance code ;; Decode distance code
decode_next2 state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in_2, read_in_length, next_sym3, tmp2 decode_next state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in_2, read_in_length, next_sym3, tmp2
movzx rcx, byte [rfc_lookup + _dist_extra_bit_count + next_sym3] movzx rcx, byte [rfc_lookup + _dist_extra_bit_count + next_sym3]
mov look_back_dist2 %+ d, [rfc_lookup + _dist_start + 4 * next_sym3] mov look_back_dist2 %+ d, [rfc_lookup + _dist_start + 4 * next_sym3]
@ -488,7 +446,7 @@ decode_len_dist:
;; Setup next_sym, read_in, and read_in_length for next loop ;; Setup next_sym, read_in, and read_in_length for next loop
mov read_in, read_in_2 mov read_in, read_in_2
and read_in_2, (1 << ISAL_DECODE_LONG_BITS) - 1 and read_in_2, (1 << ISAL_DECODE_LONG_BITS) - 1
movzx next_sym, word [state + _lit_huff_code + 2 * read_in_2] movzx next_sym, word [state + _lit_huff_code + SHORT_CODE_SIZE * read_in_2]
sub read_in_length, rcx sub read_in_length, rcx
;; Copy distance in len/dist pair ;; Copy distance in len/dist pair
@ -554,7 +512,7 @@ end_loop_block:
mov [rsp + read_in_mem_offset], read_in mov [rsp + read_in_mem_offset], read_in
mov [rsp + read_in_length_mem_offset], read_in_length mov [rsp + read_in_length_mem_offset], read_in_length
decode_next state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, tmp1, tmp2 decode_next_with_load state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, tmp1
;; Check that enough input was available to decode symbol ;; Check that enough input was available to decode symbol
cmp read_in_length, 0 cmp read_in_length, 0
@ -580,7 +538,7 @@ decode_len_dist_2:
sub read_in_length, rcx sub read_in_length, rcx
;; Decode distance code ;; Decode distance code
decode_next state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym, tmp1, tmp2 decode_next_with_load state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym, tmp1
;; Load distance code extra bits ;; Load distance code extra bits
mov next_bits, read_in mov next_bits, read_in

View File

@ -47,6 +47,27 @@
%endif %endif
%endm %endm
;; Sizing of the long code lookup tables. See the inflate_huff_code structure
;; declaration in igzip_lib.h for an explanation of these calculations.

;; Bits remaining once the lookup bits are taken out of 15
;; NOTE(review): 15 is presumably the maximum huffman code length -- confirm
%define L_REM		(15 - ISAL_DECODE_LONG_BITS)
%define S_REM		(15 - ISAL_DECODE_SHORT_BITS)

%define L_DUP		((1 << L_REM) - (L_REM + 1))
%define S_DUP		((1 << S_REM) - (S_REM + 1))

%define L_UNUSED	((1 << L_REM) - (1 << ((L_REM)/2)) - (1 << ((L_REM + 1)/2)) + 1)
%define S_UNUSED	((1 << S_REM) - (1 << ((S_REM)/2)) - (1 << ((S_REM + 1)/2)) + 1)

;; NOTE(review): 286 and 30 look like the lit/len and distance symbol
;; counts -- confirm against igzip_lib.h
%define L_SIZE		(286 + L_DUP + L_UNUSED)
%define S_SIZE		(30 + S_DUP + S_UNUSED)

;; Table sizes in entries, rounded up to the next multiple of 16
%define HUFF_CODE_LARGE_LONG_ALIGNED	(L_SIZE + (-L_SIZE & 0xf))
%define HUFF_CODE_SMALL_LONG_ALIGNED	(S_SIZE + (-S_SIZE & 0xf))

;; Same rounded sizes under the MAX_LONG_CODE_* names
%define MAX_LONG_CODE_LARGE	HUFF_CODE_LARGE_LONG_ALIGNED
%define MAX_LONG_CODE_SMALL	HUFF_CODE_SMALL_LONG_ALIGNED

;; Size in bytes of one entry of the short and long code lookup tables
%define SHORT_CODE_SIZE	2
%define LONG_CODE_SIZE	2
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -54,8 +75,8 @@
START_FIELDS ;; inflate huff code START_FIELDS ;; inflate huff code
;; name size align ;; name size align
FIELD _short_code_lookup_large, 2 * (1 << (ISAL_DECODE_LONG_BITS)), 2 FIELD _short_code_lookup_large, SHORT_CODE_SIZE * (1 << (ISAL_DECODE_LONG_BITS)), LONG_CODE_SIZE
FIELD _long_code_lookup_large, 2 * MAX_LONG_CODE_LARGE, 2 FIELD _long_code_lookup_large, LONG_CODE_SIZE * MAX_LONG_CODE_LARGE, SHORT_CODE_SIZE
%assign _inflate_huff_code_large_size _FIELD_OFFSET %assign _inflate_huff_code_large_size _FIELD_OFFSET
%assign _inflate_huff_code_large_align _STRUCT_ALIGN %assign _inflate_huff_code_large_align _STRUCT_ALIGN
@ -67,8 +88,8 @@ FIELD _long_code_lookup_large, 2 * MAX_LONG_CODE_LARGE, 2
START_FIELDS ;; inflate huff code START_FIELDS ;; inflate huff code
;; name size align ;; name size align
FIELD _short_code_lookup_small, 2 * (1 << (ISAL_DECODE_SHORT_BITS)), 2 FIELD _short_code_lookup_small, SHORT_CODE_SIZE * (1 << (ISAL_DECODE_SHORT_BITS)), LONG_CODE_SIZE
FIELD _long_code_lookup_small, 2 * MAX_LONG_CODE_SMALL, 2 FIELD _long_code_lookup_small, LONG_CODE_SIZE * MAX_LONG_CODE_SMALL, SHORT_CODE_SIZE
%assign _inflate_huff_code_small_size _FIELD_OFFSET %assign _inflate_huff_code_small_size _FIELD_OFFSET
%assign _inflate_huff_code_small_align _STRUCT_ALIGN %assign _inflate_huff_code_small_align _STRUCT_ALIGN