mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 09:23:50 +01:00
cd888f01a4
To support Intel CET, all indirect branch targets must start with ENDBR32/ENDBR64. Here is a patch to define endbranch and add it to function entries in x86 assembly codes which are indirect branch targets as discovered by running testsuite on Intel CET machine and visual inspection. Verified with $ CC="gcc -Wl,-z,cet-report=error -fcf-protection" CXX="g++ -Wl,-z,cet-report=error -fcf-protection" .../configure x86_64-linux $ make -j8 $ make -j8 check with both nasm and yasm on both CET and non-CET machines. Change-Id: I9822578e7294fb5043a64ab7de5c41de81a7d337 Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
801 lines
22 KiB
NASM
801 lines
22 KiB
NASM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
|
|
;
|
|
; Redistribution and use in source and binary forms, with or without
|
|
; modification, are permitted provided that the following conditions
|
|
; are met:
|
|
; * Redistributions of source code must retain the above copyright
|
|
; notice, this list of conditions and the following disclaimer.
|
|
; * Redistributions in binary form must reproduce the above copyright
|
|
; notice, this list of conditions and the following disclaimer in
|
|
; the documentation and/or other materials provided with the
|
|
; distribution.
|
|
; * Neither the name of Intel Corporation nor the names of its
|
|
; contributors may be used to endorse or promote products derived
|
|
; from this software without specific prior written permission.
|
|
;
|
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
default rel
|
|
|
|
%include "reg_sizes.asm"
|
|
|
|
;; Status codes. The decode routine in this file loads END_INPUT, OUT_OVERFLOW,
;; INVALID_SYMBOL and INVALID_LOOKBACK into rax before leaving; a zero result
;; is produced with "xor rax, rax" when the end-of-block symbol is decoded.
;; DECOMP_OK and INVALID_BLOCK are not referenced by the code in this file.
%define DECOMP_OK		0
%define END_INPUT		1
%define OUT_OVERFLOW		2
%define INVALID_BLOCK		-1
%define INVALID_SYMBOL		-2
%define INVALID_LOOKBACK	-3

;; Number of input bits used to index the first level of the lit/len table
;; (LONG) and of the distance table (SHORT).
%define ISAL_DECODE_LONG_BITS	12
%define ISAL_DECODE_SHORT_BITS	10

;; COPY_SIZE is the number of bytes moved by each MOVDQU in the match copy.
;; COPY_LEN_MAX is presumably the longest DEFLATE match length -- confirm.
%define COPY_SIZE		16
%define COPY_LEN_MAX		258

;; Safety margins subtracted from end_in / end_out while the fast loop runs.
;; The fast loop reads 8 bytes at next_in and stores 8 bytes at next_out
;; without a per-access bounds check, so it must stop early.
;; OUT_BUFFER_SLOP is parenthesized so that it expands safely inside any
;; larger expression (e.g. after a '-' or next to a '*').
%define IN_BUFFER_SLOP		8
%define OUT_BUFFER_SLOP		(COPY_SIZE + COPY_LEN_MAX)
|
|
|
|
%include "inflate_data_structs.asm"
|
|
%include "stdmac.asm"
|
|
|
|
extern rfc1951_lookup_table
|
|
|
|
|
|
|
|
;; Bit layout of the literal/length lookup entries, as applied by
;; decode_next_lit_len in this file:
;;   first-level (32-bit) entry: bits 0..24 symbol field, bit 25 flag,
;;   bits 26..27 symbol count, bits 28 and up code length.
;;   When the flag bit is set the macro rebuilds (entry >> 26) and uses it as
;;   a bit count for the second-level lookup (see LARGE_SHORT_MAX_LEN_OFFSET).
;;   second-level (16-bit) entry: bits 0..9 symbol, bits 10 and up code length.
%define LARGE_SHORT_SYM_LEN 25
|
|
%define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1)
|
|
%define LARGE_LONG_SYM_LEN 10
|
|
%define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1)
|
|
%define LARGE_SHORT_CODE_LEN_OFFSET 28
|
|
%define LARGE_LONG_CODE_LEN_OFFSET 10
|
|
%define LARGE_FLAG_BIT_OFFSET 25
|
|
%define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET)
|
|
%define LARGE_SYM_COUNT_OFFSET 26
|
|
%define LARGE_SYM_COUNT_LEN 2
|
|
%define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1)
|
|
;; Same bit position as LARGE_SYM_COUNT_OFFSET; not referenced by name in
;; this file.
%define LARGE_SHORT_MAX_LEN_OFFSET 26
|
|
|
|
;; Bit layout of the distance lookup entries (16-bit), as applied by
;; decode_next_dist in this file:
;;   first-level entry: bits 0..8 symbol field, bit 10 flag,
;;   bits 11 and up code length.
;;   second-level entry: bits 0..8 symbol field, bits 10 and up code length.
%define SMALL_SHORT_SYM_LEN 9
|
|
%define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1)
|
|
%define SMALL_LONG_SYM_LEN 9
|
|
%define SMALL_LONG_SYM_MASK ((1 << SMALL_LONG_SYM_LEN) - 1)
|
|
%define SMALL_SHORT_CODE_LEN_OFFSET 11
|
|
%define SMALL_LONG_CODE_LEN_OFFSET 10
|
|
%define SMALL_FLAG_BIT_OFFSET 10
|
|
%define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET)
|
|
|
|
;; Split of the 9-bit distance symbol field: decode_next_dist keeps the low
;; DIST_SYM_LEN bits as the distance symbol and uses the bits from
;; DIST_SYM_EXTRA_OFFSET upward as the number of extra bits to read.
;; DIST_SYM_OFFSET, DIST_SYM_EXTRA_LEN and DIST_SYM_EXTRA_MASK are not
;; referenced by the code in this file.
%define DIST_SYM_OFFSET 0
|
|
%define DIST_SYM_LEN 5
|
|
%define DIST_SYM_MASK ((1 << DIST_SYM_LEN) - 1)
|
|
%define DIST_SYM_EXTRA_OFFSET 5
|
|
%define DIST_SYM_EXTRA_LEN 4
|
|
%define DIST_SYM_EXTRA_MASK ((1 << DIST_SYM_EXTRA_LEN) - 1)
|
|
|
|
;; Register allocation for the decode routine.
;; Several names below alias the SAME physical register; writing one of them
;; destroys the others, so each alias is only valid in the part of the
;; routine that uses it.
;; "Saved" marks registers that FUNC_SAVE stores on the stack.
;;
;; rax -- scratch; also carries the return code at exit
%define tmp3 rax
|
|
%define read_in_2 rax
|
|
%define look_back_dist rax
|
|
|
|
;; rcx -- no alias; used directly for shift counts and the rep movsb count
;;        (arg0 in the win64 FUNC_SAVE block)
|
|
;; rdx -- scratch (arg1 in the win64 FUNC_SAVE block)
|
|
%define next_sym2 rdx
|
|
%define copy_start rdx
|
|
%define tmp4 rdx
|
|
|
|
;; rdi -- scratch (arg0 in the elf64 FUNC_SAVE block; Saved on win64)
|
|
%define tmp1 rdi
|
|
%define look_back_dist2 rdi
|
|
%define next_bits2 rdi
|
|
%define next_sym3 rdi
|
|
|
|
;; rsi -- scratch (arg1 in the elf64 FUNC_SAVE block; Saved on win64)
|
|
%define tmp2 rsi
|
|
%define next_sym_num rsi
|
|
%define next_bits rsi
|
|
|
|
;; rbx ; Saved -- current input pointer
|
|
%define next_in rbx
|
|
|
|
;; rbp ; Saved -- input end pointer (reduced by IN_BUFFER_SLOP in the fast loop)
|
|
%define end_in rbp
|
|
|
|
;; r8 -- match length bookkeeping for the copy code
|
|
%define repeat_length r8
|
|
|
|
;; r9 -- bit buffer of not-yet-consumed input bits
|
|
%define read_in r9
|
|
|
|
;; r10 -- number of valid bits in read_in
|
|
%define read_in_length r10
|
|
|
|
;; r11 -- pointer to the state structure (copied from arg0)
|
|
%define state r11
|
|
|
|
;; r12 ; Saved -- current output pointer
|
|
%define next_out r12
|
|
|
|
;; r13 ; Saved -- output end pointer (reduced by OUT_BUFFER_SLOP in the fast loop)
|
|
%define end_out r13
|
|
|
|
;; r14 ; Saved -- current lit/len table entry / decoded symbol(s)
|
|
%define next_sym r14
|
|
|
|
;; r15 ; Saved -- address of rfc1951_lookup_table
|
|
%define rfc_lookup r15
|
|
|
|
;; Stack frame layout, as byte offsets from rsp after FUNC_SAVE:
;;   four 8-byte local slots followed by the register save area.
;; stack_size reserves 8 save slots; the elf64 FUNC_SAVE uses 6 of them and
;; the win64 FUNC_SAVE uses all 8.
start_out_mem_offset		equ	0	; arg1: lower bound for look-back reads
read_in_mem_offset		equ	8	; read_in snapshot (slow path)
read_in_length_mem_offset	equ	16	; read_in_length snapshot (slow path)
next_out_mem_offset		equ	24	; next_out snapshot (slow path)
gpr_save_mem_offset		equ	32	; start of the register save area
stack_size			equ	4 * 8 + 8 * 8

;; Byte offsets into rfc1951_lookup_table. Only _dist_start is used in this
;; file, indexed as an array of dwords. The arithmetic implies 32 one-byte
;; entries followed by 32 four-byte entries for each of dist and len --
;; TODO confirm against the table definition.
;; Each expression is parenthesized so it expands safely inside any larger
;; expression.
%define _dist_extra_bit_count	264
%define _dist_start		(_dist_extra_bit_count + 1*32)
%define _len_extra_bit_count	(_dist_start + 4*32)
%define _len_start		(_len_extra_bit_count + 1*32)
|
|
|
|
;; elf64 build: argument registers plus prologue/epilogue macros.
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0	rdi
%define arg1	rsi

;; Prologue: reserve stack_size bytes and store rbx, rbp, r12-r15.
;; With ALIGN_STACK defined, rbp is first pushed and set to the entry rsp
;; value (after the push), and rsp is rounded down to a 16-byte boundary.
%macro FUNC_SAVE 0
%ifdef ALIGN_STACK
	push	rbp
	mov	rbp, rsp
%endif
	sub	rsp, stack_size
%ifdef ALIGN_STACK
	and	rsp, ~15
%endif

	mov	[rsp + gpr_save_mem_offset + 0*8], rbx
	mov	[rsp + gpr_save_mem_offset + 1*8], rbp
	mov	[rsp + gpr_save_mem_offset + 2*8], r12
	mov	[rsp + gpr_save_mem_offset + 3*8], r13
	mov	[rsp + gpr_save_mem_offset + 4*8], r14
	mov	[rsp + gpr_save_mem_offset + 5*8], r15
%endm

;; Epilogue: reload the registers stored by FUNC_SAVE and release the frame.
;; With ALIGN_STACK defined, the rbp value reloaded from the save area is the
;; frame pointer set up by FUNC_SAVE; it is copied back into rsp and the
;; caller's rbp is then popped.
%macro FUNC_RESTORE 0
	mov	rbx, [rsp + gpr_save_mem_offset + 0*8]
	mov	rbp, [rsp + gpr_save_mem_offset + 1*8]
	mov	r12, [rsp + gpr_save_mem_offset + 2*8]
	mov	r13, [rsp + gpr_save_mem_offset + 3*8]
	mov	r14, [rsp + gpr_save_mem_offset + 4*8]
	mov	r15, [rsp + gpr_save_mem_offset + 5*8]

%ifdef ALIGN_STACK
	mov	rsp, rbp
	pop	rbp
%else
	add	rsp, stack_size
%endif
%endm
%endif
|
|
|
|
;; win64 build: argument registers plus prologue/epilogue macros.
%ifidn __OUTPUT_FORMAT__, win64
%define arg0	rcx
%define arg1	rdx

;; Prologue: reserve stack_size bytes and store rbx, rsi, rdi, rbp, r12-r15.
;; With ALIGN_STACK defined, rbp is first pushed and set to the entry rsp
;; value (after the push), and rsp is rounded down to a 16-byte boundary.
%macro FUNC_SAVE 0
%ifdef ALIGN_STACK
	push	rbp
	mov	rbp, rsp
%endif
	sub	rsp, stack_size
%ifdef ALIGN_STACK
	and	rsp, ~15
%endif

	mov	[rsp + gpr_save_mem_offset + 0*8], rbx
	mov	[rsp + gpr_save_mem_offset + 1*8], rsi
	mov	[rsp + gpr_save_mem_offset + 2*8], rdi
	mov	[rsp + gpr_save_mem_offset + 3*8], rbp
	mov	[rsp + gpr_save_mem_offset + 4*8], r12
	mov	[rsp + gpr_save_mem_offset + 5*8], r13
	mov	[rsp + gpr_save_mem_offset + 6*8], r14
	mov	[rsp + gpr_save_mem_offset + 7*8], r15
%endm

;; Epilogue: reload the registers stored by FUNC_SAVE and release the frame.
;; With ALIGN_STACK defined, the rbp value reloaded from the save area is the
;; frame pointer set up by FUNC_SAVE; it is copied back into rsp and the
;; caller's rbp is then popped.
%macro FUNC_RESTORE 0
	mov	rbx, [rsp + gpr_save_mem_offset + 0*8]
	mov	rsi, [rsp + gpr_save_mem_offset + 1*8]
	mov	rdi, [rsp + gpr_save_mem_offset + 2*8]
	mov	rbp, [rsp + gpr_save_mem_offset + 3*8]
	mov	r12, [rsp + gpr_save_mem_offset + 4*8]
	mov	r13, [rsp + gpr_save_mem_offset + 5*8]
	mov	r14, [rsp + gpr_save_mem_offset + 6*8]
	mov	r15, [rsp + gpr_save_mem_offset + 7*8]

%ifdef ALIGN_STACK
	mov	rsp, rbp
	pop	rbp
%else
	add	rsp, stack_size
%endif
%endm
%endif
|
|
|
|
;; Refill the bit buffer with an 8-byte read from the input stream.
;; Reads a qword at [%%next_in], shifts it left by %%read_in_length and ORs it
;; into %%read_in. Then advances %%next_in by (64 - %%read_in_length) / 8
;; bytes and adds eight times that many bits to %%read_in_length.
;;
;; Requirements on the caller:
;;   - 8 bytes must be readable at [%%next_in];
;;   - the only caller in this file skips the macro when %%read_in_length
;;     is already 64.
;; %%end_in (%2) and %%tmp2 (%6) are accepted for interface compatibility
;; but are not used.
;; Clobbers %%tmp1 and flags. SHLX is a macro defined outside this file;
;; whether it also clobbers rcx depends on that definition.
%macro inflate_in_load 6
%define %%next_in		%1
%define %%end_in		%2	; unused
%define %%read_in		%3
%define %%read_in_length	%4
%define %%tmp1			%5	; Tmp register
%define %%tmp2			%6	; unused

	SHLX	%%tmp1, [%%next_in], %%read_in_length
	or	%%read_in, %%tmp1

	;; Number of whole bytes that fit into the free part of read_in
	mov	%%tmp1, 64
	sub	%%tmp1, %%read_in_length
	shr	%%tmp1, 3

	add	%%next_in, %%tmp1
	lea	%%read_in_length, [%%read_in_length + 8 * %%tmp1]
%endm
|
|
|
|
;; Refill the bit buffer one byte at a time, never reading past %%end_in.
;; Loads min((64 - %%read_in_length) / 8, %%end_in - %%next_in) bytes; each
;; byte is shifted to the current bit position and ORed into %%read_in.
;; %%next_in and %%read_in_length are advanced to match.
;;
;; %5 serves as both %%avail_in and %%tmp1: the available-byte count is no
;; longer needed once the loop count has been clamped.
;; Clobbers %5, %6, flags, and rcx (unless rcx is %%read_in_length).
%macro inflate_in_small_load 6
%define %%next_in		%1
%define %%end_in		%2
%define %%read_in		%3
%define %%read_in_length	%4
%define %%avail_in		%5	; Tmp registers
%define %%tmp1			%5
%define %%loop_count		%6

	mov	%%avail_in, %%end_in
	sub	%%avail_in, %%next_in

	;; rcx tracks the bit position at which the next byte is inserted
%ifnidn %%read_in_length, rcx
	mov	rcx, %%read_in_length
%endif

	;; Whole bytes that fit into read_in, clamped to the bytes available
	mov	%%loop_count, 64
	sub	%%loop_count, %%read_in_length
	shr	%%loop_count, 3

	cmp	%%loop_count, %%avail_in
	cmovg	%%loop_count, %%avail_in
	test	%%loop_count, %%loop_count
	jz	%%end

%%load_byte:
	;; Zero-extending byte load; replaces "xor reg, reg" + byte move
	movzx	%%tmp1 %+ d, byte [%%next_in]
	SHLX	%%tmp1, %%tmp1, rcx
	or	%%read_in, %%tmp1
	add	rcx, 8
	add	%%next_in, 1
	sub	%%loop_count, 1
	jg	%%load_byte
%ifnidn %%read_in_length, rcx
	mov	%%read_in_length, rcx
%endif
%%end:
%endm
|
|
|
|
;; Truncate %1 to its low bits.
;; %2 holds a bit count on entry and %3 is an assemble-time constant. The
;; macro first subtracts (0x40 + %3) from %2 and then either
;;   - without USE_HSWNI: negates that value in rcx and shifts %1 left and
;;     then right by cl, or
;;   - with USE_HSWNI: masks the value to 5 bits and uses it as the bzhi index.
;; Both forms are intended to leave only the low (%2 - %3) bits of %1 set.
;; May clobber rcx and %2; flags are modified.
%macro CLEAR_HIGH_BITS 3
%define %%value	%1	; register to truncate
%define %%len	%2	; bit count register (destroyed)
%define %%skip	%3	; constant subtracted from the bit count

	sub	%%len, 0x40 + %%skip
%ifndef USE_HSWNI
%ifnidn %%len, rcx
	mov	rcx, %%len
%endif
	neg	rcx
	shl	%%value, cl
	shr	%%value, cl
%else
	and	%%len, 0x1F
	bzhi	%%value, %%value, %%len
%endif

%endm
|
|
|
|
;; Finish decoding a literal/length code whose first-level table entry has
;; already been loaded into %6.
;;
;; On exit %6 holds the decoded symbol field and %7 the number of symbols it
;; contains (1 after a second-level lookup). The consumed code bits are
;; shifted out of %4 and subtracted from %5.
;; Jumps to invalid_symbol when an entry has a zero code-length field.
;; Clobbers rcx, %8 and flags.
%macro decode_next_lit_len 8
%define %%st		%1	; State structure associated with compressed stream
%define %%idx_bits	%2	; Number of bits used to index the first-level table
%define %%tbl		%3	; Offset of the lookup table inside the state
%define %%bitbuf	%4	; Bits read in from compressed stream
%define %%bitcnt	%5	; Number of valid bits in %%bitbuf
%define %%sym		%6	; In: first-level entry. Out: decoded symbol(s)
%define %%sym_cnt	%7	; Out: number of symbols in %%sym
%define %%extra		%8	; Scratch for the bits past the first-level index

	;; rcx = code length field; zero marks an invalid entry
	mov	%%sym_cnt, %%sym
	mov	rcx, %%sym
	shr	rcx, LARGE_SHORT_CODE_LEN_OFFSET
	jz	invalid_symbol

	;; Isolate the symbol-count field
	shr	%%sym_cnt, LARGE_SYM_COUNT_OFFSET
	and	%%sym_cnt, LARGE_SYM_COUNT_MASK

	;; A clear flag bit means the entry already holds the symbol(s)
	and	%%sym, LARGE_FLAG_BIT | LARGE_SHORT_SYM_MASK
	test	%%sym, LARGE_FLAG_BIT
	jz	%%done

	;; Flag set: rebuild the bit count stored in bits 26 and up of the entry
	shl	rcx, LARGE_SYM_COUNT_LEN
	or	rcx, %%sym_cnt

	;; Keep only the input bits that follow the first-level index
	mov	%%extra, %%bitbuf
	shr	%%extra, %%idx_bits
	CLEAR_HIGH_BITS %%extra, rcx, %%idx_bits

	;; Second-level index = entry's symbol field + those extra bits
	and	%%sym, LARGE_SHORT_SYM_MASK
	add	%%sym, %%extra

	;; Second-level entries follow the (1 << %%idx_bits) first-level entries
	movzx	%%sym, word [%%st + LARGE_LONG_CODE_SIZE * %%sym + %%tbl + LARGE_SHORT_CODE_SIZE * (1 << %%idx_bits)]
	mov	%%sym_cnt, 1

	;; rcx = code length of the second-level entry; zero is invalid
	mov	rcx, %%sym
	shr	rcx, LARGE_LONG_CODE_LEN_OFFSET
	jz	invalid_symbol
	and	%%sym, LARGE_LONG_SYM_MASK

%%done:
	;; Drop the decoded bits from the bit buffer
	SHRX	%%bitbuf, %%bitbuf, rcx
	sub	%%bitcnt, rcx
%endm
|
|
|
|
;; Decode the next literal/length code starting from the bit buffer:
;; index the first-level table with the low %2 bits of %4, load the 32-bit
;; entry into %6, then finish with decode_next_lit_len.
;; Same outputs and clobbers as decode_next_lit_len (rcx, %8, flags).
%macro decode_next_lit_len_with_load 8
%define %%st		%1	; State structure associated with compressed stream
%define %%idx_bits	%2	; Number of bits used to index the first-level table
%define %%tbl		%3	; Offset of the lookup table inside the state
%define %%bitbuf	%4	; Bits read in from compressed stream
%define %%bitcnt	%5	; Number of valid bits in %%bitbuf
%define %%sym		%6	; Returned symbols
%define %%sym_cnt	%7	; Returned symbols count
%define %%idx		%8	; Scratch: first-level table index

	;; Look up the possible next symbol
	mov	%%idx, (1 << %%idx_bits) - 1
	and	%%idx, %%bitbuf
	mov	%%sym %+ d, dword [%%st + %%tbl + LARGE_SHORT_CODE_SIZE * %%idx]

	decode_next_lit_len %%st, %%idx_bits, %%tbl, %%bitbuf, %%bitcnt, %%sym, %%sym_cnt, %%idx
%endm
|
|
|
|
;; Finish decoding a distance code whose first-level (16-bit) table entry has
;; already been loaded into %%next_sym.
;;
;; On exit %%next_sym holds the distance symbol (low DIST_SYM_LEN bits) and
;; %%next_extra_bits holds the bits of the symbol field from
;; DIST_SYM_EXTRA_OFFSET upward (the extra-bit count). The code bits are
;; shifted out of %%read_in and subtracted from %%read_in_length; the extra
;; bits themselves are NOT consumed here.
;;
;; An entry with a zero code-length field jumps to the label
;; invalid_dist_symbol_<register passed as %%next_sym>, so such a label must
;; exist for every register this macro is instantiated with.
;; Clobbers rcx, %%next_bits and flags. Both callers in this file pass rcx as
;; %%next_extra_bits.
%macro decode_next_dist 8
%define %%state			%1	; State structure associated with compressed stream
%define %%lookup_size		%2	; Number of bits used for small lookup
%define %%state_offset		%3	; Offset of the lookup table inside the state
%define %%read_in		%4	; Bits read in from compressed stream
%define %%read_in_length	%5	; Number of valid bits in read_in
%define %%next_sym		%6	; In: first-level entry. Out: returned symbol
%define %%next_extra_bits	%7	; Out: number of extra bits for the symbol
%define %%next_bits		%8	; Scratch

	;; rcx = code length field; zero marks an invalid entry
	mov	rcx, %%next_sym
	shr	rcx, SMALL_SHORT_CODE_LEN_OFFSET
	jz	invalid_dist_symbol_ %+ %%next_sym

	;; Check if symbol or hint was looked up
	and	%%next_sym, SMALL_FLAG_BIT | SMALL_SHORT_SYM_MASK
	test	%%next_sym, SMALL_FLAG_BIT
	jz	%%end

	;; Input bits that follow the first-level index
	mov	%%next_bits, %%read_in
	shr	%%next_bits, %%lookup_size

	;; Start forming the second-level address. %%next_sym still has the flag
	;; bit set here; the load below subtracts its contribution again.
	lea	%%next_sym, [%%state + SMALL_LONG_CODE_SIZE * %%next_sym]

	;; Keep only the bits beyond the first %%lookup_size bits
	CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size

	;; Lookup actual next symbol
	movzx	%%next_sym, word [%%next_sym + %%state_offset + SMALL_LONG_CODE_SIZE * %%next_bits + SMALL_SHORT_CODE_SIZE * (1 << %%lookup_size) - SMALL_LONG_CODE_SIZE * SMALL_FLAG_BIT]

	;; rcx = code length of the second-level entry; zero is invalid
	mov	rcx, %%next_sym
	shr	rcx, SMALL_LONG_CODE_LEN_OFFSET
	jz	invalid_dist_symbol_ %+ %%next_sym
	;; Second-level entry, so mask with the LONG symbol mask
	and	%%next_sym, SMALL_LONG_SYM_MASK

%%end:
	;; Update read_in to reflect the bits which were decoded
	SHRX	%%read_in, %%read_in, rcx
	sub	%%read_in_length, rcx

	;; Split the symbol field into extra-bit count and distance symbol
	mov	%%next_extra_bits, %%next_sym
	shr	%%next_extra_bits, DIST_SYM_EXTRA_OFFSET
	and	%%next_sym, DIST_SYM_MASK
%endm
|
|
|
|
;; Decode next symbol
|
|
;; Clobber rcx
|
|
;; Decode the next distance code starting from the bit buffer:
;; index the first-level table with the low %2 bits of %4, load the 16-bit
;; entry (zero-extended) into %6, then finish with decode_next_dist.
;; Same outputs and clobbers as decode_next_dist.
%macro decode_next_dist_with_load 8
%define %%st		%1	; State structure associated with compressed stream
%define %%idx_bits	%2	; Number of bits used for small lookup
%define %%tbl		%3	; Offset of the lookup table inside the state
%define %%bitbuf	%4	; Bits read in from compressed stream
%define %%bitcnt	%5	; Number of valid bits in %%bitbuf
%define %%sym		%6	; Returned symbol
%define %%extra_cnt	%7	; Passed through to decode_next_dist
%define %%idx		%8	; Scratch: first-level table index

	;; Look up the possible next symbol
	mov	%%idx, (1 << %%idx_bits) - 1
	and	%%idx, %%bitbuf
	movzx	%%sym, word [%%st + %%tbl + SMALL_SHORT_CODE_SIZE * %%idx]

	decode_next_dist %%st, %%idx_bits, %%tbl, %%bitbuf, %%bitcnt, %%sym, %%extra_cnt, %%idx
%endm
|
|
|
|
[bits 64]
|
|
default rel
|
|
section .text
|
|
|
|
;;-----------------------------------------------------------------------
;; decode_huffman_code_block_stateless_<ARCH>
;; In:   arg0 = pointer to the state structure
;;       arg1 = lowest address a look-back copy may read from
;;              (compared against the copy source before every match copy)
;; Out:  rax = 0 after an end-of-block symbol, otherwise END_INPUT,
;;             OUT_OVERFLOW, INVALID_SYMBOL or INVALID_LOOKBACK
;;       The read_in, read_in_length, next_in, avail_in, next_out, avail_out
;;       and total_out fields of the state are written back at label "end".
;; The _field offsets come from an included file (presumably
;; inflate_data_structs.asm -- confirm).
;;-----------------------------------------------------------------------
global decode_huffman_code_block_stateless_ %+ ARCH
|
|
decode_huffman_code_block_stateless_ %+ ARCH %+ :
|
|
;; endbranch is a macro defined outside this file; presumably it emits the
;; CET indirect-branch landing pad -- confirm.
endbranch
|
|
|
|
FUNC_SAVE
|
|
|
|
;; Copy arg0 first: on elf64 it lives in rdi, which is reused as tmp1.
mov state, arg0
|
|
mov [rsp + start_out_mem_offset], arg1
|
|
lea rfc_lookup, [rfc1951_lookup_table]
|
|
|
|
;; Load the stream state into registers. The 32-bit loads zero-extend the
;; counts to 64 bits; end_out / end_in become end pointers.
mov read_in,[state + _read_in]
|
|
mov read_in_length %+ d, dword [state + _read_in_length]
|
|
mov next_out, [state + _next_out]
|
|
mov end_out %+ d, dword [state + _avail_out]
|
|
add end_out, next_out
|
|
mov next_in, [state + _next_in]
|
|
mov end_in %+ d, dword [state + _avail_in]
|
|
add end_in, next_in
|
|
|
|
mov dword [state + _copy_overflow_len], 0
|
|
mov dword [state + _copy_overflow_dist], 0
|
|
|
|
;; Shrink both limits by the slop margins for the fast loop. Every path that
;; leaves the fast loop through end_loop_block_pre or end_symbol_pre adds
;; them back.
sub end_out, OUT_BUFFER_SLOP
|
|
sub end_in, IN_BUFFER_SLOP
|
|
|
|
;; Fewer than IN_BUFFER_SLOP input bytes: go straight to the slow path.
cmp next_in, end_in
|
|
jg end_loop_block_pre
|
|
|
|
;; A full bit buffer needs no refill before the first table lookup.
cmp read_in_length, 64
|
|
je skip_load
|
|
|
|
inflate_in_load next_in, end_in, read_in, read_in_length, tmp1, tmp2
|
|
|
|
skip_load:
|
|
;; Preload the first lit/len table entry; loop_block expects it in next_sym.
mov tmp3, read_in
|
|
and tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1
|
|
mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3]
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
; Main Loop
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; Fast loop. On entry next_sym holds the lit/len table entry for the low
;; bits of read_in. Runs only while both pointers are inside the
;; slop-reduced limits.
loop_block:
|
|
;; Check if near end of in buffer or out buffer
|
|
cmp next_in, end_in
|
|
jg end_loop_block_pre
|
|
cmp next_out, end_out
|
|
jg end_loop_block_pre
|
|
|
|
;; Decode next symbol and reload the read_in buffer
|
|
decode_next_lit_len state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, next_sym_num, tmp1
|
|
|
|
;; Speculatively write next_sym if it is a literal.
;; This stores all 8 bytes of next_sym and advances next_out by the symbol
;; count; next_sym2 becomes next_sym shifted right by 8 * (count - 1) bits.
|
|
mov [next_out], next_sym
|
|
add next_out, next_sym_num
|
|
lea next_sym2, [8 * next_sym_num - 8]
|
|
SHRX next_sym2, next_sym, next_sym2
|
|
|
|
;; Find index to speculatively preload next_sym from
|
|
mov tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1
|
|
and tmp3, read_in
|
|
|
|
;; Start reloading read_in (unchecked 8-byte read at next_in)
|
|
mov tmp1, [next_in]
|
|
SHLX tmp1, tmp1, read_in_length
|
|
or read_in, tmp1
|
|
|
|
;; Speculatively load data associated with length symbol
|
|
lea repeat_length, [next_sym2 - 254]
|
|
|
|
;; Test for end of block symbol
|
|
cmp next_sym2, 256
|
|
je end_symbol_pre
|
|
|
|
;; Speculatively load next_sym for next loop if a literal was decoded
|
|
mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3]
|
|
|
|
;; Finish updating read_in_length for read_in:
;; advance next_in by the whole bytes that fit and count their bits.
|
|
mov tmp1, 64
|
|
sub tmp1, read_in_length
|
|
shr tmp1, 3
|
|
add next_in, tmp1
|
|
lea read_in_length, [read_in_length + 8 * tmp1]
|
|
|
|
;; Speculatively load next dist code
|
|
mov next_bits2, (1 << ISAL_DECODE_SHORT_BITS) - 1
|
|
and next_bits2, read_in
|
|
movzx next_sym3, word [state + _dist_huff_code + SMALL_SHORT_CODE_SIZE * next_bits2]
|
|
|
|
;; Check if next_sym2 is a literal, length, or end of block symbol.
;; Below 256 it is a literal and the loop repeats; otherwise execution falls
;; through to the code that follows this block.
|
|
cmp next_sym2, 256
|
|
jl loop_block
|
|
|
|
;; Fast-path match copy. Entered by fall-through from loop_block with
;; repeat_length = next_sym2 - 254, next_out already advanced by the
;; speculative store, and next_sym3 holding a preloaded distance table entry.
decode_len_dist:
|
|
;; Determine next_out after the copy is finished
|
|
lea next_out, [next_out + repeat_length - 1]
|
|
|
|
;; Decode distance code
|
|
decode_next_dist state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym3, rcx, tmp2
|
|
|
|
;; look_back_dist2 and next_sym3 are both rdi: the table value replaces the
;; symbol used to index it.
mov look_back_dist2 %+ d, [rfc_lookup + _dist_start + 4 * next_sym3]
|
|
|
|
;; Load distance code extra bits
|
|
mov next_bits, read_in
|
|
|
|
;; Calculate the look back distance: keep the low rcx bits of read_in as the
;; extra bits, then drop them from read_in.
|
|
BZHI next_bits, next_bits, rcx, tmp4
|
|
SHRX read_in, read_in, rcx
|
|
|
|
;; Setup next_sym, read_in, and read_in_length for next loop
|
|
mov read_in_2, (1 << ISAL_DECODE_LONG_BITS) - 1
|
|
and read_in_2, read_in
|
|
mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * read_in_2]
|
|
sub read_in_length, rcx
|
|
|
|
;; Copy distance in len/dist pair
|
|
add look_back_dist2, next_bits
|
|
|
|
;; Find beginning of copy: copy_start is the source address, and
;; copy_start + look_back_dist2 is the destination.
|
|
mov copy_start, next_out
|
|
sub copy_start, repeat_length
|
|
sub copy_start, look_back_dist2
|
|
|
|
;; Check if a valid look back distance was decoded
;; (signed compare against the lower bound passed in arg1)
|
|
cmp copy_start, [rsp + start_out_mem_offset]
|
|
jl invalid_look_back_distance
|
|
MOVDQU xmm1, [copy_start]
|
|
|
|
;; Set tmp2 to be the minimum of COPY_SIZE and repeat_length
|
|
;; This is to decrease use of small_byte_copy branch
|
|
mov tmp2, COPY_SIZE
|
|
cmp tmp2, repeat_length
|
|
cmovg tmp2, repeat_length
|
|
|
|
;; Check for overlapping memory in the copy
|
|
cmp look_back_dist2, tmp2
|
|
jl small_byte_copy_pre
|
|
|
|
large_byte_copy:
|
|
;; Copy length distance pair when memory overlap is not an issue.
;; Each pass stores a full COPY_SIZE bytes even when fewer remain.
|
|
MOVDQU [copy_start + look_back_dist2], xmm1
|
|
|
|
sub repeat_length, COPY_SIZE
|
|
jle loop_block
|
|
|
|
add copy_start, COPY_SIZE
|
|
MOVDQU xmm1, [copy_start]
|
|
jmp large_byte_copy
|
|
|
|
small_byte_copy_pre:
|
|
;; Copy length distance pair when source and destination overlap.
;; The distance is doubled after every store until it reaches COPY_SIZE.
|
|
add repeat_length, look_back_dist2
|
|
small_byte_copy:
|
|
MOVDQU [copy_start + look_back_dist2], xmm1
|
|
|
|
shl look_back_dist2, 1
|
|
MOVDQU xmm1, [copy_start]
|
|
cmp look_back_dist2, COPY_SIZE
|
|
jl small_byte_copy
|
|
|
|
sub repeat_length, look_back_dist2
|
|
jge large_byte_copy
|
|
jmp loop_block
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
; Finish Main Loop
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; Slow path: used once the fast loop is within the slop margin of either
;; buffer end. Every access here is bounds checked.
end_loop_block_pre:
|
|
;; Fix up in buffer and out buffer to reflect the actual buffer end
|
|
add end_out, OUT_BUFFER_SLOP
|
|
add end_in, IN_BUFFER_SLOP
|
|
|
|
end_loop_block:
|
|
;; Load read in buffer and decode next lit/len symbol.
;; read_in, read_in_length and next_out are saved first so that end_of_input
;; can restore them to their values from before this symbol.
|
|
inflate_in_small_load next_in, end_in, read_in, read_in_length, tmp1, tmp2
|
|
mov [rsp + read_in_mem_offset], read_in
|
|
mov [rsp + read_in_length_mem_offset], read_in_length
|
|
mov [rsp + next_out_mem_offset], next_out
|
|
|
|
decode_next_lit_len_with_load state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, next_sym_num, tmp1
|
|
|
|
;; Check that enough input was available to decode symbol
|
|
cmp read_in_length, 0
|
|
jl end_of_input
|
|
|
|
multi_symbol_start:
|
|
;; More than one symbol left: the low byte of next_sym is written as a literal
cmp next_sym_num, 1
|
|
jg decode_literal
|
|
|
|
;; Single symbol: below 256 is a literal, 256 is end of block, and above
;; 256 falls through to the length/distance code.
cmp next_sym, 256
|
|
jl decode_literal
|
|
je end_symbol
|
|
|
|
decode_len_dist_2:
|
|
lea repeat_length, [next_sym - 254]
|
|
;; Decode distance code
|
|
decode_next_dist_with_load state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym, rcx, tmp1
|
|
|
|
;; Load distance code extra bits
|
|
mov next_bits, read_in
|
|
mov look_back_dist %+ d, [rfc_lookup + _dist_start + 4 * next_sym]
|
|
|
|
;; Calculate the look back distance and check for enough input
|
|
BZHI next_bits, next_bits, rcx, tmp1
|
|
SHRX read_in, read_in, rcx
|
|
add look_back_dist, next_bits
|
|
sub read_in_length, rcx
|
|
jl end_of_input
|
|
|
|
;; Setup code for byte copy using rep movsb
;; (rsi = source, rdi = destination, rcx = byte count)
|
|
mov rsi, next_out
|
|
mov rdi, rsi
|
|
mov rcx, repeat_length
|
|
sub rsi, look_back_dist
|
|
|
|
;; Check if a valid look back distance was decoded
|
|
cmp rsi, [rsp + start_out_mem_offset]
|
|
jl invalid_look_back_distance
|
|
|
|
;; Check for out buffer overflow.
;; repeat_length becomes the output address one past the end of the copy.
|
|
add repeat_length, next_out
|
|
cmp repeat_length, end_out
|
|
jg out_buffer_overflow_repeat
|
|
|
|
mov next_out, repeat_length
|
|
|
|
rep movsb
|
|
jmp end_loop_block
|
|
|
|
decode_literal:
|
|
;; Store literal decoded from the input stream
;; (one byte per pass; the remaining symbols sit in the higher bytes of next_sym)
|
|
cmp next_out, end_out
|
|
jge out_buffer_overflow_lit
|
|
add next_out, 1
|
|
mov byte [next_out - 1], next_sym %+ b
|
|
sub next_sym_num, 1
|
|
jz end_loop_block
|
|
shr next_sym, 8
|
|
jmp multi_symbol_start
|
|
|
|
;; Set exit codes
|
|
;; Set exit codes. Each path below loads a status into rax and jumps to "end"
;; (or to end_state), where the state structure is written back.
;;
;; end_of_input: restore read_in, read_in_length and next_out from the stack
;; slots written at end_loop_block, clear the write-overflow fields and
;; return END_INPUT.
;; NOTE(review): this label is also reachable from the fast loop through
;; invalid_dist_symbol_<next_sym3>, where those stack slots have not been
;; written for the current symbol -- confirm that cannot happen in practice.
end_of_input:
|
|
mov read_in, [rsp + read_in_mem_offset]
|
|
mov read_in_length, [rsp + read_in_length_mem_offset]
|
|
mov next_out, [rsp + next_out_mem_offset]
|
|
xor tmp1, tmp1
|
|
mov dword [state + _write_overflow_lits], tmp1 %+ d
|
|
mov dword [state + _write_overflow_len], tmp1 %+ d
|
|
mov rax, END_INPUT
|
|
jmp end
|
|
|
|
;; A match does not fit in the output buffer. On entry rsi/rdi are set up for
;; rep movsb and repeat_length = next_out + match length. Copy the bytes that
;; fit, then record the remaining length and the distance in the state.
out_buffer_overflow_repeat:
|
|
;; rcx = bytes that still fit in the output buffer
mov rcx, end_out
|
|
sub rcx, next_out
|
|
;; repeat_length = match length minus the bytes copied now
sub repeat_length, rcx
|
|
sub repeat_length, next_out
|
|
rep movsb
|
|
|
|
mov [state + _copy_overflow_len], repeat_length %+ d
|
|
;; look_back_dist is rax, so it is stored before rax receives the status
mov [state + _copy_overflow_dist], look_back_dist %+ d
|
|
|
|
mov next_out, end_out
|
|
|
|
mov rax, OUT_OVERFLOW
|
|
jmp end
|
|
|
|
;; A literal does not fit in the output buffer. The pending symbols and their
;; count are recorded in the state; the last pending symbol is then examined.
out_buffer_overflow_lit:
|
|
mov dword [state + _write_overflow_lits], next_sym %+ d
|
|
mov dword [state + _write_overflow_len], next_sym_num %+ d
|
|
;; next_sym = last pending symbol; next_sym_num = count - 1 afterwards
sub next_sym_num, 1
|
|
shl next_sym_num, 3
|
|
SHRX next_sym, next_sym, next_sym_num
|
|
mov rax, OUT_OVERFLOW
|
|
shr next_sym_num, 3
|
|
;; The flags of this compare drive both the jl and the jg below; the mov
;; between them does not change flags.
cmp next_sym, 256
|
|
jl end
|
|
mov dword [state + _write_overflow_len], next_sym_num %+ d
|
|
jg decode_len_dist_2
|
|
jmp end_state
|
|
|
|
;; NOTE(review): invalid_look_back_distance and invalid_symbol are also
;; reached from the fast loop, where end_in / end_out still have the slop
;; margins subtracted; "end" then stores avail_in / avail_out computed from
;; those reduced limits -- confirm callers ignore them on error.
invalid_look_back_distance:
|
|
mov rax, INVALID_LOOKBACK
|
|
jmp end
|
|
|
|
;; Targets of the zero-code-length checks in decode_next_dist; the label
;; suffix is the register that was passed as the symbol register.
invalid_dist_symbol_ %+ next_sym:
|
|
cmp read_in_length, next_sym
|
|
jl end_of_input
|
|
jmp invalid_symbol
|
|
invalid_dist_symbol_ %+ next_sym3:
|
|
cmp read_in_length, next_sym3
|
|
jl end_of_input
|
|
invalid_symbol:
|
|
mov rax, INVALID_SYMBOL
|
|
jmp end
|
|
|
|
;; End-of-block symbol decoded in the fast loop.
end_symbol_pre:
|
|
;; Fix up in buffer and out buffer to reflect the actual buffer.
;; The fast loop advanced next_out for the speculative store of this
;; symbol, so step it back by one.
|
|
sub next_out, 1
|
|
add end_out, OUT_BUFFER_SLOP
|
|
add end_in, IN_BUFFER_SLOP
|
|
end_symbol:
|
|
;; Return value 0
xor rax, rax
|
|
end_state:
|
|
;; Set flag identifying a new block is required;
;; if the bfinal field is non-zero the state is marked input-done instead.
|
|
mov byte [state + _block_state], ISAL_BLOCK_NEW_HDR
|
|
cmp dword [state + _bfinal], 0
|
|
je end
|
|
mov byte [state + _block_state], ISAL_BLOCK_INPUT_DONE
|
|
|
|
;; Common exit: write the register copies back to the state structure.
;; rax already holds the return value and is not modified below.
end:
|
|
;; Save current buffer states
|
|
mov [state + _read_in], read_in
|
|
mov [state + _read_in_length], read_in_length %+ d
|
|
|
|
;; Set avail_out (end_out is consumed: it becomes the remaining byte count)
|
|
sub end_out, next_out
|
|
mov dword [state + _avail_out], end_out %+ d
|
|
|
|
;; Set total_out: add the bytes produced by this call (32-bit add)
|
|
mov tmp1, next_out
|
|
sub tmp1, [state + _next_out]
|
|
add [state + _total_out], tmp1 %+ d
|
|
|
|
;; Set next_out
|
|
mov [state + _next_out], next_out
|
|
|
|
;; Set next_in
|
|
mov [state + _next_in], next_in
|
|
|
|
;; Set avail_in (end_in is consumed: it becomes the remaining byte count)
|
|
sub end_in, next_in
|
|
mov [state + _avail_in], end_in %+ d
|
|
|
|
FUNC_RESTORE
|
|
|
|
ret
|