diff --git a/igzip/huff_codes.c b/igzip/huff_codes.c
index c0820cd..e4270d0 100644
--- a/igzip/huff_codes.c
+++ b/igzip/huff_codes.c
@@ -42,6 +42,18 @@ static const uint8_t code_length_code_order[] = {
 	16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
 
+struct slver {
+	uint16_t snum;
+	uint8_t ver;
+	uint8_t core;
+};
+
+/* Version info */
+struct slver isal_update_histogram_slver_00010085;
+struct slver isal_update_histogram_slver = { 0x0085, 0x01, 0x00 };
+struct slver isal_create_hufftables_slver_00010086;
+struct slver isal_create_hufftables_slver = { 0x0086, 0x01, 0x00 };
+
 int heap_push(struct huff_tree element, struct histheap *heap)
 {
 	uint16_t index;
diff --git a/igzip/igzip.c b/igzip/igzip.c
index dbac4af..f4ec87e 100644
--- a/igzip/igzip.c
+++ b/igzip/igzip.c
@@ -97,6 +97,9 @@ struct slver {
 struct slver isal_deflate_init_slver_01030081;
 struct slver isal_deflate_init_slver = { 0x0081, 0x03, 0x01 };
 
+struct slver isal_deflate_stateless_init_slver_00010084;
+struct slver isal_deflate_stateless_init_slver = { 0x0084, 0x01, 0x00 };
+
 struct slver isal_deflate_slver_01030082;
 struct slver isal_deflate_slver = { 0x0082, 0x03, 0x01 };
 
diff --git a/igzip/igzip_decode_block_stateless.asm b/igzip/igzip_decode_block_stateless.asm
index f35d6f2..f1b74ef 100644
--- a/igzip/igzip_decode_block_stateless.asm
+++ b/igzip/igzip_decode_block_stateless.asm
@@ -81,13 +81,93 @@ extern rfc1951_lookup_table
 start_out_mem_offset	equ	0
 read_in_mem_offset	equ	8
 read_in_length_mem_offset	equ	16
-stack_size	equ	4 * 8 + 8
+gpr_save_mem_offset	equ	24
+stack_size	equ	3 * 8 + 8 * 8
 
 %define _dist_extra_bit_count	264
 %define _dist_start	_dist_extra_bit_count + 1*32
 %define _len_extra_bit_count	_dist_start + 4*32
 %define _len_start	_len_extra_bit_count + 1*32
 
+%ifidn __OUTPUT_FORMAT__, elf64
+%define arg0	rdi
+
+%macro FUNC_SAVE 0
+%ifdef ALIGN_STACK
+	push	rbp
+	mov	rbp, rsp
+	sub	rsp, stack_size
+	and	rsp, ~15
+%else
+	sub	rsp, stack_size
+%endif
+
+	mov [rsp + gpr_save_mem_offset + 0*8], rbx
+	mov [rsp + gpr_save_mem_offset + 1*8], rbp
+	mov [rsp + gpr_save_mem_offset + 2*8], r12
+	mov [rsp + gpr_save_mem_offset + 3*8], r13
+	mov [rsp + gpr_save_mem_offset + 4*8], r14
+	mov [rsp + gpr_save_mem_offset + 5*8], r15
+%endm
+
+%macro FUNC_RESTORE 0
+	mov	rbx, [rsp + gpr_save_mem_offset + 0*8]
+	mov	rbp, [rsp + gpr_save_mem_offset + 1*8]
+	mov	r12, [rsp + gpr_save_mem_offset + 2*8]
+	mov	r13, [rsp + gpr_save_mem_offset + 3*8]
+	mov	r14, [rsp + gpr_save_mem_offset + 4*8]
+	mov	r15, [rsp + gpr_save_mem_offset + 5*8]
+
+%ifndef ALIGN_STACK
+	add	rsp, stack_size
+%else
+	mov	rsp, rbp
+	pop	rbp
+%endif
+%endm
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define arg0	rcx
+%macro FUNC_SAVE 0
+%ifdef ALIGN_STACK
+	push	rbp
+	mov	rbp, rsp
+	sub	rsp, stack_size
+	and	rsp, ~15
+%else
+	sub	rsp, stack_size
+%endif
+
+	mov [rsp + gpr_save_mem_offset + 0*8], rbx
+	mov [rsp + gpr_save_mem_offset + 1*8], rsi
+	mov [rsp + gpr_save_mem_offset + 2*8], rdi
+	mov [rsp + gpr_save_mem_offset + 3*8], rbp
+	mov [rsp + gpr_save_mem_offset + 4*8], r12
+	mov [rsp + gpr_save_mem_offset + 5*8], r13
+	mov [rsp + gpr_save_mem_offset + 6*8], r14
+	mov [rsp + gpr_save_mem_offset + 7*8], r15
+%endm
+
+%macro FUNC_RESTORE 0
+	mov	rbx, [rsp + gpr_save_mem_offset + 0*8]
+	mov	rsi, [rsp + gpr_save_mem_offset + 1*8]
+	mov	rdi, [rsp + gpr_save_mem_offset + 2*8]
+	mov	rbp, [rsp + gpr_save_mem_offset + 3*8]
+	mov	r12, [rsp + gpr_save_mem_offset + 4*8]
+	mov	r13, [rsp + gpr_save_mem_offset + 5*8]
+	mov	r14, [rsp + gpr_save_mem_offset + 6*8]
+	mov	r15, [rsp + gpr_save_mem_offset + 7*8]
+
+%ifndef ALIGN_STACK
+	add	rsp, stack_size
+%else
+	mov	rsp, rbp
+	pop	rbp
+%endif
+%endm
+%endif
+
 ;; Load read_in and updated in_buffer accordingly
 ;; when there are at least 8 bytes in the in buffer
 ;; Clobbers rcx, unless rcx is %%read_in_length
@@ -261,16 +341,9 @@ stack_size	equ	4 * 8 + 8
 global decode_huffman_code_block_stateless_ %+ ARCH
 decode_huffman_code_block_stateless_ %+ ARCH %+ :
-	push	rbx
-	push	rbp
-	push	r12
-	push	r13
-	push	r14
-	push	r15
+	FUNC_SAVE
 
-	sub	rsp, stack_size
-
-	mov	state, rdi
+	mov	state, arg0
 
 	lea	rfc_lookup, [rfc1951_lookup_table]
 	mov	read_in,[state + _read_in]
@@ -581,12 +654,6 @@ end:
 	sub	end_in, next_in
 	mov	[state + _avail_in], end_in %+ d
 
-	add	rsp, stack_size
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rbp
-	pop	rbx
+	FUNC_RESTORE
 
 	ret
diff --git a/igzip/igzip_inflate.c b/igzip/igzip_inflate.c
index d1d76d3..3c7f7ac 100644
--- a/igzip/igzip_inflate.c
+++ b/igzip/igzip_inflate.c
@@ -43,6 +43,22 @@ static struct rfc1951_tables rfc_lookup_table = {
 	 0x0083, 0x00a3, 0x00c3, 0x00e3, 0x0102, 0x0000, 0x0000, 0x0000}
 };
+
+struct slver {
+	uint16_t snum;
+	uint8_t ver;
+	uint8_t core;
+};
+
+/* Version info */
+struct slver isal_inflate_init_slver_00010088;
+struct slver isal_inflate_init_slver = { 0x0088, 0x01, 0x00 };
+struct slver isal_inflate_stateless_slver_00010089;
+struct slver isal_inflate_stateless_slver = { 0x0089, 0x01, 0x00 };
+struct slver isal_inflate_slver_0001008a;
+struct slver isal_inflate_slver = { 0x008a, 0x01, 0x00 };
+
+
 /*Performs a copy of length repeat_length data starting at dest -
  * lookback_distance into dest. This copy copies data previously copied when the
  * src buffer and the dest buffer overlap. */
diff --git a/igzip/igzip_update_histogram.asm b/igzip/igzip_update_histogram.asm
index 248c0cc..b0fdd67 100644
--- a/igzip/igzip_update_histogram.asm
+++ b/igzip/igzip_update_histogram.asm
@@ -83,8 +83,6 @@ global %1
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-
 _eob_count_offset	equ	0	; local variable (8 bytes)
 f_end_i_mem_offset	equ	8
 gpr_save_mem_offset	equ	16	; gpr save area (8*8 bytes)
@@ -92,10 +90,97 @@ xmm_save_mem_offset	equ	16 + 8*8	; xmm save area (4*16 bytes) (16 byte aligned)
 stack_size	equ	2*8 + 8*8 + 4*16 + 8
 ;;; 8 because stack address is odd multiple of 8 after a function call and
 ;;; we want it aligned to 16 bytes
+
+%ifidn __OUTPUT_FORMAT__, elf64
+%define arg0	rdi
+%define arg1	rsi
+%define arg2	rdx
+
+%macro FUNC_SAVE 0
+%ifdef ALIGN_STACK
+	push	rbp
+	mov	rbp, rsp
+	sub	rsp, stack_size
+	and	rsp, ~15
+%else
+	sub	rsp, stack_size
+%endif
+
+	mov [rsp + gpr_save_mem_offset + 0*8], rbx
+	mov [rsp + gpr_save_mem_offset + 1*8], rbp
+	mov [rsp + gpr_save_mem_offset + 2*8], r12
+	mov [rsp + gpr_save_mem_offset + 3*8], r13
+	mov [rsp + gpr_save_mem_offset + 4*8], r14
+	mov [rsp + gpr_save_mem_offset + 5*8], r15
+%endm
+
+%macro FUNC_RESTORE 0
+	mov	rbx, [rsp + gpr_save_mem_offset + 0*8]
+	mov	rbp, [rsp + gpr_save_mem_offset + 1*8]
+	mov	r12, [rsp + gpr_save_mem_offset + 2*8]
+	mov	r13, [rsp + gpr_save_mem_offset + 3*8]
+	mov	r14, [rsp + gpr_save_mem_offset + 4*8]
+	mov	r15, [rsp + gpr_save_mem_offset + 5*8]
+
+%ifndef ALIGN_STACK
+	add	rsp, stack_size
+%else
+	mov	rsp, rbp
+	pop	rbp
+%endif
+%endm
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define arg0	rcx
+%define arg1	rdx
+%define arg2	r8
+
+%macro FUNC_SAVE 0
+%ifdef ALIGN_STACK
+	push	rbp
+	mov	rbp, rsp
+	sub	rsp, stack_size
+	and	rsp, ~15
+%else
+	sub	rsp, stack_size
+%endif
+
+	mov [rsp + gpr_save_mem_offset + 0*8], rbx
+	mov [rsp + gpr_save_mem_offset + 1*8], rsi
+	mov [rsp + gpr_save_mem_offset + 2*8], rdi
+	mov [rsp + gpr_save_mem_offset + 3*8], rbp
+	mov [rsp + gpr_save_mem_offset + 4*8], r12
+	mov [rsp + gpr_save_mem_offset + 5*8], r13
+	mov [rsp + gpr_save_mem_offset + 6*8], r14
+	mov [rsp + gpr_save_mem_offset + 7*8], r15
+%endm
+
+%macro FUNC_RESTORE 0
+	mov	rbx, [rsp + gpr_save_mem_offset + 0*8]
+	mov	rsi, [rsp + gpr_save_mem_offset + 1*8]
+	mov	rdi, [rsp + gpr_save_mem_offset + 2*8]
+	mov	rbp, [rsp + gpr_save_mem_offset + 3*8]
+	mov	r12, [rsp + gpr_save_mem_offset + 4*8]
+	mov	r13, [rsp + gpr_save_mem_offset + 5*8]
+	mov	r14, [rsp + gpr_save_mem_offset + 6*8]
+	mov	r15, [rsp + gpr_save_mem_offset + 7*8]
+
+%ifndef ALIGN_STACK
+	add	rsp, stack_size
+%else
+	mov	rsp, rbp
+	pop	rbp
+%endif
+%endm
+%endif
+
+
 _lit_len_offset	equ	0
 _dist_offset	equ	(8 * LIT_LEN)
 _hash_offset	equ	(_dist_offset + 8 * DIST_LEN)
+
 %macro len_to_len_code 3
 %define %%len_code	%1	; Output
 %define %%len		%2	; Input
@@ -145,24 +230,18 @@ isal_update_histogram_ %+ ARCH %+ :
 	jne	skip1
 	ret
 skip1:
+	FUNC_SAVE
 
-%ifdef ALIGN_STACK
-	push	rbp
-	mov	rbp, rsp
-	sub	rsp, stack_size
-	and	rsp, ~15
-%else
-	sub	rsp, stack_size
+%ifnidn file_start, arg0
+	mov	file_start, arg0
+%endif
+%ifnidn file_length, arg1
+	mov	file_length, arg1
+%endif
+%ifnidn histogram, arg2
+	mov	histogram, arg2
 %endif
 
-	mov [rsp + gpr_save_mem_offset + 0*8], rbx
-	mov [rsp + gpr_save_mem_offset + 1*8], rsi
-	mov [rsp + gpr_save_mem_offset + 2*8], rdi
-	mov [rsp + gpr_save_mem_offset + 3*8], rbp
-	mov [rsp + gpr_save_mem_offset + 4*8], r12
-	mov [rsp + gpr_save_mem_offset + 5*8], r13
-	mov [rsp + gpr_save_mem_offset + 6*8], r14
-	mov [rsp + gpr_save_mem_offset + 7*8], r15
 	mov	f_i, 0
 	mov	tmp1, qword [histogram + _lit_len_offset + 8*256]
@@ -436,21 +515,8 @@ end:
 	mov	tmp1, [rsp + _eob_count_offset]
 	mov	qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * 256], tmp1
 
-	mov	rbx, [rsp + gpr_save_mem_offset + 0*8]
-	mov	rsi, [rsp + gpr_save_mem_offset + 1*8]
-	mov	rdi, [rsp + gpr_save_mem_offset + 2*8]
-	mov	rbp, [rsp + gpr_save_mem_offset + 3*8]
-	mov	r12, [rsp + gpr_save_mem_offset + 4*8]
-	mov	r13, [rsp + gpr_save_mem_offset + 5*8]
-	mov	r14, [rsp + gpr_save_mem_offset + 6*8]
-	mov	r15, [rsp + gpr_save_mem_offset + 7*8]
+	FUNC_RESTORE
 
-%ifndef ALIGN_STACK
-	add	rsp, stack_size
-%else
-	mov	rsp, rbp
-	pop	rbp
-%endif
 	ret
 
 compare_loop: