; isa-l/igzip/igzip_finish.asm
; Roy Oursler 9d53af0c7c igzip: Improve LARGE_WINDOW and change that option to be default.
; Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
; Reviewed-by: Greg Tucker <greg.b.tucker@intel.com>
; 2016-12-05 16:36:31 -07:00
;
; 322 lines
; 8.9 KiB
; NASM

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "options.asm"
%include "lz0a_const.asm"
%include "data_struct2.asm"
%include "bitbuf2.asm"
%include "huffman.asm"
%include "igzip_compare_types.asm"
%include "stdmac.asm"
%include "reg_sizes.asm"
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Register aliases for isal_deflate_finish_01, grouped by role.
; Several aliases resolve to the same physical register (noted per group), so
; two names that share a register cannot hold live values at the same time.

; stream argument and values loaded from it
%define stream		rsi
%define file_start	r13
%define hufftables	r15

; input indices (f_end_i shares r12 with code2)
%define f_i		rdi
%define f_end_i		r12

; output bit buffer
%define m_out_buf	r8
%define m_bits		r9
%define m_bit_count	r11

; match search (hash shares rcx with tmp2; f_index shares rbx with code, tmp4-tmp6)
%define hash		rcx
%define f_index		rbx
%define dist		r10
%define len		r14

; Huffman codes being assembled (code shares rbx; code2 shares r12 with f_end_i)
%define code		rbx
%define code2		r12
%define code_len2	rbp

; scratch
%define tmp1		rax
%define tmp2		rcx
%define tmp3		rdx
%define tmp4		rbx
%define tmp5		rbx
%define tmp6		rbx
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Stack frame layout: a single 8-byte slot that holds f_end_i. f_end_i and
; code2 are both aliases of r12, so the end index is kept in memory and
; re-read from this slot inside the loop.
f_end_i_mem_offset equ 0 ; local variable (8 bytes)
stack_size equ 8
;
; void isal_deflate_finish ( isal_zstream *stream )
;
; Encodes the bytes of the stream's internal buffer that lie between
; state->b_bytes_processed and state->b_bytes_valid, one position at a
; time, as literal or length/distance codes, stopping early when the output
; buffer is full. If all input was consumed and output space remains, the
; symbol 256 is written, has_eob is set and state->state is advanced
; (unless end_of_stream == 1 and left_over > 0).
;
; arg 1: addr of stream
;        elf64: passed in rdi and copied to rcx on entry
;        other output formats: read from rcx (presumably win64 -- confirm)
;
; Results are written back to the stream: next_out, avail_out, total_out,
; b_bytes_processed, the bit-buffer fields (m_out_start, m_out_end, m_bits,
; m_bit_count), head[] entries, and, when the block is closed, has_eob and
; state.
;
; Registers: rbx, rsi, rdi, rbp, r12-r15 are handed to PUSH_ALL / POP_ALL;
; rax, rcx, rdx, r8-r11 are overwritten without being saved.
;
; NOTE(review): PUSH_ALL, POP_ALL, SHLX, compute_hash, compare,
; get_dist_code, get_len_code, get_lit_code and write_bits are macros from
; the included files and are not visible here; comments about what they do
; follow the pseudo-C comments and should be confirmed against the macros.
global isal_deflate_finish_01
isal_deflate_finish_01:
; Hand the listed registers to PUSH_ALL (presumably saved there and restored
; by POP_ALL) and reserve the f_end_i stack slot.
PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
sub rsp, stack_size
; On elf64 the argument is in rdi; move it to rcx so the code below is the
; same for both cases.
%ifidn __OUTPUT_FORMAT__, elf64
mov rcx, rdi
%endif
mov stream, rcx
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
; m_out_start = next_out; m_out_end = next_out + avail_out - SLOP.
; avail_out is read as a 32-bit value. SLOP comes from an include
; (presumably slack kept free at the end of the output buffer -- confirm).
mov m_out_buf, [stream + _next_out]
mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf
mov tmp1 %+ d, [stream + _avail_out]
add tmp1, m_out_buf
sub tmp1, SLOP
; No jump in this file targets skip_SLOP.
skip_SLOP:
mov [stream + _internal_state_bitbuf_m_out_end], tmp1
; Load the pending bits and bit count of the bit buffer, and the Huffman
; table pointer.
mov m_bits, [stream + _internal_state_bitbuf_m_bits]
mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
mov hufftables, [stream + _hufftables]
; f_i = state->b_bytes_processed;
; f_end_i = state->b_bytes_valid;
mov f_i %+ d, [stream + _internal_state_b_bytes_processed]
mov f_end_i %+ d, [stream + _internal_state_b_bytes_valid]
; f_i += (uint32_t)(state->buffer - state->file_start);
; f_end_i += (uint32_t)(state->buffer - state->file_start);
; Both indices become offsets relative to file_start. The additions are
; done on the full 64-bit registers.
mov file_start, [stream + _internal_state_file_start]
lea tmp1, [stream + _internal_state_buffer]
sub tmp1, file_start
add f_i, tmp1
add f_end_i, tmp1
; Spill f_end_i: r12 is reused as code2 below.
mov [rsp + f_end_i_mem_offset], f_end_i
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
; Nothing to encode if f_i >= f_end_i (signed compare).
cmp f_i, f_end_i
jge end_loop_2
; Preload 8 bytes at the current position; tmp1 is the input to
; compute_hash at the top of the loop.
; NOTE(review): the reloads later in the loop (before write_bits and in
; encode_literal) fetch only 4 bytes into tmp1 -- presumably compute_hash
; uses just the low 32 bits; confirm.
mov tmp1, [file_start + f_i]
; When b_bytes_processed == 0, the hash-table entry for this position is
; stored without being looked up first and the byte is emitted as a literal.
cmp dword [stream + _internal_state_b_bytes_processed], 0
jne skip_write_first_byte
; Stop if the output buffer is already full (m_out_buf > m_out_end).
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end_loop_2
compute_hash hash, tmp1
and hash %+ d, HASH_MASK
; head[] holds 16-bit entries: only the low word of f_i is stored.
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
jmp encode_literal
skip_write_first_byte:
; Main loop. On entry tmp1 holds the bytes at file_start + f_i.
loop2:
; if (state->bitbuf.is_full()) {
; Full means m_out_buf > m_out_end (unsigned).
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end_loop_2
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
compute_hash hash, tmp1
and hash %+ d, HASH_MASK
; f_index = state->head[hash];
movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
; state->head[hash] = (uint16_t) f_i;
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
; dist = f_i - f_index; // mod 64k
mov dist %+ d, f_i %+ d
sub dist %+ d, f_index %+ d
and dist %+ d, 0xFFFF
; if ((dist-1) < (D-1)) {   (unsigned)
; The branch below goes to encode_literal when (dist-1) >= (D-1), so a
; match is only tried for 1 <= dist <= D-1: dist == 0 wraps to 0xFFFFFFFF
; and dist == D gives exactly D-1, both of which take the branch.
mov tmp1 %+ d, dist %+ d
sub tmp1 %+ d, 1
cmp tmp1 %+ d, (D-1)
jae encode_literal
; len = f_end_i - f_i;
; f_end_i is read back from its stack slot.
mov tmp4, [rsp + f_end_i_mem_offset]
sub tmp4, f_i
; if (len > 258) len = 258;
; cmov has no immediate form, so 258 is taken from the c258 constant.
; NOTE(review): [c258] is RIP-relative only if an include sets
; `default rel`; otherwise this is an absolute address -- confirm.
cmp tmp4, 258
cmovg tmp4, [c258]
; len = compare(state->file_start + f_i,
; state->file_start + f_i - dist, len);
; tmp1 = current position, tmp2 = candidate position dist bytes earlier.
; The result is expected in len, with tmp3 presumably used as scratch
; (macro not shown).
lea tmp1, [file_start + f_i]
mov tmp2, tmp1
sub tmp2, dist
compare tmp4, tmp1, tmp2, len, tmp3
; if (len >= SHORTEST_MATCH) {
cmp len, SHORTEST_MATCH
jb encode_literal
;; encode as dist/len
; get_dist_code(dist, &code2, &code_len2);
; The macro is given dist - 1. From here on r12 holds code2, not f_end_i.
dec dist
get_dist_code dist, code2, code_len2, hufftables ;; clobbers dist, rcx
; get_len_code(len, &code, &code_len);
get_len_code len, code, rcx, hufftables ;; rcx is code_len
; code2 <<= code_len
; code2 |= code
; code_len2 += code_len
; The length code ends up in the low bits with the distance code above it,
; so one write_bits call emits both.
SHLX code2, code2, rcx
or code2, code
add code_len2, rcx
; for (k = f_i+1, f_i += len; k < f_i; k++) {
; tmp3 = k starts one past the match start; f_i is advanced to the first
; byte after the match.
lea tmp3, [f_i + 1] ; tmp3 <= k
add f_i, len
%ifdef LIMIT_HASH_UPDATE
; only update hash twice
; The table entries for k and k + 1 are written unconditionally.
; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp6, [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
add tmp3, 1
; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp6, [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
%else
; Update the table for every position inside the match. The loop body runs
; at least once because the exit test is at the bottom.
loop3:
; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp6, [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
inc tmp3
cmp tmp3, f_i
jl loop3
%endif
; Preload 4 bytes at the new f_i for the hash at the top of loop2. This is
; done before write_bits, which is passed tmp5 (rbx) as its last argument,
; presumably as scratch.
mov tmp1 %+ d, [file_start + f_i]
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
; continue
cmp f_i, [rsp + f_end_i_mem_offset]
jl loop2
jmp end_loop_2
; Emit the byte at f_i as a literal and advance by one.
encode_literal:
; Preload 4 bytes at f_i + 1 for the hash of the next iteration.
mov tmp1 %+ d, [file_start + f_i + 1]
; get_lit_code(state->file_start[f_i], &code2, &code_len2);
movzx tmp5, byte [file_start + f_i]
get_lit_code tmp5, code2, code_len2, hufftables
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
; continue
add f_i, 1
cmp f_i, [rsp + f_end_i_mem_offset]
jl loop2
end_loop_2:
; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
cmp f_i, [rsp + f_end_i_mem_offset]
jl not_end
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja not_end
; If end_of_stream == 1 and left_over > 0 (signed), the closing symbol is
; not written on this call.
cmp dword [stream + _end_of_stream], 1
jne cont
cmp dword [stream + _internal_state_left_over], 0
jg not_end
cont:
; get_lit_code(256, &code2, &code_len2);
; Symbol 256 closes the block; has_eob records that it was written.
get_lit_code 256, code2, code_len2, hufftables
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp1
mov dword [stream + _internal_state_has_eob], 1
; Next state: ZSTATE_TRL when end_of_stream == 1, ZSTATE_SYNC_FLUSH otherwise.
cmp dword [stream + _end_of_stream], 1
jne sync_flush
; state->state = ZSTATE_TRL;
mov dword [stream + _internal_state_state], ZSTATE_TRL
jmp not_end
sync_flush:
; state->state = ZSTATE_SYNC_FLUSH;
mov dword [stream + _internal_state_state], ZSTATE_SYNC_FLUSH
; }
; Common exit: write the progress and bit-buffer state back to the stream.
not_end:
; state->b_bytes_processed = f_i - (state->buffer - state->file_start);
; Computed as f_i + file_start - stream - _internal_state_buffer; the low
; 32 bits are stored.
add f_i, [stream + _internal_state_file_start]
sub f_i, stream
sub f_i, _internal_state_buffer
mov [stream + _internal_state_b_bytes_processed], f_i %+ d
; // update output buffer
; stream->next_out = state->bitbuf.buffer_ptr();
mov [stream + _next_out], m_out_buf
; len = state->bitbuf.buffer_used();
; m_out_buf now holds the number of bytes written during this call.
sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start]
; stream->avail_out -= len;
sub [stream + _avail_out], m_out_buf %+ d
; stream->total_out += len;
; Both counters are updated with 32-bit operations.
add [stream + _total_out], m_out_buf %+ d
; Save the bits not yet flushed so a later call can continue from them.
mov [stream + _internal_state_bitbuf_m_bits], m_bits
mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
; Release the stack slot and undo PUSH_ALL.
add rsp, stack_size
POP_ALL
ret
section .data
; 64-bit constant 258, used as the memory source operand of the cmovg that
; caps the candidate match length (cmov has no immediate form).
; It is read with a qword load, so align it to 8 bytes: with the previous
; 4-byte alignment the load could be misaligned and straddle a cache line.
align 8
c258: dq 258