isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S
Jerry Yu f3bb041799 igzip: Implement deflate body/finish with assembly
Change-Id: I556af7976294f31abd72ac49366f7259e3baf399
Signed-off-by: Jerry Yu <jerry.h.yu@arm.com>
2019-10-11 09:59:30 -07:00

262 lines
8.2 KiB
ArmAsm

/**********************************************************************
Copyright(c) 2019 Arm Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Arm Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
.arch armv8-a+crc
.text
.align 2
#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"
/*
declare Macros
*/
.macro declare_generic_reg name:req,reg:req,default:req
\name .req \default\reg
w_\name .req w\reg
x_\name .req x\reg
.endm
.macro update_state stream:req,start_in:req,next_in:req,end_in:req, \
m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req
//m_out_buf=bytes_written
sub x_\m_out_buf,x_\m_out_buf,x_\m_out_start
cmp next_in,start_in
bls skip_has_hist
mov w_\tmp0,1
strb w_\tmp0,[x_\stream,_internal_state_has_hist]
skip_has_hist:
ldr w_\tmp0,[\stream,_total_in]
ldr x_\m_out_start,[\stream,_next_out] //m_out_start = next_out
str x_\next_in,[\stream,_next_in]
sub x_\start_in,x_\next_in,x_\start_in
sub x_\end_in,x_\end_in,x_\next_in
add w_\tmp0,w_\tmp0,w_\start_in
stp w_\end_in,w_\tmp0,[\stream,_avail_in]
//next_in=avail_out,start_in=total_out
ldp w_\next_in,w_\start_in,[\stream,_avail_out]
add x_\m_out_start,x_\m_out_start,x_\m_out_buf
str x_\m_out_start,[\stream,_next_out]
add w_\start_in,w_\start_in,w_\m_out_buf
sub w_\next_in,w_\next_in,w_\m_out_buf
stp w_\next_in,w_\start_in,[\stream,_avail_out]
.endm
.global isal_deflate_body_aarch64
.type isal_deflate_body_aarch64, %function
/*
void isal_deflate_body_aarch64(struct isal_zstream *stream)
*/
declare_generic_reg stream, 0,x //struct isal_zstream *stream
declare_generic_reg state, 8,x //&stream->state
declare_generic_reg avail_in, 9,w
declare_generic_reg end_of_stream, 10,w //can be used in loop
declare_generic_reg hash_mask, 11,w
declare_generic_reg match_length, 12,w
declare_generic_reg hufftables, 13,x
declare_generic_reg m_out_buf, 14,x
declare_generic_reg m_out_start, 15,x
declare_generic_reg m_out_end, 16,x
declare_generic_reg m_bits, 17,x
declare_generic_reg m_bit_count, 18,w
declare_generic_reg start_in, 19,x
declare_generic_reg end_in, 20,x
declare_generic_reg next_in, 21,x
declare_generic_reg loop_end_cnt, 22,x
declare_generic_reg literal, 23,w
declare_generic_reg hash, 24,w
declare_generic_reg dist, 25,w
declare_generic_reg last_seen, 26,x
declare_generic_reg file_start, 27,x
declare_generic_reg hist_size, 28,w
declare_generic_reg tmp0, 5 ,w
declare_generic_reg tmp1, 6 ,w
declare_generic_reg tmp2, 7 ,w
declare_generic_reg code, 3,x
declare_generic_reg code_len, 24,x
declare_generic_reg code2, 10,x
declare_generic_reg code_len2, 4,x
isal_deflate_body_aarch64:
//save registers
push_stack
ldr avail_in, [stream, _avail_in]
cbz avail_in, exit_save_state
// set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
ldr w_m_out_end,[stream,_avail_out]
ldr m_out_buf,[stream,_next_out]
add m_out_end,m_out_buf,w_m_out_end,uxtw
sub m_out_end,m_out_end , 8
mov m_out_start,m_out_buf
stp m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
str m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
ldr m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count]
ldr m_bits ,[stream,_internal_state_bitbuf_m_bits]
//init variables
//last_seen=&stream.internal_state.head = _internal_state+_head
add last_seen,stream,65536
add last_seen,last_seen,_internal_state+_head -65536
//start_in=stream->next_in;next_in=start_in
ldr start_in,[stream,_next_in]
mov next_in,start_in
add end_in,start_in,avail_in,uxtw //avail_in reg is free now
sub loop_end_cnt,end_in,289 //loop end
cmp next_in,loop_end_cnt
//file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
ldr w_file_start,[stream,_total_in]
sub file_start,next_in,file_start,uxtw
//uint32_t hist_size = state->dist_mask;
ldr hist_size,[stream,_internal_state + _dist_mask]
//uint32_t hash_mask = state->hash_mask;
ldr hash_mask,[stream,_internal_state + _hash_mask]
ldr hufftables,[stream,_hufftables]
bhi main_loop_end
main_loop_start:
//is_full(&state->bitbuf)
cmp m_out_buf,m_out_end
bhi update_state_exit
ldr literal,[next_in]
crc32cw hash,wzr,literal
and hash,hash,hash_mask
///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
ldrh w_tmp0,[last_seen,x_hash,lsl 1] //tmp_w last_seen[hash]
sub x_dist,next_in,file_start
//last_seen[hash] = (uint64_t) (next_in - file_start);
strh dist,[last_seen,x_hash,lsl 1]
sub dist,dist,w_tmp0
and dist,dist,0xffff
sub w_tmp0,dist,1
cmp hist_size,w_tmp0
bls get_lit_code
///match_length = compare258(next_in - dist, next_in, 258);
sub x_tmp2,next_in,x_dist
compare_258_bytes tmp2,next_in,match_length,tmp0,tmp1
cmp match_length,3
bls get_lit_code
sub x_tmp0,next_in,file_start
ldr literal,[next_in,1]
crc32cw hash,wzr,literal
and hash,hash,hash_mask
add tmp0,tmp0,1
strh tmp0,[last_seen,x_hash,lsl 1]
//call_print_b hash,dist,last_seen
ldr literal,[next_in,2]
crc32cw hash,wzr,literal
and hash,hash,hash_mask
add tmp0,tmp0,1
strh tmp0,[last_seen,x_hash,lsl 1]
//get_len_code(stream->hufftables, match_length, &code,
// &code_len);
get_len_code hufftables,match_length,code,code_len,tmp0
//get_dist_code(stream->hufftables, dist, &code2, &code_len2);
get_dist_code hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2
//code |= code2 << code_len;
//code_len += code_len2;
lsl code2,code2,code_len
orr code,code,code2
add code_len,code_len,code_len2
//next_in += match_length;
add next_in,next_in,match_length,uxtw
//write_bits(&state->bitbuf, code, code_len);
update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
cmp next_in,loop_end_cnt
bls main_loop_start
b main_loop_end
get_lit_code:
//get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
and literal,literal,0xff
get_lit_code hufftables,literal,code,code_len
//next_in++;
add next_in,next_in,1
//write_bits(&state->bitbuf, code, code_len);
update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
cmp next_in,loop_end_cnt
bls main_loop_start
main_loop_end:
//update state here
//load end_of_stream and flush together
ldr w_end_of_stream, [stream, _end_of_stream]
//(stream->end_of_stream || stream->flush != 0)
cbz w_end_of_stream, update_state_exit
mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
str w_tmp0, [stream, _internal_state+_state]
update_state_exit:
update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
exit_ret:
pop_stack
ret
exit_save_state:
ldr w_end_of_stream, [stream, _end_of_stream]
cbz w_end_of_stream, exit_ret //(stream->end_of_stream || stream->flush != 0)
mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
str w_tmp0, [stream, _internal_state+_state]
b exit_ret
.size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64