/**********************************************************************
  Copyright(c) 2019 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

#include "../include/aarch64_label.h"

	.arch armv8-a+crc
	.text

#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"

/*
 * declare Macros
 *
 * declare_generic_reg name, reg, default
 *   Creates three register aliases for register number "reg":
 *     name    -> <default><reg>   (default is the letter x or w)
 *     w_name  -> w<reg>
 *     x_name  -> x<reg>
 */
.macro	declare_generic_reg name:req,reg:req,default:req
	\name		.req	\default\reg
	w_\name		.req	w\reg
	x_\name		.req	x\reg
.endm

/*
 * void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream);
 */

/* constant */

/* offset of struct isal_zstream */
.equ	offset_next_in, 0
.equ	offset_avail_in, 8
.equ	offset_total_in, 12
.equ	offset_next_out, 16
.equ	offset_avail_out, 24
.equ	offset_total_out, 28
.equ	offset_hufftables, 32
.equ	offset_level, 40
.equ	offset_level_buf_size, 44
.equ	offset_level_buf, 48
.equ	offset_end_of_stream, 56
.equ	offset_flush, 58
.equ	offset_gzip_flag, 60
.equ	offset_hist_bits, 62
.equ	offset_state, 64
.equ	offset_state_block_end, 72
.equ	offset_state_state, 84
.equ	offset_state_has_hist, 135

/* offset of struct level_buf */
.equ	offset_encode_tables, 0
.equ	offset_hist, 2176
.equ	offset_hist_d_hist, 2176
.equ	offset_hist_ll_hist, 2296
.equ	offset_deflate_hdr_count, 4348
.equ	offset_deflate_hdr_extra_bits, 4352
.equ	offset_deflate_hdr, 4356
.equ	offset_icf_buf_next, 4688
.equ	offset_icf_buf_avail_out, 4696
.equ	offset_icf_buf_start, 4704
.equ	offset_hash8k, 4712
.equ	offset_hash_hist, 4712

/* offset of struct isal_zstate */
.equ	offset_dist_mask, 12
.equ	offset_hash_mask, 16
.equ	offset_state_of_zstate, 20

/* macros*/
.equ	ISAL_LOOK_AHEAD, 288

/* arguments */
	declare_generic_reg	stream,		0,x
	declare_generic_reg	param0,		0,x
	declare_generic_reg	param1,		1,x
	declare_generic_reg	param2,		2,x
	declare_generic_reg	param3,		3,x
	declare_generic_reg	param4,		4,x
	declare_generic_reg	param5,		5,x
	declare_generic_reg	param6,		6,x

/*
 * local variable
 *
 * file_start is kept in x28 (callee-saved, spilled and reloaded by the
 * prologue and epilogue below) rather than x18.  x18 is the platform
 * register: it is reserved on Apple and Windows targets and must not be
 * used to hold a value across the body of a function there.
 */
	declare_generic_reg	stream_saved,	15,x
	declare_generic_reg	level_buf,	13,x
	declare_generic_reg	start_in,	21,x
	declare_generic_reg	start_out,	22,x
	declare_generic_reg	state,		23,x
	declare_generic_reg	end_out,	12,x
	declare_generic_reg	end_in,		11,x
	declare_generic_reg	next_in,	8,x
	declare_generic_reg	next_out,	10,x
	declare_generic_reg	next_out_iter,	5,x
	declare_generic_reg	file_start,	28,x
	declare_generic_reg	last_seen,	14,x

	declare_generic_reg	literal_code,	9,w
	declare_generic_reg	hash_mask,	19,w
	declare_generic_reg	hist_size,	20,w
	declare_generic_reg	dist,		7,w
	declare_generic_reg	dist_inc,	24,w

	declare_generic_reg	tmp0,		25,x
	declare_generic_reg	tmp1,		26,x
	declare_generic_reg	tmp2,		27,x

/*
 * write_deflate_icf_constprop
 *
 * Packs a 32-bit record at [x0]:
 *   bits  [9:0]  = w1[9:0]
 *   bits [18:10] = 30
 *   bits [31:19] = 0
 *
 * In:    x0 = address of the 32-bit record, w1 = value for bits [9:0]
 * Clobb: w1, w2, w3
 * NOTE(review): the constant 30 is presumably the "no distance" symbol
 * used for plain literals; confirm against the huffman headers.
 */
	.align 2
#ifndef __APPLE__
	.type	write_deflate_icf_constprop, %function
#endif
write_deflate_icf_constprop:
	ldrh	w2, [x0]
	mov	w3, 30
	bfi	w2, w1, 0, 10		// low 10 bits <- w1
	strh	w2, [x0]
	ldr	w1, [x0]
	bfi	w1, w3, 10, 9		// bits [18:10] <- 30
	str	w1, [x0]
	ubfx	x1, x1, 16, 3		// keep bits [18:16] only
	strh	w1, [x0, 2]		// rewrite high halfword, clearing bits [31:19]
	ret
#ifndef __APPLE__
	.size	write_deflate_icf_constprop, .-write_deflate_icf_constprop
#endif

/*
 * write_deflate_icf
 *
 * Packs a 32-bit record at [x0]:
 *   bits  [9:0]  = w1[9:0]
 *   bits [18:10] = w2[8:0]
 *   bits [31:19] = w3[12:0]
 *
 * In:    x0 = address of the 32-bit record, w1, w2, w3 = field values
 * Clobb: w1, w4
 */
	.align 2
#ifndef __APPLE__
	.type	write_deflate_icf, %function
#endif
write_deflate_icf:
	ldrh	w4, [x0]
	bfi	w4, w1, 0, 10		// low 10 bits <- w1
	strh	w4, [x0]
	ldr	w1, [x0]
	bfi	w1, w2, 10, 9		// bits [18:10] <- w2
	str	w1, [x0]
	lsr	w1, w1, 16
	bfi	w1, w3, 3, 13		// bits [31:19] of the record <- w3
	strh	w1, [x0, 2]
	ret
#ifndef __APPLE__
	.size	write_deflate_icf, .-write_deflate_icf
#endif

/*
 * update_state
 *
 * Writes the input and output cursors back into the stream and its
 * level buffer.  Reached by tail call from the exported function below.
 *
 * In:    x0 = stream
 *        x1 = start_in
 *        x2 = next_in
 *        x3 = end_in
 *        x4 = ignored on entry (overwritten with the level_buf pointer)
 *        x5 = next_out
 *        x6 = end_out
 * Clobb: x1, x3, x4, x6, x7, flags
 */
	.align 2
#ifndef __APPLE__
	.type	update_state, %function
#endif
update_state:
	sub	x7, x2, x1				// bytes consumed = next_in - start_in
	ldr	x4, [x0, offset_level_buf]
	cmp	x7, 0
	ble	.L48
	mov	w1, 1
	strb	w1, [x0, offset_state_has_hist]		// flag set only when input was consumed
.L48:
	ldr	w1, [x0, offset_total_in]
	sub	x6, x6, x5				// end_out - next_out, in bytes
	str	x2, [x0, offset_next_in]
	sub	x3, x3, x2				// remaining input = end_in - next_in
	add	w1, w1, w7				// total_in += bytes consumed
	stp	w3, w1, [x0, offset_avail_in]		// avail_in (8) and total_in (12) are adjacent
	str	w1, [x0, offset_state_block_end]
	asr	x6, x6, 2				// byte distance / 4 (records are 4 bytes)
	str	x5, [x4, offset_icf_buf_next]
	str	x6, [x4, offset_icf_buf_avail_out]
	ret
#ifndef __APPLE__
	.size	update_state, .-update_state
#endif

/*
 * void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream);
 *
 * In:    x0 = stream
 * Out:   none
 * Stack: 96 bytes (x29, x30 and x19-x28)
 *
 * Outline:
 *   - avail_in == 0: optionally store 2 into the state word and return.
 *   - first loop (while more than 3 input bytes remain): hash 4 bytes with
 *     CRC32C, look up and update the 16-bit table at last_seen, and emit
 *     either a match record (length > 3) or a literal record.
 *   - second loop: emit the remaining bytes as literal records.
 *   - finally tail-call update_state with the cursors.
 */
	.align 2
	.global	cdecl(isal_deflate_icf_finish_hash_hist_aarch64)
#ifndef __APPLE__
	.type	isal_deflate_icf_finish_hash_hist_aarch64, %function
#endif
cdecl(isal_deflate_icf_finish_hash_hist_aarch64):
	ldr	w_end_in, [stream, offset_avail_in]	// stream->avail_in
	cbz	w_end_in, .stream_not_available

	stp	x29, x30, [sp, -96]!
	add	x29, sp, 0
	stp	x19, x20, [sp, 16]
	stp	x21, x22, [sp, 32]
	stp	x23, x24, [sp, 48]
	stp	x25, x26, [sp, 64]
	stp	x27, x28, [sp, 80]

	mov	stream_saved, stream
	ldr	level_buf, [stream, offset_level_buf]		// 48
	ldr	start_in, [stream, offset_next_in]		// 0
	ldr	start_out, [level_buf, offset_icf_buf_next]	// 4688
	add	state, stream, offset_state			// 64
	ldr	end_out, [level_buf, offset_icf_buf_avail_out]	// 4696
	mov	next_in, start_in
	ldr	w_file_start, [stream, offset_total_in]		// 12
	mov	tmp0, offset_hash_hist				// 4712
	add	last_seen, level_buf, tmp0
	add	end_in, start_in, w_end_in, uxtw
	and	end_out, end_out, -4				// round down to whole 4-byte records
	mov	next_out, start_out
	ldp	hist_size, hash_mask, [state, offset_dist_mask]	// 12
	sub	file_start, start_in, file_start		// file_start = start_in - total_in
	add	end_out, start_out, end_out
	mov	next_out_iter, next_out
	add	x0, next_in, 3
	cmp	end_in, x0		// x0 <= next_in + 3
	bls	.while_first_end

	.p2align 3
.while_first:
	cmp	next_out, end_out
	bcs	.save_and_update_state			// output full
	ldr	literal_code, [next_in]			// 4 input bytes
	crc32cw	w0, wzr, literal_code
	and	w0, w0, hash_mask
	sub	x2, next_in, file_start			// position relative to file_start
	lsl	x0, x0, 1				// table entries are 16 bits wide
	ldrh	dist, [last_seen, x0]
	strh	w2, [last_seen, x0]
	sub	w2, w2, dist
	and	w_dist, w2, 65535			// dist = (position - previous) mod 2^16
	sub	dist_inc, dist, #1
	cmp	dist_inc, hist_size
	bcs	.skip_compare258			// unsigned: also rejects dist == 0
	sub	w2, w_end_in, w_next_in			// max compare length = end_in - next_in
	mov	x1, next_in
	sub	x0, next_in, w_dist, uxth
	compare_max_258_bytes param0,param1,param2,tmp2,tmp0,tmp1
	mov	w0, w_tmp2
	and	w2, w0, 65535
	cmp	w2, 3
	bhi	.while_first_match_length

.skip_compare258:
	and	literal_code, literal_code, 255		// get_lit_icf_code
	add	next_in, next_in, 1
	mov	w1, literal_code
	mov	x0, next_out
	add	x_literal_code, level_buf, literal_code, uxtb 2	// level_buf->hist.ll_hist
	ldr	w_tmp0, [x_literal_code, offset_hist_ll_hist]	// 2296
	add	w_tmp0, w_tmp0, 1
	str	w_tmp0, [x_literal_code, offset_hist_ll_hist]	// 2296
	bl	write_deflate_icf_constprop		// write_deflate_icf
	add	next_out, next_out, 4

.while_first_check:
	add	x0, next_in, 3
	mov	next_out_iter, next_out
	cmp	end_in, x0
	bhi	.while_first

.while_first_end:
	cmp	next_in, end_in
	bcs	.while_2nd_end				// flags are consumed by the beq there
	cmp	next_out, end_out
	bcc	.while_2nd_handle
	b	.save_and_update_state_2nd

	.p2align 2
.while_2nd:
	cmp	end_out, next_out_iter
	bls	.save_and_update_state_2nd
.while_2nd_handle:
	ldrb	w2, [next_in], 1
	mov	x0, next_out_iter
	add	next_out_iter, next_out_iter, 4
	mov	w1, w2
	add	x2, level_buf, w2, uxtb 2
	ldr	w_tmp0, [x2, offset_hist_ll_hist]	// 2296
	add	w_tmp0, w_tmp0, 1
	str	w_tmp0, [x2, offset_hist_ll_hist]	// 2296
	bl	write_deflate_icf_constprop
	cmp	end_in, next_in
	bne	.while_2nd
	mov	next_in, end_in
	b	.end_of_stream_check_and_exit

	.p2align 2
.while_first_match_length:
	and	w0, w0, 65535
	mov	w3, 0					// extra bits default to 0
	add	w1, w0, 254				// get_len_icf_code
	cmp	dist, 2
	bhi	.compute_dist_icf_code			// dist <= 2: code is dist - 1, no extra bits
.while_first_match_length_end:
	ubfiz	x_tmp2, x1, 2, 17			// length code * 4
	add	x_tmp1, level_buf, dist_inc, uxtw 2
	add	x_tmp2, level_buf, x_tmp2
	add	next_in, next_in, w2, uxth		// advance by match length
	mov	w2, dist_inc
	ldr	w_tmp0, [x_tmp2, offset_hist_ll_hist]	// 2296
	add	w_tmp0, w_tmp0, 1
	str	w_tmp0, [x_tmp2, offset_hist_ll_hist]	// 2296
	mov	x0, next_out
	ldr	w_tmp0, [x_tmp1, offset_hist_d_hist]	// 2176
	add	w_tmp0, w_tmp0, 1
	str	w_tmp0, [x_tmp1, offset_hist_d_hist]	// 2176
	bl	write_deflate_icf
	add	next_out, next_out, 4
	b	.while_first_check

	// compute_dist_icf_code
	// n = 30 - clz(dist - 1)
	// w3 (extra bits) = (dist - 1) & ((1 << n) - 1)
	// dist_inc (code) = ((dist - 1) >> n) + 2 * n
	.p2align 2
.compute_dist_icf_code:
	clz	w3, dist_inc
	mov	w0, 30
	sub	w0, w0, w3
	mov	w3, 1
	lsl	w3, w3, w0
	sub	w3, w3, #1
	and	w3, w3, dist_inc
	lsl	w4, w0, 1
	lsr	dist_inc, dist_inc, w0
	add	dist_inc, dist_inc, w4
	b	.while_first_match_length_end

.while_2nd_end:
	beq	.end_of_stream_check_and_exit		// next_in == end_in
	mov	param6, end_out
	b	.update_state

.end_of_stream_check_and_exit:
	// NOTE(review): this 32-bit load at offset 56 also covers offset_flush
	// (58), so the cbz tests both fields at once; presumably intentional.
	ldr	w_tmp0, [stream_saved, offset_end_of_stream]	// 56
	cbz	w_tmp0, .update_state_2nd
	b	.save_and_update_state_2nd

	.p2align 3
.save_and_update_state_2nd:
	mov	w_tmp0, 2
	str	w_tmp0, [state, offset_state_of_zstate]	// 20
.update_state_2nd:
	mov	param6, end_out
	b	.update_state

	.p2align 2
.save_and_update_state:
	mov	param6, end_out
	mov	param5, next_out
	mov	w_tmp0, 2
	str	w_tmp0, [state, offset_state_of_zstate]	// 20
.update_state:
	// param5 (x5) already holds the output cursor: next_out_iter is x5.
	mov	param4, start_out
	mov	param1, start_in
	mov	param3, end_in
	mov	param2, next_in
	mov	param0, stream_saved
	ldp	x19, x20, [sp, 16]
	ldp	x21, x22, [sp, 32]
	ldp	x23, x24, [sp, 48]
	ldp	x25, x26, [sp, 64]
	ldp	x27, x28, [sp, 80]
	ldp	x29, x30, [sp], 96
	b	update_state				// tail call; it returns to our caller

	.p2align 2
.stream_not_available:
	// NOTE(review): 32-bit load at offset 56 spans offset_end_of_stream (56)
	// and offset_flush (58); either being non-zero takes the store below.
	ldr	w1, [stream, offset_end_of_stream]	// 56
	cbz	w1, .done
	mov	w1, 2
	str	w1, [stream, offset_state_state]	// 84
.done:
	ret
#ifndef __APPLE__
	.size	isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64
#endif