/**********************************************************************
  Copyright(c) 2019 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"

	.arch armv8-a+crc+crypto
	.text
	.align	2

#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"

/*
 * Syntax: GNU as, AArch64, run through the C preprocessor.
 * ABI:    AAPCS64. Only x0-x15 are used as working registers.
 */

/* declare Macros */

/*
 * declare_generic_reg name, reg, default
 * Creates three register aliases for register number "reg":
 *   name   -> register with the width prefix given by "default" (x or w)
 *   w_name -> 32-bit view
 *   x_name -> 64-bit view
 */
.macro	declare_generic_reg name:req,reg:req,default:req
	\name		.req	\default\reg
	w_\name		.req	w\reg
	x_\name		.req	x\reg
.endm

/*
 * tzbytecnt val, scratch
 * In:    x_val     = 64-bit value
 * Out:   w_val     = number of trailing all-zero bytes of the input (0..8)
 * Clobb: x_scratch
 *
 * rbit followed by clz counts the trailing zero bits. clz yields 64 when
 * its operand is zero, so a zero input gives 64 >> 3 = 8 without any
 * special-case compare/select.
 */
.macro	tzbytecnt	param0:req,param1:req
	rbit	x_\param1, x_\param0
	clz	x_\param1, x_\param1
	lsr	w_\param0, w_\param1, 3
.endm

/*
 * write_deflate_icf dst, f0, f1, f2
 * Stores the 32-bit word  w_f0 | (w_f1 << 10) | (w_f2 << 19)  to [x_dst].
 * Clobb: w_f0 (holds the packed word afterwards)
 */
.macro	write_deflate_icf	param0:req,param1:req,param2:req,param3:req
	orr	w_\param1, w_\param1, w_\param3, lsl 19
	orr	w_\param1, w_\param1, w_\param2, lsl 10
	str	w_\param1, [x_\param0]
.endm

	.align 2
	.global cdecl(gen_icf_map_h1_aarch64)
#ifndef __APPLE__
	.type gen_icf_map_h1_aarch64, %function
#endif

	/*
	 * Register roles. Several aliases share one physical register; the
	 * argument aliases are only valid until the values have been moved
	 * to their working registers at function entry.
	 */

	/* arguments */
	declare_generic_reg	stream_param,			0,x
	declare_generic_reg	matches_icf_lookup_param,	1,x
	declare_generic_reg	input_size_param,		2,x

	/* operands handed to the macros above (same registers as tmp0-tmp3) */
	declare_generic_reg	param0,		0,x
	declare_generic_reg	param1,		1,x
	declare_generic_reg	param2,		2,x
	declare_generic_reg	param3,		3,x

	/* return */
	declare_generic_reg	ret_val,	0,x

	/* variables */
	declare_generic_reg	input_size,	3,x	/* only live until the loop bound is computed */
	declare_generic_reg	next_in,	4,x	/* current input position */
	declare_generic_reg	matches_icf_lookup,	6,x	/* current output slot (4 bytes each) */
	declare_generic_reg	hash_table,	7,x	/* base of the 16-bit hash table */
	declare_generic_reg	end_in,		8,x	/* loop bound; holds the start pointer during setup */
	declare_generic_reg	file_start,	9,x	/* next_in - total_in */
	declare_generic_reg	hash_mask,	10,w
	declare_generic_reg	hist_size,	11,w	/* loaded from offset_state_dist_mask */
	declare_generic_reg	stream_saved,	12,x
	declare_generic_reg	literal_32,	13,w	/* constant 32 */
	declare_generic_reg	literal_1,	14,w	/* constant 1 */
	declare_generic_reg	dist,		15,w

	/* scratch */
	declare_generic_reg	tmp_has_hist,	0,w
	declare_generic_reg	tmp_offset_hash_table,	1,x
	declare_generic_reg	tmp0,		0,x
	declare_generic_reg	tmp1,		1,x
	declare_generic_reg	tmp2,		2,x
	declare_generic_reg	tmp3,		3,x
	declare_generic_reg	tmp5,		5,x

/* constant */
.equ	ISAL_LOOK_AHEAD, 288
.equ	SHORTEST_MATCH, 4
.equ	LEN_OFFSET, 254
.equ	icf_literal_dist_code, 0x1e	/* second ICF field value written for a literal */

/* mask */
.equ	mask_lit_dist, 0x7800		/* icf_literal_dist_code << 10 */

/* offset of struct isal_zstream */
.equ	offset_next_in, 0
.equ	offset_avail_in, 8
.equ	offset_total_in, 12
.equ	offset_next_out, 16
.equ	offset_avail_out, 24
.equ	offset_total_out, 28
.equ	offset_hufftables, 32
.equ	offset_level, 40
.equ	offset_level_buf_size, 44
.equ	offset_level_buf, 48
.equ	offset_end_of_stream, 56
.equ	offset_flush, 58
.equ	offset_gzip_flag, 60
.equ	offset_hist_bits, 62
.equ	offset_state, 64
.equ	offset_state_block_end, 72
.equ	offset_state_dist_mask, 76	/* hash mask is read from the following word (ldp) */
.equ	offset_state_has_hist, 135

/* offset of struct level_buf */
.equ	offset_hash_map_hash_table, 4712

/*
 * AArch64 implementation with the same signature as:
 *
 * uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
 *			     struct deflate_icf *matches_icf_lookup,
 *			     uint64_t input_size)
 *
 * In:    x0 = stream
 *        x1 = matches_icf_lookup (one 32-bit word is written per input byte)
 *        x2 = input_size
 * Out:   x0 = number of input bytes processed (0 when
 *             input_size < ISAL_LOOK_AHEAD)
 * Clobb: x0-x15, flags
 * Stack: 16 bytes (x29/x30 frame record)
 *
 * For every position in [stream->next_in, stream->next_in + input_size -
 * ISAL_LOOK_AHEAD) the 4 bytes at that position are hashed with crc32c,
 * the previous position stored for that hash is fetched and replaced, and
 * the 8 bytes at both positions are compared. Four or more equal leading
 * bytes produce a match word, otherwise a literal word is written.
 */
cdecl(gen_icf_map_h1_aarch64):
	cmp	input_size_param, (ISAL_LOOK_AHEAD-1)
	bls	.fast_exit			// input_size < ISAL_LOOK_AHEAD: return 0
	stp	x29, x30, [sp, -16]!

	mov	stream_saved, stream_param
	mov	matches_icf_lookup, matches_icf_lookup_param
	mov	x29, sp

	ldrb	tmp_has_hist, [stream_saved, offset_state_has_hist]
	mov	tmp_offset_hash_table, offset_hash_map_hash_table
	ldr	end_in, [stream_saved, offset_next_in]	// start pointer for now
	mov	input_size, input_size_param
	ldr	hash_table, [stream_saved, offset_level_buf]
	ldr	w_file_start, [stream_saved, offset_total_in]
	ldp	hist_size, hash_mask, [stream_saved, offset_state_dist_mask]
	add	hash_table, hash_table, tmp_offset_hash_table
	sub	file_start, end_in, file_start	// file_start = next_in - total_in
	cbz	tmp_has_hist, .igzip_no_hist
	b	.while_check1

	.align 3
.igzip_no_hist:
	/*
	 * has_hist flag is zero: the first input byte is written as a
	 * literal, its hash slot is seeded and the flag is set to 1.
	 */
	ldrb	w_tmp1, [end_in]
	add	next_in, end_in, 1
	orr	w_tmp0, w_tmp1, mask_lit_dist	// byte | (0x1e << 10), upper field zero
	str	w_tmp0, [matches_icf_lookup], 4
	ldr	w_tmp0, [end_in]
	crc32cw	w_tmp0, wzr, w_tmp0

	and	w_tmp5, w_tmp0, hash_mask
	sub	x_tmp1, end_in, file_start
	mov	w_tmp2, 1
	mov	x_tmp0, 1			// return value if the loop is skipped
	strh	w_tmp1, [hash_table, x_tmp5, lsl 1]
	strb	w_tmp2, [stream_saved, offset_state_has_hist]
	b	.while_check2

.while_check1:
	mov	next_in, end_in
	mov	x_tmp0, 0			// return value if the loop is skipped

.while_check2:
	/* end_in = start + input_size - ISAL_LOOK_AHEAD */
	sub	input_size, input_size, ISAL_LOOK_AHEAD
	add	end_in, end_in, input_size
	cmp	next_in, end_in
	bcs	.exit				// nothing to scan, x0 already holds 0 or 1
	mov	literal_32, 32
	mov	literal_1, 1
	b	.while_loop

	.align 3
.new_match_found:
	/*
	 * In: w_tmp0 = match length in bytes (SHORTEST_MATCH..8)
	 *     w_tmp2 = dist - 1,  w_tmp3 = 0
	 * Builds: w_tmp1 = length + LEN_OFFSET
	 *         for dist <= 2: w_tmp2 = dist - 1, w_tmp3 = 0
	 *         otherwise, with n = (32 - clz(dist - 1)) - 2:
	 *             w_tmp3 = (dist - 1) & ((1 << n) - 1)
	 *             w_tmp2 = ((dist - 1) >> n) + 2 * n
	 */
	clz	w_tmp5, w_tmp2
	add	w_tmp1, w_tmp0, LEN_OFFSET
	sub	w_tmp5, literal_32, w_tmp5
	cmp	dist, 2
	sub	w_tmp5, w_tmp5, #2
	bls	.skip_compute_dist_icf_code

	lsl	w_tmp3, literal_1, w_tmp5
	sub	w_tmp3, w_tmp3, #1
	lsr	w_tmp0, w_tmp2, w_tmp5
	and	w_tmp3, w_tmp3, w_tmp2
	add	w_tmp2, w_tmp0, w_tmp5, lsl 1

.skip_compute_dist_icf_code:
	mov	param0, matches_icf_lookup
	write_deflate_icf param0,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	beq	.save_with_exit

.while_loop:
	/* Invariant: next_in < end_in on entry. */
	ldr	w_tmp0, [next_in]
	crc32cw	w_tmp0, wzr, w_tmp0

	and	w_tmp0, w_tmp0, hash_mask
	sub	x_tmp1, next_in, file_start	// current position relative to file_start
	lsl	x_tmp0, x_tmp0, 1		// byte offset of the 16-bit hash slot
	sub	w_tmp2, w_tmp1, #1
	ldrh	w_tmp3, [hash_table, x_tmp0]	// previous position for this hash
	strh	w_tmp1, [hash_table, x_tmp0]	// record current position
	sub	w_tmp2, w_tmp2, w_tmp3
	and	w_tmp2, w_tmp2, hist_size
	add	dist, w_tmp2, 1			// dist = ((pos - 1 - prev) & hist_size) + 1
	ldr	x_tmp0, [next_in]
	sub	x_tmp1, next_in, w_dist, uxtw
	ldr	x_tmp1, [x_tmp1]
	eor	x_tmp0, x_tmp1, x_tmp0		// zero bytes mark equal bytes
	tzbytecnt	param0,param1		// w_tmp0 = count of equal leading bytes

	cmp	w_tmp0, (SHORTEST_MATCH-1)
	mov	w_tmp3, 0
	bhi	.new_match_found

	/* Fewer than SHORTEST_MATCH equal bytes: write a literal word. */
	ldrb	w_param1, [next_in]
	mov	x_param0, matches_icf_lookup
	mov	w_param3, 0
	mov	w_param2, icf_literal_dist_code
	write_deflate_icf param0,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	bne	.while_loop

.save_with_exit:
	ldr	ret_val, [stream_saved, offset_next_in]
	sub	ret_val, next_in, ret_val	// bytes processed

.exit:
	ldp	x29, x30, [sp], 16
	ret

	.align 3
.fast_exit:
	mov	ret_val, 0
	ret
#ifndef __APPLE__
	.size gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64
#endif