/********************************************************************** Copyright(c) 2019 Arm Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Arm Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
**********************************************************************/
#include "../include/aarch64_label.h"

	.arch armv8-a+crc
	.text
	.align 2

#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"

/* declare Macros */

/*
 * declare_generic_reg name, reg, default
 *
 * Creates three register aliases for register number "reg":
 *   name    -> the view selected by "default" (x or w)
 *   w_name  -> the 32-bit view
 *   x_name  -> the 64-bit view
 */
.macro	declare_generic_reg name:req,reg:req,default:req
	\name		.req	\default\reg
	w_\name		.req	w\reg
	x_\name		.req	x\reg
.endm

/*
 * convert_dist_to_dist_sym dist, tmp0, tmp1
 *
 * In:   w_dist = match distance
 * Out:  w_dist = distance symbol:
 *         dist > 32768 : -1
 *         dist <= 4    : dist - 1
 *         otherwise    : with d = dist - 1 and n = 30 - clz(d),
 *                        symbol = (d >> n) + 2 * n
 * Clobbers: w_tmp0, w_tmp1, condition flags.
 *
 * The exit label carries the per-expansion counter so that the macro can
 * be expanded more than once in a file without a duplicate-symbol error.
 */
.macro	convert_dist_to_dist_sym dist:req,tmp0:req,tmp1:req
	mov	w_\tmp0, w_\dist
	mov	w_\dist, -1
	cmp	w_\tmp0, 32768
	bhi	.dist2code_done\@
	sub	w_\dist, w_\tmp0, #1
	cmp	w_\tmp0, 4
	bls	.dist2code_done\@
	clz	w_\tmp1, w_\dist
	mov	w_\tmp0, 30
	sub	w_\tmp0, w_\tmp0, w_\tmp1
	lsr	w_\dist, w_\dist, w_\tmp0
	add	w_\dist, w_\dist, w_\tmp0, lsl 1
.dist2code_done\@:
.endm

/*
 * convert_length_to_len_sym length, length_out, tmp0
 *
 * In:   w_length     = match length, used as an index into the 32-bit
 *                      entries of len_to_code_tab
 * Out:  w_length_out = len_to_code_tab[length] + 256
 * Clobbers: x_tmp0 (holds the table address).
 */
.macro	convert_length_to_len_sym length:req,length_out:req,tmp0:req
#ifndef __APPLE__
	adrp	x_\tmp0, .len_to_code_tab_lanchor
	add	x_\tmp0, x_\tmp0, :lo12:.len_to_code_tab_lanchor
#else
	adrp	x_\tmp0, .len_to_code_tab_lanchor@PAGE
	add	x_\tmp0, x_\tmp0, .len_to_code_tab_lanchor@PAGEOFF
#endif
	ldr	w_\length_out, [x_\tmp0, w_\length, uxtw 2]
	add	w_\length_out, w_\length_out, 256
.endm

/*
 * len_to_code_tab: 264 32-bit entries (1056 bytes) indexed by match length.
 * Entries 0..2 are zero, entries 3..258 hold 0x01..0x1d, and the last five
 * entries are zero.
 */
	ASM_DEF_RODATA
	.align	4
	.len_to_code_tab_lanchor = . + 0
#ifndef __APPLE__
	.type	len_to_code_tab, %object
	.size	len_to_code_tab, 1056
#endif
len_to_code_tab:
	.word 0x00, 0x00, 0x00
	.word 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
	.word 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c
	.word 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0e
	.word 0x0f, 0x0f, 0x0f, 0x0f, 0x10, 0x10, 0x10, 0x10
	.word 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11
	.word 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12
	.word 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13
	.word 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14
	.word 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
	.word 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
	.word 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
	.word 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
	.word 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
	.word 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
	.word 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
	.word 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
	.word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
	.word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
	.word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
	.word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
	.word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
	.word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
	.word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
	.word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
	.word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
	.word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
	.word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
	.word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
	.word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
	.word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
	.word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
	.word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1d
	.word 0x00, 0x00, 0x00, 0x00, 0x00

	.text
	.global	cdecl(isal_update_histogram_aarch64)
	.arch armv8-a+crc
#ifndef __APPLE__
	.type	isal_update_histogram_aarch64, %function
#endif

/*
 * void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
 *                                    struct isal_huff_histogram *histogram);
 *
 * Scans length bytes starting at start_stream and increments 64-bit
 * counters inside histogram:
 *   - one literal counter per byte that is not covered by a match,
 *   - one length-symbol and one distance-symbol counter per match longer
 *     than 3 bytes found through the 16-bit hash table,
 *   - the counter at index 256 of the literal/length array, once, at the end.
 *
 * In:   x0 = start_stream
 *       w1 = length (signed; when length <= 0 the function returns at once
 *            and touches nothing)
 *       x2 = histogram, accessed with the layout given by the constants
 *            below: literal/length counters at offset 0, distance counters
 *            at dist_offset, hash table at hash_offset.  The hash table
 *            (hash_table_size bytes) is cleared with memset on entry.
 * Out:  none
 * Saved on the stack and restored: x19-x23, x29, x30 (64-byte frame).
 * Scratch: x0-x12 and condition flags, plus whatever memset and the
 *          compare_max_258_bytes macro (defined outside this file) clobber.
 */

	/* arguments */
	declare_generic_reg start_stream,	0,x
	declare_generic_reg length,		1,x
	declare_generic_reg histogram,		2,x

	/* argument registers as handed to compare_max_258_bytes */
	declare_generic_reg param0,		0,x
	declare_generic_reg param1,		1,x
	declare_generic_reg param2,		2,x

	/* local variable */
	/* x10 and x12 are only written after the memset call returns */
	declare_generic_reg start_stream_saved,	10,x
	declare_generic_reg histogram_saved,	23,x
	declare_generic_reg current,		19,x
	declare_generic_reg last_seen,		20,x
	declare_generic_reg end_stream,		21,x
	declare_generic_reg loop_end_iter,	22,x
	declare_generic_reg dist_histogram,	12,x
	/* same register as histogram_saved: relies on lit_len_offset == 0 */
	declare_generic_reg lit_len_histogram,	23,x
	declare_generic_reg literal,		8,x
	declare_generic_reg next_hash,		9,x
	declare_generic_reg end,		4,x
	declare_generic_reg dist,		7,x
	declare_generic_reg D,			11,w
	declare_generic_reg match_length,	3,w

	declare_generic_reg tmp0,		5,w
	declare_generic_reg tmp1,		6,w

/* constant */
.equ	LIT_LEN,	286
.equ	DIST_LEN,	30

.equ	lit_len_offset,	0
.equ	dist_offset,	(8*LIT_LEN)			// 2288
.equ	hash_offset,	(dist_offset + 8*DIST_LEN)	// 2528
.equ	hash_table_size,	(8*1024*2)		// 16384

cdecl(isal_update_histogram_aarch64):
	cmp	w_length, 0
	ble	.done				// nothing to do for length <= 0

	/* prologue: 64-byte frame holding x29/x30 and x19-x23 */
	stp	x29, x30, [sp, -64]!
	mov	x29, sp
	stp	x19, x20, [sp, 16]
	stp	x21, x22, [sp, 32]
	str	x23, [sp, 48]

	/* move everything that must survive the memset call out of x0-x2 */
	add	last_seen, histogram, hash_offset
	add	end_stream, start_stream, w_length, sxtw
	mov	current, start_stream
	sub	loop_end_iter, end_stream, #3	// last position with 4 readable bytes, plus one
	mov	histogram_saved, histogram

	/* memset(last_seen, 0, hash_table_size) */
	mov	x0, last_seen
	mov	w1, 0
	mov	x2, hash_table_size
	bl	cdecl(memset)

	cmp	current, loop_end_iter
	bcs	.loop_end			// 3 bytes or fewer: only the tail loop runs

	mov	start_stream_saved, current
	add	dist_histogram, histogram_saved, dist_offset
	mov	D, 32766			// a candidate is accepted when dist - 1 <= 32766
	b	.loop

	.align 2
	/* literal path: count the low byte of the word loaded at current */
.loop_2nd_stream:
	and	literal, literal, 0xff
	mov	current, next_hash		// advance by one byte

	ldr	x0, [lit_len_histogram, literal, lsl 3]
	add	x0, x0, 1
	str	x0, [lit_len_histogram, literal, lsl 3]

	cmp	loop_end_iter, current
	bls	.loop_end

	/*
	 * Main loop.  Invariant: current < loop_end_iter, so the 4-byte load
	 * at current stays inside the input.
	 */
.loop:
	ldr	w_literal, [current]
	add	next_hash, current, 1

	/* slot = (crc32c(0, word) & 0x1fff) * 2 : byte offset of a 16-bit entry */
	crc32cw	w0, wzr, w_literal
	ubfiz	x0, x0, 1, 13

	/* replace the stored position and form the 16-bit distance to the old one */
	sub	x2, current, start_stream_saved
	ldrh	w_dist, [last_seen, x0]
	strh	w2, [last_seen, x0]
	sub	w2, w2, w_dist
	and	w_dist, w2, 65535

	/* unsigned test: rejects dist == 0 as well as dist > 32767 */
	sub	w0, w_dist, #1
	cmp	w0, D
	bhi	.loop_2nd_stream

	/* x0 = current - dist, x1 = current, w2 = end_stream - current */
	sub	w2, w_end_stream, w_current
	mov	x1, current
	sub	x0, current, w_dist, uxth

	/* macro defined outside this file; its result is read from match_length */
	compare_max_258_bytes param0,param1,param2,match_length,tmp0,tmp1

	cmp	match_length, 3
	bls	.loop_2nd_stream		// too short: treat as a literal

	/* end = min(current + 3, loop_end_iter); hash positions next_hash .. end - 1 */
	add	end, current, 3
	cmp	end, loop_end_iter
	csel	end, end, loop_end_iter, ls
	cmp	end, next_hash
	bls	.skip_inner_loop

	.align 3
.inner_loop:
	ldr	w0, [next_hash]
	crc32cw	w0, wzr, w0

	ubfiz	x0, x0, 1, 13
	sub	x1, next_hash, start_stream_saved
	add	next_hash, next_hash, 1
	cmp	next_hash, end
	strh	w1, [last_seen, x0]		// strh leaves the flags from cmp intact
	bne	.inner_loop

.skip_inner_loop:
	/* dist_histogram[dist symbol] += 1 */
	convert_dist_to_dist_sym dist, tmp0, tmp1
	uxtw	x2, w_dist
	ldr	x1, [dist_histogram, x2, lsl 3]
	add	x1, x1, 1
	str	x1, [dist_histogram, x2, lsl 3]

	/* lit_len_histogram[length symbol] += 1 */
	convert_length_to_len_sym match_length,tmp1,tmp0
	uxtw	x0, w_tmp1
	ldr	x1, [lit_len_histogram, x0, lsl 3]
	add	x1, x1, 1
	str	x1, [lit_len_histogram, x0, lsl 3]

	/* step over the whole match */
	add	current, current, w_match_length, uxtw
	cmp	loop_end_iter, current
	bhi	.loop

	.align 3
	/*
	 * Tail: every exit from the main loop has current >= end_stream - 3,
	 * so at most three bytes remain; count each of them as a literal.
	 */
.loop_end:
	cmp	end_stream, current
	bls	.loop_fold_end
.loop_fold:
	ldrb	w1, [current], 1
	ldr	x0, [lit_len_histogram, x1, lsl 3]
	add	x0, x0, 1
	str	x0, [lit_len_histogram, x1, lsl 3]
	cmp	end_stream, current
	bhi	.loop_fold

.loop_fold_end:
	/* count literal/length symbol 256 once (end-of-block in DEFLATE) */
	ldr	x0, [lit_len_histogram, (256*8)]
	add	x0, x0, 1
	str	x0, [lit_len_histogram, (256*8)]

	/* epilogue */
	ldr	x23, [sp, 48]
	ldp	x19, x20, [sp, 16]
	ldp	x21, x22, [sp, 32]
	ldp	x29, x30, [sp], 64
	ret

	.align 2
.done:
	ret
#ifndef __APPLE__
	.size	isal_update_histogram_aarch64, .-isal_update_histogram_aarch64
#endif