mirror of
https://github.com/intel/isa-l.git
synced 2025-01-07 15:22:25 +01:00
1187583a97
- It should be fine to enable pmull always on Apple Silicon
- macOS 12+ is required for PMULL instruction.
- Changed the conditional macro to __APPLE__
- Rewritten dispatcher using sysctlbyname
- Use __USER_LABEL_PREFIX__
- Use __TEXT,__const as readonly section
- use ASM_DEF_RODATA macro
- fix func decl

Change-Id: I800593f21085d8187b480c8bb3ab2bd70c4a6974
Signed-off-by: Taiju Yamada <tyamada@bi.a.u-tokyo.ac.jp>
209 lines
5.9 KiB
ArmAsm
209 lines
5.9 KiB
ArmAsm
/**********************************************************************
|
|
Copyright(c) 2019 Arm Corporation All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions
|
|
are met:
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in
|
|
the documentation and/or other materials provided with the
|
|
distribution.
|
|
* Neither the name of Arm Corporation nor the names of its
|
|
contributors may be used to endorse or promote products derived
|
|
from this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
**********************************************************************/
|
|
|
|
#include "../include/aarch64_label.h"
|
|
|
|
.arch armv8-a
|
|
.text
|
|
.align 2
|
|
|
|
#include "lz0a_const_aarch64.h"
|
|
#include "data_struct_aarch64.h"
|
|
#include "huffman_aarch64.h"
|
|
#include "bitbuf2_aarch64.h"
|
|
#include "stdmac_aarch64.h"
|
|
|
|
/*
 * declare_generic_reg NAME, REG, DEFAULT
 *
 * Defines three .req aliases for general-purpose register number REG:
 *   x_NAME  -> xREG         (64-bit view)
 *   w_NAME  -> wREG         (32-bit view)
 *   NAME    -> DEFAULT REG  (DEFAULT is the width letter, w or x, that the
 *                            plain name should refer to)
 */
.macro	declare_generic_reg name:req,reg:req,default:req
	x_\name		.req	x\reg
	w_\name		.req	w\reg
	\name		.req	\default\reg
.endm
|
|
|
|
/*
 * Exported routine, C prototype:
 *
 * void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
 *				struct deflate_icf *match_lookup)
 *
 * The symbol is emitted in .text, 4-byte aligned. ELF symbol type
 * information is only emitted when not building for Apple targets.
 */
	.text
	.p2align	2
	.globl	cdecl(set_long_icf_fg_aarch64)
#ifndef __APPLE__
	.type	set_long_icf_fg_aarch64, %function
#endif
|
|
|
|
/*
 * Register aliases used by set_long_icf_fg_aarch64.
 * Each declare_generic_reg line defines NAME (with the listed width) plus
 * w_NAME and x_NAME views of the same register number.
 */

	/* incoming arguments, x0-x3 in prototype order */
	declare_generic_reg	next_in_param,		0, x
	declare_generic_reg	processed_param,	1, x
	declare_generic_reg	input_size_param,	2, x
	declare_generic_reg	match_lookup_param,	3, x

	/* x0-x2 again, under the names passed to compare_aarch64 */
	declare_generic_reg	param0,			0, x
	declare_generic_reg	param1,			1, x
	declare_generic_reg	param2,			2, x

	/* compare_aarch64 result and its two temporaries (w4-w6) */
	declare_generic_reg	match_length,		4, w
	declare_generic_reg	tmp0,			5, w
	declare_generic_reg	tmp1,			6, w

	/*
	 * Scratch values for the entry being examined:
	 *   len                 masked length field of the current entry
	 *   dist_code           distance code field of the current entry
	 *   dist_extra          distance extra-bits field of the current entry
	 * Loop constants loaded once before the loop:
	 *   shortest_match_len  LEN_OFFSET + SHORTEST_MATCH - 1
	 *   len_max             LEN_MAX_CONST
	 *   const_8             the value 8
	 */
	declare_generic_reg	len,			7, w
	declare_generic_reg	dist_code,		8, w
	declare_generic_reg	shortest_match_len,	9, w
	declare_generic_reg	len_max,		10, w
	declare_generic_reg	dist_extra,		11, w
	declare_generic_reg	const_8,		13, x

	/*
	 * Loop state held in x19-x23, which the function saves on entry and
	 * restores before returning:
	 *   match_lookup   current entry pointer
	 *   next_in        current input position
	 *   dist_start     base address of the distance table used for lookups
	 *   end_processed  next_in + processed (loop bound)
	 *   end_in         upper bound used when sizing the compare
	 */
	declare_generic_reg	match_lookup,		19, x
	declare_generic_reg	next_in,		20, x
	declare_generic_reg	dist_start,		21, x
	declare_generic_reg	end_processed,		22, x
	declare_generic_reg	end_in,			23, x
|
|
|
|
/*
 * Constants for set_long_icf_fg_aarch64.
 *
 *   LEN_OFFSET       bias added to a measured match length to form the
 *                    length value stored in an entry
 *   SHORTEST_MATCH   LEN_OFFSET + SHORTEST_MATCH - 1 is the floor applied to
 *                    the next entry length in the inner loop test
 *   LEN_MAX_CONST    cap applied to the length value that is written back
 *   ISAL_LOOK_AHEAD  number of bytes past the processed range that the
 *                    compare is allowed to read up to
 *   DIST_START_SIZE  size in bytes of the distance table (32 words)
 */
	.equ	LEN_OFFSET,		254
	.equ	SHORTEST_MATCH,		4
	.equ	LEN_MAX_CONST,		512
	.equ	ISAL_LOOK_AHEAD,	288
	.equ	DIST_START_SIZE,	(32 * 4)
|
|
|
|
/*
 * void set_long_icf_fg_aarch64(uint8_t *next_in, uint64_t processed,
 *				uint64_t input_size,
 *				struct deflate_icf *match_lookup)
 *
 * ABI:  AAPCS64. x0 = next_in, x1 = processed, x2 = input_size,
 *       x3 = match_lookup. No return value; match_lookup entries are
 *       rewritten in place.
 *
 * Walks the 4-byte match_lookup entries for the input positions
 * next_in .. next_in + processed. For an entry whose length field is above
 * LEN_OFFSET + 7, the match is re-measured with compare_aarch64 starting
 * 8 bytes in. When the measured length beats the stored one, this entry and
 * the following ones are rewritten with the (decreasing) longer length for
 * as long as it stays above both the next stored length and
 * LEN_OFFSET + SHORTEST_MATCH - 1.
 *
 * Entry layout as read and written below (one 32-bit word):
 *   low bits selected by LIT_LEN_MASK   length value
 *   bits [18:10]                        distance code
 *   bits [31:19]                        distance extra bits
 *
 * The distance table is only ever read, so it is addressed in place in
 * read-only data instead of being copied to the stack on every call. That
 * removes the external memcpy call and shrinks the frame to 64 bytes.
 *
 * Registers: x0-x2 and w4-w6 are compare_aarch64 operands, w7-w11 and x13
 * are scratch, x19-x23 hold loop state and are saved/restored here.
 *
 * NOTE(review): compare_aarch64 comes from an included header. It is used
 * here as if it leaves in match_length the number of equal leading bytes
 * at param0 and param1, limited by param2 -- confirm against its
 * definition.
 */
cdecl(set_long_icf_fg_aarch64):
	/* 64-byte frame: frame record plus the five callee-saved registers */
	stp	x29, x30, [sp, -64]!
	mov	x29, sp
	stp	x19, x20, [sp, 16]
	stp	x21, x22, [sp, 32]
	str	x23, [sp, 48]

	/* move the arguments out of x0-x3, which are reused as scratch below */
	add	end_processed, next_in_param, processed_param	// next_in + processed
	mov	next_in, next_in_param
	add	end_in, next_in_param, input_size_param		// next_in + input_size
	mov	match_lookup, match_lookup_param

	/* dist_start = address of the read-only distance table */
#ifndef __APPLE__
	adrp	dist_start, .data_dist_start
	add	dist_start, dist_start, :lo12:.data_dist_start
#else
	adrp	dist_start, .data_dist_start@PAGE
	add	dist_start, dist_start, .data_dist_start@PAGEOFF
#endif

	/* end_in = min(end_in, end_processed + ISAL_LOOK_AHEAD), unsigned */
	add	x_tmp0, end_processed, ISAL_LOOK_AHEAD	// 288
	cmp	end_in, x_tmp0
	csel	end_in, end_in, x_tmp0, cc
	cmp	next_in, end_processed
	bcs	.done				// next_in >= end_processed: nothing to do

	/* loop constants */
	mov	const_8, 8
	mov	len_max, LEN_MAX_CONST	// 512
	mov	shortest_match_len, (LEN_OFFSET + SHORTEST_MATCH - 1)	// 257
	b	.while_outer_loop

	.align	2
.while_outer_check:
	/* advance one input byte and one entry; stop at end_processed */
	add	next_in, next_in, 1
	add	match_lookup, match_lookup, 4
	cmp	end_processed, next_in
	bls	.done

.while_outer_loop:
	/* skip entries whose length field is not above LEN_OFFSET + 7 */
	ldrh	len, [match_lookup]
	and	len, len, LIT_LEN_MASK	// 1023
	cmp	len, (LEN_OFFSET + 8 - 1)	// 261
	bls	.while_outer_check

	/* unpack the distance fields and set up the compare operands */
	ldr	dist_code, [match_lookup]
	add	x1, next_in, 8			// param1 = next_in + 8
	ldrh	dist_extra, [match_lookup, 2]
	sub	w2, w_end_in, w1		// param2 = end_in - (next_in + 8), 32-bit
	ubfx	x_dist_code, x_dist_code, 10, 9	// entry bits [18:10]
	ubfx	x_dist_extra, x_dist_extra, 3, 13	// upper-half bits [15:3] = entry bits [31:19]
	uxtw	x0, dist_code
	ldr	w0, [dist_start, x0, lsl 2]	// table[dist_code]
	add	w0, dist_extra, w0		// distance = table[dist_code] + dist_extra
	sub	x0, const_8, x0
	add	x0, next_in, x0			// param0 = next_in + 8 - distance

	compare_aarch64	param0,param1,param2,match_length,tmp0,tmp1
	mov	w0, w_match_length

	/* w0 = candidate length value; keep the entry if it is not longer */
	add	w0, w0, (LEN_OFFSET + 8)	// 262
	cmp	w0, len
	bls	.while_outer_check

	/* w2 = distance fields repositioned for the rewritten entries */
	lsl	w2, dist_extra, 19
	orr	w2, w2, dist_code, lsl 10

	.align	3
.while_inner_loop:
	cmp	w0, LEN_MAX_CONST	// 512
	add	next_in, next_in, 1
	csel	w1, w0, len_max, ls		// min(w0, LEN_MAX_CONST)
	sub	w0, w0, #1			// length for the next position
	orr	w1, w1, w2
	str	w1, [match_lookup]		// rewrite the current entry
	ldrh	w1, [match_lookup, 4]!		// step to the next entry, load its low half

	and	w1, w1, LIT_LEN_MASK	// 1023
	cmp	w1, (LEN_OFFSET + SHORTEST_MATCH - 1)	// 257
	csel	w1, w1, shortest_match_len, cs	// max(next length, 257)
	cmp	w1, w0
	bcc	.while_inner_loop		// continue while w0 is still larger

	/* step past the entry that ended the run and resume the outer loop */
	add	next_in, next_in, 1
	add	match_lookup, match_lookup, 4
	cmp	end_processed, next_in
	bhi	.while_outer_loop

.done:
	ldp	x19, x20, [sp, 16]
	ldp	x21, x22, [sp, 32]
	ldr	x23, [sp, 48]
	ldp	x29, x30, [sp], 64
	ret
#ifndef __APPLE__
	.size	set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
#endif
|
|
|
|
/*
 * Distance table: 32 words (128 bytes), 8-byte aligned, indexed by distance
 * code. Entries 0-29 are the base distance for each code (the values match
 * the DEFLATE base distances 1 .. 24577); entries 30 and 31 are zero.
 * .data_dist_start is the name the code references; it is set to the
 * address of .real_data_dist_start.
 */
ASM_DEF_RODATA
	.p2align	3
	.set	.data_dist_start, . + 0
.real_data_dist_start:
	.word	0x0001, 0x0002, 0x0003, 0x0004	/* codes  0- 3 */
	.word	0x0005, 0x0007, 0x0009, 0x000d	/* codes  4- 7 */
	.word	0x0011, 0x0019, 0x0021, 0x0031	/* codes  8-11 */
	.word	0x0041, 0x0061, 0x0081, 0x00c1	/* codes 12-15 */
	.word	0x0101, 0x0181, 0x0201, 0x0301	/* codes 16-19 */
	.word	0x0401, 0x0601, 0x0801, 0x0c01	/* codes 20-23 */
	.word	0x1001, 0x1801, 0x2001, 0x3001	/* codes 24-27 */
	.word	0x4001, 0x6001, 0x0000, 0x0000	/* codes 28-29, then two zero entries */
|