isa-l/igzip/aarch64/igzip_set_long_icf_fg.S
Taiju Yamada 1187583a97 Fixes for aarch64 mac
- It should be fine to enable pmull always on Apple Silicon
- macOS 12+ is required for PMULL instruction.
- Changed the conditional macro to __APPLE__
- Rewritten dispatcher using sysctlbyname
- Use __USER_LABEL_PREFIX__
- Use __TEXT,__const as readonly section
- use ASM_DEF_RODATA macro
- fix func decl

Change-Id: I800593f21085d8187b480c8bb3ab2bd70c4a6974
Signed-off-by: Taiju Yamada <tyamada@bi.a.u-tokyo.ac.jp>
2022-10-28 08:27:26 -07:00

209 lines
5.9 KiB
ArmAsm

/**********************************************************************
Copyright(c) 2019 Arm Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Arm Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a
.text
.align 2
#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"
/*
declare_generic_reg name, reg, default

Defines three .req register aliases for register number "reg":
  name    -> the register formed by pasting "default" in front of "reg"
             (every use in this file passes w or x as "default")
  w_name  -> w<reg>, the 32-bit view
  x_name  -> x<reg>, the 64-bit view
All three arguments are mandatory (:req).
*/
.macro declare_generic_reg name:req,reg:req,default:req
\name .req \default\reg
w_\name .req w\reg
x_\name .req x\reg
.endm
.text
.align 2
/*
Export the entry point. cdecl() is defined outside this file (aarch64_label.h is
included at the top); presumably it applies the platform symbol prefix - confirm there.
*/
.global cdecl(set_long_icf_fg_aarch64)
/* The .type directive is emitted only when __APPLE__ is not defined. */
#ifndef __APPLE__
.type set_long_icf_fg_aarch64, %function
#endif
/*
C prototype implemented by this routine:

void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
struct deflate_icf *match_lookup)
*/
/* arguments: x0..x3, in prototype order */
declare_generic_reg next_in_param, 0,x
declare_generic_reg processed_param, 1,x
declare_generic_reg input_size_param, 2,x
declare_generic_reg match_lookup_param, 3,x
/*
param0..param2 are second names for x0..x2. They are the first three operands
handed to the compare_aarch64 macro inside the function.
*/
declare_generic_reg param0, 0,x
declare_generic_reg param1, 1,x
declare_generic_reg param2, 2,x
/*
local variables

w7..w11 and x13 are scratch registers that the function initialises only after
its call to memcpy. x19..x23 (match_lookup, next_in, dist_start, end_processed,
end_in) are the registers the function saves in its prologue and restores in
its epilogue. w4..w6 (match_length, tmp0, tmp1) are the last three operands
handed to compare_aarch64.
*/
declare_generic_reg len, 7,w
declare_generic_reg dist_code, 8,w
declare_generic_reg shortest_match_len, 9,w
declare_generic_reg len_max, 10,w
declare_generic_reg dist_extra, 11,w
declare_generic_reg const_8, 13,x
declare_generic_reg next_in, 20,x
declare_generic_reg dist_start, 21,x
declare_generic_reg end_processed, 22,x
declare_generic_reg end_in, 23,x
declare_generic_reg match_lookup, 19,x
declare_generic_reg match_length, 4,w
declare_generic_reg tmp0, 5,w
declare_generic_reg tmp1, 6,w
/*
constants

DIST_START_SIZE : byte count passed to memcpy when the 32-word dist_start table
                  is copied onto the stack (32 words * 4 bytes).
ISAL_LOOK_AHEAD : added to end_processed to form the upper bound applied to end_in.
LEN_OFFSET      : bias used in every length threshold below.
SHORTEST_MATCH  : with LEN_OFFSET, gives the floor (LEN_OFFSET + SHORTEST_MATCH - 1)
                  applied to the next entry length in the inner loop.
LEN_MAX_CONST   : upper clamp for the length value written back to match_lookup.
*/
.equ DIST_START_SIZE, 128
.equ ISAL_LOOK_AHEAD, 288
.equ LEN_OFFSET, 254
.equ SHORTEST_MATCH, 4
.equ LEN_MAX_CONST, 512
/*
set_long_icf_fg_aarch64

In:  x0 = next_in, x1 = processed, x2 = input_size, x3 = match_lookup
Out: nothing is returned in a register; match_lookup entries are rewritten in place.

Walks next_in one byte at a time and match_lookup one 32-bit entry at a time
until next_in reaches end_processed = next_in + processed.

Entry layout as decoded and re-encoded by this code:
  bits  9..0   length field  (masked with LIT_LEN_MASK)
  bits 18..10  dist_code     (ubfx 10, 9 on the 32-bit word)
  bits 31..19  dist_extra    (ubfx 3, 13 on the upper halfword)

For an entry whose length field is above LEN_OFFSET + 7, the bytes at
next_in + 8 are compared against the bytes at next_in + 8 - dist, where
dist = dist_start[dist_code] + dist_extra. If the resulting length
(compare result + LEN_OFFSET + 8) is greater than the stored one, this entry
and the following ones are rewritten with that length, clamped to
LEN_MAX_CONST and decreasing by one per entry, keeping dist_code and dist_extra.

Stack frame (192 bytes):
  [sp, 0]    x29, x30
  [sp, 16]   x19, x20
  [sp, 32]   x21, x22
  [sp, 48]   x23
  [sp, 64]   128-byte private copy of the dist_start table

NOTE(review): compare_aarch64 is a macro defined outside this file. It is
assumed to leave the number of matching bytes in match_length and to use tmp0
and tmp1 as temporaries - confirm against its definition.
*/
cdecl(set_long_icf_fg_aarch64):
/* Prologue: allocate the frame and save the callee-saved registers used below. */
stp x29, x30, [sp, -192]!
add x29, sp, 0
stp x21, x22, [sp, 32]
/* x21 is dist_start: it points at the 128-byte table area inside the frame. */
add x21, x29, 64
stp x19, x20, [sp, 16]
str x23, [sp, 48]
/* Move the arguments into x19..x23 so they are still live after the memcpy call. */
add end_processed, next_in_param, processed_param
mov next_in, next_in_param
add end_in, next_in_param, input_size_param
mov match_lookup, match_lookup_param
/*
Set up memcpy(dist_start, .data_dist_start, DIST_START_SIZE).
The two branches differ only in the relocation syntax used to form the
address of .data_dist_start.
*/
#ifndef __APPLE__
adrp x1, .data_dist_start
mov x2, DIST_START_SIZE // 128
add x1, x1, :lo12:.data_dist_start
mov x0, dist_start
#else
adrp x1, .data_dist_start@PAGE
mov x2, DIST_START_SIZE // 128
add x1, x1, .data_dist_start@PAGEOFF
mov x0, dist_start
#endif
bl cdecl(memcpy)
/* end_in = min(end_in, end_processed + ISAL_LOOK_AHEAD), unsigned compare. */
add x_tmp0, end_processed, ISAL_LOOK_AHEAD // 288
cmp end_in, x_tmp0
csel end_in, end_in, x_tmp0, cc
/* Nothing to do when next_in is already at or past end_processed. */
cmp next_in, end_processed
bcs .done
/* Loop-invariant values, loaded after the call above. */
mov const_8, 8
mov len_max, LEN_MAX_CONST // 512
mov shortest_match_len, (LEN_OFFSET + SHORTEST_MATCH - 1)
b .while_outer_loop
.align 2
/* Advance by one input byte and one 4-byte entry; stop at end_processed. */
.while_outer_check:
add next_in, next_in, 1
add match_lookup, match_lookup, 4
cmp end_processed, next_in
bls .done
.while_outer_loop:
/* len = length field of the current entry; skip the entry unless len > LEN_OFFSET + 7. */
ldrh len, [match_lookup]
and len, len, LIT_LEN_MASK // 1023
cmp len, (LEN_OFFSET + 8 - 1) // 261
bls .while_outer_check
/* Decode dist_code and dist_extra while preparing the compare operands. */
ldr dist_code, [match_lookup]
/* x1 = next_in + 8 (second compare operand) */
add x1, next_in, 8
ldrh dist_extra, [match_lookup, 2]
/* w2 = low 32 bits of end_in - (next_in + 8) (third compare operand) */
sub w2, w_end_in, w1
ubfx x_dist_code, x_dist_code, 10, 9
ubfx x_dist_extra, x_dist_extra, 3, 13
/* w0 = dist = dist_start[dist_code] + dist_extra */
uxtw x0, dist_code
ldr w0, [dist_start, x0, lsl 2]
add w0, dist_extra, w0
/* x0 = next_in + 8 - dist (first compare operand) */
sub x0, const_8, x0
add x0, next_in, x0
/* Externally defined macro; operands are x0, x1, x2 plus match_length, tmp0, tmp1. */
compare_aarch64 param0,param1,param2,match_length,tmp0,tmp1
/* w0 = match_length + LEN_OFFSET + 8; continue only if it is greater than len. */
mov w0, w_match_length
add w0, w0, (LEN_OFFSET + 8) // 262
cmp w0, len
bls .while_outer_check
/* w2 = (dist_extra << 19) | (dist_code << 10): the upper bits of every rewritten entry. */
lsl w2, dist_extra, 19
orr w2, w2, dist_code, lsl 10
.align 3
.while_inner_loop:
/*
Store min(w0, LEN_MAX_CONST) | w2 into the current entry, then decrement w0.
The csel consumes the flags of the cmp two instructions earlier; the add in
between does not set flags.
*/
cmp w0, LEN_MAX_CONST // 512
add next_in, next_in, 1
csel w1, w0, len_max, ls
sub w0, w0, #1
orr w1, w1, w2
str w1, [match_lookup]
/* Pre-index writeback: match_lookup advances to the next entry and its length is loaded. */
ldrh w1, [match_lookup, 4]!
and w1, w1, LIT_LEN_MASK // 1023
/* w1 = max(next entry length, LEN_OFFSET + SHORTEST_MATCH - 1) */
cmp w1, (LEN_OFFSET + SHORTEST_MATCH - 1) // 257
csel w1, w1, shortest_match_len, cs
/* Keep rewriting while the remaining length w0 is still greater than w1. */
cmp w1, w0
bcc .while_inner_loop
/* Step past the entry that ended the inner loop, then resume the outer loop. */
add next_in, next_in, 1
add match_lookup, match_lookup, 4
cmp end_processed, next_in
bhi .while_outer_loop
.done:
/* Epilogue: restore callee-saved registers, release the frame, return. */
ldp x19, x20, [sp, 16]
ldp x21, x22, [sp, 32]
ldr x23, [sp, 48]
ldp x29, x30, [sp], 192
ret
/* The .size directive is emitted only when __APPLE__ is not defined. */
#ifndef __APPLE__
.size set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
#endif
/*
dist_start table: 32 words of 4 bytes each (128 bytes, equal to DIST_START_SIZE).
set_long_icf_fg_aarch64 copies it into its stack frame with memcpy and indexes
the copy by dist_code; the selected word is added to dist_extra to form the
backward offset used for the byte comparison. The last two words are zero.

ASM_DEF_RODATA is defined outside this file; presumably it switches to the
platform read-only data section - confirm in the header that defines it.
.data_dist_start is set to the current location, so it names the same address
as the .real_data_dist_start label that follows.
*/
ASM_DEF_RODATA
.align 3
.set .data_dist_start,. + 0
.real_data_dist_start:
.word 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
.word 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
.word 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
.word 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000