mirror of
https://github.com/intel/isa-l.git
synced 2025-01-22 05:20:02 +01:00
1187583a97
Commit message:
- It should be fine to enable pmull always on Apple Silicon
- macOS 12+ is required for PMULL instruction.
- Changed the conditional macro to __APPLE__
- Rewritten dispatcher using sysctlbyname
- Use __USER_LABEL_PREFIX__
- Use __TEXT,__const as readonly section
- use ASM_DEF_RODATA macro
- fix func decl

Change-Id: I800593f21085d8187b480c8bb3ab2bd70c4a6974
Signed-off-by: Taiju Yamada <tyamada@bi.a.u-tokyo.ac.jp>
326 lines
9.5 KiB
ArmAsm
326 lines
9.5 KiB
ArmAsm
/**********************************************************************
|
|
Copyright(c) 2019 Arm Corporation All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions
|
|
are met:
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in
|
|
the documentation and/or other materials provided with the
|
|
distribution.
|
|
* Neither the name of Arm Corporation nor the names of its
|
|
contributors may be used to endorse or promote products derived
|
|
from this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
**********************************************************************/
|
|
|
|
#include "../include/aarch64_label.h"
|
|
|
|
.arch armv8-a+crc
|
|
.text
|
|
.align 2
|
|
|
|
#include "lz0a_const_aarch64.h"
|
|
#include "data_struct_aarch64.h"
|
|
#include "huffman_aarch64.h"
|
|
#include "bitbuf2_aarch64.h"
|
|
#include "stdmac_aarch64.h"
|
|
|
|
/*
|
|
declare Macros
|
|
*/
|
|
|
|
/*
 * declare_generic_reg name, reg, default
 *
 * Defines three register aliases for register number reg:
 *   x_name - the 64-bit view (x register)
 *   w_name - the 32-bit view (w register)
 *   name   - whichever view is selected by default (w or x)
 */
.macro declare_generic_reg name:req,reg:req,default:req
	x_\name	.req	x\reg
	w_\name	.req	w\reg
	\name	.req	\default\reg
.endm
|
|
|
|
/*
 * convert_dist_to_dist_sym dist, tmp0, tmp1
 *
 * Replaces the 32-bit value held in the w view of dist with its distance
 * symbol, in place:
 *   dist > 32768 (unsigned)  ->  -1 (all ones)
 *   dist <= 4                ->  dist - 1
 *   otherwise                ->  ((dist - 1) >> msb) + 2 * msb
 *                                with msb = 30 - clz(dist - 1)
 *
 * dist, tmp0 and tmp1 are alias names created by declare_generic_reg; only
 * their w views are used. The w view of tmp0 is always overwritten, the
 * w view of tmp1 is overwritten on the dist > 4 path, and the condition
 * flags are modified.
 *
 * The exit label is suffixed with the per-expansion counter of the
 * assembler, so the macro may be expanded several times in one file
 * without defining the same symbol twice.
 */
.macro convert_dist_to_dist_sym dist:req,tmp0:req,tmp1:req
	mov	w_\tmp0, w_\dist
	mov	w_\dist, -1
	cmp	w_\tmp0, 32768
	bhi	.dist2code_done_\@
	sub	w_\dist, w_\tmp0, #1
	cmp	w_\tmp0, 4
	bls	.dist2code_done_\@
	clz	w_\tmp1, w_\dist
	mov	w_\tmp0, 30
	sub	w_\tmp0, w_\tmp0, w_\tmp1
	lsr	w_\dist, w_\dist, w_\tmp0
	add	w_\dist, w_\dist, w_\tmp0, lsl 1
.dist2code_done_\@:
.endm
|
|
|
|
/*
 * convert_length_to_len_sym length, length_out, tmp0
 *
 * Loads the 32-bit table entry at index length (w view, zero-extended and
 * scaled by 4) from the table at .len_to_code_tab_lanchor, adds 256 and
 * leaves the result in the w view of length_out.
 *
 * The table address is formed page + page offset; the relocation syntax
 * differs when __APPLE__ is defined. The x view of tmp0 is overwritten with
 * the table address. None of the instructions used here set the condition
 * flags.
 */
.macro convert_length_to_len_sym length:req,length_out:req,tmp0:req
#ifdef __APPLE__
	adrp	x_\tmp0, .len_to_code_tab_lanchor@PAGE
	add	x_\tmp0, x_\tmp0, .len_to_code_tab_lanchor@PAGEOFF
#else
	adrp	x_\tmp0, .len_to_code_tab_lanchor
	add	x_\tmp0, x_\tmp0, :lo12:.len_to_code_tab_lanchor
#endif
	ldr	w_\length_out, [x_\tmp0, w_\length, uxtw 2]
	add	w_\length_out, w_\length_out, 256
.endm
|
|
|
|
/*
 * len_to_code_tab: 264 entries of 32 bits each (1056 bytes), read by
 * convert_length_to_len_sym with the match length as index; that macro adds
 * 256 to the entry it loads.
 *
 * Runs of equal entries are written as: .fill count, 4, value
 * The trailing comment on each line gives the index range it covers.
 */
ASM_DEF_RODATA
	.align	4
	.len_to_code_tab_lanchor = . + 0
#ifndef __APPLE__
	.type	len_to_code_tab, %object
	.size	len_to_code_tab, 1056
#endif
len_to_code_tab:
	.fill	3, 4, 0x00	// 0-2
	.word	0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08	// 3-10, one entry each
	.fill	2, 4, 0x09	// 11-12
	.fill	2, 4, 0x0a	// 13-14
	.fill	2, 4, 0x0b	// 15-16
	.fill	2, 4, 0x0c	// 17-18
	.fill	4, 4, 0x0d	// 19-22
	.fill	4, 4, 0x0e	// 23-26
	.fill	4, 4, 0x0f	// 27-30
	.fill	4, 4, 0x10	// 31-34
	.fill	8, 4, 0x11	// 35-42
	.fill	8, 4, 0x12	// 43-50
	.fill	8, 4, 0x13	// 51-58
	.fill	8, 4, 0x14	// 59-66
	.fill	16, 4, 0x15	// 67-82
	.fill	16, 4, 0x16	// 83-98
	.fill	16, 4, 0x17	// 99-114
	.fill	16, 4, 0x18	// 115-130
	.fill	32, 4, 0x19	// 131-162
	.fill	32, 4, 0x1a	// 163-194
	.fill	32, 4, 0x1b	// 195-226
	.fill	31, 4, 0x1c	// 227-257
	.word	0x1d		// 258
	.fill	5, 4, 0x00	// 259-263
|
|
|
|
/*
 * Code section and symbol export for isal_update_histogram_aarch64.
 * The .type directive is skipped when __APPLE__ is defined.
 *
 * C prototype:
 *   void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
 *                                      struct isal_huff_histogram *histogram);
 */
	.text
	.arch	armv8-a+crc
	.global	cdecl(isal_update_histogram_aarch64)
#if !defined(__APPLE__)
	.type	isal_update_histogram_aarch64, %function
#endif
|
|
|
|
/* incoming arguments: x0, w1 (int length), x2 */
	declare_generic_reg	start_stream, 0,x
	declare_generic_reg	length, 1,x
	declare_generic_reg	histogram, 2,x

/* the same three registers, under the names handed to compare_max_258_bytes */
	declare_generic_reg	param0, 0,x
	declare_generic_reg	param1, 1,x
	declare_generic_reg	param2, 2,x

/*
 * locals kept in x19-x23; the function saves and restores these registers,
 * so they are the ones that hold values across the memset call
 */
	declare_generic_reg	current, 19,x
	declare_generic_reg	last_seen, 20,x
	declare_generic_reg	end_stream, 21,x
	declare_generic_reg	loop_end_iter, 22,x
	/* histogram_saved and lit_len_histogram are two names for x23 */
	declare_generic_reg	histogram_saved, 23,x
	declare_generic_reg	lit_len_histogram, 23,x

/* locals in registers that are not saved by the function */
	declare_generic_reg	match_length, 3,w
	declare_generic_reg	end, 4,x
	declare_generic_reg	tmp0, 5,w
	declare_generic_reg	tmp1, 6,w
	declare_generic_reg	dist, 7,x
	declare_generic_reg	literal, 8,x
	declare_generic_reg	next_hash, 9,x
	declare_generic_reg	start_stream_saved, 10,x
	declare_generic_reg	D, 11,w
	declare_generic_reg	dist_histogram, 12,x

/* number of counters in each histogram array */
	.equ	LIT_LEN, 286
	.equ	DIST_LEN, 30

/*
 * byte offsets from the histogram pointer; every counter is accessed as a
 * 64-bit value, hence the factor 8
 */
	.equ	lit_len_offset, 0
	.equ	dist_offset, (lit_len_offset + 8*LIT_LEN)	// 2288
	.equ	hash_offset, (dist_offset + 8*DIST_LEN)	// 2528
/* size in bytes of the table at hash_offset that is cleared with memset */
	.equ	hash_table_size, (2*8*1024)	// 16384
|
|
|
|
/*
 * isal_update_histogram_aarch64
 *   x0 = start_stream, w1 = length (signed 32-bit), x2 = histogram
 *
 * Scans the input buffer and increments 64-bit counters in the histogram:
 * one per literal byte, and for every accepted match one distance-symbol
 * counter (array at dist_offset) and one length-symbol counter (array at
 * offset 0). A table of 16-bit positions at hash_offset (last_seen) is
 * cleared with memset at the start and used to find match candidates.
 * Counter 256 of the lit/len array is incremented once before returning.
 *
 * Returns immediately, touching nothing, when length <= 0.
 * Stack: one 64-byte frame holding x29, x30 and x19-x23.
 */
cdecl(isal_update_histogram_aarch64):
|
|
/* signed test: nothing to do for length <= 0 */
cmp w_length, 0
|
|
ble .done
|
|
|
|
/* prologue: save frame pointer, link register and x19-x23 */
stp x29, x30, [sp, -64]!
|
|
add x29, sp, 0
|
|
stp x19, x20, [sp, 16]
|
|
stp x21, x22, [sp, 32]
|
|
str x23, [sp, 48]
|
|
|
|
|
|
/* last_seen = histogram + hash_offset */
add last_seen, histogram, hash_offset
|
|
/* end_stream = start_stream + sign-extended length */
add end_stream, start_stream, w_length, sxtw
|
|
mov current, start_stream
|
|
/* the main loop loads 4 bytes at current, so it stops at end_stream - 3 */
sub loop_end_iter, end_stream, #3
|
|
/* keep the histogram pointer in x23 across the call below */
mov histogram_saved, histogram
|
|
|
|
|
|
/* memset(last_seen, 0, hash_table_size) */
mov x0, last_seen
|
|
mov w1, 0
|
|
mov x2, hash_table_size
|
|
bl cdecl(memset)
|
|
|
|
|
|
/* unsigned compare: skip the main loop when current >= loop_end_iter */
cmp current, loop_end_iter
|
|
bcs .loop_end
|
|
|
|
|
|
/* x10, x12 and w11 are not saved by this function, so they are set after the call */
mov start_stream_saved, current
|
|
add dist_histogram, histogram_saved, dist_offset
|
|
/* D = 32766, the largest accepted value of dist - 1 */
mov D, 32766
|
|
b .loop
|
|
|
|
|
|
.align 2
|
|
/*
 * No match taken at current: count the low byte of literal and move on to
 * next_hash (current + 1). The flags from the cmp are consumed by the bls
 * after the counter update; ldr, add and str leave them unchanged.
 */
.loop_2nd_stream:
|
|
and literal, literal, 0xff
|
|
mov current, next_hash
|
|
cmp loop_end_iter, current
|
|
|
|
|
|
ldr x0, [lit_len_histogram, literal, lsl 3]
|
|
add x0, x0, 1
|
|
str x0, [lit_len_histogram, literal, lsl 3]
|
|
bls .loop_end
|
|
|
|
|
|
/* main loop: one iteration per position while current < loop_end_iter */
.loop:
|
|
/* 4-byte load at current; next_hash = current + 1 */
ldr w_literal, [current]
|
|
add next_hash, current, 1
|
|
|
|
|
|
/* w0 = crc32c of the loaded word, starting from 0 */
mov w0, w_literal
|
|
crc32cw w0, wzr, w0
|
|
|
|
|
|
/* x0 = (low 13 bits of the crc) * 2: byte offset of a 16-bit entry */
ubfiz x0, x0, 1, 13
|
|
/* x2 = position of current relative to start_stream_saved */
sub x2, current, start_stream_saved
|
|
/* read the old entry, then store the low 16 bits of the new position */
ldrh w_dist, [last_seen, x0]
|
|
strh w2, [last_seen, x0]
|
|
/* dist = (position - old entry) modulo 65536 */
sub w2, w2, w_dist
|
|
and w_dist, w2, 65535
|
|
|
|
|
|
/* unsigned check: reject when dist - 1 > D, i.e. dist == 0 or dist > 32767 */
sub w0, w_dist, #1
|
|
cmp w0, D
|
|
bhi .loop_2nd_stream
|
|
|
|
|
|
/* w2 = end_stream - current, x1 = current, x0 = current - dist */
sub w2, w_end_stream, w_current
|
|
mov x1, current
|
|
sub x0, current, w_dist, uxth
|
|
/*
 * compare_max_258_bytes is not defined in this file (presumably it comes
 * from stdmac_aarch64.h - confirm); its result is read from match_length
 */
compare_max_258_bytes param0,param1,param2,match_length,tmp0,tmp1
|
|
|
|
|
|
/* a match_length of 3 or less is handled as a literal */
cmp match_length, 3
|
|
bls .loop_2nd_stream
|
|
|
|
|
|
/* end = min(current + 3, loop_end_iter), unsigned */
add end, current, 3
|
|
cmp end, loop_end_iter
|
|
csel end, end, loop_end_iter, ls
|
|
/* nothing to update when end <= next_hash */
cmp end, next_hash
|
|
bls .skip_inner_loop
|
|
|
|
|
|
.align 3
|
|
/* store the position of every byte in [next_hash, end) into last_seen */
.inner_loop:
|
|
ldr w0, [next_hash]
|
|
crc32cw w0, wzr, w0
|
|
|
|
|
|
ubfiz x0, x0, 1, 13
|
|
sub x1, next_hash, start_stream_saved
|
|
add next_hash, next_hash, 1
|
|
/* the strh does not change the flags tested by the bne */
cmp next_hash, end
|
|
strh w1, [last_seen, x0]
|
|
bne .inner_loop
|
|
|
|
|
|
.skip_inner_loop:
|
|
/* dist becomes its distance symbol; increment dist_histogram[symbol] */
convert_dist_to_dist_sym dist, tmp0, tmp1
|
|
uxtw x2, w_dist
|
|
ldr x1, [dist_histogram, x2, lsl 3]
|
|
add x1, x1, 1
|
|
str x1, [dist_histogram, x2, lsl 3]
|
|
|
|
|
|
/* tmp1 = len_to_code_tab[match_length] + 256; increment lit_len_histogram[tmp1] */
convert_length_to_len_sym match_length,tmp1,tmp0
|
|
uxtw x0, w_tmp1
|
|
ldr x1, [lit_len_histogram, x0, lsl 3]
|
|
add x1, x1, 1
|
|
str x1, [lit_len_histogram, x0, lsl 3]
|
|
|
|
|
|
/*
 * match_length is w3 and x3 is its 64-bit view: the sub/add pair leaves
 * the zero-extended match length in x3, so current advances by match_length
 */
sub match_length, match_length, #1
|
|
add x3, x3, 1
|
|
add current, current, x3
|
|
cmp loop_end_iter, current
|
|
bhi .loop
|
|
|
|
|
|
.align 3
|
|
// fold the last for loop
|
|
/*
 * Tail: count the bytes from current up to end_stream as literals. The
 * code is unrolled for up to four bytes; each cmp result is used by the
 * bls that follows the counter update.
 */
.loop_end:
|
|
cmp end_stream, current
|
|
bls .loop_fold_end
|
|
|
|
|
|
/* byte at current; x0 is post-incremented to current + 1 for the compare */
mov x0, current
|
|
ldrb w1, [x0], 1
|
|
cmp end_stream, x0
|
|
ldr x0, [lit_len_histogram, x1, lsl 3]
|
|
add x0, x0, 1
|
|
str x0, [lit_len_histogram, x1, lsl 3]
|
|
bls .loop_fold_end
|
|
|
|
|
|
/* byte at current + 1 */
ldrb w1, [current, 1]
|
|
add x0, current, 2
|
|
cmp end_stream, x0
|
|
ldr x0, [lit_len_histogram, x1, lsl 3]
|
|
add x0, x0, 1
|
|
str x0, [lit_len_histogram, x1, lsl 3]
|
|
bls .loop_fold_end
|
|
|
|
|
|
/* byte at current + 2 */
ldrb w1, [current, 2]
|
|
add x0, current, 3
|
|
cmp end_stream, x0
|
|
ldr x0, [lit_len_histogram, x1, lsl 3]
|
|
add x0, x0, 1
|
|
str x0, [lit_len_histogram, x1, lsl 3]
|
|
bls .loop_fold_end
|
|
|
|
|
|
/* byte at current + 3 */
ldrb w1, [current, 3]
|
|
ldr x0, [lit_len_histogram, x1, lsl 3]
|
|
add x0, x0, 1
|
|
str x0, [lit_len_histogram, x1, lsl 3]
|
|
|
|
|
|
.loop_fold_end:
|
|
/* increment lit/len counter 256 (presumably the end-of-block symbol - confirm) */
ldr x0, [lit_len_histogram, (256*8)]
|
|
add x0, x0, 1
|
|
str x0, [lit_len_histogram, (256*8)]
|
|
|
|
|
|
/* epilogue: restore x19-x23, x29, x30 and release the 64-byte frame */
ldr x23, [sp, 48]
|
|
ldp x19, x20, [sp, 16]
|
|
ldp x21, x22, [sp, 32]
|
|
ldp x29, x30, [sp], 64
|
|
ret
|
|
.align 2
|
|
/* early exit for length <= 0: no frame was set up on this path */
.done:
|
|
ret
|
|
#ifndef __APPLE__
|
|
.size isal_update_histogram_aarch64, .-isal_update_histogram_aarch64
|
|
#endif
|