Fixes for aarch64 mac

- It should be fine to always enable PMULL on Apple Silicon
- macOS 12+ is required for the PMULL instruction
- Changed the conditional macro to __APPLE__
- Rewrote the dispatcher using sysctlbyname
- Use __USER_LABEL_PREFIX__
- Use __TEXT,__const as the read-only section
- Use the ASM_DEF_RODATA macro
- Fix function declarations

Change-Id: I800593f21085d8187b480c8bb3ab2bd70c4a6974
Signed-off-by: Taiju Yamada <tyamada@bi.a.u-tokyo.ac.jp>
This commit is contained in:
Taiju Yamada 2020-11-22 01:51:37 +09:00 committed by Greg Tucker
parent 85716fe2fe
commit 1187583a97
86 changed files with 917 additions and 227 deletions

View File

@ -27,11 +27,15 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
.arch armv8-a+crc+crypto
.text
.align 3
.global crc16_t10dif_copy_pmull
.global cdecl(crc16_t10dif_copy_pmull)
#ifndef __APPLE__
.type crc16_t10dif_copy_pmull, %function
#endif
/* uint16_t crc16_t10dif_pmull(uint16_t seed, uint8_t *buf, uint64_t len) */
@ -67,7 +71,7 @@ x_crc16tab .req x5
x_src_saved .req x0
x_dst_saved .req x12
crc16_t10dif_copy_pmull:
cdecl(crc16_t10dif_copy_pmull):
cmp x_len, 63
sub sp, sp, #16
uxth w_seed, w_seed
@ -80,11 +84,19 @@ crc16_t10dif_copy_pmull:
cmp x_len, x_tmp
bls .end
#ifndef __APPLE__
sxtw x_counter, w_counter
adrp x_crc16tab, .LANCHOR0
sub x_src, x_src, x_counter
sub x_dst, x_dst, x_counter
add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0
#else
sxtw x_counter, w_counter
adrp x_crc16tab, .LANCHOR0@PAGE
sub x_src, x_src, x_counter
sub x_dst, x_dst, x_counter
add x_crc16tab, x_crc16tab, .LANCHOR0@PAGEOFF
#endif
.align 2
.crc_table_loop:
@ -145,8 +157,13 @@ v_tmp3 .req v16
stp q_x0, q_x1, [x_dst]
stp q_x2, q_x3, [x_dst, 32]
#ifndef __APPLE__
adrp x_tmp, .shuffle_mask_lanchor
ldr q_permutation, [x_tmp, :lo12:.shuffle_mask_lanchor]
#else
adrp x_tmp, .shuffle_mask_lanchor@PAGE
ldr q_permutation, [x_tmp, .shuffle_mask_lanchor@PAGEOFF]
#endif
tbl v_tmp1.16b, {v_x0.16b}, v7.16b
eor v_x0.16b, v_tmp3.16b, v_tmp1.16b
@ -193,7 +210,7 @@ v_tmp1_x3 .req v27
q_fold_const .req q17
v_fold_const .req v17
ldr q_fold_const, =0x371d00000000000087e70000;
ldr q_fold_const, fold_constant
.align 2
.crc_fold_loop:
@ -358,23 +375,32 @@ v_br1 .req v5
umov x0, v_x0.d[0]
ubfx x0, x0, 16, 16
b .crc_table_loop_pre
#ifndef __APPLE__
.size crc16_t10dif_copy_pmull, .-crc16_t10dif_copy_pmull
#endif
.section .rodata
ASM_DEF_RODATA
.align 4
fold_constant:
.word 0x87e70000
.word 0x00000000
.word 0x371d0000
.word 0x00000000
.shuffle_mask_lanchor = . + 0
#ifndef __APPLE__
.type shuffle_mask, %object
.size shuffle_mask, 16
#endif
shuffle_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8
.byte 7, 6, 5, 4, 3, 2, 1, 0
.align 4
.LANCHOR0 = . + 0
#ifndef __APPLE__
.type crc16tab, %object
.size crc16tab, 512
#endif
crc16tab:
.hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b
.hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6

View File

@ -27,11 +27,15 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
.arch armv8-a+crc+crypto
.text
.align 3
.global crc16_t10dif_pmull
.global cdecl(crc16_t10dif_pmull)
#ifndef __APPLE__
.type crc16_t10dif_pmull, %function
#endif
/* uint16_t crc16_t10dif_pmull(uint16_t seed, uint8_t *buf, uint64_t len) */
@ -65,7 +69,7 @@ x_counter .req x3
x_crc16tab .req x4
x_buf_saved .req x0
crc16_t10dif_pmull:
cdecl(crc16_t10dif_pmull):
cmp x_len, 63
sub sp, sp, #16
uxth w_seed, w_seed
@ -78,10 +82,17 @@ crc16_t10dif_pmull:
cmp x_len, x_tmp
bls .end
#ifndef __APPLE__
sxtw x_counter, w_counter
adrp x_crc16tab, .LANCHOR0
sub x_buf, x_buf, x_counter
add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0
#else
sxtw x_counter, w_counter
adrp x_crc16tab, .LANCHOR0@PAGE
sub x_buf, x_buf, x_counter
add x_crc16tab, x_crc16tab, .LANCHOR0@PAGEOFF
#endif
.align 2
.crc_table_loop:
@ -137,8 +148,13 @@ v_tmp3 .req v16
ldp q_x0, q_x1, [x_buf]
ldp q_x2, q_x3, [x_buf, 32]
#ifndef __APPLE__
adrp x_tmp, .shuffle_mask_lanchor
ldr q7, [x_tmp, :lo12:.shuffle_mask_lanchor]
#else
adrp x_tmp, .shuffle_mask_lanchor@PAGE
ldr q7, [x_tmp, .shuffle_mask_lanchor@PAGEOFF]
#endif
tbl v_tmp1.16b, {v_x0.16b}, v7.16b
eor v_x0.16b, v_tmp3.16b, v_tmp1.16b
@ -185,7 +201,7 @@ v_tmp1_x3 .req v27
q_fold_const .req q17
v_fold_const .req v17
ldr q_fold_const, =0x371d00000000000087e70000;
ldr q_fold_const, fold_constant
.align 2
.crc_fold_loop:
@ -344,22 +360,32 @@ v_br1 .req v5
ubfx x0, x0, 16, 16
b .crc_table_loop_pre
#ifndef __APPLE__
.size crc16_t10dif_pmull, .-crc16_t10dif_pmull
#endif
.section .rodata
ASM_DEF_RODATA
.align 4
fold_constant:
.word 0x87e70000
.word 0x00000000
.word 0x371d0000
.word 0x00000000
.shuffle_mask_lanchor = . + 0
#ifndef __APPLE__
.type shuffle_mask, %object
.size shuffle_mask, 16
#endif
shuffle_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8
.byte 7, 6, 5, 4, 3, 2, 1, 0
.align 4
.LANCHOR0 = . + 0
#ifndef __APPLE__
.type crc16tab, %object
.size crc16tab, 512
#endif
crc16tab:
.hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b
.hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6

View File

@ -27,8 +27,7 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.macro crc32_hw_common poly_type

View File

@ -27,6 +27,7 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.macro declare_var_vector_reg name:req,reg:req
\name\()_q .req q\reg
@ -429,4 +430,3 @@ start_final:
.endif
ret
.endm

View File

@ -88,8 +88,12 @@
);
*/
.global crc32_gzip_refl_3crc_fold
.global cdecl(crc32_gzip_refl_3crc_fold)
#ifndef __APPLE__
.type crc32_gzip_refl_3crc_fold, %function
crc32_gzip_refl_3crc_fold:
#endif
cdecl(crc32_gzip_refl_3crc_fold):
crc32_3crc_fold crc32
#ifndef __APPLE__
.size crc32_gzip_refl_3crc_fold, .-crc32_gzip_refl_3crc_fold
#endif

View File

@ -59,8 +59,12 @@
* uint32_t crc32_gzip_refl_crc_ext(const unsigned char *BUF,
* uint64_t LEN,uint32_t wCRC);
*/
.global crc32_gzip_refl_crc_ext
.global cdecl(crc32_gzip_refl_crc_ext)
#ifndef __APPLE__
.type crc32_gzip_refl_crc_ext, %function
crc32_gzip_refl_crc_ext:
#endif
cdecl(crc32_gzip_refl_crc_ext):
crc32_hw_common crc32
#ifndef __APPLE__
.size crc32_gzip_refl_crc_ext, .-crc32_gzip_refl_crc_ext
#endif

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
#include "crc32_gzip_refl_pmull.h"
#include "crc32_refl_common_pmull.h"

View File

@ -47,11 +47,13 @@
.equ br_high_b2, 0x1
.text
.section .rodata
ASM_DEF_RODATA
.align 4
.set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc32_table_gzip_refl, %object
.size crc32_table_gzip_refl, 1024
#endif
crc32_table_gzip_refl:
.word 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3
.word 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
#include "crc32_ieee_norm_pmull.h"
#include "crc32_norm_common_pmull.h"

View File

@ -47,11 +47,13 @@
.equ br_high_b2, 0x1
.text
.section .rodata
ASM_DEF_RODATA
.align 4
.set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc32_table_ieee_norm, %object
.size crc32_table_ieee_norm, 1024
#endif
crc32_table_ieee_norm:
.word 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005
.word 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd

View File

@ -90,8 +90,12 @@
*/
.global crc32_iscsi_3crc_fold
.global cdecl(crc32_iscsi_3crc_fold)
#ifndef __APPLE__
.type crc32_iscsi_3crc_fold, %function
crc32_iscsi_3crc_fold:
#endif
cdecl(crc32_iscsi_3crc_fold):
crc32_3crc_fold crc32c
#ifndef __APPLE__
.size crc32_iscsi_3crc_fold, .-crc32_iscsi_3crc_fold
#endif

View File

@ -58,8 +58,12 @@
* uint32_t crc32_iscsi_crc_ext(const unsigned char *BUF,
* uint64_t LEN,uint32_t wCRC);
*/
.global crc32_iscsi_crc_ext
.global cdecl(crc32_iscsi_crc_ext)
#ifndef __APPLE__
.type crc32_iscsi_crc_ext, %function
crc32_iscsi_crc_ext:
#endif
cdecl(crc32_iscsi_crc_ext):
crc32_hw_common crc32c
#ifndef __APPLE__
.size crc32_iscsi_crc_ext, .-crc32_iscsi_crc_ext
#endif

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
#include "crc32_iscsi_refl_pmull.h"
#include "crc32_refl_common_pmull.h"
@ -35,9 +36,11 @@ crc32_refl_func crc32_iscsi_refl_pmull_internal
.arch armv8-a+crc+crypto
.text
.align 3
.global crc32_iscsi_refl_pmull
.global cdecl(crc32_iscsi_refl_pmull)
#ifndef __APPLE__
.type crc32_iscsi_refl_pmull, %function
crc32_iscsi_refl_pmull:
#endif
cdecl(crc32_iscsi_refl_pmull):
stp x29, x30, [sp, -32]!
mov x29, sp
@ -47,7 +50,7 @@ crc32_iscsi_refl_pmull:
mov w0, w7
mvn w0, w0
bl crc32_iscsi_refl_pmull_internal
bl cdecl(crc32_iscsi_refl_pmull_internal)
mvn w0, w0
ldp x29, x30, [sp], 32
ret

View File

@ -47,11 +47,14 @@
.equ br_high_b2, 0x0
.text
.section .rodata
ASM_DEF_RODATA
.align 4
.set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc32_table_iscsi_refl, %object
.size crc32_table_iscsi_refl, 1024
#endif
crc32_table_iscsi_refl:
.word 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB
.word 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24

View File

@ -51,42 +51,56 @@
#include "crc32_mix_default_common.S"
.global crc32_mix_default
.global cdecl(crc32_mix_default)
#ifndef __APPLE__
.type crc32_mix_default, %function
crc32_mix_default:
#endif
cdecl(crc32_mix_default):
crc32_mix_main_default
#ifndef __APPLE__
.size crc32_mix_default, .-crc32_mix_default
#endif
.section .rodata
ASM_DEF_RODATA
.align 4
.set lanchor_crc32,. + 0
#ifndef __APPLE__
.type k1k2, %object
.size k1k2, 16
#endif
k1k2:
.xword 0x0154442bd4
.xword 0x01c6e41596
#ifndef __APPLE__
.type k3k4, %object
.size k3k4, 16
#endif
k3k4:
.xword 0x01751997d0
.xword 0x00ccaa009e
#ifndef __APPLE__
.type k5k0, %object
.size k5k0, 16
#endif
k5k0:
.xword 0x0163cd6124
.xword 0
#ifndef __APPLE__
.type poly, %object
.size poly, 16
#endif
poly:
.xword 0x01db710641
.xword 0x01f7011641
#ifndef __APPLE__
.type crc32_const, %object
.size crc32_const, 48
#endif
crc32_const:
.xword 0x1753ab84
.xword 0
@ -98,8 +112,10 @@ crc32_const:
.align 4
.set .lanchor_mask,. + 0
#ifndef __APPLE__
.type mask, %object
.size mask, 16
#endif
mask:
.word -1
.word 0

View File

@ -27,6 +27,8 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.macro declare_generic_reg name:req, reg:req, default:req
\name .req \default\reg
w_\name .req w\reg
@ -207,8 +209,13 @@
fmov s_a1, w_crc
movi v_neon_tmp.4s, 0
#ifndef __APPLE__
adrp x_pconst, lanchor_crc32
add x_buf_tmp, x_buf, 64
#else
adrp x_pconst, lanchor_crc32@PAGE
add x_buf_tmp, x_buf, 64
#endif
ldr x_data_crc0, [x_buf, 512]
ldr x_data_crc1, [x_buf, 1024]
@ -231,7 +238,11 @@
ldr x_data_crc2, [x_buf, 1544]
eor v_a1.16b, v_a1.16b, v_neon_tmp.16b
#ifndef __APPLE__
ldr q_a0, [x_pconst, #:lo12:lanchor_crc32] // k1k2
#else
ldr q_a0, [x_pconst, #lanchor_crc32@PAGEOFF] // k1k2
#endif
crc32_u64 w_crc0, w_crc0, x_data_crc0
crc32_u64 w_crc1, w_crc1, x_data_crc1
@ -261,7 +272,11 @@
// loop end
// PMULL: fold into 128-bits
#ifndef __APPLE__
add x_pconst, x_pconst, :lo12:lanchor_crc32
#else
add x_pconst, x_pconst, lanchor_crc32@PAGEOFF
#endif
ldr x_data_crc0, [x_buf, 976]
ldr x_data_crc1, [x_buf, 1488]
@ -321,7 +336,11 @@
movi v_neon_zero.4s, 0
ldr q_k5k0, [x_pconst, offset_k5k0] // k5k0
#ifndef __APPLE__
adrp x_tmp, .lanchor_mask
#else
adrp x_tmp, .lanchor_mask@PAGE
#endif
ldr x_data_crc0, [x_buf, 1008]
ldr x_data_crc1, [x_buf, 1520]
@ -329,7 +348,11 @@
ext v_a1.16b, v_a1.16b, v_neon_zero.16b, #8
eor v_a1.16b, v_a2.16b, v_a1.16b
#ifndef __APPLE__
ldr q_neon_tmp3, [x_tmp, #:lo12:.lanchor_mask]
#else
ldr q_neon_tmp3, [x_tmp, #.lanchor_mask@PAGEOFF]
#endif
crc32_u64 w_crc0, w_crc0, x_data_crc0
crc32_u64 w_crc1, w_crc1, x_data_crc1

View File

@ -62,9 +62,12 @@
CRC .req x0
wCRC .req w0
.align 6
.global crc32_mix_neoverse_n1
.global cdecl(crc32_mix_neoverse_n1)
#ifndef __APPLE__
.type crc32_mix_neoverse_n1, %function
crc32_mix_neoverse_n1:
#endif
cdecl(crc32_mix_neoverse_n1):
crc32_common_mix crc32
#ifndef __APPLE__
.size crc32_mix_neoverse_n1, .-crc32_mix_neoverse_n1
#endif

View File

@ -33,12 +33,14 @@
.arch armv8-a+crypto
.text
.align 3
.global \name
.global cdecl(\name)
#ifndef __APPLE__
.type \name, %function
#endif
/* uint32_t crc32_norm_func(uint32_t seed, uint8_t * buf, uint64_t len) */
\name\():
cdecl(\name\()):
mvn w_seed, w_seed
mov x_counter, 0
cmp x_len, (FOLD_SIZE - 1)
@ -48,10 +50,17 @@
cmp x_len, x_counter
bls .done
#ifndef __APPLE__
adrp x_tmp, .lanchor_crc_tab
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
#else
adrp x_tmp, .lanchor_crc_tab@PAGE
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
#endif
.align 3
.loop_crc_tab:
@ -124,10 +133,12 @@
umov w_seed, v_tmp_high.s[0]
b .crc_tab_pre
#ifndef __APPLE__
.size \name, .-\name
.section .rodata.cst16,"aM",@progbits,16
#else
.section __TEXT,__const
#endif
.align 4
.shuffle_data:
.byte 15, 14, 13, 12, 11, 10, 9

View File

@ -33,12 +33,14 @@
.arch armv8-a+crypto
.text
.align 3
.global \name
.global cdecl(\name)
#ifndef __APPLE__
.type \name, %function
#endif
/* uint32_t crc32_refl_func(uint32_t seed, uint8_t * buf, uint64_t len) */
\name\():
cdecl(\name\()):
mvn w_seed, w_seed
mov x_counter, 0
cmp x_len, (FOLD_SIZE - 1)
@ -48,10 +50,17 @@
cmp x_len, x_counter
bls .done
#ifndef __APPLE__
adrp x_tmp, .lanchor_crc_tab
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
#else
adrp x_tmp, .lanchor_crc_tab@PAGE
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
#endif
.align 3
.loop_crc_tab:
@ -121,6 +130,7 @@
umov w_seed, v_tmp_high.s[1]
b .crc_tab_pre
#ifndef __APPLE__
.size \name, .-\name
#endif
.endm

View File

@ -49,46 +49,60 @@
#include "crc32_mix_default_common.S"
.global crc32c_mix_default
.global cdecl(crc32c_mix_default)
#ifndef __APPLE__
.type crc32c_mix_default, %function
crc32c_mix_default:
#endif
cdecl(crc32c_mix_default):
mov w3, w2
sxtw x2, w1
mov x1, x0
mov w0, w3
crc32_mix_main_default
#ifndef __APPLE__
.size crc32c_mix_default, .-crc32c_mix_default
#endif
.section .rodata
ASM_DEF_RODATA
.align 4
.set lanchor_crc32,. + 0
#ifndef __APPLE__
.type k1k2, %object
.size k1k2, 16
#endif
k1k2:
.xword 0x00740eef02
.xword 0x009e4addf8
#ifndef __APPLE__
.type k3k4, %object
.size k3k4, 16
#endif
k3k4:
.xword 0x00f20c0dfe
.xword 0x014cd00bd6
#ifndef __APPLE__
.type k5k0, %object
.size k5k0, 16
#endif
k5k0:
.xword 0x00dd45aab8
.xword 0
#ifndef __APPLE__
.type poly, %object
.size poly, 16
#endif
poly:
.xword 0x0105ec76f0
.xword 0x00dea713f1
#ifndef __APPLE__
.type crc32_const, %object
.size crc32_const, 48
#endif
crc32_const:
.xword 0x9ef68d35
.xword 0
@ -100,8 +114,10 @@ crc32_const:
.align 4
.set .lanchor_mask,. + 0
#ifndef __APPLE__
.type mask, %object
.size mask, 16
#endif
mask:
.word -1
.word 0

View File

@ -61,8 +61,12 @@
CRC .req x2
wCRC .req w2
.align 6
.global crc32c_mix_neoverse_n1
.global cdecl(crc32c_mix_neoverse_n1)
#ifndef __APPLE__
.type crc32c_mix_neoverse_n1, %function
crc32c_mix_neoverse_n1:
#endif
cdecl(crc32c_mix_neoverse_n1):
crc32_common_mix crc32c
#ifndef __APPLE__
.size crc32c_mix_neoverse_n1, .-crc32c_mix_neoverse_n1
#endif

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
#include "crc64_ecma_norm_pmull.h"
#include "crc64_norm_common_pmull.h"

View File

@ -64,11 +64,13 @@
.equ br_high_b3, 0x42f0
.text
.section .rodata
ASM_DEF_RODATA
.align 4
.set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc64_tab, %object
.size crc64_tab, 2048
#endif
crc64_tab:
.xword 0x0000000000000000, 0x42f0e1eba9ea3693
.xword 0x85e1c3d753d46d26, 0xc711223cfa3e5bb5

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
#include "crc64_ecma_refl_pmull.h"
#include "crc64_refl_common_pmull.h"

View File

@ -60,11 +60,13 @@
.equ br_high_b3, 0x92d8
.text
.section .rodata
ASM_DEF_RODATA
.align 4
.set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc64_tab, %object
.size crc64_tab, 2048
#endif
crc64_tab:
.xword 0x0000000000000000, 0xb32e4cbe03a75f6f
.xword 0xf4843657a840a05b, 0x47aa7ae9abe7ff34

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
#include "crc64_iso_norm_pmull.h"
#include "crc64_norm_common_pmull.h"

View File

@ -64,11 +64,13 @@
.equ br_high_b3, 0x0000
.text
.section .rodata
ASM_DEF_RODATA
.align 4
.set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc64_tab, %object
.size crc64_tab, 2048
#endif
crc64_tab:
.xword 0x0000000000000000, 0x000000000000001b

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
#include "crc64_iso_refl_pmull.h"
#include "crc64_refl_common_pmull.h"

View File

@ -60,11 +60,13 @@
.equ br_high_b3, 0xb000
.text
.section .rodata
ASM_DEF_RODATA
.align 4
.set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc64_tab, %object
.size crc64_tab, 2048
#endif
crc64_tab:
.xword 0x0000000000000000, 0x01b0000000000000

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
#include "crc64_jones_norm_pmull.h"
#include "crc64_norm_common_pmull.h"

View File

@ -64,11 +64,14 @@
.equ br_high_b3, 0xad93
.text
.section .rodata
ASM_DEF_RODATA
.align 4
.set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc64_tab, %object
.size crc64_tab, 2048
#endif
crc64_tab:
.xword 0x0000000000000000, 0xad93d23594c935a9
.xword 0xf6b4765ebd5b5efb, 0x5b27a46b29926b52

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
#include "crc64_jones_refl_pmull.h"
#include "crc64_refl_common_pmull.h"

View File

@ -60,11 +60,14 @@
.equ br_high_b3, 0x2b59
.text
.section .rodata
ASM_DEF_RODATA
.align 4
.set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc64_tab, %object
.size crc64_tab, 2048
#endif
crc64_tab:
.xword 0x0000000000000000, 0x7ad870c830358979
.xword 0xf5b0e190606b12f2, 0x8f689158505e9b8b

View File

@ -33,12 +33,14 @@
.arch armv8-a+crypto
.text
.align 3
.global \name
.global cdecl(\name)
#ifndef __APPLE__
.type \name, %function
#endif
/* uint64_t crc64_norm_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
\name\():
cdecl(\name\()):
mvn x_seed, x_seed
mov x_counter, 0
cmp x_len, (FOLD_SIZE-1)
@ -48,10 +50,17 @@
cmp x_len, x_counter
bls .done
#ifndef __APPLE__
adrp x_tmp, .lanchor_crc_tab
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
#else
adrp x_tmp, .lanchor_crc_tab@PAGE
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
#endif
.align 3
.loop_crc_tab:
@ -119,9 +128,12 @@
b .crc_tab_pre
#ifndef __APPLE__
.size \name, .-\name
.section .rodata.cst16,"aM",@progbits,16
#else
.section __TEXT,__const
#endif
.align 4
.shuffle_data:
.byte 15, 14, 13, 12, 11, 10, 9, 8

View File

@ -33,12 +33,14 @@
.arch armv8-a+crypto
.text
.align 3
.global \name
.global cdecl(\name)
#ifndef __APPLE__
.type \name, %function
#endif
/* uint64_t crc64_refl_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
\name\():
cdecl(\name\()):
mvn x_seed, x_seed
mov x_counter, 0
cmp x_len, (FOLD_SIZE-1)
@ -48,10 +50,17 @@
cmp x_len, x_counter
bls .done
#ifndef __APPLE__
adrp x_tmp, .lanchor_crc_tab
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
#else
adrp x_tmp, .lanchor_crc_tab@PAGE
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
#endif
.align 3
.loop_crc_tab:
@ -121,6 +130,7 @@
umov x_crc_ret, v_tmp_low.d[1]
b .crc_tab_pre
#ifndef __APPLE__
.size \name, .-\name
#endif
.endm

View File

@ -30,37 +30,50 @@
/*
 * Dispatcher for crc16_t10dif.
 *
 * Linux: tests the HWCAP_PMULL bit of getauxval(AT_HWCAP).
 * Apple: asks sysctlEnabled(SYSCTL_PMULL_KEY).
 * A positive probe yields PROVIDER_INFO(crc16_t10dif_pmull); a negative
 * probe, or a platform with no probe, yields PROVIDER_BASIC(crc16_t10dif).
 */
DEFINE_INTERFACE_DISPATCHER(crc16_t10dif)
{
	int pmull_ok = 0;	/* stays 0 when neither branch below is compiled */

#if defined(__linux__)
	pmull_ok = (getauxval(AT_HWCAP) & HWCAP_PMULL) != 0;
#elif defined(__APPLE__)
	pmull_ok = sysctlEnabled(SYSCTL_PMULL_KEY) != 0;
#endif

	if (!pmull_ok)
		return PROVIDER_BASIC(crc16_t10dif);

	return PROVIDER_INFO(crc16_t10dif_pmull);
}
/*
 * Dispatcher for crc16_t10dif_copy.
 *
 * Linux: tests the HWCAP_PMULL bit of getauxval(AT_HWCAP).
 * Apple: asks sysctlEnabled(SYSCTL_PMULL_KEY).
 * A positive probe yields PROVIDER_INFO(crc16_t10dif_copy_pmull); otherwise
 * PROVIDER_BASIC(crc16_t10dif_copy) is returned.
 */
DEFINE_INTERFACE_DISPATCHER(crc16_t10dif_copy)
{
	int pmull_ok = 0;	/* stays 0 when neither branch below is compiled */

#if defined(__linux__)
	pmull_ok = (getauxval(AT_HWCAP) & HWCAP_PMULL) != 0;
#elif defined(__APPLE__)
	pmull_ok = sysctlEnabled(SYSCTL_PMULL_KEY) != 0;
#endif

	if (!pmull_ok)
		return PROVIDER_BASIC(crc16_t10dif_copy);

	return PROVIDER_INFO(crc16_t10dif_copy_pmull);
}
/*
 * Dispatcher for crc32_ieee.
 *
 * Linux: tests the HWCAP_PMULL bit of getauxval(AT_HWCAP).
 * Apple: asks sysctlEnabled(SYSCTL_PMULL_KEY).
 * A positive probe yields PROVIDER_INFO(crc32_ieee_norm_pmull); otherwise
 * PROVIDER_BASIC(crc32_ieee) is returned.
 */
DEFINE_INTERFACE_DISPATCHER(crc32_ieee)
{
	int pmull_ok = 0;	/* stays 0 when neither branch below is compiled */

#if defined(__linux__)
	pmull_ok = (getauxval(AT_HWCAP) & HWCAP_PMULL) != 0;
#elif defined(__APPLE__)
	pmull_ok = sysctlEnabled(SYSCTL_PMULL_KEY) != 0;
#endif

	if (!pmull_ok)
		return PROVIDER_BASIC(crc32_ieee);

	return PROVIDER_INFO(crc32_ieee_norm_pmull);
}
DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
{
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) {
switch (get_micro_arch_id()) {
@ -77,12 +90,19 @@ DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
if (auxval & HWCAP_PMULL) {
return PROVIDER_INFO(crc32_iscsi_refl_pmull);
}
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(crc32_iscsi_3crc_fold);
if (sysctlEnabled(SYSCTL_PMULL_KEY))
return PROVIDER_INFO(crc32_iscsi_refl_pmull);
#endif
return PROVIDER_BASIC(crc32_iscsi);
}
DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl)
{
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) {
@ -99,68 +119,97 @@ DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl)
if (auxval & HWCAP_PMULL)
return PROVIDER_INFO(crc32_gzip_refl_pmull);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(crc32_gzip_refl_3crc_fold);
if (sysctlEnabled(SYSCTL_PMULL_KEY))
return PROVIDER_INFO(crc32_gzip_refl_pmull);
#endif
return PROVIDER_BASIC(crc32_gzip_refl);
}
/*
 * Dispatcher for crc64_ecma_refl.
 *
 * Linux: tests the HWCAP_PMULL bit of getauxval(AT_HWCAP).
 * Apple: asks sysctlEnabled(SYSCTL_PMULL_KEY).
 * A positive probe yields PROVIDER_INFO(crc64_ecma_refl_pmull); otherwise
 * PROVIDER_BASIC(crc64_ecma_refl) is returned.
 */
DEFINE_INTERFACE_DISPATCHER(crc64_ecma_refl)
{
	int pmull_ok = 0;	/* stays 0 when neither branch below is compiled */

#if defined(__linux__)
	pmull_ok = (getauxval(AT_HWCAP) & HWCAP_PMULL) != 0;
#elif defined(__APPLE__)
	pmull_ok = sysctlEnabled(SYSCTL_PMULL_KEY) != 0;
#endif

	if (!pmull_ok)
		return PROVIDER_BASIC(crc64_ecma_refl);

	return PROVIDER_INFO(crc64_ecma_refl_pmull);
}
/*
 * Dispatcher for crc64_ecma_norm.
 *
 * Linux: tests the HWCAP_PMULL bit of getauxval(AT_HWCAP).
 * Apple: asks sysctlEnabled(SYSCTL_PMULL_KEY).
 * A positive probe yields PROVIDER_INFO(crc64_ecma_norm_pmull); otherwise
 * PROVIDER_BASIC(crc64_ecma_norm) is returned.
 */
DEFINE_INTERFACE_DISPATCHER(crc64_ecma_norm)
{
	int pmull_ok = 0;	/* stays 0 when neither branch below is compiled */

#if defined(__linux__)
	pmull_ok = (getauxval(AT_HWCAP) & HWCAP_PMULL) != 0;
#elif defined(__APPLE__)
	pmull_ok = sysctlEnabled(SYSCTL_PMULL_KEY) != 0;
#endif

	if (!pmull_ok)
		return PROVIDER_BASIC(crc64_ecma_norm);

	return PROVIDER_INFO(crc64_ecma_norm_pmull);
}
/*
 * Dispatcher for crc64_iso_refl.
 *
 * Linux: tests the HWCAP_PMULL bit of getauxval(AT_HWCAP).
 * Apple: asks sysctlEnabled(SYSCTL_PMULL_KEY).
 * A positive probe yields PROVIDER_INFO(crc64_iso_refl_pmull); otherwise
 * PROVIDER_BASIC(crc64_iso_refl) is returned.
 */
DEFINE_INTERFACE_DISPATCHER(crc64_iso_refl)
{
	int pmull_ok = 0;	/* stays 0 when neither branch below is compiled */

#if defined(__linux__)
	pmull_ok = (getauxval(AT_HWCAP) & HWCAP_PMULL) != 0;
#elif defined(__APPLE__)
	pmull_ok = sysctlEnabled(SYSCTL_PMULL_KEY) != 0;
#endif

	if (!pmull_ok)
		return PROVIDER_BASIC(crc64_iso_refl);

	return PROVIDER_INFO(crc64_iso_refl_pmull);
}
/*
 * Dispatcher for crc64_iso_norm.
 *
 * Linux: tests the HWCAP_PMULL bit of getauxval(AT_HWCAP).
 * Apple: asks sysctlEnabled(SYSCTL_PMULL_KEY).
 * A positive probe yields PROVIDER_INFO(crc64_iso_norm_pmull); otherwise
 * PROVIDER_BASIC(crc64_iso_norm) is returned.
 */
DEFINE_INTERFACE_DISPATCHER(crc64_iso_norm)
{
	int pmull_ok = 0;	/* stays 0 when neither branch below is compiled */

#if defined(__linux__)
	pmull_ok = (getauxval(AT_HWCAP) & HWCAP_PMULL) != 0;
#elif defined(__APPLE__)
	pmull_ok = sysctlEnabled(SYSCTL_PMULL_KEY) != 0;
#endif

	if (!pmull_ok)
		return PROVIDER_BASIC(crc64_iso_norm);

	return PROVIDER_INFO(crc64_iso_norm_pmull);
}
/*
 * Dispatcher for crc64_jones_refl.
 *
 * Linux: tests the HWCAP_PMULL bit of getauxval(AT_HWCAP).
 * Apple: asks sysctlEnabled(SYSCTL_PMULL_KEY).
 * A positive probe yields PROVIDER_INFO(crc64_jones_refl_pmull); otherwise
 * PROVIDER_BASIC(crc64_jones_refl) is returned.
 */
DEFINE_INTERFACE_DISPATCHER(crc64_jones_refl)
{
	int pmull_ok = 0;	/* stays 0 when neither branch below is compiled */

#if defined(__linux__)
	pmull_ok = (getauxval(AT_HWCAP) & HWCAP_PMULL) != 0;
#elif defined(__APPLE__)
	pmull_ok = sysctlEnabled(SYSCTL_PMULL_KEY) != 0;
#endif

	if (!pmull_ok)
		return PROVIDER_BASIC(crc64_jones_refl);

	return PROVIDER_INFO(crc64_jones_refl_pmull);
}
/*
 * Dispatcher for crc64_jones_norm.
 *
 * Linux: tests the HWCAP_PMULL bit of getauxval(AT_HWCAP).
 * Apple: asks sysctlEnabled(SYSCTL_PMULL_KEY).
 * A positive probe yields PROVIDER_INFO(crc64_jones_norm_pmull); otherwise
 * PROVIDER_BASIC(crc64_jones_norm) is returned.
 */
DEFINE_INTERFACE_DISPATCHER(crc64_jones_norm)
{
	int pmull_ok = 0;	/* stays 0 when neither branch below is compiled */

#if defined(__linux__)
	pmull_ok = (getauxval(AT_HWCAP) & HWCAP_PMULL) != 0;
#elif defined(__APPLE__)
	pmull_ok = sysctlEnabled(SYSCTL_PMULL_KEY) != 0;
#endif

	if (!pmull_ok)
		return PROVIDER_BASIC(crc64_jones_norm);

	return PROVIDER_INFO(crc64_jones_norm_pmull);
}

View File

@ -27,6 +27,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
// parameters
#define w_seed w0
#define x_seed x0
@ -126,8 +128,13 @@
.endm
.macro crc_norm_load_first_block
#ifndef __APPLE__
adrp x_tmp, .shuffle_data
ldr q_shuffle, [x_tmp, #:lo12:.shuffle_data]
#else
adrp x_tmp, .shuffle_data@PAGE
ldr q_shuffle, [x_tmp, #.shuffle_data@PAGEOFF]
#endif
ldr q_x0_tmp, [x_buf]
ldr q_x1, [x_buf, 16]
@ -299,4 +306,4 @@
pmull v_tmp_low.1q, v_x2.1d, v_p1.1d
eor v_x3.16b, v_x3.16b, v_tmp_high.16b
eor v_x3.16b, v_x3.16b, v_tmp_low.16b
.endm
.endm

View File

@ -30,60 +30,90 @@
/*
 * Dispatcher for gf_vect_dot_prod.
 *
 * Preference order: the _sve provider, then the _neon provider, then
 * PROVIDER_BASIC(gf_vect_dot_prod).
 * Linux: both choices are gated on bits of getauxval(AT_HWCAP)
 *        (HWCAP_SVE, HWCAP_ASIMD).
 * Apple: SVE is gated on sysctlEnabled(SYSCTL_SVE_KEY); the NEON provider
 *        is returned unconditionally when SVE is not selected.
 */
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
{
	int sve_ok = 0;
	int neon_ok = 0;

#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	sve_ok = (hwcap & HWCAP_SVE) != 0;
	neon_ok = (hwcap & HWCAP_ASIMD) != 0;
#elif defined(__APPLE__)
	sve_ok = sysctlEnabled(SYSCTL_SVE_KEY) != 0;
	neon_ok = 1;	/* no probe on this branch: NEON is the fallback */
#endif

	if (sve_ok)
		return PROVIDER_INFO(gf_vect_dot_prod_sve);
	if (neon_ok)
		return PROVIDER_INFO(gf_vect_dot_prod_neon);

	return PROVIDER_BASIC(gf_vect_dot_prod);
}
/*
 * Dispatcher for gf_vect_mad.
 *
 * Preference order: the _sve provider, then the _neon provider, then
 * PROVIDER_BASIC(gf_vect_mad).
 * Linux: both choices are gated on bits of getauxval(AT_HWCAP)
 *        (HWCAP_SVE, HWCAP_ASIMD).
 * Apple: SVE is gated on sysctlEnabled(SYSCTL_SVE_KEY); the NEON provider
 *        is returned unconditionally when SVE is not selected.
 */
DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
{
	int sve_ok = 0;
	int neon_ok = 0;

#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	sve_ok = (hwcap & HWCAP_SVE) != 0;
	neon_ok = (hwcap & HWCAP_ASIMD) != 0;
#elif defined(__APPLE__)
	sve_ok = sysctlEnabled(SYSCTL_SVE_KEY) != 0;
	neon_ok = 1;	/* no probe on this branch: NEON is the fallback */
#endif

	if (sve_ok)
		return PROVIDER_INFO(gf_vect_mad_sve);
	if (neon_ok)
		return PROVIDER_INFO(gf_vect_mad_neon);

	return PROVIDER_BASIC(gf_vect_mad);
}
/*
 * Dispatcher for ec_encode_data.
 *
 * Preference order: the _sve provider, then the _neon provider, then
 * PROVIDER_BASIC(ec_encode_data).
 * Linux: both choices are gated on bits of getauxval(AT_HWCAP)
 *        (HWCAP_SVE, HWCAP_ASIMD).
 * Apple: SVE is gated on sysctlEnabled(SYSCTL_SVE_KEY); the NEON provider
 *        is returned unconditionally when SVE is not selected.
 */
DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
{
	int sve_ok = 0;
	int neon_ok = 0;

#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	sve_ok = (hwcap & HWCAP_SVE) != 0;
	neon_ok = (hwcap & HWCAP_ASIMD) != 0;
#elif defined(__APPLE__)
	sve_ok = sysctlEnabled(SYSCTL_SVE_KEY) != 0;
	neon_ok = 1;	/* no probe on this branch: NEON is the fallback */
#endif

	if (sve_ok)
		return PROVIDER_INFO(ec_encode_data_sve);
	if (neon_ok)
		return PROVIDER_INFO(ec_encode_data_neon);

	return PROVIDER_BASIC(ec_encode_data);
}
/*
 * Dispatcher for ec_encode_data_update.
 *
 * Preference order: the _sve provider, then the _neon provider, then
 * PROVIDER_BASIC(ec_encode_data_update).
 * Linux: both choices are gated on bits of getauxval(AT_HWCAP)
 *        (HWCAP_SVE, HWCAP_ASIMD).
 * Apple: SVE is gated on sysctlEnabled(SYSCTL_SVE_KEY); the NEON provider
 *        is returned unconditionally when SVE is not selected.
 */
DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
{
	int sve_ok = 0;
	int neon_ok = 0;

#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	sve_ok = (hwcap & HWCAP_SVE) != 0;
	neon_ok = (hwcap & HWCAP_ASIMD) != 0;
#elif defined(__APPLE__)
	sve_ok = sysctlEnabled(SYSCTL_SVE_KEY) != 0;
	neon_ok = 1;	/* no probe on this branch: NEON is the fallback */
#endif

	if (sve_ok)
		return PROVIDER_INFO(ec_encode_data_update_sve);
	if (neon_ok)
		return PROVIDER_INFO(ec_encode_data_update_neon);

	return PROVIDER_BASIC(ec_encode_data_update);
}
/*
 * Dispatcher for gf_vect_mul.
 *
 * Preference order: the _sve provider, then the _neon provider, then
 * PROVIDER_BASIC(gf_vect_mul).
 * Linux: both choices are gated on bits of getauxval(AT_HWCAP)
 *        (HWCAP_SVE, HWCAP_ASIMD).
 * Apple: SVE is gated on sysctlEnabled(SYSCTL_SVE_KEY); the NEON provider
 *        is returned unconditionally when SVE is not selected.
 */
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
{
	int sve_ok = 0;
	int neon_ok = 0;

#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	sve_ok = (hwcap & HWCAP_SVE) != 0;
	neon_ok = (hwcap & HWCAP_ASIMD) != 0;
#elif defined(__APPLE__)
	sve_ok = sysctlEnabled(SYSCTL_SVE_KEY) != 0;
	neon_ok = 1;	/* no probe on this branch: NEON is the fallback */
#endif

	if (sve_ok)
		return PROVIDER_INFO(gf_vect_mul_sve);
	if (neon_ok)
		return PROVIDER_INFO(gf_vect_mul_neon);

	return PROVIDER_BASIC(gf_vect_mul);
}

View File

@ -27,11 +27,14 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.text
.global gf_2vect_dot_prod_neon
.global cdecl(gf_2vect_dot_prod_neon)
#ifndef __APPLE__
.type gf_2vect_dot_prod_neon, %function
#endif
/* arguments */
x_len .req x0
@ -130,7 +133,7 @@ q_data .req q_p1_1
v_data_lo .req v_p1_2
v_data_hi .req v_p1_3
gf_2vect_dot_prod_neon:
cdecl(gf_2vect_dot_prod_neon):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_2vect_dot_prod_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_2vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_2vect_dot_prod_sve, %function
#endif
/* void gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
*/
@ -81,7 +85,7 @@ q_gft2_hi .req q18
z_dest2 .req z27
gf_2vect_dot_prod_sve:
cdecl(gf_2vect_dot_prod_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -26,11 +26,15 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.text
.global gf_2vect_mad_neon
.global cdecl(gf_2vect_mad_neon)
#ifndef __APPLE__
.type gf_2vect_mad_neon, %function
#endif
/* arguments */
x_len .req x0
@ -125,7 +129,7 @@ v_data_lo .req v17
v_data_hi .req v18
gf_2vect_mad_neon:
cdecl(gf_2vect_mad_neon):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail
@ -360,8 +364,13 @@ gf_2vect_mad_neon:
sub x_dest1, x_dest1, x_tmp
sub x_dest2, x_dest2, x_tmp
#ifndef __APPLE__
adrp x_const, const_tbl
add x_const, x_const, :lo12:const_tbl
#else
adrp x_const, const_tbl@PAGE
add x_const, x_const, const_tbl@PAGEOFF
#endif
sub x_const, x_const, x_tmp
ldr q_tmp, [x_const, #16]
@ -395,7 +404,7 @@ gf_2vect_mad_neon:
mov w_ret, #1
ret
.section .rodata
ASM_DEF_RODATA
.balign 8
const_tbl:
.dword 0x0000000000000000, 0x0000000000000000

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_2vect_mad_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_2vect_mad_sve)
#ifndef __APPLE__
.type gf_2vect_mad_sve, %function
#endif
/* gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
@ -77,7 +81,7 @@ q_gft2_hi .req q18
z_dest2 .req z27
gf_2vect_mad_sve:
cdecl(gf_2vect_mad_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -27,11 +27,14 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.text
.global gf_3vect_dot_prod_neon
.global cdecl(gf_3vect_dot_prod_neon)
#ifndef __APPLE__
.type gf_3vect_dot_prod_neon, %function
#endif
/* arguments */
x_len .req x0
@ -117,7 +120,7 @@ v_data_lo .req v_p1_2
v_data_hi .req v_p1_3
gf_3vect_dot_prod_neon:
cdecl(gf_3vect_dot_prod_neon):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_3vect_dot_prod_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_3vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_3vect_dot_prod_sve, %function
#endif
/* void gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
*/
@ -89,7 +93,7 @@ q_gft3_hi .req q20
z_dest2 .req z27
z_dest3 .req z28
gf_3vect_dot_prod_sve:
cdecl(gf_3vect_dot_prod_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -26,11 +26,15 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.text
.global gf_3vect_mad_neon
.global cdecl(gf_3vect_mad_neon)
#ifndef __APPLE__
.type gf_3vect_mad_neon, %function
#endif
/* arguments */
x_len .req x0
@ -122,7 +126,7 @@ q_data .req q21
v_data_lo .req v22
v_data_hi .req v23
gf_3vect_mad_neon:
cdecl(gf_3vect_mad_neon):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail
@ -332,8 +336,13 @@ gf_3vect_mad_neon:
sub x_dest2, x_dest2, x_tmp
sub x_dest3, x_dest3, x_tmp
#ifndef __APPLE__
adrp x_const, const_tbl
add x_const, x_const, :lo12:const_tbl
#else
adrp x_const, const_tbl@PAGE
add x_const, x_const, const_tbl@PAGEOFF
#endif
sub x_const, x_const, x_tmp
ldr q_tmp, [x_const, #16]
@ -375,7 +384,7 @@ gf_3vect_mad_neon:
mov w_ret, #1
ret
.section .rodata
ASM_DEF_RODATA
.balign 8
const_tbl:
.dword 0x0000000000000000, 0x0000000000000000

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_3vect_mad_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_3vect_mad_sve)
#ifndef __APPLE__
.type gf_3vect_mad_sve, %function
#endif
/* gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
@ -84,7 +88,7 @@ q_gft3_hi .req q20
z_dest2 .req z27
z_dest3 .req z28
gf_3vect_mad_sve:
cdecl(gf_3vect_mad_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -26,11 +26,15 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.text
.global gf_4vect_dot_prod_neon
.global cdecl(gf_4vect_dot_prod_neon)
#ifndef __APPLE__
.type gf_4vect_dot_prod_neon, %function
#endif
/* arguments */
x_len .req x0
@ -132,7 +136,7 @@ q_data .req q_tmp1
v_data_lo .req v_tmp1_lo
v_data_hi .req v_tmp1_hi
gf_4vect_dot_prod_neon:
cdecl(gf_4vect_dot_prod_neon):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_4vect_dot_prod_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_4vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_4vect_dot_prod_sve, %function
#endif
/* void gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
*/
@ -97,7 +101,7 @@ z_dest2 .req z27
z_dest3 .req z28
z_dest4 .req z29
gf_4vect_dot_prod_sve:
cdecl(gf_4vect_dot_prod_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -27,11 +27,14 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.text
.global gf_4vect_mad_neon
.global cdecl(gf_4vect_mad_neon)
#ifndef __APPLE__
.type gf_4vect_mad_neon, %function
#endif
/* arguments */
x_len .req x0
@ -138,7 +141,7 @@ q_data .req q21
v_data_lo .req v22
v_data_hi .req v23
gf_4vect_mad_neon:
cdecl(gf_4vect_mad_neon):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail
@ -397,8 +400,13 @@ gf_4vect_mad_neon:
sub x_dest3, x_dest3, x_tmp
sub x_dest4, x_dest4, x_tmp
#ifndef __APPLE__
adrp x_const, const_tbl
add x_const, x_const, :lo12:const_tbl
#else
adrp x_const, const_tbl@PAGE
add x_const, x_const, const_tbl@PAGEOFF
#endif
sub x_const, x_const, x_tmp
ldr q_tmp, [x_const, #16]
@ -449,7 +457,7 @@ gf_4vect_mad_neon:
mov w_ret, #1
ret
.section .rodata
ASM_DEF_RODATA
.balign 8
const_tbl:
.dword 0x0000000000000000, 0x0000000000000000

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_4vect_mad_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_4vect_mad_sve)
#ifndef __APPLE__
.type gf_4vect_mad_sve, %function
#endif
/* gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
@ -91,7 +95,7 @@ z_dest2 .req z27
z_dest3 .req z28
z_dest4 .req z29
gf_4vect_mad_sve:
cdecl(gf_4vect_mad_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -27,11 +27,14 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.text
.global gf_5vect_dot_prod_neon
.global cdecl(gf_5vect_dot_prod_neon)
#ifndef __APPLE__
.type gf_5vect_dot_prod_neon, %function
#endif
/* arguments */
x_len .req x0
@ -159,7 +162,7 @@ q_gft5_lo .req q_p2_3
q_gft5_hi .req q_p3_3
gf_5vect_dot_prod_neon:
cdecl(gf_5vect_dot_prod_neon):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_5vect_dot_prod_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_5vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_5vect_dot_prod_sve, %function
#endif
/* void gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
*/
@ -105,7 +109,7 @@ z_dest3 .req z28
z_dest4 .req z29
z_dest5 .req z30
gf_5vect_dot_prod_sve:
cdecl(gf_5vect_dot_prod_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -26,11 +26,15 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.text
.global gf_5vect_mad_neon
.global cdecl(gf_5vect_mad_neon)
#ifndef __APPLE__
.type gf_5vect_mad_neon, %function
#endif
/* arguments */
x_len .req x0
@ -152,7 +156,7 @@ q_data .req q21
v_data_lo .req v22
v_data_hi .req v23
gf_5vect_mad_neon:
cdecl(gf_5vect_mad_neon):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail
@ -463,8 +467,13 @@ gf_5vect_mad_neon:
sub x_dest4, x_dest4, x_tmp
sub x_dest5, x_dest5, x_tmp
#ifndef __APPLE__
adrp x_const, const_tbl
add x_const, x_const, :lo12:const_tbl
#else
adrp x_const, const_tbl@PAGE
add x_const, x_const, const_tbl@PAGEOFF
#endif
sub x_const, x_const, x_tmp
ldr q_tmp, [x_const, #16]
@ -528,7 +537,7 @@ gf_5vect_mad_neon:
mov w_ret, #1
ret
.section .rodata
ASM_DEF_RODATA
.balign 8
const_tbl:
.dword 0x0000000000000000, 0x0000000000000000

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_5vect_mad_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_5vect_mad_sve)
#ifndef __APPLE__
.type gf_5vect_mad_sve, %function
#endif
/* gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
@ -98,7 +102,7 @@ z_dest3 .req z28
z_dest4 .req z29
z_dest5 .req z30
gf_5vect_mad_sve:
cdecl(gf_5vect_mad_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_6vect_dot_prod_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_6vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_6vect_dot_prod_sve, %function
#endif
/* void gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
*/
@ -113,7 +117,7 @@ z_dest4 .req z29
z_dest5 .req z30
z_dest6 .req z31
gf_6vect_dot_prod_sve:
cdecl(gf_6vect_dot_prod_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -27,10 +27,13 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
.text
.global gf_6vect_mad_neon
.type gf_6vect_mad_neon, %function
#include "../include/aarch64_label.h"
.text
.global cdecl(gf_6vect_mad_neon)
#ifndef __APPLE__
.type gf_6vect_mad_neon, %function
#endif
/* arguments */
x_len .req x0
@ -166,7 +169,7 @@ q_data .req q21
v_data_lo .req v22
v_data_hi .req v23
gf_6vect_mad_neon:
cdecl(gf_6vect_mad_neon):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail
@ -526,8 +529,13 @@ gf_6vect_mad_neon:
sub x_dest5, x_dest5, x_tmp
sub x_dest6, x_dest6, x_tmp
#ifndef __APPLE__
adrp x_const, const_tbl
add x_const, x_const, :lo12:const_tbl
#else
adrp x_const, const_tbl@PAGE
add x_const, x_const, const_tbl@PAGEOFF
#endif
sub x_const, x_const, x_tmp
ldr q_tmp, [x_const, #16]
@ -603,7 +611,7 @@ gf_6vect_mad_neon:
mov w_ret, #1
ret
.section .rodata
ASM_DEF_RODATA
.balign 8
const_tbl:
.dword 0x0000000000000000, 0x0000000000000000

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_6vect_mad_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_6vect_mad_sve)
#ifndef __APPLE__
.type gf_6vect_mad_sve, %function
#endif
/* gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest);
@ -105,7 +109,7 @@ z_dest4 .req z29
z_dest5 .req z30
z_dest6 .req z31
gf_6vect_mad_sve:
cdecl(gf_6vect_mad_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_7vect_dot_prod_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_7vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_7vect_dot_prod_sve, %function
#endif
/* void gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
*/
@ -122,7 +126,7 @@ z_dest4 .req z29
z_dest5 .req z30
z_dest6 .req z31
gf_7vect_dot_prod_sve:
cdecl(gf_7vect_dot_prod_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_8vect_dot_prod_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_8vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_8vect_dot_prod_sve, %function
#endif
/* void gf_8vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest);
*/
@ -131,7 +135,7 @@ z_dest4 .req z29
z_dest5 .req z30
z_dest6 .req z31
gf_8vect_dot_prod_sve:
cdecl(gf_8vect_dot_prod_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -26,10 +26,15 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.text
.global gf_vect_dot_prod_neon
.global cdecl(gf_vect_dot_prod_neon)
#ifndef __APPLE__
.type gf_vect_dot_prod_neon, %function
#endif
/* arguments */
x_len .req x0
@ -115,7 +120,7 @@ v_data_lo .req v_p2
v_data_hi .req v_p3
gf_vect_dot_prod_neon:
cdecl(gf_vect_dot_prod_neon):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_vect_dot_prod_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_vect_dot_prod_sve, %function
#endif
/* void gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char *dest);
*/
@ -66,7 +70,7 @@ z_gft1_hi .req z5
q_gft1_lo .req q4
q_gft1_hi .req q5
gf_vect_dot_prod_sve:
cdecl(gf_vect_dot_prod_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -26,11 +26,15 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.text
.global gf_vect_mad_neon
.global cdecl(gf_vect_mad_neon)
#ifndef __APPLE__
.type gf_vect_mad_neon, %function
#endif
/* arguments */
x_len .req x0
@ -121,7 +125,7 @@ v_data_lo .req v_d1_2
v_data_hi .req v_d1_3
gf_vect_mad_neon:
cdecl(gf_vect_mad_neon):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail
@ -281,8 +285,13 @@ gf_vect_mad_neon:
mov x_src, x_src_end
sub x_dest1, x_dest1, x_tmp
#ifndef __APPLE__
adrp x_const, const_tbl
add x_const, x_const, :lo12:const_tbl
#else
adrp x_const, const_tbl@PAGE
add x_const, x_const, const_tbl@PAGEOFF
#endif
sub x_const, x_const, x_tmp
ldr q_tmp, [x_const, #16]
@ -308,7 +317,7 @@ gf_vect_mad_neon:
mov w_ret, #1
ret
.section .rodata
ASM_DEF_RODATA
.balign 8
const_tbl:
.dword 0x0000000000000000, 0x0000000000000000

View File

@ -30,9 +30,12 @@
.align 6
.arch armv8-a+sve
#include "../include/aarch64_label.h"
.global gf_vect_mad_sve
.global cdecl(gf_vect_mad_sve)
#ifndef __APPLE__
.type gf_vect_mad_sve, %function
#endif
/* gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char *dest);
@ -68,7 +71,7 @@ z_gft1_hi .req z7
q_gft1_lo .req q6
q_gft1_hi .req q7
gf_vect_mad_sve:
cdecl(gf_vect_mad_sve):
/* less than 16 bytes, return_fail */
cmp x_len, #16
blt .return_fail

View File

@ -27,11 +27,14 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.text
.global gf_vect_mul_neon
.global cdecl(gf_vect_mul_neon)
#ifndef __APPLE__
.type gf_vect_mul_neon, %function
#endif
/* arguments */
x_len .req x0
@ -90,7 +93,7 @@ v_data_6_hi .req v_data_6
v_data_7_hi .req v_data_7
gf_vect_mul_neon:
cdecl(gf_vect_mul_neon):
/* less than 32 bytes, return_fail */
cmp x_len, #32
blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6
.arch armv8-a+sve
.global gf_vect_mul_sve
#include "../include/aarch64_label.h"
.global cdecl(gf_vect_mul_sve)
#ifndef __APPLE__
.type gf_vect_mul_sve, %function
#endif
/* Refer to include/gf_vect_mul.h
*
@ -72,7 +76,7 @@ z_gft1_hi .req z7
q_gft1_lo .req q6
q_gft1_hi .req q7
gf_vect_mul_sve:
cdecl(gf_vect_mul_sve):
/* less than 32 bytes, return_fail */
cmp x_len, #32
blt .return_fail

View File

@ -27,6 +27,8 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc
.text
.align 2
@ -46,8 +48,10 @@ declare Macros
x_\name .req x\reg
.endm
.global encode_deflate_icf_aarch64
.global cdecl(encode_deflate_icf_aarch64)
#ifndef __APPLE__
.type encode_deflate_icf_aarch64, %function
#endif
/*
struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
@ -86,7 +90,7 @@ declare Macros
.equ offset_m_out_buf, 16
.equ offset_m_out_end, 24
encode_deflate_icf_aarch64:
cdecl(encode_deflate_icf_aarch64):
cmp next_in, end_in
bcs .done
@ -156,4 +160,6 @@ encode_deflate_icf_aarch64:
.done:
ret
#ifndef __APPLE__
.size encode_deflate_icf_aarch64, .-encode_deflate_icf_aarch64
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc+crypto
.text
.align 2
@ -62,8 +65,10 @@ declare Macros
.endm
.align 2
.global gen_icf_map_h1_aarch64
.global cdecl(gen_icf_map_h1_aarch64)
#ifndef __APPLE__
.type gen_icf_map_h1_aarch64, %function
#endif
/* arguments */
declare_generic_reg stream_param, 0,x
@ -137,7 +142,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
struct deflate_icf *matches_icf_lookup, uint64_t input_size)
*/
gen_icf_map_h1_aarch64:
cdecl(gen_icf_map_h1_aarch64):
cmp input_size_param, (ISAL_LOOK_AHEAD-1) // 287
bls .fast_exit
stp x29, x30, [sp, -16]!
@ -263,4 +268,6 @@ gen_icf_map_h1_aarch64:
.fast_exit:
mov ret_val, 0
ret
#ifndef __APPLE__
.size gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64
#endif

View File

@ -27,6 +27,8 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a
.text
.align 2
@ -257,8 +259,10 @@ declare Macros
.endm
.global decode_huffman_code_block_stateless_aarch64
.global cdecl(decode_huffman_code_block_stateless_aarch64)
#ifndef __APPLE__
.type decode_huffman_code_block_stateless_aarch64, %function
#endif
/*
void decode_huffman_code_block_stateless_aarch64(
struct inflate_state *state,
@ -305,7 +309,7 @@ declare Macros
declare_generic_reg write_overflow_lits,26,w
declare_generic_reg repeat_length,27,w
decode_huffman_code_block_stateless_aarch64:
cdecl(decode_huffman_code_block_stateless_aarch64):
//save registers
push_stack
@ -324,8 +328,13 @@ decode_huffman_code_block_stateless_aarch64:
ldp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
//init rfc_table
#ifndef __APPLE__
adrp rfc_table,rfc_lookup_table
add rfc_table,rfc_table,:lo12:rfc_lookup_table
#else
adrp rfc_table,rfc_lookup_table@PAGE
add rfc_table,rfc_table,rfc_lookup_table@PAGEOFF
#endif
#if ENABLE_TBL_INSTRUCTION
ld1 {v1.16b,v2.16b,v3.16b},[rfc_table]
add rfc_table,rfc_table,48
@ -661,8 +670,10 @@ byte_copy_loop:
strb w_arg0, [next_out],1
bne byte_copy_loop
b decompress_data_end
#ifndef __APPLE__
.size decode_huffman_code_block_stateless_aarch64, .-decode_huffman_code_block_stateless_aarch64
.type rfc_lookup_table, %object
#endif
rfc_lookup_table:
#if ENABLE_TBL_INSTRUCTION
@ -686,4 +697,6 @@ rfc_lookup_table:
.short 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
.short 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
#endif
#ifndef __APPLE__
.size rfc_lookup_table, . - rfc_lookup_table
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc
.text
.align 2
@ -72,8 +75,10 @@ skip_has_hist:
.endm
.global isal_deflate_body_aarch64
.global cdecl(isal_deflate_body_aarch64)
#ifndef __APPLE__
.type isal_deflate_body_aarch64, %function
#endif
/*
void isal_deflate_body_aarch64(struct isal_zstream *stream)
*/
@ -115,7 +120,7 @@ skip_has_hist:
declare_generic_reg code_len2, 4,x
isal_deflate_body_aarch64:
cdecl(isal_deflate_body_aarch64):
//save registers
push_stack
ldr avail_in, [stream, _avail_in]
@ -258,4 +263,6 @@ exit_save_state:
mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
str w_tmp0, [stream, _internal_state+_state]
b exit_ret
#ifndef __APPLE__
.size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc
.text
.align 2
@ -73,9 +76,11 @@ skip_has_hist:
sub w_\next_in,w_\next_in,w_\m_out_buf
stp w_\next_in,w_\start_in,[\stream,_avail_out]
.endm
.global isal_deflate_finish_aarch64
.global cdecl(isal_deflate_finish_aarch64)
.arch armv8-a+crc
#ifndef __APPLE__
.type isal_deflate_finish_aarch64, %function
#endif
/*
void isal_deflate_finish_aarch64(struct isal_zstream *stream)
*/
@ -117,7 +122,7 @@ skip_has_hist:
declare_generic_reg code_len2, 4,x
isal_deflate_finish_aarch64:
cdecl(isal_deflate_finish_aarch64):
//save registers
push_stack
@ -260,5 +265,6 @@ update_state_exit:
update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
pop_stack
ret
#ifndef __APPLE__
.size isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc
.text
.align 2
@ -46,8 +49,10 @@ declare Macros
.global isal_deflate_hash_aarch64
.global cdecl(isal_deflate_hash_aarch64)
#ifndef __APPLE__
.type isal_deflate_hash_aarch64, %function
#endif
/*
void isal_deflate_hash_aarch64(uint16_t * hash_table, uint32_t hash_mask,
uint32_t current_index, uint8_t * dict, uint32_t dict_len)
@ -58,14 +63,14 @@ declare Macros
declare_generic_reg dict, 3,x
declare_generic_reg dict_len, 4,w
declare_generic_reg next_in 3,x
declare_generic_reg end_in 6,x
declare_generic_reg ind 5,w
declare_generic_reg hash 2,w
declare_generic_reg literal 2,w
declare_generic_reg next_in, 3,x
declare_generic_reg end_in, 6,x
declare_generic_reg ind, 5,w
declare_generic_reg hash, 2,w
declare_generic_reg literal, 2,w
#define SHORTEST_MATCH #4
isal_deflate_hash_aarch64:
cdecl(isal_deflate_hash_aarch64):
sub ind, current_index, dict_len
and ind,ind,0xffff
@ -92,4 +97,6 @@ loop_start:
exit_func:
ret
#ifndef __APPLE__
.size isal_deflate_hash_aarch64, .-isal_deflate_hash_aarch64
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crypto
.text
.align 3
@ -56,9 +59,11 @@ Arguements list
adler32 .req w0
start .req x1
length .req x2
.global adler32_neon
.global cdecl(adler32_neon)
#ifndef __APPLE__
.type adler32_neon, %function
adler32_neon:
#endif
cdecl(adler32_neon):
/*
local variables
*/
@ -92,8 +97,13 @@ local variables
lsr adler1, adler32, 16
lsr loop_cnt,length,5
#ifndef __APPLE__
adrp x3,factors
add x3,x3,:lo12:factors
#else
adrp x3,factors@PAGE
add x3,x3,factors@PAGEOFF
#endif
ld1 {factor0_v.16b-factor1_v.16b},[x3]
add end,start,length
@ -162,12 +172,15 @@ end_func:
orr w0,adler0,adler1,lsl 16
ret
#ifndef __APPLE__
.size adler32_neon, .-adler32_neon
.section .rodata.cst16,"aM",@progbits,16
#else
.section __TEXT,__const
#endif
.align 4
factors:
.quad 0x191a1b1c1d1e1f20
.quad 0x1112131415161718
.quad 0x090a0b0c0d0e0f10
.quad 0x0102030405060708

View File

@ -30,86 +30,121 @@
/*
 * Dispatcher for isal_adler32.
 * Linux: returns adler32_neon when AT_HWCAP has the ASIMD bit set.
 * macOS: returns adler32_neon unconditionally.
 * Any other case: returns PROVIDER_BASIC(adler32).
 */
DEFINE_INTERFACE_DISPATCHER(isal_adler32)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_ASIMD) != 0)
		return PROVIDER_INFO(adler32_neon);
#elif defined(__APPLE__)
	/* No runtime probe on Apple builds: the NEON routine is always chosen. */
	return PROVIDER_INFO(adler32_neon);
#endif
	return PROVIDER_BASIC(adler32);
}
/*
 * Dispatcher for isal_deflate_body.
 * Returns isal_deflate_body_aarch64 when CRC32 is reported (AT_HWCAP bit on
 * Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS); otherwise
 * PROVIDER_BASIC(isal_deflate_body).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_body)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(isal_deflate_body_aarch64);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(isal_deflate_body_aarch64);
#endif
	return PROVIDER_BASIC(isal_deflate_body);
}
/*
 * Dispatcher for isal_deflate_finish.
 * Returns isal_deflate_finish_aarch64 when CRC32 is reported (AT_HWCAP bit on
 * Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS); otherwise
 * PROVIDER_BASIC(isal_deflate_finish).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_finish)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(isal_deflate_finish_aarch64);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(isal_deflate_finish_aarch64);
#endif
	return PROVIDER_BASIC(isal_deflate_finish);
}
/*
 * Dispatcher for isal_deflate_icf_body_lvl1.
 * Returns isal_deflate_icf_body_hash_hist_aarch64 when CRC32 is reported
 * (AT_HWCAP bit on Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS);
 * otherwise PROVIDER_BASIC(isal_deflate_icf_body_hash_hist).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl1)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
#endif
	return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
}
/*
 * Dispatcher for isal_deflate_icf_finish_lvl1.
 * Returns isal_deflate_icf_finish_hash_hist_aarch64 when CRC32 is reported
 * (AT_HWCAP bit on Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS);
 * otherwise PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl1)
{
#if defined(__linux__)
	unsigned long auxval = getauxval(AT_HWCAP);
	if (auxval & HWCAP_CRC32)
		return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
#elif defined(__APPLE__)
	/*
	 * Must select the *finish* routine, matching the Linux branch and the
	 * lvl2 dispatcher. The previous code returned the *body* routine here.
	 */
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
#endif
	return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
}
/*
 * Dispatcher for isal_deflate_icf_body_lvl2.
 * Returns isal_deflate_icf_body_hash_hist_aarch64 when CRC32 is reported
 * (AT_HWCAP bit on Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS);
 * otherwise PROVIDER_BASIC(isal_deflate_icf_body_hash_hist).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl2)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
#endif
	return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
}
/*
 * Dispatcher for isal_deflate_icf_finish_lvl2.
 * Returns isal_deflate_icf_finish_hash_hist_aarch64 when CRC32 is reported
 * (AT_HWCAP bit on Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS);
 * otherwise PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl2)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
#endif
	return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
}
/*
 * Dispatcher for isal_deflate_icf_body_lvl3.
 * NOTE(review): every path below, including the fallthrough, returns
 * PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy), so the CRC32 probe does not
 * currently change the result. The branch structure is kept as-is.
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl3)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
#endif
	return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
}
/*
 * Dispatcher for isal_deflate_icf_finish_lvl3.
 * Returns PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base) when CRC32 is
 * reported (AT_HWCAP bit on Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS);
 * otherwise PROVIDER_BASIC(isal_deflate_icf_finish_hash_map).
 * NOTE(review): the CRC32 branch names a *_base symbol; presumably there is
 * no aarch64-specific variant of this routine -- confirm.
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base);
#endif
	return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map);
}
@ -125,64 +160,92 @@ DEFINE_INTERFACE_DISPATCHER(encode_deflate_icf)
/*
 * Dispatcher for isal_update_histogram.
 * Returns isal_update_histogram_aarch64 when CRC32 is reported (AT_HWCAP bit
 * on Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS); otherwise
 * PROVIDER_BASIC(isal_update_histogram).
 */
DEFINE_INTERFACE_DISPATCHER(isal_update_histogram)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(isal_update_histogram_aarch64);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(isal_update_histogram_aarch64);
#endif
	return PROVIDER_BASIC(isal_update_histogram);
}
/*
 * Dispatcher for gen_icf_map_lh1.
 * Returns gen_icf_map_h1_aarch64 when CRC32 is reported (AT_HWCAP bit on
 * Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS); otherwise
 * PROVIDER_BASIC(gen_icf_map_h1).
 */
DEFINE_INTERFACE_DISPATCHER(gen_icf_map_lh1)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(gen_icf_map_h1_aarch64);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(gen_icf_map_h1_aarch64);
#endif
	return PROVIDER_BASIC(gen_icf_map_h1);
}
/*
 * Dispatcher for isal_deflate_hash_lvl0.
 * Returns isal_deflate_hash_aarch64 when CRC32 is reported (AT_HWCAP bit on
 * Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS); otherwise
 * PROVIDER_BASIC(isal_deflate_hash).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl0)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(isal_deflate_hash_aarch64);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(isal_deflate_hash_aarch64);
#endif
	return PROVIDER_BASIC(isal_deflate_hash);
}
/*
 * Dispatcher for isal_deflate_hash_lvl1.
 * Returns isal_deflate_hash_aarch64 when CRC32 is reported (AT_HWCAP bit on
 * Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS); otherwise
 * PROVIDER_BASIC(isal_deflate_hash).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl1)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(isal_deflate_hash_aarch64);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(isal_deflate_hash_aarch64);
#endif
	return PROVIDER_BASIC(isal_deflate_hash);
}
/*
 * Dispatcher for isal_deflate_hash_lvl2.
 * Returns isal_deflate_hash_aarch64 when CRC32 is reported (AT_HWCAP bit on
 * Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS); otherwise
 * PROVIDER_BASIC(isal_deflate_hash).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl2)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(isal_deflate_hash_aarch64);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(isal_deflate_hash_aarch64);
#endif
	return PROVIDER_BASIC(isal_deflate_hash);
}
/*
 * Dispatcher for isal_deflate_hash_lvl3.
 * Returns isal_deflate_hash_aarch64 when CRC32 is reported (AT_HWCAP bit on
 * Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS); otherwise
 * PROVIDER_BASIC(isal_deflate_hash).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl3)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(isal_deflate_hash_aarch64);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(isal_deflate_hash_aarch64);
#endif
	return PROVIDER_BASIC(isal_deflate_hash);
}
/*
 * Dispatcher for decode_huffman_code_block_stateless.
 * Returns decode_huffman_code_block_stateless_aarch64 when CRC32 is reported
 * (AT_HWCAP bit on Linux, sysctlEnabled(SYSCTL_CRC32_KEY) on macOS);
 * otherwise PROVIDER_BASIC(decode_huffman_code_block_stateless).
 */
DEFINE_INTERFACE_DISPATCHER(decode_huffman_code_block_stateless)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if ((hwcap & HWCAP_CRC32) != 0)
		return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64);
#elif defined(__APPLE__)
	if (sysctlEnabled(SYSCTL_CRC32_KEY))
		return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64);
#endif
	return PROVIDER_BASIC(decode_huffman_code_block_stateless);
}

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a
.text
.align 2
@ -48,8 +51,10 @@ declare Macros
.text
.align 2
.global set_long_icf_fg_aarch64
.global cdecl(set_long_icf_fg_aarch64)
#ifndef __APPLE__
.type set_long_icf_fg_aarch64, %function
#endif
/*
void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
@ -69,7 +74,7 @@ void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t inp
/* local variable */
declare_generic_reg len, 7,w
declare_generic_reg dist_code, 8,w
declare_generic_reg shortest_match_len 9,w
declare_generic_reg shortest_match_len, 9,w
declare_generic_reg len_max, 10,w
declare_generic_reg dist_extra, 11,w
declare_generic_reg const_8, 13,x
@ -90,7 +95,7 @@ void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t inp
.equ SHORTEST_MATCH, 4
.equ LEN_MAX_CONST, 512
set_long_icf_fg_aarch64:
cdecl(set_long_icf_fg_aarch64):
stp x29, x30, [sp, -192]!
add x29, sp, 0
stp x21, x22, [sp, 32]
@ -103,11 +108,18 @@ set_long_icf_fg_aarch64:
add end_in, next_in_param, input_size_param
mov match_lookup, match_lookup_param
#ifndef __APPLE__
adrp x1, .data_dist_start
mov x2, DIST_START_SIZE // 128
add x1, x1, :lo12:.data_dist_start
mov x0, dist_start
bl memcpy
#else
adrp x1, .data_dist_start@PAGE
mov x2, DIST_START_SIZE // 128
add x1, x1, .data_dist_start@PAGEOFF
mov x0, dist_start
#endif
bl cdecl(memcpy)
add x_tmp0, end_processed, ISAL_LOOK_AHEAD // 288
cmp end_in, x_tmp0
@ -182,9 +194,11 @@ set_long_icf_fg_aarch64:
ldr x23, [sp, 48]
ldp x29, x30, [sp], 192
ret
#ifndef __APPLE__
.size set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
#endif
.section .rodata
ASM_DEF_RODATA
.align 3
.set .data_dist_start,. + 0
.real_data_dist_start:

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc
.text
.align 2
@ -45,8 +48,10 @@ declare Macros
x_\name .req x\reg
.endm
.global isal_deflate_icf_body_hash_hist_aarch64
.global cdecl(isal_deflate_icf_body_hash_hist_aarch64)
#ifndef __APPLE__
.type isal_deflate_icf_body_hash_hist_aarch64, %function
#endif
/*
void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
*/
@ -126,7 +131,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
declare_generic_reg tmp0, 4,x
declare_generic_reg tmp1, 5,x
isal_deflate_icf_body_hash_hist_aarch64:
cdecl(isal_deflate_icf_body_hash_hist_aarch64):
stp x29, x30, [sp, -80]!
add x29, sp, 0
str x24, [sp, 56]
@ -360,5 +365,6 @@ isal_deflate_icf_body_hash_hist_aarch64:
ldr x24, [sp, 56]
ldp x29, x30, [sp], 80
ret
#ifndef __APPLE__
.size isal_deflate_icf_body_hash_hist_aarch64, .-isal_deflate_icf_body_hash_hist_aarch64
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc
.text
@ -129,7 +132,9 @@ void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream);
declare_generic_reg tmp3, 28,x
.align 2
#ifndef __APPLE__
.type write_deflate_icf_constprop, %function
#endif
write_deflate_icf_constprop:
ldrh w2, [x0]
mov w3, 30
@ -141,10 +146,14 @@ write_deflate_icf_constprop:
ubfx x1, x1, 16, 3
strh w1, [x0, 2]
ret
#ifndef __APPLE__
.size write_deflate_icf_constprop, .-write_deflate_icf_constprop
#endif
.align 2
#ifndef __APPLE__
.type write_deflate_icf, %function
#endif
write_deflate_icf:
ldrh w4, [x0]
bfi w4, w1, 0, 10
@ -156,10 +165,14 @@ write_deflate_icf:
bfi w1, w3, 3, 13
strh w1, [x0, 2]
ret
#ifndef __APPLE__
.size write_deflate_icf, .-write_deflate_icf
#endif
.align 2
#ifndef __APPLE__
.type update_state, %function
#endif
update_state:
sub x7, x2, x1
ldr x4, [x0, 48]
@ -179,12 +192,16 @@ update_state:
str x5, [x4, 4688]
str x6, [x4, 4696]
ret
#ifndef __APPLE__
.size update_state, .-update_state
#endif
.align 2
.global isal_deflate_icf_finish_hash_hist_aarch64
.global cdecl(isal_deflate_icf_finish_hash_hist_aarch64)
#ifndef __APPLE__
.type isal_deflate_icf_finish_hash_hist_aarch64, %function
isal_deflate_icf_finish_hash_hist_aarch64:
#endif
cdecl(isal_deflate_icf_finish_hash_hist_aarch64):
ldr w_end_in, [stream, 8] // stream->avail_in
cbz w_end_in, .stream_not_available
@ -393,5 +410,6 @@ isal_deflate_icf_finish_hash_hist_aarch64:
str w1, [stream, offset_state_state] // 84
.done:
ret
#ifndef __APPLE__
.size isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc
.text
.align 2
@ -63,17 +66,24 @@ declare Macros
.endm
/*
 * convert_length_to_len_sym length, length_out, tmp0
 *
 * Looks up a 32-bit entry in the table at .len_to_code_tab_lanchor, indexed
 * by w_<length> (zero-extended, scaled by 4), and adds 256 to it.
 *   length     : register-name suffix holding the index (read as w_<length>)
 *   length_out : register-name suffix receiving the result (w_<length_out>)
 *   tmp0       : register-name suffix used as scratch (x_<tmp0> is clobbered)
 * The table address is formed page + page-offset; only the relocation
 * spelling differs between Mach-O and ELF.
 */
.macro convert_length_to_len_sym length:req,length_out:req,tmp0:req
#ifdef __APPLE__
	/* Mach-O relocation operators */
	adrp	x_\tmp0, .len_to_code_tab_lanchor@PAGE
	add	x_\tmp0, x_\tmp0, .len_to_code_tab_lanchor@PAGEOFF
#else
	/* ELF relocation operators */
	adrp	x_\tmp0, .len_to_code_tab_lanchor
	add	x_\tmp0, x_\tmp0, :lo12:.len_to_code_tab_lanchor
#endif
	/* length_out = table[length] + 256 */
	ldr	w_\length_out, [x_\tmp0, w_\length, uxtw 2]
	add	w_\length_out, w_\length_out, 256
.endm
.section .rodata
ASM_DEF_RODATA
.align 4
.len_to_code_tab_lanchor = . + 0
#ifndef __APPLE__
.type len_to_code_tab, %object
.size len_to_code_tab, 1056
#endif
len_to_code_tab:
.word 0x00, 0x00, 0x00
.word 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
@ -111,9 +121,11 @@ len_to_code_tab:
.word 0x00, 0x00, 0x00, 0x00, 0x00
.text
.global isal_update_histogram_aarch64
.global cdecl(isal_update_histogram_aarch64)
.arch armv8-a+crc
#ifndef __APPLE__
.type isal_update_histogram_aarch64, %function
#endif
/*
void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
@ -157,7 +169,7 @@ void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
.equ hash_offset, (dist_offset + 8*DIST_LEN) // 2528
.equ hash_table_size, (8*1024*2) // 16384
isal_update_histogram_aarch64:
cdecl(isal_update_histogram_aarch64):
cmp w_length, 0
ble .done
@ -176,7 +188,7 @@ isal_update_histogram_aarch64:
mov x0, last_seen
mov w1, 0
mov x2, hash_table_size
bl memset
bl cdecl(memset)
cmp current, loop_end_iter
bcs .loop_end
@ -308,4 +320,6 @@ isal_update_histogram_aarch64:
.align 2
.done:
ret
#ifndef __APPLE__
.size isal_update_histogram_aarch64, .-isal_update_histogram_aarch64
#endif

18
include/aarch64_label.h Normal file
View File

@ -0,0 +1,18 @@
/*
 * Symbol-naming and section helpers for the aarch64 sources.
 * Only preprocessor macros are defined here, so the header can be included
 * from preprocessed assembly as well as from C.
 */
#ifndef __AARCH64_LABEL_H__
#define __AARCH64_LABEL_H__
/*
 * cdecl(x): spell symbol x with the label prefix the toolchain applies to
 * C identifiers (__USER_LABEL_PREFIX__), so that assembly labels match the
 * names the C side links against.
 */
#ifdef __USER_LABEL_PREFIX__
/* Two levels of macros so the prefix is expanded before ## pastes it onto x. */
#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b
#define cdecl(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
#else
/* The preprocessor does not advertise a prefix: use the name unchanged. */
#define cdecl(x) x
#endif
/*
 * ASM_DEF_RODATA: directive that switches to the read-only data section,
 * __TEXT,__const on Apple targets and .rodata everywhere else.
 */
#ifdef __APPLE__
#define ASM_DEF_RODATA .section __TEXT,__const
#else
#define ASM_DEF_RODATA .section .rodata
#endif
#endif

View File

@ -31,7 +31,14 @@
#ifndef __aarch64__
#error "This file is for aarch64 only"
#endif
/*
 * Apple targets: names of the sysctl entries used to probe CPU features.
 * Other targets: pull in the kernel HWCAP_* bit definitions instead.
 */
#ifdef __APPLE__
#define SYSCTL_PMULL_KEY "hw.optional.arm.FEAT_PMULL" // the FEAT_* sysctl entries are available from macOS 12 onwards
#define SYSCTL_CRC32_KEY "hw.optional.armv8_crc32"
#define SYSCTL_SVE_KEY "hw.optional.arm.FEAT_SVE" // unverified guess at the key name; re-check against future macOS releases
#else
#include <asm/hwcap.h>
#endif
#include "aarch64_label.h"
#ifdef __ASSEMBLY__
/**
* # mbin_interface : the wrapper layer for isal-l api
@ -48,17 +55,18 @@
* 4. The dispather should return the right function pointer , revision and a string information .
**/
.macro mbin_interface name:req
.extern \name\()_dispatcher
.section .data
.extern cdecl(\name\()_dispatcher)
.data
.balign 8
.global \name\()_dispatcher_info
.global cdecl(\name\()_dispatcher_info)
#ifndef __APPLE__
.type \name\()_dispatcher_info,%object
\name\()_dispatcher_info:
#endif
cdecl(\name\()_dispatcher_info):
.quad \name\()_mbinit //func_entry
#ifndef __APPLE__
.size \name\()_dispatcher_info,. - \name\()_dispatcher_info
#endif
.balign 8
.text
\name\()_mbinit:
@ -108,7 +116,7 @@
*/
bl \name\()_dispatcher
bl cdecl(\name\()_dispatcher)
//restore temp/indirect result registers
ldp x8, x9, [sp, 16]
.cfi_restore 8
@ -150,16 +158,24 @@
.cfi_def_cfa_offset 0
.cfi_endproc
.global \name
.global cdecl(\name)
#ifndef __APPLE__
.type \name,%function
#endif
.align 2
\name\():
cdecl(\name\()):
#ifndef __APPLE__
adrp x9, :got:\name\()_dispatcher_info
ldr x9, [x9, #:got_lo12:\name\()_dispatcher_info]
#else
adrp x9, cdecl(\name\()_dispatcher_info)@GOTPAGE
ldr x9, [x9, #cdecl(\name\()_dispatcher_info)@GOTPAGEOFF]
#endif
ldr x10,[x9]
br x10
#ifndef __APPLE__
.size \name,. - \name
#endif
.endm
/**
@ -168,32 +184,53 @@
*/
/*
 * mbin_interface_base name, base
 *
 * Emits, for the interface called name:
 *   - a global 8-byte data slot  cdecl(name_dispatcher_info)  initialised
 *     with the address of base;
 *   - a global entry point  cdecl(name)  that loads the slot address through
 *     the GOT, loads the function pointer stored in it and branches to it.
 * The entry point uses x9 and x10 as scratch and touches no other register.
 *
 * ELF-only directives (.type / .size) are compiled out on Apple targets.
 * The GOT reference must name exactly the label defined for the slot:
 * the plain name on ELF, the cdecl()-prefixed name on Mach-O.
 *
 * NOTE(review): base is emitted as given, without cdecl(); if base is a
 * C-defined function on Mach-O the caller presumably has to pass the
 * prefixed name - confirm against the users of this macro.
 */
.macro mbin_interface_base name:req, base:req
	.extern \base
	.data
	.balign 8
	.global cdecl(\name\()_dispatcher_info)
#ifndef __APPLE__
	.type   \name\()_dispatcher_info,%object
#endif
cdecl(\name\()_dispatcher_info):
	.quad   \base    //func_entry
#ifndef __APPLE__
	.size   \name\()_dispatcher_info,. - \name\()_dispatcher_info
#endif
	.balign 8
	.text
	.global cdecl(\name)
#ifndef __APPLE__
	.type   \name,%function
#endif
	.align  2
cdecl(\name\()):
#ifndef __APPLE__
	adrp    x9, :got:\name\()_dispatcher_info
	ldr     x9, [x9, #:got_lo12:\name\()_dispatcher_info]
#else
	adrp    x9, cdecl(\name\()_dispatcher_info)@GOTPAGE
	ldr     x9, [x9, #cdecl(\name\()_dispatcher_info)@GOTPAGEOFF]
#endif
	ldr     x10,[x9]
	br      x10
#ifndef __APPLE__
	.size   \name,. - \name
#endif
.endm
#else /* __ASSEMBLY__ */
#include <stdint.h>
#if defined(__linux__)
#include <sys/auxv.h>
#elif defined(__APPLE__)
#include <sys/sysctl.h>
#include <stddef.h>
/*
 * Read an integer sysctl entry by name.
 *
 * name: sysctl key to query.
 * Returns the value reported by sysctlbyname(), or 0 when the call fails.
 *
 * The result starts out as 0 so that a successful call which writes fewer
 * bytes than sizeof(int) cannot leave indeterminate bytes in the value.
 */
static inline int sysctlEnabled(const char* name){
	int enabled = 0;
	size_t size = sizeof(enabled);

	if (sysctlbyname(name, &enabled, &size, NULL, 0) != 0)
		return 0;
	return enabled;
}
#endif
#define DEFINE_INTERFACE_DISPATCHER(name) \
@ -298,10 +335,12 @@
/*
 * Return the micro-architecture identifier used by the dispatchers.
 *
 * Starts from CPU_IMPLEMENTER_RESERVE. On non-Apple builds, when the kernel
 * advertises HWCAP_CPUID, the value is replaced by the contents of MIDR_EL1.
 * The result is masked with 0xff00fff0, i.e. only bits 31:24 and 15:4 are
 * kept.
 */
static inline uint32_t get_micro_arch_id(void)
{
	uint32_t midr = CPU_IMPLEMENTER_RESERVE;

#ifndef __APPLE__
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if (hwcap & HWCAP_CPUID) {
		/* This register read traps into the kernel. */
		asm("mrs %0, MIDR_EL1 " : "=r" (midr));
	}
#endif

	return midr & 0xff00fff0;
}

View File

@ -30,10 +30,12 @@
/*
 * Dispatcher for isal_zero_detect.
 * Linux: the NEON provider when HWCAP_ASIMD is reported, else the base one.
 * Apple: the NEON provider, with no runtime probe.
 * Anything else: the base provider.
 */
DEFINE_INTERFACE_DISPATCHER(isal_zero_detect)
{
#if defined(__linux__)
	if (!(getauxval(AT_HWCAP) & HWCAP_ASIMD))
		return PROVIDER_BASIC(mem_zero_detect);
	return PROVIDER_INFO(mem_zero_detect_neon);
#elif defined(__APPLE__)
	return PROVIDER_INFO(mem_zero_detect_neon);
#else
	return PROVIDER_BASIC(mem_zero_detect);
#endif
}

View File

@ -27,6 +27,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
.text
.arch armv8-a
@ -36,10 +38,12 @@
// input: n -> x1
// output: -> x0 (true or false)
.global mem_zero_detect_neon
.global cdecl(mem_zero_detect_neon)
#ifndef __APPLE__
.type mem_zero_detect_neon, %function
#endif
mem_zero_detect_neon:
cdecl(mem_zero_detect_neon):
cmp x1, #(16*24-1)
b.ls .loop_16x24_end

View File

@ -27,10 +27,13 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
.text
.global pq_check_neon
.global cdecl(pq_check_neon)
#ifndef __APPLE__
.type pq_check_neon, %function
#endif
/* int pq_check_neon(int vects, int len, void **src) */
@ -85,7 +88,7 @@ v_0x80 .req v29
* +----------+ +------------------+
*/
pq_check_neon:
cdecl(pq_check_neon):
sub x_src_ptr_end, x_src, #8
sub w_vects, w_vects, #3

View File

@ -27,10 +27,14 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
.text
.global pq_gen_neon
.global cdecl(pq_gen_neon)
#ifndef __APPLE__
.type pq_gen_neon, %function
#endif
/* int pq_gen_neon(int vects, int len, void **src) */
@ -84,7 +88,7 @@ v_0x80 .req v29
* +----------+ +------------------+
*/
pq_gen_neon:
cdecl(pq_gen_neon):
sub x_src_ptr_end, x_src, #8
sub w_vects, w_vects, #3

View File

@ -30,32 +30,48 @@
/*
 * Dispatcher for xor_gen.
 * Linux: the NEON provider when HWCAP_ASIMD is reported, else the base one.
 * Apple: the NEON provider, with no runtime probe.
 * Anything else: the base provider.
 */
DEFINE_INTERFACE_DISPATCHER(xor_gen)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if (!(hwcap & HWCAP_ASIMD))
		return PROVIDER_BASIC(xor_gen);
	return PROVIDER_INFO(xor_gen_neon);
#elif defined(__APPLE__)
	return PROVIDER_INFO(xor_gen_neon);
#else
	return PROVIDER_BASIC(xor_gen);
#endif
}
/*
 * Dispatcher for xor_check.
 * Linux: the NEON provider when HWCAP_ASIMD is reported, else the base one.
 * Apple: the NEON provider, with no runtime probe.
 * Anything else: the base provider.
 */
DEFINE_INTERFACE_DISPATCHER(xor_check)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if (!(hwcap & HWCAP_ASIMD))
		return PROVIDER_BASIC(xor_check);
	return PROVIDER_INFO(xor_check_neon);
#elif defined(__APPLE__)
	return PROVIDER_INFO(xor_check_neon);
#else
	return PROVIDER_BASIC(xor_check);
#endif
}
/*
 * Dispatcher for pq_gen.
 * Linux: the NEON provider when HWCAP_ASIMD is reported, else the base one.
 * Apple: the NEON provider, with no runtime probe.
 * Anything else: the base provider.
 */
DEFINE_INTERFACE_DISPATCHER(pq_gen)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if (!(hwcap & HWCAP_ASIMD))
		return PROVIDER_BASIC(pq_gen);
	return PROVIDER_INFO(pq_gen_neon);
#elif defined(__APPLE__)
	return PROVIDER_INFO(pq_gen_neon);
#else
	return PROVIDER_BASIC(pq_gen);
#endif
}
/*
 * Dispatcher for pq_check.
 * Linux: the NEON provider when HWCAP_ASIMD is reported, else the base one.
 * Apple: the NEON provider, with no runtime probe.
 * Anything else: the base provider.
 */
DEFINE_INTERFACE_DISPATCHER(pq_check)
{
#if defined(__linux__)
	const unsigned long hwcap = getauxval(AT_HWCAP);

	if (!(hwcap & HWCAP_ASIMD))
		return PROVIDER_BASIC(pq_check);
	return PROVIDER_INFO(pq_check_neon);
#elif defined(__APPLE__)
	return PROVIDER_INFO(pq_check_neon);
#else
	return PROVIDER_BASIC(pq_check);
#endif
}

View File

@ -27,10 +27,14 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
.text
.global xor_check_neon
.global cdecl(xor_check_neon)
#ifndef __APPLE__
.type xor_check_neon, %function
#endif
/* int xor_check_neon(int vects, int len, void **src) */
@ -76,7 +80,7 @@ w_xor .req w11
* src_ptr_end -->
*/
xor_check_neon:
cdecl(xor_check_neon):
add x_src_ptr_end, x_src, x_vects, lsl #3
ldr x_src0, [x_src]
add x_src0_end, x_src0, x_len

View File

@ -27,10 +27,14 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "../include/aarch64_label.h"
.text
.global xor_gen_neon
.global cdecl(xor_gen_neon)
#ifndef __APPLE__
.type xor_gen_neon, %function
#endif
/* int xor_gen_neon(int vects, int len, void **src) */
@ -78,7 +82,7 @@ x_dst_ptr .req x11
* +----------+ +------------------+
*/
xor_gen_neon:
cdecl(xor_gen_neon):
add x_dst_ptr, x_src, x_vects, lsl #3
ldr x_dst, [x_dst_ptr, #-8]!
ldr x_src0, [x_src]