diff --git a/crc/aarch64/crc16_t10dif_copy_pmull.S b/crc/aarch64/crc16_t10dif_copy_pmull.S index 0a6a3ca..afb98f1 100644 --- a/crc/aarch64/crc16_t10dif_copy_pmull.S +++ b/crc/aarch64/crc16_t10dif_copy_pmull.S @@ -27,11 +27,15 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################### +#include "../include/aarch64_label.h" + .arch armv8-a+crc+crypto .text .align 3 - .global crc16_t10dif_copy_pmull + .global cdecl(crc16_t10dif_copy_pmull) +#ifndef __APPLE__ .type crc16_t10dif_copy_pmull, %function +#endif /* uint16_t crc16_t10dif_pmull(uint16_t seed, uint8_t *buf, uint64_t len) */ @@ -67,7 +71,7 @@ x_crc16tab .req x5 x_src_saved .req x0 x_dst_saved .req x12 -crc16_t10dif_copy_pmull: +cdecl(crc16_t10dif_copy_pmull): cmp x_len, 63 sub sp, sp, #16 uxth w_seed, w_seed @@ -80,11 +84,19 @@ crc16_t10dif_copy_pmull: cmp x_len, x_tmp bls .end +#ifndef __APPLE__ sxtw x_counter, w_counter adrp x_crc16tab, .LANCHOR0 sub x_src, x_src, x_counter sub x_dst, x_dst, x_counter add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0 +#else + sxtw x_counter, w_counter + adrp x_crc16tab, .LANCHOR0@PAGE + sub x_src, x_src, x_counter + sub x_dst, x_dst, x_counter + add x_crc16tab, x_crc16tab, .LANCHOR0@PAGEOFF +#endif .align 2 .crc_table_loop: @@ -145,8 +157,13 @@ v_tmp3 .req v16 stp q_x0, q_x1, [x_dst] stp q_x2, q_x3, [x_dst, 32] +#ifndef __APPLE__ adrp x_tmp, .shuffle_mask_lanchor ldr q_permutation, [x_tmp, :lo12:.shuffle_mask_lanchor] +#else + adrp x_tmp, .shuffle_mask_lanchor@PAGE + ldr q_permutation, [x_tmp, .shuffle_mask_lanchor@PAGEOFF] +#endif tbl v_tmp1.16b, {v_x0.16b}, v7.16b eor v_x0.16b, v_tmp3.16b, v_tmp1.16b @@ -193,7 +210,7 @@ v_tmp1_x3 .req v27 q_fold_const .req q17 v_fold_const .req v17 - ldr q_fold_const, =0x371d00000000000087e70000; + ldr q_fold_const, fold_constant .align 2 .crc_fold_loop: @@ -358,23 +375,32 @@ v_br1 .req v5 umov x0, v_x0.d[0] ubfx x0, x0, 16, 16 b .crc_table_loop_pre - +#ifndef __APPLE__ .size crc16_t10dif_copy_pmull, .-crc16_t10dif_copy_pmull +#endif - .section .rodata - +ASM_DEF_RODATA .align 4 +fold_constant: + .word 0x87e70000 + .word 0x00000000 + .word 0x371d0000 + .word 0x00000000 .shuffle_mask_lanchor = . + 0 +#ifndef __APPLE__ .type shuffle_mask, %object .size shuffle_mask, 16 +#endif shuffle_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8 .byte 7, 6, 5, 4, 3, 2, 1, 0 .align 4 .LANCHOR0 = . + 0 +#ifndef __APPLE__ .type crc16tab, %object .size crc16tab, 512 +#endif crc16tab: .hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b .hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6 diff --git a/crc/aarch64/crc16_t10dif_pmull.S b/crc/aarch64/crc16_t10dif_pmull.S index 7c3b803..aa0b206 100644 --- a/crc/aarch64/crc16_t10dif_pmull.S +++ b/crc/aarch64/crc16_t10dif_pmull.S @@ -27,11 +27,15 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
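Note on the recurring pattern below: every assembly file gains #include "../include/aarch64_label.h", wraps its exported labels in cdecl(), guards the ELF-only .type/.size directives with #ifndef __APPLE__, and replaces .section .rodata with ASM_DEF_RODATA. All of these edits come from the same ELF-vs-Mach-O differences: Mach-O prefixes C symbol names with an underscore, its assembler rejects .type/.size, and read-only data lives in __TEXT,__const instead of .rodata. The header itself is not shown in this diff, so the following is only a sketch of what it plausibly provides, not the patch's actual contents:

/*
 * Sketch (assumption -- aarch64_label.h is not part of this diff) of the
 * macros the .S files rely on.  cdecl() supplies the Mach-O underscore
 * prefix; ASM_DEF_RODATA hides the read-only-data section directive,
 * which is spelled differently in each object format.
 */
#ifndef AARCH64_LABEL_H
#define AARCH64_LABEL_H

#ifdef __APPLE__
#define cdecl(x)        _ ## x                    /* exported as _crc16_t10dif_pmull, ... */
#define ASM_DEF_RODATA  .section __TEXT, __const  /* Mach-O read-only data */
#else
#define cdecl(x)        x                         /* plain ELF symbol name */
#define ASM_DEF_RODATA  .section .rodata          /* ELF read-only data */
#endif

#endif /* AARCH64_LABEL_H */

With this in place, C callers keep using the unprefixed names on both platforms; only the symbols exported from the Mach-O objects carry the leading underscore.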
######################################################################### +#include "../include/aarch64_label.h" + .arch armv8-a+crc+crypto .text .align 3 - .global crc16_t10dif_pmull + .global cdecl(crc16_t10dif_pmull) +#ifndef __APPLE__ .type crc16_t10dif_pmull, %function +#endif /* uint16_t crc16_t10dif_pmull(uint16_t seed, uint8_t *buf, uint64_t len) */ @@ -65,7 +69,7 @@ x_counter .req x3 x_crc16tab .req x4 x_buf_saved .req x0 -crc16_t10dif_pmull: +cdecl(crc16_t10dif_pmull): cmp x_len, 63 sub sp, sp, #16 uxth w_seed, w_seed @@ -78,10 +82,17 @@ crc16_t10dif_pmull: cmp x_len, x_tmp bls .end +#ifndef __APPLE__ sxtw x_counter, w_counter adrp x_crc16tab, .LANCHOR0 sub x_buf, x_buf, x_counter add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0 +#else + sxtw x_counter, w_counter + adrp x_crc16tab, .LANCHOR0@PAGE + sub x_buf, x_buf, x_counter + add x_crc16tab, x_crc16tab, .LANCHOR0@PAGEOFF +#endif .align 2 .crc_table_loop: @@ -137,8 +148,13 @@ v_tmp3 .req v16 ldp q_x0, q_x1, [x_buf] ldp q_x2, q_x3, [x_buf, 32] +#ifndef __APPLE__ adrp x_tmp, .shuffle_mask_lanchor ldr q7, [x_tmp, :lo12:.shuffle_mask_lanchor] +#else + adrp x_tmp, .shuffle_mask_lanchor@PAGE + ldr q7, [x_tmp, .shuffle_mask_lanchor@PAGEOFF] +#endif tbl v_tmp1.16b, {v_x0.16b}, v7.16b eor v_x0.16b, v_tmp3.16b, v_tmp1.16b @@ -185,7 +201,7 @@ v_tmp1_x3 .req v27 q_fold_const .req q17 v_fold_const .req v17 - ldr q_fold_const, =0x371d00000000000087e70000; + ldr q_fold_const, fold_constant .align 2 .crc_fold_loop: @@ -344,22 +360,32 @@ v_br1 .req v5 ubfx x0, x0, 16, 16 b .crc_table_loop_pre +#ifndef __APPLE__ .size crc16_t10dif_pmull, .-crc16_t10dif_pmull +#endif - .section .rodata - +ASM_DEF_RODATA .align 4 +fold_constant: + .word 0x87e70000 + .word 0x00000000 + .word 0x371d0000 + .word 0x00000000 .shuffle_mask_lanchor = . + 0 +#ifndef __APPLE__ .type shuffle_mask, %object .size shuffle_mask, 16 +#endif shuffle_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8 .byte 7, 6, 5, 4, 3, 2, 1, 0 .align 4 .LANCHOR0 = . + 0 +#ifndef __APPLE__ .type crc16tab, %object .size crc16tab, 512 +#endif crc16tab: .hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b .hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6 diff --git a/crc/aarch64/crc32_aarch64_common.h b/crc/aarch64/crc32_aarch64_common.h index a2ef22a..1c9b7df 100644 --- a/crc/aarch64/crc32_aarch64_common.h +++ b/crc/aarch64/crc32_aarch64_common.h @@ -27,8 +27,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ - - +#include "../include/aarch64_label.h" .macro crc32_hw_common poly_type diff --git a/crc/aarch64/crc32_common_mix_neoverse_n1.S b/crc/aarch64/crc32_common_mix_neoverse_n1.S index 4911a30..6a874d7 100644 --- a/crc/aarch64/crc32_common_mix_neoverse_n1.S +++ b/crc/aarch64/crc32_common_mix_neoverse_n1.S @@ -27,6 +27,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
**********************************************************************/ +#include "../include/aarch64_label.h" .macro declare_var_vector_reg name:req,reg:req \name\()_q .req q\reg @@ -429,4 +430,3 @@ start_final: .endif ret .endm - diff --git a/crc/aarch64/crc32_gzip_refl_3crc_fold.S b/crc/aarch64/crc32_gzip_refl_3crc_fold.S index 116d62c..cbe4580 100644 --- a/crc/aarch64/crc32_gzip_refl_3crc_fold.S +++ b/crc/aarch64/crc32_gzip_refl_3crc_fold.S @@ -88,8 +88,12 @@ ); */ - .global crc32_gzip_refl_3crc_fold + .global cdecl(crc32_gzip_refl_3crc_fold) +#ifndef __APPLE__ .type crc32_gzip_refl_3crc_fold, %function -crc32_gzip_refl_3crc_fold: +#endif +cdecl(crc32_gzip_refl_3crc_fold): crc32_3crc_fold crc32 +#ifndef __APPLE__ .size crc32_gzip_refl_3crc_fold, .-crc32_gzip_refl_3crc_fold +#endif diff --git a/crc/aarch64/crc32_gzip_refl_crc_ext.S b/crc/aarch64/crc32_gzip_refl_crc_ext.S index 8e3d227..58a07eb 100644 --- a/crc/aarch64/crc32_gzip_refl_crc_ext.S +++ b/crc/aarch64/crc32_gzip_refl_crc_ext.S @@ -59,8 +59,12 @@ * uint32_t crc32_gzip_refl_crc_ext(const unsigned char *BUF, * uint64_t LEN,uint32_t wCRC); */ - .global crc32_gzip_refl_crc_ext + .global cdecl(crc32_gzip_refl_crc_ext) +#ifndef __APPLE__ .type crc32_gzip_refl_crc_ext, %function -crc32_gzip_refl_crc_ext: +#endif +cdecl(crc32_gzip_refl_crc_ext): crc32_hw_common crc32 +#ifndef __APPLE__ .size crc32_gzip_refl_crc_ext, .-crc32_gzip_refl_crc_ext +#endif diff --git a/crc/aarch64/crc32_gzip_refl_pmull.S b/crc/aarch64/crc32_gzip_refl_pmull.S index d52e2d8..1dd28f1 100644 --- a/crc/aarch64/crc32_gzip_refl_pmull.S +++ b/crc/aarch64/crc32_gzip_refl_pmull.S @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################### +#include "../include/aarch64_label.h" #include "crc32_gzip_refl_pmull.h" #include "crc32_refl_common_pmull.h" diff --git a/crc/aarch64/crc32_gzip_refl_pmull.h b/crc/aarch64/crc32_gzip_refl_pmull.h index 883567d..6bd786f 100644 --- a/crc/aarch64/crc32_gzip_refl_pmull.h +++ b/crc/aarch64/crc32_gzip_refl_pmull.h @@ -47,11 +47,13 @@ .equ br_high_b2, 0x1 .text - .section .rodata +ASM_DEF_RODATA .align 4 .set .lanchor_crc_tab,. + 0 +#ifndef __APPLE__ .type crc32_table_gzip_refl, %object .size crc32_table_gzip_refl, 1024 +#endif crc32_table_gzip_refl: .word 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3 .word 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91 diff --git a/crc/aarch64/crc32_ieee_norm_pmull.S b/crc/aarch64/crc32_ieee_norm_pmull.S index 32966fb..571166d 100644 --- a/crc/aarch64/crc32_ieee_norm_pmull.S +++ b/crc/aarch64/crc32_ieee_norm_pmull.S @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################### +#include "../include/aarch64_label.h" #include "crc32_ieee_norm_pmull.h" #include "crc32_norm_common_pmull.h" diff --git a/crc/aarch64/crc32_ieee_norm_pmull.h b/crc/aarch64/crc32_ieee_norm_pmull.h index 67acd2a..7a2fc02 100644 --- a/crc/aarch64/crc32_ieee_norm_pmull.h +++ b/crc/aarch64/crc32_ieee_norm_pmull.h @@ -47,11 +47,13 @@ .equ br_high_b2, 0x1 .text - .section .rodata +ASM_DEF_RODATA .align 4 .set .lanchor_crc_tab,. 
+ 0 +#ifndef __APPLE__ .type crc32_table_ieee_norm, %object .size crc32_table_ieee_norm, 1024 +#endif crc32_table_ieee_norm: .word 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005 .word 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd diff --git a/crc/aarch64/crc32_iscsi_3crc_fold.S b/crc/aarch64/crc32_iscsi_3crc_fold.S index 2beaa80..f98172e 100644 --- a/crc/aarch64/crc32_iscsi_3crc_fold.S +++ b/crc/aarch64/crc32_iscsi_3crc_fold.S @@ -90,8 +90,12 @@ */ - .global crc32_iscsi_3crc_fold + .global cdecl(crc32_iscsi_3crc_fold) +#ifndef __APPLE__ .type crc32_iscsi_3crc_fold, %function -crc32_iscsi_3crc_fold: +#endif +cdecl(crc32_iscsi_3crc_fold): crc32_3crc_fold crc32c +#ifndef __APPLE__ .size crc32_iscsi_3crc_fold, .-crc32_iscsi_3crc_fold +#endif diff --git a/crc/aarch64/crc32_iscsi_crc_ext.S b/crc/aarch64/crc32_iscsi_crc_ext.S index 359401a..0c56ce9 100644 --- a/crc/aarch64/crc32_iscsi_crc_ext.S +++ b/crc/aarch64/crc32_iscsi_crc_ext.S @@ -58,8 +58,12 @@ * uint32_t crc32_iscsi_crc_ext(const unsigned char *BUF, * uint64_t LEN,uint32_t wCRC); */ - .global crc32_iscsi_crc_ext + .global cdecl(crc32_iscsi_crc_ext) +#ifndef __APPLE__ .type crc32_iscsi_crc_ext, %function -crc32_iscsi_crc_ext: +#endif +cdecl(crc32_iscsi_crc_ext): crc32_hw_common crc32c +#ifndef __APPLE__ .size crc32_iscsi_crc_ext, .-crc32_iscsi_crc_ext +#endif diff --git a/crc/aarch64/crc32_iscsi_refl_pmull.S b/crc/aarch64/crc32_iscsi_refl_pmull.S index 09a88e2..d4e4d4b 100644 --- a/crc/aarch64/crc32_iscsi_refl_pmull.S +++ b/crc/aarch64/crc32_iscsi_refl_pmull.S @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################### +#include "../include/aarch64_label.h" #include "crc32_iscsi_refl_pmull.h" #include "crc32_refl_common_pmull.h" @@ -35,9 +36,11 @@ crc32_refl_func crc32_iscsi_refl_pmull_internal .arch armv8-a+crc+crypto .text .align 3 - .global crc32_iscsi_refl_pmull + .global cdecl(crc32_iscsi_refl_pmull) +#ifndef __APPLE__ .type crc32_iscsi_refl_pmull, %function -crc32_iscsi_refl_pmull: +#endif +cdecl(crc32_iscsi_refl_pmull): stp x29, x30, [sp, -32]! mov x29, sp @@ -47,7 +50,7 @@ crc32_iscsi_refl_pmull: mov w0, w7 mvn w0, w0 - bl crc32_iscsi_refl_pmull_internal + bl cdecl(crc32_iscsi_refl_pmull_internal) mvn w0, w0 ldp x29, x30, [sp], 32 ret diff --git a/crc/aarch64/crc32_iscsi_refl_pmull.h b/crc/aarch64/crc32_iscsi_refl_pmull.h index c17b91b..413e161 100644 --- a/crc/aarch64/crc32_iscsi_refl_pmull.h +++ b/crc/aarch64/crc32_iscsi_refl_pmull.h @@ -47,11 +47,14 @@ .equ br_high_b2, 0x0 .text - .section .rodata + +ASM_DEF_RODATA .align 4 .set .lanchor_crc_tab,. 
+ 0 +#ifndef __APPLE__ .type crc32_table_iscsi_refl, %object .size crc32_table_iscsi_refl, 1024 +#endif crc32_table_iscsi_refl: .word 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB .word 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24 diff --git a/crc/aarch64/crc32_mix_default.S b/crc/aarch64/crc32_mix_default.S index 05c3407..875206f 100644 --- a/crc/aarch64/crc32_mix_default.S +++ b/crc/aarch64/crc32_mix_default.S @@ -51,42 +51,56 @@ #include "crc32_mix_default_common.S" - .global crc32_mix_default + .global cdecl(crc32_mix_default) +#ifndef __APPLE__ .type crc32_mix_default, %function -crc32_mix_default: +#endif +cdecl(crc32_mix_default): crc32_mix_main_default +#ifndef __APPLE__ .size crc32_mix_default, .-crc32_mix_default +#endif - .section .rodata +ASM_DEF_RODATA .align 4 .set lanchor_crc32,. + 0 +#ifndef __APPLE__ .type k1k2, %object .size k1k2, 16 +#endif k1k2: .xword 0x0154442bd4 .xword 0x01c6e41596 +#ifndef __APPLE__ .type k3k4, %object .size k3k4, 16 +#endif k3k4: .xword 0x01751997d0 .xword 0x00ccaa009e +#ifndef __APPLE__ .type k5k0, %object .size k5k0, 16 +#endif k5k0: .xword 0x0163cd6124 .xword 0 +#ifndef __APPLE__ .type poly, %object .size poly, 16 +#endif poly: .xword 0x01db710641 .xword 0x01f7011641 +#ifndef __APPLE__ .type crc32_const, %object .size crc32_const, 48 +#endif crc32_const: .xword 0x1753ab84 .xword 0 @@ -98,8 +112,10 @@ crc32_const: .align 4 .set .lanchor_mask,. + 0 +#ifndef __APPLE__ .type mask, %object .size mask, 16 +#endif mask: .word -1 .word 0 diff --git a/crc/aarch64/crc32_mix_default_common.S b/crc/aarch64/crc32_mix_default_common.S index 106da20..79c8d8d 100644 --- a/crc/aarch64/crc32_mix_default_common.S +++ b/crc/aarch64/crc32_mix_default_common.S @@ -27,6 +27,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
**********************************************************************/ +#include "../include/aarch64_label.h" + .macro declare_generic_reg name:req, reg:req, default:req \name .req \default\reg w_\name .req w\reg @@ -207,8 +209,13 @@ fmov s_a1, w_crc movi v_neon_tmp.4s, 0 +#ifndef __APPLE__ adrp x_pconst, lanchor_crc32 add x_buf_tmp, x_buf, 64 +#else + adrp x_pconst, lanchor_crc32@PAGE + add x_buf_tmp, x_buf, 64 +#endif ldr x_data_crc0, [x_buf, 512] ldr x_data_crc1, [x_buf, 1024] @@ -231,7 +238,11 @@ ldr x_data_crc2, [x_buf, 1544] eor v_a1.16b, v_a1.16b, v_neon_tmp.16b +#ifndef __APPLE__ ldr q_a0, [x_pconst, #:lo12:lanchor_crc32] // k1k2 +#else + ldr q_a0, [x_pconst, #lanchor_crc32@PAGEOFF] // k1k2 +#endif crc32_u64 w_crc0, w_crc0, x_data_crc0 crc32_u64 w_crc1, w_crc1, x_data_crc1 @@ -261,7 +272,11 @@ // loop end // PMULL: fold into 128-bits +#ifndef __APPLE__ add x_pconst, x_pconst, :lo12:lanchor_crc32 +#else + add x_pconst, x_pconst, lanchor_crc32@PAGEOFF +#endif ldr x_data_crc0, [x_buf, 976] ldr x_data_crc1, [x_buf, 1488] @@ -321,7 +336,11 @@ movi v_neon_zero.4s, 0 ldr q_k5k0, [x_pconst, offset_k5k0] // k5k0 +#ifndef __APPLE__ adrp x_tmp, .lanchor_mask +#else + adrp x_tmp, .lanchor_mask@PAGE +#endif ldr x_data_crc0, [x_buf, 1008] ldr x_data_crc1, [x_buf, 1520] @@ -329,7 +348,11 @@ ext v_a1.16b, v_a1.16b, v_neon_zero.16b, #8 eor v_a1.16b, v_a2.16b, v_a1.16b +#ifndef __APPLE__ ldr q_neon_tmp3, [x_tmp, #:lo12:.lanchor_mask] +#else + ldr q_neon_tmp3, [x_tmp, #.lanchor_mask@PAGEOFF] +#endif crc32_u64 w_crc0, w_crc0, x_data_crc0 crc32_u64 w_crc1, w_crc1, x_data_crc1 diff --git a/crc/aarch64/crc32_mix_neoverse_n1.S b/crc/aarch64/crc32_mix_neoverse_n1.S index 62b40e1..95f7eca 100644 --- a/crc/aarch64/crc32_mix_neoverse_n1.S +++ b/crc/aarch64/crc32_mix_neoverse_n1.S @@ -62,9 +62,12 @@ CRC .req x0 wCRC .req w0 .align 6 - .global crc32_mix_neoverse_n1 + .global cdecl(crc32_mix_neoverse_n1) +#ifndef __APPLE__ .type crc32_mix_neoverse_n1, %function -crc32_mix_neoverse_n1: +#endif +cdecl(crc32_mix_neoverse_n1): crc32_common_mix crc32 +#ifndef __APPLE__ .size crc32_mix_neoverse_n1, .-crc32_mix_neoverse_n1 - +#endif diff --git a/crc/aarch64/crc32_norm_common_pmull.h b/crc/aarch64/crc32_norm_common_pmull.h index 7377e30..a18b27e 100644 --- a/crc/aarch64/crc32_norm_common_pmull.h +++ b/crc/aarch64/crc32_norm_common_pmull.h @@ -33,12 +33,14 @@ .arch armv8-a+crypto .text .align 3 - .global \name + .global cdecl(\name) +#ifndef __APPLE__ .type \name, %function +#endif /* uint32_t crc32_norm_func(uint32_t seed, uint8_t * buf, uint64_t len) */ -\name\(): +cdecl(\name\()): mvn w_seed, w_seed mov x_counter, 0 cmp x_len, (FOLD_SIZE - 1) @@ -48,10 +50,17 @@ cmp x_len, x_counter bls .done +#ifndef __APPLE__ adrp x_tmp, .lanchor_crc_tab add x_buf_iter, x_buf, x_counter add x_buf, x_buf, x_len add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab +#else + adrp x_tmp, .lanchor_crc_tab@PAGE + add x_buf_iter, x_buf, x_counter + add x_buf, x_buf, x_len + add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF +#endif .align 3 .loop_crc_tab: @@ -124,10 +133,12 @@ umov w_seed, v_tmp_high.s[0] b .crc_tab_pre - +#ifndef __APPLE__ .size \name, .-\name - .section .rodata.cst16,"aM",@progbits,16 +#else + .section __TEXT,__const +#endif .align 4 .shuffle_data: .byte 15, 14, 13, 12, 11, 10, 9 diff --git a/crc/aarch64/crc32_refl_common_pmull.h b/crc/aarch64/crc32_refl_common_pmull.h index 6418f12..eb1c1b1 100644 --- a/crc/aarch64/crc32_refl_common_pmull.h +++ b/crc/aarch64/crc32_refl_common_pmull.h @@ -33,12 +33,14 @@ .arch 
armv8-a+crypto .text .align 3 - .global \name + .global cdecl(\name) +#ifndef __APPLE__ .type \name, %function +#endif /* uint32_t crc32_refl_func(uint32_t seed, uint8_t * buf, uint64_t len) */ -\name\(): +cdecl(\name\()): mvn w_seed, w_seed mov x_counter, 0 cmp x_len, (FOLD_SIZE - 1) @@ -48,10 +50,17 @@ cmp x_len, x_counter bls .done +#ifndef __APPLE__ adrp x_tmp, .lanchor_crc_tab add x_buf_iter, x_buf, x_counter add x_buf, x_buf, x_len add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab +#else + adrp x_tmp, .lanchor_crc_tab@PAGE + add x_buf_iter, x_buf, x_counter + add x_buf, x_buf, x_len + add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF +#endif .align 3 .loop_crc_tab: @@ -121,6 +130,7 @@ umov w_seed, v_tmp_high.s[1] b .crc_tab_pre - +#ifndef __APPLE__ .size \name, .-\name +#endif .endm diff --git a/crc/aarch64/crc32c_mix_default.S b/crc/aarch64/crc32c_mix_default.S index 87b8ce3..99243cc 100644 --- a/crc/aarch64/crc32c_mix_default.S +++ b/crc/aarch64/crc32c_mix_default.S @@ -49,46 +49,60 @@ #include "crc32_mix_default_common.S" - .global crc32c_mix_default + .global cdecl(crc32c_mix_default) +#ifndef __APPLE__ .type crc32c_mix_default, %function -crc32c_mix_default: +#endif +cdecl(crc32c_mix_default): mov w3, w2 sxtw x2, w1 mov x1, x0 mov w0, w3 crc32_mix_main_default +#ifndef __APPLE__ .size crc32c_mix_default, .-crc32c_mix_default +#endif - .section .rodata +ASM_DEF_RODATA .align 4 .set lanchor_crc32,. + 0 +#ifndef __APPLE__ .type k1k2, %object .size k1k2, 16 +#endif k1k2: .xword 0x00740eef02 .xword 0x009e4addf8 +#ifndef __APPLE__ .type k3k4, %object .size k3k4, 16 +#endif k3k4: .xword 0x00f20c0dfe .xword 0x014cd00bd6 +#ifndef __APPLE__ .type k5k0, %object .size k5k0, 16 +#endif k5k0: .xword 0x00dd45aab8 .xword 0 +#ifndef __APPLE__ .type poly, %object .size poly, 16 +#endif poly: .xword 0x0105ec76f0 .xword 0x00dea713f1 +#ifndef __APPLE__ .type crc32_const, %object .size crc32_const, 48 +#endif crc32_const: .xword 0x9ef68d35 .xword 0 @@ -100,8 +114,10 @@ crc32_const: .align 4 .set .lanchor_mask,. + 0 +#ifndef __APPLE__ .type mask, %object .size mask, 16 +#endif mask: .word -1 .word 0 diff --git a/crc/aarch64/crc32c_mix_neoverse_n1.S b/crc/aarch64/crc32c_mix_neoverse_n1.S index a98511a..5d58263 100644 --- a/crc/aarch64/crc32c_mix_neoverse_n1.S +++ b/crc/aarch64/crc32c_mix_neoverse_n1.S @@ -61,8 +61,12 @@ CRC .req x2 wCRC .req w2 .align 6 - .global crc32c_mix_neoverse_n1 + .global cdecl(crc32c_mix_neoverse_n1) +#ifndef __APPLE__ .type crc32c_mix_neoverse_n1, %function -crc32c_mix_neoverse_n1: +#endif +cdecl(crc32c_mix_neoverse_n1): crc32_common_mix crc32c +#ifndef __APPLE__ .size crc32c_mix_neoverse_n1, .-crc32c_mix_neoverse_n1 +#endif diff --git a/crc/aarch64/crc64_ecma_norm_pmull.S b/crc/aarch64/crc64_ecma_norm_pmull.S index 0089a09..289df28 100644 --- a/crc/aarch64/crc64_ecma_norm_pmull.S +++ b/crc/aarch64/crc64_ecma_norm_pmull.S @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################### +#include "../include/aarch64_label.h" #include "crc64_ecma_norm_pmull.h" #include "crc64_norm_common_pmull.h" diff --git a/crc/aarch64/crc64_ecma_norm_pmull.h b/crc/aarch64/crc64_ecma_norm_pmull.h index 07d58cd..ef980bb 100644 --- a/crc/aarch64/crc64_ecma_norm_pmull.h +++ b/crc/aarch64/crc64_ecma_norm_pmull.h @@ -64,11 +64,13 @@ .equ br_high_b3, 0x42f0 .text - .section .rodata +ASM_DEF_RODATA .align 4 .set .lanchor_crc_tab,. 
+ 0 +#ifndef __APPLE__ .type crc64_tab, %object .size crc64_tab, 2048 +#endif crc64_tab: .xword 0x0000000000000000, 0x42f0e1eba9ea3693 .xword 0x85e1c3d753d46d26, 0xc711223cfa3e5bb5 diff --git a/crc/aarch64/crc64_ecma_refl_pmull.S b/crc/aarch64/crc64_ecma_refl_pmull.S index 812517f..aeadc81 100644 --- a/crc/aarch64/crc64_ecma_refl_pmull.S +++ b/crc/aarch64/crc64_ecma_refl_pmull.S @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################### +#include "../include/aarch64_label.h" #include "crc64_ecma_refl_pmull.h" #include "crc64_refl_common_pmull.h" diff --git a/crc/aarch64/crc64_ecma_refl_pmull.h b/crc/aarch64/crc64_ecma_refl_pmull.h index 5f53d79..c68a1b7 100644 --- a/crc/aarch64/crc64_ecma_refl_pmull.h +++ b/crc/aarch64/crc64_ecma_refl_pmull.h @@ -60,11 +60,13 @@ .equ br_high_b3, 0x92d8 .text - .section .rodata +ASM_DEF_RODATA .align 4 .set .lanchor_crc_tab,. + 0 +#ifndef __APPLE__ .type crc64_tab, %object .size crc64_tab, 2048 +#endif crc64_tab: .xword 0x0000000000000000, 0xb32e4cbe03a75f6f .xword 0xf4843657a840a05b, 0x47aa7ae9abe7ff34 diff --git a/crc/aarch64/crc64_iso_norm_pmull.S b/crc/aarch64/crc64_iso_norm_pmull.S index 185b75b..c7fe014 100644 --- a/crc/aarch64/crc64_iso_norm_pmull.S +++ b/crc/aarch64/crc64_iso_norm_pmull.S @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################### +#include "../include/aarch64_label.h" #include "crc64_iso_norm_pmull.h" #include "crc64_norm_common_pmull.h" diff --git a/crc/aarch64/crc64_iso_norm_pmull.h b/crc/aarch64/crc64_iso_norm_pmull.h index cc17605..f0c39d7 100644 --- a/crc/aarch64/crc64_iso_norm_pmull.h +++ b/crc/aarch64/crc64_iso_norm_pmull.h @@ -64,11 +64,13 @@ .equ br_high_b3, 0x0000 .text - .section .rodata +ASM_DEF_RODATA .align 4 .set .lanchor_crc_tab,. + 0 +#ifndef __APPLE__ .type crc64_tab, %object .size crc64_tab, 2048 +#endif crc64_tab: .xword 0x0000000000000000, 0x000000000000001b diff --git a/crc/aarch64/crc64_iso_refl_pmull.S b/crc/aarch64/crc64_iso_refl_pmull.S index 2d2bc66..c49d7ec 100644 --- a/crc/aarch64/crc64_iso_refl_pmull.S +++ b/crc/aarch64/crc64_iso_refl_pmull.S @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################### +#include "../include/aarch64_label.h" #include "crc64_iso_refl_pmull.h" #include "crc64_refl_common_pmull.h" diff --git a/crc/aarch64/crc64_iso_refl_pmull.h b/crc/aarch64/crc64_iso_refl_pmull.h index 8ee4f58..924a768 100644 --- a/crc/aarch64/crc64_iso_refl_pmull.h +++ b/crc/aarch64/crc64_iso_refl_pmull.h @@ -60,11 +60,13 @@ .equ br_high_b3, 0xb000 .text - .section .rodata +ASM_DEF_RODATA .align 4 .set .lanchor_crc_tab,. + 0 +#ifndef __APPLE__ .type crc64_tab, %object .size crc64_tab, 2048 +#endif crc64_tab: .xword 0x0000000000000000, 0x01b0000000000000 diff --git a/crc/aarch64/crc64_jones_norm_pmull.S b/crc/aarch64/crc64_jones_norm_pmull.S index 4f29837..bd94ee1 100644 --- a/crc/aarch64/crc64_jones_norm_pmull.S +++ b/crc/aarch64/crc64_jones_norm_pmull.S @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
######################################################################### +#include "../include/aarch64_label.h" #include "crc64_jones_norm_pmull.h" #include "crc64_norm_common_pmull.h" diff --git a/crc/aarch64/crc64_jones_norm_pmull.h b/crc/aarch64/crc64_jones_norm_pmull.h index a20fa04..28d8231 100644 --- a/crc/aarch64/crc64_jones_norm_pmull.h +++ b/crc/aarch64/crc64_jones_norm_pmull.h @@ -64,11 +64,14 @@ .equ br_high_b3, 0xad93 .text - .section .rodata +ASM_DEF_RODATA .align 4 .set .lanchor_crc_tab,. + 0 +#ifndef __APPLE__ .type crc64_tab, %object .size crc64_tab, 2048 +#endif + crc64_tab: .xword 0x0000000000000000, 0xad93d23594c935a9 .xword 0xf6b4765ebd5b5efb, 0x5b27a46b29926b52 diff --git a/crc/aarch64/crc64_jones_refl_pmull.S b/crc/aarch64/crc64_jones_refl_pmull.S index 177092f..caa2736 100644 --- a/crc/aarch64/crc64_jones_refl_pmull.S +++ b/crc/aarch64/crc64_jones_refl_pmull.S @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################### +#include "../include/aarch64_label.h" #include "crc64_jones_refl_pmull.h" #include "crc64_refl_common_pmull.h" diff --git a/crc/aarch64/crc64_jones_refl_pmull.h b/crc/aarch64/crc64_jones_refl_pmull.h index 5bf98f7..39b859d 100644 --- a/crc/aarch64/crc64_jones_refl_pmull.h +++ b/crc/aarch64/crc64_jones_refl_pmull.h @@ -60,11 +60,14 @@ .equ br_high_b3, 0x2b59 .text - .section .rodata +ASM_DEF_RODATA .align 4 .set .lanchor_crc_tab,. + 0 +#ifndef __APPLE__ .type crc64_tab, %object .size crc64_tab, 2048 +#endif + crc64_tab: .xword 0x0000000000000000, 0x7ad870c830358979 .xword 0xf5b0e190606b12f2, 0x8f689158505e9b8b diff --git a/crc/aarch64/crc64_norm_common_pmull.h b/crc/aarch64/crc64_norm_common_pmull.h index 1bdfc26..cf25418 100644 --- a/crc/aarch64/crc64_norm_common_pmull.h +++ b/crc/aarch64/crc64_norm_common_pmull.h @@ -33,12 +33,14 @@ .arch armv8-a+crypto .text .align 3 - .global \name + .global cdecl(\name) +#ifndef __APPLE__ .type \name, %function +#endif /* uint64_t crc64_norm_func(uint64_t seed, const uint8_t * buf, uint64_t len) */ -\name\(): +cdecl(\name\()): mvn x_seed, x_seed mov x_counter, 0 cmp x_len, (FOLD_SIZE-1) @@ -48,10 +50,17 @@ cmp x_len, x_counter bls .done +#ifndef __APPLE__ adrp x_tmp, .lanchor_crc_tab add x_buf_iter, x_buf, x_counter add x_buf, x_buf, x_len add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab +#else + adrp x_tmp, .lanchor_crc_tab@PAGE + add x_buf_iter, x_buf, x_counter + add x_buf, x_buf, x_len + add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF +#endif .align 3 .loop_crc_tab: @@ -119,9 +128,12 @@ b .crc_tab_pre +#ifndef __APPLE__ .size \name, .-\name - .section .rodata.cst16,"aM",@progbits,16 +#else + .section __TEXT,__const +#endif .align 4 .shuffle_data: .byte 15, 14, 13, 12, 11, 10, 9, 8 diff --git a/crc/aarch64/crc64_refl_common_pmull.h b/crc/aarch64/crc64_refl_common_pmull.h index a455013..a974f4b 100644 --- a/crc/aarch64/crc64_refl_common_pmull.h +++ b/crc/aarch64/crc64_refl_common_pmull.h @@ -33,12 +33,14 @@ .arch armv8-a+crypto .text .align 3 - .global \name + .global cdecl(\name) +#ifndef __APPLE__ .type \name, %function +#endif /* uint64_t crc64_refl_func(uint64_t seed, const uint8_t * buf, uint64_t len) */ -\name\(): +cdecl(\name\()): mvn x_seed, x_seed mov x_counter, 0 cmp x_len, (FOLD_SIZE-1) @@ -48,10 +50,17 @@ cmp x_len, x_counter bls .done +#ifndef __APPLE__ adrp x_tmp, .lanchor_crc_tab add x_buf_iter, x_buf, x_counter add x_buf, x_buf, x_len add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab 
+#else + adrp x_tmp, .lanchor_crc_tab@PAGE + add x_buf_iter, x_buf, x_counter + add x_buf, x_buf, x_len + add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF +#endif .align 3 .loop_crc_tab: @@ -121,6 +130,7 @@ umov x_crc_ret, v_tmp_low.d[1] b .crc_tab_pre - +#ifndef __APPLE__ .size \name, .-\name +#endif .endm diff --git a/crc/aarch64/crc_aarch64_dispatcher.c b/crc/aarch64/crc_aarch64_dispatcher.c index 22ea72b..fe3bb46 100644 --- a/crc/aarch64/crc_aarch64_dispatcher.c +++ b/crc/aarch64/crc_aarch64_dispatcher.c @@ -30,37 +30,50 @@ DEFINE_INTERFACE_DISPATCHER(crc16_t10dif) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_PMULL) return PROVIDER_INFO(crc16_t10dif_pmull); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_PMULL_KEY)) + return PROVIDER_INFO(crc16_t10dif_pmull); +#endif return PROVIDER_BASIC(crc16_t10dif); } DEFINE_INTERFACE_DISPATCHER(crc16_t10dif_copy) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_PMULL) return PROVIDER_INFO(crc16_t10dif_copy_pmull); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_PMULL_KEY)) + return PROVIDER_INFO(crc16_t10dif_copy_pmull); +#endif return PROVIDER_BASIC(crc16_t10dif_copy); } DEFINE_INTERFACE_DISPATCHER(crc32_ieee) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_PMULL) { return PROVIDER_INFO(crc32_ieee_norm_pmull); } - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_PMULL_KEY)) + return PROVIDER_INFO(crc32_ieee_norm_pmull); +#endif return PROVIDER_BASIC(crc32_ieee); } DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_CRC32) { switch (get_micro_arch_id()) { @@ -77,12 +90,19 @@ DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) if (auxval & HWCAP_PMULL) { return PROVIDER_INFO(crc32_iscsi_refl_pmull); } +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_CRC32_KEY)) + return PROVIDER_INFO(crc32_iscsi_3crc_fold); + if (sysctlEnabled(SYSCTL_PMULL_KEY)) + return PROVIDER_INFO(crc32_iscsi_refl_pmull); +#endif return PROVIDER_BASIC(crc32_iscsi); } DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_CRC32) { @@ -99,68 +119,97 @@ DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl) if (auxval & HWCAP_PMULL) return PROVIDER_INFO(crc32_gzip_refl_pmull); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_CRC32_KEY)) + return PROVIDER_INFO(crc32_gzip_refl_3crc_fold); + if (sysctlEnabled(SYSCTL_PMULL_KEY)) + return PROVIDER_INFO(crc32_gzip_refl_pmull); +#endif return PROVIDER_BASIC(crc32_gzip_refl); } DEFINE_INTERFACE_DISPATCHER(crc64_ecma_refl) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_PMULL) return PROVIDER_INFO(crc64_ecma_refl_pmull); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_PMULL_KEY)) + return PROVIDER_INFO(crc64_ecma_refl_pmull); +#endif return PROVIDER_BASIC(crc64_ecma_refl); } DEFINE_INTERFACE_DISPATCHER(crc64_ecma_norm) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_PMULL) return PROVIDER_INFO(crc64_ecma_norm_pmull); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_PMULL_KEY)) + return PROVIDER_INFO(crc64_ecma_norm_pmull); +#endif return PROVIDER_BASIC(crc64_ecma_norm); } DEFINE_INTERFACE_DISPATCHER(crc64_iso_refl) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_PMULL) return PROVIDER_INFO(crc64_iso_refl_pmull); - 
+#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_PMULL_KEY)) + return PROVIDER_INFO(crc64_iso_refl_pmull); +#endif return PROVIDER_BASIC(crc64_iso_refl); } DEFINE_INTERFACE_DISPATCHER(crc64_iso_norm) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_PMULL) return PROVIDER_INFO(crc64_iso_norm_pmull); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_PMULL_KEY)) + return PROVIDER_INFO(crc64_iso_norm_pmull); +#endif return PROVIDER_BASIC(crc64_iso_norm); } DEFINE_INTERFACE_DISPATCHER(crc64_jones_refl) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_PMULL) return PROVIDER_INFO(crc64_jones_refl_pmull); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_PMULL_KEY)) + return PROVIDER_INFO(crc64_jones_refl_pmull); +#endif return PROVIDER_BASIC(crc64_jones_refl); } DEFINE_INTERFACE_DISPATCHER(crc64_jones_norm) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_PMULL) return PROVIDER_INFO(crc64_jones_norm_pmull); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_PMULL_KEY)) + return PROVIDER_INFO(crc64_jones_norm_pmull); +#endif return PROVIDER_BASIC(crc64_jones_norm); } diff --git a/crc/aarch64/crc_common_pmull.h b/crc/aarch64/crc_common_pmull.h index 20a71b9..e746274 100644 --- a/crc/aarch64/crc_common_pmull.h +++ b/crc/aarch64/crc_common_pmull.h @@ -27,6 +27,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################### +#include "../include/aarch64_label.h" + // parameters #define w_seed w0 #define x_seed x0 @@ -126,8 +128,13 @@ .endm .macro crc_norm_load_first_block +#ifndef __APPLE__ adrp x_tmp, .shuffle_data ldr q_shuffle, [x_tmp, #:lo12:.shuffle_data] +#else + adrp x_tmp, .shuffle_data@PAGE + ldr q_shuffle, [x_tmp, #.shuffle_data@PAGEOFF] +#endif ldr q_x0_tmp, [x_buf] ldr q_x1, [x_buf, 16] @@ -299,4 +306,4 @@ pmull v_tmp_low.1q, v_x2.1d, v_p1.1d eor v_x3.16b, v_x3.16b, v_tmp_high.16b eor v_x3.16b, v_x3.16b, v_tmp_low.16b -.endm \ No newline at end of file +.endm diff --git a/erasure_code/aarch64/ec_aarch64_dispatcher.c b/erasure_code/aarch64/ec_aarch64_dispatcher.c index 42bd780..ad0fb73 100644 --- a/erasure_code/aarch64/ec_aarch64_dispatcher.c +++ b/erasure_code/aarch64/ec_aarch64_dispatcher.c @@ -30,60 +30,90 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_SVE) return PROVIDER_INFO(gf_vect_dot_prod_sve); if (auxval & HWCAP_ASIMD) return PROVIDER_INFO(gf_vect_dot_prod_neon); +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_SVE_KEY)) + return PROVIDER_INFO(gf_vect_dot_prod_sve); + return PROVIDER_INFO(gf_vect_dot_prod_neon); +#endif return PROVIDER_BASIC(gf_vect_dot_prod); } DEFINE_INTERFACE_DISPATCHER(gf_vect_mad) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_SVE) return PROVIDER_INFO(gf_vect_mad_sve); if (auxval & HWCAP_ASIMD) return PROVIDER_INFO(gf_vect_mad_neon); +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_SVE_KEY)) + return PROVIDER_INFO(gf_vect_mad_sve); + return PROVIDER_INFO(gf_vect_mad_neon); +#endif return PROVIDER_BASIC(gf_vect_mad); } DEFINE_INTERFACE_DISPATCHER(ec_encode_data) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_SVE) return PROVIDER_INFO(ec_encode_data_sve); if (auxval & HWCAP_ASIMD) return PROVIDER_INFO(ec_encode_data_neon); +#elif defined(__APPLE__) + if 
(sysctlEnabled(SYSCTL_SVE_KEY)) + return PROVIDER_INFO(ec_encode_data_sve); + return PROVIDER_INFO(ec_encode_data_neon); +#endif return PROVIDER_BASIC(ec_encode_data); } DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_SVE) return PROVIDER_INFO(ec_encode_data_update_sve); if (auxval & HWCAP_ASIMD) return PROVIDER_INFO(ec_encode_data_update_neon); +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_SVE_KEY)) + return PROVIDER_INFO(ec_encode_data_update_sve); + return PROVIDER_INFO(ec_encode_data_update_neon); +#endif return PROVIDER_BASIC(ec_encode_data_update); } DEFINE_INTERFACE_DISPATCHER(gf_vect_mul) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_SVE) return PROVIDER_INFO(gf_vect_mul_sve); if (auxval & HWCAP_ASIMD) return PROVIDER_INFO(gf_vect_mul_neon); +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_SVE_KEY)) + return PROVIDER_INFO(gf_vect_mul_sve); + return PROVIDER_INFO(gf_vect_mul_neon); +#endif return PROVIDER_BASIC(gf_vect_mul); } diff --git a/erasure_code/aarch64/gf_2vect_dot_prod_neon.S b/erasure_code/aarch64/gf_2vect_dot_prod_neon.S index 33a2850..4ff7e7c 100644 --- a/erasure_code/aarch64/gf_2vect_dot_prod_neon.S +++ b/erasure_code/aarch64/gf_2vect_dot_prod_neon.S @@ -27,11 +27,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ +#include "../include/aarch64_label.h" + .text -.global gf_2vect_dot_prod_neon +.global cdecl(gf_2vect_dot_prod_neon) +#ifndef __APPLE__ .type gf_2vect_dot_prod_neon, %function - +#endif /* arguments */ x_len .req x0 @@ -130,7 +133,7 @@ q_data .req q_p1_1 v_data_lo .req v_p1_2 v_data_hi .req v_p1_3 -gf_2vect_dot_prod_neon: +cdecl(gf_2vect_dot_prod_neon): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_2vect_dot_prod_sve.S b/erasure_code/aarch64/gf_2vect_dot_prod_sve.S index abe5083..99b5f15 100644 --- a/erasure_code/aarch64/gf_2vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_2vect_dot_prod_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_2vect_dot_prod_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_2vect_dot_prod_sve) +#ifndef __APPLE__ .type gf_2vect_dot_prod_sve, %function +#endif /* void gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char **dest); */ @@ -81,7 +85,7 @@ q_gft2_hi .req q18 z_dest2 .req z27 -gf_2vect_dot_prod_sve: +cdecl(gf_2vect_dot_prod_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_2vect_mad_neon.S b/erasure_code/aarch64/gf_2vect_mad_neon.S index b8a8cad..453524a 100644 --- a/erasure_code/aarch64/gf_2vect_mad_neon.S +++ b/erasure_code/aarch64/gf_2vect_mad_neon.S @@ -26,11 +26,15 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
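The crc_aarch64_dispatcher.c and ec_aarch64_dispatcher.c hunks above replace the Linux-only getauxval(AT_HWCAP) probing with sysctlEnabled(SYSCTL_*_KEY) checks on macOS. Those helpers are defined outside this diff (in the shared multibinary header), so the sketch below is only an assumption about how such a probe can be written with sysctlbyname(); the key strings in particular are illustrative, not the patch's actual values.

#if defined(__APPLE__)
#include <stddef.h>
#include <sys/sysctl.h>

/* Illustrative key names -- the real SYSCTL_*_KEY macros live in a header
 * that is not part of this diff and may differ. */
#define SYSCTL_PMULL_KEY "hw.optional.arm.FEAT_PMULL"
#define SYSCTL_CRC32_KEY "hw.optional.armv8_crc32"
#define SYSCTL_SVE_KEY   "hw.optional.arm.FEAT_SVE"

/* Returns non-zero when the named hw.optional.* flag reads as 1.  A missing
 * key means the feature is absent, so any sysctlbyname() failure is treated
 * as "not enabled". */
static inline int sysctlEnabled(const char *name)
{
	int enabled = 0;
	size_t len = sizeof(enabled);

	if (sysctlbyname(name, &enabled, &len, NULL, 0) != 0)
		return 0;
	return enabled != 0;
}
#endif

Since Advanced SIMD is mandatory on every AArch64 Apple CPU, the __APPLE__ branches of the erasure-code dispatchers fall through to the NEON providers without a separate HWCAP_ASIMD-style check.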
**********************************************************************/ + +#include "../include/aarch64_label.h" + .text -.global gf_2vect_mad_neon +.global cdecl(gf_2vect_mad_neon) +#ifndef __APPLE__ .type gf_2vect_mad_neon, %function - +#endif /* arguments */ x_len .req x0 @@ -125,7 +129,7 @@ v_data_lo .req v17 v_data_hi .req v18 -gf_2vect_mad_neon: +cdecl(gf_2vect_mad_neon): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail @@ -360,8 +364,13 @@ gf_2vect_mad_neon: sub x_dest1, x_dest1, x_tmp sub x_dest2, x_dest2, x_tmp +#ifndef __APPLE__ adrp x_const, const_tbl add x_const, x_const, :lo12:const_tbl +#else + adrp x_const, const_tbl@PAGE + add x_const, x_const, const_tbl@PAGEOFF +#endif sub x_const, x_const, x_tmp ldr q_tmp, [x_const, #16] @@ -395,7 +404,7 @@ gf_2vect_mad_neon: mov w_ret, #1 ret -.section .rodata +ASM_DEF_RODATA .balign 8 const_tbl: .dword 0x0000000000000000, 0x0000000000000000 diff --git a/erasure_code/aarch64/gf_2vect_mad_sve.S b/erasure_code/aarch64/gf_2vect_mad_sve.S index 5e83210..f0ddf01 100644 --- a/erasure_code/aarch64/gf_2vect_mad_sve.S +++ b/erasure_code/aarch64/gf_2vect_mad_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_2vect_mad_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_2vect_mad_sve) +#ifndef __APPLE__ .type gf_2vect_mad_sve, %function +#endif /* gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, unsigned char **dest); @@ -77,7 +81,7 @@ q_gft2_hi .req q18 z_dest2 .req z27 -gf_2vect_mad_sve: +cdecl(gf_2vect_mad_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_3vect_dot_prod_neon.S b/erasure_code/aarch64/gf_3vect_dot_prod_neon.S index becca90..cff34fc 100644 --- a/erasure_code/aarch64/gf_3vect_dot_prod_neon.S +++ b/erasure_code/aarch64/gf_3vect_dot_prod_neon.S @@ -27,11 +27,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
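The paired adrp/add sequences above (and in the earlier CRC files) are duplicated per platform only because the relocation operator syntax differs: ELF assemblers take the low 12 bits with :lo12:symbol, while the Mach-O assembler writes symbol@PAGE and symbol@PAGEOFF for the same PC-relative addressing pattern. A hypothetical pair of preprocessor helpers, not used by this patch and shown only to illustrate that equivalence, could express both spellings once:

/* Hypothetical helpers (PAGE_REF/PAGEOFF_REF are not part of this patch):
 * both branches compute the address of a local symbol via adrp + add. */
#ifndef __APPLE__
#define PAGE_REF(sym)    sym            /* adrp takes the plain symbol   */
#define PAGEOFF_REF(sym) :lo12:sym      /* ELF low-12-bit relocation     */
#else
#define PAGE_REF(sym)    sym@PAGE       /* Mach-O page of the symbol     */
#define PAGEOFF_REF(sym) sym@PAGEOFF    /* Mach-O offset within the page */
#endif

/* usage in a preprocessed .S file, e.g. for const_tbl:
 *     adrp x_const, PAGE_REF(const_tbl)
 *     add  x_const, x_const, PAGEOFF_REF(const_tbl)
 */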
**********************************************************************/ +#include "../include/aarch64_label.h" + .text -.global gf_3vect_dot_prod_neon +.global cdecl(gf_3vect_dot_prod_neon) +#ifndef __APPLE__ .type gf_3vect_dot_prod_neon, %function - +#endif /* arguments */ x_len .req x0 @@ -117,7 +120,7 @@ v_data_lo .req v_p1_2 v_data_hi .req v_p1_3 -gf_3vect_dot_prod_neon: +cdecl(gf_3vect_dot_prod_neon): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_3vect_dot_prod_sve.S b/erasure_code/aarch64/gf_3vect_dot_prod_sve.S index b326c72..8f6414e 100644 --- a/erasure_code/aarch64/gf_3vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_3vect_dot_prod_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_3vect_dot_prod_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_3vect_dot_prod_sve) +#ifndef __APPLE__ .type gf_3vect_dot_prod_sve, %function +#endif /* void gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char **dest); */ @@ -89,7 +93,7 @@ q_gft3_hi .req q20 z_dest2 .req z27 z_dest3 .req z28 -gf_3vect_dot_prod_sve: +cdecl(gf_3vect_dot_prod_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_3vect_mad_neon.S b/erasure_code/aarch64/gf_3vect_mad_neon.S index d9a3ccc..fcfeec1 100644 --- a/erasure_code/aarch64/gf_3vect_mad_neon.S +++ b/erasure_code/aarch64/gf_3vect_mad_neon.S @@ -26,11 +26,15 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ + +#include "../include/aarch64_label.h" + .text -.global gf_3vect_mad_neon +.global cdecl(gf_3vect_mad_neon) +#ifndef __APPLE__ .type gf_3vect_mad_neon, %function - +#endif /* arguments */ x_len .req x0 @@ -122,7 +126,7 @@ q_data .req q21 v_data_lo .req v22 v_data_hi .req v23 -gf_3vect_mad_neon: +cdecl(gf_3vect_mad_neon): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail @@ -332,8 +336,13 @@ gf_3vect_mad_neon: sub x_dest2, x_dest2, x_tmp sub x_dest3, x_dest3, x_tmp +#ifndef __APPLE__ adrp x_const, const_tbl add x_const, x_const, :lo12:const_tbl +#else + adrp x_const, const_tbl@PAGE + add x_const, x_const, const_tbl@PAGEOFF +#endif sub x_const, x_const, x_tmp ldr q_tmp, [x_const, #16] @@ -375,7 +384,7 @@ gf_3vect_mad_neon: mov w_ret, #1 ret -.section .rodata +ASM_DEF_RODATA .balign 8 const_tbl: .dword 0x0000000000000000, 0x0000000000000000 diff --git a/erasure_code/aarch64/gf_3vect_mad_sve.S b/erasure_code/aarch64/gf_3vect_mad_sve.S index 52c2ffc..9e0ca5c 100644 --- a/erasure_code/aarch64/gf_3vect_mad_sve.S +++ b/erasure_code/aarch64/gf_3vect_mad_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_3vect_mad_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_3vect_mad_sve) +#ifndef __APPLE__ .type gf_3vect_mad_sve, %function +#endif /* gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, unsigned char **dest); @@ -84,7 +88,7 @@ q_gft3_hi .req q20 z_dest2 .req z27 z_dest3 .req z28 -gf_3vect_mad_sve: +cdecl(gf_3vect_mad_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_4vect_dot_prod_neon.S b/erasure_code/aarch64/gf_4vect_dot_prod_neon.S index 2cfe5aa..6204102 100644 --- a/erasure_code/aarch64/gf_4vect_dot_prod_neon.S +++ b/erasure_code/aarch64/gf_4vect_dot_prod_neon.S @@ -26,11 
+26,15 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ + +#include "../include/aarch64_label.h" + .text -.global gf_4vect_dot_prod_neon +.global cdecl(gf_4vect_dot_prod_neon) +#ifndef __APPLE__ .type gf_4vect_dot_prod_neon, %function - +#endif /* arguments */ x_len .req x0 @@ -132,7 +136,7 @@ q_data .req q_tmp1 v_data_lo .req v_tmp1_lo v_data_hi .req v_tmp1_hi -gf_4vect_dot_prod_neon: +cdecl(gf_4vect_dot_prod_neon): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_4vect_dot_prod_sve.S b/erasure_code/aarch64/gf_4vect_dot_prod_sve.S index ae7cdcb..eb35427 100644 --- a/erasure_code/aarch64/gf_4vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_4vect_dot_prod_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_4vect_dot_prod_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_4vect_dot_prod_sve) +#ifndef __APPLE__ .type gf_4vect_dot_prod_sve, %function +#endif /* void gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char **dest); */ @@ -97,7 +101,7 @@ z_dest2 .req z27 z_dest3 .req z28 z_dest4 .req z29 -gf_4vect_dot_prod_sve: +cdecl(gf_4vect_dot_prod_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_4vect_mad_neon.S b/erasure_code/aarch64/gf_4vect_mad_neon.S index 8692437..ebf82e7 100644 --- a/erasure_code/aarch64/gf_4vect_mad_neon.S +++ b/erasure_code/aarch64/gf_4vect_mad_neon.S @@ -27,11 +27,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ +#include "../include/aarch64_label.h" + .text -.global gf_4vect_mad_neon +.global cdecl(gf_4vect_mad_neon) +#ifndef __APPLE__ .type gf_4vect_mad_neon, %function - +#endif /* arguments */ x_len .req x0 @@ -138,7 +141,7 @@ q_data .req q21 v_data_lo .req v22 v_data_hi .req v23 -gf_4vect_mad_neon: +cdecl(gf_4vect_mad_neon): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail @@ -397,8 +400,13 @@ gf_4vect_mad_neon: sub x_dest3, x_dest3, x_tmp sub x_dest4, x_dest4, x_tmp +#ifndef __APPLE__ adrp x_const, const_tbl add x_const, x_const, :lo12:const_tbl +#else + adrp x_const, const_tbl@PAGE + add x_const, x_const, const_tbl@PAGEOFF +#endif sub x_const, x_const, x_tmp ldr q_tmp, [x_const, #16] @@ -449,7 +457,7 @@ gf_4vect_mad_neon: mov w_ret, #1 ret -.section .rodata +ASM_DEF_RODATA .balign 8 const_tbl: .dword 0x0000000000000000, 0x0000000000000000 diff --git a/erasure_code/aarch64/gf_4vect_mad_sve.S b/erasure_code/aarch64/gf_4vect_mad_sve.S index 8bf682c..89ec89f 100644 --- a/erasure_code/aarch64/gf_4vect_mad_sve.S +++ b/erasure_code/aarch64/gf_4vect_mad_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_4vect_mad_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_4vect_mad_sve) +#ifndef __APPLE__ .type gf_4vect_mad_sve, %function +#endif /* gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, unsigned char **dest); @@ -91,7 +95,7 @@ z_dest2 .req z27 z_dest3 .req z28 z_dest4 .req z29 -gf_4vect_mad_sve: +cdecl(gf_4vect_mad_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_5vect_dot_prod_neon.S b/erasure_code/aarch64/gf_5vect_dot_prod_neon.S index 2e4dea4..1316666 100644 --- 
a/erasure_code/aarch64/gf_5vect_dot_prod_neon.S +++ b/erasure_code/aarch64/gf_5vect_dot_prod_neon.S @@ -27,11 +27,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ +#include "../include/aarch64_label.h" + .text -.global gf_5vect_dot_prod_neon +.global cdecl(gf_5vect_dot_prod_neon) +#ifndef __APPLE__ .type gf_5vect_dot_prod_neon, %function - +#endif /* arguments */ x_len .req x0 @@ -159,7 +162,7 @@ q_gft5_lo .req q_p2_3 q_gft5_hi .req q_p3_3 -gf_5vect_dot_prod_neon: +cdecl(gf_5vect_dot_prod_neon): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_5vect_dot_prod_sve.S b/erasure_code/aarch64/gf_5vect_dot_prod_sve.S index ae999ff..bb7cd01 100644 --- a/erasure_code/aarch64/gf_5vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_5vect_dot_prod_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_5vect_dot_prod_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_5vect_dot_prod_sve) +#ifndef __APPLE__ .type gf_5vect_dot_prod_sve, %function +#endif /* void gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char **dest); */ @@ -105,7 +109,7 @@ z_dest3 .req z28 z_dest4 .req z29 z_dest5 .req z30 -gf_5vect_dot_prod_sve: +cdecl(gf_5vect_dot_prod_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_5vect_mad_neon.S b/erasure_code/aarch64/gf_5vect_mad_neon.S index f0ff163..473e4c5 100644 --- a/erasure_code/aarch64/gf_5vect_mad_neon.S +++ b/erasure_code/aarch64/gf_5vect_mad_neon.S @@ -26,11 +26,15 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
**********************************************************************/ + +#include "../include/aarch64_label.h" + .text -.global gf_5vect_mad_neon +.global cdecl(gf_5vect_mad_neon) +#ifndef __APPLE__ .type gf_5vect_mad_neon, %function - +#endif /* arguments */ x_len .req x0 @@ -152,7 +156,7 @@ q_data .req q21 v_data_lo .req v22 v_data_hi .req v23 -gf_5vect_mad_neon: +cdecl(gf_5vect_mad_neon): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail @@ -463,8 +467,13 @@ gf_5vect_mad_neon: sub x_dest4, x_dest4, x_tmp sub x_dest5, x_dest5, x_tmp +#ifndef __APPLE__ adrp x_const, const_tbl add x_const, x_const, :lo12:const_tbl +#else + adrp x_const, const_tbl@PAGE + add x_const, x_const, const_tbl@PAGEOFF +#endif sub x_const, x_const, x_tmp ldr q_tmp, [x_const, #16] @@ -528,7 +537,7 @@ gf_5vect_mad_neon: mov w_ret, #1 ret -.section .rodata +ASM_DEF_RODATA .balign 8 const_tbl: .dword 0x0000000000000000, 0x0000000000000000 diff --git a/erasure_code/aarch64/gf_5vect_mad_sve.S b/erasure_code/aarch64/gf_5vect_mad_sve.S index 82e88d9..ab374d3 100644 --- a/erasure_code/aarch64/gf_5vect_mad_sve.S +++ b/erasure_code/aarch64/gf_5vect_mad_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_5vect_mad_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_5vect_mad_sve) +#ifndef __APPLE__ .type gf_5vect_mad_sve, %function +#endif /* gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, unsigned char **dest); @@ -98,7 +102,7 @@ z_dest3 .req z28 z_dest4 .req z29 z_dest5 .req z30 -gf_5vect_mad_sve: +cdecl(gf_5vect_mad_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_6vect_dot_prod_sve.S b/erasure_code/aarch64/gf_6vect_dot_prod_sve.S index 1196bc1..acc9895 100644 --- a/erasure_code/aarch64/gf_6vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_6vect_dot_prod_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_6vect_dot_prod_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_6vect_dot_prod_sve) +#ifndef __APPLE__ .type gf_6vect_dot_prod_sve, %function +#endif /* void gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char **dest); */ @@ -113,7 +117,7 @@ z_dest4 .req z29 z_dest5 .req z30 z_dest6 .req z31 -gf_6vect_dot_prod_sve: +cdecl(gf_6vect_dot_prod_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_6vect_mad_neon.S b/erasure_code/aarch64/gf_6vect_mad_neon.S index 7ec2d80..3b1b1b4 100644 --- a/erasure_code/aarch64/gf_6vect_mad_neon.S +++ b/erasure_code/aarch64/gf_6vect_mad_neon.S @@ -27,10 +27,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
**********************************************************************/ -.text -.global gf_6vect_mad_neon -.type gf_6vect_mad_neon, %function +#include "../include/aarch64_label.h" +.text +.global cdecl(gf_6vect_mad_neon) +#ifndef __APPLE__ +.type gf_6vect_mad_neon, %function +#endif /* arguments */ x_len .req x0 @@ -166,7 +169,7 @@ q_data .req q21 v_data_lo .req v22 v_data_hi .req v23 -gf_6vect_mad_neon: +cdecl(gf_6vect_mad_neon): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail @@ -526,8 +529,13 @@ gf_6vect_mad_neon: sub x_dest5, x_dest5, x_tmp sub x_dest6, x_dest6, x_tmp +#ifndef __APPLE__ adrp x_const, const_tbl add x_const, x_const, :lo12:const_tbl +#else + adrp x_const, const_tbl@PAGE + add x_const, x_const, const_tbl@PAGEOFF +#endif sub x_const, x_const, x_tmp ldr q_tmp, [x_const, #16] @@ -603,7 +611,7 @@ gf_6vect_mad_neon: mov w_ret, #1 ret -.section .rodata +ASM_DEF_RODATA .balign 8 const_tbl: .dword 0x0000000000000000, 0x0000000000000000 diff --git a/erasure_code/aarch64/gf_6vect_mad_sve.S b/erasure_code/aarch64/gf_6vect_mad_sve.S index 670e664..c4f372c 100644 --- a/erasure_code/aarch64/gf_6vect_mad_sve.S +++ b/erasure_code/aarch64/gf_6vect_mad_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_6vect_mad_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_6vect_mad_sve) +#ifndef __APPLE__ .type gf_6vect_mad_sve, %function +#endif /* gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, unsigned char **dest); @@ -105,7 +109,7 @@ z_dest4 .req z29 z_dest5 .req z30 z_dest6 .req z31 -gf_6vect_mad_sve: +cdecl(gf_6vect_mad_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_7vect_dot_prod_sve.S b/erasure_code/aarch64/gf_7vect_dot_prod_sve.S index cccaec5..0f74873 100644 --- a/erasure_code/aarch64/gf_7vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_7vect_dot_prod_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_7vect_dot_prod_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_7vect_dot_prod_sve) +#ifndef __APPLE__ .type gf_7vect_dot_prod_sve, %function +#endif /* void gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char **dest); */ @@ -122,7 +126,7 @@ z_dest4 .req z29 z_dest5 .req z30 z_dest6 .req z31 -gf_7vect_dot_prod_sve: +cdecl(gf_7vect_dot_prod_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_8vect_dot_prod_sve.S b/erasure_code/aarch64/gf_8vect_dot_prod_sve.S index ee839a4..20768f4 100644 --- a/erasure_code/aarch64/gf_8vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_8vect_dot_prod_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_8vect_dot_prod_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_8vect_dot_prod_sve) +#ifndef __APPLE__ .type gf_8vect_dot_prod_sve, %function +#endif /* void gf_8vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char **dest); */ @@ -131,7 +135,7 @@ z_dest4 .req z29 z_dest5 .req z30 z_dest6 .req z31 -gf_8vect_dot_prod_sve: +cdecl(gf_8vect_dot_prod_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_vect_dot_prod_neon.S b/erasure_code/aarch64/gf_vect_dot_prod_neon.S index 117110c..4d17362 100644 --- a/erasure_code/aarch64/gf_vect_dot_prod_neon.S +++ b/erasure_code/aarch64/gf_vect_dot_prod_neon.S @@ -26,10 +26,15 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) 
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ + +#include "../include/aarch64_label.h" + .text -.global gf_vect_dot_prod_neon +.global cdecl(gf_vect_dot_prod_neon) +#ifndef __APPLE__ .type gf_vect_dot_prod_neon, %function +#endif /* arguments */ x_len .req x0 @@ -115,7 +120,7 @@ v_data_lo .req v_p2 v_data_hi .req v_p3 -gf_vect_dot_prod_neon: +cdecl(gf_vect_dot_prod_neon): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_vect_dot_prod_sve.S b/erasure_code/aarch64/gf_vect_dot_prod_sve.S index 7cf3d0d..48ce151 100644 --- a/erasure_code/aarch64/gf_vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_vect_dot_prod_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_vect_dot_prod_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_vect_dot_prod_sve) +#ifndef __APPLE__ .type gf_vect_dot_prod_sve, %function +#endif /* void gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char *dest); */ @@ -66,7 +70,7 @@ z_gft1_hi .req z5 q_gft1_lo .req q4 q_gft1_hi .req q5 -gf_vect_dot_prod_sve: +cdecl(gf_vect_dot_prod_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_vect_mad_neon.S b/erasure_code/aarch64/gf_vect_mad_neon.S index 9ebd86b..bc2b957 100644 --- a/erasure_code/aarch64/gf_vect_mad_neon.S +++ b/erasure_code/aarch64/gf_vect_mad_neon.S @@ -26,11 +26,15 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ + +#include "../include/aarch64_label.h" + .text -.global gf_vect_mad_neon +.global cdecl(gf_vect_mad_neon) +#ifndef __APPLE__ .type gf_vect_mad_neon, %function - +#endif /* arguments */ x_len .req x0 @@ -121,7 +125,7 @@ v_data_lo .req v_d1_2 v_data_hi .req v_d1_3 -gf_vect_mad_neon: +cdecl(gf_vect_mad_neon): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail @@ -281,8 +285,13 @@ gf_vect_mad_neon: mov x_src, x_src_end sub x_dest1, x_dest1, x_tmp +#ifndef __APPLE__ adrp x_const, const_tbl add x_const, x_const, :lo12:const_tbl +#else + adrp x_const, const_tbl@PAGE + add x_const, x_const, const_tbl@PAGEOFF +#endif sub x_const, x_const, x_tmp ldr q_tmp, [x_const, #16] @@ -308,7 +317,7 @@ gf_vect_mad_neon: mov w_ret, #1 ret -.section .rodata +ASM_DEF_RODATA .balign 8 const_tbl: .dword 0x0000000000000000, 0x0000000000000000 diff --git a/erasure_code/aarch64/gf_vect_mad_sve.S b/erasure_code/aarch64/gf_vect_mad_sve.S index 970cf23..41d6da9 100644 --- a/erasure_code/aarch64/gf_vect_mad_sve.S +++ b/erasure_code/aarch64/gf_vect_mad_sve.S @@ -30,9 +30,12 @@ .align 6 .arch armv8-a+sve +#include "../include/aarch64_label.h" -.global gf_vect_mad_sve +.global cdecl(gf_vect_mad_sve) +#ifndef __APPLE__ .type gf_vect_mad_sve, %function +#endif /* gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, unsigned char *dest); @@ -68,7 +71,7 @@ z_gft1_hi .req z7 q_gft1_lo .req q6 q_gft1_hi .req q7 -gf_vect_mad_sve: +cdecl(gf_vect_mad_sve): /* less than 16 bytes, return_fail */ cmp x_len, #16 blt .return_fail diff --git a/erasure_code/aarch64/gf_vect_mul_neon.S b/erasure_code/aarch64/gf_vect_mul_neon.S index 5f129c2..096b91d 100644 --- a/erasure_code/aarch64/gf_vect_mul_neon.S +++ 
b/erasure_code/aarch64/gf_vect_mul_neon.S @@ -27,11 +27,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ +#include "../include/aarch64_label.h" + .text -.global gf_vect_mul_neon +.global cdecl(gf_vect_mul_neon) +#ifndef __APPLE__ .type gf_vect_mul_neon, %function - +#endif /* arguments */ x_len .req x0 @@ -90,7 +93,7 @@ v_data_6_hi .req v_data_6 v_data_7_hi .req v_data_7 -gf_vect_mul_neon: +cdecl(gf_vect_mul_neon): /* less than 32 bytes, return_fail */ cmp x_len, #32 blt .return_fail diff --git a/erasure_code/aarch64/gf_vect_mul_sve.S b/erasure_code/aarch64/gf_vect_mul_sve.S index 195b597..d52fb17 100644 --- a/erasure_code/aarch64/gf_vect_mul_sve.S +++ b/erasure_code/aarch64/gf_vect_mul_sve.S @@ -30,8 +30,12 @@ .align 6 .arch armv8-a+sve -.global gf_vect_mul_sve +#include "../include/aarch64_label.h" + +.global cdecl(gf_vect_mul_sve) +#ifndef __APPLE__ .type gf_vect_mul_sve, %function +#endif /* Refer to include/gf_vect_mul.h * @@ -72,7 +76,7 @@ z_gft1_hi .req z7 q_gft1_lo .req q6 q_gft1_hi .req q7 -gf_vect_mul_sve: +cdecl(gf_vect_mul_sve): /* less than 32 bytes, return_fail */ cmp x_len, #32 blt .return_fail diff --git a/igzip/aarch64/encode_df.S b/igzip/aarch64/encode_df.S index 6dddddf..7368099 100644 --- a/igzip/aarch64/encode_df.S +++ b/igzip/aarch64/encode_df.S @@ -27,6 +27,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ +#include "../include/aarch64_label.h" + .arch armv8-a+crc .text .align 2 @@ -46,8 +48,10 @@ declare Macros x_\name .req x\reg .endm - .global encode_deflate_icf_aarch64 + .global cdecl(encode_deflate_icf_aarch64) +#ifndef __APPLE__ .type encode_deflate_icf_aarch64, %function +#endif /* struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in, @@ -86,7 +90,7 @@ declare Macros .equ offset_m_out_buf, 16 .equ offset_m_out_end, 24 -encode_deflate_icf_aarch64: +cdecl(encode_deflate_icf_aarch64): cmp next_in, end_in bcs .done @@ -156,4 +160,6 @@ encode_deflate_icf_aarch64: .done: ret +#ifndef __APPLE__ .size encode_deflate_icf_aarch64, .-encode_deflate_icf_aarch64 +#endif diff --git a/igzip/aarch64/gen_icf_map.S b/igzip/aarch64/gen_icf_map.S index fe04ee4..93d4b44 100644 --- a/igzip/aarch64/gen_icf_map.S +++ b/igzip/aarch64/gen_icf_map.S @@ -26,6 +26,9 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ + +#include "../include/aarch64_label.h" + .arch armv8-a+crc+crypto .text .align 2 @@ -62,8 +65,10 @@ declare Macros .endm .align 2 - .global gen_icf_map_h1_aarch64 + .global cdecl(gen_icf_map_h1_aarch64) +#ifndef __APPLE__ .type gen_icf_map_h1_aarch64, %function +#endif /* arguments */ declare_generic_reg stream_param, 0,x @@ -137,7 +142,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream, struct deflate_icf *matches_icf_lookup, uint64_t input_size) */ -gen_icf_map_h1_aarch64: +cdecl(gen_icf_map_h1_aarch64): cmp input_size_param, (ISAL_LOOK_AHEAD-1) // 287 bls .fast_exit stp x29, x30, [sp, -16]! 
@@ -263,4 +268,6 @@ gen_icf_map_h1_aarch64: .fast_exit: mov ret_val, 0 ret +#ifndef __APPLE__ .size gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64 +#endif diff --git a/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S b/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S index 3255ba4..b5bdf5d 100644 --- a/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S +++ b/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S @@ -27,6 +27,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ +#include "../include/aarch64_label.h" + .arch armv8-a .text .align 2 @@ -257,8 +259,10 @@ declare Macros .endm - .global decode_huffman_code_block_stateless_aarch64 + .global cdecl(decode_huffman_code_block_stateless_aarch64) +#ifndef __APPLE__ .type decode_huffman_code_block_stateless_aarch64, %function +#endif /* void decode_huffman_code_block_stateless_aarch64( struct inflate_state *state, @@ -305,7 +309,7 @@ declare Macros declare_generic_reg write_overflow_lits,26,w declare_generic_reg repeat_length,27,w -decode_huffman_code_block_stateless_aarch64: +cdecl(decode_huffman_code_block_stateless_aarch64): //save registers push_stack @@ -324,8 +328,13 @@ decode_huffman_code_block_stateless_aarch64: ldp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state] //init rfc_table +#ifndef __APPLE__ adrp rfc_table,rfc_lookup_table add rfc_table,rfc_table,:lo12:rfc_lookup_table +#else + adrp rfc_table,rfc_lookup_table@PAGE + add rfc_table,rfc_table,rfc_lookup_table@PAGEOFF +#endif #if ENABLE_TBL_INSTRUCTION ld1 {v1.16b,v2.16b,v3.16b},[rfc_table] add rfc_table,rfc_table,48 @@ -661,8 +670,10 @@ byte_copy_loop: strb w_arg0, [next_out],1 bne byte_copy_loop b decompress_data_end +#ifndef __APPLE__ .size decode_huffman_code_block_stateless_aarch64, .-decode_huffman_code_block_stateless_aarch64 .type rfc_lookup_table, %object +#endif rfc_lookup_table: #if ENABLE_TBL_INSTRUCTION @@ -686,4 +697,6 @@ rfc_lookup_table: .short 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01 .short 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000 #endif +#ifndef __APPLE__ .size rfc_lookup_table, . - rfc_lookup_table +#endif diff --git a/igzip/aarch64/igzip_deflate_body_aarch64.S b/igzip/aarch64/igzip_deflate_body_aarch64.S index 254f74c..9a7e1a9 100644 --- a/igzip/aarch64/igzip_deflate_body_aarch64.S +++ b/igzip/aarch64/igzip_deflate_body_aarch64.S @@ -26,6 +26,9 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
**********************************************************************/ + +#include "../include/aarch64_label.h" + .arch armv8-a+crc .text .align 2 @@ -72,8 +75,10 @@ skip_has_hist: .endm - .global isal_deflate_body_aarch64 + .global cdecl(isal_deflate_body_aarch64) +#ifndef __APPLE__ .type isal_deflate_body_aarch64, %function +#endif /* void isal_deflate_body_aarch64(struct isal_zstream *stream) */ @@ -115,7 +120,7 @@ skip_has_hist: declare_generic_reg code_len2, 4,x -isal_deflate_body_aarch64: +cdecl(isal_deflate_body_aarch64): //save registers push_stack ldr avail_in, [stream, _avail_in] @@ -258,4 +263,6 @@ exit_save_state: mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER str w_tmp0, [stream, _internal_state+_state] b exit_ret +#ifndef __APPLE__ .size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64 +#endif diff --git a/igzip/aarch64/igzip_deflate_finish_aarch64.S b/igzip/aarch64/igzip_deflate_finish_aarch64.S index e5842b5..a90e225 100644 --- a/igzip/aarch64/igzip_deflate_finish_aarch64.S +++ b/igzip/aarch64/igzip_deflate_finish_aarch64.S @@ -26,6 +26,9 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ + +#include "../include/aarch64_label.h" + .arch armv8-a+crc .text .align 2 @@ -73,9 +76,11 @@ skip_has_hist: sub w_\next_in,w_\next_in,w_\m_out_buf stp w_\next_in,w_\start_in,[\stream,_avail_out] .endm - .global isal_deflate_finish_aarch64 + .global cdecl(isal_deflate_finish_aarch64) .arch armv8-a+crc +#ifndef __APPLE__ .type isal_deflate_finish_aarch64, %function +#endif /* void isal_deflate_finish_aarch64(struct isal_zstream *stream) */ @@ -117,7 +122,7 @@ skip_has_hist: declare_generic_reg code_len2, 4,x -isal_deflate_finish_aarch64: +cdecl(isal_deflate_finish_aarch64): //save registers push_stack @@ -260,5 +265,6 @@ update_state_exit: update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1 pop_stack ret - +#ifndef __APPLE__ .size isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64 +#endif diff --git a/igzip/aarch64/igzip_deflate_hash_aarch64.S b/igzip/aarch64/igzip_deflate_hash_aarch64.S index 40251da..263a455 100644 --- a/igzip/aarch64/igzip_deflate_hash_aarch64.S +++ b/igzip/aarch64/igzip_deflate_hash_aarch64.S @@ -26,6 +26,9 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
**********************************************************************/ + +#include "../include/aarch64_label.h" + .arch armv8-a+crc .text .align 2 @@ -46,8 +49,10 @@ declare Macros - .global isal_deflate_hash_aarch64 + .global cdecl(isal_deflate_hash_aarch64) +#ifndef __APPLE__ .type isal_deflate_hash_aarch64, %function +#endif /* void isal_deflate_hash_aarch64(uint16_t * hash_table, uint32_t hash_mask, uint32_t current_index, uint8_t * dict, uint32_t dict_len) @@ -58,14 +63,14 @@ declare Macros declare_generic_reg dict, 3,x declare_generic_reg dict_len, 4,w - declare_generic_reg next_in 3,x - declare_generic_reg end_in 6,x - declare_generic_reg ind 5,w - declare_generic_reg hash 2,w - declare_generic_reg literal 2,w + declare_generic_reg next_in, 3,x + declare_generic_reg end_in, 6,x + declare_generic_reg ind, 5,w + declare_generic_reg hash, 2,w + declare_generic_reg literal, 2,w #define SHORTEST_MATCH #4 -isal_deflate_hash_aarch64: +cdecl(isal_deflate_hash_aarch64): sub ind, current_index, dict_len and ind,ind,0xffff @@ -92,4 +97,6 @@ loop_start: exit_func: ret +#ifndef __APPLE__ .size isal_deflate_hash_aarch64, .-isal_deflate_hash_aarch64 +#endif diff --git a/igzip/aarch64/igzip_isal_adler32_neon.S b/igzip/aarch64/igzip_isal_adler32_neon.S index caa0f33..9d67243 100644 --- a/igzip/aarch64/igzip_isal_adler32_neon.S +++ b/igzip/aarch64/igzip_isal_adler32_neon.S @@ -26,6 +26,9 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ + +#include "../include/aarch64_label.h" + .arch armv8-a+crypto .text .align 3 @@ -56,9 +59,11 @@ Arguements list adler32 .req w0 start .req x1 length .req x2 - .global adler32_neon + .global cdecl(adler32_neon) +#ifndef __APPLE__ .type adler32_neon, %function -adler32_neon: +#endif +cdecl(adler32_neon): /* local variables */ @@ -92,8 +97,13 @@ local variables lsr adler1, adler32, 16 lsr loop_cnt,length,5 +#ifndef __APPLE__ adrp x3,factors add x3,x3,:lo12:factors +#else + adrp x3,factors@PAGE + add x3,x3,factors@PAGEOFF +#endif ld1 {factor0_v.16b-factor1_v.16b},[x3] add end,start,length @@ -162,12 +172,15 @@ end_func: orr w0,adler0,adler1,lsl 16 ret +#ifndef __APPLE__ .size adler32_neon, .-adler32_neon .section .rodata.cst16,"aM",@progbits,16 +#else + .section __TEXT,__const +#endif .align 4 factors: .quad 0x191a1b1c1d1e1f20 .quad 0x1112131415161718 .quad 0x090a0b0c0d0e0f10 .quad 0x0102030405060708 - diff --git a/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c b/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c index 183010c..a09508f 100644 --- a/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c +++ b/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c @@ -30,86 +30,121 @@ DEFINE_INTERFACE_DISPATCHER(isal_adler32) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_ASIMD) return PROVIDER_INFO(adler32_neon); - +#elif defined(__APPLE__) + return PROVIDER_INFO(adler32_neon); +#endif return PROVIDER_BASIC(adler32); } DEFINE_INTERFACE_DISPATCHER(isal_deflate_body) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_CRC32) return PROVIDER_INFO(isal_deflate_body_aarch64); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_CRC32_KEY)) + return PROVIDER_INFO(isal_deflate_body_aarch64); +#endif return PROVIDER_BASIC(isal_deflate_body); } DEFINE_INTERFACE_DISPATCHER(isal_deflate_finish) { +#if defined(__linux__) unsigned long 
auxval = getauxval(AT_HWCAP);

 	if (auxval & HWCAP_CRC32)
 		return PROVIDER_INFO(isal_deflate_finish_aarch64);
-
+#elif defined(__APPLE__)
+	if (sysctlEnabled(SYSCTL_CRC32_KEY))
+		return PROVIDER_INFO(isal_deflate_finish_aarch64);
+#endif
 	return PROVIDER_BASIC(isal_deflate_finish);
 }

 DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl1)
 {
+#if defined(__linux__)
 	unsigned long auxval = getauxval(AT_HWCAP);

 	if (auxval & HWCAP_CRC32)
 		return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
-
+#elif defined(__APPLE__)
+	if (sysctlEnabled(SYSCTL_CRC32_KEY))
+		return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
+#endif
 	return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
 }

 DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl1)
 {
+#if defined(__linux__)
 	unsigned long auxval = getauxval(AT_HWCAP);

 	if (auxval & HWCAP_CRC32)
 		return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
-
+#elif defined(__APPLE__)
+	if (sysctlEnabled(SYSCTL_CRC32_KEY))
+		return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
+#endif
 	return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
 }

 DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl2)
 {
+#if defined(__linux__)
 	unsigned long auxval = getauxval(AT_HWCAP);

 	if (auxval & HWCAP_CRC32)
 		return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
-
+#elif defined(__APPLE__)
+	if (sysctlEnabled(SYSCTL_CRC32_KEY))
+		return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
+#endif
 	return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
 }

 DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl2)
 {
+#if defined(__linux__)
 	unsigned long auxval = getauxval(AT_HWCAP);

 	if (auxval & HWCAP_CRC32)
 		return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
-
+#elif defined(__APPLE__)
+	if (sysctlEnabled(SYSCTL_CRC32_KEY))
+		return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
+#endif
 	return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
 }

 DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl3)
 {
+#if defined(__linux__)
 	unsigned long auxval = getauxval(AT_HWCAP);

 	if (auxval & HWCAP_CRC32)
 		return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
-
+#elif defined(__APPLE__)
+	if (sysctlEnabled(SYSCTL_CRC32_KEY))
+		return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
+#endif
 	return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
 }

 DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3)
 {
+#if defined(__linux__)
 	unsigned long auxval = getauxval(AT_HWCAP);

 	if (auxval & HWCAP_CRC32)
 		return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base);
-
+#elif defined(__APPLE__)
+	if (sysctlEnabled(SYSCTL_CRC32_KEY))
+		return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base);
+#endif
 	return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map);
 }
@@ -125,64 +160,92 @@ DEFINE_INTERFACE_DISPATCHER(encode_deflate_icf)

 DEFINE_INTERFACE_DISPATCHER(isal_update_histogram)
 {
+#if defined(__linux__)
 	unsigned long auxval = getauxval(AT_HWCAP);

 	if (auxval & HWCAP_CRC32)
 		return PROVIDER_INFO(isal_update_histogram_aarch64);
-
+#elif defined(__APPLE__)
+	if (sysctlEnabled(SYSCTL_CRC32_KEY))
+		return PROVIDER_INFO(isal_update_histogram_aarch64);
+#endif
 	return PROVIDER_BASIC(isal_update_histogram);
 }

 DEFINE_INTERFACE_DISPATCHER(gen_icf_map_lh1)
 {
+#if defined(__linux__)
 	unsigned long auxval = getauxval(AT_HWCAP);

 	if (auxval & HWCAP_CRC32) {
 		return PROVIDER_INFO(gen_icf_map_h1_aarch64);
 	}
-
+#elif defined(__APPLE__)
+	if (sysctlEnabled(SYSCTL_CRC32_KEY))
+		return PROVIDER_INFO(gen_icf_map_h1_aarch64);
+#endif
 	return PROVIDER_BASIC(gen_icf_map_h1);
 }
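Every dispatcher in this file follows the same shape: on Linux the CPU feature mask comes from getauxval(AT_HWCAP), while on macOS, where getauxval() does not exist, each feature is queried through sysctlbyname(). Below is a minimal, self-contained sketch of that macOS-side probe; sysctl_enabled() is an illustrative stand-in for the sysctlEnabled() helper added in aarch64_multibinary.h, and it only builds on macOS:

#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

/* Query a hw.optional.* key; unknown keys or older macOS releases report 0. */
static int sysctl_enabled(const char *name)
{
	int enabled = 0;
	size_t size = sizeof(enabled);

	if (sysctlbyname(name, &enabled, &size, NULL, 0) != 0)
		return 0;
	return enabled;
}

int main(void)
{
	/* same key string as SYSCTL_CRC32_KEY in aarch64_multibinary.h */
	printf("armv8 crc32: %d\n", sysctl_enabled("hw.optional.armv8_crc32"));
	return 0;
}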
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl0) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_CRC32) return PROVIDER_INFO(isal_deflate_hash_aarch64); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_CRC32_KEY)) + return PROVIDER_INFO(isal_deflate_hash_aarch64); +#endif return PROVIDER_BASIC(isal_deflate_hash); } DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl1) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_CRC32) return PROVIDER_INFO(isal_deflate_hash_aarch64); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_CRC32_KEY)) + return PROVIDER_INFO(isal_deflate_hash_aarch64); +#endif return PROVIDER_BASIC(isal_deflate_hash); } DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl2) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_CRC32) return PROVIDER_INFO(isal_deflate_hash_aarch64); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_CRC32_KEY)) + return PROVIDER_INFO(isal_deflate_hash_aarch64); +#endif return PROVIDER_BASIC(isal_deflate_hash); } DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl3) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_CRC32) return PROVIDER_INFO(isal_deflate_hash_aarch64); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_CRC32_KEY)) + return PROVIDER_INFO(isal_deflate_hash_aarch64); +#endif return PROVIDER_BASIC(isal_deflate_hash); } DEFINE_INTERFACE_DISPATCHER(decode_huffman_code_block_stateless) { +#if defined(__linux__) unsigned long auxval = getauxval(AT_HWCAP); if (auxval & HWCAP_CRC32) return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64); - +#elif defined(__APPLE__) + if (sysctlEnabled(SYSCTL_CRC32_KEY)) + return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64); +#endif return PROVIDER_BASIC(decode_huffman_code_block_stateless); } diff --git a/igzip/aarch64/igzip_set_long_icf_fg.S b/igzip/aarch64/igzip_set_long_icf_fg.S index 13f9b08..e1439b4 100644 --- a/igzip/aarch64/igzip_set_long_icf_fg.S +++ b/igzip/aarch64/igzip_set_long_icf_fg.S @@ -26,6 +26,9 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ + +#include "../include/aarch64_label.h" + .arch armv8-a .text .align 2 @@ -48,8 +51,10 @@ declare Macros .text .align 2 - .global set_long_icf_fg_aarch64 + .global cdecl(set_long_icf_fg_aarch64) +#ifndef __APPLE__ .type set_long_icf_fg_aarch64, %function +#endif /* void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size, @@ -69,7 +74,7 @@ void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t inp /* local variable */ declare_generic_reg len, 7,w declare_generic_reg dist_code, 8,w - declare_generic_reg shortest_match_len 9,w + declare_generic_reg shortest_match_len, 9,w declare_generic_reg len_max, 10,w declare_generic_reg dist_extra, 11,w declare_generic_reg const_8, 13,x @@ -90,7 +95,7 @@ void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t inp .equ SHORTEST_MATCH, 4 .equ LEN_MAX_CONST, 512 -set_long_icf_fg_aarch64: +cdecl(set_long_icf_fg_aarch64): stp x29, x30, [sp, -192]! 
add x29, sp, 0 stp x21, x22, [sp, 32] @@ -103,11 +108,18 @@ set_long_icf_fg_aarch64: add end_in, next_in_param, input_size_param mov match_lookup, match_lookup_param +#ifndef __APPLE__ adrp x1, .data_dist_start mov x2, DIST_START_SIZE // 128 add x1, x1, :lo12:.data_dist_start mov x0, dist_start - bl memcpy +#else + adrp x1, .data_dist_start@PAGE + mov x2, DIST_START_SIZE // 128 + add x1, x1, .data_dist_start@PAGEOFF + mov x0, dist_start +#endif + bl cdecl(memcpy) add x_tmp0, end_processed, ISAL_LOOK_AHEAD // 288 cmp end_in, x_tmp0 @@ -182,9 +194,11 @@ set_long_icf_fg_aarch64: ldr x23, [sp, 48] ldp x29, x30, [sp], 192 ret +#ifndef __APPLE__ .size set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64 +#endif - .section .rodata +ASM_DEF_RODATA .align 3 .set .data_dist_start,. + 0 .real_data_dist_start: diff --git a/igzip/aarch64/isal_deflate_icf_body_hash_hist.S b/igzip/aarch64/isal_deflate_icf_body_hash_hist.S index 3daaa1b..178d91c 100644 --- a/igzip/aarch64/isal_deflate_icf_body_hash_hist.S +++ b/igzip/aarch64/isal_deflate_icf_body_hash_hist.S @@ -26,6 +26,9 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ + +#include "../include/aarch64_label.h" + .arch armv8-a+crc .text .align 2 @@ -45,8 +48,10 @@ declare Macros x_\name .req x\reg .endm - .global isal_deflate_icf_body_hash_hist_aarch64 + .global cdecl(isal_deflate_icf_body_hash_hist_aarch64) +#ifndef __APPLE__ .type isal_deflate_icf_body_hash_hist_aarch64, %function +#endif /* void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream); */ @@ -126,7 +131,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream); declare_generic_reg tmp0, 4,x declare_generic_reg tmp1, 5,x -isal_deflate_icf_body_hash_hist_aarch64: +cdecl(isal_deflate_icf_body_hash_hist_aarch64): stp x29, x30, [sp, -80]! add x29, sp, 0 str x24, [sp, 56] @@ -360,5 +365,6 @@ isal_deflate_icf_body_hash_hist_aarch64: ldr x24, [sp, 56] ldp x29, x30, [sp], 80 ret - +#ifndef __APPLE__ .size isal_deflate_icf_body_hash_hist_aarch64, .-isal_deflate_icf_body_hash_hist_aarch64 +#endif diff --git a/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S b/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S index bb2baa2..e1a6d21 100644 --- a/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S +++ b/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S @@ -26,6 +26,9 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
**********************************************************************/ + +#include "../include/aarch64_label.h" + .arch armv8-a+crc .text @@ -129,7 +132,9 @@ void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream); declare_generic_reg tmp3, 28,x .align 2 +#ifndef __APPLE__ .type write_deflate_icf_constprop, %function +#endif write_deflate_icf_constprop: ldrh w2, [x0] mov w3, 30 @@ -141,10 +146,14 @@ write_deflate_icf_constprop: ubfx x1, x1, 16, 3 strh w1, [x0, 2] ret +#ifndef __APPLE__ .size write_deflate_icf_constprop, .-write_deflate_icf_constprop +#endif .align 2 +#ifndef __APPLE__ .type write_deflate_icf, %function +#endif write_deflate_icf: ldrh w4, [x0] bfi w4, w1, 0, 10 @@ -156,10 +165,14 @@ write_deflate_icf: bfi w1, w3, 3, 13 strh w1, [x0, 2] ret +#ifndef __APPLE__ .size write_deflate_icf, .-write_deflate_icf +#endif .align 2 +#ifndef __APPLE__ .type update_state, %function +#endif update_state: sub x7, x2, x1 ldr x4, [x0, 48] @@ -179,12 +192,16 @@ update_state: str x5, [x4, 4688] str x6, [x4, 4696] ret +#ifndef __APPLE__ .size update_state, .-update_state +#endif .align 2 - .global isal_deflate_icf_finish_hash_hist_aarch64 + .global cdecl(isal_deflate_icf_finish_hash_hist_aarch64) +#ifndef __APPLE__ .type isal_deflate_icf_finish_hash_hist_aarch64, %function -isal_deflate_icf_finish_hash_hist_aarch64: +#endif +cdecl(isal_deflate_icf_finish_hash_hist_aarch64): ldr w_end_in, [stream, 8] // stream->avail_in cbz w_end_in, .stream_not_available @@ -393,5 +410,6 @@ isal_deflate_icf_finish_hash_hist_aarch64: str w1, [stream, offset_state_state] // 84 .done: ret - +#ifndef __APPLE__ .size isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64 +#endif diff --git a/igzip/aarch64/isal_update_histogram.S b/igzip/aarch64/isal_update_histogram.S index abcec0f..a75e1da 100644 --- a/igzip/aarch64/isal_update_histogram.S +++ b/igzip/aarch64/isal_update_histogram.S @@ -26,6 +26,9 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************/ + +#include "../include/aarch64_label.h" + .arch armv8-a+crc .text .align 2 @@ -63,17 +66,24 @@ declare Macros .endm .macro convert_length_to_len_sym length:req,length_out:req,tmp0:req +#ifndef __APPLE__ adrp x_\tmp0, .len_to_code_tab_lanchor add x_\tmp0, x_\tmp0, :lo12:.len_to_code_tab_lanchor +#else + adrp x_\tmp0, .len_to_code_tab_lanchor@PAGE + add x_\tmp0, x_\tmp0, .len_to_code_tab_lanchor@PAGEOFF +#endif ldr w_\length_out, [x_\tmp0, w_\length, uxtw 2] add w_\length_out, w_\length_out, 256 .endm - .section .rodata +ASM_DEF_RODATA .align 4 .len_to_code_tab_lanchor = . 
+ 0
+#ifndef __APPLE__
 	.type	len_to_code_tab, %object
 	.size	len_to_code_tab, 1056
+#endif
 len_to_code_tab:
 	.word 0x00, 0x00, 0x00
 	.word 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
@@ -111,9 +121,11 @@ len_to_code_tab:
 	.word 0x00, 0x00, 0x00, 0x00, 0x00

 	.text
-	.global	isal_update_histogram_aarch64
+	.global	cdecl(isal_update_histogram_aarch64)
 	.arch armv8-a+crc
+#ifndef __APPLE__
 	.type	isal_update_histogram_aarch64, %function
+#endif

 /*
 void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
@@ -157,7 +169,7 @@ void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
 	.equ	hash_offset, (dist_offset + 8*DIST_LEN)	// 2528
 	.equ	hash_table_size, (8*1024*2)	// 16384

-isal_update_histogram_aarch64:
+cdecl(isal_update_histogram_aarch64):
 	cmp	w_length, 0
 	ble	.done

@@ -176,7 +188,7 @@ isal_update_histogram_aarch64:
 	mov	x0, last_seen
 	mov	w1, 0
 	mov	x2, hash_table_size
-	bl	memset
+	bl	cdecl(memset)

 	cmp	current, loop_end_iter
 	bcs	.loop_end
@@ -308,4 +320,6 @@ isal_update_histogram_aarch64:
 	.align	2
 .done:
 	ret
+#ifndef __APPLE__
 	.size	isal_update_histogram_aarch64, .-isal_update_histogram_aarch64
+#endif
diff --git a/include/aarch64_label.h b/include/aarch64_label.h
new file mode 100644
index 0000000..a4e6d06
--- /dev/null
+++ b/include/aarch64_label.h
@@ -0,0 +1,18 @@
+#ifndef __AARCH64_LABEL_H__
+#define __AARCH64_LABEL_H__
+
+#ifdef __USER_LABEL_PREFIX__
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+#define cdecl(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+#else
+#define cdecl(x) x
+#endif
+
+#ifdef __APPLE__
+#define ASM_DEF_RODATA .section __TEXT,__const
+#else
+#define ASM_DEF_RODATA .section .rodata
+#endif
+
+#endif
diff --git a/include/aarch64_multibinary.h b/include/aarch64_multibinary.h
index e31451b..aad4b43 100644
--- a/include/aarch64_multibinary.h
+++ b/include/aarch64_multibinary.h
@@ -31,7 +31,14 @@
 #ifndef __aarch64__
 #error "This file is for aarch64 only"
 #endif
+#ifdef __APPLE__
+#define SYSCTL_PMULL_KEY "hw.optional.arm.FEAT_PMULL"	// FEAT_* sysctl entries are available from macOS 12 onwards
+#define SYSCTL_CRC32_KEY "hw.optional.armv8_crc32"
+#define SYSCTL_SVE_KEY "hw.optional.arm.FEAT_SVE"	// this one is just a guess and needs to be verified against future macOS releases
+#else
 #include <sys/auxv.h>
+#endif
+#include "aarch64_label.h"
 #ifdef __ASSEMBLY__
 /**
 * # mbin_interface :	the wrapper layer for isal-l api
 *
@@ -48,17 +55,18 @@
 *	4. The dispather should return the right function pointer , revision and a string information .
 **/
 .macro mbin_interface name:req
-	.extern \name\()_dispatcher
-	.section .data
+	.extern cdecl(\name\()_dispatcher)
+	.data
 	.balign 8
-	.global \name\()_dispatcher_info
+	.global cdecl(\name\()_dispatcher_info)
+#ifndef __APPLE__
 	.type \name\()_dispatcher_info,%object
-
-	\name\()_dispatcher_info:
+#endif
+	cdecl(\name\()_dispatcher_info):
 	.quad \name\()_mbinit	//func_entry
-
+#ifndef __APPLE__
 	.size \name\()_dispatcher_info,. - \name\()_dispatcher_info
-
+#endif
 	.balign 8
 	.text
 \name\()_mbinit:
@@ -108,7 +116,7 @@
 	 */

-	bl	\name\()_dispatcher
+	bl	cdecl(\name\()_dispatcher)

 	//restore temp/indirect result registers
 	ldp	x8, x9, [sp, 16]
 	.cfi_restore 8
@@ -150,16 +158,24 @@
 	.cfi_def_cfa_offset 0
 	.cfi_endproc

-	.global \name
+	.global cdecl(\name)
+#ifndef __APPLE__
 	.type \name,%function
+#endif
 	.align 2
-	\name\():
+	cdecl(\name\()):
+#ifndef __APPLE__
 	adrp	x9, :got:\name\()_dispatcher_info
 	ldr	x9, [x9, #:got_lo12:\name\()_dispatcher_info]
+#else
+	adrp	x9, cdecl(\name\()_dispatcher_info)@GOTPAGE
+	ldr	x9, [x9, #cdecl(\name\()_dispatcher_info)@GOTPAGEOFF]
+#endif
 	ldr	x10,[x9]
 	br	x10
+#ifndef __APPLE__
 	.size \name,. - \name
-
+#endif
 	.endm
 /**
@@ -168,32 +184,53 @@
 */
 .macro mbin_interface_base name:req, base:req
 	.extern \base
-	.section .data
+	.data
 	.balign 8
-	.global \name\()_dispatcher_info
+	.global cdecl(\name\()_dispatcher_info)
+#ifndef __APPLE__
 	.type \name\()_dispatcher_info,%object
-
-	\name\()_dispatcher_info:
+#endif
+	cdecl(\name\()_dispatcher_info):
 	.quad \base	//func_entry
+#ifndef __APPLE__
 	.size \name\()_dispatcher_info,. - \name\()_dispatcher_info
-
+#endif
 	.balign 8
 	.text
-	.global \name
+	.global cdecl(\name)
+#ifndef __APPLE__
 	.type \name,%function
+#endif
 	.align 2
-	\name\():
-	adrp	x9, :got:\name\()_dispatcher_info
-	ldr	x9, [x9, #:got_lo12:\name\()_dispatcher_info]
+	cdecl(\name\()):
+#ifndef __APPLE__
+	adrp	x9, :got:cdecl(\name\()_dispatcher_info)
+	ldr	x9, [x9, #:got_lo12:cdecl(\name\()_dispatcher_info)]
+#else
+	adrp	x9, cdecl(\name\()_dispatcher_info)@GOTPAGE
+	ldr	x9, [x9, #cdecl(\name\()_dispatcher_info)@GOTPAGEOFF]
+#endif
 	ldr	x10,[x9]
 	br	x10
+#ifndef __APPLE__
 	.size \name,. - \name
-
+#endif
 	.endm

 #else /* __ASSEMBLY__ */
+#include <stdint.h>
+#if defined(__linux__)
 #include <sys/auxv.h>
-
+#elif defined(__APPLE__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+static inline int sysctlEnabled(const char* name){
+	int enabled;
+	size_t size = sizeof(enabled);
+	int status = sysctlbyname(name, &enabled, &size, NULL, 0);
+	return status ? 0 : enabled;
+}
+#endif
 #define DEFINE_INTERFACE_DISPATCHER(name) \
@@ -298,10 +335,12 @@ static inline uint32_t get_micro_arch_id(void)
 {
 	uint32_t id=CPU_IMPLEMENTER_RESERVE;
+#ifndef __APPLE__
 	if ((getauxval(AT_HWCAP) & HWCAP_CPUID)) {
 		/** Here will trap into kernel space */
 		asm("mrs %0, MIDR_EL1 " : "=r" (id));
 	}
+#endif
 	return id&0xff00fff0;
 }
diff --git a/mem/aarch64/mem_aarch64_dispatcher.c b/mem/aarch64/mem_aarch64_dispatcher.c
index 0dfe3a3..a89c97f 100644
--- a/mem/aarch64/mem_aarch64_dispatcher.c
+++ b/mem/aarch64/mem_aarch64_dispatcher.c
@@ -30,10 +30,12 @@
 DEFINE_INTERFACE_DISPATCHER(isal_zero_detect)
 {
+#if defined(__linux__)
 	unsigned long auxval = getauxval(AT_HWCAP);

 	if (auxval & HWCAP_ASIMD)
 		return PROVIDER_INFO(mem_zero_detect_neon);
-
+#elif defined(__APPLE__)
+	return PROVIDER_INFO(mem_zero_detect_neon);
+#endif
 	return PROVIDER_BASIC(mem_zero_detect);
-
 }
diff --git a/mem/aarch64/mem_zero_detect_neon.S b/mem/aarch64/mem_zero_detect_neon.S
index 6f93ff6..d36bdb2 100644
--- a/mem/aarch64/mem_zero_detect_neon.S
+++ b/mem/aarch64/mem_zero_detect_neon.S
@@ -27,6 +27,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### +#include "../include/aarch64_label.h" + .text .arch armv8-a @@ -36,10 +38,12 @@ // input: n -> x1 // output: -> x0 (true or false) -.global mem_zero_detect_neon +.global cdecl(mem_zero_detect_neon) +#ifndef __APPLE__ .type mem_zero_detect_neon, %function +#endif -mem_zero_detect_neon: +cdecl(mem_zero_detect_neon): cmp x1, #(16*24-1) b.ls .loop_16x24_end diff --git a/raid/aarch64/pq_check_neon.S b/raid/aarch64/pq_check_neon.S index 55ad798..e43e91a 100644 --- a/raid/aarch64/pq_check_neon.S +++ b/raid/aarch64/pq_check_neon.S @@ -27,10 +27,13 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################### +#include "../include/aarch64_label.h" .text -.global pq_check_neon +.global cdecl(pq_check_neon) +#ifndef __APPLE__ .type pq_check_neon, %function +#endif /* int pq_check_neon(int vects, int len, void **src) */ @@ -85,7 +88,7 @@ v_0x80 .req v29 * +----------+ +------------------+ */ -pq_check_neon: +cdecl(pq_check_neon): sub x_src_ptr_end, x_src, #8 sub w_vects, w_vects, #3 diff --git a/raid/aarch64/pq_gen_neon.S b/raid/aarch64/pq_gen_neon.S index f60ad12..08b68a8 100644 --- a/raid/aarch64/pq_gen_neon.S +++ b/raid/aarch64/pq_gen_neon.S @@ -27,10 +27,14 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################### +#include "../include/aarch64_label.h" + .text -.global pq_gen_neon +.global cdecl(pq_gen_neon) +#ifndef __APPLE__ .type pq_gen_neon, %function +#endif /* int pq_gen_neon(int vects, int len, void **src) */ @@ -84,7 +88,7 @@ v_0x80 .req v29 * +----------+ +------------------+ */ -pq_gen_neon: +cdecl(pq_gen_neon): sub x_src_ptr_end, x_src, #8 sub w_vects, w_vects, #3 diff --git a/raid/aarch64/raid_aarch64_dispatcher.c b/raid/aarch64/raid_aarch64_dispatcher.c index c81bd8c..5a6148e 100644 --- a/raid/aarch64/raid_aarch64_dispatcher.c +++ b/raid/aarch64/raid_aarch64_dispatcher.c @@ -30,32 +30,48 @@ DEFINE_INTERFACE_DISPATCHER(xor_gen) { +#if defined(__linux__) if (getauxval(AT_HWCAP) & HWCAP_ASIMD) return PROVIDER_INFO(xor_gen_neon); +#elif defined(__APPLE__) + return PROVIDER_INFO(xor_gen_neon); +#endif return PROVIDER_BASIC(xor_gen); } DEFINE_INTERFACE_DISPATCHER(xor_check) { +#if defined(__linux__) if (getauxval(AT_HWCAP) & HWCAP_ASIMD) return PROVIDER_INFO(xor_check_neon); +#elif defined(__APPLE__) + return PROVIDER_INFO(xor_check_neon); +#endif return PROVIDER_BASIC(xor_check); } DEFINE_INTERFACE_DISPATCHER(pq_gen) { +#if defined(__linux__) if (getauxval(AT_HWCAP) & HWCAP_ASIMD) return PROVIDER_INFO(pq_gen_neon); +#elif defined(__APPLE__) + return PROVIDER_INFO(pq_gen_neon); +#endif return PROVIDER_BASIC(pq_gen); } DEFINE_INTERFACE_DISPATCHER(pq_check) { +#if defined(__linux__) if (getauxval(AT_HWCAP) & HWCAP_ASIMD) return PROVIDER_INFO(pq_check_neon); +#elif defined(__APPLE__) + return PROVIDER_INFO(pq_check_neon); +#endif return PROVIDER_BASIC(pq_check); } diff --git a/raid/aarch64/xor_check_neon.S b/raid/aarch64/xor_check_neon.S index 95cb7d1..71e87df 100644 --- a/raid/aarch64/xor_check_neon.S +++ b/raid/aarch64/xor_check_neon.S @@ -27,10 +27,14 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
######################################################################### +#include "../include/aarch64_label.h" + .text -.global xor_check_neon +.global cdecl(xor_check_neon) +#ifndef __APPLE__ .type xor_check_neon, %function +#endif /* int xor_check_neon(int vects, int len, void **src) */ @@ -76,7 +80,7 @@ w_xor .req w11 * src_ptr_end --> */ -xor_check_neon: +cdecl(xor_check_neon): add x_src_ptr_end, x_src, x_vects, lsl #3 ldr x_src0, [x_src] add x_src0_end, x_src0, x_len diff --git a/raid/aarch64/xor_gen_neon.S b/raid/aarch64/xor_gen_neon.S index 00f65a2..99d06ab 100644 --- a/raid/aarch64/xor_gen_neon.S +++ b/raid/aarch64/xor_gen_neon.S @@ -27,10 +27,14 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################### +#include "../include/aarch64_label.h" + .text -.global xor_gen_neon +.global cdecl(xor_gen_neon) +#ifndef __APPLE__ .type xor_gen_neon, %function +#endif /* int xor_gen_neon(int vects, int len, void **src) */ @@ -78,7 +82,7 @@ x_dst_ptr .req x11 * +----------+ +------------------+ */ -xor_gen_neon: +cdecl(xor_gen_neon): add x_dst_ptr, x_src, x_vects, lsl #3 ldr x_dst, [x_dst_ptr, #-8]! ldr x_src0, [x_src]
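All of the cdecl() renames above resolve through include/aarch64_label.h: the macro pastes __USER_LABEL_PREFIX__ (empty on ELF targets, "_" on Mach-O) onto the symbol, so the same sources assemble and link on both Linux and macOS; the #ifndef __APPLE__ guards and ASM_DEF_RODATA exist because Mach-O's assembler does not understand the ELF-only .type/.size directives or a plain .rodata section. A small host-side sketch of that token pasting follows; the STR helpers are illustrative and not part of the patch:

#include <stdio.h>

/* same construction as include/aarch64_label.h */
#ifdef __USER_LABEL_PREFIX__
#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b
#define cdecl(x) CONCAT1(__USER_LABEL_PREFIX__, x)
#else
#define cdecl(x) x
#endif

#define STR2(x) #x
#define STR(x) STR2(x)

int main(void)
{
	/* prints "_xor_gen_neon" with Apple clang, "xor_gen_neon" with a Linux/ELF toolchain */
	printf("%s\n", STR(cdecl(xor_gen_neon)));
	return 0;
}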