diff --git a/erasure_code/aarch64/ec_aarch64_dispatcher.c b/erasure_code/aarch64/ec_aarch64_dispatcher.c index 7f90dcb..3dc0864 100644 --- a/erasure_code/aarch64/ec_aarch64_dispatcher.c +++ b/erasure_code/aarch64/ec_aarch64_dispatcher.c @@ -38,7 +38,7 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod) if (auxval & HWCAP_ASIMD) return PROVIDER_INFO(gf_vect_dot_prod_neon); #elif defined(__APPLE__) - if (sysctlEnabled(SYSCTL_SVE_KEY)) + if (sysctlEnabled(SYSCTL_SME_KEY)) return PROVIDER_INFO(gf_vect_dot_prod_sve); return PROVIDER_INFO(gf_vect_dot_prod_neon); #endif @@ -55,7 +55,7 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_mad) if (auxval & HWCAP_ASIMD) return PROVIDER_INFO(gf_vect_mad_neon); #elif defined(__APPLE__) - if (sysctlEnabled(SYSCTL_SVE_KEY)) + if (sysctlEnabled(SYSCTL_SME_KEY)) return PROVIDER_INFO(gf_vect_mad_sve); return PROVIDER_INFO(gf_vect_mad_neon); #endif @@ -72,7 +72,7 @@ DEFINE_INTERFACE_DISPATCHER(ec_encode_data) if (auxval & HWCAP_ASIMD) return PROVIDER_INFO(ec_encode_data_neon); #elif defined(__APPLE__) - if (sysctlEnabled(SYSCTL_SVE_KEY)) + if (sysctlEnabled(SYSCTL_SME_KEY)) return PROVIDER_INFO(ec_encode_data_sve); return PROVIDER_INFO(ec_encode_data_neon); #endif @@ -89,7 +89,7 @@ DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update) if (auxval & HWCAP_ASIMD) return PROVIDER_INFO(ec_encode_data_update_neon); #elif defined(__APPLE__) - if (sysctlEnabled(SYSCTL_SVE_KEY)) + if (sysctlEnabled(SYSCTL_SME_KEY)) return PROVIDER_INFO(ec_encode_data_update_sve); return PROVIDER_INFO(ec_encode_data_update_neon); #endif @@ -106,7 +106,7 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_mul) if (auxval & HWCAP_ASIMD) return PROVIDER_INFO(gf_vect_mul_neon); #elif defined(__APPLE__) - if (sysctlEnabled(SYSCTL_SVE_KEY)) + if (sysctlEnabled(SYSCTL_SME_KEY)) return PROVIDER_INFO(gf_vect_mul_sve); return PROVIDER_INFO(gf_vect_mul_neon); #endif diff --git a/erasure_code/aarch64/gf_2vect_dot_prod_sve.S b/erasure_code/aarch64/gf_2vect_dot_prod_sve.S index 99b5f15..706b4d8 100644 --- a/erasure_code/aarch64/gf_2vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_2vect_dot_prod_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -98,7 +102,7 @@ cdecl(gf_2vect_dot_prod_sve): /* Loop 1: x_len, vector length */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass mov x_vec_i, #0 /* clear x_vec_i */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ diff --git a/erasure_code/aarch64/gf_2vect_mad_sve.S b/erasure_code/aarch64/gf_2vect_mad_sve.S index f0ddf01..0152f48 100644 --- a/erasure_code/aarch64/gf_2vect_mad_sve.S +++ b/erasure_code/aarch64/gf_2vect_mad_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -104,7 +108,7 @@ cdecl(gf_2vect_mad_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass /* prefetch dest data */ prfb pldl2strm, p0, [x_dest1, x_pos] diff --git a/erasure_code/aarch64/gf_3vect_dot_prod_sve.S b/erasure_code/aarch64/gf_3vect_dot_prod_sve.S index 8f6414e..e38cf72 100644 --- a/erasure_code/aarch64/gf_3vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_3vect_dot_prod_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -107,7 +111,7 @@ cdecl(gf_3vect_dot_prod_sve): /* Loop 1: x_len, vector length */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass mov x_vec_i, #0 /* clear x_vec_i */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ diff --git a/erasure_code/aarch64/gf_3vect_mad_sve.S b/erasure_code/aarch64/gf_3vect_mad_sve.S index 9e0ca5c..4664ce0 100644 --- a/erasure_code/aarch64/gf_3vect_mad_sve.S +++ b/erasure_code/aarch64/gf_3vect_mad_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -115,7 +119,7 @@ cdecl(gf_3vect_mad_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass /* dest data prefetch */ prfb pldl2strm, p0, [x_dest1, x_pos] diff --git a/erasure_code/aarch64/gf_4vect_dot_prod_sve.S b/erasure_code/aarch64/gf_4vect_dot_prod_sve.S index eb35427..4902985 100644 --- a/erasure_code/aarch64/gf_4vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_4vect_dot_prod_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -115,7 +119,7 @@ cdecl(gf_4vect_dot_prod_sve): /* Loop 1: x_len, vector length */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass mov x_vec_i, #0 /* clear x_vec_i */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ diff --git a/erasure_code/aarch64/gf_4vect_mad_sve.S b/erasure_code/aarch64/gf_4vect_mad_sve.S index 89ec89f..40a2bd9 100644 --- a/erasure_code/aarch64/gf_4vect_mad_sve.S +++ b/erasure_code/aarch64/gf_4vect_mad_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -126,7 +130,7 @@ cdecl(gf_4vect_mad_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass prfb pldl2strm, p0, [x_dest1, x_pos] prfb pldl2strm, p0, [x_dest2, x_pos] diff --git a/erasure_code/aarch64/gf_5vect_dot_prod_sve.S b/erasure_code/aarch64/gf_5vect_dot_prod_sve.S index bb7cd01..22f5fb1 100644 --- a/erasure_code/aarch64/gf_5vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_5vect_dot_prod_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -128,7 +132,7 @@ cdecl(gf_5vect_dot_prod_sve): /* Loop 1: x_len, vector length */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass mov x_vec_i, #0 /* clear x_vec_i */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ diff --git a/erasure_code/aarch64/gf_5vect_mad_sve.S b/erasure_code/aarch64/gf_5vect_mad_sve.S index ab374d3..136ae07 100644 --- a/erasure_code/aarch64/gf_5vect_mad_sve.S +++ b/erasure_code/aarch64/gf_5vect_mad_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -137,7 +141,7 @@ cdecl(gf_5vect_mad_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass prfb pldl2strm, p0, [x_dest1, x_pos] prfb pldl2strm, p0, [x_dest2, x_pos] diff --git a/erasure_code/aarch64/gf_6vect_dot_prod_sve.S b/erasure_code/aarch64/gf_6vect_dot_prod_sve.S index acc9895..2618928 100644 --- a/erasure_code/aarch64/gf_6vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_6vect_dot_prod_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -137,7 +141,7 @@ cdecl(gf_6vect_dot_prod_sve): /* Loop 1: x_len, vector length */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass mov x_vec_i, #0 /* clear x_vec_i */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ diff --git a/erasure_code/aarch64/gf_6vect_mad_sve.S b/erasure_code/aarch64/gf_6vect_mad_sve.S index c4f372c..3498365 100644 --- a/erasure_code/aarch64/gf_6vect_mad_sve.S +++ b/erasure_code/aarch64/gf_6vect_mad_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -148,7 +152,7 @@ cdecl(gf_6vect_mad_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass prfb pldl2strm, p0, [x_dest1, x_pos] prfb pldl2strm, p0, [x_dest2, x_pos] diff --git a/erasure_code/aarch64/gf_7vect_dot_prod_sve.S b/erasure_code/aarch64/gf_7vect_dot_prod_sve.S index 0f74873..20cb593 100644 --- a/erasure_code/aarch64/gf_7vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_7vect_dot_prod_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -148,7 +152,7 @@ cdecl(gf_7vect_dot_prod_sve): /* Loop 1: x_len, vector length */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass mov x_vec_i, #0 /* clear x_vec_i */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ diff --git a/erasure_code/aarch64/gf_8vect_dot_prod_sve.S b/erasure_code/aarch64/gf_8vect_dot_prod_sve.S index 20768f4..417b646 100644 --- a/erasure_code/aarch64/gf_8vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_8vect_dot_prod_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -160,7 +164,7 @@ cdecl(gf_8vect_dot_prod_sve): /* Loop 1: x_len, vector length */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass mov x_vec_i, #0 /* clear x_vec_i */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ diff --git a/erasure_code/aarch64/gf_vect_dot_prod_sve.S b/erasure_code/aarch64/gf_vect_dot_prod_sve.S index 48ce151..865e308 100644 --- a/erasure_code/aarch64/gf_vect_dot_prod_sve.S +++ b/erasure_code/aarch64/gf_vect_dot_prod_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -82,7 +86,7 @@ cdecl(gf_vect_dot_prod_sve): /* Loop 1: x_len, vector length */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass mov z_dest.b, #0 /* clear z_dest */ mov x_vec_i, #0 /* clear x_vec_i */ diff --git a/erasure_code/aarch64/gf_vect_mad_sve.S b/erasure_code/aarch64/gf_vect_mad_sve.S index 41d6da9..e254f8c 100644 --- a/erasure_code/aarch64/gf_vect_mad_sve.S +++ b/erasure_code/aarch64/gf_vect_mad_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -87,7 +91,7 @@ cdecl(gf_vect_mad_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass /* prefetch dest data */ prfb pldl2strm, p0, [x_dest, x_pos] diff --git a/erasure_code/aarch64/gf_vect_mul_sve.S b/erasure_code/aarch64/gf_vect_mul_sve.S index d2219bf..486eb13 100644 --- a/erasure_code/aarch64/gf_vect_mul_sve.S +++ b/erasure_code/aarch64/gf_vect_mul_sve.S @@ -28,7 +28,11 @@ **********************************************************************/ .text .align 6 +#ifdef __APPLE__ +.arch armv8-a+sme +#else .arch armv8-a+sve +#endif #include "../include/aarch64_label.h" @@ -92,7 +96,7 @@ cdecl(gf_vect_mul_sve): /* vector length agnostic */ .Lloopsve_vl: whilelo p0.b, x_pos, x_len - b.none .return_pass + b.eq .return_pass /* load src data, governed by p0 */ ld1b z_src.b, p0/z, [x_src, x_pos] diff --git a/include/aarch64_multibinary.h b/include/aarch64_multibinary.h index cd5d999..d252d51 100644 --- a/include/aarch64_multibinary.h +++ b/include/aarch64_multibinary.h @@ -218,7 +218,7 @@ #elif defined(__APPLE__) #define SYSCTL_PMULL_KEY "hw.optional.arm.FEAT_PMULL" // from macOS 12 FEAT_* sysctl infos are available #define SYSCTL_CRC32_KEY "hw.optional.armv8_crc32" -#define SYSCTL_SVE_KEY "hw.optional.arm.FEAT_SVE" // this one is just a guess and need to check macOS update +#define SYSCTL_SME_KEY "hw.optional.arm.FEAT_SME" #include #include static inline int sysctlEnabled(const char* name){