Use +sme instead of +sve on Apple

Signed-off-by: Taiju Yamada <tyamada@bi.a.u-tokyo.ac.jp>
This commit is contained in:
Taiju Yamada 2024-11-08 09:36:43 +09:00
parent 496255cda6
commit b504f2e9a4
17 changed files with 81 additions and 21 deletions

View File

@ -38,7 +38,7 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(gf_vect_dot_prod_neon); return PROVIDER_INFO(gf_vect_dot_prod_neon);
#elif defined(__APPLE__) #elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY)) if (sysctlEnabled(SYSCTL_SME_KEY))
return PROVIDER_INFO(gf_vect_dot_prod_sve); return PROVIDER_INFO(gf_vect_dot_prod_sve);
return PROVIDER_INFO(gf_vect_dot_prod_neon); return PROVIDER_INFO(gf_vect_dot_prod_neon);
#endif #endif
@ -55,7 +55,7 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(gf_vect_mad_neon); return PROVIDER_INFO(gf_vect_mad_neon);
#elif defined(__APPLE__) #elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY)) if (sysctlEnabled(SYSCTL_SME_KEY))
return PROVIDER_INFO(gf_vect_mad_sve); return PROVIDER_INFO(gf_vect_mad_sve);
return PROVIDER_INFO(gf_vect_mad_neon); return PROVIDER_INFO(gf_vect_mad_neon);
#endif #endif
@ -72,7 +72,7 @@ DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(ec_encode_data_neon); return PROVIDER_INFO(ec_encode_data_neon);
#elif defined(__APPLE__) #elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY)) if (sysctlEnabled(SYSCTL_SME_KEY))
return PROVIDER_INFO(ec_encode_data_sve); return PROVIDER_INFO(ec_encode_data_sve);
return PROVIDER_INFO(ec_encode_data_neon); return PROVIDER_INFO(ec_encode_data_neon);
#endif #endif
@ -89,7 +89,7 @@ DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(ec_encode_data_update_neon); return PROVIDER_INFO(ec_encode_data_update_neon);
#elif defined(__APPLE__) #elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY)) if (sysctlEnabled(SYSCTL_SME_KEY))
return PROVIDER_INFO(ec_encode_data_update_sve); return PROVIDER_INFO(ec_encode_data_update_sve);
return PROVIDER_INFO(ec_encode_data_update_neon); return PROVIDER_INFO(ec_encode_data_update_neon);
#endif #endif
@ -106,7 +106,7 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(gf_vect_mul_neon); return PROVIDER_INFO(gf_vect_mul_neon);
#elif defined(__APPLE__) #elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY)) if (sysctlEnabled(SYSCTL_SME_KEY))
return PROVIDER_INFO(gf_vect_mul_sve); return PROVIDER_INFO(gf_vect_mul_sve);
return PROVIDER_INFO(gf_vect_mul_neon); return PROVIDER_INFO(gf_vect_mul_neon);
#endif #endif

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -98,7 +102,7 @@ cdecl(gf_2vect_dot_prod_sve):
/* Loop 1: x_len, vector length */ /* Loop 1: x_len, vector length */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
mov x_vec_i, #0 /* clear x_vec_i */ mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -104,7 +108,7 @@ cdecl(gf_2vect_mad_sve):
/* vector length agnostic */ /* vector length agnostic */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
/* prefetch dest data */ /* prefetch dest data */
prfb pldl2strm, p0, [x_dest1, x_pos] prfb pldl2strm, p0, [x_dest1, x_pos]

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -107,7 +111,7 @@ cdecl(gf_3vect_dot_prod_sve):
/* Loop 1: x_len, vector length */ /* Loop 1: x_len, vector length */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
mov x_vec_i, #0 /* clear x_vec_i */ mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -115,7 +119,7 @@ cdecl(gf_3vect_mad_sve):
/* vector length agnostic */ /* vector length agnostic */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
/* dest data prefetch */ /* dest data prefetch */
prfb pldl2strm, p0, [x_dest1, x_pos] prfb pldl2strm, p0, [x_dest1, x_pos]

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -115,7 +119,7 @@ cdecl(gf_4vect_dot_prod_sve):
/* Loop 1: x_len, vector length */ /* Loop 1: x_len, vector length */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
mov x_vec_i, #0 /* clear x_vec_i */ mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -126,7 +130,7 @@ cdecl(gf_4vect_mad_sve):
/* vector length agnostic */ /* vector length agnostic */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
prfb pldl2strm, p0, [x_dest1, x_pos] prfb pldl2strm, p0, [x_dest1, x_pos]
prfb pldl2strm, p0, [x_dest2, x_pos] prfb pldl2strm, p0, [x_dest2, x_pos]

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -128,7 +132,7 @@ cdecl(gf_5vect_dot_prod_sve):
/* Loop 1: x_len, vector length */ /* Loop 1: x_len, vector length */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
mov x_vec_i, #0 /* clear x_vec_i */ mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -137,7 +141,7 @@ cdecl(gf_5vect_mad_sve):
/* vector length agnostic */ /* vector length agnostic */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
prfb pldl2strm, p0, [x_dest1, x_pos] prfb pldl2strm, p0, [x_dest1, x_pos]
prfb pldl2strm, p0, [x_dest2, x_pos] prfb pldl2strm, p0, [x_dest2, x_pos]

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -137,7 +141,7 @@ cdecl(gf_6vect_dot_prod_sve):
/* Loop 1: x_len, vector length */ /* Loop 1: x_len, vector length */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
mov x_vec_i, #0 /* clear x_vec_i */ mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -148,7 +152,7 @@ cdecl(gf_6vect_mad_sve):
/* vector length agnostic */ /* vector length agnostic */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
prfb pldl2strm, p0, [x_dest1, x_pos] prfb pldl2strm, p0, [x_dest1, x_pos]
prfb pldl2strm, p0, [x_dest2, x_pos] prfb pldl2strm, p0, [x_dest2, x_pos]

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -148,7 +152,7 @@ cdecl(gf_7vect_dot_prod_sve):
/* Loop 1: x_len, vector length */ /* Loop 1: x_len, vector length */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
mov x_vec_i, #0 /* clear x_vec_i */ mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -160,7 +164,7 @@ cdecl(gf_8vect_dot_prod_sve):
/* Loop 1: x_len, vector length */ /* Loop 1: x_len, vector length */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
mov x_vec_i, #0 /* clear x_vec_i */ mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */ ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -82,7 +86,7 @@ cdecl(gf_vect_dot_prod_sve):
/* Loop 1: x_len, vector length */ /* Loop 1: x_len, vector length */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
mov z_dest.b, #0 /* clear z_dest */ mov z_dest.b, #0 /* clear z_dest */
mov x_vec_i, #0 /* clear x_vec_i */ mov x_vec_i, #0 /* clear x_vec_i */

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -87,7 +91,7 @@ cdecl(gf_vect_mad_sve):
/* vector length agnostic */ /* vector length agnostic */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
/* prefetch dest data */ /* prefetch dest data */
prfb pldl2strm, p0, [x_dest, x_pos] prfb pldl2strm, p0, [x_dest, x_pos]

View File

@ -28,7 +28,11 @@
**********************************************************************/ **********************************************************************/
.text .text
.align 6 .align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve .arch armv8-a+sve
#endif
#include "../include/aarch64_label.h" #include "../include/aarch64_label.h"
@ -92,7 +96,7 @@ cdecl(gf_vect_mul_sve):
/* vector length agnostic */ /* vector length agnostic */
.Lloopsve_vl: .Lloopsve_vl:
whilelo p0.b, x_pos, x_len whilelo p0.b, x_pos, x_len
b.none .return_pass b.eq .return_pass
/* load src data, governed by p0 */ /* load src data, governed by p0 */
ld1b z_src.b, p0/z, [x_src, x_pos] ld1b z_src.b, p0/z, [x_src, x_pos]

View File

@ -218,7 +218,7 @@
#elif defined(__APPLE__) #elif defined(__APPLE__)
#define SYSCTL_PMULL_KEY "hw.optional.arm.FEAT_PMULL" // from macOS 12 FEAT_* sysctl infos are available #define SYSCTL_PMULL_KEY "hw.optional.arm.FEAT_PMULL" // from macOS 12 FEAT_* sysctl infos are available
#define SYSCTL_CRC32_KEY "hw.optional.armv8_crc32" #define SYSCTL_CRC32_KEY "hw.optional.armv8_crc32"
#define SYSCTL_SVE_KEY "hw.optional.arm.FEAT_SVE" // this one is just a guess and need to check macOS update #define SYSCTL_SME_KEY "hw.optional.arm.FEAT_SME"
#include <sys/sysctl.h> #include <sys/sysctl.h>
#include <stddef.h> #include <stddef.h>
static inline int sysctlEnabled(const char* name){ static inline int sysctlEnabled(const char* name){