mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 09:23:50 +01:00
Use +sme for Apple
Signed-off-by: Taiju Yamada <tyamada@bi.a.u-tokyo.ac.jp>
This commit is contained in:
parent
496255cda6
commit
b504f2e9a4
@ -38,7 +38,7 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_neon);
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
if (sysctlEnabled(SYSCTL_SME_KEY))
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_sve);
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_neon);
|
||||
#endif
|
||||
@ -55,7 +55,7 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(gf_vect_mad_neon);
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
if (sysctlEnabled(SYSCTL_SME_KEY))
|
||||
return PROVIDER_INFO(gf_vect_mad_sve);
|
||||
return PROVIDER_INFO(gf_vect_mad_neon);
|
||||
#endif
|
||||
@ -72,7 +72,7 @@ DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(ec_encode_data_neon);
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
if (sysctlEnabled(SYSCTL_SME_KEY))
|
||||
return PROVIDER_INFO(ec_encode_data_sve);
|
||||
return PROVIDER_INFO(ec_encode_data_neon);
|
||||
#endif
|
||||
@ -89,7 +89,7 @@ DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(ec_encode_data_update_neon);
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
if (sysctlEnabled(SYSCTL_SME_KEY))
|
||||
return PROVIDER_INFO(ec_encode_data_update_sve);
|
||||
return PROVIDER_INFO(ec_encode_data_update_neon);
|
||||
#endif
|
||||
@ -106,7 +106,7 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(gf_vect_mul_neon);
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
if (sysctlEnabled(SYSCTL_SME_KEY))
|
||||
return PROVIDER_INFO(gf_vect_mul_sve);
|
||||
return PROVIDER_INFO(gf_vect_mul_neon);
|
||||
#endif
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -98,7 +102,7 @@ cdecl(gf_2vect_dot_prod_sve):
|
||||
/* Loop 1: x_len, vector length */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
mov x_vec_i, #0 /* clear x_vec_i */
|
||||
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -104,7 +108,7 @@ cdecl(gf_2vect_mad_sve):
|
||||
/* vector length agnostic */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
/* prefetch dest data */
|
||||
prfb pldl2strm, p0, [x_dest1, x_pos]
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -107,7 +111,7 @@ cdecl(gf_3vect_dot_prod_sve):
|
||||
/* Loop 1: x_len, vector length */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
mov x_vec_i, #0 /* clear x_vec_i */
|
||||
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -115,7 +119,7 @@ cdecl(gf_3vect_mad_sve):
|
||||
/* vector length agnostic */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
/* dest data prefetch */
|
||||
prfb pldl2strm, p0, [x_dest1, x_pos]
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -115,7 +119,7 @@ cdecl(gf_4vect_dot_prod_sve):
|
||||
/* Loop 1: x_len, vector length */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
mov x_vec_i, #0 /* clear x_vec_i */
|
||||
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -126,7 +130,7 @@ cdecl(gf_4vect_mad_sve):
|
||||
/* vector length agnostic */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
prfb pldl2strm, p0, [x_dest1, x_pos]
|
||||
prfb pldl2strm, p0, [x_dest2, x_pos]
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -128,7 +132,7 @@ cdecl(gf_5vect_dot_prod_sve):
|
||||
/* Loop 1: x_len, vector length */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
mov x_vec_i, #0 /* clear x_vec_i */
|
||||
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -137,7 +141,7 @@ cdecl(gf_5vect_mad_sve):
|
||||
/* vector length agnostic */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
prfb pldl2strm, p0, [x_dest1, x_pos]
|
||||
prfb pldl2strm, p0, [x_dest2, x_pos]
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -137,7 +141,7 @@ cdecl(gf_6vect_dot_prod_sve):
|
||||
/* Loop 1: x_len, vector length */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
mov x_vec_i, #0 /* clear x_vec_i */
|
||||
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -148,7 +152,7 @@ cdecl(gf_6vect_mad_sve):
|
||||
/* vector length agnostic */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
prfb pldl2strm, p0, [x_dest1, x_pos]
|
||||
prfb pldl2strm, p0, [x_dest2, x_pos]
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -148,7 +152,7 @@ cdecl(gf_7vect_dot_prod_sve):
|
||||
/* Loop 1: x_len, vector length */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
mov x_vec_i, #0 /* clear x_vec_i */
|
||||
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -160,7 +164,7 @@ cdecl(gf_8vect_dot_prod_sve):
|
||||
/* Loop 1: x_len, vector length */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
mov x_vec_i, #0 /* clear x_vec_i */
|
||||
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -82,7 +86,7 @@ cdecl(gf_vect_dot_prod_sve):
|
||||
/* Loop 1: x_len, vector length */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
mov z_dest.b, #0 /* clear z_dest */
|
||||
mov x_vec_i, #0 /* clear x_vec_i */
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -87,7 +91,7 @@ cdecl(gf_vect_mad_sve):
|
||||
/* vector length agnostic */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
/* prefetch dest data */
|
||||
prfb pldl2strm, p0, [x_dest, x_pos]
|
||||
|
@ -28,7 +28,11 @@
|
||||
**********************************************************************/
|
||||
.text
|
||||
.align 6
|
||||
#ifdef __APPLE__
|
||||
.arch armv8-a+sme
|
||||
#else
|
||||
.arch armv8-a+sve
|
||||
#endif
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
@ -92,7 +96,7 @@ cdecl(gf_vect_mul_sve):
|
||||
/* vector length agnostic */
|
||||
.Lloopsve_vl:
|
||||
whilelo p0.b, x_pos, x_len
|
||||
b.none .return_pass
|
||||
b.eq .return_pass
|
||||
|
||||
/* load src data, governed by p0 */
|
||||
ld1b z_src.b, p0/z, [x_src, x_pos]
|
||||
|
@ -218,7 +218,7 @@
|
||||
#elif defined(__APPLE__)
|
||||
#define SYSCTL_PMULL_KEY "hw.optional.arm.FEAT_PMULL" // FEAT_* sysctl keys are available from macOS 12 onward
|
||||
#define SYSCTL_CRC32_KEY "hw.optional.armv8_crc32"
|
||||
#define SYSCTL_SVE_KEY "hw.optional.arm.FEAT_SVE" // this key name is just a guess; it needs to be checked against future macOS updates
|
||||
#define SYSCTL_SME_KEY "hw.optional.arm.FEAT_SME"
|
||||
#include <sys/sysctl.h>
|
||||
#include <stddef.h>
|
||||
static inline int sysctlEnabled(const char* name){
|
||||
|
Loading…
Reference in New Issue
Block a user