Fixes for aarch64 mac

- It should be fine to always enable PMULL on Apple Silicon
- macOS 12+ is required for the PMULL instruction
- Changed the conditional macro to __APPLE__
- Rewrote the dispatcher using sysctlbyname
- Use __USER_LABEL_PREFIX__
- Use __TEXT,__const as the read-only section
- Use the ASM_DEF_RODATA macro
- Fix function declarations

Change-Id: I800593f21085d8187b480c8bb3ab2bd70c4a6974
Signed-off-by: Taiju Yamada <tyamada@bi.a.u-tokyo.ac.jp>
This commit is contained in:
Taiju Yamada 2020-11-22 01:51:37 +09:00 committed by Greg Tucker
parent 85716fe2fe
commit 1187583a97
86 changed files with 917 additions and 227 deletions

View File

@ -27,11 +27,15 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
.arch armv8-a+crc+crypto .arch armv8-a+crc+crypto
.text .text
.align 3 .align 3
.global crc16_t10dif_copy_pmull .global cdecl(crc16_t10dif_copy_pmull)
#ifndef __APPLE__
.type crc16_t10dif_copy_pmull, %function .type crc16_t10dif_copy_pmull, %function
#endif
/* uint16_t crc16_t10dif_pmull(uint16_t seed, uint8_t *buf, uint64_t len) */ /* uint16_t crc16_t10dif_pmull(uint16_t seed, uint8_t *buf, uint64_t len) */
@ -67,7 +71,7 @@ x_crc16tab .req x5
x_src_saved .req x0 x_src_saved .req x0
x_dst_saved .req x12 x_dst_saved .req x12
crc16_t10dif_copy_pmull: cdecl(crc16_t10dif_copy_pmull):
cmp x_len, 63 cmp x_len, 63
sub sp, sp, #16 sub sp, sp, #16
uxth w_seed, w_seed uxth w_seed, w_seed
@ -80,11 +84,19 @@ crc16_t10dif_copy_pmull:
cmp x_len, x_tmp cmp x_len, x_tmp
bls .end bls .end
#ifndef __APPLE__
sxtw x_counter, w_counter sxtw x_counter, w_counter
adrp x_crc16tab, .LANCHOR0 adrp x_crc16tab, .LANCHOR0
sub x_src, x_src, x_counter sub x_src, x_src, x_counter
sub x_dst, x_dst, x_counter sub x_dst, x_dst, x_counter
add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0 add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0
#else
sxtw x_counter, w_counter
adrp x_crc16tab, .LANCHOR0@PAGE
sub x_src, x_src, x_counter
sub x_dst, x_dst, x_counter
add x_crc16tab, x_crc16tab, .LANCHOR0@PAGEOFF
#endif
.align 2 .align 2
.crc_table_loop: .crc_table_loop:
@ -145,8 +157,13 @@ v_tmp3 .req v16
stp q_x0, q_x1, [x_dst] stp q_x0, q_x1, [x_dst]
stp q_x2, q_x3, [x_dst, 32] stp q_x2, q_x3, [x_dst, 32]
#ifndef __APPLE__
adrp x_tmp, .shuffle_mask_lanchor adrp x_tmp, .shuffle_mask_lanchor
ldr q_permutation, [x_tmp, :lo12:.shuffle_mask_lanchor] ldr q_permutation, [x_tmp, :lo12:.shuffle_mask_lanchor]
#else
adrp x_tmp, .shuffle_mask_lanchor@PAGE
ldr q_permutation, [x_tmp, .shuffle_mask_lanchor@PAGEOFF]
#endif
tbl v_tmp1.16b, {v_x0.16b}, v7.16b tbl v_tmp1.16b, {v_x0.16b}, v7.16b
eor v_x0.16b, v_tmp3.16b, v_tmp1.16b eor v_x0.16b, v_tmp3.16b, v_tmp1.16b
@ -193,7 +210,7 @@ v_tmp1_x3 .req v27
q_fold_const .req q17 q_fold_const .req q17
v_fold_const .req v17 v_fold_const .req v17
ldr q_fold_const, =0x371d00000000000087e70000; ldr q_fold_const, fold_constant
.align 2 .align 2
.crc_fold_loop: .crc_fold_loop:
@ -358,23 +375,32 @@ v_br1 .req v5
umov x0, v_x0.d[0] umov x0, v_x0.d[0]
ubfx x0, x0, 16, 16 ubfx x0, x0, 16, 16
b .crc_table_loop_pre b .crc_table_loop_pre
#ifndef __APPLE__
.size crc16_t10dif_copy_pmull, .-crc16_t10dif_copy_pmull .size crc16_t10dif_copy_pmull, .-crc16_t10dif_copy_pmull
#endif
.section .rodata ASM_DEF_RODATA
.align 4 .align 4
fold_constant:
.word 0x87e70000
.word 0x00000000
.word 0x371d0000
.word 0x00000000
.shuffle_mask_lanchor = . + 0 .shuffle_mask_lanchor = . + 0
#ifndef __APPLE__
.type shuffle_mask, %object .type shuffle_mask, %object
.size shuffle_mask, 16 .size shuffle_mask, 16
#endif
shuffle_mask: shuffle_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8 .byte 15, 14, 13, 12, 11, 10, 9, 8
.byte 7, 6, 5, 4, 3, 2, 1, 0 .byte 7, 6, 5, 4, 3, 2, 1, 0
.align 4 .align 4
.LANCHOR0 = . + 0 .LANCHOR0 = . + 0
#ifndef __APPLE__
.type crc16tab, %object .type crc16tab, %object
.size crc16tab, 512 .size crc16tab, 512
#endif
crc16tab: crc16tab:
.hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b .hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b
.hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6 .hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6

View File

@ -27,11 +27,15 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
.arch armv8-a+crc+crypto .arch armv8-a+crc+crypto
.text .text
.align 3 .align 3
.global crc16_t10dif_pmull .global cdecl(crc16_t10dif_pmull)
#ifndef __APPLE__
.type crc16_t10dif_pmull, %function .type crc16_t10dif_pmull, %function
#endif
/* uint16_t crc16_t10dif_pmull(uint16_t seed, uint8_t *buf, uint64_t len) */ /* uint16_t crc16_t10dif_pmull(uint16_t seed, uint8_t *buf, uint64_t len) */
@ -65,7 +69,7 @@ x_counter .req x3
x_crc16tab .req x4 x_crc16tab .req x4
x_buf_saved .req x0 x_buf_saved .req x0
crc16_t10dif_pmull: cdecl(crc16_t10dif_pmull):
cmp x_len, 63 cmp x_len, 63
sub sp, sp, #16 sub sp, sp, #16
uxth w_seed, w_seed uxth w_seed, w_seed
@ -78,10 +82,17 @@ crc16_t10dif_pmull:
cmp x_len, x_tmp cmp x_len, x_tmp
bls .end bls .end
#ifndef __APPLE__
sxtw x_counter, w_counter sxtw x_counter, w_counter
adrp x_crc16tab, .LANCHOR0 adrp x_crc16tab, .LANCHOR0
sub x_buf, x_buf, x_counter sub x_buf, x_buf, x_counter
add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0 add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0
#else
sxtw x_counter, w_counter
adrp x_crc16tab, .LANCHOR0@PAGE
sub x_buf, x_buf, x_counter
add x_crc16tab, x_crc16tab, .LANCHOR0@PAGEOFF
#endif
.align 2 .align 2
.crc_table_loop: .crc_table_loop:
@ -137,8 +148,13 @@ v_tmp3 .req v16
ldp q_x0, q_x1, [x_buf] ldp q_x0, q_x1, [x_buf]
ldp q_x2, q_x3, [x_buf, 32] ldp q_x2, q_x3, [x_buf, 32]
#ifndef __APPLE__
adrp x_tmp, .shuffle_mask_lanchor adrp x_tmp, .shuffle_mask_lanchor
ldr q7, [x_tmp, :lo12:.shuffle_mask_lanchor] ldr q7, [x_tmp, :lo12:.shuffle_mask_lanchor]
#else
adrp x_tmp, .shuffle_mask_lanchor@PAGE
ldr q7, [x_tmp, .shuffle_mask_lanchor@PAGEOFF]
#endif
tbl v_tmp1.16b, {v_x0.16b}, v7.16b tbl v_tmp1.16b, {v_x0.16b}, v7.16b
eor v_x0.16b, v_tmp3.16b, v_tmp1.16b eor v_x0.16b, v_tmp3.16b, v_tmp1.16b
@ -185,7 +201,7 @@ v_tmp1_x3 .req v27
q_fold_const .req q17 q_fold_const .req q17
v_fold_const .req v17 v_fold_const .req v17
ldr q_fold_const, =0x371d00000000000087e70000; ldr q_fold_const, fold_constant
.align 2 .align 2
.crc_fold_loop: .crc_fold_loop:
@ -344,22 +360,32 @@ v_br1 .req v5
ubfx x0, x0, 16, 16 ubfx x0, x0, 16, 16
b .crc_table_loop_pre b .crc_table_loop_pre
#ifndef __APPLE__
.size crc16_t10dif_pmull, .-crc16_t10dif_pmull .size crc16_t10dif_pmull, .-crc16_t10dif_pmull
#endif
.section .rodata ASM_DEF_RODATA
.align 4 .align 4
fold_constant:
.word 0x87e70000
.word 0x00000000
.word 0x371d0000
.word 0x00000000
.shuffle_mask_lanchor = . + 0 .shuffle_mask_lanchor = . + 0
#ifndef __APPLE__
.type shuffle_mask, %object .type shuffle_mask, %object
.size shuffle_mask, 16 .size shuffle_mask, 16
#endif
shuffle_mask: shuffle_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8 .byte 15, 14, 13, 12, 11, 10, 9, 8
.byte 7, 6, 5, 4, 3, 2, 1, 0 .byte 7, 6, 5, 4, 3, 2, 1, 0
.align 4 .align 4
.LANCHOR0 = . + 0 .LANCHOR0 = . + 0
#ifndef __APPLE__
.type crc16tab, %object .type crc16tab, %object
.size crc16tab, 512 .size crc16tab, 512
#endif
crc16tab: crc16tab:
.hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b .hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b
.hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6 .hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6

View File

@ -27,8 +27,7 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.macro crc32_hw_common poly_type .macro crc32_hw_common poly_type

View File

@ -27,6 +27,7 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.macro declare_var_vector_reg name:req,reg:req .macro declare_var_vector_reg name:req,reg:req
\name\()_q .req q\reg \name\()_q .req q\reg
@ -429,4 +430,3 @@ start_final:
.endif .endif
ret ret
.endm .endm

View File

@ -88,8 +88,12 @@
); );
*/ */
.global crc32_gzip_refl_3crc_fold .global cdecl(crc32_gzip_refl_3crc_fold)
#ifndef __APPLE__
.type crc32_gzip_refl_3crc_fold, %function .type crc32_gzip_refl_3crc_fold, %function
crc32_gzip_refl_3crc_fold: #endif
cdecl(crc32_gzip_refl_3crc_fold):
crc32_3crc_fold crc32 crc32_3crc_fold crc32
#ifndef __APPLE__
.size crc32_gzip_refl_3crc_fold, .-crc32_gzip_refl_3crc_fold .size crc32_gzip_refl_3crc_fold, .-crc32_gzip_refl_3crc_fold
#endif

View File

@ -59,8 +59,12 @@
* uint32_t crc32_gzip_refl_crc_ext(const unsigned char *BUF, * uint32_t crc32_gzip_refl_crc_ext(const unsigned char *BUF,
* uint64_t LEN,uint32_t wCRC); * uint64_t LEN,uint32_t wCRC);
*/ */
.global crc32_gzip_refl_crc_ext .global cdecl(crc32_gzip_refl_crc_ext)
#ifndef __APPLE__
.type crc32_gzip_refl_crc_ext, %function .type crc32_gzip_refl_crc_ext, %function
crc32_gzip_refl_crc_ext: #endif
cdecl(crc32_gzip_refl_crc_ext):
crc32_hw_common crc32 crc32_hw_common crc32
#ifndef __APPLE__
.size crc32_gzip_refl_crc_ext, .-crc32_gzip_refl_crc_ext .size crc32_gzip_refl_crc_ext, .-crc32_gzip_refl_crc_ext
#endif

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
#include "crc32_gzip_refl_pmull.h" #include "crc32_gzip_refl_pmull.h"
#include "crc32_refl_common_pmull.h" #include "crc32_refl_common_pmull.h"

View File

@ -47,11 +47,13 @@
.equ br_high_b2, 0x1 .equ br_high_b2, 0x1
.text .text
.section .rodata ASM_DEF_RODATA
.align 4 .align 4
.set .lanchor_crc_tab,. + 0 .set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc32_table_gzip_refl, %object .type crc32_table_gzip_refl, %object
.size crc32_table_gzip_refl, 1024 .size crc32_table_gzip_refl, 1024
#endif
crc32_table_gzip_refl: crc32_table_gzip_refl:
.word 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3 .word 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3
.word 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91 .word 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
#include "crc32_ieee_norm_pmull.h" #include "crc32_ieee_norm_pmull.h"
#include "crc32_norm_common_pmull.h" #include "crc32_norm_common_pmull.h"

View File

@ -47,11 +47,13 @@
.equ br_high_b2, 0x1 .equ br_high_b2, 0x1
.text .text
.section .rodata ASM_DEF_RODATA
.align 4 .align 4
.set .lanchor_crc_tab,. + 0 .set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc32_table_ieee_norm, %object .type crc32_table_ieee_norm, %object
.size crc32_table_ieee_norm, 1024 .size crc32_table_ieee_norm, 1024
#endif
crc32_table_ieee_norm: crc32_table_ieee_norm:
.word 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005 .word 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005
.word 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd .word 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd

View File

@ -90,8 +90,12 @@
*/ */
.global crc32_iscsi_3crc_fold .global cdecl(crc32_iscsi_3crc_fold)
#ifndef __APPLE__
.type crc32_iscsi_3crc_fold, %function .type crc32_iscsi_3crc_fold, %function
crc32_iscsi_3crc_fold: #endif
cdecl(crc32_iscsi_3crc_fold):
crc32_3crc_fold crc32c crc32_3crc_fold crc32c
#ifndef __APPLE__
.size crc32_iscsi_3crc_fold, .-crc32_iscsi_3crc_fold .size crc32_iscsi_3crc_fold, .-crc32_iscsi_3crc_fold
#endif

View File

@ -58,8 +58,12 @@
* uint32_t crc32_iscsi_crc_ext(const unsigned char *BUF, * uint32_t crc32_iscsi_crc_ext(const unsigned char *BUF,
* uint64_t LEN,uint32_t wCRC); * uint64_t LEN,uint32_t wCRC);
*/ */
.global crc32_iscsi_crc_ext .global cdecl(crc32_iscsi_crc_ext)
#ifndef __APPLE__
.type crc32_iscsi_crc_ext, %function .type crc32_iscsi_crc_ext, %function
crc32_iscsi_crc_ext: #endif
cdecl(crc32_iscsi_crc_ext):
crc32_hw_common crc32c crc32_hw_common crc32c
#ifndef __APPLE__
.size crc32_iscsi_crc_ext, .-crc32_iscsi_crc_ext .size crc32_iscsi_crc_ext, .-crc32_iscsi_crc_ext
#endif

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
#include "crc32_iscsi_refl_pmull.h" #include "crc32_iscsi_refl_pmull.h"
#include "crc32_refl_common_pmull.h" #include "crc32_refl_common_pmull.h"
@ -35,9 +36,11 @@ crc32_refl_func crc32_iscsi_refl_pmull_internal
.arch armv8-a+crc+crypto .arch armv8-a+crc+crypto
.text .text
.align 3 .align 3
.global crc32_iscsi_refl_pmull .global cdecl(crc32_iscsi_refl_pmull)
#ifndef __APPLE__
.type crc32_iscsi_refl_pmull, %function .type crc32_iscsi_refl_pmull, %function
crc32_iscsi_refl_pmull: #endif
cdecl(crc32_iscsi_refl_pmull):
stp x29, x30, [sp, -32]! stp x29, x30, [sp, -32]!
mov x29, sp mov x29, sp
@ -47,7 +50,7 @@ crc32_iscsi_refl_pmull:
mov w0, w7 mov w0, w7
mvn w0, w0 mvn w0, w0
bl crc32_iscsi_refl_pmull_internal bl cdecl(crc32_iscsi_refl_pmull_internal)
mvn w0, w0 mvn w0, w0
ldp x29, x30, [sp], 32 ldp x29, x30, [sp], 32
ret ret

View File

@ -47,11 +47,14 @@
.equ br_high_b2, 0x0 .equ br_high_b2, 0x0
.text .text
.section .rodata
ASM_DEF_RODATA
.align 4 .align 4
.set .lanchor_crc_tab,. + 0 .set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc32_table_iscsi_refl, %object .type crc32_table_iscsi_refl, %object
.size crc32_table_iscsi_refl, 1024 .size crc32_table_iscsi_refl, 1024
#endif
crc32_table_iscsi_refl: crc32_table_iscsi_refl:
.word 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB .word 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB
.word 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24 .word 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24

View File

@ -51,42 +51,56 @@
#include "crc32_mix_default_common.S" #include "crc32_mix_default_common.S"
.global crc32_mix_default .global cdecl(crc32_mix_default)
#ifndef __APPLE__
.type crc32_mix_default, %function .type crc32_mix_default, %function
crc32_mix_default: #endif
cdecl(crc32_mix_default):
crc32_mix_main_default crc32_mix_main_default
#ifndef __APPLE__
.size crc32_mix_default, .-crc32_mix_default .size crc32_mix_default, .-crc32_mix_default
#endif
.section .rodata ASM_DEF_RODATA
.align 4 .align 4
.set lanchor_crc32,. + 0 .set lanchor_crc32,. + 0
#ifndef __APPLE__
.type k1k2, %object .type k1k2, %object
.size k1k2, 16 .size k1k2, 16
#endif
k1k2: k1k2:
.xword 0x0154442bd4 .xword 0x0154442bd4
.xword 0x01c6e41596 .xword 0x01c6e41596
#ifndef __APPLE__
.type k3k4, %object .type k3k4, %object
.size k3k4, 16 .size k3k4, 16
#endif
k3k4: k3k4:
.xword 0x01751997d0 .xword 0x01751997d0
.xword 0x00ccaa009e .xword 0x00ccaa009e
#ifndef __APPLE__
.type k5k0, %object .type k5k0, %object
.size k5k0, 16 .size k5k0, 16
#endif
k5k0: k5k0:
.xword 0x0163cd6124 .xword 0x0163cd6124
.xword 0 .xword 0
#ifndef __APPLE__
.type poly, %object .type poly, %object
.size poly, 16 .size poly, 16
#endif
poly: poly:
.xword 0x01db710641 .xword 0x01db710641
.xword 0x01f7011641 .xword 0x01f7011641
#ifndef __APPLE__
.type crc32_const, %object .type crc32_const, %object
.size crc32_const, 48 .size crc32_const, 48
#endif
crc32_const: crc32_const:
.xword 0x1753ab84 .xword 0x1753ab84
.xword 0 .xword 0
@ -98,8 +112,10 @@ crc32_const:
.align 4 .align 4
.set .lanchor_mask,. + 0 .set .lanchor_mask,. + 0
#ifndef __APPLE__
.type mask, %object .type mask, %object
.size mask, 16 .size mask, 16
#endif
mask: mask:
.word -1 .word -1
.word 0 .word 0

View File

@ -27,6 +27,8 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.macro declare_generic_reg name:req, reg:req, default:req .macro declare_generic_reg name:req, reg:req, default:req
\name .req \default\reg \name .req \default\reg
w_\name .req w\reg w_\name .req w\reg
@ -207,8 +209,13 @@
fmov s_a1, w_crc fmov s_a1, w_crc
movi v_neon_tmp.4s, 0 movi v_neon_tmp.4s, 0
#ifndef __APPLE__
adrp x_pconst, lanchor_crc32 adrp x_pconst, lanchor_crc32
add x_buf_tmp, x_buf, 64 add x_buf_tmp, x_buf, 64
#else
adrp x_pconst, lanchor_crc32@PAGE
add x_buf_tmp, x_buf, 64
#endif
ldr x_data_crc0, [x_buf, 512] ldr x_data_crc0, [x_buf, 512]
ldr x_data_crc1, [x_buf, 1024] ldr x_data_crc1, [x_buf, 1024]
@ -231,7 +238,11 @@
ldr x_data_crc2, [x_buf, 1544] ldr x_data_crc2, [x_buf, 1544]
eor v_a1.16b, v_a1.16b, v_neon_tmp.16b eor v_a1.16b, v_a1.16b, v_neon_tmp.16b
#ifndef __APPLE__
ldr q_a0, [x_pconst, #:lo12:lanchor_crc32] // k1k2 ldr q_a0, [x_pconst, #:lo12:lanchor_crc32] // k1k2
#else
ldr q_a0, [x_pconst, #lanchor_crc32@PAGEOFF] // k1k2
#endif
crc32_u64 w_crc0, w_crc0, x_data_crc0 crc32_u64 w_crc0, w_crc0, x_data_crc0
crc32_u64 w_crc1, w_crc1, x_data_crc1 crc32_u64 w_crc1, w_crc1, x_data_crc1
@ -261,7 +272,11 @@
// loop end // loop end
// PMULL: fold into 128-bits // PMULL: fold into 128-bits
#ifndef __APPLE__
add x_pconst, x_pconst, :lo12:lanchor_crc32 add x_pconst, x_pconst, :lo12:lanchor_crc32
#else
add x_pconst, x_pconst, lanchor_crc32@PAGEOFF
#endif
ldr x_data_crc0, [x_buf, 976] ldr x_data_crc0, [x_buf, 976]
ldr x_data_crc1, [x_buf, 1488] ldr x_data_crc1, [x_buf, 1488]
@ -321,7 +336,11 @@
movi v_neon_zero.4s, 0 movi v_neon_zero.4s, 0
ldr q_k5k0, [x_pconst, offset_k5k0] // k5k0 ldr q_k5k0, [x_pconst, offset_k5k0] // k5k0
#ifndef __APPLE__
adrp x_tmp, .lanchor_mask adrp x_tmp, .lanchor_mask
#else
adrp x_tmp, .lanchor_mask@PAGE
#endif
ldr x_data_crc0, [x_buf, 1008] ldr x_data_crc0, [x_buf, 1008]
ldr x_data_crc1, [x_buf, 1520] ldr x_data_crc1, [x_buf, 1520]
@ -329,7 +348,11 @@
ext v_a1.16b, v_a1.16b, v_neon_zero.16b, #8 ext v_a1.16b, v_a1.16b, v_neon_zero.16b, #8
eor v_a1.16b, v_a2.16b, v_a1.16b eor v_a1.16b, v_a2.16b, v_a1.16b
#ifndef __APPLE__
ldr q_neon_tmp3, [x_tmp, #:lo12:.lanchor_mask] ldr q_neon_tmp3, [x_tmp, #:lo12:.lanchor_mask]
#else
ldr q_neon_tmp3, [x_tmp, #.lanchor_mask@PAGEOFF]
#endif
crc32_u64 w_crc0, w_crc0, x_data_crc0 crc32_u64 w_crc0, w_crc0, x_data_crc0
crc32_u64 w_crc1, w_crc1, x_data_crc1 crc32_u64 w_crc1, w_crc1, x_data_crc1

View File

@ -62,9 +62,12 @@
CRC .req x0 CRC .req x0
wCRC .req w0 wCRC .req w0
.align 6 .align 6
.global crc32_mix_neoverse_n1 .global cdecl(crc32_mix_neoverse_n1)
#ifndef __APPLE__
.type crc32_mix_neoverse_n1, %function .type crc32_mix_neoverse_n1, %function
crc32_mix_neoverse_n1: #endif
cdecl(crc32_mix_neoverse_n1):
crc32_common_mix crc32 crc32_common_mix crc32
#ifndef __APPLE__
.size crc32_mix_neoverse_n1, .-crc32_mix_neoverse_n1 .size crc32_mix_neoverse_n1, .-crc32_mix_neoverse_n1
#endif

View File

@ -33,12 +33,14 @@
.arch armv8-a+crypto .arch armv8-a+crypto
.text .text
.align 3 .align 3
.global \name .global cdecl(\name)
#ifndef __APPLE__
.type \name, %function .type \name, %function
#endif
/* uint32_t crc32_norm_func(uint32_t seed, uint8_t * buf, uint64_t len) */ /* uint32_t crc32_norm_func(uint32_t seed, uint8_t * buf, uint64_t len) */
\name\(): cdecl(\name\()):
mvn w_seed, w_seed mvn w_seed, w_seed
mov x_counter, 0 mov x_counter, 0
cmp x_len, (FOLD_SIZE - 1) cmp x_len, (FOLD_SIZE - 1)
@ -48,10 +50,17 @@
cmp x_len, x_counter cmp x_len, x_counter
bls .done bls .done
#ifndef __APPLE__
adrp x_tmp, .lanchor_crc_tab adrp x_tmp, .lanchor_crc_tab
add x_buf_iter, x_buf, x_counter add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
#else
adrp x_tmp, .lanchor_crc_tab@PAGE
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
#endif
.align 3 .align 3
.loop_crc_tab: .loop_crc_tab:
@ -124,10 +133,12 @@
umov w_seed, v_tmp_high.s[0] umov w_seed, v_tmp_high.s[0]
b .crc_tab_pre b .crc_tab_pre
#ifndef __APPLE__
.size \name, .-\name .size \name, .-\name
.section .rodata.cst16,"aM",@progbits,16 .section .rodata.cst16,"aM",@progbits,16
#else
.section __TEXT,__const
#endif
.align 4 .align 4
.shuffle_data: .shuffle_data:
.byte 15, 14, 13, 12, 11, 10, 9 .byte 15, 14, 13, 12, 11, 10, 9

View File

@ -33,12 +33,14 @@
.arch armv8-a+crypto .arch armv8-a+crypto
.text .text
.align 3 .align 3
.global \name .global cdecl(\name)
#ifndef __APPLE__
.type \name, %function .type \name, %function
#endif
/* uint32_t crc32_refl_func(uint32_t seed, uint8_t * buf, uint64_t len) */ /* uint32_t crc32_refl_func(uint32_t seed, uint8_t * buf, uint64_t len) */
\name\(): cdecl(\name\()):
mvn w_seed, w_seed mvn w_seed, w_seed
mov x_counter, 0 mov x_counter, 0
cmp x_len, (FOLD_SIZE - 1) cmp x_len, (FOLD_SIZE - 1)
@ -48,10 +50,17 @@
cmp x_len, x_counter cmp x_len, x_counter
bls .done bls .done
#ifndef __APPLE__
adrp x_tmp, .lanchor_crc_tab adrp x_tmp, .lanchor_crc_tab
add x_buf_iter, x_buf, x_counter add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
#else
adrp x_tmp, .lanchor_crc_tab@PAGE
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
#endif
.align 3 .align 3
.loop_crc_tab: .loop_crc_tab:
@ -121,6 +130,7 @@
umov w_seed, v_tmp_high.s[1] umov w_seed, v_tmp_high.s[1]
b .crc_tab_pre b .crc_tab_pre
#ifndef __APPLE__
.size \name, .-\name .size \name, .-\name
#endif
.endm .endm

View File

@ -49,46 +49,60 @@
#include "crc32_mix_default_common.S" #include "crc32_mix_default_common.S"
.global crc32c_mix_default .global cdecl(crc32c_mix_default)
#ifndef __APPLE__
.type crc32c_mix_default, %function .type crc32c_mix_default, %function
crc32c_mix_default: #endif
cdecl(crc32c_mix_default):
mov w3, w2 mov w3, w2
sxtw x2, w1 sxtw x2, w1
mov x1, x0 mov x1, x0
mov w0, w3 mov w0, w3
crc32_mix_main_default crc32_mix_main_default
#ifndef __APPLE__
.size crc32c_mix_default, .-crc32c_mix_default .size crc32c_mix_default, .-crc32c_mix_default
#endif
.section .rodata ASM_DEF_RODATA
.align 4 .align 4
.set lanchor_crc32,. + 0 .set lanchor_crc32,. + 0
#ifndef __APPLE__
.type k1k2, %object .type k1k2, %object
.size k1k2, 16 .size k1k2, 16
#endif
k1k2: k1k2:
.xword 0x00740eef02 .xword 0x00740eef02
.xword 0x009e4addf8 .xword 0x009e4addf8
#ifndef __APPLE__
.type k3k4, %object .type k3k4, %object
.size k3k4, 16 .size k3k4, 16
#endif
k3k4: k3k4:
.xword 0x00f20c0dfe .xword 0x00f20c0dfe
.xword 0x014cd00bd6 .xword 0x014cd00bd6
#ifndef __APPLE__
.type k5k0, %object .type k5k0, %object
.size k5k0, 16 .size k5k0, 16
#endif
k5k0: k5k0:
.xword 0x00dd45aab8 .xword 0x00dd45aab8
.xword 0 .xword 0
#ifndef __APPLE__
.type poly, %object .type poly, %object
.size poly, 16 .size poly, 16
#endif
poly: poly:
.xword 0x0105ec76f0 .xword 0x0105ec76f0
.xword 0x00dea713f1 .xword 0x00dea713f1
#ifndef __APPLE__
.type crc32_const, %object .type crc32_const, %object
.size crc32_const, 48 .size crc32_const, 48
#endif
crc32_const: crc32_const:
.xword 0x9ef68d35 .xword 0x9ef68d35
.xword 0 .xword 0
@ -100,8 +114,10 @@ crc32_const:
.align 4 .align 4
.set .lanchor_mask,. + 0 .set .lanchor_mask,. + 0
#ifndef __APPLE__
.type mask, %object .type mask, %object
.size mask, 16 .size mask, 16
#endif
mask: mask:
.word -1 .word -1
.word 0 .word 0

View File

@ -61,8 +61,12 @@
CRC .req x2 CRC .req x2
wCRC .req w2 wCRC .req w2
.align 6 .align 6
.global crc32c_mix_neoverse_n1 .global cdecl(crc32c_mix_neoverse_n1)
#ifndef __APPLE__
.type crc32c_mix_neoverse_n1, %function .type crc32c_mix_neoverse_n1, %function
crc32c_mix_neoverse_n1: #endif
cdecl(crc32c_mix_neoverse_n1):
crc32_common_mix crc32c crc32_common_mix crc32c
#ifndef __APPLE__
.size crc32c_mix_neoverse_n1, .-crc32c_mix_neoverse_n1 .size crc32c_mix_neoverse_n1, .-crc32c_mix_neoverse_n1
#endif

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
#include "crc64_ecma_norm_pmull.h" #include "crc64_ecma_norm_pmull.h"
#include "crc64_norm_common_pmull.h" #include "crc64_norm_common_pmull.h"

View File

@ -64,11 +64,13 @@
.equ br_high_b3, 0x42f0 .equ br_high_b3, 0x42f0
.text .text
.section .rodata ASM_DEF_RODATA
.align 4 .align 4
.set .lanchor_crc_tab,. + 0 .set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc64_tab, %object .type crc64_tab, %object
.size crc64_tab, 2048 .size crc64_tab, 2048
#endif
crc64_tab: crc64_tab:
.xword 0x0000000000000000, 0x42f0e1eba9ea3693 .xword 0x0000000000000000, 0x42f0e1eba9ea3693
.xword 0x85e1c3d753d46d26, 0xc711223cfa3e5bb5 .xword 0x85e1c3d753d46d26, 0xc711223cfa3e5bb5

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
#include "crc64_ecma_refl_pmull.h" #include "crc64_ecma_refl_pmull.h"
#include "crc64_refl_common_pmull.h" #include "crc64_refl_common_pmull.h"

View File

@ -60,11 +60,13 @@
.equ br_high_b3, 0x92d8 .equ br_high_b3, 0x92d8
.text .text
.section .rodata ASM_DEF_RODATA
.align 4 .align 4
.set .lanchor_crc_tab,. + 0 .set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc64_tab, %object .type crc64_tab, %object
.size crc64_tab, 2048 .size crc64_tab, 2048
#endif
crc64_tab: crc64_tab:
.xword 0x0000000000000000, 0xb32e4cbe03a75f6f .xword 0x0000000000000000, 0xb32e4cbe03a75f6f
.xword 0xf4843657a840a05b, 0x47aa7ae9abe7ff34 .xword 0xf4843657a840a05b, 0x47aa7ae9abe7ff34

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
#include "crc64_iso_norm_pmull.h" #include "crc64_iso_norm_pmull.h"
#include "crc64_norm_common_pmull.h" #include "crc64_norm_common_pmull.h"

View File

@ -64,11 +64,13 @@
.equ br_high_b3, 0x0000 .equ br_high_b3, 0x0000
.text .text
.section .rodata ASM_DEF_RODATA
.align 4 .align 4
.set .lanchor_crc_tab,. + 0 .set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc64_tab, %object .type crc64_tab, %object
.size crc64_tab, 2048 .size crc64_tab, 2048
#endif
crc64_tab: crc64_tab:
.xword 0x0000000000000000, 0x000000000000001b .xword 0x0000000000000000, 0x000000000000001b

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
#include "crc64_iso_refl_pmull.h" #include "crc64_iso_refl_pmull.h"
#include "crc64_refl_common_pmull.h" #include "crc64_refl_common_pmull.h"

View File

@ -60,11 +60,13 @@
.equ br_high_b3, 0xb000 .equ br_high_b3, 0xb000
.text .text
.section .rodata ASM_DEF_RODATA
.align 4 .align 4
.set .lanchor_crc_tab,. + 0 .set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc64_tab, %object .type crc64_tab, %object
.size crc64_tab, 2048 .size crc64_tab, 2048
#endif
crc64_tab: crc64_tab:
.xword 0x0000000000000000, 0x01b0000000000000 .xword 0x0000000000000000, 0x01b0000000000000

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
#include "crc64_jones_norm_pmull.h" #include "crc64_jones_norm_pmull.h"
#include "crc64_norm_common_pmull.h" #include "crc64_norm_common_pmull.h"

View File

@ -64,11 +64,14 @@
.equ br_high_b3, 0xad93 .equ br_high_b3, 0xad93
.text .text
.section .rodata ASM_DEF_RODATA
.align 4 .align 4
.set .lanchor_crc_tab,. + 0 .set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc64_tab, %object .type crc64_tab, %object
.size crc64_tab, 2048 .size crc64_tab, 2048
#endif
crc64_tab: crc64_tab:
.xword 0x0000000000000000, 0xad93d23594c935a9 .xword 0x0000000000000000, 0xad93d23594c935a9
.xword 0xf6b4765ebd5b5efb, 0x5b27a46b29926b52 .xword 0xf6b4765ebd5b5efb, 0x5b27a46b29926b52

View File

@ -27,6 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
#include "crc64_jones_refl_pmull.h" #include "crc64_jones_refl_pmull.h"
#include "crc64_refl_common_pmull.h" #include "crc64_refl_common_pmull.h"

View File

@ -60,11 +60,14 @@
.equ br_high_b3, 0x2b59 .equ br_high_b3, 0x2b59
.text .text
.section .rodata ASM_DEF_RODATA
.align 4 .align 4
.set .lanchor_crc_tab,. + 0 .set .lanchor_crc_tab,. + 0
#ifndef __APPLE__
.type crc64_tab, %object .type crc64_tab, %object
.size crc64_tab, 2048 .size crc64_tab, 2048
#endif
crc64_tab: crc64_tab:
.xword 0x0000000000000000, 0x7ad870c830358979 .xword 0x0000000000000000, 0x7ad870c830358979
.xword 0xf5b0e190606b12f2, 0x8f689158505e9b8b .xword 0xf5b0e190606b12f2, 0x8f689158505e9b8b

View File

@ -33,12 +33,14 @@
.arch armv8-a+crypto .arch armv8-a+crypto
.text .text
.align 3 .align 3
.global \name .global cdecl(\name)
#ifndef __APPLE__
.type \name, %function .type \name, %function
#endif
/* uint64_t crc64_norm_func(uint64_t seed, const uint8_t * buf, uint64_t len) */ /* uint64_t crc64_norm_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
\name\(): cdecl(\name\()):
mvn x_seed, x_seed mvn x_seed, x_seed
mov x_counter, 0 mov x_counter, 0
cmp x_len, (FOLD_SIZE-1) cmp x_len, (FOLD_SIZE-1)
@ -48,10 +50,17 @@
cmp x_len, x_counter cmp x_len, x_counter
bls .done bls .done
#ifndef __APPLE__
adrp x_tmp, .lanchor_crc_tab adrp x_tmp, .lanchor_crc_tab
add x_buf_iter, x_buf, x_counter add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
#else
adrp x_tmp, .lanchor_crc_tab@PAGE
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
#endif
.align 3 .align 3
.loop_crc_tab: .loop_crc_tab:
@ -119,9 +128,12 @@
b .crc_tab_pre b .crc_tab_pre
#ifndef __APPLE__
.size \name, .-\name .size \name, .-\name
.section .rodata.cst16,"aM",@progbits,16 .section .rodata.cst16,"aM",@progbits,16
#else
.section __TEXT,__const
#endif
.align 4 .align 4
.shuffle_data: .shuffle_data:
.byte 15, 14, 13, 12, 11, 10, 9, 8 .byte 15, 14, 13, 12, 11, 10, 9, 8

View File

@ -33,12 +33,14 @@
.arch armv8-a+crypto .arch armv8-a+crypto
.text .text
.align 3 .align 3
.global \name .global cdecl(\name)
#ifndef __APPLE__
.type \name, %function .type \name, %function
#endif
/* uint64_t crc64_refl_func(uint64_t seed, const uint8_t * buf, uint64_t len) */ /* uint64_t crc64_refl_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
\name\(): cdecl(\name\()):
mvn x_seed, x_seed mvn x_seed, x_seed
mov x_counter, 0 mov x_counter, 0
cmp x_len, (FOLD_SIZE-1) cmp x_len, (FOLD_SIZE-1)
@ -48,10 +50,17 @@
cmp x_len, x_counter cmp x_len, x_counter
bls .done bls .done
#ifndef __APPLE__
adrp x_tmp, .lanchor_crc_tab adrp x_tmp, .lanchor_crc_tab
add x_buf_iter, x_buf, x_counter add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
#else
adrp x_tmp, .lanchor_crc_tab@PAGE
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
#endif
.align 3 .align 3
.loop_crc_tab: .loop_crc_tab:
@ -121,6 +130,7 @@
umov x_crc_ret, v_tmp_low.d[1] umov x_crc_ret, v_tmp_low.d[1]
b .crc_tab_pre b .crc_tab_pre
#ifndef __APPLE__
.size \name, .-\name .size \name, .-\name
#endif
.endm .endm

View File

@ -30,37 +30,50 @@
DEFINE_INTERFACE_DISPATCHER(crc16_t10dif) DEFINE_INTERFACE_DISPATCHER(crc16_t10dif)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_PMULL) if (auxval & HWCAP_PMULL)
return PROVIDER_INFO(crc16_t10dif_pmull); return PROVIDER_INFO(crc16_t10dif_pmull);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_PMULL_KEY))
return PROVIDER_INFO(crc16_t10dif_pmull);
#endif
return PROVIDER_BASIC(crc16_t10dif); return PROVIDER_BASIC(crc16_t10dif);
} }
DEFINE_INTERFACE_DISPATCHER(crc16_t10dif_copy) DEFINE_INTERFACE_DISPATCHER(crc16_t10dif_copy)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_PMULL) if (auxval & HWCAP_PMULL)
return PROVIDER_INFO(crc16_t10dif_copy_pmull); return PROVIDER_INFO(crc16_t10dif_copy_pmull);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_PMULL_KEY))
return PROVIDER_INFO(crc16_t10dif_copy_pmull);
#endif
return PROVIDER_BASIC(crc16_t10dif_copy); return PROVIDER_BASIC(crc16_t10dif_copy);
} }
DEFINE_INTERFACE_DISPATCHER(crc32_ieee) DEFINE_INTERFACE_DISPATCHER(crc32_ieee)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_PMULL) { if (auxval & HWCAP_PMULL) {
return PROVIDER_INFO(crc32_ieee_norm_pmull); return PROVIDER_INFO(crc32_ieee_norm_pmull);
} }
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_PMULL_KEY))
return PROVIDER_INFO(crc32_ieee_norm_pmull);
#endif
return PROVIDER_BASIC(crc32_ieee); return PROVIDER_BASIC(crc32_ieee);
} }
DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) { if (auxval & HWCAP_CRC32) {
switch (get_micro_arch_id()) { switch (get_micro_arch_id()) {
@ -77,12 +90,19 @@ DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
if (auxval & HWCAP_PMULL) { if (auxval & HWCAP_PMULL) {
return PROVIDER_INFO(crc32_iscsi_refl_pmull); return PROVIDER_INFO(crc32_iscsi_refl_pmull);
} }
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(crc32_iscsi_3crc_fold);
if (sysctlEnabled(SYSCTL_PMULL_KEY))
return PROVIDER_INFO(crc32_iscsi_refl_pmull);
#endif
return PROVIDER_BASIC(crc32_iscsi); return PROVIDER_BASIC(crc32_iscsi);
} }
DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl) DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) { if (auxval & HWCAP_CRC32) {
@ -99,68 +119,97 @@ DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl)
if (auxval & HWCAP_PMULL) if (auxval & HWCAP_PMULL)
return PROVIDER_INFO(crc32_gzip_refl_pmull); return PROVIDER_INFO(crc32_gzip_refl_pmull);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(crc32_gzip_refl_3crc_fold);
if (sysctlEnabled(SYSCTL_PMULL_KEY))
return PROVIDER_INFO(crc32_gzip_refl_pmull);
#endif
return PROVIDER_BASIC(crc32_gzip_refl); return PROVIDER_BASIC(crc32_gzip_refl);
} }
DEFINE_INTERFACE_DISPATCHER(crc64_ecma_refl) DEFINE_INTERFACE_DISPATCHER(crc64_ecma_refl)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_PMULL) if (auxval & HWCAP_PMULL)
return PROVIDER_INFO(crc64_ecma_refl_pmull); return PROVIDER_INFO(crc64_ecma_refl_pmull);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_PMULL_KEY))
return PROVIDER_INFO(crc64_ecma_refl_pmull);
#endif
return PROVIDER_BASIC(crc64_ecma_refl); return PROVIDER_BASIC(crc64_ecma_refl);
} }
DEFINE_INTERFACE_DISPATCHER(crc64_ecma_norm) DEFINE_INTERFACE_DISPATCHER(crc64_ecma_norm)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_PMULL) if (auxval & HWCAP_PMULL)
return PROVIDER_INFO(crc64_ecma_norm_pmull); return PROVIDER_INFO(crc64_ecma_norm_pmull);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_PMULL_KEY))
return PROVIDER_INFO(crc64_ecma_norm_pmull);
#endif
return PROVIDER_BASIC(crc64_ecma_norm); return PROVIDER_BASIC(crc64_ecma_norm);
} }
DEFINE_INTERFACE_DISPATCHER(crc64_iso_refl) DEFINE_INTERFACE_DISPATCHER(crc64_iso_refl)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_PMULL) if (auxval & HWCAP_PMULL)
return PROVIDER_INFO(crc64_iso_refl_pmull); return PROVIDER_INFO(crc64_iso_refl_pmull);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_PMULL_KEY))
return PROVIDER_INFO(crc64_iso_refl_pmull);
#endif
return PROVIDER_BASIC(crc64_iso_refl); return PROVIDER_BASIC(crc64_iso_refl);
} }
DEFINE_INTERFACE_DISPATCHER(crc64_iso_norm) DEFINE_INTERFACE_DISPATCHER(crc64_iso_norm)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_PMULL) if (auxval & HWCAP_PMULL)
return PROVIDER_INFO(crc64_iso_norm_pmull); return PROVIDER_INFO(crc64_iso_norm_pmull);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_PMULL_KEY))
return PROVIDER_INFO(crc64_iso_norm_pmull);
#endif
return PROVIDER_BASIC(crc64_iso_norm); return PROVIDER_BASIC(crc64_iso_norm);
} }
DEFINE_INTERFACE_DISPATCHER(crc64_jones_refl) DEFINE_INTERFACE_DISPATCHER(crc64_jones_refl)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_PMULL) if (auxval & HWCAP_PMULL)
return PROVIDER_INFO(crc64_jones_refl_pmull); return PROVIDER_INFO(crc64_jones_refl_pmull);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_PMULL_KEY))
return PROVIDER_INFO(crc64_jones_refl_pmull);
#endif
return PROVIDER_BASIC(crc64_jones_refl); return PROVIDER_BASIC(crc64_jones_refl);
} }
DEFINE_INTERFACE_DISPATCHER(crc64_jones_norm) DEFINE_INTERFACE_DISPATCHER(crc64_jones_norm)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_PMULL) if (auxval & HWCAP_PMULL)
return PROVIDER_INFO(crc64_jones_norm_pmull); return PROVIDER_INFO(crc64_jones_norm_pmull);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_PMULL_KEY))
return PROVIDER_INFO(crc64_jones_norm_pmull);
#endif
return PROVIDER_BASIC(crc64_jones_norm); return PROVIDER_BASIC(crc64_jones_norm);
} }

View File

@ -27,6 +27,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
// parameters // parameters
#define w_seed w0 #define w_seed w0
#define x_seed x0 #define x_seed x0
@ -126,8 +128,13 @@
.endm .endm
.macro crc_norm_load_first_block .macro crc_norm_load_first_block
#ifndef __APPLE__
adrp x_tmp, .shuffle_data adrp x_tmp, .shuffle_data
ldr q_shuffle, [x_tmp, #:lo12:.shuffle_data] ldr q_shuffle, [x_tmp, #:lo12:.shuffle_data]
#else
adrp x_tmp, .shuffle_data@PAGE
ldr q_shuffle, [x_tmp, #.shuffle_data@PAGEOFF]
#endif
ldr q_x0_tmp, [x_buf] ldr q_x0_tmp, [x_buf]
ldr q_x1, [x_buf, 16] ldr q_x1, [x_buf, 16]

View File

@ -30,60 +30,90 @@
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod) DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_SVE) if (auxval & HWCAP_SVE)
return PROVIDER_INFO(gf_vect_dot_prod_sve); return PROVIDER_INFO(gf_vect_dot_prod_sve);
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(gf_vect_dot_prod_neon); return PROVIDER_INFO(gf_vect_dot_prod_neon);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
return PROVIDER_INFO(gf_vect_dot_prod_sve);
return PROVIDER_INFO(gf_vect_dot_prod_neon);
#endif
return PROVIDER_BASIC(gf_vect_dot_prod); return PROVIDER_BASIC(gf_vect_dot_prod);
} }
DEFINE_INTERFACE_DISPATCHER(gf_vect_mad) DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_SVE) if (auxval & HWCAP_SVE)
return PROVIDER_INFO(gf_vect_mad_sve); return PROVIDER_INFO(gf_vect_mad_sve);
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(gf_vect_mad_neon); return PROVIDER_INFO(gf_vect_mad_neon);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
return PROVIDER_INFO(gf_vect_mad_sve);
return PROVIDER_INFO(gf_vect_mad_neon);
#endif
return PROVIDER_BASIC(gf_vect_mad); return PROVIDER_BASIC(gf_vect_mad);
} }
DEFINE_INTERFACE_DISPATCHER(ec_encode_data) DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_SVE) if (auxval & HWCAP_SVE)
return PROVIDER_INFO(ec_encode_data_sve); return PROVIDER_INFO(ec_encode_data_sve);
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(ec_encode_data_neon); return PROVIDER_INFO(ec_encode_data_neon);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
return PROVIDER_INFO(ec_encode_data_sve);
return PROVIDER_INFO(ec_encode_data_neon);
#endif
return PROVIDER_BASIC(ec_encode_data); return PROVIDER_BASIC(ec_encode_data);
} }
DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update) DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_SVE) if (auxval & HWCAP_SVE)
return PROVIDER_INFO(ec_encode_data_update_sve); return PROVIDER_INFO(ec_encode_data_update_sve);
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(ec_encode_data_update_neon); return PROVIDER_INFO(ec_encode_data_update_neon);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
return PROVIDER_INFO(ec_encode_data_update_sve);
return PROVIDER_INFO(ec_encode_data_update_neon);
#endif
return PROVIDER_BASIC(ec_encode_data_update); return PROVIDER_BASIC(ec_encode_data_update);
} }
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul) DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_SVE) if (auxval & HWCAP_SVE)
return PROVIDER_INFO(gf_vect_mul_sve); return PROVIDER_INFO(gf_vect_mul_sve);
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(gf_vect_mul_neon); return PROVIDER_INFO(gf_vect_mul_neon);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
return PROVIDER_INFO(gf_vect_mul_sve);
return PROVIDER_INFO(gf_vect_mul_neon);
#endif
return PROVIDER_BASIC(gf_vect_mul); return PROVIDER_BASIC(gf_vect_mul);
} }

View File

@ -27,11 +27,14 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.text .text
.global gf_2vect_dot_prod_neon .global cdecl(gf_2vect_dot_prod_neon)
#ifndef __APPLE__
.type gf_2vect_dot_prod_neon, %function .type gf_2vect_dot_prod_neon, %function
#endif
/* arguments */ /* arguments */
x_len .req x0 x_len .req x0
@ -130,7 +133,7 @@ q_data .req q_p1_1
v_data_lo .req v_p1_2 v_data_lo .req v_p1_2
v_data_hi .req v_p1_3 v_data_hi .req v_p1_3
gf_2vect_dot_prod_neon: cdecl(gf_2vect_dot_prod_neon):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_2vect_dot_prod_sve #include "../include/aarch64_label.h"
.global cdecl(gf_2vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_2vect_dot_prod_sve, %function .type gf_2vect_dot_prod_sve, %function
#endif
/* void gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, /* void gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest); unsigned char **src, unsigned char **dest);
*/ */
@ -81,7 +85,7 @@ q_gft2_hi .req q18
z_dest2 .req z27 z_dest2 .req z27
gf_2vect_dot_prod_sve: cdecl(gf_2vect_dot_prod_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -26,11 +26,15 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.text .text
.global gf_2vect_mad_neon .global cdecl(gf_2vect_mad_neon)
#ifndef __APPLE__
.type gf_2vect_mad_neon, %function .type gf_2vect_mad_neon, %function
#endif
/* arguments */ /* arguments */
x_len .req x0 x_len .req x0
@ -125,7 +129,7 @@ v_data_lo .req v17
v_data_hi .req v18 v_data_hi .req v18
gf_2vect_mad_neon: cdecl(gf_2vect_mad_neon):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail
@ -360,8 +364,13 @@ gf_2vect_mad_neon:
sub x_dest1, x_dest1, x_tmp sub x_dest1, x_dest1, x_tmp
sub x_dest2, x_dest2, x_tmp sub x_dest2, x_dest2, x_tmp
#ifndef __APPLE__
adrp x_const, const_tbl adrp x_const, const_tbl
add x_const, x_const, :lo12:const_tbl add x_const, x_const, :lo12:const_tbl
#else
adrp x_const, const_tbl@PAGE
add x_const, x_const, const_tbl@PAGEOFF
#endif
sub x_const, x_const, x_tmp sub x_const, x_const, x_tmp
ldr q_tmp, [x_const, #16] ldr q_tmp, [x_const, #16]
@ -395,7 +404,7 @@ gf_2vect_mad_neon:
mov w_ret, #1 mov w_ret, #1
ret ret
.section .rodata ASM_DEF_RODATA
.balign 8 .balign 8
const_tbl: const_tbl:
.dword 0x0000000000000000, 0x0000000000000000 .dword 0x0000000000000000, 0x0000000000000000

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_2vect_mad_sve #include "../include/aarch64_label.h"
.global cdecl(gf_2vect_mad_sve)
#ifndef __APPLE__
.type gf_2vect_mad_sve, %function .type gf_2vect_mad_sve, %function
#endif
/* gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, /* gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest); unsigned char *src, unsigned char **dest);
@ -77,7 +81,7 @@ q_gft2_hi .req q18
z_dest2 .req z27 z_dest2 .req z27
gf_2vect_mad_sve: cdecl(gf_2vect_mad_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -27,11 +27,14 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.text .text
.global gf_3vect_dot_prod_neon .global cdecl(gf_3vect_dot_prod_neon)
#ifndef __APPLE__
.type gf_3vect_dot_prod_neon, %function .type gf_3vect_dot_prod_neon, %function
#endif
/* arguments */ /* arguments */
x_len .req x0 x_len .req x0
@ -117,7 +120,7 @@ v_data_lo .req v_p1_2
v_data_hi .req v_p1_3 v_data_hi .req v_p1_3
gf_3vect_dot_prod_neon: cdecl(gf_3vect_dot_prod_neon):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_3vect_dot_prod_sve #include "../include/aarch64_label.h"
.global cdecl(gf_3vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_3vect_dot_prod_sve, %function .type gf_3vect_dot_prod_sve, %function
#endif
/* void gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, /* void gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest); unsigned char **src, unsigned char **dest);
*/ */
@ -89,7 +93,7 @@ q_gft3_hi .req q20
z_dest2 .req z27 z_dest2 .req z27
z_dest3 .req z28 z_dest3 .req z28
gf_3vect_dot_prod_sve: cdecl(gf_3vect_dot_prod_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -26,11 +26,15 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.text .text
.global gf_3vect_mad_neon .global cdecl(gf_3vect_mad_neon)
#ifndef __APPLE__
.type gf_3vect_mad_neon, %function .type gf_3vect_mad_neon, %function
#endif
/* arguments */ /* arguments */
x_len .req x0 x_len .req x0
@ -122,7 +126,7 @@ q_data .req q21
v_data_lo .req v22 v_data_lo .req v22
v_data_hi .req v23 v_data_hi .req v23
gf_3vect_mad_neon: cdecl(gf_3vect_mad_neon):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail
@ -332,8 +336,13 @@ gf_3vect_mad_neon:
sub x_dest2, x_dest2, x_tmp sub x_dest2, x_dest2, x_tmp
sub x_dest3, x_dest3, x_tmp sub x_dest3, x_dest3, x_tmp
#ifndef __APPLE__
adrp x_const, const_tbl adrp x_const, const_tbl
add x_const, x_const, :lo12:const_tbl add x_const, x_const, :lo12:const_tbl
#else
adrp x_const, const_tbl@PAGE
add x_const, x_const, const_tbl@PAGEOFF
#endif
sub x_const, x_const, x_tmp sub x_const, x_const, x_tmp
ldr q_tmp, [x_const, #16] ldr q_tmp, [x_const, #16]
@ -375,7 +384,7 @@ gf_3vect_mad_neon:
mov w_ret, #1 mov w_ret, #1
ret ret
.section .rodata ASM_DEF_RODATA
.balign 8 .balign 8
const_tbl: const_tbl:
.dword 0x0000000000000000, 0x0000000000000000 .dword 0x0000000000000000, 0x0000000000000000

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_3vect_mad_sve #include "../include/aarch64_label.h"
.global cdecl(gf_3vect_mad_sve)
#ifndef __APPLE__
.type gf_3vect_mad_sve, %function .type gf_3vect_mad_sve, %function
#endif
/* gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, /* gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest); unsigned char *src, unsigned char **dest);
@ -84,7 +88,7 @@ q_gft3_hi .req q20
z_dest2 .req z27 z_dest2 .req z27
z_dest3 .req z28 z_dest3 .req z28
gf_3vect_mad_sve: cdecl(gf_3vect_mad_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -26,11 +26,15 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.text .text
.global gf_4vect_dot_prod_neon .global cdecl(gf_4vect_dot_prod_neon)
#ifndef __APPLE__
.type gf_4vect_dot_prod_neon, %function .type gf_4vect_dot_prod_neon, %function
#endif
/* arguments */ /* arguments */
x_len .req x0 x_len .req x0
@ -132,7 +136,7 @@ q_data .req q_tmp1
v_data_lo .req v_tmp1_lo v_data_lo .req v_tmp1_lo
v_data_hi .req v_tmp1_hi v_data_hi .req v_tmp1_hi
gf_4vect_dot_prod_neon: cdecl(gf_4vect_dot_prod_neon):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_4vect_dot_prod_sve #include "../include/aarch64_label.h"
.global cdecl(gf_4vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_4vect_dot_prod_sve, %function .type gf_4vect_dot_prod_sve, %function
#endif
/* void gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, /* void gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest); unsigned char **src, unsigned char **dest);
*/ */
@ -97,7 +101,7 @@ z_dest2 .req z27
z_dest3 .req z28 z_dest3 .req z28
z_dest4 .req z29 z_dest4 .req z29
gf_4vect_dot_prod_sve: cdecl(gf_4vect_dot_prod_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -27,11 +27,14 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.text .text
.global gf_4vect_mad_neon .global cdecl(gf_4vect_mad_neon)
#ifndef __APPLE__
.type gf_4vect_mad_neon, %function .type gf_4vect_mad_neon, %function
#endif
/* arguments */ /* arguments */
x_len .req x0 x_len .req x0
@ -138,7 +141,7 @@ q_data .req q21
v_data_lo .req v22 v_data_lo .req v22
v_data_hi .req v23 v_data_hi .req v23
gf_4vect_mad_neon: cdecl(gf_4vect_mad_neon):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail
@ -397,8 +400,13 @@ gf_4vect_mad_neon:
sub x_dest3, x_dest3, x_tmp sub x_dest3, x_dest3, x_tmp
sub x_dest4, x_dest4, x_tmp sub x_dest4, x_dest4, x_tmp
#ifndef __APPLE__
adrp x_const, const_tbl adrp x_const, const_tbl
add x_const, x_const, :lo12:const_tbl add x_const, x_const, :lo12:const_tbl
#else
adrp x_const, const_tbl@PAGE
add x_const, x_const, const_tbl@PAGEOFF
#endif
sub x_const, x_const, x_tmp sub x_const, x_const, x_tmp
ldr q_tmp, [x_const, #16] ldr q_tmp, [x_const, #16]
@ -449,7 +457,7 @@ gf_4vect_mad_neon:
mov w_ret, #1 mov w_ret, #1
ret ret
.section .rodata ASM_DEF_RODATA
.balign 8 .balign 8
const_tbl: const_tbl:
.dword 0x0000000000000000, 0x0000000000000000 .dword 0x0000000000000000, 0x0000000000000000

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_4vect_mad_sve #include "../include/aarch64_label.h"
.global cdecl(gf_4vect_mad_sve)
#ifndef __APPLE__
.type gf_4vect_mad_sve, %function .type gf_4vect_mad_sve, %function
#endif
/* gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, /* gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest); unsigned char *src, unsigned char **dest);
@ -91,7 +95,7 @@ z_dest2 .req z27
z_dest3 .req z28 z_dest3 .req z28
z_dest4 .req z29 z_dest4 .req z29
gf_4vect_mad_sve: cdecl(gf_4vect_mad_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -27,11 +27,14 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.text .text
.global gf_5vect_dot_prod_neon .global cdecl(gf_5vect_dot_prod_neon)
#ifndef __APPLE__
.type gf_5vect_dot_prod_neon, %function .type gf_5vect_dot_prod_neon, %function
#endif
/* arguments */ /* arguments */
x_len .req x0 x_len .req x0
@ -159,7 +162,7 @@ q_gft5_lo .req q_p2_3
q_gft5_hi .req q_p3_3 q_gft5_hi .req q_p3_3
gf_5vect_dot_prod_neon: cdecl(gf_5vect_dot_prod_neon):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_5vect_dot_prod_sve #include "../include/aarch64_label.h"
.global cdecl(gf_5vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_5vect_dot_prod_sve, %function .type gf_5vect_dot_prod_sve, %function
#endif
/* void gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, /* void gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest); unsigned char **src, unsigned char **dest);
*/ */
@ -105,7 +109,7 @@ z_dest3 .req z28
z_dest4 .req z29 z_dest4 .req z29
z_dest5 .req z30 z_dest5 .req z30
gf_5vect_dot_prod_sve: cdecl(gf_5vect_dot_prod_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -26,11 +26,15 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.text .text
.global gf_5vect_mad_neon .global cdecl(gf_5vect_mad_neon)
#ifndef __APPLE__
.type gf_5vect_mad_neon, %function .type gf_5vect_mad_neon, %function
#endif
/* arguments */ /* arguments */
x_len .req x0 x_len .req x0
@ -152,7 +156,7 @@ q_data .req q21
v_data_lo .req v22 v_data_lo .req v22
v_data_hi .req v23 v_data_hi .req v23
gf_5vect_mad_neon: cdecl(gf_5vect_mad_neon):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail
@ -463,8 +467,13 @@ gf_5vect_mad_neon:
sub x_dest4, x_dest4, x_tmp sub x_dest4, x_dest4, x_tmp
sub x_dest5, x_dest5, x_tmp sub x_dest5, x_dest5, x_tmp
#ifndef __APPLE__
adrp x_const, const_tbl adrp x_const, const_tbl
add x_const, x_const, :lo12:const_tbl add x_const, x_const, :lo12:const_tbl
#else
adrp x_const, const_tbl@PAGE
add x_const, x_const, const_tbl@PAGEOFF
#endif
sub x_const, x_const, x_tmp sub x_const, x_const, x_tmp
ldr q_tmp, [x_const, #16] ldr q_tmp, [x_const, #16]
@ -528,7 +537,7 @@ gf_5vect_mad_neon:
mov w_ret, #1 mov w_ret, #1
ret ret
.section .rodata ASM_DEF_RODATA
.balign 8 .balign 8
const_tbl: const_tbl:
.dword 0x0000000000000000, 0x0000000000000000 .dword 0x0000000000000000, 0x0000000000000000

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_5vect_mad_sve #include "../include/aarch64_label.h"
.global cdecl(gf_5vect_mad_sve)
#ifndef __APPLE__
.type gf_5vect_mad_sve, %function .type gf_5vect_mad_sve, %function
#endif
/* gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, /* gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest); unsigned char *src, unsigned char **dest);
@ -98,7 +102,7 @@ z_dest3 .req z28
z_dest4 .req z29 z_dest4 .req z29
z_dest5 .req z30 z_dest5 .req z30
gf_5vect_mad_sve: cdecl(gf_5vect_mad_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_6vect_dot_prod_sve #include "../include/aarch64_label.h"
.global cdecl(gf_6vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_6vect_dot_prod_sve, %function .type gf_6vect_dot_prod_sve, %function
#endif
/* void gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, /* void gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest); unsigned char **src, unsigned char **dest);
*/ */
@ -113,7 +117,7 @@ z_dest4 .req z29
z_dest5 .req z30 z_dest5 .req z30
z_dest6 .req z31 z_dest6 .req z31
gf_6vect_dot_prod_sve: cdecl(gf_6vect_dot_prod_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -27,10 +27,13 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
.text #include "../include/aarch64_label.h"
.global gf_6vect_mad_neon
.type gf_6vect_mad_neon, %function
.text
.global cdecl(gf_6vect_mad_neon)
#ifndef __APPLE__
.type gf_6vect_mad_neon, %function
#endif
/* arguments */ /* arguments */
x_len .req x0 x_len .req x0
@ -166,7 +169,7 @@ q_data .req q21
v_data_lo .req v22 v_data_lo .req v22
v_data_hi .req v23 v_data_hi .req v23
gf_6vect_mad_neon: cdecl(gf_6vect_mad_neon):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail
@ -526,8 +529,13 @@ gf_6vect_mad_neon:
sub x_dest5, x_dest5, x_tmp sub x_dest5, x_dest5, x_tmp
sub x_dest6, x_dest6, x_tmp sub x_dest6, x_dest6, x_tmp
#ifndef __APPLE__
adrp x_const, const_tbl adrp x_const, const_tbl
add x_const, x_const, :lo12:const_tbl add x_const, x_const, :lo12:const_tbl
#else
adrp x_const, const_tbl@PAGE
add x_const, x_const, const_tbl@PAGEOFF
#endif
sub x_const, x_const, x_tmp sub x_const, x_const, x_tmp
ldr q_tmp, [x_const, #16] ldr q_tmp, [x_const, #16]
@ -603,7 +611,7 @@ gf_6vect_mad_neon:
mov w_ret, #1 mov w_ret, #1
ret ret
.section .rodata ASM_DEF_RODATA
.balign 8 .balign 8
const_tbl: const_tbl:
.dword 0x0000000000000000, 0x0000000000000000 .dword 0x0000000000000000, 0x0000000000000000

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_6vect_mad_sve #include "../include/aarch64_label.h"
.global cdecl(gf_6vect_mad_sve)
#ifndef __APPLE__
.type gf_6vect_mad_sve, %function .type gf_6vect_mad_sve, %function
#endif
/* gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, /* gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char **dest); unsigned char *src, unsigned char **dest);
@ -105,7 +109,7 @@ z_dest4 .req z29
z_dest5 .req z30 z_dest5 .req z30
z_dest6 .req z31 z_dest6 .req z31
gf_6vect_mad_sve: cdecl(gf_6vect_mad_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_7vect_dot_prod_sve #include "../include/aarch64_label.h"
.global cdecl(gf_7vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_7vect_dot_prod_sve, %function .type gf_7vect_dot_prod_sve, %function
#endif
/* void gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, /* void gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest); unsigned char **src, unsigned char **dest);
*/ */
@ -122,7 +126,7 @@ z_dest4 .req z29
z_dest5 .req z30 z_dest5 .req z30
z_dest6 .req z31 z_dest6 .req z31
gf_7vect_dot_prod_sve: cdecl(gf_7vect_dot_prod_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_8vect_dot_prod_sve #include "../include/aarch64_label.h"
.global cdecl(gf_8vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_8vect_dot_prod_sve, %function .type gf_8vect_dot_prod_sve, %function
#endif
/* void gf_8vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, /* void gf_8vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char **dest); unsigned char **src, unsigned char **dest);
*/ */
@ -131,7 +135,7 @@ z_dest4 .req z29
z_dest5 .req z30 z_dest5 .req z30
z_dest6 .req z31 z_dest6 .req z31
gf_8vect_dot_prod_sve: cdecl(gf_8vect_dot_prod_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -26,10 +26,15 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.text .text
.global gf_vect_dot_prod_neon .global cdecl(gf_vect_dot_prod_neon)
#ifndef __APPLE__
.type gf_vect_dot_prod_neon, %function .type gf_vect_dot_prod_neon, %function
#endif
/* arguments */ /* arguments */
x_len .req x0 x_len .req x0
@ -115,7 +120,7 @@ v_data_lo .req v_p2
v_data_hi .req v_p3 v_data_hi .req v_p3
gf_vect_dot_prod_neon: cdecl(gf_vect_dot_prod_neon):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_vect_dot_prod_sve #include "../include/aarch64_label.h"
.global cdecl(gf_vect_dot_prod_sve)
#ifndef __APPLE__
.type gf_vect_dot_prod_sve, %function .type gf_vect_dot_prod_sve, %function
#endif
/* void gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls, /* void gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char *dest); unsigned char **src, unsigned char *dest);
*/ */
@ -66,7 +70,7 @@ z_gft1_hi .req z5
q_gft1_lo .req q4 q_gft1_lo .req q4
q_gft1_hi .req q5 q_gft1_hi .req q5
gf_vect_dot_prod_sve: cdecl(gf_vect_dot_prod_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -26,11 +26,15 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.text .text
.global gf_vect_mad_neon .global cdecl(gf_vect_mad_neon)
#ifndef __APPLE__
.type gf_vect_mad_neon, %function .type gf_vect_mad_neon, %function
#endif
/* arguments */ /* arguments */
x_len .req x0 x_len .req x0
@ -121,7 +125,7 @@ v_data_lo .req v_d1_2
v_data_hi .req v_d1_3 v_data_hi .req v_d1_3
gf_vect_mad_neon: cdecl(gf_vect_mad_neon):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail
@ -281,8 +285,13 @@ gf_vect_mad_neon:
mov x_src, x_src_end mov x_src, x_src_end
sub x_dest1, x_dest1, x_tmp sub x_dest1, x_dest1, x_tmp
#ifndef __APPLE__
adrp x_const, const_tbl adrp x_const, const_tbl
add x_const, x_const, :lo12:const_tbl add x_const, x_const, :lo12:const_tbl
#else
adrp x_const, const_tbl@PAGE
add x_const, x_const, const_tbl@PAGEOFF
#endif
sub x_const, x_const, x_tmp sub x_const, x_const, x_tmp
ldr q_tmp, [x_const, #16] ldr q_tmp, [x_const, #16]
@ -308,7 +317,7 @@ gf_vect_mad_neon:
mov w_ret, #1 mov w_ret, #1
ret ret
.section .rodata ASM_DEF_RODATA
.balign 8 .balign 8
const_tbl: const_tbl:
.dword 0x0000000000000000, 0x0000000000000000 .dword 0x0000000000000000, 0x0000000000000000

View File

@ -30,9 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
#include "../include/aarch64_label.h"
.global gf_vect_mad_sve .global cdecl(gf_vect_mad_sve)
#ifndef __APPLE__
.type gf_vect_mad_sve, %function .type gf_vect_mad_sve, %function
#endif
/* gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls, /* gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
unsigned char *src, unsigned char *dest); unsigned char *src, unsigned char *dest);
@ -68,7 +71,7 @@ z_gft1_hi .req z7
q_gft1_lo .req q6 q_gft1_lo .req q6
q_gft1_hi .req q7 q_gft1_hi .req q7
gf_vect_mad_sve: cdecl(gf_vect_mad_sve):
/* less than 16 bytes, return_fail */ /* less than 16 bytes, return_fail */
cmp x_len, #16 cmp x_len, #16
blt .return_fail blt .return_fail

View File

@ -27,11 +27,14 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.text .text
.global gf_vect_mul_neon .global cdecl(gf_vect_mul_neon)
#ifndef __APPLE__
.type gf_vect_mul_neon, %function .type gf_vect_mul_neon, %function
#endif
/* arguments */ /* arguments */
x_len .req x0 x_len .req x0
@ -90,7 +93,7 @@ v_data_6_hi .req v_data_6
v_data_7_hi .req v_data_7 v_data_7_hi .req v_data_7
gf_vect_mul_neon: cdecl(gf_vect_mul_neon):
/* less than 32 bytes, return_fail */ /* less than 32 bytes, return_fail */
cmp x_len, #32 cmp x_len, #32
blt .return_fail blt .return_fail

View File

@ -30,8 +30,12 @@
.align 6 .align 6
.arch armv8-a+sve .arch armv8-a+sve
.global gf_vect_mul_sve #include "../include/aarch64_label.h"
.global cdecl(gf_vect_mul_sve)
#ifndef __APPLE__
.type gf_vect_mul_sve, %function .type gf_vect_mul_sve, %function
#endif
/* Refer to include/gf_vect_mul.h /* Refer to include/gf_vect_mul.h
* *
@ -72,7 +76,7 @@ z_gft1_hi .req z7
q_gft1_lo .req q6 q_gft1_lo .req q6
q_gft1_hi .req q7 q_gft1_hi .req q7
gf_vect_mul_sve: cdecl(gf_vect_mul_sve):
/* less than 32 bytes, return_fail */ /* less than 32 bytes, return_fail */
cmp x_len, #32 cmp x_len, #32
blt .return_fail blt .return_fail

View File

@ -27,6 +27,8 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc .arch armv8-a+crc
.text .text
.align 2 .align 2
@ -46,8 +48,10 @@ declare Macros
x_\name .req x\reg x_\name .req x\reg
.endm .endm
.global encode_deflate_icf_aarch64 .global cdecl(encode_deflate_icf_aarch64)
#ifndef __APPLE__
.type encode_deflate_icf_aarch64, %function .type encode_deflate_icf_aarch64, %function
#endif
/* /*
struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in, struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
@ -86,7 +90,7 @@ declare Macros
.equ offset_m_out_buf, 16 .equ offset_m_out_buf, 16
.equ offset_m_out_end, 24 .equ offset_m_out_end, 24
encode_deflate_icf_aarch64: cdecl(encode_deflate_icf_aarch64):
cmp next_in, end_in cmp next_in, end_in
bcs .done bcs .done
@ -156,4 +160,6 @@ encode_deflate_icf_aarch64:
.done: .done:
ret ret
#ifndef __APPLE__
.size encode_deflate_icf_aarch64, .-encode_deflate_icf_aarch64 .size encode_deflate_icf_aarch64, .-encode_deflate_icf_aarch64
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc+crypto .arch armv8-a+crc+crypto
.text .text
.align 2 .align 2
@ -62,8 +65,10 @@ declare Macros
.endm .endm
.align 2 .align 2
.global gen_icf_map_h1_aarch64 .global cdecl(gen_icf_map_h1_aarch64)
#ifndef __APPLE__
.type gen_icf_map_h1_aarch64, %function .type gen_icf_map_h1_aarch64, %function
#endif
/* arguments */ /* arguments */
declare_generic_reg stream_param, 0,x declare_generic_reg stream_param, 0,x
@ -137,7 +142,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
struct deflate_icf *matches_icf_lookup, uint64_t input_size) struct deflate_icf *matches_icf_lookup, uint64_t input_size)
*/ */
gen_icf_map_h1_aarch64: cdecl(gen_icf_map_h1_aarch64):
cmp input_size_param, (ISAL_LOOK_AHEAD-1) // 287 cmp input_size_param, (ISAL_LOOK_AHEAD-1) // 287
bls .fast_exit bls .fast_exit
stp x29, x30, [sp, -16]! stp x29, x30, [sp, -16]!
@ -263,4 +268,6 @@ gen_icf_map_h1_aarch64:
.fast_exit: .fast_exit:
mov ret_val, 0 mov ret_val, 0
ret ret
#ifndef __APPLE__
.size gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64 .size gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64
#endif

View File

@ -27,6 +27,8 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a .arch armv8-a
.text .text
.align 2 .align 2
@ -257,8 +259,10 @@ declare Macros
.endm .endm
.global decode_huffman_code_block_stateless_aarch64 .global cdecl(decode_huffman_code_block_stateless_aarch64)
#ifndef __APPLE__
.type decode_huffman_code_block_stateless_aarch64, %function .type decode_huffman_code_block_stateless_aarch64, %function
#endif
/* /*
void decode_huffman_code_block_stateless_aarch64( void decode_huffman_code_block_stateless_aarch64(
struct inflate_state *state, struct inflate_state *state,
@ -305,7 +309,7 @@ declare Macros
declare_generic_reg write_overflow_lits,26,w declare_generic_reg write_overflow_lits,26,w
declare_generic_reg repeat_length,27,w declare_generic_reg repeat_length,27,w
decode_huffman_code_block_stateless_aarch64: cdecl(decode_huffman_code_block_stateless_aarch64):
//save registers //save registers
push_stack push_stack
@ -324,8 +328,13 @@ decode_huffman_code_block_stateless_aarch64:
ldp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state] ldp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
//init rfc_table //init rfc_table
#ifndef __APPLE__
adrp rfc_table,rfc_lookup_table adrp rfc_table,rfc_lookup_table
add rfc_table,rfc_table,:lo12:rfc_lookup_table add rfc_table,rfc_table,:lo12:rfc_lookup_table
#else
adrp rfc_table,rfc_lookup_table@PAGE
add rfc_table,rfc_table,rfc_lookup_table@PAGEOFF
#endif
#if ENABLE_TBL_INSTRUCTION #if ENABLE_TBL_INSTRUCTION
ld1 {v1.16b,v2.16b,v3.16b},[rfc_table] ld1 {v1.16b,v2.16b,v3.16b},[rfc_table]
add rfc_table,rfc_table,48 add rfc_table,rfc_table,48
@ -661,8 +670,10 @@ byte_copy_loop:
strb w_arg0, [next_out],1 strb w_arg0, [next_out],1
bne byte_copy_loop bne byte_copy_loop
b decompress_data_end b decompress_data_end
#ifndef __APPLE__
.size decode_huffman_code_block_stateless_aarch64, .-decode_huffman_code_block_stateless_aarch64 .size decode_huffman_code_block_stateless_aarch64, .-decode_huffman_code_block_stateless_aarch64
.type rfc_lookup_table, %object .type rfc_lookup_table, %object
#endif
rfc_lookup_table: rfc_lookup_table:
#if ENABLE_TBL_INSTRUCTION #if ENABLE_TBL_INSTRUCTION
@ -686,4 +697,6 @@ rfc_lookup_table:
.short 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01 .short 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
.short 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000 .short 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
#endif #endif
#ifndef __APPLE__
.size rfc_lookup_table, . - rfc_lookup_table .size rfc_lookup_table, . - rfc_lookup_table
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc .arch armv8-a+crc
.text .text
.align 2 .align 2
@ -72,8 +75,10 @@ skip_has_hist:
.endm .endm
.global isal_deflate_body_aarch64 .global cdecl(isal_deflate_body_aarch64)
#ifndef __APPLE__
.type isal_deflate_body_aarch64, %function .type isal_deflate_body_aarch64, %function
#endif
/* /*
void isal_deflate_body_aarch64(struct isal_zstream *stream) void isal_deflate_body_aarch64(struct isal_zstream *stream)
*/ */
@ -115,7 +120,7 @@ skip_has_hist:
declare_generic_reg code_len2, 4,x declare_generic_reg code_len2, 4,x
isal_deflate_body_aarch64: cdecl(isal_deflate_body_aarch64):
//save registers //save registers
push_stack push_stack
ldr avail_in, [stream, _avail_in] ldr avail_in, [stream, _avail_in]
@ -258,4 +263,6 @@ exit_save_state:
mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
str w_tmp0, [stream, _internal_state+_state] str w_tmp0, [stream, _internal_state+_state]
b exit_ret b exit_ret
#ifndef __APPLE__
.size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64 .size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc .arch armv8-a+crc
.text .text
.align 2 .align 2
@ -73,9 +76,11 @@ skip_has_hist:
sub w_\next_in,w_\next_in,w_\m_out_buf sub w_\next_in,w_\next_in,w_\m_out_buf
stp w_\next_in,w_\start_in,[\stream,_avail_out] stp w_\next_in,w_\start_in,[\stream,_avail_out]
.endm .endm
.global isal_deflate_finish_aarch64 .global cdecl(isal_deflate_finish_aarch64)
.arch armv8-a+crc .arch armv8-a+crc
#ifndef __APPLE__
.type isal_deflate_finish_aarch64, %function .type isal_deflate_finish_aarch64, %function
#endif
/* /*
void isal_deflate_finish_aarch64(struct isal_zstream *stream) void isal_deflate_finish_aarch64(struct isal_zstream *stream)
*/ */
@ -117,7 +122,7 @@ skip_has_hist:
declare_generic_reg code_len2, 4,x declare_generic_reg code_len2, 4,x
isal_deflate_finish_aarch64: cdecl(isal_deflate_finish_aarch64):
//save registers //save registers
push_stack push_stack
@ -260,5 +265,6 @@ update_state_exit:
update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1 update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
pop_stack pop_stack
ret ret
#ifndef __APPLE__
.size isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64 .size isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc .arch armv8-a+crc
.text .text
.align 2 .align 2
@ -46,8 +49,10 @@ declare Macros
.global isal_deflate_hash_aarch64 .global cdecl(isal_deflate_hash_aarch64)
#ifndef __APPLE__
.type isal_deflate_hash_aarch64, %function .type isal_deflate_hash_aarch64, %function
#endif
/* /*
void isal_deflate_hash_aarch64(uint16_t * hash_table, uint32_t hash_mask, void isal_deflate_hash_aarch64(uint16_t * hash_table, uint32_t hash_mask,
uint32_t current_index, uint8_t * dict, uint32_t dict_len) uint32_t current_index, uint8_t * dict, uint32_t dict_len)
@ -58,14 +63,14 @@ declare Macros
declare_generic_reg dict, 3,x declare_generic_reg dict, 3,x
declare_generic_reg dict_len, 4,w declare_generic_reg dict_len, 4,w
declare_generic_reg next_in 3,x declare_generic_reg next_in, 3,x
declare_generic_reg end_in 6,x declare_generic_reg end_in, 6,x
declare_generic_reg ind 5,w declare_generic_reg ind, 5,w
declare_generic_reg hash 2,w declare_generic_reg hash, 2,w
declare_generic_reg literal 2,w declare_generic_reg literal, 2,w
#define SHORTEST_MATCH #4 #define SHORTEST_MATCH #4
isal_deflate_hash_aarch64: cdecl(isal_deflate_hash_aarch64):
sub ind, current_index, dict_len sub ind, current_index, dict_len
and ind,ind,0xffff and ind,ind,0xffff
@ -92,4 +97,6 @@ loop_start:
exit_func: exit_func:
ret ret
#ifndef __APPLE__
.size isal_deflate_hash_aarch64, .-isal_deflate_hash_aarch64 .size isal_deflate_hash_aarch64, .-isal_deflate_hash_aarch64
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crypto .arch armv8-a+crypto
.text .text
.align 3 .align 3
@ -56,9 +59,11 @@ Arguements list
adler32 .req w0 adler32 .req w0
start .req x1 start .req x1
length .req x2 length .req x2
.global adler32_neon .global cdecl(adler32_neon)
#ifndef __APPLE__
.type adler32_neon, %function .type adler32_neon, %function
adler32_neon: #endif
cdecl(adler32_neon):
/* /*
local variables local variables
*/ */
@ -92,8 +97,13 @@ local variables
lsr adler1, adler32, 16 lsr adler1, adler32, 16
lsr loop_cnt,length,5 lsr loop_cnt,length,5
#ifndef __APPLE__
adrp x3,factors adrp x3,factors
add x3,x3,:lo12:factors add x3,x3,:lo12:factors
#else
adrp x3,factors@PAGE
add x3,x3,factors@PAGEOFF
#endif
ld1 {factor0_v.16b-factor1_v.16b},[x3] ld1 {factor0_v.16b-factor1_v.16b},[x3]
add end,start,length add end,start,length
@ -162,12 +172,15 @@ end_func:
orr w0,adler0,adler1,lsl 16 orr w0,adler0,adler1,lsl 16
ret ret
#ifndef __APPLE__
.size adler32_neon, .-adler32_neon .size adler32_neon, .-adler32_neon
.section .rodata.cst16,"aM",@progbits,16 .section .rodata.cst16,"aM",@progbits,16
#else
.section __TEXT,__const
#endif
.align 4 .align 4
factors: factors:
.quad 0x191a1b1c1d1e1f20 .quad 0x191a1b1c1d1e1f20
.quad 0x1112131415161718 .quad 0x1112131415161718
.quad 0x090a0b0c0d0e0f10 .quad 0x090a0b0c0d0e0f10
.quad 0x0102030405060708 .quad 0x0102030405060708

View File

@ -30,86 +30,121 @@
/*
 * Dispatcher for isal_adler32.
 * Linux: returns adler32_neon when getauxval(AT_HWCAP) has HWCAP_ASIMD set.
 * Apple: returns adler32_neon unconditionally (no runtime probe), so the
 *        trailing PROVIDER_BASIC return is not reached on that platform.
 * Otherwise: falls back to PROVIDER_BASIC(adler32).
 */
DEFINE_INTERFACE_DISPATCHER(isal_adler32) DEFINE_INTERFACE_DISPATCHER(isal_adler32)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(adler32_neon); return PROVIDER_INFO(adler32_neon);
#elif defined(__APPLE__)
return PROVIDER_INFO(adler32_neon);
#endif
return PROVIDER_BASIC(adler32); return PROVIDER_BASIC(adler32);
} }
/*
 * Dispatcher for isal_deflate_body.
 * Linux: returns isal_deflate_body_aarch64 when getauxval(AT_HWCAP) has
 *        HWCAP_CRC32 set.
 * Apple: returns it when sysctlEnabled(SYSCTL_CRC32_KEY) is true.
 * Otherwise: falls back to PROVIDER_BASIC(isal_deflate_body).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_body) DEFINE_INTERFACE_DISPATCHER(isal_deflate_body)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(isal_deflate_body_aarch64); return PROVIDER_INFO(isal_deflate_body_aarch64);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(isal_deflate_body_aarch64);
#endif
return PROVIDER_BASIC(isal_deflate_body); return PROVIDER_BASIC(isal_deflate_body);
} }
/*
 * Dispatcher for isal_deflate_finish.
 * Linux: returns isal_deflate_finish_aarch64 when getauxval(AT_HWCAP) has
 *        HWCAP_CRC32 set.
 * Apple: returns it when sysctlEnabled(SYSCTL_CRC32_KEY) is true.
 * Otherwise: falls back to PROVIDER_BASIC(isal_deflate_finish).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_finish) DEFINE_INTERFACE_DISPATCHER(isal_deflate_finish)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(isal_deflate_finish_aarch64); return PROVIDER_INFO(isal_deflate_finish_aarch64);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(isal_deflate_finish_aarch64);
#endif
return PROVIDER_BASIC(isal_deflate_finish); return PROVIDER_BASIC(isal_deflate_finish);
} }
/*
 * Dispatcher for isal_deflate_icf_body_lvl1.
 * Linux: returns isal_deflate_icf_body_hash_hist_aarch64 when
 *        getauxval(AT_HWCAP) has HWCAP_CRC32 set.
 * Apple: returns it when sysctlEnabled(SYSCTL_CRC32_KEY) is true.
 * Otherwise: falls back to PROVIDER_BASIC(isal_deflate_icf_body_hash_hist).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl1) DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl1)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64); return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
#endif
return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist); return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
} }
/*
 * Dispatcher for isal_deflate_icf_finish_lvl1.
 * Linux: returns isal_deflate_icf_finish_hash_hist_aarch64 when
 *        getauxval(AT_HWCAP) has HWCAP_CRC32 set.
 * Apple: returns the same finish routine when sysctlEnabled(SYSCTL_CRC32_KEY)
 *        is true.
 * Otherwise: falls back to PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl1) DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl1)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64); return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
/* Must be the *finish* routine, matching the Linux branch and the lvl2
 * dispatcher; the *body* routine here was a copy-paste error. */
return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
#endif
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist); return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
} }
/*
 * Dispatcher for isal_deflate_icf_body_lvl2.
 * Resolves to the same providers as the lvl1 body dispatcher:
 * Linux: isal_deflate_icf_body_hash_hist_aarch64 when getauxval(AT_HWCAP)
 *        has HWCAP_CRC32 set.
 * Apple: the same routine when sysctlEnabled(SYSCTL_CRC32_KEY) is true.
 * Otherwise: falls back to PROVIDER_BASIC(isal_deflate_icf_body_hash_hist).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl2) DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl2)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64); return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
#endif
return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist); return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
} }
/*
 * Dispatcher for isal_deflate_icf_finish_lvl2.
 * Linux: returns isal_deflate_icf_finish_hash_hist_aarch64 when
 *        getauxval(AT_HWCAP) has HWCAP_CRC32 set.
 * Apple: returns it when sysctlEnabled(SYSCTL_CRC32_KEY) is true.
 * Otherwise: falls back to PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl2) DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl2)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64); return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
#endif
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist); return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
} }
/*
 * Dispatcher for isal_deflate_icf_body_lvl3.
 * Every path (Linux with HWCAP_CRC32, Apple with SYSCTL_CRC32_KEY, and the
 * final fallback) returns PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy), so
 * the capability checks do not change which provider is selected.
 * NOTE(review): confirm whether the fallback was meant to be a different
 * (basic) provider.
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl3) DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl3)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy); return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
#endif
return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy); return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
} }
/*
 * Dispatcher for isal_deflate_icf_finish_lvl3.
 * Linux: returns PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base) when
 *        getauxval(AT_HWCAP) has HWCAP_CRC32 set.
 * Apple: returns the same when sysctlEnabled(SYSCTL_CRC32_KEY) is true.
 * Otherwise: falls back to PROVIDER_BASIC(isal_deflate_icf_finish_hash_map).
 * NOTE(review): the accelerated path names a *_base symbol; confirm whether
 * it actually differs from the PROVIDER_BASIC fallback.
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3) DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base); return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base);
#endif
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map); return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map);
} }
@ -125,64 +160,92 @@ DEFINE_INTERFACE_DISPATCHER(encode_deflate_icf)
/*
 * Dispatcher for isal_update_histogram.
 * Linux: returns isal_update_histogram_aarch64 when getauxval(AT_HWCAP) has
 *        HWCAP_CRC32 set.
 * Apple: returns it when sysctlEnabled(SYSCTL_CRC32_KEY) is true.
 * Otherwise: falls back to PROVIDER_BASIC(isal_update_histogram).
 */
DEFINE_INTERFACE_DISPATCHER(isal_update_histogram) DEFINE_INTERFACE_DISPATCHER(isal_update_histogram)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(isal_update_histogram_aarch64); return PROVIDER_INFO(isal_update_histogram_aarch64);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(isal_update_histogram_aarch64);
#endif
return PROVIDER_BASIC(isal_update_histogram); return PROVIDER_BASIC(isal_update_histogram);
} }
/*
 * Dispatcher for gen_icf_map_lh1.
 * Linux: returns gen_icf_map_h1_aarch64 when getauxval(AT_HWCAP) has
 *        HWCAP_CRC32 set.
 * Apple: returns it when sysctlEnabled(SYSCTL_CRC32_KEY) is true.
 * Otherwise: falls back to PROVIDER_BASIC(gen_icf_map_h1).
 * The interface is named *_lh1 while the providers are named *_h1.
 */
DEFINE_INTERFACE_DISPATCHER(gen_icf_map_lh1) DEFINE_INTERFACE_DISPATCHER(gen_icf_map_lh1)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) { if (auxval & HWCAP_CRC32) {
return PROVIDER_INFO(gen_icf_map_h1_aarch64); return PROVIDER_INFO(gen_icf_map_h1_aarch64);
} }
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(gen_icf_map_h1_aarch64);
#endif
return PROVIDER_BASIC(gen_icf_map_h1); return PROVIDER_BASIC(gen_icf_map_h1);
} }
/*
 * Dispatcher for isal_deflate_hash_lvl0.
 * Linux: returns isal_deflate_hash_aarch64 when getauxval(AT_HWCAP) has
 *        HWCAP_CRC32 set.
 * Apple: returns it when sysctlEnabled(SYSCTL_CRC32_KEY) is true.
 * Otherwise: falls back to PROVIDER_BASIC(isal_deflate_hash).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl0) DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl0)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(isal_deflate_hash_aarch64); return PROVIDER_INFO(isal_deflate_hash_aarch64);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(isal_deflate_hash_aarch64);
#endif
return PROVIDER_BASIC(isal_deflate_hash); return PROVIDER_BASIC(isal_deflate_hash);
} }
/*
 * Dispatcher for isal_deflate_hash_lvl1.
 * Resolves to the same providers as the lvl0 hash dispatcher:
 * Linux: isal_deflate_hash_aarch64 when getauxval(AT_HWCAP) has HWCAP_CRC32.
 * Apple: the same routine when sysctlEnabled(SYSCTL_CRC32_KEY) is true.
 * Otherwise: falls back to PROVIDER_BASIC(isal_deflate_hash).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl1) DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl1)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(isal_deflate_hash_aarch64); return PROVIDER_INFO(isal_deflate_hash_aarch64);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(isal_deflate_hash_aarch64);
#endif
return PROVIDER_BASIC(isal_deflate_hash); return PROVIDER_BASIC(isal_deflate_hash);
} }
/*
 * Dispatcher for isal_deflate_hash_lvl2.
 * Resolves to the same providers as the other hash-level dispatchers:
 * Linux: isal_deflate_hash_aarch64 when getauxval(AT_HWCAP) has HWCAP_CRC32.
 * Apple: the same routine when sysctlEnabled(SYSCTL_CRC32_KEY) is true.
 * Otherwise: falls back to PROVIDER_BASIC(isal_deflate_hash).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl2) DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl2)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(isal_deflate_hash_aarch64); return PROVIDER_INFO(isal_deflate_hash_aarch64);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(isal_deflate_hash_aarch64);
#endif
return PROVIDER_BASIC(isal_deflate_hash); return PROVIDER_BASIC(isal_deflate_hash);
} }
/*
 * Dispatcher for isal_deflate_hash_lvl3.
 * Resolves to the same providers as the other hash-level dispatchers:
 * Linux: isal_deflate_hash_aarch64 when getauxval(AT_HWCAP) has HWCAP_CRC32.
 * Apple: the same routine when sysctlEnabled(SYSCTL_CRC32_KEY) is true.
 * Otherwise: falls back to PROVIDER_BASIC(isal_deflate_hash).
 */
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl3) DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl3)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(isal_deflate_hash_aarch64); return PROVIDER_INFO(isal_deflate_hash_aarch64);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(isal_deflate_hash_aarch64);
#endif
return PROVIDER_BASIC(isal_deflate_hash); return PROVIDER_BASIC(isal_deflate_hash);
} }
/*
 * Dispatcher for decode_huffman_code_block_stateless.
 * Linux: returns decode_huffman_code_block_stateless_aarch64 when
 *        getauxval(AT_HWCAP) has HWCAP_CRC32 set.
 * Apple: returns it when sysctlEnabled(SYSCTL_CRC32_KEY) is true.
 * Otherwise: falls back to PROVIDER_BASIC(decode_huffman_code_block_stateless).
 */
DEFINE_INTERFACE_DISPATCHER(decode_huffman_code_block_stateless) DEFINE_INTERFACE_DISPATCHER(decode_huffman_code_block_stateless)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_CRC32) if (auxval & HWCAP_CRC32)
return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64); return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_CRC32_KEY))
return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64);
#endif
return PROVIDER_BASIC(decode_huffman_code_block_stateless); return PROVIDER_BASIC(decode_huffman_code_block_stateless);
} }

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a .arch armv8-a
.text .text
.align 2 .align 2
@ -48,8 +51,10 @@ declare Macros
.text .text
.align 2 .align 2
.global set_long_icf_fg_aarch64 .global cdecl(set_long_icf_fg_aarch64)
#ifndef __APPLE__
.type set_long_icf_fg_aarch64, %function .type set_long_icf_fg_aarch64, %function
#endif
/* /*
void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size, void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
@ -69,7 +74,7 @@ void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t inp
/* local variable */ /* local variable */
declare_generic_reg len, 7,w declare_generic_reg len, 7,w
declare_generic_reg dist_code, 8,w declare_generic_reg dist_code, 8,w
declare_generic_reg shortest_match_len 9,w declare_generic_reg shortest_match_len, 9,w
declare_generic_reg len_max, 10,w declare_generic_reg len_max, 10,w
declare_generic_reg dist_extra, 11,w declare_generic_reg dist_extra, 11,w
declare_generic_reg const_8, 13,x declare_generic_reg const_8, 13,x
@ -90,7 +95,7 @@ void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t inp
.equ SHORTEST_MATCH, 4 .equ SHORTEST_MATCH, 4
.equ LEN_MAX_CONST, 512 .equ LEN_MAX_CONST, 512
set_long_icf_fg_aarch64: cdecl(set_long_icf_fg_aarch64):
stp x29, x30, [sp, -192]! stp x29, x30, [sp, -192]!
add x29, sp, 0 add x29, sp, 0
stp x21, x22, [sp, 32] stp x21, x22, [sp, 32]
@ -103,11 +108,18 @@ set_long_icf_fg_aarch64:
add end_in, next_in_param, input_size_param add end_in, next_in_param, input_size_param
mov match_lookup, match_lookup_param mov match_lookup, match_lookup_param
#ifndef __APPLE__
adrp x1, .data_dist_start adrp x1, .data_dist_start
mov x2, DIST_START_SIZE // 128 mov x2, DIST_START_SIZE // 128
add x1, x1, :lo12:.data_dist_start add x1, x1, :lo12:.data_dist_start
mov x0, dist_start mov x0, dist_start
bl memcpy #else
adrp x1, .data_dist_start@PAGE
mov x2, DIST_START_SIZE // 128
add x1, x1, .data_dist_start@PAGEOFF
mov x0, dist_start
#endif
bl cdecl(memcpy)
add x_tmp0, end_processed, ISAL_LOOK_AHEAD // 288 add x_tmp0, end_processed, ISAL_LOOK_AHEAD // 288
cmp end_in, x_tmp0 cmp end_in, x_tmp0
@ -182,9 +194,11 @@ set_long_icf_fg_aarch64:
ldr x23, [sp, 48] ldr x23, [sp, 48]
ldp x29, x30, [sp], 192 ldp x29, x30, [sp], 192
ret ret
#ifndef __APPLE__
.size set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64 .size set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
#endif
.section .rodata ASM_DEF_RODATA
.align 3 .align 3
.set .data_dist_start,. + 0 .set .data_dist_start,. + 0
.real_data_dist_start: .real_data_dist_start:

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc .arch armv8-a+crc
.text .text
.align 2 .align 2
@ -45,8 +48,10 @@ declare Macros
x_\name .req x\reg x_\name .req x\reg
.endm .endm
.global isal_deflate_icf_body_hash_hist_aarch64 .global cdecl(isal_deflate_icf_body_hash_hist_aarch64)
#ifndef __APPLE__
.type isal_deflate_icf_body_hash_hist_aarch64, %function .type isal_deflate_icf_body_hash_hist_aarch64, %function
#endif
/* /*
void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream); void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
*/ */
@ -126,7 +131,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
declare_generic_reg tmp0, 4,x declare_generic_reg tmp0, 4,x
declare_generic_reg tmp1, 5,x declare_generic_reg tmp1, 5,x
isal_deflate_icf_body_hash_hist_aarch64: cdecl(isal_deflate_icf_body_hash_hist_aarch64):
stp x29, x30, [sp, -80]! stp x29, x30, [sp, -80]!
add x29, sp, 0 add x29, sp, 0
str x24, [sp, 56] str x24, [sp, 56]
@ -360,5 +365,6 @@ isal_deflate_icf_body_hash_hist_aarch64:
ldr x24, [sp, 56] ldr x24, [sp, 56]
ldp x29, x30, [sp], 80 ldp x29, x30, [sp], 80
ret ret
#ifndef __APPLE__
.size isal_deflate_icf_body_hash_hist_aarch64, .-isal_deflate_icf_body_hash_hist_aarch64 .size isal_deflate_icf_body_hash_hist_aarch64, .-isal_deflate_icf_body_hash_hist_aarch64
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc .arch armv8-a+crc
.text .text
@ -129,7 +132,9 @@ void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream);
declare_generic_reg tmp3, 28,x declare_generic_reg tmp3, 28,x
.align 2 .align 2
#ifndef __APPLE__
.type write_deflate_icf_constprop, %function .type write_deflate_icf_constprop, %function
#endif
write_deflate_icf_constprop: write_deflate_icf_constprop:
ldrh w2, [x0] ldrh w2, [x0]
mov w3, 30 mov w3, 30
@ -141,10 +146,14 @@ write_deflate_icf_constprop:
ubfx x1, x1, 16, 3 ubfx x1, x1, 16, 3
strh w1, [x0, 2] strh w1, [x0, 2]
ret ret
#ifndef __APPLE__
.size write_deflate_icf_constprop, .-write_deflate_icf_constprop .size write_deflate_icf_constprop, .-write_deflate_icf_constprop
#endif
.align 2 .align 2
#ifndef __APPLE__
.type write_deflate_icf, %function .type write_deflate_icf, %function
#endif
write_deflate_icf: write_deflate_icf:
ldrh w4, [x0] ldrh w4, [x0]
bfi w4, w1, 0, 10 bfi w4, w1, 0, 10
@ -156,10 +165,14 @@ write_deflate_icf:
bfi w1, w3, 3, 13 bfi w1, w3, 3, 13
strh w1, [x0, 2] strh w1, [x0, 2]
ret ret
#ifndef __APPLE__
.size write_deflate_icf, .-write_deflate_icf .size write_deflate_icf, .-write_deflate_icf
#endif
.align 2 .align 2
#ifndef __APPLE__
.type update_state, %function .type update_state, %function
#endif
update_state: update_state:
sub x7, x2, x1 sub x7, x2, x1
ldr x4, [x0, 48] ldr x4, [x0, 48]
@ -179,12 +192,16 @@ update_state:
str x5, [x4, 4688] str x5, [x4, 4688]
str x6, [x4, 4696] str x6, [x4, 4696]
ret ret
#ifndef __APPLE__
.size update_state, .-update_state .size update_state, .-update_state
#endif
.align 2 .align 2
.global isal_deflate_icf_finish_hash_hist_aarch64 .global cdecl(isal_deflate_icf_finish_hash_hist_aarch64)
#ifndef __APPLE__
.type isal_deflate_icf_finish_hash_hist_aarch64, %function .type isal_deflate_icf_finish_hash_hist_aarch64, %function
isal_deflate_icf_finish_hash_hist_aarch64: #endif
cdecl(isal_deflate_icf_finish_hash_hist_aarch64):
ldr w_end_in, [stream, 8] // stream->avail_in ldr w_end_in, [stream, 8] // stream->avail_in
cbz w_end_in, .stream_not_available cbz w_end_in, .stream_not_available
@ -393,5 +410,6 @@ isal_deflate_icf_finish_hash_hist_aarch64:
str w1, [stream, offset_state_state] // 84 str w1, [stream, offset_state_state] // 84
.done: .done:
ret ret
#ifndef __APPLE__
.size isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64 .size isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64
#endif

View File

@ -26,6 +26,9 @@
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/ **********************************************************************/
#include "../include/aarch64_label.h"
.arch armv8-a+crc .arch armv8-a+crc
.text .text
.align 2 .align 2
@ -63,17 +66,24 @@ declare Macros
.endm .endm
/*
 * convert_length_to_len_sym length, length_out, tmp0
 *
 * Looks up a 32-bit entry at index w_<length> in the table anchored at
 * .len_to_code_tab_lanchor and adds 256 to it.
 *
 *   length      register suffix holding the index (read as w_<length>)
 *   length_out  register suffix receiving the result (w_<length_out>)
 *   tmp0        register suffix used as scratch for the table address
 *               (x_<tmp0> is overwritten)
 */
.macro	convert_length_to_len_sym	length:req,length_out:req,tmp0:req
	// x_tmp0 = address of the table; ELF and Mach-O spell the
	// page / page-offset relocations differently.
#ifndef __APPLE__
	adrp	x_\tmp0, .len_to_code_tab_lanchor
	add	x_\tmp0, x_\tmp0, :lo12:.len_to_code_tab_lanchor
#else
	adrp	x_\tmp0, .len_to_code_tab_lanchor@PAGE
	add	x_\tmp0, x_\tmp0, .len_to_code_tab_lanchor@PAGEOFF
#endif
	// uxtw 2: zero-extend the index and scale it by 4 (one .word per entry)
	ldr	w_\length_out, [x_\tmp0, w_\length, uxtw 2]
	add	w_\length_out, w_\length_out, 256
.endm
.section .rodata ASM_DEF_RODATA
.align 4 .align 4
.len_to_code_tab_lanchor = . + 0 .len_to_code_tab_lanchor = . + 0
#ifndef __APPLE__
.type len_to_code_tab, %object .type len_to_code_tab, %object
.size len_to_code_tab, 1056 .size len_to_code_tab, 1056
#endif
len_to_code_tab: len_to_code_tab:
.word 0x00, 0x00, 0x00 .word 0x00, 0x00, 0x00
.word 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 .word 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
@ -111,9 +121,11 @@ len_to_code_tab:
.word 0x00, 0x00, 0x00, 0x00, 0x00 .word 0x00, 0x00, 0x00, 0x00, 0x00
.text .text
.global isal_update_histogram_aarch64 .global cdecl(isal_update_histogram_aarch64)
.arch armv8-a+crc .arch armv8-a+crc
#ifndef __APPLE__
.type isal_update_histogram_aarch64, %function .type isal_update_histogram_aarch64, %function
#endif
/* /*
void isal_update_histogram_aarch64(uint8_t * start_stream, int length, void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
@ -157,7 +169,7 @@ void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
.equ hash_offset, (dist_offset + 8*DIST_LEN) // 2528 .equ hash_offset, (dist_offset + 8*DIST_LEN) // 2528
.equ hash_table_size, (8*1024*2) // 16384 .equ hash_table_size, (8*1024*2) // 16384
isal_update_histogram_aarch64: cdecl(isal_update_histogram_aarch64):
cmp w_length, 0 cmp w_length, 0
ble .done ble .done
@ -176,7 +188,7 @@ isal_update_histogram_aarch64:
mov x0, last_seen mov x0, last_seen
mov w1, 0 mov w1, 0
mov x2, hash_table_size mov x2, hash_table_size
bl memset bl cdecl(memset)
cmp current, loop_end_iter cmp current, loop_end_iter
bcs .loop_end bcs .loop_end
@ -308,4 +320,6 @@ isal_update_histogram_aarch64:
.align 2 .align 2
.done: .done:
ret ret
#ifndef __APPLE__
.size isal_update_histogram_aarch64, .-isal_update_histogram_aarch64 .size isal_update_histogram_aarch64, .-isal_update_histogram_aarch64
#endif

18
include/aarch64_label.h Normal file
View File

@ -0,0 +1,18 @@
#ifndef __AARCH64_LABEL_H__
#define __AARCH64_LABEL_H__

/*
 * ASM_DEF_RODATA expands to the assembler directive that switches to the
 * read-only data section: __TEXT,__const when __APPLE__ is defined,
 * .rodata otherwise.
 */
#if defined(__APPLE__)
#define ASM_DEF_RODATA .section __TEXT,__const
#else
#define ASM_DEF_RODATA .section .rodata
#endif

/*
 * cdecl(x) pastes __USER_LABEL_PREFIX__ in front of x.  The two-level
 * CONCAT1/CONCAT2 indirection makes the prefix macro expand before the
 * ## paste happens.  When the compiler does not define the prefix macro,
 * cdecl(x) is simply x.
 */
#if defined(__USER_LABEL_PREFIX__)
#define CONCAT2(a, b) a ## b
#define CONCAT1(a, b) CONCAT2(a, b)
#define cdecl(x) CONCAT1(__USER_LABEL_PREFIX__, x)
#else
#define cdecl(x) x
#endif

#endif /* __AARCH64_LABEL_H__ */

View File

@ -31,7 +31,14 @@
#ifndef __aarch64__ #ifndef __aarch64__
#error "This file is for aarch64 only" #error "This file is for aarch64 only"
#endif #endif
#ifdef __APPLE__
/* sysctl names used on macOS to probe optional CPU features at run time. */
#define SYSCTL_PMULL_KEY "hw.optional.arm.FEAT_PMULL" // the FEAT_* sysctl entries are available from macOS 12 onwards
#define SYSCTL_CRC32_KEY "hw.optional.armv8_crc32"
#define SYSCTL_SVE_KEY "hw.optional.arm.FEAT_SVE" // NOTE(review): this key name is a guess; verify it against future macOS releases
#else
#include <asm/hwcap.h> #include <asm/hwcap.h>
#endif
#include "aarch64_label.h"
#ifdef __ASSEMBLY__ #ifdef __ASSEMBLY__
/** /**
* # mbin_interface : the wrapper layer for isal-l api * # mbin_interface : the wrapper layer for isal-l api
@ -48,17 +55,18 @@
* 4. The dispatcher should return the right function pointer, revision, and an information string. * 4. The dispatcher should return the right function pointer, revision, and an information string.
**/ **/
.macro mbin_interface name:req .macro mbin_interface name:req
.extern \name\()_dispatcher .extern cdecl(\name\()_dispatcher)
.section .data .data
.balign 8 .balign 8
.global \name\()_dispatcher_info .global cdecl(\name\()_dispatcher_info)
#ifndef __APPLE__
.type \name\()_dispatcher_info,%object .type \name\()_dispatcher_info,%object
#endif
\name\()_dispatcher_info: cdecl(\name\()_dispatcher_info):
.quad \name\()_mbinit //func_entry .quad \name\()_mbinit //func_entry
#ifndef __APPLE__
.size \name\()_dispatcher_info,. - \name\()_dispatcher_info .size \name\()_dispatcher_info,. - \name\()_dispatcher_info
#endif
.balign 8 .balign 8
.text .text
\name\()_mbinit: \name\()_mbinit:
@ -108,7 +116,7 @@
*/ */
bl \name\()_dispatcher bl cdecl(\name\()_dispatcher)
//restore temp/indirect result registers //restore temp/indirect result registers
ldp x8, x9, [sp, 16] ldp x8, x9, [sp, 16]
.cfi_restore 8 .cfi_restore 8
@ -150,16 +158,24 @@
.cfi_def_cfa_offset 0 .cfi_def_cfa_offset 0
.cfi_endproc .cfi_endproc
.global \name .global cdecl(\name)
#ifndef __APPLE__
.type \name,%function .type \name,%function
#endif
.align 2 .align 2
\name\(): cdecl(\name\()):
#ifndef __APPLE__
adrp x9, :got:\name\()_dispatcher_info adrp x9, :got:\name\()_dispatcher_info
ldr x9, [x9, #:got_lo12:\name\()_dispatcher_info] ldr x9, [x9, #:got_lo12:\name\()_dispatcher_info]
#else
adrp x9, cdecl(\name\()_dispatcher_info)@GOTPAGE
ldr x9, [x9, #cdecl(\name\()_dispatcher_info)@GOTPAGEOFF]
#endif
ldr x10,[x9] ldr x10,[x9]
br x10 br x10
#ifndef __APPLE__
.size \name,. - \name .size \name,. - \name
#endif
.endm .endm
/** /**
@ -168,32 +184,53 @@
*/ */
.macro mbin_interface_base name:req, base:req
	/*
	 * Emits the public entry point <name> as a trampoline that always
	 * jumps to <base>: <name>_dispatcher_info holds the address of
	 * <base>, and <name> loads that slot through the GOT and branches.
	 *
	 *   name  public symbol to define
	 *   base  external symbol stored as the jump target
	 *
	 * Clobbers x9 and x10.
	 *
	 * Every reference to a global symbol goes through the same cdecl()
	 * spelling that is used where the symbol is defined, so the names
	 * match on ELF (no prefix) and on Mach-O (leading underscore).
	 */
	.extern cdecl(\base)
	.data
	.balign 8
	.global cdecl(\name\()_dispatcher_info)
#ifndef __APPLE__
	.type   \name\()_dispatcher_info,%object
#endif
	cdecl(\name\()_dispatcher_info):
	.quad   cdecl(\base)	//func_entry
#ifndef __APPLE__
	.size   \name\()_dispatcher_info,. - \name\()_dispatcher_info
#endif
	.balign 8
	.text
	.global cdecl(\name)
#ifndef __APPLE__
	.type \name,%function
#endif
	.align  2
	cdecl(\name\()):
	// x9 = address of <name>_dispatcher_info, taken from the GOT
#ifndef __APPLE__
	adrp	x9, :got:\name\()_dispatcher_info
	ldr	x9, [x9, #:got_lo12:\name\()_dispatcher_info]
#else
	adrp	x9, cdecl(\name\()_dispatcher_info)@GOTPAGE
	ldr	x9, [x9, #cdecl(\name\()_dispatcher_info)@GOTPAGEOFF]
#endif
	ldr	x10,[x9]	// x10 = stored func_entry
	br	x10		// tail-jump, x30 is left untouched
#ifndef __APPLE__
	.size \name,. - \name
#endif
.endm
#else /* __ASSEMBLY__ */ #else /* __ASSEMBLY__ */
#include <stdint.h>
#if defined(__linux__)
#include <sys/auxv.h> #include <sys/auxv.h>
#elif defined(__APPLE__)
#include <sys/sysctl.h>
#include <stddef.h>
/*
 * Read an integer sysctl value by name.
 *
 * name: sysctl name to look up (for example one of the SYSCTL_*_KEY strings).
 *
 * Returns the value stored by sysctlbyname(), or 0 when the lookup fails
 * (for instance because the name does not exist on this OS release).
 */
static inline int sysctlEnabled(const char* name){
	/*
	 * Start from 0 so that a successful call which writes fewer than
	 * sizeof(int) bytes cannot leave indeterminate bits in the result.
	 */
	int enabled = 0;
	size_t size = sizeof(enabled);

	if (sysctlbyname(name, &enabled, &size, NULL, 0) != 0)
		return 0;
	return enabled;
}
#endif
#define DEFINE_INTERFACE_DISPATCHER(name) \ #define DEFINE_INTERFACE_DISPATCHER(name) \
@ -298,10 +335,12 @@
static inline uint32_t get_micro_arch_id(void) static inline uint32_t get_micro_arch_id(void)
{ {
uint32_t id=CPU_IMPLEMENTER_RESERVE; uint32_t id=CPU_IMPLEMENTER_RESERVE;
#ifndef __APPLE__
if ((getauxval(AT_HWCAP) & HWCAP_CPUID)) { if ((getauxval(AT_HWCAP) & HWCAP_CPUID)) {
/** Here will trap into kernel space */ /** Here will trap into kernel space */
asm("mrs %0, MIDR_EL1 " : "=r" (id)); asm("mrs %0, MIDR_EL1 " : "=r" (id));
} }
#endif
return id&0xff00fff0; return id&0xff00fff0;
} }

View File

@ -30,10 +30,12 @@
DEFINE_INTERFACE_DISPATCHER(isal_zero_detect) DEFINE_INTERFACE_DISPATCHER(isal_zero_detect)
{ {
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP); unsigned long auxval = getauxval(AT_HWCAP);
if (auxval & HWCAP_ASIMD) if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(mem_zero_detect_neon); return PROVIDER_INFO(mem_zero_detect_neon);
#elif defined(__APPLE__)
return PROVIDER_INFO(mem_zero_detect_neon);
#endif
return PROVIDER_BASIC(mem_zero_detect); return PROVIDER_BASIC(mem_zero_detect);
} }

View File

@ -27,6 +27,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
.text .text
.arch armv8-a .arch armv8-a
@ -36,10 +38,12 @@
// input: n -> x1 // input: n -> x1
// output: -> x0 (true or false) // output: -> x0 (true or false)
.global mem_zero_detect_neon .global cdecl(mem_zero_detect_neon)
#ifndef __APPLE__
.type mem_zero_detect_neon, %function .type mem_zero_detect_neon, %function
#endif
mem_zero_detect_neon: cdecl(mem_zero_detect_neon):
cmp x1, #(16*24-1) cmp x1, #(16*24-1)
b.ls .loop_16x24_end b.ls .loop_16x24_end

View File

@ -27,10 +27,13 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
.text .text
.global pq_check_neon .global cdecl(pq_check_neon)
#ifndef __APPLE__
.type pq_check_neon, %function .type pq_check_neon, %function
#endif
/* int pq_check_neon(int vects, int len, void **src) */ /* int pq_check_neon(int vects, int len, void **src) */
@ -85,7 +88,7 @@ v_0x80 .req v29
* +----------+ +------------------+ * +----------+ +------------------+
*/ */
pq_check_neon: cdecl(pq_check_neon):
sub x_src_ptr_end, x_src, #8 sub x_src_ptr_end, x_src, #8
sub w_vects, w_vects, #3 sub w_vects, w_vects, #3

View File

@ -27,10 +27,14 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
.text .text
.global pq_gen_neon .global cdecl(pq_gen_neon)
#ifndef __APPLE__
.type pq_gen_neon, %function .type pq_gen_neon, %function
#endif
/* int pq_gen_neon(int vects, int len, void **src) */ /* int pq_gen_neon(int vects, int len, void **src) */
@ -84,7 +88,7 @@ v_0x80 .req v29
* +----------+ +------------------+ * +----------+ +------------------+
*/ */
pq_gen_neon: cdecl(pq_gen_neon):
sub x_src_ptr_end, x_src, #8 sub x_src_ptr_end, x_src, #8
sub w_vects, w_vects, #3 sub w_vects, w_vects, #3

View File

@ -30,32 +30,48 @@
DEFINE_INTERFACE_DISPATCHER(xor_gen) DEFINE_INTERFACE_DISPATCHER(xor_gen)
{ {
#if defined(__linux__)
if (getauxval(AT_HWCAP) & HWCAP_ASIMD) if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
return PROVIDER_INFO(xor_gen_neon); return PROVIDER_INFO(xor_gen_neon);
#elif defined(__APPLE__)
return PROVIDER_INFO(xor_gen_neon);
#endif
return PROVIDER_BASIC(xor_gen); return PROVIDER_BASIC(xor_gen);
} }
DEFINE_INTERFACE_DISPATCHER(xor_check) DEFINE_INTERFACE_DISPATCHER(xor_check)
{ {
#if defined(__linux__)
if (getauxval(AT_HWCAP) & HWCAP_ASIMD) if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
return PROVIDER_INFO(xor_check_neon); return PROVIDER_INFO(xor_check_neon);
#elif defined(__APPLE__)
return PROVIDER_INFO(xor_check_neon);
#endif
return PROVIDER_BASIC(xor_check); return PROVIDER_BASIC(xor_check);
} }
DEFINE_INTERFACE_DISPATCHER(pq_gen) DEFINE_INTERFACE_DISPATCHER(pq_gen)
{ {
#if defined(__linux__)
if (getauxval(AT_HWCAP) & HWCAP_ASIMD) if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
return PROVIDER_INFO(pq_gen_neon); return PROVIDER_INFO(pq_gen_neon);
#elif defined(__APPLE__)
return PROVIDER_INFO(pq_gen_neon);
#endif
return PROVIDER_BASIC(pq_gen); return PROVIDER_BASIC(pq_gen);
} }
DEFINE_INTERFACE_DISPATCHER(pq_check) DEFINE_INTERFACE_DISPATCHER(pq_check)
{ {
#if defined(__linux__)
if (getauxval(AT_HWCAP) & HWCAP_ASIMD) if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
return PROVIDER_INFO(pq_check_neon); return PROVIDER_INFO(pq_check_neon);
#elif defined(__APPLE__)
return PROVIDER_INFO(pq_check_neon);
#endif
return PROVIDER_BASIC(pq_check); return PROVIDER_BASIC(pq_check);
} }

View File

@ -27,10 +27,14 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
.text .text
.global xor_check_neon .global cdecl(xor_check_neon)
#ifndef __APPLE__
.type xor_check_neon, %function .type xor_check_neon, %function
#endif
/* int xor_check_neon(int vects, int len, void **src) */ /* int xor_check_neon(int vects, int len, void **src) */
@ -76,7 +80,7 @@ w_xor .req w11
* src_ptr_end --> * src_ptr_end -->
*/ */
xor_check_neon: cdecl(xor_check_neon):
add x_src_ptr_end, x_src, x_vects, lsl #3 add x_src_ptr_end, x_src, x_vects, lsl #3
ldr x_src0, [x_src] ldr x_src0, [x_src]
add x_src0_end, x_src0, x_len add x_src0_end, x_src0, x_len

View File

@ -27,10 +27,14 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
######################################################################### #########################################################################
#include "../include/aarch64_label.h"
.text .text
.global xor_gen_neon .global cdecl(xor_gen_neon)
#ifndef __APPLE__
.type xor_gen_neon, %function .type xor_gen_neon, %function
#endif
/* int xor_gen_neon(int vects, int len, void **src) */ /* int xor_gen_neon(int vects, int len, void **src) */
@ -78,7 +82,7 @@ x_dst_ptr .req x11
* +----------+ +------------------+ * +----------+ +------------------+
*/ */
xor_gen_neon: cdecl(xor_gen_neon):
add x_dst_ptr, x_src, x_vects, lsl #3 add x_dst_ptr, x_src, x_vects, lsl #3
ldr x_dst, [x_dst_ptr, #-8]! ldr x_dst, [x_dst_ptr, #-8]!
ldr x_src0, [x_src] ldr x_src0, [x_src]