mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 09:23:50 +01:00
Fixes for aarch64 mac
- It should be fine to enable pmull always on Apple Silicon
- macOS 12+ is required for PMULL instruction.
- Changed the conditional macro to __APPLE__
- Rewritten dispatcher using sysctlbyname
- Use __USER_LABEL_PREFIX__
- Use __TEXT,__const as readonly section
- use ASM_DEF_RODATA macro
- fix func decl

Change-Id: I800593f21085d8187b480c8bb3ab2bd70c4a6974
Signed-off-by: Taiju Yamada <tyamada@bi.a.u-tokyo.ac.jp>
This commit is contained in:
parent
85716fe2fe
commit
1187583a97
@ -27,11 +27,15 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.arch armv8-a+crc+crypto
|
||||
.text
|
||||
.align 3
|
||||
.global crc16_t10dif_copy_pmull
|
||||
.global cdecl(crc16_t10dif_copy_pmull)
|
||||
#ifndef __APPLE__
|
||||
.type crc16_t10dif_copy_pmull, %function
|
||||
#endif
|
||||
|
||||
/* uint16_t crc16_t10dif_copy_pmull(uint16_t seed, uint8_t *dst, uint8_t *src, uint64_t len) */
|
||||
|
||||
@ -67,7 +71,7 @@ x_crc16tab .req x5
|
||||
x_src_saved .req x0
|
||||
x_dst_saved .req x12
|
||||
|
||||
crc16_t10dif_copy_pmull:
|
||||
cdecl(crc16_t10dif_copy_pmull):
|
||||
cmp x_len, 63
|
||||
sub sp, sp, #16
|
||||
uxth w_seed, w_seed
|
||||
@ -80,11 +84,19 @@ crc16_t10dif_copy_pmull:
|
||||
cmp x_len, x_tmp
|
||||
bls .end
|
||||
|
||||
#ifndef __APPLE__
|
||||
sxtw x_counter, w_counter
|
||||
adrp x_crc16tab, .LANCHOR0
|
||||
sub x_src, x_src, x_counter
|
||||
sub x_dst, x_dst, x_counter
|
||||
add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0
|
||||
#else
|
||||
sxtw x_counter, w_counter
|
||||
adrp x_crc16tab, .LANCHOR0@PAGE
|
||||
sub x_src, x_src, x_counter
|
||||
sub x_dst, x_dst, x_counter
|
||||
add x_crc16tab, x_crc16tab, .LANCHOR0@PAGEOFF
|
||||
#endif
|
||||
|
||||
.align 2
|
||||
.crc_table_loop:
|
||||
@ -145,8 +157,13 @@ v_tmp3 .req v16
|
||||
stp q_x0, q_x1, [x_dst]
|
||||
stp q_x2, q_x3, [x_dst, 32]
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x_tmp, .shuffle_mask_lanchor
|
||||
ldr q_permutation, [x_tmp, :lo12:.shuffle_mask_lanchor]
|
||||
#else
|
||||
adrp x_tmp, .shuffle_mask_lanchor@PAGE
|
||||
ldr q_permutation, [x_tmp, .shuffle_mask_lanchor@PAGEOFF]
|
||||
#endif
|
||||
|
||||
tbl v_tmp1.16b, {v_x0.16b}, v7.16b
|
||||
eor v_x0.16b, v_tmp3.16b, v_tmp1.16b
|
||||
@ -193,7 +210,7 @@ v_tmp1_x3 .req v27
|
||||
q_fold_const .req q17
|
||||
v_fold_const .req v17
|
||||
|
||||
ldr q_fold_const, =0x371d00000000000087e70000;
|
||||
ldr q_fold_const, fold_constant
|
||||
|
||||
.align 2
|
||||
.crc_fold_loop:
|
||||
@ -358,23 +375,32 @@ v_br1 .req v5
|
||||
umov x0, v_x0.d[0]
|
||||
ubfx x0, x0, 16, 16
|
||||
b .crc_table_loop_pre
|
||||
|
||||
#ifndef __APPLE__
|
||||
.size crc16_t10dif_copy_pmull, .-crc16_t10dif_copy_pmull
|
||||
#endif
|
||||
|
||||
.section .rodata
|
||||
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
fold_constant:
|
||||
.word 0x87e70000
|
||||
.word 0x00000000
|
||||
.word 0x371d0000
|
||||
.word 0x00000000
|
||||
.shuffle_mask_lanchor = . + 0
|
||||
#ifndef __APPLE__
|
||||
.type shuffle_mask, %object
|
||||
.size shuffle_mask, 16
|
||||
#endif
|
||||
shuffle_mask:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8
|
||||
.byte 7, 6, 5, 4, 3, 2, 1, 0
|
||||
|
||||
.align 4
|
||||
.LANCHOR0 = . + 0
|
||||
#ifndef __APPLE__
|
||||
.type crc16tab, %object
|
||||
.size crc16tab, 512
|
||||
#endif
|
||||
crc16tab:
|
||||
.hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b
|
||||
.hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6
|
||||
|
@ -27,11 +27,15 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.arch armv8-a+crc+crypto
|
||||
.text
|
||||
.align 3
|
||||
.global crc16_t10dif_pmull
|
||||
.global cdecl(crc16_t10dif_pmull)
|
||||
#ifndef __APPLE__
|
||||
.type crc16_t10dif_pmull, %function
|
||||
#endif
|
||||
|
||||
/* uint16_t crc16_t10dif_pmull(uint16_t seed, uint8_t *buf, uint64_t len) */
|
||||
|
||||
@ -65,7 +69,7 @@ x_counter .req x3
|
||||
x_crc16tab .req x4
|
||||
x_buf_saved .req x0
|
||||
|
||||
crc16_t10dif_pmull:
|
||||
cdecl(crc16_t10dif_pmull):
|
||||
cmp x_len, 63
|
||||
sub sp, sp, #16
|
||||
uxth w_seed, w_seed
|
||||
@ -78,10 +82,17 @@ crc16_t10dif_pmull:
|
||||
cmp x_len, x_tmp
|
||||
bls .end
|
||||
|
||||
#ifndef __APPLE__
|
||||
sxtw x_counter, w_counter
|
||||
adrp x_crc16tab, .LANCHOR0
|
||||
sub x_buf, x_buf, x_counter
|
||||
add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0
|
||||
#else
|
||||
sxtw x_counter, w_counter
|
||||
adrp x_crc16tab, .LANCHOR0@PAGE
|
||||
sub x_buf, x_buf, x_counter
|
||||
add x_crc16tab, x_crc16tab, .LANCHOR0@PAGEOFF
|
||||
#endif
|
||||
|
||||
.align 2
|
||||
.crc_table_loop:
|
||||
@ -137,8 +148,13 @@ v_tmp3 .req v16
|
||||
ldp q_x0, q_x1, [x_buf]
|
||||
ldp q_x2, q_x3, [x_buf, 32]
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x_tmp, .shuffle_mask_lanchor
|
||||
ldr q7, [x_tmp, :lo12:.shuffle_mask_lanchor]
|
||||
#else
|
||||
adrp x_tmp, .shuffle_mask_lanchor@PAGE
|
||||
ldr q7, [x_tmp, .shuffle_mask_lanchor@PAGEOFF]
|
||||
#endif
|
||||
|
||||
tbl v_tmp1.16b, {v_x0.16b}, v7.16b
|
||||
eor v_x0.16b, v_tmp3.16b, v_tmp1.16b
|
||||
@ -185,7 +201,7 @@ v_tmp1_x3 .req v27
|
||||
q_fold_const .req q17
|
||||
v_fold_const .req v17
|
||||
|
||||
ldr q_fold_const, =0x371d00000000000087e70000;
|
||||
ldr q_fold_const, fold_constant
|
||||
|
||||
.align 2
|
||||
.crc_fold_loop:
|
||||
@ -344,22 +360,32 @@ v_br1 .req v5
|
||||
ubfx x0, x0, 16, 16
|
||||
b .crc_table_loop_pre
|
||||
|
||||
#ifndef __APPLE__
|
||||
.size crc16_t10dif_pmull, .-crc16_t10dif_pmull
|
||||
#endif
|
||||
|
||||
.section .rodata
|
||||
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
fold_constant:
|
||||
.word 0x87e70000
|
||||
.word 0x00000000
|
||||
.word 0x371d0000
|
||||
.word 0x00000000
|
||||
.shuffle_mask_lanchor = . + 0
|
||||
#ifndef __APPLE__
|
||||
.type shuffle_mask, %object
|
||||
.size shuffle_mask, 16
|
||||
#endif
|
||||
shuffle_mask:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8
|
||||
.byte 7, 6, 5, 4, 3, 2, 1, 0
|
||||
|
||||
.align 4
|
||||
.LANCHOR0 = . + 0
|
||||
#ifndef __APPLE__
|
||||
.type crc16tab, %object
|
||||
.size crc16tab, 512
|
||||
#endif
|
||||
crc16tab:
|
||||
.hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b
|
||||
.hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6
|
||||
|
@ -27,8 +27,7 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.macro crc32_hw_common poly_type
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.macro declare_var_vector_reg name:req,reg:req
|
||||
\name\()_q .req q\reg
|
||||
@ -429,4 +430,3 @@ start_final:
|
||||
.endif
|
||||
ret
|
||||
.endm
|
||||
|
||||
|
@ -88,8 +88,12 @@
|
||||
);
|
||||
*/
|
||||
|
||||
.global crc32_gzip_refl_3crc_fold
|
||||
.global cdecl(crc32_gzip_refl_3crc_fold)
|
||||
#ifndef __APPLE__
|
||||
.type crc32_gzip_refl_3crc_fold, %function
|
||||
crc32_gzip_refl_3crc_fold:
|
||||
#endif
|
||||
cdecl(crc32_gzip_refl_3crc_fold):
|
||||
crc32_3crc_fold crc32
|
||||
#ifndef __APPLE__
|
||||
.size crc32_gzip_refl_3crc_fold, .-crc32_gzip_refl_3crc_fold
|
||||
#endif
|
||||
|
@ -59,8 +59,12 @@
|
||||
* uint32_t crc32_gzip_refl_crc_ext(const unsigned char *BUF,
|
||||
* uint64_t LEN,uint32_t wCRC);
|
||||
*/
|
||||
.global crc32_gzip_refl_crc_ext
|
||||
.global cdecl(crc32_gzip_refl_crc_ext)
|
||||
#ifndef __APPLE__
|
||||
.type crc32_gzip_refl_crc_ext, %function
|
||||
crc32_gzip_refl_crc_ext:
|
||||
#endif
|
||||
cdecl(crc32_gzip_refl_crc_ext):
|
||||
crc32_hw_common crc32
|
||||
#ifndef __APPLE__
|
||||
.size crc32_gzip_refl_crc_ext, .-crc32_gzip_refl_crc_ext
|
||||
#endif
|
||||
|
@ -27,6 +27,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
#include "crc32_gzip_refl_pmull.h"
|
||||
#include "crc32_refl_common_pmull.h"
|
||||
|
||||
|
@ -47,11 +47,13 @@
|
||||
.equ br_high_b2, 0x1
|
||||
|
||||
.text
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
.set .lanchor_crc_tab,. + 0
|
||||
#ifndef __APPLE__
|
||||
.type crc32_table_gzip_refl, %object
|
||||
.size crc32_table_gzip_refl, 1024
|
||||
#endif
|
||||
crc32_table_gzip_refl:
|
||||
.word 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3
|
||||
.word 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91
|
||||
|
@ -27,6 +27,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
#include "crc32_ieee_norm_pmull.h"
|
||||
#include "crc32_norm_common_pmull.h"
|
||||
|
||||
|
@ -47,11 +47,13 @@
|
||||
.equ br_high_b2, 0x1
|
||||
|
||||
.text
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
.set .lanchor_crc_tab,. + 0
|
||||
#ifndef __APPLE__
|
||||
.type crc32_table_ieee_norm, %object
|
||||
.size crc32_table_ieee_norm, 1024
|
||||
#endif
|
||||
crc32_table_ieee_norm:
|
||||
.word 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005
|
||||
.word 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd
|
||||
|
@ -90,8 +90,12 @@
|
||||
|
||||
*/
|
||||
|
||||
.global crc32_iscsi_3crc_fold
|
||||
.global cdecl(crc32_iscsi_3crc_fold)
|
||||
#ifndef __APPLE__
|
||||
.type crc32_iscsi_3crc_fold, %function
|
||||
crc32_iscsi_3crc_fold:
|
||||
#endif
|
||||
cdecl(crc32_iscsi_3crc_fold):
|
||||
crc32_3crc_fold crc32c
|
||||
#ifndef __APPLE__
|
||||
.size crc32_iscsi_3crc_fold, .-crc32_iscsi_3crc_fold
|
||||
#endif
|
||||
|
@ -58,8 +58,12 @@
|
||||
* uint32_t crc32_iscsi_crc_ext(const unsigned char *BUF,
|
||||
* uint64_t LEN,uint32_t wCRC);
|
||||
*/
|
||||
.global crc32_iscsi_crc_ext
|
||||
.global cdecl(crc32_iscsi_crc_ext)
|
||||
#ifndef __APPLE__
|
||||
.type crc32_iscsi_crc_ext, %function
|
||||
crc32_iscsi_crc_ext:
|
||||
#endif
|
||||
cdecl(crc32_iscsi_crc_ext):
|
||||
crc32_hw_common crc32c
|
||||
#ifndef __APPLE__
|
||||
.size crc32_iscsi_crc_ext, .-crc32_iscsi_crc_ext
|
||||
#endif
|
||||
|
@ -27,6 +27,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
#include "crc32_iscsi_refl_pmull.h"
|
||||
#include "crc32_refl_common_pmull.h"
|
||||
|
||||
@ -35,9 +36,11 @@ crc32_refl_func crc32_iscsi_refl_pmull_internal
|
||||
.arch armv8-a+crc+crypto
|
||||
.text
|
||||
.align 3
|
||||
.global crc32_iscsi_refl_pmull
|
||||
.global cdecl(crc32_iscsi_refl_pmull)
|
||||
#ifndef __APPLE__
|
||||
.type crc32_iscsi_refl_pmull, %function
|
||||
crc32_iscsi_refl_pmull:
|
||||
#endif
|
||||
cdecl(crc32_iscsi_refl_pmull):
|
||||
stp x29, x30, [sp, -32]!
|
||||
mov x29, sp
|
||||
|
||||
@ -47,7 +50,7 @@ crc32_iscsi_refl_pmull:
|
||||
mov w0, w7
|
||||
mvn w0, w0
|
||||
|
||||
bl crc32_iscsi_refl_pmull_internal
|
||||
bl cdecl(crc32_iscsi_refl_pmull_internal)
|
||||
mvn w0, w0
|
||||
ldp x29, x30, [sp], 32
|
||||
ret
|
||||
|
@ -47,11 +47,14 @@
|
||||
.equ br_high_b2, 0x0
|
||||
|
||||
.text
|
||||
.section .rodata
|
||||
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
.set .lanchor_crc_tab,. + 0
|
||||
#ifndef __APPLE__
|
||||
.type crc32_table_iscsi_refl, %object
|
||||
.size crc32_table_iscsi_refl, 1024
|
||||
#endif
|
||||
crc32_table_iscsi_refl:
|
||||
.word 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB
|
||||
.word 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24
|
||||
|
@ -51,42 +51,56 @@
|
||||
|
||||
#include "crc32_mix_default_common.S"
|
||||
|
||||
.global crc32_mix_default
|
||||
.global cdecl(crc32_mix_default)
|
||||
#ifndef __APPLE__
|
||||
.type crc32_mix_default, %function
|
||||
crc32_mix_default:
|
||||
#endif
|
||||
cdecl(crc32_mix_default):
|
||||
crc32_mix_main_default
|
||||
#ifndef __APPLE__
|
||||
.size crc32_mix_default, .-crc32_mix_default
|
||||
#endif
|
||||
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
.set lanchor_crc32,. + 0
|
||||
|
||||
#ifndef __APPLE__
|
||||
.type k1k2, %object
|
||||
.size k1k2, 16
|
||||
#endif
|
||||
k1k2:
|
||||
.xword 0x0154442bd4
|
||||
.xword 0x01c6e41596
|
||||
|
||||
#ifndef __APPLE__
|
||||
.type k3k4, %object
|
||||
.size k3k4, 16
|
||||
#endif
|
||||
k3k4:
|
||||
.xword 0x01751997d0
|
||||
.xword 0x00ccaa009e
|
||||
|
||||
#ifndef __APPLE__
|
||||
.type k5k0, %object
|
||||
.size k5k0, 16
|
||||
#endif
|
||||
k5k0:
|
||||
.xword 0x0163cd6124
|
||||
.xword 0
|
||||
|
||||
#ifndef __APPLE__
|
||||
.type poly, %object
|
||||
.size poly, 16
|
||||
#endif
|
||||
poly:
|
||||
.xword 0x01db710641
|
||||
.xword 0x01f7011641
|
||||
|
||||
#ifndef __APPLE__
|
||||
.type crc32_const, %object
|
||||
.size crc32_const, 48
|
||||
#endif
|
||||
crc32_const:
|
||||
.xword 0x1753ab84
|
||||
.xword 0
|
||||
@ -98,8 +112,10 @@ crc32_const:
|
||||
.align 4
|
||||
.set .lanchor_mask,. + 0
|
||||
|
||||
#ifndef __APPLE__
|
||||
.type mask, %object
|
||||
.size mask, 16
|
||||
#endif
|
||||
mask:
|
||||
.word -1
|
||||
.word 0
|
||||
|
@ -27,6 +27,8 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.macro declare_generic_reg name:req, reg:req, default:req
|
||||
\name .req \default\reg
|
||||
w_\name .req w\reg
|
||||
@ -207,8 +209,13 @@
|
||||
fmov s_a1, w_crc
|
||||
movi v_neon_tmp.4s, 0
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x_pconst, lanchor_crc32
|
||||
add x_buf_tmp, x_buf, 64
|
||||
#else
|
||||
adrp x_pconst, lanchor_crc32@PAGE
|
||||
add x_buf_tmp, x_buf, 64
|
||||
#endif
|
||||
|
||||
ldr x_data_crc0, [x_buf, 512]
|
||||
ldr x_data_crc1, [x_buf, 1024]
|
||||
@ -231,7 +238,11 @@
|
||||
ldr x_data_crc2, [x_buf, 1544]
|
||||
|
||||
eor v_a1.16b, v_a1.16b, v_neon_tmp.16b
|
||||
#ifndef __APPLE__
|
||||
ldr q_a0, [x_pconst, #:lo12:lanchor_crc32] // k1k2
|
||||
#else
|
||||
ldr q_a0, [x_pconst, #lanchor_crc32@PAGEOFF] // k1k2
|
||||
#endif
|
||||
|
||||
crc32_u64 w_crc0, w_crc0, x_data_crc0
|
||||
crc32_u64 w_crc1, w_crc1, x_data_crc1
|
||||
@ -261,7 +272,11 @@
|
||||
// loop end
|
||||
|
||||
// PMULL: fold into 128-bits
|
||||
#ifndef __APPLE__
|
||||
add x_pconst, x_pconst, :lo12:lanchor_crc32
|
||||
#else
|
||||
add x_pconst, x_pconst, lanchor_crc32@PAGEOFF
|
||||
#endif
|
||||
|
||||
ldr x_data_crc0, [x_buf, 976]
|
||||
ldr x_data_crc1, [x_buf, 1488]
|
||||
@ -321,7 +336,11 @@
|
||||
|
||||
movi v_neon_zero.4s, 0
|
||||
ldr q_k5k0, [x_pconst, offset_k5k0] // k5k0
|
||||
#ifndef __APPLE__
|
||||
adrp x_tmp, .lanchor_mask
|
||||
#else
|
||||
adrp x_tmp, .lanchor_mask@PAGE
|
||||
#endif
|
||||
|
||||
ldr x_data_crc0, [x_buf, 1008]
|
||||
ldr x_data_crc1, [x_buf, 1520]
|
||||
@ -329,7 +348,11 @@
|
||||
|
||||
ext v_a1.16b, v_a1.16b, v_neon_zero.16b, #8
|
||||
eor v_a1.16b, v_a2.16b, v_a1.16b
|
||||
#ifndef __APPLE__
|
||||
ldr q_neon_tmp3, [x_tmp, #:lo12:.lanchor_mask]
|
||||
#else
|
||||
ldr q_neon_tmp3, [x_tmp, #.lanchor_mask@PAGEOFF]
|
||||
#endif
|
||||
|
||||
crc32_u64 w_crc0, w_crc0, x_data_crc0
|
||||
crc32_u64 w_crc1, w_crc1, x_data_crc1
|
||||
|
@ -62,9 +62,12 @@
|
||||
CRC .req x0
|
||||
wCRC .req w0
|
||||
.align 6
|
||||
.global crc32_mix_neoverse_n1
|
||||
.global cdecl(crc32_mix_neoverse_n1)
|
||||
#ifndef __APPLE__
|
||||
.type crc32_mix_neoverse_n1, %function
|
||||
crc32_mix_neoverse_n1:
|
||||
#endif
|
||||
cdecl(crc32_mix_neoverse_n1):
|
||||
crc32_common_mix crc32
|
||||
#ifndef __APPLE__
|
||||
.size crc32_mix_neoverse_n1, .-crc32_mix_neoverse_n1
|
||||
|
||||
#endif
|
||||
|
@ -33,12 +33,14 @@
|
||||
.arch armv8-a+crypto
|
||||
.text
|
||||
.align 3
|
||||
.global \name
|
||||
.global cdecl(\name)
|
||||
#ifndef __APPLE__
|
||||
.type \name, %function
|
||||
#endif
|
||||
|
||||
/* uint32_t crc32_norm_func(uint32_t seed, uint8_t * buf, uint64_t len) */
|
||||
|
||||
\name\():
|
||||
cdecl(\name\()):
|
||||
mvn w_seed, w_seed
|
||||
mov x_counter, 0
|
||||
cmp x_len, (FOLD_SIZE - 1)
|
||||
@ -48,10 +50,17 @@
|
||||
cmp x_len, x_counter
|
||||
bls .done
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x_tmp, .lanchor_crc_tab
|
||||
add x_buf_iter, x_buf, x_counter
|
||||
add x_buf, x_buf, x_len
|
||||
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
|
||||
#else
|
||||
adrp x_tmp, .lanchor_crc_tab@PAGE
|
||||
add x_buf_iter, x_buf, x_counter
|
||||
add x_buf, x_buf, x_len
|
||||
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
|
||||
#endif
|
||||
|
||||
.align 3
|
||||
.loop_crc_tab:
|
||||
@ -124,10 +133,12 @@
|
||||
umov w_seed, v_tmp_high.s[0]
|
||||
|
||||
b .crc_tab_pre
|
||||
|
||||
#ifndef __APPLE__
|
||||
.size \name, .-\name
|
||||
|
||||
.section .rodata.cst16,"aM",@progbits,16
|
||||
#else
|
||||
.section __TEXT,__const
|
||||
#endif
|
||||
.align 4
|
||||
.shuffle_data:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9
|
||||
|
@ -33,12 +33,14 @@
|
||||
.arch armv8-a+crypto
|
||||
.text
|
||||
.align 3
|
||||
.global \name
|
||||
.global cdecl(\name)
|
||||
#ifndef __APPLE__
|
||||
.type \name, %function
|
||||
#endif
|
||||
|
||||
/* uint32_t crc32_refl_func(uint32_t seed, uint8_t * buf, uint64_t len) */
|
||||
|
||||
\name\():
|
||||
cdecl(\name\()):
|
||||
mvn w_seed, w_seed
|
||||
mov x_counter, 0
|
||||
cmp x_len, (FOLD_SIZE - 1)
|
||||
@ -48,10 +50,17 @@
|
||||
cmp x_len, x_counter
|
||||
bls .done
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x_tmp, .lanchor_crc_tab
|
||||
add x_buf_iter, x_buf, x_counter
|
||||
add x_buf, x_buf, x_len
|
||||
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
|
||||
#else
|
||||
adrp x_tmp, .lanchor_crc_tab@PAGE
|
||||
add x_buf_iter, x_buf, x_counter
|
||||
add x_buf, x_buf, x_len
|
||||
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
|
||||
#endif
|
||||
|
||||
.align 3
|
||||
.loop_crc_tab:
|
||||
@ -121,6 +130,7 @@
|
||||
umov w_seed, v_tmp_high.s[1]
|
||||
|
||||
b .crc_tab_pre
|
||||
|
||||
#ifndef __APPLE__
|
||||
.size \name, .-\name
|
||||
#endif
|
||||
.endm
|
||||
|
@ -49,46 +49,60 @@
|
||||
|
||||
#include "crc32_mix_default_common.S"
|
||||
|
||||
.global crc32c_mix_default
|
||||
.global cdecl(crc32c_mix_default)
|
||||
#ifndef __APPLE__
|
||||
.type crc32c_mix_default, %function
|
||||
crc32c_mix_default:
|
||||
#endif
|
||||
cdecl(crc32c_mix_default):
|
||||
mov w3, w2
|
||||
sxtw x2, w1
|
||||
mov x1, x0
|
||||
mov w0, w3
|
||||
crc32_mix_main_default
|
||||
#ifndef __APPLE__
|
||||
.size crc32c_mix_default, .-crc32c_mix_default
|
||||
#endif
|
||||
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
.set lanchor_crc32,. + 0
|
||||
|
||||
#ifndef __APPLE__
|
||||
.type k1k2, %object
|
||||
.size k1k2, 16
|
||||
#endif
|
||||
k1k2:
|
||||
.xword 0x00740eef02
|
||||
.xword 0x009e4addf8
|
||||
|
||||
#ifndef __APPLE__
|
||||
.type k3k4, %object
|
||||
.size k3k4, 16
|
||||
#endif
|
||||
k3k4:
|
||||
.xword 0x00f20c0dfe
|
||||
.xword 0x014cd00bd6
|
||||
|
||||
#ifndef __APPLE__
|
||||
.type k5k0, %object
|
||||
.size k5k0, 16
|
||||
#endif
|
||||
k5k0:
|
||||
.xword 0x00dd45aab8
|
||||
.xword 0
|
||||
|
||||
#ifndef __APPLE__
|
||||
.type poly, %object
|
||||
.size poly, 16
|
||||
#endif
|
||||
poly:
|
||||
.xword 0x0105ec76f0
|
||||
.xword 0x00dea713f1
|
||||
|
||||
#ifndef __APPLE__
|
||||
.type crc32_const, %object
|
||||
.size crc32_const, 48
|
||||
#endif
|
||||
crc32_const:
|
||||
.xword 0x9ef68d35
|
||||
.xword 0
|
||||
@ -100,8 +114,10 @@ crc32_const:
|
||||
.align 4
|
||||
.set .lanchor_mask,. + 0
|
||||
|
||||
#ifndef __APPLE__
|
||||
.type mask, %object
|
||||
.size mask, 16
|
||||
#endif
|
||||
mask:
|
||||
.word -1
|
||||
.word 0
|
||||
|
@ -61,8 +61,12 @@
|
||||
CRC .req x2
|
||||
wCRC .req w2
|
||||
.align 6
|
||||
.global crc32c_mix_neoverse_n1
|
||||
.global cdecl(crc32c_mix_neoverse_n1)
|
||||
#ifndef __APPLE__
|
||||
.type crc32c_mix_neoverse_n1, %function
|
||||
crc32c_mix_neoverse_n1:
|
||||
#endif
|
||||
cdecl(crc32c_mix_neoverse_n1):
|
||||
crc32_common_mix crc32c
|
||||
#ifndef __APPLE__
|
||||
.size crc32c_mix_neoverse_n1, .-crc32c_mix_neoverse_n1
|
||||
#endif
|
||||
|
@ -27,6 +27,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
#include "crc64_ecma_norm_pmull.h"
|
||||
#include "crc64_norm_common_pmull.h"
|
||||
|
||||
|
@ -64,11 +64,13 @@
|
||||
.equ br_high_b3, 0x42f0
|
||||
|
||||
.text
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
.set .lanchor_crc_tab,. + 0
|
||||
#ifndef __APPLE__
|
||||
.type crc64_tab, %object
|
||||
.size crc64_tab, 2048
|
||||
#endif
|
||||
crc64_tab:
|
||||
.xword 0x0000000000000000, 0x42f0e1eba9ea3693
|
||||
.xword 0x85e1c3d753d46d26, 0xc711223cfa3e5bb5
|
||||
|
@ -27,6 +27,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
#include "crc64_ecma_refl_pmull.h"
|
||||
#include "crc64_refl_common_pmull.h"
|
||||
|
||||
|
@ -60,11 +60,13 @@
|
||||
.equ br_high_b3, 0x92d8
|
||||
|
||||
.text
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
.set .lanchor_crc_tab,. + 0
|
||||
#ifndef __APPLE__
|
||||
.type crc64_tab, %object
|
||||
.size crc64_tab, 2048
|
||||
#endif
|
||||
crc64_tab:
|
||||
.xword 0x0000000000000000, 0xb32e4cbe03a75f6f
|
||||
.xword 0xf4843657a840a05b, 0x47aa7ae9abe7ff34
|
||||
|
@ -27,6 +27,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
#include "crc64_iso_norm_pmull.h"
|
||||
#include "crc64_norm_common_pmull.h"
|
||||
|
||||
|
@ -64,11 +64,13 @@
|
||||
.equ br_high_b3, 0x0000
|
||||
|
||||
.text
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
.set .lanchor_crc_tab,. + 0
|
||||
#ifndef __APPLE__
|
||||
.type crc64_tab, %object
|
||||
.size crc64_tab, 2048
|
||||
#endif
|
||||
|
||||
crc64_tab:
|
||||
.xword 0x0000000000000000, 0x000000000000001b
|
||||
|
@ -27,6 +27,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
#include "crc64_iso_refl_pmull.h"
|
||||
#include "crc64_refl_common_pmull.h"
|
||||
|
||||
|
@ -60,11 +60,13 @@
|
||||
.equ br_high_b3, 0xb000
|
||||
|
||||
.text
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
.set .lanchor_crc_tab,. + 0
|
||||
#ifndef __APPLE__
|
||||
.type crc64_tab, %object
|
||||
.size crc64_tab, 2048
|
||||
#endif
|
||||
|
||||
crc64_tab:
|
||||
.xword 0x0000000000000000, 0x01b0000000000000
|
||||
|
@ -27,6 +27,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
#include "crc64_jones_norm_pmull.h"
|
||||
#include "crc64_norm_common_pmull.h"
|
||||
|
||||
|
@ -64,11 +64,14 @@
|
||||
.equ br_high_b3, 0xad93
|
||||
|
||||
.text
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
.set .lanchor_crc_tab,. + 0
|
||||
#ifndef __APPLE__
|
||||
.type crc64_tab, %object
|
||||
.size crc64_tab, 2048
|
||||
#endif
|
||||
|
||||
crc64_tab:
|
||||
.xword 0x0000000000000000, 0xad93d23594c935a9
|
||||
.xword 0xf6b4765ebd5b5efb, 0x5b27a46b29926b52
|
||||
|
@ -27,6 +27,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
#include "crc64_jones_refl_pmull.h"
|
||||
#include "crc64_refl_common_pmull.h"
|
||||
|
||||
|
@ -60,11 +60,14 @@
|
||||
.equ br_high_b3, 0x2b59
|
||||
|
||||
.text
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
.set .lanchor_crc_tab,. + 0
|
||||
#ifndef __APPLE__
|
||||
.type crc64_tab, %object
|
||||
.size crc64_tab, 2048
|
||||
#endif
|
||||
|
||||
crc64_tab:
|
||||
.xword 0x0000000000000000, 0x7ad870c830358979
|
||||
.xword 0xf5b0e190606b12f2, 0x8f689158505e9b8b
|
||||
|
@ -33,12 +33,14 @@
|
||||
.arch armv8-a+crypto
|
||||
.text
|
||||
.align 3
|
||||
.global \name
|
||||
.global cdecl(\name)
|
||||
#ifndef __APPLE__
|
||||
.type \name, %function
|
||||
#endif
|
||||
|
||||
/* uint64_t crc64_norm_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
|
||||
|
||||
\name\():
|
||||
cdecl(\name\()):
|
||||
mvn x_seed, x_seed
|
||||
mov x_counter, 0
|
||||
cmp x_len, (FOLD_SIZE-1)
|
||||
@ -48,10 +50,17 @@
|
||||
cmp x_len, x_counter
|
||||
bls .done
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x_tmp, .lanchor_crc_tab
|
||||
add x_buf_iter, x_buf, x_counter
|
||||
add x_buf, x_buf, x_len
|
||||
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
|
||||
#else
|
||||
adrp x_tmp, .lanchor_crc_tab@PAGE
|
||||
add x_buf_iter, x_buf, x_counter
|
||||
add x_buf, x_buf, x_len
|
||||
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
|
||||
#endif
|
||||
|
||||
.align 3
|
||||
.loop_crc_tab:
|
||||
@ -119,9 +128,12 @@
|
||||
|
||||
b .crc_tab_pre
|
||||
|
||||
#ifndef __APPLE__
|
||||
.size \name, .-\name
|
||||
|
||||
.section .rodata.cst16,"aM",@progbits,16
|
||||
#else
|
||||
.section __TEXT,__const
|
||||
#endif
|
||||
.align 4
|
||||
.shuffle_data:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8
|
||||
|
@ -33,12 +33,14 @@
|
||||
.arch armv8-a+crypto
|
||||
.text
|
||||
.align 3
|
||||
.global \name
|
||||
.global cdecl(\name)
|
||||
#ifndef __APPLE__
|
||||
.type \name, %function
|
||||
#endif
|
||||
|
||||
/* uint64_t crc64_refl_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
|
||||
|
||||
\name\():
|
||||
cdecl(\name\()):
|
||||
mvn x_seed, x_seed
|
||||
mov x_counter, 0
|
||||
cmp x_len, (FOLD_SIZE-1)
|
||||
@ -48,10 +50,17 @@
|
||||
cmp x_len, x_counter
|
||||
bls .done
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x_tmp, .lanchor_crc_tab
|
||||
add x_buf_iter, x_buf, x_counter
|
||||
add x_buf, x_buf, x_len
|
||||
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
|
||||
#else
|
||||
adrp x_tmp, .lanchor_crc_tab@PAGE
|
||||
add x_buf_iter, x_buf, x_counter
|
||||
add x_buf, x_buf, x_len
|
||||
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
|
||||
#endif
|
||||
|
||||
.align 3
|
||||
.loop_crc_tab:
|
||||
@ -121,6 +130,7 @@
|
||||
umov x_crc_ret, v_tmp_low.d[1]
|
||||
|
||||
b .crc_tab_pre
|
||||
|
||||
#ifndef __APPLE__
|
||||
.size \name, .-\name
|
||||
#endif
|
||||
.endm
|
||||
|
@ -30,37 +30,50 @@
|
||||
|
||||
/*
 * Dispatcher for crc16_t10dif: picks the implementation to use at run time.
 * Returns PROVIDER_INFO(crc16_t10dif_pmull) when the platform check below
 * reports PMULL support, otherwise PROVIDER_BASIC(crc16_t10dif).
 * On targets that are neither Linux nor Apple only the basic provider is used.
 */
DEFINE_INTERFACE_DISPATCHER(crc16_t10dif)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the hardware-capability word from the auxiliary vector. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_PMULL)
|
||||
return PROVIDER_INFO(crc16_t10dif_pmull);
|
||||

||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; per the commit message the
 * macOS dispatcher is built on sysctlbyname -- TODO confirm its semantics. */
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||
return PROVIDER_INFO(crc16_t10dif_pmull);
|
||||
#endif
|
||||
/* Fallback when no accelerated provider was selected above. */
return PROVIDER_BASIC(crc16_t10dif);
|
||||

||||
}
|
||||
|
||||
/*
 * Dispatcher for crc16_t10dif_copy: picks the implementation to use at run time.
 * Returns PROVIDER_INFO(crc16_t10dif_copy_pmull) when the platform check below
 * reports PMULL support, otherwise PROVIDER_BASIC(crc16_t10dif_copy).
 * On targets that are neither Linux nor Apple only the basic provider is used.
 */
DEFINE_INTERFACE_DISPATCHER(crc16_t10dif_copy)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the hardware-capability word from the auxiliary vector. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_PMULL)
|
||||
return PROVIDER_INFO(crc16_t10dif_copy_pmull);
|
||||

||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; per the commit message the
 * macOS dispatcher is built on sysctlbyname -- TODO confirm its semantics. */
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||
return PROVIDER_INFO(crc16_t10dif_copy_pmull);
|
||||
#endif
|
||||
/* Fallback when no accelerated provider was selected above. */
return PROVIDER_BASIC(crc16_t10dif_copy);
|
||||

||||
}
|
||||
|
||||
/*
 * Dispatcher for crc32_ieee: picks the implementation to use at run time.
 * Returns PROVIDER_INFO(crc32_ieee_norm_pmull) when the platform check below
 * reports PMULL support, otherwise PROVIDER_BASIC(crc32_ieee).
 * On targets that are neither Linux nor Apple only the basic provider is used.
 */
DEFINE_INTERFACE_DISPATCHER(crc32_ieee)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the hardware-capability word from the auxiliary vector. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_PMULL) {
|
||||
return PROVIDER_INFO(crc32_ieee_norm_pmull);
|
||||
}
|
||||

||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; per the commit message the
 * macOS dispatcher is built on sysctlbyname -- TODO confirm its semantics. */
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||
return PROVIDER_INFO(crc32_ieee_norm_pmull);
|
||||
#endif
|
||||
/* Fallback when no accelerated provider was selected above. */
return PROVIDER_BASIC(crc32_ieee);
|
||||

||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32) {
|
||||
switch (get_micro_arch_id()) {
|
||||
@ -77,12 +90,19 @@ DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
|
||||
if (auxval & HWCAP_PMULL) {
|
||||
return PROVIDER_INFO(crc32_iscsi_refl_pmull);
|
||||
}
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(crc32_iscsi_3crc_fold);
|
||||
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||
return PROVIDER_INFO(crc32_iscsi_refl_pmull);
|
||||
#endif
|
||||
return PROVIDER_BASIC(crc32_iscsi);
|
||||
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
if (auxval & HWCAP_CRC32) {
|
||||
@ -99,68 +119,97 @@ DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl)
|
||||
|
||||
if (auxval & HWCAP_PMULL)
|
||||
return PROVIDER_INFO(crc32_gzip_refl_pmull);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(crc32_gzip_refl_3crc_fold);
|
||||
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||
return PROVIDER_INFO(crc32_gzip_refl_pmull);
|
||||
#endif
|
||||
return PROVIDER_BASIC(crc32_gzip_refl);
|
||||
|
||||
}
|
||||
|
||||
/*
 * Dispatcher for crc64_ecma_refl: picks the implementation to use at run time.
 * Returns PROVIDER_INFO(crc64_ecma_refl_pmull) when the platform check below
 * reports PMULL support, otherwise PROVIDER_BASIC(crc64_ecma_refl).
 * On targets that are neither Linux nor Apple only the basic provider is used.
 */
DEFINE_INTERFACE_DISPATCHER(crc64_ecma_refl)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the hardware-capability word from the auxiliary vector. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
if (auxval & HWCAP_PMULL)
|
||||
return PROVIDER_INFO(crc64_ecma_refl_pmull);
|
||||

||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; per the commit message the
 * macOS dispatcher is built on sysctlbyname -- TODO confirm its semantics. */
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||
return PROVIDER_INFO(crc64_ecma_refl_pmull);
|
||||
#endif
|
||||
/* Fallback when no accelerated provider was selected above. */
return PROVIDER_BASIC(crc64_ecma_refl);
|
||||

||||
}
|
||||
|
||||
/*
 * Dispatcher for crc64_ecma_norm: picks the implementation to use at run time.
 * Returns PROVIDER_INFO(crc64_ecma_norm_pmull) when the platform check below
 * reports PMULL support, otherwise PROVIDER_BASIC(crc64_ecma_norm).
 * On targets that are neither Linux nor Apple only the basic provider is used.
 */
DEFINE_INTERFACE_DISPATCHER(crc64_ecma_norm)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the hardware-capability word from the auxiliary vector. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_PMULL)
|
||||
return PROVIDER_INFO(crc64_ecma_norm_pmull);
|
||||

||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; per the commit message the
 * macOS dispatcher is built on sysctlbyname -- TODO confirm its semantics. */
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||
return PROVIDER_INFO(crc64_ecma_norm_pmull);
|
||||
#endif
|
||||
/* Fallback when no accelerated provider was selected above. */
return PROVIDER_BASIC(crc64_ecma_norm);
|
||||

||||
}
|
||||
|
||||
/*
 * Dispatcher for crc64_iso_refl: picks the implementation to use at run time.
 * Returns PROVIDER_INFO(crc64_iso_refl_pmull) when the platform check below
 * reports PMULL support, otherwise PROVIDER_BASIC(crc64_iso_refl).
 * On targets that are neither Linux nor Apple only the basic provider is used.
 */
DEFINE_INTERFACE_DISPATCHER(crc64_iso_refl)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the hardware-capability word from the auxiliary vector. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_PMULL)
|
||||
return PROVIDER_INFO(crc64_iso_refl_pmull);
|
||||

||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; per the commit message the
 * macOS dispatcher is built on sysctlbyname -- TODO confirm its semantics. */
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||
return PROVIDER_INFO(crc64_iso_refl_pmull);
|
||||
#endif
|
||||
/* Fallback when no accelerated provider was selected above. */
return PROVIDER_BASIC(crc64_iso_refl);
|
||||

||||
}
|
||||
|
||||
/*
 * Dispatcher for crc64_iso_norm: picks the implementation to use at run time.
 * Returns PROVIDER_INFO(crc64_iso_norm_pmull) when the platform check below
 * reports PMULL support, otherwise PROVIDER_BASIC(crc64_iso_norm).
 * On targets that are neither Linux nor Apple only the basic provider is used.
 */
DEFINE_INTERFACE_DISPATCHER(crc64_iso_norm)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the hardware-capability word from the auxiliary vector. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_PMULL)
|
||||
return PROVIDER_INFO(crc64_iso_norm_pmull);
|
||||

||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; per the commit message the
 * macOS dispatcher is built on sysctlbyname -- TODO confirm its semantics. */
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||
return PROVIDER_INFO(crc64_iso_norm_pmull);
|
||||
#endif
|
||||
/* Fallback when no accelerated provider was selected above. */
return PROVIDER_BASIC(crc64_iso_norm);
|
||||

||||
}
|
||||
|
||||
/*
 * Dispatcher for crc64_jones_refl.
 * Returns PROVIDER_INFO(crc64_jones_refl_pmull) when the platform-specific
 * PMULL check below succeeds, otherwise PROVIDER_BASIC(crc64_jones_refl).
 * DEFINE_INTERFACE_DISPATCHER, PROVIDER_INFO and PROVIDER_BASIC are macros
 * defined outside this view.
 */
DEFINE_INTERFACE_DISPATCHER(crc64_jones_refl)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the AT_HWCAP auxiliary-vector entry and test its PMULL bit. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_PMULL)
|
||||
return PROVIDER_INFO(crc64_jones_refl_pmull);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; presumably it queries the
 * sysctl named by SYSCTL_PMULL_KEY -- TODO confirm. */
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||
return PROVIDER_INFO(crc64_jones_refl_pmull);
|
||||
#endif
|
||||
/* Fallback: no check above returned, or the platform is neither Linux nor Apple. */
return PROVIDER_BASIC(crc64_jones_refl);
|
||||
|
||||
}
|
||||
|
||||
/*
 * Dispatcher for crc64_jones_norm.
 * Returns PROVIDER_INFO(crc64_jones_norm_pmull) when the platform-specific
 * PMULL check below succeeds, otherwise PROVIDER_BASIC(crc64_jones_norm).
 * DEFINE_INTERFACE_DISPATCHER, PROVIDER_INFO and PROVIDER_BASIC are macros
 * defined outside this view.
 */
DEFINE_INTERFACE_DISPATCHER(crc64_jones_norm)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the AT_HWCAP auxiliary-vector entry and test its PMULL bit. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_PMULL)
|
||||
return PROVIDER_INFO(crc64_jones_norm_pmull);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; presumably it queries the
 * sysctl named by SYSCTL_PMULL_KEY -- TODO confirm. */
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||
return PROVIDER_INFO(crc64_jones_norm_pmull);
|
||||
#endif
|
||||
/* Fallback: no check above returned, or the platform is neither Linux nor Apple. */
return PROVIDER_BASIC(crc64_jones_norm);
|
||||
|
||||
}
|
||||
|
@ -27,6 +27,8 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
// parameters
|
||||
#define w_seed w0
|
||||
#define x_seed x0
|
||||
@ -126,8 +128,13 @@
|
||||
.endm
|
||||
|
||||
.macro crc_norm_load_first_block
|
||||
#ifndef __APPLE__
|
||||
adrp x_tmp, .shuffle_data
|
||||
ldr q_shuffle, [x_tmp, #:lo12:.shuffle_data]
|
||||
#else
|
||||
adrp x_tmp, .shuffle_data@PAGE
|
||||
ldr q_shuffle, [x_tmp, #.shuffle_data@PAGEOFF]
|
||||
#endif
|
||||
|
||||
ldr q_x0_tmp, [x_buf]
|
||||
ldr q_x1, [x_buf, 16]
|
||||
@ -299,4 +306,4 @@
|
||||
pmull v_tmp_low.1q, v_x2.1d, v_p1.1d
|
||||
eor v_x3.16b, v_x3.16b, v_tmp_high.16b
|
||||
eor v_x3.16b, v_x3.16b, v_tmp_low.16b
|
||||
.endm
|
||||
.endm
|
||||
|
@ -30,60 +30,90 @@
|
||||
|
||||
/*
 * Dispatcher for gf_vect_dot_prod.
 * Chooses between the _sve, _neon and basic providers using the
 * platform-specific checks below; SVE is tested first, so it wins when both
 * SVE and NEON are available.
 * DEFINE_INTERFACE_DISPATCHER, PROVIDER_INFO and PROVIDER_BASIC are macros
 * defined outside this view.
 */
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the AT_HWCAP auxiliary-vector entry and test feature bits. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_neon);
|
||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; presumably it queries the
 * sysctl named by SYSCTL_SVE_KEY -- TODO confirm. */
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(gf_vect_dot_prod_sve);
|
||||
/* NEON is returned unconditionally here, so on Apple builds the
 * PROVIDER_BASIC return after #endif is never reached. */
return PROVIDER_INFO(gf_vect_dot_prod_neon);
|
||||
#endif
|
||||
/* Fallback: Linux without SVE/ASIMD, or a platform that is neither Linux nor Apple. */
return PROVIDER_BASIC(gf_vect_dot_prod);
|
||||
|
||||
}
|
||||
|
||||
/*
 * Dispatcher for gf_vect_mad.
 * Chooses between the _sve, _neon and basic providers using the
 * platform-specific checks below; SVE is tested first, so it wins when both
 * SVE and NEON are available.
 * DEFINE_INTERFACE_DISPATCHER, PROVIDER_INFO and PROVIDER_BASIC are macros
 * defined outside this view.
 */
DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the AT_HWCAP auxiliary-vector entry and test feature bits. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(gf_vect_mad_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(gf_vect_mad_neon);
|
||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; presumably it queries the
 * sysctl named by SYSCTL_SVE_KEY -- TODO confirm. */
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(gf_vect_mad_sve);
|
||||
/* NEON is returned unconditionally here, so on Apple builds the
 * PROVIDER_BASIC return after #endif is never reached. */
return PROVIDER_INFO(gf_vect_mad_neon);
|
||||
#endif
|
||||
/* Fallback: Linux without SVE/ASIMD, or a platform that is neither Linux nor Apple. */
return PROVIDER_BASIC(gf_vect_mad);
|
||||
|
||||
}
|
||||
|
||||
/*
 * Dispatcher for ec_encode_data.
 * Chooses between the _sve, _neon and basic providers using the
 * platform-specific checks below; SVE is tested first, so it wins when both
 * SVE and NEON are available.
 * DEFINE_INTERFACE_DISPATCHER, PROVIDER_INFO and PROVIDER_BASIC are macros
 * defined outside this view.
 */
DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the AT_HWCAP auxiliary-vector entry and test feature bits. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(ec_encode_data_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(ec_encode_data_neon);
|
||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; presumably it queries the
 * sysctl named by SYSCTL_SVE_KEY -- TODO confirm. */
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(ec_encode_data_sve);
|
||||
/* NEON is returned unconditionally here, so on Apple builds the
 * PROVIDER_BASIC return after #endif is never reached. */
return PROVIDER_INFO(ec_encode_data_neon);
|
||||
#endif
|
||||
/* Fallback: Linux without SVE/ASIMD, or a platform that is neither Linux nor Apple. */
return PROVIDER_BASIC(ec_encode_data);
|
||||
|
||||
}
|
||||
|
||||
/*
 * Dispatcher for ec_encode_data_update.
 * Chooses between the _sve, _neon and basic providers using the
 * platform-specific checks below; SVE is tested first, so it wins when both
 * SVE and NEON are available.
 * DEFINE_INTERFACE_DISPATCHER, PROVIDER_INFO and PROVIDER_BASIC are macros
 * defined outside this view.
 */
DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the AT_HWCAP auxiliary-vector entry and test feature bits. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(ec_encode_data_update_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(ec_encode_data_update_neon);
|
||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; presumably it queries the
 * sysctl named by SYSCTL_SVE_KEY -- TODO confirm. */
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(ec_encode_data_update_sve);
|
||||
/* NEON is returned unconditionally here, so on Apple builds the
 * PROVIDER_BASIC return after #endif is never reached. */
return PROVIDER_INFO(ec_encode_data_update_neon);
|
||||
#endif
|
||||
/* Fallback: Linux without SVE/ASIMD, or a platform that is neither Linux nor Apple. */
return PROVIDER_BASIC(ec_encode_data_update);
|
||||
|
||||
}
|
||||
|
||||
/*
 * Dispatcher for gf_vect_mul.
 * Chooses between the _sve, _neon and basic providers using the
 * platform-specific checks below; SVE is tested first, so it wins when both
 * SVE and NEON are available.
 * DEFINE_INTERFACE_DISPATCHER, PROVIDER_INFO and PROVIDER_BASIC are macros
 * defined outside this view.
 */
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
/* Linux: read the AT_HWCAP auxiliary-vector entry and test feature bits. */
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
if (auxval & HWCAP_SVE)
|
||||
return PROVIDER_INFO(gf_vect_mul_sve);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(gf_vect_mul_neon);
|
||||
#elif defined(__APPLE__)
|
||||
/* Apple: sysctlEnabled() is defined elsewhere; presumably it queries the
 * sysctl named by SYSCTL_SVE_KEY -- TODO confirm. */
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||
return PROVIDER_INFO(gf_vect_mul_sve);
|
||||
/* NEON is returned unconditionally here, so on Apple builds the
 * PROVIDER_BASIC return after #endif is never reached. */
return PROVIDER_INFO(gf_vect_mul_neon);
|
||||
#endif
|
||||
/* Fallback: Linux without SVE/ASIMD, or a platform that is neither Linux nor Apple. */
return PROVIDER_BASIC(gf_vect_mul);
|
||||
|
||||
}
|
||||
|
@ -27,11 +27,14 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global gf_2vect_dot_prod_neon
|
||||
.global cdecl(gf_2vect_dot_prod_neon)
|
||||
#ifndef __APPLE__
|
||||
.type gf_2vect_dot_prod_neon, %function
|
||||
|
||||
#endif
|
||||
|
||||
/* arguments */
|
||||
x_len .req x0
|
||||
@ -130,7 +133,7 @@ q_data .req q_p1_1
|
||||
v_data_lo .req v_p1_2
|
||||
v_data_hi .req v_p1_3
|
||||
|
||||
gf_2vect_dot_prod_neon:
|
||||
cdecl(gf_2vect_dot_prod_neon):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_2vect_dot_prod_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_2vect_dot_prod_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_2vect_dot_prod_sve, %function
|
||||
#endif
|
||||
/* void gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
*/
|
||||
@ -81,7 +85,7 @@ q_gft2_hi .req q18
|
||||
|
||||
z_dest2 .req z27
|
||||
|
||||
gf_2vect_dot_prod_sve:
|
||||
cdecl(gf_2vect_dot_prod_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -26,11 +26,15 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global gf_2vect_mad_neon
|
||||
.global cdecl(gf_2vect_mad_neon)
|
||||
#ifndef __APPLE__
|
||||
.type gf_2vect_mad_neon, %function
|
||||
|
||||
#endif
|
||||
|
||||
/* arguments */
|
||||
x_len .req x0
|
||||
@ -125,7 +129,7 @@ v_data_lo .req v17
|
||||
v_data_hi .req v18
|
||||
|
||||
|
||||
gf_2vect_mad_neon:
|
||||
cdecl(gf_2vect_mad_neon):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
@ -360,8 +364,13 @@ gf_2vect_mad_neon:
|
||||
sub x_dest1, x_dest1, x_tmp
|
||||
sub x_dest2, x_dest2, x_tmp
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x_const, const_tbl
|
||||
add x_const, x_const, :lo12:const_tbl
|
||||
#else
|
||||
adrp x_const, const_tbl@PAGE
|
||||
add x_const, x_const, const_tbl@PAGEOFF
|
||||
#endif
|
||||
sub x_const, x_const, x_tmp
|
||||
ldr q_tmp, [x_const, #16]
|
||||
|
||||
@ -395,7 +404,7 @@ gf_2vect_mad_neon:
|
||||
mov w_ret, #1
|
||||
ret
|
||||
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.balign 8
|
||||
const_tbl:
|
||||
.dword 0x0000000000000000, 0x0000000000000000
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_2vect_mad_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_2vect_mad_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_2vect_mad_sve, %function
|
||||
#endif
|
||||
|
||||
/* gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
@ -77,7 +81,7 @@ q_gft2_hi .req q18
|
||||
|
||||
z_dest2 .req z27
|
||||
|
||||
gf_2vect_mad_sve:
|
||||
cdecl(gf_2vect_mad_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -27,11 +27,14 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global gf_3vect_dot_prod_neon
|
||||
.global cdecl(gf_3vect_dot_prod_neon)
|
||||
#ifndef __APPLE__
|
||||
.type gf_3vect_dot_prod_neon, %function
|
||||
|
||||
#endif
|
||||
|
||||
/* arguments */
|
||||
x_len .req x0
|
||||
@ -117,7 +120,7 @@ v_data_lo .req v_p1_2
|
||||
v_data_hi .req v_p1_3
|
||||
|
||||
|
||||
gf_3vect_dot_prod_neon:
|
||||
cdecl(gf_3vect_dot_prod_neon):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_3vect_dot_prod_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_3vect_dot_prod_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_3vect_dot_prod_sve, %function
|
||||
#endif
|
||||
/* void gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
*/
|
||||
@ -89,7 +93,7 @@ q_gft3_hi .req q20
|
||||
z_dest2 .req z27
|
||||
z_dest3 .req z28
|
||||
|
||||
gf_3vect_dot_prod_sve:
|
||||
cdecl(gf_3vect_dot_prod_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -26,11 +26,15 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global gf_3vect_mad_neon
|
||||
.global cdecl(gf_3vect_mad_neon)
|
||||
#ifndef __APPLE__
|
||||
.type gf_3vect_mad_neon, %function
|
||||
|
||||
#endif
|
||||
|
||||
/* arguments */
|
||||
x_len .req x0
|
||||
@ -122,7 +126,7 @@ q_data .req q21
|
||||
v_data_lo .req v22
|
||||
v_data_hi .req v23
|
||||
|
||||
gf_3vect_mad_neon:
|
||||
cdecl(gf_3vect_mad_neon):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
@ -332,8 +336,13 @@ gf_3vect_mad_neon:
|
||||
sub x_dest2, x_dest2, x_tmp
|
||||
sub x_dest3, x_dest3, x_tmp
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x_const, const_tbl
|
||||
add x_const, x_const, :lo12:const_tbl
|
||||
#else
|
||||
adrp x_const, const_tbl@PAGE
|
||||
add x_const, x_const, const_tbl@PAGEOFF
|
||||
#endif
|
||||
sub x_const, x_const, x_tmp
|
||||
ldr q_tmp, [x_const, #16]
|
||||
|
||||
@ -375,7 +384,7 @@ gf_3vect_mad_neon:
|
||||
mov w_ret, #1
|
||||
ret
|
||||
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.balign 8
|
||||
const_tbl:
|
||||
.dword 0x0000000000000000, 0x0000000000000000
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_3vect_mad_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_3vect_mad_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_3vect_mad_sve, %function
|
||||
#endif
|
||||
|
||||
/* gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
@ -84,7 +88,7 @@ q_gft3_hi .req q20
|
||||
z_dest2 .req z27
|
||||
z_dest3 .req z28
|
||||
|
||||
gf_3vect_mad_sve:
|
||||
cdecl(gf_3vect_mad_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -26,11 +26,15 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global gf_4vect_dot_prod_neon
|
||||
.global cdecl(gf_4vect_dot_prod_neon)
|
||||
#ifndef __APPLE__
|
||||
.type gf_4vect_dot_prod_neon, %function
|
||||
|
||||
#endif
|
||||
|
||||
/* arguments */
|
||||
x_len .req x0
|
||||
@ -132,7 +136,7 @@ q_data .req q_tmp1
|
||||
v_data_lo .req v_tmp1_lo
|
||||
v_data_hi .req v_tmp1_hi
|
||||
|
||||
gf_4vect_dot_prod_neon:
|
||||
cdecl(gf_4vect_dot_prod_neon):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_4vect_dot_prod_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_4vect_dot_prod_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_4vect_dot_prod_sve, %function
|
||||
#endif
|
||||
/* void gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
*/
|
||||
@ -97,7 +101,7 @@ z_dest2 .req z27
|
||||
z_dest3 .req z28
|
||||
z_dest4 .req z29
|
||||
|
||||
gf_4vect_dot_prod_sve:
|
||||
cdecl(gf_4vect_dot_prod_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -27,11 +27,14 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global gf_4vect_mad_neon
|
||||
.global cdecl(gf_4vect_mad_neon)
|
||||
#ifndef __APPLE__
|
||||
.type gf_4vect_mad_neon, %function
|
||||
|
||||
#endif
|
||||
|
||||
/* arguments */
|
||||
x_len .req x0
|
||||
@ -138,7 +141,7 @@ q_data .req q21
|
||||
v_data_lo .req v22
|
||||
v_data_hi .req v23
|
||||
|
||||
gf_4vect_mad_neon:
|
||||
cdecl(gf_4vect_mad_neon):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
@ -397,8 +400,13 @@ gf_4vect_mad_neon:
|
||||
sub x_dest3, x_dest3, x_tmp
|
||||
sub x_dest4, x_dest4, x_tmp
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x_const, const_tbl
|
||||
add x_const, x_const, :lo12:const_tbl
|
||||
#else
|
||||
adrp x_const, const_tbl@PAGE
|
||||
add x_const, x_const, const_tbl@PAGEOFF
|
||||
#endif
|
||||
sub x_const, x_const, x_tmp
|
||||
ldr q_tmp, [x_const, #16]
|
||||
|
||||
@ -449,7 +457,7 @@ gf_4vect_mad_neon:
|
||||
mov w_ret, #1
|
||||
ret
|
||||
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.balign 8
|
||||
const_tbl:
|
||||
.dword 0x0000000000000000, 0x0000000000000000
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_4vect_mad_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_4vect_mad_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_4vect_mad_sve, %function
|
||||
#endif
|
||||
|
||||
/* gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
@ -91,7 +95,7 @@ z_dest2 .req z27
|
||||
z_dest3 .req z28
|
||||
z_dest4 .req z29
|
||||
|
||||
gf_4vect_mad_sve:
|
||||
cdecl(gf_4vect_mad_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -27,11 +27,14 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global gf_5vect_dot_prod_neon
|
||||
.global cdecl(gf_5vect_dot_prod_neon)
|
||||
#ifndef __APPLE__
|
||||
.type gf_5vect_dot_prod_neon, %function
|
||||
|
||||
#endif
|
||||
|
||||
/* arguments */
|
||||
x_len .req x0
|
||||
@ -159,7 +162,7 @@ q_gft5_lo .req q_p2_3
|
||||
q_gft5_hi .req q_p3_3
|
||||
|
||||
|
||||
gf_5vect_dot_prod_neon:
|
||||
cdecl(gf_5vect_dot_prod_neon):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_5vect_dot_prod_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_5vect_dot_prod_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_5vect_dot_prod_sve, %function
|
||||
#endif
|
||||
/* void gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
*/
|
||||
@ -105,7 +109,7 @@ z_dest3 .req z28
|
||||
z_dest4 .req z29
|
||||
z_dest5 .req z30
|
||||
|
||||
gf_5vect_dot_prod_sve:
|
||||
cdecl(gf_5vect_dot_prod_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -26,11 +26,15 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global gf_5vect_mad_neon
|
||||
.global cdecl(gf_5vect_mad_neon)
|
||||
#ifndef __APPLE__
|
||||
.type gf_5vect_mad_neon, %function
|
||||
|
||||
#endif
|
||||
|
||||
/* arguments */
|
||||
x_len .req x0
|
||||
@ -152,7 +156,7 @@ q_data .req q21
|
||||
v_data_lo .req v22
|
||||
v_data_hi .req v23
|
||||
|
||||
gf_5vect_mad_neon:
|
||||
cdecl(gf_5vect_mad_neon):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
@ -463,8 +467,13 @@ gf_5vect_mad_neon:
|
||||
sub x_dest4, x_dest4, x_tmp
|
||||
sub x_dest5, x_dest5, x_tmp
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x_const, const_tbl
|
||||
add x_const, x_const, :lo12:const_tbl
|
||||
#else
|
||||
adrp x_const, const_tbl@PAGE
|
||||
add x_const, x_const, const_tbl@PAGEOFF
|
||||
#endif
|
||||
sub x_const, x_const, x_tmp
|
||||
ldr q_tmp, [x_const, #16]
|
||||
|
||||
@ -528,7 +537,7 @@ gf_5vect_mad_neon:
|
||||
mov w_ret, #1
|
||||
ret
|
||||
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.balign 8
|
||||
const_tbl:
|
||||
.dword 0x0000000000000000, 0x0000000000000000
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_5vect_mad_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_5vect_mad_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_5vect_mad_sve, %function
|
||||
#endif
|
||||
|
||||
/* gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
@ -98,7 +102,7 @@ z_dest3 .req z28
|
||||
z_dest4 .req z29
|
||||
z_dest5 .req z30
|
||||
|
||||
gf_5vect_mad_sve:
|
||||
cdecl(gf_5vect_mad_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_6vect_dot_prod_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_6vect_dot_prod_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_6vect_dot_prod_sve, %function
|
||||
#endif
|
||||
/* void gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
*/
|
||||
@ -113,7 +117,7 @@ z_dest4 .req z29
|
||||
z_dest5 .req z30
|
||||
z_dest6 .req z31
|
||||
|
||||
gf_6vect_dot_prod_sve:
|
||||
cdecl(gf_6vect_dot_prod_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -27,10 +27,13 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
.text
|
||||
.global gf_6vect_mad_neon
|
||||
.type gf_6vect_mad_neon, %function
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
.global cdecl(gf_6vect_mad_neon)
|
||||
#ifndef __APPLE__
|
||||
.type gf_6vect_mad_neon, %function
|
||||
#endif
|
||||
|
||||
/* arguments */
|
||||
x_len .req x0
|
||||
@ -166,7 +169,7 @@ q_data .req q21
|
||||
v_data_lo .req v22
|
||||
v_data_hi .req v23
|
||||
|
||||
gf_6vect_mad_neon:
|
||||
cdecl(gf_6vect_mad_neon):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
@ -526,8 +529,13 @@ gf_6vect_mad_neon:
|
||||
sub x_dest5, x_dest5, x_tmp
|
||||
sub x_dest6, x_dest6, x_tmp
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x_const, const_tbl
|
||||
add x_const, x_const, :lo12:const_tbl
|
||||
#else
|
||||
adrp x_const, const_tbl@PAGE
|
||||
add x_const, x_const, const_tbl@PAGEOFF
|
||||
#endif
|
||||
sub x_const, x_const, x_tmp
|
||||
ldr q_tmp, [x_const, #16]
|
||||
|
||||
@ -603,7 +611,7 @@ gf_6vect_mad_neon:
|
||||
mov w_ret, #1
|
||||
ret
|
||||
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.balign 8
|
||||
const_tbl:
|
||||
.dword 0x0000000000000000, 0x0000000000000000
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_6vect_mad_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_6vect_mad_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_6vect_mad_sve, %function
|
||||
#endif
|
||||
|
||||
/* gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char **dest);
|
||||
@ -105,7 +109,7 @@ z_dest4 .req z29
|
||||
z_dest5 .req z30
|
||||
z_dest6 .req z31
|
||||
|
||||
gf_6vect_mad_sve:
|
||||
cdecl(gf_6vect_mad_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_7vect_dot_prod_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_7vect_dot_prod_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_7vect_dot_prod_sve, %function
|
||||
#endif
|
||||
/* void gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
*/
|
||||
@ -122,7 +126,7 @@ z_dest4 .req z29
|
||||
z_dest5 .req z30
|
||||
z_dest6 .req z31
|
||||
|
||||
gf_7vect_dot_prod_sve:
|
||||
cdecl(gf_7vect_dot_prod_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_8vect_dot_prod_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_8vect_dot_prod_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_8vect_dot_prod_sve, %function
|
||||
#endif
|
||||
/* void gf_8vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
*/
|
||||
@ -131,7 +135,7 @@ z_dest4 .req z29
|
||||
z_dest5 .req z30
|
||||
z_dest6 .req z31
|
||||
|
||||
gf_8vect_dot_prod_sve:
|
||||
cdecl(gf_8vect_dot_prod_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -26,10 +26,15 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global gf_vect_dot_prod_neon
|
||||
.global cdecl(gf_vect_dot_prod_neon)
|
||||
#ifndef __APPLE__
|
||||
.type gf_vect_dot_prod_neon, %function
|
||||
#endif
|
||||
|
||||
/* arguments */
|
||||
x_len .req x0
|
||||
@ -115,7 +120,7 @@ v_data_lo .req v_p2
|
||||
v_data_hi .req v_p3
|
||||
|
||||
|
||||
gf_vect_dot_prod_neon:
|
||||
cdecl(gf_vect_dot_prod_neon):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_vect_dot_prod_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_vect_dot_prod_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_vect_dot_prod_sve, %function
|
||||
#endif
|
||||
/* void gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest);
|
||||
*/
|
||||
@ -66,7 +70,7 @@ z_gft1_hi .req z5
|
||||
q_gft1_lo .req q4
|
||||
q_gft1_hi .req q5
|
||||
|
||||
gf_vect_dot_prod_sve:
|
||||
cdecl(gf_vect_dot_prod_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -26,11 +26,15 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global gf_vect_mad_neon
|
||||
.global cdecl(gf_vect_mad_neon)
|
||||
#ifndef __APPLE__
|
||||
.type gf_vect_mad_neon, %function
|
||||
|
||||
#endif
|
||||
|
||||
/* arguments */
|
||||
x_len .req x0
|
||||
@ -121,7 +125,7 @@ v_data_lo .req v_d1_2
|
||||
v_data_hi .req v_d1_3
|
||||
|
||||
|
||||
gf_vect_mad_neon:
|
||||
cdecl(gf_vect_mad_neon):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
@ -281,8 +285,13 @@ gf_vect_mad_neon:
|
||||
mov x_src, x_src_end
|
||||
sub x_dest1, x_dest1, x_tmp
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x_const, const_tbl
|
||||
add x_const, x_const, :lo12:const_tbl
|
||||
#else
|
||||
adrp x_const, const_tbl@PAGE
|
||||
add x_const, x_const, const_tbl@PAGEOFF
|
||||
#endif
|
||||
sub x_const, x_const, x_tmp
|
||||
ldr q_tmp, [x_const, #16]
|
||||
|
||||
@ -308,7 +317,7 @@ gf_vect_mad_neon:
|
||||
mov w_ret, #1
|
||||
ret
|
||||
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.balign 8
|
||||
const_tbl:
|
||||
.dword 0x0000000000000000, 0x0000000000000000
|
||||
|
@ -30,9 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global gf_vect_mad_sve
|
||||
.global cdecl(gf_vect_mad_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_vect_mad_sve, %function
|
||||
#endif
|
||||
|
||||
/* gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||
unsigned char *src, unsigned char *dest);
|
||||
@ -68,7 +71,7 @@ z_gft1_hi .req z7
|
||||
q_gft1_lo .req q6
|
||||
q_gft1_hi .req q7
|
||||
|
||||
gf_vect_mad_sve:
|
||||
cdecl(gf_vect_mad_sve):
|
||||
/* less than 16 bytes, return_fail */
|
||||
cmp x_len, #16
|
||||
blt .return_fail
|
||||
|
@ -27,11 +27,14 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global gf_vect_mul_neon
|
||||
.global cdecl(gf_vect_mul_neon)
|
||||
#ifndef __APPLE__
|
||||
.type gf_vect_mul_neon, %function
|
||||
|
||||
#endif
|
||||
|
||||
/* arguments */
|
||||
x_len .req x0
|
||||
@ -90,7 +93,7 @@ v_data_6_hi .req v_data_6
|
||||
v_data_7_hi .req v_data_7
|
||||
|
||||
|
||||
gf_vect_mul_neon:
|
||||
cdecl(gf_vect_mul_neon):
|
||||
/* less than 32 bytes, return_fail */
|
||||
cmp x_len, #32
|
||||
blt .return_fail
|
||||
|
@ -30,8 +30,12 @@
|
||||
.align 6
|
||||
.arch armv8-a+sve
|
||||
|
||||
.global gf_vect_mul_sve
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.global cdecl(gf_vect_mul_sve)
|
||||
#ifndef __APPLE__
|
||||
.type gf_vect_mul_sve, %function
|
||||
#endif
|
||||
|
||||
/* Refer to include/gf_vect_mul.h
|
||||
*
|
||||
@ -72,7 +76,7 @@ z_gft1_hi .req z7
|
||||
q_gft1_lo .req q6
|
||||
q_gft1_hi .req q7
|
||||
|
||||
gf_vect_mul_sve:
|
||||
cdecl(gf_vect_mul_sve):
|
||||
/* less than 32 bytes, return_fail */
|
||||
cmp x_len, #32
|
||||
blt .return_fail
|
||||
|
@ -27,6 +27,8 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.arch armv8-a+crc
|
||||
.text
|
||||
.align 2
|
||||
@ -46,8 +48,10 @@ declare Macros
|
||||
x_\name .req x\reg
|
||||
.endm
|
||||
|
||||
.global encode_deflate_icf_aarch64
|
||||
.global cdecl(encode_deflate_icf_aarch64)
|
||||
#ifndef __APPLE__
|
||||
.type encode_deflate_icf_aarch64, %function
|
||||
#endif
|
||||
|
||||
/*
|
||||
struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
|
||||
@ -86,7 +90,7 @@ declare Macros
|
||||
.equ offset_m_out_buf, 16
|
||||
.equ offset_m_out_end, 24
|
||||
|
||||
encode_deflate_icf_aarch64:
|
||||
cdecl(encode_deflate_icf_aarch64):
|
||||
cmp next_in, end_in
|
||||
bcs .done
|
||||
|
||||
@ -156,4 +160,6 @@ encode_deflate_icf_aarch64:
|
||||
|
||||
.done:
|
||||
ret
|
||||
#ifndef __APPLE__
|
||||
.size encode_deflate_icf_aarch64, .-encode_deflate_icf_aarch64
|
||||
#endif
|
||||
|
@ -26,6 +26,9 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.arch armv8-a+crc+crypto
|
||||
.text
|
||||
.align 2
|
||||
@ -62,8 +65,10 @@ declare Macros
|
||||
.endm
|
||||
|
||||
.align 2
|
||||
.global gen_icf_map_h1_aarch64
|
||||
.global cdecl(gen_icf_map_h1_aarch64)
|
||||
#ifndef __APPLE__
|
||||
.type gen_icf_map_h1_aarch64, %function
|
||||
#endif
|
||||
|
||||
/* arguments */
|
||||
declare_generic_reg stream_param, 0,x
|
||||
@ -137,7 +142,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
|
||||
struct deflate_icf *matches_icf_lookup, uint64_t input_size)
|
||||
*/
|
||||
|
||||
gen_icf_map_h1_aarch64:
|
||||
cdecl(gen_icf_map_h1_aarch64):
|
||||
cmp input_size_param, (ISAL_LOOK_AHEAD-1) // 287
|
||||
bls .fast_exit
|
||||
stp x29, x30, [sp, -16]!
|
||||
@ -263,4 +268,6 @@ gen_icf_map_h1_aarch64:
|
||||
.fast_exit:
|
||||
mov ret_val, 0
|
||||
ret
|
||||
#ifndef __APPLE__
|
||||
.size gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64
|
||||
#endif
|
||||
|
@ -27,6 +27,8 @@
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.arch armv8-a
|
||||
.text
|
||||
.align 2
|
||||
@ -257,8 +259,10 @@ declare Macros
|
||||
.endm
|
||||
|
||||
|
||||
.global decode_huffman_code_block_stateless_aarch64
|
||||
.global cdecl(decode_huffman_code_block_stateless_aarch64)
|
||||
#ifndef __APPLE__
|
||||
.type decode_huffman_code_block_stateless_aarch64, %function
|
||||
#endif
|
||||
/*
|
||||
void decode_huffman_code_block_stateless_aarch64(
|
||||
struct inflate_state *state,
|
||||
@ -305,7 +309,7 @@ declare Macros
|
||||
declare_generic_reg write_overflow_lits,26,w
|
||||
declare_generic_reg repeat_length,27,w
|
||||
|
||||
decode_huffman_code_block_stateless_aarch64:
|
||||
cdecl(decode_huffman_code_block_stateless_aarch64):
|
||||
//save registers
|
||||
push_stack
|
||||
|
||||
@ -324,8 +328,13 @@ decode_huffman_code_block_stateless_aarch64:
|
||||
ldp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
|
||||
|
||||
//init rfc_table
|
||||
#ifndef __APPLE__
|
||||
adrp rfc_table,rfc_lookup_table
|
||||
add rfc_table,rfc_table,:lo12:rfc_lookup_table
|
||||
#else
|
||||
adrp rfc_table,rfc_lookup_table@PAGE
|
||||
add rfc_table,rfc_table,rfc_lookup_table@PAGEOFF
|
||||
#endif
|
||||
#if ENABLE_TBL_INSTRUCTION
|
||||
ld1 {v1.16b,v2.16b,v3.16b},[rfc_table]
|
||||
add rfc_table,rfc_table,48
|
||||
@ -661,8 +670,10 @@ byte_copy_loop:
|
||||
strb w_arg0, [next_out],1
|
||||
bne byte_copy_loop
|
||||
b decompress_data_end
|
||||
#ifndef __APPLE__
|
||||
.size decode_huffman_code_block_stateless_aarch64, .-decode_huffman_code_block_stateless_aarch64
|
||||
.type rfc_lookup_table, %object
|
||||
#endif
|
||||
|
||||
rfc_lookup_table:
|
||||
#if ENABLE_TBL_INSTRUCTION
|
||||
@ -686,4 +697,6 @@ rfc_lookup_table:
|
||||
.short 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
|
||||
.short 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
|
||||
#endif
|
||||
#ifndef __APPLE__
|
||||
.size rfc_lookup_table, . - rfc_lookup_table
|
||||
#endif
|
||||
|
@ -26,6 +26,9 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.arch armv8-a+crc
|
||||
.text
|
||||
.align 2
|
||||
@ -72,8 +75,10 @@ skip_has_hist:
|
||||
.endm
|
||||
|
||||
|
||||
.global isal_deflate_body_aarch64
|
||||
.global cdecl(isal_deflate_body_aarch64)
|
||||
#ifndef __APPLE__
|
||||
.type isal_deflate_body_aarch64, %function
|
||||
#endif
|
||||
/*
|
||||
void isal_deflate_body_aarch64(struct isal_zstream *stream)
|
||||
*/
|
||||
@ -115,7 +120,7 @@ skip_has_hist:
|
||||
declare_generic_reg code_len2, 4,x
|
||||
|
||||
|
||||
isal_deflate_body_aarch64:
|
||||
cdecl(isal_deflate_body_aarch64):
|
||||
//save registers
|
||||
push_stack
|
||||
ldr avail_in, [stream, _avail_in]
|
||||
@ -258,4 +263,6 @@ exit_save_state:
|
||||
mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
|
||||
str w_tmp0, [stream, _internal_state+_state]
|
||||
b exit_ret
|
||||
#ifndef __APPLE__
|
||||
.size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64
|
||||
#endif
|
||||
|
@ -26,6 +26,9 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.arch armv8-a+crc
|
||||
.text
|
||||
.align 2
|
||||
@ -73,9 +76,11 @@ skip_has_hist:
|
||||
sub w_\next_in,w_\next_in,w_\m_out_buf
|
||||
stp w_\next_in,w_\start_in,[\stream,_avail_out]
|
||||
.endm
|
||||
.global isal_deflate_finish_aarch64
|
||||
.global cdecl(isal_deflate_finish_aarch64)
|
||||
.arch armv8-a+crc
|
||||
#ifndef __APPLE__
|
||||
.type isal_deflate_finish_aarch64, %function
|
||||
#endif
|
||||
/*
|
||||
void isal_deflate_finish_aarch64(struct isal_zstream *stream)
|
||||
*/
|
||||
@ -117,7 +122,7 @@ skip_has_hist:
|
||||
declare_generic_reg code_len2, 4,x
|
||||
|
||||
|
||||
isal_deflate_finish_aarch64:
|
||||
cdecl(isal_deflate_finish_aarch64):
|
||||
//save registers
|
||||
push_stack
|
||||
|
||||
@ -260,5 +265,6 @@ update_state_exit:
|
||||
update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
|
||||
pop_stack
|
||||
ret
|
||||
|
||||
#ifndef __APPLE__
|
||||
.size isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64
|
||||
#endif
|
||||
|
@ -26,6 +26,9 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.arch armv8-a+crc
|
||||
.text
|
||||
.align 2
|
||||
@ -46,8 +49,10 @@ declare Macros
|
||||
|
||||
|
||||
|
||||
.global isal_deflate_hash_aarch64
|
||||
.global cdecl(isal_deflate_hash_aarch64)
|
||||
#ifndef __APPLE__
|
||||
.type isal_deflate_hash_aarch64, %function
|
||||
#endif
|
||||
/*
|
||||
void isal_deflate_hash_aarch64(uint16_t * hash_table, uint32_t hash_mask,
|
||||
uint32_t current_index, uint8_t * dict, uint32_t dict_len)
|
||||
@ -58,14 +63,14 @@ declare Macros
|
||||
declare_generic_reg dict, 3,x
|
||||
declare_generic_reg dict_len, 4,w
|
||||
|
||||
declare_generic_reg next_in 3,x
|
||||
declare_generic_reg end_in 6,x
|
||||
declare_generic_reg ind 5,w
|
||||
declare_generic_reg hash 2,w
|
||||
declare_generic_reg literal 2,w
|
||||
declare_generic_reg next_in, 3,x
|
||||
declare_generic_reg end_in, 6,x
|
||||
declare_generic_reg ind, 5,w
|
||||
declare_generic_reg hash, 2,w
|
||||
declare_generic_reg literal, 2,w
|
||||
#define SHORTEST_MATCH #4
|
||||
|
||||
isal_deflate_hash_aarch64:
|
||||
cdecl(isal_deflate_hash_aarch64):
|
||||
sub ind, current_index, dict_len
|
||||
and ind,ind,0xffff
|
||||
|
||||
@ -92,4 +97,6 @@ loop_start:
|
||||
exit_func:
|
||||
|
||||
ret
|
||||
#ifndef __APPLE__
|
||||
.size isal_deflate_hash_aarch64, .-isal_deflate_hash_aarch64
|
||||
#endif
|
||||
|
@ -26,6 +26,9 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.arch armv8-a+crypto
|
||||
.text
|
||||
.align 3
|
||||
@ -56,9 +59,11 @@ Arguements list
|
||||
adler32 .req w0
|
||||
start .req x1
|
||||
length .req x2
|
||||
.global adler32_neon
|
||||
.global cdecl(adler32_neon)
|
||||
#ifndef __APPLE__
|
||||
.type adler32_neon, %function
|
||||
adler32_neon:
|
||||
#endif
|
||||
cdecl(adler32_neon):
|
||||
/*
|
||||
local variables
|
||||
*/
|
||||
@ -92,8 +97,13 @@ local variables
|
||||
lsr adler1, adler32, 16
|
||||
|
||||
lsr loop_cnt,length,5
|
||||
#ifndef __APPLE__
|
||||
adrp x3,factors
|
||||
add x3,x3,:lo12:factors
|
||||
#else
|
||||
adrp x3,factors@PAGE
|
||||
add x3,x3,factors@PAGEOFF
|
||||
#endif
|
||||
ld1 {factor0_v.16b-factor1_v.16b},[x3]
|
||||
|
||||
add end,start,length
|
||||
@ -162,12 +172,15 @@ end_func:
|
||||
orr w0,adler0,adler1,lsl 16
|
||||
ret
|
||||
|
||||
#ifndef __APPLE__
|
||||
.size adler32_neon, .-adler32_neon
|
||||
.section .rodata.cst16,"aM",@progbits,16
|
||||
#else
|
||||
.section __TEXT,__const
|
||||
#endif
|
||||
.align 4
|
||||
factors:
|
||||
.quad 0x191a1b1c1d1e1f20
|
||||
.quad 0x1112131415161718
|
||||
.quad 0x090a0b0c0d0e0f10
|
||||
.quad 0x0102030405060708
|
||||
|
||||
|
@ -30,86 +30,121 @@
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_adler32)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(adler32_neon);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
return PROVIDER_INFO(adler32_neon);
|
||||
#endif
|
||||
return PROVIDER_BASIC(adler32);
|
||||
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_body)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(isal_deflate_body_aarch64);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(isal_deflate_body_aarch64);
|
||||
#endif
|
||||
return PROVIDER_BASIC(isal_deflate_body);
|
||||
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_finish)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(isal_deflate_finish_aarch64);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(isal_deflate_finish_aarch64);
|
||||
#endif
|
||||
return PROVIDER_BASIC(isal_deflate_finish);
|
||||
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl1)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
|
||||
#endif
|
||||
return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl1)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
|
||||
#endif
|
||||
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl2)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
|
||||
#endif
|
||||
return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl2)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
|
||||
#endif
|
||||
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl3)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
|
||||
#endif
|
||||
return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base);
|
||||
#endif
|
||||
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map);
|
||||
}
|
||||
|
||||
@ -125,64 +160,92 @@ DEFINE_INTERFACE_DISPATCHER(encode_deflate_icf)
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_update_histogram)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(isal_update_histogram_aarch64);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(isal_update_histogram_aarch64);
|
||||
#endif
|
||||
return PROVIDER_BASIC(isal_update_histogram);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(gen_icf_map_lh1)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32) {
|
||||
return PROVIDER_INFO(gen_icf_map_h1_aarch64);
|
||||
}
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(gen_icf_map_h1_aarch64);
|
||||
#endif
|
||||
return PROVIDER_BASIC(gen_icf_map_h1);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl0)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||
#endif
|
||||
return PROVIDER_BASIC(isal_deflate_hash);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl1)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||
#endif
|
||||
return PROVIDER_BASIC(isal_deflate_hash);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl2)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||
#endif
|
||||
return PROVIDER_BASIC(isal_deflate_hash);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl3)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||
#endif
|
||||
return PROVIDER_BASIC(isal_deflate_hash);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(decode_huffman_code_block_stateless)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32)
|
||||
return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||
return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64);
|
||||
#endif
|
||||
return PROVIDER_BASIC(decode_huffman_code_block_stateless);
|
||||
}
|
||||
|
@ -26,6 +26,9 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.arch armv8-a
|
||||
.text
|
||||
.align 2
|
||||
@ -48,8 +51,10 @@ declare Macros
|
||||
|
||||
.text
|
||||
.align 2
|
||||
.global set_long_icf_fg_aarch64
|
||||
.global cdecl(set_long_icf_fg_aarch64)
|
||||
#ifndef __APPLE__
|
||||
.type set_long_icf_fg_aarch64, %function
|
||||
#endif
|
||||
|
||||
/*
|
||||
void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
|
||||
@ -69,7 +74,7 @@ void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t inp
|
||||
/* local variable */
|
||||
declare_generic_reg len, 7,w
|
||||
declare_generic_reg dist_code, 8,w
|
||||
declare_generic_reg shortest_match_len 9,w
|
||||
declare_generic_reg shortest_match_len, 9,w
|
||||
declare_generic_reg len_max, 10,w
|
||||
declare_generic_reg dist_extra, 11,w
|
||||
declare_generic_reg const_8, 13,x
|
||||
@ -90,7 +95,7 @@ void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t inp
|
||||
.equ SHORTEST_MATCH, 4
|
||||
.equ LEN_MAX_CONST, 512
|
||||
|
||||
set_long_icf_fg_aarch64:
|
||||
cdecl(set_long_icf_fg_aarch64):
|
||||
stp x29, x30, [sp, -192]!
|
||||
add x29, sp, 0
|
||||
stp x21, x22, [sp, 32]
|
||||
@ -103,11 +108,18 @@ set_long_icf_fg_aarch64:
|
||||
add end_in, next_in_param, input_size_param
|
||||
mov match_lookup, match_lookup_param
|
||||
|
||||
#ifndef __APPLE__
|
||||
adrp x1, .data_dist_start
|
||||
mov x2, DIST_START_SIZE // 128
|
||||
add x1, x1, :lo12:.data_dist_start
|
||||
mov x0, dist_start
|
||||
bl memcpy
|
||||
#else
|
||||
adrp x1, .data_dist_start@PAGE
|
||||
mov x2, DIST_START_SIZE // 128
|
||||
add x1, x1, .data_dist_start@PAGEOFF
|
||||
mov x0, dist_start
|
||||
#endif
|
||||
bl cdecl(memcpy)
|
||||
|
||||
add x_tmp0, end_processed, ISAL_LOOK_AHEAD // 288
|
||||
cmp end_in, x_tmp0
|
||||
@ -182,9 +194,11 @@ set_long_icf_fg_aarch64:
|
||||
ldr x23, [sp, 48]
|
||||
ldp x29, x30, [sp], 192
|
||||
ret
|
||||
#ifndef __APPLE__
|
||||
.size set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
|
||||
#endif
|
||||
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.align 3
|
||||
.set .data_dist_start,. + 0
|
||||
.real_data_dist_start:
|
||||
|
@ -26,6 +26,9 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.arch armv8-a+crc
|
||||
.text
|
||||
.align 2
|
||||
@ -45,8 +48,10 @@ declare Macros
|
||||
x_\name .req x\reg
|
||||
.endm
|
||||
|
||||
.global isal_deflate_icf_body_hash_hist_aarch64
|
||||
.global cdecl(isal_deflate_icf_body_hash_hist_aarch64)
|
||||
#ifndef __APPLE__
|
||||
.type isal_deflate_icf_body_hash_hist_aarch64, %function
|
||||
#endif
|
||||
/*
|
||||
void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
|
||||
*/
|
||||
@ -126,7 +131,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
|
||||
declare_generic_reg tmp0, 4,x
|
||||
declare_generic_reg tmp1, 5,x
|
||||
|
||||
isal_deflate_icf_body_hash_hist_aarch64:
|
||||
cdecl(isal_deflate_icf_body_hash_hist_aarch64):
|
||||
stp x29, x30, [sp, -80]!
|
||||
add x29, sp, 0
|
||||
str x24, [sp, 56]
|
||||
@ -360,5 +365,6 @@ isal_deflate_icf_body_hash_hist_aarch64:
|
||||
ldr x24, [sp, 56]
|
||||
ldp x29, x30, [sp], 80
|
||||
ret
|
||||
|
||||
#ifndef __APPLE__
|
||||
.size isal_deflate_icf_body_hash_hist_aarch64, .-isal_deflate_icf_body_hash_hist_aarch64
|
||||
#endif
|
||||
|
@ -26,6 +26,9 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.arch armv8-a+crc
|
||||
.text
|
||||
|
||||
@ -129,7 +132,9 @@ void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream);
|
||||
declare_generic_reg tmp3, 28,x
|
||||
|
||||
.align 2
|
||||
#ifndef __APPLE__
|
||||
.type write_deflate_icf_constprop, %function
|
||||
#endif
|
||||
write_deflate_icf_constprop:
|
||||
ldrh w2, [x0]
|
||||
mov w3, 30
|
||||
@ -141,10 +146,14 @@ write_deflate_icf_constprop:
|
||||
ubfx x1, x1, 16, 3
|
||||
strh w1, [x0, 2]
|
||||
ret
|
||||
#ifndef __APPLE__
|
||||
.size write_deflate_icf_constprop, .-write_deflate_icf_constprop
|
||||
#endif
|
||||
|
||||
.align 2
|
||||
#ifndef __APPLE__
|
||||
.type write_deflate_icf, %function
|
||||
#endif
|
||||
write_deflate_icf:
|
||||
ldrh w4, [x0]
|
||||
bfi w4, w1, 0, 10
|
||||
@ -156,10 +165,14 @@ write_deflate_icf:
|
||||
bfi w1, w3, 3, 13
|
||||
strh w1, [x0, 2]
|
||||
ret
|
||||
#ifndef __APPLE__
|
||||
.size write_deflate_icf, .-write_deflate_icf
|
||||
#endif
|
||||
|
||||
.align 2
|
||||
#ifndef __APPLE__
|
||||
.type update_state, %function
|
||||
#endif
|
||||
update_state:
|
||||
sub x7, x2, x1
|
||||
ldr x4, [x0, 48]
|
||||
@ -179,12 +192,16 @@ update_state:
|
||||
str x5, [x4, 4688]
|
||||
str x6, [x4, 4696]
|
||||
ret
|
||||
#ifndef __APPLE__
|
||||
.size update_state, .-update_state
|
||||
#endif
|
||||
|
||||
.align 2
|
||||
.global isal_deflate_icf_finish_hash_hist_aarch64
|
||||
.global cdecl(isal_deflate_icf_finish_hash_hist_aarch64)
|
||||
#ifndef __APPLE__
|
||||
.type isal_deflate_icf_finish_hash_hist_aarch64, %function
|
||||
isal_deflate_icf_finish_hash_hist_aarch64:
|
||||
#endif
|
||||
cdecl(isal_deflate_icf_finish_hash_hist_aarch64):
|
||||
ldr w_end_in, [stream, 8] // stream->avail_in
|
||||
cbz w_end_in, .stream_not_available
|
||||
|
||||
@ -393,5 +410,6 @@ isal_deflate_icf_finish_hash_hist_aarch64:
|
||||
str w1, [stream, offset_state_state] // 84
|
||||
.done:
|
||||
ret
|
||||
|
||||
#ifndef __APPLE__
|
||||
.size isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64
|
||||
#endif
|
||||
|
@ -26,6 +26,9 @@
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.arch armv8-a+crc
|
||||
.text
|
||||
.align 2
|
||||
@ -63,17 +66,24 @@ declare Macros
|
||||
.endm
|
||||
|
||||
.macro convert_length_to_len_sym length:req,length_out:req,tmp0:req
|
||||
#ifndef __APPLE__
|
||||
adrp x_\tmp0, .len_to_code_tab_lanchor
|
||||
add x_\tmp0, x_\tmp0, :lo12:.len_to_code_tab_lanchor
|
||||
#else
|
||||
adrp x_\tmp0, .len_to_code_tab_lanchor@PAGE
|
||||
add x_\tmp0, x_\tmp0, .len_to_code_tab_lanchor@PAGEOFF
|
||||
#endif
|
||||
ldr w_\length_out, [x_\tmp0, w_\length, uxtw 2]
|
||||
add w_\length_out, w_\length_out, 256
|
||||
.endm
|
||||
|
||||
.section .rodata
|
||||
ASM_DEF_RODATA
|
||||
.align 4
|
||||
.len_to_code_tab_lanchor = . + 0
|
||||
#ifndef __APPLE__
|
||||
.type len_to_code_tab, %object
|
||||
.size len_to_code_tab, 1056
|
||||
#endif
|
||||
len_to_code_tab:
|
||||
.word 0x00, 0x00, 0x00
|
||||
.word 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
|
||||
@ -111,9 +121,11 @@ len_to_code_tab:
|
||||
.word 0x00, 0x00, 0x00, 0x00, 0x00
|
||||
|
||||
.text
|
||||
.global isal_update_histogram_aarch64
|
||||
.global cdecl(isal_update_histogram_aarch64)
|
||||
.arch armv8-a+crc
|
||||
#ifndef __APPLE__
|
||||
.type isal_update_histogram_aarch64, %function
|
||||
#endif
|
||||
|
||||
/*
|
||||
void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
|
||||
@ -157,7 +169,7 @@ void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
|
||||
.equ hash_offset, (dist_offset + 8*DIST_LEN) // 2528
|
||||
.equ hash_table_size, (8*1024*2) // 16384
|
||||
|
||||
isal_update_histogram_aarch64:
|
||||
cdecl(isal_update_histogram_aarch64):
|
||||
cmp w_length, 0
|
||||
ble .done
|
||||
|
||||
@ -176,7 +188,7 @@ isal_update_histogram_aarch64:
|
||||
mov x0, last_seen
|
||||
mov w1, 0
|
||||
mov x2, hash_table_size
|
||||
bl memset
|
||||
bl cdecl(memset)
|
||||
|
||||
cmp current, loop_end_iter
|
||||
bcs .loop_end
|
||||
@ -308,4 +320,6 @@ isal_update_histogram_aarch64:
|
||||
.align 2
|
||||
.done:
|
||||
ret
|
||||
#ifndef __APPLE__
|
||||
.size isal_update_histogram_aarch64, .-isal_update_histogram_aarch64
|
||||
#endif
|
||||
|
18
include/aarch64_label.h
Normal file
18
include/aarch64_label.h
Normal file
@ -0,0 +1,18 @@
|
||||
#ifndef __AARCH64_LABEL_H__
#define __AARCH64_LABEL_H__

/*
 * cdecl(x): the symbol name x with the compiler's __USER_LABEL_PREFIX__
 * prepended, so hand-written assembly labels match the names the C compiler
 * emits. Without that predefined macro, cdecl(x) is x itself.
 * CONCAT1 exists only to expand its arguments before CONCAT2 pastes them.
 */
#if !defined(__USER_LABEL_PREFIX__)
#define cdecl(x) x
#else
#define CONCAT2(a, b) a ## b
#define CONCAT1(a, b) CONCAT2(a, b)
#define cdecl(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
#endif

/* ASM_DEF_RODATA: section directive used for read-only data tables. */
#if !defined(__APPLE__)
#define ASM_DEF_RODATA .section .rodata
#else
#define ASM_DEF_RODATA .section __TEXT,__const
#endif

#endif
|
@ -31,7 +31,14 @@
|
||||
#ifndef __aarch64__
|
||||
#error "This file is for aarch64 only"
|
||||
#endif
|
||||
#ifdef __APPLE__
|
||||
#define SYSCTL_PMULL_KEY "hw.optional.arm.FEAT_PMULL" // from macOS 12 FEAT_* sysctl infos are available
|
||||
#define SYSCTL_CRC32_KEY "hw.optional.armv8_crc32"
|
||||
#define SYSCTL_SVE_KEY "hw.optional.arm.FEAT_SVE" // this one is just a guess and need to check macOS update
|
||||
#else
|
||||
#include <asm/hwcap.h>
|
||||
#endif
|
||||
#include "aarch64_label.h"
|
||||
#ifdef __ASSEMBLY__
|
||||
/**
|
||||
* # mbin_interface : the wrapper layer for isal-l api
|
||||
@ -48,17 +55,18 @@
|
||||
* 4. The dispather should return the right function pointer , revision and a string information .
|
||||
**/
|
||||
.macro mbin_interface name:req
|
||||
.extern \name\()_dispatcher
|
||||
.section .data
|
||||
.extern cdecl(\name\()_dispatcher)
|
||||
.data
|
||||
.balign 8
|
||||
.global \name\()_dispatcher_info
|
||||
.global cdecl(\name\()_dispatcher_info)
|
||||
#ifndef __APPLE__
|
||||
.type \name\()_dispatcher_info,%object
|
||||
|
||||
\name\()_dispatcher_info:
|
||||
#endif
|
||||
cdecl(\name\()_dispatcher_info):
|
||||
.quad \name\()_mbinit //func_entry
|
||||
|
||||
#ifndef __APPLE__
|
||||
.size \name\()_dispatcher_info,. - \name\()_dispatcher_info
|
||||
|
||||
#endif
|
||||
.balign 8
|
||||
.text
|
||||
\name\()_mbinit:
|
||||
@ -108,7 +116,7 @@
|
||||
*/
|
||||
|
||||
|
||||
bl \name\()_dispatcher
|
||||
bl cdecl(\name\()_dispatcher)
|
||||
//restore temp/indirect result registers
|
||||
ldp x8, x9, [sp, 16]
|
||||
.cfi_restore 8
|
||||
@ -150,16 +158,24 @@
|
||||
.cfi_def_cfa_offset 0
|
||||
.cfi_endproc
|
||||
|
||||
.global \name
|
||||
.global cdecl(\name)
|
||||
#ifndef __APPLE__
|
||||
.type \name,%function
|
||||
#endif
|
||||
.align 2
|
||||
\name\():
|
||||
cdecl(\name\()):
|
||||
#ifndef __APPLE__
|
||||
adrp x9, :got:\name\()_dispatcher_info
|
||||
ldr x9, [x9, #:got_lo12:\name\()_dispatcher_info]
|
||||
#else
|
||||
adrp x9, cdecl(\name\()_dispatcher_info)@GOTPAGE
|
||||
ldr x9, [x9, #cdecl(\name\()_dispatcher_info)@GOTPAGEOFF]
|
||||
#endif
|
||||
ldr x10,[x9]
|
||||
br x10
|
||||
#ifndef __APPLE__
|
||||
.size \name,. - \name
|
||||
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/**
|
||||
@ -168,32 +184,53 @@
|
||||
*/
|
||||
.macro mbin_interface_base name:req, base:req
	// Emits two things for interface \name:
	//   1. a data word \name\()_dispatcher_info holding the address of \base
	//   2. a global entry point \name that loads that word through the GOT
	//      and branches to it
	// Clobbers x9 and x10.
	// NOTE(review): \base is referenced without cdecl(); if it names a C
	// symbol it may also need the label prefix on Mach-O - confirm with a
	// caller before changing.
	.extern \base
	.data
	.balign 8
	.global cdecl(\name\()_dispatcher_info)
#ifndef __APPLE__
	.type   \name\()_dispatcher_info,%object
#endif
	cdecl(\name\()_dispatcher_info):
	.quad   \base	//func_entry
#ifndef __APPLE__
	.size   \name\()_dispatcher_info,. - \name\()_dispatcher_info
#endif
	.balign 8
	.text
	.global cdecl(\name)
#ifndef __APPLE__
	.type \name,%function
#endif
	.align  2
	cdecl(\name\()):
	// The GOT reference must name exactly the label defined above. The
	// earlier form wrapped an extra leading underscore inside cdecl(),
	// which referred to a symbol that is never defined.
#ifndef __APPLE__
	adrp    x9, :got:\name\()_dispatcher_info
	ldr     x9, [x9, #:got_lo12:\name\()_dispatcher_info]
#else
	adrp    x9, cdecl(\name\()_dispatcher_info)@GOTPAGE
	ldr     x9, [x9, #cdecl(\name\()_dispatcher_info)@GOTPAGEOFF]
#endif
	ldr     x10,[x9]
	br      x10
#ifndef __APPLE__
	.size \name,. - \name
#endif
	.endm
|
||||
|
||||
#else /* __ASSEMBLY__ */
|
||||
#include <stdint.h>
|
||||
#if defined(__linux__)
|
||||
#include <sys/auxv.h>
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
#include <sys/sysctl.h>
|
||||
#include <stddef.h>
|
||||
/*
 * Read an integer sysctl by name and report its value.
 *
 * name: sysctl key, e.g. SYSCTL_CRC32_KEY.
 * Returns the value written by sysctlbyname(), or 0 when the call fails
 * (for instance when the key does not exist).
 */
static inline int sysctlEnabled(const char* name){
	/*
	 * Start from 0 so that a successful call which writes fewer than
	 * sizeof(int) bytes cannot leave indeterminate bytes in the result.
	 */
	int enabled = 0;
	size_t size = sizeof(enabled);

	if (sysctlbyname(name, &enabled, &size, NULL, 0) != 0)
		return 0;
	return enabled;
}
|
||||
#endif
|
||||
|
||||
|
||||
#define DEFINE_INTERFACE_DISPATCHER(name) \
|
||||
@ -298,10 +335,12 @@
|
||||
static inline uint32_t get_micro_arch_id(void)
|
||||
{
|
||||
uint32_t id=CPU_IMPLEMENTER_RESERVE;
|
||||
#ifndef __APPLE__
|
||||
if ((getauxval(AT_HWCAP) & HWCAP_CPUID)) {
|
||||
/** Here will trap into kernel space */
|
||||
asm("mrs %0, MIDR_EL1 " : "=r" (id));
|
||||
}
|
||||
#endif
|
||||
return id&0xff00fff0;
|
||||
}
|
||||
|
||||
|
@ -30,10 +30,12 @@
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_zero_detect)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(mem_zero_detect_neon);
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
return PROVIDER_INFO(mem_zero_detect_neon);
|
||||
#endif
|
||||
return PROVIDER_BASIC(mem_zero_detect);
|
||||
|
||||
}
|
||||
|
@ -27,6 +27,8 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
.arch armv8-a
|
||||
|
||||
@ -36,10 +38,12 @@
|
||||
// input: n -> x1
|
||||
// output: -> x0 (true or false)
|
||||
|
||||
.global mem_zero_detect_neon
|
||||
.global cdecl(mem_zero_detect_neon)
|
||||
#ifndef __APPLE__
|
||||
.type mem_zero_detect_neon, %function
|
||||
#endif
|
||||
|
||||
mem_zero_detect_neon:
|
||||
cdecl(mem_zero_detect_neon):
|
||||
cmp x1, #(16*24-1)
|
||||
b.ls .loop_16x24_end
|
||||
|
||||
|
@ -27,10 +27,13 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
.text
|
||||
|
||||
.global pq_check_neon
|
||||
.global cdecl(pq_check_neon)
|
||||
#ifndef __APPLE__
|
||||
.type pq_check_neon, %function
|
||||
#endif
|
||||
|
||||
/* int pq_check_neon(int vects, int len, void **src) */
|
||||
|
||||
@ -85,7 +88,7 @@ v_0x80 .req v29
|
||||
* +----------+ +------------------+
|
||||
*/
|
||||
|
||||
pq_check_neon:
|
||||
cdecl(pq_check_neon):
|
||||
sub x_src_ptr_end, x_src, #8
|
||||
|
||||
sub w_vects, w_vects, #3
|
||||
|
@ -27,10 +27,14 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global pq_gen_neon
|
||||
.global cdecl(pq_gen_neon)
|
||||
#ifndef __APPLE__
|
||||
.type pq_gen_neon, %function
|
||||
#endif
|
||||
|
||||
/* int pq_gen_neon(int vects, int len, void **src) */
|
||||
|
||||
@ -84,7 +88,7 @@ v_0x80 .req v29
|
||||
* +----------+ +------------------+
|
||||
*/
|
||||
|
||||
pq_gen_neon:
|
||||
cdecl(pq_gen_neon):
|
||||
sub x_src_ptr_end, x_src, #8
|
||||
|
||||
sub w_vects, w_vects, #3
|
||||
|
@ -30,32 +30,48 @@
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(xor_gen)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(xor_gen_neon);
|
||||
#elif defined(__APPLE__)
|
||||
return PROVIDER_INFO(xor_gen_neon);
|
||||
#endif
|
||||
return PROVIDER_BASIC(xor_gen);
|
||||
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(xor_check)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(xor_check_neon);
|
||||
#elif defined(__APPLE__)
|
||||
return PROVIDER_INFO(xor_check_neon);
|
||||
#endif
|
||||
return PROVIDER_BASIC(xor_check);
|
||||
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(pq_gen)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(pq_gen_neon);
|
||||
#elif defined(__APPLE__)
|
||||
return PROVIDER_INFO(pq_gen_neon);
|
||||
#endif
|
||||
return PROVIDER_BASIC(pq_gen);
|
||||
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(pq_check)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
|
||||
return PROVIDER_INFO(pq_check_neon);
|
||||
#elif defined(__APPLE__)
|
||||
return PROVIDER_INFO(pq_check_neon);
|
||||
#endif
|
||||
return PROVIDER_BASIC(pq_check);
|
||||
|
||||
}
|
||||
|
@ -27,10 +27,14 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global xor_check_neon
|
||||
.global cdecl(xor_check_neon)
|
||||
#ifndef __APPLE__
|
||||
.type xor_check_neon, %function
|
||||
#endif
|
||||
|
||||
/* int xor_check_neon(int vects, int len, void **src) */
|
||||
|
||||
@ -76,7 +80,7 @@ w_xor .req w11
|
||||
* src_ptr_end -->
|
||||
*/
|
||||
|
||||
xor_check_neon:
|
||||
cdecl(xor_check_neon):
|
||||
add x_src_ptr_end, x_src, x_vects, lsl #3
|
||||
ldr x_src0, [x_src]
|
||||
add x_src0_end, x_src0, x_len
|
||||
|
@ -27,10 +27,14 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#########################################################################
|
||||
|
||||
#include "../include/aarch64_label.h"
|
||||
|
||||
.text
|
||||
|
||||
.global xor_gen_neon
|
||||
.global cdecl(xor_gen_neon)
|
||||
#ifndef __APPLE__
|
||||
.type xor_gen_neon, %function
|
||||
#endif
|
||||
|
||||
/* int xor_gen_neon(int vects, int len, void **src) */
|
||||
|
||||
@ -78,7 +82,7 @@ x_dst_ptr .req x11
|
||||
* +----------+ +------------------+
|
||||
*/
|
||||
|
||||
xor_gen_neon:
|
||||
cdecl(xor_gen_neon):
|
||||
add x_dst_ptr, x_src, x_vects, lsl #3
|
||||
ldr x_dst, [x_dst_ptr, #-8]!
|
||||
ldr x_src0, [x_src]
|
||||
|
Loading…
Reference in New Issue
Block a user