mirror of
https://github.com/intel/isa-l.git
synced 2025-01-22 05:20:02 +01:00
Fixes for aarch64 mac
- It should be fine to enable pmull always on Apple Silicon
- macOS 12+ is required for PMULL instruction.
- Changed the conditional macro to __APPLE__
- Rewritten dispatcher using sysctlbyname
- Use __USER_LABEL_PREFIX__
- Use __TEXT,__const as readonly section
- use ASM_DEF_RODATA macro
- fix func decl

Change-Id: I800593f21085d8187b480c8bb3ab2bd70c4a6974
Signed-off-by: Taiju Yamada <tyamada@bi.a.u-tokyo.ac.jp>
This commit is contained in:
parent
85716fe2fe
commit
1187583a97
@ -27,11 +27,15 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.arch armv8-a+crc+crypto
|
.arch armv8-a+crc+crypto
|
||||||
.text
|
.text
|
||||||
.align 3
|
.align 3
|
||||||
.global crc16_t10dif_copy_pmull
|
.global cdecl(crc16_t10dif_copy_pmull)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc16_t10dif_copy_pmull, %function
|
.type crc16_t10dif_copy_pmull, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* uint16_t crc16_t10dif_copy_pmull(uint16_t seed, uint8_t *dst, uint8_t *src, uint64_t len) */
|
/* uint16_t crc16_t10dif_copy_pmull(uint16_t seed, uint8_t *dst, uint8_t *src, uint64_t len) */
|
||||||
|
|
||||||
@ -67,7 +71,7 @@ x_crc16tab .req x5
|
|||||||
x_src_saved .req x0
|
x_src_saved .req x0
|
||||||
x_dst_saved .req x12
|
x_dst_saved .req x12
|
||||||
|
|
||||||
crc16_t10dif_copy_pmull:
|
cdecl(crc16_t10dif_copy_pmull):
|
||||||
cmp x_len, 63
|
cmp x_len, 63
|
||||||
sub sp, sp, #16
|
sub sp, sp, #16
|
||||||
uxth w_seed, w_seed
|
uxth w_seed, w_seed
|
||||||
@ -80,11 +84,19 @@ crc16_t10dif_copy_pmull:
|
|||||||
cmp x_len, x_tmp
|
cmp x_len, x_tmp
|
||||||
bls .end
|
bls .end
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
sxtw x_counter, w_counter
|
sxtw x_counter, w_counter
|
||||||
adrp x_crc16tab, .LANCHOR0
|
adrp x_crc16tab, .LANCHOR0
|
||||||
sub x_src, x_src, x_counter
|
sub x_src, x_src, x_counter
|
||||||
sub x_dst, x_dst, x_counter
|
sub x_dst, x_dst, x_counter
|
||||||
add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0
|
add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0
|
||||||
|
#else
|
||||||
|
sxtw x_counter, w_counter
|
||||||
|
adrp x_crc16tab, .LANCHOR0@PAGE
|
||||||
|
sub x_src, x_src, x_counter
|
||||||
|
sub x_dst, x_dst, x_counter
|
||||||
|
add x_crc16tab, x_crc16tab, .LANCHOR0@PAGEOFF
|
||||||
|
#endif
|
||||||
|
|
||||||
.align 2
|
.align 2
|
||||||
.crc_table_loop:
|
.crc_table_loop:
|
||||||
@ -145,8 +157,13 @@ v_tmp3 .req v16
|
|||||||
stp q_x0, q_x1, [x_dst]
|
stp q_x0, q_x1, [x_dst]
|
||||||
stp q_x2, q_x3, [x_dst, 32]
|
stp q_x2, q_x3, [x_dst, 32]
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_tmp, .shuffle_mask_lanchor
|
adrp x_tmp, .shuffle_mask_lanchor
|
||||||
ldr q_permutation, [x_tmp, :lo12:.shuffle_mask_lanchor]
|
ldr q_permutation, [x_tmp, :lo12:.shuffle_mask_lanchor]
|
||||||
|
#else
|
||||||
|
adrp x_tmp, .shuffle_mask_lanchor@PAGE
|
||||||
|
ldr q_permutation, [x_tmp, .shuffle_mask_lanchor@PAGEOFF]
|
||||||
|
#endif
|
||||||
|
|
||||||
tbl v_tmp1.16b, {v_x0.16b}, v7.16b
|
tbl v_tmp1.16b, {v_x0.16b}, v7.16b
|
||||||
eor v_x0.16b, v_tmp3.16b, v_tmp1.16b
|
eor v_x0.16b, v_tmp3.16b, v_tmp1.16b
|
||||||
@ -193,7 +210,7 @@ v_tmp1_x3 .req v27
|
|||||||
q_fold_const .req q17
|
q_fold_const .req q17
|
||||||
v_fold_const .req v17
|
v_fold_const .req v17
|
||||||
|
|
||||||
ldr q_fold_const, =0x371d00000000000087e70000;
|
ldr q_fold_const, fold_constant
|
||||||
|
|
||||||
.align 2
|
.align 2
|
||||||
.crc_fold_loop:
|
.crc_fold_loop:
|
||||||
@ -358,23 +375,32 @@ v_br1 .req v5
|
|||||||
umov x0, v_x0.d[0]
|
umov x0, v_x0.d[0]
|
||||||
ubfx x0, x0, 16, 16
|
ubfx x0, x0, 16, 16
|
||||||
b .crc_table_loop_pre
|
b .crc_table_loop_pre
|
||||||
|
#ifndef __APPLE__
|
||||||
.size crc16_t10dif_copy_pmull, .-crc16_t10dif_copy_pmull
|
.size crc16_t10dif_copy_pmull, .-crc16_t10dif_copy_pmull
|
||||||
|
#endif
|
||||||
|
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
|
fold_constant:
|
||||||
|
.word 0x87e70000
|
||||||
|
.word 0x00000000
|
||||||
|
.word 0x371d0000
|
||||||
|
.word 0x00000000
|
||||||
.shuffle_mask_lanchor = . + 0
|
.shuffle_mask_lanchor = . + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type shuffle_mask, %object
|
.type shuffle_mask, %object
|
||||||
.size shuffle_mask, 16
|
.size shuffle_mask, 16
|
||||||
|
#endif
|
||||||
shuffle_mask:
|
shuffle_mask:
|
||||||
.byte 15, 14, 13, 12, 11, 10, 9, 8
|
.byte 15, 14, 13, 12, 11, 10, 9, 8
|
||||||
.byte 7, 6, 5, 4, 3, 2, 1, 0
|
.byte 7, 6, 5, 4, 3, 2, 1, 0
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
.LANCHOR0 = . + 0
|
.LANCHOR0 = . + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc16tab, %object
|
.type crc16tab, %object
|
||||||
.size crc16tab, 512
|
.size crc16tab, 512
|
||||||
|
#endif
|
||||||
crc16tab:
|
crc16tab:
|
||||||
.hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b
|
.hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b
|
||||||
.hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6
|
.hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6
|
||||||
|
@ -27,11 +27,15 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.arch armv8-a+crc+crypto
|
.arch armv8-a+crc+crypto
|
||||||
.text
|
.text
|
||||||
.align 3
|
.align 3
|
||||||
.global crc16_t10dif_pmull
|
.global cdecl(crc16_t10dif_pmull)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc16_t10dif_pmull, %function
|
.type crc16_t10dif_pmull, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* uint16_t crc16_t10dif_pmull(uint16_t seed, uint8_t *buf, uint64_t len) */
|
/* uint16_t crc16_t10dif_pmull(uint16_t seed, uint8_t *buf, uint64_t len) */
|
||||||
|
|
||||||
@ -65,7 +69,7 @@ x_counter .req x3
|
|||||||
x_crc16tab .req x4
|
x_crc16tab .req x4
|
||||||
x_buf_saved .req x0
|
x_buf_saved .req x0
|
||||||
|
|
||||||
crc16_t10dif_pmull:
|
cdecl(crc16_t10dif_pmull):
|
||||||
cmp x_len, 63
|
cmp x_len, 63
|
||||||
sub sp, sp, #16
|
sub sp, sp, #16
|
||||||
uxth w_seed, w_seed
|
uxth w_seed, w_seed
|
||||||
@ -78,10 +82,17 @@ crc16_t10dif_pmull:
|
|||||||
cmp x_len, x_tmp
|
cmp x_len, x_tmp
|
||||||
bls .end
|
bls .end
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
sxtw x_counter, w_counter
|
sxtw x_counter, w_counter
|
||||||
adrp x_crc16tab, .LANCHOR0
|
adrp x_crc16tab, .LANCHOR0
|
||||||
sub x_buf, x_buf, x_counter
|
sub x_buf, x_buf, x_counter
|
||||||
add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0
|
add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0
|
||||||
|
#else
|
||||||
|
sxtw x_counter, w_counter
|
||||||
|
adrp x_crc16tab, .LANCHOR0@PAGE
|
||||||
|
sub x_buf, x_buf, x_counter
|
||||||
|
add x_crc16tab, x_crc16tab, .LANCHOR0@PAGEOFF
|
||||||
|
#endif
|
||||||
|
|
||||||
.align 2
|
.align 2
|
||||||
.crc_table_loop:
|
.crc_table_loop:
|
||||||
@ -137,8 +148,13 @@ v_tmp3 .req v16
|
|||||||
ldp q_x0, q_x1, [x_buf]
|
ldp q_x0, q_x1, [x_buf]
|
||||||
ldp q_x2, q_x3, [x_buf, 32]
|
ldp q_x2, q_x3, [x_buf, 32]
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_tmp, .shuffle_mask_lanchor
|
adrp x_tmp, .shuffle_mask_lanchor
|
||||||
ldr q7, [x_tmp, :lo12:.shuffle_mask_lanchor]
|
ldr q7, [x_tmp, :lo12:.shuffle_mask_lanchor]
|
||||||
|
#else
|
||||||
|
adrp x_tmp, .shuffle_mask_lanchor@PAGE
|
||||||
|
ldr q7, [x_tmp, .shuffle_mask_lanchor@PAGEOFF]
|
||||||
|
#endif
|
||||||
|
|
||||||
tbl v_tmp1.16b, {v_x0.16b}, v7.16b
|
tbl v_tmp1.16b, {v_x0.16b}, v7.16b
|
||||||
eor v_x0.16b, v_tmp3.16b, v_tmp1.16b
|
eor v_x0.16b, v_tmp3.16b, v_tmp1.16b
|
||||||
@ -185,7 +201,7 @@ v_tmp1_x3 .req v27
|
|||||||
q_fold_const .req q17
|
q_fold_const .req q17
|
||||||
v_fold_const .req v17
|
v_fold_const .req v17
|
||||||
|
|
||||||
ldr q_fold_const, =0x371d00000000000087e70000;
|
ldr q_fold_const, fold_constant
|
||||||
|
|
||||||
.align 2
|
.align 2
|
||||||
.crc_fold_loop:
|
.crc_fold_loop:
|
||||||
@ -344,22 +360,32 @@ v_br1 .req v5
|
|||||||
ubfx x0, x0, 16, 16
|
ubfx x0, x0, 16, 16
|
||||||
b .crc_table_loop_pre
|
b .crc_table_loop_pre
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.size crc16_t10dif_pmull, .-crc16_t10dif_pmull
|
.size crc16_t10dif_pmull, .-crc16_t10dif_pmull
|
||||||
|
#endif
|
||||||
|
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
|
fold_constant:
|
||||||
|
.word 0x87e70000
|
||||||
|
.word 0x00000000
|
||||||
|
.word 0x371d0000
|
||||||
|
.word 0x00000000
|
||||||
.shuffle_mask_lanchor = . + 0
|
.shuffle_mask_lanchor = . + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type shuffle_mask, %object
|
.type shuffle_mask, %object
|
||||||
.size shuffle_mask, 16
|
.size shuffle_mask, 16
|
||||||
|
#endif
|
||||||
shuffle_mask:
|
shuffle_mask:
|
||||||
.byte 15, 14, 13, 12, 11, 10, 9, 8
|
.byte 15, 14, 13, 12, 11, 10, 9, 8
|
||||||
.byte 7, 6, 5, 4, 3, 2, 1, 0
|
.byte 7, 6, 5, 4, 3, 2, 1, 0
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
.LANCHOR0 = . + 0
|
.LANCHOR0 = . + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc16tab, %object
|
.type crc16tab, %object
|
||||||
.size crc16tab, 512
|
.size crc16tab, 512
|
||||||
|
#endif
|
||||||
crc16tab:
|
crc16tab:
|
||||||
.hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b
|
.hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b
|
||||||
.hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6
|
.hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6
|
||||||
|
@ -27,8 +27,7 @@
|
|||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
|
||||||
.macro crc32_hw_common poly_type
|
.macro crc32_hw_common poly_type
|
||||||
|
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.macro declare_var_vector_reg name:req,reg:req
|
.macro declare_var_vector_reg name:req,reg:req
|
||||||
\name\()_q .req q\reg
|
\name\()_q .req q\reg
|
||||||
@ -429,4 +430,3 @@ start_final:
|
|||||||
.endif
|
.endif
|
||||||
ret
|
ret
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
@ -88,8 +88,12 @@
|
|||||||
);
|
);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
.global crc32_gzip_refl_3crc_fold
|
.global cdecl(crc32_gzip_refl_3crc_fold)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32_gzip_refl_3crc_fold, %function
|
.type crc32_gzip_refl_3crc_fold, %function
|
||||||
crc32_gzip_refl_3crc_fold:
|
#endif
|
||||||
|
cdecl(crc32_gzip_refl_3crc_fold):
|
||||||
crc32_3crc_fold crc32
|
crc32_3crc_fold crc32
|
||||||
|
#ifndef __APPLE__
|
||||||
.size crc32_gzip_refl_3crc_fold, .-crc32_gzip_refl_3crc_fold
|
.size crc32_gzip_refl_3crc_fold, .-crc32_gzip_refl_3crc_fold
|
||||||
|
#endif
|
||||||
|
@ -59,8 +59,12 @@
|
|||||||
* uint32_t crc32_gzip_refl_crc_ext(const unsigned char *BUF,
|
* uint32_t crc32_gzip_refl_crc_ext(const unsigned char *BUF,
|
||||||
* uint64_t LEN,uint32_t wCRC);
|
* uint64_t LEN,uint32_t wCRC);
|
||||||
*/
|
*/
|
||||||
.global crc32_gzip_refl_crc_ext
|
.global cdecl(crc32_gzip_refl_crc_ext)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32_gzip_refl_crc_ext, %function
|
.type crc32_gzip_refl_crc_ext, %function
|
||||||
crc32_gzip_refl_crc_ext:
|
#endif
|
||||||
|
cdecl(crc32_gzip_refl_crc_ext):
|
||||||
crc32_hw_common crc32
|
crc32_hw_common crc32
|
||||||
|
#ifndef __APPLE__
|
||||||
.size crc32_gzip_refl_crc_ext, .-crc32_gzip_refl_crc_ext
|
.size crc32_gzip_refl_crc_ext, .-crc32_gzip_refl_crc_ext
|
||||||
|
#endif
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
#include "crc32_gzip_refl_pmull.h"
|
#include "crc32_gzip_refl_pmull.h"
|
||||||
#include "crc32_refl_common_pmull.h"
|
#include "crc32_refl_common_pmull.h"
|
||||||
|
|
||||||
|
@ -47,11 +47,13 @@
|
|||||||
.equ br_high_b2, 0x1
|
.equ br_high_b2, 0x1
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.align 4
|
.align 4
|
||||||
.set .lanchor_crc_tab,. + 0
|
.set .lanchor_crc_tab,. + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32_table_gzip_refl, %object
|
.type crc32_table_gzip_refl, %object
|
||||||
.size crc32_table_gzip_refl, 1024
|
.size crc32_table_gzip_refl, 1024
|
||||||
|
#endif
|
||||||
crc32_table_gzip_refl:
|
crc32_table_gzip_refl:
|
||||||
.word 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3
|
.word 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3
|
||||||
.word 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91
|
.word 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
#include "crc32_ieee_norm_pmull.h"
|
#include "crc32_ieee_norm_pmull.h"
|
||||||
#include "crc32_norm_common_pmull.h"
|
#include "crc32_norm_common_pmull.h"
|
||||||
|
|
||||||
|
@ -47,11 +47,13 @@
|
|||||||
.equ br_high_b2, 0x1
|
.equ br_high_b2, 0x1
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.align 4
|
.align 4
|
||||||
.set .lanchor_crc_tab,. + 0
|
.set .lanchor_crc_tab,. + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32_table_ieee_norm, %object
|
.type crc32_table_ieee_norm, %object
|
||||||
.size crc32_table_ieee_norm, 1024
|
.size crc32_table_ieee_norm, 1024
|
||||||
|
#endif
|
||||||
crc32_table_ieee_norm:
|
crc32_table_ieee_norm:
|
||||||
.word 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005
|
.word 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005
|
||||||
.word 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd
|
.word 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd
|
||||||
|
@ -90,8 +90,12 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
.global crc32_iscsi_3crc_fold
|
.global cdecl(crc32_iscsi_3crc_fold)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32_iscsi_3crc_fold, %function
|
.type crc32_iscsi_3crc_fold, %function
|
||||||
crc32_iscsi_3crc_fold:
|
#endif
|
||||||
|
cdecl(crc32_iscsi_3crc_fold):
|
||||||
crc32_3crc_fold crc32c
|
crc32_3crc_fold crc32c
|
||||||
|
#ifndef __APPLE__
|
||||||
.size crc32_iscsi_3crc_fold, .-crc32_iscsi_3crc_fold
|
.size crc32_iscsi_3crc_fold, .-crc32_iscsi_3crc_fold
|
||||||
|
#endif
|
||||||
|
@ -58,8 +58,12 @@
|
|||||||
* uint32_t crc32_iscsi_crc_ext(const unsigned char *BUF,
|
* uint32_t crc32_iscsi_crc_ext(const unsigned char *BUF,
|
||||||
* uint64_t LEN,uint32_t wCRC);
|
* uint64_t LEN,uint32_t wCRC);
|
||||||
*/
|
*/
|
||||||
.global crc32_iscsi_crc_ext
|
.global cdecl(crc32_iscsi_crc_ext)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32_iscsi_crc_ext, %function
|
.type crc32_iscsi_crc_ext, %function
|
||||||
crc32_iscsi_crc_ext:
|
#endif
|
||||||
|
cdecl(crc32_iscsi_crc_ext):
|
||||||
crc32_hw_common crc32c
|
crc32_hw_common crc32c
|
||||||
|
#ifndef __APPLE__
|
||||||
.size crc32_iscsi_crc_ext, .-crc32_iscsi_crc_ext
|
.size crc32_iscsi_crc_ext, .-crc32_iscsi_crc_ext
|
||||||
|
#endif
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
#include "crc32_iscsi_refl_pmull.h"
|
#include "crc32_iscsi_refl_pmull.h"
|
||||||
#include "crc32_refl_common_pmull.h"
|
#include "crc32_refl_common_pmull.h"
|
||||||
|
|
||||||
@ -35,9 +36,11 @@ crc32_refl_func crc32_iscsi_refl_pmull_internal
|
|||||||
.arch armv8-a+crc+crypto
|
.arch armv8-a+crc+crypto
|
||||||
.text
|
.text
|
||||||
.align 3
|
.align 3
|
||||||
.global crc32_iscsi_refl_pmull
|
.global cdecl(crc32_iscsi_refl_pmull)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32_iscsi_refl_pmull, %function
|
.type crc32_iscsi_refl_pmull, %function
|
||||||
crc32_iscsi_refl_pmull:
|
#endif
|
||||||
|
cdecl(crc32_iscsi_refl_pmull):
|
||||||
stp x29, x30, [sp, -32]!
|
stp x29, x30, [sp, -32]!
|
||||||
mov x29, sp
|
mov x29, sp
|
||||||
|
|
||||||
@ -47,7 +50,7 @@ crc32_iscsi_refl_pmull:
|
|||||||
mov w0, w7
|
mov w0, w7
|
||||||
mvn w0, w0
|
mvn w0, w0
|
||||||
|
|
||||||
bl crc32_iscsi_refl_pmull_internal
|
bl cdecl(crc32_iscsi_refl_pmull_internal)
|
||||||
mvn w0, w0
|
mvn w0, w0
|
||||||
ldp x29, x30, [sp], 32
|
ldp x29, x30, [sp], 32
|
||||||
ret
|
ret
|
||||||
|
@ -47,11 +47,14 @@
|
|||||||
.equ br_high_b2, 0x0
|
.equ br_high_b2, 0x0
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.section .rodata
|
|
||||||
|
ASM_DEF_RODATA
|
||||||
.align 4
|
.align 4
|
||||||
.set .lanchor_crc_tab,. + 0
|
.set .lanchor_crc_tab,. + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32_table_iscsi_refl, %object
|
.type crc32_table_iscsi_refl, %object
|
||||||
.size crc32_table_iscsi_refl, 1024
|
.size crc32_table_iscsi_refl, 1024
|
||||||
|
#endif
|
||||||
crc32_table_iscsi_refl:
|
crc32_table_iscsi_refl:
|
||||||
.word 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB
|
.word 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB
|
||||||
.word 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24
|
.word 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24
|
||||||
|
@ -51,42 +51,56 @@
|
|||||||
|
|
||||||
#include "crc32_mix_default_common.S"
|
#include "crc32_mix_default_common.S"
|
||||||
|
|
||||||
.global crc32_mix_default
|
.global cdecl(crc32_mix_default)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32_mix_default, %function
|
.type crc32_mix_default, %function
|
||||||
crc32_mix_default:
|
#endif
|
||||||
|
cdecl(crc32_mix_default):
|
||||||
crc32_mix_main_default
|
crc32_mix_main_default
|
||||||
|
#ifndef __APPLE__
|
||||||
.size crc32_mix_default, .-crc32_mix_default
|
.size crc32_mix_default, .-crc32_mix_default
|
||||||
|
#endif
|
||||||
|
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.align 4
|
.align 4
|
||||||
.set lanchor_crc32,. + 0
|
.set lanchor_crc32,. + 0
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.type k1k2, %object
|
.type k1k2, %object
|
||||||
.size k1k2, 16
|
.size k1k2, 16
|
||||||
|
#endif
|
||||||
k1k2:
|
k1k2:
|
||||||
.xword 0x0154442bd4
|
.xword 0x0154442bd4
|
||||||
.xword 0x01c6e41596
|
.xword 0x01c6e41596
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.type k3k4, %object
|
.type k3k4, %object
|
||||||
.size k3k4, 16
|
.size k3k4, 16
|
||||||
|
#endif
|
||||||
k3k4:
|
k3k4:
|
||||||
.xword 0x01751997d0
|
.xword 0x01751997d0
|
||||||
.xword 0x00ccaa009e
|
.xword 0x00ccaa009e
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.type k5k0, %object
|
.type k5k0, %object
|
||||||
.size k5k0, 16
|
.size k5k0, 16
|
||||||
|
#endif
|
||||||
k5k0:
|
k5k0:
|
||||||
.xword 0x0163cd6124
|
.xword 0x0163cd6124
|
||||||
.xword 0
|
.xword 0
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.type poly, %object
|
.type poly, %object
|
||||||
.size poly, 16
|
.size poly, 16
|
||||||
|
#endif
|
||||||
poly:
|
poly:
|
||||||
.xword 0x01db710641
|
.xword 0x01db710641
|
||||||
.xword 0x01f7011641
|
.xword 0x01f7011641
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32_const, %object
|
.type crc32_const, %object
|
||||||
.size crc32_const, 48
|
.size crc32_const, 48
|
||||||
|
#endif
|
||||||
crc32_const:
|
crc32_const:
|
||||||
.xword 0x1753ab84
|
.xword 0x1753ab84
|
||||||
.xword 0
|
.xword 0
|
||||||
@ -98,8 +112,10 @@ crc32_const:
|
|||||||
.align 4
|
.align 4
|
||||||
.set .lanchor_mask,. + 0
|
.set .lanchor_mask,. + 0
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.type mask, %object
|
.type mask, %object
|
||||||
.size mask, 16
|
.size mask, 16
|
||||||
|
#endif
|
||||||
mask:
|
mask:
|
||||||
.word -1
|
.word -1
|
||||||
.word 0
|
.word 0
|
||||||
|
@ -27,6 +27,8 @@
|
|||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.macro declare_generic_reg name:req, reg:req, default:req
|
.macro declare_generic_reg name:req, reg:req, default:req
|
||||||
\name .req \default\reg
|
\name .req \default\reg
|
||||||
w_\name .req w\reg
|
w_\name .req w\reg
|
||||||
@ -207,8 +209,13 @@
|
|||||||
fmov s_a1, w_crc
|
fmov s_a1, w_crc
|
||||||
movi v_neon_tmp.4s, 0
|
movi v_neon_tmp.4s, 0
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_pconst, lanchor_crc32
|
adrp x_pconst, lanchor_crc32
|
||||||
add x_buf_tmp, x_buf, 64
|
add x_buf_tmp, x_buf, 64
|
||||||
|
#else
|
||||||
|
adrp x_pconst, lanchor_crc32@PAGE
|
||||||
|
add x_buf_tmp, x_buf, 64
|
||||||
|
#endif
|
||||||
|
|
||||||
ldr x_data_crc0, [x_buf, 512]
|
ldr x_data_crc0, [x_buf, 512]
|
||||||
ldr x_data_crc1, [x_buf, 1024]
|
ldr x_data_crc1, [x_buf, 1024]
|
||||||
@ -231,7 +238,11 @@
|
|||||||
ldr x_data_crc2, [x_buf, 1544]
|
ldr x_data_crc2, [x_buf, 1544]
|
||||||
|
|
||||||
eor v_a1.16b, v_a1.16b, v_neon_tmp.16b
|
eor v_a1.16b, v_a1.16b, v_neon_tmp.16b
|
||||||
|
#ifndef __APPLE__
|
||||||
ldr q_a0, [x_pconst, #:lo12:lanchor_crc32] // k1k2
|
ldr q_a0, [x_pconst, #:lo12:lanchor_crc32] // k1k2
|
||||||
|
#else
|
||||||
|
ldr q_a0, [x_pconst, #lanchor_crc32@PAGEOFF] // k1k2
|
||||||
|
#endif
|
||||||
|
|
||||||
crc32_u64 w_crc0, w_crc0, x_data_crc0
|
crc32_u64 w_crc0, w_crc0, x_data_crc0
|
||||||
crc32_u64 w_crc1, w_crc1, x_data_crc1
|
crc32_u64 w_crc1, w_crc1, x_data_crc1
|
||||||
@ -261,7 +272,11 @@
|
|||||||
// loop end
|
// loop end
|
||||||
|
|
||||||
// PMULL: fold into 128-bits
|
// PMULL: fold into 128-bits
|
||||||
|
#ifndef __APPLE__
|
||||||
add x_pconst, x_pconst, :lo12:lanchor_crc32
|
add x_pconst, x_pconst, :lo12:lanchor_crc32
|
||||||
|
#else
|
||||||
|
add x_pconst, x_pconst, lanchor_crc32@PAGEOFF
|
||||||
|
#endif
|
||||||
|
|
||||||
ldr x_data_crc0, [x_buf, 976]
|
ldr x_data_crc0, [x_buf, 976]
|
||||||
ldr x_data_crc1, [x_buf, 1488]
|
ldr x_data_crc1, [x_buf, 1488]
|
||||||
@ -321,7 +336,11 @@
|
|||||||
|
|
||||||
movi v_neon_zero.4s, 0
|
movi v_neon_zero.4s, 0
|
||||||
ldr q_k5k0, [x_pconst, offset_k5k0] // k5k0
|
ldr q_k5k0, [x_pconst, offset_k5k0] // k5k0
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_tmp, .lanchor_mask
|
adrp x_tmp, .lanchor_mask
|
||||||
|
#else
|
||||||
|
adrp x_tmp, .lanchor_mask@PAGE
|
||||||
|
#endif
|
||||||
|
|
||||||
ldr x_data_crc0, [x_buf, 1008]
|
ldr x_data_crc0, [x_buf, 1008]
|
||||||
ldr x_data_crc1, [x_buf, 1520]
|
ldr x_data_crc1, [x_buf, 1520]
|
||||||
@ -329,7 +348,11 @@
|
|||||||
|
|
||||||
ext v_a1.16b, v_a1.16b, v_neon_zero.16b, #8
|
ext v_a1.16b, v_a1.16b, v_neon_zero.16b, #8
|
||||||
eor v_a1.16b, v_a2.16b, v_a1.16b
|
eor v_a1.16b, v_a2.16b, v_a1.16b
|
||||||
|
#ifndef __APPLE__
|
||||||
ldr q_neon_tmp3, [x_tmp, #:lo12:.lanchor_mask]
|
ldr q_neon_tmp3, [x_tmp, #:lo12:.lanchor_mask]
|
||||||
|
#else
|
||||||
|
ldr q_neon_tmp3, [x_tmp, #.lanchor_mask@PAGEOFF]
|
||||||
|
#endif
|
||||||
|
|
||||||
crc32_u64 w_crc0, w_crc0, x_data_crc0
|
crc32_u64 w_crc0, w_crc0, x_data_crc0
|
||||||
crc32_u64 w_crc1, w_crc1, x_data_crc1
|
crc32_u64 w_crc1, w_crc1, x_data_crc1
|
||||||
|
@ -62,9 +62,12 @@
|
|||||||
CRC .req x0
|
CRC .req x0
|
||||||
wCRC .req w0
|
wCRC .req w0
|
||||||
.align 6
|
.align 6
|
||||||
.global crc32_mix_neoverse_n1
|
.global cdecl(crc32_mix_neoverse_n1)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32_mix_neoverse_n1, %function
|
.type crc32_mix_neoverse_n1, %function
|
||||||
crc32_mix_neoverse_n1:
|
#endif
|
||||||
|
cdecl(crc32_mix_neoverse_n1):
|
||||||
crc32_common_mix crc32
|
crc32_common_mix crc32
|
||||||
|
#ifndef __APPLE__
|
||||||
.size crc32_mix_neoverse_n1, .-crc32_mix_neoverse_n1
|
.size crc32_mix_neoverse_n1, .-crc32_mix_neoverse_n1
|
||||||
|
#endif
|
||||||
|
@ -33,12 +33,14 @@
|
|||||||
.arch armv8-a+crypto
|
.arch armv8-a+crypto
|
||||||
.text
|
.text
|
||||||
.align 3
|
.align 3
|
||||||
.global \name
|
.global cdecl(\name)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type \name, %function
|
.type \name, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* uint32_t crc32_norm_func(uint32_t seed, uint8_t * buf, uint64_t len) */
|
/* uint32_t crc32_norm_func(uint32_t seed, uint8_t * buf, uint64_t len) */
|
||||||
|
|
||||||
\name\():
|
cdecl(\name\()):
|
||||||
mvn w_seed, w_seed
|
mvn w_seed, w_seed
|
||||||
mov x_counter, 0
|
mov x_counter, 0
|
||||||
cmp x_len, (FOLD_SIZE - 1)
|
cmp x_len, (FOLD_SIZE - 1)
|
||||||
@ -48,10 +50,17 @@
|
|||||||
cmp x_len, x_counter
|
cmp x_len, x_counter
|
||||||
bls .done
|
bls .done
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_tmp, .lanchor_crc_tab
|
adrp x_tmp, .lanchor_crc_tab
|
||||||
add x_buf_iter, x_buf, x_counter
|
add x_buf_iter, x_buf, x_counter
|
||||||
add x_buf, x_buf, x_len
|
add x_buf, x_buf, x_len
|
||||||
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
|
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
|
||||||
|
#else
|
||||||
|
adrp x_tmp, .lanchor_crc_tab@PAGE
|
||||||
|
add x_buf_iter, x_buf, x_counter
|
||||||
|
add x_buf, x_buf, x_len
|
||||||
|
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
|
||||||
|
#endif
|
||||||
|
|
||||||
.align 3
|
.align 3
|
||||||
.loop_crc_tab:
|
.loop_crc_tab:
|
||||||
@ -124,10 +133,12 @@
|
|||||||
umov w_seed, v_tmp_high.s[0]
|
umov w_seed, v_tmp_high.s[0]
|
||||||
|
|
||||||
b .crc_tab_pre
|
b .crc_tab_pre
|
||||||
|
#ifndef __APPLE__
|
||||||
.size \name, .-\name
|
.size \name, .-\name
|
||||||
|
|
||||||
.section .rodata.cst16,"aM",@progbits,16
|
.section .rodata.cst16,"aM",@progbits,16
|
||||||
|
#else
|
||||||
|
.section __TEXT,__const
|
||||||
|
#endif
|
||||||
.align 4
|
.align 4
|
||||||
.shuffle_data:
|
.shuffle_data:
|
||||||
.byte 15, 14, 13, 12, 11, 10, 9
|
.byte 15, 14, 13, 12, 11, 10, 9
|
||||||
|
@ -33,12 +33,14 @@
|
|||||||
.arch armv8-a+crypto
|
.arch armv8-a+crypto
|
||||||
.text
|
.text
|
||||||
.align 3
|
.align 3
|
||||||
.global \name
|
.global cdecl(\name)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type \name, %function
|
.type \name, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* uint32_t crc32_refl_func(uint32_t seed, uint8_t * buf, uint64_t len) */
|
/* uint32_t crc32_refl_func(uint32_t seed, uint8_t * buf, uint64_t len) */
|
||||||
|
|
||||||
\name\():
|
cdecl(\name\()):
|
||||||
mvn w_seed, w_seed
|
mvn w_seed, w_seed
|
||||||
mov x_counter, 0
|
mov x_counter, 0
|
||||||
cmp x_len, (FOLD_SIZE - 1)
|
cmp x_len, (FOLD_SIZE - 1)
|
||||||
@ -48,10 +50,17 @@
|
|||||||
cmp x_len, x_counter
|
cmp x_len, x_counter
|
||||||
bls .done
|
bls .done
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_tmp, .lanchor_crc_tab
|
adrp x_tmp, .lanchor_crc_tab
|
||||||
add x_buf_iter, x_buf, x_counter
|
add x_buf_iter, x_buf, x_counter
|
||||||
add x_buf, x_buf, x_len
|
add x_buf, x_buf, x_len
|
||||||
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
|
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
|
||||||
|
#else
|
||||||
|
adrp x_tmp, .lanchor_crc_tab@PAGE
|
||||||
|
add x_buf_iter, x_buf, x_counter
|
||||||
|
add x_buf, x_buf, x_len
|
||||||
|
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
|
||||||
|
#endif
|
||||||
|
|
||||||
.align 3
|
.align 3
|
||||||
.loop_crc_tab:
|
.loop_crc_tab:
|
||||||
@ -121,6 +130,7 @@
|
|||||||
umov w_seed, v_tmp_high.s[1]
|
umov w_seed, v_tmp_high.s[1]
|
||||||
|
|
||||||
b .crc_tab_pre
|
b .crc_tab_pre
|
||||||
|
#ifndef __APPLE__
|
||||||
.size \name, .-\name
|
.size \name, .-\name
|
||||||
|
#endif
|
||||||
.endm
|
.endm
|
||||||
|
@ -49,46 +49,60 @@
|
|||||||
|
|
||||||
#include "crc32_mix_default_common.S"
|
#include "crc32_mix_default_common.S"
|
||||||
|
|
||||||
.global crc32c_mix_default
|
.global cdecl(crc32c_mix_default)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32c_mix_default, %function
|
.type crc32c_mix_default, %function
|
||||||
crc32c_mix_default:
|
#endif
|
||||||
|
cdecl(crc32c_mix_default):
|
||||||
mov w3, w2
|
mov w3, w2
|
||||||
sxtw x2, w1
|
sxtw x2, w1
|
||||||
mov x1, x0
|
mov x1, x0
|
||||||
mov w0, w3
|
mov w0, w3
|
||||||
crc32_mix_main_default
|
crc32_mix_main_default
|
||||||
|
#ifndef __APPLE__
|
||||||
.size crc32c_mix_default, .-crc32c_mix_default
|
.size crc32c_mix_default, .-crc32c_mix_default
|
||||||
|
#endif
|
||||||
|
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.align 4
|
.align 4
|
||||||
.set lanchor_crc32,. + 0
|
.set lanchor_crc32,. + 0
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.type k1k2, %object
|
.type k1k2, %object
|
||||||
.size k1k2, 16
|
.size k1k2, 16
|
||||||
|
#endif
|
||||||
k1k2:
|
k1k2:
|
||||||
.xword 0x00740eef02
|
.xword 0x00740eef02
|
||||||
.xword 0x009e4addf8
|
.xword 0x009e4addf8
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.type k3k4, %object
|
.type k3k4, %object
|
||||||
.size k3k4, 16
|
.size k3k4, 16
|
||||||
|
#endif
|
||||||
k3k4:
|
k3k4:
|
||||||
.xword 0x00f20c0dfe
|
.xword 0x00f20c0dfe
|
||||||
.xword 0x014cd00bd6
|
.xword 0x014cd00bd6
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.type k5k0, %object
|
.type k5k0, %object
|
||||||
.size k5k0, 16
|
.size k5k0, 16
|
||||||
|
#endif
|
||||||
k5k0:
|
k5k0:
|
||||||
.xword 0x00dd45aab8
|
.xword 0x00dd45aab8
|
||||||
.xword 0
|
.xword 0
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.type poly, %object
|
.type poly, %object
|
||||||
.size poly, 16
|
.size poly, 16
|
||||||
|
#endif
|
||||||
poly:
|
poly:
|
||||||
.xword 0x0105ec76f0
|
.xword 0x0105ec76f0
|
||||||
.xword 0x00dea713f1
|
.xword 0x00dea713f1
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32_const, %object
|
.type crc32_const, %object
|
||||||
.size crc32_const, 48
|
.size crc32_const, 48
|
||||||
|
#endif
|
||||||
crc32_const:
|
crc32_const:
|
||||||
.xword 0x9ef68d35
|
.xword 0x9ef68d35
|
||||||
.xword 0
|
.xword 0
|
||||||
@ -100,8 +114,10 @@ crc32_const:
|
|||||||
.align 4
|
.align 4
|
||||||
.set .lanchor_mask,. + 0
|
.set .lanchor_mask,. + 0
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.type mask, %object
|
.type mask, %object
|
||||||
.size mask, 16
|
.size mask, 16
|
||||||
|
#endif
|
||||||
mask:
|
mask:
|
||||||
.word -1
|
.word -1
|
||||||
.word 0
|
.word 0
|
||||||
|
@ -61,8 +61,12 @@
|
|||||||
CRC .req x2
|
CRC .req x2
|
||||||
wCRC .req w2
|
wCRC .req w2
|
||||||
.align 6
|
.align 6
|
||||||
.global crc32c_mix_neoverse_n1
|
.global cdecl(crc32c_mix_neoverse_n1)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc32c_mix_neoverse_n1, %function
|
.type crc32c_mix_neoverse_n1, %function
|
||||||
crc32c_mix_neoverse_n1:
|
#endif
|
||||||
|
cdecl(crc32c_mix_neoverse_n1):
|
||||||
crc32_common_mix crc32c
|
crc32_common_mix crc32c
|
||||||
|
#ifndef __APPLE__
|
||||||
.size crc32c_mix_neoverse_n1, .-crc32c_mix_neoverse_n1
|
.size crc32c_mix_neoverse_n1, .-crc32c_mix_neoverse_n1
|
||||||
|
#endif
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
#include "crc64_ecma_norm_pmull.h"
|
#include "crc64_ecma_norm_pmull.h"
|
||||||
#include "crc64_norm_common_pmull.h"
|
#include "crc64_norm_common_pmull.h"
|
||||||
|
|
||||||
|
@ -64,11 +64,13 @@
|
|||||||
.equ br_high_b3, 0x42f0
|
.equ br_high_b3, 0x42f0
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.align 4
|
.align 4
|
||||||
.set .lanchor_crc_tab,. + 0
|
.set .lanchor_crc_tab,. + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc64_tab, %object
|
.type crc64_tab, %object
|
||||||
.size crc64_tab, 2048
|
.size crc64_tab, 2048
|
||||||
|
#endif
|
||||||
crc64_tab:
|
crc64_tab:
|
||||||
.xword 0x0000000000000000, 0x42f0e1eba9ea3693
|
.xword 0x0000000000000000, 0x42f0e1eba9ea3693
|
||||||
.xword 0x85e1c3d753d46d26, 0xc711223cfa3e5bb5
|
.xword 0x85e1c3d753d46d26, 0xc711223cfa3e5bb5
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
#include "crc64_ecma_refl_pmull.h"
|
#include "crc64_ecma_refl_pmull.h"
|
||||||
#include "crc64_refl_common_pmull.h"
|
#include "crc64_refl_common_pmull.h"
|
||||||
|
|
||||||
|
@ -60,11 +60,13 @@
|
|||||||
.equ br_high_b3, 0x92d8
|
.equ br_high_b3, 0x92d8
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.align 4
|
.align 4
|
||||||
.set .lanchor_crc_tab,. + 0
|
.set .lanchor_crc_tab,. + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc64_tab, %object
|
.type crc64_tab, %object
|
||||||
.size crc64_tab, 2048
|
.size crc64_tab, 2048
|
||||||
|
#endif
|
||||||
crc64_tab:
|
crc64_tab:
|
||||||
.xword 0x0000000000000000, 0xb32e4cbe03a75f6f
|
.xword 0x0000000000000000, 0xb32e4cbe03a75f6f
|
||||||
.xword 0xf4843657a840a05b, 0x47aa7ae9abe7ff34
|
.xword 0xf4843657a840a05b, 0x47aa7ae9abe7ff34
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
#include "crc64_iso_norm_pmull.h"
|
#include "crc64_iso_norm_pmull.h"
|
||||||
#include "crc64_norm_common_pmull.h"
|
#include "crc64_norm_common_pmull.h"
|
||||||
|
|
||||||
|
@ -64,11 +64,13 @@
|
|||||||
.equ br_high_b3, 0x0000
|
.equ br_high_b3, 0x0000
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.align 4
|
.align 4
|
||||||
.set .lanchor_crc_tab,. + 0
|
.set .lanchor_crc_tab,. + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc64_tab, %object
|
.type crc64_tab, %object
|
||||||
.size crc64_tab, 2048
|
.size crc64_tab, 2048
|
||||||
|
#endif
|
||||||
|
|
||||||
crc64_tab:
|
crc64_tab:
|
||||||
.xword 0x0000000000000000, 0x000000000000001b
|
.xword 0x0000000000000000, 0x000000000000001b
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
#include "crc64_iso_refl_pmull.h"
|
#include "crc64_iso_refl_pmull.h"
|
||||||
#include "crc64_refl_common_pmull.h"
|
#include "crc64_refl_common_pmull.h"
|
||||||
|
|
||||||
|
@ -60,11 +60,13 @@
|
|||||||
.equ br_high_b3, 0xb000
|
.equ br_high_b3, 0xb000
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.align 4
|
.align 4
|
||||||
.set .lanchor_crc_tab,. + 0
|
.set .lanchor_crc_tab,. + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc64_tab, %object
|
.type crc64_tab, %object
|
||||||
.size crc64_tab, 2048
|
.size crc64_tab, 2048
|
||||||
|
#endif
|
||||||
|
|
||||||
crc64_tab:
|
crc64_tab:
|
||||||
.xword 0x0000000000000000, 0x01b0000000000000
|
.xword 0x0000000000000000, 0x01b0000000000000
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
#include "crc64_jones_norm_pmull.h"
|
#include "crc64_jones_norm_pmull.h"
|
||||||
#include "crc64_norm_common_pmull.h"
|
#include "crc64_norm_common_pmull.h"
|
||||||
|
|
||||||
|
@ -64,11 +64,14 @@
|
|||||||
.equ br_high_b3, 0xad93
|
.equ br_high_b3, 0xad93
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.align 4
|
.align 4
|
||||||
.set .lanchor_crc_tab,. + 0
|
.set .lanchor_crc_tab,. + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc64_tab, %object
|
.type crc64_tab, %object
|
||||||
.size crc64_tab, 2048
|
.size crc64_tab, 2048
|
||||||
|
#endif
|
||||||
|
|
||||||
crc64_tab:
|
crc64_tab:
|
||||||
.xword 0x0000000000000000, 0xad93d23594c935a9
|
.xword 0x0000000000000000, 0xad93d23594c935a9
|
||||||
.xword 0xf6b4765ebd5b5efb, 0x5b27a46b29926b52
|
.xword 0xf6b4765ebd5b5efb, 0x5b27a46b29926b52
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
#include "crc64_jones_refl_pmull.h"
|
#include "crc64_jones_refl_pmull.h"
|
||||||
#include "crc64_refl_common_pmull.h"
|
#include "crc64_refl_common_pmull.h"
|
||||||
|
|
||||||
|
@ -60,11 +60,14 @@
|
|||||||
.equ br_high_b3, 0x2b59
|
.equ br_high_b3, 0x2b59
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.align 4
|
.align 4
|
||||||
.set .lanchor_crc_tab,. + 0
|
.set .lanchor_crc_tab,. + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type crc64_tab, %object
|
.type crc64_tab, %object
|
||||||
.size crc64_tab, 2048
|
.size crc64_tab, 2048
|
||||||
|
#endif
|
||||||
|
|
||||||
crc64_tab:
|
crc64_tab:
|
||||||
.xword 0x0000000000000000, 0x7ad870c830358979
|
.xword 0x0000000000000000, 0x7ad870c830358979
|
||||||
.xword 0xf5b0e190606b12f2, 0x8f689158505e9b8b
|
.xword 0xf5b0e190606b12f2, 0x8f689158505e9b8b
|
||||||
|
@ -33,12 +33,14 @@
|
|||||||
.arch armv8-a+crypto
|
.arch armv8-a+crypto
|
||||||
.text
|
.text
|
||||||
.align 3
|
.align 3
|
||||||
.global \name
|
.global cdecl(\name)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type \name, %function
|
.type \name, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* uint64_t crc64_norm_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
|
/* uint64_t crc64_norm_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
|
||||||
|
|
||||||
\name\():
|
cdecl(\name\()):
|
||||||
mvn x_seed, x_seed
|
mvn x_seed, x_seed
|
||||||
mov x_counter, 0
|
mov x_counter, 0
|
||||||
cmp x_len, (FOLD_SIZE-1)
|
cmp x_len, (FOLD_SIZE-1)
|
||||||
@ -48,10 +50,17 @@
|
|||||||
cmp x_len, x_counter
|
cmp x_len, x_counter
|
||||||
bls .done
|
bls .done
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_tmp, .lanchor_crc_tab
|
adrp x_tmp, .lanchor_crc_tab
|
||||||
add x_buf_iter, x_buf, x_counter
|
add x_buf_iter, x_buf, x_counter
|
||||||
add x_buf, x_buf, x_len
|
add x_buf, x_buf, x_len
|
||||||
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
|
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
|
||||||
|
#else
|
||||||
|
adrp x_tmp, .lanchor_crc_tab@PAGE
|
||||||
|
add x_buf_iter, x_buf, x_counter
|
||||||
|
add x_buf, x_buf, x_len
|
||||||
|
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
|
||||||
|
#endif
|
||||||
|
|
||||||
.align 3
|
.align 3
|
||||||
.loop_crc_tab:
|
.loop_crc_tab:
|
||||||
@ -119,9 +128,12 @@
|
|||||||
|
|
||||||
b .crc_tab_pre
|
b .crc_tab_pre
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.size \name, .-\name
|
.size \name, .-\name
|
||||||
|
|
||||||
.section .rodata.cst16,"aM",@progbits,16
|
.section .rodata.cst16,"aM",@progbits,16
|
||||||
|
#else
|
||||||
|
.section __TEXT,__const
|
||||||
|
#endif
|
||||||
.align 4
|
.align 4
|
||||||
.shuffle_data:
|
.shuffle_data:
|
||||||
.byte 15, 14, 13, 12, 11, 10, 9, 8
|
.byte 15, 14, 13, 12, 11, 10, 9, 8
|
||||||
|
@ -33,12 +33,14 @@
|
|||||||
.arch armv8-a+crypto
|
.arch armv8-a+crypto
|
||||||
.text
|
.text
|
||||||
.align 3
|
.align 3
|
||||||
.global \name
|
.global cdecl(\name)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type \name, %function
|
.type \name, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* uint64_t crc64_refl_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
|
/* uint64_t crc64_refl_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
|
||||||
|
|
||||||
\name\():
|
cdecl(\name\()):
|
||||||
mvn x_seed, x_seed
|
mvn x_seed, x_seed
|
||||||
mov x_counter, 0
|
mov x_counter, 0
|
||||||
cmp x_len, (FOLD_SIZE-1)
|
cmp x_len, (FOLD_SIZE-1)
|
||||||
@ -48,10 +50,17 @@
|
|||||||
cmp x_len, x_counter
|
cmp x_len, x_counter
|
||||||
bls .done
|
bls .done
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_tmp, .lanchor_crc_tab
|
adrp x_tmp, .lanchor_crc_tab
|
||||||
add x_buf_iter, x_buf, x_counter
|
add x_buf_iter, x_buf, x_counter
|
||||||
add x_buf, x_buf, x_len
|
add x_buf, x_buf, x_len
|
||||||
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
|
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
|
||||||
|
#else
|
||||||
|
adrp x_tmp, .lanchor_crc_tab@PAGE
|
||||||
|
add x_buf_iter, x_buf, x_counter
|
||||||
|
add x_buf, x_buf, x_len
|
||||||
|
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
|
||||||
|
#endif
|
||||||
|
|
||||||
.align 3
|
.align 3
|
||||||
.loop_crc_tab:
|
.loop_crc_tab:
|
||||||
@ -121,6 +130,7 @@
|
|||||||
umov x_crc_ret, v_tmp_low.d[1]
|
umov x_crc_ret, v_tmp_low.d[1]
|
||||||
|
|
||||||
b .crc_tab_pre
|
b .crc_tab_pre
|
||||||
|
#ifndef __APPLE__
|
||||||
.size \name, .-\name
|
.size \name, .-\name
|
||||||
|
#endif
|
||||||
.endm
|
.endm
|
||||||
|
@ -30,37 +30,50 @@
|
|||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(crc16_t10dif)
|
DEFINE_INTERFACE_DISPATCHER(crc16_t10dif)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_PMULL)
|
if (auxval & HWCAP_PMULL)
|
||||||
return PROVIDER_INFO(crc16_t10dif_pmull);
|
return PROVIDER_INFO(crc16_t10dif_pmull);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||||
|
return PROVIDER_INFO(crc16_t10dif_pmull);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(crc16_t10dif);
|
return PROVIDER_BASIC(crc16_t10dif);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(crc16_t10dif_copy)
|
DEFINE_INTERFACE_DISPATCHER(crc16_t10dif_copy)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_PMULL)
|
if (auxval & HWCAP_PMULL)
|
||||||
return PROVIDER_INFO(crc16_t10dif_copy_pmull);
|
return PROVIDER_INFO(crc16_t10dif_copy_pmull);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||||
|
return PROVIDER_INFO(crc16_t10dif_copy_pmull);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(crc16_t10dif_copy);
|
return PROVIDER_BASIC(crc16_t10dif_copy);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(crc32_ieee)
|
DEFINE_INTERFACE_DISPATCHER(crc32_ieee)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_PMULL) {
|
if (auxval & HWCAP_PMULL) {
|
||||||
return PROVIDER_INFO(crc32_ieee_norm_pmull);
|
return PROVIDER_INFO(crc32_ieee_norm_pmull);
|
||||||
}
|
}
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||||
|
return PROVIDER_INFO(crc32_ieee_norm_pmull);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(crc32_ieee);
|
return PROVIDER_BASIC(crc32_ieee);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
|
DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32) {
|
if (auxval & HWCAP_CRC32) {
|
||||||
switch (get_micro_arch_id()) {
|
switch (get_micro_arch_id()) {
|
||||||
@ -77,12 +90,19 @@ DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
|
|||||||
if (auxval & HWCAP_PMULL) {
|
if (auxval & HWCAP_PMULL) {
|
||||||
return PROVIDER_INFO(crc32_iscsi_refl_pmull);
|
return PROVIDER_INFO(crc32_iscsi_refl_pmull);
|
||||||
}
|
}
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(crc32_iscsi_3crc_fold);
|
||||||
|
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||||
|
return PROVIDER_INFO(crc32_iscsi_refl_pmull);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(crc32_iscsi);
|
return PROVIDER_BASIC(crc32_iscsi);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl)
|
DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
|
||||||
if (auxval & HWCAP_CRC32) {
|
if (auxval & HWCAP_CRC32) {
|
||||||
@ -99,68 +119,97 @@ DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl)
|
|||||||
|
|
||||||
if (auxval & HWCAP_PMULL)
|
if (auxval & HWCAP_PMULL)
|
||||||
return PROVIDER_INFO(crc32_gzip_refl_pmull);
|
return PROVIDER_INFO(crc32_gzip_refl_pmull);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(crc32_gzip_refl_3crc_fold);
|
||||||
|
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||||
|
return PROVIDER_INFO(crc32_gzip_refl_pmull);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(crc32_gzip_refl);
|
return PROVIDER_BASIC(crc32_gzip_refl);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(crc64_ecma_refl)
|
DEFINE_INTERFACE_DISPATCHER(crc64_ecma_refl)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
|
||||||
if (auxval & HWCAP_PMULL)
|
if (auxval & HWCAP_PMULL)
|
||||||
return PROVIDER_INFO(crc64_ecma_refl_pmull);
|
return PROVIDER_INFO(crc64_ecma_refl_pmull);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||||
|
return PROVIDER_INFO(crc64_ecma_refl_pmull);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(crc64_ecma_refl);
|
return PROVIDER_BASIC(crc64_ecma_refl);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(crc64_ecma_norm)
|
DEFINE_INTERFACE_DISPATCHER(crc64_ecma_norm)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_PMULL)
|
if (auxval & HWCAP_PMULL)
|
||||||
return PROVIDER_INFO(crc64_ecma_norm_pmull);
|
return PROVIDER_INFO(crc64_ecma_norm_pmull);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||||
|
return PROVIDER_INFO(crc64_ecma_norm_pmull);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(crc64_ecma_norm);
|
return PROVIDER_BASIC(crc64_ecma_norm);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(crc64_iso_refl)
|
DEFINE_INTERFACE_DISPATCHER(crc64_iso_refl)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_PMULL)
|
if (auxval & HWCAP_PMULL)
|
||||||
return PROVIDER_INFO(crc64_iso_refl_pmull);
|
return PROVIDER_INFO(crc64_iso_refl_pmull);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||||
|
return PROVIDER_INFO(crc64_iso_refl_pmull);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(crc64_iso_refl);
|
return PROVIDER_BASIC(crc64_iso_refl);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(crc64_iso_norm)
|
DEFINE_INTERFACE_DISPATCHER(crc64_iso_norm)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_PMULL)
|
if (auxval & HWCAP_PMULL)
|
||||||
return PROVIDER_INFO(crc64_iso_norm_pmull);
|
return PROVIDER_INFO(crc64_iso_norm_pmull);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||||
|
return PROVIDER_INFO(crc64_iso_norm_pmull);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(crc64_iso_norm);
|
return PROVIDER_BASIC(crc64_iso_norm);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(crc64_jones_refl)
|
DEFINE_INTERFACE_DISPATCHER(crc64_jones_refl)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_PMULL)
|
if (auxval & HWCAP_PMULL)
|
||||||
return PROVIDER_INFO(crc64_jones_refl_pmull);
|
return PROVIDER_INFO(crc64_jones_refl_pmull);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||||
|
return PROVIDER_INFO(crc64_jones_refl_pmull);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(crc64_jones_refl);
|
return PROVIDER_BASIC(crc64_jones_refl);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(crc64_jones_norm)
|
DEFINE_INTERFACE_DISPATCHER(crc64_jones_norm)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_PMULL)
|
if (auxval & HWCAP_PMULL)
|
||||||
return PROVIDER_INFO(crc64_jones_norm_pmull);
|
return PROVIDER_INFO(crc64_jones_norm_pmull);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_PMULL_KEY))
|
||||||
|
return PROVIDER_INFO(crc64_jones_norm_pmull);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(crc64_jones_norm);
|
return PROVIDER_BASIC(crc64_jones_norm);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -27,6 +27,8 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
// parameters
|
// parameters
|
||||||
#define w_seed w0
|
#define w_seed w0
|
||||||
#define x_seed x0
|
#define x_seed x0
|
||||||
@ -126,8 +128,13 @@
|
|||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro crc_norm_load_first_block
|
.macro crc_norm_load_first_block
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_tmp, .shuffle_data
|
adrp x_tmp, .shuffle_data
|
||||||
ldr q_shuffle, [x_tmp, #:lo12:.shuffle_data]
|
ldr q_shuffle, [x_tmp, #:lo12:.shuffle_data]
|
||||||
|
#else
|
||||||
|
adrp x_tmp, .shuffle_data@PAGE
|
||||||
|
ldr q_shuffle, [x_tmp, #.shuffle_data@PAGEOFF]
|
||||||
|
#endif
|
||||||
|
|
||||||
ldr q_x0_tmp, [x_buf]
|
ldr q_x0_tmp, [x_buf]
|
||||||
ldr q_x1, [x_buf, 16]
|
ldr q_x1, [x_buf, 16]
|
||||||
|
@ -30,60 +30,90 @@
|
|||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
|
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
|
||||||
if (auxval & HWCAP_SVE)
|
if (auxval & HWCAP_SVE)
|
||||||
return PROVIDER_INFO(gf_vect_dot_prod_sve);
|
return PROVIDER_INFO(gf_vect_dot_prod_sve);
|
||||||
if (auxval & HWCAP_ASIMD)
|
if (auxval & HWCAP_ASIMD)
|
||||||
return PROVIDER_INFO(gf_vect_dot_prod_neon);
|
return PROVIDER_INFO(gf_vect_dot_prod_neon);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||||
|
return PROVIDER_INFO(gf_vect_dot_prod_sve);
|
||||||
|
return PROVIDER_INFO(gf_vect_dot_prod_neon);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(gf_vect_dot_prod);
|
return PROVIDER_BASIC(gf_vect_dot_prod);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
|
DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
|
||||||
if (auxval & HWCAP_SVE)
|
if (auxval & HWCAP_SVE)
|
||||||
return PROVIDER_INFO(gf_vect_mad_sve);
|
return PROVIDER_INFO(gf_vect_mad_sve);
|
||||||
if (auxval & HWCAP_ASIMD)
|
if (auxval & HWCAP_ASIMD)
|
||||||
return PROVIDER_INFO(gf_vect_mad_neon);
|
return PROVIDER_INFO(gf_vect_mad_neon);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||||
|
return PROVIDER_INFO(gf_vect_mad_sve);
|
||||||
|
return PROVIDER_INFO(gf_vect_mad_neon);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(gf_vect_mad);
|
return PROVIDER_BASIC(gf_vect_mad);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
|
DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
|
||||||
if (auxval & HWCAP_SVE)
|
if (auxval & HWCAP_SVE)
|
||||||
return PROVIDER_INFO(ec_encode_data_sve);
|
return PROVIDER_INFO(ec_encode_data_sve);
|
||||||
if (auxval & HWCAP_ASIMD)
|
if (auxval & HWCAP_ASIMD)
|
||||||
return PROVIDER_INFO(ec_encode_data_neon);
|
return PROVIDER_INFO(ec_encode_data_neon);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||||
|
return PROVIDER_INFO(ec_encode_data_sve);
|
||||||
|
return PROVIDER_INFO(ec_encode_data_neon);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(ec_encode_data);
|
return PROVIDER_BASIC(ec_encode_data);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
|
DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
|
||||||
if (auxval & HWCAP_SVE)
|
if (auxval & HWCAP_SVE)
|
||||||
return PROVIDER_INFO(ec_encode_data_update_sve);
|
return PROVIDER_INFO(ec_encode_data_update_sve);
|
||||||
if (auxval & HWCAP_ASIMD)
|
if (auxval & HWCAP_ASIMD)
|
||||||
return PROVIDER_INFO(ec_encode_data_update_neon);
|
return PROVIDER_INFO(ec_encode_data_update_neon);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||||
|
return PROVIDER_INFO(ec_encode_data_update_sve);
|
||||||
|
return PROVIDER_INFO(ec_encode_data_update_neon);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(ec_encode_data_update);
|
return PROVIDER_BASIC(ec_encode_data_update);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
|
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
|
||||||
if (auxval & HWCAP_SVE)
|
if (auxval & HWCAP_SVE)
|
||||||
return PROVIDER_INFO(gf_vect_mul_sve);
|
return PROVIDER_INFO(gf_vect_mul_sve);
|
||||||
if (auxval & HWCAP_ASIMD)
|
if (auxval & HWCAP_ASIMD)
|
||||||
return PROVIDER_INFO(gf_vect_mul_neon);
|
return PROVIDER_INFO(gf_vect_mul_neon);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_SVE_KEY))
|
||||||
|
return PROVIDER_INFO(gf_vect_mul_sve);
|
||||||
|
return PROVIDER_INFO(gf_vect_mul_neon);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(gf_vect_mul);
|
return PROVIDER_BASIC(gf_vect_mul);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -27,11 +27,14 @@
|
|||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global gf_2vect_dot_prod_neon
|
.global cdecl(gf_2vect_dot_prod_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_2vect_dot_prod_neon, %function
|
.type gf_2vect_dot_prod_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* arguments */
|
/* arguments */
|
||||||
x_len .req x0
|
x_len .req x0
|
||||||
@ -130,7 +133,7 @@ q_data .req q_p1_1
|
|||||||
v_data_lo .req v_p1_2
|
v_data_lo .req v_p1_2
|
||||||
v_data_hi .req v_p1_3
|
v_data_hi .req v_p1_3
|
||||||
|
|
||||||
gf_2vect_dot_prod_neon:
|
cdecl(gf_2vect_dot_prod_neon):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_2vect_dot_prod_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_2vect_dot_prod_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_2vect_dot_prod_sve, %function
|
.type gf_2vect_dot_prod_sve, %function
|
||||||
|
#endif
|
||||||
/* void gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
/* void gf_2vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||||
unsigned char **src, unsigned char **dest);
|
unsigned char **src, unsigned char **dest);
|
||||||
*/
|
*/
|
||||||
@ -81,7 +85,7 @@ q_gft2_hi .req q18
|
|||||||
|
|
||||||
z_dest2 .req z27
|
z_dest2 .req z27
|
||||||
|
|
||||||
gf_2vect_dot_prod_sve:
|
cdecl(gf_2vect_dot_prod_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -26,11 +26,15 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global gf_2vect_mad_neon
|
.global cdecl(gf_2vect_mad_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_2vect_mad_neon, %function
|
.type gf_2vect_mad_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* arguments */
|
/* arguments */
|
||||||
x_len .req x0
|
x_len .req x0
|
||||||
@ -125,7 +129,7 @@ v_data_lo .req v17
|
|||||||
v_data_hi .req v18
|
v_data_hi .req v18
|
||||||
|
|
||||||
|
|
||||||
gf_2vect_mad_neon:
|
cdecl(gf_2vect_mad_neon):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
@ -360,8 +364,13 @@ gf_2vect_mad_neon:
|
|||||||
sub x_dest1, x_dest1, x_tmp
|
sub x_dest1, x_dest1, x_tmp
|
||||||
sub x_dest2, x_dest2, x_tmp
|
sub x_dest2, x_dest2, x_tmp
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_const, const_tbl
|
adrp x_const, const_tbl
|
||||||
add x_const, x_const, :lo12:const_tbl
|
add x_const, x_const, :lo12:const_tbl
|
||||||
|
#else
|
||||||
|
adrp x_const, const_tbl@PAGE
|
||||||
|
add x_const, x_const, const_tbl@PAGEOFF
|
||||||
|
#endif
|
||||||
sub x_const, x_const, x_tmp
|
sub x_const, x_const, x_tmp
|
||||||
ldr q_tmp, [x_const, #16]
|
ldr q_tmp, [x_const, #16]
|
||||||
|
|
||||||
@ -395,7 +404,7 @@ gf_2vect_mad_neon:
|
|||||||
mov w_ret, #1
|
mov w_ret, #1
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.balign 8
|
.balign 8
|
||||||
const_tbl:
|
const_tbl:
|
||||||
.dword 0x0000000000000000, 0x0000000000000000
|
.dword 0x0000000000000000, 0x0000000000000000
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_2vect_mad_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_2vect_mad_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_2vect_mad_sve, %function
|
.type gf_2vect_mad_sve, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
/* gf_2vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||||
unsigned char *src, unsigned char **dest);
|
unsigned char *src, unsigned char **dest);
|
||||||
@ -77,7 +81,7 @@ q_gft2_hi .req q18
|
|||||||
|
|
||||||
z_dest2 .req z27
|
z_dest2 .req z27
|
||||||
|
|
||||||
gf_2vect_mad_sve:
|
cdecl(gf_2vect_mad_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -27,11 +27,14 @@
|
|||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global gf_3vect_dot_prod_neon
|
.global cdecl(gf_3vect_dot_prod_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_3vect_dot_prod_neon, %function
|
.type gf_3vect_dot_prod_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* arguments */
|
/* arguments */
|
||||||
x_len .req x0
|
x_len .req x0
|
||||||
@ -117,7 +120,7 @@ v_data_lo .req v_p1_2
|
|||||||
v_data_hi .req v_p1_3
|
v_data_hi .req v_p1_3
|
||||||
|
|
||||||
|
|
||||||
gf_3vect_dot_prod_neon:
|
cdecl(gf_3vect_dot_prod_neon):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_3vect_dot_prod_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_3vect_dot_prod_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_3vect_dot_prod_sve, %function
|
.type gf_3vect_dot_prod_sve, %function
|
||||||
|
#endif
|
||||||
/* void gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
/* void gf_3vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||||
unsigned char **src, unsigned char **dest);
|
unsigned char **src, unsigned char **dest);
|
||||||
*/
|
*/
|
||||||
@ -89,7 +93,7 @@ q_gft3_hi .req q20
|
|||||||
z_dest2 .req z27
|
z_dest2 .req z27
|
||||||
z_dest3 .req z28
|
z_dest3 .req z28
|
||||||
|
|
||||||
gf_3vect_dot_prod_sve:
|
cdecl(gf_3vect_dot_prod_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -26,11 +26,15 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global gf_3vect_mad_neon
|
.global cdecl(gf_3vect_mad_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_3vect_mad_neon, %function
|
.type gf_3vect_mad_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* arguments */
|
/* arguments */
|
||||||
x_len .req x0
|
x_len .req x0
|
||||||
@ -122,7 +126,7 @@ q_data .req q21
|
|||||||
v_data_lo .req v22
|
v_data_lo .req v22
|
||||||
v_data_hi .req v23
|
v_data_hi .req v23
|
||||||
|
|
||||||
gf_3vect_mad_neon:
|
cdecl(gf_3vect_mad_neon):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
@ -332,8 +336,13 @@ gf_3vect_mad_neon:
|
|||||||
sub x_dest2, x_dest2, x_tmp
|
sub x_dest2, x_dest2, x_tmp
|
||||||
sub x_dest3, x_dest3, x_tmp
|
sub x_dest3, x_dest3, x_tmp
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_const, const_tbl
|
adrp x_const, const_tbl
|
||||||
add x_const, x_const, :lo12:const_tbl
|
add x_const, x_const, :lo12:const_tbl
|
||||||
|
#else
|
||||||
|
adrp x_const, const_tbl@PAGE
|
||||||
|
add x_const, x_const, const_tbl@PAGEOFF
|
||||||
|
#endif
|
||||||
sub x_const, x_const, x_tmp
|
sub x_const, x_const, x_tmp
|
||||||
ldr q_tmp, [x_const, #16]
|
ldr q_tmp, [x_const, #16]
|
||||||
|
|
||||||
@ -375,7 +384,7 @@ gf_3vect_mad_neon:
|
|||||||
mov w_ret, #1
|
mov w_ret, #1
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.balign 8
|
.balign 8
|
||||||
const_tbl:
|
const_tbl:
|
||||||
.dword 0x0000000000000000, 0x0000000000000000
|
.dword 0x0000000000000000, 0x0000000000000000
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_3vect_mad_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_3vect_mad_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_3vect_mad_sve, %function
|
.type gf_3vect_mad_sve, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
/* gf_3vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||||
unsigned char *src, unsigned char **dest);
|
unsigned char *src, unsigned char **dest);
|
||||||
@ -84,7 +88,7 @@ q_gft3_hi .req q20
|
|||||||
z_dest2 .req z27
|
z_dest2 .req z27
|
||||||
z_dest3 .req z28
|
z_dest3 .req z28
|
||||||
|
|
||||||
gf_3vect_mad_sve:
|
cdecl(gf_3vect_mad_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -26,11 +26,15 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global gf_4vect_dot_prod_neon
|
.global cdecl(gf_4vect_dot_prod_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_4vect_dot_prod_neon, %function
|
.type gf_4vect_dot_prod_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* arguments */
|
/* arguments */
|
||||||
x_len .req x0
|
x_len .req x0
|
||||||
@ -132,7 +136,7 @@ q_data .req q_tmp1
|
|||||||
v_data_lo .req v_tmp1_lo
|
v_data_lo .req v_tmp1_lo
|
||||||
v_data_hi .req v_tmp1_hi
|
v_data_hi .req v_tmp1_hi
|
||||||
|
|
||||||
gf_4vect_dot_prod_neon:
|
cdecl(gf_4vect_dot_prod_neon):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_4vect_dot_prod_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_4vect_dot_prod_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_4vect_dot_prod_sve, %function
|
.type gf_4vect_dot_prod_sve, %function
|
||||||
|
#endif
|
||||||
/* void gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
/* void gf_4vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||||
unsigned char **src, unsigned char **dest);
|
unsigned char **src, unsigned char **dest);
|
||||||
*/
|
*/
|
||||||
@ -97,7 +101,7 @@ z_dest2 .req z27
|
|||||||
z_dest3 .req z28
|
z_dest3 .req z28
|
||||||
z_dest4 .req z29
|
z_dest4 .req z29
|
||||||
|
|
||||||
gf_4vect_dot_prod_sve:
|
cdecl(gf_4vect_dot_prod_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -27,11 +27,14 @@
|
|||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global gf_4vect_mad_neon
|
.global cdecl(gf_4vect_mad_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_4vect_mad_neon, %function
|
.type gf_4vect_mad_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* arguments */
|
/* arguments */
|
||||||
x_len .req x0
|
x_len .req x0
|
||||||
@ -138,7 +141,7 @@ q_data .req q21
|
|||||||
v_data_lo .req v22
|
v_data_lo .req v22
|
||||||
v_data_hi .req v23
|
v_data_hi .req v23
|
||||||
|
|
||||||
gf_4vect_mad_neon:
|
cdecl(gf_4vect_mad_neon):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
@ -397,8 +400,13 @@ gf_4vect_mad_neon:
|
|||||||
sub x_dest3, x_dest3, x_tmp
|
sub x_dest3, x_dest3, x_tmp
|
||||||
sub x_dest4, x_dest4, x_tmp
|
sub x_dest4, x_dest4, x_tmp
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_const, const_tbl
|
adrp x_const, const_tbl
|
||||||
add x_const, x_const, :lo12:const_tbl
|
add x_const, x_const, :lo12:const_tbl
|
||||||
|
#else
|
||||||
|
adrp x_const, const_tbl@PAGE
|
||||||
|
add x_const, x_const, const_tbl@PAGEOFF
|
||||||
|
#endif
|
||||||
sub x_const, x_const, x_tmp
|
sub x_const, x_const, x_tmp
|
||||||
ldr q_tmp, [x_const, #16]
|
ldr q_tmp, [x_const, #16]
|
||||||
|
|
||||||
@ -449,7 +457,7 @@ gf_4vect_mad_neon:
|
|||||||
mov w_ret, #1
|
mov w_ret, #1
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.balign 8
|
.balign 8
|
||||||
const_tbl:
|
const_tbl:
|
||||||
.dword 0x0000000000000000, 0x0000000000000000
|
.dword 0x0000000000000000, 0x0000000000000000
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_4vect_mad_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_4vect_mad_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_4vect_mad_sve, %function
|
.type gf_4vect_mad_sve, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
/* gf_4vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||||
unsigned char *src, unsigned char **dest);
|
unsigned char *src, unsigned char **dest);
|
||||||
@ -91,7 +95,7 @@ z_dest2 .req z27
|
|||||||
z_dest3 .req z28
|
z_dest3 .req z28
|
||||||
z_dest4 .req z29
|
z_dest4 .req z29
|
||||||
|
|
||||||
gf_4vect_mad_sve:
|
cdecl(gf_4vect_mad_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -27,11 +27,14 @@
|
|||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global gf_5vect_dot_prod_neon
|
.global cdecl(gf_5vect_dot_prod_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_5vect_dot_prod_neon, %function
|
.type gf_5vect_dot_prod_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* arguments */
|
/* arguments */
|
||||||
x_len .req x0
|
x_len .req x0
|
||||||
@ -159,7 +162,7 @@ q_gft5_lo .req q_p2_3
|
|||||||
q_gft5_hi .req q_p3_3
|
q_gft5_hi .req q_p3_3
|
||||||
|
|
||||||
|
|
||||||
gf_5vect_dot_prod_neon:
|
cdecl(gf_5vect_dot_prod_neon):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_5vect_dot_prod_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_5vect_dot_prod_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_5vect_dot_prod_sve, %function
|
.type gf_5vect_dot_prod_sve, %function
|
||||||
|
#endif
|
||||||
/* void gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
/* void gf_5vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||||
unsigned char **src, unsigned char **dest);
|
unsigned char **src, unsigned char **dest);
|
||||||
*/
|
*/
|
||||||
@ -105,7 +109,7 @@ z_dest3 .req z28
|
|||||||
z_dest4 .req z29
|
z_dest4 .req z29
|
||||||
z_dest5 .req z30
|
z_dest5 .req z30
|
||||||
|
|
||||||
gf_5vect_dot_prod_sve:
|
cdecl(gf_5vect_dot_prod_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -26,11 +26,15 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global gf_5vect_mad_neon
|
.global cdecl(gf_5vect_mad_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_5vect_mad_neon, %function
|
.type gf_5vect_mad_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* arguments */
|
/* arguments */
|
||||||
x_len .req x0
|
x_len .req x0
|
||||||
@ -152,7 +156,7 @@ q_data .req q21
|
|||||||
v_data_lo .req v22
|
v_data_lo .req v22
|
||||||
v_data_hi .req v23
|
v_data_hi .req v23
|
||||||
|
|
||||||
gf_5vect_mad_neon:
|
cdecl(gf_5vect_mad_neon):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
@ -463,8 +467,13 @@ gf_5vect_mad_neon:
|
|||||||
sub x_dest4, x_dest4, x_tmp
|
sub x_dest4, x_dest4, x_tmp
|
||||||
sub x_dest5, x_dest5, x_tmp
|
sub x_dest5, x_dest5, x_tmp
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_const, const_tbl
|
adrp x_const, const_tbl
|
||||||
add x_const, x_const, :lo12:const_tbl
|
add x_const, x_const, :lo12:const_tbl
|
||||||
|
#else
|
||||||
|
adrp x_const, const_tbl@PAGE
|
||||||
|
add x_const, x_const, const_tbl@PAGEOFF
|
||||||
|
#endif
|
||||||
sub x_const, x_const, x_tmp
|
sub x_const, x_const, x_tmp
|
||||||
ldr q_tmp, [x_const, #16]
|
ldr q_tmp, [x_const, #16]
|
||||||
|
|
||||||
@ -528,7 +537,7 @@ gf_5vect_mad_neon:
|
|||||||
mov w_ret, #1
|
mov w_ret, #1
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.balign 8
|
.balign 8
|
||||||
const_tbl:
|
const_tbl:
|
||||||
.dword 0x0000000000000000, 0x0000000000000000
|
.dword 0x0000000000000000, 0x0000000000000000
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_5vect_mad_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_5vect_mad_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_5vect_mad_sve, %function
|
.type gf_5vect_mad_sve, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
/* gf_5vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||||
unsigned char *src, unsigned char **dest);
|
unsigned char *src, unsigned char **dest);
|
||||||
@ -98,7 +102,7 @@ z_dest3 .req z28
|
|||||||
z_dest4 .req z29
|
z_dest4 .req z29
|
||||||
z_dest5 .req z30
|
z_dest5 .req z30
|
||||||
|
|
||||||
gf_5vect_mad_sve:
|
cdecl(gf_5vect_mad_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_6vect_dot_prod_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_6vect_dot_prod_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_6vect_dot_prod_sve, %function
|
.type gf_6vect_dot_prod_sve, %function
|
||||||
|
#endif
|
||||||
/* void gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
/* void gf_6vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||||
unsigned char **src, unsigned char **dest);
|
unsigned char **src, unsigned char **dest);
|
||||||
*/
|
*/
|
||||||
@ -113,7 +117,7 @@ z_dest4 .req z29
|
|||||||
z_dest5 .req z30
|
z_dest5 .req z30
|
||||||
z_dest6 .req z31
|
z_dest6 .req z31
|
||||||
|
|
||||||
gf_6vect_dot_prod_sve:
|
cdecl(gf_6vect_dot_prod_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -27,10 +27,13 @@
|
|||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
.text
|
#include "../include/aarch64_label.h"
|
||||||
.global gf_6vect_mad_neon
|
|
||||||
.type gf_6vect_mad_neon, %function
|
|
||||||
|
|
||||||
|
.text
|
||||||
|
.global cdecl(gf_6vect_mad_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
|
.type gf_6vect_mad_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* arguments */
|
/* arguments */
|
||||||
x_len .req x0
|
x_len .req x0
|
||||||
@ -166,7 +169,7 @@ q_data .req q21
|
|||||||
v_data_lo .req v22
|
v_data_lo .req v22
|
||||||
v_data_hi .req v23
|
v_data_hi .req v23
|
||||||
|
|
||||||
gf_6vect_mad_neon:
|
cdecl(gf_6vect_mad_neon):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
@ -526,8 +529,13 @@ gf_6vect_mad_neon:
|
|||||||
sub x_dest5, x_dest5, x_tmp
|
sub x_dest5, x_dest5, x_tmp
|
||||||
sub x_dest6, x_dest6, x_tmp
|
sub x_dest6, x_dest6, x_tmp
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_const, const_tbl
|
adrp x_const, const_tbl
|
||||||
add x_const, x_const, :lo12:const_tbl
|
add x_const, x_const, :lo12:const_tbl
|
||||||
|
#else
|
||||||
|
adrp x_const, const_tbl@PAGE
|
||||||
|
add x_const, x_const, const_tbl@PAGEOFF
|
||||||
|
#endif
|
||||||
sub x_const, x_const, x_tmp
|
sub x_const, x_const, x_tmp
|
||||||
ldr q_tmp, [x_const, #16]
|
ldr q_tmp, [x_const, #16]
|
||||||
|
|
||||||
@ -603,7 +611,7 @@ gf_6vect_mad_neon:
|
|||||||
mov w_ret, #1
|
mov w_ret, #1
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.balign 8
|
.balign 8
|
||||||
const_tbl:
|
const_tbl:
|
||||||
.dword 0x0000000000000000, 0x0000000000000000
|
.dword 0x0000000000000000, 0x0000000000000000
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_6vect_mad_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_6vect_mad_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_6vect_mad_sve, %function
|
.type gf_6vect_mad_sve, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
/* gf_6vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||||
unsigned char *src, unsigned char **dest);
|
unsigned char *src, unsigned char **dest);
|
||||||
@ -105,7 +109,7 @@ z_dest4 .req z29
|
|||||||
z_dest5 .req z30
|
z_dest5 .req z30
|
||||||
z_dest6 .req z31
|
z_dest6 .req z31
|
||||||
|
|
||||||
gf_6vect_mad_sve:
|
cdecl(gf_6vect_mad_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_7vect_dot_prod_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_7vect_dot_prod_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_7vect_dot_prod_sve, %function
|
.type gf_7vect_dot_prod_sve, %function
|
||||||
|
#endif
|
||||||
/* void gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
/* void gf_7vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||||
unsigned char **src, unsigned char **dest);
|
unsigned char **src, unsigned char **dest);
|
||||||
*/
|
*/
|
||||||
@ -122,7 +126,7 @@ z_dest4 .req z29
|
|||||||
z_dest5 .req z30
|
z_dest5 .req z30
|
||||||
z_dest6 .req z31
|
z_dest6 .req z31
|
||||||
|
|
||||||
gf_7vect_dot_prod_sve:
|
cdecl(gf_7vect_dot_prod_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_8vect_dot_prod_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_8vect_dot_prod_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_8vect_dot_prod_sve, %function
|
.type gf_8vect_dot_prod_sve, %function
|
||||||
|
#endif
|
||||||
/* void gf_8vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
/* void gf_8vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||||
unsigned char **src, unsigned char **dest);
|
unsigned char **src, unsigned char **dest);
|
||||||
*/
|
*/
|
||||||
@ -131,7 +135,7 @@ z_dest4 .req z29
|
|||||||
z_dest5 .req z30
|
z_dest5 .req z30
|
||||||
z_dest6 .req z31
|
z_dest6 .req z31
|
||||||
|
|
||||||
gf_8vect_dot_prod_sve:
|
cdecl(gf_8vect_dot_prod_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -26,10 +26,15 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global gf_vect_dot_prod_neon
|
.global cdecl(gf_vect_dot_prod_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_vect_dot_prod_neon, %function
|
.type gf_vect_dot_prod_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* arguments */
|
/* arguments */
|
||||||
x_len .req x0
|
x_len .req x0
|
||||||
@ -115,7 +120,7 @@ v_data_lo .req v_p2
|
|||||||
v_data_hi .req v_p3
|
v_data_hi .req v_p3
|
||||||
|
|
||||||
|
|
||||||
gf_vect_dot_prod_neon:
|
cdecl(gf_vect_dot_prod_neon):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_vect_dot_prod_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_vect_dot_prod_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_vect_dot_prod_sve, %function
|
.type gf_vect_dot_prod_sve, %function
|
||||||
|
#endif
|
||||||
/* void gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
/* void gf_vect_dot_prod_sve(int len, int vlen, unsigned char *gftbls,
|
||||||
unsigned char **src, unsigned char *dest);
|
unsigned char **src, unsigned char *dest);
|
||||||
*/
|
*/
|
||||||
@ -66,7 +70,7 @@ z_gft1_hi .req z5
|
|||||||
q_gft1_lo .req q4
|
q_gft1_lo .req q4
|
||||||
q_gft1_hi .req q5
|
q_gft1_hi .req q5
|
||||||
|
|
||||||
gf_vect_dot_prod_sve:
|
cdecl(gf_vect_dot_prod_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -26,11 +26,15 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global gf_vect_mad_neon
|
.global cdecl(gf_vect_mad_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_vect_mad_neon, %function
|
.type gf_vect_mad_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* arguments */
|
/* arguments */
|
||||||
x_len .req x0
|
x_len .req x0
|
||||||
@ -121,7 +125,7 @@ v_data_lo .req v_d1_2
|
|||||||
v_data_hi .req v_d1_3
|
v_data_hi .req v_d1_3
|
||||||
|
|
||||||
|
|
||||||
gf_vect_mad_neon:
|
cdecl(gf_vect_mad_neon):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
@ -281,8 +285,13 @@ gf_vect_mad_neon:
|
|||||||
mov x_src, x_src_end
|
mov x_src, x_src_end
|
||||||
sub x_dest1, x_dest1, x_tmp
|
sub x_dest1, x_dest1, x_tmp
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x_const, const_tbl
|
adrp x_const, const_tbl
|
||||||
add x_const, x_const, :lo12:const_tbl
|
add x_const, x_const, :lo12:const_tbl
|
||||||
|
#else
|
||||||
|
adrp x_const, const_tbl@PAGE
|
||||||
|
add x_const, x_const, const_tbl@PAGEOFF
|
||||||
|
#endif
|
||||||
sub x_const, x_const, x_tmp
|
sub x_const, x_const, x_tmp
|
||||||
ldr q_tmp, [x_const, #16]
|
ldr q_tmp, [x_const, #16]
|
||||||
|
|
||||||
@ -308,7 +317,7 @@ gf_vect_mad_neon:
|
|||||||
mov w_ret, #1
|
mov w_ret, #1
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.balign 8
|
.balign 8
|
||||||
const_tbl:
|
const_tbl:
|
||||||
.dword 0x0000000000000000, 0x0000000000000000
|
.dword 0x0000000000000000, 0x0000000000000000
|
||||||
|
@ -30,9 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.global gf_vect_mad_sve
|
.global cdecl(gf_vect_mad_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_vect_mad_sve, %function
|
.type gf_vect_mad_sve, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
/* gf_vect_mad_sve(int len, int vec, int vec_i, unsigned char *gftbls,
|
||||||
unsigned char *src, unsigned char *dest);
|
unsigned char *src, unsigned char *dest);
|
||||||
@ -68,7 +71,7 @@ z_gft1_hi .req z7
|
|||||||
q_gft1_lo .req q6
|
q_gft1_lo .req q6
|
||||||
q_gft1_hi .req q7
|
q_gft1_hi .req q7
|
||||||
|
|
||||||
gf_vect_mad_sve:
|
cdecl(gf_vect_mad_sve):
|
||||||
/* less than 16 bytes, return_fail */
|
/* less than 16 bytes, return_fail */
|
||||||
cmp x_len, #16
|
cmp x_len, #16
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -27,11 +27,14 @@
|
|||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global gf_vect_mul_neon
|
.global cdecl(gf_vect_mul_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_vect_mul_neon, %function
|
.type gf_vect_mul_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* arguments */
|
/* arguments */
|
||||||
x_len .req x0
|
x_len .req x0
|
||||||
@ -90,7 +93,7 @@ v_data_6_hi .req v_data_6
|
|||||||
v_data_7_hi .req v_data_7
|
v_data_7_hi .req v_data_7
|
||||||
|
|
||||||
|
|
||||||
gf_vect_mul_neon:
|
cdecl(gf_vect_mul_neon):
|
||||||
/* less than 32 bytes, return_fail */
|
/* less than 32 bytes, return_fail */
|
||||||
cmp x_len, #32
|
cmp x_len, #32
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -30,8 +30,12 @@
|
|||||||
.align 6
|
.align 6
|
||||||
.arch armv8-a+sve
|
.arch armv8-a+sve
|
||||||
|
|
||||||
.global gf_vect_mul_sve
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
|
.global cdecl(gf_vect_mul_sve)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gf_vect_mul_sve, %function
|
.type gf_vect_mul_sve, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Refer to include/gf_vect_mul.h
|
/* Refer to include/gf_vect_mul.h
|
||||||
*
|
*
|
||||||
@ -72,7 +76,7 @@ z_gft1_hi .req z7
|
|||||||
q_gft1_lo .req q6
|
q_gft1_lo .req q6
|
||||||
q_gft1_hi .req q7
|
q_gft1_hi .req q7
|
||||||
|
|
||||||
gf_vect_mul_sve:
|
cdecl(gf_vect_mul_sve):
|
||||||
/* less than 32 bytes, return_fail */
|
/* less than 32 bytes, return_fail */
|
||||||
cmp x_len, #32
|
cmp x_len, #32
|
||||||
blt .return_fail
|
blt .return_fail
|
||||||
|
@ -27,6 +27,8 @@
|
|||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.arch armv8-a+crc
|
.arch armv8-a+crc
|
||||||
.text
|
.text
|
||||||
.align 2
|
.align 2
|
||||||
@ -46,8 +48,10 @@ declare Macros
|
|||||||
x_\name .req x\reg
|
x_\name .req x\reg
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.global encode_deflate_icf_aarch64
|
.global cdecl(encode_deflate_icf_aarch64)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type encode_deflate_icf_aarch64, %function
|
.type encode_deflate_icf_aarch64, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
|
struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
|
||||||
@ -86,7 +90,7 @@ declare Macros
|
|||||||
.equ offset_m_out_buf, 16
|
.equ offset_m_out_buf, 16
|
||||||
.equ offset_m_out_end, 24
|
.equ offset_m_out_end, 24
|
||||||
|
|
||||||
encode_deflate_icf_aarch64:
|
cdecl(encode_deflate_icf_aarch64):
|
||||||
cmp next_in, end_in
|
cmp next_in, end_in
|
||||||
bcs .done
|
bcs .done
|
||||||
|
|
||||||
@ -156,4 +160,6 @@ encode_deflate_icf_aarch64:
|
|||||||
|
|
||||||
.done:
|
.done:
|
||||||
ret
|
ret
|
||||||
|
#ifndef __APPLE__
|
||||||
.size encode_deflate_icf_aarch64, .-encode_deflate_icf_aarch64
|
.size encode_deflate_icf_aarch64, .-encode_deflate_icf_aarch64
|
||||||
|
#endif
|
||||||
|
@ -26,6 +26,9 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.arch armv8-a+crc+crypto
|
.arch armv8-a+crc+crypto
|
||||||
.text
|
.text
|
||||||
.align 2
|
.align 2
|
||||||
@ -62,8 +65,10 @@ declare Macros
|
|||||||
.endm
|
.endm
|
||||||
|
|
||||||
.align 2
|
.align 2
|
||||||
.global gen_icf_map_h1_aarch64
|
.global cdecl(gen_icf_map_h1_aarch64)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type gen_icf_map_h1_aarch64, %function
|
.type gen_icf_map_h1_aarch64, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* arguments */
|
/* arguments */
|
||||||
declare_generic_reg stream_param, 0,x
|
declare_generic_reg stream_param, 0,x
|
||||||
@ -137,7 +142,7 @@ uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
|
|||||||
struct deflate_icf *matches_icf_lookup, uint64_t input_size)
|
struct deflate_icf *matches_icf_lookup, uint64_t input_size)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
gen_icf_map_h1_aarch64:
|
cdecl(gen_icf_map_h1_aarch64):
|
||||||
cmp input_size_param, (ISAL_LOOK_AHEAD-1) // 287
|
cmp input_size_param, (ISAL_LOOK_AHEAD-1) // 287
|
||||||
bls .fast_exit
|
bls .fast_exit
|
||||||
stp x29, x30, [sp, -16]!
|
stp x29, x30, [sp, -16]!
|
||||||
@ -263,4 +268,6 @@ gen_icf_map_h1_aarch64:
|
|||||||
.fast_exit:
|
.fast_exit:
|
||||||
mov ret_val, 0
|
mov ret_val, 0
|
||||||
ret
|
ret
|
||||||
|
#ifndef __APPLE__
|
||||||
.size gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64
|
.size gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64
|
||||||
|
#endif
|
||||||
|
@ -27,6 +27,8 @@
|
|||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.arch armv8-a
|
.arch armv8-a
|
||||||
.text
|
.text
|
||||||
.align 2
|
.align 2
|
||||||
@ -257,8 +259,10 @@ declare Macros
|
|||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
|
||||||
.global decode_huffman_code_block_stateless_aarch64
|
.global cdecl(decode_huffman_code_block_stateless_aarch64)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type decode_huffman_code_block_stateless_aarch64, %function
|
.type decode_huffman_code_block_stateless_aarch64, %function
|
||||||
|
#endif
|
||||||
/*
|
/*
|
||||||
void decode_huffman_code_block_stateless_aarch64(
|
void decode_huffman_code_block_stateless_aarch64(
|
||||||
struct inflate_state *state,
|
struct inflate_state *state,
|
||||||
@ -305,7 +309,7 @@ declare Macros
|
|||||||
declare_generic_reg write_overflow_lits,26,w
|
declare_generic_reg write_overflow_lits,26,w
|
||||||
declare_generic_reg repeat_length,27,w
|
declare_generic_reg repeat_length,27,w
|
||||||
|
|
||||||
decode_huffman_code_block_stateless_aarch64:
|
cdecl(decode_huffman_code_block_stateless_aarch64):
|
||||||
//save registers
|
//save registers
|
||||||
push_stack
|
push_stack
|
||||||
|
|
||||||
@ -324,8 +328,13 @@ decode_huffman_code_block_stateless_aarch64:
|
|||||||
ldp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
|
ldp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
|
||||||
|
|
||||||
//init rfc_table
|
//init rfc_table
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp rfc_table,rfc_lookup_table
|
adrp rfc_table,rfc_lookup_table
|
||||||
add rfc_table,rfc_table,:lo12:rfc_lookup_table
|
add rfc_table,rfc_table,:lo12:rfc_lookup_table
|
||||||
|
#else
|
||||||
|
adrp rfc_table,rfc_lookup_table@PAGE
|
||||||
|
add rfc_table,rfc_table,rfc_lookup_table@PAGEOFF
|
||||||
|
#endif
|
||||||
#if ENABLE_TBL_INSTRUCTION
|
#if ENABLE_TBL_INSTRUCTION
|
||||||
ld1 {v1.16b,v2.16b,v3.16b},[rfc_table]
|
ld1 {v1.16b,v2.16b,v3.16b},[rfc_table]
|
||||||
add rfc_table,rfc_table,48
|
add rfc_table,rfc_table,48
|
||||||
@ -661,8 +670,10 @@ byte_copy_loop:
|
|||||||
strb w_arg0, [next_out],1
|
strb w_arg0, [next_out],1
|
||||||
bne byte_copy_loop
|
bne byte_copy_loop
|
||||||
b decompress_data_end
|
b decompress_data_end
|
||||||
|
#ifndef __APPLE__
|
||||||
.size decode_huffman_code_block_stateless_aarch64, .-decode_huffman_code_block_stateless_aarch64
|
.size decode_huffman_code_block_stateless_aarch64, .-decode_huffman_code_block_stateless_aarch64
|
||||||
.type rfc_lookup_table, %object
|
.type rfc_lookup_table, %object
|
||||||
|
#endif
|
||||||
|
|
||||||
rfc_lookup_table:
|
rfc_lookup_table:
|
||||||
#if ENABLE_TBL_INSTRUCTION
|
#if ENABLE_TBL_INSTRUCTION
|
||||||
@ -686,4 +697,6 @@ rfc_lookup_table:
|
|||||||
.short 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
|
.short 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
|
||||||
.short 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
|
.short 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
|
||||||
#endif
|
#endif
|
||||||
|
#ifndef __APPLE__
|
||||||
.size rfc_lookup_table, . - rfc_lookup_table
|
.size rfc_lookup_table, . - rfc_lookup_table
|
||||||
|
#endif
|
||||||
|
@ -26,6 +26,9 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.arch armv8-a+crc
|
.arch armv8-a+crc
|
||||||
.text
|
.text
|
||||||
.align 2
|
.align 2
|
||||||
@ -72,8 +75,10 @@ skip_has_hist:
|
|||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
|
||||||
.global isal_deflate_body_aarch64
|
.global cdecl(isal_deflate_body_aarch64)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type isal_deflate_body_aarch64, %function
|
.type isal_deflate_body_aarch64, %function
|
||||||
|
#endif
|
||||||
/*
|
/*
|
||||||
void isal_deflate_body_aarch64(struct isal_zstream *stream)
|
void isal_deflate_body_aarch64(struct isal_zstream *stream)
|
||||||
*/
|
*/
|
||||||
@ -115,7 +120,7 @@ skip_has_hist:
|
|||||||
declare_generic_reg code_len2, 4,x
|
declare_generic_reg code_len2, 4,x
|
||||||
|
|
||||||
|
|
||||||
isal_deflate_body_aarch64:
|
cdecl(isal_deflate_body_aarch64):
|
||||||
//save registers
|
//save registers
|
||||||
push_stack
|
push_stack
|
||||||
ldr avail_in, [stream, _avail_in]
|
ldr avail_in, [stream, _avail_in]
|
||||||
@ -258,4 +263,6 @@ exit_save_state:
|
|||||||
mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
|
mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
|
||||||
str w_tmp0, [stream, _internal_state+_state]
|
str w_tmp0, [stream, _internal_state+_state]
|
||||||
b exit_ret
|
b exit_ret
|
||||||
|
#ifndef __APPLE__
|
||||||
.size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64
|
.size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64
|
||||||
|
#endif
|
||||||
|
@ -26,6 +26,9 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.arch armv8-a+crc
|
.arch armv8-a+crc
|
||||||
.text
|
.text
|
||||||
.align 2
|
.align 2
|
||||||
@ -73,9 +76,11 @@ skip_has_hist:
|
|||||||
sub w_\next_in,w_\next_in,w_\m_out_buf
|
sub w_\next_in,w_\next_in,w_\m_out_buf
|
||||||
stp w_\next_in,w_\start_in,[\stream,_avail_out]
|
stp w_\next_in,w_\start_in,[\stream,_avail_out]
|
||||||
.endm
|
.endm
|
||||||
.global isal_deflate_finish_aarch64
|
.global cdecl(isal_deflate_finish_aarch64)
|
||||||
.arch armv8-a+crc
|
.arch armv8-a+crc
|
||||||
|
#ifndef __APPLE__
|
||||||
.type isal_deflate_finish_aarch64, %function
|
.type isal_deflate_finish_aarch64, %function
|
||||||
|
#endif
|
||||||
/*
|
/*
|
||||||
void isal_deflate_finish_aarch64(struct isal_zstream *stream)
|
void isal_deflate_finish_aarch64(struct isal_zstream *stream)
|
||||||
*/
|
*/
|
||||||
@ -117,7 +122,7 @@ skip_has_hist:
|
|||||||
declare_generic_reg code_len2, 4,x
|
declare_generic_reg code_len2, 4,x
|
||||||
|
|
||||||
|
|
||||||
isal_deflate_finish_aarch64:
|
cdecl(isal_deflate_finish_aarch64):
|
||||||
//save registers
|
//save registers
|
||||||
push_stack
|
push_stack
|
||||||
|
|
||||||
@ -260,5 +265,6 @@ update_state_exit:
|
|||||||
update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
|
update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
|
||||||
pop_stack
|
pop_stack
|
||||||
ret
|
ret
|
||||||
|
#ifndef __APPLE__
|
||||||
.size isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64
|
.size isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64
|
||||||
|
#endif
|
||||||
|
@ -26,6 +26,9 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.arch armv8-a+crc
|
.arch armv8-a+crc
|
||||||
.text
|
.text
|
||||||
.align 2
|
.align 2
|
||||||
@ -46,8 +49,10 @@ declare Macros
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
.global isal_deflate_hash_aarch64
|
.global cdecl(isal_deflate_hash_aarch64)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type isal_deflate_hash_aarch64, %function
|
.type isal_deflate_hash_aarch64, %function
|
||||||
|
#endif
|
||||||
/*
|
/*
|
||||||
void isal_deflate_hash_aarch64(uint16_t * hash_table, uint32_t hash_mask,
|
void isal_deflate_hash_aarch64(uint16_t * hash_table, uint32_t hash_mask,
|
||||||
uint32_t current_index, uint8_t * dict, uint32_t dict_len)
|
uint32_t current_index, uint8_t * dict, uint32_t dict_len)
|
||||||
@ -58,14 +63,14 @@ declare Macros
|
|||||||
declare_generic_reg dict, 3,x
|
declare_generic_reg dict, 3,x
|
||||||
declare_generic_reg dict_len, 4,w
|
declare_generic_reg dict_len, 4,w
|
||||||
|
|
||||||
declare_generic_reg next_in 3,x
|
declare_generic_reg next_in, 3,x
|
||||||
declare_generic_reg end_in 6,x
|
declare_generic_reg end_in, 6,x
|
||||||
declare_generic_reg ind 5,w
|
declare_generic_reg ind, 5,w
|
||||||
declare_generic_reg hash 2,w
|
declare_generic_reg hash, 2,w
|
||||||
declare_generic_reg literal 2,w
|
declare_generic_reg literal, 2,w
|
||||||
#define SHORTEST_MATCH #4
|
#define SHORTEST_MATCH #4
|
||||||
|
|
||||||
isal_deflate_hash_aarch64:
|
cdecl(isal_deflate_hash_aarch64):
|
||||||
sub ind, current_index, dict_len
|
sub ind, current_index, dict_len
|
||||||
and ind,ind,0xffff
|
and ind,ind,0xffff
|
||||||
|
|
||||||
@ -92,4 +97,6 @@ loop_start:
|
|||||||
exit_func:
|
exit_func:
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
#ifndef __APPLE__
|
||||||
.size isal_deflate_hash_aarch64, .-isal_deflate_hash_aarch64
|
.size isal_deflate_hash_aarch64, .-isal_deflate_hash_aarch64
|
||||||
|
#endif
|
||||||
|
@ -26,6 +26,9 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.arch armv8-a+crypto
|
.arch armv8-a+crypto
|
||||||
.text
|
.text
|
||||||
.align 3
|
.align 3
|
||||||
@ -56,9 +59,11 @@ Arguements list
|
|||||||
adler32 .req w0
|
adler32 .req w0
|
||||||
start .req x1
|
start .req x1
|
||||||
length .req x2
|
length .req x2
|
||||||
.global adler32_neon
|
.global cdecl(adler32_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type adler32_neon, %function
|
.type adler32_neon, %function
|
||||||
adler32_neon:
|
#endif
|
||||||
|
cdecl(adler32_neon):
|
||||||
/*
|
/*
|
||||||
local variables
|
local variables
|
||||||
*/
|
*/
|
||||||
@ -92,8 +97,13 @@ local variables
|
|||||||
lsr adler1, adler32, 16
|
lsr adler1, adler32, 16
|
||||||
|
|
||||||
lsr loop_cnt,length,5
|
lsr loop_cnt,length,5
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x3,factors
|
adrp x3,factors
|
||||||
add x3,x3,:lo12:factors
|
add x3,x3,:lo12:factors
|
||||||
|
#else
|
||||||
|
adrp x3,factors@PAGE
|
||||||
|
add x3,x3,factors@PAGEOFF
|
||||||
|
#endif
|
||||||
ld1 {factor0_v.16b-factor1_v.16b},[x3]
|
ld1 {factor0_v.16b-factor1_v.16b},[x3]
|
||||||
|
|
||||||
add end,start,length
|
add end,start,length
|
||||||
@ -162,12 +172,15 @@ end_func:
|
|||||||
orr w0,adler0,adler1,lsl 16
|
orr w0,adler0,adler1,lsl 16
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
.size adler32_neon, .-adler32_neon
|
.size adler32_neon, .-adler32_neon
|
||||||
.section .rodata.cst16,"aM",@progbits,16
|
.section .rodata.cst16,"aM",@progbits,16
|
||||||
|
#else
|
||||||
|
.section __TEXT,__const
|
||||||
|
#endif
|
||||||
.align 4
|
.align 4
|
||||||
factors:
|
factors:
|
||||||
.quad 0x191a1b1c1d1e1f20
|
.quad 0x191a1b1c1d1e1f20
|
||||||
.quad 0x1112131415161718
|
.quad 0x1112131415161718
|
||||||
.quad 0x090a0b0c0d0e0f10
|
.quad 0x090a0b0c0d0e0f10
|
||||||
.quad 0x0102030405060708
|
.quad 0x0102030405060708
|
||||||
|
|
||||||
|
@ -30,86 +30,121 @@
|
|||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_adler32)
|
DEFINE_INTERFACE_DISPATCHER(isal_adler32)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_ASIMD)
|
if (auxval & HWCAP_ASIMD)
|
||||||
return PROVIDER_INFO(adler32_neon);
|
return PROVIDER_INFO(adler32_neon);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
return PROVIDER_INFO(adler32_neon);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(adler32);
|
return PROVIDER_BASIC(adler32);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_body)
|
DEFINE_INTERFACE_DISPATCHER(isal_deflate_body)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(isal_deflate_body_aarch64);
|
return PROVIDER_INFO(isal_deflate_body_aarch64);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(isal_deflate_body_aarch64);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(isal_deflate_body);
|
return PROVIDER_BASIC(isal_deflate_body);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_finish)
|
DEFINE_INTERFACE_DISPATCHER(isal_deflate_finish)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(isal_deflate_finish_aarch64);
|
return PROVIDER_INFO(isal_deflate_finish_aarch64);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(isal_deflate_finish_aarch64);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(isal_deflate_finish);
|
return PROVIDER_BASIC(isal_deflate_finish);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl1)
|
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl1)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
|
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
|
return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl1)
|
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl1)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
|
return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64); /* finish kernel, same as the Linux branch (was the body kernel) */
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
|
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl2)
|
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl2)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
|
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
|
return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl2)
|
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl2)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
|
return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
|
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl3)
|
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl3)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
|
return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
|
||||||
|
#endif
|
||||||
return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
|
return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3)
|
DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base);
|
return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map);
|
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -125,64 +160,92 @@ DEFINE_INTERFACE_DISPATCHER(encode_deflate_icf)
|
|||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_update_histogram)
|
DEFINE_INTERFACE_DISPATCHER(isal_update_histogram)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(isal_update_histogram_aarch64);
|
return PROVIDER_INFO(isal_update_histogram_aarch64);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(isal_update_histogram_aarch64);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(isal_update_histogram);
|
return PROVIDER_BASIC(isal_update_histogram);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(gen_icf_map_lh1)
|
DEFINE_INTERFACE_DISPATCHER(gen_icf_map_lh1)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32) {
|
if (auxval & HWCAP_CRC32) {
|
||||||
return PROVIDER_INFO(gen_icf_map_h1_aarch64);
|
return PROVIDER_INFO(gen_icf_map_h1_aarch64);
|
||||||
}
|
}
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(gen_icf_map_h1_aarch64);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(gen_icf_map_h1);
|
return PROVIDER_BASIC(gen_icf_map_h1);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl0)
|
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl0)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(isal_deflate_hash);
|
return PROVIDER_BASIC(isal_deflate_hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl1)
|
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl1)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(isal_deflate_hash);
|
return PROVIDER_BASIC(isal_deflate_hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl2)
|
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl2)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(isal_deflate_hash);
|
return PROVIDER_BASIC(isal_deflate_hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl3)
|
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl3)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(isal_deflate_hash_aarch64);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(isal_deflate_hash);
|
return PROVIDER_BASIC(isal_deflate_hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(decode_huffman_code_block_stateless)
|
DEFINE_INTERFACE_DISPATCHER(decode_huffman_code_block_stateless)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_CRC32)
|
if (auxval & HWCAP_CRC32)
|
||||||
return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64);
|
return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
if (sysctlEnabled(SYSCTL_CRC32_KEY))
|
||||||
|
return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(decode_huffman_code_block_stateless);
|
return PROVIDER_BASIC(decode_huffman_code_block_stateless);
|
||||||
}
|
}
|
||||||
|
@ -26,6 +26,9 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.arch armv8-a
|
.arch armv8-a
|
||||||
.text
|
.text
|
||||||
.align 2
|
.align 2
|
||||||
@ -48,8 +51,10 @@ declare Macros
|
|||||||
|
|
||||||
.text
|
.text
|
||||||
.align 2
|
.align 2
|
||||||
.global set_long_icf_fg_aarch64
|
.global cdecl(set_long_icf_fg_aarch64)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type set_long_icf_fg_aarch64, %function
|
.type set_long_icf_fg_aarch64, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
|
void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
|
||||||
@ -69,7 +74,7 @@ void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t inp
|
|||||||
/* local variable */
|
/* local variable */
|
||||||
declare_generic_reg len, 7,w
|
declare_generic_reg len, 7,w
|
||||||
declare_generic_reg dist_code, 8,w
|
declare_generic_reg dist_code, 8,w
|
||||||
declare_generic_reg shortest_match_len 9,w
|
declare_generic_reg shortest_match_len, 9,w
|
||||||
declare_generic_reg len_max, 10,w
|
declare_generic_reg len_max, 10,w
|
||||||
declare_generic_reg dist_extra, 11,w
|
declare_generic_reg dist_extra, 11,w
|
||||||
declare_generic_reg const_8, 13,x
|
declare_generic_reg const_8, 13,x
|
||||||
@ -90,7 +95,7 @@ void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t inp
|
|||||||
.equ SHORTEST_MATCH, 4
|
.equ SHORTEST_MATCH, 4
|
||||||
.equ LEN_MAX_CONST, 512
|
.equ LEN_MAX_CONST, 512
|
||||||
|
|
||||||
set_long_icf_fg_aarch64:
|
cdecl(set_long_icf_fg_aarch64):
|
||||||
stp x29, x30, [sp, -192]!
|
stp x29, x30, [sp, -192]!
|
||||||
add x29, sp, 0
|
add x29, sp, 0
|
||||||
stp x21, x22, [sp, 32]
|
stp x21, x22, [sp, 32]
|
||||||
@ -103,11 +108,18 @@ set_long_icf_fg_aarch64:
|
|||||||
add end_in, next_in_param, input_size_param
|
add end_in, next_in_param, input_size_param
|
||||||
mov match_lookup, match_lookup_param
|
mov match_lookup, match_lookup_param
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x1, .data_dist_start
|
adrp x1, .data_dist_start
|
||||||
mov x2, DIST_START_SIZE // 128
|
mov x2, DIST_START_SIZE // 128
|
||||||
add x1, x1, :lo12:.data_dist_start
|
add x1, x1, :lo12:.data_dist_start
|
||||||
mov x0, dist_start
|
mov x0, dist_start
|
||||||
bl memcpy
|
#else
|
||||||
|
adrp x1, .data_dist_start@PAGE
|
||||||
|
mov x2, DIST_START_SIZE // 128
|
||||||
|
add x1, x1, .data_dist_start@PAGEOFF
|
||||||
|
mov x0, dist_start
|
||||||
|
#endif
|
||||||
|
bl cdecl(memcpy)
|
||||||
|
|
||||||
add x_tmp0, end_processed, ISAL_LOOK_AHEAD // 288
|
add x_tmp0, end_processed, ISAL_LOOK_AHEAD // 288
|
||||||
cmp end_in, x_tmp0
|
cmp end_in, x_tmp0
|
||||||
@ -182,9 +194,11 @@ set_long_icf_fg_aarch64:
|
|||||||
ldr x23, [sp, 48]
|
ldr x23, [sp, 48]
|
||||||
ldp x29, x30, [sp], 192
|
ldp x29, x30, [sp], 192
|
||||||
ret
|
ret
|
||||||
|
#ifndef __APPLE__
|
||||||
.size set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
|
.size set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
|
||||||
|
#endif
|
||||||
|
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.align 3
|
.align 3
|
||||||
.set .data_dist_start,. + 0
|
.set .data_dist_start,. + 0
|
||||||
.real_data_dist_start:
|
.real_data_dist_start:
|
||||||
|
@ -26,6 +26,9 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.arch armv8-a+crc
|
.arch armv8-a+crc
|
||||||
.text
|
.text
|
||||||
.align 2
|
.align 2
|
||||||
@ -45,8 +48,10 @@ declare Macros
|
|||||||
x_\name .req x\reg
|
x_\name .req x\reg
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.global isal_deflate_icf_body_hash_hist_aarch64
|
.global cdecl(isal_deflate_icf_body_hash_hist_aarch64)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type isal_deflate_icf_body_hash_hist_aarch64, %function
|
.type isal_deflate_icf_body_hash_hist_aarch64, %function
|
||||||
|
#endif
|
||||||
/*
|
/*
|
||||||
void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
|
void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
|
||||||
*/
|
*/
|
||||||
@ -126,7 +131,7 @@ void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
|
|||||||
declare_generic_reg tmp0, 4,x
|
declare_generic_reg tmp0, 4,x
|
||||||
declare_generic_reg tmp1, 5,x
|
declare_generic_reg tmp1, 5,x
|
||||||
|
|
||||||
isal_deflate_icf_body_hash_hist_aarch64:
|
cdecl(isal_deflate_icf_body_hash_hist_aarch64):
|
||||||
stp x29, x30, [sp, -80]!
|
stp x29, x30, [sp, -80]!
|
||||||
add x29, sp, 0
|
add x29, sp, 0
|
||||||
str x24, [sp, 56]
|
str x24, [sp, 56]
|
||||||
@ -360,5 +365,6 @@ isal_deflate_icf_body_hash_hist_aarch64:
|
|||||||
ldr x24, [sp, 56]
|
ldr x24, [sp, 56]
|
||||||
ldp x29, x30, [sp], 80
|
ldp x29, x30, [sp], 80
|
||||||
ret
|
ret
|
||||||
|
#ifndef __APPLE__
|
||||||
.size isal_deflate_icf_body_hash_hist_aarch64, .-isal_deflate_icf_body_hash_hist_aarch64
|
.size isal_deflate_icf_body_hash_hist_aarch64, .-isal_deflate_icf_body_hash_hist_aarch64
|
||||||
|
#endif
|
||||||
|
@ -26,6 +26,9 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.arch armv8-a+crc
|
.arch armv8-a+crc
|
||||||
.text
|
.text
|
||||||
|
|
||||||
@ -129,7 +132,9 @@ void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream);
|
|||||||
declare_generic_reg tmp3, 28,x
|
declare_generic_reg tmp3, 28,x
|
||||||
|
|
||||||
.align 2
|
.align 2
|
||||||
|
#ifndef __APPLE__
|
||||||
.type write_deflate_icf_constprop, %function
|
.type write_deflate_icf_constprop, %function
|
||||||
|
#endif
|
||||||
write_deflate_icf_constprop:
|
write_deflate_icf_constprop:
|
||||||
ldrh w2, [x0]
|
ldrh w2, [x0]
|
||||||
mov w3, 30
|
mov w3, 30
|
||||||
@ -141,10 +146,14 @@ write_deflate_icf_constprop:
|
|||||||
ubfx x1, x1, 16, 3
|
ubfx x1, x1, 16, 3
|
||||||
strh w1, [x0, 2]
|
strh w1, [x0, 2]
|
||||||
ret
|
ret
|
||||||
|
#ifndef __APPLE__
|
||||||
.size write_deflate_icf_constprop, .-write_deflate_icf_constprop
|
.size write_deflate_icf_constprop, .-write_deflate_icf_constprop
|
||||||
|
#endif
|
||||||
|
|
||||||
.align 2
|
.align 2
|
||||||
|
#ifndef __APPLE__
|
||||||
.type write_deflate_icf, %function
|
.type write_deflate_icf, %function
|
||||||
|
#endif
|
||||||
write_deflate_icf:
|
write_deflate_icf:
|
||||||
ldrh w4, [x0]
|
ldrh w4, [x0]
|
||||||
bfi w4, w1, 0, 10
|
bfi w4, w1, 0, 10
|
||||||
@ -156,10 +165,14 @@ write_deflate_icf:
|
|||||||
bfi w1, w3, 3, 13
|
bfi w1, w3, 3, 13
|
||||||
strh w1, [x0, 2]
|
strh w1, [x0, 2]
|
||||||
ret
|
ret
|
||||||
|
#ifndef __APPLE__
|
||||||
.size write_deflate_icf, .-write_deflate_icf
|
.size write_deflate_icf, .-write_deflate_icf
|
||||||
|
#endif
|
||||||
|
|
||||||
.align 2
|
.align 2
|
||||||
|
#ifndef __APPLE__
|
||||||
.type update_state, %function
|
.type update_state, %function
|
||||||
|
#endif
|
||||||
update_state:
|
update_state:
|
||||||
sub x7, x2, x1
|
sub x7, x2, x1
|
||||||
ldr x4, [x0, 48]
|
ldr x4, [x0, 48]
|
||||||
@ -179,12 +192,16 @@ update_state:
|
|||||||
str x5, [x4, 4688]
|
str x5, [x4, 4688]
|
||||||
str x6, [x4, 4696]
|
str x6, [x4, 4696]
|
||||||
ret
|
ret
|
||||||
|
#ifndef __APPLE__
|
||||||
.size update_state, .-update_state
|
.size update_state, .-update_state
|
||||||
|
#endif
|
||||||
|
|
||||||
.align 2
|
.align 2
|
||||||
.global isal_deflate_icf_finish_hash_hist_aarch64
|
.global cdecl(isal_deflate_icf_finish_hash_hist_aarch64)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type isal_deflate_icf_finish_hash_hist_aarch64, %function
|
.type isal_deflate_icf_finish_hash_hist_aarch64, %function
|
||||||
isal_deflate_icf_finish_hash_hist_aarch64:
|
#endif
|
||||||
|
cdecl(isal_deflate_icf_finish_hash_hist_aarch64):
|
||||||
ldr w_end_in, [stream, 8] // stream->avail_in
|
ldr w_end_in, [stream, 8] // stream->avail_in
|
||||||
cbz w_end_in, .stream_not_available
|
cbz w_end_in, .stream_not_available
|
||||||
|
|
||||||
@ -393,5 +410,6 @@ isal_deflate_icf_finish_hash_hist_aarch64:
|
|||||||
str w1, [stream, offset_state_state] // 84
|
str w1, [stream, offset_state_state] // 84
|
||||||
.done:
|
.done:
|
||||||
ret
|
ret
|
||||||
|
#ifndef __APPLE__
|
||||||
.size isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64
|
.size isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64
|
||||||
|
#endif
|
||||||
|
@ -26,6 +26,9 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.arch armv8-a+crc
|
.arch armv8-a+crc
|
||||||
.text
|
.text
|
||||||
.align 2
|
.align 2
|
||||||
@ -63,17 +66,24 @@ declare Macros
|
|||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro convert_length_to_len_sym length:req,length_out:req,tmp0:req
	/*
	 * Materialise the address of the table anchor into the scratch
	 * X register named by tmp0. Mach-O and ELF spell the page /
	 * page-offset relocation operators differently.
	 */
#ifdef __APPLE__
	adrp	x_\tmp0, .len_to_code_tab_lanchor@PAGE
	add	x_\tmp0, x_\tmp0, .len_to_code_tab_lanchor@PAGEOFF
#else
	adrp	x_\tmp0, .len_to_code_tab_lanchor
	add	x_\tmp0, x_\tmp0, :lo12:.len_to_code_tab_lanchor
#endif
	/*
	 * Load the 32-bit table entry selected by the (zero-extended)
	 * length register, index scaled by 4, then bias it by 256.
	 */
	ldr	w_\length_out, [x_\tmp0, w_\length, uxtw 2]
	add	w_\length_out, w_\length_out, 256
.endm
|
||||||
|
|
||||||
.section .rodata
|
ASM_DEF_RODATA
|
||||||
.align 4
|
.align 4
|
||||||
.len_to_code_tab_lanchor = . + 0
|
.len_to_code_tab_lanchor = . + 0
|
||||||
|
#ifndef __APPLE__
|
||||||
.type len_to_code_tab, %object
|
.type len_to_code_tab, %object
|
||||||
.size len_to_code_tab, 1056
|
.size len_to_code_tab, 1056
|
||||||
|
#endif
|
||||||
len_to_code_tab:
|
len_to_code_tab:
|
||||||
.word 0x00, 0x00, 0x00
|
.word 0x00, 0x00, 0x00
|
||||||
.word 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
|
.word 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
|
||||||
@ -111,9 +121,11 @@ len_to_code_tab:
|
|||||||
.word 0x00, 0x00, 0x00, 0x00, 0x00
|
.word 0x00, 0x00, 0x00, 0x00, 0x00
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.global isal_update_histogram_aarch64
|
.global cdecl(isal_update_histogram_aarch64)
|
||||||
.arch armv8-a+crc
|
.arch armv8-a+crc
|
||||||
|
#ifndef __APPLE__
|
||||||
.type isal_update_histogram_aarch64, %function
|
.type isal_update_histogram_aarch64, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
|
void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
|
||||||
@ -157,7 +169,7 @@ void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
|
|||||||
.equ hash_offset, (dist_offset + 8*DIST_LEN) // 2528
|
.equ hash_offset, (dist_offset + 8*DIST_LEN) // 2528
|
||||||
.equ hash_table_size, (8*1024*2) // 16384
|
.equ hash_table_size, (8*1024*2) // 16384
|
||||||
|
|
||||||
isal_update_histogram_aarch64:
|
cdecl(isal_update_histogram_aarch64):
|
||||||
cmp w_length, 0
|
cmp w_length, 0
|
||||||
ble .done
|
ble .done
|
||||||
|
|
||||||
@ -176,7 +188,7 @@ isal_update_histogram_aarch64:
|
|||||||
mov x0, last_seen
|
mov x0, last_seen
|
||||||
mov w1, 0
|
mov w1, 0
|
||||||
mov x2, hash_table_size
|
mov x2, hash_table_size
|
||||||
bl memset
|
bl cdecl(memset)
|
||||||
|
|
||||||
cmp current, loop_end_iter
|
cmp current, loop_end_iter
|
||||||
bcs .loop_end
|
bcs .loop_end
|
||||||
@ -308,4 +320,6 @@ isal_update_histogram_aarch64:
|
|||||||
.align 2
|
.align 2
|
||||||
.done:
|
.done:
|
||||||
ret
|
ret
|
||||||
|
#ifndef __APPLE__
|
||||||
.size isal_update_histogram_aarch64, .-isal_update_histogram_aarch64
|
.size isal_update_histogram_aarch64, .-isal_update_histogram_aarch64
|
||||||
|
#endif
|
||||||
|
18
include/aarch64_label.h
Normal file
18
include/aarch64_label.h
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
#ifndef __AARCH64_LABEL_H__
#define __AARCH64_LABEL_H__

/*
 * cdecl(x): spell symbol x the way the C compiler emits it at the
 * assembler level, by pasting the toolchain's __USER_LABEL_PREFIX__ in
 * front of it. When the toolchain does not define that macro the name
 * is used unchanged.
 *
 * CONCAT1 exists only to force expansion of __USER_LABEL_PREFIX__
 * before CONCAT2 applies the ## operator.
 */
#ifndef __USER_LABEL_PREFIX__
#define cdecl(x) x
#else
#define CONCAT2(a, b) a ## b
#define CONCAT1(a, b) CONCAT2(a, b)
#define cdecl(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
#endif

/*
 * ASM_DEF_RODATA: section directive that switches to the read-only
 * data section of the target object format.
 */
#ifndef __APPLE__
#define ASM_DEF_RODATA .section .rodata
#else
#define ASM_DEF_RODATA .section __TEXT,__const
#endif

#endif
|
@ -31,7 +31,14 @@
|
|||||||
#ifndef __aarch64__
|
#ifndef __aarch64__
|
||||||
#error "This file is for aarch64 only"
|
#error "This file is for aarch64 only"
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#define SYSCTL_PMULL_KEY "hw.optional.arm.FEAT_PMULL" // from macOS 12 FEAT_* sysctl infos are available
|
||||||
|
#define SYSCTL_CRC32_KEY "hw.optional.armv8_crc32"
|
||||||
|
#define SYSCTL_SVE_KEY "hw.optional.arm.FEAT_SVE" // this one is just a guess and need to check macOS update
|
||||||
|
#else
|
||||||
#include <asm/hwcap.h>
|
#include <asm/hwcap.h>
|
||||||
|
#endif
|
||||||
|
#include "aarch64_label.h"
|
||||||
#ifdef __ASSEMBLY__
|
#ifdef __ASSEMBLY__
|
||||||
/**
|
/**
|
||||||
* # mbin_interface : the wrapper layer for the ISA-L API
|
* # mbin_interface : the wrapper layer for the ISA-L API
|
||||||
@ -48,17 +55,18 @@
|
|||||||
* 4. The dispatcher should return the right function pointer, revision, and an information string.
|
* 4. The dispatcher should return the right function pointer, revision, and an information string.
|
||||||
**/
|
**/
|
||||||
.macro mbin_interface name:req
|
.macro mbin_interface name:req
|
||||||
.extern \name\()_dispatcher
|
.extern cdecl(\name\()_dispatcher)
|
||||||
.section .data
|
.data
|
||||||
.balign 8
|
.balign 8
|
||||||
.global \name\()_dispatcher_info
|
.global cdecl(\name\()_dispatcher_info)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type \name\()_dispatcher_info,%object
|
.type \name\()_dispatcher_info,%object
|
||||||
|
#endif
|
||||||
\name\()_dispatcher_info:
|
cdecl(\name\()_dispatcher_info):
|
||||||
.quad \name\()_mbinit //func_entry
|
.quad \name\()_mbinit //func_entry
|
||||||
|
#ifndef __APPLE__
|
||||||
.size \name\()_dispatcher_info,. - \name\()_dispatcher_info
|
.size \name\()_dispatcher_info,. - \name\()_dispatcher_info
|
||||||
|
#endif
|
||||||
.balign 8
|
.balign 8
|
||||||
.text
|
.text
|
||||||
\name\()_mbinit:
|
\name\()_mbinit:
|
||||||
@ -108,7 +116,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
bl \name\()_dispatcher
|
bl cdecl(\name\()_dispatcher)
|
||||||
//restore temp/indirect result registers
|
//restore temp/indirect result registers
|
||||||
ldp x8, x9, [sp, 16]
|
ldp x8, x9, [sp, 16]
|
||||||
.cfi_restore 8
|
.cfi_restore 8
|
||||||
@ -150,16 +158,24 @@
|
|||||||
.cfi_def_cfa_offset 0
|
.cfi_def_cfa_offset 0
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
|
|
||||||
.global \name
|
.global cdecl(\name)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type \name,%function
|
.type \name,%function
|
||||||
|
#endif
|
||||||
.align 2
|
.align 2
|
||||||
\name\():
|
cdecl(\name\()):
|
||||||
|
#ifndef __APPLE__
|
||||||
adrp x9, :got:\name\()_dispatcher_info
|
adrp x9, :got:\name\()_dispatcher_info
|
||||||
ldr x9, [x9, #:got_lo12:\name\()_dispatcher_info]
|
ldr x9, [x9, #:got_lo12:\name\()_dispatcher_info]
|
||||||
|
#else
|
||||||
|
adrp x9, cdecl(\name\()_dispatcher_info)@GOTPAGE
|
||||||
|
ldr x9, [x9, #cdecl(\name\()_dispatcher_info)@GOTPAGEOFF]
|
||||||
|
#endif
|
||||||
ldr x10,[x9]
|
ldr x10,[x9]
|
||||||
br x10
|
br x10
|
||||||
|
#ifndef __APPLE__
|
||||||
.size \name,. - \name
|
.size \name,. - \name
|
||||||
|
#endif
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -168,32 +184,53 @@
|
|||||||
*/
|
*/
|
||||||
.macro mbin_interface_base name:req, base:req
	// Emits two symbols for the interface called name:
	//   <name>_dispatcher_info : 8-byte slot in .data, initialised with
	//                            the address of the base implementation
	//   <name>                 : public entry point that loads the slot
	//                            through the GOT and tail-branches to the
	//                            function pointer stored in it
	// Clobbers x9 and x10 (caller-saved temporaries).
	//
	// NOTE(review): the base symbol is used verbatim (no cdecl wrapper);
	// on targets with a non-empty user label prefix the caller presumably
	// has to pass the already-prefixed name - confirm against callers.
	.extern \base
	.data
	.balign 8
	.global cdecl(\name\()_dispatcher_info)
#ifndef __APPLE__
	.type \name\()_dispatcher_info,%object
#endif
cdecl(\name\()_dispatcher_info):
	.quad \base	//func_entry
#ifndef __APPLE__
	.size \name\()_dispatcher_info,. - \name\()_dispatcher_info
#endif
	.balign 8
	.text
	.global cdecl(\name)
#ifndef __APPLE__
	.type \name,%function
#endif
	.align 2
cdecl(\name\()):
	// The GOT references must name exactly the label defined above.
	// cdecl() already applies the platform prefix, so no extra leading
	// underscore may be added here (same form as mbin_interface).
#ifndef __APPLE__
	adrp	x9, :got:\name\()_dispatcher_info
	ldr	x9, [x9, #:got_lo12:\name\()_dispatcher_info]
#else
	adrp	x9, cdecl(\name\()_dispatcher_info)@GOTPAGE
	ldr	x9, [x9, #cdecl(\name\()_dispatcher_info)@GOTPAGEOFF]
#endif
	ldr	x10,[x9]	// x10 = function pointer stored in the slot
	br	x10
#ifndef __APPLE__
	.size \name,. - \name
#endif
.endm
|
||||||
|
|
||||||
#else /* __ASSEMBLY__ */
|
#else /* __ASSEMBLY__ */
|
||||||
|
#include <stdint.h>
|
||||||
|
#if defined(__linux__)
|
||||||
#include <sys/auxv.h>
|
#include <sys/auxv.h>
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
#include <sys/sysctl.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
/*
 * Read an integer sysctl value by name.
 *
 * Returns the value stored under `name`, or 0 when sysctlbyname()
 * reports an error (for example, the key does not exist).
 */
static inline int sysctlEnabled(const char* name){
	/*
	 * Start from 0 so that a successful call which writes fewer than
	 * sizeof(int) bytes cannot leave indeterminate bytes in the result.
	 */
	int enabled = 0;
	size_t size = sizeof(enabled);
	int status = sysctlbyname(name, &enabled, &size, NULL, 0);
	return status ? 0 : enabled;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define DEFINE_INTERFACE_DISPATCHER(name) \
|
#define DEFINE_INTERFACE_DISPATCHER(name) \
|
||||||
@ -298,10 +335,12 @@
|
|||||||
static inline uint32_t get_micro_arch_id(void)
|
static inline uint32_t get_micro_arch_id(void)
|
||||||
{
|
{
|
||||||
uint32_t id=CPU_IMPLEMENTER_RESERVE;
|
uint32_t id=CPU_IMPLEMENTER_RESERVE;
|
||||||
|
#ifndef __APPLE__
|
||||||
if ((getauxval(AT_HWCAP) & HWCAP_CPUID)) {
|
if ((getauxval(AT_HWCAP) & HWCAP_CPUID)) {
|
||||||
/** Here will trap into kernel space */
|
/** Here will trap into kernel space */
|
||||||
asm("mrs %0, MIDR_EL1 " : "=r" (id));
|
asm("mrs %0, MIDR_EL1 " : "=r" (id));
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
return id&0xff00fff0;
|
return id&0xff00fff0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -30,10 +30,12 @@
|
|||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_zero_detect)
|
DEFINE_INTERFACE_DISPATCHER(isal_zero_detect)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_ASIMD)
|
if (auxval & HWCAP_ASIMD)
|
||||||
return PROVIDER_INFO(mem_zero_detect_neon);
|
return PROVIDER_INFO(mem_zero_detect_neon);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
return PROVIDER_INFO(mem_zero_detect_neon);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(mem_zero_detect);
|
return PROVIDER_BASIC(mem_zero_detect);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -27,6 +27,8 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.arch armv8-a
|
.arch armv8-a
|
||||||
|
|
||||||
@ -36,10 +38,12 @@
|
|||||||
// input: n -> x1
|
// input: n -> x1
|
||||||
// output: -> x0 (true or false)
|
// output: -> x0 (true or false)
|
||||||
|
|
||||||
.global mem_zero_detect_neon
|
.global cdecl(mem_zero_detect_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type mem_zero_detect_neon, %function
|
.type mem_zero_detect_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
mem_zero_detect_neon:
|
cdecl(mem_zero_detect_neon):
|
||||||
cmp x1, #(16*24-1)
|
cmp x1, #(16*24-1)
|
||||||
b.ls .loop_16x24_end
|
b.ls .loop_16x24_end
|
||||||
|
|
||||||
|
@ -27,10 +27,13 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global pq_check_neon
|
.global cdecl(pq_check_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type pq_check_neon, %function
|
.type pq_check_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* int pq_check_neon(int vects, int len, void **src) */
|
/* int pq_check_neon(int vects, int len, void **src) */
|
||||||
|
|
||||||
@ -85,7 +88,7 @@ v_0x80 .req v29
|
|||||||
* +----------+ +------------------+
|
* +----------+ +------------------+
|
||||||
*/
|
*/
|
||||||
|
|
||||||
pq_check_neon:
|
cdecl(pq_check_neon):
|
||||||
sub x_src_ptr_end, x_src, #8
|
sub x_src_ptr_end, x_src, #8
|
||||||
|
|
||||||
sub w_vects, w_vects, #3
|
sub w_vects, w_vects, #3
|
||||||
|
@ -27,10 +27,14 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global pq_gen_neon
|
.global cdecl(pq_gen_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type pq_gen_neon, %function
|
.type pq_gen_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* int pq_gen_neon(int vects, int len, void **src) */
|
/* int pq_gen_neon(int vects, int len, void **src) */
|
||||||
|
|
||||||
@ -84,7 +88,7 @@ v_0x80 .req v29
|
|||||||
* +----------+ +------------------+
|
* +----------+ +------------------+
|
||||||
*/
|
*/
|
||||||
|
|
||||||
pq_gen_neon:
|
cdecl(pq_gen_neon):
|
||||||
sub x_src_ptr_end, x_src, #8
|
sub x_src_ptr_end, x_src, #8
|
||||||
|
|
||||||
sub w_vects, w_vects, #3
|
sub w_vects, w_vects, #3
|
||||||
|
@ -30,32 +30,48 @@
|
|||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(xor_gen)
|
DEFINE_INTERFACE_DISPATCHER(xor_gen)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
|
if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
|
||||||
return PROVIDER_INFO(xor_gen_neon);
|
return PROVIDER_INFO(xor_gen_neon);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
return PROVIDER_INFO(xor_gen_neon);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(xor_gen);
|
return PROVIDER_BASIC(xor_gen);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(xor_check)
|
DEFINE_INTERFACE_DISPATCHER(xor_check)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
|
if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
|
||||||
return PROVIDER_INFO(xor_check_neon);
|
return PROVIDER_INFO(xor_check_neon);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
return PROVIDER_INFO(xor_check_neon);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(xor_check);
|
return PROVIDER_BASIC(xor_check);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(pq_gen)
|
DEFINE_INTERFACE_DISPATCHER(pq_gen)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
|
if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
|
||||||
return PROVIDER_INFO(pq_gen_neon);
|
return PROVIDER_INFO(pq_gen_neon);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
return PROVIDER_INFO(pq_gen_neon);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(pq_gen);
|
return PROVIDER_BASIC(pq_gen);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(pq_check)
|
DEFINE_INTERFACE_DISPATCHER(pq_check)
|
||||||
{
|
{
|
||||||
|
#if defined(__linux__)
|
||||||
if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
|
if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
|
||||||
return PROVIDER_INFO(pq_check_neon);
|
return PROVIDER_INFO(pq_check_neon);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
return PROVIDER_INFO(pq_check_neon);
|
||||||
|
#endif
|
||||||
return PROVIDER_BASIC(pq_check);
|
return PROVIDER_BASIC(pq_check);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -27,10 +27,14 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global xor_check_neon
|
.global cdecl(xor_check_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type xor_check_neon, %function
|
.type xor_check_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* int xor_check_neon(int vects, int len, void **src) */
|
/* int xor_check_neon(int vects, int len, void **src) */
|
||||||
|
|
||||||
@ -76,7 +80,7 @@ w_xor .req w11
|
|||||||
* src_ptr_end -->
|
* src_ptr_end -->
|
||||||
*/
|
*/
|
||||||
|
|
||||||
xor_check_neon:
|
cdecl(xor_check_neon):
|
||||||
add x_src_ptr_end, x_src, x_vects, lsl #3
|
add x_src_ptr_end, x_src, x_vects, lsl #3
|
||||||
ldr x_src0, [x_src]
|
ldr x_src0, [x_src]
|
||||||
add x_src0_end, x_src0, x_len
|
add x_src0_end, x_src0, x_len
|
||||||
|
@ -27,10 +27,14 @@
|
|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
|
#include "../include/aarch64_label.h"
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global xor_gen_neon
|
.global cdecl(xor_gen_neon)
|
||||||
|
#ifndef __APPLE__
|
||||||
.type xor_gen_neon, %function
|
.type xor_gen_neon, %function
|
||||||
|
#endif
|
||||||
|
|
||||||
/* int xor_gen_neon(int vects, int len, void **src) */
|
/* int xor_gen_neon(int vects, int len, void **src) */
|
||||||
|
|
||||||
@ -78,7 +82,7 @@ x_dst_ptr .req x11
|
|||||||
* +----------+ +------------------+
|
* +----------+ +------------------+
|
||||||
*/
|
*/
|
||||||
|
|
||||||
xor_gen_neon:
|
cdecl(xor_gen_neon):
|
||||||
add x_dst_ptr, x_src, x_vects, lsl #3
|
add x_dst_ptr, x_src, x_vects, lsl #3
|
||||||
ldr x_dst, [x_dst_ptr, #-8]!
|
ldr x_dst, [x_dst_ptr, #-8]!
|
||||||
ldr x_src0, [x_src]
|
ldr x_src0, [x_src]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user