mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 09:23:50 +01:00
crc32: Add optimization implementation for Neoverse N1
This patch is based on the reference (1) algorithm, with some changes:
- Redefine the block number to two, since only two pipelines can be used
  for the CRC32 calculation.
- Redefine the block sizes: the CRC block is 1536 B and the PMULL block is 512 B.
- Interleave CRC and PMULL instructions.
The optimization parameters are calculated based on reference (2).

References:
1. https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
2. https://developer.arm.com/docs/swog309707/a

Change-Id: I1c9e593d59b521f56e4b3c807b396c083c181636
Signed-off-by: Jerry Yu <jerry.h.yu@arm.com>
This commit is contained in:
parent
f2cf2609cd
commit
a2fc2c000d
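
Editor's note on why the "mix" works, sketched in the polynomial view of CRC and
ignoring the initial/final inversion and bit-reflection details that the code
handles: a CRC over concatenated data can be computed piecewise and merged. If
each 2048 B block is split into a part A processed with the crc32 instructions
and a part B folded with PMULL, then

\[
\mathrm{CRC}_P(A \,\|\, B) \;=\; \bigl(\mathrm{CRC}_P(A)\cdot x^{8\lvert B\rvert} + \mathrm{CRC}_P(B)\bigr) \bmod P(x)
\]

The multiplication by a precomputed constant of this form (the fold_poly entry
loaded from .Lconstants) is what the single pmull against fold_poly_v near the
end of loop_2048 appears to implement, followed by one crc32 reduction and an
eor that merges the two partial results.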
@@ -1,5 +1,5 @@
 ########################################################################
-# Copyright(c) 2019 Arm Corporation All rights reserved.
+# Copyright(c) 2020 Arm Corporation All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -44,4 +44,6 @@ lsrc_aarch64 += \
 	crc/aarch64/crc64_iso_refl_pmull.S \
 	crc/aarch64/crc64_iso_norm_pmull.S \
 	crc/aarch64/crc64_jones_refl_pmull.S \
-	crc/aarch64/crc64_jones_norm_pmull.S
+	crc/aarch64/crc64_jones_norm_pmull.S \
+	crc/aarch64/crc32_mix_neoverse_n1.S \
+	crc/aarch64/crc32c_mix_neoverse_n1.S
434  crc/aarch64/crc32_common_mix_neoverse_n1.S  Normal file
@@ -0,0 +1,434 @@
/**********************************************************************
  Copyright(c) 2020 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/


.macro declare_var_vector_reg name:req,reg:req
        \name\()_q .req q\reg
        \name\()_v .req v\reg
        \name\()_s .req s\reg
        \name\()_d .req d\reg
.endm
declare_var_vector_reg k1k2,20
declare_var_vector_reg k3k4,21
declare_var_vector_reg poly,22
declare_var_vector_reg k5k0,23
declare_var_vector_reg mask,24
declare_var_vector_reg fold_poly,25

declare_var_vector_reg tmp0,0
declare_var_vector_reg tmp1,1
declare_var_vector_reg tmp2,2
declare_var_vector_reg tmp3,3
declare_var_vector_reg tmp4,4
declare_var_vector_reg tmp5,5
declare_var_vector_reg tmp6,6
declare_var_vector_reg tmp7,7
declare_var_vector_reg pmull_data0,16
declare_var_vector_reg pmull_data1,17
declare_var_vector_reg pmull_data2,18
declare_var_vector_reg pmull_data3,19

vzr .req v26

BUF           .req x0
LEN           .req x1
CRC           .req x2
wCRC          .req w2
const_addr    .req x3
crc_blk_ptr   .req x4
pmull_blk_ptr .req x5
crc_data0     .req x6
crc_data1     .req x7
crc_data2     .req x19
crc_data3     .req x20
wPmull        .req w21

data0         .req x4
data1         .req x5
data2         .req x6
data3         .req x7
wdata         .req w4

.macro pmull_fold

        pmull2 tmp4_v.1q, tmp0_v.2d, k1k2_v.2d
        pmull2 tmp5_v.1q, tmp1_v.2d, k1k2_v.2d
        pmull2 tmp6_v.1q, tmp2_v.2d, k1k2_v.2d
        pmull2 tmp7_v.1q, tmp3_v.2d, k1k2_v.2d

        pmull tmp0_v.1q, tmp0_v.1d, k1k2_v.1d
        pmull tmp1_v.1q, tmp1_v.1d, k1k2_v.1d
        pmull tmp2_v.1q, tmp2_v.1d, k1k2_v.1d
        pmull tmp3_v.1q, tmp3_v.1d, k1k2_v.1d
        ld1 {pmull_data0_v.16b-pmull_data3_v.16b},[pmull_blk_ptr],#64
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16

        eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
        eor tmp1_v.16b, tmp1_v.16b, tmp5_v.16b
        eor tmp2_v.16b, tmp2_v.16b, tmp6_v.16b
        eor tmp3_v.16b, tmp3_v.16b, tmp7_v.16b

        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        eor tmp0_v.16b, tmp0_v.16b, v16.16b
        eor tmp1_v.16b, tmp1_v.16b, v17.16b
        eor tmp2_v.16b, tmp2_v.16b, v18.16b
        eor tmp3_v.16b, tmp3_v.16b, v19.16b
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
.endm


.macro crc32_common_mix poly_type
        .set MIX_BLK_SIZE,2048
        add pmull_blk_ptr,BUF,MIX_BLK_SIZE-512
        .ifc \poly_type,crc32
                mvn wCRC,wCRC
        .endif
        cmp LEN,MIX_BLK_SIZE-1
        mov pmull_blk_ptr,BUF
        bls start_final
        adr const_addr, .Lconstants
        ld1 {k1k2_v.16b,k3k4_v.16b,poly_v.16b},[const_addr],#48
        mov crc_blk_ptr,BUF
        movi vzr.16b, #0
        ld1 {k5k0_v.8b,mask_v.8b,fold_poly_v.8b},[const_addr]

loop_2048:
        mov crc_blk_ptr,BUF
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        add pmull_blk_ptr,pmull_blk_ptr,MIX_BLK_SIZE-512
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        ld1 {tmp0_v.16b-tmp3_v.16b}, [pmull_blk_ptr], #0x40
        sub LEN,LEN,MIX_BLK_SIZE
        cmp LEN,MIX_BLK_SIZE
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16

        pmull_fold
        pmull_fold
        pmull_fold
        pmull_fold
        pmull_fold
        pmull_fold
        pmull_fold

        /* Folding cache line into 128bit */
        pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        eor tmp0_v.16b, tmp0_v.16b, tmp2_v.16b
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        eor tmp0_v.16b, tmp0_v.16b, tmp3_v.16b
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16


        /**
         * perform the last 64 bit fold, also
         * adds 32 zeroes to the input stream
         */
        ext tmp1_v.16b, tmp0_v.16b, tmp0_v.16b, #8
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        pmull2 tmp1_v.1q, tmp1_v.2d, k3k4_v.2d
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        ext tmp0_v.16b, tmp0_v.16b, vzr.16b, #8
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16

        /* final 32-bit fold */
        ext tmp1_v.16b, tmp0_v.16b, vzr.16b, #4
        and tmp0_v.16b, tmp0_v.16b, mask_v.16b
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        pmull tmp0_v.1q, tmp0_v.1d, k5k0_v.1d
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b

        /**
         * Finish up with the bit-reversed barrett
         * reduction 64 ==> 32 bits
         */
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        and tmp1_v.16b, tmp0_v.16b, mask_v.16b
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        ext tmp1_v.16b, vzr.16b, tmp1_v.16b, #8
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        pmull2 tmp1_v.1q, tmp1_v.2d, poly_v.2d
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        and tmp1_v.16b, tmp1_v.16b, mask_v.16b
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        pmull tmp1_v.1q, tmp1_v.1d, poly_v.1d
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        mov wPmull, tmp0_v.s[1]
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        ldp crc_data0,crc_data1,[crc_blk_ptr],16
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3
        ldp crc_data2,crc_data3,[crc_blk_ptr],16

        crc32_u64 wCRC,wCRC,crc_data0
        crc32_u64 wCRC,wCRC,crc_data1
        crc32_u64 wCRC,wCRC,crc_data2
        crc32_u64 wCRC,wCRC,crc_data3

        fmov d0, CRC
        mov w6, 0
        pmull v0.1q, v0.1d, fold_poly_v.1d
        fmov CRC, d0
        add BUF,BUF,MIX_BLK_SIZE
        crc32_u64 w6, w6, CRC
        eor wCRC, w6, wPmull
        bge loop_2048
start_final:
        cmp LEN, 63
        bls .loop_16B
.loop_64B:
        ldp data0, data1, [BUF],#16
        sub LEN,LEN,#64
        ldp data2, data3, [BUF],#16
        cmp LEN,#64
        crc32_u64 wCRC, wCRC, data0
        crc32_u64 wCRC, wCRC, data1
        ldp data0, data1, [BUF],#16
        crc32_u64 wCRC, wCRC, data2
        crc32_u64 wCRC, wCRC, data3
        ldp data2, data3, [BUF],#16
        crc32_u64 wCRC, wCRC, data0
        crc32_u64 wCRC, wCRC, data1
        crc32_u64 wCRC, wCRC, data2
        crc32_u64 wCRC, wCRC, data3
        bge .loop_64B

.loop_16B:
        cmp x1, 15
        bls .less_16B
        ldp data0, data1, [BUF],#16
        sub LEN,LEN,#16
        cmp LEN,15
        crc32_u64 wCRC, wCRC, data0
        crc32_u64 wCRC, wCRC, data1
        bls .less_16B
        ldp data0, data1, [BUF],#16
        sub LEN,LEN,#16
        cmp LEN,15
        crc32_u64 wCRC, wCRC, data0
        crc32_u64 wCRC, wCRC, data1
        bls .less_16B
        ldp data0, data1, [BUF],#16
        sub LEN,LEN,#16 //MUST less than 16B
        crc32_u64 wCRC, wCRC, data0
        crc32_u64 wCRC, wCRC, data1
.less_16B:
        cmp LEN, 7
        bls .less_8B
        ldr data0, [BUF], 8
        sub LEN, LEN, #8
        crc32_u64 wCRC, wCRC, data0
.less_8B:
        cmp LEN, 3
        bls .less_4B
        ldr wdata, [BUF], 4
        sub LEN, LEN, #4
        crc32_u32 wCRC, wCRC, wdata
.less_4B:
        cmp LEN, 1
        bls .less_2B
        ldrh wdata, [BUF], 2
        sub LEN, LEN, #2
        crc32_u16 wCRC, wCRC, wdata
.less_2B:
        cbz LEN, .finish_exit
        ldrb wdata, [BUF]
        crc32_u8 wCRC, wCRC, wdata
.finish_exit:
        .ifc \poly_type,crc32
                mvn w0, wCRC
        .else
                mov w0, wCRC
        .endif
        ret
.endm
66  crc/aarch64/crc32_mix_neoverse_n1.S  Normal file
@@ -0,0 +1,66 @@
/**********************************************************************
  Copyright(c) 2020 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

.text
.align 6
.arch armv8-a+crypto+crc

#include "crc32_common_mix_neoverse_n1.S"
.Lconstants:
        .octa 0x00000001c6e415960000000154442bd4
        .octa 0x00000000ccaa009e00000001751997d0
        .octa 0x00000001F701164100000001DB710641
        .quad 0x0000000163cd6124
        .quad 0x00000000FFFFFFFF
        .quad 0x000000000c30f51d
.macro crc32_u64 dst,src,data
        crc32x \dst,\src,\data
.endm
.macro crc32_u32 dst,src,data
        crc32w \dst,\src,\data
.endm
.macro crc32_u16 dst,src,data
        crc32h \dst,\src,\data
.endm
.macro crc32_u8 dst,src,data
        crc32b \dst,\src,\data
.endm


/**
 * uint32_t crc32_mix_neoverse_n1(uint32_t * BUF,
 *                                size_t LEN, uint CRC)
 */
.align 6
.global crc32_mix_neoverse_n1
.type crc32_mix_neoverse_n1, %function
crc32_mix_neoverse_n1:
        crc32_common_mix crc32
.size crc32_mix_neoverse_n1, .-crc32_mix_neoverse_n1
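
Editor's note: the .Lconstants table above holds the folding and Barrett-reduction
constants described in the Intel PCLMULQDQ white paper cited in the commit message.
As a rough illustration only (the actual table values also encode the bit-reflected
representation these CRCs use, so they will not match this output directly), a
constant of the form x^n mod P(x) can be derived with a hypothetical C helper like
the following; xpow_mod and its output are not part of this commit:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: x^n mod P(x) over GF(2), where P(x) is a degree-32
 * polynomial whose low 32 coefficients are given in "poly" (x^32 implicit). */
static uint32_t xpow_mod(unsigned int n, uint32_t poly)
{
        uint32_t rem = 1;                       /* the polynomial x^0 */

        for (unsigned int i = 0; i < n; i++) {
                int msb = (rem >> 31) & 1;      /* coefficient of x^31 */
                rem <<= 1;                      /* multiply by x */
                if (msb)
                        rem ^= poly;            /* reduce modulo P(x) */
        }
        return rem;
}

int main(void)
{
        /* CRC32 (gzip) polynomial 0x04C11DB7; e.g. a 512-bit fold distance. */
        printf("x^512 mod P = 0x%08x\n", xpow_mod(512, 0x04C11DB7));
        return 0;
}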
64  crc/aarch64/crc32c_mix_neoverse_n1.S  Normal file
@@ -0,0 +1,64 @@
/**********************************************************************
  Copyright(c) 2020 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

.text
.align 6
.arch armv8-a+crypto+crc

#include "crc32_common_mix_neoverse_n1.S"
.Lconstants:
        .octa 0x000000009e4addf800000000740eef02
        .octa 0x000000014cd00bd600000000f20c0dfe
        .octa 0x00000000dea713f10000000105ec76f0
        .quad 0x00000000dd45aab8
        .quad 0x00000000FFFFFFFF
        .quad 0x00000000dd7e3b0c

.macro crc32_u64 dst,src,data
        crc32cx \dst,\src,\data
.endm
.macro crc32_u32 dst,src,data
        crc32cw \dst,\src,\data
.endm
.macro crc32_u16 dst,src,data
        crc32ch \dst,\src,\data
.endm
.macro crc32_u8 dst,src,data
        crc32cb \dst,\src,\data
.endm
/**
 * uint32_t crc32c_mix_neoverse_n1(uint32_t * BUF,
 *                                 size_t LEN, uint CRC)
 */
.align 6
.global crc32c_mix_neoverse_n1
.type crc32c_mix_neoverse_n1, %function
crc32c_mix_neoverse_n1:
        crc32_common_mix crc32c
.size crc32c_mix_neoverse_n1, .-crc32c_mix_neoverse_n1
@@ -1,5 +1,5 @@
 /**********************************************************************
-  Copyright(c) 2019 Arm Corporation All rights reserved.
+  Copyright(c) 2019-2020 Arm Corporation All rights reserved.
 
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
@@ -62,6 +62,12 @@ DEFINE_INTERFACE_DISPATCHER(crc32_ieee)
 DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
 {
 	unsigned long auxval = getauxval(AT_HWCAP);
+	if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) {
+		switch (get_micro_arch_id()) {
+		case MICRO_ARCH_ID(ARM, NEOVERSE_N1):
+			return PROVIDER_INFO(crc32c_mix_neoverse_n1);
+		}
+	}
 	if (auxval & HWCAP_CRC32)
 		return PROVIDER_INFO(crc32_iscsi_refl_hw_fold);
 	if (auxval & HWCAP_PMULL) {
@@ -74,6 +80,12 @@ DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
 DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl)
 {
 	unsigned long auxval = getauxval(AT_HWCAP);
+	if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) {
+		switch (get_micro_arch_id()) {
+		case MICRO_ARCH_ID(ARM, NEOVERSE_N1):
+			return PROVIDER_INFO(crc32_mix_neoverse_n1);
+		}
+	}
 	if (auxval & HWCAP_CRC32)
 		return PROVIDER_INFO(crc32_gzip_refl_hw_fold);
 	if (auxval & HWCAP_PMULL)
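
Editor's note on the dispatcher change above: callers keep using the generic
ISA-L entry points; on a Neoverse N1 that exposes both HWCAP_CRC32 and
HWCAP_PMULL they now resolve to crc32_mix_neoverse_n1 / crc32c_mix_neoverse_n1.
A minimal usage sketch follows; the crc32_gzip_refl and crc32_iscsi prototypes
are quoted from memory of isa-l's crc.h and should be treated as assumptions,
not as part of this commit:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "crc.h"        /* isa-l public CRC header */

int main(void)
{
        unsigned char buf[4096];

        memset(buf, 0xa5, sizeof(buf));

        /* CRC32 (gzip, reflected): dispatched to crc32_mix_neoverse_n1 on N1. */
        uint32_t crc_gzip = crc32_gzip_refl(0, buf, sizeof(buf));

        /* CRC32-C (iSCSI): dispatched to crc32c_mix_neoverse_n1 on N1. */
        unsigned int crc_iscsi = crc32_iscsi(buf, sizeof(buf), 0);

        printf("gzip crc32 = 0x%08x, iscsi crc32c = 0x%08x\n", crc_gzip, crc_iscsi);
        return 0;
}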