/*
 * Mirror of https://github.com/intel/isa-l.git
 * (synced 2025-01-21 04:52:12 +01:00), commit a46da529d9.
 * Change-Id: I49166ee06b3ad24babb90aeb0b834d8aacfc2d03
 * Signed-off-by: Zhiyuan Zhu <zhiyuan.zhu@arm.com>
 * ArmAsm, 177 lines, 4.2 KiB
 */
########################################################################
#  Copyright(c) 2019 Arm Corporation All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#    * Redistributions of source code must retain the above copyright
#      notice, this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above copyright
#      notice, this list of conditions and the following disclaimer in
#      the documentation and/or other materials provided with the
#      distribution.
#    * Neither the name of Arm Corporation nor the names of its
#      contributors may be used to endorse or promote products derived
#      from this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################

/*
 * uint32_t crc32_gzip_refl_hw_fold(uint32_t seed, const unsigned char *buf, uint64_t len)
 *
 * Target: AArch64, GNU as syntax.  Uses the CRC32B/H/W/X and PMULL
 * instructions, hence the +crc+crypto extensions requested below.
 *
 * In:    w0 = seed, x1 = buf, x2 = len (bytes)
 * Out:   w0 = updated CRC
 * Clobb: x1-x10, x15, v1, v3, v4, v5, NZCV flags
 * Stack: none (leaf routine, no calls)
 *
 * The seed is bit-inverted on entry and the running value is bit-inverted
 * again on exit, so the result of one call can be fed back as the seed of
 * the next call; len == 0 returns the seed unchanged.
 *
 * Processing order:
 *   1. whole 1024-byte blocks  - three interleaved CRC lanes merged with PMULL
 *   2. remaining 8-byte words  - one CRC32X each
 *   3. remaining 4 / 2 / 1 bytes - CRC32W / CRC32H / CRC32B
 *
 * All loads use the caller's pointer as-is; no alignment fix-up is performed.
 */
.arch armv8-a+crc+crypto
.text
.align 3
.global crc32_gzip_refl_hw_fold
.type crc32_gzip_refl_hw_fold, %function

/* Arguments / result.  The CRC accumulator lives in w0 for the whole routine. */
w_seed                  .req    w0
w_crc                   .req    w0
x_buf                   .req    x1
x_len                   .req    x2

/*
 * Both names deliberately alias x10: while folding it holds the end of the
 * 1024-byte-block region, which is exactly where the tail cursor must start
 * once the fold loop is finished.
 */
x_buf_loop_end          .req    x10
x_buf_iter              .req    x10

/* Scratch register (64-bit and 32-bit views). */
x_tmp                   .req    x15
w_tmp                   .req    w15

/* Multipliers applied to lane 0 (c0) and lane 1 (c1) when lanes are merged. */
d_c0                    .req    d3
d_c1                    .req    d1
v_c0                    .req    v3
v_c1                    .req    v1

crc32_gzip_refl_hw_fold:
        mvn     w_seed, w_seed                  /* crc = ~seed */
        cmp     x_len, 1023
        mov     x_buf_iter, x_buf               /* tail cursor = buf; MOV keeps the CMP flags */
        bls     .loop_fold_end                  /* len < 1024: skip the fold phase */

        /*
         * x_buf_loop_end = buf + (len rounded down to a multiple of 1024).
         * (len - 1024) & -1024, then + 1024, equals len & ~1023 for len >= 1024.
         */
        sub     x_buf_loop_end, x_len, #1024
        and     x_buf_loop_end, x_buf_loop_end, -1024
        add     x_buf_loop_end, x_buf_loop_end, 1024
        add     x_buf_loop_end, x_buf, x_buf_loop_end

        /*
         * v_c0 = 0xb486819b, v_c1 = 0x76278617 (upper bits zero).
         * NOTE(review): presumably the precomputed polynomial factors that
         * advance lane 0 and lane 1 to the end of lane 2 - values are kept
         * exactly as found; confirm against the upstream constant generator.
         */
        mov     x_tmp, 0x819b
        movk    x_tmp, 0xb486, lsl 16
        fmov    d_c0, x_tmp

        mov     x_tmp, 0x8617
        movk    x_tmp, 0x7627, lsl 16
        fmov    d_c1, x_tmp

/* Fold-phase registers: read cursor and the three lane accumulators. */
x_in64                  .req    x3
w_crc0                  .req    w0
w_crc1                  .req    w4
w_crc2                  .req    w5

/* SIMD staging registers for the PMULL multiplications of lanes 0 and 1. */
d_crc0                  .req    d4
d_crc1                  .req    d5
v_crc0                  .req    v4
v_crc1                  .req    v5

        .align 3
.loop_fold:
        /*
         * One 1024-byte block at x_buf, split as:
         *   lane 0: [0, 336)   lane 1: [336, 672)   lane 2: [672, 1008)
         *   then 8 bytes at 1008 (appended to lane 2) and 8 bytes at 1016
         *   (consumed after the lanes are merged).
         * Lane 0 continues from the running CRC; lanes 1 and 2 start at 0.
         */
        add     x9, x_buf, 336                  /* x9 = end of lane 0 = inner loop bound */
        mov     x_in64, x_buf
        mov     w_crc1, 0
        mov     w_crc2, 0

        .align 3
.loop_for:
        /* 42 iterations; three independent CRC32X chains per iteration. */
        ldr     x8, [x_in64]
        ldr     x7, [x_in64, 336]
        ldr     x6, [x_in64, 672]

        add     x_in64, x_in64, 8
        cmp     x_in64, x9                      /* flags consumed by BNE; CRC32X does not set flags */

        crc32x  w_crc0, w_crc0, x8
        crc32x  w_crc1, w_crc1, x7
        crc32x  w_crc2, w_crc2, x6
        bne     .loop_for

        /*
         * Carry-less multiply lane 0 by c0 and lane 1 by c1.  Both operands
         * are 32-bit values, so each product fits in the low 64 bits of the
         * 128-bit PMULL result.
         */
        uxtw    x_tmp, w_crc0
        fmov    d_crc0, x_tmp
        pmull   v_crc0.1q, v_crc0.1d, v_c0.1d

        uxtw    x_tmp, w_crc1
        fmov    d_crc1, x_tmp
        pmull   v_crc1.1q, v_crc1.1d, v_c1.1d

        /* Extend lane 2 by bytes [1008, 1016) while the multiplies are in flight. */
        ldr     x_tmp, [x_buf, 1008]
        crc32x  w_crc2, w_crc2, x_tmp

        /* Reduce each 64-bit product back to 32 bits: CRC32X with a zero start value. */
        fmov    x_tmp, d_crc0
        crc32x  w_crc0, wzr, x_tmp

        fmov    x_tmp, d_crc1
        crc32x  w_crc1, wzr, x_tmp

        /* Merge the three lanes into the running CRC. */
        eor     w_crc0, w_crc0, w_crc1
        eor     w_crc0, w_crc0, w_crc2

        /* Last 8 bytes of the block, [1016, 1024), go through the merged CRC. */
        ldr     x_tmp, [x_buf, 1016]
        crc32x  w_crc0, w_crc0, x_tmp

        add     x_buf, x_buf, 1024
        cmp     x_buf_loop_end, x_buf
        bne     .loop_fold

        and     x_len, x_len, 1023              /* bytes left after the whole blocks */

/* x3 is free again after the fold loop; reuse it as the 8-byte loop bound. */
x_buf_loop_size8_end    .req    x3

.loop_fold_end:
        /* Invariant: x_buf_iter points at the first unprocessed byte, x_len < 1024. */
        cmp     x_len, 7
        bls     .size_4

        /* x_buf_loop_size8_end = x_buf_iter + (len rounded down to a multiple of 8). */
        sub     x_buf_loop_size8_end, x_len, #8
        and     x_buf_loop_size8_end, x_buf_loop_size8_end, -8
        add     x_buf_loop_size8_end, x_buf_loop_size8_end, 8
        add     x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end

        .align 3
.loop_size_8:
        ldr     x_tmp, [x_buf_iter], 8          /* post-increment the cursor by 8 */
        crc32x  w_crc, w_crc, x_tmp

        cmp     x_buf_iter, x_buf_loop_size8_end
        bne     .loop_size_8

        and     x_len, x_len, 7                 /* 0..7 bytes left */

.size_4:
        cmp     x_len, 3
        bls     .size_2

        ldr     w_tmp, [x_buf_iter], 4
        crc32w  w_crc, w_crc, w_tmp

        sub     x_len, x_len, #4

.size_2:
        cmp     x_len, 1
        bls     .size_1

        ldrh    w_tmp, [x_buf_iter], 2
        crc32h  w_crc, w_crc, w_tmp

        sub     x_len, x_len, #2

.size_1:
        cbz     x_len, .done

        ldrb    w_tmp, [x_buf_iter]
        crc32b  w_crc, w_crc, w_tmp

.done:
        mvn     w_crc, w_crc                    /* undo the entry inversion */
        ret

        .size   crc32_gzip_refl_hw_fold, .-crc32_gzip_refl_hw_fold