isa-l/crc/aarch64/crc64_refl_common_pmull.h
Taiju Yamada 1187583a97 Fixes for aarch64 mac
- It should be fine to enable pmull always on Apple Silicon
- macOS 12+ is required for PMULL instruction.
- Changed the conditional macro to __APPLE__
- Rewritten dispatcher using sysctlbyname
- Use __USER_LABEL_PREFIX__
- Use __TEXT,__const as readonly section
- use ASM_DEF_RODATA macro
- fix func decl

Change-Id: I800593f21085d8187b480c8bb3ab2bd70c4a6974
Signed-off-by: Taiju Yamada <tyamada@bi.a.u-tokyo.ac.jp>
2022-10-28 08:27:26 -07:00

137 lines
3.9 KiB
C

########################################################################
# Copyright(c) 2019 Arm Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Arm Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
#include "crc_common_pmull.h"
.macro crc64_refl_func name:req
.arch armv8-a+crypto
.text
.align 3
.global cdecl(\name)
#ifndef __APPLE__
.type \name, %function
#endif
/* uint64_t crc64_refl_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
cdecl(\name\()):
mvn x_seed, x_seed
mov x_counter, 0
cmp x_len, (FOLD_SIZE-1)
bhi .crc_clmul_pre
.crc_tab_pre:
cmp x_len, x_counter
bls .done
#ifndef __APPLE__
adrp x_tmp, .lanchor_crc_tab
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
#else
adrp x_tmp, .lanchor_crc_tab@PAGE
add x_buf_iter, x_buf, x_counter
add x_buf, x_buf, x_len
add x_crc_tab_addr, x_tmp, .lanchor_crc_tab@PAGEOFF
#endif
.align 3
.loop_crc_tab:
ldrb w_tmp, [x_buf_iter], 1
eor w_tmp, w_tmp, w0
cmp x_buf, x_buf_iter
and x_tmp, x_tmp, 255
ldr x_tmp, [x_crc_tab_addr, x_tmp, lsl 3]
eor x_seed, x_tmp, x_seed, lsr 8
bne .loop_crc_tab
.done:
mvn x_crc_ret, x_seed
ret
.align 2
.crc_clmul_pre:
fmov d_x0, x_seed // save crc to d_x0
crc_refl_load_first_block
bls .clmul_loop_end
crc64_load_p4
// 1024bit --> 512bit loop
// merge x0, x1, x2, x3, y0, y1, y2, y3 => x0, x1, x2, x3 (uint64x2_t)
crc_refl_loop
.clmul_loop_end:
// folding 512bit --> 128bit
crc64_fold_512b_to_128b
// folding 128bit --> 64bit
mov x_tmp, p0_low_b0
movk x_tmp, p0_low_b1, lsl 16
movk x_tmp, p0_low_b2, lsl 32
movk x_tmp, p0_low_b3, lsl 48
fmov d_p0_low, x_tmp
pmull v_tmp_low.1q, v_x3.1d, v_p0.1d
mov d_tmp_high, v_x3.d[1]
eor v_x3.16b, v_tmp_high.16b, v_tmp_low.16b
// barrett reduction
mov x_tmp, br_low_b0
movk x_tmp, br_low_b1, lsl 16
movk x_tmp, br_low_b2, lsl 32
movk x_tmp, br_low_b3, lsl 48
fmov d_br_low, x_tmp
mov x_tmp2, br_high_b0
movk x_tmp2, br_high_b1, lsl 16
movk x_tmp2, br_high_b2, lsl 32
movk x_tmp2, br_high_b3, lsl 48
fmov d_br_high, x_tmp2
pmull v_tmp_low.1q, v_x3.1d, v_br_low.1d
pmull v_tmp_high.1q, v_tmp_low.1d, v_br_high.1d
ext v_tmp_low.16b, v_br_low.16b, v_tmp_low.16b, #8
eor v_tmp_low.16b, v_tmp_low.16b, v_tmp_high.16b
eor v_tmp_low.16b, v_tmp_low.16b, v_x3.16b
umov x_crc_ret, v_tmp_low.d[1]
b .crc_tab_pre
#ifndef __APPLE__
.size \name, .-\name
#endif
.endm