isa-l/crc/aarch64/crc64_iso_norm_pmull.h
Samuel Lee 4785428d2f crc: arm64 implementation tweaks
+ Utilise `pmull2` instruction in main loops of arm64 crc functions and
avoid the need for `dup` to align multiplicands.
  + Use just 1 ASIMD register to hold both 64b p4 constants,
appropriately aligned.
+ Interleave quadword `ldr` with `pmull{2}` to avoid unnecessary stalls
on existing LITTLE uarch (which can only issue these instructions every
other cycle).
+ Similarly interleave scalar instructions with ASIMD instructions to
increase likelihood of instruction level parallelism on a variety of
uarch.
+ Cut down on needless instructions in non-critical sections to help
performance for small buffers.
+ Extract common instruction sequences into inner macros and move
them into a shared header - crc_common_pmull.h
+ Use the same human readable register aliases and register allocation
in all 4 implementations, never refer to registers without using human
readable alias.
  + Use #defines rather than .req to allow use of same names across
several implementations
+ Reduce tail case size from 1024B to 64B

+ Phrase the `eor` instructions in the main loop so that it is clearer
that pairs of `eor` instructions can be replaced by a single `eor3`
instruction in the presence of Armv8.2-SHA (this should probably become
an option in multibinary in future).

Change-Id: I3688193ea4ad88b53cf47e5bd9a7fd5c2b4401e1
Signed-off-by: Samuel Lee <samuel.lee@microsoft.com>
2019-11-13 10:58:19 -07:00

202 lines
8.4 KiB
C

########################################################################
# Copyright(c) 2019 Arm Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Arm Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################
# Constants consumed by the CRC64 code that pulls in this header.
#
# Naming: <family>_<low|high>_b<k>, k = 0..3. All 32 values fit in 16 bits.
# NOTE(review): presumably b0..b3 are the 16-bit slices, least significant
# first, of one 64-bit constant that the includer rebuilds with mov/movk
# (so p4_low would be 0x0000000101000101 and p4_high 0x0000001b1b001b1b).
# That consumer is not visible from this header - confirm before relying
# on the layout.

# p4 pair
.equ    p4_low_b0,      0x0101
.equ    p4_low_b1,      0x0100
.equ    p4_low_b2,      0x0001
.equ    p4_low_b3,      0x0000
.equ    p4_high_b0,     0x1b1b
.equ    p4_high_b1,     0x1b00
.equ    p4_high_b2,     0x001b
.equ    p4_high_b3,     0x0000

# p1 pair
.equ    p1_low_b0,      0x0145
.equ    p1_low_b1,      0x0000
.equ    p1_low_b2,      0x0000
.equ    p1_low_b3,      0x0000
.equ    p1_high_b0,     0x1db7
.equ    p1_high_b1,     0x0000
.equ    p1_high_b2,     0x0000
.equ    p1_high_b3,     0x0000

# p0 pair (the high half is all zero)
.equ    p0_low_b0,      0x0145
.equ    p0_low_b1,      0x0000
.equ    p0_low_b2,      0x0000
.equ    p0_low_b3,      0x0000
.equ    p0_high_b0,     0x0000
.equ    p0_high_b1,     0x0000
.equ    p0_high_b2,     0x0000
.equ    p0_high_b3,     0x0000

# br pair (low and high halves carry the same value, 0x001b)
.equ    br_low_b0,      0x001b
.equ    br_low_b1,      0x0000
.equ    br_low_b2,      0x0000
.equ    br_low_b3,      0x0000
.equ    br_high_b0,     0x001b
.equ    br_high_b1,     0x0000
.equ    br_high_b2,     0x0000
.equ    br_high_b3,     0x0000
# crc64_tab: read-only table of 256 eight-byte entries (2048 bytes total,
# matching the .size directive), placed in .rodata.
#
# Entry n is the carry-less (XOR) product of n and 0x1b, that is
#     n ^ (n << 1) ^ (n << 3) ^ (n << 4)
# so the entries are produced by the assembler in a .rept loop rather than
# being listed one by one. For reference: entry 0 = 0x0, entry 1 = 0x1b,
# entry 2 = 0x36, entry 3 = 0x2d, entry 255 = 0x909.
#
# .lanchor_crc_tab is set to the table start, the same address as the
# crc64_tab label, because nothing is emitted between the two.
# .Lcrc64_idx is an assembly-time counter only; it ends up holding 256.
#
# NOTE(review): presumably this is the byte-at-a-time lookup table for the
# CRC64 ISO polynomial in normal bit order - the code that reads it is not
# part of this header, confirm there.
        .text
        .section        .rodata
        .align          4
        .set            .lanchor_crc_tab, . + 0
        .type           crc64_tab, %object
        .size           crc64_tab, 2048
crc64_tab:
        .set            .Lcrc64_idx, 0
        .rept           256
        .xword          .Lcrc64_idx ^ (.Lcrc64_idx << 1) ^ (.Lcrc64_idx << 3) ^ (.Lcrc64_idx << 4)
        .set            .Lcrc64_idx, .Lcrc64_idx + 1
        .endr