Remove inconsistency in ARM support.

This facilitates "universal" builds, ones that target multiple
architectures, e.g. ARMv5 through ARMv7. See commentary in
Configure for details.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Matt Caswell <matt@openssl.org>
(cherry picked from commit c1669e1c205dc8e695fb0c10a655f434e758b9f7)
This commit is contained in:
Andy Polyakov 2014-11-07 22:48:22 +01:00
parent 4aaf1e493c
commit f4868c9921
14 changed files with 224 additions and 160 deletions

View File

@ -351,8 +351,34 @@ my %table=(
# throw in -D[BL]_ENDIAN, whichever appropriate... # throw in -D[BL]_ENDIAN, whichever appropriate...
"linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
# It's believed that majority of ARM toolchains predefine appropriate -march.
# If your compiler does not, do complement the config command line with one! #######################################################################
# Note that -march is not among compiler options in below linux-armv4
# target line. Not specifying one is intentional to give you choice to:
#
# a) rely on your compiler default by not specifying one;
# b) specify your target platform explicitly for optimal performance,
# e.g. -march=armv6 or -march=armv7-a;
# c) build "universal" binary that targets *range* of platforms by
# specifying minimum and maximum supported architecture;
#
# As for option c): it actually makes no sense to specify maximum to be
# less than ARMv7, because it's the least requirement for run-time
# switch between platform-specific code paths. And without run-time
# switch performance would be equivalent to one for minimum. Secondly,
# there are some natural limitations that you'd have to accept and
# respect. Most notably you can *not* build "universal" binary for
# big-endian platform. This is because ARMv7 processor always picks
# instructions in little-endian order. Another similar limitation is
# that -mthumb can't "cross" -march=armv6t2 boundary, because that's
# where it became Thumb-2. Well, this limitation is a bit artificial,
# because it's not really impossible, but it's deemed too tricky to
# support. And of course you have to be sure that your binutils are
# actually up to the task of handling maximum target platform. With all
# this in mind here is an example of how to configure "universal" build:
#
# ./Configure linux-armv4 -march=armv6 -D__ARM_MAX_ARCH__=8
#
"linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-aarch64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-aarch64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
# Configure script adds minimally required -march for assembly support, # Configure script adds minimally required -march for assembly support,

View File

@ -35,11 +35,13 @@ $prefix="aes_v8";
$code=<<___; $code=<<___;
#include "arm_arch.h" #include "arm_arch.h"
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.text .text
___ ___
$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/); $code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/); $code.=".arch armv7-a\n.fpu neon\n.code 32\n" if ($flavour !~ /64/);
#^^^^^^ this is done to simplify adoption by not depending
# on latest binutils.
# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax, # Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to # NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to

View File

@ -702,13 +702,17 @@ $code.=<<___;
# define BSAES_ASM_EXTENDED_KEY # define BSAES_ASM_EXTENDED_KEY
# define XTS_CHAIN_TWEAK # define XTS_CHAIN_TWEAK
# define __ARM_ARCH__ __LINUX_ARM_ARCH__ # define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
#endif #endif
#ifdef __thumb__ #ifdef __thumb__
# define adrl adr # define adrl adr
#endif #endif
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.text .text
.syntax unified @ ARMv7-capable assembler is expected to handle this .syntax unified @ ARMv7-capable assembler is expected to handle this
#ifdef __thumb2__ #ifdef __thumb2__
@ -717,8 +721,6 @@ $code.=<<___;
.code 32 .code 32
#endif #endif
.fpu neon
.type _bsaes_decrypt8,%function .type _bsaes_decrypt8,%function
.align 4 .align 4
_bsaes_decrypt8: _bsaes_decrypt8:

View File

@ -52,6 +52,18 @@
#include <openssl/fipssyms.h> #include <openssl/fipssyms.h>
#endif #endif
#if !defined(__ARM_MAX_ARCH__)
# define __ARM_MAX_ARCH__ __ARM_ARCH__
#endif
#if __ARM_MAX_ARCH__<__ARM_ARCH__
# error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__"
#elif __ARM_MAX_ARCH__!=__ARM_ARCH__
# if __ARM_ARCH__<7 && __ARM_MAX_ARCH__>=7 && defined(__ARMEB__)
# error "can't build universal big-endian binary"
# endif
#endif
#if !__ASSEMBLER__ #if !__ASSEMBLER__
extern unsigned int OPENSSL_armcap_P; extern unsigned int OPENSSL_armcap_P;
#endif #endif

View File

@ -7,8 +7,12 @@
#include "arm_arch.h" #include "arm_arch.h"
unsigned int OPENSSL_armcap_P; unsigned int OPENSSL_armcap_P=0;
#if __ARM_MAX_ARCH__<7
void OPENSSL_cpuid_setup(void) {}
unsigned long OPENSSL_rdtsc(void) { return 0; }
#else
static sigset_t all_masked; static sigset_t all_masked;
static sigjmp_buf ill_jmp; static sigjmp_buf ill_jmp;
@ -155,3 +159,4 @@ void OPENSSL_cpuid_setup(void)
sigaction (SIGILL,&ill_oact,NULL); sigaction (SIGILL,&ill_oact,NULL);
sigprocmask(SIG_SETMASK,&oset,NULL); sigprocmask(SIG_SETMASK,&oset,NULL);
} }
#endif

View File

@ -3,69 +3,6 @@
.text .text
.code 32 .code 32
@ Special note about using .byte directives to encode instructions.
@ Initial reason for hand-coding instructions was to allow module to
@ be compilable by legacy tool-chains. At later point it was pointed
@ out that since ARMv7, instructions are always encoded in little-endian
@ order, therefore one has to opt for endian-neutral presentation.
@ Contemporary tool-chains offer .inst directive for this purpose,
@ but not legacy ones. Therefore .byte. But there is an exception,
@ namely ARMv7-R profile still allows for big-endian encoding even for
@ instructions. This raises the question what if probe instructions
@ appear executable to such processor operating in big-endian order?
@ They have to be chosen in a way that avoids this problem. As failed
@ NEON probe disables a number of other probes we have to ensure that
@ only NEON probe instruction doesn't appear executable in big-endian
@ order, therefore 'vorr q8,q8,q8', and not some other register. The
@ only probe that is not bypassed on failed NEON probe is _armv7_tick,
@ where you'll spot 'mov r0,r6' that serves this purpose. Basic idea is
@ that if fetched in alternative byte order instruction should crash to
@ denote lack of probed capability...
.align 5
.global _armv7_neon_probe
.type _armv7_neon_probe,%function
_armv7_neon_probe:
.byte 0xf0,0x01,0x60,0xf2 @ vorr q8,q8,q8
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv7_neon_probe,.-_armv7_neon_probe
.global _armv7_tick
.type _armv7_tick,%function
_armv7_tick:
.byte 0x06,0x00,0xa0,0xe1 @ mov r0,r6
.byte 0x1e,0x0f,0x51,0xec @ mrrc p15,1,r0,r1,c14 @ CNTVCT
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
nop
.size _armv7_tick,.-_armv7_tick
.global _armv8_aes_probe
.type _armv8_aes_probe,%function
_armv8_aes_probe:
.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_aes_probe,.-_armv8_aes_probe
.global _armv8_sha1_probe
.type _armv8_sha1_probe,%function
_armv8_sha1_probe:
.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_sha1_probe,.-_armv8_sha1_probe
.global _armv8_sha256_probe
.type _armv8_sha256_probe,%function
_armv8_sha256_probe:
.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_sha256_probe,.-_armv8_sha256_probe
.global _armv8_pmull_probe
.type _armv8_pmull_probe,%function
_armv8_pmull_probe:
.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_pmull_probe,.-_armv8_pmull_probe
.align 5 .align 5
.global OPENSSL_atomic_add .global OPENSSL_atomic_add
.type OPENSSL_atomic_add,%function .type OPENSSL_atomic_add,%function
@ -139,30 +76,81 @@ OPENSSL_cleanse:
#endif #endif
.size OPENSSL_cleanse,.-OPENSSL_cleanse .size OPENSSL_cleanse,.-OPENSSL_cleanse
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.align 5
.global _armv7_neon_probe
.type _armv7_neon_probe,%function
_armv7_neon_probe:
vorr q0,q0,q0
bx lr
.size _armv7_neon_probe,.-_armv7_neon_probe
.global _armv7_tick
.type _armv7_tick,%function
_armv7_tick:
mrrc p15,1,r0,r1,c14 @ CNTVCT
bx lr
.size _armv7_tick,.-_armv7_tick
.global _armv8_aes_probe
.type _armv8_aes_probe,%function
_armv8_aes_probe:
.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
bx lr
.size _armv8_aes_probe,.-_armv8_aes_probe
.global _armv8_sha1_probe
.type _armv8_sha1_probe,%function
_armv8_sha1_probe:
.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
bx lr
.size _armv8_sha1_probe,.-_armv8_sha1_probe
.global _armv8_sha256_probe
.type _armv8_sha256_probe,%function
_armv8_sha256_probe:
.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
bx lr
.size _armv8_sha256_probe,.-_armv8_sha256_probe
.global _armv8_pmull_probe
.type _armv8_pmull_probe,%function
_armv8_pmull_probe:
.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
bx lr
.size _armv8_pmull_probe,.-_armv8_pmull_probe
#endif
.global OPENSSL_wipe_cpu .global OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,%function .type OPENSSL_wipe_cpu,%function
OPENSSL_wipe_cpu: OPENSSL_wipe_cpu:
#if __ARM_MAX_ARCH__>=7
ldr r0,.LOPENSSL_armcap ldr r0,.LOPENSSL_armcap
adr r1,.LOPENSSL_armcap adr r1,.LOPENSSL_armcap
ldr r0,[r1,r0] ldr r0,[r1,r0]
#endif
eor r2,r2,r2 eor r2,r2,r2
eor r3,r3,r3 eor r3,r3,r3
eor ip,ip,ip eor ip,ip,ip
#if __ARM_MAX_ARCH__>=7
tst r0,#1 tst r0,#1
beq .Lwipe_done beq .Lwipe_done
.byte 0x50,0x01,0x00,0xf3 @ veor q0, q0, q0 veor q0, q0, q0
.byte 0x52,0x21,0x02,0xf3 @ veor q1, q1, q1 veor q1, q1, q1
.byte 0x54,0x41,0x04,0xf3 @ veor q2, q2, q2 veor q2, q2, q2
.byte 0x56,0x61,0x06,0xf3 @ veor q3, q3, q3 veor q3, q3, q3
.byte 0xf0,0x01,0x40,0xf3 @ veor q8, q8, q8 veor q8, q8, q8
.byte 0xf2,0x21,0x42,0xf3 @ veor q9, q9, q9 veor q9, q9, q9
.byte 0xf4,0x41,0x44,0xf3 @ veor q10, q10, q10 veor q10, q10, q10
.byte 0xf6,0x61,0x46,0xf3 @ veor q11, q11, q11 veor q11, q11, q11
.byte 0xf8,0x81,0x48,0xf3 @ veor q12, q12, q12 veor q12, q12, q12
.byte 0xfa,0xa1,0x4a,0xf3 @ veor q13, q13, q13 veor q13, q13, q13
.byte 0xfc,0xc1,0x4c,0xf3 @ veor q14, q14, q14 veor q14, q14, q14
.byte 0xfe,0xe1,0x4e,0xf3 @ veor q14, q14, q14 veor q15, q15, q15
.Lwipe_done: .Lwipe_done:
#endif
mov r0,sp mov r0,sp
#if __ARM_ARCH__>=5 #if __ARM_ARCH__>=5
bx lr bx lr
@ -200,8 +188,10 @@ OPENSSL_instrument_bus2:
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 .size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
.align 5 .align 5
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap: .LOPENSSL_armcap:
.word OPENSSL_armcap_P-.LOPENSSL_armcap .word OPENSSL_armcap_P-.LOPENSSL_armcap
#endif
#if __ARM_ARCH__>=6 #if __ARM_ARCH__>=6
.align 5 .align 5
#else #else

View File

@ -40,10 +40,6 @@ $code=<<___;
.text .text
.code 32 .code 32
#if __ARM_ARCH__>=7
.fpu neon
#endif
___ ___
################ ################
# private interface to mul_1x1_ialu # private interface to mul_1x1_ialu
@ -142,72 +138,18 @@ ___
# BN_ULONG a1,BN_ULONG a0, # BN_ULONG a1,BN_ULONG a0,
# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0 # BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
{ {
my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
$code.=<<___; $code.=<<___;
.global bn_GF2m_mul_2x2 .global bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,%function .type bn_GF2m_mul_2x2,%function
.align 5 .align 5
bn_GF2m_mul_2x2: bn_GF2m_mul_2x2:
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap ldr r12,.LOPENSSL_armcap
.Lpic: ldr r12,[pc,r12] .Lpic: ldr r12,[pc,r12]
tst r12,#1 tst r12,#1
beq .Lialu bne .LNEON
ldr r12, [sp] @ 5th argument
vmov.32 $a, r2, r1
vmov.32 $b, r12, r3
vmov.i64 $k48, #0x0000ffffffffffff
vmov.i64 $k32, #0x00000000ffffffff
vmov.i64 $k16, #0x000000000000ffff
vext.8 $t0#lo, $a, $a, #1 @ A1
vmull.p8 $t0, $t0#lo, $b @ F = A1*B
vext.8 $r#lo, $b, $b, #1 @ B1
vmull.p8 $r, $a, $r#lo @ E = A*B1
vext.8 $t1#lo, $a, $a, #2 @ A2
vmull.p8 $t1, $t1#lo, $b @ H = A2*B
vext.8 $t3#lo, $b, $b, #2 @ B2
vmull.p8 $t3, $a, $t3#lo @ G = A*B2
vext.8 $t2#lo, $a, $a, #3 @ A3
veor $t0, $t0, $r @ L = E + F
vmull.p8 $t2, $t2#lo, $b @ J = A3*B
vext.8 $r#lo, $b, $b, #3 @ B3
veor $t1, $t1, $t3 @ M = G + H
vmull.p8 $r, $a, $r#lo @ I = A*B3
veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8
vand $t0#hi, $t0#hi, $k48
vext.8 $t3#lo, $b, $b, #4 @ B4
veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16
vand $t1#hi, $t1#hi, $k32
vmull.p8 $t3, $a, $t3#lo @ K = A*B4
veor $t2, $t2, $r @ N = I + J
veor $t0#lo, $t0#lo, $t0#hi
veor $t1#lo, $t1#lo, $t1#hi
veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24
vand $t2#hi, $t2#hi, $k16
vext.8 $t0, $t0, $t0, #15
veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32
vmov.i64 $t3#hi, #0
vext.8 $t1, $t1, $t1, #14
veor $t2#lo, $t2#lo, $t2#hi
vmull.p8 $r, $a, $b @ D = A*B
vext.8 $t3, $t3, $t3, #12
vext.8 $t2, $t2, $t2, #13
veor $t0, $t0, $t1
veor $t2, $t2, $t3
veor $r, $r, $t0
veor $r, $r, $t2
vst1.32 {$r}, [r0]
ret @ bx lr
.align 4
.Lialu:
#endif #endif
___ ___
}
$ret="r10"; # reassigned 1st argument $ret="r10"; # reassigned 1st argument
$code.=<<___; $code.=<<___;
stmdb sp!,{r4-r10,lr} stmdb sp!,{r4-r10,lr}
@ -257,8 +199,72 @@ $code.=<<___;
moveq pc,lr @ be binary compatible with V4, yet moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-) bx lr @ interoperable with Thumb ISA:-)
#endif #endif
___
}
{
my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.align 5
.LNEON:
ldr r12, [sp] @ 5th argument
vmov.32 $a, r2, r1
vmov.32 $b, r12, r3
vmov.i64 $k48, #0x0000ffffffffffff
vmov.i64 $k32, #0x00000000ffffffff
vmov.i64 $k16, #0x000000000000ffff
vext.8 $t0#lo, $a, $a, #1 @ A1
vmull.p8 $t0, $t0#lo, $b @ F = A1*B
vext.8 $r#lo, $b, $b, #1 @ B1
vmull.p8 $r, $a, $r#lo @ E = A*B1
vext.8 $t1#lo, $a, $a, #2 @ A2
vmull.p8 $t1, $t1#lo, $b @ H = A2*B
vext.8 $t3#lo, $b, $b, #2 @ B2
vmull.p8 $t3, $a, $t3#lo @ G = A*B2
vext.8 $t2#lo, $a, $a, #3 @ A3
veor $t0, $t0, $r @ L = E + F
vmull.p8 $t2, $t2#lo, $b @ J = A3*B
vext.8 $r#lo, $b, $b, #3 @ B3
veor $t1, $t1, $t3 @ M = G + H
vmull.p8 $r, $a, $r#lo @ I = A*B3
veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8
vand $t0#hi, $t0#hi, $k48
vext.8 $t3#lo, $b, $b, #4 @ B4
veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16
vand $t1#hi, $t1#hi, $k32
vmull.p8 $t3, $a, $t3#lo @ K = A*B4
veor $t2, $t2, $r @ N = I + J
veor $t0#lo, $t0#lo, $t0#hi
veor $t1#lo, $t1#lo, $t1#hi
veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24
vand $t2#hi, $t2#hi, $k16
vext.8 $t0, $t0, $t0, #15
veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32
vmov.i64 $t3#hi, #0
vext.8 $t1, $t1, $t1, #14
veor $t2#lo, $t2#lo, $t2#hi
vmull.p8 $r, $a, $b @ D = A*B
vext.8 $t3, $t3, $t3, #12
vext.8 $t2, $t2, $t2, #13
veor $t0, $t0, $t1
veor $t2, $t2, $t3
veor $r, $r, $t0
veor $r, $r, $t2
vst1.32 {$r}, [r0]
ret @ bx lr
#endif
___
}
$code.=<<___;
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.align 5 .align 5
.LOPENSSL_armcap: .LOPENSSL_armcap:
.word OPENSSL_armcap_P-(.Lpic+8) .word OPENSSL_armcap_P-(.Lpic+8)
@ -266,7 +272,9 @@ $code.=<<___;
.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" .asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 5 .align 5
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4 .comm OPENSSL_armcap_P,4,4
#endif
___ ___
foreach (split("\n",$code)) { foreach (split("\n",$code)) {

View File

@ -72,7 +72,7 @@ $code=<<___;
.text .text
.code 32 .code 32
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.align 5 .align 5
.LOPENSSL_armcap: .LOPENSSL_armcap:
.word OPENSSL_armcap_P-bn_mul_mont .word OPENSSL_armcap_P-bn_mul_mont
@ -85,7 +85,7 @@ $code=<<___;
bn_mul_mont: bn_mul_mont:
ldr ip,[sp,#4] @ load num ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
tst ip,#7 tst ip,#7
bne .Lialu bne .Lialu
adr r0,bn_mul_mont adr r0,bn_mul_mont
@ -256,7 +256,8 @@ my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5));
my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9)); my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9));
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon .fpu neon
.type bn_mul8x_mont_neon,%function .type bn_mul8x_mont_neon,%function
@ -663,7 +664,7 @@ ___
$code.=<<___; $code.=<<___;
.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" .asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2 .align 2
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4 .comm OPENSSL_armcap_P,4,4
#endif #endif
___ ___

View File

@ -911,7 +911,7 @@ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
#if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__)) #if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__))
#include "arm_arch.h" #include "arm_arch.h"
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
# if defined(BSAES_ASM) # if defined(BSAES_ASM)
# define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) # define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
# endif # endif

View File

@ -365,7 +365,8 @@ ___
} }
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon .fpu neon
.global gcm_init_neon .global gcm_init_neon

View File

@ -675,7 +675,7 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
# endif # endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__) # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
# include "arm_arch.h" # include "arm_arch.h"
# if __ARM_ARCH__>=7 # if __ARM_MAX_ARCH__>=7
# define GHASH_ASM_ARM # define GHASH_ASM_ARM
# define GCM_FUNCREF_4BIT # define GCM_FUNCREF_4BIT
# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL) # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)

View File

@ -174,7 +174,7 @@ $code=<<___;
.align 5 .align 5
sha1_block_data_order: sha1_block_data_order:
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
sub r3,pc,#8 @ sha1_block_data_order sub r3,pc,#8 @ sha1_block_data_order
ldr r12,.LOPENSSL_armcap ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P ldr r12,[r3,r12] @ OPENSSL_armcap_P
@ -264,8 +264,10 @@ $code.=<<___;
.LK_20_39: .word 0x6ed9eba1 .LK_20_39: .word 0x6ed9eba1
.LK_40_59: .word 0x8f1bbcdc .LK_40_59: .word 0x8f1bbcdc
.LK_60_79: .word 0xca62c1d6 .LK_60_79: .word 0xca62c1d6
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap: .LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha1_block_data_order .word OPENSSL_armcap_P-sha1_block_data_order
#endif
.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" .asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 5 .align 5
___ ___
@ -476,7 +478,8 @@ sub Xloop()
} }
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon .fpu neon
.type sha1_block_data_order_neon,%function .type sha1_block_data_order_neon,%function
@ -563,7 +566,7 @@ my @Kxx=map("q$_",(8..11));
my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14)); my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.type sha1_block_data_order_armv8,%function .type sha1_block_data_order_armv8,%function
.align 5 .align 5
sha1_block_data_order_armv8: sha1_block_data_order_armv8:
@ -637,7 +640,9 @@ $code.=<<___;
___ ___
}}} }}}
$code.=<<___; $code.=<<___;
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4 .comm OPENSSL_armcap_P,4,4
#endif
___ ___
{ my %opcode = ( { my %opcode = (

View File

@ -177,8 +177,10 @@ K256:
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size K256,.-K256 .size K256,.-K256
.word 0 @ terminator .word 0 @ terminator
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap: .LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha256_block_data_order .word OPENSSL_armcap_P-sha256_block_data_order
#endif
.align 5 .align 5
.global sha256_block_data_order .global sha256_block_data_order
@ -186,7 +188,7 @@ K256:
sha256_block_data_order: sha256_block_data_order:
sub r3,pc,#8 @ sha256_block_data_order sub r3,pc,#8 @ sha256_block_data_order
add $len,$inp,$len,lsl#6 @ len to point at the end of inp add $len,$inp,$len,lsl#6 @ len to point at the end of inp
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P ldr r12,[r3,r12] @ OPENSSL_armcap_P
tst r12,#ARMV8_SHA256 tst r12,#ARMV8_SHA256
@ -423,7 +425,8 @@ sub body_00_15 () {
} }
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon .fpu neon
.type sha256_block_data_order_neon,%function .type sha256_block_data_order_neon,%function
@ -545,7 +548,7 @@ my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
my $Ktbl="r3"; my $Ktbl="r3";
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.type sha256_block_data_order_armv8,%function .type sha256_block_data_order_armv8,%function
.align 5 .align 5
sha256_block_data_order_armv8: sha256_block_data_order_armv8:
@ -616,7 +619,9 @@ ___
$code.=<<___; $code.=<<___;
.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2 .align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4 .comm OPENSSL_armcap_P,4,4
#endif
___ ___
{ my %opcode = ( { my %opcode = (

View File

@ -237,16 +237,20 @@ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size K512,.-K512 .size K512,.-K512
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap: .LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha512_block_data_order .word OPENSSL_armcap_P-sha512_block_data_order
.skip 32-4 .skip 32-4
#else
.skip 32
#endif
.global sha512_block_data_order .global sha512_block_data_order
.type sha512_block_data_order,%function .type sha512_block_data_order,%function
sha512_block_data_order: sha512_block_data_order:
sub r3,pc,#8 @ sha512_block_data_order sub r3,pc,#8 @ sha512_block_data_order
add $len,$inp,$len,lsl#7 @ len to point at the end of inp add $len,$inp,$len,lsl#7 @ len to point at the end of inp
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P ldr r12,[r3,r12] @ OPENSSL_armcap_P
tst r12,#1 tst r12,#1
@ -551,7 +555,8 @@ ___
} }
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon .fpu neon
.align 4 .align 4
@ -592,7 +597,9 @@ $code.=<<___;
.size sha512_block_data_order,.-sha512_block_data_order .size sha512_block_data_order,.-sha512_block_data_order
.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2 .align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4 .comm OPENSSL_armcap_P,4,4
#endif
___ ___
$code =~ s/\`([^\`]*)\`/eval $1/gem; $code =~ s/\`([^\`]*)\`/eval $1/gem;