Remove inconsistency in ARM support.

This facilitates "universal" builds, ones that target multiple
architectures, e.g. ARMv5 through ARMv7. See commentary in
Configure for details.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Matt Caswell <matt@openssl.org>
(cherry picked from commit c1669e1c205dc8e695fb0c10a655f434e758b9f7)
This commit is contained in:
Andy Polyakov 2014-11-07 22:48:22 +01:00
parent 4aaf1e493c
commit f4868c9921
14 changed files with 224 additions and 160 deletions

View File

@ -351,8 +351,34 @@ my %table=(
# throw in -D[BL]_ENDIAN, whichever appropriate... # throw in -D[BL]_ENDIAN, whichever appropriate...
"linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
# It's believed that majority of ARM toolchains predefine appropriate -march.
# If your compiler does not, do complement the config command line with one! #######################################################################
# Note that -march is not among compiler options in below linux-armv4
# target line. Not specifying one is intentional to give you choice to:
#
# a) rely on your compiler default by not specifying one;
# b) specify your target platform explicitly for optimal performance,
# e.g. -march=armv6 or -march=armv7-a;
# c) build "universal" binary that targets *range* of platforms by
# specifying minimum and maximum supported architecture;
#
# As for option c): it actually makes no sense to specify maximum to be
# less than ARMv7, because it's the least requirement for run-time
# switch between platform-specific code paths. And without run-time
# switch performance would be equivalent to one for minimum. Secondly,
# there are some natural limitations that you'd have to accept and
# respect. Most notably you can *not* build "universal" binary for
# big-endian platform. This is because ARMv7 processor always picks
# instructions in little-endian order. Another similar limitation is
# that -mthumb can't "cross" -march=armv6t2 boundary, because that's
# where it became Thumb-2. Well, this limitation is a bit artificial,
# because it's not really impossible, but it's deemed too tricky to
# support. And of course you have to be sure that your binutils are
# actually up to the task of handling maximum target platform. With all
# this in mind here is an example of how to configure "universal" build:
#
# ./Configure linux-armv4 -march=armv6 -D__ARM_MAX_ARCH__=8
#
"linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-aarch64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-aarch64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
# Configure script adds minimally required -march for assembly support, # Configure script adds minimally required -march for assembly support,

View File

@ -35,11 +35,13 @@ $prefix="aes_v8";
$code=<<___; $code=<<___;
#include "arm_arch.h" #include "arm_arch.h"
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.text .text
___ ___
$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/); $code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/); $code.=".arch armv7-a\n.fpu neon\n.code 32\n" if ($flavour !~ /64/);
#^^^^^^ this is done to simplify adoption by not depending
# on latest binutils.
# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax, # Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to # NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to

View File

@ -702,13 +702,17 @@ $code.=<<___;
# define BSAES_ASM_EXTENDED_KEY # define BSAES_ASM_EXTENDED_KEY
# define XTS_CHAIN_TWEAK # define XTS_CHAIN_TWEAK
# define __ARM_ARCH__ __LINUX_ARM_ARCH__ # define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
#endif #endif
#ifdef __thumb__ #ifdef __thumb__
# define adrl adr # define adrl adr
#endif #endif
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.text .text
.syntax unified @ ARMv7-capable assembler is expected to handle this .syntax unified @ ARMv7-capable assembler is expected to handle this
#ifdef __thumb2__ #ifdef __thumb2__
@ -717,8 +721,6 @@ $code.=<<___;
.code 32 .code 32
#endif #endif
.fpu neon
.type _bsaes_decrypt8,%function .type _bsaes_decrypt8,%function
.align 4 .align 4
_bsaes_decrypt8: _bsaes_decrypt8:

View File

@ -52,6 +52,18 @@
#include <openssl/fipssyms.h> #include <openssl/fipssyms.h>
#endif #endif
#if !defined(__ARM_MAX_ARCH__)
# define __ARM_MAX_ARCH__ __ARM_ARCH__
#endif
#if __ARM_MAX_ARCH__<__ARM_ARCH__
# error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__"
#elif __ARM_MAX_ARCH__!=__ARM_ARCH__
# if __ARM_ARCH__<7 && __ARM_MAX_ARCH__>=7 && defined(__ARMEB__)
# error "can't build universal big-endian binary"
# endif
#endif
#if !__ASSEMBLER__ #if !__ASSEMBLER__
extern unsigned int OPENSSL_armcap_P; extern unsigned int OPENSSL_armcap_P;
#endif #endif

View File

@ -7,8 +7,12 @@
#include "arm_arch.h" #include "arm_arch.h"
unsigned int OPENSSL_armcap_P; unsigned int OPENSSL_armcap_P=0;
#if __ARM_MAX_ARCH__<7
void OPENSSL_cpuid_setup(void) {}
unsigned long OPENSSL_rdtsc(void) { return 0; }
#else
static sigset_t all_masked; static sigset_t all_masked;
static sigjmp_buf ill_jmp; static sigjmp_buf ill_jmp;
@ -155,3 +159,4 @@ void OPENSSL_cpuid_setup(void)
sigaction (SIGILL,&ill_oact,NULL); sigaction (SIGILL,&ill_oact,NULL);
sigprocmask(SIG_SETMASK,&oset,NULL); sigprocmask(SIG_SETMASK,&oset,NULL);
} }
#endif

View File

@ -3,69 +3,6 @@
.text .text
.code 32 .code 32
@ Special note about using .byte directives to encode instructions.
@ Initial reason for hand-coding instructions was to allow module to
@ be compilable by legacy tool-chains. At later point it was pointed
@ out that since ARMv7, instructions are always encoded in little-endian
@ order, therefore one has to opt for endian-neutral presentation.
@ Contemporary tool-chains offer .inst directive for this purpose,
@ but not legacy ones. Therefore .byte. But there is an exception,
@ namely ARMv7-R profile still allows for big-endian encoding even for
@ instructions. This raises the question what if probe instructions
@ appear executable to such processor operating in big-endian order?
@ They have to be chosen in a way that avoids this problem. As failed
@ NEON probe disables a number of other probes we have to ensure that
@ only NEON probe instruction doesn't appear executable in big-endian
@ order, therefore 'vorr q8,q8,q8', and not some other register. The
@ only probe that is not bypassed on failed NEON probe is _armv7_tick,
@ where you'll spot 'mov r0,r6' that serves this purpose. Basic idea is
@ that if fetched in alternative byte order instruction should crash to
@ denote lack of probed capability...
.align 5
.global _armv7_neon_probe
.type _armv7_neon_probe,%function
_armv7_neon_probe:
.byte 0xf0,0x01,0x60,0xf2 @ vorr q8,q8,q8
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv7_neon_probe,.-_armv7_neon_probe
.global _armv7_tick
.type _armv7_tick,%function
_armv7_tick:
.byte 0x06,0x00,0xa0,0xe1 @ mov r0,r6
.byte 0x1e,0x0f,0x51,0xec @ mrrc p15,1,r0,r1,c14 @ CNTVCT
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
nop
.size _armv7_tick,.-_armv7_tick
.global _armv8_aes_probe
.type _armv8_aes_probe,%function
_armv8_aes_probe:
.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_aes_probe,.-_armv8_aes_probe
.global _armv8_sha1_probe
.type _armv8_sha1_probe,%function
_armv8_sha1_probe:
.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_sha1_probe,.-_armv8_sha1_probe
.global _armv8_sha256_probe
.type _armv8_sha256_probe,%function
_armv8_sha256_probe:
.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_sha256_probe,.-_armv8_sha256_probe
.global _armv8_pmull_probe
.type _armv8_pmull_probe,%function
_armv8_pmull_probe:
.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_pmull_probe,.-_armv8_pmull_probe
.align 5 .align 5
.global OPENSSL_atomic_add .global OPENSSL_atomic_add
.type OPENSSL_atomic_add,%function .type OPENSSL_atomic_add,%function
@ -139,30 +76,81 @@ OPENSSL_cleanse:
#endif #endif
.size OPENSSL_cleanse,.-OPENSSL_cleanse .size OPENSSL_cleanse,.-OPENSSL_cleanse
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.align 5
.global _armv7_neon_probe
.type _armv7_neon_probe,%function
_armv7_neon_probe:
vorr q0,q0,q0
bx lr
.size _armv7_neon_probe,.-_armv7_neon_probe
.global _armv7_tick
.type _armv7_tick,%function
_armv7_tick:
mrrc p15,1,r0,r1,c14 @ CNTVCT
bx lr
.size _armv7_tick,.-_armv7_tick
.global _armv8_aes_probe
.type _armv8_aes_probe,%function
_armv8_aes_probe:
.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
bx lr
.size _armv8_aes_probe,.-_armv8_aes_probe
.global _armv8_sha1_probe
.type _armv8_sha1_probe,%function
_armv8_sha1_probe:
.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
bx lr
.size _armv8_sha1_probe,.-_armv8_sha1_probe
.global _armv8_sha256_probe
.type _armv8_sha256_probe,%function
_armv8_sha256_probe:
.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
bx lr
.size _armv8_sha256_probe,.-_armv8_sha256_probe
.global _armv8_pmull_probe
.type _armv8_pmull_probe,%function
_armv8_pmull_probe:
.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
bx lr
.size _armv8_pmull_probe,.-_armv8_pmull_probe
#endif
.global OPENSSL_wipe_cpu .global OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,%function .type OPENSSL_wipe_cpu,%function
OPENSSL_wipe_cpu: OPENSSL_wipe_cpu:
#if __ARM_MAX_ARCH__>=7
ldr r0,.LOPENSSL_armcap ldr r0,.LOPENSSL_armcap
adr r1,.LOPENSSL_armcap adr r1,.LOPENSSL_armcap
ldr r0,[r1,r0] ldr r0,[r1,r0]
#endif
eor r2,r2,r2 eor r2,r2,r2
eor r3,r3,r3 eor r3,r3,r3
eor ip,ip,ip eor ip,ip,ip
#if __ARM_MAX_ARCH__>=7
tst r0,#1 tst r0,#1
beq .Lwipe_done beq .Lwipe_done
.byte 0x50,0x01,0x00,0xf3 @ veor q0, q0, q0 veor q0, q0, q0
.byte 0x52,0x21,0x02,0xf3 @ veor q1, q1, q1 veor q1, q1, q1
.byte 0x54,0x41,0x04,0xf3 @ veor q2, q2, q2 veor q2, q2, q2
.byte 0x56,0x61,0x06,0xf3 @ veor q3, q3, q3 veor q3, q3, q3
.byte 0xf0,0x01,0x40,0xf3 @ veor q8, q8, q8 veor q8, q8, q8
.byte 0xf2,0x21,0x42,0xf3 @ veor q9, q9, q9 veor q9, q9, q9
.byte 0xf4,0x41,0x44,0xf3 @ veor q10, q10, q10 veor q10, q10, q10
.byte 0xf6,0x61,0x46,0xf3 @ veor q11, q11, q11 veor q11, q11, q11
.byte 0xf8,0x81,0x48,0xf3 @ veor q12, q12, q12 veor q12, q12, q12
.byte 0xfa,0xa1,0x4a,0xf3 @ veor q13, q13, q13 veor q13, q13, q13
.byte 0xfc,0xc1,0x4c,0xf3 @ veor q14, q14, q14 veor q14, q14, q14
.byte 0xfe,0xe1,0x4e,0xf3 @ veor q14, q14, q14 veor q15, q15, q15
.Lwipe_done: .Lwipe_done:
#endif
mov r0,sp mov r0,sp
#if __ARM_ARCH__>=5 #if __ARM_ARCH__>=5
bx lr bx lr
@ -200,8 +188,10 @@ OPENSSL_instrument_bus2:
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 .size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
.align 5 .align 5
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap: .LOPENSSL_armcap:
.word OPENSSL_armcap_P-.LOPENSSL_armcap .word OPENSSL_armcap_P-.LOPENSSL_armcap
#endif
#if __ARM_ARCH__>=6 #if __ARM_ARCH__>=6
.align 5 .align 5
#else #else

View File

@ -40,10 +40,6 @@ $code=<<___;
.text .text
.code 32 .code 32
#if __ARM_ARCH__>=7
.fpu neon
#endif
___ ___
################ ################
# private interface to mul_1x1_ialu # private interface to mul_1x1_ialu
@ -142,72 +138,18 @@ ___
# BN_ULONG a1,BN_ULONG a0, # BN_ULONG a1,BN_ULONG a0,
# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0 # BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
{ {
my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
$code.=<<___; $code.=<<___;
.global bn_GF2m_mul_2x2 .global bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,%function .type bn_GF2m_mul_2x2,%function
.align 5 .align 5
bn_GF2m_mul_2x2: bn_GF2m_mul_2x2:
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap ldr r12,.LOPENSSL_armcap
.Lpic: ldr r12,[pc,r12] .Lpic: ldr r12,[pc,r12]
tst r12,#1 tst r12,#1
beq .Lialu bne .LNEON
ldr r12, [sp] @ 5th argument
vmov.32 $a, r2, r1
vmov.32 $b, r12, r3
vmov.i64 $k48, #0x0000ffffffffffff
vmov.i64 $k32, #0x00000000ffffffff
vmov.i64 $k16, #0x000000000000ffff
vext.8 $t0#lo, $a, $a, #1 @ A1
vmull.p8 $t0, $t0#lo, $b @ F = A1*B
vext.8 $r#lo, $b, $b, #1 @ B1
vmull.p8 $r, $a, $r#lo @ E = A*B1
vext.8 $t1#lo, $a, $a, #2 @ A2
vmull.p8 $t1, $t1#lo, $b @ H = A2*B
vext.8 $t3#lo, $b, $b, #2 @ B2
vmull.p8 $t3, $a, $t3#lo @ G = A*B2
vext.8 $t2#lo, $a, $a, #3 @ A3
veor $t0, $t0, $r @ L = E + F
vmull.p8 $t2, $t2#lo, $b @ J = A3*B
vext.8 $r#lo, $b, $b, #3 @ B3
veor $t1, $t1, $t3 @ M = G + H
vmull.p8 $r, $a, $r#lo @ I = A*B3
veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8
vand $t0#hi, $t0#hi, $k48
vext.8 $t3#lo, $b, $b, #4 @ B4
veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16
vand $t1#hi, $t1#hi, $k32
vmull.p8 $t3, $a, $t3#lo @ K = A*B4
veor $t2, $t2, $r @ N = I + J
veor $t0#lo, $t0#lo, $t0#hi
veor $t1#lo, $t1#lo, $t1#hi
veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24
vand $t2#hi, $t2#hi, $k16
vext.8 $t0, $t0, $t0, #15
veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32
vmov.i64 $t3#hi, #0
vext.8 $t1, $t1, $t1, #14
veor $t2#lo, $t2#lo, $t2#hi
vmull.p8 $r, $a, $b @ D = A*B
vext.8 $t3, $t3, $t3, #12
vext.8 $t2, $t2, $t2, #13
veor $t0, $t0, $t1
veor $t2, $t2, $t3
veor $r, $r, $t0
veor $r, $r, $t2
vst1.32 {$r}, [r0]
ret @ bx lr
.align 4
.Lialu:
#endif #endif
___ ___
}
$ret="r10"; # reassigned 1st argument $ret="r10"; # reassigned 1st argument
$code.=<<___; $code.=<<___;
stmdb sp!,{r4-r10,lr} stmdb sp!,{r4-r10,lr}
@ -257,8 +199,72 @@ $code.=<<___;
moveq pc,lr @ be binary compatible with V4, yet moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-) bx lr @ interoperable with Thumb ISA:-)
#endif #endif
___
}
{
my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.align 5
.LNEON:
ldr r12, [sp] @ 5th argument
vmov.32 $a, r2, r1
vmov.32 $b, r12, r3
vmov.i64 $k48, #0x0000ffffffffffff
vmov.i64 $k32, #0x00000000ffffffff
vmov.i64 $k16, #0x000000000000ffff
vext.8 $t0#lo, $a, $a, #1 @ A1
vmull.p8 $t0, $t0#lo, $b @ F = A1*B
vext.8 $r#lo, $b, $b, #1 @ B1
vmull.p8 $r, $a, $r#lo @ E = A*B1
vext.8 $t1#lo, $a, $a, #2 @ A2
vmull.p8 $t1, $t1#lo, $b @ H = A2*B
vext.8 $t3#lo, $b, $b, #2 @ B2
vmull.p8 $t3, $a, $t3#lo @ G = A*B2
vext.8 $t2#lo, $a, $a, #3 @ A3
veor $t0, $t0, $r @ L = E + F
vmull.p8 $t2, $t2#lo, $b @ J = A3*B
vext.8 $r#lo, $b, $b, #3 @ B3
veor $t1, $t1, $t3 @ M = G + H
vmull.p8 $r, $a, $r#lo @ I = A*B3
veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8
vand $t0#hi, $t0#hi, $k48
vext.8 $t3#lo, $b, $b, #4 @ B4
veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16
vand $t1#hi, $t1#hi, $k32
vmull.p8 $t3, $a, $t3#lo @ K = A*B4
veor $t2, $t2, $r @ N = I + J
veor $t0#lo, $t0#lo, $t0#hi
veor $t1#lo, $t1#lo, $t1#hi
veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24
vand $t2#hi, $t2#hi, $k16
vext.8 $t0, $t0, $t0, #15
veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32
vmov.i64 $t3#hi, #0
vext.8 $t1, $t1, $t1, #14
veor $t2#lo, $t2#lo, $t2#hi
vmull.p8 $r, $a, $b @ D = A*B
vext.8 $t3, $t3, $t3, #12
vext.8 $t2, $t2, $t2, #13
veor $t0, $t0, $t1
veor $t2, $t2, $t3
veor $r, $r, $t0
veor $r, $r, $t2
vst1.32 {$r}, [r0]
ret @ bx lr
#endif
___
}
$code.=<<___;
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.align 5 .align 5
.LOPENSSL_armcap: .LOPENSSL_armcap:
.word OPENSSL_armcap_P-(.Lpic+8) .word OPENSSL_armcap_P-(.Lpic+8)
@ -266,7 +272,9 @@ $code.=<<___;
.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" .asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 5 .align 5
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4 .comm OPENSSL_armcap_P,4,4
#endif
___ ___
foreach (split("\n",$code)) { foreach (split("\n",$code)) {

View File

@ -72,7 +72,7 @@ $code=<<___;
.text .text
.code 32 .code 32
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.align 5 .align 5
.LOPENSSL_armcap: .LOPENSSL_armcap:
.word OPENSSL_armcap_P-bn_mul_mont .word OPENSSL_armcap_P-bn_mul_mont
@ -85,7 +85,7 @@ $code=<<___;
bn_mul_mont: bn_mul_mont:
ldr ip,[sp,#4] @ load num ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
tst ip,#7 tst ip,#7
bne .Lialu bne .Lialu
adr r0,bn_mul_mont adr r0,bn_mul_mont
@ -256,7 +256,8 @@ my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5));
my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9)); my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9));
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon .fpu neon
.type bn_mul8x_mont_neon,%function .type bn_mul8x_mont_neon,%function
@ -663,7 +664,7 @@ ___
$code.=<<___; $code.=<<___;
.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" .asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2 .align 2
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4 .comm OPENSSL_armcap_P,4,4
#endif #endif
___ ___

View File

@ -911,7 +911,7 @@ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
#if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__)) #if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__))
#include "arm_arch.h" #include "arm_arch.h"
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
# if defined(BSAES_ASM) # if defined(BSAES_ASM)
# define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) # define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
# endif # endif

View File

@ -365,7 +365,8 @@ ___
} }
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon .fpu neon
.global gcm_init_neon .global gcm_init_neon

View File

@ -675,7 +675,7 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
# endif # endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__) # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
# include "arm_arch.h" # include "arm_arch.h"
# if __ARM_ARCH__>=7 # if __ARM_MAX_ARCH__>=7
# define GHASH_ASM_ARM # define GHASH_ASM_ARM
# define GCM_FUNCREF_4BIT # define GCM_FUNCREF_4BIT
# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL) # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)

View File

@ -174,7 +174,7 @@ $code=<<___;
.align 5 .align 5
sha1_block_data_order: sha1_block_data_order:
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
sub r3,pc,#8 @ sha1_block_data_order sub r3,pc,#8 @ sha1_block_data_order
ldr r12,.LOPENSSL_armcap ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P ldr r12,[r3,r12] @ OPENSSL_armcap_P
@ -264,8 +264,10 @@ $code.=<<___;
.LK_20_39: .word 0x6ed9eba1 .LK_20_39: .word 0x6ed9eba1
.LK_40_59: .word 0x8f1bbcdc .LK_40_59: .word 0x8f1bbcdc
.LK_60_79: .word 0xca62c1d6 .LK_60_79: .word 0xca62c1d6
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap: .LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha1_block_data_order .word OPENSSL_armcap_P-sha1_block_data_order
#endif
.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" .asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 5 .align 5
___ ___
@ -476,7 +478,8 @@ sub Xloop()
} }
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon .fpu neon
.type sha1_block_data_order_neon,%function .type sha1_block_data_order_neon,%function
@ -563,7 +566,7 @@ my @Kxx=map("q$_",(8..11));
my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14)); my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.type sha1_block_data_order_armv8,%function .type sha1_block_data_order_armv8,%function
.align 5 .align 5
sha1_block_data_order_armv8: sha1_block_data_order_armv8:
@ -637,7 +640,9 @@ $code.=<<___;
___ ___
}}} }}}
$code.=<<___; $code.=<<___;
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4 .comm OPENSSL_armcap_P,4,4
#endif
___ ___
{ my %opcode = ( { my %opcode = (

View File

@ -177,8 +177,10 @@ K256:
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size K256,.-K256 .size K256,.-K256
.word 0 @ terminator .word 0 @ terminator
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap: .LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha256_block_data_order .word OPENSSL_armcap_P-sha256_block_data_order
#endif
.align 5 .align 5
.global sha256_block_data_order .global sha256_block_data_order
@ -186,7 +188,7 @@ K256:
sha256_block_data_order: sha256_block_data_order:
sub r3,pc,#8 @ sha256_block_data_order sub r3,pc,#8 @ sha256_block_data_order
add $len,$inp,$len,lsl#6 @ len to point at the end of inp add $len,$inp,$len,lsl#6 @ len to point at the end of inp
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P ldr r12,[r3,r12] @ OPENSSL_armcap_P
tst r12,#ARMV8_SHA256 tst r12,#ARMV8_SHA256
@ -423,7 +425,8 @@ sub body_00_15 () {
} }
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon .fpu neon
.type sha256_block_data_order_neon,%function .type sha256_block_data_order_neon,%function
@ -545,7 +548,7 @@ my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
my $Ktbl="r3"; my $Ktbl="r3";
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.type sha256_block_data_order_armv8,%function .type sha256_block_data_order_armv8,%function
.align 5 .align 5
sha256_block_data_order_armv8: sha256_block_data_order_armv8:
@ -616,7 +619,9 @@ ___
$code.=<<___; $code.=<<___;
.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2 .align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4 .comm OPENSSL_armcap_P,4,4
#endif
___ ___
{ my %opcode = ( { my %opcode = (

View File

@ -237,16 +237,20 @@ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size K512,.-K512 .size K512,.-K512
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap: .LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha512_block_data_order .word OPENSSL_armcap_P-sha512_block_data_order
.skip 32-4 .skip 32-4
#else
.skip 32
#endif
.global sha512_block_data_order .global sha512_block_data_order
.type sha512_block_data_order,%function .type sha512_block_data_order,%function
sha512_block_data_order: sha512_block_data_order:
sub r3,pc,#8 @ sha512_block_data_order sub r3,pc,#8 @ sha512_block_data_order
add $len,$inp,$len,lsl#7 @ len to point at the end of inp add $len,$inp,$len,lsl#7 @ len to point at the end of inp
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P ldr r12,[r3,r12] @ OPENSSL_armcap_P
tst r12,#1 tst r12,#1
@ -551,7 +555,8 @@ ___
} }
$code.=<<___; $code.=<<___;
#if __ARM_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon .fpu neon
.align 4 .align 4
@ -592,7 +597,9 @@ $code.=<<___;
.size sha512_block_data_order,.-sha512_block_data_order .size sha512_block_data_order,.-sha512_block_data_order
.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2 .align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4 .comm OPENSSL_armcap_P,4,4
#endif
___ ___
$code =~ s/\`([^\`]*)\`/eval $1/gem; $code =~ s/\`([^\`]*)\`/eval $1/gem;