Add target for i686 cross compilation

Reviewed-by: Stephen Henson <steve@openssl.org>
Add new iOS subdirectory
2016-02-15 10:26:20 -05:00 · 2015-07-04 15:18:46 -04:00 · 2015-07-04 15:17:45 -04:00 · 2015-05-13 16:48:08 +02:00 · 2015-05-13 16:47:55 +02:00 · 2015-05-13 16:47:43 +02:00
45 changed files with 4388 additions and 2374 deletions
--- a/15
+++ b/15
@@ -136,6 +136,7 @@ my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::
 my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::";
 my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:";
 my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void";
+my $aarch64_asm="armcap.o arm64cpuid.o mem_clr.o:::aes_core.o aes_cbc.o aesv8-armx.o:::sha1-armv8.o sha256-armv8.o sha512-armv8.o:::::::ghashv8-armx.o:";
 my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32";
 my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64";
 my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::";
@@ -356,6 +357,8 @@ my %table=(
 "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "linux-x86_64",	"gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
+"linux-x86_64-cross",  "gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall -DFIPS_REF_POINT_IS_CROSS_COMPILER_AWARE::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
+"linux-i686-cross",	"gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -DFIPS_REF_POINT_IS_CROSS_COMPILER_AWARE::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "linux64-s390x",	"gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${s390x_asm}:64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
 #### So called "highgprs" target for z/Architecture CPUs
 # "Highgprs" is kernel feature first implemented in Linux 2.6.32, see
@@ -403,7 +406,8 @@ my %table=(
 # Android: linux-* but without -DTERMIO and pointers to headers and libs.
 "android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "android-x86","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:".eval{my $asm=${x86_elf_asm};$asm=~s/:elf/:android/;$asm}.":dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"android-armv7","gcc:-march=armv7-a -mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"android-armv7","gcc:-march=armv7-a -mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-pie%-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"android64-aarch64","gcc:-mandroid -fPIC -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -Wall::-D_REENTRANT::-pie%-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",

 #### *BSD [do see comment about ${BSDthreads} above!]
 "BSD-generic32","gcc:-DTERMIOS -O3 -fomit-frame-pointer -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_INDEX DES_INT DES_UNROLL:${no_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
@@ -433,6 +437,7 @@ my %table=(
 # QNX
 "qnx4",	"cc:-DL_ENDIAN -DTERMIO::(unknown):::${x86_gcc_des} ${x86_gcc_opts}:",
 "QNX6",       "gcc:-DTERMIOS::::-lsocket::${no_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"QNX6-armv4",	"gcc:-DTERMIOS -O2 -Wall:::::BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "QNX6-i386",  "gcc:-DL_ENDIAN -DTERMIOS -O2 -Wall::::-lsocket:${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",

 # BeOS
@@ -584,6 +589,8 @@ my %table=(
 "debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${ppc32_asm}:osx32:dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
 # iPhoneOS/iOS
 "iphoneos-cross","llvm-gcc:-O3 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
+"ios-cross","cc:-O3 -arch armv7 -mios-version-min=7.0.0 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:armcap.o armv4cpuid_ios.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::ios32:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
+"ios64-cross","cc:-O3 -arch arm64 -mios-version-min=7.0.0 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR -RC4_CHUNK DES_INT DES_UNROLL -BF_PTR:${aarch64_asm}:ios64:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",

 ##### A/UX
 "aux3-gcc","gcc:-O2 -DTERMIO::(unknown):AUX:-lbsd:RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:::",
@@ -600,12 +607,14 @@ my %table=(
 ##### VxWorks for various targets
 "vxworks-ppc60x","ccppc:-D_REENTRANT -mrtp -mhard-float -mstrict-align -fno-implicit-fp -DPPC32_fp60x -O2 -fstrength-reduce -fno-builtin -fno-strict-aliasing -Wall -DCPU=PPC32 -DTOOL_FAMILY=gnu -DTOOL=gnu -I\$(WIND_BASE)/target/usr/h -I\$(WIND_BASE)/target/usr/h/wrn/coreip:::VXWORKS:-Wl,--defsym,__wrs_rtp_base=0xe0000000 -L \$(WIND_BASE)/target/usr/lib/ppc/PPC32/common:::::",
 "vxworks-ppcgen","ccppc:-D_REENTRANT -mrtp -msoft-float -mstrict-align -O1 -fno-builtin -fno-strict-aliasing -Wall -DCPU=PPC32 -DTOOL_FAMILY=gnu -DTOOL=gnu -I\$(WIND_BASE)/target/usr/h -I\$(WIND_BASE)/target/usr/h/wrn/coreip:::VXWORKS:-Wl,--defsym,__wrs_rtp_base=0xe0000000 -L \$(WIND_BASE)/target/usr/lib/ppc/PPC32/sfcommon:::::",
+"vxworks-ppcgen-kernel","ccppc:-D_REENTRANT -msoft-float -mstrict-align -O1 -fno-builtin -fno-strict-aliasing -Wall -DCPU=PPC32 -DTOOL_FAMILY=gnu -DTOOL=gnu -I\$(WIND_BASE)/target/h -I\$(WIND_BASE)/target/h/wrn/coreip:::VXWORKS::::::",
 "vxworks-ppc405","ccppc:-g -msoft-float -mlongcall -DCPU=PPC405 -I\$(WIND_BASE)/target/h:::VXWORKS:-r:::::",
 "vxworks-ppc750","ccppc:-ansi -nostdinc -DPPC750 -D_REENTRANT -fvolatile -fno-builtin -fno-for-scope -fsigned-char -Wall -msoft-float -mlongcall -DCPU=PPC604 -I\$(WIND_BASE)/target/h \$(DEBUG_FLAG):::VXWORKS:-r:::::",
 "vxworks-ppc750-debug","ccppc:-ansi -nostdinc -DPPC750 -D_REENTRANT -fvolatile -fno-builtin -fno-for-scope -fsigned-char -Wall -msoft-float -mlongcall -DCPU=PPC604 -I\$(WIND_BASE)/target/h -DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DPEDANTIC -DDEBUG_SAFESTACK -DDEBUG -g:::VXWORKS:-r:::::",
 "vxworks-ppc860","ccppc:-nostdinc -msoft-float -DCPU=PPC860 -DNO_STRINGS_H -I\$(WIND_BASE)/target/h:::VXWORKS:-r:::::",
 "vxworks-simlinux","ccpentium:-B\$(WIND_BASE)/host/\$(WIND_HOST_TYPE)/lib/gcc-lib/ -D_VSB_CONFIG_FILE=\"\$(WIND_BASE)/target/lib/h/config/vsbConfig.h\" -DL_ENDIAN -DCPU=SIMLINUX -DTOOL_FAMILY=gnu -DTOOL=gnu -fno-builtin -fno-defer-pop -DNO_STRINGS_H -I\$(WIND_BASE)/target/h -I\$(WIND_BASE)/target/h/wrn/coreip -DOPENSSL_NO_HW_PADLOCK:::VXWORKS:-r::${no_asm}::::::ranlibpentium:",
 "vxworks-mips","ccmips:-mrtp -mips2 -O -G 0 -B\$(WIND_BASE)/host/\$(WIND_HOST_TYPE)/lib/gcc-lib/ -D_VSB_CONFIG_FILE=\"\$(WIND_BASE)/target/lib/h/config/vsbConfig.h\" -DCPU=MIPS32 -msoft-float -mno-branch-likely -DTOOL_FAMILY=gnu -DTOOL=gnu -fno-builtin -fno-defer-pop -DNO_STRINGS_H -I\$(WIND_BASE)/target/usr/h -I\$(WIND_BASE)/target/h/wrn/coreip::-D_REENTRANT:VXWORKS:-Wl,--defsym,__wrs_rtp_base=0xe0000000 -L \$(WIND_BASE)/target/usr/lib/mips/MIPSI32/sfcommon::${mips32_asm}:o32::::::ranlibmips:",
+"vxworks-pentium","ccpentium:-Os -B\$(WIND_BASE)/host/\$(WIND_HOST_TYPE)/lib/gcc-lib/ -D_VSB_CONFIG_FILE=\"\$(WIND_BASE)/target/lib/h/config/vsbConfig.h\" -DL_ENDIAN -DCPU=PENTIUM4 -DTOOL_FAMILY=gnu -DTOOL=gnu -fno-builtin -fno-defer-pop -D_WRS_KERNEL -D_WRS_VX_SMP -I\$(WIND_BASE)/target/h -I\$(WIND_BASE)/target/h/wrn/coreip -DOPENSSL_NO_HW_PADLOCK:::VXWORKS:-r::${no_asm}::::::ranlibpentium:",

 ##### Compaq Non-Stop Kernel (Tandem)
 "tandem-c89","c89:-Ww -D__TANDEM -D_XOPEN_SOURCE -D_XOPEN_SOURCE_EXTENDED=1 -D_TANDEM_SOURCE -DB_ENDIAN::(unknown):::THIRTY_TWO_BIT:::",
@@ -1562,7 +1571,7 @@ if ($rmd160_obj =~ /\.o$/)
 	}
 if ($aes_obj =~ /\.o$/)
 	{
-	$cflags.=" -DAES_ASM";
+	 $cflags.=" -DAES_ASM" if ($aes_obj =~ m/\baes\-/);
 	# aes_ctr.o is not a real file, only indication that assembler
 	# module implements AES_ctr32_encrypt...
 	$cflags.=" -DAES_CTR_ASM" if ($aes_obj =~ s/\s*aes_ctr\.o//);
@@ -1583,7 +1592,7 @@ else	{
 	$wp_obj="wp_block.o";
 	}
 $cmll_obj=$cmll_enc	unless ($cmll_obj =~ /.o$/);
-if ($modes_obj =~ /ghash/)
+if ($modes_obj =~ /ghash\-/)
 	{
 	$cflags.=" -DGHASH_ASM";
 	}
--- a/21
+++ b/21
@@ -166,6 +166,14 @@ case "${SYSTEM}:${RELEASE}:${VERSION}:${MACHINE}" in
 	echo "mips4-sgi-irix64"; exit 0
 	;;

+    Linux:*:cross:i686)
+	echo "${MACHINE}-cross-linux"; exit 0
+	;;
+
+    Linux:[2-9].*:cross:x86_64)
+	echo "${MACHINE}-cross-linux"; exit 0
+	;;
+
    Linux:[2-9].*)
 	echo "${MACHINE}-whatever-linux2"; exit 0
 	;;
@@ -379,6 +387,10 @@ case "${SYSTEM}:${RELEASE}:${VERSION}:${MACHINE}" in
       echo "nsr-tandem-nsk"; exit 0;
       ;;

+    vxworks:kernel*)
+       echo "${MACHINE}-kernel-vxworks"; exit 0;
+       ;;
+
    vxworks*)
       echo "${MACHINE}-whatever-vxworks"; exit 0;
       ;;
@@ -580,6 +592,10 @@ case "$GUESSOS" in
  *-*-iphoneos)
 	options="$options -arch%20${MACHINE}"
 	OUT="iphoneos-cross" ;;
+  armv7-*-ios)
+	OUT="ios-cross" ;;
+  arm64-*-ios*)
+	OUT="ios64-cross" ;;
  alpha-*-linux2)
        ISA=`awk '/cpu model/{print$4;exit(0);}' /proc/cpuinfo`
 	case ${ISA:-generic} in
@@ -605,6 +621,7 @@ case "$GUESSOS" in
 	;;
  ppc-*-linux2) OUT="linux-ppc" ;;
  ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;;
+  ppcgen-kernel-vxworks*) OUT="vxworks-ppcgen-kernel" ;;
  ppcgen-*-vxworks*) OUT="vxworks-ppcgen" ;;
  pentium-*-vxworks*) OUT="vxworks-pentium" ;;
  simlinux-*-vxworks*) OUT="vxworks-simlinux" ;;
@@ -684,6 +701,8 @@ case "$GUESSOS" in
        fi ;;
  *-*-linux1) OUT="linux-aout" ;;
  *-*-linux2) OUT="linux-generic32" ;;
+  i686-cross-linux) OUT="linux-i686-cross" ;;
+  *-cross-linux) OUT="linux-x86_64-cross" ;;
  sun4[uv]*-*-solaris2)
 	OUT="solaris-sparcv9-$CC"
 	ISA64=`(isalist) 2>/dev/null | grep sparcv9`
@@ -853,10 +872,12 @@ case "$GUESSOS" in
  j90-cray-unicos) OUT="cray-j90" ;;
  nsr-tandem-nsk) OUT="tandem-c89" ;;
  beos-*) OUT="$GUESSOS" ;;
+  armv4-*-qnx6) OUT="QNX6-armv4" ;;
  x86pc-*-qnx6) OUT="QNX6-i386" ;;
  *-*-qnx6) OUT="QNX6" ;;
  x86-*-android|i?86-*-android) OUT="android-x86" ;;
  armv[7-9]*-*-android) OUT="android-armv7" ;;
+  aarch64-*-android) OUT="android64-aarch64" ;;
  *) OUT=`echo $GUESSOS | awk -F- '{print $3}'`;;
 esac

--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -87,6 +87,7 @@ ppccpuid.s:	ppccpuid.pl;	$(PERL) ppccpuid.pl $(PERLASM_SCHEME) $@
 pariscid.s:	pariscid.pl;	$(PERL) pariscid.pl $(PERLASM_SCHEME) $@
 alphacpuid.s:	alphacpuid.pl
 	$(PERL) $< | $(CC) -E - | tee $@ > /dev/null
+arm64cpuid.S:	arm64cpuid.pl;	$(PERL) arm64cpuid.pl $(PERLASM_SCHEME) > $@

 subdirs:
 	@target=all; $(RECURSIVE_MAKE)
--- a/crypto/aes/Makefile
+++ b/crypto/aes/Makefile
@@ -78,6 +78,10 @@ aes-parisc.s:	asm/aes-parisc.pl
 aes-mips.S:	asm/aes-mips.pl
 	$(PERL) asm/aes-mips.pl $(PERLASM_SCHEME) $@

+aesv8-armx.S:	asm/aesv8-armx.pl
+	$(PERL) asm/aesv8-armx.pl $(PERLASM_SCHEME) $@
+aesv8-armx.o:	aesv8-armx.S
+
 # GNU make "catch all"
 aes-%.S:	asm/aes-%.pl;	$(PERL) $< $(PERLASM_SCHEME) $@
 aes-armv4.o:	aes-armv4.S
--- a/crypto/aes/asm/aes-armv4.pl
+++ b/crypto/aes/asm/aes-armv4.pl
@@ -32,8 +32,20 @@
 # Profiler-assisted and platform-specific optimization resulted in 16%
 # improvement on Cortex A8 core and ~21.5 cycles per byte.

-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+    open STDOUT,">$output";
+}

 $s0="r0";
 $s1="r1";
@@ -171,7 +183,12 @@ AES_encrypt:
 	stmdb   sp!,{r1,r4-r12,lr}
 	mov	$rounds,r0		@ inp
 	mov	$key,r2
+#ifdef	__APPLE__
+	mov	$tbl,#AES_encrypt-AES_Te
+	sub	$tbl,r3,$tbl			@ Te
+#else
 	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te
+#endif
 #if __ARM_ARCH__<7
 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
 	ldrb	$t1,[$rounds,#2]	@ manner...
@@ -425,7 +442,12 @@ AES_set_encrypt_key:
 	bne	.Labrt

 .Lok:	stmdb   sp!,{r4-r12,lr}
+#ifdef	__APPLE__
+	mov	$tbl,#AES_set_encrypt_key-AES_Te-1024
+	sub	$tbl,r3,$tbl					@ Te4
+#else
 	sub	$tbl,r3,#AES_set_encrypt_key-AES_Te-1024	@ Te4
+#endif

 	mov	$rounds,r0		@ inp
 	mov	lr,r1			@ bits
@@ -886,7 +908,12 @@ AES_decrypt:
 	stmdb   sp!,{r1,r4-r12,lr}
 	mov	$rounds,r0		@ inp
 	mov	$key,r2
+#ifdef	__APPLE__
+	mov	$tbl,#AES_decrypt-AES_Td
+	sub	$tbl,r3,$tbl				@ Td
+#else
 	sub	$tbl,r3,#AES_decrypt-AES_Td		@ Td
+#endif
 #if __ARM_ARCH__<7
 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
 	ldrb	$t1,[$rounds,#2]	@ manner...
--- a/crypto/aes/asm/aesv8-armx.pl
+++ b/crypto/aes/asm/aesv8-armx.pl
@@ -0,0 +1,968 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for ARMv8 AES instructions. The
+# module is endian-agnostic in sense that it supports both big- and
+# little-endian cases. As does it support both 32- and 64-bit modes
+# of operation. Latter is achieved by limiting amount of utilized
+# registers to 16, which implies additional NEON load and integer
+# instructions. This has no effect on mighty Apple A7, where results
+# are literally equal to the theoretical estimates based on AES
+# instruction latencies and issue rates. On Cortex-A53, an in-order
+# execution core, this costs up to 10-15%, which is partially
+# compensated by implementing dedicated code path for 128-bit
+# CBC encrypt case. On Cortex-A57 parallelizable mode performance
+# seems to be limited by sheer amount of NEON instructions...
+#
+# Performance in cycles per byte processed with 128-bit key:
+#
+#		CBC enc		CBC dec		CTR
+# Apple A7	2.39		1.20		1.20
+# Cortex-A53	2.45		1.87		1.94
+# Cortex-A57	3.64		1.34		1.32
+
+$flavour = shift;
+$output  = shift;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
+
+$prefix="aes_v8";
+
+$code=<<___;
+#include "arm_arch.h"
+
+#if __ARM_ARCH__>=7
+.text
+___
+$code.=".arch	armv8-a+crypto\n"	if ($flavour =~ /64/);
+$code.=".fpu	neon\n.code	32\n"	if ($flavour !~ /64/);
+
+# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
+# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to
+# maintain both 32- and 64-bit codes within single module and
+# transliterate common code to either flavour with regex vodoo.
+#
+{{{
+my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12");
+my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
+	$flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10));
+
+
+$code.=<<___;
+.align	5
+.Lrcon:
+.long	0x01,0x01,0x01,0x01
+.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
+.long	0x1b,0x1b,0x1b,0x1b
+
+.globl	${prefix}_set_encrypt_key
+.type	${prefix}_set_encrypt_key,%function
+.align	5
+${prefix}_set_encrypt_key:
+.Lenc_key:
+___
+$code.=<<___	if ($flavour =~ /64/);
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+___
+$code.=<<___;
+	mov	$ptr,#-1
+	cmp	$inp,#0
+	b.eq	.Lenc_key_abort
+	cmp	$out,#0
+	b.eq	.Lenc_key_abort
+	mov	$ptr,#-2
+	cmp	$bits,#128
+	b.lt	.Lenc_key_abort
+	cmp	$bits,#256
+	b.gt	.Lenc_key_abort
+	tst	$bits,#0x3f
+	b.ne	.Lenc_key_abort
+
+	adr	$ptr,.Lrcon
+	cmp	$bits,#192
+
+	veor	$zero,$zero,$zero
+	vld1.8	{$in0},[$inp],#16
+	mov	$bits,#8		// reuse $bits
+	vld1.32	{$rcon,$mask},[$ptr],#32
+
+	b.lt	.Loop128
+	b.eq	.L192
+	b	.L256
+
+.align	4
+.Loop128:
+	vtbl.8	$key,{$in0},$mask
+	vext.8	$tmp,$zero,$in0,#12
+	vst1.32	{$in0},[$out],#16
+	aese	$key,$zero
+	subs	$bits,$bits,#1
+
+	veor	$in0,$in0,$tmp
+	vext.8	$tmp,$zero,$tmp,#12
+	veor	$in0,$in0,$tmp
+	vext.8	$tmp,$zero,$tmp,#12
+	 veor	$key,$key,$rcon
+	veor	$in0,$in0,$tmp
+	vshl.u8	$rcon,$rcon,#1
+	veor	$in0,$in0,$key
+	b.ne	.Loop128
+
+	vld1.32	{$rcon},[$ptr]
+
+	vtbl.8	$key,{$in0},$mask
+	vext.8	$tmp,$zero,$in0,#12
+	vst1.32	{$in0},[$out],#16
+	aese	$key,$zero
+
+	veor	$in0,$in0,$tmp
+	vext.8	$tmp,$zero,$tmp,#12
+	veor	$in0,$in0,$tmp
+	vext.8	$tmp,$zero,$tmp,#12
+	 veor	$key,$key,$rcon
+	veor	$in0,$in0,$tmp
+	vshl.u8	$rcon,$rcon,#1
+	veor	$in0,$in0,$key
+
+	vtbl.8	$key,{$in0},$mask
+	vext.8	$tmp,$zero,$in0,#12
+	vst1.32	{$in0},[$out],#16
+	aese	$key,$zero
+
+	veor	$in0,$in0,$tmp
+	vext.8	$tmp,$zero,$tmp,#12
+	veor	$in0,$in0,$tmp
+	vext.8	$tmp,$zero,$tmp,#12
+	 veor	$key,$key,$rcon
+	veor	$in0,$in0,$tmp
+	veor	$in0,$in0,$key
+	vst1.32	{$in0},[$out]
+	add	$out,$out,#0x50
+
+	mov	$rounds,#10
+	b	.Ldone
+
+.align	4
+.L192:
+	vld1.8	{$in1},[$inp],#8
+	vmov.i8	$key,#8			// borrow $key
+	vst1.32	{$in0},[$out],#16
+	vsub.i8	$mask,$mask,$key	// adjust the mask
+
+.Loop192:
+	vtbl.8	$key,{$in1},$mask
+	vext.8	$tmp,$zero,$in0,#12
+	vst1.32	{$in1},[$out],#8
+	aese	$key,$zero
+	subs	$bits,$bits,#1
+
+	veor	$in0,$in0,$tmp
+	vext.8	$tmp,$zero,$tmp,#12
+	veor	$in0,$in0,$tmp
+	vext.8	$tmp,$zero,$tmp,#12
+	veor	$in0,$in0,$tmp
+
+	vdup.32	$tmp,${in0}[3]
+	veor	$tmp,$tmp,$in1
+	 veor	$key,$key,$rcon
+	vext.8	$in1,$zero,$in1,#12
+	vshl.u8	$rcon,$rcon,#1
+	veor	$in1,$in1,$tmp
+	veor	$in0,$in0,$key
+	veor	$in1,$in1,$key
+	vst1.32	{$in0},[$out],#16
+	b.ne	.Loop192
+
+	mov	$rounds,#12
+	add	$out,$out,#0x20
+	b	.Ldone
+
+.align	4
+.L256:
+	vld1.8	{$in1},[$inp]
+	mov	$bits,#7
+	mov	$rounds,#14
+	vst1.32	{$in0},[$out],#16
+
+.Loop256:
+	vtbl.8	$key,{$in1},$mask
+	vext.8	$tmp,$zero,$in0,#12
+	vst1.32	{$in1},[$out],#16
+	aese	$key,$zero
+	subs	$bits,$bits,#1
+
+	veor	$in0,$in0,$tmp
+	vext.8	$tmp,$zero,$tmp,#12
+	veor	$in0,$in0,$tmp
+	vext.8	$tmp,$zero,$tmp,#12
+	 veor	$key,$key,$rcon
+	veor	$in0,$in0,$tmp
+	vshl.u8	$rcon,$rcon,#1
+	veor	$in0,$in0,$key
+	vst1.32	{$in0},[$out],#16
+	b.eq	.Ldone
+
+	vdup.32	$key,${in0}[3]		// just splat
+	vext.8	$tmp,$zero,$in1,#12
+	aese	$key,$zero
+
+	veor	$in1,$in1,$tmp
+	vext.8	$tmp,$zero,$tmp,#12
+	veor	$in1,$in1,$tmp
+	vext.8	$tmp,$zero,$tmp,#12
+	veor	$in1,$in1,$tmp
+
+	veor	$in1,$in1,$key
+	b	.Loop256
+
+.Ldone:
+	str	$rounds,[$out]
+	mov	$ptr,#0
+
+.Lenc_key_abort:
+	mov	x0,$ptr			// return value
+	`"ldr	x29,[sp],#16"		if ($flavour =~ /64/)`
+	ret
+.size	${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key
+
+.globl	${prefix}_set_decrypt_key
+.type	${prefix}_set_decrypt_key,%function
+.align	5
+${prefix}_set_decrypt_key:
+___
+$code.=<<___	if ($flavour =~ /64/);
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+___
+$code.=<<___	if ($flavour !~ /64/);
+	stmdb	sp!,{r4,lr}
+___
+$code.=<<___;
+	bl	.Lenc_key
+
+	cmp	x0,#0
+	b.ne	.Ldec_key_abort
+
+	sub	$out,$out,#240		// restore original $out
+	mov	x4,#-16
+	add	$inp,$out,x12,lsl#4	// end of key schedule
+
+	vld1.32	{v0.16b},[$out]
+	vld1.32	{v1.16b},[$inp]
+	vst1.32	{v0.16b},[$inp],x4
+	vst1.32	{v1.16b},[$out],#16
+
+.Loop_imc:
+	vld1.32	{v0.16b},[$out]
+	vld1.32	{v1.16b},[$inp]
+	aesimc	v0.16b,v0.16b
+	aesimc	v1.16b,v1.16b
+	vst1.32	{v0.16b},[$inp],x4
+	vst1.32	{v1.16b},[$out],#16
+	cmp	$inp,$out
+	b.hi	.Loop_imc
+
+	vld1.32	{v0.16b},[$out]
+	aesimc	v0.16b,v0.16b
+	vst1.32	{v0.16b},[$inp]
+
+	eor	x0,x0,x0		// return value
+.Ldec_key_abort:
+___
+$code.=<<___	if ($flavour !~ /64/);
+	ldmia	sp!,{r4,pc}
+___
+$code.=<<___	if ($flavour =~ /64/);
+	ldp	x29,x30,[sp],#16
+	ret
+___
+$code.=<<___;
+.size	${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key
+___
+}}}
+{{{
+sub gen_block () {
+my $dir = shift;
+my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc");
+my ($inp,$out,$key)=map("x$_",(0..2));
+my $rounds="w3";
+my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..3));
+
+$code.=<<___;
+.globl	${prefix}_${dir}crypt
+.type	${prefix}_${dir}crypt,%function
+.align	5
+${prefix}_${dir}crypt:
+	ldr	$rounds,[$key,#240]
+	vld1.32	{$rndkey0},[$key],#16
+	vld1.8	{$inout},[$inp]
+	sub	$rounds,$rounds,#2
+	vld1.32	{$rndkey1},[$key],#16
+
+.Loop_${dir}c:
+	aes$e	$inout,$rndkey0
+	vld1.32	{$rndkey0},[$key],#16
+	aes$mc	$inout,$inout
+	subs	$rounds,$rounds,#2
+	aes$e	$inout,$rndkey1
+	vld1.32	{$rndkey1},[$key],#16
+	aes$mc	$inout,$inout
+	b.gt	.Loop_${dir}c
+
+	aes$e	$inout,$rndkey0
+	vld1.32	{$rndkey0},[$key]
+	aes$mc	$inout,$inout
+	aes$e	$inout,$rndkey1
+	veor	$inout,$inout,$rndkey0
+
+	vst1.8	{$inout},[$out]
+	ret
+.size	${prefix}_${dir}crypt,.-${prefix}_${dir}crypt
+___
+}
+&gen_block("en");
+&gen_block("de");
+}}}
+{{{
+my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5";
+my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12");
+my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
+
+my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
+
+### q8-q15	preloaded key schedule
+
+$code.=<<___;
+.globl	${prefix}_cbc_encrypt
+.type	${prefix}_cbc_encrypt,%function
+.align	5
+${prefix}_cbc_encrypt:
+___
+$code.=<<___	if ($flavour =~ /64/);
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+___
+$code.=<<___	if ($flavour !~ /64/);
+	mov	ip,sp
+	stmdb	sp!,{r4-r8,lr}
+	vstmdb	sp!,{d8-d15}            @ ABI specification says so
+	ldmia	ip,{r4-r5}		@ load remaining args
+___
+$code.=<<___;
+	subs	$len,$len,#16
+	mov	$step,#16
+	b.lo	.Lcbc_abort
+	cclr	$step,eq
+
+	cmp	$enc,#0			// en- or decrypting?
+	ldr	$rounds,[$key,#240]
+	and	$len,$len,#-16
+	vld1.8	{$ivec},[$ivp]
+	vld1.8	{$dat},[$inp],$step
+
+	vld1.32	{q8-q9},[$key]		// load key schedule...
+	sub	$rounds,$rounds,#6
+	add	$key_,$key,x5,lsl#4	// pointer to last 7 round keys
+	sub	$rounds,$rounds,#2
+	vld1.32	{q10-q11},[$key_],#32
+	vld1.32	{q12-q13},[$key_],#32
+	vld1.32	{q14-q15},[$key_],#32
+	vld1.32	{$rndlast},[$key_]
+
+	add	$key_,$key,#32
+	mov	$cnt,$rounds
+	b.eq	.Lcbc_dec
+
+	cmp	$rounds,#2
+	veor	$dat,$dat,$ivec
+	veor	$rndzero_n_last,q8,$rndlast
+	b.eq	.Lcbc_enc128
+
+.Loop_cbc_enc:
+	aese	$dat,q8
+	vld1.32	{q8},[$key_],#16
+	aesmc	$dat,$dat
+	subs	$cnt,$cnt,#2
+	aese	$dat,q9
+	vld1.32	{q9},[$key_],#16
+	aesmc	$dat,$dat
+	b.gt	.Loop_cbc_enc
+
+	aese	$dat,q8
+	aesmc	$dat,$dat
+	 subs	$len,$len,#16
+	aese	$dat,q9
+	aesmc	$dat,$dat
+	 cclr	$step,eq
+	aese	$dat,q10
+	aesmc	$dat,$dat
+	 add	$key_,$key,#16
+	aese	$dat,q11
+	aesmc	$dat,$dat
+	 vld1.8	{q8},[$inp],$step
+	aese	$dat,q12
+	aesmc	$dat,$dat
+	 veor	q8,q8,$rndzero_n_last
+	aese	$dat,q13
+	aesmc	$dat,$dat
+	 vld1.32 {q9},[$key_],#16	// re-pre-load rndkey[1]
+	aese	$dat,q14
+	aesmc	$dat,$dat
+	aese	$dat,q15
+
+	 mov	$cnt,$rounds
+	veor	$ivec,$dat,$rndlast
+	vst1.8	{$ivec},[$out],#16
+	b.hs	.Loop_cbc_enc
+
+	b	.Lcbc_done
+
+.align	5
+.Lcbc_enc128:
+	vld1.32	{$in0-$in1},[$key_]
+	aese	$dat,q8
+	aesmc	$dat,$dat
+	b	.Lenter_cbc_enc128
+.Loop_cbc_enc128:
+	aese	$dat,q8
+	aesmc	$dat,$dat
+	 vst1.8	{$ivec},[$out],#16
+.Lenter_cbc_enc128:
+	aese	$dat,q9
+	aesmc	$dat,$dat
+	 subs	$len,$len,#16
+	aese	$dat,$in0
+	aesmc	$dat,$dat
+	 cclr	$step,eq
+	aese	$dat,$in1
+	aesmc	$dat,$dat
+	aese	$dat,q10
+	aesmc	$dat,$dat
+	aese	$dat,q11
+	aesmc	$dat,$dat
+	 vld1.8	{q8},[$inp],$step
+	aese	$dat,q12
+	aesmc	$dat,$dat
+	aese	$dat,q13
+	aesmc	$dat,$dat
+	aese	$dat,q14
+	aesmc	$dat,$dat
+	 veor	q8,q8,$rndzero_n_last
+	aese	$dat,q15
+	veor	$ivec,$dat,$rndlast
+	b.hs	.Loop_cbc_enc128
+
+	vst1.8	{$ivec},[$out],#16
+	b	.Lcbc_done
+___
+{
+my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
+$code.=<<___;
+.align	5
+.Lcbc_dec:
+	vld1.8	{$dat2},[$inp],#16
+	subs	$len,$len,#32		// bias
+	add	$cnt,$rounds,#2
+	vorr	$in1,$dat,$dat
+	vorr	$dat1,$dat,$dat
+	vorr	$in2,$dat2,$dat2
+	b.lo	.Lcbc_dec_tail
+
+	vorr	$dat1,$dat2,$dat2
+	vld1.8	{$dat2},[$inp],#16
+	vorr	$in0,$dat,$dat
+	vorr	$in1,$dat1,$dat1
+	vorr	$in2,$dat2,$dat2
+
+.Loop3x_cbc_dec:
+	aesd	$dat0,q8
+	aesd	$dat1,q8
+	aesd	$dat2,q8
+	vld1.32	{q8},[$key_],#16
+	aesimc	$dat0,$dat0
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	subs	$cnt,$cnt,#2
+	aesd	$dat0,q9
+	aesd	$dat1,q9
+	aesd	$dat2,q9
+	vld1.32	{q9},[$key_],#16
+	aesimc	$dat0,$dat0
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	b.gt	.Loop3x_cbc_dec
+
+	aesd	$dat0,q8
+	aesd	$dat1,q8
+	aesd	$dat2,q8
+	 veor	$tmp0,$ivec,$rndlast
+	aesimc	$dat0,$dat0
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	 veor	$tmp1,$in0,$rndlast
+	aesd	$dat0,q9
+	aesd	$dat1,q9
+	aesd	$dat2,q9
+	 veor	$tmp2,$in1,$rndlast
+	 subs	$len,$len,#0x30
+	aesimc	$dat0,$dat0
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	 vorr	$ivec,$in2,$in2
+	 mov.lo	x6,$len			// x6, $cnt, is zero at this point
+	aesd	$dat0,q12
+	aesd	$dat1,q12
+	aesd	$dat2,q12
+	 add	$inp,$inp,x6		// $inp is adjusted in such way that
+					// at exit from the loop $dat1-$dat2
+					// are loaded with last "words"
+	aesimc	$dat0,$dat0
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	 mov	$key_,$key
+	aesd	$dat0,q13
+	aesd	$dat1,q13
+	aesd	$dat2,q13
+	 vld1.8	{$in0},[$inp],#16
+	aesimc	$dat0,$dat0
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	 vld1.8	{$in1},[$inp],#16
+	aesd	$dat0,q14
+	aesd	$dat1,q14
+	aesd	$dat2,q14
+	 vld1.8	{$in2},[$inp],#16
+	aesimc	$dat0,$dat0
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	 vld1.32 {q8},[$key_],#16	// re-pre-load rndkey[0]
+	aesd	$dat0,q15
+	aesd	$dat1,q15
+	aesd	$dat2,q15
+
+	 add	$cnt,$rounds,#2
+	veor	$tmp0,$tmp0,$dat0
+	veor	$tmp1,$tmp1,$dat1
+	veor	$dat2,$dat2,$tmp2
+	 vld1.32 {q9},[$key_],#16	// re-pre-load rndkey[1]
+	 vorr	$dat0,$in0,$in0
+	vst1.8	{$tmp0},[$out],#16
+	 vorr	$dat1,$in1,$in1
+	vst1.8	{$tmp1},[$out],#16
+	vst1.8	{$dat2},[$out],#16
+	 vorr	$dat2,$in2,$in2
+	b.hs	.Loop3x_cbc_dec
+
+	cmn	$len,#0x30
+	b.eq	.Lcbc_done
+	nop
+
+.Lcbc_dec_tail:
+	aesd	$dat1,q8
+	aesd	$dat2,q8
+	vld1.32	{q8},[$key_],#16
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	subs	$cnt,$cnt,#2
+	aesd	$dat1,q9
+	aesd	$dat2,q9
+	vld1.32	{q9},[$key_],#16
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	b.gt	.Lcbc_dec_tail
+
+	aesd	$dat1,q8
+	aesd	$dat2,q8
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	aesd	$dat1,q9
+	aesd	$dat2,q9
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	aesd	$dat1,q12
+	aesd	$dat2,q12
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	 cmn	$len,#0x20
+	aesd	$dat1,q13
+	aesd	$dat2,q13
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	 veor	$tmp1,$ivec,$rndlast
+	aesd	$dat1,q14
+	aesd	$dat2,q14
+	aesimc	$dat1,$dat1
+	aesimc	$dat2,$dat2
+	 veor	$tmp2,$in1,$rndlast
+	aesd	$dat1,q15
+	aesd	$dat2,q15
+	b.eq	.Lcbc_dec_one
+	veor	$tmp1,$tmp1,$dat1
+	veor	$tmp2,$tmp2,$dat2
+	 vorr	$ivec,$in2,$in2
+	vst1.8	{$tmp1},[$out],#16
+	vst1.8	{$tmp2},[$out],#16
+	b	.Lcbc_done
+
+.Lcbc_dec_one:
+	veor	$tmp1,$tmp1,$dat2
+	 vorr	$ivec,$in2,$in2
+	vst1.8	{$tmp1},[$out],#16
+
+.Lcbc_done:
+	vst1.8	{$ivec},[$ivp]
+.Lcbc_abort:
+___
+}
+$code.=<<___	if ($flavour !~ /64/);
+	vldmia	sp!,{d8-d15}
+	ldmia	sp!,{r4-r8,pc}
+___
+$code.=<<___	if ($flavour =~ /64/);
+	ldr	x29,[sp],#16
+	ret
+___
+$code.=<<___;
+.size	${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt
+___
+}}}
+{{{
+my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4));
+my ($rounds,$cnt,$key_)=("w5","w6","x7");
+my ($ctr,$tctr0,$tctr1,$tctr2)=map("w$_",(8..10,12));
+my $step="x12";		# aliases with $tctr2
+
+my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
+my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
+
+my ($dat,$tmp)=($dat0,$tmp0);
+
+### q8-q15	preloaded key schedule
+
+$code.=<<___;
+.globl	${prefix}_ctr32_encrypt_blocks
+.type	${prefix}_ctr32_encrypt_blocks,%function
+.align	5
+${prefix}_ctr32_encrypt_blocks:
+___
+$code.=<<___	if ($flavour =~ /64/);
+	stp		x29,x30,[sp,#-16]!
+	add		x29,sp,#0
+___
+$code.=<<___	if ($flavour !~ /64/);
+	mov		ip,sp
+	stmdb		sp!,{r4-r10,lr}
+	vstmdb		sp!,{d8-d15}            @ ABI specification says so
+	ldr		r4, [ip]		@ load remaining arg
+___
+$code.=<<___;
+	ldr		$rounds,[$key,#240]
+
+	ldr		$ctr, [$ivp, #12]
+	vld1.32		{$dat0},[$ivp]
+
+	vld1.32		{q8-q9},[$key]		// load key schedule...
+	sub		$rounds,$rounds,#4
+	mov		$step,#16
+	cmp		$len,#2
+	add		$key_,$key,x5,lsl#4	// pointer to last 5 round keys
+	sub		$rounds,$rounds,#2
+	vld1.32		{q12-q13},[$key_],#32
+	vld1.32		{q14-q15},[$key_],#32
+	vld1.32		{$rndlast},[$key_]
+	add		$key_,$key,#32
+	mov		$cnt,$rounds
+	cclr		$step,lo
+#ifndef __ARMEB__
+	rev		$ctr, $ctr
+#endif
+	vorr		$dat1,$dat0,$dat0
+	add		$tctr1, $ctr, #1
+	vorr		$dat2,$dat0,$dat0
+	add		$ctr, $ctr, #2
+	vorr		$ivec,$dat0,$dat0
+	rev		$tctr1, $tctr1
+	vmov.32		${dat1}[3],$tctr1
+	b.ls		.Lctr32_tail
+	rev		$tctr2, $ctr
+	sub		$len,$len,#3		// bias
+	vmov.32		${dat2}[3],$tctr2
+	b		.Loop3x_ctr32
+
+.align	4
+.Loop3x_ctr32:
+	aese		$dat0,q8
+	aese		$dat1,q8
+	aese		$dat2,q8
+	vld1.32		{q8},[$key_],#16
+	aesmc		$dat0,$dat0
+	aesmc		$dat1,$dat1
+	aesmc		$dat2,$dat2
+	subs		$cnt,$cnt,#2
+	aese		$dat0,q9
+	aese		$dat1,q9
+	aese		$dat2,q9
+	vld1.32		{q9},[$key_],#16
+	aesmc		$dat0,$dat0
+	aesmc		$dat1,$dat1
+	aesmc		$dat2,$dat2
+	b.gt		.Loop3x_ctr32
+
+	aese		$dat0,q8
+	aese		$dat1,q8
+	aese		$dat2,q8
+	 mov		$key_,$key
+	aesmc		$tmp0,$dat0
+	 vld1.8		{$in0},[$inp],#16
+	aesmc		$tmp1,$dat1
+	aesmc		$dat2,$dat2
+	 vorr		$dat0,$ivec,$ivec
+	aese		$tmp0,q9
+	 vld1.8		{$in1},[$inp],#16
+	aese		$tmp1,q9
+	aese		$dat2,q9
+	 vorr		$dat1,$ivec,$ivec
+	aesmc		$tmp0,$tmp0
+	 vld1.8		{$in2},[$inp],#16
+	aesmc		$tmp1,$tmp1
+	aesmc		$tmp2,$dat2
+	 vorr		$dat2,$ivec,$ivec
+	 add		$tctr0,$ctr,#1
+	aese		$tmp0,q12
+	aese		$tmp1,q12
+	aese		$tmp2,q12
+	 veor		$in0,$in0,$rndlast
+	 add		$tctr1,$ctr,#2
+	aesmc		$tmp0,$tmp0
+	aesmc		$tmp1,$tmp1
+	aesmc		$tmp2,$tmp2
+	 veor		$in1,$in1,$rndlast
+	 add		$ctr,$ctr,#3
+	aese		$tmp0,q13
+	aese		$tmp1,q13
+	aese		$tmp2,q13
+	 veor		$in2,$in2,$rndlast
+	 rev		$tctr0,$tctr0
+	aesmc		$tmp0,$tmp0
+	 vld1.32	 {q8},[$key_],#16	// re-pre-load rndkey[0]
+	aesmc		$tmp1,$tmp1
+	aesmc		$tmp2,$tmp2
+	 vmov.32	${dat0}[3], $tctr0
+	 rev		$tctr1,$tctr1
+	aese		$tmp0,q14
+	aese		$tmp1,q14
+	aese		$tmp2,q14
+	 vmov.32	${dat1}[3], $tctr1
+	 rev		$tctr2,$ctr
+	aesmc		$tmp0,$tmp0
+	aesmc		$tmp1,$tmp1
+	aesmc		$tmp2,$tmp2
+	 vmov.32	${dat2}[3], $tctr2
+	 subs		$len,$len,#3
+	aese		$tmp0,q15
+	aese		$tmp1,q15
+	aese		$tmp2,q15
+
+	 mov		$cnt,$rounds
+	veor		$in0,$in0,$tmp0
+	veor		$in1,$in1,$tmp1
+	veor		$in2,$in2,$tmp2
+	 vld1.32	 {q9},[$key_],#16	// re-pre-load rndkey[1]
+	vst1.8		{$in0},[$out],#16
+	vst1.8		{$in1},[$out],#16
+	vst1.8		{$in2},[$out],#16
+	b.hs		.Loop3x_ctr32
+
+	adds		$len,$len,#3
+	b.eq		.Lctr32_done
+	cmp		$len,#1
+	mov		$step,#16
+	cclr		$step,eq
+
+.Lctr32_tail:
+	aese		$dat0,q8
+	aese		$dat1,q8
+	vld1.32		{q8},[$key_],#16
+	aesmc		$dat0,$dat0
+	aesmc		$dat1,$dat1
+	subs		$cnt,$cnt,#2
+	aese		$dat0,q9
+	aese		$dat1,q9
+	vld1.32		{q9},[$key_],#16
+	aesmc		$dat0,$dat0
+	aesmc		$dat1,$dat1
+	b.gt		.Lctr32_tail
+
+	aese		$dat0,q8
+	aese		$dat1,q8
+	aesmc		$dat0,$dat0
+	aesmc		$dat1,$dat1
+	aese		$dat0,q9
+	aese		$dat1,q9
+	aesmc		$dat0,$dat0
+	aesmc		$dat1,$dat1
+	 vld1.8		{$in0},[$inp],$step
+	aese		$dat0,q12
+	aese		$dat1,q12
+	 vld1.8		{$in1},[$inp]
+	aesmc		$dat0,$dat0
+	aesmc		$dat1,$dat1
+	aese		$dat0,q13
+	aese		$dat1,q13
+	aesmc		$dat0,$dat0
+	aesmc		$dat1,$dat1
+	aese		$dat0,q14
+	aese		$dat1,q14
+	 veor		$in0,$in0,$rndlast
+	aesmc		$dat0,$dat0
+	aesmc		$dat1,$dat1
+	 veor		$in1,$in1,$rndlast
+	aese		$dat0,q15
+	aese		$dat1,q15
+
+	cmp		$len,#1
+	veor		$in0,$in0,$dat0
+	veor		$in1,$in1,$dat1
+	vst1.8		{$in0},[$out],#16
+	b.eq		.Lctr32_done
+	vst1.8		{$in1},[$out]
+
+.Lctr32_done:
+___
+$code.=<<___	if ($flavour !~ /64/);
+	vldmia		sp!,{d8-d15}
+	ldmia		sp!,{r4-r10,pc}
+___
+$code.=<<___	if ($flavour =~ /64/);
+	ldr		x29,[sp],#16
+	ret
+___
+$code.=<<___;
+.size	${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks
+___
+}}}
+$code.=<<___;
+#endif
+___
+########################################
+if ($flavour =~ /64/) {			######## 64-bit code
+    my %opcode = (
+	"aesd"	=>	0x4e285800,	"aese"	=>	0x4e284800,
+	"aesimc"=>	0x4e287800,	"aesmc"	=>	0x4e286800	);
+
+    local *unaes = sub {
+	my ($mnemonic,$arg)=@_;
+
+	$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o	&&
+	sprintf ".inst\t0x%08x\t//%s %s",
+			$opcode{$mnemonic}|$1|($2<<5),
+			$mnemonic,$arg;
+    };
+
+    foreach(split("\n",$code)) {
+	s/\`([^\`]*)\`/eval($1)/geo;
+
+	s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo;	# old->new registers
+	s/@\s/\/\//o;			# old->new style commentary
+
+	#s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or
+	s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel	$1$2,$1zr,$1$2,$3/o	or
+	s/mov\.([a-z]+)\s+([wx][0-9]+),\s*([wx][0-9]+)/csel	$2,$3,$2,$1/o	or
+	s/vmov\.i8/movi/o	or	# fix up legacy mnemonics
+	s/vext\.8/ext/o		or
+	s/vrev32\.8/rev32/o	or
+	s/vtst\.8/cmtst/o	or
+	s/vshr/ushr/o		or
+	s/^(\s+)v/$1/o		or	# strip off v prefix
+	s/\bbx\s+lr\b/ret/o;
+
+	# fix up remainig legacy suffixes
+	s/\.[ui]?8//o;
+	m/\],#8/o and s/\.16b/\.8b/go;
+	s/\.[ui]?32//o and s/\.16b/\.4s/go;
+	s/\.[ui]?64//o and s/\.16b/\.2d/go;
+	s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
+
+	print $_,"\n";
+    }
+} else {				######## 32-bit code
+    my %opcode = (
+	"aesd"	=>	0xf3b00340,	"aese"	=>	0xf3b00300,
+	"aesimc"=>	0xf3b003c0,	"aesmc"	=>	0xf3b00380	);
+
+    local *unaes = sub {
+	my ($mnemonic,$arg)=@_;
+
+	if ($arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) {
+	    my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
+					 |(($2&7)<<1) |(($2&8)<<2);
+	    # since ARMv7 instructions are always encoded little-endian.
+	    # correct solution is to use .inst directive, but older
+	    # assemblers don't implement it:-(
+	    sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+			$word&0xff,($word>>8)&0xff,
+			($word>>16)&0xff,($word>>24)&0xff,
+			$mnemonic,$arg;
+	}
+    };
+
+    sub unvtbl {
+	my $arg=shift;
+
+	$arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o &&
+	sprintf	"vtbl.8	d%d,{q%d},d%d\n\t".
+		"vtbl.8	d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1;	
+    }
+
+    sub unvdup32 {
+	my $arg=shift;
+
+	$arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
+	sprintf	"vdup.32	q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;	
+    }
+
+    sub unvmov32 {
+	my $arg=shift;
+
+	$arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o &&
+	sprintf	"vmov.32	d%d[%d],%s",2*$1+($2>>1),$2&1,$3;	
+    }
+
+    foreach(split("\n",$code)) {
+	s/\`([^\`]*)\`/eval($1)/geo;
+
+	s/\b[wx]([0-9]+)\b/r$1/go;		# new->old registers
+	s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;	# new->old registers
+	s/\/\/\s?/@ /o;				# new->old style commentary
+
+	# fix up remainig new-style suffixes
+	s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo	or
+	s/\],#[0-9]+/]!/o;
+
+	s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or
+	s/cclr\s+([^,]+),\s*([a-z]+)/mov$2	$1,#0/o	or
+	s/vtbl\.8\s+(.*)/unvtbl($1)/geo			or
+	s/vdup\.32\s+(.*)/unvdup32($1)/geo		or
+	s/vmov\.32\s+(.*)/unvmov32($1)/geo		or
+	s/^(\s+)b\./$1b/o				or
+	s/^(\s+)mov\./$1mov/o				or
+	s/^(\s+)ret/$1bx\tlr/o;
+
+	print $_,"\n";
+    }
+}
+
+close STDOUT;
--- a/crypto/arm64cpuid.pl
+++ b/crypto/arm64cpuid.pl
@@ -0,0 +1,68 @@
+#!/usr/bin/env perl
+
+$flavour = shift;
+$output  = shift;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
+
+$code.=<<___;
+#include "arm_arch.h"
+
+.text
+.arch	armv8-a+crypto
+
+.align	5
+.globl	_armv7_neon_probe
+.type	_armv7_neon_probe,%function
+_armv7_neon_probe:
+	orr	v15.16b, v15.16b, v15.16b
+	ret
+.size	_armv7_neon_probe,.-_armv7_neon_probe
+
+.globl	_armv7_tick
+.type	_armv7_tick,%function
+_armv7_tick:
+#ifdef	__APPLE__
+	mrs	x0, CNTPCT_EL0
+#else
+	mrs	x0, CNTVCT_EL0
+#endif
+	ret
+.size	_armv7_tick,.-_armv7_tick
+
+.globl	_armv8_aes_probe
+.type	_armv8_aes_probe,%function
+_armv8_aes_probe:
+	aese	v0.16b, v0.16b
+	ret
+.size	_armv8_aes_probe,.-_armv8_aes_probe
+
+.globl	_armv8_sha1_probe
+.type	_armv8_sha1_probe,%function
+_armv8_sha1_probe:
+	sha1h	s0, s0
+	ret
+.size	_armv8_sha1_probe,.-_armv8_sha1_probe
+
+.globl	_armv8_sha256_probe
+.type	_armv8_sha256_probe,%function
+_armv8_sha256_probe:
+	sha256su0	v0.4s, v0.4s
+	ret
+.size	_armv8_sha256_probe,.-_armv8_sha256_probe
+.globl	_armv8_pmull_probe
+.type	_armv8_pmull_probe,%function
+_armv8_pmull_probe:
+	pmull	v0.1q, v0.1d, v0.1d
+	ret
+.size	_armv8_pmull_probe,.-_armv8_pmull_probe
+___
+
+print $code;
+close STDOUT;
--- a/crypto/arm_arch.h
+++ b/crypto/arm_arch.h
@@ -10,13 +10,22 @@
 #   define __ARMEL__
 #  endif
 # elif defined(__GNUC__)
+#  if  defined(__aarch64__)
+#   define __ARM_ARCH__ 8
+#   if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
+#    define __ARMEB__
+#   else
+#    define __ARMEL__
+#   endif
  /*
   * Why doesn't gcc define __ARM_ARCH__? Instead it defines
   * bunch of below macros. See all_architectires[] table in
   * gcc/config/arm/arm.c. On a side note it defines
   * __ARMEL__/__ARMEB__ for little-/big-endian.
   */
-#  if	defined(__ARM_ARCH_7__)	|| defined(__ARM_ARCH_7A__)	|| \
+#  elif defined(__ARM_ARCH_8A__)
+#    define __ARM_ARCH__ 8
+#  elif	defined(__ARM_ARCH_7__)	|| defined(__ARM_ARCH_7A__)	|| \
 	defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__)	|| \
 	defined(__ARM_ARCH_7EM__)
 #   define __ARM_ARCH__ 7
@@ -42,10 +51,14 @@

 #if !__ASSEMBLER__
 extern unsigned int OPENSSL_armcap_P;
+#endif
                                     
 #define ARMV7_NEON      (1<<0)
 #define ARMV7_TICK      (1<<1)
-#endif
+#define ARMV8_AES       (1<<2)
+#define ARMV8_SHA1      (1<<3)
+#define ARMV8_SHA256    (1<<4)
+#define ARMV8_PMULL     (1<<5)

 #endif
 #endif
--- a/crypto/armcap.c
+++ b/crypto/armcap.c
@@ -20,6 +20,10 @@ static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); }
 */
 void _armv7_neon_probe(void);
 unsigned int _armv7_tick(void);
+void _armv8_aes_probe(void);
+void _armv8_sha1_probe(void);
+void _armv8_sha256_probe(void);
+void _armv8_pmull_probe(void);

 unsigned int OPENSSL_rdtsc(void)
 	{
@@ -68,6 +72,28 @@ void OPENSSL_cpuid_setup(void)
 		{
 		_armv7_neon_probe();
 		OPENSSL_armcap_P |= ARMV7_NEON;
+#ifdef __aarch64__
+		if (sigsetjmp(ill_jmp,1) == 0)
+			{
+			_armv8_pmull_probe();
+			OPENSSL_armcap_P |= ARMV8_PMULL|ARMV8_AES;
+			}
+		else if (sigsetjmp(ill_jmp,1) == 0)
+			{
+			_armv8_aes_probe();
+			OPENSSL_armcap_P |= ARMV8_AES;
+			}
+		if (sigsetjmp(ill_jmp,1) == 0)
+			{
+			_armv8_sha1_probe();
+			OPENSSL_armcap_P |= ARMV8_SHA1;
+			}
+		if (sigsetjmp(ill_jmp,1) == 0)
+			{
+			_armv8_sha256_probe();
+			OPENSSL_armcap_P |= ARMV8_SHA256;
+			}
+#endif
 		}
 	if (sigsetjmp(ill_jmp,1) == 0)
 		{
--- a/crypto/armv4cpuid_ios.S
+++ b/crypto/armv4cpuid_ios.S
@@ -0,0 +1,210 @@
+#include "arm_arch.h"
+
+.text
+.code	32
+
+.align	5
+.globl	_OPENSSL_atomic_add
+
+_OPENSSL_atomic_add:
+#if __ARM_ARCH__>=6
+Ladd:	ldrex	r2,[r0]
+	add	r3,r2,r1
+	strex	r2,r3,[r0]
+	cmp	r2,#0
+	bne	Ladd
+	mov	r0,r3
+	bx	lr
+#else
+	stmdb	sp!,{r4,r5,r6,lr}
+	ldr	r2,Lspinlock
+	adr	r3,Lspinlock
+	mov	r4,r0
+	mov	r5,r1
+	add	r6,r3,r2	@ &spinlock
+	b	.+8
+Lspin:	bl	sched_yield
+	mov	r0,#-1
+	swp	r0,r0,[r6]
+	cmp	r0,#0
+	bne	Lspin
+
+	ldr	r2,[r4]
+	add	r2,r2,r5
+	str	r2,[r4]
+	str	r0,[r6]		@ release spinlock
+	ldmia	sp!,{r4,r5,r6,lr}
+	tst	lr,#1
+	moveq	pc,lr
+.word	0xe12fff1e	@ bx	lr
+#endif
+
+
+.globl	_OPENSSL_cleanse
+
+_OPENSSL_cleanse:
+	eor	ip,ip,ip
+	cmp	r1,#7
+	subhs	r1,r1,#4
+	bhs	Lot
+	cmp	r1,#0
+	beq	Lcleanse_done
+Little:
+	strb	ip,[r0],#1
+	subs	r1,r1,#1
+	bhi	Little
+	b	Lcleanse_done
+
+Lot:	tst	r0,#3
+	beq	Laligned
+	strb	ip,[r0],#1
+	sub	r1,r1,#1
+	b	Lot
+Laligned:
+	str	ip,[r0],#4
+	subs	r1,r1,#4
+	bhs	Laligned
+	adds	r1,r1,#4
+	bne	Little
+Lcleanse_done:
+#if __ARM_ARCH__>=5
+	bx	lr
+#else
+	tst	lr,#1
+	moveq	pc,lr
+.word	0xe12fff1e	@ bx	lr
+#endif
+
+
+
+
+.align	5
+.globl	__armv7_neon_probe
+
+__armv7_neon_probe:
+	vorr	q0,q0,q0
+	bx	lr
+
+
+.globl	__armv7_tick
+
+__armv7_tick:
+#ifdef	__APPLE__
+	mrrc	p15,0,r0,r1,c14		@ CNTPCT
+#else
+	mrrc	p15,1,r0,r1,c14		@ CNTVCT
+#endif
+	bx	lr
+
+
+.globl	__armv8_aes_probe
+
+__armv8_aes_probe:
+.byte	0x00,0x03,0xb0,0xf3	@ aese.8	q0,q0
+	bx	lr
+
+
+.globl	__armv8_sha1_probe
+
+__armv8_sha1_probe:
+.byte	0x40,0x0c,0x00,0xf2	@ sha1c.32	q0,q0,q0
+	bx	lr
+
+
+.globl	__armv8_sha256_probe
+
+__armv8_sha256_probe:
+.byte	0x40,0x0c,0x00,0xf3	@ sha256h.32	q0,q0,q0
+	bx	lr
+
+.globl	__armv8_pmull_probe
+
+__armv8_pmull_probe:
+.byte	0x00,0x0e,0xa0,0xf2	@ vmull.p64	q0,d0,d0
+	bx	lr
+
+.globl	_OPENSSL_wipe_cpu
+
+_OPENSSL_wipe_cpu:
+	ldr	r0,LOPENSSL_armcap
+	adr	r1,LOPENSSL_armcap
+	ldr	r0,[r1,r0]
+#ifdef	__APPLE__
+	ldr	r0,[r0]
+#endif
+	eor	r2,r2,r2
+	eor	r3,r3,r3
+	eor	ip,ip,ip
+	tst	r0,#1
+	beq	Lwipe_done
+	veor	q0, q0, q0
+	veor	q1, q1, q1
+	veor	q2, q2, q2
+	veor	q3, q3, q3
+	veor	q8, q8, q8
+	veor	q9, q9, q9
+	veor	q10, q10, q10
+	veor	q11, q11, q11
+	veor	q12, q12, q12
+	veor	q13, q13, q13
+	veor	q14, q14, q14
+	veor	q15, q15, q15
+Lwipe_done:
+	mov	r0,sp
+#if __ARM_ARCH__>=5
+	bx	lr
+#else
+	tst	lr,#1
+	moveq	pc,lr
+.word	0xe12fff1e	@ bx	lr
+#endif
+
+
+.globl	_OPENSSL_instrument_bus
+
+_OPENSSL_instrument_bus:
+	eor	r0,r0,r0
+#if __ARM_ARCH__>=5
+	bx	lr
+#else
+	tst	lr,#1
+	moveq	pc,lr
+.word	0xe12fff1e	@ bx	lr
+#endif
+
+
+.globl	_OPENSSL_instrument_bus2
+
+_OPENSSL_instrument_bus2:
+	eor	r0,r0,r0
+#if __ARM_ARCH__>=5
+	bx	lr
+#else
+	tst	lr,#1
+	moveq	pc,lr
+.word	0xe12fff1e	@ bx	lr
+#endif
+
+
+.align	5
+LOPENSSL_armcap:
+.word	OPENSSL_armcap_P-.
+#if __ARM_ARCH__>=6
+.align	5
+#else
+Lspinlock:
+.word	atomic_add_spinlock-Lspinlock
+.align	5
+
+.data
+.align	2
+atomic_add_spinlock:
+.word
+#endif
+
+.comm	_OPENSSL_armcap_P,4
+.non_lazy_symbol_pointer
+OPENSSL_armcap_P:
+.indirect_symbol	_OPENSSL_armcap_P
+.long	0
+.private_extern	_OPENSSL_armcap_P
--- a/crypto/bn/asm/armv4-gf2m.pl
+++ b/crypto/bn/asm/armv4-gf2m.pl
@@ -21,8 +21,20 @@
 # runs in even less cycles, ~30, improvement is measurable only on
 # longer keys. One has to optimize code elsewhere to get NEON glow...

-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+    open STDOUT,">$output";
+}

 sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     }
 sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   }
@@ -170,11 +182,18 @@ bn_GF2m_mul_2x2:
 #if __ARM_ARCH__>=7
 	ldr	r12,.LOPENSSL_armcap
 .Lpic:	ldr	r12,[pc,r12]
+#ifdef	__APPLE__
+	ldr	r12,[r12]
+#endif
 	tst	r12,#1
 	beq	.Lialu

 	veor	$A1,$A1
+#ifdef	__APPLE__
+	vmov	$B1,r3,r3		@ two copies of b1
+#else
 	vmov.32	$B1,r3,r3		@ two copies of b1
+#endif
 	vmov.32	${A1}[0],r1		@ a1

 	veor	$A0,$A0
--- a/crypto/bn/asm/armv4-mont.pl
+++ b/crypto/bn/asm/armv4-mont.pl
@@ -23,8 +23,20 @@
 # than 1/2KB. Windows CE port would be trivial, as it's exclusively
 # about decorations, ABI and instruction syntax are identical.

-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+    open STDOUT,">$output";
+}

 $num="r0";	# starts as num argument, but holds &tp[num-1]
 $ap="r1";
--- a/crypto/evp/e_aes.c
+++ b/crypto/evp/e_aes.c
@@ -471,6 +471,35 @@ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
 { return &aes_##keylen##_##mode; }
 #endif

+#if defined(OPENSSL_CPUID_OBJ) && defined(__aarch64__)
+#include "arm_arch.h"
+#if __ARM_ARCH__>=7
+# define HWAES_CAPABLE (OPENSSL_armcap_P & ARMV8_AES)
+# define HWAES_set_encrypt_key aes_v8_set_encrypt_key
+# define HWAES_set_decrypt_key aes_v8_set_decrypt_key
+# define HWAES_encrypt aes_v8_encrypt
+# define HWAES_decrypt aes_v8_decrypt
+# define HWAES_cbc_encrypt aes_v8_cbc_encrypt
+# define HWAES_ctr32_encrypt_blocks aes_v8_ctr32_encrypt_blocks
+#endif
+#endif
+
+#if defined(HWAES_CAPABLE)
+int HWAES_set_encrypt_key(const unsigned char *userKey, const int bits,
+	AES_KEY *key);
+int HWAES_set_decrypt_key(const unsigned char *userKey, const int bits,
+	AES_KEY *key);
+void HWAES_encrypt(const unsigned char *in, unsigned char *out,
+	const AES_KEY *key);
+void HWAES_decrypt(const unsigned char *in, unsigned char *out,
+	const AES_KEY *key);
+void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+	size_t length, const AES_KEY *key,
+	unsigned char *ivec, const int enc);
+void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+	size_t len, const AES_KEY *key, const unsigned char ivec[16]);
+#endif
+
 #define BLOCK_CIPHER_generic_pack(nid,keylen,flags)		\
 	BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1)	\
 	BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1)	\
@@ -489,6 +518,19 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 	mode = ctx->cipher->flags & EVP_CIPH_MODE;
 	if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE)
 	    && !enc)
+#ifdef HWAES_CAPABLE
+	    if (HWAES_CAPABLE)
+		{
+		ret = HWAES_set_decrypt_key(key,ctx->key_len*8,&dat->ks);
+		dat->block      = (block128_f)HWAES_decrypt;
+		dat->stream.cbc = NULL;
+#ifdef HWAES_cbc_encrypt
+		if (mode==EVP_CIPH_CBC_MODE)
+		    dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt;
+#endif
+		}
+	    else
+#endif
 #ifdef BSAES_CAPABLE
 	    if (BSAES_CAPABLE && mode==EVP_CIPH_CBC_MODE)
 		{
@@ -517,6 +559,26 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 					NULL;
 		}
 	else
+#ifdef HWAES_CAPABLE
+	    if (HWAES_CAPABLE)
+		{
+		ret = HWAES_set_encrypt_key(key,ctx->key_len*8,&dat->ks);
+		dat->block      = (block128_f)HWAES_encrypt;
+		dat->stream.cbc = NULL;
+#ifdef HWAES_cbc_encrypt
+		if (mode==EVP_CIPH_CBC_MODE)
+		    dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt;
+		else
+#endif
+#ifdef HWAES_ctr32_encrypt_blocks
+		if (mode==EVP_CIPH_CTR_MODE)
+		    dat->stream.ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks;
+		else
+#endif
+		(void)0;	/* terminate potentially open 'else' */
+		}
+	    else
+#endif
 #ifdef BSAES_CAPABLE
 	    if (BSAES_CAPABLE && mode==EVP_CIPH_CTR_MODE)
 		{
@@ -809,6 +871,21 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 		return 1;
 	if (key)
 		{ do {
+#ifdef HWAES_CAPABLE
+		if (HWAES_CAPABLE)
+			{
+			HWAES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks);
+			CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks,
+					(block128_f)HWAES_encrypt);
+#ifdef HWAES_ctr32_encrypt_blocks
+			gctx->ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks;
+#else
+			gctx->ctr = NULL;
+#endif
+			break;
+			}
+		else
+#endif
 #ifdef BSAES_CAPABLE
 		if (BSAES_CAPABLE)
 			{
@@ -1047,6 +1124,29 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 		{
 		xctx->stream = NULL;
 		/* key_len is two AES keys */
+#ifdef HWAES_CAPABLE
+		if (HWAES_CAPABLE)
+			{
+			if (enc)
+			    {
+			    HWAES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+			    xctx->xts.block1 = (block128_f)HWAES_encrypt;
+			    }
+			else
+			    {
+			    HWAES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+			    xctx->xts.block1 = (block128_f)HWAES_decrypt;
+			    }
+
+			HWAES_set_encrypt_key(key + ctx->key_len/2,
+						    ctx->key_len * 4, &xctx->ks2);
+			xctx->xts.block2 = (block128_f)HWAES_encrypt;
+
+			xctx->xts.key1 = &xctx->ks1;
+			break;
+			}
+		else
+#endif
 #ifdef VPAES_CAPABLE
 		if (VPAES_CAPABLE)
 		    {
@@ -1189,6 +1289,19 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 		return 1;
 	if (key) do
 		{
+#ifdef HWAES_CAPABLE
+		if (HWAES_CAPABLE)
+			{
+			HWAES_set_encrypt_key(key,ctx->key_len*8,&cctx->ks);
+
+			CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
+					&cctx->ks, (block128_f)HWAES_encrypt);
+			cctx->str = NULL;
+			cctx->key_set = 1;
+			break;
+			}
+		else
+#endif
 #ifdef VPAES_CAPABLE
 		if (VPAES_CAPABLE)
 			{
--- a/crypto/modes/Makefile
+++ b/crypto/modes/Makefile
@@ -56,11 +56,14 @@ ghash-alpha.s:	asm/ghash-alpha.pl
 	$(PERL) $< | $(CC) -E - | tee $@ > /dev/null
 ghash-parisc.s:	asm/ghash-parisc.pl
 	$(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@
+ghashv8-armx.S:	asm/ghashv8-armx.pl
+	$(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@

 # GNU make "catch all"
 ghash-%.S:	asm/ghash-%.pl;	$(PERL) $< $(PERLASM_SCHEME) $@

 ghash-armv4.o:	ghash-armv4.S
+ghashv8-armx.o:	ghashv8-armx.S

 files:
 	$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
--- a/crypto/modes/asm/ghash-armv4.pl
+++ b/crypto/modes/asm/ghash-armv4.pl
@@ -57,8 +57,20 @@
 # *native* byte order on current platform. See gcm128.c for working
 # example...

-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+    open STDOUT,">$output";
+}

 $Xi="r0";	# argument block
 $Htbl="r1";
@@ -112,6 +124,11 @@ $code=<<___;
 .text
 .code	32

+#ifdef  __APPLE__
+#define ldrplb	ldrbpl
+#define ldrneb	ldrbne
+#endif
+
 .type	rem_4bit,%object
 .align	5
 rem_4bit:
@@ -326,9 +343,9 @@ $code.=<<___;
 .align	4
 gcm_gmult_neon:
 	sub		$Htbl,#16		@ point at H in GCM128_CTX
-	vld1.64		`&Dhi("$IN")`,[$Xi,:64]!@ load Xi
+	vld1.64		`&Dhi("$IN")`,[$Xi]!	@ load Xi
 	vmov.i32	$mod,#0xe1		@ our irreducible polynomial
-	vld1.64		`&Dlo("$IN")`,[$Xi,:64]!
+	vld1.64		`&Dlo("$IN")`,[$Xi]!
 	vshr.u64	$mod,#32
 	vldmia		$Htbl,{$Hhi-$Hlo}	@ load H
 	veor		$zero,$zero
@@ -349,9 +366,9 @@ gcm_gmult_neon:
 .type	gcm_ghash_neon,%function
 .align	4
 gcm_ghash_neon:
-	vld1.64		`&Dhi("$Z")`,[$Xi,:64]!	@ load Xi
+	vld1.64		`&Dhi("$Z")`,[$Xi]!	@ load Xi
 	vmov.i32	$mod,#0xe1		@ our irreducible polynomial
-	vld1.64		`&Dlo("$Z")`,[$Xi,:64]!
+	vld1.64		`&Dlo("$Z")`,[$Xi]!
 	vshr.u64	$mod,#32
 	vldmia		$Xi,{$Hhi-$Hlo}		@ load H
 	veor		$zero,$zero
@@ -410,8 +427,8 @@ gcm_ghash_neon:
 	vrev64.8	$Z,$Z
 #endif
 	sub		$Xi,#16	
-	vst1.64		`&Dhi("$Z")`,[$Xi,:64]!	@ write out Xi
-	vst1.64		`&Dlo("$Z")`,[$Xi,:64]
+	vst1.64		`&Dhi("$Z")`,[$Xi]!	@ write out Xi
+	vst1.64		`&Dlo("$Z")`,[$Xi]

 	bx	lr
 .size	gcm_ghash_neon,.-gcm_ghash_neon
--- a/crypto/modes/asm/ghashv8-armx.pl
+++ b/crypto/modes/asm/ghashv8-armx.pl
@@ -0,0 +1,376 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# GHASH for ARMv8 Crypto Extension, 64-bit polynomial multiplication.
+#
+# June 2014
+#
+# Initial version was developed in tight cooperation with Ard
+# Biesheuvel <ard.biesheuvel@linaro.org> from bits-n-pieces from
+# other assembly modules. Just like aesv8-armx.pl this module
+# supports both AArch32 and AArch64 execution modes.
+#
+# July 2014
+#
+# Implement 2x aggregated reduction [see ghash-x86.pl for background
+# information].
+#
+# Current performance in cycles per processed byte:
+#
+#		PMULL[2]	32-bit NEON(*)
+# Apple A7	0.92		5.62
+# Cortex-A53	1.01		8.39
+# Cortex-A57	1.17		7.61
+#
+# (*)	presented for reference/comparison purposes;
+
+$flavour = shift;
+$output  = shift;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
+
+$Xi="x0";	# argument block
+$Htbl="x1";
+$inp="x2";
+$len="x3";
+
+$inc="x12";
+
+{
+my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3));
+my ($t0,$t1,$t2,$xC2,$H,$Hhl,$H2)=map("q$_",(8..14));
+
+$code=<<___;
+#include "arm_arch.h"
+
+.text
+___
+$code.=".arch	armv8-a+crypto\n"	if ($flavour =~ /64/);
+$code.=".fpu	neon\n.code	32\n"	if ($flavour !~ /64/);
+
+$code.=<<___;
+.global	gcm_init_v8
+.type	gcm_init_v8,%function
+.align	4
+gcm_init_v8:
+	vld1.64		{$t1},[x1]		@ load H
+	vmov.i8		$xC2,#0xe1
+	vshl.i64	$xC2,$xC2,#57		@ 0xc2.0
+	vext.8		$IN,$t1,$t1,#8
+	vshr.u64	$t2,$xC2,#63
+	vdup.32		$t1,${t1}[1]
+	vext.8		$t0,$t2,$xC2,#8		@ t0=0xc2....01
+	vshr.u64	$t2,$IN,#63
+	vshr.s32	$t1,$t1,#31		@ broadcast carry bit
+	vand		$t2,$t2,$t0
+	vshl.i64	$IN,$IN,#1
+	vext.8		$t2,$t2,$t2,#8
+	vand		$t0,$t0,$t1
+	vorr		$IN,$IN,$t2		@ H<<<=1
+	veor		$H,$IN,$t0		@ twisted H
+	vst1.64		{$H},[x0],#16
+
+	@ calculate H^2
+	vext.8		$t0,$H,$H,#8		@ Karatsuba pre-processing
+	vpmull.p64	$Xl,$H,$H
+	veor		$t0,$t0,$H
+	vpmull2.p64	$Xh,$H,$H
+	vpmull.p64	$Xm,$t0,$t0
+
+	vext.8		$t1,$Xl,$Xh,#8		@ Karatsuba post-processing
+	veor		$t2,$Xl,$Xh
+	veor		$Xm,$Xm,$t1
+	veor		$Xm,$Xm,$t2
+	vpmull.p64	$t2,$Xl,$xC2		@ 1st phase
+
+	vmov		$Xh#lo,$Xm#hi		@ Xh|Xm - 256-bit result
+	vmov		$Xm#hi,$Xl#lo		@ Xm is rotated Xl
+	veor		$Xl,$Xm,$t2
+
+	vext.8		$t2,$Xl,$Xl,#8		@ 2nd phase
+	vpmull.p64	$Xl,$Xl,$xC2
+	veor		$t2,$t2,$Xh
+	veor		$H2,$Xl,$t2
+
+	vext.8		$t1,$H2,$H2,#8		@ Karatsuba pre-processing
+	veor		$t1,$t1,$H2
+	vext.8		$Hhl,$t0,$t1,#8		@ pack Karatsuba pre-processed
+	vst1.64		{$Hhl-$H2},[x0]
+
+	ret
+.size	gcm_init_v8,.-gcm_init_v8
+
+.global	gcm_gmult_v8
+.type	gcm_gmult_v8,%function
+.align	4
+gcm_gmult_v8:
+	vld1.64		{$t1},[$Xi]		@ load Xi
+	vmov.i8		$xC2,#0xe1
+	vld1.64		{$H-$Hhl},[$Htbl]	@ load twisted H, ...
+	vshl.u64	$xC2,$xC2,#57
+#ifndef __ARMEB__
+	vrev64.8	$t1,$t1
+#endif
+	vext.8		$IN,$t1,$t1,#8
+
+	vpmull.p64	$Xl,$H,$IN		@ H.lo<6C>Xi.lo
+	veor		$t1,$t1,$IN		@ Karatsuba pre-processing
+	vpmull2.p64	$Xh,$H,$IN		@ H.hi<68>Xi.hi
+	vpmull.p64	$Xm,$Hhl,$t1		@ (H.lo+H.hi)<29>(Xi.lo+Xi.hi)
+
+	vext.8		$t1,$Xl,$Xh,#8		@ Karatsuba post-processing
+	veor		$t2,$Xl,$Xh
+	veor		$Xm,$Xm,$t1
+	veor		$Xm,$Xm,$t2
+	vpmull.p64	$t2,$Xl,$xC2		@ 1st phase
+
+	vmov		$Xh#lo,$Xm#hi		@ Xh|Xm - 256-bit result
+	vmov		$Xm#hi,$Xl#lo		@ Xm is rotated Xl
+	veor		$Xl,$Xm,$t2
+
+	vext.8		$t2,$Xl,$Xl,#8		@ 2nd phase
+	vpmull.p64	$Xl,$Xl,$xC2
+	veor		$t2,$t2,$Xh
+	veor		$Xl,$Xl,$t2
+
+#ifndef __ARMEB__
+	vrev64.8	$Xl,$Xl
+#endif
+	vext.8		$Xl,$Xl,$Xl,#8
+	vst1.64		{$Xl},[$Xi]		@ write out Xi
+
+	ret
+.size	gcm_gmult_v8,.-gcm_gmult_v8
+
+.global	gcm_ghash_v8
+.type	gcm_ghash_v8,%function
+.align	4
+gcm_ghash_v8:
+___
+$code.=<<___		if ($flavour !~ /64/);
+	vstmdb		sp!,{d8-d15}
+___
+$code.=<<___;
+	vld1.64		{$Xl},[$Xi]		@ load [rotated] Xi
+	subs		$len,$len,#32
+	vmov.i8		$xC2,#0xe1
+	mov		$inc,#16
+	vld1.64		{$H-$Hhl},[$Htbl],#32	@ load twisted H, ..., H^2
+	vld1.64		{$H2},[$Htbl]
+	cclr		$inc,eq
+	vext.8		$Xl,$Xl,$Xl,#8
+	vld1.64		{$t0},[$inp],#16	@ load [rotated] I[0]
+	vshl.u64	$xC2,$xC2,#57		@ 0xc2.0
+#ifndef __ARMEB__
+	vrev64.8	$t0,$t0
+	vrev64.8	$Xl,$Xl
+#endif
+	vext.8		$IN,$t0,$t0,#8
+	b.lo		.Lodd_tail_v8
+___
+{ my ($Xln,$Xmn,$Xhn,$In) = map("q$_",(4..7));
+	#######
+	# Xi+2 =[H*(Ii+1 + Xi+1)] mod P =
+	#	[(H*Ii+1) + (H*Xi+1)] mod P =
+	#	[(H*Ii+1) + H^2*(Ii+Xi)] mod P
+	#
+$code.=<<___;
+	vld1.64		{$t1},[$inp],$inc	@ load [rotated] I[1]
+#ifndef __ARMEB__
+	vrev64.8	$t1,$t1
+#endif
+	vext.8		$In,$t1,$t1,#8
+	veor		$IN,$IN,$Xl		@ I[i]^=Xi
+	vpmull.p64	$Xln,$H,$In		@ H<>Ii+1
+	veor		$t1,$t1,$In		@ Karatsuba pre-processing
+	vpmull2.p64	$Xhn,$H,$In
+	b		.Loop_mod2x_v8
+
+.align	4
+.Loop_mod2x_v8:
+	vext.8		$t2,$IN,$IN,#8
+	subs		$len,$len,#32
+	vpmull.p64	$Xl,$H2,$IN		@ H^2.lo<6C>Xi.lo
+	cclr		$inc,lo
+
+	 vpmull.p64	$Xmn,$Hhl,$t1
+	veor		$t2,$t2,$IN		@ Karatsuba pre-processing
+	vpmull2.p64	$Xh,$H2,$IN		@ H^2.hi<68>Xi.hi
+	veor		$Xl,$Xl,$Xln		@ accumulate
+	vpmull2.p64	$Xm,$Hhl,$t2		@ (H^2.lo+H^2.hi)<29>(Xi.lo+Xi.hi)
+	 vld1.64	{$t0},[$inp],$inc	@ load [rotated] I[i]
+
+	veor		$Xh,$Xh,$Xhn
+	 cclr		$inc,eq
+	veor		$Xm,$Xm,$Xmn
+
+	vext.8		$t1,$Xl,$Xh,#8		@ Karatsuba post-processing
+	veor		$t2,$Xl,$Xh
+	veor		$Xm,$Xm,$t1
+	 vld1.64	{$t1},[$inp],$inc	@ load [rotated] I[i+1]
+#ifndef __ARMEB__
+	 vrev64.8	$t0,$t0
+#endif
+	veor		$Xm,$Xm,$t2
+	vpmull.p64	$t2,$Xl,$xC2		@ 1st phase
+
+#ifndef __ARMEB__
+	 vrev64.8	$t1,$t1
+#endif
+	vmov		$Xh#lo,$Xm#hi		@ Xh|Xm - 256-bit result
+	vmov		$Xm#hi,$Xl#lo		@ Xm is rotated Xl
+	 vext.8		$In,$t1,$t1,#8
+	 vext.8		$IN,$t0,$t0,#8
+	veor		$Xl,$Xm,$t2
+	 vpmull.p64	$Xln,$H,$In		@ H<>Ii+1
+	veor		$IN,$IN,$Xh		@ accumulate $IN early
+
+	vext.8		$t2,$Xl,$Xl,#8		@ 2nd phase
+	vpmull.p64	$Xl,$Xl,$xC2
+	veor		$IN,$IN,$t2
+	 veor		$t1,$t1,$In		@ Karatsuba pre-processing
+	veor		$IN,$IN,$Xl
+	 vpmull2.p64	$Xhn,$H,$In
+	b.hs		.Loop_mod2x_v8
+
+	veor		$Xh,$Xh,$t2
+	vext.8		$IN,$t0,$t0,#8		@ re-construct $IN
+	adds		$len,$len,#32
+	veor		$Xl,$Xl,$Xh		@ re-construct $Xl
+	b.eq		.Ldone_v8
+___
+}
+$code.=<<___;
+.Lodd_tail_v8:
+	vext.8		$t2,$Xl,$Xl,#8
+	veor		$IN,$IN,$Xl		@ inp^=Xi
+	veor		$t1,$t0,$t2		@ $t1 is rotated inp^Xi
+
+	vpmull.p64	$Xl,$H,$IN		@ H.lo<6C>Xi.lo
+	veor		$t1,$t1,$IN		@ Karatsuba pre-processing
+	vpmull2.p64	$Xh,$H,$IN		@ H.hi<68>Xi.hi
+	vpmull.p64	$Xm,$Hhl,$t1		@ (H.lo+H.hi)<29>(Xi.lo+Xi.hi)
+
+	vext.8		$t1,$Xl,$Xh,#8		@ Karatsuba post-processing
+	veor		$t2,$Xl,$Xh
+	veor		$Xm,$Xm,$t1
+	veor		$Xm,$Xm,$t2
+	vpmull.p64	$t2,$Xl,$xC2		@ 1st phase
+
+	vmov		$Xh#lo,$Xm#hi		@ Xh|Xm - 256-bit result
+	vmov		$Xm#hi,$Xl#lo		@ Xm is rotated Xl
+	veor		$Xl,$Xm,$t2
+
+	vext.8		$t2,$Xl,$Xl,#8		@ 2nd phase
+	vpmull.p64	$Xl,$Xl,$xC2
+	veor		$t2,$t2,$Xh
+	veor		$Xl,$Xl,$t2
+
+.Ldone_v8:
+#ifndef __ARMEB__
+	vrev64.8	$Xl,$Xl
+#endif
+	vext.8		$Xl,$Xl,$Xl,#8
+	vst1.64		{$Xl},[$Xi]		@ write out Xi
+
+___
+$code.=<<___		if ($flavour !~ /64/);
+	vldmia		sp!,{d8-d15}
+___
+$code.=<<___;
+	ret
+.size	gcm_ghash_v8,.-gcm_ghash_v8
+___
+}
+$code.=<<___;
+.asciz  "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align  2
+___
+
+if ($flavour =~ /64/) {			######## 64-bit code
+    sub unvmov {
+	my $arg=shift;
+
+	$arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o &&
+	sprintf	"ins	v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1;
+    }
+    foreach(split("\n",$code)) {
+	s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel	$1$2,$1zr,$1$2,$3/o	or
+	s/vmov\.i8/movi/o		or	# fix up legacy mnemonics
+	s/vmov\s+(.*)/unvmov($1)/geo	or
+	s/vext\.8/ext/o			or
+	s/vshr\.s/sshr\.s/o		or
+	s/vshr/ushr/o			or
+	s/^(\s+)v/$1/o			or	# strip off v prefix
+	s/\bbx\s+lr\b/ret/o;
+
+	s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo;	# old->new registers
+	s/@\s/\/\//o;				# old->new style commentary
+
+	# fix up remainig legacy suffixes
+	s/\.[ui]?8(\s)/$1/o;
+	s/\.[uis]?32//o and s/\.16b/\.4s/go;
+	m/\.p64/o and s/\.16b/\.1q/o;		# 1st pmull argument
+	m/l\.p64/o and s/\.16b/\.1d/go;		# 2nd and 3rd pmull arguments
+	s/\.[uisp]?64//o and s/\.16b/\.2d/go;
+	s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
+
+	print $_,"\n";
+    }
+} else {				######## 32-bit code
+    sub unvdup32 {
+	my $arg=shift;
+
+	$arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
+	sprintf	"vdup.32	q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;
+    }
+    sub unvpmullp64 {
+	my ($mnemonic,$arg)=@_;
+
+	if ($arg =~ m/q([0-9]+),\s*q([0-9]+),\s*q([0-9]+)/o) {
+	    my $word = 0xf2a00e00|(($1&7)<<13)|(($1&8)<<19)
+				 |(($2&7)<<17)|(($2&8)<<4)
+				 |(($3&7)<<1) |(($3&8)<<2);
+	    $word |= 0x00010001	 if ($mnemonic =~ "2");
+	    # since ARMv7 instructions are always encoded little-endian.
+	    # correct solution is to use .inst directive, but older
+	    # assemblers don't implement it:-(
+	    sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+			$word&0xff,($word>>8)&0xff,
+			($word>>16)&0xff,($word>>24)&0xff,
+			$mnemonic,$arg;
+	}
+    }
+
+    foreach(split("\n",$code)) {
+	s/\b[wx]([0-9]+)\b/r$1/go;		# new->old registers
+	s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;	# new->old registers
+	s/\/\/\s?/@ /o;				# new->old style commentary
+
+	# fix up remainig new-style suffixes
+	s/\],#[0-9]+/]!/o;
+
+	s/cclr\s+([^,]+),\s*([a-z]+)/mov$2	$1,#0/o			or
+	s/vdup\.32\s+(.*)/unvdup32($1)/geo				or
+	s/v?(pmull2?)\.p64\s+(.*)/unvpmullp64($1,$2)/geo		or
+	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo	or
+	s/^(\s+)b\./$1b/o						or
+	s/^(\s+)ret/$1bx\tlr/o;
+
+	print $_,"\n";
+    }
+}
+
+close STDOUT; # enforce flush
--- a/crypto/modes/gcm128.c
+++ b/crypto/modes/gcm128.c
@@ -645,7 +645,7 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])

 #endif

-#if	TABLE_BITS==4 && defined(GHASH_ASM)
+#if	TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
 # if	!defined(I386_ONLY) && \
 	(defined(__i386)	|| defined(__i386__)	|| \
 	 defined(__x86_64)	|| defined(__x86_64__)	|| \
@@ -666,13 +666,22 @@ void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 #  endif
-# elif defined(__arm__) || defined(__arm)
+# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
 #  include "arm_arch.h"
 #  if __ARM_ARCH__>=7
 #   define GHASH_ASM_ARM
 #   define GCM_FUNCREF_4BIT
+#   if defined(__aarch64__)
+#    define PMULL_CAPABLE	(OPENSSL_armcap_P & ARMV8_PMULL)
+#   endif
+#   if defined(__arm__) || defined(__arm)
+#    define NEON_CAPABLE	(OPENSSL_armcap_P & ARMV7_NEON)
+#   endif
 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
+void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 #  endif
 # elif defined(_TMS320C6400_PLUS)
 #   define GHASH_ASM_C64Xplus
@@ -740,10 +749,20 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
 	ctx->ghash = gcm_ghash_4bit;
 #  endif
 # elif	defined(GHASH_ASM_ARM)
-	if (OPENSSL_armcap_P & ARMV7_NEON) {
+#  ifdef PMULL_CAPABLE
+	if (PMULL_CAPABLE) {
+		gcm_init_v8(ctx->Htable,ctx->H.u);
+		ctx->gmult = gcm_gmult_v8;
+		ctx->ghash = gcm_ghash_v8;
+	} else
+#  endif
+#  ifdef NEON_CAPABLE
+	if (NEON_CAPABLE) {
 		ctx->gmult = gcm_gmult_neon;
 		ctx->ghash = gcm_ghash_neon;
-	} else {
+	} else
+#  endif
+	{
 		gcm_init_4bit(ctx->Htable,ctx->H.u);
 		ctx->gmult = gcm_gmult_4bit;
 		ctx->ghash = gcm_ghash_4bit;
--- a/crypto/modes/modes_lcl.h
+++ b/crypto/modes/modes_lcl.h
@@ -26,13 +26,16 @@ typedef unsigned int u32;
 typedef unsigned char u8;

 #define STRICT_ALIGNMENT 1
-#if defined(__i386)	|| defined(__i386__)	|| \
-    defined(__x86_64)	|| defined(__x86_64__)	|| \
-    defined(_M_IX86)	|| defined(_M_AMD64)	|| defined(_M_X64) || \
-    defined(__s390__)	|| defined(__s390x__)	|| \
-    ( (defined(__arm__)	|| defined(__arm)) && \
-      (defined(__ARM_ARCH_7__)	|| defined(__ARM_ARCH_7A__) || \
-       defined(__ARM_ARCH_7R__)	|| defined(__ARM_ARCH_7M__)) )
+#if defined(__i386)     || defined(__i386__)	|| \
+    defined(__x86_64)   || defined(__x86_64__)	|| \
+    defined(_M_IX86)    || defined(_M_AMD64)	|| defined(_M_X64) || \
+    defined(__s390__)   || defined(__s390x__)	|| \
+    ( \
+    ( (defined(__arm__) || defined(__arm)) && \
+      (defined(__ARM_ARCH_7__)  || defined(__ARM_ARCH_7A__) || \
+       defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__)) ) && \
+    !( defined(__arm__) && defined(__APPLE__) ) \
+    )
 # undef STRICT_ALIGNMENT
 #endif

--- a/crypto/perlasm/arm-xlate.pl
+++ b/crypto/perlasm/arm-xlate.pl
@@ -0,0 +1,165 @@
+#!/usr/bin/env perl
+
+# ARM assembler distiller by <appro>.
+
+my $flavour = shift;
+my $output = shift;
+open STDOUT,">$output" || die "can't open $output: $!";
+
+$flavour = "linux32" if (!$flavour or $flavour eq "void");
+
+my %GLOBALS;
+my $dotinlocallabels=($flavour=~/linux/)?1:0;
+
+################################################################
+# directives which need special treatment on different platforms
+################################################################
+my $arch = sub {
+    if ($flavour =~ /linux/)	{ ".arch\t".join(',',@_); }
+    else			{ ""; }
+};
+my $fpu = sub {
+    if ($flavour =~ /linux/)	{ ".fpu\t".join(',',@_); }
+    else			{ ""; }
+};
+my $hidden = sub {
+    if ($flavour =~ /ios/)	{ ".private_extern\t".join(',',@_); }
+    else			{ ".hidden\t".join(',',@_); }
+};
+my $comm = sub {
+    my @args = split(/,\s*/,shift);
+    my $name = @args[0];
+    my $global = \$GLOBALS{$name};
+    my $ret;
+
+    if ($flavour =~ /ios32/)	{
+	$ret = ".comm\t_$name,@args[1]\n";
+	$ret .= ".non_lazy_symbol_pointer\n";
+	$ret .= "$name:\n";
+	$ret .= ".indirect_symbol\t_$name\n";
+	$ret .= ".long\t0";
+	$name = "_$name";
+    } else			{ $ret = ".comm\t".join(',',@args); }
+
+    $$global = $name;
+    $ret;
+};
+my $globl = sub {
+    my $name = shift;
+    my $global = \$GLOBALS{$name};
+    my $ret;
+
+    SWITCH: for ($flavour) {
+	/ios/		&& do { $name = "_$name";
+				last;
+			      };
+    }
+
+    $ret = ".globl	$name" if (!$ret);
+    $$global = $name;
+    $ret;
+};
+my $global = $globl;
+my $extern = sub {
+    &$globl(@_);
+    return;	# return nothing
+};
+my $type = sub {
+    if ($flavour =~ /linux/)	{ ".type\t".join(',',@_); }
+    else			{ ""; }
+};
+my $size = sub {
+    if ($flavour =~ /linux/)	{ ".size\t".join(',',@_); }
+    else			{ ""; }
+};
+my $inst = sub {
+    if ($flavour =~ /linux/)    { ".inst\t".join(',',@_); }
+    else                        { ".long\t".join(',',@_); }
+};
+my $asciz = sub {
+    my $line = join(",",@_);
+    if ($line =~ /^"(.*)"$/)
+    {	".byte	" . join(",",unpack("C*",$1),0) . "\n.align	2";	}
+    else
+    {	"";	}
+};
+
+sub range {
+  my ($r,$sfx,$start,$end) = @_;
+
+    join(",",map("$r$_$sfx",($start..$end)));
+}
+
+sub expand_line {
+  my $line = shift;
+  my @ret = ();
+
+    pos($line)=0;
+
+    while ($line =~ m/\G[^@\/\{\"]*/g) {
+	if ($line =~ m/\G(@|\/\/|$)/gc) {
+	    last;
+	}
+	elsif ($line =~ m/\G\{/gc) {
+	    my $saved_pos = pos($line);
+	    $line =~ s/\G([rdqv])([0-9]+)([^\-]*)\-\1([0-9]+)\3/range($1,$3,$2,$4)/e;
+	    pos($line) = $saved_pos;
+	    $line =~ m/\G[^\}]*\}/g;
+	}
+	elsif ($line =~ m/\G\"/gc) {
+	    $line =~ m/\G[^\"]*\"/g;
+	}
+    }
+
+    $line =~ s/\b(\w+)/$GLOBALS{$1} or $1/ge;
+
+    return $line;
+}
+
+while($line=<>) {
+
+    if ($line =~ m/^\s*(#|@|\/\/)/)	{ print $line; next; }
+
+    $line =~ s|/\*.*\*/||;	# get rid of C-style comments...
+    $line =~ s|^\s+||;		# ... and skip white spaces in beginning...
+    $line =~ s|\s+$||;		# ... and at the end
+
+    {
+	$line =~ s|[\b\.]L(\w{2,})|L$1|g;	# common denominator for Locallabel
+	$line =~ s|\bL(\w{2,})|\.L$1|g	if ($dotinlocallabels);
+    }
+
+    {
+	$line =~ s|(^[\.\w]+)\:\s*||;
+	my $label = $1;
+	if ($label) {
+	    printf "%s:",($GLOBALS{$label} or $label);
+	}
+    }
+
+    if ($line !~ m/^[#@]/) {
+	$line =~ s|^\s*(\.?)(\S+)\s*||;
+	my $c = $1; $c = "\t" if ($c eq "");
+	my $mnemonic = $2;
+	my $opcode;
+	if ($mnemonic =~ m/([^\.]+)\.([^\.]+)/) {
+	    $opcode = eval("\$$1_$2");
+	} else {
+	    $opcode = eval("\$$mnemonic");
+	}
+
+	my $arg=expand_line($line);
+
+	if (ref($opcode) eq 'CODE') {
+		$line = &$opcode($arg);
+	} elsif ($mnemonic)         {
+		$line = $c.$mnemonic;
+		$line.= "\t$arg" if ($arg);
+	}
+    }
+
+    print $line if ($line);
+    print "\n";
+}
+
+close STDOUT;
--- a/crypto/sha/Makefile
+++ b/crypto/sha/Makefile
@@ -90,6 +90,9 @@ sha512-%.S:	asm/sha512-%.pl;	$(PERL) $< $(PERLASM_SCHEME) $@
 sha1-armv4-large.o:	sha1-armv4-large.S
 sha256-armv4.o:		sha256-armv4.S
 sha512-armv4.o:		sha512-armv4.S
+sha1-armv8.o:		sha1-armv8.S
+sha256-armv8.o:		sha256-armv8.S
+sha512-armv8.o:		sha512-armv8.S

 files:
 	$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
--- a/crypto/sha/asm/sha1-armv4-large.pl
+++ b/crypto/sha/asm/sha1-armv4-large.pl
@@ -52,8 +52,20 @@
 # Profiler-assisted and platform-specific optimization resulted in 10%
 # improvement on Cortex A8 core and 12.2 cycles per byte.

-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+    open STDOUT,">$output";
+}

 $ctx="r0";
 $inp="r1";
--- a/crypto/sha/asm/sha1-armv8.pl
+++ b/crypto/sha/asm/sha1-armv8.pl
@@ -0,0 +1,343 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# SHA1 for ARMv8.
+#
+# Performance in cycles per processed byte and improvement coefficient
+# over code generated with "default" compiler:
+#
+#		hardware-assisted	software(*)
+# Apple A7	2.31			4.13 (+14%)
+# Cortex-A53	2.19			8.73 (+108%)
+# Cortex-A57	2.35			7.88 (+74%)
+#
+# (*)	Software results are presented mostly for reference purposes.
+
+$flavour = shift;
+$output  = shift;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
+
+($ctx,$inp,$num)=("x0","x1","x2");
+@Xw=map("w$_",(3..17,19));
+@Xx=map("x$_",(3..17,19));
+@V=($A,$B,$C,$D,$E)=map("w$_",(20..24));
+($t0,$t1,$t2,$K)=map("w$_",(25..28));
+
+
+sub BODY_00_19 {
+my ($i,$a,$b,$c,$d,$e)=@_;
+my $j=($i+2)&15;
+
+$code.=<<___ if ($i<15 && !($i&1));
+	lsr	@Xx[$i+1],@Xx[$i],#32
+___
+$code.=<<___ if ($i<14 && !($i&1));
+	ldr	@Xx[$i+2],[$inp,#`($i+2)*4-64`]
+___
+$code.=<<___ if ($i<14 && ($i&1));
+#ifdef	__ARMEB__
+	ror	@Xx[$i+1],@Xx[$i+1],#32
+#else
+	rev32	@Xx[$i+1],@Xx[$i+1]
+#endif
+___
+$code.=<<___ if ($i<14);
+	bic	$t0,$d,$b
+	and	$t1,$c,$b
+	ror	$t2,$a,#27
+	add	$d,$d,$K		// future e+=K
+	orr	$t0,$t0,$t1
+	add	$e,$e,$t2		// e+=rot(a,5)
+	ror	$b,$b,#2
+	add	$d,$d,@Xw[($i+1)&15]	// future e+=X[i]
+	add	$e,$e,$t0		// e+=F(b,c,d)
+___
+$code.=<<___ if ($i==19);
+	movz	$K,#0xeba1
+	movk	$K,#0x6ed9,lsl#16
+___
+$code.=<<___ if ($i>=14);
+	 eor	@Xw[$j],@Xw[$j],@Xw[($j+2)&15]
+	bic	$t0,$d,$b
+	and	$t1,$c,$b
+	ror	$t2,$a,#27
+	 eor	@Xw[$j],@Xw[$j],@Xw[($j+8)&15]
+	add	$d,$d,$K		// future e+=K
+	orr	$t0,$t0,$t1
+	add	$e,$e,$t2		// e+=rot(a,5)
+	 eor	@Xw[$j],@Xw[$j],@Xw[($j+13)&15]
+	ror	$b,$b,#2
+	add	$d,$d,@Xw[($i+1)&15]	// future e+=X[i]
+	add	$e,$e,$t0		// e+=F(b,c,d)
+	 ror	@Xw[$j],@Xw[$j],#31
+___
+}
+
+sub BODY_40_59 {
+my ($i,$a,$b,$c,$d,$e)=@_;
+my $j=($i+2)&15;
+
+$code.=<<___ if ($i==59);
+	movz	$K,#0xc1d6
+	movk	$K,#0xca62,lsl#16
+___
+$code.=<<___;
+	orr	$t0,$b,$c
+	and	$t1,$b,$c
+	 eor	@Xw[$j],@Xw[$j],@Xw[($j+2)&15]
+	ror	$t2,$a,#27
+	and	$t0,$t0,$d
+	add	$d,$d,$K		// future e+=K
+	 eor	@Xw[$j],@Xw[$j],@Xw[($j+8)&15]
+	add	$e,$e,$t2		// e+=rot(a,5)
+	orr	$t0,$t0,$t1
+	ror	$b,$b,#2
+	 eor	@Xw[$j],@Xw[$j],@Xw[($j+13)&15]
+	add	$d,$d,@Xw[($i+1)&15]	// future e+=X[i]
+	add	$e,$e,$t0		// e+=F(b,c,d)
+	 ror	@Xw[$j],@Xw[$j],#31
+___
+}
+
+sub BODY_20_39 {
+my ($i,$a,$b,$c,$d,$e)=@_;
+my $j=($i+2)&15;
+
+$code.=<<___ if ($i==39);
+	movz	$K,#0xbcdc
+	movk	$K,#0x8f1b,lsl#16
+___
+$code.=<<___ if ($i<78);
+	 eor	@Xw[$j],@Xw[$j],@Xw[($j+2)&15]
+	eor	$t0,$d,$b
+	ror	$t2,$a,#27
+	add	$d,$d,$K		// future e+=K
+	 eor	@Xw[$j],@Xw[$j],@Xw[($j+8)&15]
+	eor	$t0,$t0,$c
+	add	$e,$e,$t2		// e+=rot(a,5)
+	ror	$b,$b,#2
+	 eor	@Xw[$j],@Xw[$j],@Xw[($j+13)&15]
+	add	$d,$d,@Xw[($i+1)&15]	// future e+=X[i]
+	add	$e,$e,$t0		// e+=F(b,c,d)
+	 ror	@Xw[$j],@Xw[$j],#31
+___
+$code.=<<___ if ($i==78);
+	ldp	@Xw[1],@Xw[2],[$ctx]
+	eor	$t0,$d,$b
+	ror	$t2,$a,#27
+	add	$d,$d,$K		// future e+=K
+	eor	$t0,$t0,$c
+	add	$e,$e,$t2		// e+=rot(a,5)
+	ror	$b,$b,#2
+	add	$d,$d,@Xw[($i+1)&15]	// future e+=X[i]
+	add	$e,$e,$t0		// e+=F(b,c,d)
+___
+$code.=<<___ if ($i==79);
+	ldp	@Xw[3],@Xw[4],[$ctx,#8]
+	eor	$t0,$d,$b
+	ror	$t2,$a,#27
+	eor	$t0,$t0,$c
+	add	$e,$e,$t2		// e+=rot(a,5)
+	ror	$b,$b,#2
+	ldr	@Xw[5],[$ctx,#16]
+	add	$e,$e,$t0		// e+=F(b,c,d)
+___
+}
+
+$code.=<<___;
+#include "arm_arch.h"
+
+.text
+
+.extern	OPENSSL_armcap_P
+.globl	sha1_block_data_order
+.type	sha1_block_data_order,%function
+.align	6
+sha1_block_data_order:
+	ldr	x16,.LOPENSSL_armcap_P
+	adr	x17,.LOPENSSL_armcap_P
+	add	x16,x16,x17
+	ldr	w16,[x16]
+	tst	w16,#ARMV8_SHA1
+	b.ne	.Lv8_entry
+
+	stp	x29,x30,[sp,#-96]!
+	add	x29,sp,#0
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+
+	ldp	$A,$B,[$ctx]
+	ldp	$C,$D,[$ctx,#8]
+	ldr	$E,[$ctx,#16]
+
+.Loop:
+	ldr	@Xx[0],[$inp],#64
+	movz	$K,#0x7999
+	sub	$num,$num,#1
+	movk	$K,#0x5a82,lsl#16
+#ifdef	__ARMEB__
+	ror	$Xx[0],@Xx[0],#32
+#else
+	rev32	@Xx[0],@Xx[0]
+#endif
+	add	$E,$E,$K		// warm it up
+	add	$E,$E,@Xw[0]
+___
+for($i=0;$i<20;$i++)	{ &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
+for(;$i<40;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
+for(;$i<60;$i++)	{ &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
+for(;$i<80;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+	add	$B,$B,@Xw[2]
+	add	$C,$C,@Xw[3]
+	add	$A,$A,@Xw[1]
+	add	$D,$D,@Xw[4]
+	add	$E,$E,@Xw[5]
+	stp	$A,$B,[$ctx]
+	stp	$C,$D,[$ctx,#8]
+	str	$E,[$ctx,#16]
+	cbnz	$num,.Loop
+
+	ldp	x19,x20,[sp,#16]
+	ldp	x21,x22,[sp,#32]
+	ldp	x23,x24,[sp,#48]
+	ldp	x25,x26,[sp,#64]
+	ldp	x27,x28,[sp,#80]
+	ldr	x29,[sp],#96
+	ret
+.size	sha1_block_data_order,.-sha1_block_data_order
+___
+{{{
+my ($ABCD,$E,$E0,$E1)=map("v$_.16b",(0..3));
+my @MSG=map("v$_.16b",(4..7));
+my @Kxx=map("v$_.4s",(16..19));
+my ($W0,$W1)=("v20.4s","v21.4s");
+my $ABCD_SAVE="v22.16b";
+
+$code.=<<___;
+.type	sha1_block_armv8,%function
+.align	6
+sha1_block_armv8:
+.Lv8_entry:
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+
+	adr	x4,.Lconst
+	eor	$E,$E,$E
+	ld1.32	{$ABCD},[$ctx],#16
+	ld1.32	{$E}[0],[$ctx]
+	sub	$ctx,$ctx,#16
+	ld1.32	{@Kxx[0]-@Kxx[3]},[x4]
+
+.Loop_hw:
+	ld1	{@MSG[0]-@MSG[3]},[$inp],#64
+	sub	$num,$num,#1
+	rev32	@MSG[0],@MSG[0]
+	rev32	@MSG[1],@MSG[1]
+
+	add.i32	$W0,@Kxx[0],@MSG[0]
+	rev32	@MSG[2],@MSG[2]
+	orr	$ABCD_SAVE,$ABCD,$ABCD	// offload
+
+	add.i32	$W1,@Kxx[0],@MSG[1]
+	rev32	@MSG[3],@MSG[3]
+	sha1h	$E1,$ABCD
+	sha1c	$ABCD,$E,$W0		// 0
+	add.i32	$W0,@Kxx[$j],@MSG[2]
+	sha1su0	@MSG[0],@MSG[1],@MSG[2]
+___
+for ($j=0,$i=1;$i<20-3;$i++) {
+my $f=("c","p","m","p")[$i/5];
+$code.=<<___;
+	sha1h	$E0,$ABCD		// $i
+	sha1$f	$ABCD,$E1,$W1
+	add.i32	$W1,@Kxx[$j],@MSG[3]
+	sha1su1	@MSG[0],@MSG[3]
+___
+$code.=<<___ if ($i<20-4);
+	sha1su0	@MSG[1],@MSG[2],@MSG[3]
+___
+	($E0,$E1)=($E1,$E0);		($W0,$W1)=($W1,$W0);
+	push(@MSG,shift(@MSG));		$j++ if ((($i+3)%5)==0);
+}
+$code.=<<___;
+	sha1h	$E0,$ABCD		// $i
+	sha1p	$ABCD,$E1,$W1
+	add.i32	$W1,@Kxx[$j],@MSG[3]
+
+	sha1h	$E1,$ABCD		// 18
+	sha1p	$ABCD,$E0,$W0
+
+	sha1h	$E0,$ABCD		// 19
+	sha1p	$ABCD,$E1,$W1
+
+	add.i32	$E,$E,$E0
+	add.i32	$ABCD,$ABCD,$ABCD_SAVE
+
+	cbnz	$num,.Loop_hw
+
+	st1.32	{$ABCD},[$ctx],#16
+	st1.32	{$E}[0],[$ctx]
+
+	ldr	x29,[sp],#16
+	ret
+.size	sha1_block_armv8,.-sha1_block_armv8
+.align	6
+.Lconst:
+.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999	//K_00_19
+.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	//K_20_39
+.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	//K_40_59
+.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	//K_60_79
+.LOPENSSL_armcap_P:
+.quad	OPENSSL_armcap_P-.
+.asciz	"SHA1 block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align	2
+.comm	OPENSSL_armcap_P,4,4
+___
+}}}
+
+{   my	%opcode = (
+	"sha1c"		=> 0x5e000000,	"sha1p"		=> 0x5e001000,
+	"sha1m"		=> 0x5e002000,	"sha1su0"	=> 0x5e003000,
+	"sha1h"		=> 0x5e280800,	"sha1su1"	=> 0x5e281800	);
+
+    sub unsha1 {
+	my ($mnemonic,$arg)=@_;
+
+	$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
+	&&
+	sprintf ".inst\t0x%08x\t//%s %s",
+			$opcode{$mnemonic}|$1|($2<<5)|($3<<16),
+			$mnemonic,$arg;
+    }
+}
+
+foreach(split("\n",$code)) {
+
+	s/\`([^\`]*)\`/eval($1)/geo;
+
+	s/\b(sha1\w+)\s+([qv].*)/unsha1($1,$2)/geo;
+
+	s/\.\w?32\b//o		and s/\.16b/\.4s/go;
+	m/(ld|st)1[^\[]+\[0\]/o	and s/\.4s/\.s/go;
+
+	print $_,"\n";
+}
+
+close STDOUT;
--- a/crypto/sha/asm/sha256-armv4.pl
+++ b/crypto/sha/asm/sha256-armv4.pl
@@ -23,8 +23,20 @@
 # Profiler-assisted and platform-specific optimization resulted in 16%
 # improvement on Cortex A8 core and ~17 cycles per processed byte.

-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+    open STDOUT,">$output";
+}

 $ctx="r0";	$t0="r0";
 $inp="r1";	$t3="r1";
--- a/crypto/sha/asm/sha512-armv4.pl
+++ b/crypto/sha/asm/sha512-armv4.pl
@@ -38,8 +38,20 @@ $hi="HI";
 $lo="LO";
 # ====================================================================

-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+    open STDOUT,">$output";
+}

 $ctx="r0";	# parameter block
 $inp="r1";
@@ -221,17 +233,21 @@ WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
 WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
 .size	K512,.-K512
 .LOPENSSL_armcap:
-.word	OPENSSL_armcap_P-sha512_block_data_order
+.word	OPENSSL_armcap_P-.Lsha512_block_data_order
 .skip	32-4

 .global	sha512_block_data_order
 .type	sha512_block_data_order,%function
 sha512_block_data_order:
+.Lsha512_block_data_order:
 	sub	r3,pc,#8		@ sha512_block_data_order
 	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
 #if __ARM_ARCH__>=7
 	ldr	r12,.LOPENSSL_armcap
 	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
+#ifdef	__APPLE__
+	ldr	r12,[r12]
+#endif
 	tst	r12,#1
 	bne	.LNEON
 #endif
--- a/crypto/sha/asm/sha512-armv8.pl
+++ b/crypto/sha/asm/sha512-armv8.pl
@@ -0,0 +1,428 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# SHA256/512 for ARMv8.
+#
+# Performance in cycles per processed byte and improvement coefficient
+# over code generated with "default" compiler:
+#
+#		SHA256-hw	SHA256(*)	SHA512
+# Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
+# Cortex-A53	2.38		15.6 (+110%)	10.1 (+190%(***))
+# Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
+# 
+# (*)	Software SHA256 results are of lesser relevance, presented
+#	mostly for informational purposes.
+# (**)	The result is a trade-off: it's possible to improve it by
+#	10% (or by 1 cycle per round), but at the cost of 20% loss
+#	on Cortex-A53 (or by 4 cycles per round).
+# (***)	Super-impressive coefficients over gcc-generated code are
+#	indication of some compiler "pathology", most notably code
+#	generated with -mgeneral-regs-only is significanty faster
+#	and lags behind assembly only by 50-90%.
+
+$flavour=shift;
+$output=shift;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
+
+if ($output =~ /512/) {
+	$BITS=512;
+	$SZ=8;
+	@Sigma0=(28,34,39);
+	@Sigma1=(14,18,41);
+	@sigma0=(1,  8, 7);
+	@sigma1=(19,61, 6);
+	$rounds=80;
+	$reg_t="x";
+} else {
+	$BITS=256;
+	$SZ=4;
+	@Sigma0=( 2,13,22);
+	@Sigma1=( 6,11,25);
+	@sigma0=( 7,18, 3);
+	@sigma1=(17,19,10);
+	$rounds=64;
+	$reg_t="w";
+}
+
+$func="sha${BITS}_block_data_order";
+
+($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30));
+
+@X=map("$reg_t$_",(3..15,0..2));
+@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("$reg_t$_",(20..27));
+($t0,$t1,$t2,$t3)=map("$reg_t$_",(16,17,19,28));
+
+sub BODY_00_xx {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
+my $j=($i+1)&15;
+my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]);
+   $T0=@X[$i+3] if ($i<11);
+
+$code.=<<___	if ($i<16);
+#ifndef	__ARMEB__
+	rev	@X[$i],@X[$i]			// $i
+#endif
+___
+$code.=<<___	if ($i<13 && ($i&1));
+	ldp	@X[$i+1],@X[$i+2],[$inp],#2*$SZ
+___
+$code.=<<___	if ($i==13);
+	ldp	@X[14],@X[15],[$inp]
+___
+$code.=<<___	if ($i>=14);
+	ldr	@X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`]
+___
+$code.=<<___	if ($i>0 && $i<16);
+	add	$a,$a,$t1			// h+=Sigma0(a)
+___
+$code.=<<___	if ($i>=11);
+	str	@X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`]
+___
+# While ARMv8 specifies merged rotate-n-logical operation such as
+# 'eor x,y,z,ror#n', it was found to negatively affect performance
+# on Apple A7. The reason seems to be that it requires even 'y' to
+# be available earlier. This means that such merged instruction is
+# not necessarily best choice on critical path... On the other hand
+# Cortex-A5x handles merged instructions much better than disjoint
+# rotate and logical... See (**) footnote above.
+$code.=<<___	if ($i<15);
+	ror	$t0,$e,#$Sigma1[0]
+	add	$h,$h,$t2			// h+=K[i]
+	eor	$T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]`
+	and	$t1,$f,$e
+	bic	$t2,$g,$e
+	add	$h,$h,@X[$i&15]			// h+=X[i]
+	orr	$t1,$t1,$t2			// Ch(e,f,g)
+	eor	$t2,$a,$b			// a^b, b^c in next round
+	eor	$t0,$t0,$T0,ror#$Sigma1[1]	// Sigma1(e)
+	ror	$T0,$a,#$Sigma0[0]
+	add	$h,$h,$t1			// h+=Ch(e,f,g)
+	eor	$t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]`
+	add	$h,$h,$t0			// h+=Sigma1(e)
+	and	$t3,$t3,$t2			// (b^c)&=(a^b)
+	add	$d,$d,$h			// d+=h
+	eor	$t3,$t3,$b			// Maj(a,b,c)
+	eor	$t1,$T0,$t1,ror#$Sigma0[1]	// Sigma0(a)
+	add	$h,$h,$t3			// h+=Maj(a,b,c)
+	ldr	$t3,[$Ktbl],#$SZ		// *K++, $t2 in next round
+	//add	$h,$h,$t1			// h+=Sigma0(a)
+___
+$code.=<<___	if ($i>=15);
+	ror	$t0,$e,#$Sigma1[0]
+	add	$h,$h,$t2			// h+=K[i]
+	ror	$T1,@X[($j+1)&15],#$sigma0[0]
+	and	$t1,$f,$e
+	ror	$T2,@X[($j+14)&15],#$sigma1[0]
+	bic	$t2,$g,$e
+	ror	$T0,$a,#$Sigma0[0]
+	add	$h,$h,@X[$i&15]			// h+=X[i]
+	eor	$t0,$t0,$e,ror#$Sigma1[1]
+	eor	$T1,$T1,@X[($j+1)&15],ror#$sigma0[1]
+	orr	$t1,$t1,$t2			// Ch(e,f,g)
+	eor	$t2,$a,$b			// a^b, b^c in next round
+	eor	$t0,$t0,$e,ror#$Sigma1[2]	// Sigma1(e)
+	eor	$T0,$T0,$a,ror#$Sigma0[1]
+	add	$h,$h,$t1			// h+=Ch(e,f,g)
+	and	$t3,$t3,$t2			// (b^c)&=(a^b)
+	eor	$T2,$T2,@X[($j+14)&15],ror#$sigma1[1]
+	eor	$T1,$T1,@X[($j+1)&15],lsr#$sigma0[2]	// sigma0(X[i+1])
+	add	$h,$h,$t0			// h+=Sigma1(e)
+	eor	$t3,$t3,$b			// Maj(a,b,c)
+	eor	$t1,$T0,$a,ror#$Sigma0[2]	// Sigma0(a)
+	eor	$T2,$T2,@X[($j+14)&15],lsr#$sigma1[2]	// sigma1(X[i+14])
+	add	@X[$j],@X[$j],@X[($j+9)&15]
+	add	$d,$d,$h			// d+=h
+	add	$h,$h,$t3			// h+=Maj(a,b,c)
+	ldr	$t3,[$Ktbl],#$SZ		// *K++, $t2 in next round
+	add	@X[$j],@X[$j],$T1
+	add	$h,$h,$t1			// h+=Sigma0(a)
+	add	@X[$j],@X[$j],$T2
+___
+	($t2,$t3)=($t3,$t2);
+}
+
+$code.=<<___;
+#include "arm_arch.h"
+
+.text
+
+.extern	OPENSSL_armcap_P
+.globl	$func
+.type	$func,%function
+.align	6
+$func:
+___
+$code.=<<___	if ($SZ==4);
+	ldr	x16,.LOPENSSL_armcap_P
+	adr	x17,.LOPENSSL_armcap_P
+	add	x16,x16,x17
+	ldr	w16,[x16]
+	tst	w16,#ARMV8_SHA256
+	b.ne	.Lv8_entry
+___
+$code.=<<___;
+	stp	x29,x30,[sp,#-128]!
+	add	x29,sp,#0
+
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+	sub	sp,sp,#4*$SZ
+
+	ldp	$A,$B,[$ctx]				// load context
+	ldp	$C,$D,[$ctx,#2*$SZ]
+	ldp	$E,$F,[$ctx,#4*$SZ]
+	add	$num,$inp,$num,lsl#`log(16*$SZ)/log(2)`	// end of input
+	ldp	$G,$H,[$ctx,#6*$SZ]
+	adr	$Ktbl,.LK$BITS
+	stp	$ctx,$num,[x29,#96]
+
+.Loop:
+	ldp	@X[0],@X[1],[$inp],#2*$SZ
+	ldr	$t2,[$Ktbl],#$SZ			// *K++
+	eor	$t3,$B,$C				// magic seed
+	str	$inp,[x29,#112]
+___
+for ($i=0;$i<16;$i++)	{ &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
+$code.=".Loop_16_xx:\n";
+for (;$i<32;$i++)	{ &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+	cbnz	$t2,.Loop_16_xx
+
+	ldp	$ctx,$num,[x29,#96]
+	ldr	$inp,[x29,#112]
+	sub	$Ktbl,$Ktbl,#`$SZ*($rounds+1)`		// rewind
+
+	ldp	@X[0],@X[1],[$ctx]
+	ldp	@X[2],@X[3],[$ctx,#2*$SZ]
+	add	$inp,$inp,#14*$SZ			// advance input pointer
+	ldp	@X[4],@X[5],[$ctx,#4*$SZ]
+	add	$A,$A,@X[0]
+	ldp	@X[6],@X[7],[$ctx,#6*$SZ]
+	add	$B,$B,@X[1]
+	add	$C,$C,@X[2]
+	add	$D,$D,@X[3]
+	stp	$A,$B,[$ctx]
+	add	$E,$E,@X[4]
+	add	$F,$F,@X[5]
+	stp	$C,$D,[$ctx,#2*$SZ]
+	add	$G,$G,@X[6]
+	add	$H,$H,@X[7]
+	cmp	$inp,$num
+	stp	$E,$F,[$ctx,#4*$SZ]
+	stp	$G,$H,[$ctx,#6*$SZ]
+	b.ne	.Loop
+
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#4*$SZ
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#128
+	ret
+.size	$func,.-$func
+
+.align	6
+.type	.LK$BITS,%object
+.LK$BITS:
+___
+$code.=<<___ if ($SZ==8);
+	.quad	0x428a2f98d728ae22,0x7137449123ef65cd
+	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+	.quad	0x3956c25bf348b538,0x59f111f1b605d019
+	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+	.quad	0xd807aa98a3030242,0x12835b0145706fbe
+	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+	.quad	0x9bdc06a725c71235,0xc19bf174cf692694
+	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+	.quad	0x983e5152ee66dfab,0xa831c66d2db43210
+	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
+	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
+	.quad	0x06ca6351e003826f,0x142929670a0e6e70
+	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
+	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
+	.quad	0x81c2c92e47edaee6,0x92722c851482353b
+	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
+	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
+	.quad	0xd192e819d6ef5218,0xd69906245565a910
+	.quad	0xf40e35855771202a,0x106aa07032bbd1b8
+	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
+	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
+	.quad	0x90befffa23631e28,0xa4506cebde82bde9
+	.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
+	.quad	0xca273eceea26619c,0xd186b8c721c0c207
+	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
+	.quad	0x113f9804bef90dae,0x1b710b35131c471b
+	.quad	0x28db77f523047d84,0x32caab7b40c72493
+	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
+	.quad	0	// terminator
+___
+$code.=<<___ if ($SZ==4);
+	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+	.long	0	//terminator
+___
+$code.=<<___;
+.size	.LK$BITS,.-.LK$BITS
+.align	3
+.LOPENSSL_armcap_P:
+	.quad	OPENSSL_armcap_P-.
+.asciz	"SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align	2
+___
+
+if ($SZ==4) {
+my $Ktbl="x3";
+
+my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2));
+my @MSG=map("v$_.16b",(4..7));
+my ($W0,$W1)=("v16.4s","v17.4s");
+my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b");
+
+$code.=<<___;
+.type	sha256_block_armv8,%function
+.align	6
+sha256_block_armv8:
+.Lv8_entry:
+	stp		x29,x30,[sp,#-16]!
+	add		x29,sp,#0
+
+	ld1.32		{$ABCD,$EFGH},[$ctx]
+	adr		$Ktbl,.LK256
+
+.Loop_hw:
+	ld1		{@MSG[0]-@MSG[3]},[$inp],#64
+	sub		$num,$num,#1
+	ld1.32		{$W0},[$Ktbl],#16
+	rev32		@MSG[0],@MSG[0]
+	rev32		@MSG[1],@MSG[1]
+	rev32		@MSG[2],@MSG[2]
+	rev32		@MSG[3],@MSG[3]
+	orr		$ABCD_SAVE,$ABCD,$ABCD		// offload
+	orr		$EFGH_SAVE,$EFGH,$EFGH
+___
+for($i=0;$i<12;$i++) {
+$code.=<<___;
+	ld1.32		{$W1},[$Ktbl],#16
+	add.i32		$W0,$W0,@MSG[0]
+	sha256su0	@MSG[0],@MSG[1]
+	orr		$abcd,$ABCD,$ABCD
+	sha256h		$ABCD,$EFGH,$W0
+	sha256h2	$EFGH,$abcd,$W0
+	sha256su1	@MSG[0],@MSG[2],@MSG[3]
+___
+	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+	ld1.32		{$W1},[$Ktbl],#16
+	add.i32		$W0,$W0,@MSG[0]
+	orr		$abcd,$ABCD,$ABCD
+	sha256h		$ABCD,$EFGH,$W0
+	sha256h2	$EFGH,$abcd,$W0
+
+	ld1.32		{$W0},[$Ktbl],#16
+	add.i32		$W1,$W1,@MSG[1]
+	orr		$abcd,$ABCD,$ABCD
+	sha256h		$ABCD,$EFGH,$W1
+	sha256h2	$EFGH,$abcd,$W1
+
+	ld1.32		{$W1},[$Ktbl]
+	add.i32		$W0,$W0,@MSG[2]
+	sub		$Ktbl,$Ktbl,#$rounds*$SZ-16	// rewind
+	orr		$abcd,$ABCD,$ABCD
+	sha256h		$ABCD,$EFGH,$W0
+	sha256h2	$EFGH,$abcd,$W0
+
+	add.i32		$W1,$W1,@MSG[3]
+	orr		$abcd,$ABCD,$ABCD
+	sha256h		$ABCD,$EFGH,$W1
+	sha256h2	$EFGH,$abcd,$W1
+
+	add.i32		$ABCD,$ABCD,$ABCD_SAVE
+	add.i32		$EFGH,$EFGH,$EFGH_SAVE
+
+	cbnz		$num,.Loop_hw
+
+	st1.32		{$ABCD,$EFGH},[$ctx]
+
+	ldr		x29,[sp],#16
+	ret
+.size	sha256_block_armv8,.-sha256_block_armv8
+___
+}
+
+$code.=<<___;
+.comm	OPENSSL_armcap_P,4,4
+___
+
+{   my  %opcode = (
+	"sha256h"	=> 0x5e004000,	"sha256h2"	=> 0x5e005000,
+	"sha256su0"	=> 0x5e282800,	"sha256su1"	=> 0x5e006000	);
+
+    sub unsha256 {
+	my ($mnemonic,$arg)=@_;
+
+	$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
+	&&
+	sprintf ".inst\t0x%08x\t//%s %s",
+			$opcode{$mnemonic}|$1|($2<<5)|($3<<16),
+			$mnemonic,$arg;
+    }
+}
+
+foreach(split("\n",$code)) {
+
+	s/\`([^\`]*)\`/eval($1)/geo;
+
+	s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/geo;
+
+	s/\.\w?32\b//o		and s/\.16b/\.4s/go;
+	m/(ld|st)1[^\[]+\[0\]/o	and s/\.4s/\.s/go;
+
+	print $_,"\n";
+}
+
+close STDOUT;
--- a/fips/fips.c
+++ b/fips/fips.c
@@ -151,7 +151,7 @@ extern const unsigned char FIPS_rodata_start[], FIPS_rodata_end[];
 #ifdef _TMS320C6X
 const
 #endif
-unsigned char              FIPS_signature [20] = { 0 };
+unsigned char              FIPS_signature [20] = { 0, 0xff };
 __fips_constseg
 static const char          FIPS_hmac_key[]="etaonrishdlcupfm";

--- a/fips/fips_canister.c
+++ b/fips/fips_canister.c
@@ -29,9 +29,11 @@ const void         *FIPS_text_end(void);

 #if !defined(FIPS_REF_POINT_IS_CROSS_COMPILER_AWARE)
 # if	(defined(__ANDROID__) && (defined(__arm__) || defined(__arm)	|| \
+				  defined(__aarch64__)			|| \
 				  defined(__i386__)|| defined(__i386)))	|| \
 	(defined(__vxworks)   && (defined(__ppc__) || defined(__ppc)	|| \
-				  defined(__mips__)|| defined(__mips)))	|| \
+				  defined(__mips__)|| defined(__mips)	|| \
+				  defined(__i386__)|| defined(__i386)))	|| \
        (defined(__NetBSD__)  && (defined(__powerpc__) || defined(__i386))) || \
 	(defined(__linux)     && ((defined(__PPC__) && !defined(__PPC64__)) || \
 				  defined(__arm__) || defined(__arm)) || \
@@ -39,7 +41,8 @@ const void         *FIPS_text_end(void);
 	(defined(__APPLE__) /* verified on all MacOS X & iOS flavors */)|| \
 	(defined(_TMS320C6X))						|| \
 	(defined(__ECOS__))						|| \
-	(defined(_WIN32)      && defined(_MSC_VER))
+	(defined(_WIN32)      && defined(_MSC_VER))			|| \
+	(defined(__QNX__)     && defined(__ARM__))
 #  define FIPS_REF_POINT_IS_CROSS_COMPILER_AWARE
 # endif
 #endif
--- a/fips/fips_test_suite.c
+++ b/fips/fips_test_suite.c
@@ -714,18 +714,6 @@ static int do_drbg_all(void)
 		{NID_aes_128_ctr, DRBG_FLAG_CTR_USE_DF},
 		{NID_aes_192_ctr, DRBG_FLAG_CTR_USE_DF},
 		{NID_aes_256_ctr, DRBG_FLAG_CTR_USE_DF},
-		{(NID_X9_62_prime256v1 << 16)|NID_sha1, 0},
-		{(NID_X9_62_prime256v1 << 16)|NID_sha224, 0},
-		{(NID_X9_62_prime256v1 << 16)|NID_sha256, 0},
-		{(NID_X9_62_prime256v1 << 16)|NID_sha384, 0},
-		{(NID_X9_62_prime256v1 << 16)|NID_sha512, 0},
-		{(NID_secp384r1 << 16)|NID_sha224, 0},
-		{(NID_secp384r1 << 16)|NID_sha256, 0},
-		{(NID_secp384r1 << 16)|NID_sha384, 0},
-		{(NID_secp384r1 << 16)|NID_sha512, 0},
-		{(NID_secp521r1 << 16)|NID_sha256, 0},
-		{(NID_secp521r1 << 16)|NID_sha384, 0},
-		{(NID_secp521r1 << 16)|NID_sha512, 0},
 		{0, 0}
 	};
    DRBG_LIST *lst;
@@ -1337,6 +1325,12 @@ int main(int argc, char **argv)

    FIPS_post_set_callback(post_cb);

+#if (defined(__arm__) || defined(__aarch64__))
+    extern unsigned int OPENSSL_armcap_P;
+    if (0 == OPENSSL_armcap_P)
+	fprintf(stderr, "Optimizations disabled\n");
+#endif
+
    printf("\tFIPS-mode test application\n");

    printf("\t%s\n\n", FIPS_module_version_text());
--- a/fips/fipsalgtest.pl
+++ b/fips/fipsalgtest.pl
@@ -7,21 +7,10 @@
 # FIPS test definitions
 # List of all the unqualified file names we expect and command lines to run

-# DSA tests
-my @fips_dsa_test_list = (
-
-    "DSA",
-
-    [ "PQGGen",  "fips_dssvs pqg", "path:[^C]DSA/.*PQGGen" ],
-    [ "KeyPair", "fips_dssvs keypair", "path:[^C]DSA/.*KeyPair" ],
-    [ "SigGen",  "fips_dssvs siggen", "path:[^C]DSA/.*SigGen" ],
-    [ "SigVer",  "fips_dssvs sigver", "path:[^C]DSA/.*SigVer" ]
-
-);

 my @fips_dsa_pqgver_test_list = (
-
-    [ "PQGVer",  "fips_dssvs pqgver", "path:[^C]DSA/.*PQGVer" ]
+    "DSA",
+    [ "PQGVer",  "fips_dssvs pqgver"]

 );

@@ -34,20 +23,13 @@ my @fips_dsa2_test_list = (
    [ "KeyPair", "fips_dssvs keypair", "path:[^C]DSA2/.*KeyPair" ],
    [ "SigGen",  "fips_dssvs siggen", "path:[^C]DSA2/.*SigGen" ],
    [ "SigVer",  "fips_dssvs sigver", "path:[^C]DSA2/.*SigVer" ],
-    [ "PQGVer",  "fips_dssvs pqgver", "path:[^C]DSA2/.*PQGVer" ]
+    [ "PQGVer",  "fips_dssvs pqgver", "file:L\\s*=.*N\\s*=" ]

 );

-# ECDSA and ECDSA2 tests
-my @fips_ecdsa_test_list = (
-
-    "ECDSA",
-
-    [ "KeyPair", "fips_ecdsavs KeyPair", "path:/ECDSA/.*KeyPair" ],
-    [ "PKV",  "fips_ecdsavs PKV", "path:/ECDSA/.*PKV" ],
-    [ "SigGen",  "fips_ecdsavs SigGen", "path:/ECDSA/.*SigGen" ],
-    [ "SigVer",  "fips_ecdsavs SigVer", "path:/ECDSA/.*SigVer" ],
+# ECDSA2 tests

+my @fips_ecdsa2_test_list = (
    "ECDSA2",

    [ "KeyPair", "fips_ecdsavs KeyPair", "path:/ECDSA2/.*KeyPair" ],
@@ -65,9 +47,9 @@ my @fips_rsa_test_list = (

    [ "SigGen15",  "fips_rsastest" ],
    [ "SigVer15",  "fips_rsavtest" ],
-    [ "SigVerRSA", "fips_rsavtest -x931" ],
+    [ "SigVer(X9.31)", "fips_rsavtest -x931", 'file:9\.31' ],
    [ "KeyGenRSA", "fips_rsagtest" ],
-    [ "SigGenRSA", "fips_rsastest -x931" ]
+    [ "SigGen(X9.31)", "fips_rsastest -x931" , 'file:9\.31']

 );

@@ -79,21 +61,22 @@ my @fips_rsa_test_list = (
 # RSA PSS salt length 0 tests

 my @fips_rsa_pss0_test_list = (
-
+    "RSA",
    [ "SigGenPSS(0)", "fips_rsastest -saltlen 0",
-					'file:^\s*#\s*salt\s+len:\s+0\s*$' ],
+					'file:salt\s+len:\s+0' ],
    [ "SigVerPSS(0)", "fips_rsavtest -saltlen 0",
-					'file:^\s*#\s*salt\s+len:\s+0\s*$' ],
+					'file:salt\s+len:\s+0' ],

 );

 # RSA PSS salt length 62 tests

 my @fips_rsa_pss62_test_list = (
+    "RSA",
    [ "SigGenPSS(62)", "fips_rsastest -saltlen 62",
-					'file:^\s*#\s*salt\s+len:\s+62\s*$' ],
+					'file:salt\s+len:\s+62' ],
    [ "SigVerPSS(62)", "fips_rsavtest -saltlen 62",
-					'file:^\s*#\s*salt\s+len:\s+62\s*$' ],
+					'file:salt\s+len:\s+62' ],
 );

 # SHA tests
@@ -354,10 +337,8 @@ my @fips_des3_test_list = (
    "Triple DES",

    [ "TCBCinvperm",   "fips_desmovs -f" ],
-    [ "TCBCMMT1",      "fips_desmovs -f" ],
    [ "TCBCMMT2",      "fips_desmovs -f" ],
    [ "TCBCMMT3",      "fips_desmovs -f" ],
-    [ "TCBCMonte1",    "fips_desmovs -f" ],
    [ "TCBCMonte2",    "fips_desmovs -f" ],
    [ "TCBCMonte3",    "fips_desmovs -f" ],
    [ "TCBCpermop",    "fips_desmovs -f" ],
@@ -365,10 +346,8 @@ my @fips_des3_test_list = (
    [ "TCBCvarkey",    "fips_desmovs -f" ],
    [ "TCBCvartext",   "fips_desmovs -f" ],
    [ "TCFB64invperm", "fips_desmovs -f" ],
-    [ "TCFB64MMT1",    "fips_desmovs -f" ],
    [ "TCFB64MMT2",    "fips_desmovs -f" ],
    [ "TCFB64MMT3",    "fips_desmovs -f" ],
-    [ "TCFB64Monte1",  "fips_desmovs -f" ],
    [ "TCFB64Monte2",  "fips_desmovs -f" ],
    [ "TCFB64Monte3",  "fips_desmovs -f" ],
    [ "TCFB64permop",  "fips_desmovs -f" ],
@@ -376,10 +355,8 @@ my @fips_des3_test_list = (
    [ "TCFB64varkey",  "fips_desmovs -f" ],
    [ "TCFB64vartext", "fips_desmovs -f" ],
    [ "TCFB8invperm",  "fips_desmovs -f" ],
-    [ "TCFB8MMT1",     "fips_desmovs -f" ],
    [ "TCFB8MMT2",     "fips_desmovs -f" ],
    [ "TCFB8MMT3",     "fips_desmovs -f" ],
-    [ "TCFB8Monte1",   "fips_desmovs -f" ],
    [ "TCFB8Monte2",   "fips_desmovs -f" ],
    [ "TCFB8Monte3",   "fips_desmovs -f" ],
    [ "TCFB8permop",   "fips_desmovs -f" ],
@@ -387,10 +364,8 @@ my @fips_des3_test_list = (
    [ "TCFB8varkey",   "fips_desmovs -f" ],
    [ "TCFB8vartext",  "fips_desmovs -f" ],
    [ "TECBinvperm",   "fips_desmovs -f" ],
-    [ "TECBMMT1",      "fips_desmovs -f" ],
    [ "TECBMMT2",      "fips_desmovs -f" ],
    [ "TECBMMT3",      "fips_desmovs -f" ],
-    [ "TECBMonte1",    "fips_desmovs -f" ],
    [ "TECBMonte2",    "fips_desmovs -f" ],
    [ "TECBMonte3",    "fips_desmovs -f" ],
    [ "TECBpermop",    "fips_desmovs -f" ],
@@ -398,10 +373,8 @@ my @fips_des3_test_list = (
    [ "TECBvarkey",    "fips_desmovs -f" ],
    [ "TECBvartext",   "fips_desmovs -f" ],
    [ "TOFBinvperm",   "fips_desmovs -f" ],
-    [ "TOFBMMT1",      "fips_desmovs -f" ],
    [ "TOFBMMT2",      "fips_desmovs -f" ],
    [ "TOFBMMT3",      "fips_desmovs -f" ],
-    [ "TOFBMonte1",    "fips_desmovs -f" ],
    [ "TOFBMonte2",    "fips_desmovs -f" ],
    [ "TOFBMonte3",    "fips_desmovs -f" ],
    [ "TOFBpermop",    "fips_desmovs -f" ],
@@ -416,10 +389,8 @@ my @fips_des3_cfb1_test_list = (
    # DES3 CFB1 tests

    [ "TCFB1invperm",  "fips_desmovs -f" ],
-    [ "TCFB1MMT1",     "fips_desmovs -f" ],
    [ "TCFB1MMT2",     "fips_desmovs -f" ],
    [ "TCFB1MMT3",     "fips_desmovs -f" ],
-    [ "TCFB1Monte1",   "fips_desmovs -f" ],
    [ "TCFB1Monte2",   "fips_desmovs -f" ],
    [ "TCFB1Monte3",   "fips_desmovs -f" ],
    [ "TCFB1permop",   "fips_desmovs -f" ],
@@ -434,7 +405,6 @@ my @fips_drbg_test_list = (
    # SP800-90 DRBG tests
    "SP800-90 DRBG",
    [ "CTR_DRBG",   "fips_drbgvs" ],
-    [ "Dual_EC_DRBG",   "fips_drbgvs" ],
    [ "Hash_DRBG",  "fips_drbgvs" ],
    [ "HMAC_DRBG",  "fips_drbgvs" ]

@@ -473,8 +443,6 @@ my @fips_ecdh_test_list = (
 #

 my %verify_special = (
-    "DSA:PQGGen"        => "fips_dssvs pqgver",
-    "DSA:KeyPair"       => "fips_dssvs keyver",
    "DSA:SigGen"        => "fips_dssvs sigver",
    "DSA2:PQGGen"        => "fips_dssvs pqgver",
    "DSA2:KeyPair"       => "fips_dssvs keyver",
@@ -512,12 +480,14 @@ my $no_warn_bogus = 0;
 my $rmcmd = "rm -rf";
 my $mkcmd = "mkdir";
 my $cmpall = 0;
+my $info = 0;

 my %fips_enabled = (
    "dsa"        => 1,
    "dsa2"       => 2,
    "dsa-pqgver"  => 2,
    "ecdsa"      => 2,
+    "ecdsa2"     => 2,
    "rsa"        => 1,
    "rsa-pss0"  => 2,
    "rsa-pss62" => 1,
@@ -578,6 +548,12 @@ foreach (@ARGV) {
    elsif ( $_ eq "--notest" ) {
        $notest = 1;
    }
+    elsif ( $_ eq "--debug-detect" ) {
+        $notest = 1;
+        $ignore_missing = 1;
+        $ignore_bogus = 1;
+        $info = 1;
+    }
    elsif ( $_ eq "--quiet" ) {
        $quiet = 1;
    }
@@ -640,14 +616,14 @@ if (!$fips_enabled{"v2"}) {
 	}
 }

-push @fips_test_list, @fips_dsa_test_list       if $fips_enabled{"dsa"};
-push @fips_test_list, @fips_dsa_pqgver_test_list if $fips_enabled{"dsa-pqgver"};
 push @fips_test_list, @fips_dsa2_test_list      if $fips_enabled{"dsa2"};
-push @fips_test_list, @fips_ecdsa_test_list     if $fips_enabled{"ecdsa"};
+push @fips_test_list, @fips_dsa_pqgver_test_list if $fips_enabled{"dsa-pqgver"};
+push @fips_test_list, @fips_ecdsa2_test_list     if $fips_enabled{"ecdsa2"};
 push @fips_test_list, @fips_rsa_test_list       if $fips_enabled{"rsa"};
 push @fips_test_list, @fips_rsa_pss0_test_list  if $fips_enabled{"rsa-pss0"};
 push @fips_test_list, @fips_rsa_pss62_test_list if $fips_enabled{"rsa-pss62"};
 push @fips_test_list, @fips_sha_test_list       if $fips_enabled{"sha"};
+push @fips_test_list, @fips_drbg_test_list	if $fips_enabled{"drbg"};
 push @fips_test_list, @fips_hmac_test_list      if $fips_enabled{"hmac"};
 push @fips_test_list, @fips_cmac_test_list      if $fips_enabled{"cmac"};
 push @fips_test_list, @fips_rand_aes_test_list  if $fips_enabled{"rand-aes"};
@@ -656,7 +632,6 @@ push @fips_test_list, @fips_aes_test_list       if $fips_enabled{"aes"};
 push @fips_test_list, @fips_aes_cfb1_test_list  if $fips_enabled{"aes-cfb1"};
 push @fips_test_list, @fips_des3_test_list      if $fips_enabled{"des3"};
 push @fips_test_list, @fips_des3_cfb1_test_list if $fips_enabled{"des3-cfb1"};
-push @fips_test_list, @fips_drbg_test_list	if $fips_enabled{"drbg"};
 push @fips_test_list, @fips_aes_ccm_test_list	if $fips_enabled{"aes-ccm"};
 push @fips_test_list, @fips_aes_gcm_test_list	if $fips_enabled{"aes-gcm"};
 push @fips_test_list, @fips_aes_xts_test_list	if $fips_enabled{"aes-xts"};
@@ -711,9 +686,9 @@ sanity_check_files();
 my ( $runerr, $cmperr, $cmpok, $scheckrunerr, $scheckerr, $scheckok, $skipcnt )
  = ( 0, 0, 0, 0, 0, 0, 0 );

-exit(0) if $notest;
 print "Outputting commands to $outfile\n" if $outfile ne "";
 run_tests( $verify, $win32, $tprefix, $filter, $tvdir, $outfile );
+exit(0) if $notest;

 if ($verify) {
    print "ALGORITHM TEST VERIFY SUMMARY REPORT:\n";
@@ -779,7 +754,7 @@ EOF
 while (my ($key, $value) = each %fips_enabled)
 	{
 	printf "\t\t%-20s(%s by default)\n", $key ,
-			$value == 1 ? "enabled" : "disabled";
+			$value != 0 ? "enabled" : "disabled";
 	}
 }

@@ -818,13 +793,14 @@ sub sanity_check_exe {
 sub find_files {
    my ( $filter, $dir ) = @_;
    my ( $dirh, $testname, $tref );
+    my $ttype;
    opendir( $dirh, $dir );
    while ( $_ = readdir($dirh) ) {
        next if ( $_ eq "." || $_ eq ".." );
        $_ = "$dir/$_";
        if ( -f "$_" ) {
            if (/\/([^\/]*)\.rsp$/) {
-		$tref = find_test($1, $_);
+		$tref = find_test($1, $_, \$ttype);
                if ( defined $tref ) {
 		    $testname = $$tref[0];
                    if ( $$tref[4] eq "" ) {
@@ -832,7 +808,7 @@ sub find_files {
                    }
                    else {
                        print STDERR
-"WARNING: duplicate response file $_ for test $testname\n";
+"WARNING: duplicate response file $_ for $ttype test $testname\n";
                        $nbogus++;
                    }
                }
@@ -843,21 +819,28 @@ sub find_files {
            }
            next unless /$filter.*\.req$/i;
            if (/\/([^\/]*)\.req$/) {
-		$tref = find_test($1, $_);
+		$tref = find_test($1, $_, \$ttype);
                if ( defined $tref ) {
 		    $testname = $$tref[0];
-                    if ( $$tref[3] eq "" ) {
+                    my $tfname = $$tref[3];
+                    if ( $tfname eq "" ) {
                        $$tref[3] = $_;
                    }
                    else {
                        print STDERR
-"WARNING: duplicate request file $_ for test $testname\n";
+"WARNING: duplicate request file $_ for $ttype test $testname\n";
+			if ($info) {
+			    print_file_start($_, \*STDERR);
+			    print STDERR "Original filename $tfname\n";
+			    print_file_start($tfname, \*STDERR);
+			}
                        $nbogus++;
                    }

                }
                elsif ( !/SHAmix\.req$/ ) {
                    print STDERR "WARNING: unrecognized filename $_\n" unless $no_warn_bogus;
+		    print_file_start($_, \*STDERR) if $info;
                    $nbogus++;
                }
            }
@@ -874,13 +857,15 @@ sub find_files {
 #

 sub find_test {
-    my ( $test, $path ) = @_;
+    my ( $test, $path, $type ) = @_;
    foreach $tref (@fips_test_list) {
-        next unless ref($tref);
+	if (!ref($tref)) {
+		$$type = $tref;
+		next;
+	}
        my ( $tst, $cmd, $excmd, $req, $resp ) = @$tref;
 	my $regexp;
 	$tst =~ s/\(.*$//;
-	$test =~ s/_186-2//;
 	if (defined $excmd) {
 		if ($excmd =~ /^path:(.*)$/) {
 			my $fmatch = $1;
@@ -891,7 +876,7 @@ sub find_test {
 			$regexp = $1;
 		}
 	}
-	if ($test eq $tst) {
+	if ($test =~ /^$tst/) {
 		return $tref if (!defined $regexp);
 		my $found = 0;
 		my $line;
@@ -911,22 +896,26 @@ sub find_test {

 sub sanity_check_files {
    my $bad = 0;
+    my $ttype;
    foreach (@fips_test_list) {
-        next unless ref($_);
+	if (!ref($_)) {
+	    $ttype = $_;
+	    next;
+	}
        my ( $tst, $cmd, $regexp, $req, $resp ) = @$_;

        #print STDERR "FILES $tst, $cmd, $req, $resp\n";
        if ( $req eq "" ) {
-            print STDERR "WARNING: missing request file for $tst\n" unless $no_warn_missing;
+            print STDERR "WARNING: missing request file for $ttype test $tst\n" unless $no_warn_missing;
            $bad = 1;
            next;
        }
        if ( $verify && $resp eq "" ) {
-            print STDERR "WARNING: no response file for test $tst\n";
+            print STDERR "WARNING: no response file for $ttype test test $tst\n";
            $bad = 1;
        }
        elsif ( !$verify && $resp ne "" ) {
-            print STDERR "WARNING: response file $resp will be overwritten\n";
+            print STDERR "WARNING: response file $resp for $ttype test $tst will be overwritten\n";
        }
    }
    if ($bad) {
@@ -989,26 +978,37 @@ END
 	    if ($outfile ne "") {
 		print "Generating script for $_ tests\n";
 		print OUT "\n\n\necho \"Running $_ tests\"\n" unless $minimal_script;
-	    } else {	
+	    } elsif ($notest) {	
+            	print "Info for $_ tests:\n";
+	    } else {
            	print "Running $_ tests\n" unless $quiet;
 	    }
 	    $ttype = $_;
            next;
        }
        my ( $tname, $tcmd, $regexp, $req, $rsp ) = @$_;
+	if ($notest) {
+	    if ($req ne "") {
+	    	print "Test $ttype, $tname: $req\n";
+		print_file_start($req, \*STDOUT) if ($info);
+	    } else {
+		print "$tname: not found\n";
+	    }
+	    next;
+	}
        my $out = $rsp;
        if ($verify) {
            $out =~ s/\.rsp$/.tst/;
        }
        if ( $req eq "" ) {
            print STDERR
-              "WARNING: Request file for $tname missing: test skipped\n" unless $no_warn_missing;
+              "WARNING: Request file for $ttype test $tname missing: test skipped\n" unless $no_warn_missing;
            $skipcnt++;
            next;
        }
        if ( $verify && $rsp eq "" ) {
            print STDERR
-              "WARNING: Response file for $tname missing: test skipped\n";
+              "WARNING: Response file for $ttype test $tname missing: test skipped\n";
            $skipcnt++;
            next;
        }
@@ -1212,3 +1212,22 @@ sub next_line {
    }
    return undef;
 }
+
+sub print_file_start {
+    my ($fname, $fh) = @_;
+    print $fh "======\n";
+    open IN, $fname;
+    while (<IN>) {
+	my $line = $_;
+	s/#.*$//;
+	last unless (/^\s*$/);
+	print $fh $line;
+    }
+    my $lines = 0;
+    while (<IN>) {
+	print $fh $_;
+	last if $lines++ > 10;
+    }
+    close IN;
+    print $fh "======\n";
+}
--- a/fips/fipssyms.h
+++ b/fips/fipssyms.h
@@ -668,6 +668,50 @@
 #define bn_mul_mont_gather5 fips_bn_mul_mont_gather5
 #define bn_scatter5 fips_bn_scatter5
 #define bn_gather5 fips_bn_gather5
+#define _armv8_aes_probe _fips_armv8_aes_probe
+#define _armv8_pmull_probe _fips_armv8_pmull_probe
+#define _armv8_sha1_probe _fips_armv8_sha1_probe
+#define _armv8_sha256_probe _fips_armv8_sha256_probe
+#define aes_v8_encrypt fips_aes_v8_encrypt
+#define aes_v8_decrypt fips_aes_v8_decrypt
+#define aes_v8_set_encrypt_key fips_aes_v8_set_encrypt_key
+#define aes_v8_set_decrypt_key fips_aes_v8_set_decrypt_key
+#define aes_v8_cbc_encrypt fips_aes_v8_cbc_encrypt
+#define aes_v8_ctr32_encrypt_blocks fips_aes_v8_ctr32_encrypt_blocks
+#define gcm_init_v8 fips_gcm_init_v8
+#define gcm_gmult_v8 fips_gcm_gmult_v8
+#define gcm_ghash_v8 fips_gcm_ghash_v8
+#if defined(__APPLE__) && __ASSEMBLER__
+#define _OPENSSL_armcap_P _fips_openssl_armcap_P
+#define __armv7_neon_probe __fips_armv7_neon_probe
+#define __armv7_tick __fips_armv7_tick
+#define __armv8_aes_probe __fips_armv8_aes_probe
+#define __armv8_pmull_probe __fips_armv8_pmull_probe
+#define __armv8_sha1_probe __fips_armv8_sha1_probe
+#define __armv8_sha256_probe __fips_armv8_sha256_probe
+#define _aes_v8_encrypt _fips_aes_v8_encrypt
+#define _aes_v8_decrypt _fips_aes_v8_decrypt
+#define _aes_v8_set_encrypt_key _fips_aes_v8_set_encrypt_key
+#define _aes_v8_set_decrypt_key _fips_aes_v8_set_decrypt_key
+#define _aes_v8_cbc_encrypt _fips_aes_v8_cbc_encrypt
+#define _aes_v8_ctr32_encrypt_blocks _fips_aes_v8_ctr32_encrypt_blocks
+#define _gcm_init_v8 _fips_gcm_init_v8
+#define _gcm_gmult_v8 _fips_gcm_gmult_v8
+#define _gcm_ghash_v8 _fips_gcm_ghash_v8
+#define _sha1_block_data_order _fips_sha1_block_data_order
+#define _sha256_block_data_order _fips_sha256_block_data_order
+#define _sha512_block_data_order _fips_sha512_block_data_order
+#define _AES_decrypt _fips_aes_decrypt
+#define _AES_encrypt _fips_aes_encrypt
+#define _AES_set_decrypt_key _fips_aes_set_decrypt_key
+#define _AES_set_encrypt_key _fips_aes_set_encrypt_key
+#define _gcm_gmult_4bit _fips_gcm_gmult_4bit
+#define _gcm_ghash_4bit _fips_gcm_ghash_4bit
+#define _gcm_gmult_neon _fips_gcm_gmult_neon
+#define _gcm_ghash_neon _fips_gcm_ghash_neon
+#define _bn_GF2m_mul_2x2 _fips_bn_GF2m_mul_2x2
+#define _OPENSSL_cleanse _FIPS_openssl_cleanse
+#endif

 #if defined(_MSC_VER)
 # pragma const_seg("fipsro$b")
--- a/fips/rand/Makefile
+++ b/fips/rand/Makefile
@@ -23,10 +23,10 @@ APPS=

 LIB=$(TOP)/libcrypto.a
 LIBSRC=	fips_rand.c fips_rand_selftest.c fips_drbg_lib.c \
-	fips_drbg_hash.c fips_drbg_hmac.c fips_drbg_ctr.c fips_drbg_ec.c \
+	fips_drbg_hash.c fips_drbg_hmac.c fips_drbg_ctr.c \
 	fips_drbg_selftest.c fips_drbg_rand.c fips_rand_lib.c
 LIBOBJ=	fips_rand.o fips_rand_selftest.o fips_drbg_lib.o \
-	fips_drbg_hash.o fips_drbg_hmac.o fips_drbg_ctr.o fips_drbg_ec.o \
+	fips_drbg_hash.o fips_drbg_hmac.o fips_drbg_ctr.o \
 	fips_drbg_selftest.o fips_drbg_rand.o fips_rand_lib.o

 SRC= $(LIBSRC)
@@ -105,18 +105,6 @@ fips_drbg_ctr.o: ../../include/openssl/ossl_typ.h
 fips_drbg_ctr.o: ../../include/openssl/safestack.h
 fips_drbg_ctr.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
 fips_drbg_ctr.o: fips_drbg_ctr.c fips_rand_lcl.h
-fips_drbg_ec.o: ../../include/openssl/aes.h ../../include/openssl/asn1.h
-fips_drbg_ec.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
-fips_drbg_ec.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-fips_drbg_ec.o: ../../include/openssl/ec.h ../../include/openssl/evp.h
-fips_drbg_ec.o: ../../include/openssl/fips.h ../../include/openssl/fips_rand.h
-fips_drbg_ec.o: ../../include/openssl/hmac.h ../../include/openssl/obj_mac.h
-fips_drbg_ec.o: ../../include/openssl/objects.h
-fips_drbg_ec.o: ../../include/openssl/opensslconf.h
-fips_drbg_ec.o: ../../include/openssl/opensslv.h
-fips_drbg_ec.o: ../../include/openssl/ossl_typ.h
-fips_drbg_ec.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-fips_drbg_ec.o: ../../include/openssl/symhacks.h fips_drbg_ec.c fips_rand_lcl.h
 fips_drbg_hash.o: ../../include/openssl/aes.h ../../include/openssl/asn1.h
 fips_drbg_hash.o: ../../include/openssl/bio.h ../../include/openssl/crypto.h
 fips_drbg_hash.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
--- a/fips/rand/fips_drbg_ec.c
+++ b/fips/rand/fips_drbg_ec.c
@@ -1,542 +0,0 @@
-/* fips/rand/fips_drbg_ec.c */
-/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
- * project.
- */
-/* ====================================================================
- * Copyright (c) 2011 The OpenSSL Project.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer. 
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- *    software must display the following acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- *    endorse or promote products derived from this software without
- *    prior written permission. For written permission, please contact
- *    licensing@OpenSSL.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- *    nor may "OpenSSL" appear in their names without prior written
- *    permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- *    acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
- */
-
-#define OPENSSL_FIPSAPI
-
-#include <stdlib.h>
-#include <string.h>
-#include <openssl/crypto.h>
-#include <openssl/fips.h>
-#include <openssl/fips_rand.h>
-#include <openssl/bn.h>
-#include "fips_rand_lcl.h"
-
-/*#define EC_DRBG_TRACE*/
-
-#ifdef EC_DRBG_TRACE
-static void hexprint(FILE *out, const unsigned char *buf, int buflen)
-	{
-	int i;
-	fprintf(out, "\t");
-	for (i = 0; i < buflen; i++)
-		fprintf(out, "%02X", buf[i]);
-	fprintf(out, "\n");
-	}
-static void bnprint(FILE *out, const char *name, const BIGNUM *b)
-	{
-	unsigned char *tmp;
-	int len;
-	len = BN_num_bytes(b);
-	tmp = OPENSSL_malloc(len);
-	BN_bn2bin(b, tmp);
-	fprintf(out, "%s\n", name);
-	hexprint(out, tmp, len);
-	OPENSSL_free(tmp);
-	}
-#if 0
-static void ecprint(FILE *out, EC_GROUP *grp, EC_POINT *pt)
-	{
-	BIGNUM *x, *y;
-	x = BN_new();
-	y = BN_new();
-	EC_POINT_get_affine_coordinates_GFp(grp, pt, x, y, NULL);
-	bnprint(out, "\tPoint X: ", x);
-	bnprint(out, "\tPoint Y: ", y);
-	BN_free(x);
-	BN_free(y);
-	}
-#endif
-#endif
-
-/* This is Hash_df from SP 800-90 10.4.1 */
-
-static int hash_df(DRBG_CTX *dctx, unsigned char *out,
-			const unsigned char *in1, size_t in1len,
-			const unsigned char *in2, size_t in2len,
-			const unsigned char *in3, size_t in3len)
-	{
-	DRBG_EC_CTX *ectx = &dctx->d.ec;
-	EVP_MD_CTX *mctx = &ectx->mctx;
-	unsigned char *vtmp = ectx->vtmp;
-	unsigned char tmp[6];
-	size_t mdlen = M_EVP_MD_size(ectx->md);
-	/* Standard only ever needs seedlen bytes which is always less than
-	 * maximum permitted so no need to check length.
-	 */
-	size_t outlen = dctx->seedlen;
-	size_t nbits = (outlen << 3) - ectx->exbits;
-	tmp[0] = 1;
-	tmp[1] = (nbits >> 24) & 0xff;
-	tmp[2] = (nbits >> 16) & 0xff;
-	tmp[3] = (nbits >> 8) & 0xff;
-	tmp[4] = nbits & 0xff;
-	if (!in1)
-		{
-		tmp[5] = (unsigned char)in1len;
-		in1 = tmp + 5;
-		in1len = 1;
-		}
-	for (;;)
-		{
-		if (!FIPS_digestinit(mctx, ectx->md))
-			return 0;
-		if (!FIPS_digestupdate(mctx, tmp, 5))
-			return 0;
-		if (in1 && !FIPS_digestupdate(mctx, in1, in1len))
-			return 0;
-		if (in2 && !FIPS_digestupdate(mctx, in2, in2len))
-			return 0;
-		if (in3 && !FIPS_digestupdate(mctx, in3, in3len))
-			return 0;
-		if (outlen < mdlen)
-			{
-			if (!FIPS_digestfinal(mctx, vtmp, NULL))
-				return 0;
-			memcpy(out, vtmp, outlen);
-			OPENSSL_cleanse(vtmp, mdlen);
-			return 1;
-			}
-		else if(!FIPS_digestfinal(mctx, out, NULL))
-			return 0;
-
-		outlen -= mdlen;
-		if (outlen == 0)
-			return 1;
-		tmp[0]++;
-		out += mdlen;
-		}
-	}
-
-static int bn2binpad(unsigned char *to, size_t tolen, BIGNUM *b)
-	{
-	size_t blen;
-	blen = BN_num_bytes(b);
-	/* If BIGNUM length greater than buffer, mask to get rightmost
-	 * bytes. NB: modifies b but this doesn't matter for our purposes.
-	 */
-	if (blen > tolen)
-		{
-		BN_mask_bits(b, tolen << 3);
-		/* Update length because mask operation might create leading
-		 * zeroes.
-		 */
-		blen = BN_num_bytes(b);
-		}
-	/* If b length smaller than buffer pad with zeroes */
-	if (blen < tolen)
-		{
-		memset(to, 0, tolen - blen);
-		to += tolen - blen;
-		}
-
-	/* This call cannot fail */
-	BN_bn2bin(b, to);
-	return 1;
-	}
-/* Convert buffer to a BIGNUM discarding extra bits if necessary */
-static int bin2bnbits(DRBG_CTX *dctx, BIGNUM *r, const unsigned char *buf)
-	{
-	DRBG_EC_CTX *ectx = &dctx->d.ec;
-	if (!BN_bin2bn(buf, dctx->seedlen, r))
-		return 0;
-	/* If we have extra bits right shift off the end of r */
-	if (ectx->exbits)
-		{
-		if (!BN_rshift(r, r, ectx->exbits))
-			return 0;
-		}
-	return 1;
-	}
-
-/* Calculate r = phi(s * P) or r= phi(s * Q) */
-
-static int drbg_ec_mul(DRBG_EC_CTX *ectx, BIGNUM *r, const BIGNUM *s, int use_q)
-	{
-	if (use_q)
-		{
-		if (!EC_POINT_mul(ectx->curve, ectx->ptmp,
-						NULL, ectx->Q, s, ectx->bctx))
-			return 0;
-		}
-	else
-		{
-		if (!EC_POINT_mul(ectx->curve, ectx->ptmp,
-						s, NULL, NULL, ectx->bctx))
-			return 0;
-		}
-	/* Get x coordinate of result */
-	if (!EC_POINT_get_affine_coordinates_GFp(ectx->curve, ectx->ptmp, r,
-							NULL, ectx->bctx))
-		return 0;
-	return 1;
-	}
-
-static int drbg_ec_instantiate(DRBG_CTX *dctx,
-				const unsigned char *ent, size_t ent_len,
-				const unsigned char *nonce, size_t nonce_len,
-				const unsigned char *pstr, size_t pstr_len)
-	{
-	DRBG_EC_CTX *ectx = &dctx->d.ec;
-	if (!hash_df(dctx, ectx->sbuf, 
-			ent, ent_len, nonce, nonce_len, pstr, pstr_len))
-		return 0;
-	if (!bin2bnbits(dctx, ectx->s, ectx->sbuf))
-		return 0;
-	return 1;
-	}
-
-	
-static int drbg_ec_reseed(DRBG_CTX *dctx,
-				const unsigned char *ent, size_t ent_len,
-				const unsigned char *adin, size_t adin_len)
-	{
-	DRBG_EC_CTX *ectx = &dctx->d.ec;
-	/* Convert s value to a binary buffer. Save it to tbuf as we are
-	 * about to overwrite it.
-	 */
-	if (ectx->exbits)
-		BN_lshift(ectx->s, ectx->s, ectx->exbits);
-	bn2binpad(ectx->tbuf, dctx->seedlen, ectx->s);
-	if (!hash_df(dctx, ectx->sbuf, ectx->tbuf, dctx->seedlen, 
-			ent, ent_len, adin, adin_len))
-		return 0;
-	if (!bin2bnbits(dctx, ectx->s, ectx->sbuf))
-		return 0;
-	dctx->reseed_counter = 0;
-	return 1;
-	}
-
-static int drbg_ec_generate(DRBG_CTX *dctx,
-				unsigned char *out, size_t outlen,
-				const unsigned char *adin, size_t adin_len)
-	{
-	DRBG_EC_CTX *ectx = &dctx->d.ec;
-	BIGNUM *t, *r;
-	BIGNUM *s = ectx->s;
-	/* special case: check reseed interval */
-	if (out == NULL)
-		{
-		size_t nb = (outlen + dctx->blocklength - 1)/dctx->blocklength;
-		if (dctx->reseed_counter + nb > dctx->reseed_interval)
-			dctx->status = DRBG_STATUS_RESEED;
-		return 1;
-		}
-
-	BN_CTX_start(ectx->bctx);
-	r = BN_CTX_get(ectx->bctx);
-	if (!r)
-		goto err;
-	if (adin && adin_len)
-		{
-		size_t i;
-		t = BN_CTX_get(ectx->bctx);
-		if (!t)
-			goto err;
-		/* Convert s to buffer */
-		if (ectx->exbits)
-			BN_lshift(s, s, ectx->exbits);
-		bn2binpad(ectx->sbuf, dctx->seedlen, s);
-		/* Step 2 */
-		if (!hash_df(dctx, ectx->tbuf, adin, adin_len,
-				NULL, 0, NULL, 0))
-			goto err;
-		/* Step 5 */
-		for (i = 0; i < dctx->seedlen; i++)
-			ectx->tbuf[i] ^= ectx->sbuf[i];
-		if (!bin2bnbits(dctx, t, ectx->tbuf))
-			return 0;
-		}
-	else
-		/* Note if no additional input the algorithm never
-		 * needs separate values for t and s.
-		 */
-		t = s;
-
-#ifdef EC_DRBG_TRACE
-	bnprint(stderr, "s at start of generate: ", s);
-#endif
-
-	for (;;)
-		{
-		/* Step #6, calculate s = t * P */
-		if (!drbg_ec_mul(ectx, s, t, 0))
-			goto err;
-#ifdef EC_DRBG_TRACE
-		bnprint(stderr, "s in generate: ", ectx->s);
-#endif
-		/* Step #7, calculate r = s * Q */
-		if (!drbg_ec_mul(ectx, r, s, 1))
-			goto err;
-#ifdef EC_DRBG_TRACE
-	bnprint(stderr, "r in generate is: ", r);
-#endif
-		dctx->reseed_counter++;
-		/* Get rightmost bits of r to output buffer */
-
-		if (!(dctx->xflags & DRBG_FLAG_TEST) && !dctx->lb_valid)
-			{
-			if (!bn2binpad(dctx->lb, dctx->blocklength, r))
-				goto err;
-			dctx->lb_valid = 1;
-			continue;
-			}
-		if (outlen < dctx->blocklength)
-			{
-			if (!bn2binpad(ectx->vtmp, dctx->blocklength, r))
-				goto err;
-			if (!fips_drbg_cprng_test(dctx, ectx->vtmp))
-				goto err;
-			memcpy(out, ectx->vtmp, outlen);
-			break;
-			}
-		else
-			{
-			if (!bn2binpad(out, dctx->blocklength, r))
-				goto err;
-			if (!fips_drbg_cprng_test(dctx, out))
-				goto err;
-			}	
-		outlen -= dctx->blocklength;
-		if (!outlen)
-			break;
-		out += dctx->blocklength;
-		/* Step #5 after first pass */
-		t = s;
-#ifdef EC_DRBG_TRACE
-		fprintf(stderr, "Random bits written:\n");
-		hexprint(stderr, out, dctx->blocklength);
-#endif
-		}
-	if (!drbg_ec_mul(ectx, ectx->s, ectx->s, 0))
-		return 0;
-#ifdef EC_DRBG_TRACE
-	bnprint(stderr, "s after generate is: ", s);
-#endif
-	BN_CTX_end(ectx->bctx);
-	return 1;
-	err:
-	BN_CTX_end(ectx->bctx);
-	return 0;
-	}
-
-static int drbg_ec_uninstantiate(DRBG_CTX *dctx)
-	{
-	DRBG_EC_CTX *ectx = &dctx->d.ec;
-	EVP_MD_CTX_cleanup(&ectx->mctx);
-	EC_GROUP_free(ectx->curve);
-	EC_POINT_free(ectx->Q);
-	EC_POINT_free(ectx->ptmp);
-	BN_clear_free(ectx->s);
-	BN_CTX_free(ectx->bctx);
-	OPENSSL_cleanse(&dctx->d.ec, sizeof(DRBG_EC_CTX));
-	return 1;
-	}
-
-/* Q points from SP 800-90 A.1, P is generator */
-
-__fips_constseg
-static const unsigned char p_256_qx[] = {
-	0xc9,0x74,0x45,0xf4,0x5c,0xde,0xf9,0xf0,0xd3,0xe0,0x5e,0x1e,
-	0x58,0x5f,0xc2,0x97,0x23,0x5b,0x82,0xb5,0xbe,0x8f,0xf3,0xef,
-	0xca,0x67,0xc5,0x98,0x52,0x01,0x81,0x92
-};
-__fips_constseg
-static const unsigned char p_256_qy[] = {
-	0xb2,0x8e,0xf5,0x57,0xba,0x31,0xdf,0xcb,0xdd,0x21,0xac,0x46,
-	0xe2,0xa9,0x1e,0x3c,0x30,0x4f,0x44,0xcb,0x87,0x05,0x8a,0xda,
-	0x2c,0xb8,0x15,0x15,0x1e,0x61,0x00,0x46
-};
-
-__fips_constseg
-static const unsigned char p_384_qx[] = {
-	0x8e,0x72,0x2d,0xe3,0x12,0x5b,0xdd,0xb0,0x55,0x80,0x16,0x4b,
-	0xfe,0x20,0xb8,0xb4,0x32,0x21,0x6a,0x62,0x92,0x6c,0x57,0x50,
-	0x2c,0xee,0xde,0x31,0xc4,0x78,0x16,0xed,0xd1,0xe8,0x97,0x69,
-	0x12,0x41,0x79,0xd0,0xb6,0x95,0x10,0x64,0x28,0x81,0x50,0x65
-};
-__fips_constseg
-static const unsigned char p_384_qy[] = {
-	0x02,0x3b,0x16,0x60,0xdd,0x70,0x1d,0x08,0x39,0xfd,0x45,0xee,
-	0xc3,0x6f,0x9e,0xe7,0xb3,0x2e,0x13,0xb3,0x15,0xdc,0x02,0x61,
-	0x0a,0xa1,0xb6,0x36,0xe3,0x46,0xdf,0x67,0x1f,0x79,0x0f,0x84,
-	0xc5,0xe0,0x9b,0x05,0x67,0x4d,0xbb,0x7e,0x45,0xc8,0x03,0xdd
-};
-
-__fips_constseg
-static const unsigned char p_521_qx[] = {
-	0x01,0xb9,0xfa,0x3e,0x51,0x8d,0x68,0x3c,0x6b,0x65,0x76,0x36,
-	0x94,0xac,0x8e,0xfb,0xae,0xc6,0xfa,0xb4,0x4f,0x22,0x76,0x17,
-	0x1a,0x42,0x72,0x65,0x07,0xdd,0x08,0xad,0xd4,0xc3,0xb3,0xf4,
-	0xc1,0xeb,0xc5,0xb1,0x22,0x2d,0xdb,0xa0,0x77,0xf7,0x22,0x94,
-	0x3b,0x24,0xc3,0xed,0xfa,0x0f,0x85,0xfe,0x24,0xd0,0xc8,0xc0,
-	0x15,0x91,0xf0,0xbe,0x6f,0x63
-};
-__fips_constseg
-static const unsigned char p_521_qy[] = {
-	0x01,0xf3,0xbd,0xba,0x58,0x52,0x95,0xd9,0xa1,0x11,0x0d,0x1d,
-	0xf1,0xf9,0x43,0x0e,0xf8,0x44,0x2c,0x50,0x18,0x97,0x6f,0xf3,
-	0x43,0x7e,0xf9,0x1b,0x81,0xdc,0x0b,0x81,0x32,0xc8,0xd5,0xc3,
-	0x9c,0x32,0xd0,0xe0,0x04,0xa3,0x09,0x2b,0x7d,0x32,0x7c,0x0e,
-	0x7a,0x4d,0x26,0xd2,0xc7,0xb6,0x9b,0x58,0xf9,0x06,0x66,0x52,
-	0x91,0x1e,0x45,0x77,0x79,0xde
-};
-
-int fips_drbg_ec_init(DRBG_CTX *dctx)
-	{
-	const EVP_MD *md;
-	const unsigned char *Q_x, *Q_y;
-	BIGNUM *x, *y;
-	size_t ptlen;
-	int md_nid = dctx->type & 0xffff;
-	int curve_nid = dctx->type >> 16;
-	DRBG_EC_CTX *ectx = &dctx->d.ec;
-	md = FIPS_get_digestbynid(md_nid);
-	if (!md)
-		return -2;
-
-	/* These are taken from SP 800-90 10.3.1 table 4 */
-	switch (curve_nid)
-		{
-		case NID_X9_62_prime256v1:
-		dctx->strength = 128;
-		dctx->seedlen = 32;
-		dctx->blocklength = 30;
-		ectx->exbits = 0;
-		Q_x = p_256_qx;
-		Q_y = p_256_qy;
-		ptlen = sizeof(p_256_qx);
-		break;
-
-		case NID_secp384r1:
-		if (md_nid == NID_sha1)
-			return -2;
-		dctx->strength = 192;
-		dctx->seedlen = 48;
-		dctx->blocklength = 46;
-		ectx->exbits = 0;
-		Q_x = p_384_qx;
-		Q_y = p_384_qy;
-		ptlen = sizeof(p_384_qx);
-		break;
-
-		case NID_secp521r1:
-		if (md_nid == NID_sha1 || md_nid == NID_sha224)
-			return -2;
-		dctx->strength = 256;
-		dctx->seedlen = 66;
-		dctx->blocklength = 63;
-		ectx->exbits = 7;
-		Q_x = p_521_qx;
-		Q_y = p_521_qy;
-		ptlen = sizeof(p_521_qx);
-		break;
-
-		default:
-		return -2;
-		}
-
-	dctx->iflags |= DRBG_CUSTOM_RESEED;
-	dctx->reseed_counter = 0;
-	dctx->instantiate = drbg_ec_instantiate;
-	dctx->reseed = drbg_ec_reseed;
-	dctx->generate = drbg_ec_generate;
-	dctx->uninstantiate = drbg_ec_uninstantiate;
-
-	ectx->md = md;
-	EVP_MD_CTX_init(&ectx->mctx);
-
-	dctx->min_entropy = dctx->strength / 8;
-	dctx->max_entropy = 2 << 10;
-
-	dctx->min_nonce = dctx->min_entropy / 2;
-	dctx->max_nonce = 2 << 10;
-
-	dctx->max_pers = 2 << 10;
-	dctx->max_adin = 2 << 10;
-
-	dctx->reseed_interval = 1<<24;
-	dctx->max_request = dctx->reseed_interval * dctx->blocklength;
-
-	/* Setup internal structures */
-	ectx->bctx = BN_CTX_new();
-	if (!ectx->bctx)
-		return 0;
-	BN_CTX_start(ectx->bctx);
-
-	ectx->s = BN_new();
-
-	ectx->curve = EC_GROUP_new_by_curve_name(curve_nid);
-
-	ectx->Q = EC_POINT_new(ectx->curve);
-	ectx->ptmp = EC_POINT_new(ectx->curve);
-
-	x = BN_CTX_get(ectx->bctx);
-	y = BN_CTX_get(ectx->bctx);
-
-	if (!ectx->s || !ectx->curve || !ectx->Q || !y)
-		goto err;
-
-	if (!BN_bin2bn(Q_x, ptlen, x) || !BN_bin2bn(Q_y, ptlen, y))
-		goto err;
-	if (!EC_POINT_set_affine_coordinates_GFp(ectx->curve, ectx->Q,
-							x, y, ectx->bctx))
-		goto err;
-
-	BN_CTX_end(ectx->bctx);
-
-	return 1;
-	err:
-	BN_CTX_end(ectx->bctx);
-	drbg_ec_uninstantiate(dctx);
-	return 0;
-	}
--- a/fips/rand/fips_drbg_lib.c
+++ b/fips/rand/fips_drbg_lib.c
@@ -79,8 +79,6 @@ int FIPS_drbg_init(DRBG_CTX *dctx, int type, unsigned int flags)
 		rv = fips_drbg_ctr_init(dctx);
 	if (rv == -2)
 		rv = fips_drbg_hmac_init(dctx);
-	if (rv == -2)
-		rv = fips_drbg_ec_init(dctx);

 	if (rv <= 0)
 		{
--- a/fips/rand/fips_drbg_selftest.c
+++ b/fips/rand/fips_drbg_selftest.c
@@ -133,9 +133,6 @@ typedef struct {
 #define make_drbg_test_data_df(nid, pr, p) \
 	make_drbg_test_data(nid, DRBG_FLAG_CTR_USE_DF, pr, p)

-#define make_drbg_test_data_ec(curve, md, pr, p) \
-	make_drbg_test_data((curve << 16) | md , 0, pr, p)
-
 static DRBG_SELFTEST_DATA drbg_test[] = {
 	make_drbg_test_data_df(NID_aes_128_ctr, aes_128_use_df, 0),
 	make_drbg_test_data_df(NID_aes_192_ctr, aes_192_use_df, 0),
@@ -153,18 +150,6 @@ static DRBG_SELFTEST_DATA drbg_test[] = {
 	make_drbg_test_data(NID_hmacWithSHA256, 0, hmac_sha256, 1),
 	make_drbg_test_data(NID_hmacWithSHA384, 0, hmac_sha384, 0),
 	make_drbg_test_data(NID_hmacWithSHA512, 0, hmac_sha512, 0),
-	make_drbg_test_data_ec(NID_X9_62_prime256v1, NID_sha1, p_256_sha1, 0),
-	make_drbg_test_data_ec(NID_X9_62_prime256v1, NID_sha224, p_256_sha224, 0),
-	make_drbg_test_data_ec(NID_X9_62_prime256v1, NID_sha256, p_256_sha256, 1),
-	make_drbg_test_data_ec(NID_X9_62_prime256v1, NID_sha384, p_256_sha384, 0),
-	make_drbg_test_data_ec(NID_X9_62_prime256v1, NID_sha512, p_256_sha512, 0),
-	make_drbg_test_data_ec(NID_secp384r1, NID_sha224, p_384_sha224, 0),
-	make_drbg_test_data_ec(NID_secp384r1, NID_sha256, p_384_sha256, 0),
-	make_drbg_test_data_ec(NID_secp384r1, NID_sha384, p_384_sha384, 0),
-	make_drbg_test_data_ec(NID_secp384r1, NID_sha512, p_384_sha512, 0),
-	make_drbg_test_data_ec(NID_secp521r1, NID_sha256, p_521_sha256, 0),
-	make_drbg_test_data_ec(NID_secp521r1, NID_sha384, p_521_sha384, 0),
-	make_drbg_test_data_ec(NID_secp521r1, NID_sha512, p_521_sha512, 0),
 	{0,0,0}
 	};

--- a/fips/rand/fips_drbg_selftest.h
+++ b/fips/rand/fips_drbg_selftest.h
--- a/fips/rand/fips_rand_lcl.h
+++ b/fips/rand/fips_rand_lcl.h
@@ -236,6 +236,5 @@ struct drbg_ctx_st
 int fips_drbg_ctr_init(DRBG_CTX *dctx);
 int fips_drbg_hash_init(DRBG_CTX *dctx);
 int fips_drbg_hmac_init(DRBG_CTX *dctx);
-int fips_drbg_ec_init(DRBG_CTX *dctx);
 int fips_drbg_kat(DRBG_CTX *dctx, int nid, unsigned int flags);
 int fips_drbg_cprng_test(DRBG_CTX *dctx, const unsigned char *out);
--- a/iOS/Makefile
+++ b/iOS/Makefile
@@ -0,0 +1,76 @@
+#
+#  OpenSSL/iOS/Makefile
+#
+
+DIR=		iOS
+TOP=		..
+CC=		cc
+INCLUDES=	-I$(TOP) -I$(TOP)/include
+CFLAG=		-g -static
+MAKEFILE=	Makefile
+PERL=		perl
+RM=		rm -f
+
+EXE=incore_macho
+
+CFLAGS= $(INCLUDES) $(CFLAG)
+
+top:
+	@$(MAKE) -f $(TOP)/Makefile reflect THIS=exe
+
+exe:	fips_algvs.app/fips_algvs
+
+incore_macho:			incore_macho.c $(TOP)/crypto/sha/sha1dgst.c
+	$(HOSTCC) $(HOSTCFLAGS) -I$(TOP)/include -I$(TOP)/crypto -o $@ incore_macho.c $(TOP)/crypto/sha/sha1dgst.c
+
+fips_algvs.app/fips_algvs:	$(TOP)/test/fips_algvs.c $(TOP)/fips/fipscanister.o fopen.m incore_macho
+	FIPS_SIG=./incore_macho \
+	$(TOP)/fips/fipsld $(CFLAGS) -I$(TOP)/fips -o $@ \
+		$(TOP)/test/fips_algvs.c $(TOP)/fips/fipscanister.o \
+		fopen.m -framework Foundation || rm $@
+	codesign -f -s "iPhone Developer" --entitlements fips_algvs.app/Entitlements.plist fips_algvs.app || rm $@
+
+install:
+	@[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
+	@set -e; for i in $(EXE); \
+	do  \
+	(echo installing $$i; \
+	 cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$i.new; \
+	 chmod 755 $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$i.new; \
+	 mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$i ); \
+	 done;
+	@set -e; for i in $(SCRIPTS); \
+	do  \
+	(echo installing $$i; \
+	 cp $$i $(INSTALL_PREFIX)$(OPENSSLDIR)/misc/$$i.new; \
+	 chmod 755 $(INSTALL_PREFIX)$(OPENSSLDIR)/misc/$$i.new; \
+	 mv -f $(INSTALL_PREFIX)$(OPENSSLDIR)/misc/$$i.new $(INSTALL_PREFIX)$(OPENSSLDIR)/misc/$$i ); \
+	 done
+
+tags:
+	ctags $(SRC)
+
+tests:
+
+links:
+
+lint:
+	lint -DLINT $(INCLUDES) $(SRC)>fluff
+
+depend:
+	@if [ -z "$(THIS)" ]; then \
+	    $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; \
+	else \
+	    $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(SRC); \
+	fi
+
+dclean:
+	$(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
+	mv -f Makefile.new $(MAKEFILE)
+
+clean:
+	rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff $(EXE)
+	rm -f fips_algvs.app/fips_algvs
+
+# DO NOT DELETE THIS LINE -- make depend depends on it.
+
--- a/iOS/fips_algvs.app/Entitlements.plist
+++ b/iOS/fips_algvs.app/Entitlements.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>get-task-allow</key>
+    <true/>
+</dict>
+</plist>
--- a/iOS/fips_algvs.app/Info.plist
+++ b/iOS/fips_algvs.app/Info.plist
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleName</key>
+	<string>fips_algvs</string>
+	<key>CFBundleSupportedPlatforms</key>
+	<array>
+	    <string>iPhoneOS</string>
+	</array>
+	<key>CFBundleExecutable</key>
+	<string>fips_algvs</string>
+	<key>CFBundleIdentifier</key>
+	<string>fips_algvs</string>
+	<key>CFBundleResourceSpecification</key>
+	<string>ResourceRules.plist</string>
+	<key>LSRequiresIPhoneOS</key>
+	<true/>
+	<key>CFBundleDisplayName</key>
+	<string>fips_algvs</string>
+	<key>CFBundleVersion</key>
+	<string>1.0</string>
+</dict>
+</plist>
--- a/iOS/fips_algvs.app/ResourceRules.plist
+++ b/iOS/fips_algvs.app/ResourceRules.plist
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>rules</key>
+	<dict>
+		<key>.*</key>
+		<true/>
+		<key>Info.plist</key>
+		<dict>
+			<key>omit</key>
+			<true/>
+			<key>weight</key>
+			<real>10</real>
+		</dict>
+		<key>ResourceRules.plist</key>
+		<dict>
+			<key>omit</key>
+			<true/>
+			<key>weight</key>
+			<real>100</real>
+		</dict>
+	</dict>
+</dict>
+</plist>
--- a/iOS/fopen.m
+++ b/iOS/fopen.m
@@ -0,0 +1,93 @@
+#include <stdio.h>
+#include <dlfcn.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <Foundation/Foundation.h>
+ 
+static FILE *(*libc_fopen)(const char *, const char *) = NULL;
+
+__attribute__((constructor))
+static void pre_main(void)
+{
+    /*
+     * Pull reference to fopen(3) from libc.
+     */
+    void *handle = dlopen("libSystem.B.dylib",RTLD_LAZY);
+
+    if (handle) {
+        libc_fopen = dlsym(handle,"fopen");
+        dlclose(handle);
+    }
+
+    /*
+     * Change to Documents directory.
+     */
+    NSString *docs = [NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES) lastObject];
+
+    NSFileManager *filemgr = [NSFileManager defaultManager];
+    [filemgr changeCurrentDirectoryPath: docs];
+    [filemgr release];
+}
+
+char *mkdirhier(char *path)
+{
+    char *slash;
+    struct stat buf;
+
+    if (path[0]=='.' && path[1]=='/') path+=2;
+
+    if ((slash = strrchr(path,'/'))) {
+	*slash = '\0';
+	if (stat(path,&buf)==0) {
+	    *slash = '/';
+	    return NULL;
+	}
+	(void)mkdirhier(path);
+	mkdir (path,0777);
+	*slash = '/';
+    }
+
+    return slash;
+}
+/*
+ * Replacement fopen(3)
+ */
+FILE *fopen(const char *filename, const char *mode)
+{
+    FILE *ret;
+
+    if ((ret = (*libc_fopen)(filename,mode)) == NULL) {
+        /*
+         * If file is not present in Documents directory, try from Bundle.
+         */
+        NSString *nsspath = [NSString stringWithFormat:@"%@/%s",
+                                   [[NSBundle mainBundle] bundlePath],
+                                   filename];
+
+        if ((ret = (*libc_fopen)([nsspath cStringUsingEncoding:NSUTF8StringEncoding],mode)) == NULL &&
+	    mode[0]=='w' &&
+	    ((filename[0]!='.' && filename[0]!='/') ||
+	     (filename[0]=='.' && filename[1]=='/')) ) {
+	    /*
+	     * If not present in Bundle, create directory in Documents
+	     */
+	    char *path = strdup(filename), *slash;
+	    static int once = 1;
+
+	    if ((slash = mkdirhier(path)) && once) {
+		/*
+		 * For some reason iOS truncates first created file
+		 * upon program exit, so we create one preemptively...
+		 */
+		once = 0;
+		strcpy(slash,"/.0");
+		creat(path,0444);
+	    }
+	    free(path);
+	    ret = (*libc_fopen)(filename,mode);
+	}
+    }
+
+    return ret;
+}
--- a/iOS/incore_macho.c
+++ b/iOS/incore_macho.c
--- a/test/fips_algvs.c
+++ b/test/fips_algvs.c
@@ -70,6 +70,67 @@ int main(int argc, char **argv)
 }
 #else

+#if defined(__vxworks)
+
+#include <taskLibCommon.h>
+#include <string.h>
+
+int fips_algvs_main(int argc, char **argv);
+#define main fips_algvs_main
+
+static int fips_algvs_argv(char *a0)
+{
+	char *argv[32] = { "fips_algvs" };
+	int argc = 1;
+	int main_ret;
+
+	if (a0) {
+		char *scan = a0, *arg = a0;
+
+		while (*scan) {
+			if (*scan++ == ' ') {
+				scan[-1] = '\0';
+				argv[argc++] = arg;
+				if (argc == (sizeof(argv)/sizeof(argv[0])-1))
+					break;
+
+				while (*scan == ' ') scan++;
+				arg = scan;
+			}
+		}
+		if (*scan == '\0') argv[argc++] = arg;
+	}
+
+	argv[argc] = NULL;
+
+	main_ret = fips_algvs_main(argc, argv);
+
+	if (a0) free(a0);
+
+	return main_ret;
+}
+
+int fips_algvs(int a0)
+{
+	return taskSpawn("fips_algvs", 100, (VX_FP_TASK | VX_SPE_TASK), 100000,
+			(FUNCPTR)fips_algvs_argv,
+			a0 ? strdup(a0) : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+static FILE *fips_fopen(const char *path, const char *mode)
+{
+	char fips_path [256];
+
+	if (path[0] != '/' && strlen(path) < (sizeof(fips_path)-8)) {
+		strcpy(fips_path,"/fips0/");
+		strcat(fips_path,path);
+		return fopen(fips_path,mode);
+	}
+	return fopen(path,mode);
+}
+#define fopen fips_fopen
+#endif
+
 #define FIPS_ALGVS

 extern int fips_aesavs_main(int argc, char **argv);
@@ -265,6 +326,16 @@ int main(int argc, char **argv)
 	SysInit();
 #endif

+#if (defined(__arm__) || defined(__aarch64__))
+	if (*args && !strcmp(*args, "-noaccel"))
+		{
+		extern unsigned int OPENSSL_armcap_P;
+
+		OPENSSL_armcap_P=0;
+		args++;
+		argc--;
+		}
+#endif
 	if (*args && *args[0] != '-')
 		{
 		rv = run_prg(argc - 1, args);
--- a/util/fipsdist.pl
+++ b/util/fipsdist.pl
@@ -58,7 +58,7 @@ while (<STDIN>)
 		}
 	else
 		{
-		next unless (/^(fips\/|crypto|util|test|include|ms|c6x)/);
+		next unless (/^(fips\/|crypto|util|test|include|ms|c6x|iOS)/);
 		}
 	if (/^crypto\/([^\/]+)/)
 		{
--- a/util/incore
+++ b/util/incore
@@ -382,7 +382,7 @@ if (!$legacy_mode) {
    }

    $FINGERPRINT_ascii_value
-			= $exe->Lookup("FINGERPRINT_ascii_value")	or die;
+			= $exe->Lookup("FINGERPRINT_ascii_value");

 }
 if ($FIPS_text_startX && $FIPS_text_endX) {
@@ -439,9 +439,12 @@ $fingerprint = FIPS_incore_fingerprint();

 if ($legacy_mode) {
    print unpack("H*",$fingerprint);
-} else {
+} elsif (defined($FINGERPRINT_ascii_value)) {
    seek(FD,$FINGERPRINT_ascii_value->{st_offset},0)	or die "$!";
    print FD unpack("H*",$fingerprint)			or die "$!";
+} else {
+    seek(FD,$FIPS_signature->{st_offset},0)		or die "$!";
+    print FD $fingerprint				or die "$!";
 }

 close (FD);
Author	SHA1	Message	Date
Steve Marquess	1278ce48a5	Add target for i686 cross compilation Reviewed-by: Stephen Henson <steve@openssl.org>	2016-02-15 10:26:20 -05:00
Steve Marquess	a0f8d282d7	Add new iOS subdirectory Reviewed-by: Rich Salz <rsalz@openssl.org> Reviewed-by: Stephen Henson <steve@openssl.org>	2015-07-04 15:18:46 -04:00
Andy Polyakov	0f38e9cd78	Add new VxWorks x86 platform Reviewed-by: Rich Salz <rsalz@openssl.org> Reviewed-by: Stephen Henson <steve@openssl.org>	2015-07-04 15:17:45 -04:00
Andy Polyakov	34f39b062c	util/incore update that allows FINGERPRINT_premain-free build. As for complementary fips.c modification. Goal is to ensure that FIPS_signature does not end up in .bss segment, one guaranteed to be zeroed upon program start-up. One would expect explicitly initialized values to end up in .data segment, but it turned out that values explicitly initialized with zeros can end up in .bss. The modification does not affect program flow, because first byte was the only one of significance [to FINGERPRINT_premain]. Reviewed-by: Dr. Stephen Henson <steve@openssl.org>	2015-05-13 16:48:08 +02:00
Andy Polyakov	6db8e3bdc9	Add support for Android 5, both 32- and 64-bit cases. Special note about additional -pie flag in android-armv7. The initial reason for adding it is that Android 5 refuses to execute non-PIE binaries. But what about older systems and previously validated platforms? It should be noted that flag is not used when compiling object code, fipscanister.o in this context, only when linking applications, supplementary fips_algvs used during validation procedure. Reviewed-by: Dr. Stephen Henson <steve@openssl.org>	2015-05-13 16:47:55 +02:00
Andy Polyakov	50e2a0ea46	Additional vxWorks target. Reviewed-by: Dr. Stephen Henson <steve@openssl.org>	2015-05-13 16:47:43 +02:00
Andy Polyakov	3f137e6f1d	fipsalgtest.pl update. Reviewed-by: Dr. Stephen Henson <steve@openssl.org>	2015-05-13 16:47:32 +02:00
Andy Polyakov	97fbb0c88c	Configure: add ios-cross target with ARM assembly support. Reviewed-by: Dr. Stephen Henson <steve@openssl.org>	2015-05-13 16:47:21 +02:00
Andy Polyakov	5837e90f08	Add iOS-specific armv4cpud.S module. Normally it would be generated from a perlasm module, but doing so would affect existing armv4cpuid.S, which in turn would formally void previously validated platforms. Hense separate module is generated. Reviewed-by: Dr. Stephen Henson <steve@openssl.org>	2015-05-13 16:47:10 +02:00
Andy Polyakov	874faf2ffb	Adapt ARM assembly pack for iOS. This is achieved by filtering perlasm output through arm-xlate.pl. But note that it's done only if "flavour" argument is not 'void'. As 'void' is default value for other ARM targets, permasm output is not actually filtered on previously validated platforms. Reviewed-by: Dr. Stephen Henson <steve@openssl.org>	2015-05-13 16:46:58 +02:00
Andy Polyakov	0b45df73d2	crypto/modes/modes_lcl.h: let STRICT_ALIGNMENT be on iOS. While ARMv7 in general is capable of unaligned access, not all instructions actually are. And trouble is that compiler doesn't seem to differentiate those capable and incapable of unaligned access. As result exceptions could be observed in xts128.c and ccm128.c modules. Contemporary Linux kernels handle such exceptions by performing requested operation and resuming execution as is if it succeeded. While on iOS exception is fatal. Correct solution is to let STRICT_ALIGNMENT be on all ARM platforms, but doing so is in formal conflict with FIPS maintenance policy. Reviewed-by: Dr. Stephen Henson <steve@openssl.org>	2015-05-13 16:46:44 +02:00
Andy Polyakov	2bd3976ed0	Add iOS-specific fips_algvs application. Reviewed-by: Dr. Stephen Henson <steve@openssl.org>	2015-05-13 16:46:26 +02:00
Andy Polyakov	c6d109051d	Configure: engage ARMv8 assembly pack in ios64-cross target. Reviewed-by: Dr. Stephen Henson <steve@openssl.org>	2015-05-13 16:45:50 +02:00
Andy Polyakov	083ed53def	Engage ARMv8 assembly pack. Reviewed-by: Dr. Stephen Henson <steve@openssl.org>	2015-05-13 16:45:07 +02:00
Andy Polyakov	b84813ec01	Add ARMv8 assembly pack. Reviewed-by: Dr. Stephen Henson <steve@openssl.org>	2015-05-13 16:43:25 +02:00
Dr. Stephen Henson	7447e65fcc	support for iOS 7.x/ARMv8 Reviewed-by: Tim Hudson <tjh@openssl.org> Reviewed-by: Steve Marquess <marquess@openssl.org	2014-10-24 20:41:49 +01:00
Dr. Stephen Henson	60cd2b7206	Update fipsalgtest.pl to cope with changes in file names and format X9.31 tests need to look in files for '9.31' RSA-PSS tests may contain additonal text as well as "salt len: n". We now just look at the start of a filename for a match. Separate ECDSA2 test list. Reorder test to handle new formats: for example PQGVer for DSA2 can be detected based on file format but if this fails revert to PQGVER. For future debugging add a --debug-detect option which prints out more details of the test detection including the first few lines of each request file. Reviewed-by: Tim Hudson <tjh@openssl.org> Reviewed-by: Steve Marquess <marquess@openssl.org	2014-10-24 20:32:27 +01:00
Dr. Stephen Henson	7fb7844f3b	Remove Dual EC DRBG again... Dual EC DRBG removal now accepted for 2.0.8 onwards.	2014-07-11 19:14:15 +01:00
Dr. Stephen Henson	005563bbce	Add linux-x86_64-cross target.	2014-05-12 18:38:41 +01:00
Dr. Stephen Henson	3b43568d5b	Revert "Remove Dual EC DRBG from FIPS module." Revert Dual EC DRBG removal commit as it was not accepted for 2.0.7 version of the module. This reverts commit `200f249b8c`.	2014-05-12 18:35:30 +01:00
Dr. Stephen Henson	2659a2aa7c	QNX6-armv4 support.	2013-12-16 21:41:07 +00:00
Dr. Stephen Henson	200f249b8c	Remove Dual EC DRBG from FIPS module.	2013-12-16 19:00:58 +00:00