From 4c7c5ff667de19627861db07b50d595e47856422 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Thu, 27 Sep 2007 07:09:46 +0000 Subject: [PATCH] ARMv4 assembler pack. --- CHANGES | 4 ++++ Configure | 2 ++ config | 4 ++-- crypto/sha/Makefile | 3 +++ crypto/sha/asm/sha1-armv4-large.pl | 4 ++++ crypto/sha/asm/sha1-thumb.pl | 4 ++++ crypto/sha/asm/sha256-armv4.pl | 4 ++++ crypto/sha/asm/sha512-armv4.pl | 4 ++++ 8 files changed, 27 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index 73cf7b354..9b43c4c54 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,10 @@ Changes between 0.9.8f and 0.9.9 [xx XXX xxxx] + *) ARMv4 assembler pack. ARMv4 refers to v4 and later ISA, not CPU + "family." + [Andy Polyakov] + *) Implement certificate status request TLS extension defined in RFC3546. A client can set the appropriate parameters and receive the encoded OCSP response via a callback. A server can query the supplied parameters diff --git a/Configure b/Configure index d55dd9416..958d46ae3 100755 --- a/Configure +++ b/Configure @@ -127,6 +127,7 @@ my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o::::::::::"; my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::::::::"; my $mips3_asm=":bn-mips3.o:::::::::::"; my $s390x_asm=":bn_asm.o s390x-mont.o::aes_cbc.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::::::"; +my $armv4_asm=":bn_asm.o armv4-mont.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o::::::"; my $no_asm="::::::::::::"; # As for $BSDthreads. Idea is to maintain "collective" set of flags, @@ -323,6 +324,7 @@ my %table=( # throw in -D[BL]_ENDIAN, whichever appropriate... "linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:ppccpuid_linux32.o:linux_ppc32.o linux_ppc32-mont.o:::::sha1-ppc_linux32.o sha256-ppc_linux32.o:::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", #### IA-32 targets... "linux-ia32-icc", "icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-elf", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", diff --git a/config b/config index 5a7504702..e8128262d 100755 --- a/config +++ b/config @@ -611,8 +611,8 @@ case "$GUESSOS" in options="$options -DB_ENDIAN -mschedule=$CPUSCHEDULE -march=$CPUARCH" OUT="linux-generic32" ;; - arm*b-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;; - arm*l-*-linux2) OUT="linux-generic32"; options="$options -DL_ENDIAN" ;; + arm[1-3]*-*-linux2) OUT="linux-generic32" ;; + arm*-*-linux2) OUT="linux-armv4" ;; sh*b-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;; sh*-*-linux2) OUT="linux-generic32"; options="$options -DL_ENDIAN" ;; m68k*-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;; diff --git a/crypto/sha/Makefile b/crypto/sha/Makefile index e02d9f1dc..763b2aad6 100644 --- a/crypto/sha/Makefile +++ b/crypto/sha/Makefile @@ -71,6 +71,9 @@ sha256-ia64.s: asm/sha512-ia64.pl sha512-ia64.s: asm/sha512-ia64.pl (cd asm; $(PERL) sha512-ia64.pl ../$@ $(CFLAGS)) +sha256-armv4.s: asm/sha256-armv4.pl + $(PERL) $< $@ + # Solaris make has to be explicitly told sha1-x86_64.s: asm/sha1-x86_64.pl; $(PERL) asm/sha1-x86_64.pl $@ sha256-x86_64.s:asm/sha512-x86_64.pl; $(PERL) asm/sha512-x86_64.pl $@ diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl index f54eef5a0..436eb36d7 100644 --- a/crypto/sha/asm/sha1-armv4-large.pl +++ b/crypto/sha/asm/sha1-armv4-large.pl @@ -38,6 +38,9 @@ # the same job in Thumb, therefore the code is never twice as # small and always slower. +$output=shift; +open STDOUT,">$output"; + $ctx="r0"; $inp="r1"; $len="r2"; @@ -224,3 +227,4 @@ $code.=<<___; ___ print $code; +close STDOUT; # enforce flush diff --git a/crypto/sha/asm/sha1-thumb.pl b/crypto/sha/asm/sha1-thumb.pl index f025001b2..7c9ea9b02 100644 --- a/crypto/sha/asm/sha1-thumb.pl +++ b/crypto/sha/asm/sha1-thumb.pl @@ -19,6 +19,9 @@ # by over 40%, while code increases by only 10% or 32 bytes. But once # again, the goal was to establish _size_ benchmark, not performance. +$output=shift; +open STDOUT,">$output"; + $inline=0; #$cheat_on_binutils=1; @@ -253,3 +256,4 @@ $code.=<<___; ___ print $code; +close STDOUT; # enforce flush diff --git a/crypto/sha/asm/sha256-armv4.pl b/crypto/sha/asm/sha256-armv4.pl index 4dd09619a..0c9d999de 100644 --- a/crypto/sha/asm/sha256-armv4.pl +++ b/crypto/sha/asm/sha256-armv4.pl @@ -13,6 +13,9 @@ # lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per # byte. +$output=shift; +open STDOUT,">$output"; + $ctx="r0"; $t0="r0"; $inp="r1"; $len="r2"; $t1="r2"; @@ -173,3 +176,4 @@ ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; print $code; +close STDOUT; # enforce flush diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl index da720bd5d..230e78d7e 100644 --- a/crypto/sha/asm/sha512-armv4.pl +++ b/crypto/sha/asm/sha512-armv4.pl @@ -22,6 +22,9 @@ $hi=0; $lo=4; # ==================================================================== +$output=shift; +open STDOUT,">$output"; + $ctx="r0"; $inp="r1"; $len="r2"; @@ -391,3 +394,4 @@ ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; print $code; +close STDOUT; # enforce flush