Add sha512-ppc.pl module.
This commit is contained in:
parent
4dca00cec8
commit
17478fdede
16
Configure
16
Configure
@ -315,7 +315,7 @@ my %table=(
|
||||
# *-generic* is endian-neutral target, but ./config is free to
|
||||
# throw in -D[BL]_ENDIAN, whichever appropriate...
|
||||
"linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc32.o linux_ppc32-mont.o:::::sha1-ppc_linux32.o::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc32.o linux_ppc32-mont.o:::::sha1-ppc_linux32.o sha256-ppc_linux32.o::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
#### IA-32 targets...
|
||||
"linux-ia32-icc", "icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-elf", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
@ -323,7 +323,7 @@ my %table=(
|
||||
####
|
||||
"linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
# -bpowerpc64-linux is transient option, -m64 should be the one to use...
|
||||
"linux-ppc64", "gcc:-bpowerpc64-linux -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o linux_ppc64-mont.o:::::sha1-ppc_linux64.o::::::dlfcn:linux-shared:-fPIC:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ppc64", "gcc:-bpowerpc64-linux -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL::linux_ppc64.o linux_ppc64-mont.o:::::sha1-ppc_linux64.o sha256-ppc_linux64.o sha512-ppc_linux64.o::::::dlfcn:linux-shared:-fPIC:-bpowerpc64-linux:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
@ -408,12 +408,12 @@ my %table=(
|
||||
|
||||
#### IBM's AIX.
|
||||
"aix3-cc", "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::",
|
||||
"aix-gcc", "gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o::::::dlfcn:",
|
||||
"aix64-gcc","gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o::::::dlfcn::::::-X64",
|
||||
"aix-gcc", "gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o sha256-ppc_aix32.o::::::dlfcn:",
|
||||
"aix64-gcc","gcc:-O -DB_ENDIAN::-D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o sha256-ppc_aix64.o sha512-ppc_aix64.o::::::dlfcn::::::-X64",
|
||||
# Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE
|
||||
# at build time. $OBJECT_MODE is respected at ./config stage!
|
||||
"aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384::-qthreaded:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o::::::dlfcn:aix-shared::-q32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
|
||||
"aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o::::::dlfcn:aix-shared::-q64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64",
|
||||
"aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384::-qthreaded:AIX::BN_LLONG RC4_CHAR::aix_ppc32.o aix_ppc32-mont.o:::::sha1-ppc_aix32.o sha256-ppc_aix32.o::::::dlfcn:aix-shared::-q32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
|
||||
"aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR::aix_ppc64.o aix_ppc64-mont.o:::::sha1-ppc_aix64.o sha256-ppc_aix64.o sha512-ppc_aix64.o::::::dlfcn:aix-shared::-q64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64",
|
||||
|
||||
#
|
||||
# Cray T90 and similar (SDSC)
|
||||
@ -505,8 +505,8 @@ my %table=(
|
||||
|
||||
##### MacOS X (a.k.a. Rhapsody or Darwin) setup
|
||||
"rhapsody-ppc-cc","cc:-O3 -DB_ENDIAN::(unknown):MACOSX_RHAPSODY::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}::",
|
||||
"darwin-ppc-cc","cc:-O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::sha1-ppc_osx32.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
|
||||
"darwin64-ppc-cc","cc:-m64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc64.o osx_ppc64-mont.o:::::sha1-ppc_osx64.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-m64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
|
||||
"darwin-ppc-cc","cc:-O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::sha1-ppc_osx32.o sha256-ppc_osx32.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
|
||||
"darwin64-ppc-cc","cc:-m64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc64.o osx_ppc64-mont.o:::::sha1-ppc_osx64.o sha256-ppc_osx64.o sha512-ppc_osx64.o::::::dlfcn:darwin-shared:-fPIC -fno-common:-m64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
|
||||
"darwin-i386-cc","cc:-O3 -fomit-frame-pointer -DL_ENDIAN::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
|
||||
"debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::osx_ppc32.o osx_ppc32-mont.o:::::sha1-ppc_osx32.o::::::dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
|
||||
|
||||
|
16
TABLE
16
TABLE
@ -714,7 +714,7 @@ $des_obj =
|
||||
$aes_obj =
|
||||
$bf_obj =
|
||||
$md5_obj =
|
||||
$sha1_obj = sha1-ppc_aix32.o
|
||||
$sha1_obj = sha1-ppc_aix32.o sha256-ppc_aix32.o
|
||||
$cast_obj =
|
||||
$rc4_obj =
|
||||
$rmd160_obj =
|
||||
@ -742,7 +742,7 @@ $des_obj =
|
||||
$aes_obj =
|
||||
$bf_obj =
|
||||
$md5_obj =
|
||||
$sha1_obj = sha1-ppc_aix32.o
|
||||
$sha1_obj = sha1-ppc_aix32.o sha256-ppc_aix32.o
|
||||
$cast_obj =
|
||||
$rc4_obj =
|
||||
$rmd160_obj =
|
||||
@ -798,7 +798,7 @@ $des_obj =
|
||||
$aes_obj =
|
||||
$bf_obj =
|
||||
$md5_obj =
|
||||
$sha1_obj = sha1-ppc_aix64.o
|
||||
$sha1_obj = sha1-ppc_aix64.o sha256-ppc_aix64.o sha512-ppc_aix64.o
|
||||
$cast_obj =
|
||||
$rc4_obj =
|
||||
$rmd160_obj =
|
||||
@ -826,7 +826,7 @@ $des_obj =
|
||||
$aes_obj =
|
||||
$bf_obj =
|
||||
$md5_obj =
|
||||
$sha1_obj = sha1-ppc_aix64.o
|
||||
$sha1_obj = sha1-ppc_aix64.o sha256-ppc_aix64.o sha512-ppc_aix64.o
|
||||
$cast_obj =
|
||||
$rc4_obj =
|
||||
$rmd160_obj =
|
||||
@ -1078,7 +1078,7 @@ $des_obj =
|
||||
$aes_obj =
|
||||
$bf_obj =
|
||||
$md5_obj =
|
||||
$sha1_obj = sha1-ppc_osx32.o
|
||||
$sha1_obj = sha1-ppc_osx32.o sha256-ppc_osx32.o
|
||||
$cast_obj =
|
||||
$rc4_obj =
|
||||
$rmd160_obj =
|
||||
@ -1106,7 +1106,7 @@ $des_obj =
|
||||
$aes_obj =
|
||||
$bf_obj =
|
||||
$md5_obj =
|
||||
$sha1_obj = sha1-ppc_osx64.o
|
||||
$sha1_obj = sha1-ppc_osx64.o sha256-ppc_osx64.o sha512-ppc_osx64.o
|
||||
$cast_obj =
|
||||
$rc4_obj =
|
||||
$rmd160_obj =
|
||||
@ -3010,7 +3010,7 @@ $des_obj =
|
||||
$aes_obj =
|
||||
$bf_obj =
|
||||
$md5_obj =
|
||||
$sha1_obj = sha1-ppc_linux32.o
|
||||
$sha1_obj = sha1-ppc_linux32.o sha256-ppc_linux32.o
|
||||
$cast_obj =
|
||||
$rc4_obj =
|
||||
$rmd160_obj =
|
||||
@ -3038,7 +3038,7 @@ $des_obj =
|
||||
$aes_obj =
|
||||
$bf_obj =
|
||||
$md5_obj =
|
||||
$sha1_obj = sha1-ppc_linux64.o
|
||||
$sha1_obj = sha1-ppc_linux64.o sha256-ppc_linux64.o sha512-ppc_linux64.o
|
||||
$cast_obj =
|
||||
$rc4_obj =
|
||||
$rmd160_obj =
|
||||
|
@ -73,8 +73,14 @@ sha512-x86_64.s: asm/sha512-x86_64.pl
|
||||
|
||||
sha1-ppc_aix32.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $@
|
||||
sha1-ppc_aix64.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $@
|
||||
sha256-ppc_aix32.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $@
|
||||
sha256-ppc_aix64.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $@
|
||||
sha512-ppc_aix32.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $@
|
||||
sha512-ppc_aix64.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $@
|
||||
# non-AIX targets are believed to be armed with GNU make
|
||||
sha1-ppc_%.s: asm/sha1-ppc.pl; $(PERL) $< $@
|
||||
sha256-ppc_%.s: asm/sha512-ppc.pl; $(PERL) $< $@
|
||||
sha512-ppc_%.s: asm/sha512-ppc.pl; $(PERL) $< $@
|
||||
|
||||
files:
|
||||
$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
|
||||
|
431
crypto/sha/asm/sha512-ppc.pl
Executable file
431
crypto/sha/asm/sha512-ppc.pl
Executable file
@ -0,0 +1,431 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||
# project. Rights for redistribution and usage in source and binary
|
||||
# forms are granted according to the OpenSSL license.
|
||||
# ====================================================================
|
||||
|
||||
# I let hardware handle unaligned input, except on page boundaries
|
||||
# (see below for details). Otherwise straightforward implementation
|
||||
# with X vector in register bank. The module is big-endian [which is
|
||||
# not big deal as there're no little-endian targets left around].
|
||||
|
||||
# sha256 | sha512
|
||||
# -m64 -m32 | -m64 -m32
|
||||
# --------------------------------------+-----------------------
|
||||
# PPC970,gcc-4.0.0 +50% +38% | +40% +410%(*)
|
||||
#
|
||||
# (*) 64-bit code in 32-bit application context, which actually is
|
||||
# on TODO list
|
||||
|
||||
$output=shift;
|
||||
|
||||
if ($output =~ /64/) {
|
||||
$SIZE_T=8;
|
||||
$STU="stdu";
|
||||
$UCMP="cmpld";
|
||||
$SHL="sldi";
|
||||
$POP="ld";
|
||||
$PUSH="std";
|
||||
} elsif ($output =~ /32/) {
|
||||
$SIZE_T=4;
|
||||
$STU="stwu";
|
||||
$UCMP="cmplw";
|
||||
$SHL="slwi";
|
||||
$POP="lwz";
|
||||
$PUSH="stw";
|
||||
} else { die "nonsense $output"; }
|
||||
|
||||
( defined shift || open STDOUT,"| $^X ../perlasm/ppc-xlate.pl $output" ) ||
|
||||
die "can't call ../perlasm/ppc-xlate.pl: $!";
|
||||
|
||||
if ($output =~ /512/) {
|
||||
$func="sha512_block";
|
||||
$SZ=8;
|
||||
@Sigma0=(28,34,39);
|
||||
@Sigma1=(14,18,41);
|
||||
@sigma0=(1, 8, 7);
|
||||
@sigma1=(19,61, 6);
|
||||
$rounds=80;
|
||||
$LD="ld";
|
||||
$ST="std";
|
||||
$ROR="rotrdi";
|
||||
$SHR="srdi";
|
||||
} else {
|
||||
$func="sha256_block";
|
||||
$SZ=4;
|
||||
@Sigma0=( 2,13,22);
|
||||
@Sigma1=( 6,11,25);
|
||||
@sigma0=( 7,18, 3);
|
||||
@sigma1=(17,19,10);
|
||||
$rounds=64;
|
||||
$LD="lwz";
|
||||
$ST="stw";
|
||||
$ROR="rotrwi";
|
||||
$SHR="srwi";
|
||||
}
|
||||
|
||||
$FRAME=32*$SIZE_T;
|
||||
|
||||
$sp ="r1";
|
||||
$toc="r2"; # zapped by $Tbl
|
||||
$ctx="r3"; # zapped by $a0
|
||||
$inp="r4";
|
||||
$num="r5"; # zapped by $a1
|
||||
|
||||
$T ="r0";
|
||||
$Tbl="r2";
|
||||
$a0 ="r3";
|
||||
$a1 ="r5";
|
||||
$t0 ="r6";
|
||||
$t1 ="r7";
|
||||
|
||||
$A ="r8";
|
||||
$B ="r9";
|
||||
$C ="r10";
|
||||
$D ="r11";
|
||||
$E ="r12";
|
||||
$F ="r13";
|
||||
$G ="r14";
|
||||
$H ="r15";
|
||||
|
||||
@V=($A,$B,$C,$D,$E,$F,$G,$H);
|
||||
@X=("r16","r17","r18","r19","r20","r21","r22","r23",
|
||||
"r24","r25","r26","r27","r28","r29","r30","r31");
|
||||
|
||||
sub ROUND_00_15 {
|
||||
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
|
||||
$code.=<<___;
|
||||
$LD $T,`$i*$SZ`($Tbl)
|
||||
$ROR $a0,$e,$Sigma1[0]
|
||||
$ROR $a1,$e,$Sigma1[1]
|
||||
and $t0,$f,$e
|
||||
andc $t1,$g,$e
|
||||
add $T,$T,$h
|
||||
xor $a0,$a0,$a1
|
||||
$ROR $a1,$a1,`$Sigma1[2]-$Sigma1[1]`
|
||||
or $t0,$t0,$t1 ; Ch(e,f,g)
|
||||
add $T,$T,@X[$i]
|
||||
xor $a0,$a0,$a1 ; Sigma1(e)
|
||||
add $T,$T,$t0
|
||||
add $T,$T,$a0
|
||||
|
||||
$ROR $a0,$a,$Sigma0[0]
|
||||
$ROR $a1,$a,$Sigma0[1]
|
||||
and $t0,$a,$b
|
||||
and $t1,$a,$c
|
||||
xor $a0,$a0,$a1
|
||||
$ROR $a1,$a1,`$Sigma0[2]-$Sigma0[1]`
|
||||
xor $t0,$t0,$t1
|
||||
and $t1,$b,$c
|
||||
xor $a0,$a0,$a1 ; Sigma0(a)
|
||||
add $d,$d,$T
|
||||
xor $t0,$t0,$t1 ; Maj(a,b,c)
|
||||
add $h,$T,$a0
|
||||
add $h,$h,$t0
|
||||
|
||||
___
|
||||
}
|
||||
|
||||
sub ROUND_16_xx {
|
||||
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
|
||||
$i-=16;
|
||||
$code.=<<___;
|
||||
$ROR $a0,@X[($i+1)%16],$sigma0[0]
|
||||
$ROR $a1,@X[($i+1)%16],$sigma0[1]
|
||||
$ROR $t0,@X[($i+14)%16],$sigma1[0]
|
||||
$ROR $t1,@X[($i+14)%16],$sigma1[1]
|
||||
xor $a0,$a0,$a1
|
||||
$SHR $a1,@X[($i+1)%16],$sigma0[2]
|
||||
xor $t0,$t0,$t1
|
||||
$SHR $t1,@X[($i+14)%16],$sigma1[2]
|
||||
add @X[$i],@X[$i],@X[($i+9)%16]
|
||||
xor $a0,$a0,$a1 ; sigma0(X[(i+1)&0x0f])
|
||||
xor $t0,$t0,$t1 ; sigma1(X[(i+14)&0x0f])
|
||||
add @X[$i],@X[$i],$a0
|
||||
add @X[$i],@X[$i],$t0
|
||||
___
|
||||
&ROUND_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h);
|
||||
}
|
||||
|
||||
$code=<<___;
|
||||
.text
|
||||
|
||||
.globl $func
|
||||
.align 6
|
||||
$func:
|
||||
mflr r0
|
||||
$STU $sp,`-($FRAME+16*$SZ)`($sp)
|
||||
$SHL $num,$num,`log(16*$SZ)/log(2)`
|
||||
|
||||
$PUSH $ctx,`$FRAME-$SIZE_T*22`($sp)
|
||||
|
||||
$PUSH r0,`$FRAME-$SIZE_T*21`($sp)
|
||||
$PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
|
||||
$PUSH r13,`$FRAME-$SIZE_T*19`($sp)
|
||||
$PUSH r14,`$FRAME-$SIZE_T*18`($sp)
|
||||
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
|
||||
$PUSH r16,`$FRAME-$SIZE_T*16`($sp)
|
||||
$PUSH r17,`$FRAME-$SIZE_T*15`($sp)
|
||||
$PUSH r18,`$FRAME-$SIZE_T*14`($sp)
|
||||
$PUSH r19,`$FRAME-$SIZE_T*13`($sp)
|
||||
$PUSH r20,`$FRAME-$SIZE_T*12`($sp)
|
||||
$PUSH r21,`$FRAME-$SIZE_T*11`($sp)
|
||||
$PUSH r22,`$FRAME-$SIZE_T*10`($sp)
|
||||
$PUSH r23,`$FRAME-$SIZE_T*9`($sp)
|
||||
$PUSH r24,`$FRAME-$SIZE_T*8`($sp)
|
||||
$PUSH r25,`$FRAME-$SIZE_T*7`($sp)
|
||||
$PUSH r26,`$FRAME-$SIZE_T*6`($sp)
|
||||
$PUSH r27,`$FRAME-$SIZE_T*5`($sp)
|
||||
$PUSH r28,`$FRAME-$SIZE_T*4`($sp)
|
||||
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
|
||||
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
|
||||
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
|
||||
|
||||
$LD $A,`0*$SZ`($ctx)
|
||||
$LD $B,`1*$SZ`($ctx)
|
||||
$LD $C,`2*$SZ`($ctx)
|
||||
$LD $D,`3*$SZ`($ctx)
|
||||
$LD $E,`4*$SZ`($ctx)
|
||||
$LD $F,`5*$SZ`($ctx)
|
||||
$LD $G,`6*$SZ`($ctx)
|
||||
$LD $H,`7*$SZ`($ctx)
|
||||
|
||||
b LPICmeup
|
||||
LPICedup:
|
||||
andi. r0,$inp,3
|
||||
bne Lunaligned
|
||||
Laligned:
|
||||
add $t0,$inp,$num
|
||||
$PUSH $t0,`$FRAME-$SIZE_T*23`($sp) ; end pointer
|
||||
bl Lsha2_block_private
|
||||
Ldone:
|
||||
$POP r0,`$FRAME-$SIZE_T*21`($sp)
|
||||
$POP $toc,`$FRAME-$SIZE_T*20`($sp)
|
||||
$POP r13,`$FRAME-$SIZE_T*19`($sp)
|
||||
$POP r14,`$FRAME-$SIZE_T*18`($sp)
|
||||
$POP r15,`$FRAME-$SIZE_T*17`($sp)
|
||||
$POP r16,`$FRAME-$SIZE_T*16`($sp)
|
||||
$POP r17,`$FRAME-$SIZE_T*15`($sp)
|
||||
$POP r18,`$FRAME-$SIZE_T*14`($sp)
|
||||
$POP r19,`$FRAME-$SIZE_T*13`($sp)
|
||||
$POP r20,`$FRAME-$SIZE_T*12`($sp)
|
||||
$POP r21,`$FRAME-$SIZE_T*11`($sp)
|
||||
$POP r22,`$FRAME-$SIZE_T*10`($sp)
|
||||
$POP r23,`$FRAME-$SIZE_T*9`($sp)
|
||||
$POP r24,`$FRAME-$SIZE_T*8`($sp)
|
||||
$POP r25,`$FRAME-$SIZE_T*7`($sp)
|
||||
$POP r26,`$FRAME-$SIZE_T*6`($sp)
|
||||
$POP r27,`$FRAME-$SIZE_T*5`($sp)
|
||||
$POP r28,`$FRAME-$SIZE_T*4`($sp)
|
||||
$POP r29,`$FRAME-$SIZE_T*3`($sp)
|
||||
$POP r30,`$FRAME-$SIZE_T*2`($sp)
|
||||
$POP r31,`$FRAME-$SIZE_T*1`($sp)
|
||||
mtlr r0
|
||||
addi $sp,$sp,`$FRAME+16*$SZ`
|
||||
blr
|
||||
___
|
||||
|
||||
# PowerPC specification allows an implementation to be ill-behaved
|
||||
# upon unaligned access which crosses page boundary. "Better safe
|
||||
# than sorry" principle makes me treat it specially. But I don't
|
||||
# look for particular offending word, but rather for the input
|
||||
# block which crosses the boundary. Once found that block is aligned
|
||||
# and hashed separately...
|
||||
$code.=<<___;
|
||||
.align 4
|
||||
Lunaligned:
|
||||
subfic $t1,$inp,4096
|
||||
andi. $t1,$t1,`4096-16*$SZ` ; distance to closest page boundary
|
||||
beq Lcross_page
|
||||
$UCMP $num,$t1
|
||||
ble- Laligned ; didn't cross the page boundary
|
||||
subfc $num,$t1,$num
|
||||
add $t0,$inp,$t1
|
||||
$PUSH $num,`$FRAME-$SIZE_T*24`($sp)
|
||||
$PUSH $t0,`$FRAME-$SIZE_T*23`($sp) ; end pointer
|
||||
bl Lsha2_block_private
|
||||
$POP $num,`$FRAME-$SIZE_T*24`($sp)
|
||||
Lcross_page:
|
||||
li $t1,`16*$SZ/4`
|
||||
mtctr $t1
|
||||
addi r20,$sp,$FRAME ; spot below the frame
|
||||
Lmemcpy:
|
||||
lbz r16,0($inp)
|
||||
lbz r17,1($inp)
|
||||
lbz r18,2($inp)
|
||||
lbz r19,3($inp)
|
||||
addi $inp,$inp,4
|
||||
stb r16,0(r20)
|
||||
stb r17,1(r20)
|
||||
stb r18,2(r20)
|
||||
stb r19,3(r20)
|
||||
addi r20,r20,4
|
||||
bdnz Lmemcpy
|
||||
|
||||
$PUSH $inp,`$FRAME-$SIZE_T*25`($sp)
|
||||
addi $inp,$sp,$FRAME
|
||||
addi $t0,$sp,`$FRAME+16*$SZ`
|
||||
$PUSH $num,`$FRAME-$SIZE_T*24`($sp)
|
||||
$PUSH $t0,`$FRAME-$SIZE_T*23`($sp) ; end pointer
|
||||
bl Lsha2_block_private
|
||||
$POP $inp,`$FRAME-$SIZE_T*25`($sp)
|
||||
$POP $num,`$FRAME-$SIZE_T*24`($sp)
|
||||
addic. $num,$num,`-16*$SZ`
|
||||
bne- Lunaligned
|
||||
b Ldone
|
||||
___
|
||||
|
||||
$code.=<<___;
|
||||
.align 4
|
||||
Lsha2_block_private:
|
||||
___
|
||||
for($i=0;$i<16;$i++) {
|
||||
$code.=<<___ if ($SZ==4);
|
||||
lwz @X[$i],`$i*$SZ`($inp)
|
||||
___
|
||||
# 64-bit loads are split to 2x32-bit ones, as CPU can't handle
|
||||
# unaligned 64-bit loads, only 32-bit ones...
|
||||
$code.=<<___ if ($SZ==8);
|
||||
lwz $t0,`$i*$SZ`($inp)
|
||||
lwz @X[$i],`$i*$SZ+4`($inp)
|
||||
insrdi @X[$i],$t0,32,0
|
||||
___
|
||||
&ROUND_00_15($i,@V);
|
||||
unshift(@V,pop(@V));
|
||||
}
|
||||
$code.=<<___;
|
||||
li $T,`$rounds/16-1`
|
||||
mtctr $T
|
||||
.align 4
|
||||
Lrounds:
|
||||
addi $Tbl,$Tbl,`16*$SZ`
|
||||
___
|
||||
for(;$i<32;$i++) {
|
||||
&ROUND_16_xx($i,@V);
|
||||
unshift(@V,pop(@V));
|
||||
}
|
||||
$code.=<<___;
|
||||
bdnz- Lrounds
|
||||
|
||||
subi $Tbl,$Tbl,`($rounds-16)*$SZ`
|
||||
$POP $ctx,`$FRAME-$SIZE_T*22`($sp)
|
||||
$POP $num,`$FRAME-$SIZE_T*23`($sp) ; end pointer
|
||||
|
||||
$LD r16,`0*$SZ`($ctx)
|
||||
$LD r17,`1*$SZ`($ctx)
|
||||
$LD r18,`2*$SZ`($ctx)
|
||||
$LD r19,`3*$SZ`($ctx)
|
||||
$LD r20,`4*$SZ`($ctx)
|
||||
$LD r21,`5*$SZ`($ctx)
|
||||
$LD r22,`6*$SZ`($ctx)
|
||||
$LD r23,`7*$SZ`($ctx)
|
||||
add $A,$A,r16
|
||||
add $B,$B,r17
|
||||
add $C,$C,r18
|
||||
$ST $A,`0*$SZ`($ctx)
|
||||
add $D,$D,r19
|
||||
$ST $B,`1*$SZ`($ctx)
|
||||
add $E,$E,r20
|
||||
$ST $C,`2*$SZ`($ctx)
|
||||
add $F,$F,r21
|
||||
$ST $D,`3*$SZ`($ctx)
|
||||
add $G,$G,r22
|
||||
$ST $E,`4*$SZ`($ctx)
|
||||
add $H,$H,r23
|
||||
$ST $F,`5*$SZ`($ctx)
|
||||
addi $inp,$inp,`16*$SZ`
|
||||
$ST $G,`6*$SZ`($ctx)
|
||||
$UCMP $inp,$num
|
||||
$ST $H,`7*$SZ`($ctx)
|
||||
bne Lsha2_block_private
|
||||
blr
|
||||
___
|
||||
|
||||
# Ugly hack here, because PPC assembler syntax seem to vary too
|
||||
# much from platforms to platform...
|
||||
$code.=<<___;
|
||||
.align 6
|
||||
LPICmeup:
|
||||
bl LPIC
|
||||
b LPICedup
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
LPIC: mflr $Tbl
|
||||
addi $Tbl,$Tbl,`64-4` ; "distance" between bl and last nop
|
||||
blr
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
___
|
||||
$code.=<<___ if ($SZ==8);
|
||||
.long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
|
||||
.long 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
|
||||
.long 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
|
||||
.long 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
|
||||
.long 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
|
||||
.long 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
|
||||
.long 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
|
||||
.long 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
|
||||
.long 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
|
||||
.long 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
|
||||
.long 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
|
||||
.long 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
|
||||
.long 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
|
||||
.long 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
|
||||
.long 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
|
||||
.long 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
|
||||
.long 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
|
||||
.long 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
|
||||
.long 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
|
||||
.long 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
|
||||
.long 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
|
||||
.long 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
|
||||
.long 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
|
||||
.long 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
|
||||
.long 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
|
||||
.long 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
|
||||
.long 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
|
||||
.long 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
|
||||
.long 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
|
||||
.long 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
|
||||
.long 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
|
||||
.long 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
|
||||
.long 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
|
||||
.long 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
|
||||
.long 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
|
||||
.long 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
|
||||
.long 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
|
||||
.long 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
|
||||
.long 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
|
||||
.long 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
|
||||
___
|
||||
$code.=<<___ if ($SZ==4);
|
||||
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
||||
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
|
||||
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
|
||||
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
|
||||
.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
|
||||
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
|
||||
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
|
||||
.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
|
||||
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
|
||||
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
|
||||
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
|
||||
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
|
||||
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
|
||||
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
|
||||
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
||||
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||
___
|
||||
|
||||
$code =~ s/\`([^\`]*)\`/eval $1/gem;
|
||||
print $code;
|
||||
close STDOUT;
|
Loading…
x
Reference in New Issue
Block a user