Add ghash-parisc.pl.
This commit is contained in:
parent
8a1c92ce49
commit
5e19ee96f6
@ -135,8 +135,8 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-a
|
||||
my $mips3_asm=":bn-mips3.o:::::::::::::void";
|
||||
my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o::::::void";
|
||||
my $armv4_asm=":bn_asm.o armv4-mont.o::aes_cbc.o aes_ctr.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o::::::::void";
|
||||
my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o::::::32";
|
||||
my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o::::::64";
|
||||
my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:32";
|
||||
my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:64";
|
||||
my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o:::::::";
|
||||
my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes_ctr.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o:::::::";
|
||||
my $no_asm="::::::::::::::void";
|
||||
@ -292,6 +292,7 @@ my %table=(
|
||||
# Since there is mention of this in shlib/hpux10-cc.sh
|
||||
"hpux-parisc-cc-o4","cc:-Ae +O4 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY::-D_REENTRANT::-ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${no_asm}:dl:hpux-shared:+Z:-b:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"hpux-parisc-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${no_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"hpux-parisc1_1-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${parisc11_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"hpux-parisc2-gcc","gcc:-march=2.0 -O3 -DB_ENDIAN -D_REENTRANT::::-Wl,+s -ldld:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_PTR DES_UNROLL DES_RISC1::pa-risc2.o:::::::::::::void:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"hpux64-parisc2-gcc","gcc:-O3 -DB_ENDIAN -D_REENTRANT::::-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::pa-risc2W.o:::::::::::::void:dlfcn:hpux-shared:-fpic:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_64",
|
||||
|
||||
|
36
TABLE
36
TABLE
@ -2933,7 +2933,7 @@ $rmd160_obj =
|
||||
$rc5_obj =
|
||||
$wp_obj =
|
||||
$cmll_obj =
|
||||
$modes_obj =
|
||||
$modes_obj = ghash-parisc.o
|
||||
$perlasm_scheme = 32
|
||||
$dso_scheme = dl
|
||||
$shared_target= hpux-shared
|
||||
@ -2944,6 +2944,38 @@ $ranlib =
|
||||
$arflags =
|
||||
$multilib = /pa1.1
|
||||
|
||||
*** hpux-parisc1_1-gcc
|
||||
$cc = gcc
|
||||
$cflags = -O3 -DB_ENDIAN -DBN_DIV2W
|
||||
$unistd =
|
||||
$thread_cflag = -D_REENTRANT
|
||||
$sys_id =
|
||||
$lflags = -Wl,+s -ldld
|
||||
$bn_ops = BN_LLONG DES_PTR DES_UNROLL DES_RISC1
|
||||
$cpuid_obj = pariscid.o
|
||||
$bn_obj = bn_asm.o parisc-mont.o
|
||||
$des_obj =
|
||||
$aes_obj = aes_core.o aes_cbc.o aes_ctr.o aes-parisc.o
|
||||
$bf_obj =
|
||||
$md5_obj =
|
||||
$sha1_obj = sha1-parisc.o sha256-parisc.o sha512-parisc.o
|
||||
$cast_obj =
|
||||
$rc4_obj = rc4-parisc.o
|
||||
$rmd160_obj =
|
||||
$rc5_obj =
|
||||
$wp_obj =
|
||||
$cmll_obj =
|
||||
$modes_obj = ghash-parisc.o
|
||||
$perlasm_scheme = 32
|
||||
$dso_scheme = dl
|
||||
$shared_target= hpux-shared
|
||||
$shared_cflag = -fPIC
|
||||
$shared_ldflag = -shared
|
||||
$shared_extension = .sl.$(SHLIB_MAJOR).$(SHLIB_MINOR)
|
||||
$ranlib =
|
||||
$arflags =
|
||||
$multilib =
|
||||
|
||||
*** hpux-parisc2-cc
|
||||
$cc = cc
|
||||
$cflags = +DA2.0 +DS2.0 +O3 +Optrs_strongly_typed -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY -D_REENTRANT
|
||||
@ -3093,7 +3125,7 @@ $rmd160_obj =
|
||||
$rc5_obj =
|
||||
$wp_obj =
|
||||
$cmll_obj =
|
||||
$modes_obj =
|
||||
$modes_obj = ghash-parisc.o
|
||||
$perlasm_scheme = 64
|
||||
$dso_scheme = dlfcn
|
||||
$shared_target= hpux-shared
|
||||
|
@ -50,9 +50,11 @@ ghash-x86.s: asm/ghash-x86.pl
|
||||
ghash-x86_64.s: asm/ghash-x86_64.pl
|
||||
$(PERL) asm/ghash-x86_64.pl $(PERLASM_SCHEME) > $@
|
||||
ghash-sparcv9.s: asm/ghash-sparcv9.pl
|
||||
$(PERL) asm/ghash-sparcv8.pl $(CFLAGS) > $@
|
||||
$(PERL) asm/ghash-sparcv9.pl $(CFLAGS) > $@
|
||||
ghash-alpha.s: asm/ghash-alpha.pl
|
||||
$(PERL) $< | $(CC) -E - | tee $@ > /dev/null
|
||||
ghash-parisc.s: asm/ghash-parisc.pl
|
||||
$($PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@
|
||||
# GNU make "catch all"
|
||||
ghash-%.s: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $(CFLAGS) > $@
|
||||
|
||||
|
730
crypto/modes/asm/ghash-parisc.pl
Normal file
730
crypto/modes/asm/ghash-parisc.pl
Normal file
@ -0,0 +1,730 @@
|
||||
#!/usr/bin/env perl
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# April 2010
|
||||
#
|
||||
# The module implements "4-bit" GCM GHASH function and underlying
|
||||
# single multiplication operation in GF(2^128). "4-bit" means that it
|
||||
# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
|
||||
# it processes one byte in 19 cycles, which is more than twice as fast
|
||||
# as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for 8
|
||||
# cycles, but measured performance on PA-8600 system is ~9 cycles per
|
||||
# processed byte. This is ~2.2x faster than 64-bit code generated by
|
||||
# vendor compiler (which used to be very hard to beat:-).
|
||||
#
|
||||
# Special thanks to polarhome.com for providing HP-UX account.
|
||||
|
||||
$flavour = shift;
|
||||
$output = shift;
|
||||
open STDOUT,">$output";
|
||||
|
||||
if ($flavour =~ /64/) {
|
||||
$LEVEL ="2.0W";
|
||||
$SIZE_T =8;
|
||||
$FRAME_MARKER =80;
|
||||
$SAVED_RP =16;
|
||||
$PUSH ="std";
|
||||
$PUSHMA ="std,ma";
|
||||
$POP ="ldd";
|
||||
$POPMB ="ldd,mb";
|
||||
$NREGS =6;
|
||||
} else {
|
||||
$LEVEL ="1.0"; #"\n\t.ALLOW\t2.0";
|
||||
$SIZE_T =4;
|
||||
$FRAME_MARKER =48;
|
||||
$SAVED_RP =20;
|
||||
$PUSH ="stw";
|
||||
$PUSHMA ="stwm";
|
||||
$POP ="ldw";
|
||||
$POPMB ="ldwm";
|
||||
$NREGS =11;
|
||||
}
|
||||
|
||||
$FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker
|
||||
# [+ argument transfer]
|
||||
|
||||
################# volatile registers
|
||||
$Xi="%r26"; # argument block
|
||||
$Htbl="%r25";
|
||||
$inp="%r24";
|
||||
$len="%r23";
|
||||
$Hhh=$Htbl; # variables
|
||||
$Hll="%r22";
|
||||
$Zhh="%r21";
|
||||
$Zll="%r20";
|
||||
$cnt="%r19";
|
||||
$rem_4bit="%r28";
|
||||
$rem="%r29";
|
||||
$mask0xf0="%r31";
|
||||
|
||||
################# preserved registers
|
||||
$Thh="%r1";
|
||||
$Tll="%r2";
|
||||
$nlo="%r3";
|
||||
$nhi="%r4";
|
||||
$byte="%r5";
|
||||
if ($SIZE_T==4) {
|
||||
$Zhl="%r6";
|
||||
$Zlh="%r7";
|
||||
$Hhl="%r8";
|
||||
$Hlh="%r9";
|
||||
$Thl="%r10";
|
||||
$Tlh="%r11";
|
||||
}
|
||||
$rem2="%r6"; # used in PA-RISC 2.0 code
|
||||
|
||||
$code.=<<___;
|
||||
.LEVEL $LEVEL
|
||||
.SPACE \$TEXT\$
|
||||
.SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
|
||||
|
||||
.EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
|
||||
.ALIGN 64
|
||||
gcm_gmult_4bit
|
||||
.PROC
|
||||
.CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
|
||||
.ENTRY
|
||||
$PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
|
||||
$PUSHMA %r3,$FRAME(%sp)
|
||||
$PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
|
||||
$PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
|
||||
$PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
|
||||
___
|
||||
$code.=<<___ if ($SIZE_T==4);
|
||||
$PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
|
||||
$PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
|
||||
$PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
|
||||
$PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
|
||||
$PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
|
||||
___
|
||||
$code.=<<___;
|
||||
blr %r0,$rem_4bit
|
||||
ldi 3,$rem
|
||||
L\$pic_gmult
|
||||
andcm $rem_4bit,$rem,$rem_4bit
|
||||
addl $inp,$len,$len
|
||||
ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
|
||||
ldi 0xf0,$mask0xf0
|
||||
___
|
||||
$code.=<<___ if ($SIZE_T==4);
|
||||
ldi 31,$rem
|
||||
mtctl $rem,%cr11
|
||||
extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
|
||||
b L\$parisc1_gmult
|
||||
nop
|
||||
___
|
||||
|
||||
$code.=<<___;
|
||||
ldb 15($Xi),$nlo
|
||||
ldo 8($Htbl),$Hll
|
||||
|
||||
and $mask0xf0,$nlo,$nhi
|
||||
depd,z $nlo,59,4,$nlo
|
||||
|
||||
ldd $nlo($Hll),$Zll
|
||||
ldd $nlo($Hhh),$Zhh
|
||||
|
||||
depd,z $Zll,60,4,$rem
|
||||
shrpd $Zhh,$Zll,4,$Zll
|
||||
extrd,u $Zhh,59,60,$Zhh
|
||||
ldb 14($Xi),$nlo
|
||||
|
||||
ldd $nhi($Hll),$Tll
|
||||
ldd $nhi($Hhh),$Thh
|
||||
and $mask0xf0,$nlo,$nhi
|
||||
depd,z $nlo,59,4,$nlo
|
||||
|
||||
xor $Tll,$Zll,$Zll
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldd $rem($rem_4bit),$rem
|
||||
b L\$oop_gmult_pa2
|
||||
ldi 13,$cnt
|
||||
|
||||
.ALIGN 8
|
||||
L\$oop_gmult_pa2
|
||||
xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
|
||||
depd,z $Zll,60,4,$rem
|
||||
|
||||
shrpd $Zhh,$Zll,4,$Zll
|
||||
extrd,u $Zhh,59,60,$Zhh
|
||||
ldd $nlo($Hll),$Tll
|
||||
ldd $nlo($Hhh),$Thh
|
||||
|
||||
xor $Tll,$Zll,$Zll
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldd $rem($rem_4bit),$rem
|
||||
|
||||
xor $rem,$Zhh,$Zhh
|
||||
depd,z $Zll,60,4,$rem
|
||||
ldbx $cnt($Xi),$nlo
|
||||
|
||||
shrpd $Zhh,$Zll,4,$Zll
|
||||
extrd,u $Zhh,59,60,$Zhh
|
||||
ldd $nhi($Hll),$Tll
|
||||
ldd $nhi($Hhh),$Thh
|
||||
|
||||
and $mask0xf0,$nlo,$nhi
|
||||
depd,z $nlo,59,4,$nlo
|
||||
ldd $rem($rem_4bit),$rem
|
||||
|
||||
xor $Tll,$Zll,$Zll
|
||||
addib,uv -1,$cnt,L\$oop_gmult_pa2
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
|
||||
xor $rem,$Zhh,$Zhh
|
||||
depd,z $Zll,60,4,$rem
|
||||
|
||||
shrpd $Zhh,$Zll,4,$Zll
|
||||
extrd,u $Zhh,59,60,$Zhh
|
||||
ldd $nlo($Hll),$Tll
|
||||
ldd $nlo($Hhh),$Thh
|
||||
|
||||
xor $Tll,$Zll,$Zll
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldd $rem($rem_4bit),$rem
|
||||
|
||||
xor $rem,$Zhh,$Zhh
|
||||
depd,z $Zll,60,4,$rem
|
||||
|
||||
shrpd $Zhh,$Zll,4,$Zll
|
||||
extrd,u $Zhh,59,60,$Zhh
|
||||
ldd $nhi($Hll),$Tll
|
||||
ldd $nhi($Hhh),$Thh
|
||||
|
||||
xor $Tll,$Zll,$Zll
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldd $rem($rem_4bit),$rem
|
||||
|
||||
xor $rem,$Zhh,$Zhh
|
||||
std $Zll,8($Xi)
|
||||
std $Zhh,0($Xi)
|
||||
___
|
||||
|
||||
$code.=<<___ if ($SIZE_T==4);
|
||||
b L\$done_gmult
|
||||
nop
|
||||
|
||||
L\$parisc1_gmult
|
||||
ldb 15($Xi),$nlo
|
||||
ldo 12($Htbl),$Hll
|
||||
ldo 8($Htbl),$Hlh
|
||||
ldo 4($Htbl),$Hhl
|
||||
|
||||
and $mask0xf0,$nlo,$nhi
|
||||
zdep $nlo,27,4,$nlo
|
||||
|
||||
ldwx $nlo($Hll),$Zll
|
||||
ldwx $nlo($Hlh),$Zlh
|
||||
ldwx $nlo($Hhl),$Zhl
|
||||
ldwx $nlo($Hhh),$Zhh
|
||||
zdep $Zll,28,4,$rem
|
||||
ldb 14($Xi),$nlo
|
||||
ldwx $rem($rem_4bit),$rem
|
||||
shrpw $Zlh,$Zll,4,$Zll
|
||||
ldwx $nhi($Hll),$Tll
|
||||
shrpw $Zhl,$Zlh,4,$Zlh
|
||||
ldwx $nhi($Hlh),$Tlh
|
||||
shrpw $Zhh,$Zhl,4,$Zhl
|
||||
ldwx $nhi($Hhl),$Thl
|
||||
extru $Zhh,27,28,$Zhh
|
||||
ldwx $nhi($Hhh),$Thh
|
||||
xor $rem,$Zhh,$Zhh
|
||||
and $mask0xf0,$nlo,$nhi
|
||||
zdep $nlo,27,4,$nlo
|
||||
|
||||
xor $Tll,$Zll,$Zll
|
||||
ldwx $nlo($Hll),$Tll
|
||||
xor $Tlh,$Zlh,$Zlh
|
||||
ldwx $nlo($Hlh),$Tlh
|
||||
xor $Thl,$Zhl,$Zhl
|
||||
b L\$oop_gmult_pa1
|
||||
ldi 13,$cnt
|
||||
|
||||
.ALIGN 8
|
||||
L\$oop_gmult_pa1
|
||||
zdep $Zll,28,4,$rem
|
||||
ldwx $nlo($Hhl),$Thl
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldwx $rem($rem_4bit),$rem
|
||||
shrpw $Zlh,$Zll,4,$Zll
|
||||
ldwx $nlo($Hhh),$Thh
|
||||
shrpw $Zhl,$Zlh,4,$Zlh
|
||||
ldbx $cnt($Xi),$nlo
|
||||
xor $Tll,$Zll,$Zll
|
||||
ldwx $nhi($Hll),$Tll
|
||||
shrpw $Zhh,$Zhl,4,$Zhl
|
||||
xor $Tlh,$Zlh,$Zlh
|
||||
ldwx $nhi($Hlh),$Tlh
|
||||
extru $Zhh,27,28,$Zhh
|
||||
xor $Thl,$Zhl,$Zhl
|
||||
ldwx $nhi($Hhl),$Thl
|
||||
xor $rem,$Zhh,$Zhh
|
||||
zdep $Zll,28,4,$rem
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldwx $nhi($Hhh),$Thh
|
||||
shrpw $Zlh,$Zll,4,$Zll
|
||||
ldwx $rem($rem_4bit),$rem
|
||||
shrpw $Zhl,$Zlh,4,$Zlh
|
||||
shrpw $Zhh,$Zhl,4,$Zhl
|
||||
and $mask0xf0,$nlo,$nhi
|
||||
extru $Zhh,27,28,$Zhh
|
||||
zdep $nlo,27,4,$nlo
|
||||
xor $Tll,$Zll,$Zll
|
||||
ldwx $nlo($Hll),$Tll
|
||||
xor $Tlh,$Zlh,$Zlh
|
||||
ldwx $nlo($Hlh),$Tlh
|
||||
xor $rem,$Zhh,$Zhh
|
||||
addib,uv -1,$cnt,L\$oop_gmult_pa1
|
||||
xor $Thl,$Zhl,$Zhl
|
||||
|
||||
zdep $Zll,28,4,$rem
|
||||
ldwx $nlo($Hhl),$Thl
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldwx $rem($rem_4bit),$rem
|
||||
shrpw $Zlh,$Zll,4,$Zll
|
||||
ldwx $nlo($Hhh),$Thh
|
||||
shrpw $Zhl,$Zlh,4,$Zlh
|
||||
xor $Tll,$Zll,$Zll
|
||||
ldwx $nhi($Hll),$Tll
|
||||
shrpw $Zhh,$Zhl,4,$Zhl
|
||||
xor $Tlh,$Zlh,$Zlh
|
||||
ldwx $nhi($Hlh),$Tlh
|
||||
extru $Zhh,27,28,$Zhh
|
||||
xor $rem,$Zhh,$Zhh
|
||||
xor $Thl,$Zhl,$Zhl
|
||||
ldwx $nhi($Hhl),$Thl
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldwx $nhi($Hhh),$Thh
|
||||
zdep $Zll,28,4,$rem
|
||||
ldwx $rem($rem_4bit),$rem
|
||||
shrpw $Zlh,$Zll,4,$Zll
|
||||
shrpw $Zhl,$Zlh,4,$Zlh
|
||||
shrpw $Zhh,$Zhl,4,$Zhl
|
||||
extru $Zhh,27,28,$Zhh
|
||||
xor $Tll,$Zll,$Zll
|
||||
xor $Tlh,$Zlh,$Zlh
|
||||
xor $rem,$Zhh,$Zhh
|
||||
stw $Zll,12($Xi)
|
||||
xor $Thl,$Zhl,$Zhl
|
||||
stw $Zlh,8($Xi)
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
stw $Zhl,4($Xi)
|
||||
stw $Zhh,0($Xi)
|
||||
___
|
||||
$code.=<<___;
|
||||
L\$done_gmult
|
||||
$POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
|
||||
$POP `-$FRAME+1*$SIZE_T`(%sp),%r4
|
||||
$POP `-$FRAME+2*$SIZE_T`(%sp),%r5
|
||||
$POP `-$FRAME+3*$SIZE_T`(%sp),%r6
|
||||
___
|
||||
$code.=<<___ if ($SIZE_T==4);
|
||||
$POP `-$FRAME+4*$SIZE_T`(%sp),%r7
|
||||
$POP `-$FRAME+5*$SIZE_T`(%sp),%r8
|
||||
$POP `-$FRAME+6*$SIZE_T`(%sp),%r9
|
||||
$POP `-$FRAME+7*$SIZE_T`(%sp),%r10
|
||||
$POP `-$FRAME+8*$SIZE_T`(%sp),%r11
|
||||
___
|
||||
$code.=<<___;
|
||||
bv (%r2)
|
||||
.EXIT
|
||||
$POPMB -$FRAME(%sp),%r3
|
||||
.PROCEND
|
||||
|
||||
.EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
|
||||
.ALIGN 64
|
||||
gcm_ghash_4bit
|
||||
.PROC
|
||||
.CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11
|
||||
.ENTRY
|
||||
$PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
|
||||
$PUSHMA %r3,$FRAME(%sp)
|
||||
$PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
|
||||
$PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
|
||||
$PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
|
||||
___
|
||||
$code.=<<___ if ($SIZE_T==4);
|
||||
$PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
|
||||
$PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
|
||||
$PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
|
||||
$PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
|
||||
$PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
|
||||
___
|
||||
$code.=<<___;
|
||||
blr %r0,$rem_4bit
|
||||
ldi 3,$rem
|
||||
L\$pic_ghash
|
||||
andcm $rem_4bit,$rem,$rem_4bit
|
||||
addl $inp,$len,$len
|
||||
ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
|
||||
ldi 0xf0,$mask0xf0
|
||||
___
|
||||
$code.=<<___ if ($SIZE_T==4);
|
||||
ldi 31,$rem
|
||||
mtctl $rem,%cr11
|
||||
extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
|
||||
b L\$parisc1_ghash
|
||||
nop
|
||||
___
|
||||
|
||||
$code.=<<___;
|
||||
ldb 15($Xi),$nlo
|
||||
ldo 8($Htbl),$Hll
|
||||
|
||||
L\$outer_ghash_pa2
|
||||
ldb 15($inp),$nhi
|
||||
xor $nhi,$nlo,$nlo
|
||||
and $mask0xf0,$nlo,$nhi
|
||||
depd,z $nlo,59,4,$nlo
|
||||
|
||||
ldd $nlo($Hll),$Zll
|
||||
ldd $nlo($Hhh),$Zhh
|
||||
|
||||
depd,z $Zll,60,4,$rem
|
||||
shrpd $Zhh,$Zll,4,$Zll
|
||||
extrd,u $Zhh,59,60,$Zhh
|
||||
ldb 14($Xi),$nlo
|
||||
ldb 14($inp),$byte
|
||||
|
||||
ldd $nhi($Hll),$Tll
|
||||
ldd $nhi($Hhh),$Thh
|
||||
xor $byte,$nlo,$nlo
|
||||
and $mask0xf0,$nlo,$nhi
|
||||
depd,z $nlo,59,4,$nlo
|
||||
|
||||
xor $Tll,$Zll,$Zll
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldd $rem($rem_4bit),$rem
|
||||
b L\$oop_ghash_pa2
|
||||
ldi 13,$cnt
|
||||
|
||||
.ALIGN 8
|
||||
L\$oop_ghash_pa2
|
||||
xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
|
||||
depd,z $Zll,60,4,$rem2
|
||||
|
||||
shrpd $Zhh,$Zll,4,$Zll
|
||||
extrd,u $Zhh,59,60,$Zhh
|
||||
ldd $nlo($Hll),$Tll
|
||||
ldd $nlo($Hhh),$Thh
|
||||
|
||||
xor $Tll,$Zll,$Zll
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldbx $cnt($Xi),$nlo
|
||||
ldbx $cnt($inp),$byte
|
||||
|
||||
depd,z $Zll,60,4,$rem
|
||||
shrpd $Zhh,$Zll,4,$Zll
|
||||
ldd $rem2($rem_4bit),$rem2
|
||||
|
||||
xor $rem2,$Zhh,$Zhh
|
||||
xor $byte,$nlo,$nlo
|
||||
ldd $nhi($Hll),$Tll
|
||||
ldd $nhi($Hhh),$Thh
|
||||
|
||||
and $mask0xf0,$nlo,$nhi
|
||||
depd,z $nlo,59,4,$nlo
|
||||
|
||||
extrd,u $Zhh,59,60,$Zhh
|
||||
xor $Tll,$Zll,$Zll
|
||||
|
||||
ldd $rem($rem_4bit),$rem
|
||||
addib,uv -1,$cnt,L\$oop_ghash_pa2
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
|
||||
xor $rem,$Zhh,$Zhh
|
||||
depd,z $Zll,60,4,$rem2
|
||||
|
||||
shrpd $Zhh,$Zll,4,$Zll
|
||||
extrd,u $Zhh,59,60,$Zhh
|
||||
ldd $nlo($Hll),$Tll
|
||||
ldd $nlo($Hhh),$Thh
|
||||
|
||||
xor $Tll,$Zll,$Zll
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
|
||||
depd,z $Zll,60,4,$rem
|
||||
shrpd $Zhh,$Zll,4,$Zll
|
||||
ldd $rem2($rem_4bit),$rem2
|
||||
|
||||
xor $rem2,$Zhh,$Zhh
|
||||
ldd $nhi($Hll),$Tll
|
||||
ldd $nhi($Hhh),$Thh
|
||||
|
||||
extrd,u $Zhh,59,60,$Zhh
|
||||
xor $Tll,$Zll,$Zll
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldd $rem($rem_4bit),$rem
|
||||
|
||||
xor $rem,$Zhh,$Zhh
|
||||
std $Zll,8($Xi)
|
||||
ldo 16($inp),$inp
|
||||
std $Zhh,0($Xi)
|
||||
cmpb,*<> $inp,$len,L\$outer_ghash_pa2
|
||||
copy $Zll,$nlo
|
||||
___
|
||||
|
||||
$code.=<<___ if ($SIZE_T==4);
|
||||
b L\$done_ghash
|
||||
nop
|
||||
|
||||
L\$parisc1_ghash
|
||||
ldb 15($Xi),$nlo
|
||||
ldo 12($Htbl),$Hll
|
||||
ldo 8($Htbl),$Hlh
|
||||
ldo 4($Htbl),$Hhl
|
||||
|
||||
L\$outer_ghash_pa1
|
||||
ldb 15($inp),$byte
|
||||
xor $byte,$nlo,$nlo
|
||||
and $mask0xf0,$nlo,$nhi
|
||||
zdep $nlo,27,4,$nlo
|
||||
|
||||
ldwx $nlo($Hll),$Zll
|
||||
ldwx $nlo($Hlh),$Zlh
|
||||
ldwx $nlo($Hhl),$Zhl
|
||||
ldwx $nlo($Hhh),$Zhh
|
||||
zdep $Zll,28,4,$rem
|
||||
ldb 14($Xi),$nlo
|
||||
ldb 14($inp),$byte
|
||||
ldwx $rem($rem_4bit),$rem
|
||||
shrpw $Zlh,$Zll,4,$Zll
|
||||
ldwx $nhi($Hll),$Tll
|
||||
shrpw $Zhl,$Zlh,4,$Zlh
|
||||
ldwx $nhi($Hlh),$Tlh
|
||||
shrpw $Zhh,$Zhl,4,$Zhl
|
||||
ldwx $nhi($Hhl),$Thl
|
||||
extru $Zhh,27,28,$Zhh
|
||||
ldwx $nhi($Hhh),$Thh
|
||||
xor $byte,$nlo,$nlo
|
||||
xor $rem,$Zhh,$Zhh
|
||||
and $mask0xf0,$nlo,$nhi
|
||||
zdep $nlo,27,4,$nlo
|
||||
|
||||
xor $Tll,$Zll,$Zll
|
||||
ldwx $nlo($Hll),$Tll
|
||||
xor $Tlh,$Zlh,$Zlh
|
||||
ldwx $nlo($Hlh),$Tlh
|
||||
xor $Thl,$Zhl,$Zhl
|
||||
b L\$oop_ghash_pa1
|
||||
ldi 13,$cnt
|
||||
|
||||
.ALIGN 8
|
||||
L\$oop_ghash_pa1
|
||||
zdep $Zll,28,4,$rem
|
||||
ldwx $nlo($Hhl),$Thl
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldwx $rem($rem_4bit),$rem
|
||||
shrpw $Zlh,$Zll,4,$Zll
|
||||
ldwx $nlo($Hhh),$Thh
|
||||
shrpw $Zhl,$Zlh,4,$Zlh
|
||||
ldbx $cnt($Xi),$nlo
|
||||
xor $Tll,$Zll,$Zll
|
||||
ldwx $nhi($Hll),$Tll
|
||||
shrpw $Zhh,$Zhl,4,$Zhl
|
||||
ldbx $cnt($inp),$byte
|
||||
xor $Tlh,$Zlh,$Zlh
|
||||
ldwx $nhi($Hlh),$Tlh
|
||||
extru $Zhh,27,28,$Zhh
|
||||
xor $Thl,$Zhl,$Zhl
|
||||
ldwx $nhi($Hhl),$Thl
|
||||
xor $rem,$Zhh,$Zhh
|
||||
zdep $Zll,28,4,$rem
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldwx $nhi($Hhh),$Thh
|
||||
shrpw $Zlh,$Zll,4,$Zll
|
||||
ldwx $rem($rem_4bit),$rem
|
||||
shrpw $Zhl,$Zlh,4,$Zlh
|
||||
xor $byte,$nlo,$nlo
|
||||
shrpw $Zhh,$Zhl,4,$Zhl
|
||||
and $mask0xf0,$nlo,$nhi
|
||||
extru $Zhh,27,28,$Zhh
|
||||
zdep $nlo,27,4,$nlo
|
||||
xor $Tll,$Zll,$Zll
|
||||
ldwx $nlo($Hll),$Tll
|
||||
xor $Tlh,$Zlh,$Zlh
|
||||
ldwx $nlo($Hlh),$Tlh
|
||||
xor $rem,$Zhh,$Zhh
|
||||
addib,uv -1,$cnt,L\$oop_ghash_pa1
|
||||
xor $Thl,$Zhl,$Zhl
|
||||
|
||||
zdep $Zll,28,4,$rem
|
||||
ldwx $nlo($Hhl),$Thl
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldwx $rem($rem_4bit),$rem
|
||||
shrpw $Zlh,$Zll,4,$Zll
|
||||
ldwx $nlo($Hhh),$Thh
|
||||
shrpw $Zhl,$Zlh,4,$Zlh
|
||||
xor $Tll,$Zll,$Zll
|
||||
ldwx $nhi($Hll),$Tll
|
||||
shrpw $Zhh,$Zhl,4,$Zhl
|
||||
xor $Tlh,$Zlh,$Zlh
|
||||
ldwx $nhi($Hlh),$Tlh
|
||||
extru $Zhh,27,28,$Zhh
|
||||
xor $rem,$Zhh,$Zhh
|
||||
xor $Thl,$Zhl,$Zhl
|
||||
ldwx $nhi($Hhl),$Thl
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
ldwx $nhi($Hhh),$Thh
|
||||
zdep $Zll,28,4,$rem
|
||||
ldwx $rem($rem_4bit),$rem
|
||||
shrpw $Zlh,$Zll,4,$Zll
|
||||
shrpw $Zhl,$Zlh,4,$Zlh
|
||||
shrpw $Zhh,$Zhl,4,$Zhl
|
||||
extru $Zhh,27,28,$Zhh
|
||||
xor $Tll,$Zll,$Zll
|
||||
xor $Tlh,$Zlh,$Zlh
|
||||
xor $rem,$Zhh,$Zhh
|
||||
stw $Zll,12($Xi)
|
||||
xor $Thl,$Zhl,$Zhl
|
||||
stw $Zlh,8($Xi)
|
||||
xor $Thh,$Zhh,$Zhh
|
||||
stw $Zhl,4($Xi)
|
||||
ldo 16($inp),$inp
|
||||
stw $Zhh,0($Xi)
|
||||
comb,<> $inp,$len,L\$outer_ghash_pa1
|
||||
copy $Zll,$nlo
|
||||
___
|
||||
$code.=<<___;
|
||||
L\$done_ghash
|
||||
$POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
|
||||
$POP `-$FRAME+1*$SIZE_T`(%sp),%r4
|
||||
$POP `-$FRAME+2*$SIZE_T`(%sp),%r5
|
||||
$POP `-$FRAME+3*$SIZE_T`(%sp),%r6
|
||||
___
|
||||
$code.=<<___ if ($SIZE_T==4);
|
||||
$POP `-$FRAME+4*$SIZE_T`(%sp),%r7
|
||||
$POP `-$FRAME+5*$SIZE_T`(%sp),%r8
|
||||
$POP `-$FRAME+6*$SIZE_T`(%sp),%r9
|
||||
$POP `-$FRAME+7*$SIZE_T`(%sp),%r10
|
||||
$POP `-$FRAME+8*$SIZE_T`(%sp),%r11
|
||||
___
|
||||
$code.=<<___;
|
||||
bv (%r2)
|
||||
.EXIT
|
||||
$POPMB -$FRAME(%sp),%r3
|
||||
.PROCEND
|
||||
|
||||
.ALIGN 64
|
||||
L\$rem_4bit
|
||||
.WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
|
||||
.WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
|
||||
.WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
|
||||
.WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
|
||||
.STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by <appro\@openssl.org>"
|
||||
.ALIGN 64
|
||||
___
|
||||
|
||||
# Explicitly encode PA-RISC 2.0 instructions used in this module, so
|
||||
# that it can be compiled with .LEVEL 1.0. It should be noted that I
|
||||
# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
|
||||
# directive...
|
||||
|
||||
my $ldd = sub {
|
||||
my ($mod,$args) = @_;
|
||||
my $orig = "ldd$mod\t$args";
|
||||
|
||||
if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 4
|
||||
{ my $opcode=(0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3;
|
||||
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
||||
}
|
||||
elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 5
|
||||
{ my $opcode=(0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3;
|
||||
$opcode|=(($1&0xF)<<17)|(($1&0x10)<<12); # encode offset
|
||||
$opcode|=(1<<5) if ($mod =~ /^,m/);
|
||||
$opcode|=(1<<13) if ($mod =~ /^,mb/);
|
||||
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
||||
}
|
||||
else { "\t".$orig; }
|
||||
};
|
||||
|
||||
my $std = sub {
|
||||
my ($mod,$args) = @_;
|
||||
my $orig = "std$mod\t$args";
|
||||
|
||||
if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
|
||||
{ my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
|
||||
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
||||
}
|
||||
else { "\t".$orig; }
|
||||
};
|
||||
|
||||
my $extrd = sub {
|
||||
my ($mod,$args) = @_;
|
||||
my $orig = "extrd$mod\t$args";
|
||||
|
||||
# I only have ",u" completer, it's implicitly encoded...
|
||||
if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
|
||||
{ my $opcode=(0x36<<26)|($1<<21)|($4<<16);
|
||||
my $len=32-$3;
|
||||
$opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
|
||||
$opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
|
||||
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
||||
}
|
||||
elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
|
||||
{ my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
|
||||
my $len=32-$2;
|
||||
$opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
|
||||
$opcode |= (1<<13) if ($mod =~ /,\**=/);
|
||||
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
||||
}
|
||||
else { "\t".$orig; }
|
||||
};
|
||||
|
||||
my $shrpd = sub {
|
||||
my ($mod,$args) = @_;
|
||||
my $orig = "shrpd$mod\t$args";
|
||||
|
||||
if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
|
||||
{ my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
|
||||
my $cpos=63-$3;
|
||||
$opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
|
||||
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
||||
}
|
||||
elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
|
||||
{ sprintf "\t.WORD\t0x%08x\t; %s",
|
||||
(0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
|
||||
}
|
||||
else { "\t".$orig; }
|
||||
};
|
||||
|
||||
my $depd = sub {
|
||||
my ($mod,$args) = @_;
|
||||
my $orig = "depd$mod\t$args";
|
||||
|
||||
# I only have ",z" completer, it's impicitly encoded...
|
||||
if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 16
|
||||
{ my $opcode=(0x3c<<26)|($4<<21)|($1<<16);
|
||||
my $cpos=63-$2;
|
||||
my $len=32-$3;
|
||||
$opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode pos
|
||||
$opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
|
||||
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
||||
}
|
||||
else { "\t".$orig; }
|
||||
};
|
||||
|
||||
sub assemble {
|
||||
my ($mnemonic,$mod,$args)=@_;
|
||||
my $opcode = eval("\$$mnemonic");
|
||||
|
||||
ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
|
||||
}
|
||||
|
||||
foreach (split("\n",$code)) {
|
||||
s/\`([^\`]*)\`/eval $1/ge;
|
||||
if ($SIZE_T==4) {
|
||||
s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e;
|
||||
s/cmpb,\*/comb,/;
|
||||
s/,\*/,/;
|
||||
}
|
||||
print $_,"\n";
|
||||
}
|
||||
|
||||
close STDOUT;
|
Loading…
Reference in New Issue
Block a user