From 13c3a1defad3837192a8dbfe41074666ed4eb9c1 Mon Sep 17 00:00:00 2001
From: Andy Polyakov
Date: Wed, 11 Feb 2009 10:01:36 +0000
Subject: [PATCH] RC4 for s390x.

Add a perlasm generator, crypto/rc4/asm/rc4-s390x.pl, that emits RC4,
RC4_set_key and RC4_options for s390x.  Hook rc4-s390x.o into the s390x
assembler object list in Configure and TABLE, and add the Makefile rule
that produces rc4-s390x.s from the generator.

The generator closes STDOUT explicitly and dies on failure, so that a
write error on the redirected output is reported instead of silently
leaving a truncated rc4-s390x.s behind.
---
 Configure                   |   2 +-
 TABLE                       |   2 +-
 crypto/rc4/Makefile         |   3 +
 crypto/rc4/asm/rc4-s390x.pl | 221 ++++++++++++++++++++++++++++++++++++
 4 files changed, 226 insertions(+), 2 deletions(-)
 create mode 100644 crypto/rc4/asm/rc4-s390x.pl

diff --git a/Configure b/Configure
index 31f06dcb4..700982af5 100755
--- a/Configure
+++ b/Configure
@@ -131,7 +131,7 @@ my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-
 my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::void";
 my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o::::::::::::void";
 my $mips3_asm=":bn-mips3.o::::::::::::void";
-my $s390x_asm="s390xcpuid.o:bn-s390x.o s390x-mont.o::aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o:::::::void";
+my $s390x_asm="s390xcpuid.o:bn-s390x.o s390x-mont.o::aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::void";
 my $armv4_asm=":bn_asm.o armv4-mont.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::void";
 my $ppc32_asm="ppccpuid.o:bn-ppc.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::";
 my $ppc64_asm="ppccpuid.o:bn-ppc.o ppc-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::";
diff --git a/TABLE b/TABLE
index e3caccaa0..eedbecced 100644
--- a/TABLE
+++ b/TABLE
@@ -3550,7 +3550,7 @@ $bf_obj       = 
 $md5_obj      = 
 $sha1_obj     = sha1-s390x.o sha256-s390x.o sha512-s390x.o
 $cast_obj     = 
-$rc4_obj      = 
+$rc4_obj      = rc4-s390x.o
 $rmd160_obj   = 
 $rc5_obj      = 
 $wp_obj       = 
diff --git a/crypto/rc4/Makefile b/crypto/rc4/Makefile
index f932a8903..264451a21 100644
--- a/crypto/rc4/Makefile
+++ b/crypto/rc4/Makefile
@@ -50,6 +50,9 @@ rc4-x86_64.s: asm/rc4-x86_64.pl
 rc4-ia64.S: asm/rc4-ia64.pl
 	$(PERL) asm/rc4-ia64.pl $(CFLAGS) > $@
 
+rc4-s390x.s: asm/rc4-s390x.pl
+	$(PERL) asm/rc4-s390x.pl > $@
+
 rc4-ia64.s: rc4-ia64.S
 	@case `awk '/^#define RC4_INT/{print$$NF}' $(TOP)/include/openssl/opensslconf.h` in \
 	int)	set -x; $(CC) $(CFLAGS) -DSZ=4 -E rc4-ia64.S > $@ ;; \
diff --git a/crypto/rc4/asm/rc4-s390x.pl b/crypto/rc4/asm/rc4-s390x.pl
new file mode 100644
index 000000000..4366c4fc1
--- /dev/null
+++ b/crypto/rc4/asm/rc4-s390x.pl
@@ -0,0 +1,221 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# February 2009
+#
+# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to avoid
+# arithmetic instructions, but adhere to load and load address in
+# order to minimize Address Generation Interlock.
+#
+# Usage: perl rc4-s390x.pl > rc4-s390x.s
+#
+# The script takes no arguments; it writes the generated assembly to
+# standard output and dies if that output cannot be flushed.
+
+# Return-address and stack-pointer registers for RC4 and RC4_set_key.
+$rp="%r14";
+$sp="%r15";
+$code=<<___;
+.text
+
+___
+
+# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
+# The code keeps the two index bytes at offsets 0 and 1 of *key and
+# addresses the byte-indexed state table from offset 2.
+{
+$acc="%r0";
+$cnt="%r1";
+$key="%r2";
+$len="%r3";
+$inp="%r4";
+$out="%r5";
+
+@XX=("%r6","%r7");
+@TX=("%r8","%r9");
+$YY="%r10";
+$TY="%r11";
+
+$code.=<<___;
+.globl	RC4
+.type	RC4,\@function
+.align	64
+RC4:
+	stmg	%r6,%r11,48($sp)
+	llgc	$XX[0],0($key)
+	llgc	$YY,1($key)
+	la	$XX[0],1($XX[0])
+	llgcr	$XX[0],$XX[0]
+	llgc	$TX[0],2($XX[0],$key)
+	srlg	$cnt,$len,3
+	ltgr	$cnt,$cnt
+	jz	.Lshort
+	j	.Loop8
+
+.align	64
+.Loop8:
+___
+# Eight rounds are unrolled; the output bytes are gathered in $acc so
+# that a whole 8-byte word is XORed and stored per pass.
+for ($i=0;$i<8;$i++) {
+$code.=<<___;
+	la	$YY,0($YY,$TX[0])	# $i
+	llgcr	$YY,$YY
+	la	$XX[1],1($XX[0])
+	llgcr	$XX[1],$XX[1]
+___
+$code.=<<___ if ($i>1);
+	sllg	$acc,$acc,8
+	ic	$acc,2($TY,$key)
+___
+$code.=<<___ if ($i==1);
+	llgc	$acc,2($TY,$key)
+___
+$code.=<<___;
+	llgc	$TY,2($YY,$key)
+	stc	$TX[0],2($YY,$key)
+	llgc	$TX[1],2($XX[1],$key)
+	stc	$TY,2($XX[0],$key)
+	cr	$XX[1],$YY
+	jne	.Lcmov$i
+	la	$TX[1],0($TX[0])
+.Lcmov$i:
+	la	$TY,0($TY,$TX[0])
+	llgcr	$TY,$TY
+___
+push(@TX,shift(@TX)); push(@XX,shift(@XX));	# "rotate" registers
+}
+
+# End of the 8x loop, then a byte-at-a-time loop for the len%8 tail.
+$code.=<<___;
+	lg	$TX[1],0($inp)
+	sllg	$acc,$acc,8
+	la	$inp,8($inp)
+	ic	$acc,2($TY,$key)
+	xgr	$acc,$TX[1]
+	stg	$acc,0($out)
+	la	$out,8($out)
+	brct	$cnt,.Loop8
+
+.Lshort:
+	lghi	$acc,7
+	ngr	$len,$acc
+	jz	.Lexit
+	j	.Loop1
+
+.align	16
+.Loop1:
+	la	$YY,0($YY,$TX[0])
+	llgcr	$YY,$YY
+	llgc	$TY,2($YY,$key)
+	stc	$TX[0],2($YY,$key)
+	stc	$TY,2($XX[0],$key)
+	la	$TY,0($TY,$TX[0])
+	llgcr	$TY,$TY
+	la	$XX[0],1($XX[0])
+	llgcr	$XX[0],$XX[0]
+	llgc	$TY,2($TY,$key)
+	llgc	$TX[0],2($XX[0],$key)
+	llgc	$acc,0($inp)
+	la	$inp,1($inp)
+	xr	$acc,$TY
+	stc	$acc,0($out)
+	la	$out,1($out)
+	brct	$len,.Loop1
+
+.Lexit:
+	ahi	$XX[0],-1
+	stc	$XX[0],0($key)
+	stc	$YY,1($key)
+	lmg	%r6,%r11,48($sp)
+	br	$rp
+.size	RC4,.-RC4
+.string	"RC4 for s390x, CRYPTOGAMS by "
+
+___
+}
+
+# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
+# After clearing both index bytes, the first loop fills the table with
+# 0..255; the second mixes in the key, restarting at inp[0] every len
+# bytes until all 256 entries have been swapped.
+{
+$cnt="%r0";
+$idx="%r1";
+$key="%r2";
+$len="%r3";
+$inp="%r4";
+$acc="%r5";
+$dat="%r6";
+$ikey="%r7";
+$iinp="%r8";
+
+$code.=<<___;
+.globl	RC4_set_key
+.type	RC4_set_key,\@function
+.align	64
+RC4_set_key:
+	stmg	%r6,%r8,48($sp)
+	lhi	$cnt,256
+	la	$idx,0(%r0)
+	sth	$idx,0($key)
+.align	4
+.L1stloop:
+	stc	$idx,2($idx,$key)
+	la	$idx,1($idx)
+	brct	$cnt,.L1stloop
+
+	lghi	$ikey,-256
+	lr	$cnt,$len
+	la	$iinp,0(%r0)
+	la	$idx,0(%r0)
+.align	16
+.L2ndloop:
+	llgc	$acc,2+256($ikey,$key)
+	llgc	$dat,0($iinp,$inp)
+	la	$idx,0($idx,$acc)
+	la	$ikey,1($ikey)
+	la	$idx,0($idx,$dat)
+	la	$iinp,1($iinp)
+	llgcr	$idx,$idx
+	tml	$ikey,255
+	llgc	$dat,2($idx,$key)
+	stc	$dat,2+256-1($ikey,$key)
+	stc	$acc,2($idx,$key)
+	jz	.Ldone
+	brct	$cnt,.L2ndloop
+	lr	$cnt,$len
+	la	$iinp,0(%r0)
+	j	.L2ndloop
+.Ldone:
+	lmg	%r6,%r8,48($sp)
+	br	$rp
+.size	RC4_set_key,.-RC4_set_key
+
+___
+}
+
+# const char *RC4_options()
+$code.=<<___;
+.globl	RC4_options
+.type	RC4_options,\@function
+.align	16
+RC4_options:
+	larl	%r2,.Loptions
+	br	%r14
+.size	RC4_options,.-RC4_options
+.section	.rodata
+.Loptions:
+.align	8
+.string	"rc4(8x,char)"
+___
+
+print $code;
+# Report write errors (e.g. a full disk) instead of failing silently.
+close STDOUT or die "error closing STDOUT: $!";