From 287546242539e686ef1233d0a7c57c935fb61c3f Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 1 Apr 2007 17:01:12 +0000 Subject: [PATCH] Reserve for assembler implementation of RC4_set_key and implement x86 one. --- Configure | 6 +-- TABLE | 24 ++++----- crypto/rc4/Makefile | 4 +- crypto/rc4/asm/rc4-586.pl | 111 +++++++++++++++++++++++++++++++++++--- 4 files changed, 120 insertions(+), 25 deletions(-) diff --git a/Configure b/Configure index 89803c5d1..5160b1020 100755 --- a/Configure +++ b/Configure @@ -120,8 +120,8 @@ my $x86_elf_asm="x86cpuid-elf.o:bn86-elf.o co86-elf.o mo86-elf.o:dx86-elf.o yx86 my $x86_coff_asm="x86cpuid-cof.o:bn86-cof.o co86-cof.o mo86-cof.o:dx86-cof.o yx86-cof.o:ax86-cof.o:bx86-cof.o:mx86-cof.o:sx86-cof.o s512sse2-cof.o:cx86-cof.o:rx86-cof.o:rm86-cof.o:r586-cof.o:wp_block.o w86mmx-cof.o"; my $x86_out_asm="x86cpuid-out.o:bn86-out.o co86-out.o mo86-out.o:dx86-out.o yx86-out.o:ax86-out.o:bx86-out.o:mx86-out.o:sx86-out.o s512sse2-out.o:cx86-out.o:rx86-out.o:rm86-out.o:r586-out.o:wp_block.o w86mmx-out.o"; -my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o"; -my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o:::"; +my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4_skey.o:::wp-x86_64.o"; +my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::"; my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::::::::"; my $alpha_asm=":bn_asm.o alpha-mont.o::::::::::"; my $no_asm=":::::::::::"; @@ -599,7 +599,7 @@ my $des_enc="des_enc.o fcrypt_b.o"; my $aes_enc="aes_core.o aes_cbc.o"; my $bf_enc ="bf_enc.o"; my $cast_enc="c_enc.o"; -my $rc4_enc="rc4_enc.o"; +my $rc4_enc="rc4_enc.o rc4_skey.o"; my $rc5_enc="rc5_enc.o"; my $md5_obj=""; my $sha1_obj=""; diff --git a/TABLE b/TABLE index 18e6e16a9..203cb25e8 100644 --- a/TABLE +++ b/TABLE @@ -128,7 +128,7 @@ $bf_obj = $md5_obj = md5-ia64.o $sha1_obj = sha1-ia64.o sha256-ia64.o sha512-ia64.o $cast_obj = -$rc4_obj = rc4-ia64.o +$rc4_obj = rc4-ia64.o rc4_skey.o $rmd160_obj = $rc5_obj = $wp_obj = @@ -268,7 +268,7 @@ $bf_obj = $md5_obj = md5-x86_64.o $sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o $cast_obj = -$rc4_obj = rc4-x86_64.o +$rc4_obj = rc4-x86_64.o rc4_skey.o $rmd160_obj = $rc5_obj = $wp_obj = wp-x86_64.o @@ -2172,7 +2172,7 @@ $bf_obj = $md5_obj = md5-ia64.o $sha1_obj = sha1-ia64.o sha256-ia64.o sha512-ia64.o $cast_obj = -$rc4_obj = rc4-ia64.o +$rc4_obj = rc4-ia64.o rc4_skey.o $rmd160_obj = $rc5_obj = $wp_obj = @@ -2200,7 +2200,7 @@ $bf_obj = $md5_obj = md5-ia64.o $sha1_obj = sha1-ia64.o sha256-ia64.o sha512-ia64.o $cast_obj = -$rc4_obj = rc4-ia64.o +$rc4_obj = rc4-ia64.o rc4_skey.o $rmd160_obj = $rc5_obj = $wp_obj = @@ -2396,7 +2396,7 @@ $bf_obj = $md5_obj = md5-ia64.o $sha1_obj = sha1-ia64.o sha256-ia64.o sha512-ia64.o $cast_obj = -$rc4_obj = rc4-ia64.o +$rc4_obj = rc4-ia64.o rc4_skey.o $rmd160_obj = $rc5_obj = $wp_obj = @@ -2424,7 +2424,7 @@ $bf_obj = $md5_obj = md5-ia64.o $sha1_obj = sha1-ia64.o sha256-ia64.o sha512-ia64.o $cast_obj = -$rc4_obj = rc4-ia64.o +$rc4_obj = rc4-ia64.o rc4_skey.o $rmd160_obj = $rc5_obj = $wp_obj = @@ -2956,7 +2956,7 @@ $bf_obj = $md5_obj = md5-ia64.o $sha1_obj = sha1-ia64.o sha256-ia64.o sha512-ia64.o $cast_obj = -$rc4_obj = rc4-ia64.o +$rc4_obj = rc4-ia64.o rc4_skey.o $rmd160_obj = $rc5_obj = $wp_obj = @@ -2984,7 +2984,7 @@ $bf_obj = $md5_obj = md5-ia64.o $sha1_obj = sha1-ia64.o sha256-ia64.o sha512-ia64.o $cast_obj = -$rc4_obj = rc4-ia64.o +$rc4_obj = rc4-ia64.o rc4_skey.o $rmd160_obj = $rc5_obj = $wp_obj = @@ -3012,7 +3012,7 @@ $bf_obj = $md5_obj = md5-ia64.o $sha1_obj = sha1-ia64.o sha256-ia64.o sha512-ia64.o $cast_obj = -$rc4_obj = rc4-ia64.o +$rc4_obj = rc4-ia64.o rc4_skey.o $rmd160_obj = $rc5_obj = $wp_obj = @@ -3152,7 +3152,7 @@ $bf_obj = $md5_obj = md5-x86_64.o $sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o $cast_obj = -$rc4_obj = rc4-x86_64.o +$rc4_obj = rc4-x86_64.o rc4_skey.o $rmd160_obj = $rc5_obj = $wp_obj = wp-x86_64.o @@ -3964,7 +3964,7 @@ $bf_obj = $md5_obj = md5-x86_64.o $sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o $cast_obj = -$rc4_obj = rc4-x86_64.o +$rc4_obj = rc4-x86_64.o rc4_skey.o $rmd160_obj = $rc5_obj = $wp_obj = wp-x86_64.o @@ -3992,7 +3992,7 @@ $bf_obj = $md5_obj = md5-x86_64.o $sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o $cast_obj = -$rc4_obj = rc4-x86_64.o +$rc4_obj = rc4-x86_64.o rc4_skey.o $rmd160_obj = $rc5_obj = $wp_obj = wp-x86_64.o diff --git a/crypto/rc4/Makefile b/crypto/rc4/Makefile index 3ecf76768..dcc6d2c81 100644 --- a/crypto/rc4/Makefile +++ b/crypto/rc4/Makefile @@ -10,7 +10,7 @@ INCLUDES= CFLAG=-g AR= ar r -RC4_ENC=rc4_enc.o +RC4_ENC=rc4_enc.o rc4_skey.o CFLAGS= $(INCLUDES) $(CFLAG) ASFLAGS= $(INCLUDES) $(ASFLAG) @@ -22,7 +22,7 @@ APPS= LIB=$(TOP)/libcrypto.a LIBSRC=rc4_skey.c rc4_enc.c -LIBOBJ=rc4_skey.o $(RC4_ENC) +LIBOBJ=$(RC4_ENC) SRC= $(LIBSRC) diff --git a/crypto/rc4/asm/rc4-586.pl b/crypto/rc4/asm/rc4-586.pl index 22bda4b45..9766abfb6 100644 --- a/crypto/rc4/asm/rc4-586.pl +++ b/crypto/rc4/asm/rc4-586.pl @@ -36,10 +36,6 @@ $in="esi"; $out="edi"; $d="ebp"; -&RC4("RC4"); - -&asm_finish(); - sub RC4_loop { local($n,$p,$char)=@_; @@ -99,11 +95,10 @@ sub RC4_loop } -sub RC4 +&function_begin_B("RC4"); { local($name)=@_; - &function_begin_B($name,""); &mov($ty,&wparam(1)); # len &cmp($ty,0); @@ -224,7 +219,107 @@ sub RC4 &stack_pop(3); &movb( &BP(-4,$d,"",0),&LB($y)); &movb( &BP(-8,$d,"",0),&LB($x)); +} +&function_end("RC4"); - &function_end($name); - } +######################################################################## + +$inp="esi"; +$out="edi"; +$idi="ebp"; +$ido="ecx"; +$idx="edx"; + +&external_label("OPENSSL_ia32cap_P"); + +# void RC4_set_key(RC4_KEY *key,int len,const unsigned char *data); +&function_begin("RC4_set_key"); + &mov ($out,&wparam(0)); # load key + &mov ($idi,&wparam(1)); # load len + &mov ($inp,&wparam(2)); # load data + &picmeup($idx,"OPENSSL_ia32cap_P"); + + &lea ($out,&DWP(2*4,$out)); # &key->data + &lea ($inp,&DWP(0,$inp,$idi)); # $inp to point at the end + &neg ($idi); + &xor ("eax","eax"); + &mov (&DWP(-4,$out),$idi); # borrow key->y + + &bt (&DWP(0,$idx),20); # check for bit#20 + &jc (&label("c1stloop")); + +&set_label("w1stloop",16); + &mov (&DWP(0,$out,"eax",4),"eax"); # key->data[i]=i; + &add (&LB("eax"),1); # i++; + &jnc (&label("w1stloop")); + + &xor ($ido,$ido); + &xor ($idx,$idx); + +&set_label("w2ndloop",16); + &mov ("eax",&DWP(0,$out,$ido,4)); + &add (&LB($idx),&BP(0,$inp,$idi)); + &add (&LB($idx),&LB("eax")); + &add ($idi,1); + &mov ("ebx",&DWP(0,$out,$idx,4)); + &jnz (&label("wnowrap")); + &mov ($idi,&DWP(-4,$out)); + &set_label("wnowrap"); + &mov (&DWP(0,$out,$idx,4),"eax"); + &mov (&DWP(0,$out,$ido,4),"ebx"); + &add (&LB($ido),1); + &jnc (&label("w2ndloop")); +&jmp (&label("exit")); + +&set_label("c1stloop",16); + &mov (&BP(0,$out,"eax"),&LB("eax")); # key->data[i]=i; + &add (&LB("eax"),1); # i++; + &jnc (&label("c1stloop")); + + &xor ($ido,$ido); + &xor ($idx,$idx); + &xor ("ebx","ebx"); + +&set_label("c2ndloop",16); + &mov (&LB("eax"),&BP(0,$out,$ido)); + &add (&LB($idx),&BP(0,$inp,$idi)); + &add (&LB($idx),&LB("eax")); + &add ($idi,1); + &mov (&LB("ebx"),&BP(0,$out,$idx)); + &jnz (&label("cnowrap")); + &mov ($idi,&DWP(-4,$out)); + &set_label("cnowrap"); + &mov (&BP(0,$out,$idx),&LB("eax")); + &mov (&BP(0,$out,$ido),&LB("ebx")); + &add (&LB($ido),1); + &jnc (&label("c2ndloop")); + + &mov (&DWP(256,$out),-1); # mark schedule as compressed + +&set_label("exit"); + &xor ("eax","eax"); + &mov (&DWP(-8,$out),"eax"); # key->x=0; + &mov (&DWP(-4,$out),"eax"); # key->y=0; +&function_end("RC4_set_key"); + +# const char *RC4_options(void); +&function_begin_B("RC4_options"); + &call (&label("pic_point")); +&set_label("pic_point"); + &blindpop("eax"); + &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax")); + &picmeup("edx","OPENSSL_ia32cap_P"); + &bt (&DWP(0,"edx"),20); + &jnc (&label("skip")); + &add ("eax",12); + &set_label("skip"); + &ret (); +&set_label("opts",64); +&asciz ("rc4(8x,int)"); +&asciz ("rc4(1x,char)"); +&asciz ("RC4 for x86, OpenSSL project"); # RC4_version +&align (64); +&function_end_B("RC4_options"); + +&asm_finish();