From 804515425aa520a186c4d1b919739d1a04d782e5 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 15 May 2005 22:43:00 +0000 Subject: [PATCH] +20% performance improvement of P4-specific RC4_CHAR loop. --- crypto/rc4/asm/rc4-586.pl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/crypto/rc4/asm/rc4-586.pl b/crypto/rc4/asm/rc4-586.pl index d6e98f081..22bda4b45 100644 --- a/crypto/rc4/asm/rc4-586.pl +++ b/crypto/rc4/asm/rc4-586.pl @@ -200,22 +200,23 @@ sub RC4 &lea ($ty,&DWP(0,$in,$ty)); &mov (&swtmp(2),$ty); + &movz ($tx,&BP(0,$d,$x)); # strangely enough unrolled loop performs over 20% slower... &set_label("RC4_CHAR_loop"); - &movz ($tx,&BP(0,$d,$x)); &add (&LB($y),&LB($tx)); &movz ($ty,&BP(0,$d,$y)); &movb (&BP(0,$d,$y),&LB($tx)); &movb (&BP(0,$d,$x),&LB($ty)); &add (&LB($ty),&LB($tx)); &movz ($ty,&BP(0,$d,$ty)); + &add (&LB($x),1); &xorb (&LB($ty),&BP(0,$in)); - &movb (&BP(0,$out),&LB($ty)); - &inc (&LB($x)); - &inc ($in); - &inc ($out); + &lea ($in,&BP(1,$in)); + &movz ($tx,&BP(0,$d,$x)); &cmp ($in,&swtmp(2)); + &movb (&BP(0,$out),&LB($ty)); + &lea ($out,&BP(1,$out)); &jb (&label("RC4_CHAR_loop")); &set_label("finished");