+20% performance improvement of P4-specific RC4_CHAR loop.
This commit is contained in:
parent
81a86fcf17
commit
804515425a
@ -200,22 +200,23 @@ sub RC4
|
|||||||
|
|
||||||
&lea ($ty,&DWP(0,$in,$ty));
|
&lea ($ty,&DWP(0,$in,$ty));
|
||||||
&mov (&swtmp(2),$ty);
|
&mov (&swtmp(2),$ty);
|
||||||
|
&movz ($tx,&BP(0,$d,$x));
|
||||||
|
|
||||||
# strangely enough, an unrolled loop performs over 20% slower...
|
# strangely enough, an unrolled loop performs over 20% slower...
|
||||||
&set_label("RC4_CHAR_loop");
|
&set_label("RC4_CHAR_loop");
|
||||||
&movz ($tx,&BP(0,$d,$x));
|
|
||||||
&add (&LB($y),&LB($tx));
|
&add (&LB($y),&LB($tx));
|
||||||
&movz ($ty,&BP(0,$d,$y));
|
&movz ($ty,&BP(0,$d,$y));
|
||||||
&movb (&BP(0,$d,$y),&LB($tx));
|
&movb (&BP(0,$d,$y),&LB($tx));
|
||||||
&movb (&BP(0,$d,$x),&LB($ty));
|
&movb (&BP(0,$d,$x),&LB($ty));
|
||||||
&add (&LB($ty),&LB($tx));
|
&add (&LB($ty),&LB($tx));
|
||||||
&movz ($ty,&BP(0,$d,$ty));
|
&movz ($ty,&BP(0,$d,$ty));
|
||||||
|
&add (&LB($x),1);
|
||||||
&xorb (&LB($ty),&BP(0,$in));
|
&xorb (&LB($ty),&BP(0,$in));
|
||||||
&movb (&BP(0,$out),&LB($ty));
|
&lea ($in,&BP(1,$in));
|
||||||
&inc (&LB($x));
|
&movz ($tx,&BP(0,$d,$x));
|
||||||
&inc ($in);
|
|
||||||
&inc ($out);
|
|
||||||
&cmp ($in,&swtmp(2));
|
&cmp ($in,&swtmp(2));
|
||||||
|
&movb (&BP(0,$out),&LB($ty));
|
||||||
|
&lea ($out,&BP(1,$out));
|
||||||
&jb (&label("RC4_CHAR_loop"));
|
&jb (&label("RC4_CHAR_loop"));
|
||||||
|
|
||||||
&set_label("finished");
|
&set_label("finished");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user