Two extra instructions in the RC4 character loop give an 80% performance
improvement on Core2. I still need to detect Core2 and choose this
path...
This commit is contained in:
Andy Polyakov 2007-03-20 09:13:07 +00:00
parent 3d1def0132
commit de50494505

View File

@@ -221,6 +221,8 @@ $code.=<<___;
movb $TY#b,($dat,$XX[0])
add $TX[0]#b,$TY#b
add \$1,$XX[0]#b
movzb $TY#b,$TY#d
movzb $XX[0]#b,$XX[0]#d
movzb ($dat,$TY),$TY#d
movzb ($dat,$XX[0]),$TX[0]#d
xorb ($inp),$TY#b