+45% RC4 performance boost on Intel EM64T core. An unrolled loop providing
a further +35% will follow...

Submitted by: Zou Nanhai
This commit is contained in:
Andy Polyakov 2005-04-06 09:45:42 +00:00
parent 7d727231b7
commit 81ee80ab88

View File

@@ -181,7 +181,7 @@ $code.=<<___;
.align 16 .align 16
.LRC4_CHAR: .LRC4_CHAR:
inc $XX#b add \$1,$XX#b
movzb `&PTR("BYTE:[$dat+$XX]")`,$TX#d movzb `&PTR("BYTE:[$dat+$XX]")`,$TX#d
add $TX#b,$YY#b add $TX#b,$YY#b
movzb `&PTR("BYTE:[$dat+$YY]")`,$TY#d movzb `&PTR("BYTE:[$dat+$YY]")`,$TY#d
@@ -191,9 +191,9 @@ $code.=<<___;
movzb `&PTR("BYTE:[$dat+$TY]")`,$TY#d movzb `&PTR("BYTE:[$dat+$TY]")`,$TY#d
xorb `&PTR("BYTE:[$inp]")`,$TY#b xorb `&PTR("BYTE:[$inp]")`,$TY#b
movb $TY#b,`&PTR("BYTE:[$out]")` movb $TY#b,`&PTR("BYTE:[$out]")`
inc $inp lea 1($inp),$inp
inc $out lea 1($out),$out
dec $len sub \$1,$len
jnz .LRC4_CHAR jnz .LRC4_CHAR
jmp .Lexit jmp .Lexit
___ ___