+45% RC4 performance boost on Intel EM64T core. An unrolled loop providing
a further +35% will follow.

Submitted by: Zou Nanhai
2005-04-06 09:45:42 +00:00 · 2005-04-06 09:45:42 +00:00 · 81ee80ab88
commit 81ee80ab88
parent 7d727231b7
1 changed file with 4 additions and 4 deletions
--- a/crypto/rc4/asm/rc4-amd64.pl
+++ b/crypto/rc4/asm/rc4-amd64.pl
@@ -181,7 +181,7 @@ $code.=<<___;

 .align	16
 .LRC4_CHAR:
-	inc	$XX#b
+	add	\$1,$XX#b
 	movzb	`&PTR("BYTE:[$dat+$XX]")`,$TX#d
 	add	$TX#b,$YY#b
 	movzb	`&PTR("BYTE:[$dat+$YY]")`,$TY#d
@@ -191,9 +191,9 @@ $code.=<<___;
 	movzb	`&PTR("BYTE:[$dat+$TY]")`,$TY#d
 	xorb	`&PTR("BYTE:[$inp]")`,$TY#b
 	movb	$TY#b,`&PTR("BYTE:[$out]")`
-	inc	$inp
-	inc	$out
-	dec	$len
+	lea	1($inp),$inp
+	lea	1($out),$out
+	sub	\$1,$len
 	jnz	.LRC4_CHAR
 	jmp	.Lexit
 ___