aesni-x86[_64].pl: pull from HEAD.
This commit is contained in:
		@@ -594,6 +594,7 @@ if ($PREFIX eq "aesni") {
 | 
			
		||||
 | 
			
		||||
	&movdqu	($ivec,&QWP(0,$rounds_));	# load ivec
 | 
			
		||||
	&movdqu	($cmac,&QWP(0,$rounds));	# load cmac
 | 
			
		||||
	&mov	($rounds,&DWP(240,$key));
 | 
			
		||||
 | 
			
		||||
	# compose byte-swap control mask for pshufb on stack
 | 
			
		||||
	&mov	(&DWP(0,"esp"),0x0c0d0e0f);
 | 
			
		||||
@@ -602,34 +603,30 @@ if ($PREFIX eq "aesni") {
 | 
			
		||||
	&mov	(&DWP(12,"esp"),0x00010203);
 | 
			
		||||
 | 
			
		||||
	# compose counter increment vector on stack
 | 
			
		||||
	&mov	($rounds,1);
 | 
			
		||||
	&mov	($rounds_,1);
 | 
			
		||||
	&xor	($key_,$key_);
 | 
			
		||||
	&mov	(&DWP(16,"esp"),$rounds);
 | 
			
		||||
	&mov	(&DWP(16,"esp"),$rounds_);
 | 
			
		||||
	&mov	(&DWP(20,"esp"),$key_);
 | 
			
		||||
	&mov	(&DWP(24,"esp"),$key_);
 | 
			
		||||
	&mov	(&DWP(28,"esp"),$key_);
 | 
			
		||||
 | 
			
		||||
	&shr	($rounds,1);
 | 
			
		||||
	&lea	($key_,&DWP(0,$key));
 | 
			
		||||
	&movdqa	($inout3,&QWP(0,"esp"));
 | 
			
		||||
	&pshufb	($ivec,$inout3);		# keep iv in reverse order
 | 
			
		||||
 | 
			
		||||
	&mov	($rounds,&DWP(240,$key));
 | 
			
		||||
	&mov	($key_,$key);
 | 
			
		||||
	&mov	($rounds_,$rounds);
 | 
			
		||||
	&movdqa	($inout0,$ivec);
 | 
			
		||||
	&mov	($rounds_,$rounds);
 | 
			
		||||
	&pshufb	($ivec,$inout3);
 | 
			
		||||
 | 
			
		||||
&set_label("ccm64_enc_outer");
 | 
			
		||||
	&movups		($in0,&QWP(0,$inp));
 | 
			
		||||
	&pshufb		($inout0,$inout3);
 | 
			
		||||
	&mov		($key,$key_);
 | 
			
		||||
	&$movekey	($rndkey0,&QWP(0,$key_));
 | 
			
		||||
	&mov		($rounds,$rounds_);
 | 
			
		||||
	&movups		($in0,&QWP(0,$inp));
 | 
			
		||||
 | 
			
		||||
	&$movekey	($rndkey0,&QWP(0,$key));
 | 
			
		||||
	&shr		($rounds,1);
 | 
			
		||||
	&$movekey	($rndkey1,&QWP(16,$key));
 | 
			
		||||
	&xorps		($in0,$rndkey0);
 | 
			
		||||
	&lea		($key,&DWP(32,$key));
 | 
			
		||||
	&xorps		($inout0,$rndkey0);
 | 
			
		||||
	&xorps		($cmac,$in0);		# cmac^=inp
 | 
			
		||||
	&$movekey	($rndkey1,&QWP(16,$key_));
 | 
			
		||||
	&xorps		($rndkey0,$in0);
 | 
			
		||||
	&lea		($key,&DWP(32,$key_));
 | 
			
		||||
	&xorps		($cmac,$rndkey0);		# cmac^=inp
 | 
			
		||||
	&$movekey	($rndkey0,&QWP(0,$key));
 | 
			
		||||
 | 
			
		||||
&set_label("ccm64_enc2_loop");
 | 
			
		||||
@@ -644,16 +641,17 @@ if ($PREFIX eq "aesni") {
 | 
			
		||||
	&jnz		(&label("ccm64_enc2_loop"));
 | 
			
		||||
	&aesenc		($inout0,$rndkey1);
 | 
			
		||||
	&aesenc		($cmac,$rndkey1);
 | 
			
		||||
	&paddq		($ivec,&QWP(16,"esp"));
 | 
			
		||||
	&aesenclast	($inout0,$rndkey0);
 | 
			
		||||
	&aesenclast	($cmac,$rndkey0);
 | 
			
		||||
 | 
			
		||||
	&paddq	($ivec,&QWP(16,"esp"));
 | 
			
		||||
	&dec	($len);
 | 
			
		||||
	&lea	($inp,&DWP(16,$inp));
 | 
			
		||||
	&xorps	($in0,$inout0);			# inp^=E(ivec)
 | 
			
		||||
	&movdqa	($inout0,$ivec);
 | 
			
		||||
	&movups	(&QWP(0,$out),$in0);
 | 
			
		||||
	&movups	(&QWP(0,$out),$in0);		# save output
 | 
			
		||||
	&lea	($out,&DWP(16,$out));
 | 
			
		||||
	&pshufb	($inout0,$inout3);
 | 
			
		||||
	&jnz	(&label("ccm64_enc_outer"));
 | 
			
		||||
 | 
			
		||||
	&mov	("esp",&DWP(48,"esp"));
 | 
			
		||||
@@ -675,6 +673,7 @@ if ($PREFIX eq "aesni") {
 | 
			
		||||
 | 
			
		||||
	&movdqu	($ivec,&QWP(0,$rounds_));	# load ivec
 | 
			
		||||
	&movdqu	($cmac,&QWP(0,$rounds));	# load cmac
 | 
			
		||||
	&mov	($rounds,&DWP(240,$key));
 | 
			
		||||
 | 
			
		||||
	# compose byte-swap control mask for pshufb on stack
 | 
			
		||||
	&mov	(&DWP(0,"esp"),0x0c0d0e0f);
 | 
			
		||||
@@ -683,46 +682,45 @@ if ($PREFIX eq "aesni") {
 | 
			
		||||
	&mov	(&DWP(12,"esp"),0x00010203);
 | 
			
		||||
 | 
			
		||||
	# compose counter increment vector on stack
 | 
			
		||||
	&mov	($rounds,1);
 | 
			
		||||
	&mov	($rounds_,1);
 | 
			
		||||
	&xor	($key_,$key_);
 | 
			
		||||
	&mov	(&DWP(16,"esp"),$rounds);
 | 
			
		||||
	&mov	(&DWP(16,"esp"),$rounds_);
 | 
			
		||||
	&mov	(&DWP(20,"esp"),$key_);
 | 
			
		||||
	&mov	(&DWP(24,"esp"),$key_);
 | 
			
		||||
	&mov	(&DWP(28,"esp"),$key_);
 | 
			
		||||
 | 
			
		||||
	&movdqa	($inout3,&QWP(0,"esp"));	# bswap mask
 | 
			
		||||
	&movdqa	($inout0,$ivec);
 | 
			
		||||
	&pshufb	($ivec,$inout3);		# keep iv in reverse order
 | 
			
		||||
 | 
			
		||||
	&mov	($rounds,&DWP(240,$key));
 | 
			
		||||
	&mov	($key_,$key);
 | 
			
		||||
	&mov	($rounds_,$rounds);
 | 
			
		||||
 | 
			
		||||
	&pshufb	($ivec,$inout3);
 | 
			
		||||
	if ($inline)
 | 
			
		||||
	{   &aesni_inline_generate1("enc");	}
 | 
			
		||||
	else
 | 
			
		||||
	{   &call	("_aesni_encrypt1");	}
 | 
			
		||||
 | 
			
		||||
&set_label("ccm64_dec_outer");
 | 
			
		||||
	&paddq	($ivec,&QWP(16,"esp"));
 | 
			
		||||
	&movups	($in0,&QWP(0,$inp));		# load inp
 | 
			
		||||
	&xorps	($in0,$inout0);
 | 
			
		||||
	&movdqa	($inout0,$ivec);
 | 
			
		||||
	&paddq	($ivec,&QWP(16,"esp"));
 | 
			
		||||
	&lea	($inp,&QWP(16,$inp));
 | 
			
		||||
	&pshufb	($inout0,$inout3);
 | 
			
		||||
	&mov	($key,$key_);
 | 
			
		||||
	&jmp	(&label("ccm64_dec_outer"));
 | 
			
		||||
 | 
			
		||||
&set_label("ccm64_dec_outer",16);
 | 
			
		||||
	&xorps	($in0,$inout0);			# inp ^= E(ivec)
 | 
			
		||||
	&movdqa	($inout0,$ivec);
 | 
			
		||||
	&mov	($rounds,$rounds_);
 | 
			
		||||
	&movups	(&QWP(0,$out),$in0);
 | 
			
		||||
	&movups	(&QWP(0,$out),$in0);		# save output
 | 
			
		||||
	&lea	($out,&DWP(16,$out));
 | 
			
		||||
	&pshufb	($inout0,$inout3);
 | 
			
		||||
 | 
			
		||||
	&sub	($len,1);
 | 
			
		||||
	&jz	(&label("ccm64_dec_break"));
 | 
			
		||||
 | 
			
		||||
	&$movekey	($rndkey0,&QWP(0,$key));
 | 
			
		||||
	&$movekey	($rndkey0,&QWP(0,$key_));
 | 
			
		||||
	&shr		($rounds,1);
 | 
			
		||||
	&$movekey	($rndkey1,&QWP(16,$key));
 | 
			
		||||
	&$movekey	($rndkey1,&QWP(16,$key_));
 | 
			
		||||
	&xorps		($in0,$rndkey0);
 | 
			
		||||
	&lea		($key,&DWP(32,$key));
 | 
			
		||||
	&lea		($key,&DWP(32,$key_));
 | 
			
		||||
	&xorps		($inout0,$rndkey0);
 | 
			
		||||
	&xorps		($cmac,$in0);		# cmac^=out
 | 
			
		||||
	&$movekey	($rndkey0,&QWP(0,$key));
 | 
			
		||||
@@ -737,13 +735,17 @@ if ($PREFIX eq "aesni") {
 | 
			
		||||
	&aesenc		($cmac,$rndkey0);
 | 
			
		||||
	&$movekey	($rndkey0,&QWP(0,$key));
 | 
			
		||||
	&jnz		(&label("ccm64_dec2_loop"));
 | 
			
		||||
	&movups		($in0,&QWP(0,$inp));	# load inp
 | 
			
		||||
	&paddq		($ivec,&QWP(16,"esp"));
 | 
			
		||||
	&aesenc		($inout0,$rndkey1);
 | 
			
		||||
	&aesenc		($cmac,$rndkey1);
 | 
			
		||||
	&lea		($inp,&QWP(16,$inp));
 | 
			
		||||
	&aesenclast	($inout0,$rndkey0);
 | 
			
		||||
	&aesenclast	($cmac,$rndkey0);
 | 
			
		||||
	&jmp	(&label("ccm64_dec_outer"));
 | 
			
		||||
 | 
			
		||||
&set_label("ccm64_dec_break",16);
 | 
			
		||||
	&mov	($key,$key_);
 | 
			
		||||
	if ($inline)
 | 
			
		||||
	{   &aesni_inline_generate1("enc",$cmac,$in0);	}
 | 
			
		||||
	else
 | 
			
		||||
 
 | 
			
		||||
@@ -821,8 +821,8 @@ ___
 | 
			
		||||
{
 | 
			
		||||
my $cmac="%r9";	# 6th argument
 | 
			
		||||
 | 
			
		||||
my $increment="%xmm8";
 | 
			
		||||
my $bswap_mask="%xmm9";
 | 
			
		||||
my $increment="%xmm6";
 | 
			
		||||
my $bswap_mask="%xmm7";
 | 
			
		||||
 | 
			
		||||
$code.=<<___;
 | 
			
		||||
.globl	aesni_ccm64_encrypt_blocks
 | 
			
		||||
@@ -839,30 +839,29 @@ $code.=<<___ if ($win64);
 | 
			
		||||
.Lccm64_enc_body:
 | 
			
		||||
___
 | 
			
		||||
$code.=<<___;
 | 
			
		||||
	mov	240($key),$rounds		# key->rounds
 | 
			
		||||
	movdqu	($ivp),$iv
 | 
			
		||||
	movdqu	($cmac),$inout1
 | 
			
		||||
	movdqa	.Lincrement64(%rip),$increment
 | 
			
		||||
	movdqa	.Lbswap_mask(%rip),$bswap_mask
 | 
			
		||||
	pshufb	$bswap_mask,$iv			# keep iv in reverse order
 | 
			
		||||
 | 
			
		||||
	mov	240($key),$rounds		# key->rounds
 | 
			
		||||
	mov	$key,$key_
 | 
			
		||||
	mov	$rounds,$rnds_
 | 
			
		||||
	movdqa	$iv,$inout0
 | 
			
		||||
 | 
			
		||||
.Lccm64_enc_outer:
 | 
			
		||||
	movups	($inp),$in0			# load inp
 | 
			
		||||
	pshufb	$bswap_mask,$inout0
 | 
			
		||||
	mov	$key_,$key
 | 
			
		||||
	mov	$rnds_,$rounds
 | 
			
		||||
 | 
			
		||||
	$movkey	($key),$rndkey0
 | 
			
		||||
	shr	\$1,$rounds
 | 
			
		||||
	$movkey	16($key),$rndkey1
 | 
			
		||||
	xorps	$rndkey0,$in0
 | 
			
		||||
	lea	32($key),$key
 | 
			
		||||
	xorps	$rndkey0,$inout0
 | 
			
		||||
	xorps	$inout1,$in0			# cmac^=inp
 | 
			
		||||
	lea	0($key),$key_
 | 
			
		||||
	movdqu	($cmac),$inout1
 | 
			
		||||
	movdqa	$iv,$inout0
 | 
			
		||||
	mov	$rounds,$rnds_
 | 
			
		||||
	pshufb	$bswap_mask,$iv
 | 
			
		||||
	jmp	.Lccm64_enc_outer
 | 
			
		||||
.align	16
 | 
			
		||||
.Lccm64_enc_outer:
 | 
			
		||||
	$movkey	($key_),$rndkey0
 | 
			
		||||
	mov	$rnds_,$rounds
 | 
			
		||||
	movups	($inp),$in0			# load inp
 | 
			
		||||
 | 
			
		||||
	xorps	$rndkey0,$inout0		# counter
 | 
			
		||||
	$movkey	16($key_),$rndkey1
 | 
			
		||||
	xorps	$in0,$rndkey0
 | 
			
		||||
	lea	32($key_),$key
 | 
			
		||||
	xorps	$rndkey0,$inout1		# cmac^=inp
 | 
			
		||||
	$movkey	($key),$rndkey0
 | 
			
		||||
 | 
			
		||||
.Lccm64_enc2_loop:
 | 
			
		||||
@@ -877,16 +876,17 @@ $code.=<<___;
 | 
			
		||||
	jnz	.Lccm64_enc2_loop
 | 
			
		||||
	aesenc	$rndkey1,$inout0
 | 
			
		||||
	aesenc	$rndkey1,$inout1
 | 
			
		||||
	paddq	$increment,$iv
 | 
			
		||||
	aesenclast	$rndkey0,$inout0
 | 
			
		||||
	aesenclast	$rndkey0,$inout1
 | 
			
		||||
 | 
			
		||||
	paddq	$increment,$iv
 | 
			
		||||
	dec	$len
 | 
			
		||||
	lea	16($inp),$inp
 | 
			
		||||
	xorps	$inout0,$in0			# inp ^= E(iv)
 | 
			
		||||
	movdqa	$iv,$inout0
 | 
			
		||||
	movups	$in0,($out)			# save output
 | 
			
		||||
	lea	16($out),$out
 | 
			
		||||
	pshufb	$bswap_mask,$inout0
 | 
			
		||||
	jnz	.Lccm64_enc_outer
 | 
			
		||||
 | 
			
		||||
	movups	$inout1,($cmac)
 | 
			
		||||
@@ -919,39 +919,40 @@ $code.=<<___ if ($win64);
 | 
			
		||||
.Lccm64_dec_body:
 | 
			
		||||
___
 | 
			
		||||
$code.=<<___;
 | 
			
		||||
	movdqu	($ivp),$iv
 | 
			
		||||
	mov	240($key),$rounds		# key->rounds
 | 
			
		||||
	movups	($ivp),$iv
 | 
			
		||||
	movdqu	($cmac),$inout1
 | 
			
		||||
	movdqa	.Lincrement64(%rip),$increment
 | 
			
		||||
	movdqa	.Lbswap_mask(%rip),$bswap_mask
 | 
			
		||||
 | 
			
		||||
	mov	240($key),$rounds		# key->rounds
 | 
			
		||||
	movdqa	$iv,$inout0
 | 
			
		||||
	pshufb	$bswap_mask,$iv			# keep iv in reverse order
 | 
			
		||||
	movaps	$iv,$inout0
 | 
			
		||||
	mov	$rounds,$rnds_
 | 
			
		||||
	mov	$key,$key_
 | 
			
		||||
	pshufb	$bswap_mask,$iv
 | 
			
		||||
___
 | 
			
		||||
	&aesni_generate1("enc",$key,$rounds);
 | 
			
		||||
$code.=<<___;
 | 
			
		||||
.Lccm64_dec_outer:
 | 
			
		||||
	paddq	$increment,$iv
 | 
			
		||||
	movups	($inp),$in0			# load inp
 | 
			
		||||
	xorps	$inout0,$in0
 | 
			
		||||
	movdqa	$iv,$inout0
 | 
			
		||||
	paddq	$increment,$iv
 | 
			
		||||
	lea	16($inp),$inp
 | 
			
		||||
	pshufb	$bswap_mask,$inout0
 | 
			
		||||
	mov	$key_,$key
 | 
			
		||||
	jmp	.Lccm64_dec_outer
 | 
			
		||||
.align	16
 | 
			
		||||
.Lccm64_dec_outer:
 | 
			
		||||
	xorps	$inout0,$in0			# inp ^= E(iv)
 | 
			
		||||
	movdqa	$iv,$inout0
 | 
			
		||||
	mov	$rnds_,$rounds
 | 
			
		||||
	movups	$in0,($out)
 | 
			
		||||
	movups	$in0,($out)			# save output
 | 
			
		||||
	lea	16($out),$out
 | 
			
		||||
	pshufb	$bswap_mask,$inout0
 | 
			
		||||
 | 
			
		||||
	sub	\$1,$len
 | 
			
		||||
	jz	.Lccm64_dec_break
 | 
			
		||||
 | 
			
		||||
	$movkey	($key),$rndkey0
 | 
			
		||||
	$movkey	($key_),$rndkey0
 | 
			
		||||
	shr	\$1,$rounds
 | 
			
		||||
	$movkey	16($key),$rndkey1
 | 
			
		||||
	$movkey	16($key_),$rndkey1
 | 
			
		||||
	xorps	$rndkey0,$in0
 | 
			
		||||
	lea	32($key),$key
 | 
			
		||||
	lea	32($key_),$key
 | 
			
		||||
	xorps	$rndkey0,$inout0
 | 
			
		||||
	xorps	$in0,$inout1			# cmac^=out
 | 
			
		||||
	$movkey	($key),$rndkey0
 | 
			
		||||
@@ -966,15 +967,20 @@ $code.=<<___;
 | 
			
		||||
	aesenc	$rndkey0,$inout1
 | 
			
		||||
	$movkey	0($key),$rndkey0
 | 
			
		||||
	jnz	.Lccm64_dec2_loop
 | 
			
		||||
	movups	($inp),$in0			# load inp
 | 
			
		||||
	paddq	$increment,$iv
 | 
			
		||||
	aesenc	$rndkey1,$inout0
 | 
			
		||||
	aesenc	$rndkey1,$inout1
 | 
			
		||||
	lea	16($inp),$inp
 | 
			
		||||
	aesenclast	$rndkey0,$inout0
 | 
			
		||||
	aesenclast	$rndkey0,$inout1
 | 
			
		||||
	jmp	.Lccm64_dec_outer
 | 
			
		||||
 | 
			
		||||
.align	16
 | 
			
		||||
.Lccm64_dec_break:
 | 
			
		||||
	#xorps	$in0,$inout1			# cmac^=out
 | 
			
		||||
___
 | 
			
		||||
	&aesni_generate1("enc",$key,$rounds,$inout1);
 | 
			
		||||
	&aesni_generate1("enc",$key_,$rounds,$inout1,$in0);
 | 
			
		||||
$code.=<<___;
 | 
			
		||||
	movups	$inout1,($cmac)
 | 
			
		||||
___
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user