aesni-x86_64.pl: make it possibel to use in Linux kernel.
This commit is contained in:
parent
d4bb6bddf8
commit
6a40ebe86b
@ -1006,7 +1006,7 @@ ___
|
|||||||
# does not update *ivec! (see engine/eng_aesni.c for details)
|
# does not update *ivec! (see engine/eng_aesni.c for details)
|
||||||
#
|
#
|
||||||
{
|
{
|
||||||
my $reserved = $win64?0:-0x28;
|
my $frame_size = 0x20+($win64?160:0);
|
||||||
my ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11));
|
my ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11));
|
||||||
my ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14");
|
my ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14");
|
||||||
my $bswap_mask="%xmm15";
|
my $bswap_mask="%xmm15";
|
||||||
@ -1016,9 +1016,12 @@ $code.=<<___;
|
|||||||
.type aesni_ctr32_encrypt_blocks,\@function,5
|
.type aesni_ctr32_encrypt_blocks,\@function,5
|
||||||
.align 16
|
.align 16
|
||||||
aesni_ctr32_encrypt_blocks:
|
aesni_ctr32_encrypt_blocks:
|
||||||
|
lea (%rsp),%rax
|
||||||
|
push %rbp
|
||||||
|
sub \$$frame_size,%rsp
|
||||||
|
and \$-16,%rsp # Linux kernel stack can be incorrectly seeded
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($win64);
|
$code.=<<___ if ($win64);
|
||||||
lea -0xc8(%rsp),%rsp
|
|
||||||
movaps %xmm6,0x20(%rsp)
|
movaps %xmm6,0x20(%rsp)
|
||||||
movaps %xmm7,0x30(%rsp)
|
movaps %xmm7,0x30(%rsp)
|
||||||
movaps %xmm8,0x40(%rsp)
|
movaps %xmm8,0x40(%rsp)
|
||||||
@ -1032,6 +1035,7 @@ $code.=<<___ if ($win64);
|
|||||||
.Lctr32_body:
|
.Lctr32_body:
|
||||||
___
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
|
lea -8(%rax),%rbp
|
||||||
cmp \$1,$len
|
cmp \$1,$len
|
||||||
je .Lctr32_one_shortcut
|
je .Lctr32_one_shortcut
|
||||||
|
|
||||||
@ -1056,9 +1060,9 @@ $code.=<<___;
|
|||||||
pinsrd \$2,$rnds_,$iv0
|
pinsrd \$2,$rnds_,$iv0
|
||||||
inc $key_
|
inc $key_
|
||||||
pinsrd \$2,$key_,$iv1
|
pinsrd \$2,$key_,$iv1
|
||||||
movdqa $iv0,$reserved(%rsp)
|
movdqa $iv0,0x00(%rsp)
|
||||||
pshufb $bswap_mask,$iv0
|
pshufb $bswap_mask,$iv0
|
||||||
movdqa $iv1,`$reserved+0x10`(%rsp)
|
movdqa $iv1,0x10(%rsp)
|
||||||
pshufb $bswap_mask,$iv1
|
pshufb $bswap_mask,$iv1
|
||||||
|
|
||||||
pshufd \$`3<<6`,$iv0,$inout0 # place counter to upper dword
|
pshufd \$`3<<6`,$iv0,$inout0 # place counter to upper dword
|
||||||
@ -1098,7 +1102,7 @@ $code.=<<___;
|
|||||||
movdqa .Lincrement32(%rip),$iv1
|
movdqa .Lincrement32(%rip),$iv1
|
||||||
pxor $rndkey0,$inout3
|
pxor $rndkey0,$inout3
|
||||||
aesenc $rndkey1,$inout2
|
aesenc $rndkey1,$inout2
|
||||||
movdqa $reserved(%rsp),$iv0
|
movdqa (%rsp),$iv0
|
||||||
pxor $rndkey0,$inout4
|
pxor $rndkey0,$inout4
|
||||||
aesenc $rndkey1,$inout3
|
aesenc $rndkey1,$inout3
|
||||||
pxor $rndkey0,$inout5
|
pxor $rndkey0,$inout5
|
||||||
@ -1131,11 +1135,11 @@ $code.=<<___;
|
|||||||
aesenc $rndkey1,$inout0
|
aesenc $rndkey1,$inout0
|
||||||
paddd $iv1,$iv0 # increment counter vector
|
paddd $iv1,$iv0 # increment counter vector
|
||||||
aesenc $rndkey1,$inout1
|
aesenc $rndkey1,$inout1
|
||||||
paddd `$reserved+0x10`(%rsp),$iv1
|
paddd 0x10(%rsp),$iv1
|
||||||
aesenc $rndkey1,$inout2
|
aesenc $rndkey1,$inout2
|
||||||
movdqa $iv0,$reserved(%rsp) # save counter vector
|
movdqa $iv0,0x00(%rsp) # save counter vector
|
||||||
aesenc $rndkey1,$inout3
|
aesenc $rndkey1,$inout3
|
||||||
movdqa $iv1,`$reserved+0x10`(%rsp)
|
movdqa $iv1,0x10(%rsp)
|
||||||
aesenc $rndkey1,$inout4
|
aesenc $rndkey1,$inout4
|
||||||
pshufb $bswap_mask,$iv0 # byte swap
|
pshufb $bswap_mask,$iv0 # byte swap
|
||||||
aesenc $rndkey1,$inout5
|
aesenc $rndkey1,$inout5
|
||||||
@ -1278,10 +1282,11 @@ $code.=<<___ if ($win64);
|
|||||||
movaps 0x90(%rsp),%xmm13
|
movaps 0x90(%rsp),%xmm13
|
||||||
movaps 0xa0(%rsp),%xmm14
|
movaps 0xa0(%rsp),%xmm14
|
||||||
movaps 0xb0(%rsp),%xmm15
|
movaps 0xb0(%rsp),%xmm15
|
||||||
lea 0xc8(%rsp),%rsp
|
|
||||||
.Lctr32_ret:
|
|
||||||
___
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
|
lea (%rbp),%rsp
|
||||||
|
pop %rbp
|
||||||
|
.Lctr32_ret:
|
||||||
ret
|
ret
|
||||||
.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
|
.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
|
||||||
___
|
___
|
||||||
@ -1296,14 +1301,17 @@ ___
|
|||||||
my @tweak=map("%xmm$_",(10..15));
|
my @tweak=map("%xmm$_",(10..15));
|
||||||
my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]);
|
my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]);
|
||||||
my ($key2,$ivp,$len_)=("%r8","%r9","%r9");
|
my ($key2,$ivp,$len_)=("%r8","%r9","%r9");
|
||||||
my $frame_size = 0x68 + ($win64?160:0);
|
my $frame_size = 0x60 + ($win64?160:0);
|
||||||
|
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
.globl aesni_xts_encrypt
|
.globl aesni_xts_encrypt
|
||||||
.type aesni_xts_encrypt,\@function,6
|
.type aesni_xts_encrypt,\@function,6
|
||||||
.align 16
|
.align 16
|
||||||
aesni_xts_encrypt:
|
aesni_xts_encrypt:
|
||||||
lea -$frame_size(%rsp),%rsp
|
lea (%rsp),%rax
|
||||||
|
push %rbp
|
||||||
|
sub \$$frame_size,%rsp
|
||||||
|
and \$-16,%rsp # Linux kernel stack can be incorrectly seeded
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($win64);
|
$code.=<<___ if ($win64);
|
||||||
movaps %xmm6,0x60(%rsp)
|
movaps %xmm6,0x60(%rsp)
|
||||||
@ -1319,6 +1327,7 @@ $code.=<<___ if ($win64);
|
|||||||
.Lxts_enc_body:
|
.Lxts_enc_body:
|
||||||
___
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
|
lea -8(%rax),%rbp
|
||||||
movups ($ivp),@tweak[5] # load clear-text tweak
|
movups ($ivp),@tweak[5] # load clear-text tweak
|
||||||
mov 240(%r8),$rounds # key2->rounds
|
mov 240(%r8),$rounds # key2->rounds
|
||||||
mov 240($key),$rnds_ # key1->rounds
|
mov 240($key),$rnds_ # key1->rounds
|
||||||
@ -1682,7 +1691,8 @@ $code.=<<___ if ($win64);
|
|||||||
movaps 0xf0(%rsp),%xmm15
|
movaps 0xf0(%rsp),%xmm15
|
||||||
___
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
lea $frame_size(%rsp),%rsp
|
lea (%rbp),%rsp
|
||||||
|
pop %rbp
|
||||||
.Lxts_enc_epilogue:
|
.Lxts_enc_epilogue:
|
||||||
ret
|
ret
|
||||||
.size aesni_xts_encrypt,.-aesni_xts_encrypt
|
.size aesni_xts_encrypt,.-aesni_xts_encrypt
|
||||||
@ -1693,7 +1703,10 @@ $code.=<<___;
|
|||||||
.type aesni_xts_decrypt,\@function,6
|
.type aesni_xts_decrypt,\@function,6
|
||||||
.align 16
|
.align 16
|
||||||
aesni_xts_decrypt:
|
aesni_xts_decrypt:
|
||||||
lea -$frame_size(%rsp),%rsp
|
lea (%rsp),%rax
|
||||||
|
push %rbp
|
||||||
|
sub \$$frame_size,%rsp
|
||||||
|
and \$-16,%rsp # Linux kernel stack can be incorrectly seeded
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($win64);
|
$code.=<<___ if ($win64);
|
||||||
movaps %xmm6,0x60(%rsp)
|
movaps %xmm6,0x60(%rsp)
|
||||||
@ -1709,6 +1722,7 @@ $code.=<<___ if ($win64);
|
|||||||
.Lxts_dec_body:
|
.Lxts_dec_body:
|
||||||
___
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
|
lea -8(%rax),%rbp
|
||||||
movups ($ivp),@tweak[5] # load clear-text tweak
|
movups ($ivp),@tweak[5] # load clear-text tweak
|
||||||
mov 240($key2),$rounds # key2->rounds
|
mov 240($key2),$rounds # key2->rounds
|
||||||
mov 240($key),$rnds_ # key1->rounds
|
mov 240($key),$rnds_ # key1->rounds
|
||||||
@ -2108,7 +2122,8 @@ $code.=<<___ if ($win64);
|
|||||||
movaps 0xf0(%rsp),%xmm15
|
movaps 0xf0(%rsp),%xmm15
|
||||||
___
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
lea $frame_size(%rsp),%rsp
|
lea (%rbp),%rsp
|
||||||
|
pop %rbp
|
||||||
.Lxts_dec_epilogue:
|
.Lxts_dec_epilogue:
|
||||||
ret
|
ret
|
||||||
.size aesni_xts_decrypt,.-aesni_xts_decrypt
|
.size aesni_xts_decrypt,.-aesni_xts_decrypt
|
||||||
@ -2120,7 +2135,7 @@ ___
|
|||||||
# size_t length, const AES_KEY *key,
|
# size_t length, const AES_KEY *key,
|
||||||
# unsigned char *ivp,const int enc);
|
# unsigned char *ivp,const int enc);
|
||||||
{
|
{
|
||||||
my $reserved = $win64?0x40:-0x18; # used in decrypt
|
my $frame_size = 0x10 + ($win64?0x40:0); # used in decrypt
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
.globl ${PREFIX}_cbc_encrypt
|
.globl ${PREFIX}_cbc_encrypt
|
||||||
.type ${PREFIX}_cbc_encrypt,\@function,6
|
.type ${PREFIX}_cbc_encrypt,\@function,6
|
||||||
@ -2176,16 +2191,20 @@ $code.=<<___;
|
|||||||
#--------------------------- CBC DECRYPT ------------------------------#
|
#--------------------------- CBC DECRYPT ------------------------------#
|
||||||
.align 16
|
.align 16
|
||||||
.Lcbc_decrypt:
|
.Lcbc_decrypt:
|
||||||
|
lea (%rsp),%rax
|
||||||
|
push %rbp
|
||||||
|
sub \$$frame_size,%rsp
|
||||||
|
and \$-16,%rsp # Linux kernel stack can be incorrectly seeded
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($win64);
|
$code.=<<___ if ($win64);
|
||||||
lea -0x58(%rsp),%rsp
|
movaps %xmm6,0x10(%rsp)
|
||||||
movaps %xmm6,(%rsp)
|
movaps %xmm7,0x20(%rsp)
|
||||||
movaps %xmm7,0x10(%rsp)
|
movaps %xmm8,0x30(%rsp)
|
||||||
movaps %xmm8,0x20(%rsp)
|
movaps %xmm9,0x40(%rsp)
|
||||||
movaps %xmm9,0x30(%rsp)
|
|
||||||
.Lcbc_decrypt_body:
|
.Lcbc_decrypt_body:
|
||||||
___
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
|
lea -8(%rax),%rbp
|
||||||
movups ($ivp),$iv
|
movups ($ivp),$iv
|
||||||
mov $rnds_,$rounds
|
mov $rnds_,$rounds
|
||||||
cmp \$0x70,$len
|
cmp \$0x70,$len
|
||||||
@ -2193,11 +2212,11 @@ $code.=<<___;
|
|||||||
shr \$1,$rnds_
|
shr \$1,$rnds_
|
||||||
sub \$0x70,$len
|
sub \$0x70,$len
|
||||||
mov $rnds_,$rounds
|
mov $rnds_,$rounds
|
||||||
movaps $iv,$reserved(%rsp)
|
movaps $iv,(%rsp)
|
||||||
jmp .Lcbc_dec_loop8_enter
|
jmp .Lcbc_dec_loop8_enter
|
||||||
.align 16
|
.align 16
|
||||||
.Lcbc_dec_loop8:
|
.Lcbc_dec_loop8:
|
||||||
movaps $rndkey0,$reserved(%rsp) # save IV
|
movaps $rndkey0,(%rsp) # save IV
|
||||||
movups $inout7,($out)
|
movups $inout7,($out)
|
||||||
lea 0x10($out),$out
|
lea 0x10($out),$out
|
||||||
.Lcbc_dec_loop8_enter:
|
.Lcbc_dec_loop8_enter:
|
||||||
@ -2237,7 +2256,7 @@ $code.=<<___;
|
|||||||
|
|
||||||
movups ($inp),$rndkey1 # re-load input
|
movups ($inp),$rndkey1 # re-load input
|
||||||
movups 0x10($inp),$rndkey0
|
movups 0x10($inp),$rndkey0
|
||||||
xorps $reserved(%rsp),$inout0 # ^= IV
|
xorps (%rsp),$inout0 # ^= IV
|
||||||
xorps $rndkey1,$inout1
|
xorps $rndkey1,$inout1
|
||||||
movups 0x20($inp),$rndkey1
|
movups 0x20($inp),$rndkey1
|
||||||
xorps $rndkey0,$inout2
|
xorps $rndkey0,$inout2
|
||||||
@ -2301,11 +2320,11 @@ $code.=<<___;
|
|||||||
jbe .Lcbc_dec_six
|
jbe .Lcbc_dec_six
|
||||||
|
|
||||||
movups 0x60($inp),$inout6
|
movups 0x60($inp),$inout6
|
||||||
movaps $iv,$reserved(%rsp) # save IV
|
movaps $iv,(%rsp) # save IV
|
||||||
call _aesni_decrypt8
|
call _aesni_decrypt8
|
||||||
movups ($inp),$rndkey1
|
movups ($inp),$rndkey1
|
||||||
movups 0x10($inp),$rndkey0
|
movups 0x10($inp),$rndkey0
|
||||||
xorps $reserved(%rsp),$inout0 # ^= IV
|
xorps (%rsp),$inout0 # ^= IV
|
||||||
xorps $rndkey1,$inout1
|
xorps $rndkey1,$inout1
|
||||||
movups 0x20($inp),$rndkey1
|
movups 0x20($inp),$rndkey1
|
||||||
xorps $rndkey0,$inout2
|
xorps $rndkey0,$inout2
|
||||||
@ -2429,23 +2448,24 @@ $code.=<<___;
|
|||||||
jmp .Lcbc_dec_ret
|
jmp .Lcbc_dec_ret
|
||||||
.align 16
|
.align 16
|
||||||
.Lcbc_dec_tail_partial:
|
.Lcbc_dec_tail_partial:
|
||||||
movaps $inout0,$reserved(%rsp)
|
movaps $inout0,(%rsp)
|
||||||
mov \$16,%rcx
|
mov \$16,%rcx
|
||||||
mov $out,%rdi
|
mov $out,%rdi
|
||||||
sub $len,%rcx
|
sub $len,%rcx
|
||||||
lea $reserved(%rsp),%rsi
|
lea (%rsp),%rsi
|
||||||
.long 0x9066A4F3 # rep movsb
|
.long 0x9066A4F3 # rep movsb
|
||||||
|
|
||||||
.Lcbc_dec_ret:
|
.Lcbc_dec_ret:
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($win64);
|
$code.=<<___ if ($win64);
|
||||||
movaps (%rsp),%xmm6
|
movaps 0x10(%rsp),%xmm6
|
||||||
movaps 0x10(%rsp),%xmm7
|
movaps 0x20(%rsp),%xmm7
|
||||||
movaps 0x20(%rsp),%xmm8
|
movaps 0x30(%rsp),%xmm8
|
||||||
movaps 0x30(%rsp),%xmm9
|
movaps 0x40(%rsp),%xmm9
|
||||||
lea 0x58(%rsp),%rsp
|
|
||||||
___
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
|
lea (%rbp),%rsp
|
||||||
|
pop %rbp
|
||||||
.Lcbc_ret:
|
.Lcbc_ret:
|
||||||
ret
|
ret
|
||||||
.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
|
.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
|
||||||
@ -2820,9 +2840,8 @@ ctr32_se_handler:
|
|||||||
lea 512($context),%rdi # &context.Xmm6
|
lea 512($context),%rdi # &context.Xmm6
|
||||||
mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
|
mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
|
||||||
.long 0xa548f3fc # cld; rep movsq
|
.long 0xa548f3fc # cld; rep movsq
|
||||||
lea 0xc8(%rax),%rax # adjust stack pointer
|
|
||||||
|
|
||||||
jmp .Lcommon_seh_tail
|
jmp .Lcommon_rbp_tail
|
||||||
.size ctr32_se_handler,.-ctr32_se_handler
|
.size ctr32_se_handler,.-ctr32_se_handler
|
||||||
|
|
||||||
.type xts_se_handler,\@abi-omnipotent
|
.type xts_se_handler,\@abi-omnipotent
|
||||||
@ -2861,9 +2880,8 @@ xts_se_handler:
|
|||||||
lea 512($context),%rdi # & context.Xmm6
|
lea 512($context),%rdi # & context.Xmm6
|
||||||
mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
|
mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
|
||||||
.long 0xa548f3fc # cld; rep movsq
|
.long 0xa548f3fc # cld; rep movsq
|
||||||
lea 0x68+160(%rax),%rax # adjust stack pointer
|
|
||||||
|
|
||||||
jmp .Lcommon_seh_tail
|
jmp .Lcommon_rbp_tail
|
||||||
.size xts_se_handler,.-xts_se_handler
|
.size xts_se_handler,.-xts_se_handler
|
||||||
___
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
@ -2896,11 +2914,16 @@ cbc_se_handler:
|
|||||||
cmp %r10,%rbx # context->Rip>="epilogue" label
|
cmp %r10,%rbx # context->Rip>="epilogue" label
|
||||||
jae .Lcommon_seh_tail
|
jae .Lcommon_seh_tail
|
||||||
|
|
||||||
lea 0(%rax),%rsi # top of stack
|
lea 16(%rax),%rsi # %xmm save area
|
||||||
lea 512($context),%rdi # &context.Xmm6
|
lea 512($context),%rdi # &context.Xmm6
|
||||||
mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax)
|
mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax)
|
||||||
.long 0xa548f3fc # cld; rep movsq
|
.long 0xa548f3fc # cld; rep movsq
|
||||||
lea 0x58(%rax),%rax # adjust stack pointer
|
|
||||||
|
.Lcommon_rbp_tail:
|
||||||
|
mov 160($context),%rax # pull context->Rbp
|
||||||
|
mov (%rax),%rbp # restore saved %rbp
|
||||||
|
lea 8(%rax),%rax # adjust stack pointer
|
||||||
|
mov %rbp,160($context) # restore context->Rbp
|
||||||
jmp .Lcommon_seh_tail
|
jmp .Lcommon_seh_tail
|
||||||
|
|
||||||
.Lrestore_cbc_rax:
|
.Lrestore_cbc_rax:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user