e_padlock-x86[_64].pl: protection against prefetch errata.
This commit is contained in:
parent
3231e42d72
commit
6c8ce3c2ff
@ -37,6 +37,7 @@ require "x86asm.pl";
|
||||
|
||||
&asm_init($ARGV[0],$0);
|
||||
|
||||
%PADLOCK_MARGIN=(ecb=>128, cbc=>64); # prefetch errata
|
||||
$PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16
|
||||
|
||||
$ctx="edx";
|
||||
@ -187,6 +188,10 @@ my ($mode,$opcode) = @_;
|
||||
&movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter
|
||||
} else {
|
||||
&xor ("ebx","ebx");
|
||||
if ($PADLOCK_MARGIN{$mode}) {
|
||||
&cmp ($len,$PADLOCK_MARGIN{$mode});
|
||||
&jbe (&label("${mode}_short"));
|
||||
}
|
||||
&test (&DWP(0,$ctx),1<<5); # align bit in control word
|
||||
&jnz (&label("${mode}_aligned"));
|
||||
&test ($out,0x0f);
|
||||
@ -285,20 +290,39 @@ my ($mode,$opcode) = @_;
|
||||
&mov ($chunk,$PADLOCK_CHUNK);
|
||||
&jnz (&label("${mode}_loop"));
|
||||
if ($mode ne "ctr32") {
|
||||
&test ($out,0x0f); # out_misaligned
|
||||
&jz (&label("${mode}_done"));
|
||||
&cmp ("esp","ebp");
|
||||
&je (&label("${mode}_done"));
|
||||
}
|
||||
&mov ($len,"ebp");
|
||||
&mov ($out,"esp");
|
||||
&sub ($len,"esp");
|
||||
&xor ("eax","eax");
|
||||
&shr ($len,2);
|
||||
&data_byte(0xf3,0xab); # rep stosl
|
||||
&pxor ("xmm0","xmm0");
|
||||
&lea ("eax",&DWP(0,"esp"));
|
||||
&set_label("${mode}_bzero");
|
||||
&movaps (&QWP(0,"eax"),"xmm0");
|
||||
&lea ("eax",&DWP(16,"eax"));
|
||||
&cmp ("ebp","eax");
|
||||
&ja (&label("${mode}_bzero"));
|
||||
|
||||
&set_label("${mode}_done");
|
||||
&lea ("esp",&DWP(24,"ebp"));
|
||||
if ($mode ne "ctr32") {
|
||||
&jmp (&label("${mode}_exit"));
|
||||
|
||||
&set_label("${mode}_short",16);
|
||||
&xor ("eax","eax");
|
||||
&lea ("ebp",&DWP(-24,"esp"));
|
||||
&sub ("eax",$len);
|
||||
&lea ("esp",&DWP(0,"eax","ebp"));
|
||||
&and ("esp",-16);
|
||||
&xor ($chunk,$chunk);
|
||||
&set_label("${mode}_short_copy");
|
||||
&movups ("xmm0",&QWP(0,$inp,$chunk));
|
||||
&lea ($chunk,&DWP(16,$chunk));
|
||||
&cmp ($len,$chunk);
|
||||
&movaps (&QWP(-16,"esp",$chunk),"xmm0");
|
||||
&ja (&label("${mode}_short_copy"));
|
||||
&mov ($inp,"esp");
|
||||
&mov ($chunk,$len);
|
||||
&jmp (&label("${mode}_loop"));
|
||||
|
||||
&set_label("${mode}_aligned",16);
|
||||
&lea ("eax",&DWP(-16,$ctx)); # ivp
|
||||
&lea ("ebx",&DWP(16,$ctx)); # key
|
||||
|
@ -27,6 +27,7 @@ open STDOUT,"| $^X $xlate $flavour $output";
|
||||
|
||||
$code=".text\n";
|
||||
|
||||
%PADLOCK_MARGIN=(ecb=>128, cbc=>64, ctr32=>64); # prefetch errata
|
||||
$PADLOCK_CHUNK=512; # Must be a power of 2 between 32 and 2^20
|
||||
|
||||
$ctx="%rdx";
|
||||
@ -284,6 +285,17 @@ padlock_${mode}_encrypt:
|
||||
lea 16($ctx),$ctx # control word
|
||||
xor %eax,%eax
|
||||
xor %ebx,%ebx
|
||||
___
|
||||
# Formally speaking, the correct condition is $len<=$margin and $inp+$margin
|
||||
# crosses page boundary [and next page is unreadable]. But $inp can
|
||||
# be unaligned, in which case data can be copied to $out if the latter is
|
||||
# aligned, in which case $out+$margin has to be checked. Covering all
|
||||
# cases appears more complicated than just copying short input...
|
||||
$code.=<<___ if ($PADLOCK_MARGIN{$mode});
|
||||
cmp \$$PADLOCK_MARGIN{$mode},$len
|
||||
jbe .L${mode}_short
|
||||
___
|
||||
$code.=<<___;
|
||||
testl \$`1<<5`,($ctx) # align bit in control word
|
||||
jnz .L${mode}_aligned
|
||||
test \$0x0f,$out
|
||||
@ -305,6 +317,7 @@ padlock_${mode}_encrypt:
|
||||
lea (%rax,%rbp),%rsp
|
||||
___
|
||||
$code.=<<___ if ($mode eq "ctr32");
|
||||
.L${mode}_reenter:
|
||||
mov -4($ctx),%eax # pull 32-bit counter
|
||||
bswap %eax
|
||||
neg %eax
|
||||
@ -373,19 +386,38 @@ $code.=<<___;
|
||||
mov \$$PADLOCK_CHUNK,$chunk
|
||||
jnz .L${mode}_loop
|
||||
|
||||
test \$0x0f,$out
|
||||
jz .L${mode}_done
|
||||
cmp %rsp,%rbp
|
||||
je .L${mode}_done
|
||||
|
||||
pxor %xmm0,%xmm0
|
||||
lea (%rsp),%rax
|
||||
.L${mode}_bzero:
|
||||
movaps %xmm0,(%rax)
|
||||
lea 16(%rax),%rax
|
||||
cmp %rax,%rbp
|
||||
ja .L${mode}_bzero
|
||||
|
||||
mov %rbp,$len
|
||||
mov %rsp,$out
|
||||
sub %rsp,$len
|
||||
xor %rax,%rax
|
||||
shr \$3,$len
|
||||
.byte 0xf3,0x48,0xab # rep stosq
|
||||
.L${mode}_done:
|
||||
lea (%rbp),%rsp
|
||||
jmp .L${mode}_exit
|
||||
|
||||
___
|
||||
$code.=<<___ if ($PADLOCK_MARGIN{$mode});
|
||||
.align 16
|
||||
.L${mode}_short:
|
||||
mov %rsp,%rbp
|
||||
sub $len,%rsp
|
||||
xor $chunk,$chunk
|
||||
.L${mode}_short_copy:
|
||||
movups ($inp,$chunk),%xmm0
|
||||
lea 16($chunk),$chunk
|
||||
cmp $chunk,$len
|
||||
movaps %xmm0,-16(%rsp,$chunk)
|
||||
ja .L${mode}_short_copy
|
||||
mov %rsp,$inp
|
||||
mov $len,$chunk
|
||||
jmp .L${mode}_`${mode} eq "ctr32"?"reenter":"loop"`
|
||||
___
|
||||
$code.=<<___;
|
||||
.align 16
|
||||
.L${mode}_aligned:
|
||||
___
|
||||
|
Loading…
Reference in New Issue
Block a user