aesv8-armx.pl update:
- fix 32-bit build (submitted by Ard Biesheuvel); - fix performance issue in CTR;
This commit is contained in:
parent
5111672b8e
commit
65cad34b10
@ -18,9 +18,9 @@
|
|||||||
#
|
#
|
||||||
# Performance in cycles per byte processed with 128-bit key:
|
# Performance in cycles per byte processed with 128-bit key:
|
||||||
#
|
#
|
||||||
# CBC enc CBC dec
|
# CBC enc CBC dec CTR
|
||||||
# Apple A7 2.39 1.20
|
# Apple A7 2.39 1.20 1.20
|
||||||
# Cortex-A5x n/a n/a
|
# Cortex-A5x n/a n/a n/a
|
||||||
|
|
||||||
$flavour = shift;
|
$flavour = shift;
|
||||||
open STDOUT,">".shift;
|
open STDOUT,">".shift;
|
||||||
@ -733,12 +733,11 @@ $code.=<<___;
|
|||||||
subs $len,$len,#2
|
subs $len,$len,#2
|
||||||
aesmc $tmp0,$tmp0
|
aesmc $tmp0,$tmp0
|
||||||
aesmc $tmp1,$tmp1
|
aesmc $tmp1,$tmp1
|
||||||
vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
|
vld1.32 {q8-q9},[$key_],#32 // re-pre-load rndkey[0-1]
|
||||||
aese $tmp0,q13
|
aese $tmp0,q13
|
||||||
aese $tmp1,q13
|
aese $tmp1,q13
|
||||||
aesmc $tmp0,$tmp0
|
aesmc $tmp0,$tmp0
|
||||||
aesmc $tmp1,$tmp1
|
aesmc $tmp1,$tmp1
|
||||||
vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
|
|
||||||
aese $tmp0,q14
|
aese $tmp0,q14
|
||||||
aese $tmp1,q14
|
aese $tmp1,q14
|
||||||
vmov.32 ${dat0}[3], $tctr
|
vmov.32 ${dat0}[3], $tctr
|
||||||
@ -878,14 +877,14 @@ if ($flavour =~ /64/) { ######## 64-bit code
|
|||||||
"aesd" => 0x4e285800, "aese" => 0x4e284800,
|
"aesd" => 0x4e285800, "aese" => 0x4e284800,
|
||||||
"aesimc"=> 0x4e287800, "aesmc" => 0x4e286800 );
|
"aesimc"=> 0x4e287800, "aesmc" => 0x4e286800 );
|
||||||
|
|
||||||
sub unaes {
|
local *unaes = sub {
|
||||||
my ($mnemonic,$arg)=@_;
|
my ($mnemonic,$arg)=@_;
|
||||||
|
|
||||||
$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o &&
|
$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o &&
|
||||||
sprintf ".long\t0x%08x\t//%s %s",
|
sprintf ".inst\t0x%08x\t//%s %s",
|
||||||
$opcode{$mnemonic}|$1|($2<<5),
|
$opcode{$mnemonic}|$1|($2<<5),
|
||||||
$mnemonic,$arg;
|
$mnemonic,$arg;
|
||||||
}
|
};
|
||||||
|
|
||||||
foreach(split("\n",$code)) {
|
foreach(split("\n",$code)) {
|
||||||
s/\`([^\`]*)\`/eval($1)/geo;
|
s/\`([^\`]*)\`/eval($1)/geo;
|
||||||
@ -917,35 +916,42 @@ if ($flavour =~ /64/) { ######## 64-bit code
|
|||||||
"aesd" => 0xf3b00340, "aese" => 0xf3b00300,
|
"aesd" => 0xf3b00340, "aese" => 0xf3b00300,
|
||||||
"aesimc"=> 0xf3b003c0, "aesmc" => 0xf3b00380 );
|
"aesimc"=> 0xf3b003c0, "aesmc" => 0xf3b00380 );
|
||||||
|
|
||||||
sub unaes {
|
local *unaes = sub {
|
||||||
my ($mnemonic,$arg)=@_;
|
my ($mnemonic,$arg)=@_;
|
||||||
|
|
||||||
$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o &&
|
if ($arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) {
|
||||||
sprintf ".long\t0x%08x\t@ %s %s",
|
my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
|
||||||
$opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
|
|(($2&7)<<1) |(($2&8)<<2);
|
||||||
|(($2&7)<<1) |(($2&8)<<2),
|
# since ARMv7 instructions are always encoded little-endian.
|
||||||
|
# correct solution is to use .inst directive, but older
|
||||||
|
# assemblers don't implement it:-(
|
||||||
|
sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
|
||||||
|
$word&0xff,($word>>8)&0xff,
|
||||||
|
($word>>16)&0xff,($word>>24)&0xff,
|
||||||
$mnemonic,$arg;
|
$mnemonic,$arg;
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
sub unvtbl {
|
sub unvtbl {
|
||||||
my $arg=shift;
|
my $arg=shift;
|
||||||
|
|
||||||
$arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o &&
|
$arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o &&
|
||||||
sprintf "vtbl.8 d%d,{q%d},d%d\n\tvtbl.8 d%d,{q%d},d%d",2*$1,$2,2*$3,2*$1+1,$2,2*$3+1;
|
sprintf "vtbl.8 d%d,{q%d},d%d\n\t".
|
||||||
|
"vtbl.8 d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub unvdup32 {
|
sub unvdup32 {
|
||||||
my $arg=shift;
|
my $arg=shift;
|
||||||
|
|
||||||
$arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
|
$arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
|
||||||
sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+$3>>1,$3&1;
|
sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub unvmov32 {
|
sub unvmov32 {
|
||||||
my $arg=shift;
|
my $arg=shift;
|
||||||
|
|
||||||
$arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o &&
|
$arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o &&
|
||||||
sprintf "vmov.32 d%d[%d],%s",2*$1+$2>>1,$2&1,$3;
|
sprintf "vmov.32 d%d[%d],%s",2*$1+($2>>1),$2&1,$3;
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach(split("\n",$code)) {
|
foreach(split("\n",$code)) {
|
||||||
@ -956,6 +962,7 @@ if ($flavour =~ /64/) { ######## 64-bit code
|
|||||||
s/\/\/\s?/@ /o; # new->old style commentary
|
s/\/\/\s?/@ /o; # new->old style commentary
|
||||||
|
|
||||||
# fix up remainig new-style suffixes
|
# fix up remainig new-style suffixes
|
||||||
|
s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo or
|
||||||
s/\],#[0-9]+/]!/o;
|
s/\],#[0-9]+/]!/o;
|
||||||
|
|
||||||
s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or
|
s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or
|
||||||
|
Loading…
x
Reference in New Issue
Block a user