aes/asm/vpaes-ppc.pl: add little-endian support.
This commit is contained in:
parent
f0170ebb97
commit
1fb83a3bc2
@ -365,7 +365,7 @@ my %table=(
|
||||
####
|
||||
"linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
|
||||
"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:".eval{my $asm=$ppc64_asm;$asm=~s/vpaes\-ppc\.o//;$asm}.":linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::",
|
||||
"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::",
|
||||
"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall::-D_REENTRANT::-ldl -no_cpprt:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
"linux-x86_64", "gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
|
||||
|
2
TABLE
2
TABLE
@ -4532,7 +4532,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL
|
||||
$cpuid_obj = ppccpuid.o ppccap.o
|
||||
$bn_obj = bn-ppc.o ppc-mont.o ppc64-mont.o
|
||||
$des_obj =
|
||||
$aes_obj = aes_core.o aes_cbc.o aes-ppc.o
|
||||
$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o
|
||||
$bf_obj =
|
||||
$md5_obj =
|
||||
$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o
|
||||
|
@ -61,89 +61,89 @@ $code.=<<___;
|
||||
.align 7 # totally strategic alignment
|
||||
_vpaes_consts:
|
||||
Lk_mc_forward: # mc_forward
|
||||
.long 0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c
|
||||
.long 0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300
|
||||
.long 0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704
|
||||
.long 0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08
|
||||
.long 0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c ?inv
|
||||
.long 0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300 ?inv
|
||||
.long 0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704 ?inv
|
||||
.long 0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08 ?inv
|
||||
Lk_mc_backward: # mc_backward
|
||||
.long 0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e
|
||||
.long 0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a
|
||||
.long 0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506
|
||||
.long 0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102
|
||||
.long 0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e ?inv
|
||||
.long 0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a ?inv
|
||||
.long 0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506 ?inv
|
||||
.long 0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102 ?inv
|
||||
Lk_sr: # sr
|
||||
.long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
|
||||
.long 0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b
|
||||
.long 0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07
|
||||
.long 0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603
|
||||
.long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f ?inv
|
||||
.long 0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b ?inv
|
||||
.long 0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07 ?inv
|
||||
.long 0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603 ?inv
|
||||
|
||||
##
|
||||
## "Hot" constants
|
||||
##
|
||||
Lk_inv: # inv, inva
|
||||
.long 0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704
|
||||
.long 0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03
|
||||
.long 0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704 ?rev
|
||||
.long 0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03 ?rev
|
||||
Lk_ipt: # input transform (lo, hi)
|
||||
.long 0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca
|
||||
.long 0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd
|
||||
.long 0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca ?rev
|
||||
.long 0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd ?rev
|
||||
Lk_sbo: # sbou, sbot
|
||||
.long 0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 0x7abfaa15
|
||||
.long 0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e
|
||||
.long 0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 0x7abfaa15 ?rev
|
||||
.long 0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e ?rev
|
||||
Lk_sb1: # sb1u, sb1t
|
||||
.long 0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b
|
||||
.long 0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5
|
||||
.long 0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b ?rev
|
||||
.long 0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5 ?rev
|
||||
Lk_sb2: # sb2u, sb2t
|
||||
.long 0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2
|
||||
.long 0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e
|
||||
.long 0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2 ?rev
|
||||
.long 0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e ?rev
|
||||
|
||||
##
|
||||
## Decryption stuff
|
||||
##
|
||||
Lk_dipt: # decryption input transform
|
||||
.long 0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15
|
||||
.long 0x00650560, 0xe683e386, 0x94f191f4, 0x72177712
|
||||
.long 0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15 ?rev
|
||||
.long 0x00650560, 0xe683e386, 0x94f191f4, 0x72177712 ?rev
|
||||
Lk_dsbo: # decryption sbox final output
|
||||
.long 0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7
|
||||
.long 0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca
|
||||
.long 0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7 ?rev
|
||||
.long 0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca ?rev
|
||||
Lk_dsb9: # decryption sbox output *9*u, *9*t
|
||||
.long 0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca
|
||||
.long 0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72
|
||||
.long 0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca ?rev
|
||||
.long 0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72 ?rev
|
||||
Lk_dsbd: # decryption sbox output *D*u, *D*t
|
||||
.long 0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5
|
||||
.long 0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129
|
||||
.long 0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5 ?rev
|
||||
.long 0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129 ?rev
|
||||
Lk_dsbb: # decryption sbox output *B*u, *B*t
|
||||
.long 0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660
|
||||
.long 0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3
|
||||
.long 0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660 ?rev
|
||||
.long 0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3 ?rev
|
||||
Lk_dsbe: # decryption sbox output *E*u, *E*t
|
||||
.long 0x00d0d426, 0x9692f246, 0xb0f6b464, 0x04604222
|
||||
.long 0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794
|
||||
.long 0x00d0d426, 0x9692f246, 0xb0f6b464, 0x04604222 ?rev
|
||||
.long 0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794 ?rev
|
||||
|
||||
##
|
||||
## Key schedule constants
|
||||
##
|
||||
Lk_dksd: # decryption key schedule: invskew x*D
|
||||
.long 0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007
|
||||
.long 0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f
|
||||
.long 0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007 ?rev
|
||||
.long 0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f ?rev
|
||||
Lk_dksb: # decryption key schedule: invskew x*B
|
||||
.long 0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603
|
||||
.long 0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9
|
||||
.long 0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603 ?rev
|
||||
.long 0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9 ?rev
|
||||
Lk_dkse: # decryption key schedule: invskew x*E + 0x63
|
||||
.long 0x00d6c91f, 0xca1c03d5, 0x86504f99, 0x4c9a8553
|
||||
.long 0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd
|
||||
.long 0x00d6c91f, 0xca1c03d5, 0x86504f99, 0x4c9a8553 ?rev
|
||||
.long 0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd ?rev
|
||||
Lk_dks9: # decryption key schedule: invskew x*9
|
||||
.long 0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a
|
||||
.long 0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b
|
||||
.long 0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a ?rev
|
||||
.long 0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b ?rev
|
||||
|
||||
Lk_rcon: # rcon
|
||||
.long 0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70
|
||||
.long 0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70 ?asis
|
||||
Lk_s63:
|
||||
.long 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b
|
||||
.long 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b ?asis
|
||||
|
||||
Lk_opt: # output transform
|
||||
.long 0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7
|
||||
.long 0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1
|
||||
.long 0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7 ?rev
|
||||
.long 0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1 ?rev
|
||||
Lk_deskew: # deskew tables: inverts the sbox's "skew"
|
||||
.long 0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d
|
||||
.long 0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128
|
||||
.long 0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d ?rev
|
||||
.long 0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128 ?rev
|
||||
.align 5
|
||||
Lconsts:
|
||||
mflr r0
|
||||
@ -227,7 +227,7 @@ _vpaes_encrypt_core:
|
||||
li r11, 0x10
|
||||
lvx v6, r9, $key
|
||||
addi r9, r9, 16
|
||||
vperm v5, v5, v6, $keyperm # align round key
|
||||
?vperm v5, v5, v6, $keyperm # align round key
|
||||
addi r10, r11, 0x40
|
||||
vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
|
||||
vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm1
|
||||
@ -275,7 +275,7 @@ Lenc_entry:
|
||||
vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak
|
||||
addi r9, r9, 16
|
||||
vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io
|
||||
vperm v5, v5, v6, $keyperm # align round key
|
||||
?vperm v5, v5, v6, $keyperm # align round key
|
||||
vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
|
||||
bdnz Lenc_loop
|
||||
|
||||
@ -330,25 +330,20 @@ Lenc_entry:
|
||||
|
||||
bl _vpaes_encrypt_preheat
|
||||
|
||||
neg r8, $inp # prepare for unaligned access
|
||||
lvsl $keyperm, 0, $key
|
||||
lvsr $outperm, 0, $out
|
||||
lvsr $inpperm, 0, r8 # -$inp
|
||||
vnor $outmask, v7, v7 # 0xff..ff
|
||||
lvx $inptail, 0, $inp
|
||||
vperm $outmask, v7, $outmask, $outperm
|
||||
?lvsl $inpperm, 0, $inp # prepare for unaligned access
|
||||
lvx v0, 0, $inp
|
||||
addi $inp, $inp, 15 # 15 is not a typo
|
||||
lvx $outhead, 0, $out
|
||||
|
||||
########
|
||||
vmr v0, $inptail
|
||||
?lvsr $outperm, 0, $out
|
||||
?lvsl $keyperm, 0, $key # prepare for unaligned access
|
||||
vnor $outmask, v7, v7 # 0xff..ff
|
||||
lvx $inptail, 0, $inp # redundant in aligned case
|
||||
addi $inp, $inp, 16
|
||||
vperm v0, v0, $inptail, $inpperm
|
||||
?vperm $outmask, v7, $outmask, $outperm
|
||||
lvx $outhead, 0, $out
|
||||
?vperm v0, v0, $inptail, $inpperm
|
||||
|
||||
bl _vpaes_encrypt_core
|
||||
|
||||
vperm v0, v0, v0, $outperm # rotate left
|
||||
vperm v0, v0, v0, $outperm # rotate right/left
|
||||
vsel v1, $outhead, v0, $outmask
|
||||
vmr $outhead, v0
|
||||
stvx v1, 0, $out
|
||||
@ -445,7 +440,7 @@ _vpaes_decrypt_core:
|
||||
li r11, 0x30
|
||||
lvx v6, r9, $key
|
||||
addi r9, r9, 16
|
||||
vperm v5, v5, v6, $keyperm # align round key
|
||||
?vperm v5, v5, v6, $keyperm # align round key
|
||||
vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
|
||||
vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2
|
||||
vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm1, %xmm0
|
||||
@ -509,7 +504,7 @@ Ldec_entry:
|
||||
vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak
|
||||
addi r9, r9, 16
|
||||
vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io
|
||||
vperm v5, v5, v6, $keyperm # align round key
|
||||
?vperm v5, v5, v6, $keyperm # align round key
|
||||
vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
|
||||
bdnz Ldec_loop
|
||||
|
||||
@ -564,25 +559,20 @@ Ldec_entry:
|
||||
|
||||
bl _vpaes_decrypt_preheat
|
||||
|
||||
neg r8, $inp # prepare for unaligned access
|
||||
lvsl $keyperm, 0, $key
|
||||
lvsr $outperm, 0, $out
|
||||
lvsr $inpperm, 0, r8 # -$inp
|
||||
vnor $outmask, v7, v7 # 0xff..ff
|
||||
lvx $inptail, 0, $inp
|
||||
vperm $outmask, v7, $outmask, $outperm
|
||||
?lvsl $inpperm, 0, $inp # prepare for unaligned access
|
||||
lvx v0, 0, $inp
|
||||
addi $inp, $inp, 15 # 15 is not a typo
|
||||
lvx $outhead, 0, $out
|
||||
|
||||
########
|
||||
vmr v0, $inptail
|
||||
?lvsr $outperm, 0, $out
|
||||
?lvsl $keyperm, 0, $key
|
||||
vnor $outmask, v7, v7 # 0xff..ff
|
||||
lvx $inptail, 0, $inp # redundant in aligned case
|
||||
addi $inp, $inp, 16
|
||||
vperm v0, v0, $inptail, $inpperm
|
||||
?vperm $outmask, v7, $outmask, $outperm
|
||||
lvx $outhead, 0, $out
|
||||
?vperm v0, v0, $inptail, $inpperm
|
||||
|
||||
bl _vpaes_decrypt_core
|
||||
|
||||
vperm v0, v0, v0, $outperm # rotate left
|
||||
vperm v0, v0, v0, $outperm # rotate right/left
|
||||
vsel v1, $outhead, v0, $outmask
|
||||
vmr $outhead, v0
|
||||
stvx v1, 0, $out
|
||||
@ -673,18 +663,18 @@ Ldec_entry:
|
||||
|
||||
lvx v24, 0, r31 # load [potentially unaligned] iv
|
||||
li r9, 15
|
||||
lvsl $inpperm, 0, r31
|
||||
?lvsl $inpperm, 0, r31
|
||||
lvx v25, r9, r31
|
||||
vperm v24, v24, v25, $inpperm
|
||||
?vperm v24, v24, v25, $inpperm
|
||||
|
||||
neg r8, $inp # prepare for unaligned access
|
||||
vxor v7, v7, v7
|
||||
lvsl $keyperm, 0, $key
|
||||
lvsr $outperm, 0, $out
|
||||
lvsr $inpperm, 0, r8 # -$inp
|
||||
?lvsl $keyperm, 0, $key
|
||||
?lvsr $outperm, 0, $out
|
||||
?lvsr $inpperm, 0, r8 # -$inp
|
||||
vnor $outmask, v7, v7 # 0xff..ff
|
||||
lvx $inptail, 0, $inp
|
||||
vperm $outmask, v7, $outmask, $outperm
|
||||
?vperm $outmask, v7, $outmask, $outperm
|
||||
addi $inp, $inp, 15 # 15 is not a typo
|
||||
lvx $outhead, 0, $out
|
||||
|
||||
@ -697,14 +687,14 @@ Lcbc_enc_loop:
|
||||
vmr v0, $inptail
|
||||
lvx $inptail, 0, $inp
|
||||
addi $inp, $inp, 16
|
||||
vperm v0, v0, $inptail, $inpperm
|
||||
?vperm v0, v0, $inptail, $inpperm
|
||||
vxor v0, v0, v24 # ^= iv
|
||||
|
||||
bl _vpaes_encrypt_core
|
||||
|
||||
vmr v24, v0 # put aside iv
|
||||
sub. r30, r30, r0 # len -= 16
|
||||
vperm v0, v0, v0, $outperm # rotate left
|
||||
vperm v0, v0, v0, $outperm # rotate right/left
|
||||
vsel v1, $outhead, v0, $outmask
|
||||
vmr $outhead, v0
|
||||
stvx v1, 0, $out
|
||||
@ -722,7 +712,7 @@ Lcbc_dec_loop:
|
||||
vmr v0, $inptail
|
||||
lvx $inptail, 0, $inp
|
||||
addi $inp, $inp, 16
|
||||
vperm v0, v0, $inptail, $inpperm
|
||||
?vperm v0, v0, $inptail, $inpperm
|
||||
vmr v25, v0 # put aside input
|
||||
|
||||
bl _vpaes_decrypt_core
|
||||
@ -730,7 +720,7 @@ Lcbc_dec_loop:
|
||||
vxor v0, v0, v24 # ^= iv
|
||||
vmr v24, v25
|
||||
sub. r30, r30, r0 # len -= 16
|
||||
vperm v0, v0, v0, $outperm # rotate left
|
||||
vperm v0, v0, v0, $outperm # rotate right/left
|
||||
vsel v1, $outhead, v0, $outmask
|
||||
vmr $outhead, v0
|
||||
stvx v1, 0, $out
|
||||
@ -744,12 +734,12 @@ Lcbc_done:
|
||||
stvx v1, 0, $out
|
||||
|
||||
neg r8, r31 # write [potentially unaligned] iv
|
||||
lvsl $outperm, 0, r8
|
||||
?lvsl $outperm, 0, r8
|
||||
li r6, 15
|
||||
vnor $outmask, v7, v7 # 0xff..ff
|
||||
vperm $outmask, v7, $outmask, $outperm
|
||||
?vperm $outmask, v7, $outmask, $outperm
|
||||
lvx $outhead, 0, r31
|
||||
vperm v24, v24, v24, $outperm # rotate
|
||||
vperm v24, v24, v24, $outperm # rotate right/left
|
||||
vsel v0, $outhead, v24, $outmask
|
||||
lvx v1, r6, r31
|
||||
stvx v0, 0, r31
|
||||
@ -863,10 +853,10 @@ _vpaes_schedule_core:
|
||||
neg r8, $inp # prepare for unaligned access
|
||||
lvx v0, 0, $inp
|
||||
addi $inp, $inp, 15 # 15 is not typo
|
||||
lvsr $inpperm, 0, r8 # -$inp
|
||||
?lvsr $inpperm, 0, r8 # -$inp
|
||||
lvx v6, 0, $inp # v6 serves as inptail
|
||||
addi $inp, $inp, 8
|
||||
vperm v0, v0, v6, $inpperm
|
||||
?vperm v0, v0, v6, $inpperm
|
||||
|
||||
# input transform
|
||||
vmr v3, v0 # vmovdqa %xmm0, %xmm3
|
||||
@ -879,13 +869,13 @@ _vpaes_schedule_core:
|
||||
li r8, 0x30 # mov \$0x30,%r8d
|
||||
addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
|
||||
|
||||
lvsr $outperm, 0, $out # prepare for unaligned access
|
||||
vspltisb $outmask, -1 # 0xff..ff
|
||||
?lvsr $outperm, 0, $out # prepare for unaligned access
|
||||
vnor $outmask, v9, v9 # 0xff..ff
|
||||
lvx $outhead, 0, $out
|
||||
vperm $outmask, v9, $outmask, $outperm
|
||||
?vperm $outmask, v9, $outmask, $outperm
|
||||
|
||||
#stvx v0, 0, $out # vmovdqu %xmm0, (%rdx)
|
||||
vperm v1, v0, v0, $outperm # rotate left
|
||||
vperm v1, v0, v0, $outperm # rotate right/left
|
||||
vsel v2, $outhead, v1, $outmask
|
||||
vmr $outhead, v1
|
||||
stvx v2, 0, $out
|
||||
@ -901,14 +891,14 @@ Lschedule_am_decrypting:
|
||||
vperm v4, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
|
||||
|
||||
neg r0, $out # prepare for unaligned access
|
||||
lvsl $outperm, 0, r0
|
||||
?lvsl $outperm, 0, r0
|
||||
addi $out, $out, 15 # 15 is not typo
|
||||
vspltisb $outmask, -1 # 0xff..ff
|
||||
vnor $outmask, v9, v9 # 0xff..ff
|
||||
lvx $outhead, 0, $out
|
||||
vperm $outmask, $outmask, v9, $outperm
|
||||
?vperm $outmask, $outmask, v9, $outperm
|
||||
|
||||
#stvx v4, 0, $out # vmovdqu %xmm3, (%rdx)
|
||||
vperm v4, v4, v4, $outperm # rotate left
|
||||
vperm v4, v4, v4, $outperm # rotate right/left
|
||||
vsel v2, $outhead, v4, $outmask
|
||||
vmr $outhead, v4
|
||||
stvx v2, 0, $out
|
||||
@ -957,16 +947,16 @@ Loop_schedule_128:
|
||||
Lschedule_192:
|
||||
li r0, 4 # mov \$4, %esi
|
||||
lvx v0, 0, $inp
|
||||
vperm v0, v6, v0, $inpperm
|
||||
vsldoi v0, v3, v0, 8 # vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned)
|
||||
?vperm v0, v6, v0, $inpperm
|
||||
?vsldoi v0, v3, v0, 8 # vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned)
|
||||
bl _vpaes_schedule_transform # input transform
|
||||
vsldoi v6, v0, v9, 8
|
||||
vsldoi v6, v9, v6, 8 # clobber "low" side with zeros
|
||||
?vsldoi v6, v0, v9, 8
|
||||
?vsldoi v6, v9, v6, 8 # clobber "low" side with zeros
|
||||
mtctr r0
|
||||
|
||||
Loop_schedule_192:
|
||||
bl _vpaes_schedule_round
|
||||
vsldoi v0, v6, v0, 8 # vpalignr \$8,%xmm6,%xmm0,%xmm0
|
||||
?vsldoi v0, v6, v0, 8 # vpalignr \$8,%xmm6,%xmm0,%xmm0
|
||||
bl _vpaes_schedule_mangle # save key n
|
||||
bl _vpaes_schedule_192_smear
|
||||
bl _vpaes_schedule_mangle # save key n+1
|
||||
@ -991,7 +981,7 @@ Lschedule_256:
|
||||
li r0, 7 # mov \$7, %esi
|
||||
addi $inp, $inp, 8
|
||||
lvx v0, 0, $inp # vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
|
||||
vperm v0, v6, v0, $inpperm
|
||||
?vperm v0, v6, v0, $inpperm
|
||||
bl _vpaes_schedule_transform # input transform
|
||||
mtctr r0
|
||||
|
||||
@ -1005,7 +995,7 @@ Loop_schedule_256:
|
||||
bl _vpaes_schedule_mangle
|
||||
|
||||
# low round. swap xmm7 and xmm6
|
||||
vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0
|
||||
?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0
|
||||
vmr v5, v7 # vmovdqa %xmm7, %xmm5
|
||||
vmr v7, v6 # vmovdqa %xmm6, %xmm7
|
||||
bl _vpaes_schedule_low_round
|
||||
@ -1042,7 +1032,7 @@ Lschedule_mangle_last:
|
||||
bl _vpaes_schedule_transform # output transform
|
||||
|
||||
#stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key
|
||||
vperm v0, v0, v0, $outperm # rotate left
|
||||
vperm v0, v0, v0, $outperm # rotate right/left
|
||||
vsel v2, $outhead, v0, $outmask
|
||||
vmr $outhead, v0
|
||||
stvx v2, 0, $out
|
||||
@ -1062,7 +1052,7 @@ Lschedule_mangle_last_dec:
|
||||
bl _vpaes_schedule_transform # output transform
|
||||
|
||||
#stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key
|
||||
vperm v0, v0, v0, $outperm # rotate left
|
||||
vperm v0, v0, v0, $outperm # rotate right/left
|
||||
vsel v2, $outhead, v0, $outmask
|
||||
vmr $outhead, v0
|
||||
stvx v2, 0, $out
|
||||
@ -1104,14 +1094,14 @@ Lschedule_mangle_done:
|
||||
##
|
||||
.align 4
|
||||
_vpaes_schedule_192_smear:
|
||||
vspltw v0, v7, 3
|
||||
vsldoi v1, v9, v6, 12 # vpshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0
|
||||
vsldoi v0, v7, v0, 8 # vpshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a
|
||||
?vspltw v0, v7, 3
|
||||
?vsldoi v1, v9, v6, 12 # vpshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0
|
||||
?vsldoi v0, v7, v0, 8 # vpshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a
|
||||
vxor v6, v6, v1 # vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0
|
||||
vxor v6, v6, v0 # vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a
|
||||
vmr v0, v6
|
||||
vsldoi v6, v6, v9, 8
|
||||
vsldoi v6, v9, v6, 8 # clobber low side with zeros
|
||||
?vsldoi v6, v6, v9, 8
|
||||
?vsldoi v6, v9, v6, 8 # clobber low side with zeros
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,0,0
|
||||
@ -1138,23 +1128,23 @@ _vpaes_schedule_192_smear:
|
||||
_vpaes_schedule_round:
|
||||
# extract rcon from xmm8
|
||||
#vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4
|
||||
vsldoi v1, $rcon, v9, 15 # vpalignr \$15, %xmm8, %xmm4, %xmm1
|
||||
vsldoi $rcon, $rcon, $rcon, 15 # vpalignr \$15, %xmm8, %xmm8, %xmm8
|
||||
?vsldoi v1, $rcon, v9, 15 # vpalignr \$15, %xmm8, %xmm4, %xmm1
|
||||
?vsldoi $rcon, $rcon, $rcon, 15 # vpalignr \$15, %xmm8, %xmm8, %xmm8
|
||||
vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7
|
||||
|
||||
# rotate
|
||||
vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0
|
||||
vsldoi v0, v0, v0, 1 # vpalignr \$1, %xmm0, %xmm0, %xmm0
|
||||
?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0
|
||||
?vsldoi v0, v0, v0, 1 # vpalignr \$1, %xmm0, %xmm0, %xmm0
|
||||
|
||||
# fall through...
|
||||
|
||||
# low round: same as high round, but no rotation and no rcon.
|
||||
_vpaes_schedule_low_round:
|
||||
# smear xmm7
|
||||
vsldoi v1, v9, v7, 12 # vpslldq \$4, %xmm7, %xmm1
|
||||
?vsldoi v1, v9, v7, 12 # vpslldq \$4, %xmm7, %xmm1
|
||||
vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7
|
||||
vspltisb v1, 0x0f # 0x0f..0f
|
||||
vsldoi v4, v9, v7, 8 # vpslldq \$8, %xmm7, %xmm4
|
||||
?vsldoi v4, v9, v7, 8 # vpslldq \$8, %xmm7, %xmm4
|
||||
|
||||
# subbytes
|
||||
vand v1, v1, v0 # vpand %xmm9, %xmm0, %xmm1 # 0 = k
|
||||
@ -1248,7 +1238,7 @@ _vpaes_schedule_mangle:
|
||||
andi. r8, r8, 0x30 # and \$0x30, %r8
|
||||
|
||||
#stvx v3, 0, $out # vmovdqu %xmm3, (%rdx)
|
||||
vperm v1, v3, v3, $outperm # rotate left
|
||||
vperm v1, v3, v3, $outperm # rotate right/left
|
||||
vsel v2, $outhead, v1, $outmask
|
||||
vmr $outhead, v1
|
||||
stvx v2, 0, $out
|
||||
@ -1299,7 +1289,7 @@ Lschedule_mangle_dec:
|
||||
andi. r8, r8, 0x30 # and \$0x30, %r8
|
||||
|
||||
#stvx v3, 0, $out # vmovdqu %xmm3, (%rdx)
|
||||
vperm v1, v3, v3, $outperm # rotate left
|
||||
vperm v1, v3, v3, $outperm # rotate right/left
|
||||
vsel v2, $outhead, v1, $outmask
|
||||
vmr $outhead, v1
|
||||
stvx v2, 0, $out
|
||||
@ -1346,7 +1336,7 @@ Lschedule_mangle_dec:
|
||||
addi r9, r9, 6 # add \$5,%eax
|
||||
stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;
|
||||
|
||||
cmplw $dir, $bits, $bits
|
||||
cmplw $dir, $bits, $bits # set encrypt direction
|
||||
li r8, 0x30 # mov \$0x30,%r8d
|
||||
bl _vpaes_schedule_core
|
||||
|
||||
@ -1427,7 +1417,7 @@ Lschedule_mangle_dec:
|
||||
slwi r9, r9, 4 # shl \$4,%eax
|
||||
add $out, $out, r9 # lea (%rdx,%rax),%rdx
|
||||
|
||||
cmplwi $dir, $bits, 0
|
||||
cmplwi $dir, $bits, 0 # set decrypt direction
|
||||
srwi r8, $bits, 1 # shr \$1,%r8d
|
||||
andi. r8, r8, 32 # and \$32,%r8d
|
||||
xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32
|
||||
@ -1470,8 +1460,48 @@ Lschedule_mangle_dec:
|
||||
___
|
||||
}
|
||||
|
||||
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
|
||||
my $consts=1;
|
||||
foreach (split("\n",$code)) {
|
||||
s/\`([^\`]*)\`/eval $1/geo;
|
||||
|
||||
print $code;
|
||||
# constants table endian-specific conversion
# Applies only while $consts is true and only to .long lines that end in
# a ?tag (?inv, ?rev or ?asis); a .long line without a trailing ?tag does
# not match the pattern and is left for the code further down the loop.
# A matching line is re-emitted as one .byte directive, so the bytes
# placed in the object file no longer depend on how the assembler lays
# out a 32-bit .long.
|
||||
if ($consts && m/\.long\s+(.+)\s+(\?[a-z]*)$/o) {
|
||||
# $1 is the comma-separated value list, $2 is the ?tag
my $conv=$2;
|
||||
my @bytes=();
|
||||
|
||||
# convert to endian-agnostic format
# each 32-bit value is split most-significant byte first, which is the
# byte order a big-endian .long would have had in memory
|
||||
foreach (split(/,\s+/,$1)) {
|
||||
# a leading 0 selects oct(), which also parses the 0x... hex literals
# used throughout the table; anything else is taken as decimal
my $l = /^0/?oct:int;
|
||||
push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
|
||||
}
|
||||
|
||||
# little-endian conversion
# only flavours whose name ends in le are adjusted:
#   ?inv  - every byte is XORed with 0xf (tag used on the Lk_mc_forward,
#           Lk_mc_backward and Lk_sr rows)
#   ?rev  - the bytes of the whole line are emitted in reverse order
#   ?asis - has no case here, so the bytes are emitted unchanged
|
||||
if ($flavour =~ /le$/o) {
|
||||
SWITCH: for($conv) {
|
||||
/\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
|
||||
/\?rev/ && do { @bytes=reverse(@bytes); last; };
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# emit all bytes of the line as a single .byte directive
|
||||
print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
|
||||
# the line is fully handled; skip the instruction rewriting below
next;
|
||||
}
|
||||
$consts=0 if (m/Lconsts:/o); # end of table
|
||||
|
||||
# instructions prefixed with '?' are endian-specific and need
|
||||
# to be adjusted accordingly...
# The substitutions are chained with "or", so at most one of them is
# applied to a line: the first one that matches. A ?-prefixed line that
# matches none of the little-endian patterns is printed unchanged, with
# the ? still in place.
# NOTE(review): the vperm/vsldoi/vspltw patterns only accept literal vN
# register operands; this assumes symbolic names such as $inpperm were
# already expanded to vN when the heredoc text was built - confirm.
|
||||
if ($flavour =~ /le$/o) { # little-endian
|
||||
# ?lvsr becomes lvsl
s/\?lvsr/lvsl/o or
|
||||
# ?lvsl becomes lvsr
s/\?lvsl/lvsr/o or
|
||||
# ?vperm vD,vA,vB,vC becomes vperm vD,vB,vA,vC (source operands swapped)
s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
|
||||
# ?vsldoi vD,vA,vB,N becomes vsldoi vD,vB,vA, 16-N; there is no /e
# flag, so 16-N is written out as text rather than computed here
s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
|
||||
# ?vspltw vD,vA,N becomes vspltw vD,vA, 3-N, again written out as text
s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
|
||||
} else { # big-endian
|
||||
# big-endian keeps the instruction as written; only the ? marker in
# front of the mnemonic is dropped
s/\?([a-z]+)/$1/o;
|
||||
}
|
||||
|
||||
print $_,"\n";
|
||||
}
|
||||
|
||||
close STDOUT;
|
||||
|
Loading…
x
Reference in New Issue
Block a user