openssl/crypto/aes/asm/aes-ppc.pl
Andy Polyakov 23b93b587b aes-ppc.pl, sha512-ppc.pl: comply even with Embedded ABI specification
(most restrictive about r2 and r13 usage).
2012-01-13 09:16:52 +00:00

1361 lines
37 KiB
Raku

#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# Needs more work: key setup, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reason. ppc_AES_encrypt_compact runs
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
# at 1/3 of ppc_AES_decrypt.
# February 2010
#
# Rescheduling instructions to favour Power6 pipeline gave 10%
# performance improvement on the platfrom in question (and marginal
# improvement even on others). It should be noted that Power6 fails
# to process byte in 18 cycles, only in 23, because it fails to issue
# 4 load instructions in two cycles, only in 3. As result non-compact
# block subroutines are 25% slower than one would expect. Compact
# functions scale better, because they have pure computational part,
# which scales perfectly with clock frequency. To be specific
# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
$flavour = shift;
if ($flavour =~ /64/) {
$SIZE_T =8;
$LRSAVE =2*$SIZE_T;
$STU ="stdu";
$POP ="ld";
$PUSH ="std";
} elsif ($flavour =~ /32/) {
$SIZE_T =4;
$LRSAVE =$SIZE_T;
$STU ="stwu";
$POP ="lwz";
$PUSH ="stw";
} else { die "nonsense $flavour"; }
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";
open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
$FRAME=32*$SIZE_T;
sub _data_word()
{ my $i;
while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
}
$sp="r1";
$toc="r2";
$inp="r3";
$out="r4";
$key="r5";
$Tbl0="r3";
$Tbl1="r6";
$Tbl2="r7";
$Tbl3=$out; # stay away from "r2"; $out is offloaded to stack
$s0="r8";
$s1="r9";
$s2="r10";
$s3="r11";
$t0="r12";
$t1="r0"; # stay away from "r13";
$t2="r14";
$t3="r15";
$acc00="r16";
$acc01="r17";
$acc02="r18";
$acc03="r19";
$acc04="r20";
$acc05="r21";
$acc06="r22";
$acc07="r23";
$acc08="r24";
$acc09="r25";
$acc10="r26";
$acc11="r27";
$acc12="r28";
$acc13="r29";
$acc14="r30";
$acc15="r31";
$mask80=$Tbl2;
$mask1b=$Tbl3;
$code.=<<___;
.machine "any"
.text
.align 7
LAES_Te:
mflr r0
bcl 20,31,\$+4
mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
addi $Tbl0,$Tbl0,`128-8`
mtlr r0
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.space `64-9*4`
LAES_Td:
mflr r0
bcl 20,31,\$+4
mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
addi $Tbl0,$Tbl0,`128-64-8+2048+256`
mtlr r0
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.space `128-64-9*4`
___
&_data_word(
0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
$code.=<<___;
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
___
&_data_word(
0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
$code.=<<___;
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.globl .AES_encrypt
.align 7
.AES_encrypt:
$STU $sp,-$FRAME($sp)
mflr r0
$PUSH $out,`$FRAME-$SIZE_T*19`($sp)
$PUSH r14,`$FRAME-$SIZE_T*18`($sp)
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
$PUSH r16,`$FRAME-$SIZE_T*16`($sp)
$PUSH r17,`$FRAME-$SIZE_T*15`($sp)
$PUSH r18,`$FRAME-$SIZE_T*14`($sp)
$PUSH r19,`$FRAME-$SIZE_T*13`($sp)
$PUSH r20,`$FRAME-$SIZE_T*12`($sp)
$PUSH r21,`$FRAME-$SIZE_T*11`($sp)
$PUSH r22,`$FRAME-$SIZE_T*10`($sp)
$PUSH r23,`$FRAME-$SIZE_T*9`($sp)
$PUSH r24,`$FRAME-$SIZE_T*8`($sp)
$PUSH r25,`$FRAME-$SIZE_T*7`($sp)
$PUSH r26,`$FRAME-$SIZE_T*6`($sp)
$PUSH r27,`$FRAME-$SIZE_T*5`($sp)
$PUSH r28,`$FRAME-$SIZE_T*4`($sp)
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
$PUSH r0,`$FRAME+$LRSAVE`($sp)
andi. $t0,$inp,3
andi. $t1,$out,3
or. $t0,$t0,$t1
bne Lenc_unaligned
Lenc_unaligned_ok:
lwz $s0,0($inp)
lwz $s1,4($inp)
lwz $s2,8($inp)
lwz $s3,12($inp)
bl LAES_Te
bl Lppc_AES_encrypt_compact
$POP $out,`$FRAME-$SIZE_T*19`($sp)
stw $s0,0($out)
stw $s1,4($out)
stw $s2,8($out)
stw $s3,12($out)
b Lenc_done
Lenc_unaligned:
subfic $t0,$inp,4096
subfic $t1,$out,4096
andi. $t0,$t0,4096-16
beq Lenc_xpage
andi. $t1,$t1,4096-16
bne Lenc_unaligned_ok
Lenc_xpage:
lbz $acc00,0($inp)
lbz $acc01,1($inp)
lbz $acc02,2($inp)
lbz $s0,3($inp)
lbz $acc04,4($inp)
lbz $acc05,5($inp)
lbz $acc06,6($inp)
lbz $s1,7($inp)
lbz $acc08,8($inp)
lbz $acc09,9($inp)
lbz $acc10,10($inp)
insrwi $s0,$acc00,8,0
lbz $s2,11($inp)
insrwi $s1,$acc04,8,0
lbz $acc12,12($inp)
insrwi $s0,$acc01,8,8
lbz $acc13,13($inp)
insrwi $s1,$acc05,8,8
lbz $acc14,14($inp)
insrwi $s0,$acc02,8,16
lbz $s3,15($inp)
insrwi $s1,$acc06,8,16
insrwi $s2,$acc08,8,0
insrwi $s3,$acc12,8,0
insrwi $s2,$acc09,8,8
insrwi $s3,$acc13,8,8
insrwi $s2,$acc10,8,16
insrwi $s3,$acc14,8,16
bl LAES_Te
bl Lppc_AES_encrypt_compact
$POP $out,`$FRAME-$SIZE_T*19`($sp)
extrwi $acc00,$s0,8,0
extrwi $acc01,$s0,8,8
stb $acc00,0($out)
extrwi $acc02,$s0,8,16
stb $acc01,1($out)
stb $acc02,2($out)
extrwi $acc04,$s1,8,0
stb $s0,3($out)
extrwi $acc05,$s1,8,8
stb $acc04,4($out)
extrwi $acc06,$s1,8,16
stb $acc05,5($out)
stb $acc06,6($out)
extrwi $acc08,$s2,8,0
stb $s1,7($out)
extrwi $acc09,$s2,8,8
stb $acc08,8($out)
extrwi $acc10,$s2,8,16
stb $acc09,9($out)
stb $acc10,10($out)
extrwi $acc12,$s3,8,0
stb $s2,11($out)
extrwi $acc13,$s3,8,8
stb $acc12,12($out)
extrwi $acc14,$s3,8,16
stb $acc13,13($out)
stb $acc14,14($out)
stb $s3,15($out)
Lenc_done:
$POP r0,`$FRAME+$LRSAVE`($sp)
$POP r14,`$FRAME-$SIZE_T*18`($sp)
$POP r15,`$FRAME-$SIZE_T*17`($sp)
$POP r16,`$FRAME-$SIZE_T*16`($sp)
$POP r17,`$FRAME-$SIZE_T*15`($sp)
$POP r18,`$FRAME-$SIZE_T*14`($sp)
$POP r19,`$FRAME-$SIZE_T*13`($sp)
$POP r20,`$FRAME-$SIZE_T*12`($sp)
$POP r21,`$FRAME-$SIZE_T*11`($sp)
$POP r22,`$FRAME-$SIZE_T*10`($sp)
$POP r23,`$FRAME-$SIZE_T*9`($sp)
$POP r24,`$FRAME-$SIZE_T*8`($sp)
$POP r25,`$FRAME-$SIZE_T*7`($sp)
$POP r26,`$FRAME-$SIZE_T*6`($sp)
$POP r27,`$FRAME-$SIZE_T*5`($sp)
$POP r28,`$FRAME-$SIZE_T*4`($sp)
$POP r29,`$FRAME-$SIZE_T*3`($sp)
$POP r30,`$FRAME-$SIZE_T*2`($sp)
$POP r31,`$FRAME-$SIZE_T*1`($sp)
mtlr r0
addi $sp,$sp,$FRAME
blr
.long 0
.byte 0,12,4,1,0x80,18,3,0
.long 0
.align 5
Lppc_AES_encrypt:
lwz $acc00,240($key)
addi $Tbl1,$Tbl0,3
lwz $t0,0($key)
addi $Tbl2,$Tbl0,2
lwz $t1,4($key)
addi $Tbl3,$Tbl0,1
lwz $t2,8($key)
addi $acc00,$acc00,-1
lwz $t3,12($key)
addi $key,$key,16
xor $s0,$s0,$t0
xor $s1,$s1,$t1
xor $s2,$s2,$t2
xor $s3,$s3,$t3
mtctr $acc00
.align 4
Lenc_loop:
rlwinm $acc00,$s0,`32-24+3`,21,28
rlwinm $acc01,$s1,`32-24+3`,21,28
rlwinm $acc02,$s2,`32-24+3`,21,28
rlwinm $acc03,$s3,`32-24+3`,21,28
lwz $t0,0($key)
rlwinm $acc04,$s1,`32-16+3`,21,28
lwz $t1,4($key)
rlwinm $acc05,$s2,`32-16+3`,21,28
lwz $t2,8($key)
rlwinm $acc06,$s3,`32-16+3`,21,28
lwz $t3,12($key)
rlwinm $acc07,$s0,`32-16+3`,21,28
lwzx $acc00,$Tbl0,$acc00
rlwinm $acc08,$s2,`32-8+3`,21,28
lwzx $acc01,$Tbl0,$acc01
rlwinm $acc09,$s3,`32-8+3`,21,28
lwzx $acc02,$Tbl0,$acc02
rlwinm $acc10,$s0,`32-8+3`,21,28
lwzx $acc03,$Tbl0,$acc03
rlwinm $acc11,$s1,`32-8+3`,21,28
lwzx $acc04,$Tbl1,$acc04
rlwinm $acc12,$s3,`0+3`,21,28
lwzx $acc05,$Tbl1,$acc05
rlwinm $acc13,$s0,`0+3`,21,28
lwzx $acc06,$Tbl1,$acc06
rlwinm $acc14,$s1,`0+3`,21,28
lwzx $acc07,$Tbl1,$acc07
rlwinm $acc15,$s2,`0+3`,21,28
lwzx $acc08,$Tbl2,$acc08
xor $t0,$t0,$acc00
lwzx $acc09,$Tbl2,$acc09
xor $t1,$t1,$acc01
lwzx $acc10,$Tbl2,$acc10
xor $t2,$t2,$acc02
lwzx $acc11,$Tbl2,$acc11
xor $t3,$t3,$acc03
lwzx $acc12,$Tbl3,$acc12
xor $t0,$t0,$acc04
lwzx $acc13,$Tbl3,$acc13
xor $t1,$t1,$acc05
lwzx $acc14,$Tbl3,$acc14
xor $t2,$t2,$acc06
lwzx $acc15,$Tbl3,$acc15
xor $t3,$t3,$acc07
xor $t0,$t0,$acc08
xor $t1,$t1,$acc09
xor $t2,$t2,$acc10
xor $t3,$t3,$acc11
xor $s0,$t0,$acc12
xor $s1,$t1,$acc13
xor $s2,$t2,$acc14
xor $s3,$t3,$acc15
addi $key,$key,16
bdnz- Lenc_loop
addi $Tbl2,$Tbl0,2048
nop
lwz $t0,0($key)
rlwinm $acc00,$s0,`32-24`,24,31
lwz $t1,4($key)
rlwinm $acc01,$s1,`32-24`,24,31
lwz $t2,8($key)
rlwinm $acc02,$s2,`32-24`,24,31
lwz $t3,12($key)
rlwinm $acc03,$s3,`32-24`,24,31
lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
rlwinm $acc04,$s1,`32-16`,24,31
lwz $acc09,`2048+32`($Tbl0)
rlwinm $acc05,$s2,`32-16`,24,31
lwz $acc10,`2048+64`($Tbl0)
rlwinm $acc06,$s3,`32-16`,24,31
lwz $acc11,`2048+96`($Tbl0)
rlwinm $acc07,$s0,`32-16`,24,31
lwz $acc12,`2048+128`($Tbl0)
rlwinm $acc08,$s2,`32-8`,24,31
lwz $acc13,`2048+160`($Tbl0)
rlwinm $acc09,$s3,`32-8`,24,31
lwz $acc14,`2048+192`($Tbl0)
rlwinm $acc10,$s0,`32-8`,24,31
lwz $acc15,`2048+224`($Tbl0)
rlwinm $acc11,$s1,`32-8`,24,31
lbzx $acc00,$Tbl2,$acc00
rlwinm $acc12,$s3,`0`,24,31
lbzx $acc01,$Tbl2,$acc01
rlwinm $acc13,$s0,`0`,24,31
lbzx $acc02,$Tbl2,$acc02
rlwinm $acc14,$s1,`0`,24,31
lbzx $acc03,$Tbl2,$acc03
rlwinm $acc15,$s2,`0`,24,31
lbzx $acc04,$Tbl2,$acc04
rlwinm $s0,$acc00,24,0,7
lbzx $acc05,$Tbl2,$acc05
rlwinm $s1,$acc01,24,0,7
lbzx $acc06,$Tbl2,$acc06
rlwinm $s2,$acc02,24,0,7
lbzx $acc07,$Tbl2,$acc07
rlwinm $s3,$acc03,24,0,7
lbzx $acc08,$Tbl2,$acc08
rlwimi $s0,$acc04,16,8,15
lbzx $acc09,$Tbl2,$acc09
rlwimi $s1,$acc05,16,8,15
lbzx $acc10,$Tbl2,$acc10
rlwimi $s2,$acc06,16,8,15
lbzx $acc11,$Tbl2,$acc11
rlwimi $s3,$acc07,16,8,15
lbzx $acc12,$Tbl2,$acc12
rlwimi $s0,$acc08,8,16,23
lbzx $acc13,$Tbl2,$acc13
rlwimi $s1,$acc09,8,16,23
lbzx $acc14,$Tbl2,$acc14
rlwimi $s2,$acc10,8,16,23
lbzx $acc15,$Tbl2,$acc15
rlwimi $s3,$acc11,8,16,23
or $s0,$s0,$acc12
or $s1,$s1,$acc13
or $s2,$s2,$acc14
or $s3,$s3,$acc15
xor $s0,$s0,$t0
xor $s1,$s1,$t1
xor $s2,$s2,$t2
xor $s3,$s3,$t3
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.align 4
Lppc_AES_encrypt_compact:
lwz $acc00,240($key)
addi $Tbl1,$Tbl0,2048
lwz $t0,0($key)
lis $mask80,0x8080
lwz $t1,4($key)
lis $mask1b,0x1b1b
lwz $t2,8($key)
ori $mask80,$mask80,0x8080
lwz $t3,12($key)
ori $mask1b,$mask1b,0x1b1b
addi $key,$key,16
mtctr $acc00
.align 4
Lenc_compact_loop:
xor $s0,$s0,$t0
xor $s1,$s1,$t1
rlwinm $acc00,$s0,`32-24`,24,31
xor $s2,$s2,$t2
rlwinm $acc01,$s1,`32-24`,24,31
xor $s3,$s3,$t3
rlwinm $acc02,$s2,`32-24`,24,31
rlwinm $acc03,$s3,`32-24`,24,31
rlwinm $acc04,$s1,`32-16`,24,31
rlwinm $acc05,$s2,`32-16`,24,31
rlwinm $acc06,$s3,`32-16`,24,31
rlwinm $acc07,$s0,`32-16`,24,31
lbzx $acc00,$Tbl1,$acc00
rlwinm $acc08,$s2,`32-8`,24,31
lbzx $acc01,$Tbl1,$acc01
rlwinm $acc09,$s3,`32-8`,24,31
lbzx $acc02,$Tbl1,$acc02
rlwinm $acc10,$s0,`32-8`,24,31
lbzx $acc03,$Tbl1,$acc03
rlwinm $acc11,$s1,`32-8`,24,31
lbzx $acc04,$Tbl1,$acc04
rlwinm $acc12,$s3,`0`,24,31
lbzx $acc05,$Tbl1,$acc05
rlwinm $acc13,$s0,`0`,24,31
lbzx $acc06,$Tbl1,$acc06
rlwinm $acc14,$s1,`0`,24,31
lbzx $acc07,$Tbl1,$acc07
rlwinm $acc15,$s2,`0`,24,31
lbzx $acc08,$Tbl1,$acc08
rlwinm $s0,$acc00,24,0,7
lbzx $acc09,$Tbl1,$acc09
rlwinm $s1,$acc01,24,0,7
lbzx $acc10,$Tbl1,$acc10
rlwinm $s2,$acc02,24,0,7
lbzx $acc11,$Tbl1,$acc11
rlwinm $s3,$acc03,24,0,7
lbzx $acc12,$Tbl1,$acc12
rlwimi $s0,$acc04,16,8,15
lbzx $acc13,$Tbl1,$acc13
rlwimi $s1,$acc05,16,8,15
lbzx $acc14,$Tbl1,$acc14
rlwimi $s2,$acc06,16,8,15
lbzx $acc15,$Tbl1,$acc15
rlwimi $s3,$acc07,16,8,15
rlwimi $s0,$acc08,8,16,23
rlwimi $s1,$acc09,8,16,23
rlwimi $s2,$acc10,8,16,23
rlwimi $s3,$acc11,8,16,23
lwz $t0,0($key)
or $s0,$s0,$acc12
lwz $t1,4($key)
or $s1,$s1,$acc13
lwz $t2,8($key)
or $s2,$s2,$acc14
lwz $t3,12($key)
or $s3,$s3,$acc15
addi $key,$key,16
bdz Lenc_compact_done
and $acc00,$s0,$mask80 # r1=r0&0x80808080
and $acc01,$s1,$mask80
and $acc02,$s2,$mask80
and $acc03,$s3,$mask80
srwi $acc04,$acc00,7 # r1>>7
andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
srwi $acc05,$acc01,7
andc $acc09,$s1,$mask80
srwi $acc06,$acc02,7
andc $acc10,$s2,$mask80
srwi $acc07,$acc03,7
andc $acc11,$s3,$mask80
sub $acc00,$acc00,$acc04 # r1-(r1>>7)
sub $acc01,$acc01,$acc05
sub $acc02,$acc02,$acc06
sub $acc03,$acc03,$acc07
add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
add $acc09,$acc09,$acc09
add $acc10,$acc10,$acc10
add $acc11,$acc11,$acc11
and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
and $acc01,$acc01,$mask1b
and $acc02,$acc02,$mask1b
and $acc03,$acc03,$mask1b
xor $acc00,$acc00,$acc08 # r2
xor $acc01,$acc01,$acc09
rotlwi $acc12,$s0,16 # ROTATE(r0,16)
xor $acc02,$acc02,$acc10
rotlwi $acc13,$s1,16
xor $acc03,$acc03,$acc11
rotlwi $acc14,$s2,16
xor $s0,$s0,$acc00 # r0^r2
rotlwi $acc15,$s3,16
xor $s1,$s1,$acc01
rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
xor $s2,$s2,$acc02
rotrwi $s1,$s1,24
xor $s3,$s3,$acc03
rotrwi $s2,$s2,24
xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
rotrwi $s3,$s3,24
xor $s1,$s1,$acc01
xor $s2,$s2,$acc02
xor $s3,$s3,$acc03
rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
xor $s0,$s0,$acc12 #
rotlwi $acc09,$acc13,8
xor $s1,$s1,$acc13
rotlwi $acc10,$acc14,8
xor $s2,$s2,$acc14
rotlwi $acc11,$acc15,8
xor $s3,$s3,$acc15
xor $s0,$s0,$acc08 #
xor $s1,$s1,$acc09
xor $s2,$s2,$acc10
xor $s3,$s3,$acc11
b Lenc_compact_loop
.align 4
Lenc_compact_done:
xor $s0,$s0,$t0
xor $s1,$s1,$t1
xor $s2,$s2,$t2
xor $s3,$s3,$t3
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.globl .AES_decrypt
.align 7
.AES_decrypt:
$STU $sp,-$FRAME($sp)
mflr r0
$PUSH $out,`$FRAME-$SIZE_T*19`($sp)
$PUSH r14,`$FRAME-$SIZE_T*18`($sp)
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
$PUSH r16,`$FRAME-$SIZE_T*16`($sp)
$PUSH r17,`$FRAME-$SIZE_T*15`($sp)
$PUSH r18,`$FRAME-$SIZE_T*14`($sp)
$PUSH r19,`$FRAME-$SIZE_T*13`($sp)
$PUSH r20,`$FRAME-$SIZE_T*12`($sp)
$PUSH r21,`$FRAME-$SIZE_T*11`($sp)
$PUSH r22,`$FRAME-$SIZE_T*10`($sp)
$PUSH r23,`$FRAME-$SIZE_T*9`($sp)
$PUSH r24,`$FRAME-$SIZE_T*8`($sp)
$PUSH r25,`$FRAME-$SIZE_T*7`($sp)
$PUSH r26,`$FRAME-$SIZE_T*6`($sp)
$PUSH r27,`$FRAME-$SIZE_T*5`($sp)
$PUSH r28,`$FRAME-$SIZE_T*4`($sp)
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
$PUSH r0,`$FRAME+$LRSAVE`($sp)
andi. $t0,$inp,3
andi. $t1,$out,3
or. $t0,$t0,$t1
bne Ldec_unaligned
Ldec_unaligned_ok:
lwz $s0,0($inp)
lwz $s1,4($inp)
lwz $s2,8($inp)
lwz $s3,12($inp)
bl LAES_Td
bl Lppc_AES_decrypt_compact
$POP $out,`$FRAME-$SIZE_T*19`($sp)
stw $s0,0($out)
stw $s1,4($out)
stw $s2,8($out)
stw $s3,12($out)
b Ldec_done
Ldec_unaligned:
subfic $t0,$inp,4096
subfic $t1,$out,4096
andi. $t0,$t0,4096-16
beq Ldec_xpage
andi. $t1,$t1,4096-16
bne Ldec_unaligned_ok
Ldec_xpage:
lbz $acc00,0($inp)
lbz $acc01,1($inp)
lbz $acc02,2($inp)
lbz $s0,3($inp)
lbz $acc04,4($inp)
lbz $acc05,5($inp)
lbz $acc06,6($inp)
lbz $s1,7($inp)
lbz $acc08,8($inp)
lbz $acc09,9($inp)
lbz $acc10,10($inp)
insrwi $s0,$acc00,8,0
lbz $s2,11($inp)
insrwi $s1,$acc04,8,0
lbz $acc12,12($inp)
insrwi $s0,$acc01,8,8
lbz $acc13,13($inp)
insrwi $s1,$acc05,8,8
lbz $acc14,14($inp)
insrwi $s0,$acc02,8,16
lbz $s3,15($inp)
insrwi $s1,$acc06,8,16
insrwi $s2,$acc08,8,0
insrwi $s3,$acc12,8,0
insrwi $s2,$acc09,8,8
insrwi $s3,$acc13,8,8
insrwi $s2,$acc10,8,16
insrwi $s3,$acc14,8,16
bl LAES_Td
bl Lppc_AES_decrypt_compact
$POP $out,`$FRAME-$SIZE_T*19`($sp)
extrwi $acc00,$s0,8,0
extrwi $acc01,$s0,8,8
stb $acc00,0($out)
extrwi $acc02,$s0,8,16
stb $acc01,1($out)
stb $acc02,2($out)
extrwi $acc04,$s1,8,0
stb $s0,3($out)
extrwi $acc05,$s1,8,8
stb $acc04,4($out)
extrwi $acc06,$s1,8,16
stb $acc05,5($out)
stb $acc06,6($out)
extrwi $acc08,$s2,8,0
stb $s1,7($out)
extrwi $acc09,$s2,8,8
stb $acc08,8($out)
extrwi $acc10,$s2,8,16
stb $acc09,9($out)
stb $acc10,10($out)
extrwi $acc12,$s3,8,0
stb $s2,11($out)
extrwi $acc13,$s3,8,8
stb $acc12,12($out)
extrwi $acc14,$s3,8,16
stb $acc13,13($out)
stb $acc14,14($out)
stb $s3,15($out)
Ldec_done:
$POP r0,`$FRAME+$LRSAVE`($sp)
$POP r14,`$FRAME-$SIZE_T*18`($sp)
$POP r15,`$FRAME-$SIZE_T*17`($sp)
$POP r16,`$FRAME-$SIZE_T*16`($sp)
$POP r17,`$FRAME-$SIZE_T*15`($sp)
$POP r18,`$FRAME-$SIZE_T*14`($sp)
$POP r19,`$FRAME-$SIZE_T*13`($sp)
$POP r20,`$FRAME-$SIZE_T*12`($sp)
$POP r21,`$FRAME-$SIZE_T*11`($sp)
$POP r22,`$FRAME-$SIZE_T*10`($sp)
$POP r23,`$FRAME-$SIZE_T*9`($sp)
$POP r24,`$FRAME-$SIZE_T*8`($sp)
$POP r25,`$FRAME-$SIZE_T*7`($sp)
$POP r26,`$FRAME-$SIZE_T*6`($sp)
$POP r27,`$FRAME-$SIZE_T*5`($sp)
$POP r28,`$FRAME-$SIZE_T*4`($sp)
$POP r29,`$FRAME-$SIZE_T*3`($sp)
$POP r30,`$FRAME-$SIZE_T*2`($sp)
$POP r31,`$FRAME-$SIZE_T*1`($sp)
mtlr r0
addi $sp,$sp,$FRAME
blr
.long 0
.byte 0,12,4,1,0x80,18,3,0
.long 0
.align 5
Lppc_AES_decrypt:
lwz $acc00,240($key)
addi $Tbl1,$Tbl0,3
lwz $t0,0($key)
addi $Tbl2,$Tbl0,2
lwz $t1,4($key)
addi $Tbl3,$Tbl0,1
lwz $t2,8($key)
addi $acc00,$acc00,-1
lwz $t3,12($key)
addi $key,$key,16
xor $s0,$s0,$t0
xor $s1,$s1,$t1
xor $s2,$s2,$t2
xor $s3,$s3,$t3
mtctr $acc00
.align 4
Ldec_loop:
rlwinm $acc00,$s0,`32-24+3`,21,28
rlwinm $acc01,$s1,`32-24+3`,21,28
rlwinm $acc02,$s2,`32-24+3`,21,28
rlwinm $acc03,$s3,`32-24+3`,21,28
lwz $t0,0($key)
rlwinm $acc04,$s3,`32-16+3`,21,28
lwz $t1,4($key)
rlwinm $acc05,$s0,`32-16+3`,21,28
lwz $t2,8($key)
rlwinm $acc06,$s1,`32-16+3`,21,28
lwz $t3,12($key)
rlwinm $acc07,$s2,`32-16+3`,21,28
lwzx $acc00,$Tbl0,$acc00
rlwinm $acc08,$s2,`32-8+3`,21,28
lwzx $acc01,$Tbl0,$acc01
rlwinm $acc09,$s3,`32-8+3`,21,28
lwzx $acc02,$Tbl0,$acc02
rlwinm $acc10,$s0,`32-8+3`,21,28
lwzx $acc03,$Tbl0,$acc03
rlwinm $acc11,$s1,`32-8+3`,21,28
lwzx $acc04,$Tbl1,$acc04
rlwinm $acc12,$s1,`0+3`,21,28
lwzx $acc05,$Tbl1,$acc05
rlwinm $acc13,$s2,`0+3`,21,28
lwzx $acc06,$Tbl1,$acc06
rlwinm $acc14,$s3,`0+3`,21,28
lwzx $acc07,$Tbl1,$acc07
rlwinm $acc15,$s0,`0+3`,21,28
lwzx $acc08,$Tbl2,$acc08
xor $t0,$t0,$acc00
lwzx $acc09,$Tbl2,$acc09
xor $t1,$t1,$acc01
lwzx $acc10,$Tbl2,$acc10
xor $t2,$t2,$acc02
lwzx $acc11,$Tbl2,$acc11
xor $t3,$t3,$acc03
lwzx $acc12,$Tbl3,$acc12
xor $t0,$t0,$acc04
lwzx $acc13,$Tbl3,$acc13
xor $t1,$t1,$acc05
lwzx $acc14,$Tbl3,$acc14
xor $t2,$t2,$acc06
lwzx $acc15,$Tbl3,$acc15
xor $t3,$t3,$acc07
xor $t0,$t0,$acc08
xor $t1,$t1,$acc09
xor $t2,$t2,$acc10
xor $t3,$t3,$acc11
xor $s0,$t0,$acc12
xor $s1,$t1,$acc13
xor $s2,$t2,$acc14
xor $s3,$t3,$acc15
addi $key,$key,16
bdnz- Ldec_loop
addi $Tbl2,$Tbl0,2048
nop
lwz $t0,0($key)
rlwinm $acc00,$s0,`32-24`,24,31
lwz $t1,4($key)
rlwinm $acc01,$s1,`32-24`,24,31
lwz $t2,8($key)
rlwinm $acc02,$s2,`32-24`,24,31
lwz $t3,12($key)
rlwinm $acc03,$s3,`32-24`,24,31
lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
rlwinm $acc04,$s3,`32-16`,24,31
lwz $acc09,`2048+32`($Tbl0)
rlwinm $acc05,$s0,`32-16`,24,31
lwz $acc10,`2048+64`($Tbl0)
lbzx $acc00,$Tbl2,$acc00
lwz $acc11,`2048+96`($Tbl0)
lbzx $acc01,$Tbl2,$acc01
lwz $acc12,`2048+128`($Tbl0)
rlwinm $acc06,$s1,`32-16`,24,31
lwz $acc13,`2048+160`($Tbl0)
rlwinm $acc07,$s2,`32-16`,24,31
lwz $acc14,`2048+192`($Tbl0)
rlwinm $acc08,$s2,`32-8`,24,31
lwz $acc15,`2048+224`($Tbl0)
rlwinm $acc09,$s3,`32-8`,24,31
lbzx $acc02,$Tbl2,$acc02
rlwinm $acc10,$s0,`32-8`,24,31
lbzx $acc03,$Tbl2,$acc03
rlwinm $acc11,$s1,`32-8`,24,31
lbzx $acc04,$Tbl2,$acc04
rlwinm $acc12,$s1,`0`,24,31
lbzx $acc05,$Tbl2,$acc05
rlwinm $acc13,$s2,`0`,24,31
lbzx $acc06,$Tbl2,$acc06
rlwinm $acc14,$s3,`0`,24,31
lbzx $acc07,$Tbl2,$acc07
rlwinm $acc15,$s0,`0`,24,31
lbzx $acc08,$Tbl2,$acc08
rlwinm $s0,$acc00,24,0,7
lbzx $acc09,$Tbl2,$acc09
rlwinm $s1,$acc01,24,0,7
lbzx $acc10,$Tbl2,$acc10
rlwinm $s2,$acc02,24,0,7
lbzx $acc11,$Tbl2,$acc11
rlwinm $s3,$acc03,24,0,7
lbzx $acc12,$Tbl2,$acc12
rlwimi $s0,$acc04,16,8,15
lbzx $acc13,$Tbl2,$acc13
rlwimi $s1,$acc05,16,8,15
lbzx $acc14,$Tbl2,$acc14
rlwimi $s2,$acc06,16,8,15
lbzx $acc15,$Tbl2,$acc15
rlwimi $s3,$acc07,16,8,15
rlwimi $s0,$acc08,8,16,23
rlwimi $s1,$acc09,8,16,23
rlwimi $s2,$acc10,8,16,23
rlwimi $s3,$acc11,8,16,23
or $s0,$s0,$acc12
or $s1,$s1,$acc13
or $s2,$s2,$acc14
or $s3,$s3,$acc15
xor $s0,$s0,$t0
xor $s1,$s1,$t1
xor $s2,$s2,$t2
xor $s3,$s3,$t3
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.align 4
Lppc_AES_decrypt_compact:
lwz $acc00,240($key)
addi $Tbl1,$Tbl0,2048
lwz $t0,0($key)
lis $mask80,0x8080
lwz $t1,4($key)
lis $mask1b,0x1b1b
lwz $t2,8($key)
ori $mask80,$mask80,0x8080
lwz $t3,12($key)
ori $mask1b,$mask1b,0x1b1b
addi $key,$key,16
___
$code.=<<___ if ($SIZE_T==8);
insrdi $mask80,$mask80,32,0
insrdi $mask1b,$mask1b,32,0
___
$code.=<<___;
mtctr $acc00
.align 4
Ldec_compact_loop:
xor $s0,$s0,$t0
xor $s1,$s1,$t1
rlwinm $acc00,$s0,`32-24`,24,31
xor $s2,$s2,$t2
rlwinm $acc01,$s1,`32-24`,24,31
xor $s3,$s3,$t3
rlwinm $acc02,$s2,`32-24`,24,31
rlwinm $acc03,$s3,`32-24`,24,31
rlwinm $acc04,$s3,`32-16`,24,31
rlwinm $acc05,$s0,`32-16`,24,31
rlwinm $acc06,$s1,`32-16`,24,31
rlwinm $acc07,$s2,`32-16`,24,31
lbzx $acc00,$Tbl1,$acc00
rlwinm $acc08,$s2,`32-8`,24,31
lbzx $acc01,$Tbl1,$acc01
rlwinm $acc09,$s3,`32-8`,24,31
lbzx $acc02,$Tbl1,$acc02
rlwinm $acc10,$s0,`32-8`,24,31
lbzx $acc03,$Tbl1,$acc03
rlwinm $acc11,$s1,`32-8`,24,31
lbzx $acc04,$Tbl1,$acc04
rlwinm $acc12,$s1,`0`,24,31
lbzx $acc05,$Tbl1,$acc05
rlwinm $acc13,$s2,`0`,24,31
lbzx $acc06,$Tbl1,$acc06
rlwinm $acc14,$s3,`0`,24,31
lbzx $acc07,$Tbl1,$acc07
rlwinm $acc15,$s0,`0`,24,31
lbzx $acc08,$Tbl1,$acc08
rlwinm $s0,$acc00,24,0,7
lbzx $acc09,$Tbl1,$acc09
rlwinm $s1,$acc01,24,0,7
lbzx $acc10,$Tbl1,$acc10
rlwinm $s2,$acc02,24,0,7
lbzx $acc11,$Tbl1,$acc11
rlwinm $s3,$acc03,24,0,7
lbzx $acc12,$Tbl1,$acc12
rlwimi $s0,$acc04,16,8,15
lbzx $acc13,$Tbl1,$acc13
rlwimi $s1,$acc05,16,8,15
lbzx $acc14,$Tbl1,$acc14
rlwimi $s2,$acc06,16,8,15
lbzx $acc15,$Tbl1,$acc15
rlwimi $s3,$acc07,16,8,15
rlwimi $s0,$acc08,8,16,23
rlwimi $s1,$acc09,8,16,23
rlwimi $s2,$acc10,8,16,23
rlwimi $s3,$acc11,8,16,23
lwz $t0,0($key)
or $s0,$s0,$acc12
lwz $t1,4($key)
or $s1,$s1,$acc13
lwz $t2,8($key)
or $s2,$s2,$acc14
lwz $t3,12($key)
or $s3,$s3,$acc15
addi $key,$key,16
bdz Ldec_compact_done
___
$code.=<<___ if ($SIZE_T==8);
# vectorized permutation improves decrypt performance by 10%
insrdi $s0,$s1,32,0
insrdi $s2,$s3,32,0
and $acc00,$s0,$mask80 # r1=r0&0x80808080
and $acc02,$s2,$mask80
srdi $acc04,$acc00,7 # r1>>7
srdi $acc06,$acc02,7
andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
andc $acc10,$s2,$mask80
sub $acc00,$acc00,$acc04 # r1-(r1>>7)
sub $acc02,$acc02,$acc06
add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
add $acc10,$acc10,$acc10
and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
and $acc02,$acc02,$mask1b
xor $acc00,$acc00,$acc08 # r2
xor $acc02,$acc02,$acc10
and $acc04,$acc00,$mask80 # r1=r2&0x80808080
and $acc06,$acc02,$mask80
srdi $acc08,$acc04,7 # r1>>7
srdi $acc10,$acc06,7
andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
andc $acc14,$acc02,$mask80
sub $acc04,$acc04,$acc08 # r1-(r1>>7)
sub $acc06,$acc06,$acc10
add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
add $acc14,$acc14,$acc14
and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
and $acc06,$acc06,$mask1b
xor $acc04,$acc04,$acc12 # r4
xor $acc06,$acc06,$acc14
and $acc08,$acc04,$mask80 # r1=r4&0x80808080
and $acc10,$acc06,$mask80
srdi $acc12,$acc08,7 # r1>>7
srdi $acc14,$acc10,7
sub $acc08,$acc08,$acc12 # r1-(r1>>7)
sub $acc10,$acc10,$acc14
andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
andc $acc14,$acc06,$mask80
add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
add $acc14,$acc14,$acc14
and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
and $acc10,$acc10,$mask1b
xor $acc08,$acc08,$acc12 # r8
xor $acc10,$acc10,$acc14
xor $acc00,$acc00,$s0 # r2^r0
xor $acc02,$acc02,$s2
xor $acc04,$acc04,$s0 # r4^r0
xor $acc06,$acc06,$s2
extrdi $acc01,$acc00,32,0
extrdi $acc03,$acc02,32,0
extrdi $acc05,$acc04,32,0
extrdi $acc07,$acc06,32,0
extrdi $acc09,$acc08,32,0
extrdi $acc11,$acc10,32,0
___
$code.=<<___ if ($SIZE_T==4);
and $acc00,$s0,$mask80 # r1=r0&0x80808080
and $acc01,$s1,$mask80
and $acc02,$s2,$mask80
and $acc03,$s3,$mask80
srwi $acc04,$acc00,7 # r1>>7
andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
srwi $acc05,$acc01,7
andc $acc09,$s1,$mask80
srwi $acc06,$acc02,7
andc $acc10,$s2,$mask80
srwi $acc07,$acc03,7
andc $acc11,$s3,$mask80
sub $acc00,$acc00,$acc04 # r1-(r1>>7)
sub $acc01,$acc01,$acc05
sub $acc02,$acc02,$acc06
sub $acc03,$acc03,$acc07
add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
add $acc09,$acc09,$acc09
add $acc10,$acc10,$acc10
add $acc11,$acc11,$acc11
and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
and $acc01,$acc01,$mask1b
and $acc02,$acc02,$mask1b
and $acc03,$acc03,$mask1b
xor $acc00,$acc00,$acc08 # r2
xor $acc01,$acc01,$acc09
xor $acc02,$acc02,$acc10
xor $acc03,$acc03,$acc11
and $acc04,$acc00,$mask80 # r1=r2&0x80808080
and $acc05,$acc01,$mask80
and $acc06,$acc02,$mask80
and $acc07,$acc03,$mask80
srwi $acc08,$acc04,7 # r1>>7
andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
srwi $acc09,$acc05,7
andc $acc13,$acc01,$mask80
srwi $acc10,$acc06,7
andc $acc14,$acc02,$mask80
srwi $acc11,$acc07,7
andc $acc15,$acc03,$mask80
sub $acc04,$acc04,$acc08 # r1-(r1>>7)
sub $acc05,$acc05,$acc09
sub $acc06,$acc06,$acc10
sub $acc07,$acc07,$acc11
add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
add $acc13,$acc13,$acc13
add $acc14,$acc14,$acc14
add $acc15,$acc15,$acc15
and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
and $acc05,$acc05,$mask1b
and $acc06,$acc06,$mask1b
and $acc07,$acc07,$mask1b
xor $acc04,$acc04,$acc12 # r4
xor $acc05,$acc05,$acc13
xor $acc06,$acc06,$acc14
xor $acc07,$acc07,$acc15
and $acc08,$acc04,$mask80 # r1=r4&0x80808080
and $acc09,$acc05,$mask80
srwi $acc12,$acc08,7 # r1>>7
and $acc10,$acc06,$mask80
srwi $acc13,$acc09,7
and $acc11,$acc07,$mask80
srwi $acc14,$acc10,7
sub $acc08,$acc08,$acc12 # r1-(r1>>7)
srwi $acc15,$acc11,7
sub $acc09,$acc09,$acc13
sub $acc10,$acc10,$acc14
sub $acc11,$acc11,$acc15
andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
andc $acc13,$acc05,$mask80
andc $acc14,$acc06,$mask80
andc $acc15,$acc07,$mask80
add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
add $acc13,$acc13,$acc13
add $acc14,$acc14,$acc14
add $acc15,$acc15,$acc15
and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
and $acc09,$acc09,$mask1b
and $acc10,$acc10,$mask1b
and $acc11,$acc11,$mask1b
xor $acc08,$acc08,$acc12 # r8
xor $acc09,$acc09,$acc13
xor $acc10,$acc10,$acc14
xor $acc11,$acc11,$acc15
xor $acc00,$acc00,$s0 # r2^r0
xor $acc01,$acc01,$s1
xor $acc02,$acc02,$s2
xor $acc03,$acc03,$s3
xor $acc04,$acc04,$s0 # r4^r0
xor $acc05,$acc05,$s1
xor $acc06,$acc06,$s2
xor $acc07,$acc07,$s3
___
$code.=<<___;
rotrwi $s0,$s0,8 # = ROTATE(r0,8)
rotrwi $s1,$s1,8
xor $s0,$s0,$acc00 # ^= r2^r0
rotrwi $s2,$s2,8
xor $s1,$s1,$acc01
rotrwi $s3,$s3,8
xor $s2,$s2,$acc02
xor $s3,$s3,$acc03
xor $acc00,$acc00,$acc08
xor $acc01,$acc01,$acc09
xor $acc02,$acc02,$acc10
xor $acc03,$acc03,$acc11
xor $s0,$s0,$acc04 # ^= r4^r0
rotrwi $acc00,$acc00,24
xor $s1,$s1,$acc05
rotrwi $acc01,$acc01,24
xor $s2,$s2,$acc06
rotrwi $acc02,$acc02,24
xor $s3,$s3,$acc07
rotrwi $acc03,$acc03,24
xor $acc04,$acc04,$acc08
xor $acc05,$acc05,$acc09
xor $acc06,$acc06,$acc10
xor $acc07,$acc07,$acc11
xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
rotrwi $acc04,$acc04,16
xor $s1,$s1,$acc09
rotrwi $acc05,$acc05,16
xor $s2,$s2,$acc10
rotrwi $acc06,$acc06,16
xor $s3,$s3,$acc11
rotrwi $acc07,$acc07,16
xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
rotrwi $acc08,$acc08,8
xor $s1,$s1,$acc01
rotrwi $acc09,$acc09,8
xor $s2,$s2,$acc02
rotrwi $acc10,$acc10,8
xor $s3,$s3,$acc03
rotrwi $acc11,$acc11,8
xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
xor $s1,$s1,$acc05
xor $s2,$s2,$acc06
xor $s3,$s3,$acc07
xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
xor $s1,$s1,$acc09
xor $s2,$s2,$acc10
xor $s3,$s3,$acc11
b Ldec_compact_loop
.align 4
Ldec_compact_done:
xor $s0,$s0,$t0
xor $s1,$s1,$t1
xor $s2,$s2,$t2
xor $s3,$s3,$t3
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
.align 7
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT;