#!/usr/bin/env perl # ==================================================================== # Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. # ==================================================================== # Needs more work: key setup, page boundaries, CBC routine... # # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with # 128-bit key, which is ~40% better than 64-bit code generated by gcc # 4.0. But these are not the ones currently used! Their "compact" # counterparts are, for security reason. ppc_AES_encrypt_compact runs # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact - # at 1/3 of ppc_AES_decrypt. # February 2010 # # Rescheduling instructions to favour Power6 pipeline gave 10% # performance improvement on the platfrom in question (and marginal # improvement even on others). It should be noted that Power6 fails # to process byte in 18 cycles, only in 23, because it fails to issue # 4 load instructions in two cycles, only in 3. As result non-compact # block subroutines are 25% slower than one would expect. Compact # functions scale better, because they have pure computational part, # which scales perfectly with clock frequency. To be specific # ppc_AES_encrypt_compact operates at 42 cycles per byte, while # ppc_AES_decrypt_compact - at 55 (in 64-bit build). $flavour = shift; if ($flavour =~ /64/) { $SIZE_T =8; $LRSAVE =2*$SIZE_T; $STU ="stdu"; $POP ="ld"; $PUSH ="std"; } elsif ($flavour =~ /32/) { $SIZE_T =4; $LRSAVE =$SIZE_T; $STU ="stwu"; $POP ="lwz"; $PUSH ="stw"; } else { die "nonsense $flavour"; } $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or die "can't locate ppc-xlate.pl"; open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; $FRAME=32*$SIZE_T; sub _data_word() { my $i; while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } } $sp="r1"; $toc="r2"; $inp="r3"; $out="r4"; $key="r5"; $Tbl0="r3"; $Tbl1="r6"; $Tbl2="r7"; $Tbl3="r2"; $s0="r8"; $s1="r9"; $s2="r10"; $s3="r11"; $t0="r12"; $t1="r13"; $t2="r14"; $t3="r15"; $acc00="r16"; $acc01="r17"; $acc02="r18"; $acc03="r19"; $acc04="r20"; $acc05="r21"; $acc06="r22"; $acc07="r23"; $acc08="r24"; $acc09="r25"; $acc10="r26"; $acc11="r27"; $acc12="r28"; $acc13="r29"; $acc14="r30"; $acc15="r31"; # stay away from TLS pointer if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; } else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; } $mask80=$Tbl2; $mask1b=$Tbl3; $code.=<<___; .machine "any" .text .align 7 LAES_Te: mflr r0 bcl 20,31,\$+4 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry addi $Tbl0,$Tbl0,`128-8` mtlr r0 blr .long 0 .byte 0,12,0x14,0,0,0,0,0 .space `64-9*4` LAES_Td: mflr r0 bcl 20,31,\$+4 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry addi $Tbl0,$Tbl0,`128-64-8+2048+256` mtlr r0 blr .long 0 .byte 0,12,0x14,0,0,0,0,0 .space `128-64-9*4` ___ &_data_word( 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); $code.=<<___; .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 ___ &_data_word( 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); $code.=<<___; .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d .globl .AES_encrypt .align 7 .AES_encrypt: $STU $sp,-$FRAME($sp) mflr r0 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) $PUSH r13,`$FRAME-$SIZE_T*19`($sp) $PUSH r14,`$FRAME-$SIZE_T*18`($sp) $PUSH r15,`$FRAME-$SIZE_T*17`($sp) $PUSH r16,`$FRAME-$SIZE_T*16`($sp) $PUSH r17,`$FRAME-$SIZE_T*15`($sp) $PUSH r18,`$FRAME-$SIZE_T*14`($sp) $PUSH r19,`$FRAME-$SIZE_T*13`($sp) $PUSH r20,`$FRAME-$SIZE_T*12`($sp) $PUSH r21,`$FRAME-$SIZE_T*11`($sp) $PUSH r22,`$FRAME-$SIZE_T*10`($sp) $PUSH r23,`$FRAME-$SIZE_T*9`($sp) $PUSH r24,`$FRAME-$SIZE_T*8`($sp) $PUSH r25,`$FRAME-$SIZE_T*7`($sp) $PUSH r26,`$FRAME-$SIZE_T*6`($sp) $PUSH r27,`$FRAME-$SIZE_T*5`($sp) $PUSH r28,`$FRAME-$SIZE_T*4`($sp) $PUSH r29,`$FRAME-$SIZE_T*3`($sp) $PUSH r30,`$FRAME-$SIZE_T*2`($sp) $PUSH r31,`$FRAME-$SIZE_T*1`($sp) $PUSH r0,`$FRAME+$LRSAVE`($sp) lwz $s0,0($inp) lwz $s1,4($inp) lwz $s2,8($inp) lwz $s3,12($inp) bl LAES_Te bl Lppc_AES_encrypt_compact stw $s0,0($out) stw $s1,4($out) stw $s2,8($out) stw $s3,12($out) $POP r0,`$FRAME+$LRSAVE`($sp) $POP $toc,`$FRAME-$SIZE_T*20`($sp) $POP r13,`$FRAME-$SIZE_T*19`($sp) $POP r14,`$FRAME-$SIZE_T*18`($sp) $POP r15,`$FRAME-$SIZE_T*17`($sp) $POP r16,`$FRAME-$SIZE_T*16`($sp) $POP r17,`$FRAME-$SIZE_T*15`($sp) $POP r18,`$FRAME-$SIZE_T*14`($sp) $POP r19,`$FRAME-$SIZE_T*13`($sp) $POP r20,`$FRAME-$SIZE_T*12`($sp) $POP r21,`$FRAME-$SIZE_T*11`($sp) $POP r22,`$FRAME-$SIZE_T*10`($sp) $POP r23,`$FRAME-$SIZE_T*9`($sp) $POP r24,`$FRAME-$SIZE_T*8`($sp) $POP r25,`$FRAME-$SIZE_T*7`($sp) $POP r26,`$FRAME-$SIZE_T*6`($sp) $POP r27,`$FRAME-$SIZE_T*5`($sp) $POP r28,`$FRAME-$SIZE_T*4`($sp) $POP r29,`$FRAME-$SIZE_T*3`($sp) $POP r30,`$FRAME-$SIZE_T*2`($sp) $POP r31,`$FRAME-$SIZE_T*1`($sp) mtlr r0 addi $sp,$sp,$FRAME blr .long 0 .byte 0,12,4,1,0x80,18,3,0 .long 0 .align 5 Lppc_AES_encrypt: lwz $acc00,240($key) lwz $t0,0($key) lwz $t1,4($key) lwz $t2,8($key) lwz $t3,12($key) addi $Tbl1,$Tbl0,3 addi $Tbl2,$Tbl0,2 addi $Tbl3,$Tbl0,1 addi $acc00,$acc00,-1 addi $key,$key,16 xor $s0,$s0,$t0 xor $s1,$s1,$t1 xor $s2,$s2,$t2 xor $s3,$s3,$t3 mtctr $acc00 .align 4 Lenc_loop: rlwinm $acc00,$s0,`32-24+3`,21,28 rlwinm $acc01,$s1,`32-24+3`,21,28 rlwinm $acc02,$s2,`32-24+3`,21,28 rlwinm $acc03,$s3,`32-24+3`,21,28 lwz $t0,0($key) lwz $t1,4($key) rlwinm $acc04,$s1,`32-16+3`,21,28 rlwinm $acc05,$s2,`32-16+3`,21,28 lwz $t2,8($key) lwz $t3,12($key) rlwinm $acc06,$s3,`32-16+3`,21,28 rlwinm $acc07,$s0,`32-16+3`,21,28 lwzx $acc00,$Tbl0,$acc00 lwzx $acc01,$Tbl0,$acc01 rlwinm $acc08,$s2,`32-8+3`,21,28 rlwinm $acc09,$s3,`32-8+3`,21,28 lwzx $acc02,$Tbl0,$acc02 lwzx $acc03,$Tbl0,$acc03 rlwinm $acc10,$s0,`32-8+3`,21,28 rlwinm $acc11,$s1,`32-8+3`,21,28 lwzx $acc04,$Tbl1,$acc04 lwzx $acc05,$Tbl1,$acc05 rlwinm $acc12,$s3,`0+3`,21,28 rlwinm $acc13,$s0,`0+3`,21,28 lwzx $acc06,$Tbl1,$acc06 lwzx $acc07,$Tbl1,$acc07 rlwinm $acc14,$s1,`0+3`,21,28 rlwinm $acc15,$s2,`0+3`,21,28 lwzx $acc08,$Tbl2,$acc08 lwzx $acc09,$Tbl2,$acc09 xor $t0,$t0,$acc00 xor $t1,$t1,$acc01 lwzx $acc10,$Tbl2,$acc10 lwzx $acc11,$Tbl2,$acc11 xor $t2,$t2,$acc02 xor $t3,$t3,$acc03 lwzx $acc12,$Tbl3,$acc12 lwzx $acc13,$Tbl3,$acc13 xor $t0,$t0,$acc04 xor $t1,$t1,$acc05 lwzx $acc14,$Tbl3,$acc14 lwzx $acc15,$Tbl3,$acc15 xor $t2,$t2,$acc06 xor $t3,$t3,$acc07 xor $t0,$t0,$acc08 xor $t1,$t1,$acc09 xor $t2,$t2,$acc10 xor $t3,$t3,$acc11 xor $s0,$t0,$acc12 xor $s1,$t1,$acc13 xor $s2,$t2,$acc14 xor $s3,$t3,$acc15 addi $key,$key,16 bdnz- Lenc_loop addi $Tbl2,$Tbl0,2048 nop lwz $t0,0($key) lwz $t1,4($key) rlwinm $acc00,$s0,`32-24`,24,31 rlwinm $acc01,$s1,`32-24`,24,31 lwz $t2,8($key) lwz $t3,12($key) rlwinm $acc02,$s2,`32-24`,24,31 rlwinm $acc03,$s3,`32-24`,24,31 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4 lwz $acc09,`2048+32`($Tbl0) rlwinm $acc04,$s1,`32-16`,24,31 rlwinm $acc05,$s2,`32-16`,24,31 lwz $acc10,`2048+64`($Tbl0) lwz $acc11,`2048+96`($Tbl0) rlwinm $acc06,$s3,`32-16`,24,31 rlwinm $acc07,$s0,`32-16`,24,31 lwz $acc12,`2048+128`($Tbl0) lwz $acc13,`2048+160`($Tbl0) rlwinm $acc08,$s2,`32-8`,24,31 rlwinm $acc09,$s3,`32-8`,24,31 lwz $acc14,`2048+192`($Tbl0) lwz $acc15,`2048+224`($Tbl0) rlwinm $acc10,$s0,`32-8`,24,31 rlwinm $acc11,$s1,`32-8`,24,31 lbzx $acc00,$Tbl2,$acc00 lbzx $acc01,$Tbl2,$acc01 rlwinm $acc12,$s3,`0`,24,31 rlwinm $acc13,$s0,`0`,24,31 lbzx $acc02,$Tbl2,$acc02 lbzx $acc03,$Tbl2,$acc03 rlwinm $acc14,$s1,`0`,24,31 rlwinm $acc15,$s2,`0`,24,31 lbzx $acc04,$Tbl2,$acc04 lbzx $acc05,$Tbl2,$acc05 rlwinm $s0,$acc00,24,0,7 rlwinm $s1,$acc01,24,0,7 lbzx $acc06,$Tbl2,$acc06 lbzx $acc07,$Tbl2,$acc07 rlwinm $s2,$acc02,24,0,7 rlwinm $s3,$acc03,24,0,7 lbzx $acc08,$Tbl2,$acc08 lbzx $acc09,$Tbl2,$acc09 rlwimi $s0,$acc04,16,8,15 rlwimi $s1,$acc05,16,8,15 lbzx $acc10,$Tbl2,$acc10 lbzx $acc11,$Tbl2,$acc11 rlwimi $s2,$acc06,16,8,15 rlwimi $s3,$acc07,16,8,15 lbzx $acc12,$Tbl2,$acc12 lbzx $acc13,$Tbl2,$acc13 rlwimi $s0,$acc08,8,16,23 rlwimi $s1,$acc09,8,16,23 lbzx $acc14,$Tbl2,$acc14 lbzx $acc15,$Tbl2,$acc15 rlwimi $s2,$acc10,8,16,23 rlwimi $s3,$acc11,8,16,23 or $s0,$s0,$acc12 or $s1,$s1,$acc13 or $s2,$s2,$acc14 or $s3,$s3,$acc15 xor $s0,$s0,$t0 xor $s1,$s1,$t1 xor $s2,$s2,$t2 xor $s3,$s3,$t3 blr .long 0 .byte 0,12,0x14,0,0,0,0,0 .align 4 Lppc_AES_encrypt_compact: lwz $acc00,240($key) lwz $t0,0($key) lwz $t1,4($key) lwz $t2,8($key) lwz $t3,12($key) addi $Tbl1,$Tbl0,2048 lis $mask80,0x8080 lis $mask1b,0x1b1b addi $key,$key,16 ori $mask80,$mask80,0x8080 ori $mask1b,$mask1b,0x1b1b mtctr $acc00 .align 4 Lenc_compact_loop: xor $s0,$s0,$t0 xor $s1,$s1,$t1 xor $s2,$s2,$t2 xor $s3,$s3,$t3 rlwinm $acc00,$s0,`32-24`,24,31 rlwinm $acc01,$s1,`32-24`,24,31 rlwinm $acc02,$s2,`32-24`,24,31 rlwinm $acc03,$s3,`32-24`,24,31 rlwinm $acc04,$s1,`32-16`,24,31 rlwinm $acc05,$s2,`32-16`,24,31 rlwinm $acc06,$s3,`32-16`,24,31 rlwinm $acc07,$s0,`32-16`,24,31 lbzx $acc00,$Tbl1,$acc00 lbzx $acc01,$Tbl1,$acc01 rlwinm $acc08,$s2,`32-8`,24,31 rlwinm $acc09,$s3,`32-8`,24,31 lbzx $acc02,$Tbl1,$acc02 lbzx $acc03,$Tbl1,$acc03 rlwinm $acc10,$s0,`32-8`,24,31 rlwinm $acc11,$s1,`32-8`,24,31 lbzx $acc04,$Tbl1,$acc04 lbzx $acc05,$Tbl1,$acc05 rlwinm $acc12,$s3,`0`,24,31 rlwinm $acc13,$s0,`0`,24,31 lbzx $acc06,$Tbl1,$acc06 lbzx $acc07,$Tbl1,$acc07 rlwinm $acc14,$s1,`0`,24,31 rlwinm $acc15,$s2,`0`,24,31 lbzx $acc08,$Tbl1,$acc08 lbzx $acc09,$Tbl1,$acc09 rlwinm $s0,$acc00,24,0,7 rlwinm $s1,$acc01,24,0,7 lbzx $acc10,$Tbl1,$acc10 lbzx $acc11,$Tbl1,$acc11 rlwinm $s2,$acc02,24,0,7 rlwinm $s3,$acc03,24,0,7 lbzx $acc12,$Tbl1,$acc12 lbzx $acc13,$Tbl1,$acc13 rlwimi $s0,$acc04,16,8,15 rlwimi $s1,$acc05,16,8,15 lbzx $acc14,$Tbl1,$acc14 lbzx $acc15,$Tbl1,$acc15 rlwimi $s2,$acc06,16,8,15 rlwimi $s3,$acc07,16,8,15 rlwimi $s0,$acc08,8,16,23 rlwimi $s1,$acc09,8,16,23 rlwimi $s2,$acc10,8,16,23 rlwimi $s3,$acc11,8,16,23 lwz $t0,0($key) lwz $t1,4($key) or $s0,$s0,$acc12 or $s1,$s1,$acc13 lwz $t2,8($key) lwz $t3,12($key) or $s2,$s2,$acc14 or $s3,$s3,$acc15 addi $key,$key,16 bdz Lenc_compact_done and $acc00,$s0,$mask80 # r1=r0&0x80808080 and $acc01,$s1,$mask80 and $acc02,$s2,$mask80 and $acc03,$s3,$mask80 srwi $acc04,$acc00,7 # r1>>7 srwi $acc05,$acc01,7 srwi $acc06,$acc02,7 srwi $acc07,$acc03,7 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f andc $acc09,$s1,$mask80 andc $acc10,$s2,$mask80 andc $acc11,$s3,$mask80 sub $acc00,$acc00,$acc04 # r1-(r1>>7) sub $acc01,$acc01,$acc05 sub $acc02,$acc02,$acc06 sub $acc03,$acc03,$acc07 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 add $acc09,$acc09,$acc09 add $acc10,$acc10,$acc10 add $acc11,$acc11,$acc11 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b and $acc01,$acc01,$mask1b and $acc02,$acc02,$mask1b and $acc03,$acc03,$mask1b xor $acc00,$acc00,$acc08 # r2 xor $acc01,$acc01,$acc09 xor $acc02,$acc02,$acc10 xor $acc03,$acc03,$acc11 rotlwi $acc12,$s0,16 # ROTATE(r0,16) rotlwi $acc13,$s1,16 rotlwi $acc14,$s2,16 rotlwi $acc15,$s3,16 xor $s0,$s0,$acc00 # r0^r2 xor $s1,$s1,$acc01 xor $s2,$s2,$acc02 xor $s3,$s3,$acc03 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24) rotrwi $s1,$s1,24 rotrwi $s2,$s2,24 rotrwi $s3,$s3,24 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2 xor $s1,$s1,$acc01 xor $s2,$s2,$acc02 xor $s3,$s3,$acc03 rotlwi $acc08,$acc12,8 # ROTATE(r0,24) rotlwi $acc09,$acc13,8 rotlwi $acc10,$acc14,8 rotlwi $acc11,$acc15,8 xor $s0,$s0,$acc12 # xor $s1,$s1,$acc13 xor $s2,$s2,$acc14 xor $s3,$s3,$acc15 xor $s0,$s0,$acc08 # xor $s1,$s1,$acc09 xor $s2,$s2,$acc10 xor $s3,$s3,$acc11 b Lenc_compact_loop .align 4 Lenc_compact_done: xor $s0,$s0,$t0 xor $s1,$s1,$t1 xor $s2,$s2,$t2 xor $s3,$s3,$t3 blr .long 0 .byte 0,12,0x14,0,0,0,0,0 .globl .AES_decrypt .align 7 .AES_decrypt: $STU $sp,-$FRAME($sp) mflr r0 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) $PUSH r13,`$FRAME-$SIZE_T*19`($sp) $PUSH r14,`$FRAME-$SIZE_T*18`($sp) $PUSH r15,`$FRAME-$SIZE_T*17`($sp) $PUSH r16,`$FRAME-$SIZE_T*16`($sp) $PUSH r17,`$FRAME-$SIZE_T*15`($sp) $PUSH r18,`$FRAME-$SIZE_T*14`($sp) $PUSH r19,`$FRAME-$SIZE_T*13`($sp) $PUSH r20,`$FRAME-$SIZE_T*12`($sp) $PUSH r21,`$FRAME-$SIZE_T*11`($sp) $PUSH r22,`$FRAME-$SIZE_T*10`($sp) $PUSH r23,`$FRAME-$SIZE_T*9`($sp) $PUSH r24,`$FRAME-$SIZE_T*8`($sp) $PUSH r25,`$FRAME-$SIZE_T*7`($sp) $PUSH r26,`$FRAME-$SIZE_T*6`($sp) $PUSH r27,`$FRAME-$SIZE_T*5`($sp) $PUSH r28,`$FRAME-$SIZE_T*4`($sp) $PUSH r29,`$FRAME-$SIZE_T*3`($sp) $PUSH r30,`$FRAME-$SIZE_T*2`($sp) $PUSH r31,`$FRAME-$SIZE_T*1`($sp) $PUSH r0,`$FRAME+$LRSAVE`($sp) lwz $s0,0($inp) lwz $s1,4($inp) lwz $s2,8($inp) lwz $s3,12($inp) bl LAES_Td bl Lppc_AES_decrypt_compact stw $s0,0($out) stw $s1,4($out) stw $s2,8($out) stw $s3,12($out) $POP r0,`$FRAME+$LRSAVE`($sp) $POP $toc,`$FRAME-$SIZE_T*20`($sp) $POP r13,`$FRAME-$SIZE_T*19`($sp) $POP r14,`$FRAME-$SIZE_T*18`($sp) $POP r15,`$FRAME-$SIZE_T*17`($sp) $POP r16,`$FRAME-$SIZE_T*16`($sp) $POP r17,`$FRAME-$SIZE_T*15`($sp) $POP r18,`$FRAME-$SIZE_T*14`($sp) $POP r19,`$FRAME-$SIZE_T*13`($sp) $POP r20,`$FRAME-$SIZE_T*12`($sp) $POP r21,`$FRAME-$SIZE_T*11`($sp) $POP r22,`$FRAME-$SIZE_T*10`($sp) $POP r23,`$FRAME-$SIZE_T*9`($sp) $POP r24,`$FRAME-$SIZE_T*8`($sp) $POP r25,`$FRAME-$SIZE_T*7`($sp) $POP r26,`$FRAME-$SIZE_T*6`($sp) $POP r27,`$FRAME-$SIZE_T*5`($sp) $POP r28,`$FRAME-$SIZE_T*4`($sp) $POP r29,`$FRAME-$SIZE_T*3`($sp) $POP r30,`$FRAME-$SIZE_T*2`($sp) $POP r31,`$FRAME-$SIZE_T*1`($sp) mtlr r0 addi $sp,$sp,$FRAME blr .long 0 .byte 0,12,4,1,0x80,18,3,0 .long 0 .align 5 Lppc_AES_decrypt: lwz $acc00,240($key) lwz $t0,0($key) lwz $t1,4($key) lwz $t2,8($key) lwz $t3,12($key) addi $Tbl1,$Tbl0,3 addi $Tbl2,$Tbl0,2 addi $Tbl3,$Tbl0,1 addi $acc00,$acc00,-1 addi $key,$key,16 xor $s0,$s0,$t0 xor $s1,$s1,$t1 xor $s2,$s2,$t2 xor $s3,$s3,$t3 mtctr $acc00 .align 4 Ldec_loop: rlwinm $acc00,$s0,`32-24+3`,21,28 rlwinm $acc01,$s1,`32-24+3`,21,28 rlwinm $acc02,$s2,`32-24+3`,21,28 rlwinm $acc03,$s3,`32-24+3`,21,28 lwz $t0,0($key) lwz $t1,4($key) rlwinm $acc04,$s3,`32-16+3`,21,28 rlwinm $acc05,$s0,`32-16+3`,21,28 lwz $t2,8($key) lwz $t3,12($key) rlwinm $acc06,$s1,`32-16+3`,21,28 rlwinm $acc07,$s2,`32-16+3`,21,28 lwzx $acc00,$Tbl0,$acc00 lwzx $acc01,$Tbl0,$acc01 rlwinm $acc08,$s2,`32-8+3`,21,28 rlwinm $acc09,$s3,`32-8+3`,21,28 lwzx $acc02,$Tbl0,$acc02 lwzx $acc03,$Tbl0,$acc03 rlwinm $acc10,$s0,`32-8+3`,21,28 rlwinm $acc11,$s1,`32-8+3`,21,28 lwzx $acc04,$Tbl1,$acc04 lwzx $acc05,$Tbl1,$acc05 rlwinm $acc12,$s1,`0+3`,21,28 rlwinm $acc13,$s2,`0+3`,21,28 lwzx $acc06,$Tbl1,$acc06 lwzx $acc07,$Tbl1,$acc07 rlwinm $acc14,$s3,`0+3`,21,28 rlwinm $acc15,$s0,`0+3`,21,28 lwzx $acc08,$Tbl2,$acc08 lwzx $acc09,$Tbl2,$acc09 xor $t0,$t0,$acc00 xor $t1,$t1,$acc01 lwzx $acc10,$Tbl2,$acc10 lwzx $acc11,$Tbl2,$acc11 xor $t2,$t2,$acc02 xor $t3,$t3,$acc03 lwzx $acc12,$Tbl3,$acc12 lwzx $acc13,$Tbl3,$acc13 xor $t0,$t0,$acc04 xor $t1,$t1,$acc05 lwzx $acc14,$Tbl3,$acc14 lwzx $acc15,$Tbl3,$acc15 xor $t2,$t2,$acc06 xor $t3,$t3,$acc07 xor $t0,$t0,$acc08 xor $t1,$t1,$acc09 xor $t2,$t2,$acc10 xor $t3,$t3,$acc11 xor $s0,$t0,$acc12 xor $s1,$t1,$acc13 xor $s2,$t2,$acc14 xor $s3,$t3,$acc15 addi $key,$key,16 bdnz- Ldec_loop addi $Tbl2,$Tbl0,2048 nop lwz $t0,0($key) lwz $t1,4($key) rlwinm $acc00,$s0,`32-24`,24,31 rlwinm $acc01,$s1,`32-24`,24,31 lwz $t2,8($key) lwz $t3,12($key) rlwinm $acc02,$s2,`32-24`,24,31 rlwinm $acc03,$s3,`32-24`,24,31 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4 lwz $acc09,`2048+32`($Tbl0) rlwinm $acc04,$s3,`32-16`,24,31 rlwinm $acc05,$s0,`32-16`,24,31 lwz $acc10,`2048+64`($Tbl0) lwz $acc11,`2048+96`($Tbl0) lbzx $acc00,$Tbl2,$acc00 lbzx $acc01,$Tbl2,$acc01 lwz $acc12,`2048+128`($Tbl0) lwz $acc13,`2048+160`($Tbl0) rlwinm $acc06,$s1,`32-16`,24,31 rlwinm $acc07,$s2,`32-16`,24,31 lwz $acc14,`2048+192`($Tbl0) lwz $acc15,`2048+224`($Tbl0) rlwinm $acc08,$s2,`32-8`,24,31 rlwinm $acc09,$s3,`32-8`,24,31 lbzx $acc02,$Tbl2,$acc02 lbzx $acc03,$Tbl2,$acc03 rlwinm $acc10,$s0,`32-8`,24,31 rlwinm $acc11,$s1,`32-8`,24,31 lbzx $acc04,$Tbl2,$acc04 lbzx $acc05,$Tbl2,$acc05 rlwinm $acc12,$s1,`0`,24,31 rlwinm $acc13,$s2,`0`,24,31 lbzx $acc06,$Tbl2,$acc06 lbzx $acc07,$Tbl2,$acc07 rlwinm $acc14,$s3,`0`,24,31 rlwinm $acc15,$s0,`0`,24,31 lbzx $acc08,$Tbl2,$acc08 lbzx $acc09,$Tbl2,$acc09 rlwinm $s0,$acc00,24,0,7 rlwinm $s1,$acc01,24,0,7 lbzx $acc10,$Tbl2,$acc10 lbzx $acc11,$Tbl2,$acc11 rlwinm $s2,$acc02,24,0,7 rlwinm $s3,$acc03,24,0,7 lbzx $acc12,$Tbl2,$acc12 lbzx $acc13,$Tbl2,$acc13 rlwimi $s0,$acc04,16,8,15 rlwimi $s1,$acc05,16,8,15 lbzx $acc14,$Tbl2,$acc14 lbzx $acc15,$Tbl2,$acc15 rlwimi $s2,$acc06,16,8,15 rlwimi $s3,$acc07,16,8,15 rlwimi $s0,$acc08,8,16,23 rlwimi $s1,$acc09,8,16,23 rlwimi $s2,$acc10,8,16,23 rlwimi $s3,$acc11,8,16,23 or $s0,$s0,$acc12 or $s1,$s1,$acc13 or $s2,$s2,$acc14 or $s3,$s3,$acc15 xor $s0,$s0,$t0 xor $s1,$s1,$t1 xor $s2,$s2,$t2 xor $s3,$s3,$t3 blr .long 0 .byte 0,12,0x14,0,0,0,0,0 .align 4 Lppc_AES_decrypt_compact: lwz $acc00,240($key) lwz $t0,0($key) lwz $t1,4($key) lwz $t2,8($key) lwz $t3,12($key) addi $Tbl1,$Tbl0,2048 lis $mask80,0x8080 lis $mask1b,0x1b1b addi $key,$key,16 ori $mask80,$mask80,0x8080 ori $mask1b,$mask1b,0x1b1b ___ $code.=<<___ if ($SIZE_T==8); insrdi $mask80,$mask80,32,0 insrdi $mask1b,$mask1b,32,0 ___ $code.=<<___; mtctr $acc00 .align 4 Ldec_compact_loop: xor $s0,$s0,$t0 xor $s1,$s1,$t1 xor $s2,$s2,$t2 xor $s3,$s3,$t3 rlwinm $acc00,$s0,`32-24`,24,31 rlwinm $acc01,$s1,`32-24`,24,31 rlwinm $acc02,$s2,`32-24`,24,31 rlwinm $acc03,$s3,`32-24`,24,31 rlwinm $acc04,$s3,`32-16`,24,31 rlwinm $acc05,$s0,`32-16`,24,31 rlwinm $acc06,$s1,`32-16`,24,31 rlwinm $acc07,$s2,`32-16`,24,31 lbzx $acc00,$Tbl1,$acc00 lbzx $acc01,$Tbl1,$acc01 rlwinm $acc08,$s2,`32-8`,24,31 rlwinm $acc09,$s3,`32-8`,24,31 lbzx $acc02,$Tbl1,$acc02 lbzx $acc03,$Tbl1,$acc03 rlwinm $acc10,$s0,`32-8`,24,31 rlwinm $acc11,$s1,`32-8`,24,31 lbzx $acc04,$Tbl1,$acc04 lbzx $acc05,$Tbl1,$acc05 rlwinm $acc12,$s1,`0`,24,31 rlwinm $acc13,$s2,`0`,24,31 lbzx $acc06,$Tbl1,$acc06 lbzx $acc07,$Tbl1,$acc07 rlwinm $acc14,$s3,`0`,24,31 rlwinm $acc15,$s0,`0`,24,31 lbzx $acc08,$Tbl1,$acc08 lbzx $acc09,$Tbl1,$acc09 rlwinm $s0,$acc00,24,0,7 rlwinm $s1,$acc01,24,0,7 lbzx $acc10,$Tbl1,$acc10 lbzx $acc11,$Tbl1,$acc11 rlwinm $s2,$acc02,24,0,7 rlwinm $s3,$acc03,24,0,7 lbzx $acc12,$Tbl1,$acc12 lbzx $acc13,$Tbl1,$acc13 rlwimi $s0,$acc04,16,8,15 rlwimi $s1,$acc05,16,8,15 lbzx $acc14,$Tbl1,$acc14 lbzx $acc15,$Tbl1,$acc15 rlwimi $s2,$acc06,16,8,15 rlwimi $s3,$acc07,16,8,15 rlwimi $s0,$acc08,8,16,23 rlwimi $s1,$acc09,8,16,23 rlwimi $s2,$acc10,8,16,23 rlwimi $s3,$acc11,8,16,23 lwz $t0,0($key) lwz $t1,4($key) or $s0,$s0,$acc12 or $s1,$s1,$acc13 lwz $t2,8($key) lwz $t3,12($key) or $s2,$s2,$acc14 or $s3,$s3,$acc15 addi $key,$key,16 bdz Ldec_compact_done ___ $code.=<<___ if ($SIZE_T==8); # vectorized permutation improves decrypt performance by 10% insrdi $s0,$s1,32,0 insrdi $s2,$s3,32,0 and $acc00,$s0,$mask80 # r1=r0&0x80808080 and $acc02,$s2,$mask80 srdi $acc04,$acc00,7 # r1>>7 srdi $acc06,$acc02,7 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f andc $acc10,$s2,$mask80 sub $acc00,$acc00,$acc04 # r1-(r1>>7) sub $acc02,$acc02,$acc06 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 add $acc10,$acc10,$acc10 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b and $acc02,$acc02,$mask1b xor $acc00,$acc00,$acc08 # r2 xor $acc02,$acc02,$acc10 and $acc04,$acc00,$mask80 # r1=r2&0x80808080 and $acc06,$acc02,$mask80 srdi $acc08,$acc04,7 # r1>>7 srdi $acc10,$acc06,7 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f andc $acc14,$acc02,$mask80 sub $acc04,$acc04,$acc08 # r1-(r1>>7) sub $acc06,$acc06,$acc10 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1 add $acc14,$acc14,$acc14 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b and $acc06,$acc06,$mask1b xor $acc04,$acc04,$acc12 # r4 xor $acc06,$acc06,$acc14 and $acc08,$acc04,$mask80 # r1=r4&0x80808080 and $acc10,$acc06,$mask80 srdi $acc12,$acc08,7 # r1>>7 srdi $acc14,$acc10,7 sub $acc08,$acc08,$acc12 # r1-(r1>>7) sub $acc10,$acc10,$acc14 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f andc $acc14,$acc06,$mask80 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1 add $acc14,$acc14,$acc14 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b and $acc10,$acc10,$mask1b xor $acc08,$acc08,$acc12 # r8 xor $acc10,$acc10,$acc14 xor $acc00,$acc00,$s0 # r2^r0 xor $acc02,$acc02,$s2 xor $acc04,$acc04,$s0 # r4^r0 xor $acc06,$acc06,$s2 extrdi $acc01,$acc00,32,0 extrdi $acc03,$acc02,32,0 extrdi $acc05,$acc04,32,0 extrdi $acc07,$acc06,32,0 extrdi $acc09,$acc08,32,0 extrdi $acc11,$acc10,32,0 ___ $code.=<<___ if ($SIZE_T==4); and $acc00,$s0,$mask80 # r1=r0&0x80808080 and $acc01,$s1,$mask80 and $acc02,$s2,$mask80 and $acc03,$s3,$mask80 srwi $acc04,$acc00,7 # r1>>7 srwi $acc05,$acc01,7 srwi $acc06,$acc02,7 srwi $acc07,$acc03,7 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f andc $acc09,$s1,$mask80 andc $acc10,$s2,$mask80 andc $acc11,$s3,$mask80 sub $acc00,$acc00,$acc04 # r1-(r1>>7) sub $acc01,$acc01,$acc05 sub $acc02,$acc02,$acc06 sub $acc03,$acc03,$acc07 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 add $acc09,$acc09,$acc09 add $acc10,$acc10,$acc10 add $acc11,$acc11,$acc11 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b and $acc01,$acc01,$mask1b and $acc02,$acc02,$mask1b and $acc03,$acc03,$mask1b xor $acc00,$acc00,$acc08 # r2 xor $acc01,$acc01,$acc09 xor $acc02,$acc02,$acc10 xor $acc03,$acc03,$acc11 and $acc04,$acc00,$mask80 # r1=r2&0x80808080 and $acc05,$acc01,$mask80 and $acc06,$acc02,$mask80 and $acc07,$acc03,$mask80 srwi $acc08,$acc04,7 # r1>>7 srwi $acc09,$acc05,7 srwi $acc10,$acc06,7 srwi $acc11,$acc07,7 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f andc $acc13,$acc01,$mask80 andc $acc14,$acc02,$mask80 andc $acc15,$acc03,$mask80 sub $acc04,$acc04,$acc08 # r1-(r1>>7) sub $acc05,$acc05,$acc09 sub $acc06,$acc06,$acc10 sub $acc07,$acc07,$acc11 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1 add $acc13,$acc13,$acc13 add $acc14,$acc14,$acc14 add $acc15,$acc15,$acc15 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b and $acc05,$acc05,$mask1b and $acc06,$acc06,$mask1b and $acc07,$acc07,$mask1b xor $acc04,$acc04,$acc12 # r4 xor $acc05,$acc05,$acc13 xor $acc06,$acc06,$acc14 xor $acc07,$acc07,$acc15 and $acc08,$acc04,$mask80 # r1=r4&0x80808080 and $acc09,$acc05,$mask80 and $acc10,$acc06,$mask80 and $acc11,$acc07,$mask80 srwi $acc12,$acc08,7 # r1>>7 srwi $acc13,$acc09,7 srwi $acc14,$acc10,7 srwi $acc15,$acc11,7 sub $acc08,$acc08,$acc12 # r1-(r1>>7) sub $acc09,$acc09,$acc13 sub $acc10,$acc10,$acc14 sub $acc11,$acc11,$acc15 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f andc $acc13,$acc05,$mask80 andc $acc14,$acc06,$mask80 andc $acc15,$acc07,$mask80 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1 add $acc13,$acc13,$acc13 add $acc14,$acc14,$acc14 add $acc15,$acc15,$acc15 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b and $acc09,$acc09,$mask1b and $acc10,$acc10,$mask1b and $acc11,$acc11,$mask1b xor $acc08,$acc08,$acc12 # r8 xor $acc09,$acc09,$acc13 xor $acc10,$acc10,$acc14 xor $acc11,$acc11,$acc15 xor $acc00,$acc00,$s0 # r2^r0 xor $acc01,$acc01,$s1 xor $acc02,$acc02,$s2 xor $acc03,$acc03,$s3 xor $acc04,$acc04,$s0 # r4^r0 xor $acc05,$acc05,$s1 xor $acc06,$acc06,$s2 xor $acc07,$acc07,$s3 ___ $code.=<<___; rotrwi $s0,$s0,8 # = ROTATE(r0,8) rotrwi $s1,$s1,8 rotrwi $s2,$s2,8 rotrwi $s3,$s3,8 xor $s0,$s0,$acc00 # ^= r2^r0 xor $s1,$s1,$acc01 xor $s2,$s2,$acc02 xor $s3,$s3,$acc03 xor $acc00,$acc00,$acc08 xor $acc01,$acc01,$acc09 xor $acc02,$acc02,$acc10 xor $acc03,$acc03,$acc11 xor $s0,$s0,$acc04 # ^= r4^r0 xor $s1,$s1,$acc05 xor $s2,$s2,$acc06 xor $s3,$s3,$acc07 rotrwi $acc00,$acc00,24 rotrwi $acc01,$acc01,24 rotrwi $acc02,$acc02,24 rotrwi $acc03,$acc03,24 xor $acc04,$acc04,$acc08 xor $acc05,$acc05,$acc09 xor $acc06,$acc06,$acc10 xor $acc07,$acc07,$acc11 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)] xor $s1,$s1,$acc09 xor $s2,$s2,$acc10 xor $s3,$s3,$acc11 rotrwi $acc04,$acc04,16 rotrwi $acc05,$acc05,16 rotrwi $acc06,$acc06,16 rotrwi $acc07,$acc07,16 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24) xor $s1,$s1,$acc01 xor $s2,$s2,$acc02 xor $s3,$s3,$acc03 rotrwi $acc08,$acc08,8 rotrwi $acc09,$acc09,8 rotrwi $acc10,$acc10,8 rotrwi $acc11,$acc11,8 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16) xor $s1,$s1,$acc05 xor $s2,$s2,$acc06 xor $s3,$s3,$acc07 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8) xor $s1,$s1,$acc09 xor $s2,$s2,$acc10 xor $s3,$s3,$acc11 b Ldec_compact_loop .align 4 Ldec_compact_done: xor $s0,$s0,$t0 xor $s1,$s1,$t1 xor $s2,$s2,$t2 xor $s3,$s3,$t3 blr .long 0 .byte 0,12,0x14,0,0,0,0,0 .asciz "AES for PPC, CRYPTOGAMS by " .align 7 ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; print $code; close STDOUT;