#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov for the OpenSSL
# project. Rights for redistribution and usage in source and binary
# forms are granted according to the OpenSSL license.
# ====================================================================
#
# Version 1.1
#
# The major reason for undertaking this effort was to mitigate the
# hazard of cache-timing attacks. This is [currently and initially!]
# addressed in two ways. 1. S-boxes are compressed from 5KB to 2KB+256B
# size each. 2. References to them are scheduled for L2 cache latency,
# meaning that the tables don't have to reside in L1 cache. Once again,
# this is an initial draft and one should expect more countermeasures
# to be implemented...
#
# Version 1.1 prefetches T[ed]4 in order to mitigate attack on the last
# round.
#
# Even though performance was not the primary goal [on the contrary,
# extra shifts "induced" by the compressed S-box and the longer loop
# epilogue "induced" by scheduling for L2 have a negative effect on
# performance], the code turned out to run in ~23 cycles per processed
# byte en-/decrypted with a 128-bit key. This is a pretty good result
# for code with the mentioned qualities on an UltraSPARC core. Compared
# to Sun C generated code my encrypt procedure runs just a few percent
# faster, while the decrypt one is a whole 50% faster [yes, Sun C failed
# to generate an optimal decrypt procedure].
# Compared to GNU C generated code both procedures are more than 60%
# faster:-)

use strict;
use warnings;

# ABI selection: generate 32-bit code unless a 64-bit compiler flag is
# present on the command line.
my $bits = 32;
for my $flag (@ARGV) {
    $bits = 64 if ($flag =~ /\-m64/ || $flag =~ /\-xarch\=v9/);
}

# $bias is added to every %sp-relative address and $frame is the amount
# reserved by each `save`; both depend on the selected ABI.
my ($bias, $frame) = ($bits == 64) ? (2047, 192) : (0, 112);

# Extra bytes reserved in the inner routines' frames; the return address
# is spilled to [%sp+$bias+$frame+0].
my $locals = 16;

# Sixteen accumulators receiving the table look-ups of one round.
my ($acc0,  $acc1,  $acc2,  $acc3)  = ('%l0', '%o0', '%o1', '%o2');
my ($acc4,  $acc5,  $acc6,  $acc7)  = ('%l1', '%o3', '%o4', '%o5');
my ($acc8,  $acc9,  $acc10, $acc11) = ('%l2', '%o7', '%g1', '%g2');
my ($acc12, $acc13, $acc14, $acc15) = ('%l3', '%g3', '%g4', '%g5');

# Round-key words / intermediate state.
my ($t0, $t1, $t2, $t3) = ('%l4', '%l5', '%l6', '%l7');

# The four 32-bit state words: inputs and results of the inner routines.
my ($s0, $s1, $s2, $s3) = ('%i0', '%i1', '%i2', '%i3');

my $tbl    = '%i4';    # base address of AES_Te or AES_Td
my $key    = '%i5';    # key schedule pointer, advanced by 32 per iteration
my $rounds = '%i7';    # aliases with return address, which is off-loaded to stack

# Accumulated assembly text.  Kept as a package variable, as in the
# original script.
our $code = '';

# _data_word(@words)
#
# Appends one ".long w,w" line to $code for every 32-bit value passed
# in, i.e. each table word is emitted twice and occupies 8 bytes.  The
# round code below fetches such an entry with one 64-bit `ldx` and then
# shifts it right by 8, 16 or 24 bits, which leaves a byte-rotated copy
# of the word in the low 32 bits.
#
# Declared without a prototype: the original `sub _data_word()` promised
# "no arguments" and only worked because callers used the `&` call form.
sub _data_word {
    for my $word (@_) {
        $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
    }
    return;
}

# %g2 and %g3 are used as accumulators, so they are declared as scratch
# registers when generating 64-bit code.
$code .= <<___ if ($bits == 64);
.register	%g2,#scratch
.register	%g3,#scratch
___

$code .= <<___;
.section	".text",#alloc,#execinstr

.align	256
AES_Te:
___

# Encryption table: 256 doubled words = 2KB.
_data_word(
    0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
    0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
    0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
    0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
    0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
    0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
    0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
    0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
    0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
    0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
    0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
    0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
    0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
    0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
    0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
    0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
    0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
    0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
    0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
    0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
    0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
    0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
    0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
    0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
    0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
    0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
    0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
    0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
    0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
    0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
    0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
    0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
    0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
    0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
    0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
    0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
    0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
    0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
    0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
    0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
    0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
    0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
    0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
    0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
    0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
    0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
    0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
    0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
    0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
    0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
    0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
    0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
    0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
    0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
    0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
    0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
    0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
    0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
    0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
    0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
    0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
    0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
    0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
    0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);

# 256-byte S-box placed directly behind the 2KB table, i.e. at
# AES_Te+2048.  It is read with `ldub` in the last round.
$code .= <<___;
	.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
	.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
	.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
	.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
	.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
	.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
	.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
	.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
	.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
	.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
	.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
	.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
	.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
	.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
	.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
	.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
	.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
	.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
.type	AES_Te,#object
.size	AES_Te,(.-AES_Te)
___

# _sparcv9_AES_encrypt: inner encryption routine.
#
# On entry (after its own `save`) the state words are in $s0-$s3, the
# table base in $tbl and the key schedule in $key.  The round count is
# read from [$key+240] and halved, because one pass through .Lenc_loop
# performs two rounds: $s -> $t, then $t -> $s.  Table indices are the
# state bytes scaled by 8 (shift, then mask with 2040 = 255*8).
#
# $rounds lives in %i7, so the return address is parked on the stack
# first.  When the counter reaches zero the annulled delay slot of
# `bz,a,pn` re-uses $rounds as a pointer to the byte S-box at $tbl+2048
# for the final round.  The `ldx ...,%g0` loads in the second half of the
# loop only touch that S-box so it is cached before the last round.
#
# The instruction order is hand-scheduled; the bare `!` markers are
# empty assembler comments left by the author, one every eighth
# instruction.  The `fmovs %f0,%f0` lines are FP nops that are stripped
# again at the end of this script.
$code .= <<___;
.align	64
.skip	16
_sparcv9_AES_encrypt:
	save	%sp,-$frame-$locals,%sp
	stx	%i7,[%sp+$bias+$frame+0]	! off-load return address
	ld	[$key+240],$rounds
	ld	[$key+0],$t0
	ld	[$key+4],$t1	!
	ld	[$key+8],$t2
	srl	$rounds,1,$rounds
	xor	$t0,$s0,$s0
	ld	[$key+12],$t3
	srl	$s0,21,$acc0
	xor	$t1,$s1,$s1
	ld	[$key+16],$t0
	srl	$s1,13,$acc1	!
	xor	$t2,$s2,$s2
	ld	[$key+20],$t1
	xor	$t3,$s3,$s3
	ld	[$key+24],$t2
	and	$acc0,2040,$acc0
	ld	[$key+28],$t3
	nop
.Lenc_loop:
	srl	$s2,5,$acc2	!
	and	$acc1,2040,$acc1
	ldx	[$tbl+$acc0],$acc0
	sll	$s3,3,$acc3
	and	$acc2,2040,$acc2
	ldx	[$tbl+$acc1],$acc1
	srl	$s1,21,$acc4
	and	$acc3,2040,$acc3
	ldx	[$tbl+$acc2],$acc2	!
	srl	$s2,13,$acc5
	and	$acc4,2040,$acc4
	ldx	[$tbl+$acc3],$acc3
	srl	$s3,5,$acc6
	and	$acc5,2040,$acc5
	ldx	[$tbl+$acc4],$acc4
	fmovs	%f0,%f0
	sll	$s0,3,$acc7	!
	and	$acc6,2040,$acc6
	ldx	[$tbl+$acc5],$acc5
	srl	$s2,21,$acc8
	and	$acc7,2040,$acc7
	ldx	[$tbl+$acc6],$acc6
	srl	$s3,13,$acc9
	and	$acc8,2040,$acc8
	ldx	[$tbl+$acc7],$acc7	!
	srl	$s0,5,$acc10
	and	$acc9,2040,$acc9
	ldx	[$tbl+$acc8],$acc8
	sll	$s1,3,$acc11
	and	$acc10,2040,$acc10
	ldx	[$tbl+$acc9],$acc9
	fmovs	%f0,%f0
	srl	$s3,21,$acc12	!
	and	$acc11,2040,$acc11
	ldx	[$tbl+$acc10],$acc10
	srl	$s0,13,$acc13
	and	$acc12,2040,$acc12
	ldx	[$tbl+$acc11],$acc11
	srl	$s1,5,$acc14
	and	$acc13,2040,$acc13
	ldx	[$tbl+$acc12],$acc12	!
	sll	$s2,3,$acc15
	and	$acc14,2040,$acc14
	ldx	[$tbl+$acc13],$acc13
	and	$acc15,2040,$acc15
	add	$key,32,$key
	ldx	[$tbl+$acc14],$acc14
	fmovs	%f0,%f0
	subcc	$rounds,1,$rounds	!
	ldx	[$tbl+$acc15],$acc15
	bz,a,pn	%icc,.Lenc_last
	add	$tbl,2048,$rounds
	srlx	$acc1,8,$acc1
	xor	$acc0,$t0,$t0
	ld	[$key+0],$s0
	fmovs	%f0,%f0
	srlx	$acc2,16,$acc2	!
	xor	$acc1,$t0,$t0
	ld	[$key+4],$s1
	srlx	$acc3,24,$acc3
	xor	$acc2,$t0,$t0
	ld	[$key+8],$s2
	srlx	$acc5,8,$acc5
	xor	$acc3,$t0,$t0
	ld	[$key+12],$s3	!
	srlx	$acc6,16,$acc6
	xor	$acc4,$t1,$t1
	fmovs	%f0,%f0
	srlx	$acc7,24,$acc7
	xor	$acc5,$t1,$t1
	srlx	$acc9,8,$acc9
	xor	$acc6,$t1,$t1
	srlx	$acc10,16,$acc10	!
	xor	$acc7,$t1,$t1
	srlx	$acc11,24,$acc11
	xor	$acc8,$t2,$t2
	srlx	$acc13,8,$acc13
	xor	$acc9,$t2,$t2
	srlx	$acc14,16,$acc14
	xor	$acc10,$t2,$t2
	srlx	$acc15,24,$acc15	!
	xor	$acc11,$t2,$t2
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$t3,$t3
	srl	$t0,21,$acc0
	xor	$acc14,$t3,$t3
	srl	$t1,13,$acc1
	xor	$acc15,$t3,$t3
	and	$acc0,2040,$acc0	!
	srl	$t2,5,$acc2
	and	$acc1,2040,$acc1
	ldx	[$tbl+$acc0],$acc0
	sll	$t3,3,$acc3
	and	$acc2,2040,$acc2
	ldx	[$tbl+$acc1],$acc1
	fmovs	%f0,%f0
	srl	$t1,21,$acc4	!
	and	$acc3,2040,$acc3
	ldx	[$tbl+$acc2],$acc2
	srl	$t2,13,$acc5
	and	$acc4,2040,$acc4
	ldx	[$tbl+$acc3],$acc3
	srl	$t3,5,$acc6
	and	$acc5,2040,$acc5
	ldx	[$tbl+$acc4],$acc4	!
	sll	$t0,3,$acc7
	and	$acc6,2040,$acc6
	ldx	[$tbl+$acc5],$acc5
	srl	$t2,21,$acc8
	and	$acc7,2040,$acc7
	ldx	[$tbl+$acc6],$acc6
	fmovs	%f0,%f0
	srl	$t3,13,$acc9	!
	and	$acc8,2040,$acc8
	ldx	[$tbl+$acc7],$acc7
	srl	$t0,5,$acc10
	and	$acc9,2040,$acc9
	ldx	[$tbl+$acc8],$acc8
	sll	$t1,3,$acc11
	and	$acc10,2040,$acc10
	ldx	[$tbl+$acc9],$acc9	!
	srl	$t3,21,$acc12
	and	$acc11,2040,$acc11
	ldx	[$tbl+$acc10],$acc10
	srl	$t0,13,$acc13
	and	$acc12,2040,$acc12
	ldx	[$tbl+$acc11],$acc11
	fmovs	%f0,%f0
	srl	$t1,5,$acc14	!
	and	$acc13,2040,$acc13
	ldx	[$tbl+$acc12],$acc12
	sll	$t2,3,$acc15
	and	$acc14,2040,$acc14
	ldx	[$tbl+$acc13],$acc13
	srlx	$acc1,8,$acc1
	and	$acc15,2040,$acc15
	ldx	[$tbl+$acc14],$acc14	!
	srlx	$acc2,16,$acc2
	xor	$acc0,$s0,$s0
	ldx	[$tbl+$acc15],$acc15
	srlx	$acc3,24,$acc3
	xor	$acc1,$s0,$s0
	ld	[$key+16],$t0
	fmovs	%f0,%f0
	srlx	$acc5,8,$acc5	!
	xor	$acc2,$s0,$s0
	ld	[$key+20],$t1
	srlx	$acc6,16,$acc6
	xor	$acc3,$s0,$s0
	ld	[$key+24],$t2
	srlx	$acc7,24,$acc7
	xor	$acc4,$s1,$s1
	ld	[$key+28],$t3	!
	srlx	$acc9,8,$acc9
	xor	$acc5,$s1,$s1
	ldx	[$tbl+2048+0],%g0	! prefetch te4
	srlx	$acc10,16,$acc10
	xor	$acc6,$s1,$s1
	ldx	[$tbl+2048+32],%g0	! prefetch te4
	srlx	$acc11,24,$acc11
	xor	$acc7,$s1,$s1
	ldx	[$tbl+2048+64],%g0	! prefetch te4
	srlx	$acc13,8,$acc13
	xor	$acc8,$s2,$s2
	ldx	[$tbl+2048+96],%g0	! prefetch te4
	srlx	$acc14,16,$acc14	!
	xor	$acc9,$s2,$s2
	ldx	[$tbl+2048+128],%g0	! prefetch te4
	srlx	$acc15,24,$acc15
	xor	$acc10,$s2,$s2
	ldx	[$tbl+2048+160],%g0	! prefetch te4
	srl	$s0,21,$acc0
	xor	$acc11,$s2,$s2
	ldx	[$tbl+2048+192],%g0	! prefetch te4
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$s3,$s3
	ldx	[$tbl+2048+224],%g0	! prefetch te4
	srl	$s1,13,$acc1	!
	xor	$acc14,$s3,$s3
	xor	$acc15,$s3,$s3
	ba	.Lenc_loop
	and	$acc0,2040,$acc0

.align	32
.Lenc_last:
	srlx	$acc1,8,$acc1	!
	xor	$acc0,$t0,$t0
	ld	[$key+0],$s0
	srlx	$acc2,16,$acc2
	xor	$acc1,$t0,$t0
	ld	[$key+4],$s1
	srlx	$acc3,24,$acc3
	xor	$acc2,$t0,$t0
	ld	[$key+8],$s2	!
	srlx	$acc5,8,$acc5
	xor	$acc3,$t0,$t0
	ld	[$key+12],$s3
	srlx	$acc6,16,$acc6
	xor	$acc4,$t1,$t1
	srlx	$acc7,24,$acc7
	xor	$acc5,$t1,$t1
	srlx	$acc9,8,$acc9	!
	xor	$acc6,$t1,$t1
	srlx	$acc10,16,$acc10
	xor	$acc7,$t1,$t1
	srlx	$acc11,24,$acc11
	xor	$acc8,$t2,$t2
	srlx	$acc13,8,$acc13
	xor	$acc9,$t2,$t2
	srlx	$acc14,16,$acc14	!
	xor	$acc10,$t2,$t2
	srlx	$acc15,24,$acc15
	xor	$acc11,$t2,$t2
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$t3,$t3
	srl	$t0,24,$acc0
	xor	$acc14,$t3,$t3
	srl	$t1,16,$acc1	!
	xor	$acc15,$t3,$t3
	srl	$t2,8,$acc2
	and	$acc1,255,$acc1
	ldub	[$rounds+$acc0],$acc0
	srl	$t1,24,$acc4
	and	$acc2,255,$acc2
	ldub	[$rounds+$acc1],$acc1
	srl	$t2,16,$acc5	!
	and	$t3,255,$acc3
	ldub	[$rounds+$acc2],$acc2
	ldub	[$rounds+$acc3],$acc3
	srl	$t3,8,$acc6
	and	$acc5,255,$acc5
	ldub	[$rounds+$acc4],$acc4
	fmovs	%f0,%f0
	srl	$t2,24,$acc8	!
	and	$acc6,255,$acc6
	ldub	[$rounds+$acc5],$acc5
	srl	$t3,16,$acc9
	and	$t0,255,$acc7
	ldub	[$rounds+$acc6],$acc6
	ldub	[$rounds+$acc7],$acc7
	fmovs	%f0,%f0
	srl	$t0,8,$acc10	!
	and	$acc9,255,$acc9
	ldub	[$rounds+$acc8],$acc8
	srl	$t3,24,$acc12
	and	$acc10,255,$acc10
	ldub	[$rounds+$acc9],$acc9
	srl	$t0,16,$acc13
	and	$t1,255,$acc11
	ldub	[$rounds+$acc10],$acc10	!
	srl	$t1,8,$acc14
	and	$acc13,255,$acc13
	ldub	[$rounds+$acc11],$acc11
	ldub	[$rounds+$acc12],$acc12
	and	$acc14,255,$acc14
	ldub	[$rounds+$acc13],$acc13
	and	$t2,255,$acc15
	ldub	[$rounds+$acc14],$acc14	!
	sll	$acc0,24,$acc0
	xor	$acc3,$s0,$s0
	ldub	[$rounds+$acc15],$acc15
	sll	$acc1,16,$acc1
	xor	$acc0,$s0,$s0
	ldx	[%sp+$bias+$frame+0],%i7	! restore return address
	fmovs	%f0,%f0
	sll	$acc2,8,$acc2	!
	xor	$acc1,$s0,$s0
	sll	$acc4,24,$acc4
	xor	$acc2,$s0,$s0
	sll	$acc5,16,$acc5
	xor	$acc7,$s1,$s1
	sll	$acc6,8,$acc6
	xor	$acc4,$s1,$s1
	sll	$acc8,24,$acc8	!
	xor	$acc5,$s1,$s1
	sll	$acc9,16,$acc9
	xor	$acc11,$s2,$s2
	sll	$acc10,8,$acc10
	xor	$acc6,$s1,$s1
	sll	$acc12,24,$acc12
	xor	$acc8,$s2,$s2
	sll	$acc13,16,$acc13	!
	xor	$acc9,$s2,$s2
	sll	$acc14,8,$acc14
	xor	$acc10,$s2,$s2
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$s3,$s3
	xor	$acc14,$s3,$s3
	xor	$acc15,$s3,$s3
	ret
	restore
.type	_sparcv9_AES_encrypt,#function
.size	_sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
___

# AES_encrypt: exported entry point.
#
# Reads 16 bytes through its first argument, writes 16 bytes through its
# second and hands its third argument to the inner routine as the key
# schedule pointer (%o5, which is $key after the callee's `save`).  If
# both buffer pointers are 4-byte aligned the block is moved with word
# loads/stores; otherwise .Lunaligned_enc assembles and scatters the
# four words byte by byte, most significant byte first.
#
# The table address is formed position-independently: `call` leaves its
# own address (label 1) in %o7 and the delay slot subtracts the distance
# 1b-AES_Te, so %o4 ($tbl in the callee) points at AES_Te.
$code .= <<___;
.align	32
.globl	AES_encrypt
AES_encrypt:
	or	%o0,%o1,%g1
	andcc	%g1,3,%g0
	bnz,pn	%xcc,.Lunaligned_enc
	save	%sp,-$frame,%sp

	ld	[%i0+0],%o0
	ld	[%i0+4],%o1
	ld	[%i0+8],%o2
	ld	[%i0+12],%o3
	mov	%i2,%o5
	nop
1:	call	_sparcv9_AES_encrypt
	sub	%o7,1b-AES_Te,%o4

	st	%o0,[%i1+0]
	st	%o1,[%i1+4]
	st	%o2,[%i1+8]
	st	%o3,[%i1+12]

	ret
	restore

.align	32
.Lunaligned_enc:
	ldub	[%i0+0],%l0
	ldub	[%i0+1],%l1
	ldub	[%i0+2],%l2
	sll	%l0,24,%l0
	ldub	[%i0+3],%l3
	sll	%l1,16,%l1
	ldub	[%i0+4],%l4
	sll	%l2,8,%l2
	or	%l1,%l0,%l0
	ldub	[%i0+5],%l5
	sll	%l4,24,%l4
	or	%l3,%l2,%l2
	ldub	[%i0+6],%l6
	sll	%l5,16,%l5
	or	%l0,%l2,%o0
	ldub	[%i0+7],%l7
	sll	%l6,8,%l6
	or	%l5,%l4,%l4
	ldub	[%i0+8],%l0
	or	%l7,%l6,%l6
	ldub	[%i0+9],%l1
	or	%l4,%l6,%o1
	ldub	[%i0+10],%l2
	sll	%l0,24,%l0
	ldub	[%i0+11],%l3
	sll	%l1,16,%l1
	ldub	[%i0+12],%l4
	sll	%l2,8,%l2
	or	%l1,%l0,%l0
	ldub	[%i0+13],%l5
	sll	%l4,24,%l4
	or	%l3,%l2,%l2
	ldub	[%i0+14],%l6
	sll	%l5,16,%l5
	or	%l0,%l2,%o2
	ldub	[%i0+15],%l7
	sll	%l6,8,%l6
	or	%l5,%l4,%l4
	or	%l7,%l6,%l6
	or	%l4,%l6,%o3
	mov	%i2,%o5
	nop
1:	call	_sparcv9_AES_encrypt
	sub	%o7,1b-AES_Te,%o4

	srl	%o0,24,%l0
	srl	%o0,16,%l1
	stb	%l0,[%i1+0]
	srl	%o0,8,%l2
	stb	%l1,[%i1+1]
	stb	%l2,[%i1+2]
	srl	%o1,24,%l4
	stb	%o0,[%i1+3]
	srl	%o1,16,%l5
	stb	%l4,[%i1+4]
	srl	%o1,8,%l6
	stb	%l5,[%i1+5]
	stb	%l6,[%i1+6]
	srl	%o2,24,%l0
	stb	%o1,[%i1+7]
	srl	%o2,16,%l1
	stb	%l0,[%i1+8]
	srl	%o2,8,%l2
	stb	%l1,[%i1+9]
	stb	%l2,[%i1+10]
	srl	%o3,24,%l4
	stb	%o2,[%i1+11]
	srl	%o3,16,%l5
	stb	%l4,[%i1+12]
	srl	%o3,8,%l6
	stb	%l5,[%i1+13]
	stb	%l6,[%i1+14]
	stb	%o3,[%i1+15]

	ret
	restore
.type	AES_encrypt,#function
.size	AES_encrypt,(.-AES_encrypt)
___

$code .= <<___;
.align	256
AES_Td:
___

# Decryption table: 256 doubled words = 2KB, same layout as AES_Te.
_data_word(
    0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
    0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
    0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
    0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
    0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
    0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
    0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
    0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
    0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
    0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
    0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
    0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
    0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
    0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
    0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
    0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
    0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
    0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
    0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
    0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
    0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
    0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
    0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
    0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
    0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
    0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
    0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
    0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
    0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
    0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
    0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
    0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
    0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
    0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
    0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
    0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
    0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
    0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
    0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
    0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
    0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
    0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
    0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
    0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
    0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
    0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
    0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
    0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
    0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
    0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
    0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
    0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
    0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
    0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
    0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
    0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
    0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
    0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
    0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
    0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
    0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
    0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
    0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
    0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);

# 256-byte inverse S-box at AES_Td+2048, used by the last decryption
# round.
$code .= <<___;
	.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.type	AES_Td,#object
.size	AES_Td,(.-AES_Td)
___

# _sparcv9_AES_decrypt: inner decryption routine.
#
# Same register interface, two-rounds-per-iteration loop, return-address
# spill and last-round S-box handling as _sparcv9_AES_encrypt; it is
# called with $tbl pointing at AES_Td and selects the state bytes for
# each look-up in a different order.
$code .= <<___;
.align	64
.skip	16
_sparcv9_AES_decrypt:
	save	%sp,-$frame-$locals,%sp
	stx	%i7,[%sp+$bias+$frame+0]	! off-load return address
	ld	[$key+240],$rounds
	ld	[$key+0],$t0
	ld	[$key+4],$t1	!
	ld	[$key+8],$t2
	ld	[$key+12],$t3
	srl	$rounds,1,$rounds
	xor	$t0,$s0,$s0
	ld	[$key+16],$t0
	xor	$t1,$s1,$s1
	ld	[$key+20],$t1
	srl	$s0,21,$acc0	!
	xor	$t2,$s2,$s2
	ld	[$key+24],$t2
	xor	$t3,$s3,$s3
	and	$acc0,2040,$acc0
	ld	[$key+28],$t3
	srl	$s3,13,$acc1
	nop
.Ldec_loop:
	srl	$s2,5,$acc2	!
	and	$acc1,2040,$acc1
	ldx	[$tbl+$acc0],$acc0
	sll	$s1,3,$acc3
	and	$acc2,2040,$acc2
	ldx	[$tbl+$acc1],$acc1
	srl	$s1,21,$acc4
	and	$acc3,2040,$acc3
	ldx	[$tbl+$acc2],$acc2	!
	srl	$s0,13,$acc5
	and	$acc4,2040,$acc4
	ldx	[$tbl+$acc3],$acc3
	srl	$s3,5,$acc6
	and	$acc5,2040,$acc5
	ldx	[$tbl+$acc4],$acc4
	fmovs	%f0,%f0
	sll	$s2,3,$acc7	!
	and	$acc6,2040,$acc6
	ldx	[$tbl+$acc5],$acc5
	srl	$s2,21,$acc8
	and	$acc7,2040,$acc7
	ldx	[$tbl+$acc6],$acc6
	srl	$s1,13,$acc9
	and	$acc8,2040,$acc8
	ldx	[$tbl+$acc7],$acc7	!
	srl	$s0,5,$acc10
	and	$acc9,2040,$acc9
	ldx	[$tbl+$acc8],$acc8
	sll	$s3,3,$acc11
	and	$acc10,2040,$acc10
	ldx	[$tbl+$acc9],$acc9
	fmovs	%f0,%f0
	srl	$s3,21,$acc12	!
	and	$acc11,2040,$acc11
	ldx	[$tbl+$acc10],$acc10
	srl	$s2,13,$acc13
	and	$acc12,2040,$acc12
	ldx	[$tbl+$acc11],$acc11
	srl	$s1,5,$acc14
	and	$acc13,2040,$acc13
	ldx	[$tbl+$acc12],$acc12	!
	sll	$s0,3,$acc15
	and	$acc14,2040,$acc14
	ldx	[$tbl+$acc13],$acc13
	and	$acc15,2040,$acc15
	add	$key,32,$key
	ldx	[$tbl+$acc14],$acc14
	fmovs	%f0,%f0
	subcc	$rounds,1,$rounds	!
	ldx	[$tbl+$acc15],$acc15
	bz,a,pn	%icc,.Ldec_last
	add	$tbl,2048,$rounds
	srlx	$acc1,8,$acc1
	xor	$acc0,$t0,$t0
	ld	[$key+0],$s0
	fmovs	%f0,%f0
	srlx	$acc2,16,$acc2	!
	xor	$acc1,$t0,$t0
	ld	[$key+4],$s1
	srlx	$acc3,24,$acc3
	xor	$acc2,$t0,$t0
	ld	[$key+8],$s2
	srlx	$acc5,8,$acc5
	xor	$acc3,$t0,$t0
	ld	[$key+12],$s3	!
	srlx	$acc6,16,$acc6
	xor	$acc4,$t1,$t1
	fmovs	%f0,%f0
	srlx	$acc7,24,$acc7
	xor	$acc5,$t1,$t1
	srlx	$acc9,8,$acc9
	xor	$acc6,$t1,$t1
	srlx	$acc10,16,$acc10	!
	xor	$acc7,$t1,$t1
	srlx	$acc11,24,$acc11
	xor	$acc8,$t2,$t2
	srlx	$acc13,8,$acc13
	xor	$acc9,$t2,$t2
	srlx	$acc14,16,$acc14
	xor	$acc10,$t2,$t2
	srlx	$acc15,24,$acc15	!
	xor	$acc11,$t2,$t2
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$t3,$t3
	srl	$t0,21,$acc0
	xor	$acc14,$t3,$t3
	xor	$acc15,$t3,$t3
	srl	$t3,13,$acc1
	and	$acc0,2040,$acc0	!
	srl	$t2,5,$acc2
	and	$acc1,2040,$acc1
	ldx	[$tbl+$acc0],$acc0
	sll	$t1,3,$acc3
	and	$acc2,2040,$acc2
	ldx	[$tbl+$acc1],$acc1
	fmovs	%f0,%f0
	srl	$t1,21,$acc4	!
	and	$acc3,2040,$acc3
	ldx	[$tbl+$acc2],$acc2
	srl	$t0,13,$acc5
	and	$acc4,2040,$acc4
	ldx	[$tbl+$acc3],$acc3
	srl	$t3,5,$acc6
	and	$acc5,2040,$acc5
	ldx	[$tbl+$acc4],$acc4	!
	sll	$t2,3,$acc7
	and	$acc6,2040,$acc6
	ldx	[$tbl+$acc5],$acc5
	srl	$t2,21,$acc8
	and	$acc7,2040,$acc7
	ldx	[$tbl+$acc6],$acc6
	fmovs	%f0,%f0
	srl	$t1,13,$acc9	!
	and	$acc8,2040,$acc8
	ldx	[$tbl+$acc7],$acc7
	srl	$t0,5,$acc10
	and	$acc9,2040,$acc9
	ldx	[$tbl+$acc8],$acc8
	sll	$t3,3,$acc11
	and	$acc10,2040,$acc10
	ldx	[$tbl+$acc9],$acc9	!
	srl	$t3,21,$acc12
	and	$acc11,2040,$acc11
	ldx	[$tbl+$acc10],$acc10
	srl	$t2,13,$acc13
	and	$acc12,2040,$acc12
	ldx	[$tbl+$acc11],$acc11
	fmovs	%f0,%f0
	srl	$t1,5,$acc14	!
	and	$acc13,2040,$acc13
	ldx	[$tbl+$acc12],$acc12
	sll	$t0,3,$acc15
	and	$acc14,2040,$acc14
	ldx	[$tbl+$acc13],$acc13
	srlx	$acc1,8,$acc1
	and	$acc15,2040,$acc15
	ldx	[$tbl+$acc14],$acc14	!
	srlx	$acc2,16,$acc2
	xor	$acc0,$s0,$s0
	ldx	[$tbl+$acc15],$acc15
	srlx	$acc3,24,$acc3
	xor	$acc1,$s0,$s0
	ld	[$key+16],$t0
	fmovs	%f0,%f0
	srlx	$acc5,8,$acc5	!
	xor	$acc2,$s0,$s0
	ld	[$key+20],$t1
	srlx	$acc6,16,$acc6
	xor	$acc3,$s0,$s0
	ld	[$key+24],$t2
	srlx	$acc7,24,$acc7
	xor	$acc4,$s1,$s1
	ld	[$key+28],$t3	!
	srlx	$acc9,8,$acc9
	xor	$acc5,$s1,$s1
	ldx	[$tbl+2048+0],%g0	! prefetch td4
	srlx	$acc10,16,$acc10
	xor	$acc6,$s1,$s1
	ldx	[$tbl+2048+32],%g0	! prefetch td4
	srlx	$acc11,24,$acc11
	xor	$acc7,$s1,$s1
	ldx	[$tbl+2048+64],%g0	! prefetch td4
	srlx	$acc13,8,$acc13
	xor	$acc8,$s2,$s2
	ldx	[$tbl+2048+96],%g0	! prefetch td4
	srlx	$acc14,16,$acc14	!
	xor	$acc9,$s2,$s2
	ldx	[$tbl+2048+128],%g0	! prefetch td4
	srlx	$acc15,24,$acc15
	xor	$acc10,$s2,$s2
	ldx	[$tbl+2048+160],%g0	! prefetch td4
	srl	$s0,21,$acc0
	xor	$acc11,$s2,$s2
	ldx	[$tbl+2048+192],%g0	! prefetch td4
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$s3,$s3
	ldx	[$tbl+2048+224],%g0	! prefetch td4
	and	$acc0,2040,$acc0	!
	xor	$acc14,$s3,$s3
	xor	$acc15,$s3,$s3
	ba	.Ldec_loop
	srl	$s3,13,$acc1

.align	32
.Ldec_last:
	srlx	$acc1,8,$acc1	!
	xor	$acc0,$t0,$t0
	ld	[$key+0],$s0
	srlx	$acc2,16,$acc2
	xor	$acc1,$t0,$t0
	ld	[$key+4],$s1
	srlx	$acc3,24,$acc3
	xor	$acc2,$t0,$t0
	ld	[$key+8],$s2	!
	srlx	$acc5,8,$acc5
	xor	$acc3,$t0,$t0
	ld	[$key+12],$s3
	srlx	$acc6,16,$acc6
	xor	$acc4,$t1,$t1
	srlx	$acc7,24,$acc7
	xor	$acc5,$t1,$t1
	srlx	$acc9,8,$acc9	!
	xor	$acc6,$t1,$t1
	srlx	$acc10,16,$acc10
	xor	$acc7,$t1,$t1
	srlx	$acc11,24,$acc11
	xor	$acc8,$t2,$t2
	srlx	$acc13,8,$acc13
	xor	$acc9,$t2,$t2
	srlx	$acc14,16,$acc14	!
	xor	$acc10,$t2,$t2
	srlx	$acc15,24,$acc15
	xor	$acc11,$t2,$t2
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$t3,$t3
	srl	$t0,24,$acc0
	xor	$acc14,$t3,$t3
	xor	$acc15,$t3,$t3	!
	srl	$t3,16,$acc1
	srl	$t2,8,$acc2
	and	$acc1,255,$acc1
	ldub	[$rounds+$acc0],$acc0
	srl	$t1,24,$acc4
	and	$acc2,255,$acc2
	ldub	[$rounds+$acc1],$acc1
	srl	$t0,16,$acc5	!
	and	$t1,255,$acc3
	ldub	[$rounds+$acc2],$acc2
	ldub	[$rounds+$acc3],$acc3
	srl	$t3,8,$acc6
	and	$acc5,255,$acc5
	ldub	[$rounds+$acc4],$acc4
	fmovs	%f0,%f0
	srl	$t2,24,$acc8	!
	and	$acc6,255,$acc6
	ldub	[$rounds+$acc5],$acc5
	srl	$t1,16,$acc9
	and	$t2,255,$acc7
	ldub	[$rounds+$acc6],$acc6
	ldub	[$rounds+$acc7],$acc7
	fmovs	%f0,%f0
	srl	$t0,8,$acc10	!
	and	$acc9,255,$acc9
	ldub	[$rounds+$acc8],$acc8
	srl	$t3,24,$acc12
	and	$acc10,255,$acc10
	ldub	[$rounds+$acc9],$acc9
	srl	$t2,16,$acc13
	and	$t3,255,$acc11
	ldub	[$rounds+$acc10],$acc10	!
	srl	$t1,8,$acc14
	and	$acc13,255,$acc13
	ldub	[$rounds+$acc11],$acc11
	ldub	[$rounds+$acc12],$acc12
	and	$acc14,255,$acc14
	ldub	[$rounds+$acc13],$acc13
	and	$t0,255,$acc15
	ldub	[$rounds+$acc14],$acc14	!
	sll	$acc0,24,$acc0
	xor	$acc3,$s0,$s0
	ldub	[$rounds+$acc15],$acc15
	sll	$acc1,16,$acc1
	xor	$acc0,$s0,$s0
	ldx	[%sp+$bias+$frame+0],%i7	! restore return address
	fmovs	%f0,%f0
	sll	$acc2,8,$acc2	!
	xor	$acc1,$s0,$s0
	sll	$acc4,24,$acc4
	xor	$acc2,$s0,$s0
	sll	$acc5,16,$acc5
	xor	$acc7,$s1,$s1
	sll	$acc6,8,$acc6
	xor	$acc4,$s1,$s1
	sll	$acc8,24,$acc8	!
	xor	$acc5,$s1,$s1
	sll	$acc9,16,$acc9
	xor	$acc11,$s2,$s2
	sll	$acc10,8,$acc10
	xor	$acc6,$s1,$s1
	sll	$acc12,24,$acc12
	xor	$acc8,$s2,$s2
	sll	$acc13,16,$acc13	!
	xor	$acc9,$s2,$s2
	sll	$acc14,8,$acc14
	xor	$acc10,$s2,$s2
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$s3,$s3
	xor	$acc14,$s3,$s3
	xor	$acc15,$s3,$s3
	ret
	restore
.type	_sparcv9_AES_decrypt,#function
.size	_sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
___

# AES_decrypt: exported entry point.  Mirrors AES_encrypt (aligned word
# path, byte-wise .Lunaligned_dec fallback, PC-relative table address),
# but passes AES_Td and calls _sparcv9_AES_decrypt.
$code .= <<___;
.align	32
.globl	AES_decrypt
AES_decrypt:
	or	%o0,%o1,%g1
	andcc	%g1,3,%g0
	bnz,pn	%xcc,.Lunaligned_dec
	save	%sp,-$frame,%sp

	ld	[%i0+0],%o0
	ld	[%i0+4],%o1
	ld	[%i0+8],%o2
	ld	[%i0+12],%o3
	mov	%i2,%o5
	nop
1:	call	_sparcv9_AES_decrypt
	sub	%o7,1b-AES_Td,%o4

	st	%o0,[%i1+0]
	st	%o1,[%i1+4]
	st	%o2,[%i1+8]
	st	%o3,[%i1+12]

	ret
	restore

.align	32
.Lunaligned_dec:
	ldub	[%i0+0],%l0
	ldub	[%i0+1],%l1
	ldub	[%i0+2],%l2
	sll	%l0,24,%l0
	ldub	[%i0+3],%l3
	sll	%l1,16,%l1
	ldub	[%i0+4],%l4
	sll	%l2,8,%l2
	or	%l1,%l0,%l0
	ldub	[%i0+5],%l5
	sll	%l4,24,%l4
	or	%l3,%l2,%l2
	ldub	[%i0+6],%l6
	sll	%l5,16,%l5
	or	%l0,%l2,%o0
	ldub	[%i0+7],%l7
	sll	%l6,8,%l6
	or	%l5,%l4,%l4
	ldub	[%i0+8],%l0
	or	%l7,%l6,%l6
	ldub	[%i0+9],%l1
	or	%l4,%l6,%o1
	ldub	[%i0+10],%l2
	sll	%l0,24,%l0
	ldub	[%i0+11],%l3
	sll	%l1,16,%l1
	ldub	[%i0+12],%l4
	sll	%l2,8,%l2
	or	%l1,%l0,%l0
	ldub	[%i0+13],%l5
	sll	%l4,24,%l4
	or	%l3,%l2,%l2
	ldub	[%i0+14],%l6
	sll	%l5,16,%l5
	or	%l0,%l2,%o2
	ldub	[%i0+15],%l7
	sll	%l6,8,%l6
	or	%l5,%l4,%l4
	or	%l7,%l6,%l6
	or	%l4,%l6,%o3
	mov	%i2,%o5
	nop
1:	call	_sparcv9_AES_decrypt
	sub	%o7,1b-AES_Td,%o4

	srl	%o0,24,%l0
	srl	%o0,16,%l1
	stb	%l0,[%i1+0]
	srl	%o0,8,%l2
	stb	%l1,[%i1+1]
	stb	%l2,[%i1+2]
	srl	%o1,24,%l4
	stb	%o0,[%i1+3]
	srl	%o1,16,%l5
	stb	%l4,[%i1+4]
	srl	%o1,8,%l6
	stb	%l5,[%i1+5]
	stb	%l6,[%i1+6]
	srl	%o2,24,%l0
	stb	%o1,[%i1+7]
	srl	%o2,16,%l1
	stb	%l0,[%i1+8]
	srl	%o2,8,%l2
	stb	%l1,[%i1+9]
	stb	%l2,[%i1+10]
	srl	%o3,24,%l4
	stb	%o2,[%i1+11]
	srl	%o3,16,%l5
	stb	%l4,[%i1+12]
	srl	%o3,8,%l6
	stb	%l5,[%i1+13]
	stb	%l6,[%i1+14]
	stb	%o3,[%i1+15]

	ret
	restore
.type	AES_decrypt,#function
.size	AES_decrypt,(.-AES_decrypt)
___

# fmovs instructions substituting for FP nops were originally added
# to meet specific instruction alignment requirements to maximize ILP.
# As UltraSPARC T1, a.k.a. Niagara, has shared FPU, FP nops can have
# undesired effect, so just omit them and sacrifice some portion of
# percent in performance...
#
# Plain substitution: the replacement is empty, so the /e modifier the
# original carried was evaluating nothing.
$code =~ s/fmovs.*$//gm;

# Emit the generated assembly.  Autoflush is enabled for the duration of
# the write so that a failing write makes `print` return false instead of
# leaving a silently truncated output file behind.
{
    local $| = 1;
    print STDOUT $code or die "error writing generated assembly: $!";
}