openssl/crypto/aes/asm/aes-mips.pl
2012-09-15 11:18:20 +00:00

1945 lines
48 KiB
Raku
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# AES for MIPS
# October 2010
#
# Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
# spends ~68 cycles per byte processed with 128-bit key. This is ~16%
# faster than gcc-generated code, which is not very impressive. But
# recall that compressed S-box requires extra processing, namely
# additional rotations. Rotations are implemented with lwl/lwr pairs,
# which are normally used for loading unaligned data. Another cool
# thing about this module is its endian neutrality, which means that
# it processes data without ever changing byte order...
# September 2012
#
# Add MIPS32R2 code.
######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in ABI neutral
# manner. Therefore let's stick to NUBI register layout:
#
# Bind every symbolic NUBI register name to its "$<number>" assembler
# operand. The numbers are spelled out per group so the mapping can be
# read off directly.
($zero,$at,$t0,$t1,$t2) =
    map { sprintf('$%d', $_) } (0, 1, 2, 24, 25);
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7) =
    map { sprintf('$%d', $_) } (4 .. 11);
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11) =
    map { sprintf('$%d', $_) } (12 .. 23);
($gp,$tp,$sp,$fp,$ra) =
    map { sprintf('$%d', $_) } (3, 28, 29, 30, 31);
#
# The return value is placed in $a0. Following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
# old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
# The first command-line argument selects the ABI flavour; supported
# flavours are o32,n32,64,nubi32,nubi64 and o32 is the default.
$flavour = shift || "o32";

# Pointer-sized arithmetic, insert, store and load mnemonics plus the
# register size in bytes. The d* arithmetic forms chosen for n32/64
# incidentally work even on n32.
($PTR_ADD,$PTR_SUB,$PTR_INS,$REG_S,$REG_L,$PTR_SLL,$SZREG) =
    ($flavour =~ /64|n32/i)
        ? ("dadd","dsub","dins","sd","ld","dsll",8)
        : ("add", "sub", "ins", "sw","lw","sll", 4);

# Register handed to .cpload/.cpsetup in the PIC prologues.
if ($flavour =~ /nubi/i) {
    $pf = $t0;
} else {
    $pf = $t2;
}
#
# <appro@openssl.org>
#
######################################################################
# Determine the target byte order. When CC is set, the token MIPSEL is
# piped through its preprocessor: if the token comes back verbatim the
# target is treated as big-endian (presumably little-endian toolchains
# predefine MIPSEL and expand it away -- confirm against the toolchain).
# Without CC there is nothing to probe, so $big_endian stays undefined
# and the host-order fallback below decides. Previously the probe ran
# unconditionally and always produced a defined 0, which made that
# fallback unreachable.
$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0 if ($ENV{CC});

# Redirect STDOUT to the last argument that looks like "name.ext".
# With no such argument the output keeps going to the inherited STDOUT.
for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
if (defined($output)) {
    open STDOUT,">",$output or die "can't open $output: $!";
}

# No compiler to ask: fall back to the byte order of the host.
if (!defined($big_endian))
    { $big_endian=(unpack('L',pack('N',1))==1); }

# Consume arguments up to and including the first one that looks like
# "name.ext" and (re)open STDOUT on it. The scan also stops at a false
# argument, in which case an empty name is skipped rather than opened.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
if (defined($output) && length($output)) {
    open STDOUT,">",$output or die "can't open $output: $!";
}

# Byte offsets used with lwl/swl ($MSB) and lwr/swr ($LSB) in the
# templates below.
my ($MSB,$LSB)=(0,3); # automatically converted to little-endian
# Start accumulating the generated assembly in $code: select the text
# section, pull in the FIPS symbol header when OPENSSL_FIPSCANISTER is
# defined, request pic2 unless this is a non-PIC VxWorks build, and
# switch to "noat" (the templates below use $at as a scratch register).
$code.=<<___;
.text
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif
#if !defined(__vxworks) || defined(__pic__)
.option pic2
#endif
.set noat
___
{{{
# Stack frame of AES_encrypt/AES_decrypt: 16 register-sized slots.
my $FRAMESIZE=16*$SZREG;
# .mask bitmap of the saved registers; the nubi value additionally has
# the bits for registers 3 and 12-15 set.
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
# Input pointer, output pointer, key schedule pointer and table pointer,
# followed by the four state words. Inside this scope $s0-$s3 are
# lexical aliases for $a4-$a7 and shadow the file-level $s0-$s3.
my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
# Table-index scratch registers. They are bound to the file-level
# $t0,$t1,$t2 because the lexical $t0-$t11 are only declared on the
# following line.
my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
# Twelve temporaries mapped onto registers 12-23.
my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
# Pointer to the next round key and the remaining-round counter.
my ($key0,$cnt)=($gp,$fp);
# instruction ordering is "stolen" from output from MIPSpro assembler
# invoked with -mips3 -O3 arguments...
# Emit _mips_AES_encrypt, the internal (non-.globl) encryption core, and
# open the definition of the exported AES_encrypt whose prologue is
# appended by the statements that follow.
#
# Core contract as visible in the template: on entry the four state
# words are in $s0-$s3, $key points at the key schedule (round count is
# read from offset 240) and $Tbl at the lookup table; on exit the result
# is in $s0-$s3. $t0-$t11, $i0-$i3, $key0 and $cnt are clobbered.
#
# Two code paths are selected by the C preprocessor: a MIPS32R2/MIPS64R2
# path (lw + rotr + ins, with separate _MIPSEL and big-endian operand
# orders) and a generic path using lwl/lwr pairs as rotations.
# _xtr/_ins/_ins2 are made-up mnemonics that are not defined in this
# part of the file; presumably they are rewritten into real
# instructions before the code is printed -- confirm in the output loop.
#
# The per-load comments in the R2 round body used to be copy-pasted as
# "Te1[s1>>16]"; they now name the lookups actually performed, matching
# the generic path further down.
$code.=<<___;
.align 5
.ent _mips_AES_encrypt
_mips_AES_encrypt:
.frame $sp,0,$ra
.set reorder
lw $t0,0($key)
lw $t1,4($key)
lw $t2,8($key)
lw $t3,12($key)
lw $cnt,240($key)
$PTR_ADD $key0,$key,16
xor $s0,$t0
xor $s1,$t1
xor $s2,$t2
xor $s3,$t3
sub $cnt,1
_xtr $i0,$s1,16-2
.Loop_enc:
_xtr $i1,$s2,16-2
_xtr $i2,$s3,16-2
_xtr $i3,$s0,16-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
lw $t0,0($i0) # Te1[s1>>16]
_xtr $i0,$s2,8-2
lw $t1,0($i1) # Te1[s2>>16]
_xtr $i1,$s3,8-2
lw $t2,0($i2) # Te1[s3>>16]
_xtr $i2,$s0,8-2
lw $t3,0($i3) # Te1[s0>>16]
_xtr $i3,$s1,8-2
#else
lwl $t0,3($i0) # Te1[s1>>16]
lwl $t1,3($i1) # Te1[s2>>16]
lwl $t2,3($i2) # Te1[s3>>16]
lwl $t3,3($i3) # Te1[s0>>16]
lwr $t0,2($i0) # Te1[s1>>16]
_xtr $i0,$s2,8-2
lwr $t1,2($i1) # Te1[s2>>16]
_xtr $i1,$s3,8-2
lwr $t2,2($i2) # Te1[s3>>16]
_xtr $i2,$s0,8-2
lwr $t3,2($i3) # Te1[s0>>16]
_xtr $i3,$s1,8-2
#endif
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
rotr $t0,$t0,8
rotr $t1,$t1,8
rotr $t2,$t2,8
rotr $t3,$t3,8
# if defined(_MIPSEL)
lw $t4,0($i0) # Te2[s2>>8]
_xtr $i0,$s3,0-2
lw $t5,0($i1) # Te2[s3>>8]
_xtr $i1,$s0,0-2
lw $t6,0($i2) # Te2[s0>>8]
_xtr $i2,$s1,0-2
lw $t7,0($i3) # Te2[s1>>8]
_xtr $i3,$s2,0-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lw $t8,0($i0) # Te3[s3]
$PTR_INS $i0,$s0,2,8
lw $t9,0($i1) # Te3[s0]
$PTR_INS $i1,$s1,2,8
lw $t10,0($i2) # Te3[s1]
$PTR_INS $i2,$s2,2,8
lw $t11,0($i3) # Te3[s2]
$PTR_INS $i3,$s3,2,8
# else
lw $t4,0($i0) # Te2[s2>>8]
$PTR_INS $i0,$s3,2,8
lw $t5,0($i1) # Te2[s3>>8]
$PTR_INS $i1,$s0,2,8
lw $t6,0($i2) # Te2[s0>>8]
$PTR_INS $i2,$s1,2,8
lw $t7,0($i3) # Te2[s1>>8]
$PTR_INS $i3,$s2,2,8
lw $t8,0($i0) # Te3[s3]
_xtr $i0,$s0,24-2
lw $t9,0($i1) # Te3[s0]
_xtr $i1,$s1,24-2
lw $t10,0($i2) # Te3[s1]
_xtr $i2,$s2,24-2
lw $t11,0($i3) # Te3[s2]
_xtr $i3,$s3,24-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
# endif
rotr $t4,$t4,16
rotr $t5,$t5,16
rotr $t6,$t6,16
rotr $t7,$t7,16
rotr $t8,$t8,24
rotr $t9,$t9,24
rotr $t10,$t10,24
rotr $t11,$t11,24
#else
lwl $t4,2($i0) # Te2[s2>>8]
lwl $t5,2($i1) # Te2[s3>>8]
lwl $t6,2($i2) # Te2[s0>>8]
lwl $t7,2($i3) # Te2[s1>>8]
lwr $t4,1($i0) # Te2[s2>>8]
_xtr $i0,$s3,0-2
lwr $t5,1($i1) # Te2[s3>>8]
_xtr $i1,$s0,0-2
lwr $t6,1($i2) # Te2[s0>>8]
_xtr $i2,$s1,0-2
lwr $t7,1($i3) # Te2[s1>>8]
_xtr $i3,$s2,0-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lwl $t8,1($i0) # Te3[s3]
lwl $t9,1($i1) # Te3[s0]
lwl $t10,1($i2) # Te3[s1]
lwl $t11,1($i3) # Te3[s2]
lwr $t8,0($i0) # Te3[s3]
_xtr $i0,$s0,24-2
lwr $t9,0($i1) # Te3[s0]
_xtr $i1,$s1,24-2
lwr $t10,0($i2) # Te3[s1]
_xtr $i2,$s2,24-2
lwr $t11,0($i3) # Te3[s2]
_xtr $i3,$s3,24-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#endif
xor $t0,$t4
lw $t4,0($i0) # Te0[s0>>24]
xor $t1,$t5
lw $t5,0($i1) # Te0[s1>>24]
xor $t2,$t6
lw $t6,0($i2) # Te0[s2>>24]
xor $t3,$t7
lw $t7,0($i3) # Te0[s3>>24]
xor $t0,$t8
lw $s0,0($key0)
xor $t1,$t9
lw $s1,4($key0)
xor $t2,$t10
lw $s2,8($key0)
xor $t3,$t11
lw $s3,12($key0)
xor $t0,$t4
xor $t1,$t5
xor $t2,$t6
xor $t3,$t7
sub $cnt,1
$PTR_ADD $key0,16
xor $s0,$t0
xor $s1,$t1
xor $s2,$t2
xor $s3,$t3
.set noreorder
bnez $cnt,.Loop_enc
_xtr $i0,$s1,16-2
.set reorder
_xtr $i1,$s2,16-2
_xtr $i2,$s3,16-2
_xtr $i3,$s0,16-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t0,2($i0) # Te4[s1>>16]
_xtr $i0,$s2,8-2
lbu $t1,2($i1) # Te4[s2>>16]
_xtr $i1,$s3,8-2
lbu $t2,2($i2) # Te4[s3>>16]
_xtr $i2,$s0,8-2
lbu $t3,2($i3) # Te4[s0>>16]
_xtr $i3,$s1,8-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
# if defined(_MIPSEL)
lbu $t4,2($i0) # Te4[s2>>8]
$PTR_INS $i0,$s0,2,8
lbu $t5,2($i1) # Te4[s3>>8]
$PTR_INS $i1,$s1,2,8
lbu $t6,2($i2) # Te4[s0>>8]
$PTR_INS $i2,$s2,2,8
lbu $t7,2($i3) # Te4[s1>>8]
$PTR_INS $i3,$s3,2,8
lbu $t8,2($i0) # Te4[s0>>24]
_xtr $i0,$s3,0-2
lbu $t9,2($i1) # Te4[s1>>24]
_xtr $i1,$s0,0-2
lbu $t10,2($i2) # Te4[s2>>24]
_xtr $i2,$s1,0-2
lbu $t11,2($i3) # Te4[s3>>24]
_xtr $i3,$s2,0-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
# else
lbu $t4,2($i0) # Te4[s2>>8]
_xtr $i0,$s0,24-2
lbu $t5,2($i1) # Te4[s3>>8]
_xtr $i1,$s1,24-2
lbu $t6,2($i2) # Te4[s0>>8]
_xtr $i2,$s2,24-2
lbu $t7,2($i3) # Te4[s1>>8]
_xtr $i3,$s3,24-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t8,2($i0) # Te4[s0>>24]
$PTR_INS $i0,$s3,2,8
lbu $t9,2($i1) # Te4[s1>>24]
$PTR_INS $i1,$s0,2,8
lbu $t10,2($i2) # Te4[s2>>24]
$PTR_INS $i2,$s1,2,8
lbu $t11,2($i3) # Te4[s3>>24]
$PTR_INS $i3,$s2,2,8
# endif
_ins $t0,16
_ins $t1,16
_ins $t2,16
_ins $t3,16
_ins2 $t0,$t4,8
lbu $t4,2($i0) # Te4[s3]
_ins2 $t1,$t5,8
lbu $t5,2($i1) # Te4[s0]
_ins2 $t2,$t6,8
lbu $t6,2($i2) # Te4[s1]
_ins2 $t3,$t7,8
lbu $t7,2($i3) # Te4[s2]
_ins2 $t0,$t8,24
lw $s0,0($key0)
_ins2 $t1,$t9,24
lw $s1,4($key0)
_ins2 $t2,$t10,24
lw $s2,8($key0)
_ins2 $t3,$t11,24
lw $s3,12($key0)
_ins2 $t0,$t4,0
_ins2 $t1,$t5,0
_ins2 $t2,$t6,0
_ins2 $t3,$t7,0
#else
lbu $t4,2($i0) # Te4[s2>>8]
_xtr $i0,$s0,24-2
lbu $t5,2($i1) # Te4[s3>>8]
_xtr $i1,$s1,24-2
lbu $t6,2($i2) # Te4[s0>>8]
_xtr $i2,$s2,24-2
lbu $t7,2($i3) # Te4[s1>>8]
_xtr $i3,$s3,24-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t8,2($i0) # Te4[s0>>24]
_xtr $i0,$s3,0-2
lbu $t9,2($i1) # Te4[s1>>24]
_xtr $i1,$s0,0-2
lbu $t10,2($i2) # Te4[s2>>24]
_xtr $i2,$s1,0-2
lbu $t11,2($i3) # Te4[s3>>24]
_xtr $i3,$s2,0-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
_ins $t0,16
_ins $t1,16
_ins $t2,16
_ins $t3,16
_ins $t4,8
_ins $t5,8
_ins $t6,8
_ins $t7,8
xor $t0,$t4
lbu $t4,2($i0) # Te4[s3]
xor $t1,$t5
lbu $t5,2($i1) # Te4[s0]
xor $t2,$t6
lbu $t6,2($i2) # Te4[s1]
xor $t3,$t7
lbu $t7,2($i3) # Te4[s2]
_ins $t8,24
lw $s0,0($key0)
_ins $t9,24
lw $s1,4($key0)
_ins $t10,24
lw $s2,8($key0)
_ins $t11,24
lw $s3,12($key0)
xor $t0,$t8
xor $t1,$t9
xor $t2,$t10
xor $t3,$t11
_ins $t4,0
_ins $t5,0
_ins $t6,0
_ins $t7,0
xor $t0,$t4
xor $t1,$t5
xor $t2,$t6
xor $t3,$t7
#endif
xor $s0,$t0
xor $s1,$t1
xor $s2,$t2
xor $s3,$t3
jr $ra
.end _mips_AES_encrypt
.align 5
.globl AES_encrypt
.ent AES_encrypt
AES_encrypt:
.frame $sp,$FRAMESIZE,$ra
.mask $SAVED_REGS_MASK,-$SZREG
.set noreorder
___
# AES_encrypt, continued. The pieces below are appended in order and
# some are flavour-dependent: PIC set-up, register saves, body (load the
# block, call the core, store the block) and the matching restores.
$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
.cpload $pf
___
# Allocate the frame and save $ra, $fp and $s4-$s11 in slots 1-10,
# counted down from the top of the frame.
$code.=<<___;
$PTR_SUB $sp,$FRAMESIZE
$REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
$REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
$REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
$REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
$REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
$REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
$REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
$REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
$REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
$REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
___
# nubi only: also save registers 12-15 and $gp in slots 11-15. The
# registers are written as literal numbers because $s0-$s3 are
# re-declared in this scope as aliases for $a4-$a7.
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
$REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
$REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
$REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
$REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
$REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
.cplocal $Tbl
.cpsetup $pf,$zero,AES_encrypt
___
# Body: point $Tbl at AES_Te, load the 16 input bytes into $s0-$s3 with
# lwl/lwr pairs, run the core, store the result with swr/swl pairs and
# then begin restoring the registers saved in slots 1-10.
$code.=<<___;
.set reorder
la $Tbl,AES_Te # PIC-ified 'load address'
lwl $s0,0+$MSB($inp)
lwl $s1,4+$MSB($inp)
lwl $s2,8+$MSB($inp)
lwl $s3,12+$MSB($inp)
lwr $s0,0+$LSB($inp)
lwr $s1,4+$LSB($inp)
lwr $s2,8+$LSB($inp)
lwr $s3,12+$LSB($inp)
bal _mips_AES_encrypt
swr $s0,0+$LSB($out)
swr $s1,4+$LSB($out)
swr $s2,8+$LSB($out)
swr $s3,12+$LSB($out)
swl $s0,0+$MSB($out)
swl $s1,4+$MSB($out)
swl $s2,8+$MSB($out)
swl $s3,12+$MSB($out)
.set noreorder
$REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
$REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
$REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
$REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
$REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
$REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
$REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
$REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
$REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
$REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
___
# nubi only: restore from the same slots 11-15 used by the nubi saves.
$code.=<<___ if ($flavour =~ /nubi/i);
$REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
$REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
$REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
$REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
$REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
___
# Return; noreorder is in effect, so the frame release sits in the
# branch delay slot of jr.
$code.=<<___;
jr $ra
$PTR_ADD $sp,$FRAMESIZE
.end AES_encrypt
___
# Emit _mips_AES_decrypt, the internal (non-.globl) decryption core, and
# open the definition of the exported AES_decrypt whose prologue is
# appended by the statements that follow.
#
# Core contract as visible in the template: on entry the four state
# words are in $s0-$s3, $key points at the key schedule (round count is
# read from offset 240) and $Tbl at the decryption table; on exit the
# result is in $s0-$s3. $t0-$t11, $i0-$i3, $key0 and $cnt are clobbered.
#
# The round loop mirrors the encryption core with a different word
# order for the lookups. The final round reads single bytes at offset
# 1024 from $Tbl (labelled Td4 in the template) after touching that
# area with eight lw "prefetch" loads.
$code.=<<___;
.align 5
.ent _mips_AES_decrypt
_mips_AES_decrypt:
.frame $sp,0,$ra
.set reorder
lw $t0,0($key)
lw $t1,4($key)
lw $t2,8($key)
lw $t3,12($key)
lw $cnt,240($key)
$PTR_ADD $key0,$key,16
xor $s0,$t0
xor $s1,$t1
xor $s2,$t2
xor $s3,$t3
sub $cnt,1
_xtr $i0,$s3,16-2
.Loop_dec:
_xtr $i1,$s0,16-2
_xtr $i2,$s1,16-2
_xtr $i3,$s2,16-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
lw $t0,0($i0) # Td1[s3>>16]
_xtr $i0,$s2,8-2
lw $t1,0($i1) # Td1[s0>>16]
_xtr $i1,$s3,8-2
lw $t2,0($i2) # Td1[s1>>16]
_xtr $i2,$s0,8-2
lw $t3,0($i3) # Td1[s2>>16]
_xtr $i3,$s1,8-2
#else
lwl $t0,3($i0) # Td1[s3>>16]
lwl $t1,3($i1) # Td1[s0>>16]
lwl $t2,3($i2) # Td1[s1>>16]
lwl $t3,3($i3) # Td1[s2>>16]
lwr $t0,2($i0) # Td1[s3>>16]
_xtr $i0,$s2,8-2
lwr $t1,2($i1) # Td1[s0>>16]
_xtr $i1,$s3,8-2
lwr $t2,2($i2) # Td1[s1>>16]
_xtr $i2,$s0,8-2
lwr $t3,2($i3) # Td1[s2>>16]
_xtr $i3,$s1,8-2
#endif
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
rotr $t0,$t0,8
rotr $t1,$t1,8
rotr $t2,$t2,8
rotr $t3,$t3,8
# if defined(_MIPSEL)
lw $t4,0($i0) # Td2[s2>>8]
_xtr $i0,$s1,0-2
lw $t5,0($i1) # Td2[s3>>8]
_xtr $i1,$s2,0-2
lw $t6,0($i2) # Td2[s0>>8]
_xtr $i2,$s3,0-2
lw $t7,0($i3) # Td2[s1>>8]
_xtr $i3,$s0,0-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lw $t8,0($i0) # Td3[s1]
$PTR_INS $i0,$s0,2,8
lw $t9,0($i1) # Td3[s2]
$PTR_INS $i1,$s1,2,8
lw $t10,0($i2) # Td3[s3]
$PTR_INS $i2,$s2,2,8
lw $t11,0($i3) # Td3[s0]
$PTR_INS $i3,$s3,2,8
#else
lw $t4,0($i0) # Td2[s2>>8]
$PTR_INS $i0,$s1,2,8
lw $t5,0($i1) # Td2[s3>>8]
$PTR_INS $i1,$s2,2,8
lw $t6,0($i2) # Td2[s0>>8]
$PTR_INS $i2,$s3,2,8
lw $t7,0($i3) # Td2[s1>>8]
$PTR_INS $i3,$s0,2,8
lw $t8,0($i0) # Td3[s1]
_xtr $i0,$s0,24-2
lw $t9,0($i1) # Td3[s2]
_xtr $i1,$s1,24-2
lw $t10,0($i2) # Td3[s3]
_xtr $i2,$s2,24-2
lw $t11,0($i3) # Td3[s0]
_xtr $i3,$s3,24-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#endif
rotr $t4,$t4,16
rotr $t5,$t5,16
rotr $t6,$t6,16
rotr $t7,$t7,16
rotr $t8,$t8,24
rotr $t9,$t9,24
rotr $t10,$t10,24
rotr $t11,$t11,24
#else
lwl $t4,2($i0) # Td2[s2>>8]
lwl $t5,2($i1) # Td2[s3>>8]
lwl $t6,2($i2) # Td2[s0>>8]
lwl $t7,2($i3) # Td2[s1>>8]
lwr $t4,1($i0) # Td2[s2>>8]
_xtr $i0,$s1,0-2
lwr $t5,1($i1) # Td2[s3>>8]
_xtr $i1,$s2,0-2
lwr $t6,1($i2) # Td2[s0>>8]
_xtr $i2,$s3,0-2
lwr $t7,1($i3) # Td2[s1>>8]
_xtr $i3,$s0,0-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lwl $t8,1($i0) # Td3[s1]
lwl $t9,1($i1) # Td3[s2]
lwl $t10,1($i2) # Td3[s3]
lwl $t11,1($i3) # Td3[s0]
lwr $t8,0($i0) # Td3[s1]
_xtr $i0,$s0,24-2
lwr $t9,0($i1) # Td3[s2]
_xtr $i1,$s1,24-2
lwr $t10,0($i2) # Td3[s3]
_xtr $i2,$s2,24-2
lwr $t11,0($i3) # Td3[s0]
_xtr $i3,$s3,24-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#endif
xor $t0,$t4
lw $t4,0($i0) # Td0[s0>>24]
xor $t1,$t5
lw $t5,0($i1) # Td0[s1>>24]
xor $t2,$t6
lw $t6,0($i2) # Td0[s2>>24]
xor $t3,$t7
lw $t7,0($i3) # Td0[s3>>24]
xor $t0,$t8
lw $s0,0($key0)
xor $t1,$t9
lw $s1,4($key0)
xor $t2,$t10
lw $s2,8($key0)
xor $t3,$t11
lw $s3,12($key0)
xor $t0,$t4
xor $t1,$t5
xor $t2,$t6
xor $t3,$t7
sub $cnt,1
$PTR_ADD $key0,16
xor $s0,$t0
xor $s1,$t1
xor $s2,$t2
xor $s3,$t3
.set noreorder
bnez $cnt,.Loop_dec
_xtr $i0,$s3,16-2
.set reorder
lw $t4,1024($Tbl) # prefetch Td4
_xtr $i0,$s3,16
lw $t5,1024+32($Tbl)
_xtr $i1,$s0,16
lw $t6,1024+64($Tbl)
_xtr $i2,$s1,16
lw $t7,1024+96($Tbl)
_xtr $i3,$s2,16
lw $t8,1024+128($Tbl)
and $i0,0xff
lw $t9,1024+160($Tbl)
and $i1,0xff
lw $t10,1024+192($Tbl)
and $i2,0xff
lw $t11,1024+224($Tbl)
and $i3,0xff
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t0,1024($i0) # Td4[s3>>16]
_xtr $i0,$s2,8
lbu $t1,1024($i1) # Td4[s0>>16]
_xtr $i1,$s3,8
lbu $t2,1024($i2) # Td4[s1>>16]
_xtr $i2,$s0,8
lbu $t3,1024($i3) # Td4[s2>>16]
_xtr $i3,$s1,8
and $i0,0xff
and $i1,0xff
and $i2,0xff
and $i3,0xff
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
# if defined(_MIPSEL)
lbu $t4,1024($i0) # Td4[s2>>8]
$PTR_INS $i0,$s0,0,8
lbu $t5,1024($i1) # Td4[s3>>8]
$PTR_INS $i1,$s1,0,8
lbu $t6,1024($i2) # Td4[s0>>8]
$PTR_INS $i2,$s2,0,8
lbu $t7,1024($i3) # Td4[s1>>8]
$PTR_INS $i3,$s3,0,8
lbu $t8,1024($i0) # Td4[s0>>24]
_xtr $i0,$s1,0
lbu $t9,1024($i1) # Td4[s1>>24]
_xtr $i1,$s2,0
lbu $t10,1024($i2) # Td4[s2>>24]
_xtr $i2,$s3,0
lbu $t11,1024($i3) # Td4[s3>>24]
_xtr $i3,$s0,0
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
# else
lbu $t4,1024($i0) # Td4[s2>>8]
_xtr $i0,$s0,24
lbu $t5,1024($i1) # Td4[s3>>8]
_xtr $i1,$s1,24
lbu $t6,1024($i2) # Td4[s0>>8]
_xtr $i2,$s2,24
lbu $t7,1024($i3) # Td4[s1>>8]
_xtr $i3,$s3,24
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t8,1024($i0) # Td4[s0>>24]
$PTR_INS $i0,$s1,0,8
lbu $t9,1024($i1) # Td4[s1>>24]
$PTR_INS $i1,$s2,0,8
lbu $t10,1024($i2) # Td4[s2>>24]
$PTR_INS $i2,$s3,0,8
lbu $t11,1024($i3) # Td4[s3>>24]
$PTR_INS $i3,$s0,0,8
# endif
_ins $t0,16
_ins $t1,16
_ins $t2,16
_ins $t3,16
_ins2 $t0,$t4,8
lbu $t4,1024($i0) # Td4[s1]
_ins2 $t1,$t5,8
lbu $t5,1024($i1) # Td4[s2]
_ins2 $t2,$t6,8
lbu $t6,1024($i2) # Td4[s3]
_ins2 $t3,$t7,8
lbu $t7,1024($i3) # Td4[s0]
_ins2 $t0,$t8,24
lw $s0,0($key0)
_ins2 $t1,$t9,24
lw $s1,4($key0)
_ins2 $t2,$t10,24
lw $s2,8($key0)
_ins2 $t3,$t11,24
lw $s3,12($key0)
_ins2 $t0,$t4,0
_ins2 $t1,$t5,0
_ins2 $t2,$t6,0
_ins2 $t3,$t7,0
#else
lbu $t4,1024($i0) # Td4[s2>>8]
_xtr $i0,$s0,24
lbu $t5,1024($i1) # Td4[s3>>8]
_xtr $i1,$s1,24
lbu $t6,1024($i2) # Td4[s0>>8]
_xtr $i2,$s2,24
lbu $t7,1024($i3) # Td4[s1>>8]
_xtr $i3,$s3,24
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t8,1024($i0) # Td4[s0>>24]
_xtr $i0,$s1,0
lbu $t9,1024($i1) # Td4[s1>>24]
_xtr $i1,$s2,0
lbu $t10,1024($i2) # Td4[s2>>24]
_xtr $i2,$s3,0
lbu $t11,1024($i3) # Td4[s3>>24]
_xtr $i3,$s0,0
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
_ins $t0,16
_ins $t1,16
_ins $t2,16
_ins $t3,16
_ins $t4,8
_ins $t5,8
_ins $t6,8
_ins $t7,8
xor $t0,$t4
lbu $t4,1024($i0) # Td4[s1]
xor $t1,$t5
lbu $t5,1024($i1) # Td4[s2]
xor $t2,$t6
lbu $t6,1024($i2) # Td4[s3]
xor $t3,$t7
lbu $t7,1024($i3) # Td4[s0]
_ins $t8,24
lw $s0,0($key0)
_ins $t9,24
lw $s1,4($key0)
_ins $t10,24
lw $s2,8($key0)
_ins $t11,24
lw $s3,12($key0)
xor $t0,$t8
xor $t1,$t9
xor $t2,$t10
xor $t3,$t11
_ins $t4,0
_ins $t5,0
_ins $t6,0
_ins $t7,0
xor $t0,$t4
xor $t1,$t5
xor $t2,$t6
xor $t3,$t7
#endif
xor $s0,$t0
xor $s1,$t1
xor $s2,$t2
xor $s3,$t3
jr $ra
.end _mips_AES_decrypt
.align 5
.globl AES_decrypt
.ent AES_decrypt
AES_decrypt:
.frame $sp,$FRAMESIZE,$ra
.mask $SAVED_REGS_MASK,-$SZREG
.set noreorder
___
# AES_decrypt, continued. Same layout as the AES_encrypt wrapper: PIC
# set-up, register saves, body (load the block, call the decryption
# core, store the block) and the matching restores.
$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
.cpload $pf
___
# Allocate the frame and save $ra, $fp and $s4-$s11 in slots 1-10,
# counted down from the top of the frame.
$code.=<<___;
$PTR_SUB $sp,$FRAMESIZE
$REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
$REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
$REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
$REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
$REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
$REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
$REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
$REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
$REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
$REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
___
# nubi only: also save registers 12-15 and $gp in slots 11-15. The
# registers are written as literal numbers because $s0-$s3 are
# re-declared in this scope as aliases for $a4-$a7.
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
$REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
$REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
$REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
$REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
$REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
.cplocal $Tbl
.cpsetup $pf,$zero,AES_decrypt
___
# Body: point $Tbl at AES_Td, load the 16 input bytes into $s0-$s3 with
# lwl/lwr pairs, run the core, store the result with swr/swl pairs and
# then begin restoring the registers saved in slots 1-10.
$code.=<<___;
.set reorder
la $Tbl,AES_Td # PIC-ified 'load address'
lwl $s0,0+$MSB($inp)
lwl $s1,4+$MSB($inp)
lwl $s2,8+$MSB($inp)
lwl $s3,12+$MSB($inp)
lwr $s0,0+$LSB($inp)
lwr $s1,4+$LSB($inp)
lwr $s2,8+$LSB($inp)
lwr $s3,12+$LSB($inp)
bal _mips_AES_decrypt
swr $s0,0+$LSB($out)
swr $s1,4+$LSB($out)
swr $s2,8+$LSB($out)
swr $s3,12+$LSB($out)
swl $s0,0+$MSB($out)
swl $s1,4+$MSB($out)
swl $s2,8+$MSB($out)
swl $s3,12+$MSB($out)
.set noreorder
$REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
$REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
$REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
$REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
$REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
$REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
$REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
$REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
$REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
$REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
___
# nubi only: restore from the same slots 11-15 used by the nubi saves.
$code.=<<___ if ($flavour =~ /nubi/i);
$REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
$REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
$REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
$REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
$REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
___
# Return; noreorder is in effect, so the frame release sits in the
# branch delay slot of jr.
$code.=<<___;
jr $ra
$PTR_ADD $sp,$FRAMESIZE
.end AES_decrypt
___
}}}
{{{
# Stack frame of the key-setup entry points: 8 register-sized slots.
my $FRAMESIZE=8*$SZREG;
# .mask bitmap of the saved registers; the nubi value additionally has
# the bits for registers 3 and 12-15 set.
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
# User key pointer, key length in bits (compared against 128/192/256
# below), key schedule pointer and table pointer.
my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
# Up to eight key words held in registers; $rk4-$rk7 live in the
# file-level $s0-$s3.
my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
# Scratch registers for table lookups.
my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
# $rcon walks a word table starting 256 bytes past $Tbl (presumably the
# round constants -- the table itself is defined elsewhere in the file);
# $cnt is the loop counter.
my ($rcon,$cnt)=($gp,$fp);
# Emit _mips_AES_set_encrypt_key, the internal (non-.globl) key
# expansion, and open the definition of the exported
# AES_set_encrypt_key whose prologue is appended by the statements that
# follow.
#
# Core contract as visible in the template: $inp is the user key, $bits
# its length, $key the schedule to fill and $Tbl the byte table used for
# the lookups. The status is returned in $t0: -1 if $inp or $key is
# zero, -2 if $bits is not 128, 192 or 256, and 0 on success. On success
# the round count (10, 12 or 14) is stored at offset 240 of the
# schedule and is also left in $cnt, and $key is rewound to the start of
# the schedule.
$code.=<<___;
.align 5
.ent _mips_AES_set_encrypt_key
_mips_AES_set_encrypt_key:
.frame $sp,0,$ra
.set noreorder
beqz $inp,.Lekey_done
li $t0,-1
beqz $key,.Lekey_done
$PTR_ADD $rcon,$Tbl,256
.set reorder
lwl $rk0,0+$MSB($inp) # load 128 bits
lwl $rk1,4+$MSB($inp)
lwl $rk2,8+$MSB($inp)
lwl $rk3,12+$MSB($inp)
li $at,128
lwr $rk0,0+$LSB($inp)
lwr $rk1,4+$LSB($inp)
lwr $rk2,8+$LSB($inp)
lwr $rk3,12+$LSB($inp)
.set noreorder
beq $bits,$at,.L128bits
li $cnt,10
.set reorder
lwl $rk4,16+$MSB($inp) # load 192 bits
lwl $rk5,20+$MSB($inp)
li $at,192
lwr $rk4,16+$LSB($inp)
lwr $rk5,20+$LSB($inp)
.set noreorder
beq $bits,$at,.L192bits
li $cnt,8
.set reorder
lwl $rk6,24+$MSB($inp) # load 256 bits
lwl $rk7,28+$MSB($inp)
li $at,256
lwr $rk6,24+$LSB($inp)
lwr $rk7,28+$LSB($inp)
.set noreorder
beq $bits,$at,.L256bits
li $cnt,7
b .Lekey_done
li $t0,-2
.align 4
.L128bits:
.set reorder
srl $i0,$rk3,16
srl $i1,$rk3,8
and $i0,0xff
and $i1,0xff
and $i2,$rk3,0xff
srl $i3,$rk3,24
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $i0,0($i0)
lbu $i1,0($i1)
lbu $i2,0($i2)
lbu $i3,0($i3)
sw $rk0,0($key)
sw $rk1,4($key)
sw $rk2,8($key)
sw $rk3,12($key)
sub $cnt,1
$PTR_ADD $key,16
_bias $i0,24
_bias $i1,16
_bias $i2,8
_bias $i3,0
xor $rk0,$i0
lw $i0,0($rcon)
xor $rk0,$i1
xor $rk0,$i2
xor $rk0,$i3
xor $rk0,$i0
xor $rk1,$rk0
xor $rk2,$rk1
xor $rk3,$rk2
.set noreorder
bnez $cnt,.L128bits
$PTR_ADD $rcon,4
sw $rk0,0($key)
sw $rk1,4($key)
sw $rk2,8($key)
li $cnt,10
sw $rk3,12($key)
li $t0,0
sw $cnt,80($key)
b .Lekey_done
$PTR_SUB $key,10*16
.align 4
.L192bits:
.set reorder
srl $i0,$rk5,16
srl $i1,$rk5,8
and $i0,0xff
and $i1,0xff
and $i2,$rk5,0xff
srl $i3,$rk5,24
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $i0,0($i0)
lbu $i1,0($i1)
lbu $i2,0($i2)
lbu $i3,0($i3)
sw $rk0,0($key)
sw $rk1,4($key)
sw $rk2,8($key)
sw $rk3,12($key)
sw $rk4,16($key)
sw $rk5,20($key)
sub $cnt,1
$PTR_ADD $key,24
_bias $i0,24
_bias $i1,16
_bias $i2,8
_bias $i3,0
xor $rk0,$i0
lw $i0,0($rcon)
xor $rk0,$i1
xor $rk0,$i2
xor $rk0,$i3
xor $rk0,$i0
xor $rk1,$rk0
xor $rk2,$rk1
xor $rk3,$rk2
xor $rk4,$rk3
xor $rk5,$rk4
.set noreorder
bnez $cnt,.L192bits
$PTR_ADD $rcon,4
sw $rk0,0($key)
sw $rk1,4($key)
sw $rk2,8($key)
li $cnt,12
sw $rk3,12($key)
li $t0,0
sw $cnt,48($key)
b .Lekey_done
$PTR_SUB $key,12*16
.align 4
.L256bits:
.set reorder
srl $i0,$rk7,16
srl $i1,$rk7,8
and $i0,0xff
and $i1,0xff
and $i2,$rk7,0xff
srl $i3,$rk7,24
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $i0,0($i0)
lbu $i1,0($i1)
lbu $i2,0($i2)
lbu $i3,0($i3)
sw $rk0,0($key)
sw $rk1,4($key)
sw $rk2,8($key)
sw $rk3,12($key)
sw $rk4,16($key)
sw $rk5,20($key)
sw $rk6,24($key)
sw $rk7,28($key)
sub $cnt,1
_bias $i0,24
_bias $i1,16
_bias $i2,8
_bias $i3,0
xor $rk0,$i0
lw $i0,0($rcon)
xor $rk0,$i1
xor $rk0,$i2
xor $rk0,$i3
xor $rk0,$i0
xor $rk1,$rk0
xor $rk2,$rk1
xor $rk3,$rk2
beqz $cnt,.L256bits_done
srl $i0,$rk3,24
srl $i1,$rk3,16
srl $i2,$rk3,8
and $i3,$rk3,0xff
and $i1,0xff
and $i2,0xff
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $i0,0($i0)
lbu $i1,0($i1)
lbu $i2,0($i2)
lbu $i3,0($i3)
sll $i0,24
sll $i1,16
sll $i2,8
xor $rk4,$i0
xor $rk4,$i1
xor $rk4,$i2
xor $rk4,$i3
xor $rk5,$rk4
xor $rk6,$rk5
xor $rk7,$rk6
$PTR_ADD $key,32
.set noreorder
b .L256bits
$PTR_ADD $rcon,4
.L256bits_done:
sw $rk0,32($key)
sw $rk1,36($key)
sw $rk2,40($key)
li $cnt,14
sw $rk3,44($key)
li $t0,0
sw $cnt,48($key)
$PTR_SUB $key,12*16
.Lekey_done:
jr $ra
nop
.end _mips_AES_set_encrypt_key
.globl AES_set_encrypt_key
.ent AES_set_encrypt_key
AES_set_encrypt_key:
.frame $sp,$FRAMESIZE,$ra
.mask $SAVED_REGS_MASK,-$SZREG
.set noreorder
___
# AES_set_encrypt_key, continued: PIC set-up, register saves, call into
# the key-expansion core with $Tbl pointing at AES_Te4, and the
# epilogue that copies the status from $t0 into $a0.
$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
.cpload $pf
___
# Allocate the 8-slot frame and save $ra and $fp in slots 1-2.
$code.=<<___;
$PTR_SUB $sp,$FRAMESIZE
$REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
$REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
___
# nubi only: also save $s0-$s3 and $gp in slots 3-7.
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
$REG_S $s3,$FRAMESIZE-3*$SZREG($sp)
$REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
$REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
$REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
$REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
.cplocal $Tbl
.cpsetup $pf,$zero,AES_set_encrypt_key
___
$code.=<<___;
.set reorder
la $Tbl,AES_Te4 # PIC-ified 'load address'
bal _mips_AES_set_encrypt_key
.set noreorder
move $a0,$t0
$REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
$REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
___
# nubi only: restore from slots 3-7, the same slots the prologue wrote.
# The restores used to address slots 11-15, which lie outside this
# 8-slot frame (below $sp), so the callee-saved registers were reloaded
# with whatever happened to be there.
$code.=<<___ if ($flavour =~ /nubi/i);
$REG_L $s3,$FRAMESIZE-3*$SZREG($sp)
$REG_L $s2,$FRAMESIZE-4*$SZREG($sp)
$REG_L $s1,$FRAMESIZE-5*$SZREG($sp)
$REG_L $s0,$FRAMESIZE-6*$SZREG($sp)
$REG_L $gp,$FRAMESIZE-7*$SZREG($sp)
___
# Return; noreorder is in effect, so the frame release sits in the
# branch delay slot of jr.
$code.=<<___;
jr $ra
$PTR_ADD $sp,$FRAMESIZE
.end AES_set_encrypt_key
___
my ($head,$tail)=($inp,$bits);
my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
$code.=<<___;
.align 5
.globl AES_set_decrypt_key
.ent AES_set_decrypt_key
AES_set_decrypt_key:
.frame $sp,$FRAMESIZE,$ra
.mask $SAVED_REGS_MASK,-$SZREG
.set noreorder
___
$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
.cpload $pf
___
$code.=<<___;
$PTR_SUB $sp,$FRAMESIZE
$REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
$REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
$REG_S $s3,$FRAMESIZE-3*$SZREG($sp)
$REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
$REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
$REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
$REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
.cplocal $Tbl
.cpsetup $pf,$zero,AES_set_decrypt_key
___
# Main body of AES_set_decrypt_key (emitted for every flavour):
#
#  1. Load the address of AES_Te4 into $Tbl and call
#     _mips_AES_set_encrypt_key. If it leaves a negative value in $t0,
#     jump straight to .Ldkey_done, which returns that value.
#  2. .Lswap: exchange 16-byte groups in place, walking $head upwards from
#     $key and $tail downwards from $key+$cnt*16 until the two pointers
#     meet, i.e. reverse the order of the 16-byte groups of the schedule.
#  3. .Lmix: starting at $key+16, rewrite ($cnt-1)*4 consecutive words.
#     For each word the packed byte-wise doublings tp2, tp4 and tp8 are
#     formed with the 0x80808080/0x7f7f7f7f/0x1b1b1b1b masks and combined
#     with rotated copies into $tpe, which replaces the word.
#     NOTE(review): this looks like InvMixColumns applied to the inner
#     round keys -- confirm against the C reference implementation.
#     The loads marked "modulo-scheduled" fetch the next word early.
#     The #if picks native rotr on MIPS32R2/MIPS64R2 and otherwise the
#     _ror pseudo-instruction pairs expanded by the post-processing loop.
#  4. Fall through with $t0 = 0, copy $t0 to $a0 as the return value and
#     reload $ra and $fp from frame slots 1 and 2.
$code.=<<___;
.set reorder
la $Tbl,AES_Te4 # PIC-ified 'load address'
bal _mips_AES_set_encrypt_key
bltz $t0,.Ldkey_done
sll $at,$cnt,4
$PTR_ADD $head,$key,0
$PTR_ADD $tail,$key,$at
.align 4
.Lswap:
lw $rk0,0($head)
lw $rk1,4($head)
lw $rk2,8($head)
lw $rk3,12($head)
lw $rk4,0($tail)
lw $rk5,4($tail)
lw $rk6,8($tail)
lw $rk7,12($tail)
sw $rk0,0($tail)
sw $rk1,4($tail)
sw $rk2,8($tail)
sw $rk3,12($tail)
$PTR_ADD $head,16
$PTR_SUB $tail,16
sw $rk4,-16($head)
sw $rk5,-12($head)
sw $rk6,-8($head)
sw $rk7,-4($head)
bne $head,$tail,.Lswap
lw $tp1,16($key) # modulo-scheduled
lui $x80808080,0x8080
sub $cnt,1
or $x80808080,0x8080
sll $cnt,2
$PTR_ADD $key,16
lui $x1b1b1b1b,0x1b1b
nor $x7f7f7f7f,$zero,$x80808080
or $x1b1b1b1b,0x1b1b
.align 4
.Lmix:
and $m,$tp1,$x80808080
and $tp2,$tp1,$x7f7f7f7f
srl $tp4,$m,7
addu $tp2,$tp2 # tp2<<1
subu $m,$tp4
and $m,$x1b1b1b1b
xor $tp2,$m
and $m,$tp2,$x80808080
and $tp4,$tp2,$x7f7f7f7f
srl $tp8,$m,7
addu $tp4,$tp4 # tp4<<1
subu $m,$tp8
and $m,$x1b1b1b1b
xor $tp4,$m
and $m,$tp4,$x80808080
and $tp8,$tp4,$x7f7f7f7f
srl $tp9,$m,7
addu $tp8,$tp8 # tp8<<1
subu $m,$tp9
and $m,$x1b1b1b1b
xor $tp8,$m
xor $tp9,$tp8,$tp1
xor $tpe,$tp8,$tp4
xor $tpb,$tp9,$tp2
xor $tpd,$tp9,$tp4
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
rotr $tp1,$tpd,16
xor $tpe,$tp2
rotr $tp2,$tp9,8
xor $tpe,$tp1
rotr $tp4,$tpb,24
xor $tpe,$tp2
lw $tp1,4($key) # modulo-scheduled
xor $tpe,$tp4
#else
_ror $tp1,$tpd,16
xor $tpe,$tp2
_ror $tp2,$tpd,-16
xor $tpe,$tp1
_ror $tp1,$tp9,8
xor $tpe,$tp2
_ror $tp2,$tp9,-24
xor $tpe,$tp1
_ror $tp1,$tpb,24
xor $tpe,$tp2
_ror $tp2,$tpb,-8
xor $tpe,$tp1
lw $tp1,4($key) # modulo-scheduled
xor $tpe,$tp2
#endif
sub $cnt,1
sw $tpe,0($key)
$PTR_ADD $key,4
bnez $cnt,.Lmix
li $t0,0
.Ldkey_done:
.set noreorder
move $a0,$t0
$REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
$REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
___
# NUBI flavour only: reload $s3, $s2, $s1, $s0 and $gp from the frame.
# The offsets must mirror the NUBI prologue of this function, which stores
# these registers in slots 3..7 below $sp+$FRAMESIZE. Reading slots 11..15
# (the layout used by functions that also spill $s4..$s11) would hand the
# caller garbage in its callee-saved registers and in $gp.
$code.=<<___ if ($flavour =~ /nubi/i);
$REG_L $s3,$FRAMESIZE-3*$SZREG($sp)
$REG_L $s2,$FRAMESIZE-4*$SZREG($sp)
$REG_L $s1,$FRAMESIZE-5*$SZREG($sp)
$REG_L $s0,$FRAMESIZE-6*$SZREG($sp)
$REG_L $gp,$FRAMESIZE-7*$SZREG($sp)
___
# Common tail of AES_set_decrypt_key: return through $ra and release the
# stack frame by adding $FRAMESIZE back to $sp. The emitted text is still
# under ".set noreorder" at this point, so the stack adjustment written
# after "jr" occupies its delay slot. ".end" closes the function.
$code.=<<___;
jr $ra
$PTR_ADD $sp,$FRAMESIZE
.end AES_set_decrypt_key
___
}}}
######################################################################
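# Tables are kept in an endian-neutral manner: every entry is emitted as
# individual .byte values, so the table image is the same for either byte order.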
$code.=<<___;
.rdata
.align 10
AES_Te:
.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0
.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
.byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd
.byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54
.byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03
.byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d
.byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62
.byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a
.byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d
.byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87
.byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb
.byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b
.byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67
.byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea
.byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7
.byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b
.byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c
.byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a
.byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41
.byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f
.byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4
.byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08
.byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73
.byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f
.byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52
.byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e
.byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1
.byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5
.byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36
.byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d
.byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69
.byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f
.byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e
.byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e
.byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2
.byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb
.byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d
.byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce
.byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e
.byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97
.byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68
.byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c
.byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f
.byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed
.byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46
.byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b
.byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4
.byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a
.byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a
.byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16
.byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7
.byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94
.byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10
.byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81
.byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44
.byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3
.byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe
.byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a
.byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc
.byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04
.byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1
.byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63
.byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a
.byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d
.byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14
.byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f
.byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2
.byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39
.byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2
.byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47
.byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7
.byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95
.byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98
.byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f
.byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e
.byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83
.byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29
.byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c
.byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2
.byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76
.byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56
.byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e
.byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a
.byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4
.byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e
.byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6
.byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4
.byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b
.byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43
.byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7
.byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64
.byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0
.byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa
.byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25
.byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e
.byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18
.byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88
.byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72
.byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1
.byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51
.byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c
.byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21
.byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc
.byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85
.byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42
.byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa
.byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05
.byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12
.byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f
.byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0
.byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58
.byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9
.byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13
.byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33
.byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70
.byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7
.byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22
.byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20
.byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff
.byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a
.byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8
.byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17
.byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31
.byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8
.byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0
.byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11
.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
AES_Td:
.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0
.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
.byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1
.byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93
.byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6
.byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25
.byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7
.byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f
.byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67
.byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1
.byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12
.byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6
.byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95
.byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda
.byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3
.byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44
.byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78
.byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd
.byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17
.byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4
.byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82
.byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45
.byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84
.byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94
.byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19
.byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7
.byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2
.byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a
.byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03
.byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5
.byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2
.byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c
.byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92
.byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1
.byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5
.byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a
.byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0
.byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75
.byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa
.byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51
.byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d
.byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46
.byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05
.byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff
.byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97
.byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77
.byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88
.byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb
.byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9
.byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00
.byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48
.byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e
.byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56
.byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27
.byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21
.byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a
.byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f
.byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e
.byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2
.byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16
.byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5
.byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d
.byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad
.byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8
.byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c
.byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd
.byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc
.byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34
.byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc
.byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63
.byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10
.byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20
.byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8
.byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d
.byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3
.byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0
.byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99
.byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22
.byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a
.byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef
.byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1
.byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36
.byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28
.byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4
.byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d
.byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62
.byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8
.byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5
.byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c
.byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3
.byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7
.byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b
.byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4
.byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8
.byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e
.byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6
.byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce
.byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6
.byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31
.byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0
.byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6
.byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15
.byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7
.byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f
.byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d
.byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf
.byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b
.byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f
.byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d
.byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e
.byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52
.byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13
.byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a
.byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89
.byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35
.byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c
.byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f
.byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf
.byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b
.byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86
.byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e
.byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f
.byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c
.byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41
.byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde
.byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90
.byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70
.byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 # Td4
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
AES_Te4:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
___
# Post-process the accumulated assembler text one line at a time and print
# the result: evaluate `...` expressions, expand the made-up
# pseudo-instructions into real ones, and adjust byte-order dependent
# operands when $big_endian is false.
foreach (split("\n",$code)) {
# replace each `expr` with the value of evaluating expr as Perl code
s/\`([^\`]*)\`/eval $1/ge;
# made-up _instructions, _xtr, _ins, _ins2, _ror and _bias, cope
# with byte order dependencies...
if (/^\s+_/) {
# two-operand shorthand "_op $r,x" is widened to "_op $r,$r,x"
s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
# Translate the first pseudo-instruction that matches (the chain stops at
# the first successful substitution). The amount n is used unchanged on
# big-endian and adjusted on little-endian:
#   _xtr  -> srl        LE amount 24-n
#   _ins  -> sll        LE amount 24-n
#   _ins2 -> ins ...,8  LE position 24-n
#   _ror  -> srl        LE amount -n (a negative amount is fixed up below)
#   _bias -> sll        LE amount (n-16)&31
s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
: eval("24-$3"))/e or
s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
: eval("24-$3"))/e or
s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3)
: eval("24-$3"))/e or
s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
: eval("$3*-1"))/e or
s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
: eval("($3-16)&31"))/e;
# Clean up what the translation produced; again only the first match
# applies: srl by a negative amount becomes sll by the positive amount,
# srl by 0 becomes "and ...,0xff", and sll by 0 is commented out with '#'.
s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
sprintf("sll\t$1,$2,$3")/e or
s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
sprintf("and\t$1,$2,0xff")/e or
s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
}
# convert lwl/lwr and swr/swl to little-endian order
# Because ".*" is greedy, $2 captures only the last digit d in front of
# "(". '&' binds more loosely than '+' and '-', so the evaluated
# expression is (d-1)&3 for lwl/swl and (d+1)&3 for lwr/swr, and that
# value replaces the digit.
if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e or
s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
}
# on little-endian a rotr amount n is replaced by 32-n
s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e if(!$big_endian);
print $_,"\n";
}
# Flush and close the generated assembler output. Buffered write errors
# (for example a full disk) are only reported by close, so a failure here
# must abort the script rather than leave a silently truncated file.
close STDOUT or die "error closing STDOUT: $!";