MIPS assembly pack: add MIPS[32|64]R2 code.

This commit is contained in:
Andy Polyakov 2012-09-15 11:18:20 +00:00
parent 9b222748e7
commit 8df5518bd9
3 changed files with 556 additions and 71 deletions

View File

@ -20,6 +20,10 @@
# thing about this module is its endian neutrality, which means that
# it processes data without ever changing byte order...
# September 2012
#
# Add MIPS32R2 code.
######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
@ -52,6 +56,7 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) {
$PTR_ADD="dadd"; # incidentally works even on n32
$PTR_SUB="dsub"; # incidentally works even on n32
$PTR_INS="dins";
$REG_S="sd";
$REG_L="ld";
$PTR_SLL="dsll"; # incidentally works even on n32
@ -59,6 +64,7 @@ if ($flavour =~ /64|n32/i) {
} else {
$PTR_ADD="add";
$PTR_SUB="sub";
$PTR_INS="ins";
$REG_S="sw";
$REG_L="lw";
$PTR_SLL="sll";
@ -138,6 +144,16 @@ _mips_AES_encrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
lw $t0,0($i0) # Te1[s1>>16]
_xtr $i0,$s2,8-2
lw $t1,0($i1) # Te1[s2>>16]
_xtr $i1,$s3,8-2
lw $t2,0($i2) # Te1[s3>>16]
_xtr $i2,$s0,8-2
lw $t3,0($i3) # Te1[s0>>16]
_xtr $i3,$s1,8-2
#else
lwl $t0,3($i0) # Te1[s1>>16]
lwl $t1,3($i1) # Te1[s2>>16]
lwl $t2,3($i2) # Te1[s3>>16]
@ -150,6 +166,29 @@ _mips_AES_encrypt:
_xtr $i2,$s0,8-2
lwr $t3,2($i3) # Te1[s0>>16]
_xtr $i3,$s1,8-2
#endif
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
rotr $t0,$t0,8
rotr $t1,$t1,8
rotr $t2,$t2,8
rotr $t3,$t3,8
# if defined(_MIPSEL)
lw $t4,0($i0) # Te1[s1>>16]
_xtr $i0,$s3,0-2
lw $t5,0($i1) # Te1[s2>>16]
_xtr $i1,$s0,0-2
lw $t6,0($i2) # Te1[s3>>16]
_xtr $i2,$s1,0-2
lw $t7,0($i3) # Te1[s0>>16]
_xtr $i3,$s2,0-2
and $i0,0x3fc
and $i1,0x3fc
@ -159,6 +198,52 @@ _mips_AES_encrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lw $t8,0($i0) # Te1[s1>>16]
$PTR_INS $i0,$s0,2,8
lw $t9,0($i1) # Te1[s2>>16]
$PTR_INS $i1,$s1,2,8
lw $t10,0($i2) # Te1[s3>>16]
$PTR_INS $i2,$s2,2,8
lw $t11,0($i3) # Te1[s0>>16]
$PTR_INS $i3,$s3,2,8
# else
lw $t4,0($i0) # Te1[s1>>16]
$PTR_INS $i0,$s3,2,8
lw $t5,0($i1) # Te1[s2>>16]
$PTR_INS $i1,$s0,2,8
lw $t6,0($i2) # Te1[s3>>16]
$PTR_INS $i2,$s1,2,8
lw $t7,0($i3) # Te1[s0>>16]
$PTR_INS $i3,$s2,2,8
lw $t8,0($i0) # Te1[s1>>16]
_xtr $i0,$s0,24-2
lw $t9,0($i1) # Te1[s2>>16]
_xtr $i1,$s1,24-2
lw $t10,0($i2) # Te1[s3>>16]
_xtr $i2,$s2,24-2
lw $t11,0($i3) # Te1[s0>>16]
_xtr $i3,$s3,24-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
# endif
rotr $t4,$t4,16
rotr $t5,$t5,16
rotr $t6,$t6,16
rotr $t7,$t7,16
rotr $t8,$t8,24
rotr $t9,$t9,24
rotr $t10,$t10,24
rotr $t11,$t11,24
#else
lwl $t4,2($i0) # Te2[s2>>8]
lwl $t5,2($i1) # Te2[s3>>8]
lwl $t6,2($i2) # Te2[s0>>8]
@ -201,6 +286,7 @@ _mips_AES_encrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#endif
xor $t0,$t4
lw $t4,0($i0) # Te0[s0>>24]
xor $t1,$t5
@ -263,6 +349,89 @@ _mips_AES_encrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
# if defined(_MIPSEL)
lbu $t4,2($i0) # Te4[s2>>8]
$PTR_INS $i0,$s0,2,8
lbu $t5,2($i1) # Te4[s3>>8]
$PTR_INS $i1,$s1,2,8
lbu $t6,2($i2) # Te4[s0>>8]
$PTR_INS $i2,$s2,2,8
lbu $t7,2($i3) # Te4[s1>>8]
$PTR_INS $i3,$s3,2,8
lbu $t8,2($i0) # Te4[s0>>24]
_xtr $i0,$s3,0-2
lbu $t9,2($i1) # Te4[s1>>24]
_xtr $i1,$s0,0-2
lbu $t10,2($i2) # Te4[s2>>24]
_xtr $i2,$s1,0-2
lbu $t11,2($i3) # Te4[s3>>24]
_xtr $i3,$s2,0-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
# else
lbu $t4,2($i0) # Te4[s2>>8]
_xtr $i0,$s0,24-2
lbu $t5,2($i1) # Te4[s3>>8]
_xtr $i1,$s1,24-2
lbu $t6,2($i2) # Te4[s0>>8]
_xtr $i2,$s2,24-2
lbu $t7,2($i3) # Te4[s1>>8]
_xtr $i3,$s3,24-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t8,2($i0) # Te4[s0>>24]
$PTR_INS $i0,$s3,2,8
lbu $t9,2($i1) # Te4[s1>>24]
$PTR_INS $i1,$s0,2,8
lbu $t10,2($i2) # Te4[s2>>24]
$PTR_INS $i2,$s1,2,8
lbu $t11,2($i3) # Te4[s3>>24]
$PTR_INS $i3,$s2,2,8
# endif
_ins $t0,16
_ins $t1,16
_ins $t2,16
_ins $t3,16
_ins2 $t0,$t4,8
lbu $t4,2($i0) # Te4[s3]
_ins2 $t1,$t5,8
lbu $t5,2($i1) # Te4[s0]
_ins2 $t2,$t6,8
lbu $t6,2($i2) # Te4[s1]
_ins2 $t3,$t7,8
lbu $t7,2($i3) # Te4[s2]
_ins2 $t0,$t8,24
lw $s0,0($key0)
_ins2 $t1,$t9,24
lw $s1,4($key0)
_ins2 $t2,$t10,24
lw $s2,8($key0)
_ins2 $t3,$t11,24
lw $s3,12($key0)
_ins2 $t0,$t4,0
_ins2 $t1,$t5,0
_ins2 $t2,$t6,0
_ins2 $t3,$t7,0
#else
lbu $t4,2($i0) # Te4[s2>>8]
_xtr $i0,$s0,24-2
lbu $t5,2($i1) # Te4[s3>>8]
@ -340,7 +509,7 @@ _mips_AES_encrypt:
xor $t1,$t5
xor $t2,$t6
xor $t3,$t7
#endif
xor $s0,$t0
xor $s1,$t1
xor $s2,$t2
@ -465,6 +634,16 @@ _mips_AES_decrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
lw $t0,0($i0) # Td1[s3>>16]
_xtr $i0,$s2,8-2
lw $t1,0($i1) # Td1[s0>>16]
_xtr $i1,$s3,8-2
lw $t2,0($i2) # Td1[s1>>16]
_xtr $i2,$s0,8-2
lw $t3,0($i3) # Td1[s2>>16]
_xtr $i3,$s1,8-2
#else
lwl $t0,3($i0) # Td1[s3>>16]
lwl $t1,3($i1) # Td1[s0>>16]
lwl $t2,3($i2) # Td1[s1>>16]
@ -477,6 +656,7 @@ _mips_AES_decrypt:
_xtr $i2,$s0,8-2
lwr $t3,2($i3) # Td1[s2>>16]
_xtr $i3,$s1,8-2
#endif
and $i0,0x3fc
and $i1,0x3fc
@ -486,6 +666,75 @@ _mips_AES_decrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
rotr $t0,$t0,8
rotr $t1,$t1,8
rotr $t2,$t2,8
rotr $t3,$t3,8
# if defined(_MIPSEL)
lw $t4,0($i0) # Td2[s2>>8]
_xtr $i0,$s1,0-2
lw $t5,0($i1) # Td2[s3>>8]
_xtr $i1,$s2,0-2
lw $t6,0($i2) # Td2[s0>>8]
_xtr $i2,$s3,0-2
lw $t7,0($i3) # Td2[s1>>8]
_xtr $i3,$s0,0-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lw $t8,0($i0) # Td3[s1]
$PTR_INS $i0,$s0,2,8
lw $t9,0($i1) # Td3[s2]
$PTR_INS $i1,$s1,2,8
lw $t10,0($i2) # Td3[s3]
$PTR_INS $i2,$s2,2,8
lw $t11,0($i3) # Td3[s0]
$PTR_INS $i3,$s3,2,8
#else
lw $t4,0($i0) # Td2[s2>>8]
$PTR_INS $i0,$s1,2,8
lw $t5,0($i1) # Td2[s3>>8]
$PTR_INS $i1,$s2,2,8
lw $t6,0($i2) # Td2[s0>>8]
$PTR_INS $i2,$s3,2,8
lw $t7,0($i3) # Td2[s1>>8]
$PTR_INS $i3,$s0,2,8
lw $t8,0($i0) # Td3[s1]
_xtr $i0,$s0,24-2
lw $t9,0($i1) # Td3[s2]
_xtr $i1,$s1,24-2
lw $t10,0($i2) # Td3[s3]
_xtr $i2,$s2,24-2
lw $t11,0($i3) # Td3[s0]
_xtr $i3,$s3,24-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#endif
rotr $t4,$t4,16
rotr $t5,$t5,16
rotr $t6,$t6,16
rotr $t7,$t7,16
rotr $t8,$t8,24
rotr $t9,$t9,24
rotr $t10,$t10,24
rotr $t11,$t11,24
#else
lwl $t4,2($i0) # Td2[s2>>8]
lwl $t5,2($i1) # Td2[s3>>8]
lwl $t6,2($i2) # Td2[s0>>8]
@ -528,6 +777,7 @@ _mips_AES_decrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#endif
xor $t0,$t4
lw $t4,0($i0) # Td0[s0>>24]
@ -601,6 +851,81 @@ _mips_AES_decrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
# if defined(_MIPSEL)
lbu $t4,1024($i0) # Td4[s2>>8]
$PTR_INS $i0,$s0,0,8
lbu $t5,1024($i1) # Td4[s3>>8]
$PTR_INS $i1,$s1,0,8
lbu $t6,1024($i2) # Td4[s0>>8]
$PTR_INS $i2,$s2,0,8
lbu $t7,1024($i3) # Td4[s1>>8]
$PTR_INS $i3,$s3,0,8
lbu $t8,1024($i0) # Td4[s0>>24]
_xtr $i0,$s1,0
lbu $t9,1024($i1) # Td4[s1>>24]
_xtr $i1,$s2,0
lbu $t10,1024($i2) # Td4[s2>>24]
_xtr $i2,$s3,0
lbu $t11,1024($i3) # Td4[s3>>24]
_xtr $i3,$s0,0
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
# else
lbu $t4,1024($i0) # Td4[s2>>8]
_xtr $i0,$s0,24
lbu $t5,1024($i1) # Td4[s3>>8]
_xtr $i1,$s1,24
lbu $t6,1024($i2) # Td4[s0>>8]
_xtr $i2,$s2,24
lbu $t7,1024($i3) # Td4[s1>>8]
_xtr $i3,$s3,24
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t8,1024($i0) # Td4[s0>>24]
$PTR_INS $i0,$s1,0,8
lbu $t9,1024($i1) # Td4[s1>>24]
$PTR_INS $i1,$s2,0,8
lbu $t10,1024($i2) # Td4[s2>>24]
$PTR_INS $i2,$s3,0,8
lbu $t11,1024($i3) # Td4[s3>>24]
$PTR_INS $i3,$s0,0,8
# endif
_ins $t0,16
_ins $t1,16
_ins $t2,16
_ins $t3,16
_ins2 $t0,$t4,8
lbu $t4,1024($i0) # Td4[s1]
_ins2 $t1,$t5,8
lbu $t5,1024($i1) # Td4[s2]
_ins2 $t2,$t6,8
lbu $t6,1024($i2) # Td4[s3]
_ins2 $t3,$t7,8
lbu $t7,1024($i3) # Td4[s0]
_ins2 $t0,$t8,24
lw $s0,0($key0)
_ins2 $t1,$t9,24
lw $s1,4($key0)
_ins2 $t2,$t10,24
lw $s2,8($key0)
_ins2 $t3,$t11,24
lw $s3,12($key0)
_ins2 $t0,$t4,0
_ins2 $t1,$t5,0
_ins2 $t2,$t6,0
_ins2 $t3,$t7,0
#else
lbu $t4,1024($i0) # Td4[s2>>8]
_xtr $i0,$s0,24
lbu $t5,1024($i1) # Td4[s3>>8]
@ -670,6 +995,7 @@ _mips_AES_decrypt:
xor $t1,$t5
xor $t2,$t6
xor $t3,$t7
#endif
xor $s0,$t0
xor $s1,$t1
@ -782,7 +1108,7 @@ _mips_AES_set_encrypt_key:
beqz $inp,.Lekey_done
li $t0,-1
beqz $key,.Lekey_done
$PTR_ADD $rcon,$Tbl,1024+256
$PTR_ADD $rcon,$Tbl,256
.set reorder
lwl $rk0,0+$MSB($inp) # load 128 bits
@ -834,10 +1160,10 @@ _mips_AES_set_encrypt_key:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $i0,1024($i0)
lbu $i1,1024($i1)
lbu $i2,1024($i2)
lbu $i3,1024($i3)
lbu $i0,0($i0)
lbu $i1,0($i1)
lbu $i2,0($i2)
lbu $i3,0($i3)
sw $rk0,0($key)
sw $rk1,4($key)
@ -889,10 +1215,10 @@ _mips_AES_set_encrypt_key:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $i0,1024($i0)
lbu $i1,1024($i1)
lbu $i2,1024($i2)
lbu $i3,1024($i3)
lbu $i0,0($i0)
lbu $i1,0($i1)
lbu $i2,0($i2)
lbu $i3,0($i3)
sw $rk0,0($key)
sw $rk1,4($key)
@ -948,10 +1274,10 @@ _mips_AES_set_encrypt_key:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $i0,1024($i0)
lbu $i1,1024($i1)
lbu $i2,1024($i2)
lbu $i3,1024($i3)
lbu $i0,0($i0)
lbu $i1,0($i1)
lbu $i2,0($i2)
lbu $i3,0($i3)
sw $rk0,0($key)
sw $rk1,4($key)
@ -990,10 +1316,10 @@ _mips_AES_set_encrypt_key:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $i0,1024($i0)
lbu $i1,1024($i1)
lbu $i2,1024($i2)
lbu $i3,1024($i3)
lbu $i0,0($i0)
lbu $i1,0($i1)
lbu $i2,0($i2)
lbu $i3,0($i3)
sll $i0,24
sll $i1,16
sll $i2,8
@ -1055,7 +1381,7 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
___
$code.=<<___;
.set reorder
la $Tbl,AES_Te # PIC-ified 'load address'
la $Tbl,AES_Te4 # PIC-ified 'load address'
bal _mips_AES_set_encrypt_key
@ -1110,7 +1436,7 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
___
$code.=<<___;
.set reorder
la $Tbl,AES_Te # PIC-ified 'load address'
la $Tbl,AES_Te4 # PIC-ified 'load address'
bal _mips_AES_set_encrypt_key
@ -1181,6 +1507,16 @@ $code.=<<___;
xor $tpb,$tp9,$tp2
xor $tpd,$tp9,$tp4
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
rotr $tp1,$tpd,16
xor $tpe,$tp2
rotr $tp2,$tp9,8
xor $tpe,$tp1
rotr $tp4,$tpb,24
xor $tpe,$tp2
lw $tp1,4($key) # modulo-scheduled
xor $tpe,$tp4
#else
_ror $tp1,$tpd,16
xor $tpe,$tp2
_ror $tp2,$tpd,-16
@ -1195,6 +1531,7 @@ $code.=<<___;
xor $tpe,$tp1
lw $tp1,4($key) # modulo-scheduled
xor $tpe,$tp2
#endif
sub $cnt,1
sw $tpe,0($key)
$PTR_ADD $key,4
@ -1225,7 +1562,7 @@ ___
# Tables are kept in endian-neutral manner
$code.=<<___;
.rdata
.align 6
.align 10
AES_Te:
.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0
.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
@ -1356,46 +1693,6 @@ AES_Te:
.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
.align 6
AES_Td:
.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0
.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
@ -1558,6 +1855,46 @@ AES_Td:
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
AES_Te4:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
___
foreach (split("\n",$code)) {
@ -1574,6 +1911,9 @@ foreach (split("\n",$code)) {
s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
: eval("24-$3"))/e or
s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3)
: eval("24-$3"))/e or
s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
: eval("$3*-1"))/e or
@ -1596,6 +1936,8 @@ foreach (split("\n",$code)) {
sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
}
s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e if(!$big_endian);
print $_,"\n";
}

View File

@ -15,6 +15,10 @@
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...
# September 2012.
#
# Add MIPS32r2 code.
######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
@ -95,6 +99,10 @@ sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if (!$big_endian);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
wsbh @X[$i],@X[$i] # byte swap($i)
rotr @X[$i],@X[$i],16
#else
srl $t0,@X[$i],24 # byte swap($i)
srl $t1,@X[$i],8
andi $t2,@X[$i],0xFF00
@ -104,8 +112,22 @@ $code.=<<___ if (!$big_endian);
or @X[$i],$t0
or $t1,$t2
or @X[$i],$t1
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
addu $e,$K # $i
xor $t0,$c,$d
rotr $t1,$a,27
lwl @X[$j],$j*4+$MSB($inp)
and $t0,$b
addu $e,$t1
lwr @X[$j],$j*4+$LSB($inp)
xor $t0,$d
addu $e,@X[$i]
rotr $b,$b,2
addu $e,$t0
#else
lwl @X[$j],$j*4+$MSB($inp)
sll $t0,$a,5 # $i
addu $e,$K
@ -121,6 +143,7 @@ $code.=<<___;
addu $e,@X[$i]
or $b,$t2
addu $e,$t0
#endif
___
}
@ -129,6 +152,10 @@ my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if (!$big_endian && $i==15);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
wsbh @X[$i],@X[$i] # byte swap($i)
rotr @X[$i],@X[$i],16
#else
srl $t0,@X[$i],24 # byte swap($i)
srl $t1,@X[$i],8
andi $t2,@X[$i],0xFF00
@ -138,8 +165,24 @@ $code.=<<___ if (!$big_endian && $i==15);
or @X[$i],$t0
or @X[$i],$t1
or @X[$i],$t2
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
addu $e,$K # $i
xor @X[$j%16],@X[($j+2)%16]
xor $t0,$c,$d
rotr $t1,$a,27
xor @X[$j%16],@X[($j+8)%16]
and $t0,$b
addu $e,$t1
xor @X[$j%16],@X[($j+13)%16]
xor $t0,$d
addu $e,@X[$i%16]
rotr @X[$j%16],@X[$j%16],31
rotr $b,$b,2
addu $e,$t0
#else
xor @X[$j%16],@X[($j+2)%16]
sll $t0,$a,5 # $i
addu $e,$K
@ -159,6 +202,7 @@ $code.=<<___;
addu $e,@X[$i%16]
or $b,$t2
addu $e,$t0
#endif
___
}
@ -166,6 +210,20 @@ sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
xor @X[$j%16],@X[($j+2)%16]
addu $e,$K # $i
rotr $t1,$a,27
xor @X[$j%16],@X[($j+8)%16]
xor $t0,$c,$d
addu $e,$t1
xor @X[$j%16],@X[($j+13)%16]
xor $t0,$b
addu $e,@X[$i%16]
rotr @X[$j%16],@X[$j%16],31
rotr $b,$b,2
addu $e,$t0
#else
xor @X[$j%16],@X[($j+2)%16]
sll $t0,$a,5 # $i
addu $e,$K
@ -184,8 +242,24 @@ $code.=<<___ if ($i<79);
or @X[$j%16],$t1
or $b,$t2
addu $e,$t0
#endif
___
$code.=<<___ if ($i==79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
lw @X[0],0($ctx)
addu $e,$K # $i
lw @X[1],4($ctx)
rotr $t1,$a,27
lw @X[2],8($ctx)
xor $t0,$c,$d
addu $e,$t1
lw @X[3],12($ctx)
xor $t0,$b
addu $e,@X[$i%16]
lw @X[4],16($ctx)
rotr $b,$b,2
addu $e,$t0
#else
lw @X[0],0($ctx)
sll $t0,$a,5 # $i
addu $e,$K
@ -203,6 +277,7 @@ $code.=<<___ if ($i==79);
addu $e,@X[$i%16]
or $b,$t2
addu $e,$t0
#endif
___
}
@ -210,6 +285,22 @@ sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
addu $e,$K # $i
and $t0,$c,$d
xor @X[$j%16],@X[($j+2)%16]
rotr $t1,$a,27
addu $e,$t0
xor @X[$j%16],@X[($j+8)%16]
xor $t0,$c,$d
addu $e,$t1
xor @X[$j%16],@X[($j+13)%16]
and $t0,$b
addu $e,@X[$i%16]
rotr @X[$j%16],@X[$j%16],31
rotr $b,$b,2
addu $e,$t0
#else
xor @X[$j%16],@X[($j+2)%16]
sll $t0,$a,5 # $i
addu $e,$K
@ -230,6 +321,7 @@ $code.=<<___ if ($i<79);
addu $e,@X[$i%16]
or $b,$t2
addu $e,$t0
#endif
___
}

View File

@ -1,7 +1,7 @@
#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@ -17,6 +17,10 @@
# ~17%, but it comes for free, because it's same instruction sequence.
# Improvement coefficients are for aligned input.
# September 2012.
#
# Add MIPS[32|64]r2 code.
######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
@ -83,6 +87,7 @@ if ($output =~ /512/) {
$SLL="dsll"; # shift left logical
$SRL="dsrl"; # shift right logical
$ADDU="daddu";
$ROTR="drotr";
@Sigma0=(28,34,39);
@Sigma1=(14,18,41);
@sigma0=( 7, 1, 8); # right shift first
@ -97,6 +102,7 @@ if ($output =~ /512/) {
$SLL="sll"; # shift left logical
$SRL="srl"; # shift right logical
$ADDU="addu";
$ROTR="rotr";
@Sigma0=( 2,13,22);
@Sigma1=( 6,11,25);
@sigma0=( 3, 7,18); # right shift first
@ -124,6 +130,10 @@ $code.=<<___ if ($i<15);
${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp)
___
$code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
wsbh @X[0],@X[0] # byte swap($i)
rotr @X[0],@X[0],16
#else
srl $tmp0,@X[0],24 # byte swap($i)
srl $tmp1,@X[0],8
andi $tmp2,@X[0],0xFF00
@ -133,8 +143,13 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
or @X[0],$tmp0
or $tmp1,$tmp2
or @X[0],$tmp1
#endif
___
$code.=<<___ if (!$big_endian && $i<16 && $SZ==8);
#if defined(_MIPS_ARCH_MIPS64R2)
dsbh @X[0],@X[0] # byte swap($i)
dshd @X[0],@X[0]
#else
ori $tmp0,$zero,0xFF
dsll $tmp2,$tmp0,32
or $tmp0,$tmp2 # 0x000000FF000000FF
@ -153,8 +168,31 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==8);
dsrl $tmp1,@X[0],32
dsll @X[0],32
or @X[0],$tmp1
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
xor $tmp2,$f,$g # $i
$ROTR $tmp0,$e,@Sigma1[0]
$ADDU $T1,$X[0],$h
$ROTR $tmp1,$e,@Sigma1[1]
and $tmp2,$e
$ROTR $h,$e,@Sigma1[2]
xor $tmp0,$tmp1
$ROTR $tmp1,$a,@Sigma0[0]
xor $tmp2,$g # Ch(e,f,g)
xor $tmp0,$h # Sigma1(e)
$ROTR $h,$a,@Sigma0[1]
$ADDU $T1,$tmp2
$LD $tmp2,`$i*$SZ`($Ktbl) # K[$i]
xor $h,$tmp1
$ROTR $tmp1,$a,@Sigma0[2]
$ADDU $T1,$tmp0
and $tmp0,$b,$c
xor $h,$tmp1 # Sigma0(a)
xor $tmp1,$b,$c
#else
$ADDU $T1,$X[0],$h # $i
$SRL $h,$e,@Sigma1[0]
xor $tmp2,$f,$g
@ -184,16 +222,15 @@ $code.=<<___;
xor $h,$tmp1
$SLL $tmp1,$a,`$SZ*8-@Sigma0[0]`
xor $h,$tmp0
$ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer
and $tmp0,$b,$c
xor $h,$tmp1 # Sigma0(a)
or $tmp0,$a,$b
and $tmp1,$a,$b
and $tmp0,$c
or $tmp1,$tmp0 # Maj(a,b,c)
xor $tmp1,$b,$c
#endif
$ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer
$ADDU $h,$tmp0
and $tmp1,$a
$ADDU $T1,$tmp2 # +=K[$i]
$ADDU $h,$tmp1
$ADDU $h,$tmp1 # +=Maj(a,b,c)
$ADDU $d,$T1
$ADDU $h,$T1
___
@ -207,6 +244,20 @@ my $i=@_[0];
my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]);
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
$SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i)
$ROTR $tmp0,@X[1],@sigma0[1]
$ADDU @X[0],@X[9] # +=X[i+9]
xor $tmp2,$tmp0
$ROTR $tmp0,@X[1],@sigma0[2]
$SRL $tmp3,@X[14],@sigma1[0]
$ROTR $tmp1,@X[14],@sigma1[1]
xor $tmp2,$tmp0 # sigma0(X[i+1])
$ROTR $tmp0,@X[14],@sigma1[2]
xor $tmp3,$tmp1
$ADDU @X[0],$tmp2
#else
$SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i)
$ADDU @X[0],@X[9] # +=X[i+9]
$SLL $tmp1,@X[1],`$SZ*8-@sigma0[2]`
@ -227,7 +278,7 @@ $code.=<<___;
xor $tmp3,$tmp0
$SRL $tmp0,@X[14],@sigma1[2]
xor $tmp3,$tmp1
#endif
xor $tmp3,$tmp0 # sigma1(X[i+14])
$ADDU @X[0],$tmp3
___