MIPS assembly pack: add MIPS[32|64]R2 code.

This commit is contained in:
Andy Polyakov 2012-09-15 11:18:20 +00:00
parent 9b222748e7
commit 8df5518bd9
3 changed files with 556 additions and 71 deletions

View File

@ -20,6 +20,10 @@
# thing about this module is its endian neutrality, which means that # thing about this module is its endian neutrality, which means that
# it processes data without ever changing byte order... # it processes data without ever changing byte order...
# September 2012
#
# Add MIPS32R2 code.
###################################################################### ######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most # There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if # widely used. Then there is a new contender: NUBI. It appears that if
@ -52,6 +56,7 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) { if ($flavour =~ /64|n32/i) {
$PTR_ADD="dadd"; # incidentally works even on n32 $PTR_ADD="dadd"; # incidentally works even on n32
$PTR_SUB="dsub"; # incidentally works even on n32 $PTR_SUB="dsub"; # incidentally works even on n32
$PTR_INS="dins";
$REG_S="sd"; $REG_S="sd";
$REG_L="ld"; $REG_L="ld";
$PTR_SLL="dsll"; # incidentally works even on n32 $PTR_SLL="dsll"; # incidentally works even on n32
@ -59,6 +64,7 @@ if ($flavour =~ /64|n32/i) {
} else { } else {
$PTR_ADD="add"; $PTR_ADD="add";
$PTR_SUB="sub"; $PTR_SUB="sub";
$PTR_INS="ins";
$REG_S="sw"; $REG_S="sw";
$REG_L="lw"; $REG_L="lw";
$PTR_SLL="sll"; $PTR_SLL="sll";
@ -138,6 +144,16 @@ _mips_AES_encrypt:
$PTR_ADD $i1,$Tbl $PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl $PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl $PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
lw $t0,0($i0) # Te1[s1>>16]
_xtr $i0,$s2,8-2
lw $t1,0($i1) # Te1[s2>>16]
_xtr $i1,$s3,8-2
lw $t2,0($i2) # Te1[s3>>16]
_xtr $i2,$s0,8-2
lw $t3,0($i3) # Te1[s0>>16]
_xtr $i3,$s1,8-2
#else
lwl $t0,3($i0) # Te1[s1>>16] lwl $t0,3($i0) # Te1[s1>>16]
lwl $t1,3($i1) # Te1[s2>>16] lwl $t1,3($i1) # Te1[s2>>16]
lwl $t2,3($i2) # Te1[s3>>16] lwl $t2,3($i2) # Te1[s3>>16]
@ -150,6 +166,29 @@ _mips_AES_encrypt:
_xtr $i2,$s0,8-2 _xtr $i2,$s0,8-2
lwr $t3,2($i3) # Te1[s0>>16] lwr $t3,2($i3) # Te1[s0>>16]
_xtr $i3,$s1,8-2 _xtr $i3,$s1,8-2
#endif
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
rotr $t0,$t0,8
rotr $t1,$t1,8
rotr $t2,$t2,8
rotr $t3,$t3,8
# if defined(_MIPSEL)
lw $t4,0($i0) # Te2[s2>>8]
_xtr $i0,$s3,0-2
lw $t5,0($i1) # Te2[s3>>8]
_xtr $i1,$s0,0-2
lw $t6,0($i2) # Te2[s0>>8]
_xtr $i2,$s1,0-2
lw $t7,0($i3) # Te2[s1>>8]
_xtr $i3,$s2,0-2
and $i0,0x3fc and $i0,0x3fc
and $i1,0x3fc and $i1,0x3fc
@ -159,6 +198,52 @@ _mips_AES_encrypt:
$PTR_ADD $i1,$Tbl $PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl $PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl $PTR_ADD $i3,$Tbl
lw $t8,0($i0) # Te3[s3]
$PTR_INS $i0,$s0,2,8
lw $t9,0($i1) # Te3[s0]
$PTR_INS $i1,$s1,2,8
lw $t10,0($i2) # Te3[s1]
$PTR_INS $i2,$s2,2,8
lw $t11,0($i3) # Te3[s2]
$PTR_INS $i3,$s3,2,8
# else
lw $t4,0($i0) # Te2[s2>>8]
$PTR_INS $i0,$s3,2,8
lw $t5,0($i1) # Te2[s3>>8]
$PTR_INS $i1,$s0,2,8
lw $t6,0($i2) # Te2[s0>>8]
$PTR_INS $i2,$s1,2,8
lw $t7,0($i3) # Te2[s1>>8]
$PTR_INS $i3,$s2,2,8
lw $t8,0($i0) # Te3[s3]
_xtr $i0,$s0,24-2
lw $t9,0($i1) # Te3[s0]
_xtr $i1,$s1,24-2
lw $t10,0($i2) # Te3[s1]
_xtr $i2,$s2,24-2
lw $t11,0($i3) # Te3[s2]
_xtr $i3,$s3,24-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
# endif
rotr $t4,$t4,16
rotr $t5,$t5,16
rotr $t6,$t6,16
rotr $t7,$t7,16
rotr $t8,$t8,24
rotr $t9,$t9,24
rotr $t10,$t10,24
rotr $t11,$t11,24
#else
lwl $t4,2($i0) # Te2[s2>>8] lwl $t4,2($i0) # Te2[s2>>8]
lwl $t5,2($i1) # Te2[s3>>8] lwl $t5,2($i1) # Te2[s3>>8]
lwl $t6,2($i2) # Te2[s0>>8] lwl $t6,2($i2) # Te2[s0>>8]
@ -201,6 +286,7 @@ _mips_AES_encrypt:
$PTR_ADD $i1,$Tbl $PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl $PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl $PTR_ADD $i3,$Tbl
#endif
xor $t0,$t4 xor $t0,$t4
lw $t4,0($i0) # Te0[s0>>24] lw $t4,0($i0) # Te0[s0>>24]
xor $t1,$t5 xor $t1,$t5
@ -263,6 +349,89 @@ _mips_AES_encrypt:
$PTR_ADD $i1,$Tbl $PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl $PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl $PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
# if defined(_MIPSEL)
lbu $t4,2($i0) # Te4[s2>>8]
$PTR_INS $i0,$s0,2,8
lbu $t5,2($i1) # Te4[s3>>8]
$PTR_INS $i1,$s1,2,8
lbu $t6,2($i2) # Te4[s0>>8]
$PTR_INS $i2,$s2,2,8
lbu $t7,2($i3) # Te4[s1>>8]
$PTR_INS $i3,$s3,2,8
lbu $t8,2($i0) # Te4[s0>>24]
_xtr $i0,$s3,0-2
lbu $t9,2($i1) # Te4[s1>>24]
_xtr $i1,$s0,0-2
lbu $t10,2($i2) # Te4[s2>>24]
_xtr $i2,$s1,0-2
lbu $t11,2($i3) # Te4[s3>>24]
_xtr $i3,$s2,0-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
# else
lbu $t4,2($i0) # Te4[s2>>8]
_xtr $i0,$s0,24-2
lbu $t5,2($i1) # Te4[s3>>8]
_xtr $i1,$s1,24-2
lbu $t6,2($i2) # Te4[s0>>8]
_xtr $i2,$s2,24-2
lbu $t7,2($i3) # Te4[s1>>8]
_xtr $i3,$s3,24-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t8,2($i0) # Te4[s0>>24]
$PTR_INS $i0,$s3,2,8
lbu $t9,2($i1) # Te4[s1>>24]
$PTR_INS $i1,$s0,2,8
lbu $t10,2($i2) # Te4[s2>>24]
$PTR_INS $i2,$s1,2,8
lbu $t11,2($i3) # Te4[s3>>24]
$PTR_INS $i3,$s2,2,8
# endif
_ins $t0,16
_ins $t1,16
_ins $t2,16
_ins $t3,16
_ins2 $t0,$t4,8
lbu $t4,2($i0) # Te4[s3]
_ins2 $t1,$t5,8
lbu $t5,2($i1) # Te4[s0]
_ins2 $t2,$t6,8
lbu $t6,2($i2) # Te4[s1]
_ins2 $t3,$t7,8
lbu $t7,2($i3) # Te4[s2]
_ins2 $t0,$t8,24
lw $s0,0($key0)
_ins2 $t1,$t9,24
lw $s1,4($key0)
_ins2 $t2,$t10,24
lw $s2,8($key0)
_ins2 $t3,$t11,24
lw $s3,12($key0)
_ins2 $t0,$t4,0
_ins2 $t1,$t5,0
_ins2 $t2,$t6,0
_ins2 $t3,$t7,0
#else
lbu $t4,2($i0) # Te4[s2>>8] lbu $t4,2($i0) # Te4[s2>>8]
_xtr $i0,$s0,24-2 _xtr $i0,$s0,24-2
lbu $t5,2($i1) # Te4[s3>>8] lbu $t5,2($i1) # Te4[s3>>8]
@ -340,7 +509,7 @@ _mips_AES_encrypt:
xor $t1,$t5 xor $t1,$t5
xor $t2,$t6 xor $t2,$t6
xor $t3,$t7 xor $t3,$t7
#endif
xor $s0,$t0 xor $s0,$t0
xor $s1,$t1 xor $s1,$t1
xor $s2,$t2 xor $s2,$t2
@ -465,6 +634,16 @@ _mips_AES_decrypt:
$PTR_ADD $i1,$Tbl $PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl $PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl $PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
lw $t0,0($i0) # Td1[s3>>16]
_xtr $i0,$s2,8-2
lw $t1,0($i1) # Td1[s0>>16]
_xtr $i1,$s3,8-2
lw $t2,0($i2) # Td1[s1>>16]
_xtr $i2,$s0,8-2
lw $t3,0($i3) # Td1[s2>>16]
_xtr $i3,$s1,8-2
#else
lwl $t0,3($i0) # Td1[s3>>16] lwl $t0,3($i0) # Td1[s3>>16]
lwl $t1,3($i1) # Td1[s0>>16] lwl $t1,3($i1) # Td1[s0>>16]
lwl $t2,3($i2) # Td1[s1>>16] lwl $t2,3($i2) # Td1[s1>>16]
@ -477,6 +656,7 @@ _mips_AES_decrypt:
_xtr $i2,$s0,8-2 _xtr $i2,$s0,8-2
lwr $t3,2($i3) # Td1[s2>>16] lwr $t3,2($i3) # Td1[s2>>16]
_xtr $i3,$s1,8-2 _xtr $i3,$s1,8-2
#endif
and $i0,0x3fc and $i0,0x3fc
and $i1,0x3fc and $i1,0x3fc
@ -486,6 +666,75 @@ _mips_AES_decrypt:
$PTR_ADD $i1,$Tbl $PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl $PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl $PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
rotr $t0,$t0,8
rotr $t1,$t1,8
rotr $t2,$t2,8
rotr $t3,$t3,8
# if defined(_MIPSEL)
lw $t4,0($i0) # Td2[s2>>8]
_xtr $i0,$s1,0-2
lw $t5,0($i1) # Td2[s3>>8]
_xtr $i1,$s2,0-2
lw $t6,0($i2) # Td2[s0>>8]
_xtr $i2,$s3,0-2
lw $t7,0($i3) # Td2[s1>>8]
_xtr $i3,$s0,0-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lw $t8,0($i0) # Td3[s1]
$PTR_INS $i0,$s0,2,8
lw $t9,0($i1) # Td3[s2]
$PTR_INS $i1,$s1,2,8
lw $t10,0($i2) # Td3[s3]
$PTR_INS $i2,$s2,2,8
lw $t11,0($i3) # Td3[s0]
$PTR_INS $i3,$s3,2,8
#else
lw $t4,0($i0) # Td2[s2>>8]
$PTR_INS $i0,$s1,2,8
lw $t5,0($i1) # Td2[s3>>8]
$PTR_INS $i1,$s2,2,8
lw $t6,0($i2) # Td2[s0>>8]
$PTR_INS $i2,$s3,2,8
lw $t7,0($i3) # Td2[s1>>8]
$PTR_INS $i3,$s0,2,8
lw $t8,0($i0) # Td3[s1]
_xtr $i0,$s0,24-2
lw $t9,0($i1) # Td3[s2]
_xtr $i1,$s1,24-2
lw $t10,0($i2) # Td3[s3]
_xtr $i2,$s2,24-2
lw $t11,0($i3) # Td3[s0]
_xtr $i3,$s3,24-2
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
#endif
rotr $t4,$t4,16
rotr $t5,$t5,16
rotr $t6,$t6,16
rotr $t7,$t7,16
rotr $t8,$t8,24
rotr $t9,$t9,24
rotr $t10,$t10,24
rotr $t11,$t11,24
#else
lwl $t4,2($i0) # Td2[s2>>8] lwl $t4,2($i0) # Td2[s2>>8]
lwl $t5,2($i1) # Td2[s3>>8] lwl $t5,2($i1) # Td2[s3>>8]
lwl $t6,2($i2) # Td2[s0>>8] lwl $t6,2($i2) # Td2[s0>>8]
@ -528,6 +777,7 @@ _mips_AES_decrypt:
$PTR_ADD $i1,$Tbl $PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl $PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl $PTR_ADD $i3,$Tbl
#endif
xor $t0,$t4 xor $t0,$t4
lw $t4,0($i0) # Td0[s0>>24] lw $t4,0($i0) # Td0[s0>>24]
@ -601,6 +851,81 @@ _mips_AES_decrypt:
$PTR_ADD $i1,$Tbl $PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl $PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl $PTR_ADD $i3,$Tbl
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
# if defined(_MIPSEL)
lbu $t4,1024($i0) # Td4[s2>>8]
$PTR_INS $i0,$s0,0,8
lbu $t5,1024($i1) # Td4[s3>>8]
$PTR_INS $i1,$s1,0,8
lbu $t6,1024($i2) # Td4[s0>>8]
$PTR_INS $i2,$s2,0,8
lbu $t7,1024($i3) # Td4[s1>>8]
$PTR_INS $i3,$s3,0,8
lbu $t8,1024($i0) # Td4[s0>>24]
_xtr $i0,$s1,0
lbu $t9,1024($i1) # Td4[s1>>24]
_xtr $i1,$s2,0
lbu $t10,1024($i2) # Td4[s2>>24]
_xtr $i2,$s3,0
lbu $t11,1024($i3) # Td4[s3>>24]
_xtr $i3,$s0,0
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
# else
lbu $t4,1024($i0) # Td4[s2>>8]
_xtr $i0,$s0,24
lbu $t5,1024($i1) # Td4[s3>>8]
_xtr $i1,$s1,24
lbu $t6,1024($i2) # Td4[s0>>8]
_xtr $i2,$s2,24
lbu $t7,1024($i3) # Td4[s1>>8]
_xtr $i3,$s3,24
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t8,1024($i0) # Td4[s0>>24]
$PTR_INS $i0,$s1,0,8
lbu $t9,1024($i1) # Td4[s1>>24]
$PTR_INS $i1,$s2,0,8
lbu $t10,1024($i2) # Td4[s2>>24]
$PTR_INS $i2,$s3,0,8
lbu $t11,1024($i3) # Td4[s3>>24]
$PTR_INS $i3,$s0,0,8
# endif
_ins $t0,16
_ins $t1,16
_ins $t2,16
_ins $t3,16
_ins2 $t0,$t4,8
lbu $t4,1024($i0) # Td4[s1]
_ins2 $t1,$t5,8
lbu $t5,1024($i1) # Td4[s2]
_ins2 $t2,$t6,8
lbu $t6,1024($i2) # Td4[s3]
_ins2 $t3,$t7,8
lbu $t7,1024($i3) # Td4[s0]
_ins2 $t0,$t8,24
lw $s0,0($key0)
_ins2 $t1,$t9,24
lw $s1,4($key0)
_ins2 $t2,$t10,24
lw $s2,8($key0)
_ins2 $t3,$t11,24
lw $s3,12($key0)
_ins2 $t0,$t4,0
_ins2 $t1,$t5,0
_ins2 $t2,$t6,0
_ins2 $t3,$t7,0
#else
lbu $t4,1024($i0) # Td4[s2>>8] lbu $t4,1024($i0) # Td4[s2>>8]
_xtr $i0,$s0,24 _xtr $i0,$s0,24
lbu $t5,1024($i1) # Td4[s3>>8] lbu $t5,1024($i1) # Td4[s3>>8]
@ -670,6 +995,7 @@ _mips_AES_decrypt:
xor $t1,$t5 xor $t1,$t5
xor $t2,$t6 xor $t2,$t6
xor $t3,$t7 xor $t3,$t7
#endif
xor $s0,$t0 xor $s0,$t0
xor $s1,$t1 xor $s1,$t1
@ -782,7 +1108,7 @@ _mips_AES_set_encrypt_key:
beqz $inp,.Lekey_done beqz $inp,.Lekey_done
li $t0,-1 li $t0,-1
beqz $key,.Lekey_done beqz $key,.Lekey_done
$PTR_ADD $rcon,$Tbl,1024+256 $PTR_ADD $rcon,$Tbl,256
.set reorder .set reorder
lwl $rk0,0+$MSB($inp) # load 128 bits lwl $rk0,0+$MSB($inp) # load 128 bits
@ -834,10 +1160,10 @@ _mips_AES_set_encrypt_key:
$PTR_ADD $i1,$Tbl $PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl $PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl $PTR_ADD $i3,$Tbl
lbu $i0,1024($i0) lbu $i0,0($i0)
lbu $i1,1024($i1) lbu $i1,0($i1)
lbu $i2,1024($i2) lbu $i2,0($i2)
lbu $i3,1024($i3) lbu $i3,0($i3)
sw $rk0,0($key) sw $rk0,0($key)
sw $rk1,4($key) sw $rk1,4($key)
@ -889,10 +1215,10 @@ _mips_AES_set_encrypt_key:
$PTR_ADD $i1,$Tbl $PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl $PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl $PTR_ADD $i3,$Tbl
lbu $i0,1024($i0) lbu $i0,0($i0)
lbu $i1,1024($i1) lbu $i1,0($i1)
lbu $i2,1024($i2) lbu $i2,0($i2)
lbu $i3,1024($i3) lbu $i3,0($i3)
sw $rk0,0($key) sw $rk0,0($key)
sw $rk1,4($key) sw $rk1,4($key)
@ -948,10 +1274,10 @@ _mips_AES_set_encrypt_key:
$PTR_ADD $i1,$Tbl $PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl $PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl $PTR_ADD $i3,$Tbl
lbu $i0,1024($i0) lbu $i0,0($i0)
lbu $i1,1024($i1) lbu $i1,0($i1)
lbu $i2,1024($i2) lbu $i2,0($i2)
lbu $i3,1024($i3) lbu $i3,0($i3)
sw $rk0,0($key) sw $rk0,0($key)
sw $rk1,4($key) sw $rk1,4($key)
@ -990,10 +1316,10 @@ _mips_AES_set_encrypt_key:
$PTR_ADD $i1,$Tbl $PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl $PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl $PTR_ADD $i3,$Tbl
lbu $i0,1024($i0) lbu $i0,0($i0)
lbu $i1,1024($i1) lbu $i1,0($i1)
lbu $i2,1024($i2) lbu $i2,0($i2)
lbu $i3,1024($i3) lbu $i3,0($i3)
sll $i0,24 sll $i0,24
sll $i1,16 sll $i1,16
sll $i2,8 sll $i2,8
@ -1055,7 +1381,7 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
___ ___
$code.=<<___; $code.=<<___;
.set reorder .set reorder
la $Tbl,AES_Te # PIC-ified 'load address' la $Tbl,AES_Te4 # PIC-ified 'load address'
bal _mips_AES_set_encrypt_key bal _mips_AES_set_encrypt_key
@ -1110,7 +1436,7 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
___ ___
$code.=<<___; $code.=<<___;
.set reorder .set reorder
la $Tbl,AES_Te # PIC-ified 'load address' la $Tbl,AES_Te4 # PIC-ified 'load address'
bal _mips_AES_set_encrypt_key bal _mips_AES_set_encrypt_key
@ -1181,6 +1507,16 @@ $code.=<<___;
xor $tpb,$tp9,$tp2 xor $tpb,$tp9,$tp2
xor $tpd,$tp9,$tp4 xor $tpd,$tp9,$tp4
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
rotr $tp1,$tpd,16
xor $tpe,$tp2
rotr $tp2,$tp9,8
xor $tpe,$tp1
rotr $tp4,$tpb,24
xor $tpe,$tp2
lw $tp1,4($key) # modulo-scheduled
xor $tpe,$tp4
#else
_ror $tp1,$tpd,16 _ror $tp1,$tpd,16
xor $tpe,$tp2 xor $tpe,$tp2
_ror $tp2,$tpd,-16 _ror $tp2,$tpd,-16
@ -1195,6 +1531,7 @@ $code.=<<___;
xor $tpe,$tp1 xor $tpe,$tp1
lw $tp1,4($key) # modulo-scheduled lw $tp1,4($key) # modulo-scheduled
xor $tpe,$tp2 xor $tpe,$tp2
#endif
sub $cnt,1 sub $cnt,1
sw $tpe,0($key) sw $tpe,0($key)
$PTR_ADD $key,4 $PTR_ADD $key,4
@ -1225,7 +1562,7 @@ ___
# Tables are kept in endian-neutral manner # Tables are kept in endian-neutral manner
$code.=<<___; $code.=<<___;
.rdata .rdata
.align 6 .align 10
AES_Te: AES_Te:
.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0 .byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0
.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d .byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
@ -1356,46 +1693,6 @@ AES_Te:
.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc .byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a .byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
.align 6
AES_Td: AES_Td:
.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0 .byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0
.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96 .byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
@ -1558,6 +1855,46 @@ AES_Td:
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
AES_Te4:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
___ ___
foreach (split("\n",$code)) { foreach (split("\n",$code)) {
@ -1574,6 +1911,9 @@ foreach (split("\n",$code)) {
s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/ s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
sprintf("sll\t$1,$2,%d",$big_endian ? eval($3) sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
: eval("24-$3"))/e or : eval("24-$3"))/e or
s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3)
: eval("24-$3"))/e or
s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/ s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
sprintf("srl\t$1,$2,%d",$big_endian ? eval($3) sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
: eval("$3*-1"))/e or : eval("$3*-1"))/e or
@ -1596,6 +1936,8 @@ foreach (split("\n",$code)) {
sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e; sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
} }
s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e if(!$big_endian);
print $_,"\n"; print $_,"\n";
} }

View File

@ -15,6 +15,10 @@
# compatible subroutine. There is room for minor optimization on # compatible subroutine. There is room for minor optimization on
# little-endian platforms... # little-endian platforms...
# September 2012.
#
# Add MIPS32r2 code.
###################################################################### ######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most # There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if # widely used. Then there is a new contender: NUBI. It appears that if
@ -95,6 +99,10 @@ sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_; my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1; my $j=$i+1;
$code.=<<___ if (!$big_endian); $code.=<<___ if (!$big_endian);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
wsbh @X[$i],@X[$i] # byte swap($i)
rotr @X[$i],@X[$i],16
#else
srl $t0,@X[$i],24 # byte swap($i) srl $t0,@X[$i],24 # byte swap($i)
srl $t1,@X[$i],8 srl $t1,@X[$i],8
andi $t2,@X[$i],0xFF00 andi $t2,@X[$i],0xFF00
@ -104,8 +112,22 @@ $code.=<<___ if (!$big_endian);
or @X[$i],$t0 or @X[$i],$t0
or $t1,$t2 or $t1,$t2
or @X[$i],$t1 or @X[$i],$t1
#endif
___ ___
$code.=<<___; $code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
addu $e,$K # $i
xor $t0,$c,$d
rotr $t1,$a,27
lwl @X[$j],$j*4+$MSB($inp)
and $t0,$b
addu $e,$t1
lwr @X[$j],$j*4+$LSB($inp)
xor $t0,$d
addu $e,@X[$i]
rotr $b,$b,2
addu $e,$t0
#else
lwl @X[$j],$j*4+$MSB($inp) lwl @X[$j],$j*4+$MSB($inp)
sll $t0,$a,5 # $i sll $t0,$a,5 # $i
addu $e,$K addu $e,$K
@ -121,6 +143,7 @@ $code.=<<___;
addu $e,@X[$i] addu $e,@X[$i]
or $b,$t2 or $b,$t2
addu $e,$t0 addu $e,$t0
#endif
___ ___
} }
@ -129,6 +152,10 @@ my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1; my $j=$i+1;
$code.=<<___ if (!$big_endian && $i==15); $code.=<<___ if (!$big_endian && $i==15);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
wsbh @X[$i],@X[$i] # byte swap($i)
rotr @X[$i],@X[$i],16
#else
srl $t0,@X[$i],24 # byte swap($i) srl $t0,@X[$i],24 # byte swap($i)
srl $t1,@X[$i],8 srl $t1,@X[$i],8
andi $t2,@X[$i],0xFF00 andi $t2,@X[$i],0xFF00
@ -138,8 +165,24 @@ $code.=<<___ if (!$big_endian && $i==15);
or @X[$i],$t0 or @X[$i],$t0
or @X[$i],$t1 or @X[$i],$t1
or @X[$i],$t2 or @X[$i],$t2
#endif
___ ___
$code.=<<___; $code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
addu $e,$K # $i
xor @X[$j%16],@X[($j+2)%16]
xor $t0,$c,$d
rotr $t1,$a,27
xor @X[$j%16],@X[($j+8)%16]
and $t0,$b
addu $e,$t1
xor @X[$j%16],@X[($j+13)%16]
xor $t0,$d
addu $e,@X[$i%16]
rotr @X[$j%16],@X[$j%16],31
rotr $b,$b,2
addu $e,$t0
#else
xor @X[$j%16],@X[($j+2)%16] xor @X[$j%16],@X[($j+2)%16]
sll $t0,$a,5 # $i sll $t0,$a,5 # $i
addu $e,$K addu $e,$K
@ -159,6 +202,7 @@ $code.=<<___;
addu $e,@X[$i%16] addu $e,@X[$i%16]
or $b,$t2 or $b,$t2
addu $e,$t0 addu $e,$t0
#endif
___ ___
} }
@ -166,6 +210,20 @@ sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_; my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1; my $j=$i+1;
$code.=<<___ if ($i<79); $code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
xor @X[$j%16],@X[($j+2)%16]
addu $e,$K # $i
rotr $t1,$a,27
xor @X[$j%16],@X[($j+8)%16]
xor $t0,$c,$d
addu $e,$t1
xor @X[$j%16],@X[($j+13)%16]
xor $t0,$b
addu $e,@X[$i%16]
rotr @X[$j%16],@X[$j%16],31
rotr $b,$b,2
addu $e,$t0
#else
xor @X[$j%16],@X[($j+2)%16] xor @X[$j%16],@X[($j+2)%16]
sll $t0,$a,5 # $i sll $t0,$a,5 # $i
addu $e,$K addu $e,$K
@ -184,8 +242,24 @@ $code.=<<___ if ($i<79);
or @X[$j%16],$t1 or @X[$j%16],$t1
or $b,$t2 or $b,$t2
addu $e,$t0 addu $e,$t0
#endif
___ ___
$code.=<<___ if ($i==79); $code.=<<___ if ($i==79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
lw @X[0],0($ctx)
addu $e,$K # $i
lw @X[1],4($ctx)
rotr $t1,$a,27
lw @X[2],8($ctx)
xor $t0,$c,$d
addu $e,$t1
lw @X[3],12($ctx)
xor $t0,$b
addu $e,@X[$i%16]
lw @X[4],16($ctx)
rotr $b,$b,2
addu $e,$t0
#else
lw @X[0],0($ctx) lw @X[0],0($ctx)
sll $t0,$a,5 # $i sll $t0,$a,5 # $i
addu $e,$K addu $e,$K
@ -203,6 +277,7 @@ $code.=<<___ if ($i==79);
addu $e,@X[$i%16] addu $e,@X[$i%16]
or $b,$t2 or $b,$t2
addu $e,$t0 addu $e,$t0
#endif
___ ___
} }
@ -210,6 +285,22 @@ sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_; my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1; my $j=$i+1;
$code.=<<___ if ($i<79); $code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
addu $e,$K # $i
and $t0,$c,$d
xor @X[$j%16],@X[($j+2)%16]
rotr $t1,$a,27
addu $e,$t0
xor @X[$j%16],@X[($j+8)%16]
xor $t0,$c,$d
addu $e,$t1
xor @X[$j%16],@X[($j+13)%16]
and $t0,$b
addu $e,@X[$i%16]
rotr @X[$j%16],@X[$j%16],31
rotr $b,$b,2
addu $e,$t0
#else
xor @X[$j%16],@X[($j+2)%16] xor @X[$j%16],@X[($j+2)%16]
sll $t0,$a,5 # $i sll $t0,$a,5 # $i
addu $e,$K addu $e,$K
@ -230,6 +321,7 @@ $code.=<<___ if ($i<79);
addu $e,@X[$i%16] addu $e,@X[$i%16]
or $b,$t2 or $b,$t2
addu $e,$t0 addu $e,$t0
#endif
___ ___
} }

View File

@ -1,7 +1,7 @@
#!/usr/bin/env perl #!/usr/bin/env perl
# ==================================================================== # ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and # project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further # CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/. # details see http://www.openssl.org/~appro/cryptogams/.
@ -17,6 +17,10 @@
# ~17%, but it comes for free, because it's same instruction sequence. # ~17%, but it comes for free, because it's same instruction sequence.
# Improvement coefficients are for aligned input. # Improvement coefficients are for aligned input.
# September 2012.
#
# Add MIPS[32|64]r2 code.
###################################################################### ######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most # There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if # widely used. Then there is a new contender: NUBI. It appears that if
@ -83,6 +87,7 @@ if ($output =~ /512/) {
$SLL="dsll"; # shift left logical $SLL="dsll"; # shift left logical
$SRL="dsrl"; # shift right logical $SRL="dsrl"; # shift right logical
$ADDU="daddu"; $ADDU="daddu";
$ROTR="drotr";
@Sigma0=(28,34,39); @Sigma0=(28,34,39);
@Sigma1=(14,18,41); @Sigma1=(14,18,41);
@sigma0=( 7, 1, 8); # right shift first @sigma0=( 7, 1, 8); # right shift first
@ -97,6 +102,7 @@ if ($output =~ /512/) {
$SLL="sll"; # shift left logical $SLL="sll"; # shift left logical
$SRL="srl"; # shift right logical $SRL="srl"; # shift right logical
$ADDU="addu"; $ADDU="addu";
$ROTR="rotr";
@Sigma0=( 2,13,22); @Sigma0=( 2,13,22);
@Sigma1=( 6,11,25); @Sigma1=( 6,11,25);
@sigma0=( 3, 7,18); # right shift first @sigma0=( 3, 7,18); # right shift first
@ -124,6 +130,10 @@ $code.=<<___ if ($i<15);
${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp) ${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp)
___ ___
$code.=<<___ if (!$big_endian && $i<16 && $SZ==4); $code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
wsbh @X[0],@X[0] # byte swap($i)
rotr @X[0],@X[0],16
#else
srl $tmp0,@X[0],24 # byte swap($i) srl $tmp0,@X[0],24 # byte swap($i)
srl $tmp1,@X[0],8 srl $tmp1,@X[0],8
andi $tmp2,@X[0],0xFF00 andi $tmp2,@X[0],0xFF00
@ -133,8 +143,13 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
or @X[0],$tmp0 or @X[0],$tmp0
or $tmp1,$tmp2 or $tmp1,$tmp2
or @X[0],$tmp1 or @X[0],$tmp1
#endif
___ ___
$code.=<<___ if (!$big_endian && $i<16 && $SZ==8); $code.=<<___ if (!$big_endian && $i<16 && $SZ==8);
#if defined(_MIPS_ARCH_MIPS64R2)
dsbh @X[0],@X[0] # byte swap($i)
dshd @X[0],@X[0]
#else
ori $tmp0,$zero,0xFF ori $tmp0,$zero,0xFF
dsll $tmp2,$tmp0,32 dsll $tmp2,$tmp0,32
or $tmp0,$tmp2 # 0x000000FF000000FF or $tmp0,$tmp2 # 0x000000FF000000FF
@ -153,8 +168,31 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==8);
dsrl $tmp1,@X[0],32 dsrl $tmp1,@X[0],32
dsll @X[0],32 dsll @X[0],32
or @X[0],$tmp1 or @X[0],$tmp1
#endif
___ ___
$code.=<<___; $code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
xor $tmp2,$f,$g # $i
$ROTR $tmp0,$e,@Sigma1[0]
$ADDU $T1,$X[0],$h
$ROTR $tmp1,$e,@Sigma1[1]
and $tmp2,$e
$ROTR $h,$e,@Sigma1[2]
xor $tmp0,$tmp1
$ROTR $tmp1,$a,@Sigma0[0]
xor $tmp2,$g # Ch(e,f,g)
xor $tmp0,$h # Sigma1(e)
$ROTR $h,$a,@Sigma0[1]
$ADDU $T1,$tmp2
$LD $tmp2,`$i*$SZ`($Ktbl) # K[$i]
xor $h,$tmp1
$ROTR $tmp1,$a,@Sigma0[2]
$ADDU $T1,$tmp0
and $tmp0,$b,$c
xor $h,$tmp1 # Sigma0(a)
xor $tmp1,$b,$c
#else
$ADDU $T1,$X[0],$h # $i $ADDU $T1,$X[0],$h # $i
$SRL $h,$e,@Sigma1[0] $SRL $h,$e,@Sigma1[0]
xor $tmp2,$f,$g xor $tmp2,$f,$g
@ -184,16 +222,15 @@ $code.=<<___;
xor $h,$tmp1 xor $h,$tmp1
$SLL $tmp1,$a,`$SZ*8-@Sigma0[0]` $SLL $tmp1,$a,`$SZ*8-@Sigma0[0]`
xor $h,$tmp0 xor $h,$tmp0
$ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer and $tmp0,$b,$c
xor $h,$tmp1 # Sigma0(a) xor $h,$tmp1 # Sigma0(a)
xor $tmp1,$b,$c
or $tmp0,$a,$b #endif
and $tmp1,$a,$b $ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer
and $tmp0,$c $ADDU $h,$tmp0
or $tmp1,$tmp0 # Maj(a,b,c) and $tmp1,$a
$ADDU $T1,$tmp2 # +=K[$i] $ADDU $T1,$tmp2 # +=K[$i]
$ADDU $h,$tmp1 $ADDU $h,$tmp1 # +=Maj(a,b,c)
$ADDU $d,$T1 $ADDU $d,$T1
$ADDU $h,$T1 $ADDU $h,$T1
___ ___
@ -207,6 +244,20 @@ my $i=@_[0];
my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]); my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]);
$code.=<<___; $code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
$SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i)
$ROTR $tmp0,@X[1],@sigma0[1]
$ADDU @X[0],@X[9] # +=X[i+9]
xor $tmp2,$tmp0
$ROTR $tmp0,@X[1],@sigma0[2]
$SRL $tmp3,@X[14],@sigma1[0]
$ROTR $tmp1,@X[14],@sigma1[1]
xor $tmp2,$tmp0 # sigma0(X[i+1])
$ROTR $tmp0,@X[14],@sigma1[2]
xor $tmp3,$tmp1
$ADDU @X[0],$tmp2
#else
$SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i) $SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i)
$ADDU @X[0],@X[9] # +=X[i+9] $ADDU @X[0],@X[9] # +=X[i+9]
$SLL $tmp1,@X[1],`$SZ*8-@sigma0[2]` $SLL $tmp1,@X[1],`$SZ*8-@sigma0[2]`
@ -227,7 +278,7 @@ $code.=<<___;
xor $tmp3,$tmp0 xor $tmp3,$tmp0
$SRL $tmp0,@X[14],@sigma1[2] $SRL $tmp0,@X[14],@sigma1[2]
xor $tmp3,$tmp1 xor $tmp3,$tmp1
#endif
xor $tmp3,$tmp0 # sigma1(X[i+14]) xor $tmp3,$tmp0 # sigma1(X[i+14])
$ADDU @X[0],$tmp3 $ADDU @X[0],$tmp3
___ ___