MIPS assembly pack: add MIPS[32|64]R2 code.
This commit is contained in:
parent
9b222748e7
commit
8df5518bd9
@ -20,6 +20,10 @@
|
||||
# thing about this module is its endian neutrality, which means that
|
||||
# it processes data without ever changing byte order...
|
||||
|
||||
# September 2012
|
||||
#
|
||||
# Add MIPS32R2 code.
|
||||
|
||||
######################################################################
|
||||
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
|
||||
# widely used. Then there is a new contender: NUBI. It appears that if
|
||||
@ -52,6 +56,7 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
|
||||
if ($flavour =~ /64|n32/i) {
|
||||
$PTR_ADD="dadd"; # incidentally works even on n32
|
||||
$PTR_SUB="dsub"; # incidentally works even on n32
|
||||
$PTR_INS="dins";
|
||||
$REG_S="sd";
|
||||
$REG_L="ld";
|
||||
$PTR_SLL="dsll"; # incidentally works even on n32
|
||||
@ -59,6 +64,7 @@ if ($flavour =~ /64|n32/i) {
|
||||
} else {
|
||||
$PTR_ADD="add";
|
||||
$PTR_SUB="sub";
|
||||
$PTR_INS="ins";
|
||||
$REG_S="sw";
|
||||
$REG_L="lw";
|
||||
$PTR_SLL="sll";
|
||||
@ -138,6 +144,16 @@ _mips_AES_encrypt:
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
lw $t0,0($i0) # Te1[s1>>16]
|
||||
_xtr $i0,$s2,8-2
|
||||
lw $t1,0($i1) # Te1[s2>>16]
|
||||
_xtr $i1,$s3,8-2
|
||||
lw $t2,0($i2) # Te1[s3>>16]
|
||||
_xtr $i2,$s0,8-2
|
||||
lw $t3,0($i3) # Te1[s0>>16]
|
||||
_xtr $i3,$s1,8-2
|
||||
#else
|
||||
lwl $t0,3($i0) # Te1[s1>>16]
|
||||
lwl $t1,3($i1) # Te1[s2>>16]
|
||||
lwl $t2,3($i2) # Te1[s3>>16]
|
||||
@ -150,6 +166,29 @@ _mips_AES_encrypt:
|
||||
_xtr $i2,$s0,8-2
|
||||
lwr $t3,2($i3) # Te1[s0>>16]
|
||||
_xtr $i3,$s1,8-2
|
||||
#endif
|
||||
and $i0,0x3fc
|
||||
and $i1,0x3fc
|
||||
and $i2,0x3fc
|
||||
and $i3,0x3fc
|
||||
$PTR_ADD $i0,$Tbl
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
rotr $t0,$t0,8
|
||||
rotr $t1,$t1,8
|
||||
rotr $t2,$t2,8
|
||||
rotr $t3,$t3,8
|
||||
# if defined(_MIPSEL)
|
||||
lw $t4,0($i0) # Te1[s1>>16]
|
||||
_xtr $i0,$s3,0-2
|
||||
lw $t5,0($i1) # Te1[s2>>16]
|
||||
_xtr $i1,$s0,0-2
|
||||
lw $t6,0($i2) # Te1[s3>>16]
|
||||
_xtr $i2,$s1,0-2
|
||||
lw $t7,0($i3) # Te1[s0>>16]
|
||||
_xtr $i3,$s2,0-2
|
||||
|
||||
and $i0,0x3fc
|
||||
and $i1,0x3fc
|
||||
@ -159,6 +198,52 @@ _mips_AES_encrypt:
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
lw $t8,0($i0) # Te1[s1>>16]
|
||||
$PTR_INS $i0,$s0,2,8
|
||||
lw $t9,0($i1) # Te1[s2>>16]
|
||||
$PTR_INS $i1,$s1,2,8
|
||||
lw $t10,0($i2) # Te1[s3>>16]
|
||||
$PTR_INS $i2,$s2,2,8
|
||||
lw $t11,0($i3) # Te1[s0>>16]
|
||||
$PTR_INS $i3,$s3,2,8
|
||||
# else
|
||||
lw $t4,0($i0) # Te1[s1>>16]
|
||||
$PTR_INS $i0,$s3,2,8
|
||||
lw $t5,0($i1) # Te1[s2>>16]
|
||||
$PTR_INS $i1,$s0,2,8
|
||||
lw $t6,0($i2) # Te1[s3>>16]
|
||||
$PTR_INS $i2,$s1,2,8
|
||||
lw $t7,0($i3) # Te1[s0>>16]
|
||||
$PTR_INS $i3,$s2,2,8
|
||||
|
||||
lw $t8,0($i0) # Te1[s1>>16]
|
||||
_xtr $i0,$s0,24-2
|
||||
lw $t9,0($i1) # Te1[s2>>16]
|
||||
_xtr $i1,$s1,24-2
|
||||
lw $t10,0($i2) # Te1[s3>>16]
|
||||
_xtr $i2,$s2,24-2
|
||||
lw $t11,0($i3) # Te1[s0>>16]
|
||||
_xtr $i3,$s3,24-2
|
||||
|
||||
and $i0,0x3fc
|
||||
and $i1,0x3fc
|
||||
and $i2,0x3fc
|
||||
and $i3,0x3fc
|
||||
$PTR_ADD $i0,$Tbl
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
# endif
|
||||
rotr $t4,$t4,16
|
||||
rotr $t5,$t5,16
|
||||
rotr $t6,$t6,16
|
||||
rotr $t7,$t7,16
|
||||
|
||||
rotr $t8,$t8,24
|
||||
rotr $t9,$t9,24
|
||||
rotr $t10,$t10,24
|
||||
rotr $t11,$t11,24
|
||||
#else
|
||||
lwl $t4,2($i0) # Te2[s2>>8]
|
||||
lwl $t5,2($i1) # Te2[s3>>8]
|
||||
lwl $t6,2($i2) # Te2[s0>>8]
|
||||
@ -201,6 +286,7 @@ _mips_AES_encrypt:
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
#endif
|
||||
xor $t0,$t4
|
||||
lw $t4,0($i0) # Te0[s0>>24]
|
||||
xor $t1,$t5
|
||||
@ -263,6 +349,89 @@ _mips_AES_encrypt:
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
# if defined(_MIPSEL)
|
||||
lbu $t4,2($i0) # Te4[s2>>8]
|
||||
$PTR_INS $i0,$s0,2,8
|
||||
lbu $t5,2($i1) # Te4[s3>>8]
|
||||
$PTR_INS $i1,$s1,2,8
|
||||
lbu $t6,2($i2) # Te4[s0>>8]
|
||||
$PTR_INS $i2,$s2,2,8
|
||||
lbu $t7,2($i3) # Te4[s1>>8]
|
||||
$PTR_INS $i3,$s3,2,8
|
||||
|
||||
lbu $t8,2($i0) # Te4[s0>>24]
|
||||
_xtr $i0,$s3,0-2
|
||||
lbu $t9,2($i1) # Te4[s1>>24]
|
||||
_xtr $i1,$s0,0-2
|
||||
lbu $t10,2($i2) # Te4[s2>>24]
|
||||
_xtr $i2,$s1,0-2
|
||||
lbu $t11,2($i3) # Te4[s3>>24]
|
||||
_xtr $i3,$s2,0-2
|
||||
|
||||
and $i0,0x3fc
|
||||
and $i1,0x3fc
|
||||
and $i2,0x3fc
|
||||
and $i3,0x3fc
|
||||
$PTR_ADD $i0,$Tbl
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
# else
|
||||
lbu $t4,2($i0) # Te4[s2>>8]
|
||||
_xtr $i0,$s0,24-2
|
||||
lbu $t5,2($i1) # Te4[s3>>8]
|
||||
_xtr $i1,$s1,24-2
|
||||
lbu $t6,2($i2) # Te4[s0>>8]
|
||||
_xtr $i2,$s2,24-2
|
||||
lbu $t7,2($i3) # Te4[s1>>8]
|
||||
_xtr $i3,$s3,24-2
|
||||
|
||||
and $i0,0x3fc
|
||||
and $i1,0x3fc
|
||||
and $i2,0x3fc
|
||||
and $i3,0x3fc
|
||||
$PTR_ADD $i0,$Tbl
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
lbu $t8,2($i0) # Te4[s0>>24]
|
||||
$PTR_INS $i0,$s3,2,8
|
||||
lbu $t9,2($i1) # Te4[s1>>24]
|
||||
$PTR_INS $i1,$s0,2,8
|
||||
lbu $t10,2($i2) # Te4[s2>>24]
|
||||
$PTR_INS $i2,$s1,2,8
|
||||
lbu $t11,2($i3) # Te4[s3>>24]
|
||||
$PTR_INS $i3,$s2,2,8
|
||||
# endif
|
||||
_ins $t0,16
|
||||
_ins $t1,16
|
||||
_ins $t2,16
|
||||
_ins $t3,16
|
||||
|
||||
_ins2 $t0,$t4,8
|
||||
lbu $t4,2($i0) # Te4[s3]
|
||||
_ins2 $t1,$t5,8
|
||||
lbu $t5,2($i1) # Te4[s0]
|
||||
_ins2 $t2,$t6,8
|
||||
lbu $t6,2($i2) # Te4[s1]
|
||||
_ins2 $t3,$t7,8
|
||||
lbu $t7,2($i3) # Te4[s2]
|
||||
|
||||
_ins2 $t0,$t8,24
|
||||
lw $s0,0($key0)
|
||||
_ins2 $t1,$t9,24
|
||||
lw $s1,4($key0)
|
||||
_ins2 $t2,$t10,24
|
||||
lw $s2,8($key0)
|
||||
_ins2 $t3,$t11,24
|
||||
lw $s3,12($key0)
|
||||
|
||||
_ins2 $t0,$t4,0
|
||||
_ins2 $t1,$t5,0
|
||||
_ins2 $t2,$t6,0
|
||||
_ins2 $t3,$t7,0
|
||||
#else
|
||||
lbu $t4,2($i0) # Te4[s2>>8]
|
||||
_xtr $i0,$s0,24-2
|
||||
lbu $t5,2($i1) # Te4[s3>>8]
|
||||
@ -340,7 +509,7 @@ _mips_AES_encrypt:
|
||||
xor $t1,$t5
|
||||
xor $t2,$t6
|
||||
xor $t3,$t7
|
||||
|
||||
#endif
|
||||
xor $s0,$t0
|
||||
xor $s1,$t1
|
||||
xor $s2,$t2
|
||||
@ -465,6 +634,16 @@ _mips_AES_decrypt:
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
lw $t0,0($i0) # Td1[s3>>16]
|
||||
_xtr $i0,$s2,8-2
|
||||
lw $t1,0($i1) # Td1[s0>>16]
|
||||
_xtr $i1,$s3,8-2
|
||||
lw $t2,0($i2) # Td1[s1>>16]
|
||||
_xtr $i2,$s0,8-2
|
||||
lw $t3,0($i3) # Td1[s2>>16]
|
||||
_xtr $i3,$s1,8-2
|
||||
#else
|
||||
lwl $t0,3($i0) # Td1[s3>>16]
|
||||
lwl $t1,3($i1) # Td1[s0>>16]
|
||||
lwl $t2,3($i2) # Td1[s1>>16]
|
||||
@ -477,6 +656,7 @@ _mips_AES_decrypt:
|
||||
_xtr $i2,$s0,8-2
|
||||
lwr $t3,2($i3) # Td1[s2>>16]
|
||||
_xtr $i3,$s1,8-2
|
||||
#endif
|
||||
|
||||
and $i0,0x3fc
|
||||
and $i1,0x3fc
|
||||
@ -486,6 +666,75 @@ _mips_AES_decrypt:
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
rotr $t0,$t0,8
|
||||
rotr $t1,$t1,8
|
||||
rotr $t2,$t2,8
|
||||
rotr $t3,$t3,8
|
||||
# if defined(_MIPSEL)
|
||||
lw $t4,0($i0) # Td2[s2>>8]
|
||||
_xtr $i0,$s1,0-2
|
||||
lw $t5,0($i1) # Td2[s3>>8]
|
||||
_xtr $i1,$s2,0-2
|
||||
lw $t6,0($i2) # Td2[s0>>8]
|
||||
_xtr $i2,$s3,0-2
|
||||
lw $t7,0($i3) # Td2[s1>>8]
|
||||
_xtr $i3,$s0,0-2
|
||||
|
||||
and $i0,0x3fc
|
||||
and $i1,0x3fc
|
||||
and $i2,0x3fc
|
||||
and $i3,0x3fc
|
||||
$PTR_ADD $i0,$Tbl
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
lw $t8,0($i0) # Td3[s1]
|
||||
$PTR_INS $i0,$s0,2,8
|
||||
lw $t9,0($i1) # Td3[s2]
|
||||
$PTR_INS $i1,$s1,2,8
|
||||
lw $t10,0($i2) # Td3[s3]
|
||||
$PTR_INS $i2,$s2,2,8
|
||||
lw $t11,0($i3) # Td3[s0]
|
||||
$PTR_INS $i3,$s3,2,8
|
||||
#else
|
||||
lw $t4,0($i0) # Td2[s2>>8]
|
||||
$PTR_INS $i0,$s1,2,8
|
||||
lw $t5,0($i1) # Td2[s3>>8]
|
||||
$PTR_INS $i1,$s2,2,8
|
||||
lw $t6,0($i2) # Td2[s0>>8]
|
||||
$PTR_INS $i2,$s3,2,8
|
||||
lw $t7,0($i3) # Td2[s1>>8]
|
||||
$PTR_INS $i3,$s0,2,8
|
||||
|
||||
lw $t8,0($i0) # Td3[s1]
|
||||
_xtr $i0,$s0,24-2
|
||||
lw $t9,0($i1) # Td3[s2]
|
||||
_xtr $i1,$s1,24-2
|
||||
lw $t10,0($i2) # Td3[s3]
|
||||
_xtr $i2,$s2,24-2
|
||||
lw $t11,0($i3) # Td3[s0]
|
||||
_xtr $i3,$s3,24-2
|
||||
|
||||
and $i0,0x3fc
|
||||
and $i1,0x3fc
|
||||
and $i2,0x3fc
|
||||
and $i3,0x3fc
|
||||
$PTR_ADD $i0,$Tbl
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
#endif
|
||||
rotr $t4,$t4,16
|
||||
rotr $t5,$t5,16
|
||||
rotr $t6,$t6,16
|
||||
rotr $t7,$t7,16
|
||||
|
||||
rotr $t8,$t8,24
|
||||
rotr $t9,$t9,24
|
||||
rotr $t10,$t10,24
|
||||
rotr $t11,$t11,24
|
||||
#else
|
||||
lwl $t4,2($i0) # Td2[s2>>8]
|
||||
lwl $t5,2($i1) # Td2[s3>>8]
|
||||
lwl $t6,2($i2) # Td2[s0>>8]
|
||||
@ -528,6 +777,7 @@ _mips_AES_decrypt:
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
#endif
|
||||
|
||||
xor $t0,$t4
|
||||
lw $t4,0($i0) # Td0[s0>>24]
|
||||
@ -601,6 +851,81 @@ _mips_AES_decrypt:
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
# if defined(_MIPSEL)
|
||||
lbu $t4,1024($i0) # Td4[s2>>8]
|
||||
$PTR_INS $i0,$s0,0,8
|
||||
lbu $t5,1024($i1) # Td4[s3>>8]
|
||||
$PTR_INS $i1,$s1,0,8
|
||||
lbu $t6,1024($i2) # Td4[s0>>8]
|
||||
$PTR_INS $i2,$s2,0,8
|
||||
lbu $t7,1024($i3) # Td4[s1>>8]
|
||||
$PTR_INS $i3,$s3,0,8
|
||||
|
||||
lbu $t8,1024($i0) # Td4[s0>>24]
|
||||
_xtr $i0,$s1,0
|
||||
lbu $t9,1024($i1) # Td4[s1>>24]
|
||||
_xtr $i1,$s2,0
|
||||
lbu $t10,1024($i2) # Td4[s2>>24]
|
||||
_xtr $i2,$s3,0
|
||||
lbu $t11,1024($i3) # Td4[s3>>24]
|
||||
_xtr $i3,$s0,0
|
||||
|
||||
$PTR_ADD $i0,$Tbl
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
# else
|
||||
lbu $t4,1024($i0) # Td4[s2>>8]
|
||||
_xtr $i0,$s0,24
|
||||
lbu $t5,1024($i1) # Td4[s3>>8]
|
||||
_xtr $i1,$s1,24
|
||||
lbu $t6,1024($i2) # Td4[s0>>8]
|
||||
_xtr $i2,$s2,24
|
||||
lbu $t7,1024($i3) # Td4[s1>>8]
|
||||
_xtr $i3,$s3,24
|
||||
|
||||
$PTR_ADD $i0,$Tbl
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
lbu $t8,1024($i0) # Td4[s0>>24]
|
||||
$PTR_INS $i0,$s1,0,8
|
||||
lbu $t9,1024($i1) # Td4[s1>>24]
|
||||
$PTR_INS $i1,$s2,0,8
|
||||
lbu $t10,1024($i2) # Td4[s2>>24]
|
||||
$PTR_INS $i2,$s3,0,8
|
||||
lbu $t11,1024($i3) # Td4[s3>>24]
|
||||
$PTR_INS $i3,$s0,0,8
|
||||
# endif
|
||||
_ins $t0,16
|
||||
_ins $t1,16
|
||||
_ins $t2,16
|
||||
_ins $t3,16
|
||||
|
||||
_ins2 $t0,$t4,8
|
||||
lbu $t4,1024($i0) # Td4[s1]
|
||||
_ins2 $t1,$t5,8
|
||||
lbu $t5,1024($i1) # Td4[s2]
|
||||
_ins2 $t2,$t6,8
|
||||
lbu $t6,1024($i2) # Td4[s3]
|
||||
_ins2 $t3,$t7,8
|
||||
lbu $t7,1024($i3) # Td4[s0]
|
||||
|
||||
_ins2 $t0,$t8,24
|
||||
lw $s0,0($key0)
|
||||
_ins2 $t1,$t9,24
|
||||
lw $s1,4($key0)
|
||||
_ins2 $t2,$t10,24
|
||||
lw $s2,8($key0)
|
||||
_ins2 $t3,$t11,24
|
||||
lw $s3,12($key0)
|
||||
|
||||
_ins2 $t0,$t4,0
|
||||
_ins2 $t1,$t5,0
|
||||
_ins2 $t2,$t6,0
|
||||
_ins2 $t3,$t7,0
|
||||
#else
|
||||
lbu $t4,1024($i0) # Td4[s2>>8]
|
||||
_xtr $i0,$s0,24
|
||||
lbu $t5,1024($i1) # Td4[s3>>8]
|
||||
@ -670,6 +995,7 @@ _mips_AES_decrypt:
|
||||
xor $t1,$t5
|
||||
xor $t2,$t6
|
||||
xor $t3,$t7
|
||||
#endif
|
||||
|
||||
xor $s0,$t0
|
||||
xor $s1,$t1
|
||||
@ -782,7 +1108,7 @@ _mips_AES_set_encrypt_key:
|
||||
beqz $inp,.Lekey_done
|
||||
li $t0,-1
|
||||
beqz $key,.Lekey_done
|
||||
$PTR_ADD $rcon,$Tbl,1024+256
|
||||
$PTR_ADD $rcon,$Tbl,256
|
||||
|
||||
.set reorder
|
||||
lwl $rk0,0+$MSB($inp) # load 128 bits
|
||||
@ -834,10 +1160,10 @@ _mips_AES_set_encrypt_key:
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
lbu $i0,1024($i0)
|
||||
lbu $i1,1024($i1)
|
||||
lbu $i2,1024($i2)
|
||||
lbu $i3,1024($i3)
|
||||
lbu $i0,0($i0)
|
||||
lbu $i1,0($i1)
|
||||
lbu $i2,0($i2)
|
||||
lbu $i3,0($i3)
|
||||
|
||||
sw $rk0,0($key)
|
||||
sw $rk1,4($key)
|
||||
@ -889,10 +1215,10 @@ _mips_AES_set_encrypt_key:
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
lbu $i0,1024($i0)
|
||||
lbu $i1,1024($i1)
|
||||
lbu $i2,1024($i2)
|
||||
lbu $i3,1024($i3)
|
||||
lbu $i0,0($i0)
|
||||
lbu $i1,0($i1)
|
||||
lbu $i2,0($i2)
|
||||
lbu $i3,0($i3)
|
||||
|
||||
sw $rk0,0($key)
|
||||
sw $rk1,4($key)
|
||||
@ -948,10 +1274,10 @@ _mips_AES_set_encrypt_key:
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
lbu $i0,1024($i0)
|
||||
lbu $i1,1024($i1)
|
||||
lbu $i2,1024($i2)
|
||||
lbu $i3,1024($i3)
|
||||
lbu $i0,0($i0)
|
||||
lbu $i1,0($i1)
|
||||
lbu $i2,0($i2)
|
||||
lbu $i3,0($i3)
|
||||
|
||||
sw $rk0,0($key)
|
||||
sw $rk1,4($key)
|
||||
@ -990,10 +1316,10 @@ _mips_AES_set_encrypt_key:
|
||||
$PTR_ADD $i1,$Tbl
|
||||
$PTR_ADD $i2,$Tbl
|
||||
$PTR_ADD $i3,$Tbl
|
||||
lbu $i0,1024($i0)
|
||||
lbu $i1,1024($i1)
|
||||
lbu $i2,1024($i2)
|
||||
lbu $i3,1024($i3)
|
||||
lbu $i0,0($i0)
|
||||
lbu $i1,0($i1)
|
||||
lbu $i2,0($i2)
|
||||
lbu $i3,0($i3)
|
||||
sll $i0,24
|
||||
sll $i1,16
|
||||
sll $i2,8
|
||||
@ -1055,7 +1381,7 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
|
||||
___
|
||||
$code.=<<___;
|
||||
.set reorder
|
||||
la $Tbl,AES_Te # PIC-ified 'load address'
|
||||
la $Tbl,AES_Te4 # PIC-ified 'load address'
|
||||
|
||||
bal _mips_AES_set_encrypt_key
|
||||
|
||||
@ -1110,7 +1436,7 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
|
||||
___
|
||||
$code.=<<___;
|
||||
.set reorder
|
||||
la $Tbl,AES_Te # PIC-ified 'load address'
|
||||
la $Tbl,AES_Te4 # PIC-ified 'load address'
|
||||
|
||||
bal _mips_AES_set_encrypt_key
|
||||
|
||||
@ -1181,6 +1507,16 @@ $code.=<<___;
|
||||
xor $tpb,$tp9,$tp2
|
||||
xor $tpd,$tp9,$tp4
|
||||
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
rotr $tp1,$tpd,16
|
||||
xor $tpe,$tp2
|
||||
rotr $tp2,$tp9,8
|
||||
xor $tpe,$tp1
|
||||
rotr $tp4,$tpb,24
|
||||
xor $tpe,$tp2
|
||||
lw $tp1,4($key) # modulo-scheduled
|
||||
xor $tpe,$tp4
|
||||
#else
|
||||
_ror $tp1,$tpd,16
|
||||
xor $tpe,$tp2
|
||||
_ror $tp2,$tpd,-16
|
||||
@ -1195,6 +1531,7 @@ $code.=<<___;
|
||||
xor $tpe,$tp1
|
||||
lw $tp1,4($key) # modulo-scheduled
|
||||
xor $tpe,$tp2
|
||||
#endif
|
||||
sub $cnt,1
|
||||
sw $tpe,0($key)
|
||||
$PTR_ADD $key,4
|
||||
@ -1225,7 +1562,7 @@ ___
|
||||
# Tables are kept in an endian-neutral manner
|
||||
$code.=<<___;
|
||||
.rdata
|
||||
.align 6
|
||||
.align 10
|
||||
AES_Te:
|
||||
.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0
|
||||
.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
|
||||
@ -1356,46 +1693,6 @@ AES_Te:
|
||||
.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
|
||||
.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
|
||||
|
||||
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
|
||||
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
|
||||
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
|
||||
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
|
||||
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
|
||||
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
|
||||
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
|
||||
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
|
||||
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
|
||||
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
|
||||
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
|
||||
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
|
||||
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
|
||||
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
|
||||
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
|
||||
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
|
||||
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
|
||||
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
|
||||
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
|
||||
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
|
||||
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
|
||||
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
|
||||
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
|
||||
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
|
||||
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
|
||||
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
|
||||
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
|
||||
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
|
||||
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
|
||||
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
|
||||
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
|
||||
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
|
||||
|
||||
.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
|
||||
.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
|
||||
.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
|
||||
.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
|
||||
.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
|
||||
|
||||
.align 6
|
||||
AES_Td:
|
||||
.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0
|
||||
.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
|
||||
@ -1558,6 +1855,46 @@ AES_Td:
|
||||
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
|
||||
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
|
||||
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
|
||||
|
||||
AES_Te4:
|
||||
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
|
||||
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
|
||||
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
|
||||
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
|
||||
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
|
||||
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
|
||||
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
|
||||
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
|
||||
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
|
||||
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
|
||||
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
|
||||
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
|
||||
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
|
||||
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
|
||||
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
|
||||
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
|
||||
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
|
||||
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
|
||||
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
|
||||
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
|
||||
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
|
||||
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
|
||||
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
|
||||
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
|
||||
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
|
||||
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
|
||||
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
|
||||
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
|
||||
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
|
||||
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
|
||||
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
|
||||
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
|
||||
|
||||
.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
|
||||
.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
|
||||
.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
|
||||
.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
|
||||
.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
|
||||
___
|
||||
|
||||
foreach (split("\n",$code)) {
|
||||
@ -1574,6 +1911,9 @@ foreach (split("\n",$code)) {
|
||||
s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
|
||||
sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
|
||||
: eval("24-$3"))/e or
|
||||
s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
|
||||
sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3)
|
||||
: eval("24-$3"))/e or
|
||||
s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
|
||||
sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
|
||||
: eval("$3*-1"))/e or
|
||||
@ -1596,6 +1936,8 @@ foreach (split("\n",$code)) {
|
||||
sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
|
||||
}
|
||||
|
||||
s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e if(!$big_endian);
|
||||
|
||||
print $_,"\n";
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,10 @@
|
||||
# compatible subroutine. There is room for minor optimization on
|
||||
# little-endian platforms...
|
||||
|
||||
# September 2012.
|
||||
#
|
||||
# Add MIPS32r2 code.
|
||||
|
||||
######################################################################
|
||||
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
|
||||
# widely used. Then there is a new contender: NUBI. It appears that if
|
||||
@ -95,6 +99,10 @@ sub BODY_00_14 {
|
||||
my ($i,$a,$b,$c,$d,$e)=@_;
|
||||
my $j=$i+1;
|
||||
$code.=<<___ if (!$big_endian);
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
wsbh @X[$i],@X[$i] # byte swap($i)
|
||||
rotr @X[$i],@X[$i],16
|
||||
#else
|
||||
srl $t0,@X[$i],24 # byte swap($i)
|
||||
srl $t1,@X[$i],8
|
||||
andi $t2,@X[$i],0xFF00
|
||||
@ -104,8 +112,22 @@ $code.=<<___ if (!$big_endian);
|
||||
or @X[$i],$t0
|
||||
or $t1,$t2
|
||||
or @X[$i],$t1
|
||||
#endif
|
||||
___
|
||||
$code.=<<___;
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
addu $e,$K # $i
|
||||
xor $t0,$c,$d
|
||||
rotr $t1,$a,27
|
||||
lwl @X[$j],$j*4+$MSB($inp)
|
||||
and $t0,$b
|
||||
addu $e,$t1
|
||||
lwr @X[$j],$j*4+$LSB($inp)
|
||||
xor $t0,$d
|
||||
addu $e,@X[$i]
|
||||
rotr $b,$b,2
|
||||
addu $e,$t0
|
||||
#else
|
||||
lwl @X[$j],$j*4+$MSB($inp)
|
||||
sll $t0,$a,5 # $i
|
||||
addu $e,$K
|
||||
@ -121,6 +143,7 @@ $code.=<<___;
|
||||
addu $e,@X[$i]
|
||||
or $b,$t2
|
||||
addu $e,$t0
|
||||
#endif
|
||||
___
|
||||
}
|
||||
|
||||
@ -129,6 +152,10 @@ my ($i,$a,$b,$c,$d,$e)=@_;
|
||||
my $j=$i+1;
|
||||
|
||||
$code.=<<___ if (!$big_endian && $i==15);
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
wsbh @X[$i],@X[$i] # byte swap($i)
|
||||
rotr @X[$i],@X[$i],16
|
||||
#else
|
||||
srl $t0,@X[$i],24 # byte swap($i)
|
||||
srl $t1,@X[$i],8
|
||||
andi $t2,@X[$i],0xFF00
|
||||
@ -138,8 +165,24 @@ $code.=<<___ if (!$big_endian && $i==15);
|
||||
or @X[$i],$t0
|
||||
or @X[$i],$t1
|
||||
or @X[$i],$t2
|
||||
#endif
|
||||
___
|
||||
$code.=<<___;
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
addu $e,$K # $i
|
||||
xor @X[$j%16],@X[($j+2)%16]
|
||||
xor $t0,$c,$d
|
||||
rotr $t1,$a,27
|
||||
xor @X[$j%16],@X[($j+8)%16]
|
||||
and $t0,$b
|
||||
addu $e,$t1
|
||||
xor @X[$j%16],@X[($j+13)%16]
|
||||
xor $t0,$d
|
||||
addu $e,@X[$i%16]
|
||||
rotr @X[$j%16],@X[$j%16],31
|
||||
rotr $b,$b,2
|
||||
addu $e,$t0
|
||||
#else
|
||||
xor @X[$j%16],@X[($j+2)%16]
|
||||
sll $t0,$a,5 # $i
|
||||
addu $e,$K
|
||||
@ -159,6 +202,7 @@ $code.=<<___;
|
||||
addu $e,@X[$i%16]
|
||||
or $b,$t2
|
||||
addu $e,$t0
|
||||
#endif
|
||||
___
|
||||
}
|
||||
|
||||
@ -166,6 +210,20 @@ sub BODY_20_39 {
|
||||
my ($i,$a,$b,$c,$d,$e)=@_;
|
||||
my $j=$i+1;
|
||||
$code.=<<___ if ($i<79);
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
xor @X[$j%16],@X[($j+2)%16]
|
||||
addu $e,$K # $i
|
||||
rotr $t1,$a,27
|
||||
xor @X[$j%16],@X[($j+8)%16]
|
||||
xor $t0,$c,$d
|
||||
addu $e,$t1
|
||||
xor @X[$j%16],@X[($j+13)%16]
|
||||
xor $t0,$b
|
||||
addu $e,@X[$i%16]
|
||||
rotr @X[$j%16],@X[$j%16],31
|
||||
rotr $b,$b,2
|
||||
addu $e,$t0
|
||||
#else
|
||||
xor @X[$j%16],@X[($j+2)%16]
|
||||
sll $t0,$a,5 # $i
|
||||
addu $e,$K
|
||||
@ -184,8 +242,24 @@ $code.=<<___ if ($i<79);
|
||||
or @X[$j%16],$t1
|
||||
or $b,$t2
|
||||
addu $e,$t0
|
||||
#endif
|
||||
___
|
||||
$code.=<<___ if ($i==79);
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
lw @X[0],0($ctx)
|
||||
addu $e,$K # $i
|
||||
lw @X[1],4($ctx)
|
||||
rotr $t1,$a,27
|
||||
lw @X[2],8($ctx)
|
||||
xor $t0,$c,$d
|
||||
addu $e,$t1
|
||||
lw @X[3],12($ctx)
|
||||
xor $t0,$b
|
||||
addu $e,@X[$i%16]
|
||||
lw @X[4],16($ctx)
|
||||
rotr $b,$b,2
|
||||
addu $e,$t0
|
||||
#else
|
||||
lw @X[0],0($ctx)
|
||||
sll $t0,$a,5 # $i
|
||||
addu $e,$K
|
||||
@ -203,6 +277,7 @@ $code.=<<___ if ($i==79);
|
||||
addu $e,@X[$i%16]
|
||||
or $b,$t2
|
||||
addu $e,$t0
|
||||
#endif
|
||||
___
|
||||
}
|
||||
|
||||
@ -210,6 +285,22 @@ sub BODY_40_59 {
|
||||
my ($i,$a,$b,$c,$d,$e)=@_;
|
||||
my $j=$i+1;
|
||||
$code.=<<___ if ($i<79);
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
addu $e,$K # $i
|
||||
and $t0,$c,$d
|
||||
xor @X[$j%16],@X[($j+2)%16]
|
||||
rotr $t1,$a,27
|
||||
addu $e,$t0
|
||||
xor @X[$j%16],@X[($j+8)%16]
|
||||
xor $t0,$c,$d
|
||||
addu $e,$t1
|
||||
xor @X[$j%16],@X[($j+13)%16]
|
||||
and $t0,$b
|
||||
addu $e,@X[$i%16]
|
||||
rotr @X[$j%16],@X[$j%16],31
|
||||
rotr $b,$b,2
|
||||
addu $e,$t0
|
||||
#else
|
||||
xor @X[$j%16],@X[($j+2)%16]
|
||||
sll $t0,$a,5 # $i
|
||||
addu $e,$K
|
||||
@ -230,6 +321,7 @@ $code.=<<___ if ($i<79);
|
||||
addu $e,@X[$i%16]
|
||||
or $b,$t2
|
||||
addu $e,$t0
|
||||
#endif
|
||||
___
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
@ -17,6 +17,10 @@
|
||||
# ~17%, but it comes for free, because it's same instruction sequence.
|
||||
# Improvement coefficients are for aligned input.
|
||||
|
||||
# September 2012.
|
||||
#
|
||||
# Add MIPS[32|64]r2 code.
|
||||
|
||||
######################################################################
|
||||
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
|
||||
# widely used. Then there is a new contender: NUBI. It appears that if
|
||||
@ -83,6 +87,7 @@ if ($output =~ /512/) {
|
||||
$SLL="dsll"; # shift left logical
|
||||
$SRL="dsrl"; # shift right logical
|
||||
$ADDU="daddu";
|
||||
$ROTR="drotr";
|
||||
@Sigma0=(28,34,39);
|
||||
@Sigma1=(14,18,41);
|
||||
@sigma0=( 7, 1, 8); # right shift first
|
||||
@ -97,6 +102,7 @@ if ($output =~ /512/) {
|
||||
$SLL="sll"; # shift left logical
|
||||
$SRL="srl"; # shift right logical
|
||||
$ADDU="addu";
|
||||
$ROTR="rotr";
|
||||
@Sigma0=( 2,13,22);
|
||||
@Sigma1=( 6,11,25);
|
||||
@sigma0=( 3, 7,18); # right shift first
|
||||
@ -124,6 +130,10 @@ $code.=<<___ if ($i<15);
|
||||
${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp)
|
||||
___
|
||||
$code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
wsbh @X[0],@X[0] # byte swap($i)
|
||||
rotr @X[0],@X[0],16
|
||||
#else
|
||||
srl $tmp0,@X[0],24 # byte swap($i)
|
||||
srl $tmp1,@X[0],8
|
||||
andi $tmp2,@X[0],0xFF00
|
||||
@ -133,8 +143,13 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
|
||||
or @X[0],$tmp0
|
||||
or $tmp1,$tmp2
|
||||
or @X[0],$tmp1
|
||||
#endif
|
||||
___
|
||||
$code.=<<___ if (!$big_endian && $i<16 && $SZ==8);
|
||||
#if defined(_MIPS_ARCH_MIPS64R2)
|
||||
dsbh @X[0],@X[0] # byte swap($i)
|
||||
dshd @X[0],@X[0]
|
||||
#else
|
||||
ori $tmp0,$zero,0xFF
|
||||
dsll $tmp2,$tmp0,32
|
||||
or $tmp0,$tmp2 # 0x000000FF000000FF
|
||||
@ -153,8 +168,31 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==8);
|
||||
dsrl $tmp1,@X[0],32
|
||||
dsll @X[0],32
|
||||
or @X[0],$tmp1
|
||||
#endif
|
||||
___
|
||||
$code.=<<___;
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
xor $tmp2,$f,$g # $i
|
||||
$ROTR $tmp0,$e,@Sigma1[0]
|
||||
$ADDU $T1,$X[0],$h
|
||||
$ROTR $tmp1,$e,@Sigma1[1]
|
||||
and $tmp2,$e
|
||||
$ROTR $h,$e,@Sigma1[2]
|
||||
xor $tmp0,$tmp1
|
||||
$ROTR $tmp1,$a,@Sigma0[0]
|
||||
xor $tmp2,$g # Ch(e,f,g)
|
||||
xor $tmp0,$h # Sigma1(e)
|
||||
|
||||
$ROTR $h,$a,@Sigma0[1]
|
||||
$ADDU $T1,$tmp2
|
||||
$LD $tmp2,`$i*$SZ`($Ktbl) # K[$i]
|
||||
xor $h,$tmp1
|
||||
$ROTR $tmp1,$a,@Sigma0[2]
|
||||
$ADDU $T1,$tmp0
|
||||
and $tmp0,$b,$c
|
||||
xor $h,$tmp1 # Sigma0(a)
|
||||
xor $tmp1,$b,$c
|
||||
#else
|
||||
$ADDU $T1,$X[0],$h # $i
|
||||
$SRL $h,$e,@Sigma1[0]
|
||||
xor $tmp2,$f,$g
|
||||
@ -184,16 +222,15 @@ $code.=<<___;
|
||||
xor $h,$tmp1
|
||||
$SLL $tmp1,$a,`$SZ*8-@Sigma0[0]`
|
||||
xor $h,$tmp0
|
||||
$ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer
|
||||
and $tmp0,$b,$c
|
||||
xor $h,$tmp1 # Sigma0(a)
|
||||
|
||||
or $tmp0,$a,$b
|
||||
and $tmp1,$a,$b
|
||||
and $tmp0,$c
|
||||
or $tmp1,$tmp0 # Maj(a,b,c)
|
||||
xor $tmp1,$b,$c
|
||||
#endif
|
||||
$ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer
|
||||
$ADDU $h,$tmp0
|
||||
and $tmp1,$a
|
||||
$ADDU $T1,$tmp2 # +=K[$i]
|
||||
$ADDU $h,$tmp1
|
||||
|
||||
$ADDU $h,$tmp1 # +=Maj(a,b,c)
|
||||
$ADDU $d,$T1
|
||||
$ADDU $h,$T1
|
||||
___
|
||||
@ -207,6 +244,20 @@ my $i=@_[0];
|
||||
my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]);
|
||||
|
||||
$code.=<<___;
|
||||
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
|
||||
$SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i)
|
||||
$ROTR $tmp0,@X[1],@sigma0[1]
|
||||
$ADDU @X[0],@X[9] # +=X[i+9]
|
||||
xor $tmp2,$tmp0
|
||||
$ROTR $tmp0,@X[1],@sigma0[2]
|
||||
|
||||
$SRL $tmp3,@X[14],@sigma1[0]
|
||||
$ROTR $tmp1,@X[14],@sigma1[1]
|
||||
xor $tmp2,$tmp0 # sigma0(X[i+1])
|
||||
$ROTR $tmp0,@X[14],@sigma1[2]
|
||||
xor $tmp3,$tmp1
|
||||
$ADDU @X[0],$tmp2
|
||||
#else
|
||||
$SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i)
|
||||
$ADDU @X[0],@X[9] # +=X[i+9]
|
||||
$SLL $tmp1,@X[1],`$SZ*8-@sigma0[2]`
|
||||
@ -227,7 +278,7 @@ $code.=<<___;
|
||||
xor $tmp3,$tmp0
|
||||
$SRL $tmp0,@X[14],@sigma1[2]
|
||||
xor $tmp3,$tmp1
|
||||
|
||||
#endif
|
||||
xor $tmp3,$tmp0 # sigma1(X[i+14])
|
||||
$ADDU @X[0],$tmp3
|
||||
___
|
||||
|
Loading…
Reference in New Issue
Block a user