C64x+ assembly pack: add RC4 module.
This commit is contained in:
parent
bd227733b9
commit
496f2b148b
@ -414,7 +414,7 @@ my %table=(
|
||||
"linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
|
||||
#
|
||||
# TI_CGT_C6000_7.3.x is a requirement
|
||||
"linux-c64xplus","cl6x:--linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true",
|
||||
"linux-c64xplus","cl6x:--linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o::rc4-c64xplus.o:::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true",
|
||||
|
||||
# Android: linux-* but without -DTERMIO and pointers to headers and libs.
|
||||
"android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
||||
|
4
TABLE
4
TABLE
@ -1652,7 +1652,7 @@ $multilib =
|
||||
|
||||
*** debug-VC-WIN32
|
||||
$cc = cl
|
||||
$cflags = -W3 -WX -Gs0 -GF -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -D_CRT_SECURE_NO_DEPRECATE
|
||||
$cflags = -W3 -Gs0 -GF -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -D_CRT_SECURE_NO_DEPRECATE
|
||||
$unistd =
|
||||
$thread_cflag =
|
||||
$sys_id = WIN32
|
||||
@ -4174,7 +4174,7 @@ $bf_obj =
|
||||
$md5_obj =
|
||||
$sha1_obj = sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o
|
||||
$cast_obj =
|
||||
$rc4_obj =
|
||||
$rc4_obj = rc4-c64xplus.o
|
||||
$rmd160_obj =
|
||||
$rc5_obj =
|
||||
$wp_obj =
|
||||
|
183
crypto/rc4/asm/rc4-c64xplus.pl
Normal file
183
crypto/rc4/asm/rc4-c64xplus.pl
Normal file
@ -0,0 +1,183 @@
|
||||
#!/usr/bin/env perl
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# RC4 for C64x+.
|
||||
#
|
||||
# April 2014
|
||||
#
|
||||
# RC4 subroutine processes one byte in 7.0 cycles, which is 3x faster
|
||||
# than TI CGT-generated code. Loop is scheduled in such a way that
|
||||
# there is only one reference to memory in each cycle. This is done
|
||||
# to avoid L1D memory banking conflicts, see SPRU871 TI publication
|
||||
# for further details. Otherwise it should be possible to schedule
|
||||
# the loop for iteration interval of 6...
|
||||
|
||||
# Register names that get interpolated into the assembly template below.
# These four are the registers the template reads the RC4 arguments from:
# key structure pointer, length, input pointer and output pointer.
($KEY,$LEN,$INP,$OUT)=("A4","B4","A6","B6");
|
||||
|
||||
# Working registers, built as two parallel lists from the same six register
# numbers (5,7,8,9,1,2): an "A"-prefixed set here and a "B"-prefixed set in
# the next assignment.
($KEYA,$XX,$TY,$xx,$ONE,$ret)=map("A$_",(5,7,8,9,1,2));
|
||||
($KEYB,$YY,$TX,$tx,$SUM,$dat)=map("B$_",(5,7,8,9,1,2));
|
||||
|
||||
$code.=<<___;
|
||||
.text
|
||||
|
||||
.if .ASSEMBLER_VERSION<7000000
|
||||
.asg 0,__TI_EABI__
|
||||
.endif
|
||||
.if __TI_EABI__
|
||||
.nocmp
|
||||
.asg RC4,_RC4
|
||||
.asg RC4_set_key,_RC4_set_key
|
||||
.asg RC4_options,_RC4_options
|
||||
.endif
|
||||
|
||||
.global _RC4
|
||||
.align 16
|
||||
_RC4:
|
||||
.asmfunc
|
||||
MV $LEN,B0
|
||||
[!B0] BNOP B3 ; if (len==0) return;
|
||||
||[B0] ADD $KEY,2,$KEYA
|
||||
||[B0] ADD $KEY,2,$KEYB
|
||||
[B0] MVK 1,$ONE
|
||||
||[B0] LDBU *${KEYA}[-2],$XX ; key->x
|
||||
[B0] LDBU *${KEYB}[-1],$YY ; key->y
|
||||
|| NOP 4
|
||||
|
||||
ADD4 $ONE,$XX,$XX
|
||||
LDBU *${KEYA}[$XX],$TX
|
||||
|| MVC $LEN,ILC
|
||||
NOP 4
|
||||
;;==================================================
|
||||
SPLOOP 7
|
||||
|| ADD4 $TX,$YY,$YY
|
||||
|
||||
LDBU *${KEYB}[$YY],$TY
|
||||
|| MVD $XX,$xx
|
||||
|| ADD4 $ONE,$XX,$XX
|
||||
LDBU *${KEYA}[$XX],$tx
|
||||
CMPEQ $YY,$XX,B0
|
||||
|| NOP 3
|
||||
STB $TX,*${KEYB}[$YY]
|
||||
||[B0] ADD4 $TX,$YY,$YY
|
||||
STB $TY,*${KEYA}[$xx]
|
||||
||[!B0] ADD4 $tx,$YY,$YY
|
||||
||[!B0] MVD $tx,$TX
|
||||
ADD4 $TY,$TX,$SUM ; [0,0] $TX is not replaced by $tx yet!
|
||||
|| NOP 2
|
||||
LDBU *$INP++,$dat
|
||||
|| NOP 2
|
||||
LDBU *${KEYB}[$SUM],$ret
|
||||
|| NOP 5
|
||||
XOR.L $dat,$ret,$ret
|
||||
SPKERNEL
|
||||
|| STB $ret,*$OUT++
|
||||
;;==================================================
|
||||
SUB4 $XX,$ONE,$XX
|
||||
|| NOP 5
|
||||
STB $XX,*${KEYA}[-2] ; key->x
|
||||
|| SUB4 $YY,$TX,$YY
|
||||
|| BNOP B3
|
||||
STB $YY,*${KEYB}[-1] ; key->y
|
||||
|| NOP 5
|
||||
.endasmfunc
|
||||
|
||||
.global _RC4_set_key
|
||||
.align 16
|
||||
_RC4_set_key:
|
||||
.asmfunc
|
||||
.if .BIG_ENDIAN
|
||||
MVK 0x00000404,$ONE
|
||||
|| MVK 0x00000203,B0
|
||||
MVKH 0x04040000,$ONE
|
||||
|| MVKH 0x00010000,B0
|
||||
.else
|
||||
MVK 0x00000404,$ONE
|
||||
|| MVK 0x00000100,B0
|
||||
MVKH 0x04040000,$ONE
|
||||
|| MVKH 0x03020000,B0
|
||||
.endif
|
||||
ADD $KEY,2,$KEYA
|
||||
|| ADD $KEY,2,$KEYB
|
||||
|| ADD $INP,$LEN,$ret ; end of input
|
||||
LDBU *${INP}++,$dat
|
||||
|| MVK 0,$TX
|
||||
STH $TX,*${KEY}++ ; key->x=key->y=0
|
||||
|| MV B0,A0
|
||||
|| MVK 64-4,B0
|
||||
|
||||
;;==================================================
|
||||
SPLOOPD 1
|
||||
|| MVC B0,ILC
|
||||
|
||||
STNW A0,*${KEY}++
|
||||
|| ADD4 $ONE,A0,A0
|
||||
SPKERNEL
|
||||
;;==================================================
|
||||
|
||||
MVK 0,$YY
|
||||
|| MVK 0,$XX
|
||||
MVK 1,$ONE
|
||||
|| MVK 256-1,B0
|
||||
|
||||
;;==================================================
|
||||
SPLOOPD 8
|
||||
|| MVC B0,ILC
|
||||
|
||||
ADD4 $dat,$YY,$YY
|
||||
|| CMPEQ $INP,$ret,A0 ; end of input?
|
||||
LDBU *${KEYB}[$YY],$TY
|
||||
|| MVD $XX,$xx
|
||||
|| ADD4 $ONE,$XX,$XX
|
||||
LDBU *${KEYA}[$XX],$tx
|
||||
||[A0] SUB $INP,$LEN,$INP ; rewind
|
||||
LDBU *${INP}++,$dat
|
||||
|| CMPEQ $YY,$XX,B0
|
||||
|| NOP 3
|
||||
STB $TX,*${KEYB}[$YY]
|
||||
||[B0] ADD4 $TX,$YY,$YY
|
||||
STB $TY,*${KEYA}[$xx]
|
||||
||[!B0] ADD4 $tx,$YY,$YY
|
||||
||[!B0] MV $tx,$TX
|
||||
SPKERNEL
|
||||
;;==================================================
|
||||
|
||||
BNOP B3,5
|
||||
.endasmfunc
|
||||
|
||||
.global _RC4_options
|
||||
.align 16
|
||||
_RC4_options:
|
||||
_rc4_options:
|
||||
.asmfunc
|
||||
BNOP B3,1
|
||||
ADDKPC _rc4_options,B4
|
||||
.if __TI_EABI__
|
||||
MVKL \$PCR_OFFSET(rc4_options,_rc4_options),A4
|
||||
MVKH \$PCR_OFFSET(rc4_options,_rc4_options),A4
|
||||
.else
|
||||
MVKL (rc4_options-_rc4_options),A4
|
||||
MVKH (rc4_options-_rc4_options),A4
|
||||
.endif
|
||||
ADD B4,A4,A4
|
||||
.endasmfunc
|
||||
|
||||
.if __TI_EABI__
|
||||
.sect ".text:rc4_options.const"
|
||||
.else
|
||||
.sect ".const:rc4_options"
|
||||
.endif
|
||||
.align 4
|
||||
rc4_options:
|
||||
.cstring "rc4(sploop,char)"
|
||||
.cstring "RC4 for C64+, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 4
|
||||
___
|
||||
|
||||
# Emit the generated assembly on STDOUT.
print $code;
# Output is buffered, so a failed write (full disk, closed pipe) may only be
# reported when the handle is closed.  Treat that as fatal so the build does
# not carry on with a silently truncated assembly file.
close STDOUT or die "error closing STDOUT: $!";
|
Loading…
x
Reference in New Issue
Block a user