ae4af7a067
Reviewed-by: Rich Salz <rsalz@openssl.org>
181 lines
4.6 KiB
Perl
181 lines
4.6 KiB
Perl
#!/usr/local/bin/perl
|
|
|
|
# This flag makes the inner loop one cycle longer, but generates
|
|
# code that runs %30 faster on the pentium pro/II, 44% faster
|
|
# of PIII, while only %7 slower on the pentium.
|
|
# By default, this flag is on.
|
|
$ppro=1;
|
|
|
|
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
|
push(@INC,"${dir}","${dir}../../perlasm");
|
|
require "x86asm.pl";
|
|
require "cbc.pl";
|
|
|
|
&asm_init($ARGV[0],"cast-586.pl",$ARGV[$#ARGV] eq "386");
|
|
|
|
$CAST_ROUNDS=16;
|
|
$L="edi";
|
|
$R="esi";
|
|
$K="ebp";
|
|
$tmp1="ecx";
|
|
$tmp2="ebx";
|
|
$tmp3="eax";
|
|
$tmp4="edx";
|
|
$S1="CAST_S_table0";
|
|
$S2="CAST_S_table1";
|
|
$S3="CAST_S_table2";
|
|
$S4="CAST_S_table3";
|
|
|
|
@F1=("add","xor","sub");
|
|
@F2=("xor","sub","add");
|
|
@F3=("sub","add","xor");
|
|
|
|
&CAST_encrypt("CAST_encrypt",1);
|
|
&CAST_encrypt("CAST_decrypt",0);
|
|
&cbc("CAST_cbc_encrypt","CAST_encrypt","CAST_decrypt",1,4,5,3,-1,-1);
|
|
|
|
&asm_finish();
|
|
|
|
sub CAST_encrypt {
|
|
local($name,$enc)=@_;
|
|
|
|
local($win_ex)=<<"EOF";
|
|
EXTERN _CAST_S_table0:DWORD
|
|
EXTERN _CAST_S_table1:DWORD
|
|
EXTERN _CAST_S_table2:DWORD
|
|
EXTERN _CAST_S_table3:DWORD
|
|
EOF
|
|
&main::external_label(
|
|
"CAST_S_table0",
|
|
"CAST_S_table1",
|
|
"CAST_S_table2",
|
|
"CAST_S_table3",
|
|
);
|
|
|
|
&function_begin_B($name,$win_ex);
|
|
|
|
&comment("");
|
|
|
|
&push("ebp");
|
|
&push("ebx");
|
|
&mov($tmp2,&wparam(0));
|
|
&mov($K,&wparam(1));
|
|
&push("esi");
|
|
&push("edi");
|
|
|
|
&comment("Load the 2 words");
|
|
&mov($L,&DWP(0,$tmp2,"",0));
|
|
&mov($R,&DWP(4,$tmp2,"",0));
|
|
|
|
&comment('Get short key flag');
|
|
&mov($tmp3,&DWP(128,$K,"",0));
|
|
if($enc) {
|
|
&push($tmp3);
|
|
} else {
|
|
&or($tmp3,$tmp3);
|
|
&jnz(&label('cast_dec_skip'));
|
|
}
|
|
|
|
&xor($tmp3, $tmp3);
|
|
|
|
# encrypting part
|
|
|
|
if ($enc) {
|
|
&E_CAST( 0,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 1,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 2,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 3,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 4,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 5,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 6,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 7,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 8,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 9,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST(10,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST(11,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&comment('test short key flag');
|
|
&pop($tmp4);
|
|
&or($tmp4,$tmp4);
|
|
&jnz(&label('cast_enc_done'));
|
|
&E_CAST(12,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST(13,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST(14,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST(15,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
} else {
|
|
&E_CAST(15,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST(14,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST(13,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST(12,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&set_label('cast_dec_skip');
|
|
&E_CAST(11,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST(10,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 9,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 8,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 7,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 6,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 5,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 4,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 3,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 2,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 1,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
&E_CAST( 0,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4);
|
|
}
|
|
|
|
&set_label('cast_enc_done') if $enc;
|
|
# Why the nop? - Ben 17/1/99
|
|
&nop();
|
|
&mov($tmp3,&wparam(0));
|
|
&mov(&DWP(4,$tmp3,"",0),$L);
|
|
&mov(&DWP(0,$tmp3,"",0),$R);
|
|
&function_end($name);
|
|
}
|
|
|
|
sub E_CAST {
|
|
local($i,$S,$L,$R,$K,$OP1,$OP2,$OP3,$tmp1,$tmp2,$tmp3,$tmp4)=@_;
|
|
# Ri needs to have 16 pre added.
|
|
|
|
&comment("round $i");
|
|
&mov( $tmp4, &DWP($i*8,$K,"",1));
|
|
|
|
&mov( $tmp1, &DWP($i*8+4,$K,"",1));
|
|
&$OP1( $tmp4, $R);
|
|
|
|
&rotl( $tmp4, &LB($tmp1));
|
|
|
|
if ($ppro) {
|
|
&xor( $tmp1, $tmp1);
|
|
&mov( $tmp2, 0xff);
|
|
|
|
&movb( &LB($tmp1), &HB($tmp4)); # A
|
|
&and( $tmp2, $tmp4);
|
|
|
|
&shr( $tmp4, 16); #
|
|
&xor( $tmp3, $tmp3);
|
|
} else {
|
|
&mov( $tmp2, $tmp4); # B
|
|
&movb( &LB($tmp1), &HB($tmp4)); # A # BAD BAD BAD
|
|
|
|
&shr( $tmp4, 16); #
|
|
&and( $tmp2, 0xff);
|
|
}
|
|
|
|
&movb( &LB($tmp3), &HB($tmp4)); # C # BAD BAD BAD
|
|
&and( $tmp4, 0xff); # D
|
|
|
|
&mov( $tmp1, &DWP($S1,"",$tmp1,4));
|
|
&mov( $tmp2, &DWP($S2,"",$tmp2,4));
|
|
|
|
&$OP2( $tmp1, $tmp2);
|
|
&mov( $tmp2, &DWP($S3,"",$tmp3,4));
|
|
|
|
&$OP3( $tmp1, $tmp2);
|
|
&mov( $tmp2, &DWP($S4,"",$tmp4,4));
|
|
|
|
&$OP1( $tmp1, $tmp2);
|
|
# XXX
|
|
|
|
&xor( $L, $tmp1);
|
|
# XXX
|
|
}
|
|
|