SHA clean-up Intel assembler companion.

I've chosen to nest two functions in order to save about 4K. As a result
s1-win32.asm doesn't look right (nested PROC/ENDP SEGMENT/ENDS) and it's
probably impossible to compile. I assume I have to reconsider... But not
today...
This commit is contained in:
Andy Polyakov 1999-09-05 14:17:42 +00:00
parent 7f7c318cfc
commit 69fb1c3f9d
2 changed files with 765 additions and 666 deletions

File diff suppressed because it is too large Load Diff

View File

@ -8,8 +8,8 @@ require "x86asm.pl";
&asm_init($ARGV[0],"sha1-586.pl",$ARGV[$#ARGV] eq "386"); &asm_init($ARGV[0],"sha1-586.pl",$ARGV[$#ARGV] eq "386");
$A="eax"; $A="eax";
$B="ebx"; $B="ecx";
$C="ecx"; $C="ebx";
$D="edx"; $D="edx";
$E="edi"; $E="edi";
$T="esi"; $T="esi";
@ -19,7 +19,7 @@ $off=9*4;
@K=(0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6); @K=(0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6);
&sha1_block("sha1_block_x86"); &sha1_block_data("sha1_block_asm_data_order");
&asm_finish(); &asm_finish();
@ -53,11 +53,14 @@ sub X_expand
local($in)=@_; local($in)=@_;
&comment("First, load the words onto the stack in network byte order"); &comment("First, load the words onto the stack in network byte order");
for ($i=0; $i<16; $i++) for ($i=0; $i<16; $i+=2)
{ {
&mov("eax",&DWP(($i+0)*4,$in,"",0)) unless $i == 0; &mov($A,&DWP(($i+0)*4,$in,"",0));# unless $i == 0;
&bswap("eax"); &mov($B,&DWP(($i+1)*4,$in,"",0));
&mov(&swtmp($i+0),"eax"); &bswap($A);
&bswap($B);
&mov(&swtmp($i+0),$A);
&mov(&swtmp($i+1),$B);
} }
&comment("We now have the X array on the stack"); &comment("We now have the X array on the stack");
@ -312,7 +315,7 @@ sub BODY_60_79
&BODY_20_39(@_); &BODY_20_39(@_);
} }
sub sha1_block sub sha1_block_host
{ {
local($name)=@_; local($name)=@_;
@ -325,35 +328,77 @@ sub sha1_block
# D 12 # D 12
# E 16 # E 16
&push("esi"); &mov("ecx", &wparam(2));
&push("ebp"); &push("esi");
&mov("eax", &wparam(2)); &shl("ecx",6);
&mov("esi", &wparam(1)); &mov("esi", &wparam(1));
&add("eax", "esi"); # offset to leave on &push("ebp");
&mov("ebp", &wparam(0)); &add("ecx","esi"); # offset to leave on
&push("ebx"); &push("ebx");
&sub("eax", 64); &mov("ebp", &wparam(0));
&push("edi"); &push("edi");
&mov($B, &DWP( 4,"ebp","",0));
&stack_push(18);
&mov($D, &DWP(12,"ebp","",0)); &mov($D, &DWP(12,"ebp","",0));
&mov($E, &DWP(16,"ebp","",0)); &stack_push(18+9);
&mov($C, &DWP( 8,"ebp","",0)); &mov($E, &DWP(16,"ebp","",0));
&mov(&swtmp(17),"eax"); &mov($C, &DWP( 8,"ebp","",0));
&mov(&swtmp(17),"ecx");
&comment("First we need to setup the X array");
for ($i=0; $i<16; $i+=2)
{
&mov($A,&DWP(($i+0)*4,"esi","",0));# unless $i == 0;
&mov($B,&DWP(($i+1)*4,"esi","",0));
&mov(&swtmp($i+0),$A);
&mov(&swtmp($i+1),$B);
}
&jmp(&label("shortcut"));
&function_end_B($name);
}
sub sha1_block_data
{
local($name)=@_;
&function_begin_B($name,"");
# parameter 1 is the SHA-1 context structure (SHA_CTX, not MD5_CTX); state words:
# A 0
# B 4
# C 8
# D 12
# E 16
&mov("ecx", &wparam(2));
&push("esi");
&shl("ecx",6);
&mov("esi", &wparam(1));
&push("ebp");
&add("ecx","esi"); # offset to leave on
&push("ebx");
&mov("ebp", &wparam(0));
&push("edi");
&mov($D, &DWP(12,"ebp","",0));
&stack_push(18+9);
&mov($E, &DWP(16,"ebp","",0));
&mov($C, &DWP( 8,"ebp","",0));
&mov(&swtmp(17),"ecx");
&comment("First we need to setup the X array"); &comment("First we need to setup the X array");
&mov("eax",&DWP(0,"esi","",0)); # pulled out of X_expand
&set_label("start") unless $normal; &set_label("start") unless $normal;
&X_expand("esi"); &X_expand("esi");
&mov(&swtmp(16),"esi"); &mov(&wparam(1),"esi");
&set_label("shortcut");
&comment(""); &comment("");
&comment("Start processing"); &comment("Start processing");
# odd start # odd start
&mov($A, &DWP( 0,"ebp","",0)); &mov($A, &DWP( 0,"ebp","",0));
&mov($B, &DWP( 4,"ebp","",0));
$X="esp"; $X="esp";
&BODY_00_15(-2,$K[0],$X, 0,$A,$B,$C,$D,$E,$T); &BODY_00_15(-2,$K[0],$X, 0,$A,$B,$C,$D,$E,$T);
&BODY_00_15( 0,$K[0],$X, 1,$T,$A,$B,$C,$D,$E); &BODY_00_15( 0,$K[0],$X, 1,$T,$A,$B,$C,$D,$E);
@ -468,24 +513,26 @@ sub sha1_block
&add($C,$T); &add($C,$T);
&mov(&DWP( 0,$tmp1,"",0),$A); &mov(&DWP( 0,$tmp1,"",0),$A);
&mov("esi",&swtmp(16)); &mov("esi",&wparam(1));
&mov(&DWP( 8,$tmp1,"",0),$C); # This is for looping &mov(&DWP( 8,$tmp1,"",0),$C);
&add("esi",64); &add("esi",64);
&mov("eax",&swtmp(17)); &mov("eax",&swtmp(17));
&mov(&DWP(16,$tmp1,"",0),$E); &mov(&DWP(16,$tmp1,"",0),$E);
&cmp("eax","esi"); &cmp("esi","eax");
&mov(&DWP( 4,$tmp1,"",0),$B); # This is for looping &mov(&DWP( 4,$tmp1,"",0),$B);
&jl(&label("end")); &jl(&label("start"));
&mov("eax",&DWP(0,"esi","",0)); # Pulled down from
&jmp(&label("start"));
&set_label("end"); &stack_pop(18+9);
&stack_pop(18);
&pop("edi"); &pop("edi");
&pop("ebx"); &pop("ebx");
&pop("ebp"); &pop("ebp");
&pop("esi"); &pop("esi");
&ret(); &ret();
# sha1_block_asm_host_order has to be emitted inside this function's body
# (i.e. before function_end_B below) because it does &jmp(&label("shortcut"));
&sha1_block_host("sha1_block_asm_host_order");
&function_end_B($name); &function_end_B($name);
} }