e_padlock-x86[_64].pl: SHA fixes, comply with specification and fix bug.

This commit is contained in:
Andy Polyakov 2011-10-08 21:37:44 +00:00
parent 549cd657fd
commit 08d62e9f1a
2 changed files with 156 additions and 13 deletions

View File

@ -352,19 +352,34 @@ my ($mode,$opcode) = @_;
&push ("edi"); &push ("edi");
&push ("esi"); &push ("esi");
&xor ("eax","eax"); &xor ("eax","eax");
&mov ("edi",&wparam(0));
&mov ("esi",&wparam(1));
&mov ("ecx",&wparam(2));
if ($::win32 or $::coff) { if ($::win32 or $::coff) {
&push (&::islabel("_win32_segv_handler")); &push (&::islabel("_win32_segv_handler"));
&data_byte(0x64,0xff,0x30); # push %fs:(%eax) &data_byte(0x64,0xff,0x30); # push %fs:(%eax)
&data_byte(0x64,0x89,0x20); # mov %esp,%fs:(%eax) &data_byte(0x64,0x89,0x20); # mov %esp,%fs:(%eax)
} }
&mov ("edi",&wparam(0)); &mov ("edx","esp"); # put aside %esp
&mov ("esi",&wparam(1)); &add ("esp",-128); # 32 is enough but spec says 128
&mov ("ecx",&wparam(2)); &movups ("xmm0",&QWP(0,"edi")); # copy-in context
&and ("esp",-16);
&mov ("eax",&DWP(16,"edi"));
&movaps (&QWP(0,"esp"),"xmm0");
&mov ("edi","esp");
&mov (&DWP(16,"esp"),"eax");
&xor ("eax","eax");
&data_byte(0xf3,0x0f,0xa6,0xc8); # rep xsha1 &data_byte(0xf3,0x0f,0xa6,0xc8); # rep xsha1
&movaps ("xmm0",&QWP(0,"esp"));
&mov ("eax",&DWP(16,"esp"));
&mov ("esp","edx"); # restore %esp
if ($::win32 or $::coff) { if ($::win32 or $::coff) {
&data_byte(0x64,0x8f,0x05,0,0,0,0); # pop %fs:0 &data_byte(0x64,0x8f,0x05,0,0,0,0); # pop %fs:0
&lea ("esp",&DWP(4,"esp")); &lea ("esp",&DWP(4,"esp"));
} }
&mov ("edi",&wparam(0));
&movups (&QWP(0,"edi"),"xmm0"); # copy-out context
&mov (&DWP(16,"edi"),"eax");
&pop ("esi"); &pop ("esi");
&pop ("edi"); &pop ("edi");
&ret (); &ret ();
@ -373,12 +388,26 @@ my ($mode,$opcode) = @_;
# padlock_sha1_blocks (32-bit): runs "rep xsha1" with eax = -1 on a private,
# 16-byte-aligned stack copy of the 20-byte context addressed by wparam(0),
# then copies the 20 bytes back to the caller's buffer.
# wparam(1) is loaded into esi and wparam(2) into ecx for the instruction
# (presumably input pointer and count -- confirm against the PadLock guide).
&function_begin_B("padlock_sha1_blocks"); &function_begin_B("padlock_sha1_blocks");
&push ("edi"); &push ("edi");
&push ("esi"); &push ("esi");
&mov ("eax",-1);
&mov ("edi",&wparam(0)); &mov ("edi",&wparam(0));
&mov ("esi",&wparam(1)); &mov ("esi",&wparam(1));
# edx holds the caller's esp until it is restored below; the code relies on
# edx surviving the rep xsha1.
&mov ("edx","esp"); # put aside %esp
&mov ("ecx",&wparam(2)); &mov ("ecx",&wparam(2));
# Reserve 128 bytes of scratch space, then round esp down to a 16-byte
# boundary so the movaps accesses to (esp) are aligned.
&add ("esp",-128);
&movups ("xmm0",&QWP(0,"edi")); # copy-in context
&and ("esp",-16);
# eax serves as a temporary for the fifth context dword (offset 16), which is
# why it is only set to -1 after the copy-in is complete.
&mov ("eax",&DWP(16,"edi"));
&movaps (&QWP(0,"esp"),"xmm0");
# From here on edi points at the aligned stack copy, not the caller's buffer.
&mov ("edi","esp");
&mov (&DWP(16,"esp"),"eax");
&mov ("eax",-1);
&data_byte(0xf3,0x0f,0xa6,0xc8); # rep xsha1 &data_byte(0xf3,0x0f,0xa6,0xc8); # rep xsha1
# Fetch the updated 20 bytes from the scratch copy while esp still points at it.
&pop ("esi"); &movaps ("xmm0",&QWP(0,"esp"));
&mov ("eax",&DWP(16,"esp"));
&mov ("esp","edx"); # restore %esp
# edi was redirected to the scratch copy, so the caller's pointer is reloaded
# before the result is stored back.
&mov ("edi",&wparam(0));
&movups (&QWP(0,"edi"),"xmm0"); # copy-out context
&mov (&DWP(16,"edi"),"eax");
&pop ("esi");
&pop ("edi"); &pop ("edi");
&ret (); &ret ();
&function_end_B("padlock_sha1_blocks"); &function_end_B("padlock_sha1_blocks");
@ -387,19 +416,34 @@ my ($mode,$opcode) = @_;
&push ("edi"); &push ("edi");
&push ("esi"); &push ("esi");
&xor ("eax","eax"); &xor ("eax","eax");
&mov ("edi",&wparam(0));
&mov ("esi",&wparam(1));
&mov ("ecx",&wparam(2));
if ($::win32 or $::coff) { if ($::win32 or $::coff) {
&push (&::islabel("_win32_segv_handler")); &push (&::islabel("_win32_segv_handler"));
&data_byte(0x64,0xff,0x30); # push %fs:(%eax) &data_byte(0x64,0xff,0x30); # push %fs:(%eax)
&data_byte(0x64,0x89,0x20); # mov %esp,%fs:(%eax) &data_byte(0x64,0x89,0x20); # mov %esp,%fs:(%eax)
} }
&mov ("edi",&wparam(0)); &mov ("edx","esp"); # put aside %esp
&mov ("esi",&wparam(1)); &add ("esp",-128);
&mov ("ecx",&wparam(2)); &movups ("xmm0",&QWP(0,"edi")); # copy-in context
&and ("esp",-16);
&movups ("xmm1",&QWP(16,"edi"));
&movaps (&QWP(0,"esp"),"xmm0");
&mov ("edi","esp");
&movaps (&QWP(16,"esp"),"xmm1");
&xor ("eax","eax");
&data_byte(0xf3,0x0f,0xa6,0xd0); # rep xsha256 &data_byte(0xf3,0x0f,0xa6,0xd0); # rep xsha256
&movaps ("xmm0",&QWP(0,"esp"));
&movaps ("xmm1",&QWP(16,"esp"));
&mov ("esp","edx"); # restore %esp
if ($::win32 or $::coff) { if ($::win32 or $::coff) {
&data_byte(0x64,0x8f,0x05,0,0,0,0); # pop %fs:0 &data_byte(0x64,0x8f,0x05,0,0,0,0); # pop %fs:0
&lea ("esp",&DWP(4,"esp")); &lea ("esp",&DWP(4,"esp"));
} }
&mov ("edi",&wparam(0));
&movups (&QWP(0,"edi"),"xmm0"); # copy-out context
&movups (&QWP(16,"edi"),"xmm1");
&pop ("esi"); &pop ("esi");
&pop ("edi"); &pop ("edi");
&ret (); &ret ();
@ -408,11 +452,25 @@ my ($mode,$opcode) = @_;
&function_begin_B("padlock_sha256_blocks"); &function_begin_B("padlock_sha256_blocks");
&push ("edi"); &push ("edi");
&push ("esi"); &push ("esi");
&mov ("eax",-1);
&mov ("edi",&wparam(0)); &mov ("edi",&wparam(0));
&mov ("esi",&wparam(1)); &mov ("esi",&wparam(1));
&mov ("ecx",&wparam(2)); &mov ("ecx",&wparam(2));
&mov ("edx","esp"); # put aside %esp
&add ("esp",-128);
&movups ("xmm0",&QWP(0,"edi")); # copy-in context
&and ("esp",-16);
&movups ("xmm1",&QWP(16,"edi"));
&movaps (&QWP(0,"esp"),"xmm0");
&mov ("edi","esp");
&movaps (&QWP(16,"esp"),"xmm1");
&mov ("eax",-1);
&data_byte(0xf3,0x0f,0xa6,0xd0); # rep xsha256 &data_byte(0xf3,0x0f,0xa6,0xd0); # rep xsha256
&movaps ("xmm0",&QWP(0,"esp"));
&movaps ("xmm1",&QWP(16,"esp"));
&mov ("esp","edx"); # restore %esp
&mov ("edi",&wparam(0));
&movups (&QWP(0,"edi"),"xmm0"); # copy-out context
&movups (&QWP(16,"edi"),"xmm1");
&pop ("esi"); &pop ("esi");
&pop ("edi"); &pop ("edi");
&ret (); &ret ();
@ -424,7 +482,29 @@ my ($mode,$opcode) = @_;
&mov ("edi",&wparam(0)); &mov ("edi",&wparam(0));
&mov ("esi",&wparam(1)); &mov ("esi",&wparam(1));
&mov ("ecx",&wparam(2)); &mov ("ecx",&wparam(2));
&mov ("edx","esp"); # put aside %esp
&add ("esp",-128);
&movups ("xmm0",&QWP(0,"edi")); # copy-in context
&and ("esp",-16);
&movups ("xmm1",&QWP(16,"edi"));
&movups ("xmm2",&QWP(32,"edi"));
&movups ("xmm3",&QWP(48,"edi"));
&movaps (&QWP(0,"esp"),"xmm0");
&mov ("edi","esp");
&movaps (&QWP(16,"esp"),"xmm1");
&movaps (&QWP(32,"esp"),"xmm2");
&movaps (&QWP(48,"esp"),"xmm3");
&data_byte(0xf3,0x0f,0xa6,0xe0); # rep xsha512 &data_byte(0xf3,0x0f,0xa6,0xe0); # rep xsha512
&movaps ("xmm0",&QWP(0,"esp"));
&movaps ("xmm1",&QWP(16,"esp"));
&movaps ("xmm2",&QWP(32,"esp"));
&movaps ("xmm3",&QWP(48,"esp"));
&mov ("esp","edx"); # restore %esp
&mov ("edi",&wparam(0));
&movups (&QWP(0,"edi"),"xmm0"); # copy-out context
&movups (&QWP(16,"edi"),"xmm1");
&movups (&QWP(32,"edi"),"xmm2");
&movups (&QWP(48,"edi"),"xmm3");
&pop ("esi"); &pop ("esi");
&pop ("edi"); &pop ("edi");
&ret (); &ret ();

View File

@ -146,9 +146,20 @@ padlock_xstore:
# padlock_sha1_oneshot (x86_64, 3 arguments): copies the 20-byte context at
# (%rdi) into a 16-byte-aligned scratch area on the stack, executes
# "rep xsha1" with %rax = 0 and %rdi pointing at the scratch copy, then
# copies the 20 bytes back to the caller's buffer (pointer kept in %rdx).
# The third argument (%rdx) is moved to %rcx for the instruction; %rsi is
# passed through untouched (presumably the input pointer -- confirm against
# the PadLock guide).
# Fix: the scratch area reserved with "sub $128+8,%rsp" is now released
# before "ret"; without that, "ret" popped its return address from the
# scratch area instead of from the caller's frame.
.type padlock_sha1_oneshot,\@function,3 .type padlock_sha1_oneshot,\@function,3
.align 16 .align 16
padlock_sha1_oneshot: padlock_sha1_oneshot:
xor %rax,%rax
mov %rdx,%rcx mov %rdx,%rcx
mov %rdi,%rdx # put aside %rdi
movups (%rdi),%xmm0 # copy-in context
# 128 bytes of scratch plus 8: %rsp is 8 mod 16 at entry (return address
# pushed by call), so subtracting 136 leaves (%rsp) aligned for movaps.
sub \$128+8,%rsp
mov 16(%rdi),%eax
movaps %xmm0,(%rsp)
mov %rsp,%rdi
mov %eax,16(%rsp)
# %eax was used as a temporary for the fifth context dword, so %rax is
# zeroed only now, right before the instruction.
xor %rax,%rax
.byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1 .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1
movaps (%rsp),%xmm0
mov 16(%rsp),%eax
add \$128+8,%rsp # release scratch area so ret sees the return address
movups %xmm0,(%rdx) # copy-out context
mov %eax,16(%rdx)
ret ret
.size padlock_sha1_oneshot,.-padlock_sha1_oneshot .size padlock_sha1_oneshot,.-padlock_sha1_oneshot
@ -156,9 +167,20 @@ padlock_sha1_oneshot:
# padlock_sha1_blocks (x86_64, 3 arguments): copies the 20-byte context at
# (%rdi) into a 16-byte-aligned scratch area on the stack, executes
# "rep xsha1" with %rax = -1 and %rdi pointing at the scratch copy, then
# copies the 20 bytes back to the caller's buffer (pointer kept in %rdx).
# The third argument (%rdx) is moved to %rcx for the instruction; %rsi is
# passed through untouched (presumably the input pointer -- confirm against
# the PadLock guide).
# Fix: the scratch area reserved with "sub $128+8,%rsp" is now released
# before "ret"; without that, "ret" popped its return address from the
# scratch area instead of from the caller's frame.
.type padlock_sha1_blocks,\@function,3 .type padlock_sha1_blocks,\@function,3
.align 16 .align 16
padlock_sha1_blocks: padlock_sha1_blocks:
mov \$-1,%rax
mov %rdx,%rcx mov %rdx,%rcx
mov %rdi,%rdx # put aside %rdi
movups (%rdi),%xmm0 # copy-in context
# 128 bytes of scratch plus 8: %rsp is 8 mod 16 at entry (return address
# pushed by call), so subtracting 136 leaves (%rsp) aligned for movaps.
sub \$128+8,%rsp
mov 16(%rdi),%eax
movaps %xmm0,(%rsp)
mov %rsp,%rdi
mov %eax,16(%rsp)
# %eax was used as a temporary for the fifth context dword, so %rax is
# set to -1 only now, right before the instruction.
mov \$-1,%rax
.byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1 .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1
movaps (%rsp),%xmm0
mov 16(%rsp),%eax
add \$128+8,%rsp # release scratch area so ret sees the return address
movups %xmm0,(%rdx) # copy-out context
mov %eax,16(%rdx)
ret ret
.size padlock_sha1_blocks,.-padlock_sha1_blocks .size padlock_sha1_blocks,.-padlock_sha1_blocks
@ -166,9 +188,20 @@ padlock_sha1_blocks:
# padlock_sha256_oneshot (x86_64, 3 arguments): copies the 32-byte context
# at (%rdi) into a 16-byte-aligned scratch area on the stack, executes
# "rep xsha256" with %rax = 0 and %rdi pointing at the scratch copy, then
# copies the 32 bytes back to the caller's buffer (pointer kept in %rdx).
# The third argument (%rdx) is moved to %rcx for the instruction; %rsi is
# passed through untouched (presumably the input pointer -- confirm against
# the PadLock guide).
# Fix: the scratch area reserved with "sub $128+8,%rsp" is now released
# before "ret"; without that, "ret" popped its return address from the
# scratch area instead of from the caller's frame.
.type padlock_sha256_oneshot,\@function,3 .type padlock_sha256_oneshot,\@function,3
.align 16 .align 16
padlock_sha256_oneshot: padlock_sha256_oneshot:
xor %rax,%rax
mov %rdx,%rcx mov %rdx,%rcx
mov %rdi,%rdx # put aside %rdi
movups (%rdi),%xmm0 # copy-in context
# 128 bytes of scratch plus 8: %rsp is 8 mod 16 at entry (return address
# pushed by call), so subtracting 136 leaves (%rsp) aligned for movaps.
sub \$128+8,%rsp
movups 16(%rdi),%xmm1
movaps %xmm0,(%rsp)
mov %rsp,%rdi
movaps %xmm1,16(%rsp)
xor %rax,%rax
.byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256 .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256
movaps (%rsp),%xmm0
movaps 16(%rsp),%xmm1
add \$128+8,%rsp # release scratch area so ret sees the return address
movups %xmm0,(%rdx) # copy-out context
movups %xmm1,16(%rdx)
ret ret
.size padlock_sha256_oneshot,.-padlock_sha256_oneshot .size padlock_sha256_oneshot,.-padlock_sha256_oneshot
@ -176,9 +209,20 @@ padlock_sha256_oneshot:
# padlock_sha256_blocks (x86_64, 3 arguments): copies the 32-byte context
# at (%rdi) into a 16-byte-aligned scratch area on the stack, executes
# "rep xsha256" with %rax = -1 and %rdi pointing at the scratch copy, then
# copies the 32 bytes back to the caller's buffer (pointer kept in %rdx).
# The third argument (%rdx) is moved to %rcx for the instruction; %rsi is
# passed through untouched (presumably the input pointer -- confirm against
# the PadLock guide).
# Fix: the scratch area reserved with "sub $128+8,%rsp" is now released
# before "ret"; without that, "ret" popped its return address from the
# scratch area instead of from the caller's frame.
.type padlock_sha256_blocks,\@function,3 .type padlock_sha256_blocks,\@function,3
.align 16 .align 16
padlock_sha256_blocks: padlock_sha256_blocks:
mov \$-1,%rax
mov %rdx,%rcx mov %rdx,%rcx
mov %rdi,%rdx # put aside %rdi
movups (%rdi),%xmm0 # copy-in context
# 128 bytes of scratch plus 8: %rsp is 8 mod 16 at entry (return address
# pushed by call), so subtracting 136 leaves (%rsp) aligned for movaps.
sub \$128+8,%rsp
movups 16(%rdi),%xmm1
movaps %xmm0,(%rsp)
mov %rsp,%rdi
movaps %xmm1,16(%rsp)
mov \$-1,%rax
.byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256 .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256
movaps (%rsp),%xmm0
movaps 16(%rsp),%xmm1
add \$128+8,%rsp # release scratch area so ret sees the return address
movups %xmm0,(%rdx) # copy-out context
movups %xmm1,16(%rdx)
ret ret
.size padlock_sha256_blocks,.-padlock_sha256_blocks .size padlock_sha256_blocks,.-padlock_sha256_blocks
@ -187,7 +231,26 @@ padlock_sha256_blocks:
# padlock_sha512_blocks (x86_64): copies the 64-byte context at (%rdi) into
# a 16-byte-aligned scratch area on the stack, executes "rep xsha512" with
# %rdi pointing at the scratch copy, then copies the 64 bytes back to the
# caller's buffer (pointer kept in %rdx). The third argument (%rdx) is moved
# to %rcx for the instruction; %rsi is passed through untouched (presumably
# the input pointer -- confirm against the PadLock guide).
# Fix: the scratch area reserved with "sub $128+8,%rsp" is now released
# before "ret"; without that, "ret" popped its return address from the
# scratch area instead of from the caller's frame.
.align 16 .align 16
padlock_sha512_blocks: padlock_sha512_blocks:
mov %rdx,%rcx mov %rdx,%rcx
mov %rdi,%rdx # put aside %rdi
movups (%rdi),%xmm0 # copy-in context
# 128 bytes of scratch plus 8: %rsp is 8 mod 16 at entry (return address
# pushed by call), so subtracting 136 leaves (%rsp) aligned for movaps.
sub \$128+8,%rsp
movups 16(%rdi),%xmm1
movups 32(%rdi),%xmm2
movups 48(%rdi),%xmm3
movaps %xmm0,(%rsp)
mov %rsp,%rdi
movaps %xmm1,16(%rsp)
movaps %xmm2,32(%rsp)
movaps %xmm3,48(%rsp)
.byte 0xf3,0x0f,0xa6,0xe0 # rep xsha512 .byte 0xf3,0x0f,0xa6,0xe0 # rep xsha512
movaps (%rsp),%xmm0
movaps 16(%rsp),%xmm1
movaps 32(%rsp),%xmm2
movaps 48(%rsp),%xmm3
add \$128+8,%rsp # release scratch area so ret sees the return address
movups %xmm0,(%rdx) # copy-out context
movups %xmm1,16(%rdx)
movups %xmm2,32(%rdx)
movups %xmm3,48(%rdx)
ret ret
.size padlock_sha512_blocks,.-padlock_sha512_blocks .size padlock_sha512_blocks,.-padlock_sha512_blocks
___ ___