Facilitate back-porting of AESNI and SHA modules.
Fix SEH and stack handling in Win64 build. (cherry picked from commit 977f32e85241cba8be53e44dade32231e8a91718)
This commit is contained in:
parent
955bfbc268
commit
56ba280ccd
@ -95,6 +95,8 @@ $avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
|
|||||||
`ml64 2>&1` =~ /Version ([0-9]+)\./ &&
|
`ml64 2>&1` =~ /Version ([0-9]+)\./ &&
|
||||||
$1>=10);
|
$1>=10);
|
||||||
|
|
||||||
|
$shaext=1; ### set to zero if compiling for 1.0.1
|
||||||
|
|
||||||
$stitched_decrypt=0;
|
$stitched_decrypt=0;
|
||||||
|
|
||||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||||
@ -119,6 +121,8 @@ aesni_cbc_sha1_enc:
|
|||||||
# caller should check for SSSE3 and AES-NI bits
|
# caller should check for SSSE3 and AES-NI bits
|
||||||
mov OPENSSL_ia32cap_P+0(%rip),%r10d
|
mov OPENSSL_ia32cap_P+0(%rip),%r10d
|
||||||
mov OPENSSL_ia32cap_P+4(%rip),%r11
|
mov OPENSSL_ia32cap_P+4(%rip),%r11
|
||||||
|
___
|
||||||
|
$code.=<<___ if ($shaext);
|
||||||
bt \$61,%r11 # check SHA bit
|
bt \$61,%r11 # check SHA bit
|
||||||
jc aesni_cbc_sha1_enc_shaext
|
jc aesni_cbc_sha1_enc_shaext
|
||||||
___
|
___
|
||||||
@ -1657,7 +1661,7 @@ K_XX_XX:
|
|||||||
.asciz "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
|
.asciz "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
|
||||||
.align 64
|
.align 64
|
||||||
___
|
___
|
||||||
{{{
|
if ($shaext) {{{
|
||||||
($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10");
|
($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10");
|
||||||
|
|
||||||
$rounds="%r11d";
|
$rounds="%r11d";
|
||||||
@ -1676,7 +1680,7 @@ aesni_cbc_sha1_enc_shaext:
|
|||||||
mov `($win64?56:8)`(%rsp),$inp # load 7th argument
|
mov `($win64?56:8)`(%rsp),$inp # load 7th argument
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($win64);
|
$code.=<<___ if ($win64);
|
||||||
lea `-8-4*16`(%rsp),%rsp
|
lea `-8-10*16`(%rsp),%rsp
|
||||||
movaps %xmm6,-8-10*16(%rax)
|
movaps %xmm6,-8-10*16(%rax)
|
||||||
movaps %xmm7,-8-9*16(%rax)
|
movaps %xmm7,-8-9*16(%rax)
|
||||||
movaps %xmm8,-8-8*16(%rax)
|
movaps %xmm8,-8-8*16(%rax)
|
||||||
@ -1867,7 +1871,21 @@ ssse3_handler:
|
|||||||
lea (%rsi,%r10),%r10 # epilogue label
|
lea (%rsi,%r10),%r10 # epilogue label
|
||||||
cmp %r10,%rbx # context->Rip>=epilogue label
|
cmp %r10,%rbx # context->Rip>=epilogue label
|
||||||
jae .Lcommon_seh_tail
|
jae .Lcommon_seh_tail
|
||||||
|
___
|
||||||
|
$code.=<<___ if ($shaext);
|
||||||
|
lea aesni_cbc_sha1_enc_shaext(%rip),%r10
|
||||||
|
cmp %r10,%rbx
|
||||||
|
jb .Lseh_no_shaext
|
||||||
|
|
||||||
|
lea (%rax),%rsi
|
||||||
|
lea 512($context),%rdi # &context.Xmm6
|
||||||
|
mov \$20,%ecx
|
||||||
|
.long 0xa548f3fc # cld; rep movsq
|
||||||
|
lea 168(%rax),%rax # adjust stack pointer
|
||||||
|
jmp .Lcommon_seh_tail
|
||||||
|
.Lseh_no_shaext:
|
||||||
|
___
|
||||||
|
$code.=<<___;
|
||||||
lea 96(%rax),%rsi
|
lea 96(%rax),%rsi
|
||||||
lea 512($context),%rdi # &context.Xmm6
|
lea 512($context),%rdi # &context.Xmm6
|
||||||
mov \$20,%ecx
|
mov \$20,%ecx
|
||||||
@ -1939,6 +1957,11 @@ $code.=<<___ if ($avx);
|
|||||||
.rva .LSEH_end_aesni_cbc_sha1_enc_avx
|
.rva .LSEH_end_aesni_cbc_sha1_enc_avx
|
||||||
.rva .LSEH_info_aesni_cbc_sha1_enc_avx
|
.rva .LSEH_info_aesni_cbc_sha1_enc_avx
|
||||||
___
|
___
|
||||||
|
$code.=<<___ if ($shaext);
|
||||||
|
.rva .LSEH_begin_aesni_cbc_sha1_enc_shaext
|
||||||
|
.rva .LSEH_end_aesni_cbc_sha1_enc_shaext
|
||||||
|
.rva .LSEH_info_aesni_cbc_sha1_enc_shaext
|
||||||
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
.section .xdata
|
.section .xdata
|
||||||
.align 8
|
.align 8
|
||||||
@ -1953,6 +1976,12 @@ $code.=<<___ if ($avx);
|
|||||||
.rva ssse3_handler
|
.rva ssse3_handler
|
||||||
.rva .Lprologue_avx,.Lepilogue_avx # HandlerData[]
|
.rva .Lprologue_avx,.Lepilogue_avx # HandlerData[]
|
||||||
___
|
___
|
||||||
|
$code.=<<___ if ($shaext);
|
||||||
|
.LSEH_info_aesni_cbc_sha1_enc_shaext:
|
||||||
|
.byte 9,0,0,0
|
||||||
|
.rva ssse3_handler
|
||||||
|
.rva .Lprologue_shaext,.Lepilogue_shaext # HandlerData[]
|
||||||
|
___
|
||||||
}
|
}
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
|
@ -59,6 +59,9 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
|
|||||||
$avx = ($1>=10) + ($1>=11);
|
$avx = ($1>=10) + ($1>=11);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$shaext=1; ### set to zero if compiling for 1.0.1
|
||||||
|
$avx=1 if (!$shaext && $avx);
|
||||||
|
|
||||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||||
*STDOUT=*OUT;
|
*STDOUT=*OUT;
|
||||||
|
|
||||||
@ -113,10 +116,12 @@ $code.=<<___ if ($avx);
|
|||||||
je .Lprobe
|
je .Lprobe
|
||||||
mov 0(%r11),%eax
|
mov 0(%r11),%eax
|
||||||
mov 4(%r11),%r10
|
mov 4(%r11),%r10
|
||||||
|
___
|
||||||
|
$code.=<<___ if ($shaext);
|
||||||
bt \$61,%r10 # check for SHA
|
bt \$61,%r10 # check for SHA
|
||||||
jc ${func}_shaext
|
jc ${func}_shaext
|
||||||
|
___
|
||||||
|
$code.=<<___;
|
||||||
mov %r10,%r11
|
mov %r10,%r11
|
||||||
shr \$32,%r11
|
shr \$32,%r11
|
||||||
|
|
||||||
@ -1259,16 +1264,17 @@ ___
|
|||||||
$r++; unshift(@rndkey,pop(@rndkey));
|
$r++; unshift(@rndkey,pop(@rndkey));
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if ($shaext) {
|
||||||
|
my $Tbl="%rax";
|
||||||
|
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
.type ${func}_shaext,\@function,6
|
.type ${func}_shaext,\@function,6
|
||||||
.align 32
|
.align 32
|
||||||
${func}_shaext:
|
${func}_shaext:
|
||||||
mov %rsp,%rax
|
|
||||||
mov `($win64?56:8)`(%rsp),$inp # load 7th argument
|
mov `($win64?56:8)`(%rsp),$inp # load 7th argument
|
||||||
push %rbx
|
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($win64);
|
$code.=<<___ if ($win64);
|
||||||
lea `-4*16`(%rsp),%rsp
|
lea `-8-10*16`(%rsp),%rsp
|
||||||
movaps %xmm6,-8-10*16(%rax)
|
movaps %xmm6,-8-10*16(%rax)
|
||||||
movaps %xmm7,-8-9*16(%rax)
|
movaps %xmm7,-8-9*16(%rax)
|
||||||
movaps %xmm8,-8-8*16(%rax)
|
movaps %xmm8,-8-8*16(%rax)
|
||||||
@ -1465,24 +1471,24 @@ $code.=<<___;
|
|||||||
movdqu $CDGH,16($ctx)
|
movdqu $CDGH,16($ctx)
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($win64);
|
$code.=<<___ if ($win64);
|
||||||
movaps -8-10*16(%rax),%xmm6
|
movaps 0*16(%rsp),%xmm6
|
||||||
movaps -8-9*16(%rax),%xmm7
|
movaps 1*16(%rsp),%xmm7
|
||||||
movaps -8-8*16(%rax),%xmm8
|
movaps 2*16(%rsp),%xmm8
|
||||||
movaps -8-7*16(%rax),%xmm9
|
movaps 3*16(%rsp),%xmm9
|
||||||
movaps -8-6*16(%rax),%xmm10
|
movaps 4*16(%rsp),%xmm10
|
||||||
movaps -8-5*16(%rax),%xmm11
|
movaps 5*16(%rsp),%xmm11
|
||||||
movaps -8-4*16(%rax),%xmm12
|
movaps 6*16(%rsp),%xmm12
|
||||||
movaps -8-3*16(%rax),%xmm13
|
movaps 7*16(%rsp),%xmm13
|
||||||
movaps -8-2*16(%rax),%xmm14
|
movaps 8*16(%rsp),%xmm14
|
||||||
movaps -8-1*16(%rax),%xmm15
|
movaps 9*16(%rsp),%xmm15
|
||||||
|
lea 8+10*16(%rsp),%rsp
|
||||||
.Lepilogue_shaext:
|
.Lepilogue_shaext:
|
||||||
___
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
mov -8(%rax),%rbx
|
|
||||||
mov %rax,%rsp
|
|
||||||
ret
|
ret
|
||||||
.size ${func}_shaext,.-${func}_shaext
|
.size ${func}_shaext,.-${func}_shaext
|
||||||
___
|
___
|
||||||
|
}
|
||||||
}}}}}
|
}}}}}
|
||||||
|
|
||||||
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
|
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
|
||||||
@ -1527,6 +1533,19 @@ se_handler:
|
|||||||
cmp %r10,%rbx # context->Rip>=epilogue label
|
cmp %r10,%rbx # context->Rip>=epilogue label
|
||||||
jae .Lin_prologue
|
jae .Lin_prologue
|
||||||
___
|
___
|
||||||
|
$code.=<<___ if ($shaext);
|
||||||
|
lea aesni_cbc_sha256_enc_shaext(%rip),%r10
|
||||||
|
cmp %r10,%rbx
|
||||||
|
jb .Lnot_in_shaext
|
||||||
|
|
||||||
|
lea (%rax),%rsi
|
||||||
|
lea 512($context),%rdi # &context.Xmm6
|
||||||
|
mov \$20,%ecx
|
||||||
|
.long 0xa548f3fc # cld; rep movsq
|
||||||
|
lea 168(%rax),%rax # adjust stack pointer
|
||||||
|
jmp .Lin_prologue
|
||||||
|
.Lnot_in_shaext:
|
||||||
|
___
|
||||||
$code.=<<___ if ($avx>1);
|
$code.=<<___ if ($avx>1);
|
||||||
lea .Lavx2_shortcut(%rip),%r10
|
lea .Lavx2_shortcut(%rip),%r10
|
||||||
cmp %r10,%rbx # context->Rip<avx2_shortcut
|
cmp %r10,%rbx # context->Rip<avx2_shortcut
|
||||||
@ -1613,6 +1632,11 @@ $code.=<<___ if ($avx>1);
|
|||||||
.rva .LSEH_end_${func}_avx2
|
.rva .LSEH_end_${func}_avx2
|
||||||
.rva .LSEH_info_${func}_avx2
|
.rva .LSEH_info_${func}_avx2
|
||||||
___
|
___
|
||||||
|
$code.=<<___ if ($shaext);
|
||||||
|
.rva .LSEH_begin_${func}_shaext
|
||||||
|
.rva .LSEH_end_${func}_shaext
|
||||||
|
.rva .LSEH_info_${func}_shaext
|
||||||
|
___
|
||||||
$code.=<<___ if ($avx);
|
$code.=<<___ if ($avx);
|
||||||
.section .xdata
|
.section .xdata
|
||||||
.align 8
|
.align 8
|
||||||
@ -1632,6 +1656,12 @@ $code.=<<___ if ($avx>1);
|
|||||||
.rva se_handler
|
.rva se_handler
|
||||||
.rva .Lprologue_avx2,.Lepilogue_avx2 # HandlerData[]
|
.rva .Lprologue_avx2,.Lepilogue_avx2 # HandlerData[]
|
||||||
___
|
___
|
||||||
|
$code.=<<___ if ($shaext);
|
||||||
|
.LSEH_info_${func}_shaext:
|
||||||
|
.byte 9,0,0,0
|
||||||
|
.rva se_handler
|
||||||
|
.rva .Lprologue_shaext,.Lepilogue_shaext # HandlerData[]
|
||||||
|
___
|
||||||
}
|
}
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
|
@ -128,6 +128,8 @@ $ymm=1 if ($xmm && !$ymm && $ARGV[0] eq "win32" &&
|
|||||||
`ml 2>&1` =~ /Version ([0-9]+)\./ &&
|
`ml 2>&1` =~ /Version ([0-9]+)\./ &&
|
||||||
$1>=10); # first version supporting AVX
|
$1>=10); # first version supporting AVX
|
||||||
|
|
||||||
|
$shaext=$xmm; ### set to zero if compiling for 1.0.1
|
||||||
|
|
||||||
&external_label("OPENSSL_ia32cap_P") if ($xmm);
|
&external_label("OPENSSL_ia32cap_P") if ($xmm);
|
||||||
|
|
||||||
|
|
||||||
@ -307,7 +309,7 @@ if ($alt) {
|
|||||||
|
|
||||||
&function_begin("sha1_block_data_order");
|
&function_begin("sha1_block_data_order");
|
||||||
if ($xmm) {
|
if ($xmm) {
|
||||||
&static_label("shaext_shortcut");
|
&static_label("shaext_shortcut") if ($shaext);
|
||||||
&static_label("ssse3_shortcut");
|
&static_label("ssse3_shortcut");
|
||||||
&static_label("avx_shortcut") if ($ymm);
|
&static_label("avx_shortcut") if ($ymm);
|
||||||
&static_label("K_XX_XX");
|
&static_label("K_XX_XX");
|
||||||
@ -325,8 +327,10 @@ if ($xmm) {
|
|||||||
&mov ($C,&DWP(8,$T));
|
&mov ($C,&DWP(8,$T));
|
||||||
&test ($A,1<<24); # check FXSR bit
|
&test ($A,1<<24); # check FXSR bit
|
||||||
&jz (&label("x86"));
|
&jz (&label("x86"));
|
||||||
&test ($C,1<<29); # check SHA bit
|
if ($shaext) {
|
||||||
&jnz (&label("shaext_shortcut"));
|
&test ($C,1<<29); # check SHA bit
|
||||||
|
&jnz (&label("shaext_shortcut"));
|
||||||
|
}
|
||||||
if ($ymm) {
|
if ($ymm) {
|
||||||
&and ($D,1<<28); # mask AVX bit
|
&and ($D,1<<28); # mask AVX bit
|
||||||
&and ($A,1<<30); # mask "Intel CPU" bit
|
&and ($A,1<<30); # mask "Intel CPU" bit
|
||||||
@ -405,7 +409,7 @@ if ($xmm) {
|
|||||||
&function_end("sha1_block_data_order");
|
&function_end("sha1_block_data_order");
|
||||||
|
|
||||||
if ($xmm) {
|
if ($xmm) {
|
||||||
{
|
if ($shaext) {
|
||||||
######################################################################
|
######################################################################
|
||||||
# Intel SHA Extensions implementation of SHA1 update function.
|
# Intel SHA Extensions implementation of SHA1 update function.
|
||||||
#
|
#
|
||||||
|
@ -107,6 +107,9 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
|
|||||||
$avx = ($1>=10) + ($1>=11);
|
$avx = ($1>=10) + ($1>=11);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$shaext=1; ### set to zero if compiling for 1.0.1
|
||||||
|
$avx=1 if (!$shaext && $avx);
|
||||||
|
|
||||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||||
*STDOUT=*OUT;
|
*STDOUT=*OUT;
|
||||||
|
|
||||||
@ -245,7 +248,8 @@ sha1_block_data_order:
|
|||||||
mov OPENSSL_ia32cap_P+8(%rip),%r10d
|
mov OPENSSL_ia32cap_P+8(%rip),%r10d
|
||||||
test \$`1<<9`,%r8d # check SSSE3 bit
|
test \$`1<<9`,%r8d # check SSSE3 bit
|
||||||
jz .Lialu
|
jz .Lialu
|
||||||
|
___
|
||||||
|
$code.=<<___ if ($shaext);
|
||||||
test \$`1<<29`,%r10d # check SHA bit
|
test \$`1<<29`,%r10d # check SHA bit
|
||||||
jnz _shaext_shortcut
|
jnz _shaext_shortcut
|
||||||
___
|
___
|
||||||
@ -321,7 +325,7 @@ $code.=<<___;
|
|||||||
ret
|
ret
|
||||||
.size sha1_block_data_order,.-sha1_block_data_order
|
.size sha1_block_data_order,.-sha1_block_data_order
|
||||||
___
|
___
|
||||||
{{{
|
if ($shaext) {{{
|
||||||
######################################################################
|
######################################################################
|
||||||
# Intel SHA Extensions implementation of SHA1 update function.
|
# Intel SHA Extensions implementation of SHA1 update function.
|
||||||
#
|
#
|
||||||
@ -1956,9 +1960,13 @@ ssse3_handler:
|
|||||||
.rva .LSEH_begin_sha1_block_data_order
|
.rva .LSEH_begin_sha1_block_data_order
|
||||||
.rva .LSEH_end_sha1_block_data_order
|
.rva .LSEH_end_sha1_block_data_order
|
||||||
.rva .LSEH_info_sha1_block_data_order
|
.rva .LSEH_info_sha1_block_data_order
|
||||||
|
___
|
||||||
|
$code.=<<___ if ($shaext);
|
||||||
.rva .LSEH_begin_sha1_block_data_order_shaext
|
.rva .LSEH_begin_sha1_block_data_order_shaext
|
||||||
.rva .LSEH_end_sha1_block_data_order_shaext
|
.rva .LSEH_end_sha1_block_data_order_shaext
|
||||||
.rva .LSEH_info_sha1_block_data_order_shaext
|
.rva .LSEH_info_sha1_block_data_order_shaext
|
||||||
|
___
|
||||||
|
$code.=<<___;
|
||||||
.rva .LSEH_begin_sha1_block_data_order_ssse3
|
.rva .LSEH_begin_sha1_block_data_order_ssse3
|
||||||
.rva .LSEH_end_sha1_block_data_order_ssse3
|
.rva .LSEH_end_sha1_block_data_order_ssse3
|
||||||
.rva .LSEH_info_sha1_block_data_order_ssse3
|
.rva .LSEH_info_sha1_block_data_order_ssse3
|
||||||
|
@ -82,6 +82,8 @@ if ($xmm && !$avx && $ARGV[0] eq "win32" &&
|
|||||||
$avx = ($1>=10) + ($1>=11);
|
$avx = ($1>=10) + ($1>=11);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$shaext=$xmm; ### set to zero if compiling for 1.0.1
|
||||||
|
|
||||||
$unroll_after = 64*4; # If pre-evicted from L1P cache first spin of
|
$unroll_after = 64*4; # If pre-evicted from L1P cache first spin of
|
||||||
# fully unrolled loop was measured to run about
|
# fully unrolled loop was measured to run about
|
||||||
# 3-4x slower. If slowdown coefficient is N and
|
# 3-4x slower. If slowdown coefficient is N and
|
||||||
@ -205,8 +207,8 @@ sub BODY_00_15() {
|
|||||||
&jz ($unroll_after?&label("no_xmm"):&label("loop"));
|
&jz ($unroll_after?&label("no_xmm"):&label("loop"));
|
||||||
&and ("ecx",1<<30); # mask "Intel CPU" bit
|
&and ("ecx",1<<30); # mask "Intel CPU" bit
|
||||||
&and ("ebx",1<<28|1<<9); # mask AVX and SSSE3 bits
|
&and ("ebx",1<<28|1<<9); # mask AVX and SSSE3 bits
|
||||||
&test ("edx",1<<29) if ($xmm); # check for SHA
|
&test ("edx",1<<29) if ($shaext); # check for SHA
|
||||||
&jnz (&label("shaext")) if ($xmm);
|
&jnz (&label("shaext")) if ($shaext);
|
||||||
&or ("ecx","ebx");
|
&or ("ecx","ebx");
|
||||||
&and ("ecx",1<<28|1<<30);
|
&and ("ecx",1<<28|1<<30);
|
||||||
&cmp ("ecx",1<<28|1<<30);
|
&cmp ("ecx",1<<28|1<<30);
|
||||||
@ -505,7 +507,7 @@ my @AH=($A,$K256);
|
|||||||
&function_end_A();
|
&function_end_A();
|
||||||
}
|
}
|
||||||
if (!$i386 && $xmm) {{{
|
if (!$i386 && $xmm) {{{
|
||||||
{
|
if ($shaext) {
|
||||||
######################################################################
|
######################################################################
|
||||||
# Intel SHA Extensions implementation of SHA256 update function.
|
# Intel SHA Extensions implementation of SHA256 update function.
|
||||||
#
|
#
|
||||||
|
@ -123,6 +123,9 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
|
|||||||
$avx = ($1>=10) + ($1>=11);
|
$avx = ($1>=10) + ($1>=11);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$shaext=1; ### set to zero if compiling for 1.0.1
|
||||||
|
$avx=1 if (!$shaext && $avx);
|
||||||
|
|
||||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||||
*STDOUT=*OUT;
|
*STDOUT=*OUT;
|
||||||
|
|
||||||
@ -259,7 +262,7 @@ $code.=<<___ if ($SZ==4 || $avx);
|
|||||||
mov 4(%r11),%r10d
|
mov 4(%r11),%r10d
|
||||||
mov 8(%r11),%r11d
|
mov 8(%r11),%r11d
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($SZ==4);
|
$code.=<<___ if ($SZ==4 && $shaext);
|
||||||
test \$`1<<29`,%r11d # check for SHA
|
test \$`1<<29`,%r11d # check for SHA
|
||||||
jnz _shaext_shortcut
|
jnz _shaext_shortcut
|
||||||
___
|
___
|
||||||
@ -518,7 +521,7 @@ ___
|
|||||||
######################################################################
|
######################################################################
|
||||||
# SIMD code paths
|
# SIMD code paths
|
||||||
#
|
#
|
||||||
if ($SZ==4) {{{
|
if ($SZ==4 && $shaext) {{{
|
||||||
######################################################################
|
######################################################################
|
||||||
# Intel SHA Extensions implementation of SHA256 update function.
|
# Intel SHA Extensions implementation of SHA256 update function.
|
||||||
#
|
#
|
||||||
@ -2295,10 +2298,12 @@ shaext_handler:
|
|||||||
.rva .LSEH_end_$func
|
.rva .LSEH_end_$func
|
||||||
.rva .LSEH_info_$func
|
.rva .LSEH_info_$func
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($SZ==4);
|
$code.=<<___ if ($SZ==4 && $shaext);
|
||||||
.rva .LSEH_begin_${func}_shaext
|
.rva .LSEH_begin_${func}_shaext
|
||||||
.rva .LSEH_end_${func}_shaext
|
.rva .LSEH_end_${func}_shaext
|
||||||
.rva .LSEH_info_${func}_shaext
|
.rva .LSEH_info_${func}_shaext
|
||||||
|
___
|
||||||
|
$code.=<<___ if ($SZ==4);
|
||||||
.rva .LSEH_begin_${func}_ssse3
|
.rva .LSEH_begin_${func}_ssse3
|
||||||
.rva .LSEH_end_${func}_ssse3
|
.rva .LSEH_end_${func}_ssse3
|
||||||
.rva .LSEH_info_${func}_ssse3
|
.rva .LSEH_info_${func}_ssse3
|
||||||
|
Loading…
x
Reference in New Issue
Block a user