SPARCv9 assembly pack: harmonize ABI handling (so that it is handled in one
place at a time: by the pre-processor in the .S case, and by perl in the .s case).
This commit is contained in:
parent
8ed11a815e
commit
1efd583085
@ -18,23 +18,8 @@
|
|||||||
# ~100-230% faster than gcc-generated code and ~35-90% faster than
|
# ~100-230% faster than gcc-generated code and ~35-90% faster than
|
||||||
# the pure SPARCv9 code path.
|
# the pure SPARCv9 code path.
|
||||||
|
|
||||||
$bits=32;
|
|
||||||
for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
|
|
||||||
if ($bits==64) { $bias=2047; $frame=192; }
|
|
||||||
else { $bias=0; $frame=112; }
|
|
||||||
|
|
||||||
$locals=16*8;
|
$locals=16*8;
|
||||||
|
|
||||||
$code.=<<___;
|
|
||||||
#include <sparc_arch.h>
|
|
||||||
|
|
||||||
.section ".text",#alloc,#execinstr
|
|
||||||
___
|
|
||||||
$code.=<<___ if ($bits==64);
|
|
||||||
.register %g2,#scratch
|
|
||||||
.register %g3,#scratch
|
|
||||||
___
|
|
||||||
|
|
||||||
$tab="%l0";
|
$tab="%l0";
|
||||||
|
|
||||||
@T=("%g2","%g3");
|
@T=("%g2","%g3");
|
||||||
@ -44,6 +29,13 @@ $tab="%l0";
|
|||||||
($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo;
|
($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo;
|
||||||
|
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
|
#include <sparc_arch.h>
|
||||||
|
|
||||||
|
#ifdef __arch64__
|
||||||
|
.register %g2,#scratch
|
||||||
|
.register %g3,#scratch
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __PIC__
|
#ifdef __PIC__
|
||||||
SPARC_PIC_THUNK(%g1)
|
SPARC_PIC_THUNK(%g1)
|
||||||
#endif
|
#endif
|
||||||
@ -74,7 +66,7 @@ bn_GF2m_mul_2x2:
|
|||||||
|
|
||||||
.align 16
|
.align 16
|
||||||
.Lsoftware:
|
.Lsoftware:
|
||||||
save %sp,-$frame-$locals,%sp
|
save %sp,-STACK_FRAME-$locals,%sp
|
||||||
|
|
||||||
sllx %i1,32,$a
|
sllx %i1,32,$a
|
||||||
mov -1,$a12
|
mov -1,$a12
|
||||||
@ -83,7 +75,7 @@ bn_GF2m_mul_2x2:
|
|||||||
srlx $a12,1,$a48 ! 0x7fff...
|
srlx $a12,1,$a48 ! 0x7fff...
|
||||||
or %i4,$b,$b
|
or %i4,$b,$b
|
||||||
srlx $a12,2,$a12 ! 0x3fff...
|
srlx $a12,2,$a12 ! 0x3fff...
|
||||||
add %sp,$bias+$frame,$tab
|
add %sp,STACK_BIAS+STACK_FRAME,$tab
|
||||||
|
|
||||||
sllx $a,2,$a4
|
sllx $a,2,$a4
|
||||||
mov $a,$a1
|
mov $a,$a1
|
||||||
|
@ -17,11 +17,6 @@
|
|||||||
# single-process result on 8-core processor, or ~11GBps per 2.85GHz
|
# single-process result on 8-core processor, or ~11GBps per 2.85GHz
|
||||||
# socket.
|
# socket.
|
||||||
|
|
||||||
$bits=32;
|
|
||||||
for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
|
|
||||||
if ($bits==64) { $bias=2047; $frame=192; }
|
|
||||||
else { $bias=0; $frame=112; }
|
|
||||||
|
|
||||||
$output=shift;
|
$output=shift;
|
||||||
open STDOUT,">$output";
|
open STDOUT,">$output";
|
||||||
|
|
||||||
@ -198,13 +193,14 @@ $code.=<<___;
|
|||||||
___
|
___
|
||||||
}
|
}
|
||||||
|
|
||||||
$code.=<<___ if ($bits==64);
|
|
||||||
.register %g2,#scratch
|
|
||||||
.register %g3,#scratch
|
|
||||||
___
|
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
#include "sparc_arch.h"
|
#include "sparc_arch.h"
|
||||||
|
|
||||||
|
#ifdef __arch64__
|
||||||
|
.register %g2,#scratch
|
||||||
|
.register %g3,#scratch
|
||||||
|
#endif
|
||||||
|
|
||||||
.section ".text",#alloc,#execinstr
|
.section ".text",#alloc,#execinstr
|
||||||
|
|
||||||
#ifdef __PIC__
|
#ifdef __PIC__
|
||||||
@ -246,7 +242,7 @@ md5_block_asm_data_order:
|
|||||||
|
|
||||||
.word 0x81b02800 ! MD5
|
.word 0x81b02800 ! MD5
|
||||||
|
|
||||||
bne,pt `$bits==64?"%xcc":"%icc"`, .Lhw_loop
|
bne,pt SIZE_T_CC, .Lhw_loop
|
||||||
nop
|
nop
|
||||||
|
|
||||||
.Lhwfinish:
|
.Lhwfinish:
|
||||||
@ -287,7 +283,7 @@ md5_block_asm_data_order:
|
|||||||
|
|
||||||
.word 0x81b02800 ! MD5
|
.word 0x81b02800 ! MD5
|
||||||
|
|
||||||
bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
|
bne,pt SIZE_T_CC, .Lhwunaligned_loop
|
||||||
for %f26, %f26, %f10 ! %f10=%f26
|
for %f26, %f26, %f10 ! %f10=%f26
|
||||||
|
|
||||||
ba .Lhwfinish
|
ba .Lhwfinish
|
||||||
@ -295,7 +291,7 @@ md5_block_asm_data_order:
|
|||||||
|
|
||||||
.align 16
|
.align 16
|
||||||
.Lsoftware:
|
.Lsoftware:
|
||||||
save %sp,-$frame,%sp
|
save %sp,-STACK_FRAME,%sp
|
||||||
|
|
||||||
rd %asi,$saved_asi
|
rd %asi,$saved_asi
|
||||||
wr %g0,0x88,%asi ! ASI_PRIMARY_LITTLE
|
wr %g0,0x88,%asi ! ASI_PRIMARY_LITTLE
|
||||||
@ -355,7 +351,7 @@ $code.=<<___;
|
|||||||
add $t2,$C,$C
|
add $t2,$C,$C
|
||||||
add $CD,$D,$D
|
add $CD,$D,$D
|
||||||
srl $B,0,$B ! clruw $B
|
srl $B,0,$B ! clruw $B
|
||||||
bne `$bits==64?"%xcc":"%icc"`,.Loop
|
bne SIZE_T_CC,.Loop
|
||||||
srl $D,0,$D ! clruw $D
|
srl $D,0,$D ! clruw $D
|
||||||
|
|
||||||
st $A,[$ctx+0] ! write out ctx
|
st $A,[$ctx+0] ! write out ctx
|
||||||
|
@ -25,11 +25,6 @@
|
|||||||
# single-process result on 8-core processor, or ~9GBps per 2.85GHz
|
# single-process result on 8-core processor, or ~9GBps per 2.85GHz
|
||||||
# socket.
|
# socket.
|
||||||
|
|
||||||
$bits=32;
|
|
||||||
for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
|
|
||||||
if ($bits==64) { $bias=2047; $frame=192; }
|
|
||||||
else { $bias=0; $frame=112; }
|
|
||||||
|
|
||||||
$output=shift;
|
$output=shift;
|
||||||
open STDOUT,">$output";
|
open STDOUT,">$output";
|
||||||
|
|
||||||
@ -185,13 +180,14 @@ $code.=<<___;
|
|||||||
___
|
___
|
||||||
}
|
}
|
||||||
|
|
||||||
$code.=<<___ if ($bits==64);
|
|
||||||
.register %g2,#scratch
|
|
||||||
.register %g3,#scratch
|
|
||||||
___
|
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
#include "sparc_arch.h"
|
#include "sparc_arch.h"
|
||||||
|
|
||||||
|
#ifdef __arch64__
|
||||||
|
.register %g2,#scratch
|
||||||
|
.register %g3,#scratch
|
||||||
|
#endif
|
||||||
|
|
||||||
.section ".text",#alloc,#execinstr
|
.section ".text",#alloc,#execinstr
|
||||||
|
|
||||||
#ifdef __PIC__
|
#ifdef __PIC__
|
||||||
@ -231,7 +227,7 @@ sha1_block_data_order:
|
|||||||
|
|
||||||
.word 0x81b02820 ! SHA1
|
.word 0x81b02820 ! SHA1
|
||||||
|
|
||||||
bne,pt `$bits==64?"%xcc":"%icc"`, .Lhw_loop
|
bne,pt SIZE_T_CC, .Lhw_loop
|
||||||
nop
|
nop
|
||||||
|
|
||||||
.Lhwfinish:
|
.Lhwfinish:
|
||||||
@ -271,7 +267,7 @@ sha1_block_data_order:
|
|||||||
|
|
||||||
.word 0x81b02820 ! SHA1
|
.word 0x81b02820 ! SHA1
|
||||||
|
|
||||||
bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
|
bne,pt SIZE_T_CC, .Lhwunaligned_loop
|
||||||
for %f26, %f26, %f10 ! %f10=%f26
|
for %f26, %f26, %f10 ! %f10=%f26
|
||||||
|
|
||||||
ba .Lhwfinish
|
ba .Lhwfinish
|
||||||
@ -279,7 +275,7 @@ sha1_block_data_order:
|
|||||||
|
|
||||||
.align 16
|
.align 16
|
||||||
.Lsoftware:
|
.Lsoftware:
|
||||||
save %sp,-$frame,%sp
|
save %sp,-STACK_FRAME,%sp
|
||||||
sllx $len,6,$len
|
sllx $len,6,$len
|
||||||
add $inp,$len,$len
|
add $inp,$len,$len
|
||||||
|
|
||||||
@ -359,7 +355,7 @@ $code.=<<___;
|
|||||||
add $E,@X[4],$E
|
add $E,@X[4],$E
|
||||||
st $E,[$ctx+16]
|
st $E,[$ctx+16]
|
||||||
|
|
||||||
bne `$bits==64?"%xcc":"%icc"`,.Lloop
|
bne SIZE_T_CC,.Lloop
|
||||||
andn $inp,7,$tmp0
|
andn $inp,7,$tmp0
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
@ -49,12 +49,6 @@
|
|||||||
# saturates at 11.5x single-process result on 8-core processor, or
|
# saturates at 11.5x single-process result on 8-core processor, or
|
||||||
# ~11/16GBps per 2.85GHz socket.
|
# ~11/16GBps per 2.85GHz socket.
|
||||||
|
|
||||||
|
|
||||||
$bits=32;
|
|
||||||
for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
|
|
||||||
if ($bits==64) { $bias=2047; $frame=192; }
|
|
||||||
else { $bias=0; $frame=112; }
|
|
||||||
|
|
||||||
$output=shift;
|
$output=shift;
|
||||||
open STDOUT,">$output";
|
open STDOUT,">$output";
|
||||||
|
|
||||||
@ -191,29 +185,29 @@ $code.=<<___ if ($i<15);
|
|||||||
or @pair[1],$tmp2,$tmp2
|
or @pair[1],$tmp2,$tmp2
|
||||||
`"ld [$inp+".eval(32+4+$i*8)."],@pair[1]" if ($i<12)`
|
`"ld [$inp+".eval(32+4+$i*8)."],@pair[1]" if ($i<12)`
|
||||||
add $h,$tmp2,$T1
|
add $h,$tmp2,$T1
|
||||||
$ST $tmp2,[%sp+`$bias+$frame+$i*$SZ`]
|
$ST $tmp2,[%sp+STACK_BIAS+STACK_FRAME+`$i*$SZ`]
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($i==12);
|
$code.=<<___ if ($i==12);
|
||||||
bnz,a,pn %icc,.+8
|
bnz,a,pn %icc,.+8
|
||||||
ld [$inp+128],%l0
|
ld [$inp+128],%l0
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($i==15);
|
$code.=<<___ if ($i==15);
|
||||||
ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+0`],%l2
|
||||||
sllx @pair[1],$tmp31,$tmp2 ! Xload($i)
|
sllx @pair[1],$tmp31,$tmp2 ! Xload($i)
|
||||||
add $tmp31,32,$tmp0
|
add $tmp31,32,$tmp0
|
||||||
ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+4`],%l3
|
||||||
sllx @pair[0],$tmp0,$tmp1
|
sllx @pair[0],$tmp0,$tmp1
|
||||||
ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+0`],%l4
|
||||||
srlx @pair[2],$tmp32,@pair[1]
|
srlx @pair[2],$tmp32,@pair[1]
|
||||||
or $tmp1,$tmp2,$tmp2
|
or $tmp1,$tmp2,$tmp2
|
||||||
ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+4`],%l5
|
||||||
or @pair[1],$tmp2,$tmp2
|
or @pair[1],$tmp2,$tmp2
|
||||||
ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+0`],%l6
|
||||||
add $h,$tmp2,$T1
|
add $h,$tmp2,$T1
|
||||||
$ST $tmp2,[%sp+`$bias+$frame+$i*$SZ`]
|
$ST $tmp2,[%sp+STACK_BIAS+STACK_FRAME+`$i*$SZ`]
|
||||||
ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+4`],%l7
|
||||||
ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+0`],%l0
|
||||||
ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+4`],%l1
|
||||||
___
|
___
|
||||||
} if ($SZ==8);
|
} if ($SZ==8);
|
||||||
|
|
||||||
@ -349,9 +343,9 @@ $code.=<<___;
|
|||||||
or %l3,$tmp0,$tmp0
|
or %l3,$tmp0,$tmp0
|
||||||
|
|
||||||
srlx $tmp0,@sigma0[0],$T1
|
srlx $tmp0,@sigma0[0],$T1
|
||||||
ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+0`],%l2
|
||||||
sllx $tmp0,`64-@sigma0[2]`,$tmp1
|
sllx $tmp0,`64-@sigma0[2]`,$tmp1
|
||||||
ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+4`],%l3
|
||||||
srlx $tmp0,@sigma0[1],$tmp0
|
srlx $tmp0,@sigma0[1],$tmp0
|
||||||
xor $tmp1,$T1,$T1
|
xor $tmp1,$T1,$T1
|
||||||
sllx $tmp1,`@sigma0[2]-@sigma0[1]`,$tmp1
|
sllx $tmp1,`@sigma0[2]-@sigma0[1]`,$tmp1
|
||||||
@ -363,9 +357,9 @@ $code.=<<___;
|
|||||||
or %l7,$tmp2,$tmp2
|
or %l7,$tmp2,$tmp2
|
||||||
|
|
||||||
srlx $tmp2,@sigma1[0],$tmp1
|
srlx $tmp2,@sigma1[0],$tmp1
|
||||||
ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+0`],%l6
|
||||||
sllx $tmp2,`64-@sigma1[2]`,$tmp0
|
sllx $tmp2,`64-@sigma1[2]`,$tmp0
|
||||||
ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+4`],%l7
|
||||||
srlx $tmp2,@sigma1[1],$tmp2
|
srlx $tmp2,@sigma1[1],$tmp2
|
||||||
xor $tmp0,$tmp1,$tmp1
|
xor $tmp0,$tmp1,$tmp1
|
||||||
sllx $tmp0,`@sigma1[2]-@sigma1[1]`,$tmp0
|
sllx $tmp0,`@sigma1[2]-@sigma1[1]`,$tmp0
|
||||||
@ -374,29 +368,30 @@ $code.=<<___;
|
|||||||
xor $tmp0,$tmp1,$tmp1
|
xor $tmp0,$tmp1,$tmp1
|
||||||
sllx %l4,32,$tmp0
|
sllx %l4,32,$tmp0
|
||||||
xor $tmp2,$tmp1,$tmp1 ! sigma1(X[$i+14])
|
xor $tmp2,$tmp1,$tmp1 ! sigma1(X[$i+14])
|
||||||
ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+0`],%l4
|
||||||
or %l5,$tmp0,$tmp0
|
or %l5,$tmp0,$tmp0
|
||||||
ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+4`],%l5
|
||||||
|
|
||||||
sllx %l0,32,$tmp2
|
sllx %l0,32,$tmp2
|
||||||
add $tmp1,$T1,$T1
|
add $tmp1,$T1,$T1
|
||||||
ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+0`],%l0
|
||||||
or %l1,$tmp2,$tmp2
|
or %l1,$tmp2,$tmp2
|
||||||
add $tmp0,$T1,$T1 ! +=X[$i+9]
|
add $tmp0,$T1,$T1 ! +=X[$i+9]
|
||||||
ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1
|
ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+4`],%l1
|
||||||
add $tmp2,$T1,$T1 ! +=X[$i]
|
add $tmp2,$T1,$T1 ! +=X[$i]
|
||||||
$ST $T1,[%sp+`$bias+$frame+($i%16)*$SZ`]
|
$ST $T1,[%sp+STACK_BIAS+STACK_FRAME+`($i%16)*$SZ`]
|
||||||
___
|
___
|
||||||
&BODY_00_15(@_);
|
&BODY_00_15(@_);
|
||||||
} if ($SZ==8);
|
} if ($SZ==8);
|
||||||
|
|
||||||
$code.=<<___ if ($bits==64);
|
|
||||||
.register %g2,#scratch
|
|
||||||
.register %g3,#scratch
|
|
||||||
___
|
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
#include "sparc_arch.h"
|
#include "sparc_arch.h"
|
||||||
|
|
||||||
|
#ifdef __arch64__
|
||||||
|
.register %g2,#scratch
|
||||||
|
.register %g3,#scratch
|
||||||
|
#endif
|
||||||
|
|
||||||
.section ".text",#alloc,#execinstr
|
.section ".text",#alloc,#execinstr
|
||||||
|
|
||||||
.align 64
|
.align 64
|
||||||
@ -519,7 +514,7 @@ $code.=<<___ if ($SZ==8); # SHA512
|
|||||||
|
|
||||||
.word 0x81b02860 ! SHA512
|
.word 0x81b02860 ! SHA512
|
||||||
|
|
||||||
bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwaligned_loop
|
bne,pt SIZE_T_CC, .Lhwaligned_loop
|
||||||
nop
|
nop
|
||||||
|
|
||||||
.Lhwfinish:
|
.Lhwfinish:
|
||||||
@ -579,7 +574,7 @@ $code.=<<___ if ($SZ==8); # SHA512
|
|||||||
|
|
||||||
.word 0x81b02860 ! SHA512
|
.word 0x81b02860 ! SHA512
|
||||||
|
|
||||||
bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
|
bne,pt SIZE_T_CC, .Lhwunaligned_loop
|
||||||
for %f50, %f50, %f18 ! %f18=%f50
|
for %f50, %f50, %f18 ! %f18=%f50
|
||||||
|
|
||||||
ba .Lhwfinish
|
ba .Lhwfinish
|
||||||
@ -612,7 +607,7 @@ $code.=<<___ if ($SZ==4); # SHA256
|
|||||||
|
|
||||||
.word 0x81b02840 ! SHA256
|
.word 0x81b02840 ! SHA256
|
||||||
|
|
||||||
bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwloop
|
bne,pt SIZE_T_CC, .Lhwloop
|
||||||
nop
|
nop
|
||||||
|
|
||||||
.Lhwfinish:
|
.Lhwfinish:
|
||||||
@ -655,7 +650,7 @@ $code.=<<___ if ($SZ==4); # SHA256
|
|||||||
|
|
||||||
.word 0x81b02840 ! SHA256
|
.word 0x81b02840 ! SHA256
|
||||||
|
|
||||||
bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
|
bne,pt SIZE_T_CC, .Lhwunaligned_loop
|
||||||
for %f26, %f26, %f10 ! %f10=%f26
|
for %f26, %f26, %f10 ! %f10=%f26
|
||||||
|
|
||||||
ba .Lhwfinish
|
ba .Lhwfinish
|
||||||
@ -664,7 +659,7 @@ ___
|
|||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
.align 16
|
.align 16
|
||||||
.Lsoftware:
|
.Lsoftware:
|
||||||
save %sp,`-$frame-$locals`,%sp
|
save %sp,-STACK_FRAME-$locals,%sp
|
||||||
and $inp,`$align-1`,$tmp31
|
and $inp,`$align-1`,$tmp31
|
||||||
sllx $len,`log(16*$SZ)/log(2)`,$len
|
sllx $len,`log(16*$SZ)/log(2)`,$len
|
||||||
andn $inp,`$align-1`,$inp
|
andn $inp,`$align-1`,$inp
|
||||||
@ -783,7 +778,7 @@ ___
|
|||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
add $inp,`16*$SZ`,$inp ! advance inp
|
add $inp,`16*$SZ`,$inp ! advance inp
|
||||||
cmp $inp,$len
|
cmp $inp,$len
|
||||||
bne `$bits==64?"%xcc":"%icc"`,.Lloop
|
bne SIZE_T_CC,.Lloop
|
||||||
sub $Ktbl,`($rounds-16)*$SZ`,$Ktbl ! rewind Ktbl
|
sub $Ktbl,`($rounds-16)*$SZ`,$Ktbl ! rewind Ktbl
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
@ -32,6 +32,10 @@
|
|||||||
# define __PIC__
|
# define __PIC__
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SUNPRO_C) && defined(__sparcv9) && !defined(__arch64__)
|
||||||
|
# define __arch64__
|
||||||
|
#endif
|
||||||
|
|
||||||
#define SPARC_PIC_THUNK(reg) \
|
#define SPARC_PIC_THUNK(reg) \
|
||||||
.align 32; \
|
.align 32; \
|
||||||
.Lpic_thunk: \
|
.Lpic_thunk: \
|
||||||
@ -53,18 +57,23 @@
|
|||||||
add %o7, reg, reg
|
add %o7, reg, reg
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (defined(__GNUC__) && defined(__arch64__)) || \
|
#if defined(__arch64__)
|
||||||
(defined(__SUNPRO_C) && defined(__sparcv9))
|
|
||||||
|
|
||||||
# define SPARC_LOAD_ADDRESS(SYM, reg) \
|
# define SPARC_LOAD_ADDRESS(SYM, reg) \
|
||||||
setx SYM, %o7, reg;
|
setx SYM, %o7, reg;
|
||||||
# define LDPTR ldx
|
# define LDPTR ldx
|
||||||
|
# define SIZE_T_CC %xcc
|
||||||
|
# define STACK_FRAME 192
|
||||||
|
# define STACK_BIAS 2047
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
# define SPARC_LOAD_ADDRESS(SYM, reg) \
|
# define SPARC_LOAD_ADDRESS(SYM, reg) \
|
||||||
set SYM, reg;
|
set SYM, reg;
|
||||||
# define LDPTR ld
|
# define LDPTR ld
|
||||||
|
# define SIZE_T_CC %icc
|
||||||
|
# define STACK_FRAME 112
|
||||||
|
# define STACK_BIAS 0
|
||||||
# define SPARC_LOAD_ADDRESS_LEAF(SYM,reg,tmp) SPARC_LOAD_ADDRESS(SYM,reg)
|
# define SPARC_LOAD_ADDRESS_LEAF(SYM,reg,tmp) SPARC_LOAD_ADDRESS(SYM,reg)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user