Multiple assembler packs: add experimental memory bus instrumentation.
This commit is contained in:
parent
764ef43962
commit
5fabb88a78
@ -126,3 +126,93 @@ OPENSSL_cleanse:
|
|||||||
.Ldone: ret ($26)
|
.Ldone: ret ($26)
|
||||||
.end OPENSSL_cleanse
|
.end OPENSSL_cleanse
|
||||||
___
|
___
|
||||||
|
{
|
||||||
|
my ($out,$cnt,$max)=("\$16","\$17","\$18");
|
||||||
|
my ($tick,$lasttick)=("\$19","\$20");
|
||||||
|
my ($diff,$lastdiff)=("\$21","\$22");
|
||||||
|
my ($v0,$ra,$sp,$zero)=("\$0","\$26","\$30","\$31");
|
||||||
|
|
||||||
|
print <<___;
|
||||||
|
.globl OPENSSL_instrument_bus
|
||||||
|
.ent OPENSSL_instrument_bus
|
||||||
|
OPENSSL_instrument_bus:
|
||||||
|
.frame $sp,0,$ra
|
||||||
|
.prologue 0
|
||||||
|
mov $cnt,$v0
|
||||||
|
|
||||||
|
rpcc $lasttick
|
||||||
|
mov 0,$diff
|
||||||
|
|
||||||
|
ecb ($out)
|
||||||
|
ldl_l $tick,0($out)
|
||||||
|
addl $diff,$tick,$tick
|
||||||
|
mov $tick,$diff
|
||||||
|
stl_c $tick,0($out)
|
||||||
|
stl $diff,0($out)
|
||||||
|
|
||||||
|
.Loop: rpcc $tick
|
||||||
|
subq $tick,$lasttick,$diff
|
||||||
|
mov $tick,$lasttick
|
||||||
|
|
||||||
|
ecb ($out)
|
||||||
|
ldl_l $tick,0($out)
|
||||||
|
addl $diff,$tick,$tick
|
||||||
|
mov $tick,$diff
|
||||||
|
stl_c $tick,0($out)
|
||||||
|
stl $diff,0($out)
|
||||||
|
|
||||||
|
subl $cnt,1,$cnt
|
||||||
|
lda $out,4($out)
|
||||||
|
bne $cnt,.Loop
|
||||||
|
|
||||||
|
ret ($ra)
|
||||||
|
.end OPENSSL_instrument_bus
|
||||||
|
|
||||||
|
.globl OPENSSL_instrument_bus2
|
||||||
|
.ent OPENSSL_instrument_bus2
|
||||||
|
OPENSSL_instrument_bus2:
|
||||||
|
.frame $sp,0,$ra
|
||||||
|
.prologue 0
|
||||||
|
mov $cnt,$v0
|
||||||
|
|
||||||
|
rpcc $lasttick
|
||||||
|
mov 0,$diff
|
||||||
|
|
||||||
|
ecb ($out)
|
||||||
|
ldl_l $tick,0($out)
|
||||||
|
addl $diff,$tick,$tick
|
||||||
|
mov $tick,$diff
|
||||||
|
stl_c $tick,0($out)
|
||||||
|
stl $diff,0($out)
|
||||||
|
|
||||||
|
rpcc $tick
|
||||||
|
subq $tick,$lasttick,$diff
|
||||||
|
mov $tick,$lasttick
|
||||||
|
mov $diff,$lastdiff
|
||||||
|
.Loop2:
|
||||||
|
ecb ($out)
|
||||||
|
ldl_l $tick,0($out)
|
||||||
|
addl $diff,$tick,$tick
|
||||||
|
mov $tick,$diff
|
||||||
|
stl_c $tick,0($out)
|
||||||
|
stl $diff,0($out)
|
||||||
|
|
||||||
|
subl $max,1,$max
|
||||||
|
beq $max,.Ldone2
|
||||||
|
|
||||||
|
rpcc $tick
|
||||||
|
subq $tick,$lasttick,$diff
|
||||||
|
mov $tick,$lasttick
|
||||||
|
subq $lastdiff,$diff,$tick
|
||||||
|
mov $diff,$lastdiff
|
||||||
|
cmovne $tick,1,$tick
|
||||||
|
subl $cnt,$tick,$cnt
|
||||||
|
s4addq $tick,$out,$out
|
||||||
|
bne $cnt,.Loop2
|
||||||
|
|
||||||
|
.Ldone2:
|
||||||
|
subl $v0,$cnt,$v0
|
||||||
|
ret ($ra)
|
||||||
|
.end OPENSSL_instrument_bus2
|
||||||
|
___
|
||||||
|
}
|
||||||
|
@ -26,7 +26,7 @@ OPENSSL_atomic_add:
|
|||||||
{ .mii; mov ar.ccv=r2
|
{ .mii; mov ar.ccv=r2
|
||||||
add r8=r2,r33
|
add r8=r2,r33
|
||||||
mov r3=r2 };;
|
mov r3=r2 };;
|
||||||
{ .mmi; mf
|
{ .mmi; mf;;
|
||||||
cmpxchg4.acq r2=[r32],r8,ar.ccv
|
cmpxchg4.acq r2=[r32],r8,ar.ccv
|
||||||
nop.i 0 };;
|
nop.i 0 };;
|
||||||
{ .mib; cmp.ne p6,p0=r2,r3
|
{ .mib; cmp.ne p6,p0=r2,r3
|
||||||
@ -165,3 +165,89 @@ OPENSSL_cleanse:
|
|||||||
(p7) br.cond.dpnt .Little
|
(p7) br.cond.dpnt .Little
|
||||||
(p6) br.ret.sptk.many b0 };;
|
(p6) br.ret.sptk.many b0 };;
|
||||||
.endp OPENSSL_cleanse#
|
.endp OPENSSL_cleanse#
|
||||||
|
|
||||||
|
.global OPENSSL_instrument_bus#
|
||||||
|
.proc OPENSSL_instrument_bus#
|
||||||
|
// Entry label must match the surrounding .global/.proc/.endp directives,
// which all name OPENSSL_instrument_bus#; otherwise the exported symbol
// is never defined.
OPENSSL_instrument_bus:
|
||||||
|
{ .mmi; mov r2=r33
|
||||||
|
#if defined(_HPUX_SOURCE) && !defined(_LP64)
|
||||||
|
addp4 r32=0,r32
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
{ .mmi; mov r8=ar.itc;;
|
||||||
|
mov r10=r0
|
||||||
|
mov r9=r8 };;
|
||||||
|
|
||||||
|
{ .mmi; fc r32;;
|
||||||
|
ld4 r8=[r32] };;
|
||||||
|
{ .mmi; mf
|
||||||
|
mov ar.ccv=r8
|
||||||
|
add r8=r8,r10 };;
|
||||||
|
{ .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
|
||||||
|
};;
|
||||||
|
.Loop:
|
||||||
|
{ .mmi; mov r8=ar.itc;;
|
||||||
|
sub r10=r8,r9 // diff=tick-lasttick
|
||||||
|
mov r9=r8 };; // lasttick=tick
|
||||||
|
{ .mmi; fc r32;;
|
||||||
|
ld4 r8=[r32] };;
|
||||||
|
{ .mmi; mf
|
||||||
|
mov ar.ccv=r8
|
||||||
|
add r8=r8,r10 };;
|
||||||
|
{ .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
|
||||||
|
add r33=-1,r33
|
||||||
|
add r32=4,r32 };;
|
||||||
|
{ .mib; cmp4.ne p6,p0=0,r33
|
||||||
|
(p6) br.cond.dptk .Loop };;
|
||||||
|
|
||||||
|
{ .mib; sub r8=r2,r33
|
||||||
|
br.ret.sptk.many b0 };;
|
||||||
|
.endp OPENSSL_instrument_bus#
|
||||||
|
|
||||||
|
.global OPENSSL_instrument_bus2#
|
||||||
|
.proc OPENSSL_instrument_bus2#
|
||||||
|
// Entry label must match the surrounding .global/.proc/.endp directives,
// which all name OPENSSL_instrument_bus2#; otherwise the exported symbol
// is never defined.
OPENSSL_instrument_bus2:
|
||||||
|
{ .mmi; mov r2=r33 // put aside cnt
|
||||||
|
#if defined(_HPUX_SOURCE) && !defined(_LP64)
|
||||||
|
addp4 r32=0,r32
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
{ .mmi; mov r8=ar.itc;;
|
||||||
|
mov r10=r0
|
||||||
|
mov r9=r8 };;
|
||||||
|
|
||||||
|
{ .mmi; fc r32;;
|
||||||
|
ld4 r8=[r32] };;
|
||||||
|
{ .mmi; mf
|
||||||
|
mov ar.ccv=r8
|
||||||
|
add r8=r8,r10 };;
|
||||||
|
{ .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
|
||||||
|
};;
|
||||||
|
|
||||||
|
{ .mmi; mov r8=ar.itc;;
|
||||||
|
sub r10=r8,r9
|
||||||
|
mov r9=r8 };;
|
||||||
|
.Loop2:
|
||||||
|
{ .mmi; mov r11=r10 // lastdiff=diff
|
||||||
|
add r34=-1,r34 };; // --max
|
||||||
|
{ .mmi; fc r32;;
|
||||||
|
ld4 r8=[r32]
|
||||||
|
cmp4.eq p6,p0=0,r34 };;
|
||||||
|
{ .mmi; mf
|
||||||
|
mov ar.ccv=r8
|
||||||
|
add r8=r8,r10 };;
|
||||||
|
{ .mmb; cmpxchg4.acq r3=[r32],r8,ar.ccv
|
||||||
|
(p6) br.cond.spnt .Ldone2 };;
|
||||||
|
|
||||||
|
{ .mmi; mov r8=ar.itc;;
|
||||||
|
sub r10=r8,r9 // diff=tick-lasttick
|
||||||
|
mov r9=r8 };; // lasttick=tick
|
||||||
|
{ .mmi; cmp.ne p6,p0=r10,r11;; // diff!=lastdiff
|
||||||
|
(p6) add r33=-1,r33 };; // conditional --cnt
|
||||||
|
{ .mib; cmp4.ne p7,p0=0,r33
|
||||||
|
(p6) add r32=4,r32 // conditional ++out
|
||||||
|
(p7) br.cond.dptk .Loop2 };;
|
||||||
|
.Ldone2:
|
||||||
|
{ .mib; sub r8=r2,r33
|
||||||
|
br.ret.sptk.many b0 };;
|
||||||
|
.endp OPENSSL_instrument_bus2#
|
||||||
|
@ -87,8 +87,8 @@ OPENSSL_wipe_cpu
|
|||||||
.PROCEND
|
.PROCEND
|
||||||
___
|
___
|
||||||
{
|
{
|
||||||
$inp="%r26";
|
my $inp="%r26";
|
||||||
$len="%r25";
|
my $len="%r25";
|
||||||
|
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
.EXPORT OPENSSL_cleanse,ENTRY,ARGW0=GR,ARGW1=GR
|
.EXPORT OPENSSL_cleanse,ENTRY,ARGW0=GR,ARGW1=GR
|
||||||
@ -112,9 +112,9 @@ Lalign
|
|||||||
|
|
||||||
Laligned
|
Laligned
|
||||||
andcm $len,%r1,%r28
|
andcm $len,%r1,%r28
|
||||||
Loop
|
Lot
|
||||||
$ST %r0,0($inp)
|
$ST %r0,0($inp)
|
||||||
addib,*<> -$SIZE_T,%r28,Loop
|
addib,*<> -$SIZE_T,%r28,Lot
|
||||||
ldo $SIZE_T($inp),$inp
|
ldo $SIZE_T($inp),$inp
|
||||||
|
|
||||||
and,*<> $len,%r1,$len
|
and,*<> $len,%r1,$len
|
||||||
@ -130,7 +130,93 @@ Ldone
|
|||||||
.PROCEND
|
.PROCEND
|
||||||
___
|
___
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
my ($out,$cnt,$max)=("%r26","%r25","%r24");
|
||||||
|
my ($tick,$lasttick)=("%r23","%r22");
|
||||||
|
my ($diff,$lastdiff)=("%r21","%r20");
|
||||||
|
|
||||||
|
$code.=<<___;
|
||||||
|
.EXPORT OPENSSL_instrument_bus,ENTRY,ARGW0=GR,ARGW1=GR
|
||||||
|
.ALIGN 8
|
||||||
|
OPENSSL_instrument_bus
|
||||||
|
.PROC
|
||||||
|
.CALLINFO NO_CALLS
|
||||||
|
.ENTRY
|
||||||
|
copy $cnt,$rv
|
||||||
|
mfctl %cr16,$tick
|
||||||
|
copy $tick,$lasttick
|
||||||
|
ldi 0,$diff
|
||||||
|
|
||||||
|
fdc 0($out)
|
||||||
|
ldw 0($out),$tick
|
||||||
|
add $diff,$tick,$tick
|
||||||
|
stw $tick,0($out)
|
||||||
|
Loop
|
||||||
|
mfctl %cr16,$tick
|
||||||
|
sub $tick,$lasttick,$diff
|
||||||
|
copy $tick,$lasttick
|
||||||
|
|
||||||
|
fdc 0($out)
|
||||||
|
ldw 0($out),$tick
|
||||||
|
add $diff,$tick,$tick
|
||||||
|
stw $tick,0($out)
|
||||||
|
|
||||||
|
addib,<> -1,$cnt,Loop
|
||||||
|
addi 4,$out,$out
|
||||||
|
|
||||||
|
bv ($rp)
|
||||||
|
.EXIT
|
||||||
|
sub $rv,$cnt,$rv
|
||||||
|
.PROCEND
|
||||||
|
|
||||||
|
.EXPORT OPENSSL_instrument_bus2,ENTRY,ARGW0=GR,ARGW1=GR
|
||||||
|
.ALIGN 8
|
||||||
|
OPENSSL_instrument_bus2
|
||||||
|
.PROC
|
||||||
|
.CALLINFO NO_CALLS
|
||||||
|
.ENTRY
|
||||||
|
copy $cnt,$rv
|
||||||
|
sub %r0,$cnt,$cnt
|
||||||
|
|
||||||
|
mfctl %cr16,$tick
|
||||||
|
copy $tick,$lasttick
|
||||||
|
ldi 0,$diff
|
||||||
|
|
||||||
|
fdc 0($out)
|
||||||
|
ldw 0($out),$tick
|
||||||
|
add $diff,$tick,$tick
|
||||||
|
stw $tick,0($out)
|
||||||
|
|
||||||
|
mfctl %cr16,$tick
|
||||||
|
sub $tick,$lasttick,$diff
|
||||||
|
copy $tick,$lasttick
|
||||||
|
Loop2
|
||||||
|
copy $diff,$lastdiff
|
||||||
|
fdc 0($out)
|
||||||
|
ldw 0($out),$tick
|
||||||
|
add $diff,$tick,$tick
|
||||||
|
stw $tick,0($out)
|
||||||
|
|
||||||
|
addib,= -1,$max,Ldone2
|
||||||
|
nop
|
||||||
|
|
||||||
|
mfctl %cr16,$tick
|
||||||
|
sub $tick,$lasttick,$diff
|
||||||
|
copy $tick,$lasttick
|
||||||
|
cmpclr,<> $lastdiff,$diff,$tick
|
||||||
|
ldi 1,$tick
|
||||||
|
|
||||||
|
ldi 1,%r1
|
||||||
|
xor %r1,$tick,$tick
|
||||||
|
addb,<> $tick,$cnt,Loop2
|
||||||
|
shladd,l $tick,2,$out,$out
|
||||||
|
Ldone2
|
||||||
|
bv ($rp)
|
||||||
|
.EXIT
|
||||||
|
add $rv,$cnt,$rv
|
||||||
|
.PROCEND
|
||||||
|
___
|
||||||
|
}
|
||||||
$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);
|
$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);
|
||||||
$code =~ s/,\*/,/gm if ($SIZE_T==4);
|
$code =~ s/,\*/,/gm if ($SIZE_T==4);
|
||||||
print $code;
|
print $code;
|
||||||
|
@ -69,10 +69,10 @@ $code=<<___;
|
|||||||
.globl .OPENSSL_atomic_add
|
.globl .OPENSSL_atomic_add
|
||||||
.align 4
|
.align 4
|
||||||
.OPENSSL_atomic_add:
|
.OPENSSL_atomic_add:
|
||||||
Loop: lwarx r5,0,r3
|
Ladd: lwarx r5,0,r3
|
||||||
add r0,r4,r5
|
add r0,r4,r5
|
||||||
stwcx. r0,0,r3
|
stwcx. r0,0,r3
|
||||||
bne- Loop
|
bne- Ladd
|
||||||
$SIGNX r3,r0
|
$SIGNX r3,r0
|
||||||
blr
|
blr
|
||||||
|
|
||||||
@ -112,6 +112,89 @@ Laligned:
|
|||||||
bne Little
|
bne Little
|
||||||
blr
|
blr
|
||||||
___
|
___
|
||||||
|
{
|
||||||
|
my ($out,$cnt,$max)=("r3","r4","r5");
|
||||||
|
my ($tick,$lasttick)=("r6","r7");
|
||||||
|
my ($diff,$lastdiff)=("r8","r9");
|
||||||
|
|
||||||
|
$code.=<<___;
|
||||||
|
.globl .OPENSSL_instrument_bus
|
||||||
|
.align 4
|
||||||
|
.OPENSSL_instrument_bus:
|
||||||
|
mtctr $cnt
|
||||||
|
|
||||||
|
mftb $lasttick # collect 1st tick
|
||||||
|
li $diff,0
|
||||||
|
|
||||||
|
dcbf 0,$out # flush cache line
|
||||||
|
lwarx $tick,0,$out # load and lock
|
||||||
|
add $tick,$tick,$diff
|
||||||
|
stwcx. $tick,0,$out
|
||||||
|
stwx $tick,0,$out
|
||||||
|
|
||||||
|
Loop: mftb $tick
|
||||||
|
sub $diff,$tick,$lasttick
|
||||||
|
mr $lasttick,$tick
|
||||||
|
dcbf 0,$out # flush cache line
|
||||||
|
lwarx $tick,0,$out # load and lock
|
||||||
|
add $tick,$tick,$diff
|
||||||
|
stwcx. $tick,0,$out
|
||||||
|
stwx $tick,0,$out
|
||||||
|
addi $out,$out,4 # ++$out
|
||||||
|
bdnz Loop
|
||||||
|
|
||||||
|
mr r3,$cnt
|
||||||
|
blr
|
||||||
|
|
||||||
|
.globl .OPENSSL_instrument_bus2
|
||||||
|
.align 4
|
||||||
|
.OPENSSL_instrument_bus2:
|
||||||
|
mr r0,$cnt
|
||||||
|
slwi $cnt,$cnt,2
|
||||||
|
|
||||||
|
mftb $lasttick # collect 1st tick
|
||||||
|
li $diff,0
|
||||||
|
|
||||||
|
dcbf 0,$out # flush cache line
|
||||||
|
lwarx $tick,0,$out # load and lock
|
||||||
|
add $tick,$tick,$diff
|
||||||
|
stwcx. $tick,0,$out
|
||||||
|
stwx $tick,0,$out
|
||||||
|
|
||||||
|
mftb $tick # collect 1st diff
|
||||||
|
sub $diff,$tick,$lasttick
|
||||||
|
mr $lasttick,$tick
|
||||||
|
mr $lastdiff,$diff
|
||||||
|
Loop2:
|
||||||
|
dcbf 0,$out # flush cache line
|
||||||
|
lwarx $tick,0,$out # load and lock
|
||||||
|
add $tick,$tick,$diff
|
||||||
|
stwcx. $tick,0,$out
|
||||||
|
stwx $tick,0,$out
|
||||||
|
|
||||||
|
addic. $max,$max,-1
|
||||||
|
beq Ldone2
|
||||||
|
|
||||||
|
mftb $tick
|
||||||
|
sub $diff,$tick,$lasttick
|
||||||
|
mr $lasttick,$tick
|
||||||
|
cmplw 7,$diff,$lastdiff
|
||||||
|
mr $lastdiff,$diff
|
||||||
|
|
||||||
|
mfcr $tick # pull cr
|
||||||
|
not $tick,$tick # flip bits
|
||||||
|
rlwinm $tick,$tick,1,29,29 # isolate flipped eq bit and scale
|
||||||
|
|
||||||
|
sub. $cnt,$cnt,$tick # conditional --$cnt
|
||||||
|
add $out,$out,$tick # conditional ++$out
|
||||||
|
bne Loop2
|
||||||
|
|
||||||
|
Ldone2:
|
||||||
|
srwi $cnt,$cnt,2
|
||||||
|
sub r3,r0,$cnt
|
||||||
|
blr
|
||||||
|
___
|
||||||
|
}
|
||||||
|
|
||||||
$code =~ s/\`([^\`]*)\`/eval $1/gem;
|
$code =~ s/\`([^\`]*)\`/eval $1/gem;
|
||||||
print $code;
|
print $code;
|
||||||
|
@ -93,6 +93,22 @@ OPENSSL_cleanse:
|
|||||||
br %r14
|
br %r14
|
||||||
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
||||||
|
|
||||||
|
.globl OPENSSL_instrument_bus
|
||||||
|
.type OPENSSL_instrument_bus,@function
|
||||||
|
.align 16
|
||||||
|
OPENSSL_instrument_bus:
|
||||||
|
lghi %r2,0
|
||||||
|
br %r14
|
||||||
|
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
|
||||||
|
|
||||||
|
.globl OPENSSL_instrument_bus2
|
||||||
|
.type OPENSSL_instrument_bus2,@function
|
||||||
|
.align 16
|
||||||
|
OPENSSL_instrument_bus2:
|
||||||
|
lghi %r2,0
|
||||||
|
br %r14
|
||||||
|
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
|
||||||
|
|
||||||
.section .init
|
.section .init
|
||||||
brasl %r14,OPENSSL_cpuid_setup
|
brasl %r14,OPENSSL_cpuid_setup
|
||||||
|
|
||||||
|
@ -397,6 +397,102 @@ OPENSSL_cleanse:
|
|||||||
.type OPENSSL_cleanse,#function
|
.type OPENSSL_cleanse,#function
|
||||||
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
||||||
|
|
||||||
|
.global _sparcv9_vis1_instrument_bus
|
||||||
|
.align 8
|
||||||
|
_sparcv9_vis1_instrument_bus:
|
||||||
|
mov %o1,%o3 ! save cnt
|
||||||
|
.word 0x99410000 !rd %tick,%o4 ! tick
|
||||||
|
mov %o4,%o5 ! lasttick = tick
|
||||||
|
set 0,%g4 ! diff
|
||||||
|
|
||||||
|
andn %o0,63,%g1
|
||||||
|
.word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
|
||||||
|
.word 0x8143e040 !membar #Sync
|
||||||
|
.word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
|
||||||
|
.word 0x8143e040 !membar #Sync
|
||||||
|
ld [%o0],%o4
|
||||||
|
add %o4,%g4,%g4
|
||||||
|
.word 0xc9e2100c !cas [%o0],%o4,%g4
|
||||||
|
|
||||||
|
.Loop: .word 0x99410000 !rd %tick,%o4
|
||||||
|
sub %o4,%o5,%g4 ! diff=tick-lasttick
|
||||||
|
mov %o4,%o5 ! lasttick=tick
|
||||||
|
|
||||||
|
andn %o0,63,%g1
|
||||||
|
.word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
|
||||||
|
.word 0x8143e040 !membar #Sync
|
||||||
|
.word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
|
||||||
|
.word 0x8143e040 !membar #Sync
|
||||||
|
ld [%o0],%o4
|
||||||
|
add %o4,%g4,%g4
|
||||||
|
.word 0xc9e2100c !cas [%o0],%o4,%g4
|
||||||
|
subcc %o1,1,%o1 ! --$cnt
|
||||||
|
bnz .Loop
|
||||||
|
add %o0,4,%o0 ! ++$out
|
||||||
|
|
||||||
|
retl
|
||||||
|
mov %o3,%o0
|
||||||
|
.type _sparcv9_vis1_instrument_bus,#function
|
||||||
|
.size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
|
||||||
|
|
||||||
|
.global _sparcv9_vis1_instrument_bus2
|
||||||
|
.align 8
|
||||||
|
_sparcv9_vis1_instrument_bus2:
|
||||||
|
mov %o1,%o3 ! save cnt
|
||||||
|
sll %o1,2,%o1 ! cnt*=4
|
||||||
|
|
||||||
|
.word 0x99410000 !rd %tick,%o4 ! tick
|
||||||
|
mov %o4,%o5 ! lasttick = tick
|
||||||
|
set 0,%g4 ! diff
|
||||||
|
|
||||||
|
andn %o0,63,%g1
|
||||||
|
.word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
|
||||||
|
.word 0x8143e040 !membar #Sync
|
||||||
|
.word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
|
||||||
|
.word 0x8143e040 !membar #Sync
|
||||||
|
ld [%o0],%o4
|
||||||
|
add %o4,%g4,%g4
|
||||||
|
.word 0xc9e2100c !cas [%o0],%o4,%g4
|
||||||
|
|
||||||
|
.word 0x99410000 !rd %tick,%o4 ! tick
|
||||||
|
sub %o4,%o5,%g4 ! diff=tick-lasttick
|
||||||
|
mov %o4,%o5 ! lasttick=tick
|
||||||
|
mov %g4,%g5 ! lastdiff=diff
|
||||||
|
.Loop2:
|
||||||
|
andn %o0,63,%g1
|
||||||
|
.word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
|
||||||
|
.word 0x8143e040 !membar #Sync
|
||||||
|
.word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
|
||||||
|
.word 0x8143e040 !membar #Sync
|
||||||
|
ld [%o0],%o4
|
||||||
|
add %o4,%g4,%g4
|
||||||
|
.word 0xc9e2100c !cas [%o0],%o4,%g4
|
||||||
|
|
||||||
|
subcc %o2,1,%o2 ! --max
|
||||||
|
bz .Ldone2
|
||||||
|
nop
|
||||||
|
|
||||||
|
.word 0x99410000 !rd %tick,%o4 ! tick
|
||||||
|
sub %o4,%o5,%g4 ! diff=tick-lasttick
|
||||||
|
mov %o4,%o5 ! lasttick=tick
|
||||||
|
cmp %g4,%g5
|
||||||
|
mov %g4,%g5 ! lastdiff=diff
|
||||||
|
|
||||||
|
.word 0x83408000 !rd %ccr,%g1
|
||||||
|
and %g1,4,%g1 ! isolate zero flag
|
||||||
|
xor %g1,4,%g1 ! flip zero flag
|
||||||
|
|
||||||
|
subcc %o1,%g1,%o1 ! conditional --$cnt
|
||||||
|
bnz .Loop2
|
||||||
|
add %o0,%g1,%o0 ! conditional ++$out
|
||||||
|
|
||||||
|
.Ldone2:
|
||||||
|
srl %o1,2,%o1
|
||||||
|
retl
|
||||||
|
sub %o3,%o1,%o0
|
||||||
|
.type _sparcv9_vis1_instrument_bus2,#function
|
||||||
|
.size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
|
||||||
|
|
||||||
.section ".init",#alloc,#execinstr
|
.section ".init",#alloc,#execinstr
|
||||||
call OPENSSL_cpuid_setup
|
call OPENSSL_cpuid_setup
|
||||||
nop
|
nop
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#define SPARCV9_VIS1 (1<<2)
|
#define SPARCV9_VIS1 (1<<2)
|
||||||
#define SPARCV9_VIS2 (1<<3) /* reserved */
|
#define SPARCV9_VIS2 (1<<3) /* reserved */
|
||||||
#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */
|
#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */
|
||||||
|
#define SPARCV9_BLK (1<<5) /* VIS1 block copy */
|
||||||
|
|
||||||
static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED;
|
static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED;
|
||||||
|
|
||||||
@ -31,6 +32,8 @@ void _sparcv9_vis1_probe(void);
|
|||||||
unsigned long _sparcv9_vis1_instrument(void);
|
unsigned long _sparcv9_vis1_instrument(void);
|
||||||
void _sparcv9_vis2_probe(void);
|
void _sparcv9_vis2_probe(void);
|
||||||
void _sparcv9_fmadd_probe(void);
|
void _sparcv9_fmadd_probe(void);
|
||||||
|
/*
 * Assembly back-ends for OPENSSL_instrument_bus[2]. The names must match
 * the symbols the wrappers below call (_sparcv9_vis1_instrument_bus and
 * _sparcv9_vis1_instrument_bus2).
 */
size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t);
size_t _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t);
|
||||||
|
|
||||||
unsigned long OPENSSL_rdtsc(void)
|
unsigned long OPENSSL_rdtsc(void)
|
||||||
{
|
{
|
||||||
@ -44,6 +47,24 @@ unsigned long OPENSSL_rdtsc(void)
|
|||||||
return _sparcv9_rdtick();
|
return _sparcv9_rdtick();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt)
|
||||||
|
{
|
||||||
|
if (OPENSSL_sparcv9cap_P&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK) ==
|
||||||
|
SPARCV9_BLK)
|
||||||
|
return _sparcv9_vis1_instrument_bus(out,cnt);
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max)
|
||||||
|
{
|
||||||
|
if (OPENSSL_sparcv9cap_P&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK) ==
|
||||||
|
SPARCV9_BLK)
|
||||||
|
return _sparcv9_vis1_instrument_bus2(out,cnt,max);
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
#if 0 && defined(__sun) && defined(__SVR4)
|
#if 0 && defined(__sun) && defined(__SVR4)
|
||||||
/* This code path is disabled, because of incompatibility of
|
/* This code path is disabled, because of incompatibility of
|
||||||
* libdevinfo.so.1 and libmalloc.so.1 (see below for details)
|
* libdevinfo.so.1 and libmalloc.so.1 (see below for details)
|
||||||
@ -112,7 +133,7 @@ void OPENSSL_cpuid_setup(void)
|
|||||||
if (sysinfo(SI_ISALIST,si,sizeof(si))>0)
|
if (sysinfo(SI_ISALIST,si,sizeof(si))>0)
|
||||||
{
|
{
|
||||||
if (strstr(si,"+vis"))
|
if (strstr(si,"+vis"))
|
||||||
OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
|
OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_BLK;
|
||||||
if (strstr(si,"+vis2"))
|
if (strstr(si,"+vis2"))
|
||||||
{
|
{
|
||||||
OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
|
OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
|
||||||
@ -169,7 +190,6 @@ void OPENSSL_cpuid_setup(void)
|
|||||||
char *e;
|
char *e;
|
||||||
struct sigaction common_act,ill_oact,bus_oact;
|
struct sigaction common_act,ill_oact,bus_oact;
|
||||||
sigset_t all_masked,oset;
|
sigset_t all_masked,oset;
|
||||||
int sig;
|
|
||||||
static int trigger=0;
|
static int trigger=0;
|
||||||
|
|
||||||
if (trigger) return;
|
if (trigger) return;
|
||||||
@ -211,7 +231,7 @@ void OPENSSL_cpuid_setup(void)
|
|||||||
if (sigsetjmp(common_jmp,1) == 0)
|
if (sigsetjmp(common_jmp,1) == 0)
|
||||||
{
|
{
|
||||||
_sparcv9_vis1_probe();
|
_sparcv9_vis1_probe();
|
||||||
OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
|
OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_BLK;
|
||||||
/* detect UltraSPARC-Tx, see sparccpud.S for details... */
|
/* detect UltraSPARC-Tx, see sparccpud.S for details... */
|
||||||
if (_sparcv9_vis1_instrument() >= 12)
|
if (_sparcv9_vis1_instrument() >= 12)
|
||||||
OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU);
|
OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU);
|
||||||
|
@ -9,8 +9,9 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
|
|||||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||||
open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output";
|
open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output";
|
||||||
|
|
||||||
if ($win64) { $arg1="%rcx"; $arg2="%rdx"; }
|
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
|
||||||
else { $arg1="%rdi"; $arg2="%rsi"; }
|
("%rdi","%rsi","%rdx","%rcx"); # Unix order
|
||||||
|
|
||||||
print<<___;
|
print<<___;
|
||||||
.extern OPENSSL_cpuid_setup
|
.extern OPENSSL_cpuid_setup
|
||||||
.section .init
|
.section .init
|
||||||
@ -228,5 +229,95 @@ OPENSSL_wipe_cpu:
|
|||||||
ret
|
ret
|
||||||
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
||||||
___
|
___
|
||||||
|
{
|
||||||
|
my $out="%r10";
|
||||||
|
my $cnt="%rcx";
|
||||||
|
my $max="%r11";
|
||||||
|
my $lasttick="%r8d";
|
||||||
|
my $lastdiff="%r9d";
|
||||||
|
# Offset from %rsp of the scratch slot where OPENSSL_instrument_bus2 saves
# the original cnt: 8 when generating for Win64, -8 otherwise. Must test
# the $win64 flag; a bareword `win64` is not that variable.
my $redzone=$win64?8:-8;
|
||||||
|
|
||||||
|
print<<___;
|
||||||
|
.globl OPENSSL_instrument_bus
|
||||||
|
.type OPENSSL_instrument_bus,\@abi-omnipotent
|
||||||
|
.align 16
|
||||||
|
OPENSSL_instrument_bus:
|
||||||
|
mov $arg1,$out # tribute to Win64
|
||||||
|
mov $arg2,$cnt
|
||||||
|
mov $arg2,$max
|
||||||
|
|
||||||
|
rdtsc # collect 1st tick
|
||||||
|
mov %eax,$lasttick # lasttick = tick
|
||||||
|
mov \$0,$lastdiff # lastdiff = 0
|
||||||
|
clflush ($out)
|
||||||
|
lock
|
||||||
|
add $lastdiff,($out)
|
||||||
|
jmp .Loop
|
||||||
|
.align 16
|
||||||
|
.Loop: rdtsc
|
||||||
|
mov %eax,%edx
|
||||||
|
sub $lasttick,%eax
|
||||||
|
mov %edx,$lasttick
|
||||||
|
mov %eax,$lastdiff
|
||||||
|
clflush ($out)
|
||||||
|
lock
|
||||||
|
add %eax,($out)
|
||||||
|
lea 4($out),$out
|
||||||
|
sub \$1,$cnt
|
||||||
|
jnz .Loop
|
||||||
|
|
||||||
|
mov $max,%rax
|
||||||
|
ret
|
||||||
|
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
|
||||||
|
|
||||||
|
.globl OPENSSL_instrument_bus2
|
||||||
|
.type OPENSSL_instrument_bus2,\@abi-omnipotent
|
||||||
|
.align 16
|
||||||
|
OPENSSL_instrument_bus2:
|
||||||
|
mov $arg1,$out # tribute to Win64
|
||||||
|
mov $arg2,$cnt
|
||||||
|
mov $arg3,$max
|
||||||
|
mov $cnt,$redzone(%rsp)
|
||||||
|
|
||||||
|
rdtsc # collect 1st tick
|
||||||
|
mov %eax,$lasttick # lasttick = tick
|
||||||
|
mov \$0,$lastdiff # lastdiff = 0
|
||||||
|
|
||||||
|
clflush ($out)
|
||||||
|
lock
|
||||||
|
add $lastdiff,($out)
|
||||||
|
|
||||||
|
rdtsc # collect 1st diff
|
||||||
|
mov %eax,%edx
|
||||||
|
sub $lasttick,%eax # diff
|
||||||
|
mov %edx,$lasttick # lasttick = tick
|
||||||
|
mov %eax,$lastdiff # lastdiff = diff
|
||||||
|
.Loop2:
|
||||||
|
clflush ($out)
|
||||||
|
lock
|
||||||
|
add %eax,($out) # accumulate diff
|
||||||
|
|
||||||
|
sub \$1,$max
|
||||||
|
jz .Ldone2
|
||||||
|
|
||||||
|
rdtsc
|
||||||
|
mov %eax,%edx
|
||||||
|
sub $lasttick,%eax # diff
|
||||||
|
mov %edx,$lasttick # lasttick = tick
|
||||||
|
cmp $lastdiff,%eax
|
||||||
|
mov %eax,$lastdiff # lastdiff = diff
|
||||||
|
mov \$0,%edx
|
||||||
|
setne %dl
|
||||||
|
sub %rdx,$cnt # conditional --$cnt
|
||||||
|
lea ($out,%rdx,4),$out # conditional ++$out
|
||||||
|
jnz .Loop2
|
||||||
|
|
||||||
|
.Ldone2:
|
||||||
|
mov $redzone(%rsp),%rax
|
||||||
|
sub $cnt,%rax
|
||||||
|
ret
|
||||||
|
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
|
||||||
|
___
|
||||||
|
}
|
||||||
|
|
||||||
close STDOUT; # flush
|
close STDOUT; # flush
|
||||||
|
@ -307,6 +307,108 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
|||||||
&ret ();
|
&ret ();
|
||||||
&function_end_B("OPENSSL_cleanse");
|
&function_end_B("OPENSSL_cleanse");
|
||||||
|
|
||||||
|
{
|
||||||
|
my $lasttick = "esi";
|
||||||
|
my $lastdiff = "ebx";
|
||||||
|
my $out = "edi";
|
||||||
|
my $cnt = "ecx";
|
||||||
|
my $max = "ebp";
|
||||||
|
|
||||||
|
&function_begin("OPENSSL_instrument_bus");
|
||||||
|
&mov ("eax",0);
|
||||||
|
if ($sse2) {
|
||||||
|
&picmeup("edx","OPENSSL_ia32cap_P");
|
||||||
|
&bt (&DWP(0,"edx"),4);
|
||||||
|
&jnc (&label("nogo")); # no TSC
|
||||||
|
&bt (&DWP(0,"edx"),19);
|
||||||
|
&jnc (&label("nogo")); # no CLFLUSH
|
||||||
|
|
||||||
|
&mov ($out,&wparam(0)); # load arguments
|
||||||
|
&mov ($cnt,&wparam(1));
|
||||||
|
|
||||||
|
# collect 1st tick
|
||||||
|
&rdtsc ();
|
||||||
|
&mov ($lasttick,"eax"); # lasttick = tick
|
||||||
|
&mov ($lastdiff,0); # lastdiff = 0
|
||||||
|
&clflush(&DWP(0,$out));
|
||||||
|
&lock ();
|
||||||
|
&add (&DWP(0,$out),$lastdiff);
|
||||||
|
&jmp (&label("loop"));
|
||||||
|
|
||||||
|
&set_label("loop",16);
|
||||||
|
&rdtsc ();
|
||||||
|
&mov ("edx","eax"); # put aside tick (yes, I neglect edx)
|
||||||
|
&sub ("eax",$lasttick); # diff
|
||||||
|
&mov ($lasttick,"edx"); # lasttick = tick
|
||||||
|
&mov ($lastdiff,"eax"); # lastdiff = diff
|
||||||
|
&clflush(&DWP(0,$out));
|
||||||
|
&lock ();
|
||||||
|
&add (&DWP(0,$out),"eax"); # accumulate diff
|
||||||
|
&lea ($out,&DWP(4,$out)); # ++$out
|
||||||
|
&sub ($cnt,1); # --$cnt
|
||||||
|
&jnz (&label("loop"));
|
||||||
|
|
||||||
|
&mov ("eax",&wparam(1));
|
||||||
|
&set_label("nogo");
|
||||||
|
}
|
||||||
|
&function_end("OPENSSL_instrument_bus");
|
||||||
|
|
||||||
|
&function_begin("OPENSSL_instrument_bus2");
|
||||||
|
&mov ("eax",0);
|
||||||
|
if ($sse2) {
|
||||||
|
&picmeup("edx","OPENSSL_ia32cap_P");
|
||||||
|
&bt (&DWP(0,"edx"),4);
|
||||||
|
&jnc (&label("nogo")); # no TSC
|
||||||
|
&bt (&DWP(0,"edx"),19);
|
||||||
|
&jnc (&label("nogo")); # no CLFLUSH
|
||||||
|
|
||||||
|
&mov ($out,&wparam(0)); # load arguments
|
||||||
|
&mov ($cnt,&wparam(1));
|
||||||
|
&mov ($max,&wparam(2));
|
||||||
|
|
||||||
|
&rdtsc (); # collect 1st tick
|
||||||
|
&mov ($lasttick,"eax"); # lasttick = tick
|
||||||
|
&mov ($lastdiff,0); # lastdiff = 0
|
||||||
|
|
||||||
|
&clflush(&DWP(0,$out));
|
||||||
|
&lock ();
|
||||||
|
&add (&DWP(0,$out),$lastdiff);
|
||||||
|
|
||||||
|
&rdtsc (); # collect 1st diff
|
||||||
|
&mov ("edx","eax"); # put aside tick (yes, I neglect edx)
|
||||||
|
&sub ("eax",$lasttick); # diff
|
||||||
|
&mov ($lasttick,"edx"); # lasttick = tick
|
||||||
|
&mov ($lastdiff,"eax"); # lastdiff = diff
|
||||||
|
&jmp (&label("loop2"));
|
||||||
|
|
||||||
|
&set_label("loop2",16);
|
||||||
|
&clflush(&DWP(0,$out));
|
||||||
|
&lock ();
|
||||||
|
&add (&DWP(0,$out),"eax"); # accumulate diff
|
||||||
|
|
||||||
|
&sub ($max,1);
|
||||||
|
&jz (&label("done2"));
|
||||||
|
|
||||||
|
&rdtsc ();
|
||||||
|
&mov ("edx","eax"); # put aside tick (yes, I neglect edx)
|
||||||
|
&sub ("eax",$lasttick); # diff
|
||||||
|
&mov ($lasttick,"edx"); # lasttick = tick
|
||||||
|
&cmp ("eax",$lastdiff);
|
||||||
|
&mov ($lastdiff,"eax"); # lastdiff = diff
|
||||||
|
&mov ("edx",0);
|
||||||
|
&setne ("dl");
|
||||||
|
&sub ($cnt,"edx"); # conditional --$cnt
|
||||||
|
&lea ($out,&DWP(0,$out,"edx",4)); # conditional ++$out
|
||||||
|
&jnz (&label("loop2"));
|
||||||
|
|
||||||
|
&set_label("done2");
|
||||||
|
&mov ("eax",&wparam(1));
|
||||||
|
&sub ("eax",$cnt);
|
||||||
|
&set_label("nogo");
|
||||||
|
}
|
||||||
|
&function_end("OPENSSL_instrument_bus2");
|
||||||
|
}
|
||||||
|
|
||||||
&initseg("OPENSSL_cpuid_setup");
|
&initseg("OPENSSL_cpuid_setup");
|
||||||
|
|
||||||
&asm_finish();
|
&asm_finish();
|
||||||
|
42
doc/crypto/OPENSSL_instrument_bus.pod
Normal file
42
doc/crypto/OPENSSL_instrument_bus.pod
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
=pod
|
||||||
|
|
||||||
|
=head1 NAME
|
||||||
|
|
||||||
|
OPENSSL_instrument_bus[2] - instrument references to memory bus
|
||||||
|
|
||||||
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
#ifdef OPENSSL_CPUID_OBJ
|
||||||
|
size_t OPENSSL_instrument_bus (unsigned int *vector,size_t num);
|
||||||
|
size_t OPENSSL_instrument_bus2(unsigned int *vector,size_t num,size_t max);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
|
It was empirically found that timings of references to primary memory
|
||||||
|
are subject to irregular, apparently non-deterministic variations. The
|
||||||
|
subroutines in question instrument these references for purposes of
|
||||||
|
gathering entropy for a random number generator. In order to make it
|
||||||
|
bus-bound a 'flush cache line' instruction is used between probes. In
|
||||||
|
addition, probes are added to B<vector> elements in an atomic or
|
||||||
|
interlocked manner, which should contribute additional noise on
|
||||||
|
multi-processor systems. This also means that B<vector[num]> should be
|
||||||
|
zeroed upon invocation (if you want to retrieve actual probe values).
|
||||||
|
|
||||||
|
OPENSSL_instrument_bus performs B<num> probes and records the number of
|
||||||
|
oscillator cycles each probe took.
|
||||||
|
|
||||||
|
OPENSSL_instrument_bus2 on the other hand B<accumulates> consecutive
|
||||||
|
probes with the same value, i.e. in a way it records duration of
|
||||||
|
periods when probe values appeared deterministic. The subroutine
|
||||||
|
performs at most B<max> probes in an attempt to fill the B<vector[num]>,
|
||||||
|
with B<max> value of 0 meaning "as many as it takes."
|
||||||
|
|
||||||
|
=head1 RETURN VALUE
|
||||||
|
|
||||||
|
A return value of 0 indicates that the CPU is not capable of performing the
|
||||||
|
benchmark, either because an oscillator counter or a 'flush cache line' instruction is
|
||||||
|
not available on current platform. For reference, on x86 'flush cache
|
||||||
|
line' was introduced with the SSE2 extensions.
|
||||||
|
|
||||||
|
Otherwise the number of recorded values is returned.
|
Loading…
x
Reference in New Issue
Block a user