sha512-ppc.pl multi-thread safety fix.
This commit is contained in:
@@ -2,8 +2,9 @@
|
|||||||
|
|
||||||
# ====================================================================
|
# ====================================================================
|
||||||
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||||
# project. Rights for redistribution and usage in source and binary
|
# project. The module is, however, dual licensed under OpenSSL and
|
||||||
# forms are granted according to the OpenSSL license.
|
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||||
|
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||||
# ====================================================================
|
# ====================================================================
|
||||||
|
|
||||||
# I let hardware handle unaligned input, except on page boundaries
|
# I let hardware handle unaligned input, except on page boundaries
|
||||||
@@ -17,7 +18,21 @@
|
|||||||
# PPC970,gcc-4.0.0 +50% +38% | +40% +410%(*)
|
# PPC970,gcc-4.0.0 +50% +38% | +40% +410%(*)
|
||||||
#
|
#
|
||||||
# (*) 64-bit code in 32-bit application context, which actually is
|
# (*) 64-bit code in 32-bit application context, which actually is
|
||||||
# on TODO list
|
# on TODO list. It should be noted that for safe deployment in
|
||||||
|
# 32-bit *multi-threaded* context asynchronous signals should be
|
||||||
|
# blocked upon entry to SHA512 block routine. This is because
|
||||||
|
# 32-bit signaling procedure invalidates upper halves of GPRs.
|
||||||
|
# Context switch procedure preserves them, but not signaling:-(
|
||||||
|
|
||||||
|
# Second version is truly multi-thread safe. Trouble with the original
|
||||||
|
# version was that it was using thread local storage pointer register.
|
||||||
|
# Well, it scrupulously preserved it, but the problem would arise the
|
||||||
|
# moment asynchronous signal was delivered and signal handler would
|
||||||
|
# dereference the TLS pointer. While it's never the case in openssl
|
||||||
|
# application or test suite, we have to respect this scenario and not
|
||||||
|
# use TLS pointer register. Alternative would be to require caller to
|
||||||
|
# block signals prior to calling this routine. For the record, in 32-bit
|
||||||
|
# context R2 serves as TLS pointer, while in 64-bit context - R13.
|
||||||
|
|
||||||
$output=shift;
|
$output=shift;
|
||||||
|
|
||||||
@@ -69,24 +84,24 @@ if ($output =~ /512/) {
|
|||||||
$FRAME=32*$SIZE_T;
|
$FRAME=32*$SIZE_T;
|
||||||
|
|
||||||
$sp ="r1";
|
$sp ="r1";
|
||||||
$toc="r2"; # zapped by $Tbl
|
$toc="r2";
|
||||||
$ctx="r3"; # zapped by $a0
|
$ctx="r3"; # zapped by $a0
|
||||||
$inp="r4";
|
$inp="r4"; # zapped by $a1
|
||||||
$num="r5"; # zapped by $a1
|
$num="r5"; # zapped by $t0
|
||||||
|
|
||||||
$T ="r0";
|
$T ="r0";
|
||||||
$Tbl="r2";
|
|
||||||
$a0 ="r3";
|
$a0 ="r3";
|
||||||
$a1 ="r5";
|
$a1 ="r4";
|
||||||
$t0 ="r6";
|
$t0 ="r5";
|
||||||
$t1 ="r7";
|
$t1 ="r6";
|
||||||
|
$Tbl="r7";
|
||||||
|
|
||||||
$A ="r8";
|
$A ="r8";
|
||||||
$B ="r9";
|
$B ="r9";
|
||||||
$C ="r10";
|
$C ="r10";
|
||||||
$D ="r11";
|
$D ="r11";
|
||||||
$E ="r12";
|
$E ="r12";
|
||||||
$F ="r13";
|
$F ="r13"; $F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer
|
||||||
$G ="r14";
|
$G ="r14";
|
||||||
$H ="r15";
|
$H ="r15";
|
||||||
|
|
||||||
@@ -94,6 +109,8 @@ $H ="r15";
|
|||||||
@X=("r16","r17","r18","r19","r20","r21","r22","r23",
|
@X=("r16","r17","r18","r19","r20","r21","r22","r23",
|
||||||
"r24","r25","r26","r27","r28","r29","r30","r31");
|
"r24","r25","r26","r27","r28","r29","r30","r31");
|
||||||
|
|
||||||
|
$inp="r31"; # reassigned $inp! aliases with @X[15]
|
||||||
|
|
||||||
sub ROUND_00_15 {
|
sub ROUND_00_15 {
|
||||||
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
|
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
@@ -184,6 +201,7 @@ $func:
|
|||||||
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
|
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
|
||||||
|
|
||||||
$LD $A,`0*$SZ`($ctx)
|
$LD $A,`0*$SZ`($ctx)
|
||||||
|
mr $inp,r4 ; incarnate $inp
|
||||||
$LD $B,`1*$SZ`($ctx)
|
$LD $B,`1*$SZ`($ctx)
|
||||||
$LD $C,`2*$SZ`($ctx)
|
$LD $C,`2*$SZ`($ctx)
|
||||||
$LD $D,`3*$SZ`($ctx)
|
$LD $D,`3*$SZ`($ctx)
|
||||||
@@ -197,8 +215,9 @@ LPICedup:
|
|||||||
andi. r0,$inp,3
|
andi. r0,$inp,3
|
||||||
bne Lunaligned
|
bne Lunaligned
|
||||||
Laligned:
|
Laligned:
|
||||||
add $t0,$inp,$num
|
add $num,$inp,$num
|
||||||
$PUSH $t0,`$FRAME-$SIZE_T*23`($sp) ; end pointer
|
$PUSH $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer
|
||||||
|
$PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
|
||||||
bl Lsha2_block_private
|
bl Lsha2_block_private
|
||||||
Ldone:
|
Ldone:
|
||||||
$POP r0,`$FRAME-$SIZE_T*21`($sp)
|
$POP r0,`$FRAME-$SIZE_T*21`($sp)
|
||||||
@@ -242,15 +261,17 @@ Lunaligned:
|
|||||||
$UCMP $num,$t1
|
$UCMP $num,$t1
|
||||||
ble- Laligned ; didn't cross the page boundary
|
ble- Laligned ; didn't cross the page boundary
|
||||||
subfc $num,$t1,$num
|
subfc $num,$t1,$num
|
||||||
add $t0,$inp,$t1
|
add $t1,$inp,$t1
|
||||||
$PUSH $num,`$FRAME-$SIZE_T*24`($sp)
|
$PUSH $num,`$FRAME-$SIZE_T*25`($sp) ; save real remaining num
|
||||||
$PUSH $t0,`$FRAME-$SIZE_T*23`($sp) ; end pointer
|
$PUSH $t1,`$FRAME-$SIZE_T*24`($sp) ; intermediate end pointer
|
||||||
|
$PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
|
||||||
bl Lsha2_block_private
|
bl Lsha2_block_private
|
||||||
$POP $num,`$FRAME-$SIZE_T*24`($sp)
|
; $inp equals to the intermediate end pointer here
|
||||||
|
$POP $num,`$FRAME-$SIZE_T*25`($sp) ; restore real remaining num
|
||||||
Lcross_page:
|
Lcross_page:
|
||||||
li $t1,`16*$SZ/4`
|
li $t1,`16*$SZ/4`
|
||||||
mtctr $t1
|
mtctr $t1
|
||||||
addi r20,$sp,$FRAME ; spot below the frame
|
addi r20,$sp,$FRAME ; aligned spot below the frame
|
||||||
Lmemcpy:
|
Lmemcpy:
|
||||||
lbz r16,0($inp)
|
lbz r16,0($inp)
|
||||||
lbz r17,1($inp)
|
lbz r17,1($inp)
|
||||||
@@ -264,15 +285,16 @@ Lmemcpy:
|
|||||||
addi r20,r20,4
|
addi r20,r20,4
|
||||||
bdnz Lmemcpy
|
bdnz Lmemcpy
|
||||||
|
|
||||||
$PUSH $inp,`$FRAME-$SIZE_T*25`($sp)
|
$PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp
|
||||||
addi $inp,$sp,$FRAME
|
addi $t1,$sp,`$FRAME+16*$SZ` ; fictitious end pointer
|
||||||
addi $t0,$sp,`$FRAME+16*$SZ`
|
addi $inp,$sp,$FRAME ; fictitious inp pointer
|
||||||
$PUSH $num,`$FRAME-$SIZE_T*24`($sp)
|
$PUSH $num,`$FRAME-$SIZE_T*25`($sp) ; save real num
|
||||||
$PUSH $t0,`$FRAME-$SIZE_T*23`($sp) ; end pointer
|
$PUSH $t1,`$FRAME-$SIZE_T*24`($sp) ; end pointer
|
||||||
|
$PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
|
||||||
bl Lsha2_block_private
|
bl Lsha2_block_private
|
||||||
$POP $inp,`$FRAME-$SIZE_T*25`($sp)
|
$POP $inp,`$FRAME-$SIZE_T*26`($sp) ; restore real inp
|
||||||
$POP $num,`$FRAME-$SIZE_T*24`($sp)
|
$POP $num,`$FRAME-$SIZE_T*25`($sp) ; restore real num
|
||||||
addic. $num,$num,`-16*$SZ`
|
addic. $num,$num,`-16*$SZ` ; num--
|
||||||
bne- Lunaligned
|
bne- Lunaligned
|
||||||
b Ldone
|
b Ldone
|
||||||
___
|
___
|
||||||
@@ -309,9 +331,10 @@ for(;$i<32;$i++) {
|
|||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
bdnz- Lrounds
|
bdnz- Lrounds
|
||||||
|
|
||||||
subi $Tbl,$Tbl,`($rounds-16)*$SZ`
|
|
||||||
$POP $ctx,`$FRAME-$SIZE_T*22`($sp)
|
$POP $ctx,`$FRAME-$SIZE_T*22`($sp)
|
||||||
$POP $num,`$FRAME-$SIZE_T*23`($sp) ; end pointer
|
$POP $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
|
||||||
|
$POP $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer
|
||||||
|
subi $Tbl,$Tbl,`($rounds-16)*$SZ` ; rewind Tbl
|
||||||
|
|
||||||
$LD r16,`0*$SZ`($ctx)
|
$LD r16,`0*$SZ`($ctx)
|
||||||
$LD r17,`1*$SZ`($ctx)
|
$LD r17,`1*$SZ`($ctx)
|
||||||
@@ -320,9 +343,11 @@ $code.=<<___;
|
|||||||
$LD r20,`4*$SZ`($ctx)
|
$LD r20,`4*$SZ`($ctx)
|
||||||
$LD r21,`5*$SZ`($ctx)
|
$LD r21,`5*$SZ`($ctx)
|
||||||
$LD r22,`6*$SZ`($ctx)
|
$LD r22,`6*$SZ`($ctx)
|
||||||
|
addi $inp,$inp,`16*$SZ` ; advance inp
|
||||||
$LD r23,`7*$SZ`($ctx)
|
$LD r23,`7*$SZ`($ctx)
|
||||||
add $A,$A,r16
|
add $A,$A,r16
|
||||||
add $B,$B,r17
|
add $B,$B,r17
|
||||||
|
$PUSH $inp,`$FRAME-$SIZE_T*23`($sp)
|
||||||
add $C,$C,r18
|
add $C,$C,r18
|
||||||
$ST $A,`0*$SZ`($ctx)
|
$ST $A,`0*$SZ`($ctx)
|
||||||
add $D,$D,r19
|
add $D,$D,r19
|
||||||
@@ -335,7 +360,6 @@ $code.=<<___;
|
|||||||
$ST $E,`4*$SZ`($ctx)
|
$ST $E,`4*$SZ`($ctx)
|
||||||
add $H,$H,r23
|
add $H,$H,r23
|
||||||
$ST $F,`5*$SZ`($ctx)
|
$ST $F,`5*$SZ`($ctx)
|
||||||
addi $inp,$inp,`16*$SZ`
|
|
||||||
$ST $G,`6*$SZ`($ctx)
|
$ST $G,`6*$SZ`($ctx)
|
||||||
$UCMP $inp,$num
|
$UCMP $inp,$num
|
||||||
$ST $H,`7*$SZ`($ctx)
|
$ST $H,`7*$SZ`($ctx)
|
||||||
@@ -349,21 +373,21 @@ $code.=<<___;
|
|||||||
.align 6
|
.align 6
|
||||||
LPICmeup:
|
LPICmeup:
|
||||||
bl LPIC
|
bl LPIC
|
||||||
|
addi $Tbl,$Tbl,`64-4` ; "distance" between . and last nop
|
||||||
b LPICedup
|
b LPICedup
|
||||||
nop
|
nop
|
||||||
nop
|
nop
|
||||||
nop
|
nop
|
||||||
nop
|
nop
|
||||||
nop
|
nop
|
||||||
nop
|
|
||||||
LPIC: mflr $Tbl
|
LPIC: mflr $Tbl
|
||||||
addi $Tbl,$Tbl,`64-4` ; "distance" between bl and last nop
|
|
||||||
blr
|
blr
|
||||||
nop
|
nop
|
||||||
nop
|
nop
|
||||||
nop
|
nop
|
||||||
nop
|
nop
|
||||||
nop
|
nop
|
||||||
|
nop
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($SZ==8);
|
$code.=<<___ if ($SZ==8);
|
||||||
.long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
|
.long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
|
||||||
|
Reference in New Issue
Block a user