ec/asm/ecp_nistz256-*.pl: get corner case logic right.

RT#4284

Reviewed-by: Rich Salz <rsalz@openssl.org>
This commit is contained in:
parent
d93753412b
commit
143ee099e9
@ -1252,6 +1252,7 @@ ecp_nistz256_point_double:
|
|||||||
stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional
|
stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional
|
||||||
sub sp,sp,#32*5
|
sub sp,sp,#32*5
|
||||||
|
|
||||||
|
.Lpoint_double_shortcut:
|
||||||
add r3,sp,#$in_x
|
add r3,sp,#$in_x
|
||||||
ldmia $a_ptr!,{r4-r11} @ copy in_x
|
ldmia $a_ptr!,{r4-r11} @ copy in_x
|
||||||
stmia r3,{r4-r11}
|
stmia r3,{r4-r11}
|
||||||
@ -1371,7 +1372,7 @@ $code.=<<___;
|
|||||||
.align 5
|
.align 5
|
||||||
ecp_nistz256_point_add:
|
ecp_nistz256_point_add:
|
||||||
stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional
|
stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional
|
||||||
sub sp,sp,#32*18
|
sub sp,sp,#32*18+16
|
||||||
|
|
||||||
ldmia $b_ptr!,{r4-r11} @ copy in2
|
ldmia $b_ptr!,{r4-r11} @ copy in2
|
||||||
add r3,sp,#$in2_x
|
add r3,sp,#$in2_x
|
||||||
@ -1504,9 +1505,9 @@ ecp_nistz256_point_add:
|
|||||||
tst $t0,$t1
|
tst $t0,$t1
|
||||||
beq .Ladd_proceed @ (in1infty || in2infty)?
|
beq .Ladd_proceed @ (in1infty || in2infty)?
|
||||||
tst $t2,$t2
|
tst $t2,$t2
|
||||||
beq .Ladd_proceed @ is_equal(S1,S2)?
|
beq .Ladd_double @ is_equal(S1,S2)?
|
||||||
|
|
||||||
ldr $r_ptr,[sp,#32*18]
|
ldr $r_ptr,[sp,#32*18+16]
|
||||||
eor r4,r4,r4
|
eor r4,r4,r4
|
||||||
eor r5,r5,r5
|
eor r5,r5,r5
|
||||||
eor r6,r6,r6
|
eor r6,r6,r6
|
||||||
@ -1520,6 +1521,12 @@ ecp_nistz256_point_add:
|
|||||||
stmia $r_ptr!,{r4-r11}
|
stmia $r_ptr!,{r4-r11}
|
||||||
b .Ladd_done
|
b .Ladd_done
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
.Ladd_double:
|
||||||
|
ldr $a_ptr,[sp,#32*18+20]
|
||||||
|
add sp,sp,#32*(18-5)+16 @ difference in frame sizes
|
||||||
|
b .Lpoint_double_shortcut
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
.Ladd_proceed:
|
.Ladd_proceed:
|
||||||
add $a_ptr,sp,#$R
|
add $a_ptr,sp,#$R
|
||||||
@ -1588,7 +1595,7 @@ ecp_nistz256_point_add:
|
|||||||
add r3,sp,#$in1_x
|
add r3,sp,#$in1_x
|
||||||
and r11,r11,r12
|
and r11,r11,r12
|
||||||
mvn r12,r12
|
mvn r12,r12
|
||||||
ldr $r_ptr,[sp,#32*18]
|
ldr $r_ptr,[sp,#32*18+16]
|
||||||
___
|
___
|
||||||
for($i=0;$i<96;$i+=8) { # conditional moves
|
for($i=0;$i<96;$i+=8) { # conditional moves
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
@ -1610,7 +1617,7 @@ ___
|
|||||||
}
|
}
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
.Ladd_done:
|
.Ladd_done:
|
||||||
add sp,sp,#32*18+16 @ +16 means "skip even over saved r0-r3"
|
add sp,sp,#32*18+16+16 @ +16 means "skip even over saved r0-r3"
|
||||||
#if __ARM_ARCH__>=5 || defined(__thumb__)
|
#if __ARM_ARCH__>=5 || defined(__thumb__)
|
||||||
ldmia sp!,{r4-r12,pc}
|
ldmia sp!,{r4-r12,pc}
|
||||||
#else
|
#else
|
||||||
|
@ -691,12 +691,13 @@ $code.=<<___;
|
|||||||
.type ecp_nistz256_point_double,%function
|
.type ecp_nistz256_point_double,%function
|
||||||
.align 5
|
.align 5
|
||||||
ecp_nistz256_point_double:
|
ecp_nistz256_point_double:
|
||||||
stp x29,x30,[sp,#-48]!
|
stp x29,x30,[sp,#-80]!
|
||||||
add x29,sp,#0
|
add x29,sp,#0
|
||||||
stp x19,x20,[sp,#16]
|
stp x19,x20,[sp,#16]
|
||||||
stp x21,x22,[sp,#32]
|
stp x21,x22,[sp,#32]
|
||||||
sub sp,sp,#32*4
|
sub sp,sp,#32*4
|
||||||
|
|
||||||
|
.Ldouble_shortcut:
|
||||||
ldp $acc0,$acc1,[$ap,#32]
|
ldp $acc0,$acc1,[$ap,#32]
|
||||||
mov $rp_real,$rp
|
mov $rp_real,$rp
|
||||||
ldp $acc2,$acc3,[$ap,#48]
|
ldp $acc2,$acc3,[$ap,#48]
|
||||||
@ -823,7 +824,7 @@ ecp_nistz256_point_double:
|
|||||||
add sp,x29,#0 // destroy frame
|
add sp,x29,#0 // destroy frame
|
||||||
ldp x19,x20,[x29,#16]
|
ldp x19,x20,[x29,#16]
|
||||||
ldp x21,x22,[x29,#32]
|
ldp x21,x22,[x29,#32]
|
||||||
ldp x29,x30,[sp],#48
|
ldp x29,x30,[sp],#80
|
||||||
ret
|
ret
|
||||||
.size ecp_nistz256_point_double,.-ecp_nistz256_point_double
|
.size ecp_nistz256_point_double,.-ecp_nistz256_point_double
|
||||||
___
|
___
|
||||||
@ -963,7 +964,7 @@ ecp_nistz256_point_add:
|
|||||||
b.eq .Ladd_proceed // (in1infty || in2infty)?
|
b.eq .Ladd_proceed // (in1infty || in2infty)?
|
||||||
|
|
||||||
tst $temp,$temp
|
tst $temp,$temp
|
||||||
b.eq .Ladd_proceed // is_equal(S1,S2)?
|
b.eq .Ladd_double // is_equal(S1,S2)?
|
||||||
|
|
||||||
eor $a0,$a0,$a0
|
eor $a0,$a0,$a0
|
||||||
eor $a1,$a1,$a1
|
eor $a1,$a1,$a1
|
||||||
@ -975,6 +976,15 @@ ecp_nistz256_point_add:
|
|||||||
stp $a0,$a1,[$rp_real,#80]
|
stp $a0,$a1,[$rp_real,#80]
|
||||||
b .Ladd_done
|
b .Ladd_done
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
.Ladd_double:
|
||||||
|
mov $ap,$ap_real
|
||||||
|
mov $rp,$rp_real
|
||||||
|
ldp x23,x24,[x29,#48]
|
||||||
|
ldp x25,x26,[x29,#64]
|
||||||
|
add sp,sp,#32*(12-4) // difference in stack frames
|
||||||
|
b .Ldouble_shortcut
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
.Ladd_proceed:
|
.Ladd_proceed:
|
||||||
add $rp,sp,#$Rsqr
|
add $rp,sp,#$Rsqr
|
||||||
|
@ -1197,6 +1197,7 @@ for ($i=0;$i<7;$i++) {
|
|||||||
########################################################################
|
########################################################################
|
||||||
# void ecp_nistz256_point_double(P256_POINT *out,const P256_POINT *inp);
|
# void ecp_nistz256_point_double(P256_POINT *out,const P256_POINT *inp);
|
||||||
#
|
#
|
||||||
|
&static_label("point_double_shortcut");
|
||||||
&function_begin("ecp_nistz256_point_double");
|
&function_begin("ecp_nistz256_point_double");
|
||||||
{ my ($S,$M,$Zsqr,$in_x,$tmp0)=map(32*$_,(0..4));
|
{ my ($S,$M,$Zsqr,$in_x,$tmp0)=map(32*$_,(0..4));
|
||||||
|
|
||||||
@ -1212,6 +1213,7 @@ for ($i=0;$i<7;$i++) {
|
|||||||
&picmeup("edx","OPENSSL_ia32cap_P","eax",&label("pic"));
|
&picmeup("edx","OPENSSL_ia32cap_P","eax",&label("pic"));
|
||||||
&mov ("ebp",&DWP(0,"edx")); }
|
&mov ("ebp",&DWP(0,"edx")); }
|
||||||
|
|
||||||
|
&set_label("point_double_shortcut");
|
||||||
&mov ("eax",&DWP(0,"esi")); # copy in_x
|
&mov ("eax",&DWP(0,"esi")); # copy in_x
|
||||||
&mov ("ebx",&DWP(4,"esi"));
|
&mov ("ebx",&DWP(4,"esi"));
|
||||||
&mov ("ecx",&DWP(8,"esi"));
|
&mov ("ecx",&DWP(8,"esi"));
|
||||||
@ -1491,7 +1493,7 @@ for ($i=0;$i<7;$i++) {
|
|||||||
&mov ("ebx",&DWP(32*18+8,"esp"));
|
&mov ("ebx",&DWP(32*18+8,"esp"));
|
||||||
&jz (&label("add_proceed")); # (in1infty || in2infty)?
|
&jz (&label("add_proceed")); # (in1infty || in2infty)?
|
||||||
&test ("ebx","ebx");
|
&test ("ebx","ebx");
|
||||||
&jz (&label("add_proceed")); # is_equal(S1,S2)?
|
&jz (&label("add_double")); # is_equal(S1,S2)?
|
||||||
|
|
||||||
&mov ("edi",&wparam(0));
|
&mov ("edi",&wparam(0));
|
||||||
&xor ("eax","eax");
|
&xor ("eax","eax");
|
||||||
@ -1499,6 +1501,12 @@ for ($i=0;$i<7;$i++) {
|
|||||||
&data_byte(0xfc,0xf3,0xab); # cld; stosd
|
&data_byte(0xfc,0xf3,0xab); # cld; stosd
|
||||||
&jmp (&label("add_done"));
|
&jmp (&label("add_done"));
|
||||||
|
|
||||||
|
&set_label("add_double",16);
|
||||||
|
&mov ("esi",&wparam(1));
|
||||||
|
&mov ("ebp",&DWP(32*18+12,"esp")); # OPENSSL_ia32cap_P copy
|
||||||
|
&add ("esp",4*((8*18+5)-(8*5+1))); # difference in frame sizes
|
||||||
|
&jmp (&label("point_double_shortcut"));
|
||||||
|
|
||||||
&set_label("add_proceed",16);
|
&set_label("add_proceed",16);
|
||||||
&mov ("eax",&DWP(32*18+12,"esp")); # OPENSSL_ia32cap_P copy
|
&mov ("eax",&DWP(32*18+12,"esp")); # OPENSSL_ia32cap_P copy
|
||||||
&lea ("esi",&DWP($R,"esp"));
|
&lea ("esi",&DWP($R,"esp"));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user