ARMv4 assembly pack: implement support for Thumb2.

As some ARM processors, more specifically the Cortex-M series, are Thumb2-only, we need to support Thumb2-only builds even in assembly.

Reviewed-by: Tim Hudson <tjh@openssl.org>
commit 11208dcfb9 (parent e7a68985d5)
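The bulk of this change is one recurring pattern. Thumb2 has no per-instruction condition field; a conditionally executed instruction must instead be preceded by an "if-then" instruction, with one extra "t" per additional predicated instruction (it, itt, ittt). In ARM mode the condition is encoded in the instruction itself, so every it/itt/ittt in the diff is wrapped in #ifdef __thumb2__ (where recent assemblers treat it as a sanity check). A minimal sketch of the pattern, mirroring the file preambles and sanity checks below; clamp_nonzero is a hypothetical function, not part of the commit, and the !defined(__APPLE__) leg presumably keeps Apple builds on the ARM-mode path to sidestep Apple assembler differences:

#if defined(__thumb2__) && !defined(__APPLE__)
.syntax	unified
.thumb
#else
.code	32
#endif

.global	clamp_nonzero
.type	clamp_nonzero,%function
clamp_nonzero:				@ return -1 if r0 is zero, else r0
	cmp	r0,#0
#ifdef __thumb2__
	it	eq			@ predicate exactly the next instruction
#endif
	moveq	r0,#-1
	bx	lr
.size	clamp_nonzero,.-clamp_nonzero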
@@ -70,16 +70,12 @@ $code=<<___;
 #endif

 .text
-#if __ARM_ARCH__<7
-.code	32
-#else
-.syntax	unified
 #if defined(__thumb2__) && !defined(__APPLE__)
+.syntax	unified
 .thumb
 #else
 .code	32
 #endif
-#endif

 .type	AES_Te,%object
 .align	5
@@ -193,7 +189,7 @@ AES_Te:
 .type	AES_encrypt,%function
 .align	5
 AES_encrypt:
-#if __ARM_ARCH__<7
+#ifndef __thumb2__
 	sub	r3,pc,#8		@ AES_encrypt
 #else
 	adr	r3,AES_encrypt
@@ -443,19 +439,19 @@ _armv4_AES_encrypt:
 .align	5
 AES_set_encrypt_key:
 _armv4_AES_set_encrypt_key:
-#if __ARM_ARCH__<7
+#ifndef __thumb2__
 	sub	r3,pc,#8		@ AES_set_encrypt_key
 #else
 	adr	r3,AES_set_encrypt_key
 #endif
 	teq	r0,#0
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
 	itt	eq			@ Thumb2 thing, sanity check in ARM
 #endif
 	moveq	r0,#-1
 	beq	.Labrt
 	teq	r2,#0
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
 	itt	eq			@ Thumb2 thing, sanity check in ARM
 #endif
 	moveq	r0,#-1
@@ -466,7 +462,7 @@ _armv4_AES_set_encrypt_key:
 	teq	r1,#192
 	beq	.Lok
 	teq	r1,#256
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
 	itt	ne			@ Thumb2 thing, sanity check in ARM
 #endif
 	movne	r0,#-1
@@ -627,7 +623,7 @@ _armv4_AES_set_encrypt_key:
 	str	$s2,[$key,#-16]
 	subs	$rounds,$rounds,#1
 	str	$s3,[$key,#-12]
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
 	itt	eq			@ Thumb2 thing, sanity check in ARM
 #endif
 	subeq	r2,$key,#216
@@ -699,7 +695,7 @@ _armv4_AES_set_encrypt_key:
 	str	$s2,[$key,#-24]
 	subs	$rounds,$rounds,#1
 	str	$s3,[$key,#-20]
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
 	itt	eq			@ Thumb2 thing, sanity check in ARM
 #endif
 	subeq	r2,$key,#256
@@ -969,7 +965,7 @@ AES_Td:
 .type	AES_decrypt,%function
 .align	5
 AES_decrypt:
-#if __ARM_ARCH__<7
+#ifndef __thumb2__
 	sub	r3,pc,#8		@ AES_decrypt
 #else
 	adr	r3,AES_decrypt
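The #if __ARM_ARCH__<7 to #ifndef __thumb2__ swaps above are not cosmetic: sub r3,pc,#8 hard-codes the ARM-state rule that reading pc yields the current instruction's address plus 8, while in Thumb state the offset is 4, so the idiom computes a wrong address. Under __thumb2__ the code must always take the adr branch and let the assembler emit the correct pc-relative arithmetic. A standalone sketch, with hypothetical labels not taken from the commit:

get_here_arm:				@ ARM state only
	sub	r3,pc,#8		@ pc reads as . + 8 in ARM state
	bx	lr

get_here_any:				@ works in ARM or Thumb state
	adr	r3,get_here_any		@ assembler picks the right encoding
	bx	lr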
@@ -15,7 +15,12 @@ $code.=<<___;
 #include "arm_arch.h"

 .text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax	unified
+.thumb
+#else
 .code	32
+#endif

 .align	5
 .global	OPENSSL_atomic_add
@@ -59,6 +64,9 @@ OPENSSL_atomic_add:
 OPENSSL_cleanse:
 	eor	ip,ip,ip
 	cmp	r1,#7
+#ifdef __thumb2__
+	itt	hs
+#endif
 	subhs	r1,r1,#4
 	bhs	.Lot
 	cmp	r1,#0
@@ -116,27 +124,43 @@ _armv7_tick:
 .global	_armv8_aes_probe
 .type	_armv8_aes_probe,%function
 _armv8_aes_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+	.byte	0xb0,0xff,0x00,0x03	@ aese.8 q0,q0
+#else
 	.byte	0x00,0x03,0xb0,0xf3	@ aese.8 q0,q0
+#endif
 	bx	lr
 .size	_armv8_aes_probe,.-_armv8_aes_probe

 .global	_armv8_sha1_probe
 .type	_armv8_sha1_probe,%function
 _armv8_sha1_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+	.byte	0x00,0xef,0x40,0x0c	@ sha1c.32 q0,q0,q0
+#else
 	.byte	0x40,0x0c,0x00,0xf2	@ sha1c.32 q0,q0,q0
+#endif
 	bx	lr
 .size	_armv8_sha1_probe,.-_armv8_sha1_probe

 .global	_armv8_sha256_probe
 .type	_armv8_sha256_probe,%function
 _armv8_sha256_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+	.byte	0x00,0xff,0x40,0x0c	@ sha256h.32 q0,q0,q0
+#else
 	.byte	0x40,0x0c,0x00,0xf3	@ sha256h.32 q0,q0,q0
+#endif
 	bx	lr
 .size	_armv8_sha256_probe,.-_armv8_sha256_probe
 .global	_armv8_pmull_probe
 .type	_armv8_pmull_probe,%function
 _armv8_pmull_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+	.byte	0xa0,0xef,0x00,0x0e	@ vmull.p64 q0,d0,d0
+#else
 	.byte	0x00,0x0e,0xa0,0xf2	@ vmull.p64 q0,d0,d0
+#endif
 	bx	lr
 .size	_armv8_pmull_probe,.-_armv8_pmull_probe
 #endif
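The probe functions above emit hand-encoded ARMv8 instructions with .byte, and the Thumb2 variants are not byte-swapped on a whim: a 32-bit ARM instruction is stored as one little-endian word, while a 32-bit Thumb2 instruction is stored as two little-endian 16-bit halfwords. Taking aese.8 q0,q0 from the diff as the worked example:

@ ARM state: one word 0xf3b00300, stored little-endian
	.byte	0x00,0x03,0xb0,0xf3
@ Thumb2 state: halfwords 0xffb0,0x0300, each little-endian
	.byte	0xb0,0xff,0x00,0x03

For this NEON encoding class the ARM prefix 0xf2/0xf3 maps to the Thumb prefix 0xef/0xff; the remaining fields carry over unchanged.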
@@ -51,7 +51,12 @@ $code=<<___;
 #include "arm_arch.h"

 .text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax	unified
+.thumb
+#else
 .code	32
+#endif
 ___
 ################
 # private interface to mul_1x1_ialu
@@ -132,11 +137,17 @@ mul_1x1_ialu:
 	eor	$hi,$hi,$t0,lsr#8
 	ldr	$t0,[sp,$i0]		@ tab[b >> 30 ]

+#ifdef __thumb2__
+	itt	ne
+#endif
 	eorne	$lo,$lo,$b,lsl#30
 	eorne	$hi,$hi,$b,lsr#2
 	tst	$a,#1<<31
 	eor	$lo,$lo,$t1,lsl#27
 	eor	$hi,$hi,$t1,lsr#5
+#ifdef __thumb2__
+	itt	ne
+#endif
 	eorne	$lo,$lo,$b,lsl#31
 	eorne	$hi,$hi,$b,lsr#1
 	eor	$lo,$lo,$t0,lsl#30
@@ -156,20 +167,33 @@ $code.=<<___;
 .align	5
 bn_GF2m_mul_2x2:
 #if __ARM_MAX_ARCH__>=7
+	stmdb	sp!,{r10,lr}
 	ldr	r12,.LOPENSSL_armcap
-.Lpic:	ldr	r12,[pc,r12]
-	tst	r12,#1
+	adr	r10,.LOPENSSL_armcap
+	ldr	r12,[r12,r10]
+#ifdef __APPLE__
+	ldr	r12,[r12]
+#endif
+	tst	r12,#ARMV7_NEON
+	itt	ne
+	ldrne	r10,[sp],#8
 	bne	.LNEON
+	stmdb	sp!,{r4-r9}
+#else
+	stmdb	sp!,{r4-r10,lr}
 #endif
 ___
 $ret="r10";	# reassigned 1st argument
 $code.=<<___;
-	stmdb	sp!,{r4-r10,lr}
 	mov	$ret,r0			@ reassign 1st argument
 	mov	$b,r3			@ $b=b1
+	sub	r7,sp,#36
+	mov	r8,sp
+	and	r7,r7,#-32
 	ldr	r3,[sp,#32]		@ load b0
 	mov	$mask,#7<<2
-	sub	sp,sp,#32		@ allocate tab[8]
+	mov	sp,r7			@ allocate tab[8]
+	str	r8,[r7,#32]

 	bl	mul_1x1_ialu		@ a1·b1
 	str	$lo,[$ret,#8]
@@ -193,6 +217,7 @@ ___
 $code.=<<___;
 	ldmia	$ret,{@r[0]-@r[3]}
 	eor	$lo,$lo,$hi
+	ldr	sp,[sp,#32]		@ destroy tab[8]
 	eor	$hi,$hi,@r[1]
 	eor	$lo,$lo,@r[0]
 	eor	$hi,$hi,@r[2]
@@ -200,7 +225,6 @@ $code.=<<___;
 	eor	$hi,$hi,@r[3]
 	str	$hi,[$ret,#8]
 	eor	$lo,$lo,$hi
-	add	sp,sp,#32		@ destroy tab[8]
 	str	$lo,[$ret,#4]

 #if __ARM_ARCH__>=5
@@ -279,7 +303,7 @@ $code.=<<___;
 #if __ARM_MAX_ARCH__>=7
 .align	5
 .LOPENSSL_armcap:
-.word	OPENSSL_armcap_P-(.Lpic+8)
+.word	OPENSSL_armcap_P-.
 #endif
 .asciz	"GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
 .align	5
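Two fixes meet in this file. First, bn_GF2m_mul_2x2 used the classic .Lpic: ldr r12,[pc,r12] trick with .word OPENSSL_armcap_P-(.Lpic+8), which again bakes in the ARM-state pc bias of 8; the replacement materializes the literal's own address with adr and stores OPENSSL_armcap_P-., which is state-independent:

	ldr	r12,.LOPENSSL_armcap	@ r12 = OPENSSL_armcap_P - &literal
	adr	r10,.LOPENSSL_armcap	@ r10 = &literal, in either state
	ldr	r12,[r12,r10]		@ r12 = &OPENSSL_armcap_P
	...
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.		@ offset relative to this very word

Second, tab[8] is no longer created by sub sp,sp,#32 and destroyed by add sp,sp,#32; the frame is carved out through r7, 32-byte aligned, the old sp saved at its end, and torn down with ldr sp,[sp,#32] — presumably so the common epilogue works from both the scalar entry (which now stacks {r10,lr} early) and the NEON entry.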
@@ -82,7 +82,12 @@ $code=<<___;
 #include "arm_arch.h"

 .text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax	unified
+.thumb
+#else
 .code	32
+#endif

 #if __ARM_MAX_ARCH__>=7
 .align	5
@@ -101,7 +106,7 @@ bn_mul_mont:
 #if __ARM_MAX_ARCH__>=7
 	tst	ip,#7
 	bne	.Lialu
-	adr	r0,bn_mul_mont
+	adr	r0,.Lbn_mul_mont
 	ldr	r2,.LOPENSSL_armcap
 	ldr	r0,[r0,r2]
 #ifdef __APPLE__
@@ -117,6 +122,9 @@ bn_mul_mont:
 #endif
 	cmp	ip,#2
 	mov	$num,ip			@ load num
+#ifdef __thumb2__
+	ittt	lt
+#endif
 	movlt	r0,#0
 	addlt	sp,sp,#2*4
 	blt	.Labrt
@@ -164,10 +172,11 @@ bn_mul_mont:
 	ldr	$n0,[$_n0]		@ restore n0
 	adc	$nhi,$nhi,#0
 	str	$nlo,[$num]		@ tp[num-1]=
+	mov	$tj,sp
 	str	$nhi,[$num,#4]		@ tp[num]=

 .Louter:
-	sub	$tj,$num,sp		@ "original" $num-1 value
+	sub	$tj,$num,$tj		@ "original" $num-1 value
 	sub	$ap,$ap,$tj		@ "rewind" ap to &ap[1]
 	ldr	$bi,[$tp,#4]!		@ *(++bp)
 	sub	$np,$np,$tj		@ "rewind" np to &np[1]
@@ -212,11 +221,16 @@ bn_mul_mont:
 	str	$nhi,[$num,#4]		@ tp[num]=

 	cmp	$tp,$tj
+#ifdef __thumb2__
+	itt	ne
+#endif
+	movne	$tj,sp
 	bne	.Louter

 	ldr	$rp,[$_rp]		@ pull rp
+	mov	$aj,sp
 	add	$num,$num,#4		@ $num to point at &tp[num]
-	sub	$aj,$num,sp		@ "original" num value
+	sub	$aj,$num,$aj		@ "original" num value
 	mov	$tp,sp			@ "rewind" $tp
 	mov	$ap,$tp			@ "borrow" $ap
 	sub	$np,$np,$aj		@ "rewind" $np to &np[0]
@@ -242,7 +256,8 @@ bn_mul_mont:
 	cmp	$tp,$num
 	bne	.Lcopy

-	add	sp,$num,#4		@ skip over tp[num+1]
+	mov	sp,$num
+	add	sp,sp,#4		@ skip over tp[num+1]
 	ldmia	sp!,{r4-r12,lr}		@ restore registers
 	add	sp,sp,#2*4		@ skip over {r0,r2}
 	mov	r0,#1
@@ -283,6 +298,7 @@ bn_mul8x_mont_neon:
 	stmdb	sp!,{r4-r11}
 	vstmdb	sp!,{d8-d15}		@ ABI specification says so
 	ldmia	ip,{r4-r5}		@ load rest of parameter block
+	mov	ip,sp

 	sub	$toutptr,sp,#16
 	vld1.32	{${Bi}[0]}, [$bptr,:32]!
@@ -638,8 +654,9 @@ bn_mul8x_mont_neon:
 	bne	.LNEON_sub

 	ldr	r10, [$aptr]		@ load top-most bit
+	mov	r11,sp
 	veor	q0,q0,q0
-	sub	r11,$bptr,sp		@ this is num*4
+	sub	r11,$bptr,r11		@ this is num*4
 	veor	q1,q1,q1
 	mov	$aptr,sp
 	sub	$rptr,$rptr,r11		@ rewind $rptr
@@ -649,27 +666,33 @@ bn_mul8x_mont_neon:
 .LNEON_copy_n_zap:
 	ldmia	$aptr!, {r4-r7}
 	ldmia	$rptr, {r8-r11}
+	it	cc
 	movcc	r8, r4
 	vst1.64	{q0-q1}, [$nptr,:256]!	@ wipe
+	itt	cc
 	movcc	r9, r5
 	movcc	r10,r6
 	vst1.64	{q0-q1}, [$nptr,:256]!	@ wipe
+	it	cc
 	movcc	r11,r7
 	ldmia	$aptr, {r4-r7}
 	stmia	$rptr!, {r8-r11}
 	sub	$aptr,$aptr,#16
 	ldmia	$rptr, {r8-r11}
+	it	cc
 	movcc	r8, r4
 	vst1.64	{q0-q1}, [$aptr,:256]!	@ wipe
+	itt	cc
 	movcc	r9, r5
 	movcc	r10,r6
 	vst1.64	{q0-q1}, [$nptr,:256]!	@ wipe
+	it	cc
 	movcc	r11,r7
 	teq	$aptr,$bptr		@ preserves carry
 	stmia	$rptr!, {r8-r11}
 	bne	.LNEON_copy_n_zap

-	sub	sp,ip,#96
+	mov	sp,ip
 	vldmia	sp!,{d8-d15}
 	ldmia	sp!,{r4-r11}
 	ret				@ bx lr
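A second Thumb2 constraint drives the sp rework in this file: outside a handful of blessed forms (plain moves to and from sp, add/sub with sp as its own base), using sp as an ordinary data-processing operand or destination — sub $tj,$num,sp, teq $Xi,sp, sub sp,ip,#96 — is either not encodable or UNPREDICTABLE in Thumb2. The diff therefore snapshots sp into a scratch register first and does the arithmetic there. A sketch of the transformation, with r4/r5/r10 as illustrative registers rather than the commit's:

@ before: ARM state only
	sub	r4,r5,sp		@ sp as a general source operand
@ after: legal in both states
	mov	r10,sp			@ blessed form: plain move from sp
	sub	r4,r5,r10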
@@ -45,7 +45,12 @@ $code.=<<___;
 #include "arm_arch.h"

 .text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax	unified
+.thumb
+#else
 .code	32
+#endif
 ___
 ########################################################################
 # Convert ecp_nistz256_table.c to layout expected by ecp_nistz_gather_w7
@@ -162,6 +167,9 @@ __ecp_nistz256_mul_by_2:
 	adcs	$a6,$a6,$a6
 	mov	$ff,#0
 	adcs	$a7,$a7,$a7
+#ifdef __thumb2__
+	it	cs
+#endif
 	movcs	$ff,#-1			@ $ff = carry ? -1 : 0

 	b	.Lreduce_by_sub
@@ -213,6 +221,9 @@ __ecp_nistz256_add:
 	adcs	$a6,$a6,$t2
 	mov	$ff,#0
 	adcs	$a7,$a7,$t3
+#ifdef __thumb2__
+	it	cs
+#endif
 	movcs	$ff,#-1			@ $ff = carry ? -1 : 0, "broadcast" carry
 	ldr	lr,[sp],#4		@ pop lr

@@ -286,6 +297,9 @@ __ecp_nistz256_mul_by_3:
 	adcs	$a6,$a6,$a6
 	mov	$ff,#0
 	adcs	$a7,$a7,$a7
+#ifdef __thumb2__
+	it	cs
+#endif
 	movcs	$ff,#-1			@ $ff = carry ? -1 : 0, "broadcast" carry

 	subs	$a0,$a0,$ff		@ subtract synthesized modulus, see
@@ -318,6 +332,9 @@ __ecp_nistz256_mul_by_3:
 	adcs	$a6,$a6,$t2
 	mov	$ff,#0
 	adcs	$a7,$a7,$t3
+#ifdef __thumb2__
+	it	cs
+#endif
 	movcs	$ff,#-1			@ $ff = carry ? -1 : 0, "broadcast" carry
 	ldr	lr,[sp],#4		@ pop lr

@@ -781,6 +798,9 @@ ecp_nistz256_gather_w5:

 	cmp	$index,#0
 	mov	$mask,#0
+#ifdef __thumb2__
+	itt	ne
+#endif
 	subne	$index,$index,#1
 	movne	$mask,#-1
 	add	$inp,$inp,$index,lsl#2
@@ -887,6 +907,9 @@ ecp_nistz256_gather_w7:

 	cmp	$index,#0
 	mov	$mask,#0
+#ifdef __thumb2__
+	itt	ne
+#endif
 	subne	$index,$index,#1
 	movne	$mask,#-1
 	add	$inp,$inp,$index
@@ -1180,6 +1203,9 @@ __ecp_nistz256_add_self:
 	adcs	$a6,$a6,$a6
 	mov	$ff,#0
 	adcs	$a7,$a7,$a7
+#ifdef __thumb2__
+	it	cs
+#endif
 	movcs	$ff,#-1			@ $ff = carry ? -1 : 0

 	subs	$a0,$a0,$ff		@ subtract synthesized modulus
@@ -1369,6 +1395,9 @@ ecp_nistz256_point_add:
 	stmia	r3!,{r4-r11}
 	ldmia	$b_ptr,{r4-r11}
 	cmp	r12,#0
+#ifdef __thumb2__
+	it	ne
+#endif
 	movne	r12,#-1
 	stmia	r3,{r4-r11}
 	str	r12,[sp,#32*18+8]	@ !in2infty
@@ -1395,6 +1424,9 @@ ecp_nistz256_point_add:
 	stmia	r3!,{r4-r11}
 	ldmia	$a_ptr,{r4-r11}
 	cmp	r12,#0
+#ifdef __thumb2__
+	it	ne
+#endif
 	movne	r12,#-1
 	stmia	r3,{r4-r11}
 	str	r12,[sp,#32*18+4]	@ !in1infty
@@ -1636,6 +1668,9 @@ ecp_nistz256_point_add_affine:
 	stmia	r3!,{r4-r11}
 	ldmia	$a_ptr,{r4-r11}
 	cmp	r12,#0
+#ifdef __thumb2__
+	it	ne
+#endif
 	movne	r12,#-1
 	stmia	r3,{r4-r11}
 	str	r12,[sp,#32*15+4]	@ !in1infty
@@ -1661,6 +1696,9 @@ ecp_nistz256_point_add_affine:
 	orr	r12,r12,r11
 	stmia	r3!,{r4-r11}
 	cmp	r12,#0
+#ifdef __thumb2__
+	it	ne
+#endif
 	movne	r12,#-1
 	str	r12,[sp,#32*15+8]	@ !in2infty

@@ -136,7 +136,12 @@ $code=<<___;
 #include "arm_arch.h"

 .text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax	unified
+.thumb
+#else
 .code	32
+#endif

 #ifdef __APPLE__
 #define ldrplb ldrbpl
@@ -154,19 +159,27 @@ rem_4bit:

 .type	rem_4bit_get,%function
 rem_4bit_get:
-	sub	$rem_4bit,pc,#8
-	sub	$rem_4bit,$rem_4bit,#32	@ &rem_4bit
+#if defined(__thumb2__)
+	adr	$rem_4bit,rem_4bit
+#else
+	sub	$rem_4bit,pc,#8+32	@ &rem_4bit
+#endif
 	b	.Lrem_4bit_got
 	nop
+	nop
 .size	rem_4bit_get,.-rem_4bit_get

 .global	gcm_ghash_4bit
 .type	gcm_ghash_4bit,%function
+.align	4
 gcm_ghash_4bit:
-	sub	r12,pc,#8
+#if defined(__thumb2__)
+	adr	r12,rem_4bit
+#else
+	sub	r12,pc,#8+48		@ &rem_4bit
+#endif
 	add	$len,$inp,$len		@ $len to point at the end
 	stmdb	sp!,{r3-r11,lr}		@ save $len/end too
-	sub	r12,r12,#48		@ &rem_4bit

 	ldmia	r12,{r4-r11}		@ copy rem_4bit ...
 	stmdb	sp!,{r4-r11}		@ ... to stack
@@ -213,6 +226,9 @@ gcm_ghash_4bit:
 	eor	$Zlh,$Zlh,$Zhl,lsl#28
 	ldrh	$Tll,[sp,$nlo]		@ rem_4bit[rem]
 	eor	$Zhl,$Thl,$Zhl,lsr#4
+#ifdef __thumb2__
+	it	pl
+#endif
 	ldrplb	$nlo,[$inp,$cnt]
 	eor	$Zhl,$Zhl,$Zhh,lsl#28
 	eor	$Zhh,$Thh,$Zhh,lsr#4
@@ -223,6 +239,9 @@ gcm_ghash_4bit:
 	add	$nhi,$nhi,$nhi
 	ldmia	$Thh,{$Tll-$Thh}	@ load Htbl[nhi]
 	eor	$Zll,$Tll,$Zll,lsr#4
+#ifdef __thumb2__
+	it	pl
+#endif
 	ldrplb	$Tll,[$Xi,$cnt]
 	eor	$Zll,$Zll,$Zlh,lsl#28
 	eor	$Zlh,$Tlh,$Zlh,lsr#4
@@ -230,8 +249,14 @@ gcm_ghash_4bit:
 	eor	$Zlh,$Zlh,$Zhl,lsl#28
 	eor	$Zhl,$Thl,$Zhl,lsr#4
 	eor	$Zhl,$Zhl,$Zhh,lsl#28
+#ifdef __thumb2__
+	it	pl
+#endif
 	eorpl	$nlo,$nlo,$Tll
 	eor	$Zhh,$Thh,$Zhh,lsr#4
+#ifdef __thumb2__
+	itt	pl
+#endif
 	andpl	$nhi,$nlo,#0xf0
 	andpl	$nlo,$nlo,#0x0f
 	eor	$Zhh,$Zhh,$Tlh,lsl#16	@ ^= rem_4bit[rem]
@@ -241,7 +266,11 @@ gcm_ghash_4bit:
 	add	$inp,$inp,#16
 	mov	$nhi,$Zll
 ___
-&Zsmash("cmp\t$inp,$len","ldrneb\t$nlo,[$inp,#15]");
+&Zsmash("cmp\t$inp,$len","\n".
+	"#ifdef __thumb2__\n".
+	"	it	ne\n".
+	"#endif\n".
+	"	ldrneb	$nlo,[$inp,#15]");
 $code.=<<___;
 	bne	.Louter

@@ -299,6 +328,9 @@ gcm_gmult_4bit:
 	eor	$Zlh,$Zlh,$Zhl,lsl#28
 	ldrh	$Tll,[$rem_4bit,$nlo]	@ rem_4bit[rem]
 	eor	$Zhl,$Thl,$Zhl,lsr#4
+#ifdef __thumb2__
+	it	pl
+#endif
 	ldrplb	$nlo,[$Xi,$cnt]
 	eor	$Zhl,$Zhl,$Zhh,lsl#28
 	eor	$Zhh,$Thh,$Zhh,lsr#4
@@ -316,6 +348,9 @@ gcm_gmult_4bit:
 	eor	$Zhl,$Thl,$Zhl,lsr#4
 	eor	$Zhl,$Zhl,$Zhh,lsl#28
 	eor	$Zhh,$Thh,$Zhh,lsr#4
+#ifdef __thumb2__
+	itt	pl
+#endif
 	andpl	$nhi,$nlo,#0xf0
 	andpl	$nlo,$nlo,#0x0f
 	eor	$Zhh,$Zhh,$Tll,lsl#16	@ ^= rem_4bit[rem]
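One more syntax wrinkle shows up in GHASH: the legacy "divided" ARM syntax spells a conditional byte load ldrplb (condition before the size suffix), while the unified syntax that Thumb2 requires spells it ldrbpl. The file keeps the legacy spelling and already carries #define ldrplb ldrbpl for Apple's unified-only assembler; the new it pl/itt pl lines make the same predicated loads and ANDs legal in Thumb2. The two spellings side by side for comparison — an assembler accepts one or the other depending on its syntax mode, with r0/r1 purely illustrative:

	ldrplb	r0,[r1]			@ divided (pre-UAL): ldr + pl + b
	ldrbpl	r0,[r1]			@ unified (UAL):     ldrb + pl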
@@ -181,7 +181,12 @@ $code=<<___;
 #include "arm_arch.h"

 .text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax	unified
+.thumb
+#else
 .code	32
+#endif

 .global	sha1_block_data_order
 .type	sha1_block_data_order,%function
@@ -189,7 +194,8 @@ $code=<<___;
 .align	5
 sha1_block_data_order:
 #if __ARM_MAX_ARCH__>=7
-	sub	r3,pc,#8		@ sha1_block_data_order
+.Lsha1_block:
+	adr	r3,.Lsha1_block
 	ldr	r12,.LOPENSSL_armcap
 	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
 #ifdef __APPLE__
@@ -216,7 +222,12 @@ for($i=0;$i<5;$i++) {
 	&BODY_00_15(@V);	unshift(@V,pop(@V));
 }
 $code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+	mov	$t3,sp
+	teq	$Xi,$t3
+#else
 	teq	$Xi,sp
+#endif
 	bne	.L_00_15		@ [((11+4)*5+2)*3]
 	sub	sp,sp,#25*4
 ___
@@ -235,7 +246,12 @@ for($i=0;$i<5;$i++) {
 	&BODY_20_39(@V);	unshift(@V,pop(@V));
 }
 $code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+	mov	$t3,sp
+	teq	$Xi,$t3
+#else
 	teq	$Xi,sp			@ preserve carry
+#endif
 	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
 	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes

@@ -247,7 +263,12 @@ for($i=0;$i<5;$i++) {
 	&BODY_40_59(@V);	unshift(@V,pop(@V));
 }
 $code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+	mov	$t3,sp
+	teq	$Xi,$t3
+#else
 	teq	$Xi,sp
+#endif
 	bne	.L_40_59		@ [+((12+5)*5+2)*4]

 	ldr	$K,.LK_60_79
@@ -283,7 +304,7 @@ $code.=<<___;
 .LK_60_79:	.word	0xca62c1d6
 #if __ARM_MAX_ARCH__>=7
 .LOPENSSL_armcap:
-.word	OPENSSL_armcap_P-sha1_block_data_order
+.word	OPENSSL_armcap_P-.Lsha1_block
 #endif
 .asciz	"SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align	5
@@ -458,6 +479,7 @@ sub Xuplast_80 ()

 	&teq	($inp,$len);
 	&sub	($K_XX_XX,$K_XX_XX,16);	# rewind $K_XX_XX
+	&it	("eq");
 	&subeq	($inp,$inp,64);		# reload last block to avoid SEGV
 	&vld1_8	("{@X[-4&7]-@X[-3&7]}","[$inp]!");
 	eval(shift(@insns));
@@ -508,12 +530,12 @@ sha1_block_data_order_neon:
 	@ dmb				@ errata #451034 on early Cortex A8
 	@ vstmdb	sp!,{d8-d15}	@ ABI specification says so
 	mov	$saved_sp,sp
-	sub	sp,sp,#64		@ alloca
+	sub	$Xfer,sp,#64
 	adr	$K_XX_XX,.LK_00_19
-	bic	sp,sp,#15		@ align for 128-bit stores
+	bic	$Xfer,$Xfer,#15		@ align for 128-bit stores

 	ldmia	$ctx,{$a,$b,$c,$d,$e}	@ load context
-	mov	$Xfer,sp
+	mov	sp,$Xfer		@ alloca

 	vld1.8	{@X[-4&7]-@X[-3&7]},[$inp]!	@ handles unaligned
 	veor	$zero,$zero,$zero
@@ -560,10 +582,13 @@ $code.=<<___;
 	add	$b,$b,$t0
 	add	$c,$c,$t1
 	add	$d,$d,$Xfer
+	it	eq
 	moveq	sp,$saved_sp
 	add	$e,$e,$Ki
+	it	ne
 	ldrne	$Ki,[sp]
 	stmia	$ctx,{$a,$b,$c,$d,$e}
+	itt	ne
 	addne	$Xfer,sp,#3*16
 	bne	.Loop_neon

@@ -584,6 +609,13 @@ my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));

 $code.=<<___;
 #if __ARM_MAX_ARCH__>=7
+
+# if defined(__thumb2__) && !defined(__APPLE__)
+# define INST(a,b,c,d)	.byte	c,d|0xf,a,b
+# else
+# define INST(a,b,c,d)	.byte	a,b,c,d|0x10
+# endif
+
 .type	sha1_block_data_order_armv8,%function
 .align	5
 sha1_block_data_order_armv8:
@@ -677,7 +709,10 @@ ___
 	# since ARMv7 instructions are always encoded little-endian.
 	# correct solution is to use .inst directive, but older
 	# assemblers don't implement it:-(
-	sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+	# this fix-up provides Thumb encoding in conjunction with INST
+	$word &= ~0x10000000 if (($word & 0x0f000000) == 0x02000000);
+	sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
 			$word&0xff,($word>>8)&0xff,
 			($word>>16)&0xff,($word>>24)&0xff,
 			$mnemonic,$arg;
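The INST macro introduced above deserves a note. The ARMv8 body is emitted through a perl fix-up that hand-encodes crypto instructions as raw bytes because older assemblers lack .inst, and ARM and Thumb2 lay those four bytes out differently (see the cpuid probes earlier). INST rebuilds either layout from one argument order, and the added perl line clears bit 28 of NEON-space words whose top nibble is 2, so that |0x10 (ARM) or |0xf (Thumb halfword prefix) regenerates the proper 0xf2/0xef prefix. The same restriction on sp discussed for the Montgomery code also explains why the NEON prologue now aligns $Xfer and then does mov sp,$Xfer instead of bic sp,sp,#15. Working through sha1c.32 q0,q0,q0 (ARM word 0xf2000c40, passed with 0xe2 after the fix-up):

# if defined(__thumb2__) && !defined(__APPLE__)
# define INST(a,b,c,d)	.byte	c,d|0xf,a,b	@ 0x00,0xef,0x40,0x0c = halfwords 0xef00,0x0c40
# else
# define INST(a,b,c,d)	.byte	a,b,c,d|0x10	@ 0x40,0x0c,0x00,0xf2 = word 0xf2000c40
# endif
	INST(0x40,0x0c,0x00,0xe2)	@ sha1c.32 q0,q0,q0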
@@ -175,17 +175,13 @@ $code=<<___;
 #endif

 .text
-#if __ARM_ARCH__<7
-.code	32
-#else
-.syntax	unified
 #if defined(__thumb2__) && !defined(__APPLE__)
-# define adrl adr
+.syntax	unified
 .thumb
+# define adrl adr
 #else
 .code	32
 #endif
-#endif

 .type	K256,%object
 .align	5
@@ -218,10 +214,10 @@ K256:
 .type	sha256_block_data_order,%function
 sha256_block_data_order:
 .Lsha256_block_data_order:
-#if __ARM_ARCH__<7
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
 	sub	r3,pc,#8		@ sha256_block_data_order
 #else
 	adr	r3,.Lsha256_block_data_order
 #endif
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 	ldr	r12,.LOPENSSL_armcap
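sha256 (and sha512 below) additionally map adrl onto adr for Thumb2 builds. adrl is a gas pseudo-instruction available only when assembling ARM state, where it expands to a two-instruction sequence for longer reaches; Thumb2 gas rejects it, but the 32-bit Thumb2 adr encoding has a ±4095-byte range, which is evidently sufficient here, so a simple define suffices. The entry points also switch to local labels (.Lsha256_block_data_order and friends) as adr targets. An illustrative use, where K256 is this file's constant table:

#if defined(__thumb2__) && !defined(__APPLE__)
# define adrl adr		@ Thumb2 adr has the range adrl was needed for
#endif
	adrl	r3,K256		@ ARM: two-instruction pseudo; Thumb2: one adr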
@@ -212,17 +212,13 @@ $code=<<___;
 #endif

 .text
-#if __ARM_ARCH__<7 || defined(__APPLE__)
-.code	32
-#else
+#if defined(__thumb2__) && !defined(__APPLE__)
 .syntax	unified
-# ifdef __thumb2__
-# define adrl adr
 .thumb
+# define adrl adr
 #else
 .code	32
 #endif
-#endif

 .type	K512,%object
 .align	5
@@ -280,10 +276,10 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
 .type	sha512_block_data_order,%function
 sha512_block_data_order:
 .Lsha512_block_data_order:
-#if __ARM_ARCH__<7
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
 	sub	r3,pc,#8		@ sha512_block_data_order
 #else
 	adr	r3,.Lsha512_block_data_order
 #endif
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 	ldr	r12,.LOPENSSL_armcap