Buglet fixes and minor optimization in aes-x86_64 assembler.
This commit is contained in:
parent
cdb0392159
commit
983180bb8b
@ -1879,10 +1879,10 @@ AES_cbc_encrypt:
|
|||||||
.align 16
|
.align 16
|
||||||
.Lcbc_slow_way:
|
.Lcbc_slow_way:
|
||||||
# allocate aligned stack frame...
|
# allocate aligned stack frame...
|
||||||
lea -80(%rsp),%rbp
|
lea -88(%rsp),%rbp
|
||||||
and \$-64,%rbp
|
and \$-64,%rbp
|
||||||
# ... just "above" key schedule
|
# ... just "above" key schedule
|
||||||
lea -80-63(%rcx),%rax
|
lea -88-63(%rcx),%rax
|
||||||
sub %rbp,%rax
|
sub %rbp,%rax
|
||||||
neg %rax
|
neg %rax
|
||||||
and \$0x3c0,%rax
|
and \$0x3c0,%rax
|
||||||
@ -1891,9 +1891,9 @@ AES_cbc_encrypt:
|
|||||||
xchg %rsp,%rbp
|
xchg %rsp,%rbp
|
||||||
add \$8,%rsp # reserve for return address!
|
add \$8,%rsp # reserve for return address!
|
||||||
mov %rbp,$_rsp # save %rsp
|
mov %rbp,$_rsp # save %rsp
|
||||||
mov %rdi,$_inp # save copy of inp
|
#mov %rdi,$_inp # save copy of inp
|
||||||
mov %rsi,$_out # save copy of out
|
#mov %rsi,$_out # save copy of out
|
||||||
mov %rdx,$_len # save copy of len
|
#mov %rdx,$_len # save copy of len
|
||||||
#mov %rcx,$_key # save copy of key
|
#mov %rcx,$_key # save copy of key
|
||||||
mov %r8,$_ivp # save copy of ivp
|
mov %r8,$_ivp # save copy of ivp
|
||||||
mov %r8,%rbp # rearrange input arguments
|
mov %r8,%rbp # rearrange input arguments
|
||||||
@ -1901,6 +1901,7 @@ AES_cbc_encrypt:
|
|||||||
mov %rsi,$out
|
mov %rsi,$out
|
||||||
mov %rdi,$inp
|
mov %rdi,$inp
|
||||||
mov %rcx,$key
|
mov %rcx,$key
|
||||||
|
mov %rdx,%r10
|
||||||
|
|
||||||
mov 240($key),%eax
|
mov 240($key),%eax
|
||||||
mov $key,$keyp # save key pointer
|
mov $key,$keyp # save key pointer
|
||||||
@ -1919,8 +1920,7 @@ AES_cbc_encrypt:
|
|||||||
je .LSLOW_DECRYPT
|
je .LSLOW_DECRYPT
|
||||||
|
|
||||||
#--------------------------- SLOW ENCRYPT ---------------------------#
|
#--------------------------- SLOW ENCRYPT ---------------------------#
|
||||||
test \$-16,%rdx # check upon length
|
test \$-16,%r10 # check upon length
|
||||||
mov %rdx,%r10
|
|
||||||
mov 0(%rbp),$s0 # load iv
|
mov 0(%rbp),$s0 # load iv
|
||||||
mov 4(%rbp),$s1
|
mov 4(%rbp),$s1
|
||||||
mov 8(%rbp),$s2
|
mov 8(%rbp),$s2
|
||||||
@ -1936,12 +1936,13 @@ AES_cbc_encrypt:
|
|||||||
mov $keyp,$key # restore key
|
mov $keyp,$key # restore key
|
||||||
mov $inp,$_inp # save inp
|
mov $inp,$_inp # save inp
|
||||||
mov $out,$_out # save out
|
mov $out,$_out # save out
|
||||||
|
mov %r10,$_len # save len
|
||||||
|
|
||||||
call _x86_64_AES_encrypt_compact
|
call _x86_64_AES_encrypt_compact
|
||||||
|
|
||||||
mov $_inp,$inp # restore inp
|
mov $_inp,$inp # restore inp
|
||||||
mov $_out,$out # restore out
|
mov $_out,$out # restore out
|
||||||
mov $_len,%r10
|
mov $_len,%r10 # restore len
|
||||||
mov $s0,0($out)
|
mov $s0,0($out)
|
||||||
mov $s1,4($out)
|
mov $s1,4($out)
|
||||||
mov $s2,8($out)
|
mov $s2,8($out)
|
||||||
@ -1951,7 +1952,6 @@ AES_cbc_encrypt:
|
|||||||
lea 16($out),$out
|
lea 16($out),$out
|
||||||
sub \$16,%r10
|
sub \$16,%r10
|
||||||
test \$-16,%r10
|
test \$-16,%r10
|
||||||
mov %r10,$_len
|
|
||||||
jnz .Lcbc_slow_enc_loop
|
jnz .Lcbc_slow_enc_loop
|
||||||
test \$15,%r10
|
test \$15,%r10
|
||||||
jnz .Lcbc_slow_enc_tail
|
jnz .Lcbc_slow_enc_tail
|
||||||
@ -1969,12 +1969,12 @@ AES_cbc_encrypt:
|
|||||||
mov %r10,%rcx
|
mov %r10,%rcx
|
||||||
mov $inp,%rsi
|
mov $inp,%rsi
|
||||||
mov $out,%rdi
|
mov $out,%rdi
|
||||||
.long 0xF689A4F3 # rep movsb
|
.long 0x9066A4F3 # rep movsb
|
||||||
.Lcbc_slow_enc_in_place:
|
.Lcbc_slow_enc_in_place:
|
||||||
mov \$16,%rcx # zero tail
|
mov \$16,%rcx # zero tail
|
||||||
sub %r10,%rcx
|
sub %r10,%rcx
|
||||||
xor %rax,%rax
|
xor %rax,%rax
|
||||||
.long 0xF689AAF3 # rep stosb
|
.long 0x9066AAF3 # rep stosb
|
||||||
mov $out,$inp # this is not a mistake!
|
mov $out,$inp # this is not a mistake!
|
||||||
movq \$16,$_len # len=16
|
movq \$16,$_len # len=16
|
||||||
jmp .Lcbc_slow_enc_loop # one more spin...
|
jmp .Lcbc_slow_enc_loop # one more spin...
|
||||||
@ -1984,10 +1984,10 @@ AES_cbc_encrypt:
|
|||||||
shr \$3,%rax
|
shr \$3,%rax
|
||||||
add %rax,$sbox # recall "magic" constants!
|
add %rax,$sbox # recall "magic" constants!
|
||||||
|
|
||||||
mov 0(%rbp),%r10 # copy iv to stack
|
mov 0(%rbp),%r11 # copy iv to stack
|
||||||
mov 8(%rbp),%r11
|
mov 8(%rbp),%r12
|
||||||
mov %r10,0+$ivec
|
mov %r11,0+$ivec
|
||||||
mov %r11,8+$ivec
|
mov %r12,8+$ivec
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
.Lcbc_slow_dec_loop:
|
.Lcbc_slow_dec_loop:
|
||||||
@ -1998,6 +1998,7 @@ AES_cbc_encrypt:
|
|||||||
mov $keyp,$key # restore key
|
mov $keyp,$key # restore key
|
||||||
mov $inp,$_inp # save inp
|
mov $inp,$_inp # save inp
|
||||||
mov $out,$_out # save out
|
mov $out,$_out # save out
|
||||||
|
mov %r10,$_len # save len
|
||||||
|
|
||||||
call _x86_64_AES_decrypt_compact
|
call _x86_64_AES_decrypt_compact
|
||||||
|
|
||||||
@ -2025,7 +2026,6 @@ AES_cbc_encrypt:
|
|||||||
|
|
||||||
lea 16($inp),$inp
|
lea 16($inp),$inp
|
||||||
lea 16($out),$out
|
lea 16($out),$out
|
||||||
mov %r10,$_len
|
|
||||||
jmp .Lcbc_slow_dec_loop
|
jmp .Lcbc_slow_dec_loop
|
||||||
.Lcbc_slow_dec_done:
|
.Lcbc_slow_dec_done:
|
||||||
mov $_ivp,%rdi
|
mov $_ivp,%rdi
|
||||||
@ -2053,7 +2053,7 @@ AES_cbc_encrypt:
|
|||||||
mov $out,%rdi
|
mov $out,%rdi
|
||||||
lea $ivec,%rsi
|
lea $ivec,%rsi
|
||||||
lea 16(%r10),%rcx
|
lea 16(%r10),%rcx
|
||||||
.long 0xF689A4F3 # rep movsb
|
.long 0x9066A4F3 # rep movsb
|
||||||
jmp .Lcbc_exit
|
jmp .Lcbc_exit
|
||||||
.size AES_cbc_encrypt,.-AES_cbc_encrypt
|
.size AES_cbc_encrypt,.-AES_cbc_encrypt
|
||||||
___
|
___
|
||||||
|
Loading…
x
Reference in New Issue
Block a user