Buglet fixes and minor optimization in aes-x86_64 assembler.

This commit is contained in:
Andy Polyakov 2007-07-30 16:42:57 +00:00
parent cdb0392159
commit 983180bb8b

View File

@ -1879,10 +1879,10 @@ AES_cbc_encrypt:
.align 16 .align 16
.Lcbc_slow_way: .Lcbc_slow_way:
# allocate aligned stack frame... # allocate aligned stack frame...
lea -80(%rsp),%rbp lea -88(%rsp),%rbp
and \$-64,%rbp and \$-64,%rbp
# ... just "above" key schedule # ... just "above" key schedule
lea -80-63(%rcx),%rax lea -88-63(%rcx),%rax
sub %rbp,%rax sub %rbp,%rax
neg %rax neg %rax
and \$0x3c0,%rax and \$0x3c0,%rax
@ -1891,9 +1891,9 @@ AES_cbc_encrypt:
xchg %rsp,%rbp xchg %rsp,%rbp
add \$8,%rsp # reserve for return address! add \$8,%rsp # reserve for return address!
mov %rbp,$_rsp # save %rsp mov %rbp,$_rsp # save %rsp
mov %rdi,$_inp # save copy of inp #mov %rdi,$_inp # save copy of inp
mov %rsi,$_out # save copy of out #mov %rsi,$_out # save copy of out
mov %rdx,$_len # save copy of len #mov %rdx,$_len # save copy of len
#mov %rcx,$_key # save copy of key #mov %rcx,$_key # save copy of key
mov %r8,$_ivp # save copy of ivp mov %r8,$_ivp # save copy of ivp
mov %r8,%rbp # rearrange input arguments mov %r8,%rbp # rearrange input arguments
@ -1901,6 +1901,7 @@ AES_cbc_encrypt:
mov %rsi,$out mov %rsi,$out
mov %rdi,$inp mov %rdi,$inp
mov %rcx,$key mov %rcx,$key
mov %rdx,%r10
mov 240($key),%eax mov 240($key),%eax
mov $key,$keyp # save key pointer mov $key,$keyp # save key pointer
@ -1919,8 +1920,7 @@ AES_cbc_encrypt:
je .LSLOW_DECRYPT je .LSLOW_DECRYPT
#--------------------------- SLOW ENCRYPT ---------------------------# #--------------------------- SLOW ENCRYPT ---------------------------#
test \$-16,%rdx # check upon length test \$-16,%r10 # check upon length
mov %rdx,%r10
mov 0(%rbp),$s0 # load iv mov 0(%rbp),$s0 # load iv
mov 4(%rbp),$s1 mov 4(%rbp),$s1
mov 8(%rbp),$s2 mov 8(%rbp),$s2
@ -1936,12 +1936,13 @@ AES_cbc_encrypt:
mov $keyp,$key # restore key mov $keyp,$key # restore key
mov $inp,$_inp # save inp mov $inp,$_inp # save inp
mov $out,$_out # save out mov $out,$_out # save out
mov %r10,$_len # save len
call _x86_64_AES_encrypt_compact call _x86_64_AES_encrypt_compact
mov $_inp,$inp # restore inp mov $_inp,$inp # restore inp
mov $_out,$out # restore out mov $_out,$out # restore out
mov $_len,%r10 mov $_len,%r10 # restore len
mov $s0,0($out) mov $s0,0($out)
mov $s1,4($out) mov $s1,4($out)
mov $s2,8($out) mov $s2,8($out)
@ -1951,7 +1952,6 @@ AES_cbc_encrypt:
lea 16($out),$out lea 16($out),$out
sub \$16,%r10 sub \$16,%r10
test \$-16,%r10 test \$-16,%r10
mov %r10,$_len
jnz .Lcbc_slow_enc_loop jnz .Lcbc_slow_enc_loop
test \$15,%r10 test \$15,%r10
jnz .Lcbc_slow_enc_tail jnz .Lcbc_slow_enc_tail
@ -1969,12 +1969,12 @@ AES_cbc_encrypt:
mov %r10,%rcx mov %r10,%rcx
mov $inp,%rsi mov $inp,%rsi
mov $out,%rdi mov $out,%rdi
.long 0xF689A4F3 # rep movsb .long 0x9066A4F3 # rep movsb
.Lcbc_slow_enc_in_place: .Lcbc_slow_enc_in_place:
mov \$16,%rcx # zero tail mov \$16,%rcx # zero tail
sub %r10,%rcx sub %r10,%rcx
xor %rax,%rax xor %rax,%rax
.long 0xF689AAF3 # rep stosb .long 0x9066AAF3 # rep stosb
mov $out,$inp # this is not a mistake! mov $out,$inp # this is not a mistake!
movq \$16,$_len # len=16 movq \$16,$_len # len=16
jmp .Lcbc_slow_enc_loop # one more spin... jmp .Lcbc_slow_enc_loop # one more spin...
@ -1984,10 +1984,10 @@ AES_cbc_encrypt:
shr \$3,%rax shr \$3,%rax
add %rax,$sbox # recall "magic" constants! add %rax,$sbox # recall "magic" constants!
mov 0(%rbp),%r10 # copy iv to stack mov 0(%rbp),%r11 # copy iv to stack
mov 8(%rbp),%r11 mov 8(%rbp),%r12
mov %r10,0+$ivec mov %r11,0+$ivec
mov %r11,8+$ivec mov %r12,8+$ivec
.align 4 .align 4
.Lcbc_slow_dec_loop: .Lcbc_slow_dec_loop:
@ -1998,6 +1998,7 @@ AES_cbc_encrypt:
mov $keyp,$key # restore key mov $keyp,$key # restore key
mov $inp,$_inp # save inp mov $inp,$_inp # save inp
mov $out,$_out # save out mov $out,$_out # save out
mov %r10,$_len # save len
call _x86_64_AES_decrypt_compact call _x86_64_AES_decrypt_compact
@ -2025,7 +2026,6 @@ AES_cbc_encrypt:
lea 16($inp),$inp lea 16($inp),$inp
lea 16($out),$out lea 16($out),$out
mov %r10,$_len
jmp .Lcbc_slow_dec_loop jmp .Lcbc_slow_dec_loop
.Lcbc_slow_dec_done: .Lcbc_slow_dec_done:
mov $_ivp,%rdi mov $_ivp,%rdi
@ -2053,7 +2053,7 @@ AES_cbc_encrypt:
mov $out,%rdi mov $out,%rdi
lea $ivec,%rsi lea $ivec,%rsi
lea 16(%r10),%rcx lea 16(%r10),%rcx
.long 0xF689A4F3 # rep movsb .long 0x9066A4F3 # rep movsb
jmp .Lcbc_exit jmp .Lcbc_exit
.size AES_cbc_encrypt,.-AES_cbc_encrypt .size AES_cbc_encrypt,.-AES_cbc_encrypt
___ ___