e_padlock: add CTR mode.

This commit is contained in:
Andy Polyakov 2011-10-05 17:03:44 +00:00
parent d18762f7c9
commit 50452b2e60
3 changed files with 120 additions and 16 deletions

View File

@ -183,7 +183,7 @@ my ($mode,$opcode) = @_;
&set_label("${mode}_pic_point"); &set_label("${mode}_pic_point");
&lea ($ctx,&DWP(16,$ctx)); # control word &lea ($ctx,&DWP(16,$ctx)); # control word
&xor ("eax","eax"); &xor ("eax","eax");
if ($mode eq "ctr16") { if ($mode eq "ctr32") {
&movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter &movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter
} else { } else {
&xor ("ebx","ebx"); &xor ("ebx","ebx");
@ -216,7 +216,7 @@ my ($mode,$opcode) = @_;
&mov (&DWP(8,"ebp"),$len); &mov (&DWP(8,"ebp"),$len);
&mov ($len,$chunk); &mov ($len,$chunk);
&mov (&DWP(12,"ebp"),$chunk); # chunk &mov (&DWP(12,"ebp"),$chunk); # chunk
if ($mode eq "ctr16") { if ($mode eq "ctr32") {
&mov ("ecx",&DWP(-4,$ctx)); &mov ("ecx",&DWP(-4,$ctx));
&xor ($out,$out); &xor ($out,$out);
&mov ("eax",&DWP(-8,$ctx)); # borrow $len &mov ("eax",&DWP(-8,$ctx)); # borrow $len
@ -257,7 +257,7 @@ my ($mode,$opcode) = @_;
} }
&mov ($out,&DWP(0,"ebp")); # restore parameters &mov ($out,&DWP(0,"ebp")); # restore parameters
&mov ($chunk,&DWP(12,"ebp")); &mov ($chunk,&DWP(12,"ebp"));
if ($mode eq "ctr16") { if ($mode eq "ctr32") {
&mov ($inp,&DWP(4,"ebp")); &mov ($inp,&DWP(4,"ebp"));
&xor ($len,$len); &xor ($len,$len);
&set_label("${mode}_xor"); &set_label("${mode}_xor");
@ -284,7 +284,7 @@ my ($mode,$opcode) = @_;
&sub ($len,$chunk); &sub ($len,$chunk);
&mov ($chunk,$PADLOCK_CHUNK); &mov ($chunk,$PADLOCK_CHUNK);
&jnz (&label("${mode}_loop")); &jnz (&label("${mode}_loop"));
if ($mode ne "ctr16") { if ($mode ne "ctr32") {
&test ($out,0x0f); # out_misaligned &test ($out,0x0f); # out_misaligned
&jz (&label("${mode}_done")); &jz (&label("${mode}_done"));
} }
@ -296,7 +296,7 @@ my ($mode,$opcode) = @_;
&data_byte(0xf3,0xab); # rep stosl &data_byte(0xf3,0xab); # rep stosl
&set_label("${mode}_done"); &set_label("${mode}_done");
&lea ("esp",&DWP(24,"ebp")); &lea ("esp",&DWP(24,"ebp"));
if ($mode ne "ctr16") { if ($mode ne "ctr32") {
&jmp (&label("${mode}_exit")); &jmp (&label("${mode}_exit"));
&set_label("${mode}_aligned",16); &set_label("${mode}_aligned",16);
@ -311,7 +311,7 @@ my ($mode,$opcode) = @_;
&set_label("${mode}_exit"); } &set_label("${mode}_exit"); }
&mov ("eax",1); &mov ("eax",1);
&lea ("esp",&DWP(4,"esp")); # popf &lea ("esp",&DWP(4,"esp")); # popf
&emms () if ($mode eq "ctr16"); &emms () if ($mode eq "ctr32");
&set_label("${mode}_abort"); &set_label("${mode}_abort");
&function_end("padlock_${mode}_encrypt"); &function_end("padlock_${mode}_encrypt");
} }
@ -320,10 +320,11 @@ my ($mode,$opcode) = @_;
&generate_mode("cbc",0xd0); &generate_mode("cbc",0xd0);
&generate_mode("cfb",0xe0); &generate_mode("cfb",0xe0);
&generate_mode("ofb",0xe8); &generate_mode("ofb",0xe8);
&generate_mode("ctr16",0xc8); # yes, it implements own ctr with ecb opcode, &generate_mode("ctr32",0xc8); # yes, it implements own CTR with ECB opcode,
# because hardware ctr was introduced later # because hardware CTR was introduced later
# and even has errata on certain CPU stepping. # and even has errata on certain C7 stepping.
# own implementation *always* works... # own implementation *always* works, though
# ~15% slower than dedicated hardware...
&function_begin_B("padlock_xstore"); &function_begin_B("padlock_xstore");
&push ("edi"); &push ("edi");

View File

@ -9,7 +9,8 @@
# September 2011 # September 2011
# #
# Assembler helpers for Padlock engine. # Assembler helpers for Padlock engine. See also e_padlock-x86.pl for
# details.
$flavour = shift; $flavour = shift;
$output = shift; $output = shift;
@ -26,7 +27,7 @@ open STDOUT,"| $^X $xlate $flavour $output";
$code=".text\n"; $code=".text\n";
$PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16 $PADLOCK_CHUNK=512; # Must be a power of 2 between 32 and 2^20
$ctx="%rdx"; $ctx="%rdx";
$out="%rdi"; $out="%rdi";
@ -234,9 +235,23 @@ padlock_${mode}_encrypt:
neg %rax neg %rax
and \$$PADLOCK_CHUNK-1,$chunk # chunk%=PADLOCK_CHUNK and \$$PADLOCK_CHUNK-1,$chunk # chunk%=PADLOCK_CHUNK
lea (%rax,%rbp),%rsp lea (%rax,%rbp),%rsp
___
$code.=<<___ if ($mode eq "ctr32");
mov -4($ctx),%eax # pull 32-bit counter
bswap %eax
neg %eax
and \$`$PADLOCK_CHUNK/16-1`,%eax
jz .L${mode}_loop
shl \$4,%eax
cmp %rax,$len
cmova %rax,$chunk # don't let counter cross PADLOCK_CHUNK
___
$code.=<<___;
jmp .L${mode}_loop jmp .L${mode}_loop
.align 16 .align 16
.L${mode}_loop: .L${mode}_loop:
cmp $len,$chunk # ctr32 artefact
cmova $len,$chunk # ctr32 artefact
mov $out,%r8 # save parameters mov $out,%r8 # save parameters
mov $inp,%r9 mov $inp,%r9
mov $len,%r10 mov $len,%r10
@ -261,6 +276,16 @@ $code.=<<___ if ($mode !~ /ecb|ctr/);
movdqa (%rax),%xmm0 movdqa (%rax),%xmm0
movdqa %xmm0,-16($ctx) # copy [or refresh] iv movdqa %xmm0,-16($ctx) # copy [or refresh] iv
___ ___
$code.=<<___ if ($mode eq "ctr32");
mov -4($ctx),%eax # pull 32-bit counter
test \$0xffff0000,%eax
jnz .L${mode}_no_corr
bswap %eax
add \$0x10000,%eax
bswap %eax
mov %eax,-4($ctx)
.L${mode}_no_corr:
___
$code.=<<___; $code.=<<___;
mov %r8,$out # restore paramters mov %r8,$out # restore paramters
mov %r11,$chunk mov %r11,$chunk
@ -295,6 +320,29 @@ $code.=<<___;
.align 16 .align 16
.L${mode}_aligned: .L${mode}_aligned:
___
$code.=<<___ if ($mode eq "ctr32");
mov -4($ctx),%eax # pull 32-bit counter
mov \$`16*0x10000`,$chunk
bswap %eax
cmp $len,$chunk
cmova $len,$chunk
neg %eax
and \$0xffff,%eax
jz .L${mode}_aligned_loop
shl \$4,%eax
cmp %rax,$len
cmova %rax,$chunk # don't let counter cross 2^16
jmp .L${mode}_aligned_loop
.align 16
.L${mode}_aligned_loop:
cmp $len,$chunk
cmova $len,$chunk
mov $len,%r10 # save parameters
mov $chunk,$len
mov $chunk,%r11
___
$code.=<<___;
lea -16($ctx),%rax # ivp lea -16($ctx),%rax # ivp
lea 16($ctx),%rbx # key lea 16($ctx),%rbx # key
shr \$4,$len # len/=AES_BLOCK_SIZE shr \$4,$len # len/=AES_BLOCK_SIZE
@ -304,6 +352,19 @@ $code.=<<___ if ($mode !~ /ecb|ctr/);
movdqa (%rax),%xmm0 movdqa (%rax),%xmm0
movdqa %xmm0,-16($ctx) # copy [or refresh] iv movdqa %xmm0,-16($ctx) # copy [or refresh] iv
___ ___
$code.=<<___ if ($mode eq "ctr32");
mov -4($ctx),%eax # pull 32-bit counter
bswap %eax
add \$0x10000,%eax
bswap %eax
mov %eax,-4($ctx)
mov %r11,$chunk # restore paramters
mov %r10,$len
sub $chunk,$len
mov \$`16*0x10000`,$chunk
jnz .L${mode}_aligned_loop
___
$code.=<<___; $code.=<<___;
.L${mode}_exit: .L${mode}_exit:
mov \$1,%eax mov \$1,%eax
@ -320,7 +381,7 @@ ___
&generate_mode("cbc",0xd0); &generate_mode("cbc",0xd0);
&generate_mode("cfb",0xe0); &generate_mode("cfb",0xe0);
&generate_mode("ofb",0xe8); &generate_mode("ofb",0xe8);
&generate_mode("ctr16",0xd8); &generate_mode("ctr32",0xd8); # all 64-bit CPUs have working CTR...
$code.=<<___; $code.=<<___;
.asciz "VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>" .asciz "VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"

View File

@ -76,6 +76,7 @@
#endif #endif
#include <openssl/rand.h> #include <openssl/rand.h>
#include <openssl/err.h> #include <openssl/err.h>
#include <openssl/modes.h>
#ifndef OPENSSL_NO_HW #ifndef OPENSSL_NO_HW
#ifndef OPENSSL_NO_HW_PADLOCK #ifndef OPENSSL_NO_HW_PADLOCK
@ -337,16 +338,19 @@ static int padlock_cipher_nids[] = {
NID_aes_128_cbc, NID_aes_128_cbc,
NID_aes_128_cfb, NID_aes_128_cfb,
NID_aes_128_ofb, NID_aes_128_ofb,
NID_aes_128_ctr,
NID_aes_192_ecb, NID_aes_192_ecb,
NID_aes_192_cbc, NID_aes_192_cbc,
NID_aes_192_cfb, NID_aes_192_cfb,
NID_aes_192_ofb, NID_aes_192_ofb,
NID_aes_192_ctr,
NID_aes_256_ecb, NID_aes_256_ecb,
NID_aes_256_cbc, NID_aes_256_cbc,
NID_aes_256_cfb, NID_aes_256_cfb,
NID_aes_256_ofb, NID_aes_256_ofb,
NID_aes_256_ctr
}; };
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/ static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
sizeof(padlock_cipher_nids[0])); sizeof(padlock_cipher_nids[0]));
@ -505,10 +509,35 @@ padlock_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
return 1; return 1;
} }
/*
 * Adapter between the block-counting callback that padlock_ctr_cipher()
 * hands to CRYPTO_ctr128_encrypt_ctr32() and padlock_ctr32_encrypt().
 *
 *   in     - input data
 *   out    - output buffer
 *   blocks - number of whole AES blocks to process
 *   ctx    - Padlock cipher data; its iv field is overwritten with ivec
 *   ivec   - counter block to use, AES_BLOCK_SIZE bytes
 *
 * Note the argument order swap: the callback receives (in, out) while
 * padlock_ctr32_encrypt() is called with (out, in), and the block count
 * is converted to a byte count.
 */
static void padlock_ctr32_encrypt_glue(const unsigned char *in,
		unsigned char *out, size_t blocks,
		struct padlock_cipher_data *ctx,
		const unsigned char *ivec)
{
	size_t nbytes = AES_BLOCK_SIZE*blocks;

	/* load the caller-supplied counter block into the context */
	memcpy(ctx->iv, ivec, AES_BLOCK_SIZE);

	padlock_ctr32_encrypt(out, in, ctx, nbytes);
}
/*
 * CTR-mode cipher callback: processes nbytes from in_arg into out_arg by
 * delegating to CRYPTO_ctr128_encrypt_ctr32(), with
 * padlock_ctr32_encrypt_glue() as the bulk-block routine.
 *
 * ctx->iv and ctx->buf are passed through as the counter block and the
 * scratch buffer; ctx->num is read before the call and written back
 * afterwards (presumably the offset into the current keystream block -
 * confirm against the CRYPTO_ctr128_encrypt_ctr32() contract).
 *
 * Always returns 1.
 */
static int
padlock_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
		const unsigned char *in_arg, size_t nbytes)
{
	ctr128_f bulk = (ctr128_f)padlock_ctr32_encrypt_glue;
	struct padlock_cipher_data *hw = ALIGNED_CIPHER_DATA(ctx);
	unsigned int used = ctx->num;

	CRYPTO_ctr128_encrypt_ctr32(in_arg, out_arg, nbytes, hw,
			ctx->iv, ctx->buf, &used, bulk);

	/* persist the updated position for the next call */
	ctx->num = (size_t)used;

	return 1;
}
#define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE #define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE #define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_OFB 1 #define EVP_CIPHER_block_size_OFB 1
#define EVP_CIPHER_block_size_CFB 1 #define EVP_CIPHER_block_size_CFB 1
#define EVP_CIPHER_block_size_CTR 1
/* Declaring so many ciphers by hand would be a pain. /* Declaring so many ciphers by hand would be a pain.
Instead introduce a bit of preprocessor magic :-) */ Instead introduce a bit of preprocessor magic :-) */
@ -533,16 +562,19 @@ DECLARE_AES_EVP(128,ecb,ECB);
DECLARE_AES_EVP(128,cbc,CBC); DECLARE_AES_EVP(128,cbc,CBC);
DECLARE_AES_EVP(128,cfb,CFB); DECLARE_AES_EVP(128,cfb,CFB);
DECLARE_AES_EVP(128,ofb,OFB); DECLARE_AES_EVP(128,ofb,OFB);
DECLARE_AES_EVP(128,ctr,CTR);
DECLARE_AES_EVP(192,ecb,ECB); DECLARE_AES_EVP(192,ecb,ECB);
DECLARE_AES_EVP(192,cbc,CBC); DECLARE_AES_EVP(192,cbc,CBC);
DECLARE_AES_EVP(192,cfb,CFB); DECLARE_AES_EVP(192,cfb,CFB);
DECLARE_AES_EVP(192,ofb,OFB); DECLARE_AES_EVP(192,ofb,OFB);
DECLARE_AES_EVP(192,ctr,CTR);
DECLARE_AES_EVP(256,ecb,ECB); DECLARE_AES_EVP(256,ecb,ECB);
DECLARE_AES_EVP(256,cbc,CBC); DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cfb,CFB); DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,ofb,OFB); DECLARE_AES_EVP(256,ofb,OFB);
DECLARE_AES_EVP(256,ctr,CTR);
static int static int
padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid) padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
@ -567,6 +599,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid
case NID_aes_128_ofb: case NID_aes_128_ofb:
*cipher = &padlock_aes_128_ofb; *cipher = &padlock_aes_128_ofb;
break; break;
case NID_aes_128_ctr:
*cipher = &padlock_aes_128_ctr;
break;
case NID_aes_192_ecb: case NID_aes_192_ecb:
*cipher = &padlock_aes_192_ecb; *cipher = &padlock_aes_192_ecb;
@ -580,6 +615,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid
case NID_aes_192_ofb: case NID_aes_192_ofb:
*cipher = &padlock_aes_192_ofb; *cipher = &padlock_aes_192_ofb;
break; break;
case NID_aes_192_ctr:
*cipher = &padlock_aes_192_ctr;
break;
case NID_aes_256_ecb: case NID_aes_256_ecb:
*cipher = &padlock_aes_256_ecb; *cipher = &padlock_aes_256_ecb;
@ -593,6 +631,9 @@ padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid
case NID_aes_256_ofb: case NID_aes_256_ofb:
*cipher = &padlock_aes_256_ofb; *cipher = &padlock_aes_256_ofb;
break; break;
case NID_aes_256_ctr:
*cipher = &padlock_aes_256_ctr;
break;
default: default:
/* Sorry, we don't support this NID */ /* Sorry, we don't support this NID */
@ -610,6 +651,7 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
{ {
struct padlock_cipher_data *cdata; struct padlock_cipher_data *cdata;
int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8; int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
unsigned long mode = EVP_CIPHER_CTX_mode(ctx);
if (key==NULL) return 0; /* ERROR */ if (key==NULL) return 0; /* ERROR */
@ -617,7 +659,7 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
memset(cdata, 0, sizeof(struct padlock_cipher_data)); memset(cdata, 0, sizeof(struct padlock_cipher_data));
/* Prepare Control word. */ /* Prepare Control word. */
if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE) if (mode == EVP_CIPH_OFB_MODE || mode == EVP_CIPH_CTR_MODE)
cdata->cword.b.encdec = 0; cdata->cword.b.encdec = 0;
else else
cdata->cword.b.encdec = (ctx->encrypt == 0); cdata->cword.b.encdec = (ctx->encrypt == 0);
@ -640,8 +682,8 @@ padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
and is listed as hardware errata. They most and is listed as hardware errata. They most
likely will fix it at some point and then likely will fix it at some point and then
a check for stepping would be due here. */ a check for stepping would be due here. */
if ((EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_ECB_MODE || if ((mode == EVP_CIPH_ECB_MODE ||
EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CBC_MODE) mode == EVP_CIPH_CBC_MODE)
&& !enc) && !enc)
AES_set_decrypt_key(key, key_len, &cdata->ks); AES_set_decrypt_key(key, key_len, &cdata->ks);
else else