Engage ARMv8 assembly pack.

Reviewed-by: Dr. Stephen Henson <steve@openssl.org>
This commit is contained in:
Andy Polyakov
2015-05-11 11:34:56 +02:00
parent b84813ec01
commit 083ed53def
4 changed files with 177 additions and 6 deletions

View File

@@ -10,13 +10,22 @@
# define __ARMEL__ # define __ARMEL__
# endif # endif
# elif defined(__GNUC__) # elif defined(__GNUC__)
# if defined(__aarch64__)
# define __ARM_ARCH__ 8
# if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
# define __ARMEB__
# else
# define __ARMEL__
# endif
/* /*
* Why doesn't gcc define __ARM_ARCH__? Instead it defines * Why doesn't gcc define __ARM_ARCH__? Instead it defines
* bunch of below macros. See all_architectires[] table in * bunch of below macros. See all_architectires[] table in
* gcc/config/arm/arm.c. On a side note it defines * gcc/config/arm/arm.c. On a side note it defines
* __ARMEL__/__ARMEB__ for little-/big-endian. * __ARMEL__/__ARMEB__ for little-/big-endian.
*/ */
# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ # elif defined(__ARM_ARCH_8A__)
# define __ARM_ARCH__ 8
# elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \ defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \
defined(__ARM_ARCH_7EM__) defined(__ARM_ARCH_7EM__)
# define __ARM_ARCH__ 7 # define __ARM_ARCH__ 7
@@ -42,10 +51,14 @@
#if !__ASSEMBLER__ #if !__ASSEMBLER__
extern unsigned int OPENSSL_armcap_P; extern unsigned int OPENSSL_armcap_P;
#endif
#define ARMV7_NEON (1<<0) #define ARMV7_NEON (1<<0)
#define ARMV7_TICK (1<<1) #define ARMV7_TICK (1<<1)
#endif #define ARMV8_AES (1<<2)
#define ARMV8_SHA1 (1<<3)
#define ARMV8_SHA256 (1<<4)
#define ARMV8_PMULL (1<<5)
#endif #endif
#endif #endif

View File

@@ -20,6 +20,10 @@ static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); }
*/ */
void _armv7_neon_probe(void); void _armv7_neon_probe(void);
unsigned int _armv7_tick(void); unsigned int _armv7_tick(void);
void _armv8_aes_probe(void);
void _armv8_sha1_probe(void);
void _armv8_sha256_probe(void);
void _armv8_pmull_probe(void);
unsigned int OPENSSL_rdtsc(void) unsigned int OPENSSL_rdtsc(void)
{ {
@@ -68,6 +72,28 @@ void OPENSSL_cpuid_setup(void)
{ {
_armv7_neon_probe(); _armv7_neon_probe();
OPENSSL_armcap_P |= ARMV7_NEON; OPENSSL_armcap_P |= ARMV7_NEON;
#ifdef __aarch64__
if (sigsetjmp(ill_jmp,1) == 0)
{
_armv8_pmull_probe();
OPENSSL_armcap_P |= ARMV8_PMULL|ARMV8_AES;
}
else if (sigsetjmp(ill_jmp,1) == 0)
{
_armv8_aes_probe();
OPENSSL_armcap_P |= ARMV8_AES;
}
if (sigsetjmp(ill_jmp,1) == 0)
{
_armv8_sha1_probe();
OPENSSL_armcap_P |= ARMV8_SHA1;
}
if (sigsetjmp(ill_jmp,1) == 0)
{
_armv8_sha256_probe();
OPENSSL_armcap_P |= ARMV8_SHA256;
}
#endif
} }
if (sigsetjmp(ill_jmp,1) == 0) if (sigsetjmp(ill_jmp,1) == 0)
{ {

View File

@@ -471,6 +471,35 @@ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
{ return &aes_##keylen##_##mode; } { return &aes_##keylen##_##mode; }
#endif #endif
#if defined(OPENSSL_CPUID_OBJ) && defined(__aarch64__)
#include "arm_arch.h"
#if __ARM_ARCH__>=7
# define HWAES_CAPABLE (OPENSSL_armcap_P & ARMV8_AES)
# define HWAES_set_encrypt_key aes_v8_set_encrypt_key
# define HWAES_set_decrypt_key aes_v8_set_decrypt_key
# define HWAES_encrypt aes_v8_encrypt
# define HWAES_decrypt aes_v8_decrypt
# define HWAES_cbc_encrypt aes_v8_cbc_encrypt
# define HWAES_ctr32_encrypt_blocks aes_v8_ctr32_encrypt_blocks
#endif
#endif
#if defined(HWAES_CAPABLE)
int HWAES_set_encrypt_key(const unsigned char *userKey, const int bits,
AES_KEY *key);
int HWAES_set_decrypt_key(const unsigned char *userKey, const int bits,
AES_KEY *key);
void HWAES_encrypt(const unsigned char *in, unsigned char *out,
const AES_KEY *key);
void HWAES_decrypt(const unsigned char *in, unsigned char *out,
const AES_KEY *key);
void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out,
size_t length, const AES_KEY *key,
unsigned char *ivec, const int enc);
void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
size_t len, const AES_KEY *key, const unsigned char ivec[16]);
#endif
#define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ #define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \
BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \
BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \
@@ -489,6 +518,19 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
mode = ctx->cipher->flags & EVP_CIPH_MODE; mode = ctx->cipher->flags & EVP_CIPH_MODE;
if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE)
&& !enc) && !enc)
#ifdef HWAES_CAPABLE
if (HWAES_CAPABLE)
{
ret = HWAES_set_decrypt_key(key,ctx->key_len*8,&dat->ks);
dat->block = (block128_f)HWAES_decrypt;
dat->stream.cbc = NULL;
#ifdef HWAES_cbc_encrypt
if (mode==EVP_CIPH_CBC_MODE)
dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt;
#endif
}
else
#endif
#ifdef BSAES_CAPABLE #ifdef BSAES_CAPABLE
if (BSAES_CAPABLE && mode==EVP_CIPH_CBC_MODE) if (BSAES_CAPABLE && mode==EVP_CIPH_CBC_MODE)
{ {
@@ -517,6 +559,26 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
NULL; NULL;
} }
else else
#ifdef HWAES_CAPABLE
if (HWAES_CAPABLE)
{
ret = HWAES_set_encrypt_key(key,ctx->key_len*8,&dat->ks);
dat->block = (block128_f)HWAES_encrypt;
dat->stream.cbc = NULL;
#ifdef HWAES_cbc_encrypt
if (mode==EVP_CIPH_CBC_MODE)
dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt;
else
#endif
#ifdef HWAES_ctr32_encrypt_blocks
if (mode==EVP_CIPH_CTR_MODE)
dat->stream.ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks;
else
#endif
(void)0; /* terminate potentially open 'else' */
}
else
#endif
#ifdef BSAES_CAPABLE #ifdef BSAES_CAPABLE
if (BSAES_CAPABLE && mode==EVP_CIPH_CTR_MODE) if (BSAES_CAPABLE && mode==EVP_CIPH_CTR_MODE)
{ {
@@ -809,6 +871,21 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
return 1; return 1;
if (key) if (key)
{ do { { do {
#ifdef HWAES_CAPABLE
if (HWAES_CAPABLE)
{
HWAES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks);
CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks,
(block128_f)HWAES_encrypt);
#ifdef HWAES_ctr32_encrypt_blocks
gctx->ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks;
#else
gctx->ctr = NULL;
#endif
break;
}
else
#endif
#ifdef BSAES_CAPABLE #ifdef BSAES_CAPABLE
if (BSAES_CAPABLE) if (BSAES_CAPABLE)
{ {
@@ -1047,6 +1124,29 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
{ {
xctx->stream = NULL; xctx->stream = NULL;
/* key_len is two AES keys */ /* key_len is two AES keys */
#ifdef HWAES_CAPABLE
if (HWAES_CAPABLE)
{
if (enc)
{
HWAES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
xctx->xts.block1 = (block128_f)HWAES_encrypt;
}
else
{
HWAES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
xctx->xts.block1 = (block128_f)HWAES_decrypt;
}
HWAES_set_encrypt_key(key + ctx->key_len/2,
ctx->key_len * 4, &xctx->ks2);
xctx->xts.block2 = (block128_f)HWAES_encrypt;
xctx->xts.key1 = &xctx->ks1;
break;
}
else
#endif
#ifdef VPAES_CAPABLE #ifdef VPAES_CAPABLE
if (VPAES_CAPABLE) if (VPAES_CAPABLE)
{ {
@@ -1189,6 +1289,19 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
return 1; return 1;
if (key) do if (key) do
{ {
#ifdef HWAES_CAPABLE
if (HWAES_CAPABLE)
{
HWAES_set_encrypt_key(key,ctx->key_len*8,&cctx->ks);
CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
&cctx->ks, (block128_f)HWAES_encrypt);
cctx->str = NULL;
cctx->key_set = 1;
break;
}
else
#endif
#ifdef VPAES_CAPABLE #ifdef VPAES_CAPABLE
if (VPAES_CAPABLE) if (VPAES_CAPABLE)
{ {

View File

@@ -645,7 +645,7 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
#endif #endif
#if TABLE_BITS==4 && defined(GHASH_ASM) #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if !defined(I386_ONLY) && \ # if !defined(I386_ONLY) && \
(defined(__i386) || defined(__i386__) || \ (defined(__i386) || defined(__i386__) || \
defined(__x86_64) || defined(__x86_64__) || \ defined(__x86_64) || defined(__x86_64__) || \
@@ -666,13 +666,22 @@ void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]); void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
# endif # endif
# elif defined(__arm__) || defined(__arm) # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
# include "arm_arch.h" # include "arm_arch.h"
# if __ARM_ARCH__>=7 # if __ARM_ARCH__>=7
# define GHASH_ASM_ARM # define GHASH_ASM_ARM
# define GCM_FUNCREF_4BIT # define GCM_FUNCREF_4BIT
# if defined(__aarch64__)
# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
# endif
# if defined(__arm__) || defined(__arm)
# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
# endif
void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]); void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
# endif # endif
# elif defined(_TMS320C6400_PLUS) # elif defined(_TMS320C6400_PLUS)
# define GHASH_ASM_C64Xplus # define GHASH_ASM_C64Xplus
@@ -740,10 +749,20 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
ctx->ghash = gcm_ghash_4bit; ctx->ghash = gcm_ghash_4bit;
# endif # endif
# elif defined(GHASH_ASM_ARM) # elif defined(GHASH_ASM_ARM)
if (OPENSSL_armcap_P & ARMV7_NEON) { # ifdef PMULL_CAPABLE
if (PMULL_CAPABLE) {
gcm_init_v8(ctx->Htable,ctx->H.u);
ctx->gmult = gcm_gmult_v8;
ctx->ghash = gcm_ghash_v8;
} else
# endif
# ifdef NEON_CAPABLE
if (NEON_CAPABLE) {
ctx->gmult = gcm_gmult_neon; ctx->gmult = gcm_gmult_neon;
ctx->ghash = gcm_ghash_neon; ctx->ghash = gcm_ghash_neon;
} else { } else
# endif
{
gcm_init_4bit(ctx->Htable,ctx->H.u); gcm_init_4bit(ctx->Htable,ctx->H.u);
ctx->gmult = gcm_gmult_4bit; ctx->gmult = gcm_gmult_4bit;
ctx->ghash = gcm_ghash_4bit; ctx->ghash = gcm_ghash_4bit;