mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 09:23:50 +01:00
lib: add new interface supporting AVX2 with GFNI
Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
This commit is contained in:
parent
f971f02309
commit
c8dd92f04a
@ -431,10 +431,97 @@
|
||||
pop mbin_rsi
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
;;;;;
; mbin_dispatch_init8 parameters
; 1-> function name
; 2-> base function
; 3-> SSE4_2 or 00/01 optimized function
; 4-> AVX/02 opt func
; 5-> AVX2/04 opt func
; 6-> AVX512/06 opt func
; 7-> AVX2 Update/07 opt func
; 8-> AVX512 Update/10 opt func
;
; Emits %1_dispatch_init, which probes CPUID/XGETBV and stores the address
; of the best usable implementation in [%1_dispatched].
;
; Selection ladder (each step requires every earlier step to have passed):
;   %2  default
;   %3  CPUID.1:ECX has SSE4_2
;   %4  OSXSAVE set, XCR0 has XMM+YMM state enabled, CPUID.1:ECX has AVX
;   %5  CPUID.7:EBX has AVX2
;   then one of:
;     %6  XCR0 has the ZMM/opmask state bits and all AVX512_G1 bits are set
;     %8  as %6, and all AVX512_G2 bits are set in CPUID.7:ECX
;     %7  AVX512 not usable, but all AVX2_G2 bits are set in CPUID.7:ECX
;
; Register roles inside the routine:
;   mbin_rsi  currently selected function address
;   ebx       cpuid1.ecx copy, later cpuid7.ebx, finally scratch (mbin_rbx)
;   ecx       cpuid7.ecx after the leaf 7 query
;   edi       low 32 bits of the XGETBV result
; Every register touched is pushed on entry and popped before returning.
;;;;;
%macro mbin_dispatch_init8 8
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx
	push	mbin_rdi
	lea	mbin_rsi, [%2 WRT_OPT] ; Default - use base function

	mov	eax, 1
	cpuid
	mov	ebx, ecx ; save cpuid1.ecx
	test	ecx, FLAG_CPUID1_ECX_SSE4_2
	je	_%1_init_done ; Use base function if no SSE4_2
	lea	mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt

	;; Test for XMM_YMM support/AVX
	test	ecx, FLAG_CPUID1_ECX_OSXSAVE
	je	_%1_init_done
	xor	ecx, ecx
	xgetbv	; xcr -> edx:eax
	mov	edi, eax	  ; save xgetbv.eax

	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	jne	_%1_init_done
	test	ebx, FLAG_CPUID1_ECX_AVX
	je	_%1_init_done
	lea	mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt

	;; Test for AVX2
	xor	ecx, ecx
	mov	eax, 7
	cpuid			; ebx = cpuid7.ebx, ecx = cpuid7.ecx
	test	ebx, FLAG_CPUID7_EBX_AVX2
	je	_%1_init_done		; No AVX2 possible
	lea	mbin_rsi, [%5 WRT_OPT]	; AVX2/04 opt func

	;; Test for AVX512
	and	edi, FLAG_XGETBV_EAX_ZMM_OPM
	cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
	jne	_%1_check_avx2_g2	  ; No AVX512 possible
	and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	;; The G2 function must never be picked without the full G1 set, so an
	;; incomplete G1 falls back to the AVX2 Gen 2 test instead of continuing.
	;; ecx still holds the unmodified cpuid7.ecx at this point.
	jne	_%1_check_avx2_g2
	lea	mbin_rsi, [%6 WRT_OPT] ; AVX512/06 opt

	and	ecx, FLAGS_CPUID7_ECX_AVX512_G2
	cmp	ecx, FLAGS_CPUID7_ECX_AVX512_G2
	lea	mbin_rbx, [%8 WRT_OPT] ; AVX512/10 opt (lea leaves flags intact)
	cmove	mbin_rsi, mbin_rbx
	jmp	_%1_init_done

_%1_check_avx2_g2:
	;; Test for AVX2 Gen 2
	and	ecx, FLAGS_CPUID7_ECX_AVX2_G2
	cmp	ecx, FLAGS_CPUID7_ECX_AVX2_G2
	lea	mbin_rbx, [%7 WRT_OPT] ; AVX2/7 opt (lea leaves flags intact)
	cmove	mbin_rsi, mbin_rbx

_%1_init_done:
	pop	mbin_rdi
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	mov	[%1_dispatched], mbin_rsi ; publish the selection before restoring rsi
	pop	mbin_rsi
	ret
%endmacro
|
||||
%else
|
||||
;; Seven-argument dispatcher for the %else side of the enclosing conditional.
;; Arguments 1-6 are forwarded unchanged to mbin_dispatch_init6; argument 7
;; is accepted so callers need no changes, but it is not used.
;; NOTE(review): the condition guarding this %else is outside this view -
;; presumably an assembler-capability check; confirm before relying on it.
%macro mbin_dispatch_init7 7
	mbin_dispatch_init6	%1, %2, %3, %4, %5, %6
%endmacro
|
||||
;; Eight-argument dispatcher for the %else side of the enclosing conditional.
;; Arguments 1-6 are forwarded unchanged to mbin_dispatch_init6; arguments 7
;; and 8 are accepted so callers need no changes, but they are not used.
;; NOTE(review): the condition guarding this %else is outside this view -
;; presumably an assembler-capability check; confirm before relying on it.
%macro mbin_dispatch_init8 8
	mbin_dispatch_init6	%1, %2, %3, %4, %5, %6
%endmacro
|
||||
%endif
|
||||
|
||||
%endif ; ifndef _MULTIBINARY_ASM_
|
||||
|
@ -67,6 +67,7 @@
|
||||
|
||||
;; Composite feature masks for CPUID leaf 7. The dispatch macros treat a
;; group as available only when (reg & mask) == mask, i.e. every listed bit
;; is set.

;; EBX bits required for the first AVX512 group
%define FLAGS_CPUID7_EBX_AVX512_G1 \
	(FLAG_CPUID7_EBX_AVX512F | FLAG_CPUID7_EBX_AVX512VL | FLAG_CPUID7_EBX_AVX512BW | FLAG_CPUID7_EBX_AVX512CD | FLAG_CPUID7_EBX_AVX512DQ)

;; ECX bits required for the second AVX512 group
%define FLAGS_CPUID7_ECX_AVX512_G2 \
	(FLAG_CPUID7_ECX_AVX512VBMI2 | FLAG_CPUID7_ECX_GFNI | FLAG_CPUID7_ECX_VAES | FLAG_CPUID7_ECX_VPCLMULQDQ | FLAG_CPUID7_ECX_VNNI | FLAG_CPUID7_ECX_BITALG | FLAG_CPUID7_ECX_VPOPCNTDQ)

;; ECX bits required for the second AVX2 group (a subset of the AVX512_G2 bits)
%define FLAGS_CPUID7_ECX_AVX2_G2 \
	(FLAG_CPUID7_ECX_GFNI | FLAG_CPUID7_ECX_VAES | FLAG_CPUID7_ECX_VPCLMULQDQ)

;; Individual state-component bits in the low half of the XGETBV result
%define FLAG_XGETBV_EAX_XMM		(1<<1)
%define FLAG_XGETBV_EAX_YMM		(1<<2)
|
||||
|
Loading…
Reference in New Issue
Block a user