lib: add new interface supporting AVX2 with GFNI

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
This commit is contained in:
Pablo de Lara 2023-11-28 18:19:33 +00:00
parent f971f02309
commit c8dd92f04a
2 changed files with 88 additions and 0 deletions

View File

@ -431,10 +431,97 @@
pop mbin_rsi
ret
%endmacro
;;;;;
; mbin_dispatch_init8 parameters
; 1-> function name
; 2-> base function
; 3-> SSE4_2 or 00/01 optimized function
; 4-> AVX/02 opt func
; 5-> AVX2/04 opt func
; 6-> AVX512/06 opt func
; 7-> AVX2 Update/07 opt func
; 8-> AVX512 Update/10 opt func
;;;;;
%macro mbin_dispatch_init8 8
section .text
%1_dispatch_init:
push mbin_rsi
push mbin_rax
push mbin_rbx
push mbin_rcx
push mbin_rdx
push mbin_rdi
lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
mov eax, 1
cpuid
mov ebx, ecx ; save cpuid1.ecx
test ecx, FLAG_CPUID1_ECX_SSE4_2
je _%1_init_done ; Use base function if no SSE4_2
lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt
;; Test for XMM_YMM support/AVX
test ecx, FLAG_CPUID1_ECX_OSXSAVE
je _%1_init_done
xor ecx, ecx
xgetbv ; xcr -> edx:eax
mov edi, eax ; save xgetvb.eax
and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
jne _%1_init_done
test ebx, FLAG_CPUID1_ECX_AVX
je _%1_init_done
lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt
;; Test for AVX2
xor ecx, ecx
mov eax, 7
cpuid
test ebx, FLAG_CPUID7_EBX_AVX2
je _%1_init_done ; No AVX2 possible
lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func
;; Test for AVX512
and edi, FLAG_XGETBV_EAX_ZMM_OPM
cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
jne _%1_check_avx2_g2 ; No AVX512 possible
and ebx, FLAGS_CPUID7_EBX_AVX512_G1
cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt
cmove mbin_rsi, mbin_rbx
and ecx, FLAGS_CPUID7_ECX_AVX512_G2
cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2
lea mbin_rbx, [%8 WRT_OPT] ; AVX512/10 opt
cmove mbin_rsi, mbin_rbx
jmp _%1_init_done
_%1_check_avx2_g2:
;; Test for AVX2 Gen 2
and ecx, FLAGS_CPUID7_ECX_AVX2_G2
cmp ecx, FLAGS_CPUID7_ECX_AVX2_G2
lea mbin_rbx, [%7 WRT_OPT] ; AVX2/7 opt
cmove mbin_rsi, mbin_rbx
_%1_init_done:
pop mbin_rdi
pop mbin_rdx
pop mbin_rcx
pop mbin_rbx
pop mbin_rax
mov [%1_dispatched], mbin_rsi
pop mbin_rsi
ret
%endmacro
%else
%macro mbin_dispatch_init7 7
mbin_dispatch_init6 %1, %2, %3, %4, %5, %6
%endmacro
%macro mbin_dispatch_init8 8
mbin_dispatch_init6 %1, %2, %3, %4, %5, %6
%endmacro
%endif
%endif ; ifndef _MULTIBINARY_ASM_

View File

@ -67,6 +67,7 @@
%define FLAGS_CPUID7_EBX_AVX512_G1 (FLAG_CPUID7_EBX_AVX512F | FLAG_CPUID7_EBX_AVX512VL | FLAG_CPUID7_EBX_AVX512BW | FLAG_CPUID7_EBX_AVX512CD | FLAG_CPUID7_EBX_AVX512DQ)
%define FLAGS_CPUID7_ECX_AVX512_G2 (FLAG_CPUID7_ECX_AVX512VBMI2 | FLAG_CPUID7_ECX_GFNI | FLAG_CPUID7_ECX_VAES | FLAG_CPUID7_ECX_VPCLMULQDQ | FLAG_CPUID7_ECX_VNNI | FLAG_CPUID7_ECX_BITALG | FLAG_CPUID7_ECX_VPOPCNTDQ)
%define FLAGS_CPUID7_ECX_AVX2_G2 (FLAG_CPUID7_ECX_GFNI | FLAG_CPUID7_ECX_VAES | FLAG_CPUID7_ECX_VPCLMULQDQ)
%define FLAG_XGETBV_EAX_XMM (1<<1)
%define FLAG_XGETBV_EAX_YMM (1<<2)