From c8dd92f04ac508206443d611b351f931901a444c Mon Sep 17 00:00:00 2001
From: Pablo de Lara
Date: Tue, 28 Nov 2023 18:19:33 +0000
Subject: [PATCH] lib: add new interface supporting AVX2 with GFNI

Signed-off-by: Pablo de Lara
---
 include/multibinary.asm | 87 +++++++++++++++++++++++++++++++++++++++++
 include/reg_sizes.asm   |  1 +
 2 files changed, 88 insertions(+)

diff --git a/include/multibinary.asm b/include/multibinary.asm
index 588352a..01db577 100644
--- a/include/multibinary.asm
+++ b/include/multibinary.asm
@@ -431,10 +431,97 @@
 	pop	mbin_rsi
 	ret
 %endmacro
+
+;;;;;
+; mbin_dispatch_init8 parameters (emits %1_dispatch_init, which probes CPUID/XGETBV and stores the chosen function in [%1_dispatched])
+; 1-> function name (prefix of the generated %1_dispatch_init label and of the %1_dispatched slot)
+; 2-> base function (used when SSE4_2 is not reported)
+; 3-> SSE4_2 or 00/01 optimized function
+; 4-> AVX/02 opt func (needs OSXSAVE, XMM+YMM state enabled in XCR0, and AVX)
+; 5-> AVX2/04 opt func
+; 6-> AVX512/06 opt func (needs ZMM state enabled in XCR0 + FLAGS_CPUID7_EBX_AVX512_G1)
+; 7-> AVX2 Update/07 opt func (needs AVX2 + FLAGS_CPUID7_ECX_AVX2_G2; tried only when no AVX512 variant is usable)
+; 8-> AVX512 Update/10 opt func (needs everything 6 needs + FLAGS_CPUID7_ECX_AVX512_G2)
+;;;;;
+%macro mbin_dispatch_init8 8
+	section .text
+	%1_dispatch_init:		; every mbin_r* register used below is saved and restored; flags are not preserved
+		push	mbin_rsi
+		push	mbin_rax
+		push	mbin_rbx
+		push	mbin_rcx
+		push	mbin_rdx
+		push	mbin_rdi
+		lea	mbin_rsi, [%2 WRT_OPT] ; Default - use base function
+
+		mov	eax, 1		; CPUID leaf 1
+		cpuid
+		mov	ebx, ecx ; save cpuid1.ecx (ecx is reused as the xgetbv index below)
+		test	ecx, FLAG_CPUID1_ECX_SSE4_2
+		je	_%1_init_done	  ; Use base function if no SSE4_2
+		lea	mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt
+
+		;; Test for XMM_YMM support/AVX
+		test	ecx, FLAG_CPUID1_ECX_OSXSAVE
+		je	_%1_init_done	  ; xgetbv is not usable without OSXSAVE
+		xor	ecx, ecx	  ; select XCR0
+		xgetbv	; xcr -> edx:eax
+		mov	edi, eax	  ; save xgetbv.eax for the later ZMM state test
+
+		and	eax, FLAG_XGETBV_EAX_XMM_YMM
+		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
+		jne	_%1_init_done	  ; XMM and YMM state must both be enabled
+		test	ebx, FLAG_CPUID1_ECX_AVX
+		je	_%1_init_done
+		lea	mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt
+
+		;; Test for AVX2
+		xor	ecx, ecx	  ; CPUID leaf 7, subleaf 0
+		mov	eax, 7
+		cpuid			  ; ebx/ecx now hold the leaf 7 feature bits
+		test	ebx, FLAG_CPUID7_EBX_AVX2
+		je	_%1_init_done		; No AVX2 possible
+		lea	mbin_rsi, [%5 WRT_OPT] 	; AVX2/04 opt func
+
+		;; Test for AVX512
+		and	edi, FLAG_XGETBV_EAX_ZMM_OPM
+		cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
+		jne	_%1_check_avx2_g2	  ; No AVX512 possible
+		and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
+		cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
+		jne	_%1_check_avx2_g2	  ; G1 incomplete: never pick an AVX512 variant, but AVX2 Gen 2 may still apply (ecx is untouched so far)
+		lea	mbin_rsi, [%6 WRT_OPT] ; AVX512/06 opt
+
+		and	ecx, FLAGS_CPUID7_ECX_AVX512_G2
+		cmp	ecx, FLAGS_CPUID7_ECX_AVX512_G2
+		lea	mbin_rbx, [%8 WRT_OPT] ; AVX512/10 opt (lea leaves the cmp flags intact)
+		cmove	mbin_rsi, mbin_rbx	; upgrade only when every G2 bit is set
+		jmp	_%1_init_done
+
+	_%1_check_avx2_g2:
+		;; Test for AVX2 Gen 2
+		and	ecx, FLAGS_CPUID7_ECX_AVX2_G2
+		cmp	ecx, FLAGS_CPUID7_ECX_AVX2_G2
+		lea	mbin_rbx, [%7 WRT_OPT] ; AVX2/7 opt
+		cmove	mbin_rsi, mbin_rbx	; otherwise keep the AVX2/04 choice
+
+	_%1_init_done:
+		pop	mbin_rdi
+		pop	mbin_rdx
+		pop	mbin_rcx
+		pop	mbin_rbx
+		pop	mbin_rax
+		mov	[%1_dispatched], mbin_rsi	; publish the selected implementation
+		pop	mbin_rsi
+		ret
+%endmacro
 %else
 %macro mbin_dispatch_init7 7
 	mbin_dispatch_init6 %1, %2, %3, %4, %5, %6
 %endmacro
+%macro mbin_dispatch_init8 8
+	mbin_dispatch_init6 %1, %2, %3, %4, %5, %6	; parameters 7 and 8 are dropped in this branch
+%endmacro
 %endif
 
 %endif ; ifndef _MULTIBINARY_ASM_
diff --git a/include/reg_sizes.asm b/include/reg_sizes.asm
index 839b912..983f8b4 100644
--- a/include/reg_sizes.asm
+++ b/include/reg_sizes.asm
@@ -67,6 +67,7 @@
 
 %define FLAGS_CPUID7_EBX_AVX512_G1 (FLAG_CPUID7_EBX_AVX512F | FLAG_CPUID7_EBX_AVX512VL | FLAG_CPUID7_EBX_AVX512BW | FLAG_CPUID7_EBX_AVX512CD | FLAG_CPUID7_EBX_AVX512DQ)
 %define FLAGS_CPUID7_ECX_AVX512_G2 (FLAG_CPUID7_ECX_AVX512VBMI2 | FLAG_CPUID7_ECX_GFNI | FLAG_CPUID7_ECX_VAES | FLAG_CPUID7_ECX_VPCLMULQDQ | FLAG_CPUID7_ECX_VNNI | FLAG_CPUID7_ECX_BITALG | FLAG_CPUID7_ECX_VPOPCNTDQ)
+%define FLAGS_CPUID7_ECX_AVX2_G2 (FLAG_CPUID7_ECX_GFNI | FLAG_CPUID7_ECX_VAES | FLAG_CPUID7_ECX_VPCLMULQDQ)
 
 %define FLAG_XGETBV_EAX_XMM (1<<1)
 %define FLAG_XGETBV_EAX_YMM (1<<2)