erasure_code: add special dispatch case

Using the highest-level instruction set does not always give the
best performance on every platform. E.g. using the AVX implementation
for EC updating instead of the AVX2 implementation can be faster on
Hygon 1/2/3 platforms.

This commit identifies the Hygon platform and uses a special
dispatch case for ec_encode_data_update to choose a specific
instruction set implementation.

Signed-off-by: Maodi Ma <mamaodi@hygon.cn>
This commit is contained in:
Maodi Ma
2025-09-25 14:23:41 +00:00
committed by Pablo de Lara
parent a439f0dd5d
commit d36de972ef
3 changed files with 133 additions and 2 deletions

View File

@@ -77,7 +77,7 @@ mbin_interface ec_init_tables
; Dispatcher instantiations: the first argument is the public function name,
; the remaining arguments are the candidate implementations to choose from.
; For the 8-argument forms the slots are (per the mbin_dispatch_init8_hygon
; parameter list; presumably identical for mbin_dispatch_init8 - TODO confirm):
;   name, base, SSE4_2, AVX, AVX2, AVX512, AVX2 update, AVX512 update
mbin_dispatch_init5 gf_vect_mul, gf_vect_mul_base, gf_vect_mul_sse, gf_vect_mul_avx, gf_vect_mul_avx
mbin_dispatch_init8 ec_encode_data, ec_encode_data_base, ec_encode_data_sse, ec_encode_data_avx, ec_encode_data_avx2, ec_encode_data_avx512, ec_encode_data_avx2_gfni, ec_encode_data_avx512_gfni
; NOTE(review): the next two lines both instantiate a dispatcher for
; ec_encode_data_update with the same candidate list. They look like the
; removed/added pair of a diff whose -/+ markers were lost; if both were
; assembled they would presumably define ec_encode_data_update_dispatch_init
; twice. Confirm that only the _hygon variant is meant to remain.
mbin_dispatch_init8 ec_encode_data_update, ec_encode_data_update_base, ec_encode_data_update_sse, ec_encode_data_update_avx, ec_encode_data_update_avx2, ec_encode_data_update_avx512, ec_encode_data_update_avx2_gfni, ec_encode_data_update_avx512_gfni
mbin_dispatch_init8_hygon ec_encode_data_update, ec_encode_data_update_base, ec_encode_data_update_sse, ec_encode_data_update_avx, ec_encode_data_update_avx2, ec_encode_data_update_avx512, ec_encode_data_update_avx2_gfni, ec_encode_data_update_avx512_gfni
mbin_dispatch_init6 gf_vect_mad, gf_vect_mad_base, gf_vect_mad_sse, gf_vect_mad_avx, gf_vect_mad_avx2, gf_vect_mad_avx512
mbin_dispatch_init6 gf_vect_dot_prod, gf_vect_dot_prod_base, gf_vect_dot_prod_sse, gf_vect_dot_prod_avx, gf_vect_dot_prod_avx2, gf_vect_dot_prod_avx512
; ec_init_tables passes the base function in every slot except the last two,
; which are the gfni variant.
mbin_dispatch_init8 ec_init_tables, ec_init_tables_base, ec_init_tables_base, ec_init_tables_base, ec_init_tables_base, ec_init_tables_base, ec_init_tables_gfni, ec_init_tables_gfni

View File

@@ -475,4 +475,128 @@
ret
%endmacro
;;;;;
; mbin_dispatch_init8_hygon parameters
; 1-> function name
; 2-> base function
; 3-> SSE4_2 or 00/01 optimized function
; 4-> AVX/02 opt func
; 5-> AVX2/04 opt func
; 6-> AVX512/06 opt func
; 7-> AVX2 Update/07 opt func
; 8-> AVX512 Update/10 opt func
;
; Emits %1_dispatch_init, which probes the CPU and stores the address of
; the selected implementation in [%1_dispatched]. Every general-purpose
; register it uses (rax, rbx, rcx, rdx, rsi, rdi) is saved on entry and
; restored before returning; flags are not preserved.
;
; With special case:
; - On Hygon 1/2/3 the AVX function (%4) is kept, even when AVX2 is
;   available.
;;;;;
%macro mbin_dispatch_init8_hygon 8
	section .text
	%1_dispatch_init:
		push	rsi
		push	rax
		push	rbx
		push	rcx
		push	rdx
		push	rdi

		;; rsi always holds the best candidate found so far
		lea	rsi, [%2 WRT_OPT]		; Default - use base function

		;; CPUID leaf 1: the SSE4_2, OSXSAVE and AVX bits all live in ecx
		mov	eax, 1
		cpuid
		test	ecx, FLAG_CPUID1_ECX_SSE4_2
		jz	_%1_init_done			; No SSE4_2: keep base function
		lea	rsi, [%3 WRT_OPT]		; SSE possible so use 00/01 opt

		;; AVX needs OSXSAVE (so xgetbv can be executed) and the AVX bit.
		;; Both are tested here, before ecx is reused for xgetbv.
		test	ecx, FLAG_CPUID1_ECX_OSXSAVE
		jz	_%1_init_done
		test	ecx, FLAG_CPUID1_ECX_AVX
		jz	_%1_init_done

		;; Test for XMM_YMM state support
		xor	ecx, ecx
		xgetbv					; xcr -> edx:eax
		mov	edi, eax			; save xgetbv.eax for the AVX512 test
		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		jne	_%1_init_done
		lea	rsi, [%4 WRT_OPT]		; AVX/02 opt

		;; Hygon platform check: keep the AVX opt on Hygon 1/2/3 for
		;; performance, even if they have the ability to use the AVX2 opt.
		xor	eax, eax
		cpuid					; leaf 0: vendor ID in ebx, edx, ecx
		cmp	ebx, FLAG_CPUID0_EBX_HYGON
		jne	_%1_test_avx2			; Not Hygon. Proceed as normal
		cmp	edx, FLAG_CPUID0_EDX_HYGON
		jne	_%1_test_avx2			; Not Hygon. Proceed as normal
		cmp	ecx, FLAG_CPUID0_ECX_HYGON
		jne	_%1_test_avx2			; Not Hygon. Proceed as normal

		;; All vendor ID registers match: Hygon confirmed.
		;; Family & model check: identify Hygon 1/2/3.
		mov	eax, 1
		cpuid
		and	eax, FLAG_CPUID1_EAX_STEP_MASK
		cmp	eax, FLAG_CPUID1_EAX_HYGON1
		je	_%1_init_done			; Hygon 1: keep AVX opt
		cmp	eax, FLAG_CPUID1_EAX_HYGON2
		je	_%1_init_done			; Hygon 2: keep AVX opt
		cmp	eax, FLAG_CPUID1_EAX_HYGON3
		je	_%1_init_done			; Hygon 3: keep AVX opt
		;; Hygon, but none of 1/2/3: continue with the normal procedure

	_%1_test_avx2:
		;; Test for AVX2
		xor	ecx, ecx
		mov	eax, 7
		cpuid
		test	ebx, FLAG_CPUID7_EBX_AVX2
		jz	_%1_init_done			; No AVX2 possible
		lea	rsi, [%5 WRT_OPT]		; AVX2/04 opt func

		;; Test for AVX512: all FLAG_XGETBV_EAX_ZMM_OPM bits must be set
		and	edi, FLAG_XGETBV_EAX_ZMM_OPM
		cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
		jne	_%1_test_avx2_g2		; No AVX512 possible

		;; lea leaves the flags alone, so each cmove below still sees
		;; the result of the cmp that precedes it.
		and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		lea	rbx, [%6 WRT_OPT]		; AVX512/06 opt
		cmove	rsi, rbx

		and	ecx, FLAGS_CPUID7_ECX_AVX512_G2
		cmp	ecx, FLAGS_CPUID7_ECX_AVX512_G2
		lea	rbx, [%8 WRT_OPT]		; AVX512/10 opt
		cmove	rsi, rbx
		jmp	_%1_init_done

	_%1_test_avx2_g2:
		;; Test for AVX2 Gen 2
		and	ecx, FLAGS_CPUID7_ECX_AVX2_G2
		cmp	ecx, FLAGS_CPUID7_ECX_AVX2_G2
		lea	rbx, [%7 WRT_OPT]		; AVX2/7 opt
		cmove	rsi, rbx

	_%1_init_done:
		pop	rdi
		pop	rdx
		pop	rcx
		pop	rbx
		pop	rax
		mov	[%1_dispatched], rsi		; publish the chosen function
		pop	rsi
		ret
%endmacro
%endif ; ifndef _MULTIBINARY_ASM_

View File

@@ -71,6 +71,13 @@
; XGETBV eax bits that must all be set before the dispatch macros consider
; the AVX512 variants.
%define FLAG_XGETBV_EAX_ZMM_OPM 0xe0

; CPUID leaf 1 eax signature helpers. STEP_MASK clears the low nibble
; (presumably the stepping field, going by the name) so that the remaining
; family/model bits can be compared against the HYGONn values below.
%define FLAG_CPUID1_EAX_STEP_MASK 0xfffffff0
; Masked CPUID leaf 1 eax values identifying Hygon 1, 2 and 3; they differ
; only in bits 7:4.
%define FLAG_CPUID1_EAX_HYGON1 0x00900f00
%define FLAG_CPUID1_EAX_HYGON2 0x00900f10
%define FLAG_CPUID1_EAX_HYGON3 0x00900f20
; CPUID leaf 0 vendor ID "HygonGenuine", split across ebx, edx, ecx.
; Each dword holds four ASCII characters with the first character in the
; lowest byte, which is why the quoted text on each line reads reversed.
%define FLAG_CPUID0_EBX_HYGON 0x6f677948 ;"ogyH"
%define FLAG_CPUID0_EDX_HYGON 0x6e65476e ;"neGn"
%define FLAG_CPUID0_ECX_HYGON 0x656e6975 ;"eniu"
; define d and w variants for registers