From 801df41929846fb386853bacd1a97d24e8697497 Mon Sep 17 00:00:00 2001
From: Pablo de Lara
Date: Thu, 16 Nov 2023 20:47:55 +0000
Subject: [PATCH] erasure_code: fix vmovdqa instruction

vmovdqa needs to be vmovdqa32/64 when used on ZMMs (EVEX encoded).

Signed-off-by: Pablo de Lara
---
 erasure_code/gf_2vect_dot_prod_avx512.asm | 4 ++--
 erasure_code/gf_2vect_mad_avx512.asm      | 4 ++--
 erasure_code/gf_3vect_dot_prod_avx512.asm | 4 ++--
 erasure_code/gf_3vect_mad_avx512.asm      | 4 ++--
 erasure_code/gf_4vect_dot_prod_avx512.asm | 4 ++--
 erasure_code/gf_4vect_mad_avx512.asm      | 4 ++--
 erasure_code/gf_5vect_dot_prod_avx512.asm | 4 ++--
 erasure_code/gf_5vect_mad_avx512.asm      | 4 ++--
 erasure_code/gf_6vect_dot_prod_avx512.asm | 4 ++--
 erasure_code/gf_6vect_mad_avx512.asm      | 4 ++--
 erasure_code/gf_vect_dot_prod_avx512.asm  | 4 ++--
 erasure_code/gf_vect_mad_avx512.asm       | 4 ++--
 12 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/erasure_code/gf_2vect_dot_prod_avx512.asm b/erasure_code/gf_2vect_dot_prod_avx512.asm
index 92d7e9d..5033d3f 100644
--- a/erasure_code/gf_2vect_dot_prod_avx512.asm
+++ b/erasure_code/gf_2vect_dot_prod_avx512.asm
@@ -133,8 +133,8 @@
 %else
 ;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
diff --git a/erasure_code/gf_2vect_mad_avx512.asm b/erasure_code/gf_2vect_mad_avx512.asm
index ce37248..97830ac 100644
--- a/erasure_code/gf_2vect_mad_avx512.asm
+++ b/erasure_code/gf_2vect_mad_avx512.asm
@@ -118,8 +118,8 @@
 %else
 ;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
diff --git a/erasure_code/gf_3vect_dot_prod_avx512.asm b/erasure_code/gf_3vect_dot_prod_avx512.asm
index 81e96f2..df4b9f2 100644
--- a/erasure_code/gf_3vect_dot_prod_avx512.asm
+++ b/erasure_code/gf_3vect_dot_prod_avx512.asm
@@ -142,8 +142,8 @@
 %else
 ;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
diff --git a/erasure_code/gf_3vect_mad_avx512.asm b/erasure_code/gf_3vect_mad_avx512.asm
index bc61900..3538863 100644
--- a/erasure_code/gf_3vect_mad_avx512.asm
+++ b/erasure_code/gf_3vect_mad_avx512.asm
@@ -117,8 +117,8 @@
 %else
 ;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
diff --git a/erasure_code/gf_4vect_dot_prod_avx512.asm b/erasure_code/gf_4vect_dot_prod_avx512.asm
index 9d32973..b43ec7d 100644
--- a/erasure_code/gf_4vect_dot_prod_avx512.asm
+++ b/erasure_code/gf_4vect_dot_prod_avx512.asm
@@ -156,8 +156,8 @@
 %else
 ;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
diff --git a/erasure_code/gf_4vect_mad_avx512.asm b/erasure_code/gf_4vect_mad_avx512.asm
index 77dc76b..02d9d62 100644
--- a/erasure_code/gf_4vect_mad_avx512.asm
+++ b/erasure_code/gf_4vect_mad_avx512.asm
@@ -117,8 +117,8 @@
 %else
 ;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
diff --git a/erasure_code/gf_5vect_dot_prod_avx512.asm b/erasure_code/gf_5vect_dot_prod_avx512.asm
index 1cca65b..bedca5b 100644
--- a/erasure_code/gf_5vect_dot_prod_avx512.asm
+++ b/erasure_code/gf_5vect_dot_prod_avx512.asm
@@ -172,8 +172,8 @@
 %else
 ;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
diff --git a/erasure_code/gf_5vect_mad_avx512.asm b/erasure_code/gf_5vect_mad_avx512.asm
index e2a1455..4063295 100644
--- a/erasure_code/gf_5vect_mad_avx512.asm
+++ b/erasure_code/gf_5vect_mad_avx512.asm
@@ -120,8 +120,8 @@
 %else
 ;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
diff --git a/erasure_code/gf_6vect_dot_prod_avx512.asm b/erasure_code/gf_6vect_dot_prod_avx512.asm
index bb25e67..e9e4777 100644
--- a/erasure_code/gf_6vect_dot_prod_avx512.asm
+++ b/erasure_code/gf_6vect_dot_prod_avx512.asm
@@ -172,8 +172,8 @@
 %else
 ;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
diff --git a/erasure_code/gf_6vect_mad_avx512.asm b/erasure_code/gf_6vect_mad_avx512.asm
index c2383a2..e07937b 100644
--- a/erasure_code/gf_6vect_mad_avx512.asm
+++ b/erasure_code/gf_6vect_mad_avx512.asm
@@ -129,8 +129,8 @@
 %else
 ;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
diff --git a/erasure_code/gf_vect_dot_prod_avx512.asm b/erasure_code/gf_vect_dot_prod_avx512.asm
index 37fe082..7806250 100644
--- a/erasure_code/gf_vect_dot_prod_avx512.asm
+++ b/erasure_code/gf_vect_dot_prod_avx512.asm
@@ -104,8 +104,8 @@
 %else
 ;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
diff --git a/erasure_code/gf_vect_mad_avx512.asm b/erasure_code/gf_vect_mad_avx512.asm
index 43982e3..8827a8f 100644
--- a/erasure_code/gf_vect_mad_avx512.asm
+++ b/erasure_code/gf_vect_mad_avx512.asm
@@ -101,8 +101,8 @@
 %else
 ;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
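
Note (commentary, not part of the patch to apply): a minimal standalone NASM sketch of the encoding issue the patch addresses. The file layout and label below are hypothetical and only illustrate the mnemonic change; plain vmovdqa has VEX encodings for XMM/YMM operands only, so when the XLDR/XSTR macros expand with ZMM registers the EVEX-encoded vmovdqa32/vmovdqa64 forms are required.

    bits 64
    section .text

    global zmm_copy_block          ; hypothetical helper: copy 64 aligned bytes
    zmm_copy_block:
            ;vmovdqa   zmm0, [rdi] ; rejected: no EVEX/ZMM form under this mnemonic
            vmovdqa64  zmm0, [rdi] ; EVEX-encoded aligned 64-byte load
            vmovdqa64  [rsi], zmm0 ; EVEX-encoded aligned 64-byte store
            ret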