From 91e7906f3f4c4309ade3636cbf9515bf38cff197 Mon Sep 17 00:00:00 2001 From: Pablo de Lara Date: Fri, 12 Jan 2024 15:16:53 +0000 Subject: [PATCH] erasure_code: check for size on gf_vect_mul_sse/avx gf_vect_mul requires length to be multiple of 32 bytes, so this check is added in the SSE/AVX implementations. Signed-off-by: Pablo de Lara --- erasure_code/gf_vect_mul_avx.asm | 16 ++++++++++------ erasure_code/gf_vect_mul_sse.asm | 15 +++++++++------ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/erasure_code/gf_vect_mul_avx.asm b/erasure_code/gf_vect_mul_avx.asm index f4ce28e..7ca6d01 100644 --- a/erasure_code/gf_vect_mul_avx.asm +++ b/erasure_code/gf_vect_mul_avx.asm @@ -38,9 +38,6 @@ %define arg1 rsi %define arg2 rdx %define arg3 rcx - %define arg4 r8 - %define arg5 r9 - %define tmp r11 %define return rax %define func(x) x: endbranch %define FUNC_SAVE @@ -81,6 +78,7 @@ %define src arg2 %define dest arg3 %define pos return +%define tmp r11 ;;; Use Non-temporal load/stor @@ -113,7 +111,14 @@ section .text align 16 mk_global gf_vect_mul_avx, function func(gf_vect_mul_avx) + + ; Check if length is multiple of 32 bytes + mov tmp, len + and tmp, 0x1f + jnz return_fail + FUNC_SAVE + mov pos, 0 vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte vmovdqu xgft_lo, [mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ... @@ -140,14 +145,13 @@ loop32: XSTR [dest+pos-16], xtmp2b ;Store +16B result jl loop32 + FUNC_RESTORE return_pass: - FUNC_RESTORE - sub pos, len + xor return, return ret return_fail: - FUNC_RESTORE mov return, 1 ret diff --git a/erasure_code/gf_vect_mul_sse.asm b/erasure_code/gf_vect_mul_sse.asm index 242d85b..b0e3c61 100644 --- a/erasure_code/gf_vect_mul_sse.asm +++ b/erasure_code/gf_vect_mul_sse.asm @@ -38,9 +38,6 @@ %define arg1 rsi %define arg2 rdx %define arg3 rcx - %define arg4 r8 - %define arg5 r9 - %define tmp r11 %define return rax %define func(x) x: endbranch %define FUNC_SAVE @@ -81,6 +78,7 @@ %define src arg2 %define dest arg3 %define pos return +%define tmp r11 ;;; Use Non-temporal load/stor @@ -114,7 +112,13 @@ section .text align 16 mk_global gf_vect_mul_sse, function func(gf_vect_mul_sse) + ; Check if length is multiple of 32 bytes + mov tmp, len + and tmp, 0x1f + jnz return_fail + FUNC_SAVE + mov pos, 0 movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte movdqu xgft_lo, [mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ... @@ -147,15 +151,14 @@ loop32: cmp pos, len jl loop32 + FUNC_RESTORE return_pass: - sub pos, len - FUNC_RESTORE + xor return, return ret return_fail: mov return, 1 - FUNC_RESTORE ret endproc_frame