diff --git a/erasure_code/gf_2vect_dot_prod_avx512.asm b/erasure_code/gf_2vect_dot_prod_avx512.asm index 5033d3f..b988622 100644 --- a/erasure_code/gf_2vect_dot_prod_avx512.asm +++ b/erasure_code/gf_2vect_dot_prod_avx512.asm @@ -73,7 +73,7 @@ %define return rax %define PS 8 %define LOG_PS 3 - %define stack_size 9*16 + 5*8 ; must be an odd multiple of 8 + %define stack_size 3*16 + 3*8 ; must be an odd multiple of 8 %define arg(x) [rsp + stack_size + PS + PS*x] %define func(x) proc_frame x @@ -82,16 +82,9 @@ vmovdqa [rsp + 0*16], xmm6 vmovdqa [rsp + 1*16], xmm7 vmovdqa [rsp + 2*16], xmm8 - vmovdqa [rsp + 3*16], xmm9 - vmovdqa [rsp + 4*16], xmm10 - vmovdqa [rsp + 5*16], xmm11 - vmovdqa [rsp + 6*16], xmm12 - vmovdqa [rsp + 7*16], xmm13 - vmovdqa [rsp + 8*16], xmm14 - save_reg r12, 9*16 + 0*8 - save_reg r13, 9*16 + 1*8 - save_reg r14, 9*16 + 2*8 - save_reg r15, 9*16 + 3*8 + save_reg r12, 3*16 + 0*8 + save_reg r13, 3*16 + 1*8 + save_reg r15, 3*16 + 2*8 end_prolog mov arg4, arg(4) %endmacro @@ -100,16 +93,9 @@ vmovdqa xmm6, [rsp + 0*16] vmovdqa xmm7, [rsp + 1*16] vmovdqa xmm8, [rsp + 2*16] - vmovdqa xmm9, [rsp + 3*16] - vmovdqa xmm10, [rsp + 4*16] - vmovdqa xmm11, [rsp + 5*16] - vmovdqa xmm12, [rsp + 6*16] - vmovdqa xmm13, [rsp + 7*16] - vmovdqa xmm14, [rsp + 8*16] - mov r12, [rsp + 9*16 + 0*8] - mov r13, [rsp + 9*16 + 1*8] - mov r14, [rsp + 9*16 + 2*8] - mov r15, [rsp + 9*16 + 3*8] + mov r12, [rsp + 3*16 + 0*8] + mov r13, [rsp + 3*16 + 1*8] + mov r15, [rsp + 3*16 + 2*8] add rsp, stack_size %endmacro %endif diff --git a/erasure_code/gf_3vect_dot_prod_avx512.asm b/erasure_code/gf_3vect_dot_prod_avx512.asm index df4b9f2..8b183bc 100644 --- a/erasure_code/gf_3vect_dot_prod_avx512.asm +++ b/erasure_code/gf_3vect_dot_prod_avx512.asm @@ -44,8 +44,6 @@ %define arg5 r9 %define tmp r11 - %define tmp.w r11d - %define tmp.b r11b %define tmp2 r10 %define tmp3 r13 ; must be saved and restored %define tmp4 r12 ; must be saved and restored @@ -73,15 +71,13 @@ %define arg4 r12 ; must be saved, loaded and restored %define arg5 r15 ; must be saved and restored %define tmp r11 - %define tmp.w r11d - %define tmp.b r11b %define tmp2 r10 %define tmp3 r13 ; must be saved and restored %define tmp4 r14 ; must be saved and restored %define return rax %define PS 8 %define LOG_PS 3 - %define stack_size 9*16 + 5*8 ; must be an odd multiple of 8 + %define stack_size 6*16 + 5*8 ; must be an odd multiple of 8 %define arg(x) [rsp + stack_size + PS + PS*x] %define func(x) proc_frame x @@ -93,13 +89,10 @@ vmovdqa [rsp + 3*16], xmm9 vmovdqa [rsp + 4*16], xmm10 vmovdqa [rsp + 5*16], xmm11 - vmovdqa [rsp + 6*16], xmm12 - vmovdqa [rsp + 7*16], xmm13 - vmovdqa [rsp + 8*16], xmm14 - save_reg r12, 9*16 + 0*8 - save_reg r13, 9*16 + 1*8 - save_reg r14, 9*16 + 2*8 - save_reg r15, 9*16 + 3*8 + save_reg r12, 6*16 + 0*8 + save_reg r13, 6*16 + 1*8 + save_reg r14, 6*16 + 2*8 + save_reg r15, 6*16 + 3*8 end_prolog mov arg4, arg(4) %endmacro @@ -111,13 +104,10 @@ vmovdqa xmm9, [rsp + 3*16] vmovdqa xmm10, [rsp + 4*16] vmovdqa xmm11, [rsp + 5*16] - vmovdqa xmm12, [rsp + 6*16] - vmovdqa xmm13, [rsp + 7*16] - vmovdqa xmm14, [rsp + 8*16] - mov r12, [rsp + 9*16 + 0*8] - mov r13, [rsp + 9*16 + 1*8] - mov r14, [rsp + 9*16 + 2*8] - mov r15, [rsp + 9*16 + 3*8] + mov r12, [rsp + 6*16 + 0*8] + mov r13, [rsp + 6*16 + 1*8] + mov r14, [rsp + 6*16 + 2*8] + mov r15, [rsp + 6*16 + 3*8] add rsp, stack_size %endmacro %endif diff --git a/erasure_code/gf_4vect_dot_prod_avx512.asm b/erasure_code/gf_4vect_dot_prod_avx512.asm index b43ec7d..593c031 100644 --- a/erasure_code/gf_4vect_dot_prod_avx512.asm +++ b/erasure_code/gf_4vect_dot_prod_avx512.asm @@ -44,8 +44,6 @@ %define arg5 r9 %define tmp r11 - %define tmp.w r11d - %define tmp.b r11b %define tmp2 r10 %define tmp3 r13 ; must be saved and restored %define tmp4 r12 ; must be saved and restored @@ -54,19 +52,22 @@ %define return rax %define PS 8 %define LOG_PS 3 + %define stack_size 4*8 %define func(x) x: endbranch %macro FUNC_SAVE 0 - push r12 - push r13 - push r14 - push r15 + sub rsp, stack_size + mov [rsp + 0*8], r12 + mov [rsp + 1*8], r13 + mov [rsp + 2*8], r14 + mov [rsp + 3*8], r15 %endmacro %macro FUNC_RESTORE 0 - pop r15 - pop r14 - pop r13 - pop r12 + mov r12, [rsp + 0*8] + mov r13, [rsp + 1*8] + mov r14, [rsp + 2*8] + mov r15, [rsp + 3*8] + add rsp, stack_size %endmacro %endif @@ -79,8 +80,6 @@ %define arg4 r12 ; must be saved, loaded and restored %define arg5 r15 ; must be saved and restored %define tmp r11 - %define tmp.w r11d - %define tmp.b r11b %define tmp2 r10 %define tmp3 r13 ; must be saved and restored %define tmp4 r14 ; must be saved and restored diff --git a/erasure_code/gf_4vect_mad_avx512.asm b/erasure_code/gf_4vect_mad_avx512.asm index 02d9d62..dca406a 100644 --- a/erasure_code/gf_4vect_mad_avx512.asm +++ b/erasure_code/gf_4vect_mad_avx512.asm @@ -64,16 +64,16 @@ %macro FUNC_SAVE 0 sub rsp, stack_size - movdqa [rsp+16*0],xmm6 - movdqa [rsp+16*1],xmm7 - movdqa [rsp+16*2],xmm8 - movdqa [rsp+16*3],xmm9 - movdqa [rsp+16*4],xmm10 - movdqa [rsp+16*5],xmm11 - movdqa [rsp+16*6],xmm12 - movdqa [rsp+16*7],xmm13 - movdqa [rsp+16*8],xmm14 - movdqa [rsp+16*9],xmm15 + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + vmovdqa [rsp+16*3],xmm9 + vmovdqa [rsp+16*4],xmm10 + vmovdqa [rsp+16*5],xmm11 + vmovdqa [rsp+16*6],xmm12 + vmovdqa [rsp+16*7],xmm13 + vmovdqa [rsp+16*8],xmm14 + vmovdqa [rsp+16*9],xmm15 save_reg r12, 10*16 + 0*8 save_reg r15, 10*16 + 1*8 end_prolog @@ -82,16 +82,16 @@ %endmacro %macro FUNC_RESTORE 0 - movdqa xmm6, [rsp+16*0] - movdqa xmm7, [rsp+16*1] - movdqa xmm8, [rsp+16*2] - movdqa xmm9, [rsp+16*3] - movdqa xmm10, [rsp+16*4] - movdqa xmm11, [rsp+16*5] - movdqa xmm12, [rsp+16*6] - movdqa xmm13, [rsp+16*7] - movdqa xmm14, [rsp+16*8] - movdqa xmm15, [rsp+16*9] + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + vmovdqa xmm9, [rsp+16*3] + vmovdqa xmm10, [rsp+16*4] + vmovdqa xmm11, [rsp+16*5] + vmovdqa xmm12, [rsp+16*6] + vmovdqa xmm13, [rsp+16*7] + vmovdqa xmm14, [rsp+16*8] + vmovdqa xmm15, [rsp+16*9] mov r12, [rsp + 10*16 + 0*8] mov r15, [rsp + 10*16 + 1*8] add rsp, stack_size diff --git a/erasure_code/gf_5vect_dot_prod_avx512.asm b/erasure_code/gf_5vect_dot_prod_avx512.asm index bedca5b..04e86cd 100644 --- a/erasure_code/gf_5vect_dot_prod_avx512.asm +++ b/erasure_code/gf_5vect_dot_prod_avx512.asm @@ -44,8 +44,6 @@ %define arg5 r9 %define tmp r11 - %define tmp.w r11d - %define tmp.b r11b %define tmp2 r10 %define tmp3 r13 ; must be saved and restored %define tmp4 r12 ; must be saved and restored @@ -56,23 +54,26 @@ %define return rax %define PS 8 %define LOG_PS 3 + %define stack_size 6*8 %define func(x) x: endbranch %macro FUNC_SAVE 0 - push r12 - push r13 - push r14 - push r15 - push rbp - push rbx + sub rsp, stack_size + mov [rsp + 0*8], r12 + mov [rsp + 1*8], r13 + mov [rsp + 2*8], r14 + mov [rsp + 3*8], r15 + mov [rsp + 4*8], rbp + mov [rsp + 5*8], rbx %endmacro %macro FUNC_RESTORE 0 - pop rbx - pop rbp - pop r15 - pop r14 - pop r13 - pop r12 + mov r12, [rsp + 0*8] + mov r13, [rsp + 1*8] + mov r14, [rsp + 2*8] + mov r15, [rsp + 3*8] + mov rbp, [rsp + 4*8] + mov rbx, [rsp + 5*8] + add rsp, stack_size %endmacro %endif @@ -85,8 +86,6 @@ %define arg4 r12 ; must be saved, loaded and restored %define arg5 r15 ; must be saved and restored %define tmp r11 - %define tmp.w r11d - %define tmp.b r11b %define tmp2 r10 %define tmp3 r13 ; must be saved and restored %define tmp4 r14 ; must be saved and restored diff --git a/erasure_code/gf_5vect_mad_avx512.asm b/erasure_code/gf_5vect_mad_avx512.asm index 4063295..7a6962a 100644 --- a/erasure_code/gf_5vect_mad_avx512.asm +++ b/erasure_code/gf_5vect_mad_avx512.asm @@ -66,16 +66,16 @@ %macro FUNC_SAVE 0 sub rsp, stack_size - movdqa [rsp+16*0],xmm6 - movdqa [rsp+16*1],xmm7 - movdqa [rsp+16*2],xmm8 - movdqa [rsp+16*3],xmm9 - movdqa [rsp+16*4],xmm10 - movdqa [rsp+16*5],xmm11 - movdqa [rsp+16*6],xmm12 - movdqa [rsp+16*7],xmm13 - movdqa [rsp+16*8],xmm14 - movdqa [rsp+16*9],xmm15 + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + vmovdqa [rsp+16*3],xmm9 + vmovdqa [rsp+16*4],xmm10 + vmovdqa [rsp+16*5],xmm11 + vmovdqa [rsp+16*6],xmm12 + vmovdqa [rsp+16*7],xmm13 + vmovdqa [rsp+16*8],xmm14 + vmovdqa [rsp+16*9],xmm15 save_reg r12, 10*16 + 0*8 save_reg r15, 10*16 + 1*8 end_prolog @@ -84,16 +84,16 @@ %endmacro %macro FUNC_RESTORE 0 - movdqa xmm6, [rsp+16*0] - movdqa xmm7, [rsp+16*1] - movdqa xmm8, [rsp+16*2] - movdqa xmm9, [rsp+16*3] - movdqa xmm10, [rsp+16*4] - movdqa xmm11, [rsp+16*5] - movdqa xmm12, [rsp+16*6] - movdqa xmm13, [rsp+16*7] - movdqa xmm14, [rsp+16*8] - movdqa xmm15, [rsp+16*9] + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + vmovdqa xmm9, [rsp+16*3] + vmovdqa xmm10, [rsp+16*4] + vmovdqa xmm11, [rsp+16*5] + vmovdqa xmm12, [rsp+16*6] + vmovdqa xmm13, [rsp+16*7] + vmovdqa xmm14, [rsp+16*8] + vmovdqa xmm15, [rsp+16*9] mov r12, [rsp + 10*16 + 0*8] mov r15, [rsp + 10*16 + 1*8] add rsp, stack_size diff --git a/erasure_code/gf_6vect_dot_prod_avx512.asm b/erasure_code/gf_6vect_dot_prod_avx512.asm index e9e4777..ff3075f 100644 --- a/erasure_code/gf_6vect_dot_prod_avx512.asm +++ b/erasure_code/gf_6vect_dot_prod_avx512.asm @@ -44,8 +44,6 @@ %define arg5 r9 %define tmp r11 - %define tmp.w r11d - %define tmp.b r11b %define tmp2 r10 %define tmp3 r13 ; must be saved and restored %define tmp4 r12 ; must be saved and restored @@ -56,23 +54,26 @@ %define return rax %define PS 8 %define LOG_PS 3 + %define stack_size 6*8 %define func(x) x: endbranch %macro FUNC_SAVE 0 - push r12 - push r13 - push r14 - push r15 - push rbp - push rbx + sub rsp, stack_size + mov [rsp + 0*8], r12 + mov [rsp + 1*8], r13 + mov [rsp + 2*8], r14 + mov [rsp + 3*8], r15 + mov [rsp + 4*8], rbp + mov [rsp + 5*8], rbx %endmacro %macro FUNC_RESTORE 0 - pop rbx - pop rbp - pop r15 - pop r14 - pop r13 - pop r12 + mov r12, [rsp + 0*8] + mov r13, [rsp + 1*8] + mov r14, [rsp + 2*8] + mov r15, [rsp + 3*8] + mov rbp, [rsp + 4*8] + mov rbx, [rsp + 5*8] + add rsp, stack_size %endmacro %endif @@ -85,8 +86,6 @@ %define arg4 r12 ; must be saved, loaded and restored %define arg5 r15 ; must be saved and restored %define tmp r11 - %define tmp.w r11d - %define tmp.b r11b %define tmp2 r10 %define tmp3 r13 ; must be saved and restored %define tmp4 r14 ; must be saved and restored diff --git a/erasure_code/gf_6vect_mad_avx512.asm b/erasure_code/gf_6vect_mad_avx512.asm index e07937b..1bbcd7d 100644 --- a/erasure_code/gf_6vect_mad_avx512.asm +++ b/erasure_code/gf_6vect_mad_avx512.asm @@ -72,16 +72,16 @@ %macro FUNC_SAVE 0 sub rsp, stack_size - movdqa [rsp+16*0],xmm6 - movdqa [rsp+16*1],xmm7 - movdqa [rsp+16*2],xmm8 - movdqa [rsp+16*3],xmm9 - movdqa [rsp+16*4],xmm10 - movdqa [rsp+16*5],xmm11 - movdqa [rsp+16*6],xmm12 - movdqa [rsp+16*7],xmm13 - movdqa [rsp+16*8],xmm14 - movdqa [rsp+16*9],xmm15 + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + vmovdqa [rsp+16*3],xmm9 + vmovdqa [rsp+16*4],xmm10 + vmovdqa [rsp+16*5],xmm11 + vmovdqa [rsp+16*6],xmm12 + vmovdqa [rsp+16*7],xmm13 + vmovdqa [rsp+16*8],xmm14 + vmovdqa [rsp+16*9],xmm15 save_reg r12, 10*16 + 0*8 save_reg r15, 10*16 + 1*8 save_reg r13, 10*16 + 2*8 @@ -91,16 +91,16 @@ %endmacro %macro FUNC_RESTORE 0 - movdqa xmm6, [rsp+16*0] - movdqa xmm7, [rsp+16*1] - movdqa xmm8, [rsp+16*2] - movdqa xmm9, [rsp+16*3] - movdqa xmm10, [rsp+16*4] - movdqa xmm11, [rsp+16*5] - movdqa xmm12, [rsp+16*6] - movdqa xmm13, [rsp+16*7] - movdqa xmm14, [rsp+16*8] - movdqa xmm15, [rsp+16*9] + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + vmovdqa xmm9, [rsp+16*3] + vmovdqa xmm10, [rsp+16*4] + vmovdqa xmm11, [rsp+16*5] + vmovdqa xmm12, [rsp+16*6] + vmovdqa xmm13, [rsp+16*7] + vmovdqa xmm14, [rsp+16*8] + vmovdqa xmm15, [rsp+16*9] mov r12, [rsp + 10*16 + 0*8] mov r15, [rsp + 10*16 + 1*8] mov r13, [rsp + 10*16 + 2*8]