diff --git a/crc/crc16_t10dif_01.asm b/crc/crc16_t10dif_01.asm index 33f4555..536b6f3 100644 --- a/crc/crc16_t10dif_01.asm +++ b/crc/crc16_t10dif_01.asm @@ -75,6 +75,7 @@ section .text align 16 mk_global crc16_t10dif_01, function crc16_t10dif_01: + endbranch ; adjust the 16-bit initial_crc value, scale it to 32 bits shl arg1_low32, 16 diff --git a/crc/crc16_t10dif_02.asm b/crc/crc16_t10dif_02.asm index 157ac53..0e392af 100644 --- a/crc/crc16_t10dif_02.asm +++ b/crc/crc16_t10dif_02.asm @@ -75,6 +75,7 @@ section .text align 16 mk_global crc16_t10dif_02, function crc16_t10dif_02: + endbranch ; adjust the 16-bit initial_crc value, scale it to 32 bits shl arg1_low32, 16 diff --git a/crc/crc16_t10dif_by16_10.asm b/crc/crc16_t10dif_by16_10.asm index 479b635..27a2e02 100644 --- a/crc/crc16_t10dif_by16_10.asm +++ b/crc/crc16_t10dif_by16_10.asm @@ -84,6 +84,7 @@ section .text align 16 mk_global FUNCTION_NAME, function FUNCTION_NAME: + endbranch ; adjust the 16-bit initial_crc value, scale it to 32 bits shl arg1_low32, 16 diff --git a/crc/crc16_t10dif_by4.asm b/crc/crc16_t10dif_by4.asm index bde071a..1326eb2 100644 --- a/crc/crc16_t10dif_by4.asm +++ b/crc/crc16_t10dif_by4.asm @@ -68,6 +68,7 @@ section .text align 16 mk_global crc16_t10dif_by4, function crc16_t10dif_by4: + endbranch ; adjust the 16-bit initial_crc value, scale it to 32 bits shl arg1_low32, 16 diff --git a/crc/crc16_t10dif_copy_by4.asm b/crc/crc16_t10dif_copy_by4.asm index 0f82d69..b8a6838 100644 --- a/crc/crc16_t10dif_copy_by4.asm +++ b/crc/crc16_t10dif_copy_by4.asm @@ -71,6 +71,7 @@ section .text align 16 mk_global crc16_t10dif_copy_by4, function crc16_t10dif_copy_by4: + endbranch ; adjust the 16-bit initial_crc value, scale it to 32 bits shl arg1_low32, 16 diff --git a/crc/crc16_t10dif_copy_by4_02.asm b/crc/crc16_t10dif_copy_by4_02.asm index 1a7338f..254a187 100644 --- a/crc/crc16_t10dif_copy_by4_02.asm +++ b/crc/crc16_t10dif_copy_by4_02.asm @@ -71,6 +71,7 @@ section .text align 16 mk_global crc16_t10dif_copy_by4_02, function crc16_t10dif_copy_by4_02: + endbranch ; adjust the 16-bit initial_crc value, scale it to 32 bits shl arg1_low32, 16 diff --git a/crc/crc32_gzip_refl_by16_10.asm b/crc/crc32_gzip_refl_by16_10.asm index 69cb366..15280b8 100644 --- a/crc/crc32_gzip_refl_by16_10.asm +++ b/crc/crc32_gzip_refl_by16_10.asm @@ -94,6 +94,7 @@ section .text align 16 mk_global FUNCTION_NAME, function FUNCTION_NAME: + endbranch not arg1_low32 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc32_gzip_refl_by8.asm b/crc/crc32_gzip_refl_by8.asm index 780ae35..4384024 100644 --- a/crc/crc32_gzip_refl_by8.asm +++ b/crc/crc32_gzip_refl_by8.asm @@ -88,6 +88,7 @@ section .text align 16 mk_global crc32_gzip_refl_by8, function crc32_gzip_refl_by8: + endbranch ; unsigned long c = crc ^ 0xffffffffL; not arg1_low32 ; diff --git a/crc/crc32_gzip_refl_by8_02.asm b/crc/crc32_gzip_refl_by8_02.asm index bba5ae6..712fe87 100644 --- a/crc/crc32_gzip_refl_by8_02.asm +++ b/crc/crc32_gzip_refl_by8_02.asm @@ -88,6 +88,7 @@ section .text align 16 mk_global crc32_gzip_refl_by8_02, function crc32_gzip_refl_by8_02: + endbranch not arg1_low32 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc32_ieee_01.asm b/crc/crc32_ieee_01.asm index 5b9d465..368261d 100644 --- a/crc/crc32_ieee_01.asm +++ b/crc/crc32_ieee_01.asm @@ -74,6 +74,7 @@ section .text align 16 mk_global crc32_ieee_01, function crc32_ieee_01: + endbranch not arg1_low32 ;~init_crc diff --git a/crc/crc32_ieee_02.asm b/crc/crc32_ieee_02.asm index 411e923..95d53e8 100644 --- a/crc/crc32_ieee_02.asm +++ b/crc/crc32_ieee_02.asm @@ -74,6 +74,7 @@ section .text align 16 mk_global crc32_ieee_02, function crc32_ieee_02: + endbranch not arg1_low32 ;~init_crc diff --git a/crc/crc32_ieee_by16_10.asm b/crc/crc32_ieee_by16_10.asm index c6aa741..5c3f52a 100644 --- a/crc/crc32_ieee_by16_10.asm +++ b/crc/crc32_ieee_by16_10.asm @@ -84,6 +84,7 @@ section .text align 16 mk_global FUNCTION_NAME, function FUNCTION_NAME: + endbranch not arg1_low32 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc32_ieee_by4.asm b/crc/crc32_ieee_by4.asm index 2ce2289..f432640 100644 --- a/crc/crc32_ieee_by4.asm +++ b/crc/crc32_ieee_by4.asm @@ -76,6 +76,7 @@ section .text align 16 mk_global crc32_ieee_by4, function crc32_ieee_by4: + endbranch not arg1_low32 diff --git a/crc/crc32_iscsi_00.asm b/crc/crc32_iscsi_00.asm index e1ad903..1a5e029 100644 --- a/crc/crc32_iscsi_00.asm +++ b/crc/crc32_iscsi_00.asm @@ -155,6 +155,7 @@ default rel mk_global crc32_iscsi_00, function crc32_iscsi_00: + endbranch %ifidn __OUTPUT_FORMAT__, elf64 %define bufp rdi diff --git a/crc/crc32_iscsi_01.asm b/crc/crc32_iscsi_01.asm index 30adb04..e0f2b5e 100644 --- a/crc/crc32_iscsi_01.asm +++ b/crc/crc32_iscsi_01.asm @@ -52,6 +52,7 @@ default rel mk_global crc32_iscsi_01, function crc32_iscsi_01: + endbranch %ifidn __OUTPUT_FORMAT__, elf64 %define bufp rdi @@ -214,6 +215,7 @@ non_prefetch: %rep 128-1 CONCAT(crc_,i,:) + endbranch crc32 crc_init, qword [block_0 - i*8] crc32 crc1, qword [block_1 - i*8] crc32 crc2, qword [block_2 - i*8] diff --git a/crc/crc64_ecma_norm_by8.asm b/crc/crc64_ecma_norm_by8.asm index 5599d98..ca99e34 100644 --- a/crc/crc64_ecma_norm_by8.asm +++ b/crc/crc64_ecma_norm_by8.asm @@ -64,6 +64,7 @@ section .text align 16 mk_global crc64_ecma_norm_by8, function crc64_ecma_norm_by8: + endbranch not arg1 ;~init_crc diff --git a/crc/crc64_ecma_refl_by8.asm b/crc/crc64_ecma_refl_by8.asm index b641934..c09ddfa 100644 --- a/crc/crc64_ecma_refl_by8.asm +++ b/crc/crc64_ecma_refl_by8.asm @@ -70,6 +70,7 @@ section .text align 16 mk_global crc64_ecma_refl_by8, function crc64_ecma_refl_by8: + endbranch ; uint64_t c = crc ^ 0xffffffff,ffffffffL; not arg1 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc64_iso_norm_by16_10.asm b/crc/crc64_iso_norm_by16_10.asm index 28630a1..4eefbd3 100644 --- a/crc/crc64_iso_norm_by16_10.asm +++ b/crc/crc64_iso_norm_by16_10.asm @@ -71,6 +71,7 @@ section .text align 16 mk_global FUNCTION_NAME, function FUNCTION_NAME: + endbranch not arg1 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc64_iso_norm_by8.asm b/crc/crc64_iso_norm_by8.asm index 887fca8..16147d5 100644 --- a/crc/crc64_iso_norm_by8.asm +++ b/crc/crc64_iso_norm_by8.asm @@ -63,6 +63,7 @@ section .text align 16 mk_global crc64_iso_norm_by8, function crc64_iso_norm_by8: + endbranch not arg1 ;~init_crc diff --git a/crc/crc64_iso_refl_by16_10.asm b/crc/crc64_iso_refl_by16_10.asm index d58ac0a..e5d5a08 100644 --- a/crc/crc64_iso_refl_by16_10.asm +++ b/crc/crc64_iso_refl_by16_10.asm @@ -72,6 +72,7 @@ section .text align 16 mk_global FUNCTION_NAME, function FUNCTION_NAME: + endbranch not arg1 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc64_iso_refl_by8.asm b/crc/crc64_iso_refl_by8.asm index 3abc5da..b6dfcf0 100644 --- a/crc/crc64_iso_refl_by8.asm +++ b/crc/crc64_iso_refl_by8.asm @@ -67,6 +67,7 @@ section .text align 16 mk_global crc64_iso_refl_by8, function crc64_iso_refl_by8: + endbranch ; uint64_t c = crc ^ 0xffffffff,ffffffffL; not arg1 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc64_jones_norm_by8.asm b/crc/crc64_jones_norm_by8.asm index bc3b521..0cf8b4a 100644 --- a/crc/crc64_jones_norm_by8.asm +++ b/crc/crc64_jones_norm_by8.asm @@ -63,6 +63,7 @@ section .text align 16 mk_global crc64_jones_norm_by8, function crc64_jones_norm_by8: + endbranch not arg1 ;~init_crc diff --git a/crc/crc64_jones_refl_by8.asm b/crc/crc64_jones_refl_by8.asm index a9ea19a..eea9c8d 100644 --- a/crc/crc64_jones_refl_by8.asm +++ b/crc/crc64_jones_refl_by8.asm @@ -67,6 +67,7 @@ section .text align 16 mk_global crc64_jones_refl_by8, function crc64_jones_refl_by8: + endbranch ; uint64_t c = crc ^ 0xffffffff,ffffffffL; not arg1 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc_multibinary.asm b/crc/crc_multibinary.asm index b1f425a..8b9d7bd 100644 --- a/crc/crc_multibinary.asm +++ b/crc/crc_multibinary.asm @@ -81,8 +81,10 @@ section .text ;;;; mk_global crc32_iscsi, function crc32_iscsi_mbinit: + endbranch call crc32_iscsi_dispatch_init crc32_iscsi: + endbranch jmp qword [crc32_iscsi_dispatched] crc32_iscsi_dispatch_init: @@ -115,8 +117,10 @@ crc32_iscsi_dispatch_init: ;;;; mk_global crc32_ieee, function crc32_ieee_mbinit: + endbranch call crc32_ieee_dispatch_init crc32_ieee: + endbranch jmp qword [crc32_ieee_dispatched] crc32_ieee_dispatch_init: @@ -194,8 +198,10 @@ crc32_ieee_dispatch_init: ;;;; mk_global crc16_t10dif, function crc16_t10dif_mbinit: + endbranch call crc16_t10dif_dispatch_init crc16_t10dif: + endbranch jmp qword [crc16_t10dif_dispatched] crc16_t10dif_dispatch_init: diff --git a/erasure_code/gf_2vect_dot_prod_avx.asm b/erasure_code/gf_2vect_dot_prod_avx.asm index f512d7d..cfbc2eb 100644 --- a/erasure_code/gf_2vect_dot_prod_avx.asm +++ b/erasure_code/gf_2vect_dot_prod_avx.asm @@ -52,7 +52,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro @@ -127,7 +127,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_2vect_dot_prod_avx2.asm b/erasure_code/gf_2vect_dot_prod_avx2.asm index ba704d0..a06f67a 100644 --- a/erasure_code/gf_2vect_dot_prod_avx2.asm +++ b/erasure_code/gf_2vect_dot_prod_avx2.asm @@ -54,7 +54,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro @@ -131,7 +131,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_2vect_dot_prod_avx512.asm b/erasure_code/gf_2vect_dot_prod_avx512.asm index 2444216..92d7e9d 100644 --- a/erasure_code/gf_2vect_dot_prod_avx512.asm +++ b/erasure_code/gf_2vect_dot_prod_avx512.asm @@ -50,7 +50,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro diff --git a/erasure_code/gf_2vect_dot_prod_sse.asm b/erasure_code/gf_2vect_dot_prod_sse.asm index 7e1006b..f7e44e7 100644 --- a/erasure_code/gf_2vect_dot_prod_sse.asm +++ b/erasure_code/gf_2vect_dot_prod_sse.asm @@ -52,7 +52,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro @@ -127,7 +127,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_2vect_mad_avx.asm b/erasure_code/gf_2vect_mad_avx.asm index 65af8b0..995c36b 100644 --- a/erasure_code/gf_2vect_mad_avx.asm +++ b/erasure_code/gf_2vect_mad_avx.asm @@ -97,7 +97,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_2vect_mad_avx2.asm b/erasure_code/gf_2vect_mad_avx2.asm index f4c1cae..751677d 100644 --- a/erasure_code/gf_2vect_mad_avx2.asm +++ b/erasure_code/gf_2vect_mad_avx2.asm @@ -104,7 +104,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_2vect_mad_avx512.asm b/erasure_code/gf_2vect_mad_avx512.asm index 5a35a89..ce37248 100644 --- a/erasure_code/gf_2vect_mad_avx512.asm +++ b/erasure_code/gf_2vect_mad_avx512.asm @@ -45,7 +45,7 @@ %define tmp r11 %define tmp2 r10 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_2vect_mad_sse.asm b/erasure_code/gf_2vect_mad_sse.asm index c85b431..2bff82f 100644 --- a/erasure_code/gf_2vect_mad_sse.asm +++ b/erasure_code/gf_2vect_mad_sse.asm @@ -97,7 +97,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_3vect_dot_prod_avx.asm b/erasure_code/gf_3vect_dot_prod_avx.asm index deb44d0..79c7ed4 100644 --- a/erasure_code/gf_3vect_dot_prod_avx.asm +++ b/erasure_code/gf_3vect_dot_prod_avx.asm @@ -52,7 +52,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -139,7 +139,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_3vect_dot_prod_avx2.asm b/erasure_code/gf_3vect_dot_prod_avx2.asm index fa55dd6..606c3a1 100644 --- a/erasure_code/gf_3vect_dot_prod_avx2.asm +++ b/erasure_code/gf_3vect_dot_prod_avx2.asm @@ -54,7 +54,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -143,7 +143,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_3vect_dot_prod_avx512.asm b/erasure_code/gf_3vect_dot_prod_avx512.asm index eecde81..81e96f2 100644 --- a/erasure_code/gf_3vect_dot_prod_avx512.asm +++ b/erasure_code/gf_3vect_dot_prod_avx512.asm @@ -53,7 +53,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_3vect_dot_prod_sse.asm b/erasure_code/gf_3vect_dot_prod_sse.asm index 2b13e71..d52c72b 100644 --- a/erasure_code/gf_3vect_dot_prod_sse.asm +++ b/erasure_code/gf_3vect_dot_prod_sse.asm @@ -52,7 +52,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -139,7 +139,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_3vect_mad_avx.asm b/erasure_code/gf_3vect_mad_avx.asm index 4aea710..13963f6 100644 --- a/erasure_code/gf_3vect_mad_avx.asm +++ b/erasure_code/gf_3vect_mad_avx.asm @@ -97,7 +97,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_3vect_mad_avx2.asm b/erasure_code/gf_3vect_mad_avx2.asm index e8071dd..797d954 100644 --- a/erasure_code/gf_3vect_mad_avx2.asm +++ b/erasure_code/gf_3vect_mad_avx2.asm @@ -103,7 +103,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_3vect_mad_avx512.asm b/erasure_code/gf_3vect_mad_avx512.asm index b8b8d9b..bc61900 100644 --- a/erasure_code/gf_3vect_mad_avx512.asm +++ b/erasure_code/gf_3vect_mad_avx512.asm @@ -44,7 +44,7 @@ %define arg5 r9 %define tmp r11 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_3vect_mad_sse.asm b/erasure_code/gf_3vect_mad_sse.asm index 10744ec..c0fd0b9 100644 --- a/erasure_code/gf_3vect_mad_sse.asm +++ b/erasure_code/gf_3vect_mad_sse.asm @@ -96,7 +96,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_4vect_dot_prod_avx.asm b/erasure_code/gf_4vect_dot_prod_avx.asm index f436048..bad8692 100644 --- a/erasure_code/gf_4vect_dot_prod_avx.asm +++ b/erasure_code/gf_4vect_dot_prod_avx.asm @@ -54,7 +54,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -159,7 +159,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_4vect_dot_prod_avx2.asm b/erasure_code/gf_4vect_dot_prod_avx2.asm index 0c7ae4e..e422e28 100644 --- a/erasure_code/gf_4vect_dot_prod_avx2.asm +++ b/erasure_code/gf_4vect_dot_prod_avx2.asm @@ -56,7 +56,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -163,7 +163,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_4vect_dot_prod_avx512.asm b/erasure_code/gf_4vect_dot_prod_avx512.asm index 6d67426..9d32973 100644 --- a/erasure_code/gf_4vect_dot_prod_avx512.asm +++ b/erasure_code/gf_4vect_dot_prod_avx512.asm @@ -55,7 +55,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_4vect_dot_prod_sse.asm b/erasure_code/gf_4vect_dot_prod_sse.asm index 25134c7..25b5cff 100644 --- a/erasure_code/gf_4vect_dot_prod_sse.asm +++ b/erasure_code/gf_4vect_dot_prod_sse.asm @@ -54,7 +54,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -159,7 +159,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_4vect_mad_avx.asm b/erasure_code/gf_4vect_mad_avx.asm index 284c76b..3a00623 100644 --- a/erasure_code/gf_4vect_mad_avx.asm +++ b/erasure_code/gf_4vect_mad_avx.asm @@ -103,7 +103,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro diff --git a/erasure_code/gf_4vect_mad_avx2.asm b/erasure_code/gf_4vect_mad_avx2.asm index bf6cc7e..e1cf910 100644 --- a/erasure_code/gf_4vect_mad_avx2.asm +++ b/erasure_code/gf_4vect_mad_avx2.asm @@ -101,7 +101,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_4vect_mad_avx512.asm b/erasure_code/gf_4vect_mad_avx512.asm index 3948ab1..77dc76b 100644 --- a/erasure_code/gf_4vect_mad_avx512.asm +++ b/erasure_code/gf_4vect_mad_avx512.asm @@ -44,7 +44,7 @@ %define arg5 r9 %define tmp r11 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_4vect_mad_sse.asm b/erasure_code/gf_4vect_mad_sse.asm index 377b31f..d5efc97 100644 --- a/erasure_code/gf_4vect_mad_sse.asm +++ b/erasure_code/gf_4vect_mad_sse.asm @@ -103,7 +103,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro diff --git a/erasure_code/gf_5vect_dot_prod_avx.asm b/erasure_code/gf_5vect_dot_prod_avx.asm index 3226dde..a5bdb2a 100644 --- a/erasure_code/gf_5vect_dot_prod_avx.asm +++ b/erasure_code/gf_5vect_dot_prod_avx.asm @@ -51,7 +51,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_5vect_dot_prod_avx2.asm b/erasure_code/gf_5vect_dot_prod_avx2.asm index 4bee087..d019e97 100644 --- a/erasure_code/gf_5vect_dot_prod_avx2.asm +++ b/erasure_code/gf_5vect_dot_prod_avx2.asm @@ -53,7 +53,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_5vect_dot_prod_avx512.asm b/erasure_code/gf_5vect_dot_prod_avx512.asm index e955ea5..1cca65b 100644 --- a/erasure_code/gf_5vect_dot_prod_avx512.asm +++ b/erasure_code/gf_5vect_dot_prod_avx512.asm @@ -57,7 +57,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_5vect_dot_prod_sse.asm b/erasure_code/gf_5vect_dot_prod_sse.asm index 5ff9460..c96bed5 100644 --- a/erasure_code/gf_5vect_dot_prod_sse.asm +++ b/erasure_code/gf_5vect_dot_prod_sse.asm @@ -51,7 +51,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_5vect_mad_avx.asm b/erasure_code/gf_5vect_mad_avx.asm index ccdbc6e..e9e246c 100644 --- a/erasure_code/gf_5vect_mad_avx.asm +++ b/erasure_code/gf_5vect_mad_avx.asm @@ -107,7 +107,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_5vect_mad_avx2.asm b/erasure_code/gf_5vect_mad_avx2.asm index ac61437..87038a7 100644 --- a/erasure_code/gf_5vect_mad_avx2.asm +++ b/erasure_code/gf_5vect_mad_avx2.asm @@ -103,7 +103,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_5vect_mad_avx512.asm b/erasure_code/gf_5vect_mad_avx512.asm index 5de47d1..e2a1455 100644 --- a/erasure_code/gf_5vect_mad_avx512.asm +++ b/erasure_code/gf_5vect_mad_avx512.asm @@ -45,7 +45,7 @@ %define tmp r11 %define tmp2 r10 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_5vect_mad_sse.asm b/erasure_code/gf_5vect_mad_sse.asm index fc99aaf..17760d0 100644 --- a/erasure_code/gf_5vect_mad_sse.asm +++ b/erasure_code/gf_5vect_mad_sse.asm @@ -107,7 +107,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_6vect_dot_prod_avx.asm b/erasure_code/gf_6vect_dot_prod_avx.asm index 1f9df8d..7604711 100644 --- a/erasure_code/gf_6vect_dot_prod_avx.asm +++ b/erasure_code/gf_6vect_dot_prod_avx.asm @@ -51,7 +51,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_6vect_dot_prod_avx2.asm b/erasure_code/gf_6vect_dot_prod_avx2.asm index ccb4e77..5885d97 100644 --- a/erasure_code/gf_6vect_dot_prod_avx2.asm +++ b/erasure_code/gf_6vect_dot_prod_avx2.asm @@ -53,7 +53,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_6vect_dot_prod_avx512.asm b/erasure_code/gf_6vect_dot_prod_avx512.asm index 6ebfd26..bb25e67 100644 --- a/erasure_code/gf_6vect_dot_prod_avx512.asm +++ b/erasure_code/gf_6vect_dot_prod_avx512.asm @@ -57,7 +57,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_6vect_dot_prod_sse.asm b/erasure_code/gf_6vect_dot_prod_sse.asm index 51bd116..41176bb 100644 --- a/erasure_code/gf_6vect_dot_prod_sse.asm +++ b/erasure_code/gf_6vect_dot_prod_sse.asm @@ -51,7 +51,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_6vect_mad_avx.asm b/erasure_code/gf_6vect_mad_avx.asm index 4e20dbb..c9ce490 100644 --- a/erasure_code/gf_6vect_mad_avx.asm +++ b/erasure_code/gf_6vect_mad_avx.asm @@ -111,7 +111,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_6vect_mad_avx2.asm b/erasure_code/gf_6vect_mad_avx2.asm index 45d750e..8f94c6a 100644 --- a/erasure_code/gf_6vect_mad_avx2.asm +++ b/erasure_code/gf_6vect_mad_avx2.asm @@ -107,7 +107,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro diff --git a/erasure_code/gf_6vect_mad_avx512.asm b/erasure_code/gf_6vect_mad_avx512.asm index 6ae11f3..c2383a2 100644 --- a/erasure_code/gf_6vect_mad_avx512.asm +++ b/erasure_code/gf_6vect_mad_avx512.asm @@ -46,7 +46,7 @@ %define tmp2 r10 %define tmp3 r12 ;must be saved and restored %define return rax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro diff --git a/erasure_code/gf_6vect_mad_sse.asm b/erasure_code/gf_6vect_mad_sse.asm index 695fd6b..f33ec06 100644 --- a/erasure_code/gf_6vect_mad_sse.asm +++ b/erasure_code/gf_6vect_mad_sse.asm @@ -113,7 +113,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_vect_dot_prod_avx.asm b/erasure_code/gf_vect_dot_prod_avx.asm index 179e985..7bd8700 100644 --- a/erasure_code/gf_vect_dot_prod_avx.asm +++ b/erasure_code/gf_vect_dot_prod_avx.asm @@ -48,7 +48,7 @@ %endmacro %define SSTR SLDR %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif @@ -106,7 +106,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define trans ecx ;trans is for the variables in stack diff --git a/erasure_code/gf_vect_dot_prod_avx2.asm b/erasure_code/gf_vect_dot_prod_avx2.asm index 2cfa0f0..c385e3b 100644 --- a/erasure_code/gf_vect_dot_prod_avx2.asm +++ b/erasure_code/gf_vect_dot_prod_avx2.asm @@ -51,7 +51,7 @@ %endmacro %define SSTR SLDR %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif @@ -111,7 +111,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define trans ecx ;trans is for the variables in stack diff --git a/erasure_code/gf_vect_dot_prod_avx512.asm b/erasure_code/gf_vect_dot_prod_avx512.asm index 8a02fd8..37fe082 100644 --- a/erasure_code/gf_vect_dot_prod_avx512.asm +++ b/erasure_code/gf_vect_dot_prod_avx512.asm @@ -49,7 +49,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_vect_dot_prod_sse.asm b/erasure_code/gf_vect_dot_prod_sse.asm index 602bd89..9b0a440 100644 --- a/erasure_code/gf_vect_dot_prod_sse.asm +++ b/erasure_code/gf_vect_dot_prod_sse.asm @@ -48,7 +48,7 @@ %endmacro %define SSTR SLDR %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif @@ -106,7 +106,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define trans ecx ;trans is for the variables in stack diff --git a/erasure_code/gf_vect_mad_avx.asm b/erasure_code/gf_vect_mad_avx.asm index 2b0e623..448fbd7 100644 --- a/erasure_code/gf_vect_mad_avx.asm +++ b/erasure_code/gf_vect_mad_avx.asm @@ -82,7 +82,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_vect_mad_avx2.asm b/erasure_code/gf_vect_mad_avx2.asm index 9941fca..097d8fa 100644 --- a/erasure_code/gf_vect_mad_avx2.asm +++ b/erasure_code/gf_vect_mad_avx2.asm @@ -88,7 +88,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_vect_mad_avx512.asm b/erasure_code/gf_vect_mad_avx512.asm index 931e0cc..43982e3 100644 --- a/erasure_code/gf_vect_mad_avx512.asm +++ b/erasure_code/gf_vect_mad_avx512.asm @@ -44,7 +44,7 @@ %define arg5 r9 %define tmp r11 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_vect_mad_sse.asm b/erasure_code/gf_vect_mad_sse.asm index 1ea69fe..1044404 100644 --- a/erasure_code/gf_vect_mad_sse.asm +++ b/erasure_code/gf_vect_mad_sse.asm @@ -82,7 +82,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_vect_mul_avx.asm b/erasure_code/gf_vect_mul_avx.asm index 0186bbc..91f6d6d 100644 --- a/erasure_code/gf_vect_mul_avx.asm +++ b/erasure_code/gf_vect_mul_avx.asm @@ -42,7 +42,7 @@ %define arg5 r9 %define tmp r11 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE diff --git a/erasure_code/gf_vect_mul_sse.asm b/erasure_code/gf_vect_mul_sse.asm index bad257a..fefe7ef 100644 --- a/erasure_code/gf_vect_mul_sse.asm +++ b/erasure_code/gf_vect_mul_sse.asm @@ -42,7 +42,7 @@ %define arg5 r9 %define tmp r11 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE diff --git a/igzip/adler32_avx2_4.asm b/igzip/adler32_avx2_4.asm index 62c62bb..798310f 100644 --- a/igzip/adler32_avx2_4.asm +++ b/igzip/adler32_avx2_4.asm @@ -55,7 +55,7 @@ default rel %define b_d r8d %define end r13 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/igzip/adler32_sse.asm b/igzip/adler32_sse.asm index 6aea7cb..fc986cb 100644 --- a/igzip/adler32_sse.asm +++ b/igzip/adler32_sse.asm @@ -52,7 +52,7 @@ default rel %define b_d r8d %define end r13 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/igzip/encode_df_04.asm b/igzip/encode_df_04.asm index 2c52af8..5b913ae 100644 --- a/igzip/encode_df_04.asm +++ b/igzip/encode_df_04.asm @@ -177,6 +177,7 @@ section .text global encode_deflate_icf_ %+ ARCH encode_deflate_icf_ %+ ARCH: + endbranch FUNC_SAVE %ifnidn ptr, arg1 diff --git a/igzip/encode_df_06.asm b/igzip/encode_df_06.asm index aaec754..9e74795 100644 --- a/igzip/encode_df_06.asm +++ b/igzip/encode_df_06.asm @@ -190,6 +190,7 @@ section .text global encode_deflate_icf_ %+ ARCH encode_deflate_icf_ %+ ARCH: + endbranch FUNC_SAVE %ifnidn ptr, arg1 diff --git a/igzip/igzip_body.asm b/igzip/igzip_body.asm index d69b27c..b9620d6 100644 --- a/igzip/igzip_body.asm +++ b/igzip/igzip_body.asm @@ -143,6 +143,7 @@ section .text ; arg 1: rcx: addr of stream global isal_deflate_body_ %+ ARCH isal_deflate_body_ %+ ARCH %+ : + endbranch %ifidn __OUTPUT_FORMAT__, elf64 mov rcx, rdi %endif diff --git a/igzip/igzip_decode_block_stateless.asm b/igzip/igzip_decode_block_stateless.asm index 733194b..22f3bf2 100644 --- a/igzip/igzip_decode_block_stateless.asm +++ b/igzip/igzip_decode_block_stateless.asm @@ -465,6 +465,7 @@ section .text global decode_huffman_code_block_stateless_ %+ ARCH decode_huffman_code_block_stateless_ %+ ARCH %+ : + endbranch FUNC_SAVE diff --git a/igzip/igzip_deflate_hash.asm b/igzip/igzip_deflate_hash.asm index bcb0d5d..32a1482 100644 --- a/igzip/igzip_deflate_hash.asm +++ b/igzip/igzip_deflate_hash.asm @@ -104,6 +104,7 @@ section .text global isal_deflate_hash_crc_01 isal_deflate_hash_crc_01: + endbranch FUNC_SAVE neg f_i diff --git a/igzip/igzip_finish.asm b/igzip/igzip_finish.asm index fbf8839..2b539dd 100644 --- a/igzip/igzip_finish.asm +++ b/igzip/igzip_finish.asm @@ -94,6 +94,7 @@ section .text ; arg 1: rcx: addr of stream global isal_deflate_finish_01 isal_deflate_finish_01: + endbranch PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 sub rsp, stack_size diff --git a/igzip/igzip_gen_icf_map_lh1_04.asm b/igzip/igzip_gen_icf_map_lh1_04.asm index 077f56c..d188846 100644 --- a/igzip/igzip_gen_icf_map_lh1_04.asm +++ b/igzip/igzip_gen_icf_map_lh1_04.asm @@ -147,7 +147,7 @@ %define stack_size 16 %define local_storage_offset 0 -%define func(x) x: +%define func(x) x: endbranch %macro FUNC_SAVE 0 push rbp push r12 @@ -175,6 +175,7 @@ section .text global gen_icf_map_lh1_04 func(gen_icf_map_lh1_04) + endbranch FUNC_SAVE mov file_start, [stream + _next_in] diff --git a/igzip/igzip_gen_icf_map_lh1_06.asm b/igzip/igzip_gen_icf_map_lh1_06.asm index d134357..7985ab5 100644 --- a/igzip/igzip_gen_icf_map_lh1_06.asm +++ b/igzip/igzip_gen_icf_map_lh1_06.asm @@ -143,7 +143,7 @@ add rsp, stack_size %endm %else -%define func(x) x: +%define func(x) x: endbranch %macro FUNC_SAVE 0 push rbp push r12 @@ -166,6 +166,7 @@ section .text global gen_icf_map_lh1_06 func(gen_icf_map_lh1_06) + endbranch FUNC_SAVE mov file_start, [stream + _next_in] diff --git a/igzip/igzip_icf_body_h1_gr_bt.asm b/igzip/igzip_icf_body_h1_gr_bt.asm index 51871c5..c74a24d 100644 --- a/igzip/igzip_icf_body_h1_gr_bt.asm +++ b/igzip/igzip_icf_body_h1_gr_bt.asm @@ -164,6 +164,7 @@ section .text ; arg 1: rcx: addr of stream global isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : + endbranch %ifidn __OUTPUT_FORMAT__, elf64 mov rcx, rdi %endif diff --git a/igzip/igzip_icf_finish.asm b/igzip/igzip_icf_finish.asm index b9f88a9..231ac06 100644 --- a/igzip/igzip_icf_finish.asm +++ b/igzip/igzip_icf_finish.asm @@ -102,6 +102,7 @@ section .text ; arg 1: rcx: addr of stream global isal_deflate_icf_finish_ %+ METHOD %+ _01 isal_deflate_icf_finish_ %+ METHOD %+ _01: + endbranch PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 sub rsp, stack_size diff --git a/igzip/igzip_set_long_icf_fg_04.asm b/igzip/igzip_set_long_icf_fg_04.asm index 070e614..09fcb64 100644 --- a/igzip/igzip_set_long_icf_fg_04.asm +++ b/igzip/igzip_set_long_icf_fg_04.asm @@ -122,7 +122,7 @@ default rel add rsp, stack_size %endm %else -%define func(x) x: +%define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -141,6 +141,7 @@ section .text global set_long_icf_fg_04 func(set_long_icf_fg_04) + endbranch FUNC_SAVE lea end_in, [next_in + arg3] diff --git a/igzip/igzip_set_long_icf_fg_06.asm b/igzip/igzip_set_long_icf_fg_06.asm index b36871c..3152ef4 100644 --- a/igzip/igzip_set_long_icf_fg_06.asm +++ b/igzip/igzip_set_long_icf_fg_06.asm @@ -129,7 +129,7 @@ add rsp, stack_size %endm %else -%define func(x) x: +%define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -148,6 +148,7 @@ section .text global set_long_icf_fg_06 func(set_long_icf_fg_06) + endbranch FUNC_SAVE lea end_in, [next_in + arg3] diff --git a/igzip/igzip_update_histogram.asm b/igzip/igzip_update_histogram.asm index e1939ad..698c8be 100644 --- a/igzip/igzip_update_histogram.asm +++ b/igzip/igzip_update_histogram.asm @@ -256,6 +256,7 @@ section .text ; void isal_update_histogram global isal_update_histogram_ %+ ARCH isal_update_histogram_ %+ ARCH %+ : + endbranch FUNC_SAVE %ifnidn file_start, arg0 diff --git a/igzip/proc_heap.asm b/igzip/proc_heap.asm index 5ed9c8e..ea9365a 100644 --- a/igzip/proc_heap.asm +++ b/igzip/proc_heap.asm @@ -60,6 +60,7 @@ section .text global build_huff_tree build_huff_tree: + endbranch %ifidn __OUTPUT_FORMAT__, win64 push rsi push rdi @@ -108,6 +109,7 @@ build_huff_tree: align 32 global build_heap build_heap: + endbranch %ifidn __OUTPUT_FORMAT__, win64 push rsi push rdi diff --git a/include/multibinary.asm b/include/multibinary.asm index 16838cb..588352a 100644 --- a/include/multibinary.asm +++ b/include/multibinary.asm @@ -71,10 +71,12 @@ section .text mk_global %1, function %1_mbinit: + endbranch ;;; only called the first time to setup hardware match call %1_dispatch_init ;;; falls thru to execute the hw optimized code %1: + endbranch jmp mbin_ptr_sz [%1_dispatched] %endmacro diff --git a/include/reg_sizes.asm b/include/reg_sizes.asm index 37d61f8..b7ad842 100644 --- a/include/reg_sizes.asm +++ b/include/reg_sizes.asm @@ -200,9 +200,22 @@ section .note.GNU-stack noalloc noexec nowrite progbits section .text %endif %ifidn __OUTPUT_FORMAT__,elf64 + %define __x86_64__ section .note.GNU-stack noalloc noexec nowrite progbits section .text %endif +%ifidn __OUTPUT_FORMAT__,win64 + %define __x86_64__ +%endif +%ifidn __OUTPUT_FORMAT__,macho64 + %define __x86_64__ +%endif + +%ifdef __x86_64__ + %define endbranch db 0xf3, 0x0f, 0x1e, 0xfa +%else + %define endbranch db 0xf3, 0x0f, 0x1e, 0xfb +%endif %ifdef REL_TEXT %define WRT_OPT diff --git a/mem/mem_zero_detect_avx.asm b/mem/mem_zero_detect_avx.asm index e85e08d..1b5de84 100644 --- a/mem/mem_zero_detect_avx.asm +++ b/mem/mem_zero_detect_avx.asm @@ -40,7 +40,7 @@ %define tmpb r11b %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/mem/mem_zero_detect_sse.asm b/mem/mem_zero_detect_sse.asm index 78350aa..c84f0f0 100644 --- a/mem/mem_zero_detect_sse.asm +++ b/mem/mem_zero_detect_sse.asm @@ -40,7 +40,7 @@ %define tmpb r11b %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/raid/pq_check_sse.asm b/raid/pq_check_sse.asm index ca32051..f2bc8a6 100644 --- a/raid/pq_check_sse.asm +++ b/raid/pq_check_sse.asm @@ -46,7 +46,7 @@ %define tmp r11 %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/raid/pq_check_sse_i32.asm b/raid/pq_check_sse_i32.asm index f05d43a..3271c03 100644 --- a/raid/pq_check_sse_i32.asm +++ b/raid/pq_check_sse_i32.asm @@ -46,7 +46,7 @@ %define tmp r11 %define return rax %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE @@ -79,7 +79,7 @@ %define arg1 ecx %define return eax %define PS 4 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp+8+PS*x] %define arg2 edi ; must sav/restore %define arg3 esi diff --git a/raid/pq_gen_avx.asm b/raid/pq_gen_avx.asm index 57d2b22..db4bcfb 100644 --- a/raid/pq_gen_avx.asm +++ b/raid/pq_gen_avx.asm @@ -46,7 +46,7 @@ %define tmp r11 %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/raid/pq_gen_avx2.asm b/raid/pq_gen_avx2.asm index 7def9ea..a0bf0cc 100644 --- a/raid/pq_gen_avx2.asm +++ b/raid/pq_gen_avx2.asm @@ -46,7 +46,7 @@ %define tmp r11 %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/raid/pq_gen_avx512.asm b/raid/pq_gen_avx512.asm index 9ec6584..179ad5c 100644 --- a/raid/pq_gen_avx512.asm +++ b/raid/pq_gen_avx512.asm @@ -49,7 +49,7 @@ %define tmp r11 %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/raid/pq_gen_sse.asm b/raid/pq_gen_sse.asm index 4c5a349..b6d5148 100644 --- a/raid/pq_gen_sse.asm +++ b/raid/pq_gen_sse.asm @@ -46,7 +46,7 @@ %define tmp r11 %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/raid/pq_gen_sse_i32.asm b/raid/pq_gen_sse_i32.asm index 7a918f4..8dabb78 100644 --- a/raid/pq_gen_sse_i32.asm +++ b/raid/pq_gen_sse_i32.asm @@ -46,7 +46,7 @@ %define tmp r11 %define return rax %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE @@ -78,7 +78,7 @@ %define arg1 ecx %define return eax %define PS 4 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp+8+PS*x] %define arg2 edi ; must sav/restore %define arg3 esi diff --git a/raid/raid_multibinary.asm b/raid/raid_multibinary.asm index c84e5ef..47ef1e3 100644 --- a/raid/raid_multibinary.asm +++ b/raid/raid_multibinary.asm @@ -74,8 +74,10 @@ section .text ;;;; mk_global pq_check, function pq_check_mbinit: + endbranch call pq_check_dispatch_init pq_check: + endbranch jmp qword [pq_check_dispatched] pq_check_dispatch_init: @@ -106,8 +108,10 @@ pq_check_dispatch_init: ;;;; mk_global xor_check, function xor_check_mbinit: + endbranch call xor_check_dispatch_init xor_check: + endbranch jmp qword [xor_check_dispatched] xor_check_dispatch_init: diff --git a/raid/xor_check_sse.asm b/raid/xor_check_sse.asm index 9620412..a5fe0b2 100644 --- a/raid/xor_check_sse.asm +++ b/raid/xor_check_sse.asm @@ -49,7 +49,7 @@ %define tmp3 arg4 %define return rax %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE @@ -88,7 +88,7 @@ %define tmp3 edx %define return eax %define PS 4 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp+8+PS*x] %define arg2 edi ; must sav/restore %define arg3 esi diff --git a/raid/xor_gen_avx.asm b/raid/xor_gen_avx.asm index cddd539..b5527b2 100644 --- a/raid/xor_gen_avx.asm +++ b/raid/xor_gen_avx.asm @@ -45,7 +45,7 @@ %define arg5 r9 %define tmp r11 %define tmp3 arg4 - %define func(x) x: + %define func(x) x: endbranch %define return rax %define FUNC_SAVE %define FUNC_RESTORE diff --git a/raid/xor_gen_avx512.asm b/raid/xor_gen_avx512.asm index 552c590..5b07868 100644 --- a/raid/xor_gen_avx512.asm +++ b/raid/xor_gen_avx512.asm @@ -47,7 +47,7 @@ %define arg5 r9 %define tmp r11 %define tmp3 arg4 - %define func(x) x: + %define func(x) x: endbranch %define return rax %define FUNC_SAVE %define FUNC_RESTORE diff --git a/raid/xor_gen_sse.asm b/raid/xor_gen_sse.asm index 7509548..f31ae63 100644 --- a/raid/xor_gen_sse.asm +++ b/raid/xor_gen_sse.asm @@ -49,7 +49,7 @@ %define tmp3 arg4 %define return rax %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE @@ -88,7 +88,7 @@ %define tmp3 edx %define return eax %define PS 4 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp+8+PS*x] %define arg2 edi ; must sav/restore %define arg3 esi