From cd888f01a447dd04c3a8b50362079648d432d2ca Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 22 May 2020 10:46:50 -0700 Subject: [PATCH] x86: Add ENDBR32/ENDBR64 at function entries for Intel CET To support Intel CET, all indirect branch targets must start with ENDBR32/ENDBR64. Here is a patch to define endbranch and add it to function entries in x86 assembly codes which are indirect branch targets as discovered by running testsuite on Intel CET machine and visual inspection. Verified with $ CC="gcc -Wl,-z,cet-report=error -fcf-protection" CXX="g++ -Wl,-z,cet-report=error -fcf-protection" .../configure x86_64-linux $ make -j8 $ make -j8 check with both nasm and yasm on both CET and non-CET machines. Change-Id: I9822578e7294fb5043a64ab7de5c41de81a7d337 Signed-off-by: H.J. Lu --- crc/crc16_t10dif_01.asm | 1 + crc/crc16_t10dif_02.asm | 1 + crc/crc16_t10dif_by16_10.asm | 1 + crc/crc16_t10dif_by4.asm | 1 + crc/crc16_t10dif_copy_by4.asm | 1 + crc/crc16_t10dif_copy_by4_02.asm | 1 + crc/crc32_gzip_refl_by16_10.asm | 1 + crc/crc32_gzip_refl_by8.asm | 1 + crc/crc32_gzip_refl_by8_02.asm | 1 + crc/crc32_ieee_01.asm | 1 + crc/crc32_ieee_02.asm | 1 + crc/crc32_ieee_by16_10.asm | 1 + crc/crc32_ieee_by4.asm | 1 + crc/crc32_iscsi_00.asm | 1 + crc/crc32_iscsi_01.asm | 2 ++ crc/crc64_ecma_norm_by8.asm | 1 + crc/crc64_ecma_refl_by8.asm | 1 + crc/crc64_iso_norm_by16_10.asm | 1 + crc/crc64_iso_norm_by8.asm | 1 + crc/crc64_iso_refl_by16_10.asm | 1 + crc/crc64_iso_refl_by8.asm | 1 + crc/crc64_jones_norm_by8.asm | 1 + crc/crc64_jones_refl_by8.asm | 1 + crc/crc_multibinary.asm | 6 ++++++ erasure_code/gf_2vect_dot_prod_avx.asm | 4 ++-- erasure_code/gf_2vect_dot_prod_avx2.asm | 4 ++-- erasure_code/gf_2vect_dot_prod_avx512.asm | 2 +- erasure_code/gf_2vect_dot_prod_sse.asm | 4 ++-- erasure_code/gf_2vect_mad_avx.asm | 2 +- erasure_code/gf_2vect_mad_avx2.asm | 2 +- erasure_code/gf_2vect_mad_avx512.asm | 2 +- erasure_code/gf_2vect_mad_sse.asm | 2 +- erasure_code/gf_3vect_dot_prod_avx.asm | 4 ++-- erasure_code/gf_3vect_dot_prod_avx2.asm | 4 ++-- erasure_code/gf_3vect_dot_prod_avx512.asm | 2 +- erasure_code/gf_3vect_dot_prod_sse.asm | 4 ++-- erasure_code/gf_3vect_mad_avx.asm | 2 +- erasure_code/gf_3vect_mad_avx2.asm | 2 +- erasure_code/gf_3vect_mad_avx512.asm | 2 +- erasure_code/gf_3vect_mad_sse.asm | 2 +- erasure_code/gf_4vect_dot_prod_avx.asm | 4 ++-- erasure_code/gf_4vect_dot_prod_avx2.asm | 4 ++-- erasure_code/gf_4vect_dot_prod_avx512.asm | 2 +- erasure_code/gf_4vect_dot_prod_sse.asm | 4 ++-- erasure_code/gf_4vect_mad_avx.asm | 2 +- erasure_code/gf_4vect_mad_avx2.asm | 2 +- erasure_code/gf_4vect_mad_avx512.asm | 2 +- erasure_code/gf_4vect_mad_sse.asm | 2 +- erasure_code/gf_5vect_dot_prod_avx.asm | 2 +- erasure_code/gf_5vect_dot_prod_avx2.asm | 2 +- erasure_code/gf_5vect_dot_prod_avx512.asm | 2 +- erasure_code/gf_5vect_dot_prod_sse.asm | 2 +- erasure_code/gf_5vect_mad_avx.asm | 2 +- erasure_code/gf_5vect_mad_avx2.asm | 2 +- erasure_code/gf_5vect_mad_avx512.asm | 2 +- erasure_code/gf_5vect_mad_sse.asm | 2 +- erasure_code/gf_6vect_dot_prod_avx.asm | 2 +- erasure_code/gf_6vect_dot_prod_avx2.asm | 2 +- erasure_code/gf_6vect_dot_prod_avx512.asm | 2 +- erasure_code/gf_6vect_dot_prod_sse.asm | 2 +- erasure_code/gf_6vect_mad_avx.asm | 2 +- erasure_code/gf_6vect_mad_avx2.asm | 2 +- erasure_code/gf_6vect_mad_avx512.asm | 2 +- erasure_code/gf_6vect_mad_sse.asm | 2 +- erasure_code/gf_vect_dot_prod_avx.asm | 4 ++-- erasure_code/gf_vect_dot_prod_avx2.asm | 4 ++-- erasure_code/gf_vect_dot_prod_avx512.asm | 2 +- erasure_code/gf_vect_dot_prod_sse.asm | 4 ++-- erasure_code/gf_vect_mad_avx.asm | 2 +- erasure_code/gf_vect_mad_avx2.asm | 2 +- erasure_code/gf_vect_mad_avx512.asm | 2 +- erasure_code/gf_vect_mad_sse.asm | 2 +- erasure_code/gf_vect_mul_avx.asm | 2 +- erasure_code/gf_vect_mul_sse.asm | 2 +- igzip/adler32_avx2_4.asm | 2 +- igzip/adler32_sse.asm | 2 +- igzip/encode_df_04.asm | 1 + igzip/encode_df_06.asm | 1 + igzip/igzip_body.asm | 1 + igzip/igzip_decode_block_stateless.asm | 1 + igzip/igzip_deflate_hash.asm | 1 + igzip/igzip_finish.asm | 1 + igzip/igzip_gen_icf_map_lh1_04.asm | 3 ++- igzip/igzip_gen_icf_map_lh1_06.asm | 3 ++- igzip/igzip_icf_body_h1_gr_bt.asm | 1 + igzip/igzip_icf_finish.asm | 1 + igzip/igzip_set_long_icf_fg_04.asm | 3 ++- igzip/igzip_set_long_icf_fg_06.asm | 3 ++- igzip/igzip_update_histogram.asm | 1 + igzip/proc_heap.asm | 2 ++ include/multibinary.asm | 2 ++ include/reg_sizes.asm | 13 +++++++++++++ mem/mem_zero_detect_avx.asm | 2 +- mem/mem_zero_detect_sse.asm | 2 +- raid/pq_check_sse.asm | 2 +- raid/pq_check_sse_i32.asm | 4 ++-- raid/pq_gen_avx.asm | 2 +- raid/pq_gen_avx2.asm | 2 +- raid/pq_gen_avx512.asm | 2 +- raid/pq_gen_sse.asm | 2 +- raid/pq_gen_sse_i32.asm | 4 ++-- raid/raid_multibinary.asm | 4 ++++ raid/xor_check_sse.asm | 4 ++-- raid/xor_gen_avx.asm | 2 +- raid/xor_gen_avx512.asm | 2 +- raid/xor_gen_sse.asm | 4 ++-- 106 files changed, 149 insertions(+), 85 deletions(-) diff --git a/crc/crc16_t10dif_01.asm b/crc/crc16_t10dif_01.asm index 33f4555..536b6f3 100644 --- a/crc/crc16_t10dif_01.asm +++ b/crc/crc16_t10dif_01.asm @@ -75,6 +75,7 @@ section .text align 16 mk_global crc16_t10dif_01, function crc16_t10dif_01: + endbranch ; adjust the 16-bit initial_crc value, scale it to 32 bits shl arg1_low32, 16 diff --git a/crc/crc16_t10dif_02.asm b/crc/crc16_t10dif_02.asm index 157ac53..0e392af 100644 --- a/crc/crc16_t10dif_02.asm +++ b/crc/crc16_t10dif_02.asm @@ -75,6 +75,7 @@ section .text align 16 mk_global crc16_t10dif_02, function crc16_t10dif_02: + endbranch ; adjust the 16-bit initial_crc value, scale it to 32 bits shl arg1_low32, 16 diff --git a/crc/crc16_t10dif_by16_10.asm b/crc/crc16_t10dif_by16_10.asm index 479b635..27a2e02 100644 --- a/crc/crc16_t10dif_by16_10.asm +++ b/crc/crc16_t10dif_by16_10.asm @@ -84,6 +84,7 @@ section .text align 16 mk_global FUNCTION_NAME, function FUNCTION_NAME: + endbranch ; adjust the 16-bit initial_crc value, scale it to 32 bits shl arg1_low32, 16 diff --git a/crc/crc16_t10dif_by4.asm b/crc/crc16_t10dif_by4.asm index bde071a..1326eb2 100644 --- a/crc/crc16_t10dif_by4.asm +++ b/crc/crc16_t10dif_by4.asm @@ -68,6 +68,7 @@ section .text align 16 mk_global crc16_t10dif_by4, function crc16_t10dif_by4: + endbranch ; adjust the 16-bit initial_crc value, scale it to 32 bits shl arg1_low32, 16 diff --git a/crc/crc16_t10dif_copy_by4.asm b/crc/crc16_t10dif_copy_by4.asm index 0f82d69..b8a6838 100644 --- a/crc/crc16_t10dif_copy_by4.asm +++ b/crc/crc16_t10dif_copy_by4.asm @@ -71,6 +71,7 @@ section .text align 16 mk_global crc16_t10dif_copy_by4, function crc16_t10dif_copy_by4: + endbranch ; adjust the 16-bit initial_crc value, scale it to 32 bits shl arg1_low32, 16 diff --git a/crc/crc16_t10dif_copy_by4_02.asm b/crc/crc16_t10dif_copy_by4_02.asm index 1a7338f..254a187 100644 --- a/crc/crc16_t10dif_copy_by4_02.asm +++ b/crc/crc16_t10dif_copy_by4_02.asm @@ -71,6 +71,7 @@ section .text align 16 mk_global crc16_t10dif_copy_by4_02, function crc16_t10dif_copy_by4_02: + endbranch ; adjust the 16-bit initial_crc value, scale it to 32 bits shl arg1_low32, 16 diff --git a/crc/crc32_gzip_refl_by16_10.asm b/crc/crc32_gzip_refl_by16_10.asm index 69cb366..15280b8 100644 --- a/crc/crc32_gzip_refl_by16_10.asm +++ b/crc/crc32_gzip_refl_by16_10.asm @@ -94,6 +94,7 @@ section .text align 16 mk_global FUNCTION_NAME, function FUNCTION_NAME: + endbranch not arg1_low32 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc32_gzip_refl_by8.asm b/crc/crc32_gzip_refl_by8.asm index 780ae35..4384024 100644 --- a/crc/crc32_gzip_refl_by8.asm +++ b/crc/crc32_gzip_refl_by8.asm @@ -88,6 +88,7 @@ section .text align 16 mk_global crc32_gzip_refl_by8, function crc32_gzip_refl_by8: + endbranch ; unsigned long c = crc ^ 0xffffffffL; not arg1_low32 ; diff --git a/crc/crc32_gzip_refl_by8_02.asm b/crc/crc32_gzip_refl_by8_02.asm index bba5ae6..712fe87 100644 --- a/crc/crc32_gzip_refl_by8_02.asm +++ b/crc/crc32_gzip_refl_by8_02.asm @@ -88,6 +88,7 @@ section .text align 16 mk_global crc32_gzip_refl_by8_02, function crc32_gzip_refl_by8_02: + endbranch not arg1_low32 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc32_ieee_01.asm b/crc/crc32_ieee_01.asm index 5b9d465..368261d 100644 --- a/crc/crc32_ieee_01.asm +++ b/crc/crc32_ieee_01.asm @@ -74,6 +74,7 @@ section .text align 16 mk_global crc32_ieee_01, function crc32_ieee_01: + endbranch not arg1_low32 ;~init_crc diff --git a/crc/crc32_ieee_02.asm b/crc/crc32_ieee_02.asm index 411e923..95d53e8 100644 --- a/crc/crc32_ieee_02.asm +++ b/crc/crc32_ieee_02.asm @@ -74,6 +74,7 @@ section .text align 16 mk_global crc32_ieee_02, function crc32_ieee_02: + endbranch not arg1_low32 ;~init_crc diff --git a/crc/crc32_ieee_by16_10.asm b/crc/crc32_ieee_by16_10.asm index c6aa741..5c3f52a 100644 --- a/crc/crc32_ieee_by16_10.asm +++ b/crc/crc32_ieee_by16_10.asm @@ -84,6 +84,7 @@ section .text align 16 mk_global FUNCTION_NAME, function FUNCTION_NAME: + endbranch not arg1_low32 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc32_ieee_by4.asm b/crc/crc32_ieee_by4.asm index 2ce2289..f432640 100644 --- a/crc/crc32_ieee_by4.asm +++ b/crc/crc32_ieee_by4.asm @@ -76,6 +76,7 @@ section .text align 16 mk_global crc32_ieee_by4, function crc32_ieee_by4: + endbranch not arg1_low32 diff --git a/crc/crc32_iscsi_00.asm b/crc/crc32_iscsi_00.asm index e1ad903..1a5e029 100644 --- a/crc/crc32_iscsi_00.asm +++ b/crc/crc32_iscsi_00.asm @@ -155,6 +155,7 @@ default rel mk_global crc32_iscsi_00, function crc32_iscsi_00: + endbranch %ifidn __OUTPUT_FORMAT__, elf64 %define bufp rdi diff --git a/crc/crc32_iscsi_01.asm b/crc/crc32_iscsi_01.asm index 30adb04..e0f2b5e 100644 --- a/crc/crc32_iscsi_01.asm +++ b/crc/crc32_iscsi_01.asm @@ -52,6 +52,7 @@ default rel mk_global crc32_iscsi_01, function crc32_iscsi_01: + endbranch %ifidn __OUTPUT_FORMAT__, elf64 %define bufp rdi @@ -214,6 +215,7 @@ non_prefetch: %rep 128-1 CONCAT(crc_,i,:) + endbranch crc32 crc_init, qword [block_0 - i*8] crc32 crc1, qword [block_1 - i*8] crc32 crc2, qword [block_2 - i*8] diff --git a/crc/crc64_ecma_norm_by8.asm b/crc/crc64_ecma_norm_by8.asm index 5599d98..ca99e34 100644 --- a/crc/crc64_ecma_norm_by8.asm +++ b/crc/crc64_ecma_norm_by8.asm @@ -64,6 +64,7 @@ section .text align 16 mk_global crc64_ecma_norm_by8, function crc64_ecma_norm_by8: + endbranch not arg1 ;~init_crc diff --git a/crc/crc64_ecma_refl_by8.asm b/crc/crc64_ecma_refl_by8.asm index b641934..c09ddfa 100644 --- a/crc/crc64_ecma_refl_by8.asm +++ b/crc/crc64_ecma_refl_by8.asm @@ -70,6 +70,7 @@ section .text align 16 mk_global crc64_ecma_refl_by8, function crc64_ecma_refl_by8: + endbranch ; uint64_t c = crc ^ 0xffffffff,ffffffffL; not arg1 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc64_iso_norm_by16_10.asm b/crc/crc64_iso_norm_by16_10.asm index 28630a1..4eefbd3 100644 --- a/crc/crc64_iso_norm_by16_10.asm +++ b/crc/crc64_iso_norm_by16_10.asm @@ -71,6 +71,7 @@ section .text align 16 mk_global FUNCTION_NAME, function FUNCTION_NAME: + endbranch not arg1 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc64_iso_norm_by8.asm b/crc/crc64_iso_norm_by8.asm index 887fca8..16147d5 100644 --- a/crc/crc64_iso_norm_by8.asm +++ b/crc/crc64_iso_norm_by8.asm @@ -63,6 +63,7 @@ section .text align 16 mk_global crc64_iso_norm_by8, function crc64_iso_norm_by8: + endbranch not arg1 ;~init_crc diff --git a/crc/crc64_iso_refl_by16_10.asm b/crc/crc64_iso_refl_by16_10.asm index d58ac0a..e5d5a08 100644 --- a/crc/crc64_iso_refl_by16_10.asm +++ b/crc/crc64_iso_refl_by16_10.asm @@ -72,6 +72,7 @@ section .text align 16 mk_global FUNCTION_NAME, function FUNCTION_NAME: + endbranch not arg1 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc64_iso_refl_by8.asm b/crc/crc64_iso_refl_by8.asm index 3abc5da..b6dfcf0 100644 --- a/crc/crc64_iso_refl_by8.asm +++ b/crc/crc64_iso_refl_by8.asm @@ -67,6 +67,7 @@ section .text align 16 mk_global crc64_iso_refl_by8, function crc64_iso_refl_by8: + endbranch ; uint64_t c = crc ^ 0xffffffff,ffffffffL; not arg1 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc64_jones_norm_by8.asm b/crc/crc64_jones_norm_by8.asm index bc3b521..0cf8b4a 100644 --- a/crc/crc64_jones_norm_by8.asm +++ b/crc/crc64_jones_norm_by8.asm @@ -63,6 +63,7 @@ section .text align 16 mk_global crc64_jones_norm_by8, function crc64_jones_norm_by8: + endbranch not arg1 ;~init_crc diff --git a/crc/crc64_jones_refl_by8.asm b/crc/crc64_jones_refl_by8.asm index a9ea19a..eea9c8d 100644 --- a/crc/crc64_jones_refl_by8.asm +++ b/crc/crc64_jones_refl_by8.asm @@ -67,6 +67,7 @@ section .text align 16 mk_global crc64_jones_refl_by8, function crc64_jones_refl_by8: + endbranch ; uint64_t c = crc ^ 0xffffffff,ffffffffL; not arg1 sub rsp, VARIABLE_OFFSET diff --git a/crc/crc_multibinary.asm b/crc/crc_multibinary.asm index b1f425a..8b9d7bd 100644 --- a/crc/crc_multibinary.asm +++ b/crc/crc_multibinary.asm @@ -81,8 +81,10 @@ section .text ;;;; mk_global crc32_iscsi, function crc32_iscsi_mbinit: + endbranch call crc32_iscsi_dispatch_init crc32_iscsi: + endbranch jmp qword [crc32_iscsi_dispatched] crc32_iscsi_dispatch_init: @@ -115,8 +117,10 @@ crc32_iscsi_dispatch_init: ;;;; mk_global crc32_ieee, function crc32_ieee_mbinit: + endbranch call crc32_ieee_dispatch_init crc32_ieee: + endbranch jmp qword [crc32_ieee_dispatched] crc32_ieee_dispatch_init: @@ -194,8 +198,10 @@ crc32_ieee_dispatch_init: ;;;; mk_global crc16_t10dif, function crc16_t10dif_mbinit: + endbranch call crc16_t10dif_dispatch_init crc16_t10dif: + endbranch jmp qword [crc16_t10dif_dispatched] crc16_t10dif_dispatch_init: diff --git a/erasure_code/gf_2vect_dot_prod_avx.asm b/erasure_code/gf_2vect_dot_prod_avx.asm index f512d7d..cfbc2eb 100644 --- a/erasure_code/gf_2vect_dot_prod_avx.asm +++ b/erasure_code/gf_2vect_dot_prod_avx.asm @@ -52,7 +52,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro @@ -127,7 +127,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_2vect_dot_prod_avx2.asm b/erasure_code/gf_2vect_dot_prod_avx2.asm index ba704d0..a06f67a 100644 --- a/erasure_code/gf_2vect_dot_prod_avx2.asm +++ b/erasure_code/gf_2vect_dot_prod_avx2.asm @@ -54,7 +54,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro @@ -131,7 +131,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_2vect_dot_prod_avx512.asm b/erasure_code/gf_2vect_dot_prod_avx512.asm index 2444216..92d7e9d 100644 --- a/erasure_code/gf_2vect_dot_prod_avx512.asm +++ b/erasure_code/gf_2vect_dot_prod_avx512.asm @@ -50,7 +50,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro diff --git a/erasure_code/gf_2vect_dot_prod_sse.asm b/erasure_code/gf_2vect_dot_prod_sse.asm index 7e1006b..f7e44e7 100644 --- a/erasure_code/gf_2vect_dot_prod_sse.asm +++ b/erasure_code/gf_2vect_dot_prod_sse.asm @@ -52,7 +52,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro @@ -127,7 +127,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_2vect_mad_avx.asm b/erasure_code/gf_2vect_mad_avx.asm index 65af8b0..995c36b 100644 --- a/erasure_code/gf_2vect_mad_avx.asm +++ b/erasure_code/gf_2vect_mad_avx.asm @@ -97,7 +97,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_2vect_mad_avx2.asm b/erasure_code/gf_2vect_mad_avx2.asm index f4c1cae..751677d 100644 --- a/erasure_code/gf_2vect_mad_avx2.asm +++ b/erasure_code/gf_2vect_mad_avx2.asm @@ -104,7 +104,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_2vect_mad_avx512.asm b/erasure_code/gf_2vect_mad_avx512.asm index 5a35a89..ce37248 100644 --- a/erasure_code/gf_2vect_mad_avx512.asm +++ b/erasure_code/gf_2vect_mad_avx512.asm @@ -45,7 +45,7 @@ %define tmp r11 %define tmp2 r10 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_2vect_mad_sse.asm b/erasure_code/gf_2vect_mad_sse.asm index c85b431..2bff82f 100644 --- a/erasure_code/gf_2vect_mad_sse.asm +++ b/erasure_code/gf_2vect_mad_sse.asm @@ -97,7 +97,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_3vect_dot_prod_avx.asm b/erasure_code/gf_3vect_dot_prod_avx.asm index deb44d0..79c7ed4 100644 --- a/erasure_code/gf_3vect_dot_prod_avx.asm +++ b/erasure_code/gf_3vect_dot_prod_avx.asm @@ -52,7 +52,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -139,7 +139,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_3vect_dot_prod_avx2.asm b/erasure_code/gf_3vect_dot_prod_avx2.asm index fa55dd6..606c3a1 100644 --- a/erasure_code/gf_3vect_dot_prod_avx2.asm +++ b/erasure_code/gf_3vect_dot_prod_avx2.asm @@ -54,7 +54,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -143,7 +143,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_3vect_dot_prod_avx512.asm b/erasure_code/gf_3vect_dot_prod_avx512.asm index eecde81..81e96f2 100644 --- a/erasure_code/gf_3vect_dot_prod_avx512.asm +++ b/erasure_code/gf_3vect_dot_prod_avx512.asm @@ -53,7 +53,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_3vect_dot_prod_sse.asm b/erasure_code/gf_3vect_dot_prod_sse.asm index 2b13e71..d52c72b 100644 --- a/erasure_code/gf_3vect_dot_prod_sse.asm +++ b/erasure_code/gf_3vect_dot_prod_sse.asm @@ -52,7 +52,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -139,7 +139,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_3vect_mad_avx.asm b/erasure_code/gf_3vect_mad_avx.asm index 4aea710..13963f6 100644 --- a/erasure_code/gf_3vect_mad_avx.asm +++ b/erasure_code/gf_3vect_mad_avx.asm @@ -97,7 +97,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_3vect_mad_avx2.asm b/erasure_code/gf_3vect_mad_avx2.asm index e8071dd..797d954 100644 --- a/erasure_code/gf_3vect_mad_avx2.asm +++ b/erasure_code/gf_3vect_mad_avx2.asm @@ -103,7 +103,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_3vect_mad_avx512.asm b/erasure_code/gf_3vect_mad_avx512.asm index b8b8d9b..bc61900 100644 --- a/erasure_code/gf_3vect_mad_avx512.asm +++ b/erasure_code/gf_3vect_mad_avx512.asm @@ -44,7 +44,7 @@ %define arg5 r9 %define tmp r11 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_3vect_mad_sse.asm b/erasure_code/gf_3vect_mad_sse.asm index 10744ec..c0fd0b9 100644 --- a/erasure_code/gf_3vect_mad_sse.asm +++ b/erasure_code/gf_3vect_mad_sse.asm @@ -96,7 +96,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_4vect_dot_prod_avx.asm b/erasure_code/gf_4vect_dot_prod_avx.asm index f436048..bad8692 100644 --- a/erasure_code/gf_4vect_dot_prod_avx.asm +++ b/erasure_code/gf_4vect_dot_prod_avx.asm @@ -54,7 +54,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -159,7 +159,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_4vect_dot_prod_avx2.asm b/erasure_code/gf_4vect_dot_prod_avx2.asm index 0c7ae4e..e422e28 100644 --- a/erasure_code/gf_4vect_dot_prod_avx2.asm +++ b/erasure_code/gf_4vect_dot_prod_avx2.asm @@ -56,7 +56,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -163,7 +163,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_4vect_dot_prod_avx512.asm b/erasure_code/gf_4vect_dot_prod_avx512.asm index 6d67426..9d32973 100644 --- a/erasure_code/gf_4vect_dot_prod_avx512.asm +++ b/erasure_code/gf_4vect_dot_prod_avx512.asm @@ -55,7 +55,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_4vect_dot_prod_sse.asm b/erasure_code/gf_4vect_dot_prod_sse.asm index 25134c7..25b5cff 100644 --- a/erasure_code/gf_4vect_dot_prod_sse.asm +++ b/erasure_code/gf_4vect_dot_prod_sse.asm @@ -54,7 +54,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -159,7 +159,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define var(x) [ebp - PS - PS*x] diff --git a/erasure_code/gf_4vect_mad_avx.asm b/erasure_code/gf_4vect_mad_avx.asm index 284c76b..3a00623 100644 --- a/erasure_code/gf_4vect_mad_avx.asm +++ b/erasure_code/gf_4vect_mad_avx.asm @@ -103,7 +103,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro diff --git a/erasure_code/gf_4vect_mad_avx2.asm b/erasure_code/gf_4vect_mad_avx2.asm index bf6cc7e..e1cf910 100644 --- a/erasure_code/gf_4vect_mad_avx2.asm +++ b/erasure_code/gf_4vect_mad_avx2.asm @@ -101,7 +101,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_4vect_mad_avx512.asm b/erasure_code/gf_4vect_mad_avx512.asm index 3948ab1..77dc76b 100644 --- a/erasure_code/gf_4vect_mad_avx512.asm +++ b/erasure_code/gf_4vect_mad_avx512.asm @@ -44,7 +44,7 @@ %define arg5 r9 %define tmp r11 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_4vect_mad_sse.asm b/erasure_code/gf_4vect_mad_sse.asm index 377b31f..d5efc97 100644 --- a/erasure_code/gf_4vect_mad_sse.asm +++ b/erasure_code/gf_4vect_mad_sse.asm @@ -103,7 +103,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro diff --git a/erasure_code/gf_5vect_dot_prod_avx.asm b/erasure_code/gf_5vect_dot_prod_avx.asm index 3226dde..a5bdb2a 100644 --- a/erasure_code/gf_5vect_dot_prod_avx.asm +++ b/erasure_code/gf_5vect_dot_prod_avx.asm @@ -51,7 +51,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_5vect_dot_prod_avx2.asm b/erasure_code/gf_5vect_dot_prod_avx2.asm index 4bee087..d019e97 100644 --- a/erasure_code/gf_5vect_dot_prod_avx2.asm +++ b/erasure_code/gf_5vect_dot_prod_avx2.asm @@ -53,7 +53,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_5vect_dot_prod_avx512.asm b/erasure_code/gf_5vect_dot_prod_avx512.asm index e955ea5..1cca65b 100644 --- a/erasure_code/gf_5vect_dot_prod_avx512.asm +++ b/erasure_code/gf_5vect_dot_prod_avx512.asm @@ -57,7 +57,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_5vect_dot_prod_sse.asm b/erasure_code/gf_5vect_dot_prod_sse.asm index 5ff9460..c96bed5 100644 --- a/erasure_code/gf_5vect_dot_prod_sse.asm +++ b/erasure_code/gf_5vect_dot_prod_sse.asm @@ -51,7 +51,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_5vect_mad_avx.asm b/erasure_code/gf_5vect_mad_avx.asm index ccdbc6e..e9e246c 100644 --- a/erasure_code/gf_5vect_mad_avx.asm +++ b/erasure_code/gf_5vect_mad_avx.asm @@ -107,7 +107,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_5vect_mad_avx2.asm b/erasure_code/gf_5vect_mad_avx2.asm index ac61437..87038a7 100644 --- a/erasure_code/gf_5vect_mad_avx2.asm +++ b/erasure_code/gf_5vect_mad_avx2.asm @@ -103,7 +103,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_5vect_mad_avx512.asm b/erasure_code/gf_5vect_mad_avx512.asm index 5de47d1..e2a1455 100644 --- a/erasure_code/gf_5vect_mad_avx512.asm +++ b/erasure_code/gf_5vect_mad_avx512.asm @@ -45,7 +45,7 @@ %define tmp r11 %define tmp2 r10 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_5vect_mad_sse.asm b/erasure_code/gf_5vect_mad_sse.asm index fc99aaf..17760d0 100644 --- a/erasure_code/gf_5vect_mad_sse.asm +++ b/erasure_code/gf_5vect_mad_sse.asm @@ -107,7 +107,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_6vect_dot_prod_avx.asm b/erasure_code/gf_6vect_dot_prod_avx.asm index 1f9df8d..7604711 100644 --- a/erasure_code/gf_6vect_dot_prod_avx.asm +++ b/erasure_code/gf_6vect_dot_prod_avx.asm @@ -51,7 +51,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_6vect_dot_prod_avx2.asm b/erasure_code/gf_6vect_dot_prod_avx2.asm index ccb4e77..5885d97 100644 --- a/erasure_code/gf_6vect_dot_prod_avx2.asm +++ b/erasure_code/gf_6vect_dot_prod_avx2.asm @@ -53,7 +53,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_6vect_dot_prod_avx512.asm b/erasure_code/gf_6vect_dot_prod_avx512.asm index 6ebfd26..bb25e67 100644 --- a/erasure_code/gf_6vect_dot_prod_avx512.asm +++ b/erasure_code/gf_6vect_dot_prod_avx512.asm @@ -57,7 +57,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_6vect_dot_prod_sse.asm b/erasure_code/gf_6vect_dot_prod_sse.asm index 51bd116..41176bb 100644 --- a/erasure_code/gf_6vect_dot_prod_sse.asm +++ b/erasure_code/gf_6vect_dot_prod_sse.asm @@ -51,7 +51,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_6vect_mad_avx.asm b/erasure_code/gf_6vect_mad_avx.asm index 4e20dbb..c9ce490 100644 --- a/erasure_code/gf_6vect_mad_avx.asm +++ b/erasure_code/gf_6vect_mad_avx.asm @@ -111,7 +111,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_6vect_mad_avx2.asm b/erasure_code/gf_6vect_mad_avx2.asm index 45d750e..8f94c6a 100644 --- a/erasure_code/gf_6vect_mad_avx2.asm +++ b/erasure_code/gf_6vect_mad_avx2.asm @@ -107,7 +107,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro diff --git a/erasure_code/gf_6vect_mad_avx512.asm b/erasure_code/gf_6vect_mad_avx512.asm index 6ae11f3..c2383a2 100644 --- a/erasure_code/gf_6vect_mad_avx512.asm +++ b/erasure_code/gf_6vect_mad_avx512.asm @@ -46,7 +46,7 @@ %define tmp2 r10 %define tmp3 r12 ;must be saved and restored %define return rax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 %endmacro diff --git a/erasure_code/gf_6vect_mad_sse.asm b/erasure_code/gf_6vect_mad_sse.asm index 695fd6b..f33ec06 100644 --- a/erasure_code/gf_6vect_mad_sse.asm +++ b/erasure_code/gf_6vect_mad_sse.asm @@ -113,7 +113,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/erasure_code/gf_vect_dot_prod_avx.asm b/erasure_code/gf_vect_dot_prod_avx.asm index 179e985..7bd8700 100644 --- a/erasure_code/gf_vect_dot_prod_avx.asm +++ b/erasure_code/gf_vect_dot_prod_avx.asm @@ -48,7 +48,7 @@ %endmacro %define SSTR SLDR %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif @@ -106,7 +106,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define trans ecx ;trans is for the variables in stack diff --git a/erasure_code/gf_vect_dot_prod_avx2.asm b/erasure_code/gf_vect_dot_prod_avx2.asm index 2cfa0f0..c385e3b 100644 --- a/erasure_code/gf_vect_dot_prod_avx2.asm +++ b/erasure_code/gf_vect_dot_prod_avx2.asm @@ -51,7 +51,7 @@ %endmacro %define SSTR SLDR %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif @@ -111,7 +111,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define trans ecx ;trans is for the variables in stack diff --git a/erasure_code/gf_vect_dot_prod_avx512.asm b/erasure_code/gf_vect_dot_prod_avx512.asm index 8a02fd8..37fe082 100644 --- a/erasure_code/gf_vect_dot_prod_avx512.asm +++ b/erasure_code/gf_vect_dot_prod_avx512.asm @@ -49,7 +49,7 @@ %define PS 8 %define LOG_PS 3 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_vect_dot_prod_sse.asm b/erasure_code/gf_vect_dot_prod_sse.asm index 602bd89..9b0a440 100644 --- a/erasure_code/gf_vect_dot_prod_sse.asm +++ b/erasure_code/gf_vect_dot_prod_sse.asm @@ -48,7 +48,7 @@ %endmacro %define SSTR SLDR %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif @@ -106,7 +106,7 @@ %define PS 4 %define LOG_PS 2 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp + PS*2 + PS*x] %define trans ecx ;trans is for the variables in stack diff --git a/erasure_code/gf_vect_mad_avx.asm b/erasure_code/gf_vect_mad_avx.asm index 2b0e623..448fbd7 100644 --- a/erasure_code/gf_vect_mad_avx.asm +++ b/erasure_code/gf_vect_mad_avx.asm @@ -82,7 +82,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_vect_mad_avx2.asm b/erasure_code/gf_vect_mad_avx2.asm index 9941fca..097d8fa 100644 --- a/erasure_code/gf_vect_mad_avx2.asm +++ b/erasure_code/gf_vect_mad_avx2.asm @@ -88,7 +88,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_vect_mad_avx512.asm b/erasure_code/gf_vect_mad_avx512.asm index 931e0cc..43982e3 100644 --- a/erasure_code/gf_vect_mad_avx512.asm +++ b/erasure_code/gf_vect_mad_avx512.asm @@ -44,7 +44,7 @@ %define arg5 r9 %define tmp r11 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_vect_mad_sse.asm b/erasure_code/gf_vect_mad_sse.asm index 1ea69fe..1044404 100644 --- a/erasure_code/gf_vect_mad_sse.asm +++ b/erasure_code/gf_vect_mad_sse.asm @@ -82,7 +82,7 @@ %define return rax %define return.w eax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/erasure_code/gf_vect_mul_avx.asm b/erasure_code/gf_vect_mul_avx.asm index 0186bbc..91f6d6d 100644 --- a/erasure_code/gf_vect_mul_avx.asm +++ b/erasure_code/gf_vect_mul_avx.asm @@ -42,7 +42,7 @@ %define arg5 r9 %define tmp r11 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE diff --git a/erasure_code/gf_vect_mul_sse.asm b/erasure_code/gf_vect_mul_sse.asm index bad257a..fefe7ef 100644 --- a/erasure_code/gf_vect_mul_sse.asm +++ b/erasure_code/gf_vect_mul_sse.asm @@ -42,7 +42,7 @@ %define arg5 r9 %define tmp r11 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE diff --git a/igzip/adler32_avx2_4.asm b/igzip/adler32_avx2_4.asm index 62c62bb..798310f 100644 --- a/igzip/adler32_avx2_4.asm +++ b/igzip/adler32_avx2_4.asm @@ -55,7 +55,7 @@ default rel %define b_d r8d %define end r13 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/igzip/adler32_sse.asm b/igzip/adler32_sse.asm index 6aea7cb..fc986cb 100644 --- a/igzip/adler32_sse.asm +++ b/igzip/adler32_sse.asm @@ -52,7 +52,7 @@ default rel %define b_d r8d %define end r13 - %define func(x) x: + %define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 diff --git a/igzip/encode_df_04.asm b/igzip/encode_df_04.asm index 2c52af8..5b913ae 100644 --- a/igzip/encode_df_04.asm +++ b/igzip/encode_df_04.asm @@ -177,6 +177,7 @@ section .text global encode_deflate_icf_ %+ ARCH encode_deflate_icf_ %+ ARCH: + endbranch FUNC_SAVE %ifnidn ptr, arg1 diff --git a/igzip/encode_df_06.asm b/igzip/encode_df_06.asm index aaec754..9e74795 100644 --- a/igzip/encode_df_06.asm +++ b/igzip/encode_df_06.asm @@ -190,6 +190,7 @@ section .text global encode_deflate_icf_ %+ ARCH encode_deflate_icf_ %+ ARCH: + endbranch FUNC_SAVE %ifnidn ptr, arg1 diff --git a/igzip/igzip_body.asm b/igzip/igzip_body.asm index d69b27c..b9620d6 100644 --- a/igzip/igzip_body.asm +++ b/igzip/igzip_body.asm @@ -143,6 +143,7 @@ section .text ; arg 1: rcx: addr of stream global isal_deflate_body_ %+ ARCH isal_deflate_body_ %+ ARCH %+ : + endbranch %ifidn __OUTPUT_FORMAT__, elf64 mov rcx, rdi %endif diff --git a/igzip/igzip_decode_block_stateless.asm b/igzip/igzip_decode_block_stateless.asm index 733194b..22f3bf2 100644 --- a/igzip/igzip_decode_block_stateless.asm +++ b/igzip/igzip_decode_block_stateless.asm @@ -465,6 +465,7 @@ section .text global decode_huffman_code_block_stateless_ %+ ARCH decode_huffman_code_block_stateless_ %+ ARCH %+ : + endbranch FUNC_SAVE diff --git a/igzip/igzip_deflate_hash.asm b/igzip/igzip_deflate_hash.asm index bcb0d5d..32a1482 100644 --- a/igzip/igzip_deflate_hash.asm +++ b/igzip/igzip_deflate_hash.asm @@ -104,6 +104,7 @@ section .text global isal_deflate_hash_crc_01 isal_deflate_hash_crc_01: + endbranch FUNC_SAVE neg f_i diff --git a/igzip/igzip_finish.asm b/igzip/igzip_finish.asm index fbf8839..2b539dd 100644 --- a/igzip/igzip_finish.asm +++ b/igzip/igzip_finish.asm @@ -94,6 +94,7 @@ section .text ; arg 1: rcx: addr of stream global isal_deflate_finish_01 isal_deflate_finish_01: + endbranch PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 sub rsp, stack_size diff --git a/igzip/igzip_gen_icf_map_lh1_04.asm b/igzip/igzip_gen_icf_map_lh1_04.asm index 077f56c..d188846 100644 --- a/igzip/igzip_gen_icf_map_lh1_04.asm +++ b/igzip/igzip_gen_icf_map_lh1_04.asm @@ -147,7 +147,7 @@ %define stack_size 16 %define local_storage_offset 0 -%define func(x) x: +%define func(x) x: endbranch %macro FUNC_SAVE 0 push rbp push r12 @@ -175,6 +175,7 @@ section .text global gen_icf_map_lh1_04 func(gen_icf_map_lh1_04) + endbranch FUNC_SAVE mov file_start, [stream + _next_in] diff --git a/igzip/igzip_gen_icf_map_lh1_06.asm b/igzip/igzip_gen_icf_map_lh1_06.asm index d134357..7985ab5 100644 --- a/igzip/igzip_gen_icf_map_lh1_06.asm +++ b/igzip/igzip_gen_icf_map_lh1_06.asm @@ -143,7 +143,7 @@ add rsp, stack_size %endm %else -%define func(x) x: +%define func(x) x: endbranch %macro FUNC_SAVE 0 push rbp push r12 @@ -166,6 +166,7 @@ section .text global gen_icf_map_lh1_06 func(gen_icf_map_lh1_06) + endbranch FUNC_SAVE mov file_start, [stream + _next_in] diff --git a/igzip/igzip_icf_body_h1_gr_bt.asm b/igzip/igzip_icf_body_h1_gr_bt.asm index 51871c5..c74a24d 100644 --- a/igzip/igzip_icf_body_h1_gr_bt.asm +++ b/igzip/igzip_icf_body_h1_gr_bt.asm @@ -164,6 +164,7 @@ section .text ; arg 1: rcx: addr of stream global isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : + endbranch %ifidn __OUTPUT_FORMAT__, elf64 mov rcx, rdi %endif diff --git a/igzip/igzip_icf_finish.asm b/igzip/igzip_icf_finish.asm index b9f88a9..231ac06 100644 --- a/igzip/igzip_icf_finish.asm +++ b/igzip/igzip_icf_finish.asm @@ -102,6 +102,7 @@ section .text ; arg 1: rcx: addr of stream global isal_deflate_icf_finish_ %+ METHOD %+ _01 isal_deflate_icf_finish_ %+ METHOD %+ _01: + endbranch PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 sub rsp, stack_size diff --git a/igzip/igzip_set_long_icf_fg_04.asm b/igzip/igzip_set_long_icf_fg_04.asm index 070e614..09fcb64 100644 --- a/igzip/igzip_set_long_icf_fg_04.asm +++ b/igzip/igzip_set_long_icf_fg_04.asm @@ -122,7 +122,7 @@ default rel add rsp, stack_size %endm %else -%define func(x) x: +%define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -141,6 +141,7 @@ section .text global set_long_icf_fg_04 func(set_long_icf_fg_04) + endbranch FUNC_SAVE lea end_in, [next_in + arg3] diff --git a/igzip/igzip_set_long_icf_fg_06.asm b/igzip/igzip_set_long_icf_fg_06.asm index b36871c..3152ef4 100644 --- a/igzip/igzip_set_long_icf_fg_06.asm +++ b/igzip/igzip_set_long_icf_fg_06.asm @@ -129,7 +129,7 @@ add rsp, stack_size %endm %else -%define func(x) x: +%define func(x) x: endbranch %macro FUNC_SAVE 0 push r12 push r13 @@ -148,6 +148,7 @@ section .text global set_long_icf_fg_06 func(set_long_icf_fg_06) + endbranch FUNC_SAVE lea end_in, [next_in + arg3] diff --git a/igzip/igzip_update_histogram.asm b/igzip/igzip_update_histogram.asm index e1939ad..698c8be 100644 --- a/igzip/igzip_update_histogram.asm +++ b/igzip/igzip_update_histogram.asm @@ -256,6 +256,7 @@ section .text ; void isal_update_histogram global isal_update_histogram_ %+ ARCH isal_update_histogram_ %+ ARCH %+ : + endbranch FUNC_SAVE %ifnidn file_start, arg0 diff --git a/igzip/proc_heap.asm b/igzip/proc_heap.asm index 5ed9c8e..ea9365a 100644 --- a/igzip/proc_heap.asm +++ b/igzip/proc_heap.asm @@ -60,6 +60,7 @@ section .text global build_huff_tree build_huff_tree: + endbranch %ifidn __OUTPUT_FORMAT__, win64 push rsi push rdi @@ -108,6 +109,7 @@ build_huff_tree: align 32 global build_heap build_heap: + endbranch %ifidn __OUTPUT_FORMAT__, win64 push rsi push rdi diff --git a/include/multibinary.asm b/include/multibinary.asm index 16838cb..588352a 100644 --- a/include/multibinary.asm +++ b/include/multibinary.asm @@ -71,10 +71,12 @@ section .text mk_global %1, function %1_mbinit: + endbranch ;;; only called the first time to setup hardware match call %1_dispatch_init ;;; falls thru to execute the hw optimized code %1: + endbranch jmp mbin_ptr_sz [%1_dispatched] %endmacro diff --git a/include/reg_sizes.asm b/include/reg_sizes.asm index 37d61f8..b7ad842 100644 --- a/include/reg_sizes.asm +++ b/include/reg_sizes.asm @@ -200,9 +200,22 @@ section .note.GNU-stack noalloc noexec nowrite progbits section .text %endif %ifidn __OUTPUT_FORMAT__,elf64 + %define __x86_64__ section .note.GNU-stack noalloc noexec nowrite progbits section .text %endif +%ifidn __OUTPUT_FORMAT__,win64 + %define __x86_64__ +%endif +%ifidn __OUTPUT_FORMAT__,macho64 + %define __x86_64__ +%endif + +%ifdef __x86_64__ + %define endbranch db 0xf3, 0x0f, 0x1e, 0xfa +%else + %define endbranch db 0xf3, 0x0f, 0x1e, 0xfb +%endif %ifdef REL_TEXT %define WRT_OPT diff --git a/mem/mem_zero_detect_avx.asm b/mem/mem_zero_detect_avx.asm index e85e08d..1b5de84 100644 --- a/mem/mem_zero_detect_avx.asm +++ b/mem/mem_zero_detect_avx.asm @@ -40,7 +40,7 @@ %define tmpb r11b %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/mem/mem_zero_detect_sse.asm b/mem/mem_zero_detect_sse.asm index 78350aa..c84f0f0 100644 --- a/mem/mem_zero_detect_sse.asm +++ b/mem/mem_zero_detect_sse.asm @@ -40,7 +40,7 @@ %define tmpb r11b %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/raid/pq_check_sse.asm b/raid/pq_check_sse.asm index ca32051..f2bc8a6 100644 --- a/raid/pq_check_sse.asm +++ b/raid/pq_check_sse.asm @@ -46,7 +46,7 @@ %define tmp r11 %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/raid/pq_check_sse_i32.asm b/raid/pq_check_sse_i32.asm index f05d43a..3271c03 100644 --- a/raid/pq_check_sse_i32.asm +++ b/raid/pq_check_sse_i32.asm @@ -46,7 +46,7 @@ %define tmp r11 %define return rax %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE @@ -79,7 +79,7 @@ %define arg1 ecx %define return eax %define PS 4 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp+8+PS*x] %define arg2 edi ; must sav/restore %define arg3 esi diff --git a/raid/pq_gen_avx.asm b/raid/pq_gen_avx.asm index 57d2b22..db4bcfb 100644 --- a/raid/pq_gen_avx.asm +++ b/raid/pq_gen_avx.asm @@ -46,7 +46,7 @@ %define tmp r11 %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/raid/pq_gen_avx2.asm b/raid/pq_gen_avx2.asm index 7def9ea..a0bf0cc 100644 --- a/raid/pq_gen_avx2.asm +++ b/raid/pq_gen_avx2.asm @@ -46,7 +46,7 @@ %define tmp r11 %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/raid/pq_gen_avx512.asm b/raid/pq_gen_avx512.asm index 9ec6584..179ad5c 100644 --- a/raid/pq_gen_avx512.asm +++ b/raid/pq_gen_avx512.asm @@ -49,7 +49,7 @@ %define tmp r11 %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/raid/pq_gen_sse.asm b/raid/pq_gen_sse.asm index 4c5a349..b6d5148 100644 --- a/raid/pq_gen_sse.asm +++ b/raid/pq_gen_sse.asm @@ -46,7 +46,7 @@ %define tmp r11 %define tmp3 arg4 %define return rax - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE %endif diff --git a/raid/pq_gen_sse_i32.asm b/raid/pq_gen_sse_i32.asm index 7a918f4..8dabb78 100644 --- a/raid/pq_gen_sse_i32.asm +++ b/raid/pq_gen_sse_i32.asm @@ -46,7 +46,7 @@ %define tmp r11 %define return rax %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE @@ -78,7 +78,7 @@ %define arg1 ecx %define return eax %define PS 4 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp+8+PS*x] %define arg2 edi ; must sav/restore %define arg3 esi diff --git a/raid/raid_multibinary.asm b/raid/raid_multibinary.asm index c84e5ef..47ef1e3 100644 --- a/raid/raid_multibinary.asm +++ b/raid/raid_multibinary.asm @@ -74,8 +74,10 @@ section .text ;;;; mk_global pq_check, function pq_check_mbinit: + endbranch call pq_check_dispatch_init pq_check: + endbranch jmp qword [pq_check_dispatched] pq_check_dispatch_init: @@ -106,8 +108,10 @@ pq_check_dispatch_init: ;;;; mk_global xor_check, function xor_check_mbinit: + endbranch call xor_check_dispatch_init xor_check: + endbranch jmp qword [xor_check_dispatched] xor_check_dispatch_init: diff --git a/raid/xor_check_sse.asm b/raid/xor_check_sse.asm index 9620412..a5fe0b2 100644 --- a/raid/xor_check_sse.asm +++ b/raid/xor_check_sse.asm @@ -49,7 +49,7 @@ %define tmp3 arg4 %define return rax %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE @@ -88,7 +88,7 @@ %define tmp3 edx %define return eax %define PS 4 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp+8+PS*x] %define arg2 edi ; must sav/restore %define arg3 esi diff --git a/raid/xor_gen_avx.asm b/raid/xor_gen_avx.asm index cddd539..b5527b2 100644 --- a/raid/xor_gen_avx.asm +++ b/raid/xor_gen_avx.asm @@ -45,7 +45,7 @@ %define arg5 r9 %define tmp r11 %define tmp3 arg4 - %define func(x) x: + %define func(x) x: endbranch %define return rax %define FUNC_SAVE %define FUNC_RESTORE diff --git a/raid/xor_gen_avx512.asm b/raid/xor_gen_avx512.asm index 552c590..5b07868 100644 --- a/raid/xor_gen_avx512.asm +++ b/raid/xor_gen_avx512.asm @@ -47,7 +47,7 @@ %define arg5 r9 %define tmp r11 %define tmp3 arg4 - %define func(x) x: + %define func(x) x: endbranch %define return rax %define FUNC_SAVE %define FUNC_RESTORE diff --git a/raid/xor_gen_sse.asm b/raid/xor_gen_sse.asm index 7509548..f31ae63 100644 --- a/raid/xor_gen_sse.asm +++ b/raid/xor_gen_sse.asm @@ -49,7 +49,7 @@ %define tmp3 arg4 %define return rax %define PS 8 - %define func(x) x: + %define func(x) x: endbranch %define FUNC_SAVE %define FUNC_RESTORE @@ -88,7 +88,7 @@ %define tmp3 edx %define return eax %define PS 4 - %define func(x) x: + %define func(x) x: endbranch %define arg(x) [ebp+8+PS*x] %define arg2 edi ; must sav/restore %define arg3 esi