mirror of
https://github.com/intel/isa-l.git
synced 2025-01-06 15:10:02 +01:00
cd888f01a4
To support Intel CET, all indirect branch targets must start with ENDBR32/ENDBR64. This patch defines endbranch and adds it to the entries of the x86 assembly functions that are indirect branch targets, as discovered by running the test suite on an Intel CET machine and by visual inspection.
Verified with:
$ CC="gcc -Wl,-z,cet-report=error -fcf-protection" CXX="g++ -Wl,-z,cet-report=error -fcf-protection" .../configure x86_64-linux
$ make -j8
$ make -j8 check
using both nasm and yasm, on both CET and non-CET machines.
Change-Id: I9822578e7294fb5043a64ab7de5c41de81a7d337
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
301 lines
7.9 KiB
NASM
301 lines
7.9 KiB
NASM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
|
|
;
|
|
; Redistribution and use in source and binary forms, with or without
|
|
; modification, are permitted provided that the following conditions
|
|
; are met:
|
|
; * Redistributions of source code must retain the above copyright
|
|
; notice, this list of conditions and the following disclaimer.
|
|
; * Redistributions in binary form must reproduce the above copyright
|
|
; notice, this list of conditions and the following disclaimer in
|
|
; the documentation and/or other materials provided with the
|
|
; distribution.
|
|
; * Neither the name of Intel Corporation nor the names of its
|
|
; contributors may be used to endorse or promote products derived
|
|
; from this software without specific prior written permission.
|
|
;
|
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
%include "reg_sizes.asm"
|
|
%include "lz0a_const.asm"
|
|
%include "data_struct2.asm"
|
|
%include "igzip_compare_types.asm"
|
|
%define NEQ 4

default rel

;; ---------------------------------------------------------------------
;; General-purpose register roles
;;
;; arg1..arg4 follow the integer-argument order of the target ABI
;; (Microsoft x64 when assembling for win64, System V AMD64 otherwise).
;; The extra scratch names live on registers not used by arg1..arg4.
;; `len` and `tmp2` are two names for the SAME register in both variants,
;; so only one of them can hold a live value at any point.
;; ---------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, win64
 %define arg1	rcx
 %define arg2	rdx
 %define arg3	r8
 %define arg4	r9
 %define dist	rsi
 %define len	rdi
 %define tmp2	rdi
%else
 %define arg1	rdi
 %define arg2	rsi
 %define arg3	rdx
 %define arg4	rcx
 %define dist	r9
 %define len	r8
 %define tmp2	r8
%endif

;; The argument registers are referred to by what they hold.
%define next_in			arg1
%define end_processed		arg2
%define end_in			arg3
%define match_lookup		arg4

;; ABI-independent scratch registers.  `dist_code` and `tmp3` are two
;; names for r13.
%define match_in		rax
%define match_offset		r10
%define tmp1			r11
%define end_processed_orig	r12
%define tmp3			r13
%define dist_code		r13

;; ---------------------------------------------------------------------
;; Vector register roles (one 256-bit register = 8 dword lanes)
;; ---------------------------------------------------------------------
;; ymm0-ymm8: working values
%define ymatch_lookup		ymm0
%define ymatch_lookup2		ymm1
%define ylens			ymm2
%define ycmp2			ymm3
%define ylens1			ymm4
%define ylens2			ymm5
%define ycmp			ymm6
%define ytmp1			ymm7
%define ytmp2			ymm8

;; ymm9-ymm15: named after the constant tables they are meant to hold
%define yvect_size		ymm9
%define ymax_len		ymm10
%define ytwofiftysix		ymm11
%define ynlen_mask		ymm12
%define ydists_mask		ymm13
%define ylong_lens		ymm14
%define ylens_mask		ymm15
|
|
|
|
;; ---------------------------------------------------------------------
;; Per-ABI entry/exit helpers
;;
;;   func(x)       - opens the function named x
;;   FUNC_SAVE     - saves every callee-saved register this file touches
;;   FUNC_RESTORE  - exact inverse of FUNC_SAVE; must be followed by `ret`
;;
;; func(x) only opens the function; it does NOT emit an ENDBR marker.
;; The entry point places an explicit `endbranch` right after func(...),
;; which covers both ABIs with exactly one marker.  (Previously the
;; non-win64 func(x) also expanded to `endbranch`, so two markers were
;; emitted back to back on ELF/Mach-O.)
;; ---------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, win64

;; Frame layout: 10 xmm slots (xmm6-xmm15), 4 GPR slots (rsi, rdi, r12,
;; r13), plus 8 bytes.  The trailing 8 presumably re-aligns rsp to 16
;; bytes after the return address push so the vmovdqa accesses below are
;; legal -- confirm if the frame is ever resized.
%define stack_size  10*16 + 4 * 8 + 8
%define func(x) proc_frame x

;; alloc_stack / save_reg / end_prolog are provided outside this file.
%macro FUNC_SAVE 0
	alloc_stack	stack_size
	vmovdqa	[rsp + 0*16], xmm6
	vmovdqa	[rsp + 1*16], xmm7
	vmovdqa	[rsp + 2*16], xmm8
	vmovdqa	[rsp + 3*16], xmm9
	vmovdqa	[rsp + 4*16], xmm10
	vmovdqa	[rsp + 5*16], xmm11
	vmovdqa	[rsp + 6*16], xmm12
	vmovdqa	[rsp + 7*16], xmm13
	vmovdqa	[rsp + 8*16], xmm14
	vmovdqa	[rsp + 9*16], xmm15
	save_reg	rsi, 10*16 + 0*8
	save_reg	rdi, 10*16 + 1*8
	save_reg	r12, 10*16 + 2*8
	save_reg	r13, 10*16 + 3*8
	end_prolog
%endm

%macro FUNC_RESTORE 0
	vmovdqa	xmm6, [rsp + 0*16]
	vmovdqa	xmm7, [rsp + 1*16]
	vmovdqa	xmm8, [rsp + 2*16]
	vmovdqa	xmm9, [rsp + 3*16]
	vmovdqa	xmm10, [rsp + 4*16]
	vmovdqa	xmm11, [rsp + 5*16]
	vmovdqa	xmm12, [rsp + 6*16]
	vmovdqa	xmm13, [rsp + 7*16]
	vmovdqa	xmm14, [rsp + 8*16]
	vmovdqa	xmm15, [rsp + 9*16]

	mov	rsi, [rsp + 10*16 + 0*8]
	mov	rdi, [rsp + 10*16 + 1*8]
	mov	r12, [rsp + 10*16 + 2*8]
	mov	r13, [rsp + 10*16 + 3*8]
	add	rsp, stack_size
%endm

%else

;; Non-Windows targets: a plain label; only r12 and r13 need saving.
%define func(x) x:

%macro FUNC_SAVE 0
	push	r12
	push	r13
%endm

;; Pops in the reverse order of FUNC_SAVE.
%macro FUNC_RESTORE 0
	pop	r13
	pop	r12
%endm

%endif
|
|
%define VECT_SIZE 8		; dword lanes handled per 256-bit vector

[bits 64]
default rel
section .text

;-----------------------------------------------------------------------
; set_long_icf_fg_04(arg1, arg2, arg3, arg4)
;
; Presumably called from C as
;   void set_long_icf_fg_04(uint8_t *next_in, uint64_t processed,
;                           uint64_t input_size, <icf entry> *match_lookup)
; -- confirm against the C prototype; only the register usage below is
; established by this file.
;
; In:    arg1 = next_in       input pointer
;        arg2 = byte count; next_in + arg2 is where scanning stops
;        arg3 = byte count; next_in + arg3 bounds how far a match may be
;               extended (further capped at end_processed + LA_STATELESS)
;        arg4 = match_lookup  array of ICF_CODE_BYTES-sized entries, one
;               per input byte starting at next_in
; Out:   nothing in registers; entries in match_lookup are rewritten
;        in place.
; Clobb: rax, r10, r11, the len/tmp2 and dist registers, ymm0-ymm15 and
;        flags.  Everything callee-saved that is touched is preserved by
;        FUNC_SAVE / FUNC_RESTORE.
;
; Outline: scan the entries VECT_SIZE at a time for a length field above
; long_len.  On a hit, decode that entry's distance, re-measure the match
; with compare_y, and write the resulting (decreasing) lengths over the
; following entries wherever they beat what is already stored.
;-----------------------------------------------------------------------
global set_long_icf_fg_04
func(set_long_icf_fg_04)
	endbranch			; entry must be a valid indirect-branch target under CET
	FUNC_SAVE

	lea	end_in, [next_in + arg3]		; end_in = next_in + arg3
	add	end_processed, next_in			; end_processed = next_in + arg2
	mov	end_processed_orig, end_processed	; kept for the tail pass
	lea	tmp1, [end_processed + LA_STATELESS]
	cmp	end_in, tmp1
	cmovg	end_in, tmp1				; end_in = min(end_in, tmp1), signed
	;; Main-loop bound: a whole vector of VECT_SIZE positions must end
	;; below the original end_processed.
	sub	end_processed, VECT_SIZE - 1

	;; Load the constant vectors once.
	vmovdqu	ylong_lens, [long_len]
	vmovdqu	ylens_mask, [len_mask]
	;; NOTE(review): ydists_mask is not read anywhere in the code visible
	;; in this file; the load looks dead unless compare_y uses it -- confirm.
	vmovdqu	ydists_mask, [dists_mask]
	vmovdqu	ynlen_mask, [nlen_mask]
	vmovdqu	yvect_size, [vect_size]
	vmovdqu	ymax_len, [max_len]
	vmovdqu	ytwofiftysix, [twofiftysix]
	vmovdqu	ymatch_lookup, [match_lookup]		; first VECT_SIZE entries

.fill_loop:
	;; ymatch_lookup2 = entries to examine now; ymatch_lookup = the next
	;; VECT_SIZE entries, loaded ahead of time.
	vmovdqu	ymatch_lookup2, ymatch_lookup
	vmovdqu	ymatch_lookup, [match_lookup + ICF_CODE_BYTES * VECT_SIZE]

	cmp	next_in, end_processed
	jae	.end_fill

.finish_entry:
	;; Flag lanes whose length field exceeds long_len (0x105).
	vpand	ylens, ymatch_lookup2, ylens_mask
	vpcmpgtd ycmp, ylens, ylong_lens
	vpmovmskb tmp1, ycmp			; 4 mask bits per dword lane

	;; Speculatively increment
	add	next_in, VECT_SIZE
	add	match_lookup, ICF_CODE_BYTES * VECT_SIZE

	test	tmp1, tmp1
	jz	.fill_loop			; no long match in this vector

	;; First flagged lane: bit index / 4 = lane index.
	tzcnt	match_offset, tmp1
	shr	match_offset, 2

	;; Undo the speculative advance and step to the flagged entry.
	lea	next_in, [next_in + match_offset - VECT_SIZE]
	lea	match_lookup, [match_lookup + ICF_CODE_BYTES * (match_offset - VECT_SIZE)]
	mov	dist %+ d, [match_lookup]		; raw entry (zero-extended)
	vmovd	ymatch_lookup2 %+ x, dist %+ d		; keep a copy for the rewrite below

	;; dist = dist_start[(entry >> DIST_OFFSET) & LIT_DIST_MASK]
	;;        + (entry >> EXTRA_BITS_OFFSET)
	mov	tmp1, dist
	shr	dist, DIST_OFFSET
	and	dist, LIT_DIST_MASK
	shr	tmp1, EXTRA_BITS_OFFSET
	lea	tmp2, [dist_start]		; RIP-relative base; cannot be combined with an index
	mov	dist %+ w, [tmp2 + 2 * dist]	; 16-bit table entry; upper bits already clear of the index
	add	dist, tmp1

	mov	match_in, next_in
	sub	match_in, dist			; match_in = next_in - dist

	;; compare_y is defined outside this file; called with len = 8 and
	;; tmp3 = end_in - next_in.  Presumably it extends len while the
	;; bytes at next_in and match_in agree, up to tmp3 -- confirm.
	mov	len, 8
	mov	tmp3, end_in
	sub	tmp3, next_in

	compare_y next_in, match_in, len, tmp3, tmp1, ytmp1, ytmp2

	;; ylens1[i] = len - i + 0xfe  for lanes i = 0..7
	vmovd	ylens1 %+ x, len %+ d
	vpbroadcastd ylens1, ylens1 %+ x
	vpsubd	ylens1, ylens1, [increment]
	vpaddd	ylens1, ylens1, [twofiftyfour]

	;; len = min(len, end_processed - next_in), signed
	mov	tmp3, end_processed
	sub	tmp3, next_in
	cmp	len, tmp3
	cmovg	len, tmp3

	;; Resume point for the scan: skip len positions and preload there.
	add	next_in, len
	lea	match_lookup, [match_lookup + ICF_CODE_BYTES * len]
	vmovdqu	ymatch_lookup, [match_lookup]

	;; All lanes = flagged entry with the bits outside nlen_mask cleared.
	vpbroadcastd ymatch_lookup2, ymatch_lookup2 %+ x
	vpand	ymatch_lookup2, ymatch_lookup2, ynlen_mask

	;; From here len is an index relative to the advanced match_lookup;
	;; -len addresses the flagged entry again.
	neg	len

.update_match_lookup:
	vpand	ylens2, ylens_mask, [match_lookup + ICF_CODE_BYTES * len]	; stored length fields

	;; ycmp = lanes where the new value beats the stored one AND is > 0x100
	vpcmpgtd ycmp, ylens1, ylens2
	vpcmpgtd ytmp1, ylens1, ytwofiftysix
	vpand	ycmp, ycmp, ytmp1
	vpmovmskb tmp1, ycmp

	;; ylens = min(ylens1, max_len), built from compare / andn / and / or
	vpcmpgtd ycmp2, ylens1, ymax_len
	vpandn	ylens, ycmp2, ylens1
	vpand	ycmp2, ymax_len, ycmp2
	vpor	ylens, ycmp2

	;; New entry = clamped length + preserved bits, in selected lanes only.
	vpaddd	ylens2, ylens, ymatch_lookup2
	vpand	ylens2, ylens2, ycmp

	;; Masked store: lanes with a clear ycmp are left untouched.
	vpmaskmovd [match_lookup + ICF_CODE_BYTES * len], ycmp, ylens2

	test	tmp1 %+ d, tmp1 %+ d
	jz	.fill_loop			; nothing was written: stop rewriting, resume the scan

	add	len, VECT_SIZE			; next VECT_SIZE entries
	vpsubd	ylens1, ylens1, yvect_size	; lengths shrink by VECT_SIZE

	jmp	.update_match_lookup

.end_fill:
	;; Tail pass: fewer than VECT_SIZE positions may remain before the
	;; original end_processed.
	mov	end_processed, end_processed_orig
	cmp	next_in, end_processed
	jge	.finish

	;; Zero the lanes at or beyond the remaining count so they can never
	;; be flagged, then reuse the main path.
	mov	tmp1, end_processed
	sub	tmp1, next_in
	vmovd	ytmp1 %+ x, tmp1 %+ d
	vpbroadcastd ytmp1, ytmp1 %+ x
	vpcmpgtd ytmp1, ytmp1, [increment]	; lane i kept iff remaining > i
	vpand	ymatch_lookup2, ymatch_lookup2, ytmp1
	jmp	.finish_entry

.finish:
	;; Clear the upper halves of the ymm registers before returning to the
	;; caller.  This sits before FUNC_RESTORE so that, on win64, nothing
	;; is inserted between the stack release and `ret`.
	vzeroupper
	FUNC_RESTORE
	ret

endproc_frame
|
|
|
|
section .data

align 64
;; 16-bit table read as dist_start[index] by the distance decode in
;; set_long_icf_fg_04.  The 30 non-zero values are the DEFLATE base
;; distances (1 .. 24577); the last two slots are zero.
dist_start:
	dw 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
	dw 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
	dw 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
	dw 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000

;; Everything below is one 256-bit vector (8 dwords) per label, meant to
;; be loaded whole into a ymm register.

;; Length-field mask, replicated in every lane.
len_mask:
	times 8 dd LIT_LEN_MASK

;; Distance-field mask, replicated in every lane.
dists_mask:
	times 8 dd LIT_DIST_MASK

;; Threshold above which a stored length field is treated as a long match.
long_len:
	times 8 dd 0x105

;; Lane index 0..7.
increment:
	dd 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7

;; VECT_SIZE in every lane; subtracted from the running lengths per step.
vect_size:
	times 8 dd VECT_SIZE

;; Bias added to a raw match length to form the stored length field.
twofiftyfour:
	times 8 dd 0xfe

;; Lower bound a new length field must exceed before it is written.
twofiftysix:
	times 8 dd 0x100

;; Keeps bits 10..31 of an entry (everything above the low 10 bits).
nlen_mask:
	times 8 dd 0xfffffc00

;; Upper clamp for a written length field.
max_len:
	times 8 dd 0xfe + 0x102
|