mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 17:33:50 +01:00
igzip: More optimizations by speeding up rarely taken branch
For some reason, optimizing the rarely taken branch speeds up the program.
Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
Reviewed-by: Greg Tucker <greg.b.tucker@intel.com>
This commit is contained in:
parent
84ffaead82
commit
40b5104397
@ -221,6 +221,9 @@ extern rfc1951_lookup_table
|
|||||||
%define %%next_bits2 %6
|
%define %%next_bits2 %6
|
||||||
|
|
||||||
;; Save length associated with symbol
|
;; Save length associated with symbol
|
||||||
|
mov %%next_bits2, %%read_in
|
||||||
|
shr %%next_bits2, DECODE_LOOKUP_SIZE
|
||||||
|
|
||||||
mov rcx, %%next_sym
|
mov rcx, %%next_sym
|
||||||
shr rcx, 9
|
shr rcx, 9
|
||||||
|
|
||||||
@ -230,22 +233,20 @@ extern rfc1951_lookup_table
|
|||||||
jl %%end
|
jl %%end
|
||||||
|
|
||||||
;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first DECODE_LOOKUP_SIZE bits.
|
;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first DECODE_LOOKUP_SIZE bits.
|
||||||
|
lea %%next_sym, [%%state + 2 * %%next_sym]
|
||||||
|
sub rcx, 0x40 + DECODE_LOOKUP_SIZE
|
||||||
|
|
||||||
%ifdef USE_HSWNI
|
%ifdef USE_HSWNI
|
||||||
and rcx, 0x1F
|
bzhi %%next_bits2, %%next_bits2, rcx
|
||||||
bzhi %%next_bits2, %%read_in, rcx
|
|
||||||
%else
|
%else
|
||||||
;; Decode next_sym using hint
|
;; Decode next_sym using hint
|
||||||
mov %%next_bits2, %%read_in
|
|
||||||
neg rcx
|
neg rcx
|
||||||
shl %%next_bits2, cl
|
shl %%next_bits2, cl
|
||||||
shr %%next_bits2, cl
|
shr %%next_bits2, cl
|
||||||
%endif
|
%endif
|
||||||
shr %%next_bits2, DECODE_LOOKUP_SIZE
|
|
||||||
|
|
||||||
add %%next_bits2, %%next_sym
|
|
||||||
|
|
||||||
;; Lookup actual next symbol
|
;; Lookup actual next symbol
|
||||||
movzx %%next_sym, word [%%state + %%state_offset + 2 * %%next_bits2 + 2 * ((1 << DECODE_LOOKUP_SIZE) - 0x8000)]
|
movzx %%next_sym, word [%%next_sym + %%state_offset + 2 * %%next_bits2 + 2 * ((1 << DECODE_LOOKUP_SIZE) - 0x8000)]
|
||||||
|
|
||||||
;; Save length associated with symbol
|
;; Save length associated with symbol
|
||||||
mov rcx, %%next_sym
|
mov rcx, %%next_sym
|
||||||
@ -312,7 +313,8 @@ loop_block:
|
|||||||
and tmp3, (1 << DECODE_LOOKUP_SIZE) - 1
|
and tmp3, (1 << DECODE_LOOKUP_SIZE) - 1
|
||||||
|
|
||||||
;; Start reloading read_in
|
;; Start reloading read_in
|
||||||
SHLX tmp1, [next_in], read_in_length
|
mov tmp1, [next_in]
|
||||||
|
SHLX tmp1, tmp1, read_in_length
|
||||||
or read_in, tmp1
|
or read_in, tmp1
|
||||||
|
|
||||||
;; Speculatively load data associated with length symbol
|
;; Speculatively load data associated with length symbol
|
||||||
|
Loading…
Reference in New Issue
Block a user