mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 09:23:50 +01:00
igzip: implement set_long_icf_fg with assembly
Change-Id: I21ac55985a56c2b7b0a684934c076600d90f8b0a Signed-off-by: Zhiyuan Zhu <zhiyuan.zhu@arm.com>
This commit is contained in:
parent
4ed944c4b1
commit
6b70da5051
@ -47,6 +47,7 @@ lsrc_aarch64 += igzip/aarch64/igzip_inflate_multibinary_arm64.S \
|
|||||||
igzip/aarch64/igzip_deflate_finish_aarch64.S \
|
igzip/aarch64/igzip_deflate_finish_aarch64.S \
|
||||||
igzip/aarch64/isal_deflate_icf_body_hash_hist.S \
|
igzip/aarch64/isal_deflate_icf_body_hash_hist.S \
|
||||||
igzip/aarch64/isal_deflate_icf_finish_hash_hist.S \
|
igzip/aarch64/isal_deflate_icf_finish_hash_hist.S \
|
||||||
|
igzip/aarch64/igzip_set_long_icf_fg.S \
|
||||||
igzip/aarch64/isal_update_histogram.S \
|
igzip/aarch64/isal_update_histogram.S \
|
||||||
igzip/proc_heap_base.c
|
igzip/proc_heap_base.c
|
||||||
|
|
||||||
|
@ -150,5 +150,24 @@ _compare_258_loop:
|
|||||||
csel x_\match_length,x_\match_length,x_\max_length,ls
|
csel x_\match_length,x_\match_length,x_\max_length,ls
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
.macro compare_aarch64 str0:req,str1:req,max_length:req,match_length:req,tmp0:req,tmp1:req
|
||||||
|
mov x_\match_length,0
|
||||||
|
_compare_loop:
|
||||||
|
ldr x_\tmp0,[x_\str0,x_\match_length]
|
||||||
|
ldr x_\tmp1,[x_\str1,x_\match_length]
|
||||||
|
eor x_\tmp0,x_\tmp1,x_\tmp0
|
||||||
|
rbit x_\tmp0,x_\tmp0
|
||||||
|
clz x_\tmp0,x_\tmp0
|
||||||
|
lsr x_\tmp0,x_\tmp0,3
|
||||||
|
add x_\match_length,x_\match_length,x_\tmp0
|
||||||
|
|
||||||
|
cmp x_\max_length,x_\match_length
|
||||||
|
ccmp x_\tmp0,8,0,hi
|
||||||
|
beq _compare_loop
|
||||||
|
|
||||||
|
cmp x_\match_length,x_\max_length
|
||||||
|
csel x_\match_length,x_\match_length,x_\max_length,ls
|
||||||
|
.endm
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
@ -113,6 +113,11 @@ DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3)
|
|||||||
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map);
|
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DEFINE_INTERFACE_DISPATCHER(set_long_icf_fg)
|
||||||
|
{
|
||||||
|
return PROVIDER_INFO(set_long_icf_fg_aarch64);
|
||||||
|
}
|
||||||
|
|
||||||
DEFINE_INTERFACE_DISPATCHER(isal_update_histogram)
|
DEFINE_INTERFACE_DISPATCHER(isal_update_histogram)
|
||||||
{
|
{
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
|
@ -38,7 +38,7 @@ mbin_interface isal_deflate_icf_finish_lvl2
|
|||||||
mbin_interface isal_deflate_icf_finish_lvl3
|
mbin_interface isal_deflate_icf_finish_lvl3
|
||||||
mbin_interface isal_update_histogram
|
mbin_interface isal_update_histogram
|
||||||
mbin_interface_base encode_deflate_icf , encode_deflate_icf_base
|
mbin_interface_base encode_deflate_icf , encode_deflate_icf_base
|
||||||
mbin_interface_base set_long_icf_fg , set_long_icf_fg_base
|
mbin_interface set_long_icf_fg
|
||||||
mbin_interface_base gen_icf_map_lh1 , gen_icf_map_h1_base
|
mbin_interface_base gen_icf_map_lh1 , gen_icf_map_h1_base
|
||||||
mbin_interface_base isal_deflate_hash_lvl0 , isal_deflate_hash_base
|
mbin_interface_base isal_deflate_hash_lvl0 , isal_deflate_hash_base
|
||||||
mbin_interface_base isal_deflate_hash_lvl1 , isal_deflate_hash_base
|
mbin_interface_base isal_deflate_hash_lvl1 , isal_deflate_hash_base
|
||||||
|
194
igzip/aarch64/igzip_set_long_icf_fg.S
Normal file
194
igzip/aarch64/igzip_set_long_icf_fg.S
Normal file
@ -0,0 +1,194 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2019 Arm Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Arm Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
.arch armv8-a
|
||||||
|
.text
|
||||||
|
.align 2
|
||||||
|
|
||||||
|
#include "lz0a_const_aarch64.h"
|
||||||
|
#include "data_struct_aarch64.h"
|
||||||
|
#include "huffman_aarch64.h"
|
||||||
|
#include "bitbuf2_aarch64.h"
|
||||||
|
#include "stdmac_aarch64.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
declare Macros
|
||||||
|
*/
|
||||||
|
|
||||||
|
.macro declare_generic_reg name:req,reg:req,default:req
|
||||||
|
\name .req \default\reg
|
||||||
|
w_\name .req w\reg
|
||||||
|
x_\name .req x\reg
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 2
|
||||||
|
.global set_long_icf_fg_aarch64
|
||||||
|
.type set_long_icf_fg_aarch64, %function
|
||||||
|
|
||||||
|
/*
|
||||||
|
void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
|
||||||
|
struct deflate_icf *match_lookup)
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* arguments */
|
||||||
|
declare_generic_reg next_in_param, 0,x
|
||||||
|
declare_generic_reg processed_param, 1,x
|
||||||
|
declare_generic_reg input_size_param, 2,x
|
||||||
|
declare_generic_reg match_lookup_param, 3,x
|
||||||
|
|
||||||
|
declare_generic_reg param0, 0,x
|
||||||
|
declare_generic_reg param1, 1,x
|
||||||
|
declare_generic_reg param2, 2,x
|
||||||
|
|
||||||
|
/* local variable */
|
||||||
|
declare_generic_reg len, 7,w
|
||||||
|
declare_generic_reg dist_code, 8,w
|
||||||
|
declare_generic_reg shortest_match_len 9,w
|
||||||
|
declare_generic_reg len_max, 10,w
|
||||||
|
declare_generic_reg dist_extra, 11,w
|
||||||
|
declare_generic_reg const_8, 13,x
|
||||||
|
declare_generic_reg next_in, 20,x
|
||||||
|
declare_generic_reg dist_start, 21,x
|
||||||
|
declare_generic_reg end_processed, 22,x
|
||||||
|
declare_generic_reg end_in, 23,x
|
||||||
|
declare_generic_reg match_lookup, 19,x
|
||||||
|
|
||||||
|
declare_generic_reg match_length, 4,w
|
||||||
|
declare_generic_reg tmp0, 5,w
|
||||||
|
declare_generic_reg tmp1, 6,w
|
||||||
|
|
||||||
|
/* constant */
|
||||||
|
.equ DIST_START_SIZE, 128
|
||||||
|
.equ ISAL_LOOK_AHEAD, 288
|
||||||
|
.equ LEN_OFFSET, 254
|
||||||
|
.equ SHORTEST_MATCH, 4
|
||||||
|
.equ LEN_MAX_CONST, 512
|
||||||
|
|
||||||
|
set_long_icf_fg_aarch64:
|
||||||
|
stp x29, x30, [sp, -192]!
|
||||||
|
add x29, sp, 0
|
||||||
|
stp x21, x22, [sp, 32]
|
||||||
|
add x21, x29, 64
|
||||||
|
stp x19, x20, [sp, 16]
|
||||||
|
str x23, [sp, 48]
|
||||||
|
|
||||||
|
add end_processed, next_in_param, processed_param
|
||||||
|
mov next_in, next_in_param
|
||||||
|
add end_in, next_in_param, input_size_param
|
||||||
|
mov match_lookup, match_lookup_param
|
||||||
|
|
||||||
|
adrp x1, .data_dist_start
|
||||||
|
mov x2, DIST_START_SIZE // 128
|
||||||
|
add x1, x1, :lo12:.data_dist_start
|
||||||
|
mov x0, dist_start
|
||||||
|
bl memcpy
|
||||||
|
|
||||||
|
add x_tmp0, end_processed, ISAL_LOOK_AHEAD // 288
|
||||||
|
cmp end_in, x_tmp0
|
||||||
|
csel end_in, end_in, x_tmp0, cc
|
||||||
|
cmp next_in, end_processed
|
||||||
|
bcs .done
|
||||||
|
|
||||||
|
mov const_8, 8
|
||||||
|
mov len_max, LEN_MAX_CONST // 512
|
||||||
|
mov shortest_match_len, (LEN_OFFSET + SHORTEST_MATCH - 1)
|
||||||
|
b .while_outer_loop
|
||||||
|
|
||||||
|
.align 2
|
||||||
|
.while_outer_check:
|
||||||
|
add next_in, next_in, 1
|
||||||
|
add match_lookup, match_lookup, 4
|
||||||
|
cmp end_processed, next_in
|
||||||
|
bls .done
|
||||||
|
|
||||||
|
.while_outer_loop:
|
||||||
|
ldrh len, [match_lookup]
|
||||||
|
and len, len, LIT_LEN_MASK // 1023
|
||||||
|
cmp len, (LEN_OFFSET + 8 - 1) // 261
|
||||||
|
bls .while_outer_check
|
||||||
|
|
||||||
|
ldr dist_code, [match_lookup]
|
||||||
|
add x1, next_in, 8
|
||||||
|
ldrh dist_extra, [match_lookup, 2]
|
||||||
|
sub w2, w_end_in, w1
|
||||||
|
ubfx x_dist_code, x_dist_code, 10, 9
|
||||||
|
ubfx x_dist_extra, x_dist_extra, 3, 13
|
||||||
|
uxtw x0, dist_code
|
||||||
|
ldr w0, [dist_start, x0, lsl 2]
|
||||||
|
add w0, dist_extra, w0
|
||||||
|
sub x0, const_8, x0
|
||||||
|
add x0, next_in, x0
|
||||||
|
|
||||||
|
compare_aarch64 param0,param1,param2,match_length,tmp0,tmp1
|
||||||
|
mov w0, w_match_length
|
||||||
|
|
||||||
|
add w0, w0, (LEN_OFFSET + 8) // 262
|
||||||
|
cmp w0, len
|
||||||
|
bls .while_outer_check
|
||||||
|
|
||||||
|
lsl w2, dist_extra, 19
|
||||||
|
orr w2, w2, dist_code, lsl 10
|
||||||
|
|
||||||
|
.align 3
|
||||||
|
.while_inner_loop:
|
||||||
|
cmp w0, LEN_MAX_CONST // 512
|
||||||
|
add next_in, next_in, 1
|
||||||
|
csel w1, w0, len_max, ls
|
||||||
|
sub w0, w0, #1
|
||||||
|
orr w1, w1, w2
|
||||||
|
str w1, [match_lookup]
|
||||||
|
ldrh w1, [match_lookup, 4]!
|
||||||
|
|
||||||
|
and w1, w1, LIT_LEN_MASK // 1023
|
||||||
|
cmp w1, (LEN_OFFSET + SHORTEST_MATCH - 1) // 257
|
||||||
|
csel w1, w1, shortest_match_len, cs
|
||||||
|
cmp w1, w0
|
||||||
|
bcc .while_inner_loop
|
||||||
|
|
||||||
|
add next_in, next_in, 1
|
||||||
|
add match_lookup, match_lookup, 4
|
||||||
|
cmp end_processed, next_in
|
||||||
|
bhi .while_outer_loop
|
||||||
|
|
||||||
|
.done:
|
||||||
|
ldp x19, x20, [sp, 16]
|
||||||
|
ldp x21, x22, [sp, 32]
|
||||||
|
ldr x23, [sp, 48]
|
||||||
|
ldp x29, x30, [sp], 192
|
||||||
|
ret
|
||||||
|
.size set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
|
||||||
|
|
||||||
|
.section .rodata
|
||||||
|
.align 3
|
||||||
|
.set .data_dist_start,. + 0
|
||||||
|
.real_data_dist_start:
|
||||||
|
.word 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
|
||||||
|
.word 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
|
||||||
|
.word 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
|
||||||
|
.word 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
|
Loading…
Reference in New Issue
Block a user