mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 09:23:50 +01:00
igzip: implement set_long_icf_fg with assembly
Change-Id: I21ac55985a56c2b7b0a684934c076600d90f8b0a Signed-off-by: Zhiyuan Zhu <zhiyuan.zhu@arm.com>
This commit is contained in:
parent
4ed944c4b1
commit
6b70da5051
@ -47,6 +47,7 @@ lsrc_aarch64 += igzip/aarch64/igzip_inflate_multibinary_arm64.S \
|
||||
igzip/aarch64/igzip_deflate_finish_aarch64.S \
|
||||
igzip/aarch64/isal_deflate_icf_body_hash_hist.S \
|
||||
igzip/aarch64/isal_deflate_icf_finish_hash_hist.S \
|
||||
igzip/aarch64/igzip_set_long_icf_fg.S \
|
||||
igzip/aarch64/isal_update_histogram.S \
|
||||
igzip/proc_heap_base.c
|
||||
|
||||
|
@ -150,5 +150,24 @@ _compare_258_loop:
|
||||
csel x_\match_length,x_\match_length,x_\max_length,ls
|
||||
.endm
|
||||
|
||||
/*
 * compare_aarch64 str0, str1, max_length, match_length, tmp0, tmp1
 *
 * Counts the number of leading bytes that are equal at [x_str0] and
 * [x_str1], capped at x_max_length. The result is left in x_match_length.
 *
 * Every argument is a register alias stem; the macro uses the x_<stem>
 * (64-bit) view of each one.
 *
 * Clobbers: x_tmp0, x_tmp1, condition flags.
 *
 * Notes:
 *  - Data is compared 8 bytes at a time and the loop body always runs at
 *    least once, so up to 8 bytes starting at offset match_length are read
 *    from both buffers even when fewer than 8 bytes remain below the cap.
 *  - The mismatch position is taken from the lowest set bit of the XOR,
 *    i.e. the lowest-order differing byte of the loaded register; this is
 *    the first differing byte in memory for little-endian loads.
 *  - The loop label is an assembler-local symbol made unique with \@ so the
 *    macro can be expanded more than once in a translation unit.
 */
.macro	compare_aarch64	str0:req,str1:req,max_length:req,match_length:req,tmp0:req,tmp1:req
	mov	x_\match_length,0
.Lcompare_aarch64_loop_\@:
	ldr	x_\tmp0,[x_\str0,x_\match_length]
	ldr	x_\tmp1,[x_\str1,x_\match_length]
	eor	x_\tmp0,x_\tmp1,x_\tmp0		// non-zero bits mark differing bytes
	rbit	x_\tmp0,x_\tmp0
	clz	x_\tmp0,x_\tmp0			// trailing zero bit count (64 if equal)
	lsr	x_\tmp0,x_\tmp0,3		// -> number of equal bytes, 0..8
	add	x_\match_length,x_\match_length,x_\tmp0

	// Continue only while below the cap AND all 8 bytes matched.
	cmp	x_\max_length,x_\match_length
	ccmp	x_\tmp0,8,0,hi
	beq	.Lcompare_aarch64_loop_\@

	// match_length = min(match_length, max_length)
	cmp	x_\match_length,x_\max_length
	csel	x_\match_length,x_\match_length,x_\max_length,ls
.endm
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@ -113,6 +113,11 @@ DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3)
|
||||
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map);
|
||||
}
|
||||
|
||||
/* Dispatcher for set_long_icf_fg: unconditionally returns the
 * set_long_icf_fg_aarch64 provider; no getauxval()/HWCAP check is made here. */
DEFINE_INTERFACE_DISPATCHER(set_long_icf_fg)
|
||||
{
|
||||
return PROVIDER_INFO(set_long_icf_fg_aarch64);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_update_histogram)
|
||||
{
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
@ -38,7 +38,7 @@ mbin_interface isal_deflate_icf_finish_lvl2
|
||||
mbin_interface isal_deflate_icf_finish_lvl3
|
||||
mbin_interface isal_update_histogram
|
||||
mbin_interface_base encode_deflate_icf , encode_deflate_icf_base
|
||||
mbin_interface_base set_long_icf_fg , set_long_icf_fg_base
|
||||
mbin_interface set_long_icf_fg
|
||||
mbin_interface_base gen_icf_map_lh1 , gen_icf_map_h1_base
|
||||
mbin_interface_base isal_deflate_hash_lvl0 , isal_deflate_hash_base
|
||||
mbin_interface_base isal_deflate_hash_lvl1 , isal_deflate_hash_base
|
||||
|
194
igzip/aarch64/igzip_set_long_icf_fg.S
Normal file
194
igzip/aarch64/igzip_set_long_icf_fg.S
Normal file
@ -0,0 +1,194 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2019 Arm Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Arm Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
.arch armv8-a
|
||||
.text
|
||||
.align 2
|
||||
|
||||
#include "lz0a_const_aarch64.h"
|
||||
#include "data_struct_aarch64.h"
|
||||
#include "huffman_aarch64.h"
|
||||
#include "bitbuf2_aarch64.h"
|
||||
#include "stdmac_aarch64.h"
|
||||
|
||||
/*
|
||||
declare Macros
|
||||
*/
|
||||
|
||||
/*
 * declare_generic_reg name, reg, default
 *
 * Defines three register aliases for register number \reg:
 *   x_<name>  -> x<reg>            (64-bit view)
 *   w_<name>  -> w<reg>            (32-bit view)
 *   <name>    -> <default><reg>    (view chosen by \default, "x" or "w")
 */
.macro	declare_generic_reg	name:req,reg:req,default:req
	x_\name	.req	x\reg
	w_\name	.req	w\reg
	\name	.req	\default\reg
.endm
|
||||
|
||||
.text
|
||||
.align 2
|
||||
.global set_long_icf_fg_aarch64
|
||||
.type set_long_icf_fg_aarch64, %function
|
||||
|
||||
/*
|
||||
void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
|
||||
struct deflate_icf *match_lookup)
|
||||
*/
|
||||
|
||||
/* arguments */
|
||||
/* Incoming arguments, in the order of the C prototype. */
	declare_generic_reg	next_in_param,		0,x	// uint8_t *next_in
	declare_generic_reg	processed_param,	1,x	// uint64_t processed
	declare_generic_reg	input_size_param,	2,x	// uint64_t input_size
	declare_generic_reg	match_lookup_param,	3,x	// struct deflate_icf *match_lookup

	/* x0-x2 under neutral names, used as compare_aarch64 operands */
	declare_generic_reg	param0,			0,x
	declare_generic_reg	param1,			1,x
	declare_generic_reg	param2,			2,x

	/* local variable */
	declare_generic_reg	len,			7,w	// entry halfword masked with LIT_LEN_MASK
	declare_generic_reg	dist_code,		8,w	// bits 10..18 of the entry
	declare_generic_reg	shortest_match_len,	9,w	// LEN_OFFSET + SHORTEST_MATCH - 1
	declare_generic_reg	len_max,		10,w	// LEN_MAX_CONST
	declare_generic_reg	dist_extra,		11,w	// bits 19..31 of the entry
	declare_generic_reg	const_8,		13,x	// constant 8
	declare_generic_reg	next_in,		20,x	// current input position
	declare_generic_reg	dist_start,		21,x	// base address of the dist_start table
	declare_generic_reg	end_processed,		22,x	// next_in + processed
	declare_generic_reg	end_in,			23,x	// end of readable input (capped)
	declare_generic_reg	match_lookup,		19,x	// current match_lookup entry

	/* operands of compare_aarch64 */
	declare_generic_reg	match_length,		4,w	// compare result
	declare_generic_reg	tmp0,			5,w	// scratch
	declare_generic_reg	tmp1,			6,w	// scratch
|
||||
|
||||
/* constant */
|
||||
/* Size in bytes of the dist_start table (32 words of 4 bytes). */
	.set	DIST_START_SIZE,	128
	/* Maximum distance past end_processed that end_in may extend. */
	.set	ISAL_LOOK_AHEAD,	288
	/* Bias added to a match length before it is compared with or stored
	   in the length field of a match_lookup entry. */
	.set	LEN_OFFSET,		254
	/* Together with LEN_OFFSET forms the lower bound (257) applied to the
	   next entry length in the rewrite loop. */
	.set	SHORTEST_MATCH,		4
	/* Upper clamp for the length value written into an entry. */
	.set	LEN_MAX_CONST,		512
|
||||
|
||||
/*
 * void set_long_icf_fg_aarch64(uint8_t *next_in, uint64_t processed,
 *                              uint64_t input_size,
 *                              struct deflate_icf *match_lookup)
 *
 * In:    x0 = next_in, x1 = processed, x2 = input_size, x3 = match_lookup
 * Out:   none
 * Uses:  x19-x23 (saved/restored here), x0-x2, x4-x11, x13, flags
 * Stack: 64 bytes (frame record + x19-x23)
 *
 * Walks one 4-byte match_lookup entry per input byte in
 * [next_in, next_in + processed). For an entry whose length field is
 * greater than LEN_OFFSET + 7, the match is re-measured by comparing the
 * bytes at next_in + 8 with the bytes at next_in + 8 - dist, where
 * dist = dist_start[dist_code] + dist_extra, limited to end_in. When the
 * re-measured length exceeds the stored one, this entry and the following
 * entries are rewritten with the same distance fields and a length that
 * decreases by one per entry (clamped to LEN_MAX_CONST when stored), for as
 * long as the next entry length (floored at LEN_OFFSET + SHORTEST_MATCH - 1)
 * is below the remaining length.
 *
 * The dist_start table is only read, so it is indexed directly in .rodata
 * rather than being copied to the stack; the function makes no calls.
 */
set_long_icf_fg_aarch64:
	stp	x29, x30, [sp, -64]!
	mov	x29, sp
	stp	x19, x20, [sp, 16]
	stp	x21, x22, [sp, 32]
	str	x23, [sp, 48]

	add	end_processed, next_in_param, processed_param
	mov	next_in, next_in_param
	add	end_in, next_in_param, input_size_param
	mov	match_lookup, match_lookup_param

	// dist_start = address of the read-only distance base table
	adrp	dist_start, .data_dist_start
	add	dist_start, dist_start, :lo12:.data_dist_start

	// end_in = min(end_in, end_processed + ISAL_LOOK_AHEAD)
	add	x_tmp0, end_processed, ISAL_LOOK_AHEAD	// 288
	cmp	end_in, x_tmp0
	csel	end_in, end_in, x_tmp0, cc
	cmp	next_in, end_processed
	bcs	.Lset_long_done				// nothing to process

	mov	const_8, 8
	mov	len_max, LEN_MAX_CONST			// 512
	mov	shortest_match_len, (LEN_OFFSET + SHORTEST_MATCH - 1)
	b	.Lset_long_outer_loop

	.align	2
.Lset_long_outer_check:
	// advance to the next input byte / entry
	add	next_in, next_in, 1
	add	match_lookup, match_lookup, 4
	cmp	end_processed, next_in
	bls	.Lset_long_done

.Lset_long_outer_loop:
	ldrh	len, [match_lookup]
	and	len, len, LIT_LEN_MASK			// 1023
	cmp	len, (LEN_OFFSET + 8 - 1)		// 261
	bls	.Lset_long_outer_check			// stored length too short to extend

	ldr	dist_code, [match_lookup]
	add	x1, next_in, 8				// x1 = next_in + 8 (compare str1)
	ldrh	dist_extra, [match_lookup, 2]
	sub	w2, w_end_in, w1			// w2 = end_in - (next_in + 8) (compare cap)
	ubfx	x_dist_code, x_dist_code, 10, 9		// entry bits 10..18
	ubfx	x_dist_extra, x_dist_extra, 3, 13	// entry bits 19..31
	uxtw	x0, dist_code
	ldr	w0, [dist_start, x0, lsl 2]
	add	w0, dist_extra, w0			// w0 = dist
	sub	x0, const_8, x0
	add	x0, next_in, x0				// x0 = next_in + 8 - dist (compare str0)

	compare_aarch64	param0,param1,param2,match_length,tmp0,tmp1
	mov	w0, w_match_length

	add	w0, w0, (LEN_OFFSET + 8)		// 262: bias to stored-length scale
	cmp	w0, len
	bls	.Lset_long_outer_check			// not longer than the stored match

	// w2 = distance fields positioned as in the entry
	lsl	w2, dist_extra, 19
	orr	w2, w2, dist_code, lsl 10

	.align	3
.Lset_long_inner_loop:
	cmp	w0, LEN_MAX_CONST			// 512
	add	next_in, next_in, 1
	csel	w1, w0, len_max, ls			// w1 = min(w0, LEN_MAX_CONST)
	sub	w0, w0, #1
	orr	w1, w1, w2
	str	w1, [match_lookup]
	ldrh	w1, [match_lookup, 4]!			// step to next entry, load its low half

	and	w1, w1, LIT_LEN_MASK			// 1023
	cmp	w1, (LEN_OFFSET + SHORTEST_MATCH - 1)	// 257
	csel	w1, w1, shortest_match_len, cs		// w1 = max(next length, 257)
	cmp	w1, w0
	bcc	.Lset_long_inner_loop			// remaining length still longer

	add	next_in, next_in, 1
	add	match_lookup, match_lookup, 4
	cmp	end_processed, next_in
	bhi	.Lset_long_outer_loop

.Lset_long_done:
	ldp	x19, x20, [sp, 16]
	ldp	x21, x22, [sp, 32]
	ldr	x23, [sp, 48]
	ldp	x29, x30, [sp], 64
	ret
	.size	set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
|
||||
|
||||
/*
 * dist_start table: 32 words indexed by dist_code. The function above
 * computes dist = dist_start[dist_code] + dist_extra. The last two entries
 * are zero.
 */
	.section	.rodata
	.p2align	3
	.set	.data_dist_start, .
.real_data_dist_start:
	.word	0x0001, 0x0002, 0x0003, 0x0004
	.word	0x0005, 0x0007, 0x0009, 0x000d
	.word	0x0011, 0x0019, 0x0021, 0x0031
	.word	0x0041, 0x0061, 0x0081, 0x00c1
	.word	0x0101, 0x0181, 0x0201, 0x0301
	.word	0x0401, 0x0601, 0x0801, 0x0c01
	.word	0x1001, 0x1801, 0x2001, 0x3001
	.word	0x4001, 0x6001, 0x0000, 0x0000
|
Loading…
Reference in New Issue
Block a user