igzip: implement set_long_icf_fg with assembly

Change-Id: I21ac55985a56c2b7b0a684934c076600d90f8b0a
Signed-off-by: Zhiyuan Zhu <zhiyuan.zhu@arm.com>
This commit is contained in:
Zhiyuan Zhu 2019-10-22 16:14:18 +08:00 committed by Greg Tucker
parent 4ed944c4b1
commit 6b70da5051
5 changed files with 220 additions and 1 deletions

View File

@ -47,6 +47,7 @@ lsrc_aarch64 += igzip/aarch64/igzip_inflate_multibinary_arm64.S \
igzip/aarch64/igzip_deflate_finish_aarch64.S \
igzip/aarch64/isal_deflate_icf_body_hash_hist.S \
igzip/aarch64/isal_deflate_icf_finish_hash_hist.S \
igzip/aarch64/igzip_set_long_icf_fg.S \
igzip/aarch64/isal_update_histogram.S \
igzip/proc_heap_base.c

View File

@ -150,5 +150,24 @@ _compare_258_loop:
csel x_\match_length,x_\match_length,x_\max_length,ls
.endm
/*
 * compare_aarch64 str0, str1, max_length, match_length, tmp0, tmp1
 *
 * Every argument is a register-alias suffix; the macro uses the 64-bit
 * x_<name> view of each one.
 *
 *   str0, str1    base addresses of the two byte strings to compare
 *   max_length    upper bound for the reported length
 *   match_length  result: number of leading bytes that are equal, clamped
 *                 to max_length
 *   tmp0, tmp1    scratch registers, clobbered
 *
 * Also clobbers the condition flags.
 *
 * NOTE: every iteration loads a full 8 bytes from both strings, and the
 * first iteration runs unconditionally, so bytes at and beyond max_length
 * can be read.  The result is clamped afterwards.
 * NOTE: the byte count is derived from the lowest set bit of the XOR, which
 * matches memory order for little-endian loads.
 * NOTE: the loop label is a plain symbol and is not made unique per
 * expansion, so the macro can be expanded only once per assembly unit.
 */
.macro compare_aarch64 str0:req,str1:req,max_length:req,match_length:req,tmp0:req,tmp1:req
mov x_\match_length,0
_compare_loop:
/* Load the next 8 bytes of each string at the current match offset. */
ldr x_\tmp0,[x_\str0,x_\match_length]
ldr x_\tmp1,[x_\str1,x_\match_length]
/* Differing bits become 1; rbit + clz then counts the trailing zero bits. */
eor x_\tmp0,x_\tmp1,x_\tmp0
rbit x_\tmp0,x_\tmp0
clz x_\tmp0,x_\tmp0
/* Bits -> bytes: 0..8 equal bytes in this window (8 when all 64 bits match). */
lsr x_\tmp0,x_\tmp0,3
add x_\match_length,x_\match_length,x_\tmp0
/*
 * Continue only if max_length > match_length (hi) AND the whole window
 * matched (tmp0 == 8).  When the first test fails, ccmp forces the flags
 * to 0 so that the beq is not taken.
 */
cmp x_\max_length,x_\match_length
ccmp x_\tmp0,8,0,hi
beq _compare_loop
/* Clamp: match_length = min(match_length, max_length). */
cmp x_\match_length,x_\max_length
csel x_\match_length,x_\match_length,x_\max_length,ls
.endm
#endif
#endif

View File

@ -113,6 +113,11 @@ DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3)
return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map);
}
/*
 * Dispatcher for the set_long_icf_fg interface.
 *
 * Unconditionally returns the provider built from set_long_icf_fg_aarch64
 * (implemented in igzip/aarch64/igzip_set_long_icf_fg.S); this body makes
 * no hardware-capability query before selecting it.
 */
DEFINE_INTERFACE_DISPATCHER(set_long_icf_fg)
{
return PROVIDER_INFO(set_long_icf_fg_aarch64);
}
DEFINE_INTERFACE_DISPATCHER(isal_update_histogram)
{
unsigned long auxval = getauxval(AT_HWCAP);

View File

@ -38,7 +38,7 @@ mbin_interface isal_deflate_icf_finish_lvl2
mbin_interface isal_deflate_icf_finish_lvl3
mbin_interface isal_update_histogram
mbin_interface_base encode_deflate_icf , encode_deflate_icf_base
mbin_interface_base set_long_icf_fg , set_long_icf_fg_base
mbin_interface set_long_icf_fg
mbin_interface_base gen_icf_map_lh1 , gen_icf_map_h1_base
mbin_interface_base isal_deflate_hash_lvl0 , isal_deflate_hash_base
mbin_interface_base isal_deflate_hash_lvl1 , isal_deflate_hash_base

View File

@ -0,0 +1,194 @@
/**********************************************************************
Copyright(c) 2019 Arm Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Arm Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
.arch armv8-a
.text
.align 2
#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"
/*
 * declare_generic_reg name, reg, default
 *
 * Creates three .req register aliases for general-purpose register number
 * <reg>:
 *   <name>    -> <default><reg>  (default is the width prefix, w or x)
 *   w_<name>  -> w<reg>          (32-bit view)
 *   x_<name>  -> x<reg>          (64-bit view)
 *
 * Example: "declare_generic_reg len, 7,w" yields len = w7, w_len = w7 and
 * x_len = x7.
 */
.macro declare_generic_reg name:req,reg:req,default:req
\name .req \default\reg
w_\name .req w\reg
x_\name .req x\reg
.endm
.text
.align 2
.global set_long_icf_fg_aarch64
.type set_long_icf_fg_aarch64, %function
/*
 * void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed,
 *                              uint64_t input_size,
 *                              struct deflate_icf *match_lookup)
 *
 * The aliases below name every register the routine uses.  Each
 * declare_generic_reg line creates <name>, w_<name> and x_<name>.
 */
/* arguments (incoming x0-x3) */
declare_generic_reg next_in_param, 0,x
declare_generic_reg processed_param, 1,x
declare_generic_reg input_size_param, 2,x
declare_generic_reg match_lookup_param, 3,x
/* x0-x2 again, under the names handed to the compare_aarch64 macro */
declare_generic_reg param0, 0,x
declare_generic_reg param1, 1,x
declare_generic_reg param2, 2,x
/* local variable */
/* len:                low LIT_LEN_MASK bits of the current match_lookup entry */
declare_generic_reg len, 7,w
/* dist_code:          bits 10..18 of the current match_lookup entry */
declare_generic_reg dist_code, 8,w
/* shortest_match_len: holds LEN_OFFSET + SHORTEST_MATCH - 1 */
declare_generic_reg shortest_match_len, 9,w
/* len_max:            holds LEN_MAX_CONST */
declare_generic_reg len_max, 10,w
/* dist_extra:         bits 19..31 of the current match_lookup entry */
declare_generic_reg dist_extra, 11,w
/* const_8:            holds the constant 8 */
declare_generic_reg const_8, 13,x
/* x19-x23 are callee-saved; the routine saves and restores them itself */
/* next_in:            current input position */
declare_generic_reg next_in, 20,x
/* dist_start:         base address of the table indexed by dist_code */
declare_generic_reg dist_start, 21,x
/* end_processed:      next_in_param + processed_param, outer loop bound */
declare_generic_reg end_processed, 22,x
/* end_in:             next_in_param + input_size_param, clamped to end_processed + ISAL_LOOK_AHEAD */
declare_generic_reg end_in, 23,x
/* match_lookup:       current entry in the match_lookup array */
declare_generic_reg match_lookup, 19,x
/* match_length, tmp0, tmp1: result and scratch registers of compare_aarch64 */
declare_generic_reg match_length, 4,w
declare_generic_reg tmp0, 5,w
declare_generic_reg tmp1, 6,w
/* constant */
/* Size in bytes of the .data_dist_start table (32 words). */
.equ DIST_START_SIZE, 128
/* end_in is limited to at most this many bytes past end_processed. */
.equ ISAL_LOOK_AHEAD, 288
/* Bias between a byte count and the length value stored in an entry, as used below. */
.equ LEN_OFFSET, 254
.equ SHORTEST_MATCH, 4
/* Largest length value written back into an entry. */
.equ LEN_MAX_CONST, 512
/*
 * void set_long_icf_fg_aarch64(uint8_t *next_in, uint64_t processed,
 *                              uint64_t input_size,
 *                              struct deflate_icf *match_lookup)
 *
 * ABI:   AAPCS64
 * In:    x0 = next_in, x1 = processed, x2 = input_size, x3 = match_lookup
 * Out:   none; 32-bit entries of match_lookup are rewritten in place
 * Clobb: x0-x2, x4-x11, x13, flags (x19-x23, x29, x30 saved and restored)
 * Stack: 64 bytes
 *
 * For every input position in [next_in, next_in + processed):
 *   - read the entry length (low LIT_LEN_MASK bits); entries whose length
 *     is <= LEN_OFFSET + 7 are skipped;
 *   - otherwise rebuild dist = dist_start[dist_code] + dist_extra from
 *     bits 10..18 and 19..31 of the entry, and count how many bytes at
 *     next_in + 8 equal the bytes at next_in + 8 - dist, bounded by
 *     end_in - (next_in + 8);
 *   - if count + LEN_OFFSET + 8 exceeds the stored length, rewrite this and
 *     the following entries with that length (capped at LEN_MAX_CONST),
 *     decreasing by one per entry, for as long as it stays above both the
 *     next entry length and LEN_OFFSET + SHORTEST_MATCH - 1.
 *
 * The dist_start table is read directly from .rodata.  It is never written,
 * so no private stack copy (and no memcpy call) is needed.
 */
set_long_icf_fg_aarch64:
	/* Frame: [sp] x29/x30, [sp+16] x19/x20, [sp+32] x21/x22, [sp+48] x23 */
	stp	x29, x30, [sp, -64]!
	mov	x29, sp
	stp	x19, x20, [sp, 16]
	stp	x21, x22, [sp, 32]
	str	x23, [sp, 48]

	add	end_processed, next_in_param, processed_param
	mov	next_in, next_in_param
	add	end_in, next_in_param, input_size_param
	mov	match_lookup, match_lookup_param

	/* dist_start = &.data_dist_start (PC-relative, read-only table) */
	adrp	dist_start, .data_dist_start
	add	dist_start, dist_start, :lo12:.data_dist_start

	/* end_in = min(end_in, end_processed + ISAL_LOOK_AHEAD), unsigned */
	add	x_tmp0, end_processed, ISAL_LOOK_AHEAD	// 288
	cmp	end_in, x_tmp0
	csel	end_in, end_in, x_tmp0, cc

	/* Nothing to do when processed == 0 */
	cmp	next_in, end_processed
	bcs	.done

	mov	const_8, 8
	mov	len_max, LEN_MAX_CONST	// 512
	mov	shortest_match_len, (LEN_OFFSET + SHORTEST_MATCH - 1)
	b	.while_outer_loop

	.align	2
.while_outer_check:
	/* Advance one input byte / one entry; stop at end_processed */
	add	next_in, next_in, 1
	add	match_lookup, match_lookup, 4
	cmp	end_processed, next_in
	bls	.done

.while_outer_loop:
	/* len = entry & LIT_LEN_MASK; skip unless len >= LEN_OFFSET + 8 */
	ldrh	len, [match_lookup]
	and	len, len, LIT_LEN_MASK	// 1023
	cmp	len, (LEN_OFFSET + 8 - 1)	// 261
	bls	.while_outer_check

	/* Decode the entry: dist_code = bits 10..18, dist_extra = bits 19..31 */
	ldr	dist_code, [match_lookup]
	add	x1, next_in, 8			// x1 = next_in + 8
	ldrh	dist_extra, [match_lookup, 2]	// upper halfword = bits 16..31
	sub	w2, w_end_in, w1		// w2 = (uint32_t)(end_in - (next_in + 8))
	ubfx	x_dist_code, x_dist_code, 10, 9
	ubfx	x_dist_extra, x_dist_extra, 3, 13

	/* x0 = next_in + 8 - (dist_start[dist_code] + dist_extra) */
	uxtw	x0, dist_code
	ldr	w0, [dist_start, x0, lsl 2]
	add	w0, dist_extra, w0
	sub	x0, const_8, x0
	add	x0, next_in, x0

	/* match_length = equal bytes between [x0] and [x1], at most x2 */
	compare_aarch64	param0,param1,param2,match_length,tmp0,tmp1

	/* w0 = extended length; nothing to update unless it beats len */
	mov	w0, w_match_length
	add	w0, w0, (LEN_OFFSET + 8)	// 262
	cmp	w0, len
	bls	.while_outer_check

	/* w2 = distance fields repositioned for the rewritten entries */
	lsl	w2, dist_extra, 19
	orr	w2, w2, dist_code, lsl 10

	.align	3
.while_inner_loop:
	/* Store min(w0, LEN_MAX_CONST) | w2 into the current entry */
	cmp	w0, LEN_MAX_CONST	// 512
	add	next_in, next_in, 1
	csel	w1, w0, len_max, ls
	sub	w0, w0, #1			// length for the next entry
	orr	w1, w1, w2
	str	w1, [match_lookup]

	/* Step to the next entry and read its length field */
	ldrh	w1, [match_lookup, 4]!
	and	w1, w1, LIT_LEN_MASK	// 1023

	/* Loop while w0 > max(next length, LEN_OFFSET + SHORTEST_MATCH - 1) */
	cmp	w1, (LEN_OFFSET + SHORTEST_MATCH - 1)	// 257
	csel	w1, w1, shortest_match_len, cs
	cmp	w1, w0
	bcc	.while_inner_loop

	/* Same advance as .while_outer_check, with the loop test inverted */
	add	next_in, next_in, 1
	add	match_lookup, match_lookup, 4
	cmp	end_processed, next_in
	bhi	.while_outer_loop

.done:
	ldp	x19, x20, [sp, 16]
	ldp	x21, x22, [sp, 32]
	ldr	x23, [sp, 48]
	ldp	x29, x30, [sp], 64
	ret
	.size	set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
/*
 * Read-only table of 32-bit base values, indexed by the 9-bit distance code
 * that set_long_icf_fg_aarch64 extracts from a match_lookup entry.  The
 * routine adds the entry's dist_extra field to the selected word to obtain
 * the match distance.
 *
 * 30 populated entries followed by two zero words, for a total of
 * 32 words = 128 bytes (DIST_START_SIZE).
 */
.section .rodata
.align 3
/* .data_dist_start is set to the current location, i.e. the first table word. */
.set .data_dist_start,. + 0
.real_data_dist_start:
.word 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
.word 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
.word 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
.word 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000