mirror of
https://github.com/intel/isa-l.git
synced 2025-10-28 11:31:51 +01:00
igzip: implement gen_icf_map with assembly
Change-Id: I74e6200a732acfaac44b7f5a82bd4a2215ba1535
Signed-off-by: Zhiyuan Zhu <zhiyuan.zhu@arm.com>
This commit is contained in:
@@ -50,6 +50,7 @@ lsrc_aarch64 += igzip/aarch64/igzip_inflate_multibinary_arm64.S \
|
||||
igzip/aarch64/igzip_set_long_icf_fg.S \
|
||||
igzip/aarch64/encode_df.S \
|
||||
igzip/aarch64/isal_update_histogram.S \
|
||||
igzip/aarch64/gen_icf_map.S \
|
||||
igzip/aarch64/igzip_deflate_hash_aarch64.S \
|
||||
igzip/proc_heap_base.c
|
||||
|
||||
|
||||
266
igzip/aarch64/gen_icf_map.S
Normal file
266
igzip/aarch64/gen_icf_map.S
Normal file
@@ -0,0 +1,266 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2019 Arm Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Arm Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
.arch armv8-a+crc+crypto
|
||||
.text
|
||||
.align 2
|
||||
|
||||
#include "lz0a_const_aarch64.h"
|
||||
#include "data_struct_aarch64.h"
|
||||
#include "huffman_aarch64.h"
|
||||
#include "bitbuf2_aarch64.h"
|
||||
#include "stdmac_aarch64.h"
|
||||
|
||||
/*
|
||||
declare Macros
|
||||
*/
|
||||
|
||||
/*
 * declare_generic_reg name, reg, default
 *
 * Defines three register aliases for general-purpose register number "reg":
 *   x_<name>  -> x<reg>            (64-bit view)
 *   w_<name>  -> w<reg>            (32-bit view)
 *   <name>    -> <default><reg>    (default is the width letter, w or x)
 *
 * The three aliases are independent of each other, so their order of
 * definition does not matter.
 */
.macro	declare_generic_reg	name:req, reg:req, default:req
	x_\name	.req	x\reg
	w_\name	.req	w\reg
	\name	.req	\default\reg
.endm
|
||||
|
||||
/*
 * tzbytecnt param0, param1
 *
 * Counts how many whole bytes at the least-significant end of x_<param0>
 * are zero.
 *
 * In:   x_<param0> = 64-bit value to examine
 * Out:  w_<param0> = number of trailing zero bytes, 0..8
 *       w_<param1> = same count (scratch register, clobbered)
 *
 * RBIT followed by CLZ yields the number of trailing zero bits.  CLZ
 * returns 64 when its operand is zero, and 64 >> 3 == 8, so an all-zero
 * input produces 8 without any compare/select special case.
 * The condition flags are left unmodified.
 */
.macro	tzbytecnt	param0:req, param1:req
	rbit	x_\param1, x_\param0
	clz	x_\param1, x_\param1
	lsr	w_\param1, w_\param1, 3
	mov	w_\param0, w_\param1
.endm
|
||||
|
||||
/*
 * write_deflate_icf param0, param1, param2, param3
 *
 * Packs three values into one 32-bit word and stores it:
 *
 *   word = w_<param1> | (w_<param2> << 10) | (w_<param3> << 19)
 *   *(uint32_t *)x_<param0> = word
 *
 * NOTE(review): the three values presumably correspond to the lit_len,
 * lit_dist and dist_extra fields of struct deflate_icf - confirm against
 * the C definition.
 *
 * Clobbers w_<param1>, which is left holding the packed word.  The inputs
 * are OR-ed without masking, so callers must pass values that already fit
 * their bit ranges.
 */
.macro	write_deflate_icf	param0:req, param1:req, param2:req, param3:req
	orr	w_\param1, w_\param1, w_\param2, lsl 10
	orr	w_\param1, w_\param1, w_\param3, lsl 19
	str	w_\param1, [x_\param0]
.endm
|
||||
|
||||
.align 2
|
||||
.global gen_icf_map_h1_aarch64
|
||||
.type gen_icf_map_h1_aarch64, %function
|
||||
|
||||
/* arguments */
|
||||
declare_generic_reg stream_param, 0,x
|
||||
declare_generic_reg matches_icf_lookup_param, 1,x
|
||||
declare_generic_reg input_size_param, 2,x
|
||||
|
||||
declare_generic_reg param0, 0,x
|
||||
declare_generic_reg param1, 1,x
|
||||
declare_generic_reg param2, 2,x
|
||||
declare_generic_reg param3, 3,x
|
||||
|
||||
/* return */
|
||||
declare_generic_reg ret_val, 0,x
|
||||
|
||||
/* variables */
|
||||
declare_generic_reg input_size, 3,x
|
||||
declare_generic_reg next_in, 4,x
|
||||
declare_generic_reg matches_icf_lookup, 6,x
|
||||
declare_generic_reg hash_table, 7,x
|
||||
declare_generic_reg end_in, 8,x
|
||||
declare_generic_reg file_start, 9,x
|
||||
declare_generic_reg hash_mask, 10,w
|
||||
declare_generic_reg hist_size, 11,w
|
||||
declare_generic_reg stream_saved, 12,x
|
||||
declare_generic_reg literal_32, 13,w
|
||||
declare_generic_reg literal_1, 14,w
|
||||
declare_generic_reg dist, 15,w
|
||||
|
||||
declare_generic_reg tmp_has_hist, 0,w
|
||||
declare_generic_reg tmp_offset_hash_table, 1,x
|
||||
declare_generic_reg tmp0, 0,x
|
||||
declare_generic_reg tmp1, 1,x
|
||||
declare_generic_reg tmp2, 2,x
|
||||
declare_generic_reg tmp3, 3,x
|
||||
declare_generic_reg tmp5, 5,x
|
||||
|
||||
/* constant */
|
||||
.equ ISAL_LOOK_AHEAD, 288
|
||||
.equ SHORTEST_MATCH, 4
|
||||
.equ LEN_OFFSET, 254
|
||||
|
||||
/* mask */
|
||||
.equ mask_10bit, 1023
|
||||
.equ mask_lit_dist, 0x7800
|
||||
|
||||
/* offset of struct isal_zstream */
|
||||
.equ offset_next_in, 0
|
||||
.equ offset_avail_in, 8
|
||||
.equ offset_total_in, 12
|
||||
.equ offset_next_out, 16
|
||||
.equ offset_avail_out, 24
|
||||
.equ offset_total_out, 28
|
||||
.equ offset_hufftables, 32
|
||||
.equ offset_level, 40
|
||||
.equ offset_level_buf_size, 44
|
||||
.equ offset_level_buf, 48
|
||||
.equ offset_end_of_stream, 56
|
||||
.equ offset_flush, 58
|
||||
.equ offset_gzip_flag, 60
|
||||
.equ offset_hist_bits, 62
|
||||
.equ offset_state, 64
|
||||
.equ offset_state_block_end, 72
|
||||
.equ offset_state_dist_mask, 76
|
||||
.equ offset_state_has_hist, 135
|
||||
|
||||
/* offset of struct level_buf */
|
||||
.equ offset_hash_map_hash_table, 4712
|
||||
|
||||
/*
 * uint64_t gen_icf_map_h1_aarch64(struct isal_zstream *stream,
 *		struct deflate_icf *matches_icf_lookup, uint64_t input_size)
 *
 * AArch64 implementation with the same prototype as the C routine
 * gen_icf_map_h1_base.
 *
 * In:   x0 = stream
 *       x1 = matches_icf_lookup, output array of 32-bit entries
 *       x2 = input_size
 * Out:  x0 = number of input bytes for which an entry was written
 *            (final next_in minus stream->next_in); 0 when
 *            input_size < ISAL_LOOK_AHEAD
 *
 * Behaviour, as implemented below:
 *   - If the has_hist byte of the stream state is zero, the first input
 *     byte is emitted as a literal entry, its position is entered into the
 *     hash table and has_hist is set to 1.
 *   - For every position up to stream->next_in + input_size -
 *     ISAL_LOOK_AHEAD, the 4 bytes at that position are hashed with
 *     CRC32C, the 16-bit hash table slot is read and then overwritten with
 *     the current position, and 8 bytes at the current position are
 *     compared with 8 bytes at the candidate distance.  A match of at
 *     least SHORTEST_MATCH bytes produces a length/distance entry,
 *     otherwise a literal entry is produced.
 *   - stream->next_in itself is not modified.
 *
 * Uses x0-x15 only, plus a 16-byte frame record for x29/x30.
 * Needs the CRC32 extension (crc32cw).  The trailing-zero byte count on
 * the XOR of the two 8-byte loads assumes little-endian data order.
 */

	/* lit_dist value stored in a literal entry; mask_lit_dist is this << 10 */
	.equ	lit_dist_literal,	0x1e

gen_icf_map_h1_aarch64:
	cmp	input_size_param, (ISAL_LOOK_AHEAD-1)	// 287
	bls	.fast_exit				// unsigned: input_size <= 287
	stp	x29, x30, [sp, -16]!

	/* Move the arguments out of x0/x1/x2 before those are reused as temporaries. */
	mov	stream_saved, stream_param
	mov	matches_icf_lookup, matches_icf_lookup_param
	mov	x29, sp

	ldrb	tmp_has_hist, [stream_saved, offset_state_has_hist]
	mov	tmp_offset_hash_table, offset_hash_map_hash_table
	ldr	end_in, [stream_saved, offset_next_in]	// end_in starts out as stream->next_in
	mov	input_size, input_size_param
	ldr	hash_table, [stream_saved, offset_level_buf]
	ldr	w_file_start, [stream_saved, offset_total_in]
	/* two consecutive 32-bit fields: dist_mask, then the hash mask right after it */
	ldp	hist_size, hash_mask, [stream_saved, offset_state_dist_mask]
	add	hash_table, hash_table, tmp_offset_hash_table
	sub	file_start, end_in, file_start		// file_start = next_in - total_in
	cbz	tmp_has_hist, .igzip_no_hist
	b	.while_check1

	.align 3
.igzip_no_hist:
	/*
	 * No history yet: emit the first byte as a literal.  The entry is
	 * the byte value OR-ed with mask_lit_dist (lit_dist_literal << 10),
	 * with the bits from 19 upwards left zero.
	 */
	ldrb	w_tmp1, [end_in]
	add	next_in, end_in, 1
	orr	w_tmp0, w_tmp1, mask_lit_dist
	str	w_tmp0, [matches_icf_lookup], 4

	/* Enter the first position into the hash table. */
	ldr	w_tmp0, [end_in]
	crc32cw	w_tmp0, wzr, w_tmp0

	and	w_tmp5, w_tmp0, hash_mask
	sub	x_tmp1, end_in, file_start		// position relative to file_start
	mov	w_tmp2, 1
	mov	x_tmp0, 1				// return value if the loop is not entered
	strh	w_tmp1, [hash_table, x_tmp5, lsl 1]
	strb	w_tmp2, [stream_saved, offset_state_has_hist]	// nonzero: skip this path next call
	b	.while_check2

.while_check1:
	mov	next_in, end_in
	mov	x_tmp0, 0				// return value if the loop is not entered

.while_check2:
	/* end_in = stream->next_in + input_size - ISAL_LOOK_AHEAD */
	sub	input_size, input_size, ISAL_LOOK_AHEAD
	add	end_in, end_in, input_size
	cmp	next_in, end_in
	bcs	.exit					// nothing to scan; x0 already holds 0 or 1
	mov	literal_32, 32
	mov	literal_1, 1
	b	.while_loop

	.align 3
.new_match_found:
	/*
	 * On entry: w_tmp0 = match length in bytes (SHORTEST_MATCH..8),
	 *           w_tmp2 = dist - 1, w_tmp3 = 0.
	 * Builds the distance code in w_tmp2 and its extra bits in w_tmp3.
	 */
	clz	w_tmp5, w_tmp2
	add	w_tmp1, w_tmp0, LEN_OFFSET		// length field = match length + LEN_OFFSET
	sub	w_tmp5, literal_32, w_tmp5
	cmp	dist, 2
	sub	w_tmp5, w_tmp5, #2			// n = bit length of (dist - 1), minus 2
	bls	.skip_compute_dist_icf_code		// dist <= 2: code = dist - 1, no extra bits

	lsl	w_tmp3, literal_1, w_tmp5
	sub	w_tmp3, w_tmp3, #1			// (1 << n) - 1
	lsr	w_tmp0, w_tmp2, w_tmp5
	and	w_tmp3, w_tmp3, w_tmp2			// extra = (dist - 1) & ((1 << n) - 1)
	add	w_tmp2, w_tmp0, w_tmp5, lsl 1		// code  = ((dist - 1) >> n) + 2 * n

.skip_compute_dist_icf_code:
	mov	param0, matches_icf_lookup
	write_deflate_icf param0,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	beq	.save_with_exit

.while_loop:
	/* hash = crc32c(0, 4 bytes at next_in) & hash_mask */
	ldr	w_tmp0, [next_in]
	crc32cw	w_tmp0, wzr, w_tmp0

	and	w_tmp0, w_tmp0, hash_mask
	sub	x_tmp1, next_in, file_start		// current position
	lsl	x_tmp0, x_tmp0, 1			// byte offset of the 16-bit table slot
	sub	w_tmp2, w_tmp1, #1
	ldrh	w_tmp3, [hash_table, x_tmp0]		// previous position stored for this hash
	strh	w_tmp1, [hash_table, x_tmp0]		// replace it with the current position
	sub	w_tmp2, w_tmp2, w_tmp3
	and	w_tmp2, w_tmp2, hist_size		// (pos - 1 - prev) & dist_mask
	add	dist, w_tmp2, 1

	/* Compare 8 bytes here with 8 bytes at the candidate distance. */
	ldr	x_tmp0, [next_in]
	sub	x_tmp1, next_in, x_dist, uxtw
	ldr	x_tmp1, [x_tmp1]
	eor	x_tmp0, x_tmp1, x_tmp0
	tzbytecnt param0,param1				// w_tmp0 = number of equal leading bytes, 0..8

	cmp	w_tmp0, (SHORTEST_MATCH-1)
	mov	w_tmp3, 0				// extra bits default to 0 on both paths
	bhi	.new_match_found

	/* Literal entry: byte value, lit_dist_literal, zero extra bits (w_param3 is already 0). */
	ldrb	w_param1, [next_in]
	mov	x_param0, matches_icf_lookup
	mov	w_param2, lit_dist_literal
	write_deflate_icf param0,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	bne	.while_loop

.save_with_exit:
	ldr	ret_val, [stream_saved, offset_next_in]
	sub	ret_val, next_in, ret_val		// bytes processed

.exit:
	ldp	x29, x30, [sp], 16
	ret

	.align 3
.fast_exit:
	/* Taken before the frame record is pushed, so there is nothing to restore. */
	mov	ret_val, 0
	ret

	.size	gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64
|
||||
@@ -132,6 +132,16 @@ DEFINE_INTERFACE_DISPATCHER(isal_update_histogram)
|
||||
return PROVIDER_BASIC(isal_update_histogram);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(gen_icf_map_lh1)
|
||||
{
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
if (auxval & HWCAP_CRC32) {
|
||||
return PROVIDER_INFO(gen_icf_map_h1_aarch64);
|
||||
}
|
||||
|
||||
return PROVIDER_BASIC(gen_icf_map_h1);
|
||||
}
|
||||
|
||||
DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl0)
|
||||
{
|
||||
unsigned long auxval = getauxval(AT_HWCAP);
|
||||
|
||||
@@ -39,7 +39,7 @@ mbin_interface isal_deflate_icf_finish_lvl3
|
||||
mbin_interface isal_update_histogram
|
||||
mbin_interface encode_deflate_icf
|
||||
mbin_interface set_long_icf_fg
|
||||
mbin_interface_base gen_icf_map_lh1 , gen_icf_map_h1_base
|
||||
mbin_interface gen_icf_map_lh1
|
||||
mbin_interface isal_deflate_hash_lvl0
|
||||
mbin_interface isal_deflate_hash_lvl1
|
||||
mbin_interface isal_deflate_hash_lvl2
|
||||
|
||||
Reference in New Issue
Block a user