igzip: Add decode huffman code for aarch64

Change-Id: If26cc4fd97b078b5f3b02e5f6f121a12ec73f671
Signed-off-by: Jerry Yu <jerry.h.yu@arm.com>
Jerry Yu 2019-12-12 16:14:48 +08:00
parent ad49e580dc
commit 936d05fc4f
4 changed files with 700 additions and 1 deletion


@ -52,6 +52,7 @@ lsrc_aarch64 += igzip/aarch64/igzip_inflate_multibinary_arm64.S \
igzip/aarch64/isal_update_histogram.S \
igzip/aarch64/gen_icf_map.S \
igzip/aarch64/igzip_deflate_hash_aarch64.S \
igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S \
igzip/proc_heap_base.c
lsrc_x86_64 += igzip/igzip_body.asm \


@ -0,0 +1,689 @@
/**********************************************************************
Copyright(c) 2019 Arm Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Arm Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
.arch armv8-a
.text
.align 2
#include "lz0a_const_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"
#define ENABLE_TBL_INSTRUCTION 1
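/*
When ENABLE_TBL_INSTRUCTION is non-zero, rfc_lookup_table (defined at the
end of this file) is preloaded into v1-v7 and indexed with NEON tbl
instructions; otherwise the distance extra-bit counts and base distances
are fetched from the table with scalar loads.
*/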
#define FIELD(name,size,align) \
.set _FIELD_OFFSET,(_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)); \
.equ name,_FIELD_OFFSET ; \
.set _FIELD_OFFSET,_FIELD_OFFSET + size; \
.if align > _STRUCT_ALIGN; \
.set _STRUCT_ALIGN, align; \
.endif;
#define START_STRUCT(name) .set _FIELD_OFFSET,0;.set _STRUCT_ALIGN,0;
#define END_STRUCT(name) .set _##name##_size , _FIELD_OFFSET;\
.set _##name##_align,_STRUCT_ALIGN
#define CONST(name,value) .equ name,value
#define ISAL_DECODE_LONG_BITS 12
#define ISAL_DECODE_SHORT_BITS 10
#define L_REM (21 - ISAL_DECODE_LONG_BITS)
#define S_REM (15 - ISAL_DECODE_SHORT_BITS)
#define L_DUP ((1 << L_REM) - (L_REM + 1))
#define S_DUP ((1 << S_REM) - (S_REM + 1))
#define L_UNUSED ((1 << L_REM) - (1 << ((L_REM)/2)) - (1 << ((L_REM + 1)/2)) + 1)
#define S_UNUSED ((1 << S_REM) - (1 << ((S_REM)/2)) - (1 << ((S_REM + 1)/2)) + 1)
#define L_SIZE (286 + L_DUP + L_UNUSED)
#define S_SIZE (30 + S_DUP + S_UNUSED)
#define HUFF_CODE_LARGE_LONG_ALIGNED (L_SIZE + (-L_SIZE & 0xf))
#define HUFF_CODE_SMALL_LONG_ALIGNED (S_SIZE + (-S_SIZE & 0xf))
#define MAX_LONG_CODE_LARGE (L_SIZE + (-L_SIZE & 0xf))
#define MAX_LONG_CODE_SMALL (S_SIZE + (-S_SIZE & 0xf))
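/*
Worked sizing example, assuming ISAL_DECODE_LONG_BITS = 12 and
ISAL_DECODE_SHORT_BITS = 10 as defined above:
L_REM = 9, L_DUP = 512 - 10 = 502, L_UNUSED = 512 - 16 - 32 + 1 = 465
L_SIZE = 286 + 502 + 465 = 1253 -> MAX_LONG_CODE_LARGE = 1264 (16-byte aligned)
S_REM = 5, S_DUP = 32 - 6 = 26, S_UNUSED = 32 - 4 - 8 + 1 = 21
S_SIZE = 30 + 26 + 21 = 77 -> MAX_LONG_CODE_SMALL = 80 (16-byte aligned)
*/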
#define LARGE_SHORT_CODE_SIZE 4
#define LARGE_LONG_CODE_SIZE 2
#define SMALL_SHORT_CODE_SIZE 2
#define SMALL_LONG_CODE_SIZE 2
// inflate_huff_code
START_STRUCT( inflate_huff_code_large )
// name size align
FIELD ( _short_code_lookup_large, LARGE_SHORT_CODE_SIZE*(1<<(ISAL_DECODE_LONG_BITS)), LARGE_LONG_CODE_SIZE )
FIELD ( _long_code_lookup_large, LARGE_LONG_CODE_SIZE*MAX_LONG_CODE_LARGE, LARGE_SHORT_CODE_SIZE )
END_STRUCT(inflate_huff_code_large)
// inflate_huff_code
START_STRUCT( inflate_huff_code_small )
// name size align
FIELD ( _short_code_lookup_small, SMALL_SHORT_CODE_SIZE*(1<<(ISAL_DECODE_SHORT_BITS)), SMALL_LONG_CODE_SIZE )
FIELD ( _long_code_lookup_small, SMALL_LONG_CODE_SIZE*MAX_LONG_CODE_SMALL, SMALL_SHORT_CODE_SIZE )
END_STRUCT(inflate_huff_code_small)
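/*
Rough C equivalent of the two lookup tables laid out above (a sketch,
assumed to mirror inflate_huff_code_large/_small in igzip_lib.h; only the
members this file touches are shown):

struct inflate_huff_code_large {
	uint32_t short_code_lookup[1 << ISAL_DECODE_LONG_BITS];
	uint16_t long_code_lookup[MAX_LONG_CODE_LARGE];
};
struct inflate_huff_code_small {
	uint16_t short_code_lookup[1 << ISAL_DECODE_SHORT_BITS];
	uint16_t long_code_lookup[MAX_LONG_CODE_SMALL];
};
*/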
// inflate_state
START_STRUCT( inflate_state )
// name size align
FIELD ( _next_out, 8, 8 )
FIELD ( _avail_out, 4, 4 )
FIELD ( _total_out, 4, 4 )
FIELD ( _next_in, 8, 8 )
FIELD ( _read_in, 8, 8 )
FIELD ( _avail_in, 4, 4 )
FIELD ( _read_in_length, 4, 4 )
FIELD ( _lit_huff_code, _inflate_huff_code_large_size, _inflate_huff_code_large_align )
FIELD ( _dist_huff_code, _inflate_huff_code_small_size, _inflate_huff_code_small_align )
FIELD ( _block_state, 4, 4 )
FIELD ( _dict_length, 4, 4 )
FIELD ( _bfinal, 4, 4 )
FIELD ( _crc_flag, 4, 4 )
FIELD ( _crc, 4, 4 )
FIELD ( _hist_bits, 4, 4 )
FIELD ( _type0_block_len, 4, 4 )
FIELD ( _write_overflow_lits, 4, 4 )
FIELD ( _write_overflow_len, 4, 4 )
FIELD ( _copy_overflow_len, 4, 4 )
FIELD ( _copy_overflow_dist, 4, 4 )
END_STRUCT(inflate_state)
CONST( _lit_huff_code_short_code_lookup , _lit_huff_code+_short_code_lookup_large )
CONST( _lit_huff_code_long_code_lookup , _lit_huff_code+_long_code_lookup_large )
CONST( _dist_huff_code_short_code_lookup , _dist_huff_code+_short_code_lookup_small )
CONST( _dist_huff_code_long_code_lookup , _dist_huff_code+_long_code_lookup_small )
CONST( ISAL_BLOCK_NEW_HDR , 0 )
CONST( ISAL_BLOCK_HDR , 1 )
CONST( ISAL_BLOCK_TYPE0 , 2 )
CONST( ISAL_BLOCK_CODED , 3 )
CONST( ISAL_BLOCK_INPUT_DONE , 4 )
CONST( ISAL_BLOCK_FINISH , 5 )
/* Inflate Return values */
#define ISAL_DECOMP_OK 0 /* No errors encountered while decompressing */
#define ISAL_END_INPUT 1 /* End of input reached */
#define ISAL_OUT_OVERFLOW 2 /* End of output reached */
#define ISAL_NAME_OVERFLOW 3 /* End of gzip name buffer reached */
#define ISAL_COMMENT_OVERFLOW 4 /* End of gzip comment buffer reached */
#define ISAL_EXTRA_OVERFLOW 5 /* End of extra buffer reached */
#define ISAL_NEED_DICT 6 /* Stream needs a dictionary to continue */
#define ISAL_INVALID_BLOCK -1 /* Invalid deflate block found */
#define ISAL_INVALID_SYMBOL -2 /* Invalid deflate symbol found */
#define ISAL_INVALID_LOOKBACK -3 /* Invalid lookback distance found */
#define ISAL_INVALID_WRAPPER -4 /* Invalid gzip/zlib wrapper found */
#define ISAL_UNSUPPORTED_METHOD -5 /* Gzip/zlib wrapper specifies unsupported compress method */
#define ISAL_INCORRECT_CHECKSUM -6 /* Incorrect checksum found */
#define ISAL_DEF_MAX_CODE_LEN 15
#define LARGE_SHORT_SYM_LEN 25
#define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1)
#define LARGE_LONG_SYM_LEN 10
#define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1)
#define LARGE_SHORT_CODE_LEN_OFFSET 28
#define LARGE_LONG_CODE_LEN_OFFSET 10
#define LARGE_FLAG_BIT_OFFSET 25
#define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET)
#define LARGE_SYM_COUNT_OFFSET 26
#define LARGE_SYM_COUNT_LEN 2
#define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1)
#define LARGE_SHORT_MAX_LEN_OFFSET 26
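/*
Layout of a 32-bit entry in the large short-code lookup table, as implied
by the offsets above:
short (direct) entries, LARGE_FLAG_BIT clear:
bits  0..24  up to three packed symbols    (LARGE_SHORT_SYM_MASK)
bits 26..27  symbol count                  (LARGE_SYM_COUNT_OFFSET/LEN)
bits 28..31  code length in bits           (LARGE_SHORT_CODE_LEN_OFFSET)
long-code entries, LARGE_FLAG_BIT (bit 25) set:
bits  0..24  offset into long_code_lookup  (LARGE_SHORT_SYM_MASK)
bits 26..    maximum code length           (LARGE_SHORT_MAX_LEN_OFFSET)
*/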
#define SMALL_SHORT_SYM_LEN 9
#define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1)
#define SMALL_LONG_SYM_LEN 9
#define SMALL_LONG_SYM_MASK ((1 << SMALL_LONG_SYM_LEN) - 1)
#define SMALL_SHORT_CODE_LEN_OFFSET 11
#define SMALL_LONG_CODE_LEN_OFFSET 10
#define SMALL_FLAG_BIT_OFFSET 10
#define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET)
#define DIST_SYM_OFFSET 0
#define DIST_SYM_LEN 5
#define DIST_SYM_MASK ((1 << DIST_SYM_LEN) - 1)
#define DIST_SYM_EXTRA_OFFSET 5
#define DIST_SYM_EXTRA_LEN 4
#define DIST_SYM_EXTRA_MASK ((1 << DIST_SYM_EXTRA_LEN) - 1)
#define MAX_LIT_LEN_CODE_LEN 21
#define MAX_LIT_LEN_COUNT (MAX_LIT_LEN_CODE_LEN + 2)
#define MAX_LIT_LEN_SYM 512
#define LIT_LEN_ELEMS 514
#define INVALID_SYMBOL 0x1FFF
#define INVALID_CODE 0xFFFFFF
#define MIN_DEF_MATCH 3
#define TRIPLE_SYM_FLAG 0
#define DOUBLE_SYM_FLAG TRIPLE_SYM_FLAG + 1
#define SINGLE_SYM_FLAG DOUBLE_SYM_FLAG + 1
#define DEFAULT_SYM_FLAG TRIPLE_SYM_FLAG
#define SINGLE_SYM_THRESH (2 * 1024)
#define DOUBLE_SYM_THRESH (4 * 1024)
/*
Macro definitions
*/
.macro declare_generic_reg name:req,reg:req,default:req
\name .req \default\reg
w_\name .req w\reg
x_\name .req x\reg
.endm
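/*
Example expansion: "declare_generic_reg state, 11, x" creates the aliases
state .req x11, w_state .req w11 and x_state .req x11, so the same logical
name can be used at either register width below.
*/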
.macro inflate_in_load_read_byte
cmp read_in_length,56
bgt 1f
cbz avail_in,1f
ldrb w_temp,[next_in],1
sub avail_in,avail_in,1
lsl temp,temp,x_read_in_length
orr read_in,read_in,temp
add read_in_length,read_in_length,8
uxtw read_in_length,read_in_length
.endm
.macro inflate_in_load
cmp read_in_length, 63
bgt 1f
/*if (state->avail_in >= 8) */
cmp avail_in, 7
bhi 2f
// loop max 7 times
// while (state->read_in_length < 57 && state->avail_in > 0)
inflate_in_load_read_byte
inflate_in_load_read_byte
inflate_in_load_read_byte
inflate_in_load_read_byte
inflate_in_load_read_byte
inflate_in_load_read_byte
inflate_in_load_read_byte
b 1f
2:
add new_bytes,read_in_length,7
mov w_temp,8
lsr new_bytes,new_bytes,3
sub new_bytes,w_temp,new_bytes
ldr temp,[next_in]
lsl temp,temp,x_read_in_length
orr read_in,read_in,temp
add next_in,next_in,new_bytes,uxtb
add read_in_length,read_in_length,new_bytes,lsl 3
sub avail_in,avail_in,new_bytes
1:
.endm
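/*
Reference sketch of the C routine the two macros above implement (assumed,
following inflate_in_load() in igzip_inflate.c; the temp_dat/temp_bytes
arguments are dropped for brevity):

if (state->read_in_length < 64) {
	if (state->avail_in >= 8) {
		new_bytes = 8 - (state->read_in_length + 7) / 8;
		temp = *(uint64_t *) state->next_in;	// unaligned 64-bit load
		state->read_in |= temp << state->read_in_length;
		state->next_in += new_bytes;
		state->avail_in -= new_bytes;
		state->read_in_length += new_bytes * 8;
	} else {
		while (state->read_in_length < 57 && state->avail_in > 0) {
			temp = *state->next_in++;
			state->read_in |= temp << state->read_in_length;
			state->avail_in--;
			state->read_in_length += 8;
		}
	}
}
*/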
.macro copy_word
sub repeat_length,repeat_length,#4
ldr w_arg0, [arg1],4
cmp repeat_length, 3
str w_arg0, [next_out],4
bls load_byte_less_than_4
.endm
.global decode_huffman_code_block_stateless_aarch64
.type decode_huffman_code_block_stateless_aarch64, %function
/*
int decode_huffman_code_block_stateless_aarch64(
struct inflate_state *state,
uint8_t * start_out)
*/
declare_generic_reg arg0 0, x
declare_generic_reg arg1 1, x
declare_generic_reg arg2 2, x
declare_generic_reg state, 11,x
declare_generic_reg start_out, 18,x
declare_generic_reg read_in, 3,x
declare_generic_reg read_in_length, 4,w
declare_generic_reg sym_count, 5,w
declare_generic_reg next_bits, 6,w
declare_generic_reg next_lits, 6,w
declare_generic_reg avail_in, 20,w
declare_generic_reg next_in, 23,x
declare_generic_reg temp, 16,x //local temp variable
declare_generic_reg new_bytes, 7,w //temp variable
declare_generic_reg copy_overflow_length, 28,w
declare_generic_reg block_state, 8,w
declare_generic_reg block_state_adr,9,x
declare_generic_reg look_back_dist, 10,w
declare_generic_reg bfinal, 22,x
declare_generic_reg next_out, 12,x
declare_generic_reg avail_out, 13,w
declare_generic_reg total_out, 14,w
declare_generic_reg rfc_table, 15,x
declare_generic_reg next_sym, 17,w
declare_generic_reg next_dist, 17,w
declare_generic_reg bit_count, 19,w
declare_generic_reg bit_mask, 21,w
declare_generic_reg next_lit, 24,w
declare_generic_reg write_overflow_len,25,w
declare_generic_reg write_overflow_lits,26,w
declare_generic_reg repeat_length,27,w
decode_huffman_code_block_stateless_aarch64:
//save registers
push_stack
//load variables
mov state,arg0
mov block_state,_block_state
mov start_out,arg1
add block_state_adr,state,block_state,uxtw
ldr block_state, [block_state_adr]
ldr bfinal, [block_state_adr,_bfinal-_block_state]
ldr next_out, [state]
ldp avail_out,total_out,[state,_avail_out]
ldp next_in, read_in, [state,_next_in]
ldp avail_in, read_in_length, [state,_avail_in]
ldp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
//init rfc_table
adrp rfc_table,rfc_lookup_table
add rfc_table,rfc_table,:lo12:rfc_lookup_table
#if ENABLE_TBL_INSTRUCTION
ld1 {v1.16b,v2.16b,v3.16b},[rfc_table]
add rfc_table,rfc_table,48
ld1 {v4.16b-v7.16b},[rfc_table]
#endif
/*
state->copy_overflow_length = 0;
state->copy_overflow_distance = 0;
*/
mov x_copy_overflow_length,xzr
str xzr,[block_state_adr,_copy_overflow_len-_block_state]
/* while (state->block_state == ISAL_BLOCK_CODED) */
block_state_loop:
cmp block_state ,ISAL_BLOCK_CODED
bne exit_func_success
inflate_in_load
/* save state here */
str next_out, [state]
stp avail_out,total_out,[state,_avail_out]
stp next_in, read_in, [state,_next_in]
stp avail_in, read_in_length, [state,_avail_in]
stp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
/*
decode_next_lit_len(&next_lits, &sym_count,
state, &state->lit_huff_code,
&temp_dat, &temp_bytes);
*/
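/*
Two-level lookup sketch (assumed, after decode_next_lit_len() in
igzip_inflate.c): the short table is indexed with ISAL_DECODE_LONG_BITS
bits of read_in; if LARGE_FLAG_BIT is clear the entry holds up to three
packed literal/length symbols plus the code length, otherwise it points
into the long table, which is indexed with the remaining code bits:

next_bits = read_in & ((1 << ISAL_DECODE_LONG_BITS) - 1);
next_sym = huff_code->short_code_lookup[next_bits];
if (next_sym & LARGE_FLAG_BIT) {
	bit_mask = (1 << (next_sym >> LARGE_SHORT_MAX_LEN_OFFSET)) - 1;
	next_bits = read_in & bit_mask;
	next_sym = huff_code->long_code_lookup[(next_sym & LARGE_SHORT_SYM_MASK)
					       + (next_bits >> ISAL_DECODE_LONG_BITS)];
}
bit_count = next_sym >> code_len_offset;	// 28 for short hits, 10 for long hits
read_in >>= bit_count;
read_in_length -= bit_count;
*/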
cmp read_in_length,ISAL_DEF_MAX_CODE_LEN
ble inflate_in_load_decode
decode_next_lit_len_start:
and x_next_bits,read_in,((1 << ISAL_DECODE_LONG_BITS) - 1)
/*next_sym = huff_code->short_code_lookup[next_bits];*/
add next_bits,next_bits,_lit_huff_code>>2
ldr next_sym,[state,x_next_bits,lsl 2]
/*if ((next_sym & LARGE_FLAG_BIT) == 0) {*/
tbnz next_sym,LARGE_FLAG_BIT_OFFSET,long_code_lookup_routine
lsr bit_count,next_sym,LARGE_SHORT_CODE_LEN_OFFSET
sub read_in_length,read_in_length,bit_count
lsr read_in,read_in,x_bit_count
mov temp,0x1fff
cmp bit_count,0
csel next_sym,next_sym,w_temp,ne
ubfx sym_count,next_sym,LARGE_SYM_COUNT_OFFSET,LARGE_SYM_COUNT_LEN
and next_lits,next_sym,LARGE_SHORT_SYM_MASK
b decode_next_lit_len_end
long_code_lookup_routine:
lsr bit_mask,next_sym,LARGE_SHORT_MAX_LEN_OFFSET
mov sym_count,1
and next_sym,next_sym,LARGE_SHORT_SYM_MASK
mov temp,1023
lsl bit_mask,sym_count,bit_mask
sub bit_mask,bit_mask,1
and x_next_bits,read_in,x_bit_mask
add next_bits,next_sym,next_bits,lsr ISAL_DECODE_LONG_BITS
mov next_sym,(_lit_huff_code+_long_code_lookup_large)>>1
add next_bits,next_bits,next_sym
ldrh next_sym,[state,x_next_bits,lsl 1]
lsr bit_count,next_sym,10
sub read_in_length,read_in_length,bit_count
and next_lits,next_sym,w_temp
lsr read_in,read_in,x_bit_count
cmp bit_count,0
csel next_lits,next_lits,w_temp,ne
decode_next_lit_len_end:
/* if (sym_count == 0) */
cbz sym_count,invalid_symbol
tbnz read_in_length,31, end_input
/* while (sym_count > 0) start */
sym_count_loop:
and next_lit,next_lits , 0xffff
/*if (next_lit < 256 || sym_count > 1) {*/
cmp next_lit,255
ccmp sym_count,1,0,hi
beq next_lit_256
/* if (state->avail_out < 1) { */
cbnz avail_out,sym_count_adjust
mov write_overflow_len,sym_count
lsl sym_count,sym_count,3
mov write_overflow_lits,next_lits
sub sym_count,sym_count,8
lsr next_lits,next_lits,sym_count
mov sym_count,1
cmp next_lits,255
bls isal_out_overflow
cmp next_lits,256
sub write_overflow_len,write_overflow_len,1
beq isal_out_overflow_1
b sym_count_loop
sym_count_adjust:
/*
tail of the while (sym_count > 0) loop:
next_lits >>= 8;
sym_count--;
*/
subs sym_count,sym_count,1
lsr next_lits,next_lits,8
strb next_lit,[next_out],1
sub avail_out,avail_out,1
add total_out,total_out,1
bne sym_count_loop
b block_state_loop
next_lit_256:
/* if (next_lit == 256) { */
cmp next_lit,256
beq next_lit_eq_256
/*
if (next_lit <= MAX_LIT_LEN_SYM): next_lit is a length symbol,
and sym_count must be 1 at this point
*/
cmp next_lit,MAX_LIT_LEN_SYM
bhi invalid_symbol
sub repeat_length,next_lit,254
/*
next_dist =
decode_next_dist(state, &state->dist_huff_code, &temp_dat,
&temp_bytes);
*/
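/*
Same two-level scheme as the literal/length lookup above, but on the small
(distance) tables: ISAL_DECODE_SHORT_BITS index bits, 16-bit entries, and
SMALL_FLAG_BIT selecting the long table (assumed to follow
decode_next_dist() in igzip_inflate.c).
*/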
cmp read_in_length,ISAL_DEF_MAX_CODE_LEN
ble inflate_in_load_decode_next_dist
decode_next_dist_start:
and x_next_bits,read_in,((1 << ISAL_DECODE_SHORT_BITS) - 1)
mov next_sym,_dist_huff_code>>1
add next_bits,next_bits,next_sym
ldrh next_sym, [state,x_next_bits,lsl 1]
tbz next_sym,SMALL_FLAG_BIT_OFFSET,decode_next_dist_flag
sub bit_mask,next_sym,SMALL_FLAG_BIT
mov temp,1
asr bit_mask,bit_mask,SMALL_SHORT_CODE_LEN_OFFSET
and next_sym,next_sym,SMALL_SHORT_SYM_MASK
lsl bit_mask,w_temp,bit_mask
sub bit_mask,bit_mask,1
and x_next_bits,read_in,x_bit_mask
add next_bits,next_sym,next_bits,lsr ISAL_DECODE_SHORT_BITS
mov next_sym,(_dist_huff_code + _long_code_lookup_small)>>1
add next_bits,next_bits,next_sym
ldrh next_sym,[state,x_next_bits,lsl 1]
lsr bit_count,next_sym,SMALL_LONG_CODE_LEN_OFFSET
b decode_next_dist_adjust
decode_next_dist_flag:
lsr bit_count,next_sym,SMALL_SHORT_CODE_LEN_OFFSET
decode_next_dist_adjust:
sub read_in_length,read_in_length,bit_count
lsr read_in,read_in,x_bit_count
cbnz bit_count,decode_next_dist_end
sub read_in_length,read_in_length,next_sym
mov next_sym,INVALID_SYMBOL
decode_next_dist_end:
and next_sym,next_sym,DIST_SYM_MASK
tbnz read_in_length,31,end_input_1
cmp next_dist,29
bhi invalid_symbol
#if ENABLE_TBL_INSTRUCTION
ins v0.b[0],next_dist
tbl v0.8b,{v2.16b,v3.16b},v0.8b
umov bit_count,v0.b[0]
#else
ldrb bit_count,[rfc_table,next_dist,sxtw]
#endif
/*inflate_in_read_bits(state,
dist_extra_bit_count, &temp_dat,
&temp_bytes);
*/
inflate_in_load
mov temp,1
lsl temp,temp,x_bit_count
sub read_in_length,read_in_length,bit_count
sub temp,temp,1
and x_look_back_dist,temp,read_in
lsr read_in,read_in,x_bit_count
#if ENABLE_TBL_INSTRUCTION
dup v0.8b,next_dist
add v0.8b,v1.8b,v0.8b
tbl v0.8b,{v4.16b-v7.16b},v0.8b
umov next_dist,v0.h[0]
#else
add next_dist,next_dist,16
ldrh next_dist,[rfc_table,x_next_dist,lsl 1]
#endif
add look_back_dist,look_back_dist,next_dist
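/*
Example: distance symbol 5 has dist_start = 7 and 1 extra bit
(RFC 1951, section 3.2.5), so look_back_dist decodes to 7 or 8.
*/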
/*
if (state->read_in_length < 0) {
*/
tbnz read_in_length,31,end_input_1
/*
if (state->next_out - look_back_dist < start_out) {
*/
sub temp,next_out,x_look_back_dist
cmp temp,start_out
bcc isal_invalid_lookback
/*
if (state->avail_out < repeat_length) {
*/
cmp avail_out , repeat_length
bcs decompress_data_start
sub copy_overflow_length,repeat_length,avail_out
stp copy_overflow_length,look_back_dist,[block_state_adr,_copy_overflow_len-_block_state]
mov repeat_length,avail_out
decompress_data_start:
add total_out,total_out,repeat_length
sub avail_out,avail_out,repeat_length
sub arg1,next_out,x_look_back_dist
#if 1
cmp look_back_dist,repeat_length
bls byte_copy_start
#else
b byte_copy_start
#endif
cbz repeat_length,decompress_data_end
cmp repeat_length, 3
bls load_byte_less_than_4 // ~0.5% of cases take this branch
load_byte_4:
sub repeat_length, repeat_length, #4
ldr w_arg0, [arg1],4
cmp repeat_length, 3
str w_arg0, [next_out],4
bls load_byte_less_than_4
.rept 62
copy_word
.endr
sub repeat_length, repeat_length, #4
ldr w_arg0, [arg1],4
cmp repeat_length, 4
str w_arg0, [next_out],4
bge load_byte_4
load_byte_less_than_4:
tbz repeat_length,0,load_byte_2
ldrb w_arg0, [arg1],1
sub repeat_length, repeat_length, #1
strb w_arg0, [next_out],1
load_byte_2:
tbz repeat_length,1,decompress_data_end
ldrh w_arg0, [arg1],2
strh w_arg0, [next_out],2
decompress_data_end:
/*
if (state->copy_overflow_length > 0)
*/
cmp copy_overflow_length,0
bgt isal_out_overflow
b block_state_loop
next_lit_eq_256:
/*
state->block_state = state->bfinal ?
ISAL_BLOCK_INPUT_DONE : ISAL_BLOCK_NEW_HDR;
*/
mov block_state, ISAL_BLOCK_INPUT_DONE
cmp w_bfinal,0
csel block_state, block_state, w_bfinal, ne
str block_state, [block_state_adr]
b block_state_loop
exit_func_success:
mov w0 , 0
exit_func:
str next_out, [state]
stp avail_out,total_out,[state,_avail_out]
stp next_in, read_in, [state,_next_in]
stp avail_in, read_in_length, [state,_avail_in]
stp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
pop_stack
ret
end_input_1:
end_input:
mov w0,ISAL_END_INPUT
pop_stack
ret
invalid_symbol:
/*
the state variables below were modified, so store them before exit
*/
str next_out, [state]
stp avail_out,total_out,[state,_avail_out]
stp next_in, read_in, [state,_next_in]
stp avail_in, read_in_length, [state,_avail_in]
stp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
mov w0, ISAL_INVALID_SYMBOL
b exit_func
isal_out_overflow_1:
cmp bfinal,0
mov block_state, ISAL_BLOCK_INPUT_DONE
csel block_state, block_state, wzr, ne
str block_state, [block_state_adr]
isal_out_overflow:
mov w0, ISAL_OUT_OVERFLOW
b exit_func
isal_invalid_lookback:
mov w0, ISAL_INVALID_LOOKBACK
b exit_func
inflate_in_load_decode:
inflate_in_load
b decode_next_lit_len_start
inflate_in_load_decode_next_dist:
inflate_in_load
b decode_next_dist_start
byte_copy_start:
add arg2,next_out,x_repeat_length
cmp arg2, next_out
beq decompress_data_end
sub arg2,arg2,1
byte_copy_loop:
ldrb w_arg0, [arg1] , 1
cmp arg2, next_out
strb w_arg0, [next_out],1
bne byte_copy_loop
b decompress_data_end
.size decode_huffman_code_block_stateless_aarch64, .-decode_huffman_code_block_stateless_aarch64
.type rfc_lookup_table, %object
rfc_lookup_table:
#if ENABLE_TBL_INSTRUCTION
.byte 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.zero 8
#endif
//dist_extra_bit_count
.byte 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02
.byte 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06
.byte 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a
.byte 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x00, 0x00
//dist_start
#if ENABLE_TBL_INSTRUCTION
.byte 0x01,0x02,0x03,0x04,0x05,0x07,0x09,0x0d,0x11,0x19,0x21,0x31,0x41,0x61,0x81,0xc1
.byte 0x01,0x81,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00
.byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0x01,0x01,0x02,0x03,0x04,0x06,0x08,0x0c,0x10,0x18,0x20,0x30,0x40,0x60,0x00,0x00
#else
.short 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
.short 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
.short 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
.short 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
#endif
.size rfc_lookup_table, . - rfc_lookup_table


@ -29,4 +29,4 @@
#include "aarch64_multibinary.h"
mbin_interface_base decode_huffman_code_block_stateless,decode_huffman_code_block_stateless_base
mbin_interface decode_huffman_code_block_stateless


@ -177,3 +177,12 @@ DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl3)
return PROVIDER_BASIC(isal_deflate_hash);
}
DEFINE_INTERFACE_DISPATCHER(decode_huffman_code_block_stateless)
{
	unsigned long auxval = getauxval(AT_HWCAP);
	if (auxval & HWCAP_CRC32)
		return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64);
	return PROVIDER_BASIC(decode_huffman_code_block_stateless);
}