igzip: Implement stateless inflate in assembly

Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
Reviewed-by: Greg Tucker <greg.b.tucker@intel.com>
This commit is contained in:
Roy Oursler 2016-03-07 18:08:20 -07:00 committed by Greg Tucker
parent 17dac9f641
commit 09a5a243bf
15 changed files with 1038 additions and 420 deletions

View File

@ -38,16 +38,19 @@ lsrc += igzip/igzip.c igzip/hufftables_c.c \
igzip/igzip_stateless_02.asm \
igzip/igzip_stateless_04.asm \
igzip/crc_data.asm \
igzip/rfc1951_lookup.asm \
igzip/crc32_gzip.asm igzip/detect_repeated_char.asm \
igzip/igzip_multibinary.asm \
igzip/igzip_stateless_base.c \
igzip/igzip_base.c \
igzip/igzip_update_histogram_01.asm \
igzip/igzip_update_histogram_04.asm \
igzip/rfc1951_lookup.asm
igzip/igzip_inflate_01.asm \
igzip/igzip_inflate_04.asm \
igzip/igzip_inflate_multibinary.asm
src_include += -I $(srcdir)/igzip
extern_hdrs += include/igzip_lib.h
extern_hdrs += include/igzip_lib.h include/inflate.h
pkginclude_HEADERS += include/types.h
@ -60,6 +63,7 @@ perf_tests += igzip/igzip_perf igzip/igzip_sync_flush_perf
other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf igzip/igzip_hist_perf
other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \
igzip/inflate_data_structs.asm \
igzip/igzip_buffer_utils_01.asm \
igzip/igzip_buffer_utils_04.asm \
igzip/igzip_body.asm igzip/igzip_finish.asm \
@ -72,13 +76,13 @@ other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \
include/reg_sizes.asm \
include/multibinary.asm \
include/test.h \
igzip/huffman.h
igzip/huffman.h \
igzip/igzip_inflate.asm
examples += igzip/igzip_example igzip/igzip_sync_flush_example
igzip_rand_test: igzip_inflate_ref.o
igzip_igzip_rand_test_LDADD = igzip/igzip_inflate_ref.lo libisal.la
igzip_rand_test:
igzip_igzip_rand_test_LDADD = libisal.la
# Include tools to make custom Huffman tables based on sample data
other_tests += igzip/generate_custom_hufftables
@ -89,19 +93,16 @@ lsrc += igzip/huff_codes.c
# Include tools and tests using the reference inflate
other_tests += igzip/igzip_inflate_perf
other_tests += igzip/igzip_inflate_test
other_src += igzip/igzip_inflate_ref.h
other_src += igzip/igzip_inflate_ref.c
lsrc += igzip/igzip_inflate.c
other_src += igzip/crc_inflate.h
igzip_inflate_perf: igzip_inflate_ref.o
igzip_inflate_perf: LDLIBS += -lz
igzip_igzip_inflate_perf_LDADD = igzip/igzip_inflate_ref.lo libisal.la
igzip_igzip_inflate_perf_LDADD = libisal.la
igzip_igzip_inflate_perf_LDFLAGS = -lz
igzip_inflate_test: igzip_inflate_ref.o
igzip_inflate_test: LDLIBS += -lz
igzip_igzip_inflate_test_LDADD = igzip/igzip_inflate_ref.lo libisal.la
igzip_igzip_inflate_test_LDADD = libisal.la
igzip_igzip_inflate_test_LDFLAGS = -lz
igzip_check: igzip_inflate_ref.o
igzip_igzip_check_LDADD = igzip/igzip_inflate_ref.lo libisal.la
igzip_hist_perf: igzip_inflate_ref.o
igzip_igzip_hist_perf_LDADD = igzip/igzip_inflate_ref.lo libisal.la
igzip_check:
igzip_igzip_check_LDADD = libisal.la
igzip_hist_perf: igzip_inflate.o
igzip_igzip_hist_perf_LDADD = igzip/igzip_inflate.lo libisal.la

View File

@ -31,7 +31,7 @@
#include <stdlib.h>
#include <string.h>
#include "igzip_lib.h"
#include "igzip_inflate_ref.h"
#include "inflate.h"
#include "crc_inflate.h"
#include <math.h>
@ -347,8 +347,8 @@ int inflate_check(uint8_t * z_buf, int z_size, uint8_t * in_buf, int in_size)
z_size -= gzip_hdr_bytes;
#endif
igzip_inflate_init(&gstream, z_buf, z_size, test_buf, test_size);
ret = igzip_inflate(&gstream);
isal_inflate_init(&gstream, z_buf, z_size, test_buf, test_size);
ret = isal_inflate_stateless(&gstream);
if (test_buf != NULL)
mem_result = memcmp(in_buf, test_buf, in_size);

View File

@ -33,7 +33,7 @@
#include <string.h>
#include "igzip_lib.h"
#include "test.h"
#include "igzip_inflate_ref.h"
#include "inflate.h"
#define BUF_SIZE 1024
#define MIN_TEST_LOOPS 8
@ -41,8 +41,12 @@
# define RUN_MEM_SIZE 2000000000
#endif
extern uint64_t inflate_in_read_bits(struct inflate_in_buffer *, uint8_t);
extern int read_header(struct inflate_state *);
extern uint16_t decode_next(struct inflate_in_buffer *, struct inflate_huff_code *);
/* Inflates and fills a histogram of lit, len, and dist codes seen in non-type 0 blocks.*/
int igzip_inflate_hist(struct inflate_state *state, struct isal_huff_histogram *histogram)
int isal_inflate_hist(struct inflate_state *state, struct isal_huff_histogram *histogram)
{
/* The following tables are based on the tables in the deflate standard,
* RFC 1951 page 11. */
@ -336,8 +340,8 @@ int main(int argc, char *argv[])
stream.hufftables = &hufftables_custom;
isal_deflate_stateless(&stream);
igzip_inflate_init(&gstream, outbuf, stream.total_out, NULL, 0);
igzip_inflate_hist(&gstream, &histogram2);
isal_inflate_init(&gstream, outbuf, stream.total_out, NULL, 0);
isal_inflate_hist(&gstream, &histogram2);
printf("Histogram Error \n");
print_diff_histogram(&histogram1, &histogram2);

460
igzip/igzip_inflate.asm Normal file
View File

@ -0,0 +1,460 @@
;; Constants, register aliases and table offsets used by the stateless
;; Huffman block decoder defined later in this file.
default rel
%include "reg_sizes.asm"
;; Status codes. Only 0, END_OF_INPUT, OUT_BUFFER_OVERFLOW and
;; INVALID_LOOK_BACK_DISTANCE are returned by the code in this file.
;; NOTE(review): presumably these values must match the C definitions used by
;; the callers -- confirm against inflate.h.
%define DECOMPRESSION_FINISHED 0
%define END_OF_INPUT 1
%define OUT_BUFFER_OVERFLOW 2
%define INVALID_BLOCK_HEADER 3
%define INVALID_SYMBOL 4
%define INVALID_NON_COMPRESSED_BLOCK_LENGTH 5
%define INVALID_LOOK_BACK_DISTANCE 6
;; Number of low input bits used to index the first-level Huffman lookup in
;; the decode_next macro; clamped to at most 15.
%define DECODE_LOOKUP_SIZE 10
%if DECODE_LOOKUP_SIZE > 15
%undef DECODE_LOOKUP_SIZE
%define DECODE_LOOKUP_SIZE 15
%endif
;; NOTE(review): MAX_LONG_CODE is not referenced by the code visible in this
;; file; presumably it sizes a table in inflate_data_structs.asm -- confirm.
%if DECODE_LOOKUP_SIZE > 7
%define MAX_LONG_CODE (((2 << 8) + 1) * (2 << (15 - DECODE_LOOKUP_SIZE)) + 32)
%else
%define MAX_LONG_CODE (2 << (15 - DECODE_LOOKUP_SIZE)) + (2 << (8 + DECODE_LOOKUP_SIZE)) + 32
%endif
;; COPY_SIZE: bytes moved by each xmm load/store in the match copy loops.
;; COPY_LEN_MAX: largest repeat length handled by a length/distance pair.
;; IN_BUFFER_SLOP / OUT_BUFFER_SLOP: the main loop stops this many bytes before
;; the real end of the input / output buffer; the remainder is handled by the
;; bounds-checked loop at end_loop_block.
%define COPY_SIZE 16
%define COPY_LEN_MAX 258
%define IN_BUFFER_SLOP 8
%define OUT_BUFFER_SLOP COPY_SIZE + COPY_LEN_MAX
%include "inflate_data_structs.asm"
%include "stdmac.asm"
extern rfc1951_lookup_table
;; Register aliases. Several names share one physical register, so aliases of
;; the same register must not be live at the same time.
;; rax
%define tmp3 rax
%define look_back_dist rax
%define next_bits2 rax
;; rcx is not aliased: it is used directly as the shift/bit-count scratch
;; rcx
;; rdx arg3
%define next_sym rdx
%define tmp4 rdx
;; rdi arg1
%define tmp1 rdi
;; rsi arg2
%define tmp2 rsi
%define copy_start rsi
%define next_bits rsi
;; rbx ; Saved
%define next_in rbx
;; rbp ; Saved
%define end_in rbp
;; r8
%define repeat_length r8
;; r9
%define read_in r9
;; r10
%define read_in_length r10
;; r11
%define state r11
;; r12 ; Saved
%define next_out r12
;; r13 ; Saved
%define end_out r13
;; r14 ; Saved
%define start_out r14
;; r15 ; Saved
%define rfc_lookup r15
;; Byte offsets of the RFC 1951 tables relative to rfc1951_lookup_table. The
;; strides (1*32, 4*32, 1*32) describe 32-entry byte, dword and byte arrays
;; followed by the word array of length bases.
;; NOTE(review): the base offset 264 depends on the layout defined in
;; rfc1951_lookup.asm -- confirm it stays in sync.
%define _dist_extra_bit_count 264
%define _dist_start _dist_extra_bit_count + 1*32
%define _len_extra_bit_count _dist_start + 4*32
%define _len_start _len_extra_bit_count + 1*32
;; Load read_in and update the in buffer accordingly.
;; Must only be used when there are at least 8 bytes left in the in buffer,
;; because a full qword is read from [%%next_in].
;; Clobbers rcx, unless rcx is %%read_in_length
;;
;; Arguments:
;;   %1 next_in         pointer to the next unread input byte (advanced)
;;   %2 end_in          accepted for symmetry with inflate_in_small_load; unused
;;   %3 read_in         bit buffer; new bits are OR-ed in above the valid bits
;;   %4 read_in_length  number of valid bits in read_in (updated)
;;   %5 tmp1            scratch register
;;   %6 tmp2            unused
%macro inflate_in_load 6
%define %%next_in %1
%define %%end_in %2
%define %%read_in %3
%define %%read_in_length %4
%define %%tmp1 %5 ; Tmp registers
%define %%tmp2 %6
;; Place the next 8 input bytes directly above the bits already buffered
SHLX %%tmp1, [%%next_in], %%read_in_length
or %%read_in, %%tmp1
;; tmp1 = (64 - read_in_length) / 8 = number of whole bytes that fit
mov %%tmp1, 64
sub %%tmp1, %%read_in_length
shr %%tmp1, 3
;; Consume only those whole bytes from the input
add %%next_in, %%tmp1
lea %%read_in_length, [%%read_in_length + 8 * %%tmp1]
;; Label is not referenced inside this macro
%%end:
%endm
;; Load read_in and update the in buffer accordingly, one byte at a time.
;; Unlike inflate_in_load this never reads at or beyond %%end_in, so it can be
;; used when fewer than 8 input bytes remain.
;; Clobbers rcx, unless rcx is %%read_in_length
;;
;; Arguments:
;;   %1 next_in         pointer to the next unread input byte (advanced)
;;   %2 end_in          pointer one past the last input byte
;;   %3 read_in         bit buffer; new bytes are OR-ed in above the valid bits
;;   %4 read_in_length  number of valid bits in read_in (updated)
;;   %5 avail_in/tmp1   scratch; first holds the bytes available, then each byte
;;   %6 loop_count      scratch; number of bytes to load
%macro inflate_in_small_load 6
%define %%next_in %1
%define %%end_in %2
%define %%read_in %3
%define %%read_in_length %4
%define %%avail_in %5 ; Tmp registers
%define %%tmp1 %5
%define %%loop_count %6
mov %%avail_in, %%end_in
sub %%avail_in, %%next_in
;; The running bit count is kept in rcx while loading
%ifnidn %%read_in_length, rcx
mov rcx, %%read_in_length
%endif
;; loop_count = min(whole bytes that fit in read_in, bytes available)
mov %%loop_count, 64
sub %%loop_count, %%read_in_length
shr %%loop_count, 3
cmp %%loop_count, %%avail_in
cmovg %%loop_count, %%avail_in
cmp %%loop_count, 0
je %%end
%%load_byte:
;; avail_in is no longer needed here, so its register is reused as tmp1
xor %%tmp1, %%tmp1
mov %%tmp1 %+ b, byte [%%next_in]
SHLX %%tmp1, %%tmp1, rcx
or %%read_in, %%tmp1
add rcx, 8
add %%next_in, 1
sub %%loop_count, 1
jg %%load_byte
%ifnidn %%read_in_length, rcx
mov %%read_in_length, rcx
%endif
%%end:
%endm
;; Decode next symbol
;; Clobber rcx
;;
;; Each lookup table entry is a 16-bit word: the low 9 bits hold the symbol
;; (or, for a hint, an offset) and the bits from bit 9 upwards hold the code
;; length that is shifted out of read_in. An entry with bit 15 set is a hint
;; that the code is longer than DECODE_LOOKUP_SIZE bits and needs a second
;; lookup.
;; read_in_length may become negative when the decoded code needed more bits
;; than were available; callers test for that after the macro.
%macro decode_next 8
%define %%state %1 ; State structure associated with compressed stream
%define %%state_offset %2 ; Type of huff code, should be either LIT or DIST
%define %%read_in %3 ; Bits read in from compressed stream
%define %%read_in_length %4 ; Number of valid bits in read_in
%define %%next_sym %5 ; Returned symbol
%define %%tmp1 %6 ; Tmp registers (%6 is not used by this macro)
%define %%tmp2 %7
%define %%next_bits %7
%define %%tmp3 %8
%define %%next_bits2 %8
;; Lookup possible next symbol
mov %%next_bits, %%read_in
and %%next_bits, (1 << DECODE_LOOKUP_SIZE) - 1
movzx %%next_sym, word [%%state + %%state_offset + 2 * %%next_bits]
;; Save length associated with symbol
mov rcx, %%next_sym
shr rcx, 9
;; Check if symbol or hint was looked up
and %%next_sym, 0x81FF
cmp %%next_sym, 0x8000
jl %%end
;; Decode next_sym using hint
mov %%next_bits2, %%read_in
;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first DECODE_LOOKUP_SIZE bits.
;; The negated length is used as the shift count to clear the high bits of
;; next_bits2 (NOTE(review): relies on SHLX/SHRX masking the count -- confirm
;; against the macro definitions used for each ARCH).
neg rcx
SHLX %%next_bits2, %%next_bits2, rcx
SHRX %%next_bits2, %%next_bits2, rcx
shr %%next_bits2, DECODE_LOOKUP_SIZE
add %%next_bits2, %%next_sym
;; Lookup actual next symbol
;; The displacement removes the 0x8000 hint flag still present in next_bits2
;; and skips the (1 << DECODE_LOOKUP_SIZE) first-level entries
movzx %%next_sym, word [%%state + %%state_offset + 2 * %%next_bits2 + 2 * ((1 << DECODE_LOOKUP_SIZE) - 0x8000)]
;; Save length associated with symbol
mov rcx, %%next_sym
shr rcx, 9
and %%next_sym, 0x1FF
%%end:
;; Update read_in to reflect the bits which were decoded
SHRX %%read_in, %%read_in, rcx
sub %%read_in_length, rcx
%endm
;; decode_huffman_code_block_stateless_<ARCH>(state)
;;
;; Decodes literal/length and distance symbols of the current Huffman coded
;; block, writing the result to the out buffer described by the state
;; structure passed in rdi.
;;
;; A fast main loop runs while at least IN_BUFFER_SLOP input bytes and
;; OUT_BUFFER_SLOP output bytes remain; it reads and writes speculatively.
;; The remainder is handled by a slower loop (end_loop_block) which checks
;; every access against the real buffer ends.
;;
;; Returns in rax:
;;   0                           end of block symbol decoded (_new_block set to 1)
;;   END_OF_INPUT                input ran out inside the block
;;   OUT_BUFFER_OVERFLOW         output does not fit in the out buffer
;;   INVALID_LOOK_BACK_DISTANCE  a match refers to data before start_out
;; The in/out buffer fields of the state are written back on every exit path.
global decode_huffman_code_block_stateless_ %+ ARCH
decode_huffman_code_block_stateless_ %+ ARCH %+ :

	;; Preserve the callee saved registers used as aliases above
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

	;; Load the decoder state into registers
	mov	state, rdi
	lea	rfc_lookup, [rfc1951_lookup_table]

	mov	read_in,[state + _in_buffer_read_in]
	mov	read_in_length %+ d, dword [state + _in_buffer_read_in_length]
	mov	start_out, [state + _out_buffer_start_out]
	mov	next_out, [state + _out_buffer_next_out]
	mov	end_out %+ d, dword [state + _out_buffer_avail_out]
	add	end_out, next_out
	mov	next_in, [state + _in_buffer_next_in]
	mov	end_in %+ d, dword [state + _in_buffer_avail_in]
	add	end_in, next_in

	;; While in the main loop end_in/end_out are biased by the slop so that
	;; one compare per buffer guards the unchecked accesses
	sub	end_out, OUT_BUFFER_SLOP
	sub	end_in, IN_BUFFER_SLOP

	cmp	next_in, end_in
	jg	end_loop_block_pre

	inflate_in_load	next_in, end_in, read_in, read_in_length, tmp1, tmp2

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Main Loop
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
loop_block:
	;; Check if near end of in buffer or out buffer
	cmp	next_in, end_in
	jg	end_loop_block_pre
	cmp	next_out, end_out
	jg	end_loop_block_pre

	;; Decode next symbol and reload the read_in buffer
	decode_next state, _lit_huff_code, read_in, read_in_length, next_sym, tmp1, tmp2, tmp3
	inflate_in_load	next_in, end_in, read_in, read_in_length, tmp1, tmp2

	;; Speculatively write next_sym if it is a literal
	;; (a full register is stored; the out buffer slop keeps this in bounds)
	mov	[next_out], next_sym
	add	next_out, 1

	;; Speculatively load the length extra bits if next_sym is a length
	mov	next_bits, read_in
	movzx	repeat_length, word [rfc_lookup + _len_start + 2 * (next_sym - 257)]
	movzx	rcx, byte [rfc_lookup + _len_extra_bit_count + next_sym - 257]

	;; Speculatively calculate the repeat length
	BZHI	next_bits, next_bits, rcx, tmp1
	add	repeat_length, next_bits

	;; Check if next_sym is a literal, length, or end of block symbol
	cmp	next_sym, 256
	jl	loop_block
	je	end_symbol_pre

decode_len_dist:
	;; Update read_in for the length extra bits which were read in
	SHRX	read_in, read_in, rcx
	sub	read_in_length, rcx

	;; Decode distance code
	decode_next state, _dist_huff_code, read_in, read_in_length, next_sym, tmp1, tmp2, tmp3

	;; Load distance code extra bits
	mov	next_bits, read_in
	mov	look_back_dist %+ d, [rfc_lookup + _dist_start + 4 * next_sym]
	movzx	rcx, byte [rfc_lookup + _dist_extra_bit_count + next_sym]

	;; Determine next_out after the copy is finished
	;; (the -1 undoes the speculative literal increment above)
	lea	next_out, [next_out + repeat_length - 1]

	;; Calculate the look back distance
	BZHI	next_bits, next_bits, rcx, tmp1
	SHRX	read_in, read_in, rcx
	sub	read_in_length, rcx
	add	look_back_dist, next_bits

	mov	copy_start, next_out
	sub	copy_start, repeat_length
	sub	copy_start, look_back_dist

	;; Check if a valid look back distances was decoded
	;; NOTE(review): on this error exit end_in/end_out still carry the slop
	;; bias and next_out already includes repeat_length, so the avail_in /
	;; avail_out / next_out values stored at "end" do not describe the real
	;; buffers. Left unchanged here; confirm callers ignore them on error.
	cmp	copy_start, start_out
	jl	invalid_look_back_distance

	vmovdqu	xmm1, [copy_start]

	;; Set tmp4 to be the minimum of COPY_SIZE and repeat_length
	;; This is to decrease use of small_byte_copy branch
	mov	tmp4, COPY_SIZE
	cmp	tmp4, repeat_length
	cmovg	tmp4, repeat_length

	;; Check for overlapping memory in the copy
	cmp	look_back_dist, tmp4
	jl	small_byte_copy_pre

large_byte_copy:
	;; Copy length distance pair when memory overlap is not an issue
	vmovdqu	[copy_start + look_back_dist], xmm1

	sub	repeat_length, COPY_SIZE
	jle	loop_block

	add	copy_start, COPY_SIZE
	vmovdqu	xmm1, [copy_start]
	jmp	large_byte_copy

small_byte_copy_pre:
	;; Copy length distance pair when source and destination overlap
	add	repeat_length, look_back_dist
small_byte_copy:
	;; Each store extends the repeated pattern, so the distance between
	;; source and destination can be doubled until it reaches COPY_SIZE
	vmovdqu	[copy_start + look_back_dist], xmm1

	shl	look_back_dist, 1
	vmovdqu	xmm1, [copy_start]
	cmp	look_back_dist, COPY_SIZE
	jl	small_byte_copy

	sub	repeat_length, look_back_dist
	jge	large_byte_copy
	jmp	loop_block

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Finish Main Loop
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
end_loop_block_pre:
	;; Fix up in buffer and out buffer to reflect the actual buffer end
	add	end_out, OUT_BUFFER_SLOP
	add	end_in, IN_BUFFER_SLOP

end_loop_block:
	;; Load read in buffer and decode next lit/len symbol
	inflate_in_small_load	next_in, end_in, read_in, read_in_length, tmp1, tmp2
	decode_next state, _lit_huff_code, read_in, read_in_length, next_sym, tmp1, tmp2, tmp3

	;; Check that enough input was available to decode symbol
	cmp	read_in_length, 0
	jl	end_of_input

	cmp	next_sym, 256
	jl	decode_literal
	je	end_symbol

decode_len_dist_2:
	;; Load length extra bits
	mov	next_bits, read_in

	movzx	repeat_length, word [rfc_lookup + _len_start + 2 * (next_sym - 257)]
	movzx	rcx, byte [rfc_lookup + _len_extra_bit_count + next_sym - 257]

	;; Calculate repeat length
	BZHI	next_bits, next_bits, rcx, tmp1
	add	repeat_length, next_bits

	;; Update read_in for the length extra bits which were read in
	SHRX	read_in, read_in, rcx
	sub	read_in_length, rcx

	;; Decode distance code
	decode_next state, _dist_huff_code, read_in, read_in_length, next_sym, tmp1, tmp2, tmp3

	;; Load distance code extra bits
	mov	next_bits, read_in
	mov	look_back_dist %+ d, [rfc_lookup + _dist_start + 4 * next_sym]
	movzx	rcx, byte [rfc_lookup + _dist_extra_bit_count + next_sym]

	;; Calculate the look back distance and check for enough input
	BZHI	next_bits, next_bits, rcx, tmp1
	SHRX	read_in, read_in, rcx
	add	look_back_dist, next_bits
	;; The sub must be the last flag-setting instruction before the jl:
	;; the branch tests whether read_in_length went negative, not the
	;; result of the distance addition above.
	sub	read_in_length, rcx
	jl	end_of_input

	;; Setup code for byte copy using rep movsb
	mov	rsi, next_out
	mov	rdi, rsi
	mov	rcx, repeat_length
	sub	rsi, look_back_dist

	;; Check for out buffer overflow
	add	next_out, repeat_length
	cmp	next_out, end_out
	jg	out_buffer_overflow

	;; Check if a valid look back distance was decoded
	cmp	rsi, start_out
	jl	invalid_look_back_distance

	rep movsb

	jmp	end_loop_block

decode_literal:
	;; Store literal decoded from the input stream
	add	next_out, 1
	cmp	next_out, end_out
	jg	out_buffer_overflow
	mov	byte [next_out - 1], next_sym %+ b
	jmp	end_loop_block

;; Set exit codes
end_of_input:
	mov	rax, END_OF_INPUT
	jmp	end

out_buffer_overflow:
	mov	rax, OUT_BUFFER_OVERFLOW
	jmp	end

invalid_look_back_distance:
	mov	rax, INVALID_LOOK_BACK_DISTANCE
	jmp	end

end_symbol_pre:
	;; Fix up in buffer and out buffer to reflect the actual buffer
	;; (dec removes the byte speculatively written for the end of block symbol)
	dec	next_out
	add	end_out, OUT_BUFFER_SLOP
	add	end_in, IN_BUFFER_SLOP
end_symbol:
	;; Set flag identifying a new block is required
	mov	byte [state + _new_block], 1
	xor	rax, rax
end:
	;; Save current buffer states
	mov	[state + _in_buffer_read_in], read_in
	mov	[state + _in_buffer_read_in_length], read_in_length %+ d
	mov	[state + _out_buffer_next_out], next_out
	;; avail_out = end_out - next_out, total_out = next_out - start_out
	sub	end_out, next_out
	mov	dword [state + _out_buffer_avail_out], end_out %+ d
	sub	next_out, start_out
	mov	[state + _out_buffer_total_out], next_out %+ d
	mov	[state + _in_buffer_next_in], next_in
	;; avail_in = end_in - next_in
	sub	end_in, next_in
	mov	[state + _in_buffer_avail_in], end_in %+ d

	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx

	ret

View File

@ -1,34 +1,51 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
// <COPYRIGHT_TAG>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
#include <stdint.h>
#include "inflate.h"
#include "huff_codes.h"
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
extern int decode_huffman_code_block_stateless(struct inflate_state *);
#include "igzip_inflate_ref.h"
/* Structure containing lookup data based on RFC 1951.
 * Every array has 32 entries. The dist_* arrays are indexed by distance
 * symbol and the len_* arrays by (length symbol - 257). */
struct rfc1951_tables {
uint8_t dist_extra_bit_count[32];	/* extra bits following each distance symbol */
uint32_t dist_start[32];	/* base look back distance of each distance symbol */
uint8_t len_extra_bit_count[32];	/* extra bits following each length symbol */
uint16_t len_start[32];	/* base repeat length of each length symbol */
};
/* The following tables are based on the tables in the deflate standard,
 * RFC 1951 page 11.
 * Entries beyond the last code listed there (distance indices 30-31 and
 * length indices 29-31) are zero padding. */
static struct rfc1951_tables rfc_lookup_table = {
.dist_extra_bit_count = {
0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02,
0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,
0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a,
0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x00, 0x00},
.dist_start = {
0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d,
0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1,
0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01,
0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000},
.len_extra_bit_count = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02,
0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04,
0x05, 0x05, 0x05, 0x05, 0x00, 0x00, 0x00, 0x00},
.len_start = {
0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a,
0x000b, 0x000d, 0x000f, 0x0011, 0x0013, 0x0017, 0x001b, 0x001f,
0x0023, 0x002b, 0x0033, 0x003b, 0x0043, 0x0053, 0x0063, 0x0073,
0x0083, 0x00a3, 0x00c3, 0x00e3, 0x0102, 0x0000, 0x0000, 0x0000}
};
/*Performs a copy of length repeat_length data starting at dest -
* lookback_distance into dest. This copy copies data previously copied when the
* src buffer and the dest buffer overlap. */
void inline byte_copy(uint8_t * dest, uint64_t lookback_distance, int repeat_length)
{
uint8_t *src = dest - lookback_distance;
@ -49,12 +66,14 @@ uint16_t inline bit_reverse2(uint16_t bits, uint8_t length)
return bits >> (16 - length);
}
/* Initialize a struct in_buffer for use */
void inline init_inflate_in_buffer(struct inflate_in_buffer *inflate_in)
{
inflate_in->read_in = 0;
inflate_in->read_in_length = 0;
}
/* Set up the in_stream used for the in_buffer*/
void inline set_inflate_in_buffer(struct inflate_in_buffer *inflate_in, uint8_t * in_stream,
uint32_t in_size)
{
@ -62,14 +81,17 @@ void inline set_inflate_in_buffer(struct inflate_in_buffer *inflate_in, uint8_t
inflate_in->avail_in = in_size;
}
/* Set up the out_stream used for the out_buffer */
void inline set_inflate_out_buffer(struct inflate_out_buffer *inflate_out,
uint8_t * out_stream, uint32_t out_size)
{
inflate_out->start_out = out_stream;
inflate_out->next_out = out_stream;
inflate_out->avail_out = out_size;
inflate_out->total_out = 0;
}
void inline inflate_in_clear_bits(struct inflate_in_buffer *inflate_in)
{
uint8_t bytes;
@ -82,6 +104,7 @@ void inline inflate_in_clear_bits(struct inflate_in_buffer *inflate_in)
inflate_in->avail_in += bytes;
}
/* Load data from the in_stream into a buffer to allow for handling unaligned data*/
void inline inflate_in_load(struct inflate_in_buffer *inflate_in, int min_required)
{
uint64_t temp = 0;
@ -112,6 +135,7 @@ void inline inflate_in_load(struct inflate_in_buffer *inflate_in, int min_requir
}
/* Returns the next bit_count bits from the in stream*/
uint64_t inline inflate_in_peek_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count)
{
assert(bit_count < 57);
@ -123,6 +147,7 @@ uint64_t inline inflate_in_peek_bits(struct inflate_in_buffer *inflate_in, uint8
return (inflate_in->read_in) & ((1 << bit_count) - 1);
}
/* Shifts the in stream over by bit-count bits */
void inline inflate_in_shift_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count)
{
@ -130,6 +155,9 @@ void inline inflate_in_shift_bits(struct inflate_in_buffer *inflate_in, uint8_t
inflate_in->read_in_length -= bit_count;
}
/* Returns the next bit_count bits from the in stream and shifts the stream over
* by bit-count bits */
uint64_t inflate_in_read_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count);
uint64_t inline inflate_in_read_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count)
{
uint64_t ret;
@ -146,48 +174,10 @@ uint64_t inline inflate_in_read_bits(struct inflate_in_buffer *inflate_in, uint8
return ret;
}
int inline setup_static_header(struct inflate_state *state)
{
/* This could be turned into a memcpy of this functions output for
* higher speed, but then DECODE_LOOKUP_SIZE couldn't be changed without
* regenerating the table. */
int i;
struct huff_code lit_code[LIT_LEN + 2];
struct huff_code dist_code[DIST_LEN + 2];
/* These tables are based on the static huffman tree described in RFC
* 1951 */
uint16_t lit_count[16] = {
0, 0, 0, 0, 0, 0, 0, 24, 152, 112, 0, 0, 0, 0, 0, 0
};
uint16_t dist_count[16] = {
0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* These for loops set the code lengths for the static literal/length
* and distance codes defined in the deflate standard RFC 1951 */
for (i = 0; i < 144; i++)
lit_code[i].length = 8;
for (i = 144; i < 256; i++)
lit_code[i].length = 9;
for (i = 256; i < 280; i++)
lit_code[i].length = 7;
for (i = 280; i < LIT_LEN + 2; i++)
lit_code[i].length = 8;
for (i = 0; i < DIST_LEN + 2; i++)
dist_code[i].length = 5;
make_inflate_huff_code(&state->lit_huff_code, lit_code, LIT_LEN + 2, lit_count);
make_inflate_huff_code(&state->dist_huff_code, dist_code, DIST_LEN + 2, dist_count);
return 0;
}
/* Sets result to the inflate_huff_code corresponding to the huffcode defined by
* the lengths in huff_code_table,where count is a histogram of the appearance
* of each code length */
void inline make_inflate_huff_code(struct inflate_huff_code *result,
struct huff_code *huff_code_table, int table_length,
uint16_t * count)
@ -206,8 +196,6 @@ void inline make_inflate_huff_code(struct inflate_huff_code *result,
uint16_t long_bits;
uint16_t min_increment;
memset(result, 0, sizeof(struct inflate_huff_code));
next_code[0] = code;
for (i = 1; i < MAX_HUFF_TREE_DEPTH + 1; i++)
@ -289,6 +277,53 @@ void inline make_inflate_huff_code(struct inflate_huff_code *result,
}
}
/* Fills state->lit_huff_code and state->dist_huff_code with the decode tables
 * for the static (fixed) Huffman codes of the deflate standard, RFC 1951.
 * Always returns 0. */
int inline setup_static_header(struct inflate_state *state)
{
	/* The tables are rebuilt on every call rather than copied from a
	 * precomputed image: a memcpy would be faster, but then
	 * DECODE_LOOKUP_SIZE couldn't be changed without regenerating the
	 * image. */
	struct huff_code lit_code[LIT_LEN + 2];
	struct huff_code dist_code[DIST_LEN + 2];

	/* Histograms of the code lengths used by the static trees of RFC 1951,
	 * in the form make_inflate_huff_code expects */
	uint16_t lit_count[16] = {
		0, 0, 0, 0, 0, 0, 0, 24, 152, 112, 0, 0, 0, 0, 0, 0
	};
	uint16_t dist_count[16] = {
		0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	};
	int sym = 0;

	/* Static literal/length code lengths: symbols 0-143 use 8 bits,
	 * 144-255 use 9 bits, 256-279 use 7 bits and the rest use 8 bits */
	while (sym < 144)
		lit_code[sym++].length = 8;
	while (sym < 256)
		lit_code[sym++].length = 9;
	while (sym < 280)
		lit_code[sym++].length = 7;
	while (sym < LIT_LEN + 2)
		lit_code[sym++].length = 8;

	/* Every static distance code is 5 bits long */
	for (sym = 0; sym < DIST_LEN + 2; sym++)
		dist_code[sym].length = 5;

	make_inflate_huff_code(&state->lit_huff_code, lit_code, LIT_LEN + 2, lit_count);
	make_inflate_huff_code(&state->dist_huff_code, dist_code, DIST_LEN + 2, dist_count);

	return 0;
}
/* Decodes the next symbol in in_buffer using the huff code defined by
* huff_code */
uint16_t decode_next(struct inflate_in_buffer *in_buffer, struct inflate_huff_code *huff_code);
uint16_t inline decode_next(struct inflate_in_buffer *in_buffer,
struct inflate_huff_code *huff_code)
{
@ -325,6 +360,8 @@ uint16_t inline decode_next(struct inflate_in_buffer *in_buffer,
}
}
/* Reads data from the in_buffer and sets the huff code corresponding to that
* data */
int inline setup_dynamic_header(struct inflate_state *state)
{
int i, j;
@ -465,6 +502,8 @@ int inline setup_dynamic_header(struct inflate_state *state)
return 0;
}
/* Reads in the header pointed to by in_stream and sets up state to reflect that
* header information*/
int read_header(struct inflate_state *state)
{
state->new_block = 0;
@ -491,7 +530,131 @@ int read_header(struct inflate_state *state)
return INVALID_BLOCK_HEADER;
}
void igzip_inflate_init(struct inflate_state *state, uint8_t * in_stream, uint32_t in_size,
/* Decodes a stored (uncompressed, btype 0) deflate block.
 *
 * Reads the 4 byte LEN/NLEN header at state->in_buffer.next_in, validates it
 * and copies the block payload from the in buffer to the out buffer, updating
 * the buffer bookkeeping in state.
 *
 * Returns:
 *   0                                    the whole block was copied and
 *                                        state->new_block was set to 1
 *   END_OF_INPUT                         the header or payload is truncated;
 *                                        the available payload has been copied
 *   INVALID_NON_COMPRESSED_BLOCK_LENGTH  NLEN is not the complement of LEN
 *   OUT_BUFFER_OVERFLOW                  the out buffer cannot hold LEN bytes
 */
int inline decode_literal_block(struct inflate_state *state)
{
	const uint8_t *header = state->in_buffer.next_in;
	uint16_t len, nlen;

	if (state->in_buffer.avail_in < 4)
		return END_OF_INPUT;

	/* LEN and NLEN are stored least significant byte first. Assemble them
	 * from bytes so the read does not depend on pointer alignment. */
	len = (uint16_t) (header[0] | (header[1] << 8));
	nlen = (uint16_t) (header[2] | (header[3] << 8));

	/* Consume the header from both the pointer and the byte count so that
	 * avail_in always describes the bytes left at next_in. Comparing len
	 * against a count that still included the header allowed the copy
	 * below to read up to 4 bytes past the input and avail_in to wrap. */
	state->in_buffer.next_in += 4;
	state->in_buffer.avail_in -= 4;

	/* Check if len and nlen match */
	if (len != (~nlen & 0xffff))
		return INVALID_NON_COMPRESSED_BLOCK_LENGTH;

	if (state->out_buffer.avail_out < len)
		return OUT_BUFFER_OVERFLOW;

	if (state->in_buffer.avail_in < len)
		/* Truncated block: copy only what is actually present */
		len = state->in_buffer.avail_in;
	else
		state->new_block = 1;

	memcpy(state->out_buffer.next_out, state->in_buffer.next_in, len);

	state->out_buffer.next_out += len;
	state->out_buffer.avail_out -= len;
	state->out_buffer.total_out += len;
	state->in_buffer.next_in += len;
	state->in_buffer.avail_in -= len;

	/* new_block is still 0 only when the payload was cut short */
	if (state->in_buffer.avail_in == 0 && state->new_block == 0)
		return END_OF_INPUT;

	return 0;
}
/* Decodes the next block if it was encoded using a huffman code.
 *
 * Symbols are decoded with state->lit_huff_code and state->dist_huff_code and
 * the result is written to state->out_buffer until the end of block symbol
 * is found.
 *
 * Returns:
 *   0                           end of block reached (state->new_block == 1)
 *   END_OF_INPUT                the input ran out inside the block
 *   OUT_BUFFER_OVERFLOW         the out buffer cannot hold the decoded data
 *   INVALID_LOOK_BACK_DISTANCE  a match has a zero distance or refers to data
 *                               before the start of the output
 *   INVALID_SYMBOL              a lit/len symbol above 285 was decoded
 */
int decode_huffman_code_block_stateless_base(struct inflate_state *state)
{
	uint16_t next_lit;
	uint8_t next_dist;
	uint32_t repeat_length;
	uint32_t look_back_dist;

	while (state->new_block == 0) {
		/* While not at the end of block, decode the next
		 * symbol */
		next_lit = decode_next(&state->in_buffer, &state->lit_huff_code);

		/* A negative bit count means the symbol used bits that were
		 * not available in the input */
		if (state->in_buffer.read_in_length < 0)
			return END_OF_INPUT;

		if (next_lit < 256) {
			/* If the next symbol is a literal,
			 * write out the symbol and update state
			 * data accordingly. */
			if (state->out_buffer.avail_out < 1)
				return OUT_BUFFER_OVERFLOW;

			*state->out_buffer.next_out = next_lit;
			state->out_buffer.next_out++;
			state->out_buffer.avail_out--;
			state->out_buffer.total_out++;

		} else if (next_lit == 256) {
			/* If the next symbol is the end of
			 * block, update the state data
			 * accordingly */
			state->new_block = 1;

		} else if (next_lit < 286) {
			/* Else if the next symbol is a repeat
			 * length, read in the length extra
			 * bits, the distance code, the distance
			 * extra bits. Then write out the
			 * corresponding data and update the
			 * state data accordingly*/
			repeat_length =
			    rfc_lookup_table.len_start[next_lit - 257] +
			    inflate_in_read_bits(&state->in_buffer,
						 rfc_lookup_table.
						 len_extra_bit_count[next_lit - 257]);

			if (state->out_buffer.avail_out < repeat_length)
				return OUT_BUFFER_OVERFLOW;

			next_dist = decode_next(&state->in_buffer, &state->dist_huff_code);

			look_back_dist = rfc_lookup_table.dist_start[next_dist] +
			    inflate_in_read_bits(&state->in_buffer,
						 rfc_lookup_table.
						 dist_extra_bit_count[next_dist]);

			if (state->in_buffer.read_in_length < 0)
				return END_OF_INPUT;

			/* The reserved distance symbols 30 and 31 map to the
			 * zero padding of dist_start, giving a distance of 0.
			 * A zero distance would copy not yet written output
			 * onto itself, so it is rejected together with
			 * distances reaching before the start of the output. */
			if (look_back_dist == 0
			    || look_back_dist > state->out_buffer.total_out)
				return INVALID_LOOK_BACK_DISTANCE;

			/* memcpy is only used when source and destination
			 * cannot overlap; byte_copy handles the overlapping
			 * case */
			if (look_back_dist > repeat_length)
				memcpy(state->out_buffer.next_out,
				       state->out_buffer.next_out -
				       look_back_dist, repeat_length);
			else
				byte_copy(state->out_buffer.next_out,
					  look_back_dist, repeat_length);

			state->out_buffer.next_out += repeat_length;
			state->out_buffer.avail_out -= repeat_length;
			state->out_buffer.total_out += repeat_length;

		} else
			/* Else the read in bits do not
			 * correspond to any valid symbol */
			return INVALID_SYMBOL;
	}

	return 0;
}
void isal_inflate_init(struct inflate_state *state, uint8_t * in_stream, uint32_t in_size,
uint8_t * out_stream, uint64_t out_size)
{
@ -504,165 +667,31 @@ void igzip_inflate_init(struct inflate_state *state, uint8_t * in_stream, uint32
state->bfinal = 0;
}
int igzip_inflate(struct inflate_state *state)
int isal_inflate_stateless(struct inflate_state *state)
{
/* The following tables are based on the tables in the deflate standard,
* RFC 1951 page 11. */
const uint16_t len_start[29] = {
0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
0x0b, 0x0d, 0x0f, 0x11, 0x13, 0x17, 0x1b, 0x1f,
0x23, 0x2b, 0x33, 0x3b, 0x43, 0x53, 0x63, 0x73,
0x83, 0xa3, 0xc3, 0xe3, 0x102
};
const uint8_t len_extra_bit_count[29] = {
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x1, 0x1, 0x1, 0x1, 0x2, 0x2, 0x2, 0x2,
0x3, 0x3, 0x3, 0x3, 0x4, 0x4, 0x4, 0x4,
0x5, 0x5, 0x5, 0x5, 0x0
};
const uint32_t dist_start[30] = {
0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d,
0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1,
0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01,
0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001
};
const uint8_t dist_extra_bit_count[30] = {
0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2,
0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6,
0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0xa, 0xa,
0xb, 0xb, 0xc, 0xc, 0xd, 0xd
};
uint16_t next_lit, len, nlen;
uint8_t next_dist;
uint32_t repeat_length;
uint32_t look_back_dist;
uint32_t tmp;
uint32_t ret;
while (state->new_block == 0 || state->bfinal == 0) {
if (state->new_block != 0) {
tmp = read_header(state);
ret = read_header(state);
if (tmp)
return tmp;
if (ret)
return ret;
}
if (state->btype == 0) {
/* If the block is uncompressed, perform a memcopy while
* updating state data */
if (state->in_buffer.avail_in < 4)
return END_OF_INPUT;
if (state->btype == 0)
ret = decode_literal_block(state);
else
ret = decode_huffman_code_block_stateless(state);
len = *(uint16_t *) state->in_buffer.next_in;
state->in_buffer.next_in += 2;
nlen = *(uint16_t *) state->in_buffer.next_in;
state->in_buffer.next_in += 2;
/* Check if len and nlen match */
if (len != (~nlen & 0xffff))
return INVALID_NON_COMPRESSED_BLOCK_LENGTH;
if (state->out_buffer.avail_out < len)
return OUT_BUFFER_OVERFLOW;
if (state->in_buffer.avail_in < len)
len = state->in_buffer.avail_in;
else
state->new_block = 1;
memcpy(state->out_buffer.next_out, state->in_buffer.next_in, len);
state->out_buffer.next_out += len;
state->out_buffer.avail_out -= len;
state->out_buffer.total_out += len;
state->in_buffer.next_in += len;
state->in_buffer.avail_in -= len + 4;
if (state->in_buffer.avail_in == 0 && state->new_block == 0)
return END_OF_INPUT;
} else {
/* Else decode a huffman encoded block */
while (state->new_block == 0) {
/* While not at the end of block, decode the next
* symbol */
next_lit =
decode_next(&state->in_buffer, &state->lit_huff_code);
if (state->in_buffer.read_in_length < 0)
return END_OF_INPUT;
if (next_lit < 256) {
/* If the next symbol is a literal,
* write out the symbol and update state
* data accordingly. */
if (state->out_buffer.avail_out < 1)
return OUT_BUFFER_OVERFLOW;
*state->out_buffer.next_out = next_lit;
state->out_buffer.next_out++;
state->out_buffer.avail_out--;
state->out_buffer.total_out++;
} else if (next_lit == 256) {
/* If the next symbol is the end of
* block, update the state data
* accordingly */
state->new_block = 1;
} else if (next_lit < 286) {
/* Else if the next symbol is a repeat
* length, read in the length extra
* bits, the distance code, the distance
* extra bits. Then write out the
* corresponding data and update the
* state data accordingly*/
repeat_length =
len_start[next_lit - 257] +
inflate_in_read_bits(&state->in_buffer,
len_extra_bit_count[next_lit -
257]);
if (state->out_buffer.avail_out < repeat_length)
return OUT_BUFFER_OVERFLOW;
next_dist = decode_next(&state->in_buffer,
&state->dist_huff_code);
look_back_dist = dist_start[next_dist] +
inflate_in_read_bits(&state->in_buffer,
dist_extra_bit_count
[next_dist]);
if (state->in_buffer.read_in_length < 0)
return END_OF_INPUT;
if (look_back_dist > state->out_buffer.total_out)
return INVALID_LOOK_BACK_DISTANCE;
if (look_back_dist > repeat_length) {
memcpy(state->out_buffer.next_out,
state->out_buffer.next_out -
look_back_dist, repeat_length);
} else
byte_copy(state->out_buffer.next_out,
look_back_dist, repeat_length);
state->out_buffer.next_out += repeat_length;
state->out_buffer.avail_out -= repeat_length;
state->out_buffer.total_out += repeat_length;
} else
/* Else the read in bits do not
* correspond to any valid symbol */
return INVALID_SYMBOL;
}
}
if (ret)
return ret;
}
/* Hand back to the input stream the whole bytes (read_in_length / 8) that
 * are still held in the bit buffer */
state->in_buffer.next_in -= state->in_buffer.read_in_length / 8;
state->in_buffer.avail_in += state->in_buffer.read_in_length / 8;
return DECOMPRESSION_FINISHED;
}

View File

@ -0,0 +1,3 @@
; Builds the shared inflate source with ARCH set to 01.
; NOTE(review): presumably this yields the *_01 symbol that the multibinary
; dispatcher references -- confirm in igzip_inflate.asm.
%define ARCH 01
%include "igzip_inflate.asm"

View File

@ -0,0 +1,4 @@
; Builds the shared inflate source with ARCH set to 04 and USE_HSWNI defined.
; NOTE(review): presumably USE_HSWNI enables newer-instruction code paths and
; the result is the *_04 symbol used by the dispatcher -- confirm in
; igzip_inflate.asm.
%define ARCH 04
%define USE_HSWNI
%include "igzip_inflate.asm"

View File

@ -0,0 +1,24 @@
; <COPYRIGHT_TAG>
; Runtime dispatcher for decode_huffman_code_block_stateless: declares the
; available implementations and wires them to one public entry point.
default rel
[bits 64]
; WRT_OPT expands to "wrt ..plt" only for elf64 output and to nothing otherwise.
%ifidn __OUTPUT_FORMAT__, elf64
%define WRT_OPT wrt ..plt
%else
%define WRT_OPT
%endif
%include "reg_sizes.asm"
; Implementations defined in other object files.
extern decode_huffman_code_block_stateless_base
extern decode_huffman_code_block_stateless_01
extern decode_huffman_code_block_stateless_04
section .text
%include "multibinary.asm"
; mbin_interface / mbin_dispatch_init5 are presumably provided by
; multibinary.asm (included just above) -- confirm there which CPU feature
; level each argument position maps to. Note that the _01 implementation is
; deliberately or accidentally passed for two consecutive slots.
mbin_interface decode_huffman_code_block_stateless
mbin_dispatch_init5 decode_huffman_code_block_stateless, decode_huffman_code_block_stateless_base, decode_huffman_code_block_stateless_01, decode_huffman_code_block_stateless_01, decode_huffman_code_block_stateless_04

View File

@ -31,7 +31,7 @@
#include <assert.h>
#include <zlib.h>
#include "huff_codes.h"
#include "igzip_inflate_ref.h"
#include "inflate.h"
#include "test.h"
#define BUF_SIZE 1024
@ -60,7 +60,7 @@ int main(int argc, char *argv[])
struct inflate_state state;
if (argc > 3 || argc < 2) {
fprintf(stderr, "Usage: igzip_inflate_file_perf infile\n"
fprintf(stderr, "Usage: isal_inflate_file_perf infile\n"
"\t - Runs multiple iterations of igzip on a file to "
"get more accurate time results.\n");
exit(0);
@ -78,7 +78,7 @@ int main(int argc, char *argv[])
}
printf("outfile=%s\n", argv[2]);
}
printf("igzip_inflate_perf: \n");
printf("isal_inflate_perf: \n");
fflush(0);
/* Allocate space for entire input file and output
* (assuming some possible expansion on output size)
@ -106,7 +106,7 @@ int main(int argc, char *argv[])
fprintf(stderr, "Can't allocate input buffer memory\n");
exit(0);
}
outbuf = malloc(infile_size);
outbuf = malloc(outbuf_size);
if (outbuf == NULL) {
fprintf(stderr, "Can't allocate output buffer memory\n");
exit(0);
@ -117,16 +117,16 @@ int main(int argc, char *argv[])
printf("Compression of input file failed\n");
exit(0);
}
printf("igzip_inflate_perf: %s %d iterations\n", argv[1], iterations);
printf("isal_inflate_stateless_perf: %s %d iterations\n", argv[1], iterations);
/* Read complete input file into buffer */
fclose(in);
struct perf start, stop;
perf_start(&start);
for (i = 0; i < iterations; i++) {
igzip_inflate_init(&state, inbuf + 2, inbuf_size - 2, outbuf, outbuf_size);
isal_inflate_init(&state, inbuf + 2, inbuf_size - 2, outbuf, outbuf_size);
check = igzip_inflate(&state);
check = isal_inflate_stateless(&state);
if (check) {
printf("Error in decompression with error %d\n", check);
break;
@ -140,7 +140,7 @@ int main(int argc, char *argv[])
printf("igzip_file: ");
perf_print(stop, start, (long long)infile_size * i);
printf("End of igzip_inflate_perf\n\n");
printf("End of isal_inflate_stateless_perf\n\n");
fflush(0);
free(inbuf);

View File

@ -1,150 +0,0 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#ifndef INFLATE_H
#define INFLATE_H
#include <stdint.h>
#include "huff_codes.h"
#define DECOMPRESSION_FINISHED 0
#define END_OF_INPUT 1
#define OUT_BUFFER_OVERFLOW 2
#define INVALID_BLOCK_HEADER 3
#define INVALID_SYMBOL 4
#define INVALID_NON_COMPRESSED_BLOCK_LENGTH 5
#define INVALID_LOOK_BACK_DISTANCE 6
#define DECODE_LOOKUP_SIZE 10
#if DECODE_LOOKUP_SIZE > 15
# undef DECODE_LOOKUP_SIZE
# define DECODE_LOOKUP_SIZE 15
#endif
#if DECODE_LOOKUP_SIZE > 7
# define MAX_LONG_CODE ((2 << 8) + 1) * (2 << (15 - DECODE_LOOKUP_SIZE)) + 32
#else
# define MAX_LONG_CODE (2 << (15 - DECODE_LOOKUP_SIZE)) + (2 << (8 + DECODE_LOOKUP_SIZE)) + 32
#endif
/* Buffer used to manage decompressed output */
struct inflate_out_buffer{
uint8_t *next_out;
uint32_t avail_out;
uint32_t total_out;
};
/* Buffer used to manager compressed input */
struct inflate_in_buffer{
uint8_t *start;
uint8_t *next_in;
uint32_t avail_in;
uint64_t read_in;
int32_t read_in_length;
};
/* Data structure used to store a huffman code for fast look up */
struct inflate_huff_code{
uint16_t small_code_lookup[ 1 << (DECODE_LOOKUP_SIZE)];
uint16_t long_code_lookup[MAX_LONG_CODE];
};
/* Structure contained current state of decompression of data */
struct inflate_state {
struct inflate_out_buffer out_buffer;
struct inflate_in_buffer in_buffer;
struct inflate_huff_code lit_huff_code;
struct inflate_huff_code dist_huff_code;
uint8_t new_block;
uint8_t bfinal;
uint8_t btype;
};
/*Performs a copy of length repeat_length data starting at dest -
* lookback_distance into dest. This copy copies data previously copied when the
* src buffer and the dest buffer overlap. */
void byte_copy(uint8_t *dest, uint64_t lookback_distance, int repeat_length);
/* Initialize a struct in_buffer for use */
void init_inflate_in_buffer(struct inflate_in_buffer *inflate_in);
/* Set up the in_stream used for the in_buffer*/
void set_inflate_in_buffer(struct inflate_in_buffer *inflate_in, uint8_t *in_stream,
uint32_t in_size);
/* Set up the out_stream used for the out_buffer */
void set_inflate_out_buffer(struct inflate_out_buffer *inflate_out, uint8_t *out_stream,
uint32_t out_size);
/* Load data from the in_stream into a buffer to allow for handling unaligned data*/
void inflate_in_load(struct inflate_in_buffer *inflate_in, int min_load);
/* Returns the next bit_count bits from the in stream*/
uint64_t inflate_in_peek_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count);
/* Shifts the in stream over by bit-count bits */
void inflate_in_shift_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count);
/* Returns the next bit_count bits from the in stream and shifts the stream over
* by bit-count bits */
uint64_t inflate_in_read_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count);
/* Sets the inflate_huff_codes in state to be the huffcodes corresponding to the
* deflate static header */
int setup_static_header(struct inflate_state *state);
/* Sets result to the inflate_huff_code corresponding to the huffcode defined by
* the lengths in huff_code_table,where count is a histogram of the appearance
* of each code length */
void make_inflate_huff_code(struct inflate_huff_code *result, struct huff_code *huff_code_table,
int table_length, uint16_t * count);
/* Decodes the next symbol symbol in in_buffer using the huff code defined by
* huff_code */
uint16_t decode_next(struct inflate_in_buffer *in_buffer, struct inflate_huff_code *huff_code);
/* Reads data from the in_buffer and sets the huff code corresponding to that
* data */
int setup_dynamic_header(struct inflate_state *state);
/* Reads in the header pointed to by in_stream and sets up state to reflect that
* header information*/
int read_header(struct inflate_state *state);
/* Initialize a struct inflate_state for deflate compressed input data at in_stream and to output
* data into out_stream */
void igzip_inflate_init(struct inflate_state *state, uint8_t *in_stream, uint32_t in_size,
uint8_t *out_stream, uint64_t out_size);
/* Decompress a deflate data. This function assumes a call to igzip_inflate_init
* has been made to set up the state structure to allow for decompression.*/
int igzip_inflate(struct inflate_state *state);
#endif //INFLATE_H

View File

@ -30,16 +30,18 @@
#include <stdint.h>
#include <stdio.h>
#include <zlib.h>
#include "igzip_inflate_ref.h"
#include "inflate.h"
#include "huff_codes.h"
#define OUT_BUFFER_SLOP 16
/* Don't use files larger than memory can support, because compression and
 * decompression are done in a stateless manner. */
#define MAX_INPUT_FILE_SIZE 2L*1024L*1024L*1024L
int test(uint8_t * compressed_stream, uint64_t * compressed_length,
uint8_t * uncompressed_stream, int uncompressed_length,
uint8_t * uncompressed_test_stream)
uint8_t * uncompressed_test_stream, int uncompressed_test_stream_length)
{
struct inflate_state state;
int ret;
@ -51,9 +53,9 @@ int test(uint8_t * compressed_stream, uint64_t * compressed_length,
return ret;
}
igzip_inflate_init(&state, compressed_stream + 2, *compressed_length - 2,
uncompressed_test_stream, uncompressed_length);
ret = igzip_inflate(&state);
isal_inflate_init(&state, compressed_stream + 2, *compressed_length - 2,
uncompressed_test_stream, uncompressed_test_stream_length);
ret = isal_inflate_stateless(&state);
switch (ret) {
case 0:
@ -95,6 +97,13 @@ int test(uint8_t * compressed_stream, uint64_t * compressed_length,
return -1;
}
/* Compare the decoder output against the original data; on a mismatch report
 * only the first differing byte, as the message states. */
if (memcmp(uncompressed_stream, uncompressed_test_stream, uncompressed_length)) {
	int i;
	for (i = 0; i < uncompressed_length; i++) {
		if (uncompressed_stream[i] != uncompressed_test_stream[i]) {
			printf("first error at %d, 0x%x != 0x%x\n", i,
			       uncompressed_stream[i], uncompressed_test_stream[i]);
			/* Stop here: later mismatches are not the "first error" */
			break;
		}
	}
	printf(" decompressed data is not the same as the compressed data\n");
	return -1;
}
@ -104,9 +113,12 @@ int test(uint8_t * compressed_stream, uint64_t * compressed_length,
int main(int argc, char **argv)
{
int i, j, ret = 0, fin_ret = 0;
FILE *file;
uint64_t compressed_length, file_length, uncompressed_length;
uint8_t *uncompressed_stream, *compressed_stream, *uncompressed_test_stream;
FILE *file = NULL;
uint64_t compressed_length, file_length;
uint64_t uncompressed_length, uncompressed_test_stream_length;
uint8_t *uncompressed_stream = NULL;
uint8_t *compressed_stream = NULL;
uint8_t *uncompressed_test_stream = NULL;
if (argc == 1)
printf("Error, no input file\n");
@ -128,12 +140,15 @@ int main(int argc, char **argv)
fclose(file);
continue;
}
compressed_length = compressBound(file_length);
uncompressed_stream = malloc(file_length);
compressed_stream = malloc(compressed_length);
uncompressed_test_stream = malloc(file_length);
if (uncompressed_stream == NULL) {
compressed_length = compressBound(file_length);
if (file_length != 0) {
uncompressed_stream = malloc(file_length);
uncompressed_test_stream = malloc(file_length);
}
compressed_stream = malloc(compressed_length);
if (uncompressed_stream == NULL && file_length != 0) {
printf("Failed to allocate memory\n");
exit(0);
}
@ -149,9 +164,12 @@ int main(int argc, char **argv)
}
uncompressed_length = fread(uncompressed_stream, 1, file_length, file);
uncompressed_test_stream_length = uncompressed_length + OUT_BUFFER_SLOP;
ret =
test(compressed_stream, &compressed_length, uncompressed_stream,
uncompressed_length, uncompressed_test_stream);
uncompressed_length, uncompressed_test_stream,
uncompressed_test_stream_length);
if (ret) {
for (j = 0; j < compressed_length; j++) {
if ((j & 31) == 0)
@ -165,10 +183,13 @@ int main(int argc, char **argv)
}
fflush(0);
fclose(file);
free(compressed_stream);
free(uncompressed_stream);
free(uncompressed_test_stream);
if (uncompressed_stream != NULL)
free(uncompressed_stream);
if (uncompressed_test_stream != NULL)
free(uncompressed_test_stream);
if (ret) {
printf(" ... Fail with exit code %d\n", ret);

View File

@ -30,8 +30,9 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "igzip_lib.h"
#include "igzip_inflate_ref.h"
#include "inflate.h"
#include "crc_inflate.h"
#include <math.h>
@ -350,8 +351,8 @@ int inflate_check(uint8_t * z_buf, int z_size, uint8_t * in_buf, int in_size)
z_size -= gzip_hdr_bytes;
#endif
igzip_inflate_init(&gstream, z_buf, z_size, test_buf, test_size);
ret = igzip_inflate(&gstream);
isal_inflate_init(&gstream, z_buf, z_size, test_buf, test_size);
ret = isal_inflate_stateless(&gstream);
if (test_buf != NULL)
mem_result = memcmp(in_buf, test_buf, in_size);

View File

@ -0,0 +1,106 @@
; <COPYRIGHT_TAG>
;; START_FIELDS
;; Begins a new structure description: resets the running field offset and
;; the largest alignment seen so far to zero.
%macro START_FIELDS 0
%assign _FIELD_OFFSET 0
%assign _STRUCT_ALIGN 0
%endm
;; FIELD name size align
;; Defines <name> as the byte offset of the next field in the structure being
;; described, then advances the running offset by <size>.
;;   name  - symbol that receives the offset (via equ)
;;   size  - field size in bytes
;;   align - required alignment in bytes; the mask arithmetic below only
;;           rounds correctly when this is a power of two
%macro FIELD 3
%define %%name %1
%define %%size %2
%define %%align %3
; Round the running offset up to the next multiple of align
%assign _FIELD_OFFSET (_FIELD_OFFSET + (%%align) - 1) & (~ ((%%align)-1))
%%name equ _FIELD_OFFSET
%assign _FIELD_OFFSET _FIELD_OFFSET + (%%size)
; Track the strictest alignment of any field as the structure's alignment
%if (%%align > _STRUCT_ALIGN)
%assign _STRUCT_ALIGN %%align
%endif
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Field offsets for struct inflate_out_buffer; order and sizes must stay in
;; step with the C declaration in inflate.h (two 8-byte pointers followed by
;; two 4-byte counters, 24 bytes in total).
START_FIELDS ;; inflate out buffer
;; name size align
FIELD _start_out, 8, 8
FIELD _next_out, 8, 8
FIELD _avail_out, 4, 4
FIELD _total_out, 4, 4
%assign _inflate_out_buffer_size _FIELD_OFFSET
%assign _inflate_out_buffer_align _STRUCT_ALIGN
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Field offsets for struct inflate_in_buffer; order and sizes must stay in
;; step with the C declaration in inflate.h.
;; _read_in is 8-byte aligned, so FIELD leaves 4 bytes of padding after
;; _avail_in (offsets: 0, 8, 16, 24, 32).
;; NOTE(review): the size recorded below is the raw end offset (36) and is not
;; rounded up to the struct alignment, whereas a C compiler would normally pad
;; the struct to 40 bytes. Offsets in the enclosing state struct stay correct
;; only as long as the field that follows is itself 8-byte aligned -- confirm
;; before reordering fields.
START_FIELDS ;; inflate in buffer
;; name size align
FIELD _start_in, 8, 8
FIELD _next_in, 8, 8
FIELD _avail_in, 4, 4
FIELD _read_in, 8, 8
FIELD _read_in_length,4, 4
%assign _inflate_in_buffer_size _FIELD_OFFSET
%assign _inflate_in_buffer_align _STRUCT_ALIGN
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Field offsets for struct inflate_huff_code: two tables of 2-byte entries,
;; matching the uint16_t arrays declared in inflate.h.
;; DECODE_LOOKUP_SIZE and MAX_LONG_CODE are not defined in this file; they are
;; expected to be supplied by the including source and must equal the values
;; in inflate.h -- TODO confirm where they are defined.
START_FIELDS ;; inflate huff code
;; name size align
FIELD _small_code_lookup, 2 * (1 << (DECODE_LOOKUP_SIZE)), 8
FIELD _long_code_lookup, 2 * MAX_LONG_CODE, 2
%assign _inflate_huff_code_size _FIELD_OFFSET
%assign _inflate_huff_code_align _STRUCT_ALIGN
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Field offsets for struct inflate_state, composed from the sub-structure
;; sizes and alignments computed above; field order follows inflate.h.
START_FIELDS ;; inflate state
;; name size align
FIELD _out_buffer, _inflate_out_buffer_size, _inflate_out_buffer_align
FIELD _in_buffer, _inflate_in_buffer_size, _inflate_in_buffer_align
FIELD _lit_huff_code, _inflate_huff_code_size, _inflate_huff_code_align
FIELD _dist_huff_code,_inflate_huff_code_size, _inflate_huff_code_align
FIELD _new_block, 1, 1
FIELD _bfinal, 1, 1
FIELD _btype, 1, 1
%assign _inflate_state_size _FIELD_OFFSET
%assign _inflate_state_align _STRUCT_ALIGN
;; Flattened offsets: each nested member's offset measured from the start of
;; struct inflate_state, so code can address it from a single state pointer.
_out_buffer_start_out equ _out_buffer+_start_out
_out_buffer_next_out equ _out_buffer+_next_out
_out_buffer_avail_out equ _out_buffer+_avail_out
_out_buffer_total_out equ _out_buffer+_total_out
_in_buffer_start equ _in_buffer+_start_in
_in_buffer_next_in equ _in_buffer+_next_in
_in_buffer_avail_in equ _in_buffer+_avail_in
_in_buffer_read_in equ _in_buffer+_read_in
_in_buffer_read_in_length equ _in_buffer+_read_in_length
_lit_huff_code_small_code_lookup equ _lit_huff_code+_small_code_lookup
_lit_huff_code_long_code_lookup equ _lit_huff_code+_long_code_lookup
_dist_huff_code_small_code_lookup equ _dist_huff_code+_small_code_lookup
_dist_huff_code_long_code_lookup equ _dist_huff_code+_long_code_lookup
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@ -5,6 +5,15 @@ section .data
align 8
;; /* Structure contain lookup data based on RFC 1951 */
;; struct rfc1951_tables {
;; uint8_t len_to_code[264];
;; uint8_t dist_extra_bit_count[32];
;; uint32_t dist_start[32];
;; uint8_t len_extra_bit_count[32];
;; uint16_t len_start[32];
;; };
global rfc1951_lookup_table:data internal
rfc1951_lookup_table:
len_to_code:
@ -41,4 +50,38 @@ len_to_code:
db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1d
%endif
db 0x00, 0x00, 0x00, 0x00, 0x00
dist_extra_bit_count:
db 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02
db 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06
db 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a
db 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x00, 0x00
dist_start:
dd 0x00000001, 0x00000002, 0x00000003, 0x00000004
dd 0x00000005, 0x00000007, 0x00000009, 0x0000000d
dd 0x00000011, 0x00000019, 0x00000021, 0x00000031
dd 0x00000041, 0x00000061, 0x00000081, 0x000000c1
dd 0x00000101, 0x00000181, 0x00000201, 0x00000301
dd 0x00000401, 0x00000601, 0x00000801, 0x00000c01
dd 0x00001001, 0x00001801, 0x00002001, 0x00003001
dd 0x00004001, 0x00006001, 0x00000000, 0x00000000
len_extra_bit_count:
db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
db 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02
db 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04
db 0x05, 0x05, 0x05, 0x05, 0x00, 0x00, 0x00, 0x00
;; Base match length for each length code (29 entries, index 0 = length 3),
;; followed by three zero entries that pad the table to 32 words.
len_start:
dw 0x0003, 0x0004, 0x0005, 0x0006
dw 0x0007, 0x0008, 0x0009, 0x000a
dw 0x000b, 0x000d, 0x000f, 0x0011
dw 0x0013, 0x0017, 0x001b, 0x001f
dw 0x0023, 0x002b, 0x0033, 0x003b
dw 0x0043, 0x0053, 0x0063, 0x0073
dw 0x0083, 0x00a3, 0x00c3, 0x00e3
dw 0x0102, 0x0000, 0x0000, 0x0000
%endif ; RFC1951_LOOKUP

72
include/inflate.h Normal file
View File

@ -0,0 +1,72 @@
#ifndef INFLATE_H
#define INFLATE_H
#include <stdint.h>
/* Status codes returned by isal_inflate_stateless(). Zero means success;
 * every other value identifies why decoding stopped. */
#define DECOMPRESSION_FINISHED 0 /* final block fully decoded */
#define END_OF_INPUT 1 /* input ran out before the stream was complete */
#define OUT_BUFFER_OVERFLOW 2 /* not enough space left in the output buffer */
#define INVALID_BLOCK_HEADER 3 /* presumably reported by block header parsing - confirm */
#define INVALID_SYMBOL 4 /* decoded bits match no valid literal/length symbol */
#define INVALID_NON_COMPRESSED_BLOCK_LENGTH 5 /* stored block length and its complement disagree */
#define INVALID_LOOK_BACK_DISTANCE 6 /* match distance reaches before the start of the output */
/* Number of index bits of the first-level lookup table
 * (small_code_lookup has 1 << DECODE_LOOKUP_SIZE entries). Capped at 15. */
#define DECODE_LOOKUP_SIZE 10
#if DECODE_LOOKUP_SIZE > 15
# undef DECODE_LOOKUP_SIZE
# define DECODE_LOOKUP_SIZE 15
#endif
/* MAX_LONG_CODE is the number of entries in long_code_lookup. Both
 * definitions are fully parenthesized so the macro expands correctly inside
 * larger expressions such as 2 * MAX_LONG_CODE. */
#if DECODE_LOOKUP_SIZE > 7
# define TMP1 ((2 << 8 ) + 1)
# define TMP2 (2 << (15 - DECODE_LOOKUP_SIZE))
# define MAX_LONG_CODE (TMP1 * TMP2 + 32)
#else
# define MAX_LONG_CODE ((2 << (15 - DECODE_LOOKUP_SIZE)) + (2 << (8 + DECODE_LOOKUP_SIZE)) + 32)
#endif
/* Buffer used to manage decompressed output.
 * The field order and sizes are mirrored by hand-written offsets in the
 * assembly implementation, so do not reorder or resize members without
 * updating them as well. */
struct inflate_out_buffer{
uint8_t *start_out; /* presumably the first byte of the output buffer - confirm */
uint8_t *next_out; /* where the next decoded byte will be written */
uint32_t avail_out; /* bytes of space remaining at next_out */
uint32_t total_out; /* bytes written so far; bounds valid look-back distances */
};
/* Buffer used to manage compressed input.
 * The field order and sizes are mirrored by hand-written offsets in the
 * assembly implementation, so do not reorder or resize members without
 * updating them as well. */
struct inflate_in_buffer{
uint8_t *start; /* presumably the first byte of the input stream - confirm */
uint8_t *next_in; /* next input byte not yet loaded */
uint32_t avail_in; /* input bytes remaining at next_in */
uint64_t read_in; /* bits loaded from the stream but not yet consumed (assumed - confirm) */
int32_t read_in_length; /* number of bits held in read_in; negative signals end of input */
};
/* Data structure used to store a huffman code for fast lookup.
 * small_code_lookup is indexed with DECODE_LOOKUP_SIZE bits;
 * long_code_lookup presumably serves codes longer than that - confirm
 * against the table construction code. */
struct inflate_huff_code{
uint16_t small_code_lookup[ 1 << (DECODE_LOOKUP_SIZE)];
uint16_t long_code_lookup[MAX_LONG_CODE];
};
/* Structure containing the current state of decompression.
 * The member order is mirrored by offsets in the assembly implementation. */
struct inflate_state {
struct inflate_out_buffer out_buffer; /* decompressed output cursor and counters */
struct inflate_in_buffer in_buffer; /* compressed input cursor and bit buffer */
struct inflate_huff_code lit_huff_code; /* literal/length code of the current block */
struct inflate_huff_code dist_huff_code; /* distance code of the current block */
uint8_t new_block; /* non-zero when the next thing to read is a block header */
uint8_t bfinal; /* non-zero once the final block's header has been read */
uint8_t btype; /* block type of the current block; 0 means uncompressed */
};
/* Initialize a struct inflate_state to decode the deflate-compressed data at
 * in_stream (in_size bytes) into out_stream (out_size bytes).
 * NOTE(review): out_size is 64-bit while inflate_out_buffer.avail_out is
 * 32-bit; confirm how sizes above UINT32_MAX are meant to be handled. */
void isal_inflate_init(struct inflate_state *state, uint8_t *in_stream, uint32_t in_size,
uint8_t *out_stream, uint64_t out_size);
/* Decompress deflate data in a single call. isal_inflate_init must have been
 * called first to set up the state structure.
 * Returns DECOMPRESSION_FINISHED (0) once the final block has been decoded,
 * otherwise one of the non-zero status codes defined above. */
int isal_inflate_stateless(struct inflate_state *state);
#endif