diff --git a/igzip/igzip_check.c b/igzip/igzip_check.c index 0e13d9d..c0e1793 100644 --- a/igzip/igzip_check.c +++ b/igzip/igzip_check.c @@ -348,7 +348,12 @@ int inflate_check(uint8_t * z_buf, int z_size, uint8_t * in_buf, int in_size) z_size -= gzip_hdr_bytes; #endif - isal_inflate_init(&gstream, z_buf, z_size, test_buf, test_size); + isal_inflate_init(&gstream); + gstream.next_in = z_buf; + gstream.avail_in = z_size; + gstream.next_out = test_buf; + gstream.avail_out = test_size; + ret = isal_inflate_stateless(&gstream); if (test_buf != NULL) diff --git a/igzip/igzip_decode_block_stateless.asm b/igzip/igzip_decode_block_stateless.asm index 38c5cec..f35d6f2 100644 --- a/igzip/igzip_decode_block_stateless.asm +++ b/igzip/igzip_decode_block_stateless.asm @@ -79,7 +79,9 @@ extern rfc1951_lookup_table %define rfc_lookup r15 start_out_mem_offset equ 0 -stack_size equ 8 +read_in_mem_offset equ 8 +read_in_length_mem_offset equ 16 +stack_size equ 4 * 8 + 8 %define _dist_extra_bit_count 264 %define _dist_start _dist_extra_bit_count + 1*32 @@ -280,6 +282,9 @@ decode_huffman_code_block_stateless_ %+ ARCH %+ : mov end_in %+ d, dword [state + _avail_in] add end_in, next_in + mov dword [state + _copy_overflow_len], 0 + mov dword [state + _copy_overflow_dist], 0 + mov tmp3 %+ d, dword [state + _total_out] sub tmp3, next_out neg tmp3 @@ -292,8 +297,12 @@ decode_huffman_code_block_stateless_ %+ ARCH %+ : cmp next_in, end_in jg end_loop_block_pre + cmp read_in_length, 64 + je skip_load + inflate_in_load next_in, end_in, read_in, read_in_length, tmp1, tmp2 +skip_load: mov tmp3, read_in and tmp3, (1 << DECODE_LOOKUP_SIZE_LARGE) - 1 movzx next_sym, word [state + _lit_huff_code + 2 * tmp3] @@ -395,7 +404,7 @@ decode_len_dist: sub copy_start, repeat_length sub copy_start, look_back_dist2 - ;; ;; Check if a valid look back distances was decoded + ;; ;; ;; Check if a valid look back distances was decoded cmp copy_start, [rsp + start_out_mem_offset] jl invalid_look_back_distance MOVDQU xmm1, [copy_start] @@ -443,9 +452,13 @@ end_loop_block_pre: ;; Fix up in buffer and out buffer to reflect the actual buffer end add end_out, OUT_BUFFER_SLOP add end_in, IN_BUFFER_SLOP + end_loop_block: ;; Load read in buffer and decode next lit/len symbol inflate_in_small_load next_in, end_in, read_in, read_in_length, tmp1, tmp2 + mov [rsp + read_in_mem_offset], read_in + mov [rsp + read_in_length_mem_offset], read_in_length + decode_next state, DECODE_LOOKUP_SIZE_LARGE, _lit_huff_code, read_in, read_in_length, next_sym, tmp1, tmp2 ;; Check that enough input was available to decode symbol @@ -457,7 +470,6 @@ end_loop_block: je end_symbol decode_len_dist_2: - ;; Load length exta bits mov next_bits, read_in @@ -484,8 +496,8 @@ decode_len_dist_2: ;; Calculate the look back distance and check for enough input BZHI next_bits, next_bits, rcx, tmp1 SHRX read_in, read_in, rcx - sub read_in_length, rcx add look_back_dist, next_bits + sub read_in_length, rcx jl end_of_input ;; Setup code for byte copy using rep movsb @@ -494,32 +506,53 @@ decode_len_dist_2: mov rcx, repeat_length sub rsi, look_back_dist - ;; Check for out buffer overflow - add next_out, repeat_length - cmp next_out, end_out - jg out_buffer_overflow - ;; Check if a valid look back distance was decoded cmp rsi, [rsp + start_out_mem_offset] jl invalid_look_back_distance + ;; Check for out buffer overflow + add repeat_length, next_out + cmp repeat_length, end_out + jg out_buffer_overflow_repeat + + mov next_out, repeat_length + rep movsb jmp end_loop_block decode_literal: ;; Store literal decoded from the input stream - add next_out, 1 cmp next_out, end_out - jg out_buffer_overflow + jge out_buffer_overflow_lit + add next_out, 1 mov byte [next_out - 1], next_sym %+ b jmp end_loop_block ;; Set exit codes end_of_input: + mov read_in, [rsp + read_in_mem_offset] + mov read_in_length, [rsp + read_in_length_mem_offset] mov rax, END_OF_INPUT jmp end -out_buffer_overflow: +out_buffer_overflow_repeat: + mov rcx, end_out + sub rcx, next_out + sub repeat_length, rcx + sub repeat_length, next_out + rep movsb + + mov [state + _copy_overflow_len], repeat_length %+ d + mov [state + _copy_overflow_dist], look_back_dist %+ d + + mov next_out, end_out + + mov rax, OUT_BUFFER_OVERFLOW + jmp end + +out_buffer_overflow_lit: + mov read_in, [rsp + read_in_mem_offset] + mov read_in_length, [rsp + read_in_length_mem_offset] mov rax, OUT_BUFFER_OVERFLOW jmp end @@ -533,7 +566,7 @@ end_symbol_pre: add end_in, IN_BUFFER_SLOP end_symbol: ;; Set flag identifying a new block is required - mov byte [state + _new_block], 1 + mov byte [state + _block_state], ISAL_BLOCK_NEW_HDR xor rax, rax end: ;; Save current buffer states diff --git a/igzip/igzip_hist_perf.c b/igzip/igzip_hist_perf.c index 2c88ccd..21d674c 100644 --- a/igzip/igzip_hist_perf.c +++ b/igzip/igzip_hist_perf.c @@ -83,15 +83,15 @@ int isal_inflate_hist(struct inflate_state *state, struct isal_huff_histogram *h uint32_t tmp; memset(histogram, 0, sizeof(struct isal_huff_histogram)); - while (state->new_block == 0 || state->bfinal == 0) { - if (state->new_block != 0) { + while (state->block_state != ISAL_BLOCK_INPUT_DONE) { + if (state->block_state == ISAL_BLOCK_NEW_HDR) { tmp = read_header(state); if (tmp) return tmp; } - if (state->btype == 0) { + if (state->block_state == ISAL_BLOCK_TYPE0) { /* If the block is uncompressed, update state data accordingly */ if (state->avail_in < 4) return END_OF_INPUT; @@ -108,18 +108,18 @@ int isal_inflate_hist(struct inflate_state *state, struct isal_huff_histogram *h if (state->avail_in < len) len = state->avail_in; else - state->new_block = 1; + state->block_state = ISAL_BLOCK_NEW_HDR; state->total_out += len; state->next_in += len; state->avail_in -= len + 4; - if (state->avail_in == 0 && state->new_block == 0) + if (state->avail_in == 0 && state->block_state == 0) return END_OF_INPUT; } else { /* Else decode a huffman encoded block */ - while (state->new_block == 0) { + while (state->block_state == ISAL_BLOCK_CODED) { /* While not at the end of block, decode the next * symbol */ next_lit = decode_next_large(state, &state->lit_huff_code); @@ -135,7 +135,7 @@ int isal_inflate_hist(struct inflate_state *state, struct isal_huff_histogram *h else if (next_lit == 256) /* Next symbol is end of block */ - state->new_block = 1; + state->block_state = ISAL_BLOCK_NEW_HDR; else if (next_lit < 286) { /* Next symbol is a repeat length followed by a @@ -168,6 +168,9 @@ int isal_inflate_hist(struct inflate_state *state, struct isal_huff_histogram *h return INVALID_SYMBOL; } } + + if (state->bfinal != 0 && state->block_state == ISAL_BLOCK_NEW_HDR) + state->block_state = ISAL_BLOCK_INPUT_DONE; } state->next_in -= state->read_in_length / 8; state->avail_in += state->read_in_length / 8; @@ -340,7 +343,9 @@ int main(int argc, char *argv[]) stream.hufftables = &hufftables_custom; isal_deflate_stateless(&stream); - isal_inflate_init(&gstream, outbuf, stream.total_out, NULL, 0); + isal_inflate_init(&gstream); + gstream.next_in = outbuf; + gstream.avail_in = outbuf_size; isal_inflate_hist(&gstream, &histogram2); printf("Histogram Error \n"); diff --git a/igzip/igzip_inflate.c b/igzip/igzip_inflate.c index d2bfd15..d1d76d3 100644 --- a/igzip/igzip_inflate.c +++ b/igzip/igzip_inflate.c @@ -72,6 +72,9 @@ void inline inflate_in_load(struct inflate_state *state, int min_required) uint64_t temp = 0; uint8_t new_bytes; + if (state->read_in_length >= 64) + return; + if (state->avail_in >= 8) { /* If there is enough space to load a 64 bits, load the data and use * that to fill read_in */ @@ -94,7 +97,6 @@ void inline inflate_in_load(struct inflate_state *state, int min_required) } } - } /* Returns the next bit_count bits from the in stream and shifts the stream over @@ -443,6 +445,8 @@ int inline setup_static_header(struct inflate_state *state) make_inflate_huff_code_small(&state->dist_huff_code, dist_code, DIST_LEN + 2, dist_count); + state->block_state = ISAL_BLOCK_CODED; + return 0; } @@ -686,6 +690,8 @@ int inline setup_dynamic_header(struct inflate_state *state) make_inflate_huff_code_small(&state->dist_huff_code, &lit_and_dist_huff[LIT_LEN], DIST_LEN, dist_count); + state->block_state = ISAL_BLOCK_CODED; + return 0; } @@ -694,61 +700,133 @@ int inline setup_dynamic_header(struct inflate_state *state) int read_header(struct inflate_state *state) { uint8_t bytes; - - state->new_block = 0; + uint32_t btype; + uint16_t len, nlen; + int ret = 0; /* btype and bfinal are defined in RFC 1951, bfinal represents whether * the current block is the end of block, and btype represents the * encoding method on the current block. */ + state->bfinal = inflate_in_read_bits(state, 1); - state->btype = inflate_in_read_bits(state, 2); + btype = inflate_in_read_bits(state, 2); if (state->read_in_length < 0) - return END_OF_INPUT; + ret = END_OF_INPUT; + + else if (btype == 0) { + inflate_in_load(state, 40); + bytes = state->read_in_length / 8; + + if (bytes < 4) + return END_OF_INPUT; + + state->read_in >>= state->read_in_length % 8; + state->read_in_length = bytes * 8; + + len = state->read_in & 0xFFFF; + state->read_in >>= 16; + nlen = state->read_in & 0xFFFF; + state->read_in >>= 16; + state->read_in_length -= 32; - if (state->btype == 0) { bytes = state->read_in_length / 8; - state->read_in = 0; - state->read_in_length = 0; state->next_in -= bytes; state->avail_in += bytes; - return 0; + state->read_in = 0; + state->read_in_length = 0; - } else if (state->btype == 1) - return setup_static_header(state); + /* Check if len and nlen match */ + if (len != (~nlen & 0xffff)) + return INVALID_NON_COMPRESSED_BLOCK_LENGTH; - else if (state->btype == 2) - return setup_dynamic_header(state); + state->type0_block_len = len; + state->block_state = ISAL_BLOCK_TYPE0; + + ret = 0; + + } else if (btype == 1) + ret = setup_static_header(state); + + else if (btype == 2) + ret = setup_dynamic_header(state); + + else + ret = INVALID_BLOCK_HEADER; + + return ret; +} + +/* Reads in the header pointed to by in_stream and sets up state to reflect that + * header information*/ +int read_header_stateful(struct inflate_state *state) +{ + uint64_t read_in_start = state->read_in; + int32_t read_in_length_start = state->read_in_length; + uint8_t *next_in_start = state->next_in; + uint32_t avail_in_start = state->avail_in; + int block_state_start = state->block_state; + int ret; + int copy_size; + int bytes_read; + + if (block_state_start == ISAL_BLOCK_HDR) { + /* Setup so read_header decodes data in tmp_in_buffer */ + copy_size = ISAL_INFLATE_MAX_HDR_SIZE - state->tmp_in_size; + if (copy_size > state->avail_in) + copy_size = state->avail_in; + + memcpy(&state->tmp_in_buffer[state->tmp_in_size], state->next_in, copy_size); + state->next_in = state->tmp_in_buffer; + state->avail_in = state->tmp_in_size + copy_size; + } + + ret = read_header(state); + + if (block_state_start == ISAL_BLOCK_HDR) { + /* Setup so state is restored to a valid state */ + bytes_read = state->next_in - state->tmp_in_buffer - state->tmp_in_size; + if (bytes_read < 0) + bytes_read = 0; + state->next_in = next_in_start + bytes_read; + state->avail_in = avail_in_start - bytes_read; + } + + if (ret == END_OF_INPUT) { + /* Save off data so header can be decoded again with more data */ + state->read_in = read_in_start; + state->read_in_length = read_in_length_start; + memcpy(&state->tmp_in_buffer[state->tmp_in_size], next_in_start, + avail_in_start); + state->tmp_in_size += avail_in_start; + state->avail_in = 0; + state->next_in = next_in_start + avail_in_start; + state->block_state = ISAL_BLOCK_HDR; + } else + state->tmp_in_size = 0; + + return ret; - return INVALID_BLOCK_HEADER; } int inline decode_literal_block(struct inflate_state *state) { - uint16_t len, nlen; + uint32_t len = state->type0_block_len; /* If the block is uncompressed, perform a memcopy while * updating state data */ - if (state->avail_in < 4) - return END_OF_INPUT; - len = *(uint16_t *) state->next_in; - state->next_in += 2; - nlen = *(uint16_t *) state->next_in; - state->next_in += 2; + state->block_state = ISAL_BLOCK_NEW_HDR; - /* Check if len and nlen match */ - if (len != (~nlen & 0xffff)) - return INVALID_NON_COMPRESSED_BLOCK_LENGTH; + if (state->avail_out < len) { + len = state->avail_out; + state->block_state = ISAL_BLOCK_TYPE0; + } - if (state->avail_out < len) - return OUT_BUFFER_OVERFLOW; - - if (state->avail_in < len) + if (state->avail_in < len) { len = state->avail_in; - - else - state->new_block = 1; + state->block_state = ISAL_BLOCK_TYPE0; + } memcpy(state->next_out, state->next_in, len); @@ -756,11 +834,15 @@ int inline decode_literal_block(struct inflate_state *state) state->avail_out -= len; state->total_out += len; state->next_in += len; - state->avail_in -= len + 4; + state->avail_in -= len; + state->type0_block_len -= len; - if (state->avail_in == 0 && state->new_block == 0) + if (state->avail_in == 0 && state->block_state != ISAL_BLOCK_NEW_HDR) return END_OF_INPUT; + if (state->avail_out == 0 && state->type0_block_len > 0) + return OUT_BUFFER_OVERFLOW; + return 0; } @@ -772,22 +854,45 @@ int decode_huffman_code_block_stateless_base(struct inflate_state *state) uint8_t next_dist; uint32_t repeat_length; uint32_t look_back_dist; + uint64_t read_in_tmp; + int32_t read_in_length_tmp; + uint8_t *next_in_tmp; + uint32_t avail_in_tmp; - while (state->new_block == 0) { + state->copy_overflow_length = 0; + state->copy_overflow_distance = 0; + + while (state->block_state == ISAL_BLOCK_CODED) { /* While not at the end of block, decode the next * symbol */ + inflate_in_load(state, 0); + + read_in_tmp = state->read_in; + read_in_length_tmp = state->read_in_length; + next_in_tmp = state->next_in; + avail_in_tmp = state->avail_in; next_lit = decode_next_large(state, &state->lit_huff_code); - if (state->read_in_length < 0) + if (state->read_in_length < 0) { + state->read_in = read_in_tmp; + state->read_in_length = read_in_length_tmp; + state->next_in = next_in_tmp; + state->avail_in = avail_in_tmp; return END_OF_INPUT; + } if (next_lit < 256) { /* If the next symbol is a literal, * write out the symbol and update state * data accordingly. */ - if (state->avail_out < 1) + if (state->avail_out < 1) { + state->read_in = read_in_tmp; + state->read_in_length = read_in_length_tmp; + state->next_in = next_in_tmp; + state->avail_in = avail_in_tmp; return OUT_BUFFER_OVERFLOW; + } *state->next_out = next_lit; state->next_out++; @@ -798,7 +903,7 @@ int decode_huffman_code_block_stateless_base(struct inflate_state *state) /* If the next symbol is the end of * block, update the state data * accordingly */ - state->new_block = 1; + state->block_state = ISAL_BLOCK_NEW_HDR; } else if (next_lit < 286) { /* Else if the next symbol is a repeat @@ -812,10 +917,6 @@ int decode_huffman_code_block_stateless_base(struct inflate_state *state) inflate_in_read_bits(state, rfc_lookup_table.len_extra_bit_count[next_lit - 257]); - - if (state->avail_out < repeat_length) - return OUT_BUFFER_OVERFLOW; - next_dist = decode_next_small(state, &state->dist_huff_code); look_back_dist = rfc_lookup_table.dist_start[next_dist] + @@ -823,12 +924,23 @@ int decode_huffman_code_block_stateless_base(struct inflate_state *state) rfc_lookup_table.dist_extra_bit_count [next_dist]); - if (state->read_in_length < 0) + if (state->read_in_length < 0) { + state->read_in = read_in_tmp; + state->read_in_length = read_in_length_tmp; + state->next_in = next_in_tmp; + state->avail_in = avail_in_tmp; return END_OF_INPUT; + } if (look_back_dist > state->total_out) return INVALID_LOOK_BACK_DISTANCE; + if (state->avail_out < repeat_length) { + state->copy_overflow_length = repeat_length - state->avail_out; + state->copy_overflow_distance = look_back_dist; + repeat_length = state->avail_out; + } + if (look_back_dist > repeat_length) memcpy(state->next_out, state->next_out - look_back_dist, repeat_length); @@ -838,6 +950,9 @@ int decode_huffman_code_block_stateless_base(struct inflate_state *state) state->next_out += repeat_length; state->avail_out -= repeat_length; state->total_out += repeat_length; + + if (state->copy_overflow_length > 0) + return OUT_BUFFER_OVERFLOW; } else /* Else the read in bits do not * correspond to any valid symbol */ @@ -846,45 +961,206 @@ int decode_huffman_code_block_stateless_base(struct inflate_state *state) return 0; } -void isal_inflate_init(struct inflate_state *state, uint8_t * in_stream, uint32_t in_size, - uint8_t * out_stream, uint64_t out_size) +void isal_inflate_init(struct inflate_state *state) { state->read_in = 0; state->read_in_length = 0; - state->next_in = in_stream; - state->avail_in = in_size; - state->next_out = out_stream; - state->avail_out = out_size; + state->next_in = NULL; + state->avail_in = 0; + state->next_out = NULL; + state->avail_out = 0; state->total_out = 0; - state->new_block = 1; + state->block_state = ISAL_BLOCK_NEW_HDR; state->bfinal = 0; + state->type0_block_len = 0; + state->copy_overflow_length = 0; + state->copy_overflow_distance = 0; + state->tmp_in_size = 0; + state->tmp_out_processed = 0; + state->tmp_out_valid = 0; } int isal_inflate_stateless(struct inflate_state *state) { - uint32_t ret; + uint32_t ret = 0; - while (state->new_block == 0 || state->bfinal == 0) { - if (state->new_block != 0) { + state->read_in = 0; + state->read_in_length = 0; + state->block_state = ISAL_BLOCK_NEW_HDR; + state->bfinal = 0; + state->total_out = 0; + + while (state->block_state != ISAL_BLOCK_INPUT_DONE) { + if (state->block_state == ISAL_BLOCK_NEW_HDR) { ret = read_header(state); if (ret) - return ret; + break; } - if (state->btype == 0) + if (state->block_state == ISAL_BLOCK_TYPE0) ret = decode_literal_block(state); else ret = decode_huffman_code_block_stateless(state); if (ret) - return ret; + break; + if (state->bfinal != 0 && state->block_state == ISAL_BLOCK_NEW_HDR) + state->block_state = ISAL_BLOCK_INPUT_DONE; } /* Undo count stuff of bytes read into the read buffer */ state->next_in -= state->read_in_length / 8; state->avail_in += state->read_in_length / 8; + return ret; +} + +int isal_inflate(struct inflate_state *state) +{ + + uint8_t *next_out = state->next_out; + uint32_t avail_out = state->avail_out; + uint32_t copy_size = 0; + int32_t shift_size = 0; + int ret = 0; + + if (state->block_state != ISAL_BLOCK_FINISH) { + /* If space in tmp_out buffer, decompress into the tmp_out_buffer */ + if (state->tmp_out_valid < 2 * ISAL_INFLATE_HIST_SIZE) { + /* Setup to start decoding into temp buffer */ + state->next_out = &state->tmp_out_buffer[state->tmp_out_valid]; + state->avail_out = + sizeof(state->tmp_out_buffer) - ISAL_INFLATE_SLOP - + state->tmp_out_valid; + + if ((int32_t) state->avail_out < 0) + state->avail_out = 0; + + /* Decode into internal buffer until exit */ + while (state->block_state != ISAL_BLOCK_INPUT_DONE) { + if (state->block_state == ISAL_BLOCK_NEW_HDR + || state->block_state == ISAL_BLOCK_HDR) { + ret = read_header_stateful(state); + + if (ret) + break; + } + + if (state->block_state == ISAL_BLOCK_TYPE0) { + ret = decode_literal_block(state); + } else + ret = decode_huffman_code_block_stateless(state); + + if (ret) + break; + if (state->bfinal != 0 + && state->block_state == ISAL_BLOCK_NEW_HDR) + state->block_state = ISAL_BLOCK_INPUT_DONE; + } + + /* Copy valid data from internal buffer into out_buffer */ + if (state->copy_overflow_length != 0) { + byte_copy(state->next_out, state->copy_overflow_distance, + state->copy_overflow_length); + state->tmp_out_valid += state->copy_overflow_length; + state->next_out += state->copy_overflow_length; + state->total_out += state->copy_overflow_length; + state->copy_overflow_distance = 0; + state->copy_overflow_length = 0; + } + + state->tmp_out_valid = state->next_out - state->tmp_out_buffer; + + /* Setup state for decompressing into out_buffer */ + state->next_out = next_out; + state->avail_out = avail_out; + } + + /* Copy data from tmp_out buffer into out_buffer */ + copy_size = state->tmp_out_valid - state->tmp_out_processed; + if (copy_size > avail_out) + copy_size = avail_out; + + memcpy(state->next_out, + &state->tmp_out_buffer[state->tmp_out_processed], copy_size); + + state->tmp_out_processed += copy_size; + state->avail_out -= copy_size; + state->next_out += copy_size; + + if (ret == INVALID_LOOK_BACK_DISTANCE || ret == INVALID_BLOCK_HEADER + || ret == INVALID_NON_COMPRESSED_BLOCK_LENGTH) { + /* Set total_out to not count data in tmp_out_buffer */ + state->total_out -= state->tmp_out_valid - state->tmp_out_processed; + return ret; + } + + /* If all data from tmp_out buffer has been processed, start + * decompressing into the out buffer */ + if (state->tmp_out_processed == state->tmp_out_valid) { + while (state->block_state != ISAL_BLOCK_INPUT_DONE) { + if (state->block_state == ISAL_BLOCK_NEW_HDR + || state->block_state == ISAL_BLOCK_HDR) { + ret = read_header_stateful(state); + if (ret) + break; + } + + if (state->block_state == ISAL_BLOCK_TYPE0) + ret = decode_literal_block(state); + else + ret = decode_huffman_code_block_stateless(state); + if (ret) + break; + if (state->bfinal != 0 + && state->block_state == ISAL_BLOCK_NEW_HDR) + state->block_state = ISAL_BLOCK_INPUT_DONE; + } + } + + if (state->block_state != ISAL_BLOCK_INPUT_DONE) { + /* Save decompression history in tmp_out buffer */ + if (state->tmp_out_valid == state->tmp_out_processed + && avail_out - state->avail_out >= ISAL_INFLATE_HIST_SIZE) { + memcpy(state->tmp_out_buffer, + state->next_out - ISAL_INFLATE_HIST_SIZE, + ISAL_INFLATE_HIST_SIZE); + state->tmp_out_valid = ISAL_INFLATE_HIST_SIZE; + state->tmp_out_processed = ISAL_INFLATE_HIST_SIZE; + + } else if (state->tmp_out_processed >= ISAL_INFLATE_HIST_SIZE) { + shift_size = state->tmp_out_valid - ISAL_INFLATE_HIST_SIZE; + if (shift_size > state->tmp_out_processed) + shift_size = state->tmp_out_processed; + + memmove(state->tmp_out_buffer, + &state->tmp_out_buffer[shift_size], + state->tmp_out_valid - shift_size); + state->tmp_out_valid -= shift_size; + state->tmp_out_processed -= shift_size; + + } + + if (state->copy_overflow_length != 0) { + byte_copy(&state->tmp_out_buffer[state->tmp_out_valid], + state->copy_overflow_distance, + state->copy_overflow_length); + state->tmp_out_valid += state->copy_overflow_length; + state->total_out += state->copy_overflow_length; + state->copy_overflow_distance = 0; + state->copy_overflow_length = 0; + } + + if (ret == INVALID_LOOK_BACK_DISTANCE + || ret == INVALID_BLOCK_HEADER + || ret == INVALID_NON_COMPRESSED_BLOCK_LENGTH) + return ret; + + } else if (state->tmp_out_valid == state->tmp_out_processed) + state->block_state = ISAL_BLOCK_FINISH; + } + return DECOMPRESSION_FINISHED; } diff --git a/igzip/igzip_inflate_perf.c b/igzip/igzip_inflate_perf.c index aa4beaa..ada274b 100644 --- a/igzip/igzip_inflate_perf.c +++ b/igzip/igzip_inflate_perf.c @@ -208,9 +208,13 @@ int main(int argc, char *argv[]) perf_start(&start); for (i = 0; i < iterations; i++) { - isal_inflate_init(&state, inbuf, inbuf_size, outbuf, outbuf_size); + isal_inflate_init(&state); + state.next_in = inbuf; + state.avail_in = inbuf_size; + state.next_out = outbuf; + state.avail_out = outbuf_size; - check = isal_inflate_stateless(&state); + check = isal_inflate(&state); if (check) { printf("Error in decompression with error %d\n", check); break; diff --git a/igzip/igzip_inflate_test.c b/igzip/igzip_inflate_test.c index 6c1c2e3..04ca2cb 100644 --- a/igzip/igzip_inflate_test.c +++ b/igzip/igzip_inflate_test.c @@ -37,24 +37,129 @@ * are done in a stateless manner. */ #define MAX_INPUT_FILE_SIZE 2L*1024L*1024L*1024L -int test(uint8_t * compressed_stream, uint64_t * compressed_length, - uint8_t * uncompressed_stream, int uncompressed_length, - uint8_t * uncompressed_test_stream, int uncompressed_test_stream_length) +int inflate_multi_pass(uint8_t * compress_buf, uint64_t compress_len, + uint8_t * uncompress_buf, uint32_t * uncompress_len) +{ + struct inflate_state *state = NULL; + int ret = 0; + uint8_t *comp_tmp = NULL, *uncomp_tmp = NULL; + uint32_t comp_tmp_size = 0, uncomp_tmp_size = 0; + uint32_t comp_processed = 0, uncomp_processed = 0; + + state = malloc(sizeof(struct inflate_state)); + if (state == NULL) { + printf("Failed to allocate memory\n"); + exit(0); + } + + isal_inflate_init(state); + + state->next_in = NULL; + state->next_out = NULL; + state->avail_in = 0; + state->avail_out = 0; + + while (1) { + if (state->avail_in == 0) { + comp_tmp_size = rand() % (compress_len + 1); + + if (comp_tmp_size >= compress_len - comp_processed) + comp_tmp_size = compress_len - comp_processed; + + if (comp_tmp_size != 0) { + if (comp_tmp != NULL) { + free(comp_tmp); + comp_tmp = NULL; + } + + comp_tmp = malloc(comp_tmp_size); + + if (comp_tmp == NULL) { + printf("Failed to allocate memory\n"); + exit(0); + } + + memcpy(comp_tmp, compress_buf + comp_processed, comp_tmp_size); + comp_processed += comp_tmp_size; + + state->next_in = comp_tmp; + state->avail_in = comp_tmp_size; + } + } + + if (state->avail_out == 0) { + /* Save uncompressed data into uncompress_buf */ + if (uncomp_tmp != NULL) { + memcpy(uncompress_buf + uncomp_processed, uncomp_tmp, + uncomp_tmp_size); + uncomp_processed += uncomp_tmp_size; + } + + uncomp_tmp_size = rand() % (*uncompress_len + 1); + + /* Limit size of buffer to be smaller than maximum */ + if (uncomp_tmp_size > *uncompress_len - uncomp_processed) + uncomp_tmp_size = *uncompress_len - uncomp_processed; + + if (uncomp_tmp_size != 0) { + + if (uncomp_tmp != NULL) { + fflush(0); + free(uncomp_tmp); + uncomp_tmp = NULL; + } + + uncomp_tmp = malloc(uncomp_tmp_size); + if (uncomp_tmp == NULL) { + printf("Failed to allocate memory\n"); + exit(0); + } + + state->avail_out = uncomp_tmp_size; + state->next_out = uncomp_tmp; + } + } + + ret = isal_inflate(state); + + if (state->block_state == ISAL_BLOCK_FINISH || ret != 0) { + memcpy(uncompress_buf + uncomp_processed, uncomp_tmp, uncomp_tmp_size); + *uncompress_len = state->total_out; + break; + } + } + + if (comp_tmp != NULL) { + free(comp_tmp); + comp_tmp = NULL; + } + if (uncomp_tmp != NULL) { + free(uncomp_tmp); + uncomp_tmp = NULL; + } + + free(state); + return ret; +} + +int test(uint8_t * compressed_stream, + uint64_t * compressed_length, + uint8_t * uncompressed_stream, uint32_t uncompressed_length, + uint8_t * uncompressed_test_stream, uint32_t uncompressed_test_stream_length) { - struct inflate_state state; int ret; ret = - compress2(compressed_stream, compressed_length, uncompressed_stream, - uncompressed_length, 6); + compress2(compressed_stream, compressed_length, + uncompressed_stream, uncompressed_length, 6); if (ret) { printf("Failed compressing input with exit code %d", ret); return ret; } - isal_inflate_init(&state, compressed_stream + 2, *compressed_length - 2, - uncompressed_test_stream, uncompressed_test_stream_length); - ret = isal_inflate_stateless(&state); - + ret = + inflate_multi_pass(compressed_stream + 2, + *compressed_length - 2 - 4, + uncompressed_test_stream, &uncompressed_test_stream_length); switch (ret) { case 0: break; @@ -88,10 +193,10 @@ int test(uint8_t * compressed_stream, uint64_t * compressed_length, break; } - if (state.total_out != uncompressed_length) { + if (uncompressed_test_stream_length != uncompressed_length) { printf("incorrect amount of data was decompressed from compressed data\n"); - printf("%d decompressed of %d compressed", state.total_out, - uncompressed_length); + printf("%d decompressed of %d compressed", + uncompressed_test_stream_length, uncompressed_length); return -1; } if (memcmp(uncompressed_stream, uncompressed_test_stream, uncompressed_length)) { @@ -105,6 +210,7 @@ int test(uint8_t * compressed_stream, uint64_t * compressed_length, printf(" decompressed data is not the same as the compressed data\n"); return -1; } + return 0; } @@ -120,7 +226,6 @@ int main(int argc, char **argv) if (argc == 1) printf("Error, no input file\n"); - for (i = 1; i < argc; i++) { file = fopen(argv[i], "r"); if (file == NULL) { @@ -128,7 +233,7 @@ int main(int argc, char **argv) return 1; } else printf("Starting file %s", argv[i]); - + fflush(0); fseek(file, 0, SEEK_END); file_length = ftell(file); fseek(file, 0, SEEK_SET); @@ -145,7 +250,6 @@ int main(int argc, char **argv) uncompressed_test_stream = malloc(file_length); } compressed_stream = malloc(compressed_length); - if (uncompressed_stream == NULL && file_length != 0) { printf("Failed to allocate memory\n"); exit(0); @@ -163,7 +267,6 @@ int main(int argc, char **argv) uncompressed_length = fread(uncompressed_stream, 1, file_length, file); uncompressed_test_stream_length = uncompressed_length; - ret = test(compressed_stream, &compressed_length, uncompressed_stream, uncompressed_length, uncompressed_test_stream, @@ -175,10 +278,8 @@ int main(int argc, char **argv) else printf(" "); printf("0x%02x,", compressed_stream[j]); - } printf("\n"); - } fflush(0); @@ -188,13 +289,11 @@ int main(int argc, char **argv) free(uncompressed_stream); if (uncompressed_test_stream != NULL) free(uncompressed_test_stream); - if (ret) { printf(" ... Fail with exit code %d\n", ret); return ret; } else printf(" ... Pass\n"); - fin_ret |= ret; } return fin_ret; diff --git a/igzip/igzip_rand_test.c b/igzip/igzip_rand_test.c index 7a649d3..47c98e0 100644 --- a/igzip/igzip_rand_test.c +++ b/igzip/igzip_rand_test.c @@ -113,6 +113,8 @@ const int gzip_extra_bytes = 0; #endif +int inflate_type = 0; + struct isal_hufftables *hufftables = NULL; #define HISTORY_SIZE 32*1024 @@ -306,29 +308,192 @@ uint32_t check_gzip_header(uint8_t * z_buf) return ret; } -uint32_t check_gzip_trl(struct inflate_state * gstream) +uint32_t check_gzip_trl(uint64_t gzip_crc, uint8_t * uncompress_buf, uint32_t uncompress_len) { - uint8_t *index = NULL; - uint32_t crc, ret = 0; + uint64_t crc, ret = 0; - index = gstream->next_out - gstream->total_out; - crc = find_crc(index, gstream->total_out); + crc = find_crc(uncompress_buf, uncompress_len); - if (gstream->total_out != *(uint32_t *) (gstream->next_in + 4) || - crc != *(uint32_t *) gstream->next_in) + crc = ((uint64_t) uncompress_len << 32) | crc; + + if (crc != gzip_crc) ret = INCORRECT_GZIP_TRAILER; return ret; } #endif +int inflate_stateless_pass(uint8_t * compress_buf, uint64_t compress_len, + uint8_t * uncompress_buf, uint32_t * uncompress_len) +{ + struct inflate_state state; + int ret = 0; + + state.next_in = compress_buf; + state.avail_in = compress_len; + state.next_out = uncompress_buf; + state.avail_out = *uncompress_len; + + ret = isal_inflate_stateless(&state); + + *uncompress_len = state.total_out; + +#ifndef DEFLATE + if (!ret) + ret = + check_gzip_trl(*(uint64_t *) state.next_in, uncompress_buf, + *uncompress_len); + state.avail_in -= 8; +#endif + + if (ret == 0 && state.avail_in != 0) + ret = INFLATE_LEFTOVER_INPUT; + + return ret; +} + +int inflate_multi_pass(uint8_t * compress_buf, uint64_t compress_len, + uint8_t * uncompress_buf, uint32_t * uncompress_len) +{ + struct inflate_state *state = NULL; + int ret = 0; + uint8_t *comp_tmp = NULL, *uncomp_tmp = NULL; + uint32_t comp_tmp_size = 0, uncomp_tmp_size = 0; + uint32_t comp_processed = 0, uncomp_processed = 0; + int32_t read_in_old = 0; + + state = malloc(sizeof(struct inflate_state)); + if (state == NULL) { + printf("Failed to allocate memory\n"); + exit(0); + } + + isal_inflate_init(state); + + state->next_in = NULL; + state->next_out = NULL; + state->avail_in = 0; + state->avail_out = 0; + +#ifndef DEFLATE + compress_len -= 8; +#endif + + while (1) { + if (state->avail_in == 0) { + comp_tmp_size = rand() % (compress_len + 1); + + if (comp_tmp_size >= compress_len - comp_processed) + comp_tmp_size = compress_len - comp_processed; + + if (comp_tmp_size != 0) { + if (comp_tmp != NULL) { + free(comp_tmp); + comp_tmp = NULL; + } + + comp_tmp = malloc(comp_tmp_size); + + if (comp_tmp == NULL) { + printf("Failed to allocate memory\n"); + return MALLOC_FAILED; + } + + memcpy(comp_tmp, compress_buf + comp_processed, comp_tmp_size); + comp_processed += comp_tmp_size; + + state->next_in = comp_tmp; + state->avail_in = comp_tmp_size; + } + } + + if (state->avail_out == 0) { + /* Save uncompressed data into uncompress_buf */ + if (uncomp_tmp != NULL) { + memcpy(uncompress_buf + uncomp_processed, uncomp_tmp, + uncomp_tmp_size); + uncomp_processed += uncomp_tmp_size; + } + + uncomp_tmp_size = rand() % (*uncompress_len + 1); + + /* Limit size of buffer to be smaller than maximum */ + if (uncomp_tmp_size > *uncompress_len - uncomp_processed) + uncomp_tmp_size = *uncompress_len - uncomp_processed; + + if (uncomp_tmp_size != 0) { + + if (uncomp_tmp != NULL) { + fflush(0); + free(uncomp_tmp); + uncomp_tmp = NULL; + } + + uncomp_tmp = malloc(uncomp_tmp_size); + if (uncomp_tmp == NULL) { + printf("Failed to allocate memory\n"); + return MALLOC_FAILED; + } + + state->avail_out = uncomp_tmp_size; + state->next_out = uncomp_tmp; + } + } + + ret = isal_inflate(state); + + if (state->block_state == ISAL_BLOCK_FINISH || ret != 0) { + memcpy(uncompress_buf + uncomp_processed, uncomp_tmp, uncomp_tmp_size); + *uncompress_len = state->total_out; + break; + } + + if (*uncompress_len - uncomp_processed == 0 && state->avail_out == 0 + && state->tmp_out_valid - state->tmp_out_processed > 0) { + ret = INFLATE_OUT_BUFFER_OVERFLOW; + break; + } + + if (compress_len - comp_processed == 0 && state->avail_in == 0 + && (state->block_state != ISAL_BLOCK_INPUT_DONE) + && state->tmp_out_valid - state->tmp_out_processed == 0) { + if (state->read_in_length == read_in_old) { + ret = INFLATE_END_OF_INPUT; + break; + } + read_in_old = state->read_in_length; + } + } + +#ifndef DEFLATE + if (!ret) + ret = + check_gzip_trl(*(uint64_t *) & compress_buf[compress_len], + uncompress_buf, *uncompress_len); +#endif + if (ret == 0 && state->avail_in != 0) + ret = INFLATE_LEFTOVER_INPUT; + + if (comp_tmp != NULL) { + free(comp_tmp); + comp_tmp = NULL; + } + + if (uncomp_tmp != NULL) { + free(uncomp_tmp); + uncomp_tmp = NULL; + } + + free(state); + return ret; +} + /* Inflate the compressed data and check that the decompressed data agrees with the input data */ int inflate_check(uint8_t * z_buf, int z_size, uint8_t * in_buf, int in_size) { /* Test inflate with reference inflate */ int ret = 0; - struct inflate_state gstream; uint32_t test_size = in_size; uint8_t *test_buf = NULL; int mem_result = 0; @@ -336,23 +501,27 @@ int inflate_check(uint8_t * z_buf, int z_size, uint8_t * in_buf, int in_size) if (in_size > 0) { assert(in_buf != NULL); test_buf = malloc(test_size); - if (test_buf == NULL) return MALLOC_FAILED; } + if (test_buf != NULL) memset(test_buf, 0xff, test_size); #ifndef DEFLATE - int gzip_hdr_result, gzip_trl_result; - + int gzip_hdr_result = 0, gzip_trl_result = 0; gzip_hdr_result = check_gzip_header(z_buf); z_buf += gzip_hdr_bytes; z_size -= gzip_hdr_bytes; #endif - isal_inflate_init(&gstream, z_buf, z_size, test_buf, test_size); - ret = isal_inflate_stateless(&gstream); + if (inflate_type == 0) { + ret = inflate_stateless_pass(z_buf, z_size, test_buf, &test_size); + inflate_type = 1; + } else { + ret = inflate_multi_pass(z_buf, z_size, test_buf, &test_size); + inflate_type = 0; + } if (test_buf != NULL) mem_result = memcmp(in_buf, test_buf, in_size); @@ -362,22 +531,16 @@ int inflate_check(uint8_t * z_buf, int z_size, uint8_t * in_buf, int in_size) if (mem_result) for (i = 0; i < in_size; i++) { if (in_buf[i] != test_buf[i]) { - printf("First incorrect data at 0x%x of 0x%x, 0x%x != 0x%x\n", - i, in_size, in_buf[i], test_buf[i]); + printf + ("First incorrect data at 0x%x of 0x%x, 0x%x != 0x%x\n", + i, in_size, in_buf[i], test_buf[i]); break; } } #endif -#ifndef DEFLATE - gzip_trl_result = check_gzip_trl(&gstream); - gstream.avail_in -= gzip_trl_bytes; - gstream.next_in += gzip_trl_bytes; -#endif - if (test_buf != NULL) free(test_buf); - switch (ret) { case 0: break; @@ -399,17 +562,22 @@ int inflate_check(uint8_t * z_buf, int z_size, uint8_t * in_buf, int in_size) case INVALID_LOOK_BACK_DISTANCE: return INFLATE_INVALID_LOOK_BACK_DISTANCE; break; + case INFLATE_LEFTOVER_INPUT: + return INFLATE_LEFTOVER_INPUT; + break; + +#ifndef DEFLATE + case INCORRECT_GZIP_TRAILER: + gzip_trl_result = INCORRECT_GZIP_TRAILER; + break; + +#endif default: return INFLATE_GENERAL_ERROR; break; } - if (gstream.avail_in != 0) { - printf("leftover = %d\n", gstream.avail_in); - return INFLATE_LEFTOVER_INPUT; - } - - if (gstream.total_out != in_size) + if (test_size != in_size) return INFLATE_INCORRECT_OUTPUT_SIZE; if (mem_result) diff --git a/igzip/inflate_data_structs.asm b/igzip/inflate_data_structs.asm index 70042e8..7c55bef 100644 --- a/igzip/inflate_data_structs.asm +++ b/igzip/inflate_data_structs.asm @@ -61,9 +61,11 @@ FIELD _avail_in, 4, 4 FIELD _read_in_length,4, 4 FIELD _lit_huff_code, _inflate_huff_code_large_size, _inflate_huff_code_large_align FIELD _dist_huff_code,_inflate_huff_code_small_size, _inflate_huff_code_small_align -FIELD _new_block, 1, 1 -FIELD _bfinal, 1, 1 -FIELD _btype, 1, 1 +FIELD _block_state, 4, 4 +FIELD _bfinal, 4, 4 +FIELD _type0_block_len, 4, 4 +FIELD _copy_overflow_len, 4, 4 +FIELD _copy_overflow_dist, 4, 4 %assign _inflate_state_size _FIELD_OFFSET %assign _inflate_state_align _STRUCT_ALIGN @@ -74,7 +76,12 @@ _lit_huff_code_long_code_lookup equ _lit_huff_code+_long_code_lookup_large _dist_huff_code_small_code_lookup equ _dist_huff_code+_small_code_lookup_small _dist_huff_code_long_code_lookup equ _dist_huff_code+_long_code_lookup_small - +ISAL_BLOCK_NEW_HDR equ 0 +ISAL_BLOCK_HDR equ 1 +ISAL_BLOCK_TYPE0 equ 2 +ISAL_BLOCK_CODED equ 3 +ISAL_BLOCK_END equ 4 +ISAL_BLOCK_FINISH equ 5 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/include/inflate.h b/include/inflate.h index 2209c28..e869bba 100644 --- a/include/inflate.h +++ b/include/inflate.h @@ -16,6 +16,18 @@ #define DECODE_LOOKUP_SIZE_LARGE 13 #define DECODE_LOOKUP_SIZE_SMALL 10 +#define ISAL_INFLATE_HIST_SIZE (32*1024) +#define ISAL_INFLATE_SLOP 17*16 +#define ISAL_INFLATE_MAX_HDR_SIZE 360 +enum isal_block_state { + ISAL_BLOCK_NEW_HDR, /* Just starting a new block */ + ISAL_BLOCK_HDR, /* In the middle of reading in a block header */ + ISAL_BLOCK_TYPE0, /* Decoding a type 0 block */ + ISAL_BLOCK_CODED, /* Decoding a huffman coded block */ + ISAL_BLOCK_INPUT_DONE, /* Decompression of input is completed */ + ISAL_BLOCK_FINISH /* Decompression of input is completed and all data has been flushed to output */ +}; + /* * Data structure used to store a Huffman code for fast lookup. It works by * performing a lookup in small_code_lookup that hopefully yields the correct @@ -62,14 +74,13 @@ * code length and code value forces the maximum offset to be less than 288. */ -struct inflate_huff_code_large{ - uint16_t short_code_lookup[ 1 << (DECODE_LOOKUP_SIZE_LARGE)]; +struct inflate_huff_code_large { + uint16_t short_code_lookup[1 << (DECODE_LOOKUP_SIZE_LARGE)]; uint16_t long_code_lookup[288 + (1 << (15 - DECODE_LOOKUP_SIZE_LARGE))]; }; - -struct inflate_huff_code_small{ - uint16_t short_code_lookup[ 1 << (DECODE_LOOKUP_SIZE_SMALL)]; +struct inflate_huff_code_small { + uint16_t short_code_lookup[1 << (DECODE_LOOKUP_SIZE_SMALL)]; uint16_t long_code_lookup[32 + (1 << (15 - DECODE_LOOKUP_SIZE_SMALL))]; }; @@ -84,18 +95,27 @@ struct inflate_state { int32_t read_in_length; struct inflate_huff_code_large lit_huff_code; struct inflate_huff_code_small dist_huff_code; - uint8_t new_block; - uint8_t bfinal; - uint8_t btype; + enum isal_block_state block_state; + uint32_t bfinal; + int32_t type0_block_len; + int32_t copy_overflow_length; + int32_t copy_overflow_distance; + int32_t tmp_in_size; + int32_t tmp_out_valid; + int32_t tmp_out_processed; + uint8_t tmp_in_buffer[ISAL_INFLATE_MAX_HDR_SIZE]; + uint8_t tmp_out_buffer[2 * ISAL_INFLATE_HIST_SIZE + ISAL_INFLATE_SLOP]; }; /* Initialize a struct inflate_state for deflate compressed input data at in_stream and to output * data into out_stream */ -void isal_inflate_init(struct inflate_state *state, uint8_t *in_stream, uint32_t in_size, - uint8_t *out_stream, uint64_t out_size); +void isal_inflate_init(struct inflate_state *state); /* Decompress a deflate data. This function assumes a call to igzip_inflate_init * has been made to set up the state structure to allow for decompression.*/ int isal_inflate_stateless(struct inflate_state *state); +/* Decompress a deflate data. This function assumes a call to igzip_inflate_init + * has been made to set up the state structure to allow for decompression.*/ +int isal_inflate(struct inflate_state *state); #endif