igzip: Create assembly version of isal_update_histogram

Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
Reviewed-by: Greg Tucker <greg.b.tucker@intel.com>
This commit is contained in:
Roy Oursler 2016-06-06 09:23:26 -07:00 committed by Greg Tucker
parent 7c91df5e50
commit 31814483c0
9 changed files with 892 additions and 4 deletions

View File

@ -36,7 +36,10 @@ lsrc += igzip/igzip.c igzip/hufftables_c.c \
igzip/crc32_gzip.asm igzip/detect_repeated_char.asm \
igzip/igzip_multibinary.asm \
igzip/igzip_stateless_base.c \
igzip/igzip_base.c
igzip/igzip_base.c \
igzip/igzip_update_histogram_01.asm \
igzip/igzip_update_histogram_04.asm \
igzip/rfc1951_lookup.asm
src_include += -I $(srcdir)/igzip
extern_hdrs += include/igzip_lib.h
@ -49,7 +52,7 @@ check_tests += igzip/igzip_check
perf_tests += igzip/igzip_perf igzip/igzip_sync_flush_perf
other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf
other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf igzip/igzip_hist_perf
other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \
igzip/igzip_buffer_utils_01.asm \
@ -59,6 +62,7 @@ other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \
igzip/bitbuf2.h igzip/repeated_char_result.h \
igzip/igzip_body.asm \
igzip/igzip_stateless.asm \
igzip/igzip_update_histogram.asm \
igzip/huffman.asm \
include/reg_sizes.asm \
include/multibinary.asm \
@ -94,3 +98,5 @@ igzip_igzip_inflate_test_LDADD = igzip/igzip_inflate_ref.lo libisal.la
igzip_igzip_inflate_test_LDFLAGS = -lz
igzip_check: igzip_inflate_ref.o
igzip_igzip_check_LDADD = igzip/igzip_inflate_ref.lo libisal.la
igzip_hist_perf: igzip_inflate_ref.o
igzip_igzip_hist_perf_LDADD = igzip/igzip_inflate_ref.lo libisal.la

View File

@ -142,7 +142,7 @@ void append_to_back(struct linked_list *list, struct linked_list_node *new_eleme
return;
}
void isal_update_histogram(uint8_t * start_stream, int length,
void isal_update_histogram_base(uint8_t * start_stream, int length,
struct isal_huff_histogram *histogram)
{
uint32_t literal = 0, hash;

348
igzip/igzip_hist_perf.c Normal file
View File

@ -0,0 +1,348 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <limits.h>
#include <string.h>
#include "igzip_lib.h"
#include "test.h"
#include "igzip_inflate_ref.h"
#define BUF_SIZE 1024
#define MIN_TEST_LOOPS 8
#ifndef RUN_MEM_SIZE
# define RUN_MEM_SIZE 2000000000
#endif
/**
 * Inflates a deflate stream without producing output and fills a histogram of
 * the lit, len, and dist codes seen in non-type 0 blocks.
 *
 * @param state     Inflate state; input is consumed through state->in_buffer
 *                  and the number of bytes that would have been produced is
 *                  tracked in state->out_buffer.total_out.
 * @param histogram Cleared on entry. Receives one count per literal/length
 *                  symbol and per distance symbol that was decoded and passed
 *                  validation.
 * @return DECOMPRESSION_FINISHED once the final block has been processed,
 *         END_OF_INPUT when the input runs out first,
 *         INVALID_NON_COMPRESSED_BLOCK_LENGTH, INVALID_SYMBOL or
 *         INVALID_LOOK_BACK_DISTANCE on malformed data, or whatever non-zero
 *         value read_header() reports.
 */
int igzip_inflate_hist(struct inflate_state *state, struct isal_huff_histogram *histogram)
{
	/* The following tables are based on the tables in the deflate standard,
	 * RFC 1951 page 11. */
	static const uint16_t len_start[29] = {
		0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
		0x0b, 0x0d, 0x0f, 0x11, 0x13, 0x17, 0x1b, 0x1f,
		0x23, 0x2b, 0x33, 0x3b, 0x43, 0x53, 0x63, 0x73,
		0x83, 0xa3, 0xc3, 0xe3, 0x102
	};
	static const uint8_t len_extra_bit_count[29] = {
		0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
		0x1, 0x1, 0x1, 0x1, 0x2, 0x2, 0x2, 0x2,
		0x3, 0x3, 0x3, 0x3, 0x4, 0x4, 0x4, 0x4,
		0x5, 0x5, 0x5, 0x5, 0x0
	};
	static const uint32_t dist_start[30] = {
		0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d,
		0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1,
		0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01,
		0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001
	};
	static const uint8_t dist_extra_bit_count[30] = {
		0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2,
		0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6,
		0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0xa, 0xa,
		0xb, 0xb, 0xc, 0xc, 0xd, 0xd
	};
	/* First length symbol, and one past the last valid lit/len symbol (286) */
	const uint32_t first_len_sym = 257;
	const uint32_t lit_len_sym_count =
	    first_len_sym + sizeof(len_start) / sizeof(len_start[0]);
	/* Number of valid distance symbols (30) */
	const uint32_t dist_sym_count = sizeof(dist_start) / sizeof(dist_start[0]);

	uint16_t next_lit, len, nlen;
	uint8_t next_dist;
	uint32_t repeat_length;
	uint32_t look_back_dist;
	uint32_t tmp;

	memset(histogram, 0, sizeof(struct isal_huff_histogram));

	while (state->new_block == 0 || state->bfinal == 0) {
		if (state->new_block != 0) {
			tmp = read_header(state);
			if (tmp)
				return tmp;
		}

		if (state->btype == 0) {
			/* If the block is uncompressed, update state data accordingly */
			if (state->in_buffer.avail_in < 4)
				return END_OF_INPUT;

			/* memcpy keeps the host-endian interpretation of the original
			 * loads without requiring next_in to be 2-byte aligned. */
			memcpy(&len, state->in_buffer.next_in, sizeof(len));
			state->in_buffer.next_in += 2;
			memcpy(&nlen, state->in_buffer.next_in, sizeof(nlen));
			state->in_buffer.next_in += 2;

			/* Account for the 4 header bytes before comparing against len,
			 * otherwise the later subtraction can run below zero. */
			state->in_buffer.avail_in -= 4;

			/* Check if len and nlen match */
			if (len != (~nlen & 0xffff))
				return INVALID_NON_COMPRESSED_BLOCK_LENGTH;

			if (state->in_buffer.avail_in < len) {
				/* Stored block is truncated: consume what is there and
				 * report that more input is needed. */
				state->out_buffer.total_out += state->in_buffer.avail_in;
				state->in_buffer.next_in += state->in_buffer.avail_in;
				state->in_buffer.avail_in = 0;
				return END_OF_INPUT;
			}

			state->out_buffer.total_out += len;
			state->in_buffer.next_in += len;
			state->in_buffer.avail_in -= len;
			state->new_block = 1;

		} else {
			/* Else decode a huffman encoded block */
			while (state->new_block == 0) {
				/* While not at the end of block, decode the next
				 * symbol */
				next_lit =
				    decode_next(&state->in_buffer, &state->lit_huff_code);

				if (state->in_buffer.read_in_length < 0)
					return END_OF_INPUT;

				/* Validate before counting so an out-of-range symbol can
				 * never index past the end of lit_len_histogram. */
				if (next_lit >= lit_len_sym_count)
					return INVALID_SYMBOL;

				histogram->lit_len_histogram[next_lit] += 1;

				if (next_lit < 256) {
					/* Next symbol is a literal */
					state->out_buffer.total_out++;
					continue;
				}

				if (next_lit == 256) {
					/* Next symbol is end of block */
					state->new_block = 1;
					continue;
				}

				/* Next symbol is a repeat length followed by a
				   lookback distance */
				repeat_length =
				    len_start[next_lit - first_len_sym] +
				    inflate_in_read_bits(&state->in_buffer,
							 len_extra_bit_count[next_lit -
									     first_len_sym]);

				next_dist = decode_next(&state->in_buffer,
							&state->dist_huff_code);

				/* Out of input takes priority: the decoded value is
				 * meaningless in that case. */
				if (state->in_buffer.read_in_length < 0)
					return END_OF_INPUT;

				/* Distance symbols beyond the table would index past
				 * dist_start / dist_extra_bit_count. */
				if (next_dist >= dist_sym_count)
					return INVALID_SYMBOL;

				histogram->dist_histogram[next_dist] += 1;

				look_back_dist = dist_start[next_dist] +
				    inflate_in_read_bits(&state->in_buffer,
							 dist_extra_bit_count[next_dist]);

				if (state->in_buffer.read_in_length < 0)
					return END_OF_INPUT;

				if (look_back_dist > state->out_buffer.total_out)
					return INVALID_LOOK_BACK_DISTANCE;

				state->out_buffer.total_out += repeat_length;
			}
		}
	}

	/* Hand whole bytes still held in the bit buffer back to the byte stream */
	state->in_buffer.next_in -= state->in_buffer.read_in_length / 8;
	state->in_buffer.avail_in += state->in_buffer.read_in_length / 8;

	return DECOMPRESSION_FINISHED;
}
/**
 * Returns the size in bytes of the file behind an open stream and leaves the
 * stream positioned where it was on entry.
 *
 * @param f Open, seekable stream.
 * @return File size, or -1 if the stream cannot be queried/seeked or the size
 *         does not fit in an int.
 */
int get_filesize(FILE * f)
{
	long saved_pos, end_pos;

	saved_pos = ftell(f);	/* Save current position */
	if (saved_pos < 0)
		return -1;

	if (fseek(f, 0L, SEEK_END) != 0)
		return -1;
	end_pos = ftell(f);

	if (fseek(f, saved_pos, SEEK_SET) != 0)	/* Restore position */
		return -1;

	/* ftell() reports a long; refuse sizes the int return type cannot hold
	 * instead of silently truncating them. */
	if (end_pos < 0 || end_pos > INT_MAX)
		return -1;

	return (int)end_pos;
}
/* Prints `count` 64-bit counters under the heading `title`, sixteen values
 * per row separated by ", ". */
static void print_count_table(const char *title, const uint64_t * counts, int count)
{
	int i;

	fputs(title, stdout);
	for (i = 0; i < count; i++) {
		fputs((i % 16 == 0) ? "\n" : ", ", stdout);
		/* uint64_t is not unsigned long on every platform; cast so the
		 * argument always matches the conversion specifier. */
		printf("%4llu", (unsigned long long)counts[i]);
	}
	fputs("\n", stdout);
}

/**
 * Dumps the literal/length and distance counters of a histogram to stdout.
 *
 * @param histogram Histogram to print; it is not modified.
 */
void print_histogram(struct isal_huff_histogram *histogram)
{
	print_count_table("Lit Len histogram", histogram->lit_len_histogram, IGZIP_LIT_LEN);
	print_count_table("Dist histogram", histogram->dist_histogram, IGZIP_DIST_LEN);
}
/* Prints, sixteen per row, the relative error in percent of other[i] with
 * respect to reference[i]. Equal counters print as an exact 0; differing
 * counters are prefixed with '~'. */
static void print_relative_error_table(const char *title, const uint64_t * reference,
				       const uint64_t * other, int count)
{
	int i;
	uint64_t difference;
	double relative_error;

	fputs(title, stdout);
	for (i = 0; i < count; i++) {
		fputs((i % 16 == 0) ? "\n" : ", ", stdout);

		if (reference[i] == other[i]) {
			printf(" % 4.0f %%", 0.0);
			continue;
		}

		/* Take the magnitude in the unsigned domain. Passing the wrapped
		 * 64-bit difference through abs() would truncate it to int. */
		difference = (reference[i] > other[i]) ?
		    reference[i] - other[i] : other[i] - reference[i];

		/* NOTE(review): when reference[i] is 0 the quotient is not finite
		 * (prints as inf with IEEE-754 doubles), as in the original. */
		relative_error = (double)difference;
		relative_error = relative_error / reference[i];
		relative_error = 100.0 * relative_error;
		printf("~% 4.0f %%", relative_error);
	}
	fputs("\n", stdout);
}

/**
 * Prints the per-symbol relative error of histogram2 against histogram1 for
 * both the literal/length and the distance counters.
 *
 * @param histogram1 Reference histogram (denominator of the relative error).
 * @param histogram2 Histogram being compared against the reference.
 */
void print_diff_histogram(struct isal_huff_histogram *histogram1,
			  struct isal_huff_histogram *histogram2)
{
	print_relative_error_table("Lit Len histogram relative error",
				   histogram1->lit_len_histogram,
				   histogram2->lit_len_histogram, IGZIP_LIT_LEN);
	print_relative_error_table("Dist histogram relative error",
				   histogram1->dist_histogram,
				   histogram2->dist_histogram, IGZIP_DIST_LEN);
}
int main(int argc, char *argv[])
{
FILE *in;
unsigned char *inbuf, *outbuf;
int i, infile_size, outbuf_size, iterations, avail_in;
struct isal_huff_histogram histogram1, histogram2;
struct isal_hufftables hufftables_custom;
struct isal_zstream stream;
struct inflate_state gstream;
memset(&histogram1, 0, sizeof(histogram1));
memset(&histogram2, 0, sizeof(histogram2));
if (argc > 3 || argc < 2) {
fprintf(stderr, "Usage: igzip_file_perf infile [outfile]\n"
"\t - Runs multiple iterations of igzip on a file to "
"get more accurate time results.\n");
exit(0);
}
in = fopen(argv[1], "rb");
if (!in) {
fprintf(stderr, "Can't open %s for reading\n", argv[1]);
exit(0);
}
/* Allocate space for entire input file and output
* (assuming some possible expansion on output size)
*/
infile_size = get_filesize(in);
outbuf_size = 2 * infile_size;
if (infile_size != 0)
iterations = RUN_MEM_SIZE / infile_size;
else
iterations = MIN_TEST_LOOPS;
if (iterations < MIN_TEST_LOOPS)
iterations = MIN_TEST_LOOPS;
inbuf = malloc(infile_size);
outbuf = malloc(outbuf_size);
if (inbuf == NULL) {
fprintf(stderr, "Can't allocate input buffer memory\n");
exit(0);
}
if (outbuf == NULL) {
fprintf(stderr, "Can't allocate output buffer memory\n");
exit(0);
}
avail_in = fread(inbuf, 1, infile_size, in);
if (avail_in != infile_size) {
fprintf(stderr, "Couldn't fit all of input file into buffer\n");
exit(0);
}
struct perf start, stop;
perf_start(&start);
for (i = 0; i < iterations; i++)
isal_update_histogram(inbuf, infile_size, &histogram1);
perf_stop(&stop);
printf(" file %s - in_size=%d iter=%d\n", argv[1], infile_size, i);
printf("igzip_file: ");
perf_print(stop, start, (long long)infile_size * i);
memset(&histogram1, 0, sizeof(histogram1));
isal_update_histogram(inbuf, infile_size, &histogram1);
isal_create_hufftables(&hufftables_custom, &histogram1);
isal_deflate_init(&stream);
stream.end_of_stream = 1; /* Do the entire file at once */
stream.flush = NO_FLUSH;
stream.next_in = inbuf;
stream.avail_in = infile_size;
stream.next_out = outbuf;
stream.avail_out = outbuf_size;
stream.hufftables = &hufftables_custom;
isal_deflate_stateless(&stream);
igzip_inflate_init(&gstream, outbuf, stream.total_out, NULL, 0);
igzip_inflate_hist(&gstream, &histogram2);
printf("Histogram Error \n");
print_diff_histogram(&histogram1, &histogram2);
fclose(in);
fflush(0);
return 0;
}

View File

@ -51,6 +51,10 @@ extern isal_deflate_finish_01
extern get_crc_base
extern get_crc_01
extern isal_update_histogram_base
extern isal_update_histogram_01
extern isal_update_histogram_04
extern isal_deflate_init_base
extern isal_deflate_init_01
@ -71,3 +75,6 @@ mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_
mbin_interface get_crc
mbin_dispatch_init5 get_crc, get_crc_base, get_crc_01, get_crc_01, get_crc_01
;; Multibinary entry point for isal_update_histogram.
;; NOTE(review): both macros are defined outside this view. By analogy with
;; the get_crc lines above, the arguments after the interface name look like
;; the base implementation followed by per-CPU-level variants (here _01, _01
;; and _04) -- confirm the slot meaning in multibinary.asm.
mbin_interface isal_update_histogram
mbin_dispatch_init5 isal_update_histogram, isal_update_histogram_base, isal_update_histogram_01, isal_update_histogram_01, isal_update_histogram_04

View File

@ -0,0 +1,467 @@
%include "options.asm"
%include "lz0a_const.asm"
%include "data_struct2.asm"
%include "bitbuf2.asm"
%include "huffman.asm"
%include "igzip_compare_types.asm"
%include "reg_sizes.asm"
%include "stdmac.asm"
;; Match-length lookup table; loaded into rfc_lookup at function entry.
extern rfc1951_lookup_table
;; Byte offset of the length-to-code array inside rfc1951_lookup_table
_len_to_code_offset equ 0
%define LAST_BYTES_COUNT 3 ; Bytes to prevent reading out of array bounds
%define LA_STATELESS 264 ; Max number of bytes read in loop2 rounded up to 8 byte boundary
;; Histogram geometry. LIT_LEN and DIST_LEN are used below to compute
;; _dist_offset and _hash_offset; HIST_ELEM_SIZE is the byte size of one
;; counter. These are expected to mirror struct isal_huff_histogram --
;; TODO confirm against igzip_lib.h.
%define LIT_LEN 286
%define DIST_LEN 30
%define HIST_ELEM_SIZE 8
;; MARK <name>: when DEBUG is defined, emits <name> as a global label at the
;; point of use; otherwise expands to nothing.
%ifdef DEBUG
%macro MARK 1
global %1
%1:
%endm
%else
%macro MARK 1
%endm
%endif
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Register assignments.
;; file_start, file_length and histogram name rdi, rsi and rdx: the three
;; function arguments are read directly from those registers.
;; NOTE(review): no remapping for other calling conventions is visible here.
;;
;; Several names alias the same physical register and therefore must not be
;; live at the same time:
;;   rbx: dist / dist_code2        r12: dist2 / dist_code
;;   rbp: len / len_code / hash3   r8 : curr_data2 / len2
;; tmp2 is rcx, which the dist_to_dist_code macros also use as scratch.
%define file_start rdi
%define file_length rsi
%define histogram rdx
%define rfc_lookup r9
%define f_i r10
%define curr_data rax
%define tmp2 rcx
%define dist rbx
%define dist_code2 rbx
%define dist2 r12
%define dist_code r12
%define len rbp
%define len_code rbp
%define hash3 rbp
%define curr_data2 r8
%define len2 r8
%define tmp1 r11
%define tmp3 r13
%define hash r14
%define hash2 r15
;; Vector scratch registers handed to the compare250_x / compare250_y macros
%define xtmp0 xmm0
%define xtmp1 xmm1
%define ytmp0 ymm0
%define ytmp1 ymm1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Stack frame layout (offsets from rsp after the prologue).
;; _eob_count_offset holds the end-of-block count (entry value + 1) that is
;; written back to lit_len[256] just before returning.
_eob_count_offset equ 0 ; local variable (8 bytes)
;; NOTE(review): f_end_i_mem_offset and xmm_save_mem_offset are reserved here
;; but not referenced by the code visible in this file.
f_end_i_mem_offset equ 8
gpr_save_mem_offset equ 16 ; gpr save area (8*8 bytes)
xmm_save_mem_offset equ 16 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
;; 2 locals + 8 gprs + 4 xmm slots + 8 bytes of padding = 152 bytes
stack_size equ 2*8 + 8*8 + 4*16 + 8
;;; 8 because stack address is odd multiple of 8 after a function call and
;;; we want it aligned to 16 bytes
;; Byte offsets of the three arrays addressed through the histogram pointer:
;; lit/len counters, distance counters, then the 16-bit hash table.
_lit_len_offset equ 0
_dist_offset equ (8 * LIT_LEN)
_hash_offset equ (_dist_offset + 8 * DIST_LEN)
;; len_to_len_code len_code, len, rfc_lookup
;; Loads the byte at rfc_lookup[_len_to_code_offset + len] and ORs in 0x100.
;; Callers use the result as the index of the lit/len counter to increment
;; for a match of length len. The output register may be the same as len.
%macro len_to_len_code 3
%define %%len_code %1 ; Output
%define %%len %2 ; Input
%define %%rfc_lookup %3
movzx %%len_code, byte [%%rfc_lookup + _len_to_code_offset + %%len]
or %%len_code, 0x100
%endm
;; dist_to_dist_code dist_code, dist
;; Converts a look back distance into the index of its distance counter.
;; With d = dist - 1:
;;   d <= 1 : code = d
;;   else   : n = bsr(d) - 1 ; code = (d >> n) + 2*n
;; (e.g. dist 5 -> 4, dist 7 -> 5, dist 9 -> 6).
;; Both operands are given as 64-bit register names; the 'd' suffix selects
;; their 32-bit form. bsr leaves its destination undefined for d == 0; the
;; final cmovle overrides the result in that case.
;; NOTE(review): SHRX is a macro defined outside this view.
;;; Clobbers rcx and dist
%macro dist_to_dist_code 2
%define %%dist_code %1 ; Output code associated with dist
%define %%dist_coded %1d
%define %%dist %2d ; Input dist
dec %%dist
mov %%dist_coded, %%dist
bsr ecx, %%dist_coded
dec ecx
SHRX %%dist_code, %%dist_code, rcx
lea %%dist_coded, [%%dist_coded + 2*ecx]
cmp %%dist, 1
cmovle %%dist_coded, %%dist
%endm
;; dist_to_dist_code2 dist_code, dist
;; Same conversion as dist_to_dist_code, but the input register holds
;; -(dist - 1), the form the main loop keeps its candidate distances in.
;; The value is negated back to d = dist - 1 first, then:
;;   d <= 1 : code = d
;;   else   : n = bsr(d) - 1 ; code = (d >> n) + 2*n
;; NOTE(review): SHRX is a macro defined outside this view.
;;; Clobbers rcx and dist
%macro dist_to_dist_code2 2
%define %%dist_code %1 ; Output code associated with dist
%define %%dist_coded %1d
%define %%dist %2d ; Input -(dist - 1)
neg %%dist
mov %%dist_coded, %%dist
bsr ecx, %%dist_coded
dec ecx
SHRX %%dist_code, %%dist_code, rcx
lea %%dist_coded, [%%dist_coded + 2*ecx]
cmp %%dist, 1
cmovle %%dist_coded, %%dist
%endm
;; void isal_update_histogram_<ARCH>(uint8_t *start_stream, int length,
;;                                   struct isal_huff_histogram *histogram)
;;
;; Entry and setup: widens the length argument, returns immediately for an
;; empty input, saves registers, remembers the end-of-block count (+1) that
;; is written back on exit, resets the hash table and decides whether the
;; input is long enough for the main loop.
global isal_update_histogram_ %+ ARCH
isal_update_histogram_ %+ ARCH %+ :
	;; length is a C int, so only the low 32 bits of its register are
	;; defined on entry. Sign-extend it before the 64-bit compares and
	;; arithmetic below. (esi is the 32-bit form of file_length; keep the
	;; two in sync if that %define ever changes.)
	movsxd	file_length, esi

	;; do nothing if (avail_in == 0)
	cmp	file_length, 0
	jne	skip1
	ret

skip1:
%ifdef ALIGN_STACK
	push	rbp
	mov	rbp, rsp
	sub	rsp, stack_size
	and	rsp, ~15
%else
	sub	rsp, stack_size
%endif

	mov	[rsp + gpr_save_mem_offset + 0*8], rbx
	mov	[rsp + gpr_save_mem_offset + 1*8], rsi
	mov	[rsp + gpr_save_mem_offset + 2*8], rdi
	mov	[rsp + gpr_save_mem_offset + 3*8], rbp
	mov	[rsp + gpr_save_mem_offset + 4*8], r12
	mov	[rsp + gpr_save_mem_offset + 5*8], r13
	mov	[rsp + gpr_save_mem_offset + 6*8], r14
	mov	[rsp + gpr_save_mem_offset + 7*8], r15

	mov	f_i, 0

	;; Remember (current end-of-block count + 1); it is stored back to
	;; lit_len[256] at label "end".
	mov	tmp1, qword [histogram + _lit_len_offset + 8*256]
	inc	tmp1
	mov	[rsp + _eob_count_offset], tmp1

	lea	rfc_lookup, [rfc1951_lookup_table]

	;; Init hash_table: every 16-bit entry is set to -(D+1), walking from
	;; index HASH_SIZE-1 down to 0.
	mov	rcx, (HASH_SIZE-1)
init_hash_table:
	mov	word [histogram + _hash_offset + 2*rcx], -(D+1)
	sub	rcx, 1
	jge	init_hash_table

	;; The main loop only runs while at least LA_STATELESS bytes of
	;; look-ahead remain; shorter inputs go straight to the tail loops.
	sub	file_length, LA_STATELESS
	cmp	file_length, 0
	jle	end_loop_2
;; Load first literal into histogram
;; The byte at position 0 is counted unconditionally as a literal and its
;; position is stored in the hash table under the hash of the data there.
mov curr_data, [file_start + f_i]
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
mov [histogram + _hash_offset + 2 * hash], f_i %+ w
and curr_data, 0xff
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
inc f_i
;; Setup to begin loop 2
;; On entry to loop2: curr_data holds data loaded from f_i, hash/hash2 hold
;; the masked hashes of the data at f_i and f_i + 1.
mov curr_data, [file_start + f_i]
mov curr_data2, curr_data
compute_hash hash, curr_data
shr curr_data2, 8
compute_hash hash2, curr_data2
and hash2 %+ d, HASH_MASK
and hash, HASH_MASK
;; Main loop: each iteration examines two adjacent positions, f_i and
;; f_i + 1, and counts either a match at the first position, a literal
;; followed by a match at the second, or two literals.
loop2:
xor dist, dist
xor dist2, dist2
xor tmp3, tmp3
;; tmp1 = address of the first of the two positions
lea tmp1, [file_start + f_i]
;; Load possible look back distances and update hash data
;; Positions are kept as 16-bit values in the hash table, so the distances
;; are computed with 16-bit arithmetic.
mov dist %+ w, f_i %+ w
sub dist %+ w, word [histogram + _hash_offset + 2 * hash]
mov [histogram + _hash_offset + 2 * hash], f_i %+ w
add f_i, 1
mov dist2 %+ w, f_i %+ w
sub dist2 %+ w, word [histogram + _hash_offset + 2 * hash2]
mov [histogram + _hash_offset + 2 * hash2], f_i %+ w
;; Start computing hashes to be used in either the next loop or
;; for updating the hash if a match is found
mov curr_data2, [file_start + f_i + 1]
mov tmp2, curr_data2
compute_hash hash, curr_data2
;; Check if look back distances are valid. Load a junk distance of 1
;; if the look back distance is too long for speculative lookups.
;; After this sequence dist/dist2 hold -(distance - 1), or 0 for a rejected
;; candidate (tmp3 is zero here), so [tmp1 + dist - 1] addresses the
;; candidate, or the previous byte when the candidate was rejected.
sub dist, 1
cmp dist %+ d, (D-1)
cmovae dist, tmp3
neg dist
sub dist2, 1
cmp dist2 %+ d, (D-1)
cmovae dist2, tmp3
neg dist2
shr tmp2, 8
compute_hash hash2, tmp2
;; Check for long len/dist matches (>7)
;; XOR eight bytes at the current position with eight bytes at the
;; candidate; a zero result means all eight bytes are equal.
mov len, [tmp1]
xor len, [tmp1 + dist - 1]
jz compare_loop
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
mov len2, [tmp1 + 1]
xor len2, [tmp1 + dist2]
jz compare_loop2
;; Speculatively load the code for the first literal
movzx tmp1, curr_data %+ b
shr curr_data, 8
;; tmp3 = position whose hash entry is refreshed if a match is taken
lea tmp3, [f_i + 1]
;; Check for len/dist match for first literal
;; Low 32 bits of the XOR all zero: at least the first four bytes match.
test len %+ d, 0xFFFFFFFF
jz len_dist_huffman_pre
;; Store first literal
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * tmp1]
;; Speculatively load the code for the second literal
and curr_data, 0xff
;; Check for len/dist match for second literal
test len2 %+ d, 0xFFFFFFFF
jnz lit_lit_huffman
;; Literal at the first position followed by a match at the second.
len_dist_lit_huffman_pre:
;; Calculate repeat length
;; Trailing zero bits of the XOR divided by 8 = number of leading equal bytes
tzcnt len2, len2
shr len2, 3
len_dist_lit_huffman:
;; Store updated hashes
mov [histogram + _hash_offset + 2 * hash], tmp3 %+ w
add tmp3,1
mov [histogram + _hash_offset + 2 * hash2], tmp3 %+ w
;; f_i currently points at the second position; skip over the match
add f_i, len2
;; Reload data and start the hashes for the next iteration while the
;; length and distance codes are being computed.
mov curr_data, [file_start + f_i]
mov tmp1, curr_data
compute_hash hash, curr_data
dist_to_dist_code2 dist_code2, dist2
len_to_len_code len_code, len2, rfc_lookup
shr tmp1, 8
compute_hash hash2, tmp1
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code]
inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code2]
and hash2 %+ d, HASH_MASK
and hash, HASH_MASK
cmp f_i, file_length
jl loop2
jmp end_loop_2
;; encode as dist/len
;; Match at the first position.
len_dist_huffman_pre:
;; Trailing zero bits of the XOR divided by 8 = number of leading equal bytes
tzcnt len, len
shr len, 3
len_dist_huffman:
mov [histogram + _hash_offset + 2 * hash], tmp3 %+ w
;; f_i was already advanced by one in loop2; step back to the first
;; position before skipping over the match.
dec f_i
add f_i, len
mov curr_data, [file_start + f_i]
mov tmp1, curr_data
compute_hash hash, curr_data
dist_to_dist_code2 dist_code, dist
len_to_len_code len_code, len, rfc_lookup
shr tmp1, 8
compute_hash hash2, tmp1
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code]
inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code]
and hash2 %+ d, HASH_MASK
and hash, HASH_MASK
cmp f_i, file_length
jl loop2
jmp end_loop_2
;; Two literals: the first was counted above, count the second here.
;; hash/hash2 were already computed and masked for the next two positions.
lit_lit_huffman:
add f_i, 1
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
mov curr_data %+ d, [file_start + f_i]
cmp f_i, file_length
jl loop2
;; Tail handling. file_length was reduced by LA_STATELESS before the main
;; loop; restore it to (length - LAST_BYTES_COUNT) for the bounded loop below.
end_loop_2:
add file_length, LA_STATELESS - LAST_BYTES_COUNT
cmp f_i, file_length
jge final_bytes
;; One position per iteration, with the match length limited to the bytes
;; that remain in the input.
;; NOTE(review): the 8-byte load below runs while f_i < length - 3, so it can
;; touch up to 4 bytes past the end of the input -- confirm this is acceptable
;; for callers or that only the low dword is needed by compute_hash.
loop2_finish:
mov curr_data, [file_start + f_i]
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
;; Calculate possible distance for length/dist pair.
xor dist, dist
mov dist %+ w, f_i %+ w
sub dist %+ w, word [histogram + _hash_offset + 2 * hash]
mov [histogram + _hash_offset + 2 * hash], f_i %+ w
;; Check if look back distance is valid (the dec is to handle when dist = 0)
dec dist
cmp dist %+ d, (D-1)
jae encode_literal_finish
inc dist
;; Check if look back distance is a match
;; tmp3 = number of input bytes left from f_i (upper bound for the compare)
lea tmp3, [file_length + LAST_BYTES_COUNT]
sub tmp3, f_i
lea tmp1, [file_start + f_i]
mov tmp2, tmp1
sub tmp2, dist
compare tmp3, tmp1, tmp2, len, tmp3
;; Limit len to maximum value of 258
mov tmp2, 258
cmp len, 258
cmova len, tmp2
cmp len, SHORTEST_MATCH
jb encode_literal_finish
add f_i, len
len_to_len_code len_code, len, rfc_lookup
dist_to_dist_code dist_code, dist
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code]
inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code]
cmp f_i, file_length
jl loop2_finish
jmp final_bytes
encode_literal_finish:
;; Encode literal
and curr_data %+ d, 0xFF
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
;; Setup for next loop
add f_i, 1
cmp f_i, file_length
jl loop2_finish
;; The last LAST_BYTES_COUNT bytes (or whatever is left) are counted as
;; literals, one byte at a time.
final_bytes:
add file_length, LAST_BYTES_COUNT
final_bytes_loop:
cmp f_i, file_length
jge end
movzx curr_data, byte [file_start + f_i]
inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
inc f_i
jmp final_bytes_loop
end:
;; Handle eob at end of stream
;; Writes back the value saved at entry (previous count + 1).
mov tmp1, [rsp + _eob_count_offset]
mov qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * 256], tmp1
;; Restore the registers saved in the prologue.
mov rbx, [rsp + gpr_save_mem_offset + 0*8]
mov rsi, [rsp + gpr_save_mem_offset + 1*8]
mov rdi, [rsp + gpr_save_mem_offset + 2*8]
mov rbp, [rsp + gpr_save_mem_offset + 3*8]
mov r12, [rsp + gpr_save_mem_offset + 4*8]
mov r13, [rsp + gpr_save_mem_offset + 5*8]
mov r14, [rsp + gpr_save_mem_offset + 6*8]
mov r15, [rsp + gpr_save_mem_offset + 7*8]
;; With ALIGN_STACK the rbp value saved above is the frame pointer set up in
;; the prologue, so rsp is recovered from it and the caller's rbp is popped.
%ifndef ALIGN_STACK
add rsp, stack_size
%else
mov rsp, rbp
pop rbp
%endif
ret
;; Reached from loop2 when the first eight bytes at the first position equal
;; the candidate. tmp2 is set to the candidate address and the compare250*
;; macro selected by COMPARE_TYPE is invoked with len as an operand.
;; NOTE(review): the compare250* macros are defined outside this view;
;; presumably they leave the match length in len -- confirm.
compare_loop:
and hash %+ d, HASH_MASK
lea tmp2, [tmp1 + dist - 1]
%if (COMPARE_TYPE == 1)
compare250 tmp1, tmp2, len, tmp3
%elif (COMPARE_TYPE == 2)
compare250_x tmp1, tmp2, len, tmp3, xtmp0, xtmp1
%elif (COMPARE_TYPE == 3)
compare250_y tmp1, tmp2, len, tmp3, ytmp0, ytmp1
%else
%error Unknown Compare type COMPARE_TYPE
% error
%endif
;; tmp3 = position stored into the hash table by len_dist_huffman
lea tmp3, [f_i + 1]
jmp len_dist_huffman
;; Same as compare_loop for a match starting at the second position: tmp1 is
;; advanced to that position, and the byte at the first position is counted
;; as a literal before joining len_dist_lit_huffman.
compare_loop2:
add tmp1, 1
lea tmp2, [tmp1 + dist2 - 1]
%if (COMPARE_TYPE == 1)
compare250 tmp1, tmp2, len2, tmp3
%elif (COMPARE_TYPE == 2)
compare250_x tmp1, tmp2, len2, tmp3, xtmp0, xtmp1
%elif (COMPARE_TYPE == 3)
compare250_y tmp1, tmp2, len2, tmp3, ytmp0, ytmp1
%else
%error Unknown Compare type COMPARE_TYPE
% error
%endif
and curr_data, 0xff
inc qword [histogram + _lit_len_offset + 8 * curr_data]
lea tmp3, [f_i + 1]
jmp len_dist_lit_huffman
section .data
;; NOTE(review): const_D and const_30 are not referenced by the code visible
;; in this file, and the qwords below are only given 4-byte alignment.
align 4
const_D: dq D
const_30: dq 30

View File

@ -0,0 +1,7 @@
;; Builds isal_update_histogram_01: ARCH supplies the symbol suffix, and
;; COMPARE_TYPE 2 (unless overridden) selects the compare250_x macro, which
;; is passed xmm scratch registers.
%define ARCH 01
%ifndef COMPARE_TYPE
%define COMPARE_TYPE 2
%endif
%include "igzip_update_histogram.asm"

View File

@ -0,0 +1,8 @@
;; Builds isal_update_histogram_04: ARCH supplies the symbol suffix, and
;; COMPARE_TYPE 3 (unless overridden) selects the compare250_y macro, which
;; is passed ymm scratch registers.
;; NOTE(review): USE_HSWNI is consumed by the included files, not by anything
;; visible here; presumably it switches helper macros such as SHRX to their
;; Haswell-instruction forms -- confirm.
%define ARCH 04
%define USE_HSWNI
%ifndef COMPARE_TYPE
%define COMPARE_TYPE 3
%endif
%include "igzip_update_histogram.asm"

44
igzip/rfc1951_lookup.asm Normal file
View File

@ -0,0 +1,44 @@
%ifndef RFC1951_LOOKUP
%define RFC1951_LOOKUP
section .data
align 8
;; rfc1951_lookup_table / len_to_code: 259 bytes indexed by match length
;; (0..258). Each entry is the low byte of the length symbol for that length;
;; igzip_update_histogram.asm ORs in 0x100 to form the lit/len histogram
;; index (length 3 -> 0x01 -> 257, ..., length 258 -> 0x1d -> 285).
;; The first three entries (lengths 0..2) are 0.
global rfc1951_lookup_table:data internal
rfc1951_lookup_table:
len_to_code:
db 0x00, 0x00, 0x00
db 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
db 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c
db 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0e
db 0x0f, 0x0f, 0x0f, 0x0f, 0x10, 0x10, 0x10, 0x10
db 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11
db 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12
db 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13
db 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14
db 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
db 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
db 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
db 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
db 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
db 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
db 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
db 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
db 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
db 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
db 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
db 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
db 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
db 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
db 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
db 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
db 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
db 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
db 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
db 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1d
%endif

View File

@ -190,6 +190,7 @@ enum isal_zstate_state {
struct isal_huff_histogram {
uint64_t lit_len_histogram[IGZIP_LIT_LEN]; //!< Count per literal/length symbol
uint64_t dist_histogram[IGZIP_DIST_LEN]; //!< Count per distance symbol
uint16_t hash_table[HASH_SIZE]; //!< 16-bit position table; the assembly isal_update_histogram reinitialises and uses it as working space on every call
};
/** @brief Holds Bit Buffer information*/