Add data compression unit

Include fast DEFLATE-compatible compression functions.

Signed-off-by: Greg Tucker <greg.b.tucker@intel.com>
This commit is contained in:
Greg Tucker 2016-06-02 13:52:23 -07:00
parent 61164e105b
commit 660f49b02d
54 changed files with 17226 additions and 14 deletions

View File

@ -1,7 +1,7 @@
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

View File

@ -27,6 +27,7 @@ perf_tests32=
include erasure_code/Makefile.am
include raid/Makefile.am
include crc/Makefile.am
include igzip/Makefile.am
# LIB version info not necessarily the same as package version
LIBISAL_CURRENT=2

View File

@ -1,5 +1,5 @@
########################################################################
# Copyright(c) 2011-2015 Intel Corporation All rights reserved.
# Copyright(c) 2011-2016 Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@ -27,11 +27,91 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
objs = \
bin\ec_base.obj \
bin\ec_highlevel_func.obj \
bin\ec_multibinary.obj \
bin\gf_2vect_dot_prod_avx.obj \
bin\gf_2vect_dot_prod_avx2.obj \
bin\gf_2vect_dot_prod_avx512.obj \
bin\gf_2vect_dot_prod_sse.obj \
bin\gf_2vect_mad_avx.obj \
bin\gf_2vect_mad_avx2.obj \
bin\gf_2vect_mad_avx512.obj \
bin\gf_2vect_mad_sse.obj \
bin\gf_3vect_dot_prod_avx.obj \
bin\gf_3vect_dot_prod_avx2.obj \
bin\gf_3vect_dot_prod_avx512.obj \
bin\gf_3vect_dot_prod_sse.obj \
bin\gf_3vect_mad_avx.obj \
bin\gf_3vect_mad_avx2.obj \
bin\gf_3vect_mad_avx512.obj \
bin\gf_3vect_mad_sse.obj \
bin\gf_4vect_dot_prod_avx.obj \
bin\gf_4vect_dot_prod_avx2.obj \
bin\gf_4vect_dot_prod_avx512.obj \
bin\gf_4vect_dot_prod_sse.obj \
bin\gf_4vect_mad_avx.obj \
bin\gf_4vect_mad_avx2.obj \
bin\gf_4vect_mad_avx512.obj \
bin\gf_4vect_mad_sse.obj \
bin\gf_5vect_dot_prod_avx.obj \
bin\gf_5vect_dot_prod_avx2.obj \
bin\gf_5vect_dot_prod_sse.obj \
bin\gf_5vect_mad_avx.obj \
bin\gf_5vect_mad_avx2.obj \
bin\gf_5vect_mad_sse.obj \
bin\gf_6vect_dot_prod_avx.obj \
bin\gf_6vect_dot_prod_avx2.obj \
bin\gf_6vect_dot_prod_sse.obj \
bin\gf_6vect_mad_avx.obj \
bin\gf_6vect_mad_avx2.obj \
bin\gf_6vect_mad_sse.obj \
bin\gf_vect_dot_prod_avx.obj \
bin\gf_vect_dot_prod_avx2.obj \
bin\gf_vect_dot_prod_avx512.obj \
bin\gf_vect_dot_prod_sse.obj \
bin\gf_vect_mad_avx.obj \
bin\gf_vect_mad_avx2.obj \
bin\gf_vect_mad_avx512.obj \
bin\gf_vect_mad_sse.obj \
bin\gf_vect_mul_avx.obj \
bin\gf_vect_mul_sse.obj \
bin\pq_check_sse.obj \
bin\pq_gen_avx.obj \
bin\pq_gen_avx2.obj \
bin\pq_gen_sse.obj \
bin\raid_base.obj \
bin\raid_multibinary.obj \
bin\xor_check_sse.obj \
bin\xor_gen_avx.obj \
bin\xor_gen_sse.obj \
bin\crc16_t10dif_01.obj \
bin\crc16_t10dif_by4.obj \
bin\crc32_gzip.obj \
bin\crc32_ieee_01.obj \
bin\crc32_ieee_by4.obj \
bin\crc32_iscsi_00.obj \
bin\crc32_iscsi_01.obj \
bin\crc_base.obj \
bin\crc_data.obj \
bin\crc_multibinary.obj \
bin\huff_codes.obj \
bin\hufftables_c.obj \
bin\igzip.obj \
bin\igzip_base.obj \
bin\igzip_body_01.obj \
bin\igzip_body_04.obj \
bin\igzip_finish.obj \
bin\igzip_multibinary.obj \
bin\igzip_stateless_01.obj \
bin\igzip_stateless_04.obj \
bin\igzip_stateless_base.obj \
bin\crc_utils_01.obj \
bin\crc_utils_04.obj \
bin\detect_repeated_char.obj
objs = bin\ec_base.obj bin\ec_highlevel_func.obj bin\ec_multibinary.obj bin\gf_2vect_dot_prod_avx.obj bin\gf_2vect_dot_prod_avx2.obj bin\gf_2vect_dot_prod_avx512.obj bin\gf_2vect_dot_prod_sse.obj bin\gf_2vect_mad_avx.obj bin\gf_2vect_mad_avx2.obj bin\gf_2vect_mad_avx512.obj bin\gf_2vect_mad_sse.obj bin\gf_3vect_dot_prod_avx.obj bin\gf_3vect_dot_prod_avx2.obj bin\gf_3vect_dot_prod_avx512.obj bin\gf_3vect_dot_prod_sse.obj bin\gf_3vect_mad_avx.obj bin\gf_3vect_mad_avx2.obj bin\gf_3vect_mad_avx512.obj bin\gf_3vect_mad_sse.obj bin\gf_4vect_dot_prod_avx.obj bin\gf_4vect_dot_prod_avx2.obj bin\gf_4vect_dot_prod_avx512.obj bin\gf_4vect_dot_prod_sse.obj bin\gf_4vect_mad_avx.obj bin\gf_4vect_mad_avx2.obj bin\gf_4vect_mad_avx512.obj bin\gf_4vect_mad_sse.obj bin\gf_5vect_dot_prod_avx.obj bin\gf_5vect_dot_prod_avx2.obj bin\gf_5vect_dot_prod_sse.obj bin\gf_5vect_mad_avx.obj bin\gf_5vect_mad_avx2.obj bin\gf_5vect_mad_sse.obj bin\gf_6vect_dot_prod_avx.obj bin\gf_6vect_dot_prod_avx2.obj bin\gf_6vect_dot_prod_sse.obj bin\gf_6vect_mad_avx.obj bin\gf_6vect_mad_avx2.obj bin\gf_6vect_mad_sse.obj bin\gf_vect_dot_prod_avx.obj bin\gf_vect_dot_prod_avx2.obj bin\gf_vect_dot_prod_avx512.obj bin\gf_vect_dot_prod_sse.obj bin\gf_vect_mad_avx.obj bin\gf_vect_mad_avx2.obj bin\gf_vect_mad_avx512.obj bin\gf_vect_mad_sse.obj bin\gf_vect_mul_avx.obj bin\gf_vect_mul_sse.obj bin\pq_check_sse.obj bin\pq_gen_avx.obj bin\pq_gen_avx2.obj bin\pq_gen_sse.obj bin\raid_base.obj bin\raid_multibinary.obj bin\xor_check_sse.obj bin\xor_gen_avx.obj bin\xor_gen_sse.obj bin/crc16_t10dif_01.obj bin/crc16_t10dif_by4.obj bin/crc32_ieee_01.obj bin/crc32_ieee_by4.obj bin/crc32_iscsi_01.obj bin/crc32_iscsi_00.obj bin/crc_multibinary.obj bin/crc_base.obj
INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iinclude/
INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/
LINKFLAGS = /nologo
CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $(INCLUDES) $(D)
AFLAGS = -f win64 $(INCLUDES) $(D)
@ -65,9 +145,14 @@ isa-l.dll: $(objs)
{crc}.asm.obj:
$(AS) $(AFLAGS) -o $@ $?
{igzip}.c.obj:
$(CC) $(CFLAGS) /c -Fo$@ $?
{igzip}.asm.obj:
$(AS) $(AFLAGS) -o $@ $?
# Examples
ex = xor_example.exe crc_simple_test.exe
ex = xor_example.exe crc_simple_test.exe igzip_example.exe igzip_sync_flush_example.exe
ex: lib $(ex)
$(ex): $(@B).obj
@ -76,9 +161,19 @@ $(ex): $(@B).obj
link /out:$@ $(LINKFLAGS) isa-l.lib $?
# Check tests
checks = erasure_code_test.exe erasure_code_update_test.exe gf_inverse_test.exe gf_vect_mul_test.exe \
pq_check_test.exe pq_gen_test.exe xor_check_test.exe xor_gen_test.exe \
crc16_t10dif_test.exe crc32_ieee_test.exe crc32_iscsi_test.exe
checks = \
gf_vect_mul_test.exe \
erasure_code_test.exe \
gf_inverse_test.exe \
erasure_code_update_test.exe \
xor_gen_test.exe \
pq_gen_test.exe \
xor_check_test.exe \
pq_check_test.exe \
crc16_t10dif_test.exe \
crc32_ieee_test.exe \
crc32_iscsi_test.exe \
igzip_check.exe
checks: lib $(checks)
$(checks): $(@B).obj
@ -86,13 +181,53 @@ check: $(checks)
!$?
# Unit tests
tests = erasure_code_base_test.exe erasure_code_sse_test.exe gf_2vect_dot_prod_sse_test.exe gf_3vect_dot_prod_sse_test.exe gf_4vect_dot_prod_sse_test.exe gf_5vect_dot_prod_sse_test.exe gf_6vect_dot_prod_sse_test.exe gf_vect_dot_prod_avx_test.exe gf_vect_dot_prod_base_test.exe gf_vect_dot_prod_sse_test.exe gf_vect_dot_prod_test.exe gf_vect_mad_test.exe gf_vect_mul_avx_test.exe gf_vect_mul_base_test.exe gf_vect_mul_sse_test.exe
tests = \
gf_vect_mul_sse_test.exe \
gf_vect_mul_avx_test.exe \
gf_vect_mul_base_test.exe \
gf_vect_dot_prod_sse_test.exe \
gf_vect_dot_prod_avx_test.exe \
gf_2vect_dot_prod_sse_test.exe \
gf_3vect_dot_prod_sse_test.exe \
gf_4vect_dot_prod_sse_test.exe \
gf_5vect_dot_prod_sse_test.exe \
gf_6vect_dot_prod_sse_test.exe \
gf_vect_dot_prod_base_test.exe \
gf_vect_dot_prod_test.exe \
gf_vect_mad_test.exe \
erasure_code_base_test.exe \
erasure_code_sse_test.exe \
igzip_rand_test.exe
tests: lib $(tests)
$(tests): $(@B).obj
# Performance tests
perfs = erasure_code_base_perf.exe erasure_code_perf.exe erasure_code_sse_perf.exe erasure_code_update_perf.exe gf_2vect_dot_prod_sse_perf.exe gf_3vect_dot_prod_sse_perf.exe gf_4vect_dot_prod_sse_perf.exe gf_5vect_dot_prod_sse_perf.exe gf_6vect_dot_prod_sse_perf.exe gf_vect_dot_prod_1tbl.exe gf_vect_dot_prod_avx_perf.exe gf_vect_dot_prod_perf.exe gf_vect_dot_prod_sse_perf.exe gf_vect_mad_perf.exe gf_vect_mul_avx_perf.exe gf_vect_mul_perf.exe gf_vect_mul_sse_perf.exe pq_gen_perf.exe xor_gen_perf.exe crc16_t10dif_perf.exe crc32_ieee_perf.exe crc32_iscsi_perf.exe
perfs = \
gf_vect_mul_perf.exe \
gf_vect_mul_sse_perf.exe \
gf_vect_mul_avx_perf.exe \
gf_vect_dot_prod_sse_perf.exe \
gf_vect_dot_prod_avx_perf.exe \
gf_2vect_dot_prod_sse_perf.exe \
gf_3vect_dot_prod_sse_perf.exe \
gf_4vect_dot_prod_sse_perf.exe \
gf_5vect_dot_prod_sse_perf.exe \
gf_6vect_dot_prod_sse_perf.exe \
gf_vect_dot_prod_perf.exe \
gf_vect_dot_prod_1tbl.exe \
gf_vect_mad_perf.exe \
erasure_code_perf.exe \
erasure_code_base_perf.exe \
erasure_code_sse_perf.exe \
erasure_code_update_perf.exe \
xor_gen_perf.exe \
pq_gen_perf.exe \
crc16_t10dif_perf.exe \
crc32_ieee_perf.exe \
crc32_iscsi_perf.exe \
igzip_perf.exe \
igzip_sync_flush_perf.exe
perfs: lib $(perfs)
$(perfs): $(@B).obj
@ -105,3 +240,10 @@ clean:
-if exist isa-l.lib del isa-l.lib
-if exist isa-l.dll del isa-l.dll
zlib.lib:
igzip_rand_test.exe: igzip_inflate_ref.obj
igzip_inflate_perf.exe: igzip_inflate_ref.obj
igzip_inflate_perf.exe: zlib.lib
igzip_inflate_test.exe: igzip_inflate_ref.obj
igzip_inflate_test.exe: zlib.lib
igzip_check.exe: igzip_inflate_ref.obj

View File

@ -27,7 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
units = erasure_code raid crc
units = erasure_code raid crc igzip
default: lib

95
igzip/Makefile.am Normal file
View File

@ -0,0 +1,95 @@
########################################################################
# Copyright(c) 2011-2016 Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
# Sources the igzip unit appends to lsrc, one entry per line.
# The order of entries is unchanged.
lsrc += \
	igzip/igzip.c \
	igzip/hufftables_c.c \
	igzip/crc_utils_01.asm \
	igzip/crc_utils_04.asm \
	igzip/igzip_body_01.asm \
	igzip/igzip_body_04.asm \
	igzip/igzip_finish.asm \
	igzip/igzip_stateless_01.asm \
	igzip/igzip_stateless_04.asm \
	igzip/crc_data.asm \
	igzip/crc32_gzip.asm \
	igzip/detect_repeated_char.asm \
	igzip/igzip_multibinary.asm \
	igzip/igzip_stateless_base.c \
	igzip/igzip_base.c
# Headers contributed by the igzip unit.  pkginclude_HEADERS is the automake
# list of headers installed under the package include directory; extern_hdrs
# is a project list defined by the including makefile (not shown here).
extern_hdrs += include/igzip_lib.h
pkginclude_HEADERS += include/types.h
# Test programs contributed by the igzip unit, appended to the per-category
# lists.  NOTE(review): how each category is built and run is decided by the
# including makefile -- confirm there.
unit_tests += igzip/igzip_rand_test
check_tests += igzip/igzip_check
perf_tests += igzip/igzip_perf igzip/igzip_sync_flush_perf
other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf
# Additional igzip files (assembly include files and headers) appended to
# other_src, one entry per line.  igzip/igzip_body.asm used to be listed
# twice in this assignment; the redundant second entry has been dropped.
# NOTE(review): igzip/igzip_finish.asm is also present in lsrc; it is kept
# here unchanged -- confirm whether both entries are intended.
other_src += \
	igzip/bitbuf2.asm \
	igzip/data_struct2.asm \
	igzip/igzip_buffer_utils_01.asm \
	igzip/igzip_buffer_utils_04.asm \
	igzip/igzip_body.asm \
	igzip/igzip_finish.asm \
	igzip/lz0a_const.asm \
	igzip/options.asm \
	igzip/stdmac.asm \
	igzip/igzip_compare_types.asm \
	igzip/bitbuf2.h \
	igzip/repeated_char_result.h \
	igzip/igzip_stateless.asm \
	igzip/huffman.asm \
	include/reg_sizes.asm \
	include/multibinary.asm \
	include/test.h \
	igzip/huffman.h
# Example programs contributed by the igzip unit.
examples += igzip/igzip_example igzip/igzip_sync_flush_example
# igzip_rand_test additionally needs the reference inflate object.  The
# dependency is spelled twice: as a plain make prerequisite and as an
# automake per-program LDADD list.  NOTE(review): presumably one form serves
# the standalone makefile and the other the automake build -- confirm.
igzip_rand_test: igzip_inflate_ref.o
igzip_igzip_rand_test_LDADD = igzip/igzip_inflate_ref.lo libisal.la
# Include tools to make custom Huffman tables based on sample data
other_tests += igzip/generate_custom_hufftables
other_tests += igzip/generate_constant_block_header
# huff_codes.c joins the lsrc source list; its header is only added to
# other_src.
other_src += igzip/huff_codes.h
lsrc += igzip/huff_codes.c
# Include tools and tests using the reference inflate
other_tests += igzip/igzip_inflate_perf
other_tests += igzip/igzip_inflate_test
other_src += igzip/igzip_inflate_ref.h
other_src += igzip/igzip_inflate_ref.c
other_src += igzip/crc_inflate.h
# Each program below is described in two forms: plain make syntax (an extra
# prerequisite and, where zlib is needed, a target-specific LDLIBS append)
# and automake per-program variables (*_LDADD, *_LDFLAGS).
# NOTE(review): presumably one form serves the standalone makefile and the
# other the automake build -- confirm.
# igzip_inflate_perf: reference inflate object plus zlib (-lz).
igzip_inflate_perf: igzip_inflate_ref.o
igzip_inflate_perf: LDLIBS += -lz
igzip_igzip_inflate_perf_LDADD = igzip/igzip_inflate_ref.lo libisal.la
igzip_igzip_inflate_perf_LDFLAGS = -lz
# igzip_inflate_test: reference inflate object plus zlib (-lz).
igzip_inflate_test: igzip_inflate_ref.o
igzip_inflate_test: LDLIBS += -lz
igzip_igzip_inflate_test_LDADD = igzip/igzip_inflate_ref.lo libisal.la
igzip_igzip_inflate_test_LDFLAGS = -lz
# igzip_check: reference inflate object only (no zlib).
igzip_check: igzip_inflate_ref.o
igzip_igzip_check_LDADD = igzip/igzip_inflate_ref.lo libisal.la

205
igzip/bitbuf2.asm Normal file
View File

@ -0,0 +1,205 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "options.asm"
; write_bits_always m_bits, m_bit_count, code, count, m_out_buf, tmp1
;
; Appends `count` bits of `code` to the accumulator m_bits and stores the
; accumulator to [m_out_buf] on every invocation. The output pointer is then
; advanced by the number of whole bytes held, and only the leftover bits are
; kept in m_bits / m_bit_count.
;   m_bits      - bit accumulator (updated)
;   m_bit_count - number of bits held in m_bits (updated)
;   code        - bits to append; clobbered (shifted in place)
;   count       - number of bits being appended
;   m_out_buf   - output pointer (advanced)
;   tmp1        - not referenced by this macro
; Assumes m_out_buf is a register
; Clobbers RCX
; code is clobbered
%macro write_bits_always 6
%define %%m_bits %1
%define %%m_bit_count %2
%define %%code %3
%define %%count %4
%define %%m_out_buf %5
%define %%tmp1 %6
; code <<= m_bit_count (shlx form does not need the shift count in CL)
%ifdef USE_HSWNI
shlx %%code, %%code, %%m_bit_count
%else
mov rcx, %%m_bit_count
shl %%code, cl
%endif
; merge the new bits into the accumulator and account for them
or %%m_bits, %%code
add %%m_bit_count, %%count
; non-temporal store of the accumulator at the current output position
movnti [%%m_out_buf], %%m_bits
mov rcx, %%m_bit_count
shr rcx, 3 ; rcx = bytes
add %%m_out_buf, rcx
shl rcx, 3 ; rcx = bits
sub %%m_bit_count, rcx
; discard the bits that belong to the whole bytes just stepped over
%ifdef USE_HSWNI
shrx %%m_bits, %%m_bits, rcx
%else
shr %%m_bits, cl
%endif
%endm
; write_bits_safe m_bits, m_bit_count, code, count, m_out_buf, tmp1
;
; Appends `count` bits of `code` to the accumulator m_bits. Only when the bit
; count reaches 64 or more is the accumulator stored to [m_out_buf]; the
; pointer then advances by 8 and m_bits is reloaded with the bits of `code`
; that did not fit.
;   m_bits      - bit accumulator (updated)
;   m_bit_count - number of bits held in m_bits (updated)
;   code        - bits to append; only read by this macro (it is copied to
;                 tmp1 before shifting)
;   count       - number of bits being appended
;   m_out_buf   - output pointer (advanced by 8 on overflow)
;   tmp1        - scratch; clobbered
; Assumes m_out_buf is a register
; Clobbers RCX
%macro write_bits_safe 6
%define %%m_bits %1
%define %%m_bit_count %2
%define %%code %3
%define %%count %4
%define %%m_out_buf %5
%define %%tmp1 %6
; tmp1 = code << m_bit_count, leaving code itself intact for the overflow path
mov %%tmp1, %%code
%ifdef USE_HSWNI
shlx %%tmp1, %%tmp1, %%m_bit_count
%else
mov rcx, %%m_bit_count
shl %%tmp1, cl
%endif
or %%m_bits, %%tmp1
add %%m_bit_count, %%count
; unsigned compare: fall through only when 64 or more bits are now pending
cmp %%m_bit_count, 64
jb %%not_full
sub %%m_bit_count, 64
movnti [%%m_out_buf], %%m_bits
add %%m_out_buf, 8
; rcx = count - m_bit_count = number of bits of code already stored above
mov rcx, %%count
sub rcx, %%m_bit_count
; restart the accumulator with the remaining high bits of code
mov %%m_bits, %%code
%ifdef USE_HSWNI
shrx %%m_bits, %%m_bits, rcx
%else
shr %%m_bits, cl
%endif
%%not_full:
%endm
;; check_space num_bits, m_bits, m_bit_count, m_out_buf, tmp1
;
; Ensures the accumulator can take num_bits more bits. If
; 63 - m_bit_count is below num_bits (unsigned compare), the accumulator is
; stored to [m_out_buf], the pointer advances by the whole bytes held, and
; only the leftover bits stay in m_bits / m_bit_count. Otherwise nothing is
; written.
;   num_bits    - number of bits about to be appended
;   m_bits      - bit accumulator (updated when a store happens)
;   m_bit_count - number of bits held in m_bits (updated when a store happens)
;   m_out_buf   - output pointer (advanced when a store happens)
;   tmp1        - scratch; clobbered
; Assumes m_out_buf is a register
; Clobbers RCX
%macro check_space 5
%define %%num_bits %1
%define %%m_bits %2
%define %%m_bit_count %3
%define %%m_out_buf %4
%define %%tmp1 %5
; tmp1 = 63 - m_bit_count
mov %%tmp1, 63
sub %%tmp1, %%m_bit_count
cmp %%tmp1, %%num_bits
jae %%space_ok
; if (63 - m_bit_count < num_bits)
movnti [%%m_out_buf], %%m_bits
mov rcx, %%m_bit_count
shr rcx, 3 ; rcx = bytes
add %%m_out_buf, rcx
shl rcx, 3 ; rcx = bits
sub %%m_bit_count, rcx
; discard the bits that belong to the whole bytes just stepped over
%ifdef USE_HSWNI
shrx %%m_bits, %%m_bits, rcx
%else
shr %%m_bits, cl
%endif
%%space_ok:
%endm
; write_bits_unsafe m_bits, m_bit_count, code, count
;
; Appends `count` bits of `code` to the accumulator m_bits without checking
; for space and without storing anything to memory.
;   m_bits      - bit accumulator (updated)
;   m_bit_count - number of bits held in m_bits (increased by count)
;   code        - bits to append; clobbered (shifted in place)
;   count       - number of bits being appended
; rcx is clobbered (only on the path taken when USE_HSWNI is not defined)
; code is clobbered
%macro write_bits_unsafe 4
%define %%m_bits %1
%define %%m_bit_count %2
%define %%code %3
%define %%count %4
; code <<= m_bit_count
%ifdef USE_HSWNI
shlx %%code, %%code, %%m_bit_count
%else
mov rcx, %%m_bit_count
shl %%code, cl
%endif
or %%m_bits, %%code
add %%m_bit_count, %%count
%endm
; pad_to_byte m_bit_count, extra_bits
;
; Rounds m_bit_count up to the next multiple of 8.
;   m_bit_count - bit count to round up (updated)
;   extra_bits  - receives the number of padding bits added, (-m_bit_count) & 7
%macro pad_to_byte 2
%define %%m_bit_count %1
%define %%extra_bits %2
mov %%extra_bits, %%m_bit_count
neg %%extra_bits
and %%extra_bits, 7
add %%m_bit_count, %%extra_bits
%endm
; flush m_bits, m_bit_count, m_out_buf, tmp1
;
; Writes out any pending bits and empties the accumulator. When m_bit_count
; is non-zero, m_bits is stored at the address loaded from m_out_buf and
; m_out_buf is advanced by the bit count rounded up to whole bytes. In all
; cases m_bits and m_bit_count end up zero.
;   m_bits      - bit accumulator (zeroed on exit)
;   m_bit_count - number of bits held in m_bits (zeroed on exit)
;   m_out_buf   - location holding the output pointer (read, then rewritten)
;   tmp1        - scratch used to hold the output pointer; clobbered
; Assumes m_out_buf is a memory reference
%macro flush 4
%define %%m_bits %1
%define %%m_bit_count %2
%define %%m_out_buf %3
%define %%tmp1 %4
; nothing to store when no bits are pending
test %%m_bit_count, %%m_bit_count
jz %%bit_count_is_zero
mov %%tmp1, %%m_out_buf
movnti [%%tmp1], %%m_bits
; round the bit count up to a byte count
add %%m_bit_count, 7
shr %%m_bit_count, 3 ; bytes
add %%tmp1, %%m_bit_count
mov %%m_out_buf, %%tmp1
%%bit_count_is_zero:
xor %%m_bits, %%m_bits
xor %%m_bit_count, %%m_bit_count
%endm
; write_bits m_bits, m_bit_count, code, count, m_out_buf, tmp1
;
; Front end that selects one bit-writing strategy at assembly time:
;   USE_BITBUF8 defined  -> write_bits_safe
;   USE_BITBUFB defined  -> write_bits_always
;   neither defined      -> check_space followed by write_bits_unsafe
; Arguments are passed through unchanged to the selected macro(s); see those
; macros for which operands and registers each one clobbers.
%macro write_bits 6
%define %%m_bits %1
%define %%m_bit_count %2
%define %%code %3
%define %%count %4
%define %%m_out_buf %5
%define %%tmp1 %6
%ifdef USE_BITBUF8
write_bits_safe %%m_bits, %%m_bit_count, %%code, %%count, %%m_out_buf, %%tmp1
%elifdef USE_BITBUFB
write_bits_always %%m_bits, %%m_bit_count, %%code, %%count, %%m_out_buf, %%tmp1
%else
; state->bitbuf.check_space(code_len2);
check_space %%count, %%m_bits, %%m_bit_count, %%m_out_buf, %%tmp1
; state->bitbuf.write_bits(code2, code_len2);
write_bits_unsafe %%m_bits, %%m_bit_count, %%code, %%count
; code2 is clobbered, rcx is clobbered
%endif
%endm

161
igzip/bitbuf2.h Normal file
View File

@ -0,0 +1,161 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#ifndef BITBUF2_H
#define BITBUF2_H
#include "igzip_lib.h"
/* On Unix-like and Apple targets _mm_stream_si64x is replaced by a plain
 * 64-bit store; on other targets <intrin.h> is included instead.
 * Both macro arguments and the whole expansion are parenthesized so that a
 * pointer expression such as `p + 1` is cast as a unit and the macro can be
 * used anywhere an expression is allowed. */
#if defined(__unix__) || defined(__APPLE__)
# define _mm_stream_si64x(dst, src) (*((uint64_t *)(dst)) = (src))
#else
# include <intrin.h>
#endif
#ifdef _WIN64
#pragma warning(disable: 4996)
#endif
#ifdef _MSC_VER
#define inline __inline
#endif
/* MAX_BITBUF_BIT_WRITE is the maximum number of bits that can be safely written
 * by consecutive calls of write_bits. Note this assumes the bitbuf is in a
 * state that is possible at the exit of write_bits */
#ifdef USE_BITBUF8 /*Write bits safe */
# define MAX_BITBUF_BIT_WRITE 63
#elif defined(USE_BITBUFB) /* Write bits always */
# define MAX_BITBUF_BIT_WRITE 56
#else /* USE_BITBUF_ELSE */
# define MAX_BITBUF_BIT_WRITE 56
#endif
/* Put a BitBuf2 into its pristine state: no pending bits and no output
 * buffer attached (all three buffer pointers are NULL). */
static inline void construct(struct BitBuf2 *me)
{
	me->m_out_end = NULL;
	me->m_out_start = NULL;
	me->m_out_buf = NULL;
	me->m_bit_count = 0;
	me->m_bits = 0;
}
/* Discard any pending bits; the output buffer pointers are left untouched. */
static inline void init(struct BitBuf2 *me)
{
	me->m_bit_count = 0;
	me->m_bits = 0;
}
/* Attach an output buffer of len bytes starting at buf.
 * The write cursor and start pointer are both set to buf; the end marker is
 * placed 8 bytes before the end of the buffer (presumably to leave room for
 * the 8-byte stores issued by the write helpers in this header). */
static inline void set_buf(struct BitBuf2 *me, unsigned char *buf, unsigned int len)
{
	const unsigned int slop = 8;

	me->m_out_start = buf;
	me->m_out_buf = buf;
	me->m_out_end = buf + len - slop;
}
/* Returns non-zero once the write cursor has moved past the end marker. */
static inline int is_full(struct BitBuf2 *me)
{
	return me->m_out_end < me->m_out_buf;
}
/* Returns the current write cursor of the output buffer. */
static inline uint8_t *buffer_ptr(struct BitBuf2 *me)
{
	uint8_t *cursor = me->m_out_buf;

	return cursor;
}
/* Returns the distance from the buffer start to the write cursor, i.e. the
 * number of bytes the cursor has advanced, truncated to 32 bits. */
static inline uint32_t buffer_used(struct BitBuf2 *me)
{
	uint32_t used = (uint32_t)(me->m_out_buf - me->m_out_start);

	return used;
}
/* Makes sure the bit accumulator can take num_bits more bits.
 * When 63 - m_bit_count is below num_bits, the accumulator is stored at the
 * write cursor, the cursor advances by the whole bytes held, and only the
 * leftover bits remain in m_bits / m_bit_count. Otherwise nothing happens. */
static inline void check_space(struct BitBuf2 *me, uint32_t num_bits)
{
	uint32_t whole_bytes, whole_bits;

	/* Enough room already: nothing to write out. */
	if (63 - me->m_bit_count >= num_bits)
		return;

	_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
	whole_bytes = me->m_bit_count / 8;
	whole_bits = whole_bytes * 8;
	me->m_out_buf += whole_bytes;
	me->m_bit_count -= whole_bits;
	me->m_bits >>= whole_bits;
}
/* Appends count bits of code to the accumulator without checking for space
 * and without writing anything to the output buffer. */
static inline void write_bits_unsafe(struct BitBuf2 *me, uint64_t code, uint32_t count)
{
	me->m_bits = me->m_bits | (code << me->m_bit_count);
	me->m_bit_count = me->m_bit_count + count;
}
/* Appends count bits of code to the bit buffer using the strategy selected
 * at compile time:
 *   USE_BITBUF8 - store the accumulator only once 64 or more bits are pending
 *   USE_BITBUFB - store the accumulator whenever at least one byte is pending
 *   otherwise   - check_space() followed by write_bits_unsafe() */
static inline void write_bits(struct BitBuf2 *me, uint64_t code, uint32_t count)
{
#if defined(USE_BITBUF8)	/* Write bits safe */
	me->m_bits = me->m_bits | (code << me->m_bit_count);
	me->m_bit_count = me->m_bit_count + count;
	if (me->m_bit_count < 64)
		return;

	/* Accumulator overflowed: emit 8 bytes and keep the bits of code that
	 * did not fit. */
	_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
	me->m_out_buf += 8;
	me->m_bit_count -= 64;
	me->m_bits = code >> (count - me->m_bit_count);
#elif defined(USE_BITBUFB)	/* Write bits always */
	/* Assumes there is space to fit code into m_bits. */
	uint32_t whole_bits;

	me->m_bits = me->m_bits | (code << me->m_bit_count);
	me->m_bit_count = me->m_bit_count + count;
	if (me->m_bit_count >= 8) {
		_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
		/* Number of pending bits that make up whole bytes. */
		whole_bits = me->m_bit_count & ~7;
		me->m_out_buf += whole_bits / 8;
		me->m_bit_count -= whole_bits;
		me->m_bits >>= whole_bits;
	}
#else				/* USE_BITBUF_ELSE */
	check_space(me, count);
	write_bits_unsafe(me, code, count);
#endif
}
/* Writes out any pending bits and empties the accumulator.
 * The store is a full 8 bytes, so up to 8 bytes of the output buffer can be
 * written; the cursor advances only by the pending bit count rounded up to
 * whole bytes. */
static inline void flush(struct BitBuf2 *me)
{
	if (me->m_bit_count != 0) {
		uint32_t nbytes;

		_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
		nbytes = (me->m_bit_count + 7) / 8;
		me->m_out_buf += nbytes;
	}
	me->m_bit_count = 0;
	me->m_bits = 0;
}
#endif //BITBUF2_H

617
igzip/crc32_gzip.asm Normal file
View File

@ -0,0 +1,617 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Function API:
; UINT32 crc32_gzip(
; UINT32 init_crc, //initial CRC value, 32 bits
; const unsigned char *buf, //buffer pointer to calculate CRC on
; UINT64 len //buffer length in bytes (64-bit data)
; );
;
; Authors:
; Erdinc Ozturk
; Vinodh Gopal
; James Guilford
;
; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
; URL: http://download.intel.com/design/intarch/papers/323102.pdf
;
;
; sample yasm command line:
; yasm -f x64 -f elf64 -X gnu -g dwarf2 crc32_gzip
;
; As explained here:
; http://docs.oracle.com/javase/7/docs/api/java/util/zip/package-summary.html
; CRC-32 checksum is described in RFC 1952
; Implementing RFC 1952 CRC:
; http://www.ietf.org/rfc/rfc1952.txt
%include "reg_sizes.asm"
[bits 64]
default rel
section .text
%ifidn __OUTPUT_FORMAT__, win64
%xdefine arg1 rcx
%xdefine arg2 rdx
%xdefine arg3 r8
%xdefine arg1_low32 ecx
%else
%xdefine arg1 rdi
%xdefine arg2 rsi
%xdefine arg3 rdx
%xdefine arg1_low32 edi
%endif
%define TMP 16*0
%ifidn __OUTPUT_FORMAT__, win64
%define XMM_SAVE 16*2
%define VARIABLE_OFFSET 16*10+8
%else
%define VARIABLE_OFFSET 16*2+8
%endif
align 16
global crc32_gzip
crc32_gzip:
; unsigned long c = crc ^ 0xffffffffL;
not arg1_low32 ;
sub rsp, VARIABLE_OFFSET
%ifidn __OUTPUT_FORMAT__, win64
; push the xmm registers into the stack to maintain
movdqa [rsp + XMM_SAVE + 16*0], xmm6
movdqa [rsp + XMM_SAVE + 16*1], xmm7
movdqa [rsp + XMM_SAVE + 16*2], xmm8
movdqa [rsp + XMM_SAVE + 16*3], xmm9
movdqa [rsp + XMM_SAVE + 16*4], xmm10
movdqa [rsp + XMM_SAVE + 16*5], xmm11
movdqa [rsp + XMM_SAVE + 16*6], xmm12
movdqa [rsp + XMM_SAVE + 16*7], xmm13
%endif
; check if smaller than 256B
cmp arg3, 256
; for sizes less than 256, we can't fold 128B at a time...
jl _less_than_256
; load the initial crc value
movd xmm10, arg1_low32 ; initial crc
; receive the initial 64B data, xor the initial crc value
movdqu xmm0, [arg2+16*0]
movdqu xmm1, [arg2+16*1]
movdqu xmm2, [arg2+16*2]
movdqu xmm3, [arg2+16*3]
movdqu xmm4, [arg2+16*4]
movdqu xmm5, [arg2+16*5]
movdqu xmm6, [arg2+16*6]
movdqu xmm7, [arg2+16*7]
; XOR the initial_crc value
pxor xmm0, xmm10
movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
;imm value of pclmulqdq instruction will determine which constant to use
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; we subtract 256 instead of 128 to save one instruction from the loop
sub arg3, 256
; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop
; loop will fold 128B at a time until we have 128+y Bytes of buffer
; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
_fold_128_B_loop:
; update the buffer pointer
add arg2, 128
movdqu xmm9, [arg2+16*0]
movdqu xmm12, [arg2+16*1]
movdqa xmm8, xmm0
movdqa xmm13, xmm1
pclmulqdq xmm0, xmm10, 0x10
pclmulqdq xmm8, xmm10 , 0x1
pclmulqdq xmm1, xmm10, 0x10
pclmulqdq xmm13, xmm10 , 0x1
pxor xmm0, xmm9
xorps xmm0, xmm8
pxor xmm1, xmm12
xorps xmm1, xmm13
movdqu xmm9, [arg2+16*2]
movdqu xmm12, [arg2+16*3]
movdqa xmm8, xmm2
movdqa xmm13, xmm3
pclmulqdq xmm2, xmm10, 0x10
pclmulqdq xmm8, xmm10 , 0x1
pclmulqdq xmm3, xmm10, 0x10
pclmulqdq xmm13, xmm10 , 0x1
pxor xmm2, xmm9
xorps xmm2, xmm8
pxor xmm3, xmm12
xorps xmm3, xmm13
movdqu xmm9, [arg2+16*4]
movdqu xmm12, [arg2+16*5]
movdqa xmm8, xmm4
movdqa xmm13, xmm5
pclmulqdq xmm4, xmm10, 0x10
pclmulqdq xmm8, xmm10 , 0x1
pclmulqdq xmm5, xmm10, 0x10
pclmulqdq xmm13, xmm10 , 0x1
pxor xmm4, xmm9
xorps xmm4, xmm8
pxor xmm5, xmm12
xorps xmm5, xmm13
movdqu xmm9, [arg2+16*6]
movdqu xmm12, [arg2+16*7]
movdqa xmm8, xmm6
movdqa xmm13, xmm7
pclmulqdq xmm6, xmm10, 0x10
pclmulqdq xmm8, xmm10 , 0x1
pclmulqdq xmm7, xmm10, 0x10
pclmulqdq xmm13, xmm10 , 0x1
pxor xmm6, xmm9
xorps xmm6, xmm8
pxor xmm7, xmm12
xorps xmm7, xmm13
sub arg3, 128
; check if there is another 128B in the buffer to be able to fold
jge _fold_128_B_loop
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
add arg2, 128
; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
; fold the 8 xmm registers to 1 xmm register with different constants
movdqa xmm10, [rk9]
movdqa xmm8, xmm0
pclmulqdq xmm0, xmm10, 0x1
pclmulqdq xmm8, xmm10, 0x10
pxor xmm7, xmm8
xorps xmm7, xmm0
movdqa xmm10, [rk11]
movdqa xmm8, xmm1
pclmulqdq xmm1, xmm10, 0x1
pclmulqdq xmm8, xmm10, 0x10
pxor xmm7, xmm8
xorps xmm7, xmm1
movdqa xmm10, [rk13]
movdqa xmm8, xmm2
pclmulqdq xmm2, xmm10, 0x1
pclmulqdq xmm8, xmm10, 0x10
pxor xmm7, xmm8
pxor xmm7, xmm2
movdqa xmm10, [rk15]
movdqa xmm8, xmm3
pclmulqdq xmm3, xmm10, 0x1
pclmulqdq xmm8, xmm10, 0x10
pxor xmm7, xmm8
xorps xmm7, xmm3
movdqa xmm10, [rk17]
movdqa xmm8, xmm4
pclmulqdq xmm4, xmm10, 0x1
pclmulqdq xmm8, xmm10, 0x10
pxor xmm7, xmm8
pxor xmm7, xmm4
movdqa xmm10, [rk19]
movdqa xmm8, xmm5
pclmulqdq xmm5, xmm10, 0x1
pclmulqdq xmm8, xmm10, 0x10
pxor xmm7, xmm8
xorps xmm7, xmm5
movdqa xmm10, [rk1]
movdqa xmm8, xmm6
pclmulqdq xmm6, xmm10, 0x1
pclmulqdq xmm8, xmm10, 0x10
pxor xmm7, xmm8
pxor xmm7, xmm6
; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
; instead of a cmp instruction, we use the negative flag with the jl instruction
add arg3, 128-16
jl _final_reduction_for_128
; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
; we can fold 16 bytes at a time if y>=16
; continue folding 16B at a time
_16B_reduction_loop:
movdqa xmm8, xmm7
pclmulqdq xmm7, xmm10, 0x1
pclmulqdq xmm8, xmm10, 0x10
pxor xmm7, xmm8
movdqu xmm0, [arg2]
pxor xmm7, xmm0
add arg2, 16
sub arg3, 16
; instead of a cmp instruction, we utilize the flags with the jge instruction
; equivalent of: cmp arg3, 16-16
; check if there is any more 16B in the buffer to be able to fold
jge _16B_reduction_loop
;now we have 16+z bytes left to reduce, where 0<= z < 16.
;first, we reduce the data in the xmm7 register
; Handle the last 0..15 input bytes.
; "add arg3, 16" removes the -16 bias kept by the loop above, leaving the true
; tail length in arg3; a zero result means xmm7 already covers all the input.
_final_reduction_for_128:
add arg3, 16
je _128_done
; here we are getting data that is less than 16 bytes.
; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes.
; after that the registers need to be adjusted.
; Entry point also used by _less_than_32 with arg3 = tail length (1..15),
; xmm7 = previous 16 bytes already XORed with the CRC, xmm10 = [rk1].
_get_last_two_xmms:
movdqa xmm2, xmm7
; load the final 16 bytes of the buffer; they overlap data already consumed
movdqu xmm1, [arg2 - 16 + arg3]
; get rid of the extra data that was loaded before
; load the shift constant
; (16-byte unaligned window of pshufb_shf_table starting at byte offset arg3)
lea rax, [pshufb_shf_table]
add rax, arg3
movdqu xmm0, [rax]
pshufb xmm7, xmm0
; flipping bit 7 of every selector byte (mask3 = 0x80 per byte) gives the
; complementary shuffle for the second copy of the remainder
pxor xmm0, [mask3]
pshufb xmm2, xmm0
; merge the shifted remainder (xmm2) with the freshly loaded tail (xmm1);
; the per-byte selection is controlled by the sign bits of xmm0
pblendvb xmm2, xmm1 ;xmm0 is implicit
;;;;;;;;;;
; one more 16-byte fold of xmm7, then XOR in the merged block from xmm2
movdqa xmm8, xmm7
pclmulqdq xmm7, xmm10, 0x1
pclmulqdq xmm8, xmm10, 0x10
pxor xmm7, xmm8
pxor xmm7, xmm2
; Final reduction: xmm7 holds a 128-bit remainder that is reduced to the
; 32-bit CRC returned in eax.
_128_done:
; compute crc of a 128-bit value
; xmm10 = rk5 (low qword) : rk6 (high qword)
movdqa xmm10, [rk5]
movdqa xmm0, xmm7
;64b fold
; low qword of xmm7 times rk5, XORed with the original high qword
pclmulqdq xmm7, xmm10, 0
psrldq xmm0, 8
pxor xmm7, xmm0
;32b fold
; shift left by 4 bytes, multiply the low qword by rk6, XOR with the unshifted copy
movdqa xmm0, xmm7
pslldq xmm7, 4
pclmulqdq xmm7, xmm10, 0x10
pxor xmm7, xmm0
;barrett reduction
; Also entered directly by the 1..3 byte paths, which pre-shift their data in xmm7.
_barrett:
; mask2 clears the low dword of xmm7
pand xmm7, [mask2]
movdqa xmm1, xmm7
movdqa xmm2, xmm7
; xmm10 = rk7 (low qword) : rk8 (high qword)
movdqa xmm10, [rk7]
pclmulqdq xmm7, xmm10, 0
pxor xmm7, xmm2
; mask keeps only the low qword
pand xmm7, [mask]
movdqa xmm2, xmm7
pclmulqdq xmm7, xmm10, 0x10
pxor xmm7, xmm2
pxor xmm7, xmm1
; the result is taken from dword 2 (bits 95:64) of xmm7
pextrd eax, xmm7, 2
; Common exit. Also reached directly for zero-length input with eax = arg1_low32.
_cleanup:
; return c ^ 0xffffffffL;
not eax
%ifidn __OUTPUT_FORMAT__, win64
; restore xmm6-xmm13 from the XMM_SAVE area
; (presumably stored there by the prologue, which is outside this view)
movdqa xmm6, [rsp + XMM_SAVE + 16*0]
movdqa xmm7, [rsp + XMM_SAVE + 16*1]
movdqa xmm8, [rsp + XMM_SAVE + 16*2]
movdqa xmm9, [rsp + XMM_SAVE + 16*3]
movdqa xmm10, [rsp + XMM_SAVE + 16*4]
movdqa xmm11, [rsp + XMM_SAVE + 16*5]
movdqa xmm12, [rsp + XMM_SAVE + 16*6]
movdqa xmm13, [rsp + XMM_SAVE + 16*7]
%endif
; release the local stack frame
add rsp, VARIABLE_OFFSET
ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
align 16
; Short-buffer path (the branch that reaches this label is outside this view;
; per the label name it handles lengths below 256 bytes).
; Lengths >= 32 are seeded here and handed to the 16-byte folding loop;
; anything shorter goes to _less_than_32.
_less_than_256:
; check if there is enough buffer to be able to fold 16B at a time
cmp arg3, 32
jl _less_than_32
; if there is, load the constants
movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
movd xmm0, arg1_low32 ; get the initial crc value
movdqu xmm7, [arg2] ; load the plaintext
; XOR the initial crc into the low dword of the first 16 bytes
pxor xmm7, xmm0
; update the buffer pointer
add arg2, 16
; update the counter. subtract 32 instead of 16 to save one instruction from the loop
sub arg3, 32
jmp _16B_reduction_loop
align 16
; Lengths 0..31: dispatch on the exact length.
;   0      -> _cleanup (which applies "not eax" to the incoming crc value)
;   1..15  -> _less_than_16_left
;   16     -> _exact_16_left
;   17..31 -> consume the first 16 bytes here, then _get_last_two_xmms
_less_than_32:
; mov initial crc to the return value. this is necessary for zero-length buffers.
mov eax, arg1_low32
test arg3, arg3
je _cleanup
movd xmm0, arg1_low32 ; get the initial crc value
cmp arg3, 16
je _exact_16_left
jl _less_than_16_left
movdqu xmm7, [arg2] ; load the plaintext
pxor xmm7, xmm0 ; xor the initial crc value
add arg2, 16
; arg3 becomes the tail length (1..15) expected by _get_last_two_xmms
sub arg3, 16
movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
jmp _get_last_two_xmms
align 16
; Lengths 1..15: assemble the input in a zeroed 16-byte scratch area at [rsp]
; so that no bytes beyond the caller's buffer are read.
; Lengths 1..3 branch to _only_less_than_4; lengths 4..15 are copied in
; 8/4/2/1-byte pieces and finish at _zero_left.
; NOTE(review): movdqa on [rsp] needs rsp to be 16-byte aligned and the first
; 16 bytes of the frame to be free; the prologue is outside this view - confirm.
_less_than_16_left:
; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
pxor xmm1, xmm1
mov r11, rsp
movdqa [r11], xmm1
cmp arg3, 4
jl _only_less_than_4
; backup the counter value
; (r9 keeps the original length; it indexes pshufb_shf_table at _zero_left)
mov r9, arg3
cmp arg3, 8
jl _less_than_8_left
; load 8 Bytes
mov rax, [arg2]
mov [r11], rax
add r11, 8
sub arg3, 8
add arg2, 8
_less_than_8_left:
cmp arg3, 4
jl _less_than_4_left
; load 4 Bytes
mov eax, [arg2]
mov [r11], eax
add r11, 4
sub arg3, 4
add arg2, 4
_less_than_4_left:
cmp arg3, 2
jl _less_than_2_left
; load 2 Bytes
mov ax, [arg2]
mov [r11], ax
add r11, 2
sub arg3, 2
add arg2, 2
_less_than_2_left:
cmp arg3, 1
jl _zero_left
; load 1 Byte
mov al, [arg2]
mov [r11], al
_zero_left:
; reload the assembled bytes (zero padded to 16) and XOR in the initial crc
movdqa xmm7, [rsp]
pxor xmm7, xmm0 ; xor the initial crc value
; shuffle with the table window at byte offset r9 (the original length)
lea rax,[pshufb_shf_table]
movdqu xmm0, [rax + r9]
pshufb xmm7,xmm0
jmp _128_done
align 16
; Length exactly 16: the single block XORed with the initial crc (xmm0) is
; already the 128-bit value to reduce, so no folding is needed.
_exact_16_left:
movdqu xmm7, [arg2]
pxor xmm7, xmm0 ; xor the initial crc value
jmp _128_done
; Lengths 1..3: copy the bytes one at a time into the zeroed scratch area
; (r11 = rsp, set up in _less_than_16_left), XOR in the initial crc, shift
; left by (8 - length) bytes and jump straight to _barrett, skipping the
; 128-bit folds performed at _128_done.
_only_less_than_4:
cmp arg3, 3
jl _only_less_than_3
; load 3 Bytes
mov al, [arg2]
mov [r11], al
mov al, [arg2+1]
mov [r11+1], al
mov al, [arg2+2]
mov [r11+2], al
movdqa xmm7, [rsp]
pxor xmm7, xmm0 ; xor the initial crc value
; 3 bytes of input: shift left by 8 - 3 = 5 bytes
pslldq xmm7, 5
jmp _barrett
_only_less_than_3:
cmp arg3, 2
jl _only_less_than_2
; load 2 Bytes
mov al, [arg2]
mov [r11], al
mov al, [arg2+1]
mov [r11+1], al
movdqa xmm7, [rsp]
pxor xmm7, xmm0 ; xor the initial crc value
; 2 bytes of input: shift left by 8 - 2 = 6 bytes
pslldq xmm7, 6
jmp _barrett
_only_less_than_2:
; load 1 Byte
mov al, [arg2]
mov [r11], al
movdqa xmm7, [rsp]
pxor xmm7, xmm0 ; xor the initial crc value
; 1 byte of input: shift left by 8 - 1 = 7 bytes
pslldq xmm7, 7
jmp _barrett
section .data
; precomputed constants
align 16
; Folding / reduction constants, one 64-bit value per label.
; The code loads them in adjacent pairs with a 16-byte movdqa, e.g.
; "movdqa xmm10, [rk1]" places rk1 in the low qword and rk2 in the high qword,
; so the order and the 16-byte alignment of this table must be preserved.
; Uses visible in this file: rk1/rk2 for the 16-byte fold, rk5/rk6 for the
; 128->64->32 bit fold, rk7/rk8 for the Barrett reduction and
; rk11..rk20 (plus rk1/rk2) for collapsing the wide fold state into xmm7.
; Note that rk5 has the same value as rk1.
rk1 :
DQ 0x00000000ccaa009e
rk2 :
DQ 0x00000001751997d0
rk3 :
DQ 0x000000014a7fe880
rk4 :
DQ 0x00000001e88ef372
rk5 :
DQ 0x00000000ccaa009e
rk6 :
DQ 0x0000000163cd6124
rk7 :
DQ 0x00000001f7011640
rk8 :
DQ 0x00000001db710640
rk9 :
DQ 0x00000001d7cfc6ac
rk10 :
DQ 0x00000001ea89367e
rk11 :
DQ 0x000000018cb44e58
rk12 :
DQ 0x00000000df068dc2
rk13 :
DQ 0x00000000ae0b5394
rk14 :
DQ 0x00000001c7569e54
rk15 :
DQ 0x00000001c6e41596
rk16 :
DQ 0x0000000154442bd4
rk17 :
DQ 0x0000000174359406
rk18 :
DQ 0x000000003db1ecdc
rk19 :
DQ 0x000000015a546366
rk20 :
DQ 0x00000000f1da05aa
; 32-byte sliding table of pshufb control bytes.
; The code reads an unaligned 16-byte window starting at byte offset n
; (n = number of tail bytes, 1..15). Selector bytes with bit 7 set make
; pshufb write zero; the others pick the source byte with that index.
pshufb_shf_table:
; use these values for shift constants for the pshufb instruction
; different alignments result in values as shown:
; (the rows below are the 16-byte windows at offsets 1..15 of the live table)
; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
dq 0x0706050403020100, 0x000e0d0c0b0a0908
; 128-bit masks used as memory operands of pand/pxor (first dq = low qword).
; mask: keeps the low 64 bits, clears the high 64 bits
mask:
dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
; mask2: clears the low 32 bits, keeps the upper 96 bits
mask2:
dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
; mask3: 0x80 in every byte; XORed with a pshufb_shf_table window to flip
; bit 7 of each selector byte
mask3:
dq 0x8080808080808080, 0x8080808080808080

120
igzip/crc_data.asm Normal file
View File

@ -0,0 +1,120 @@
%ifndef CRC_DATA
%define CRC_DATA
; precomputed constants
section .data
align 32
; Fifteen 16-byte pshufb control masks, one row per byte shift of 1..15.
; Selector bytes with bit 7 set make pshufb produce zero for that lane;
; XORing a row with mask3 (0x80 per byte) yields the complementary shift,
; as the trailing "shl / shr" comments indicate.
; The code that consumes this table is not part of this file.
global pshufb_shf_table:data internal
pshufb_shf_table:
dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
;; ; MAGIC value, which when folded 4 times gives FFFFFF00000...0000
;; global crc_init_4
;; crc_init_4:
;; dq 0x9db42487
;; dq 0x0
;; dq 0x0
;; dq 0x0
; constant used to shift/fold one XMM reg down by 4 XMM widths
; Two qwords forming one 128-bit operand (first dq = low qword). It sits
; 15*16 bytes after the "align 32" above, so it is 16-byte aligned and can be
; loaded with movdqa / vmovdqa.
global fold_4:data internal
fold_4:
dq 0x00000001c6e41596
dq 0x0000000154442bd4
;value, which when xored with pshufb_shf_table entry gives shr value
; (0x80 in every byte: flips bit 7 of each pshufb selector byte)
global mask3:data internal
mask3: dq 0x8080808080808080, 0x8080808080808080
; Byte-wise CRC-32 lookup table: 256 dword entries (64 rows of 4).
; Entry 128 is 0xedb88320, i.e. this is the table for the reflected CRC-32
; polynomial 0xEDB88320 (the gzip/zlib CRC). The CRC_TABLE guard lets a
; build that already defines CRC_TABLE omit the table.
%ifndef CRC_TABLE
%define CRC_TABLE
; Place marker in library to avoid linker warning
align 4
global CrcTable:data internal
CrcTable:
dd 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba
dd 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3
dd 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988
dd 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91
dd 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de
dd 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7
dd 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec
dd 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5
dd 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172
dd 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b
dd 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940
dd 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59
dd 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116
dd 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f
dd 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924
dd 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d
dd 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a
dd 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433
dd 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818
dd 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01
dd 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e
dd 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457
dd 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c
dd 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65
dd 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2
dd 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb
dd 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0
dd 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9
dd 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086
dd 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f
dd 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4
dd 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad
dd 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a
dd 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683
dd 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8
dd 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1
dd 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe
dd 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7
dd 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc
dd 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5
dd 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252
dd 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b
dd 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60
dd 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79
dd 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236
dd 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f
dd 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04
dd 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d
dd 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a
dd 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713
dd 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38
dd 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21
dd 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e
dd 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777
dd 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c
dd 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45
dd 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2
dd 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db
dd 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0
dd 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9
dd 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6
dd 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf
dd 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94
dd 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
; End_CrcTable marks the first byte after the table (not exported)
End_CrcTable:
%endif ;; CRC_TABLE
%endif ;; CRC_DATA

81
igzip/crc_inflate.h Normal file
View File

@ -0,0 +1,81 @@
#ifndef INFLATE_CRC_TABLE
#define INFLATE_CRC_TABLE
/*
 * Byte-wise CRC-32 lookup table (256 entries) used by find_crc() below.
 * Entry 128 is 0xedb88320, i.e. this is the table for the reflected CRC-32
 * polynomial 0xEDB88320 (the gzip/zlib CRC).
 *
 * NOTE(review): this is a non-static definition inside a header, so every
 * translation unit that includes it emits its own external definition;
 * including the header from more than one source file of the same program
 * will normally fail at link time. The header also relies on the includer
 * having pulled in <stdint.h> first.
 */
uint32_t inflate_crc_table[256] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d};
/*
 * Compute the CRC-32 of a byte buffer with the table-driven, one byte per
 * step method.
 *
 * start:  first byte of the data
 * length: number of bytes to process (0 is allowed)
 *
 * The running value starts as all ones and is inverted again before it is
 * returned, so an empty buffer yields 0.
 */
uint32_t find_crc(uint8_t * start, uint32_t length)
{
	uint32_t state = 0xFFFFFFFFu;
	uint32_t pos;

	for (pos = 0; pos < length; pos++) {
		/* table index: low byte of the running value XOR the data byte */
		uint8_t slot = (uint8_t) (state ^ start[pos]);

		state = inflate_crc_table[slot] ^ (state >> 8);
	}

	return state ^ 0xFFFFFFFFu;
}
#endif

195
igzip/crc_utils_01.asm Normal file
View File

@ -0,0 +1,195 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "options.asm"
%include "reg_sizes.asm"
; Functional versions of CRC macros
%include "igzip_buffer_utils_01.asm"
extern fold_4
%define crc_0 xmm0 ; in/out: crc state
%define crc_1 xmm1 ; in/out: crc state
%define crc_2 xmm2 ; in/out: crc state
%define crc_3 xmm3 ; in/out: crc state
%define crc_fold xmm4 ; in: (loaded from fold_4)
%define crc_tmp0 xmm5 ; tmp
%define crc_tmp1 xmm6 ; tmp
%define crc_tmp2 xmm7 ; tmp
%define crc_tmp3 xmm8 ; tmp
%define crc_tmp4 xmm9 ; tmp
%define tmp4 rax
; copy x bytes (rounded up to 16 bytes) from src to dst with crc
; src & dst are unaligned
; void copy_in_crc(uint8_t *dst, uint8_t *src, uint32_t size, uint32_t *crc)
; arg 1: rcx: pointer to dst
; arg 2: rdx: pointer to src
; arg 3: r8: size (in bytes)
; arg 4: r9: pointer to CRC
; The register names above are the Win64 ones; on elf64 the arguments are
; moved into those registers first. The CRC pointer addresses four 16-byte
; state words that are read and written with movdqa, so it must be 16-byte
; aligned. The copy/CRC work itself is done by the COPY_IN_CRC macro
; (presumably provided by igzip_buffer_utils_01.asm - confirm).
; NOTE(review): size is declared uint32_t but the full r8 is passed to the
; macro; whether the upper 32 bits matter depends on COPY_IN_CRC - confirm.
;; %if 0
global copy_in_crc_01
copy_in_crc_01:
%ifidn __OUTPUT_FORMAT__, elf64
; remap rdi, rsi, rdx, rcx -> rcx, rdx, r8, r9; the order matters because
; rcx and rdx are both a source and a destination
mov r9, rcx
mov r8, rdx
mov rdx, rsi
mov rcx, rdi
%endif
; Save xmm registers that need to be preserved.
; (done unconditionally, on elf64 as well as win64)
; 8 + 4*16 bytes: four 16-byte slots plus 8 so that, given a 16-byte aligned
; stack at the call site, rsp is 16-byte aligned for movdqa
sub rsp, 8 + 4*16
movdqa [rsp+0*16], xmm6
movdqa [rsp+1*16], xmm7
movdqa [rsp+2*16], xmm8
movdqa [rsp+3*16], xmm9
; load the 4 x 128-bit CRC state and the fold constant
movdqa crc_0, [r9 + 0*16]
movdqa crc_1, [r9 + 1*16]
movdqa crc_2, [r9 + 2*16]
movdqa crc_3, [r9 + 3*16]
movdqa crc_fold, [fold_4 WRT_OPT]
COPY_IN_CRC rcx, rdx, r8, tmp4, crc_0, crc_1, crc_2, crc_3, \
crc_fold, \
crc_tmp0, crc_tmp1, crc_tmp2, crc_tmp3, crc_tmp4
; store the updated CRC state back through the crc pointer
movdqa [r9 + 0*16], crc_0
movdqa [r9 + 1*16], crc_1
movdqa [r9 + 2*16], crc_2
movdqa [r9 + 3*16], crc_3
; restore the saved xmm registers and release the stack space
movdqa xmm9, [rsp+3*16]
movdqa xmm8, [rsp+2*16]
movdqa xmm7, [rsp+1*16]
movdqa xmm6, [rsp+0*16]
add rsp, 8 + 4*16
ret
; Convert 512-bit CRC data to real 32-bit value
; uint32_t crc_512to32(uint32_t *crc)
; arg 1: rcx: pointer to CRC
; returns: eax: 32 bit crc
; The pointer (rdi on elf64, moved to rcx below) addresses four 16-byte state
; words read with movdqa, so it must be 16-byte aligned. The state in memory
; is not modified. Only xmm0-xmm5 are used and no stack space is needed.
global crc_512to32_01
crc_512to32_01:
%ifidn __OUTPUT_FORMAT__, elf64
mov rcx, rdi
%endif
movdqa crc_0, [rcx + 0*16]
movdqa crc_1, [rcx + 1*16]
movdqa crc_2, [rcx + 2*16]
movdqa crc_3, [rcx + 3*16]
movdqa crc_fold, [rk1 WRT_OPT] ;k1
; fold the 4 xmm registers to 1 xmm register
; (crc_0 -> crc_1 -> crc_2 -> crc_3; the same rk1/rk2 pair is used at each step)
movdqa crc_tmp0, crc_0
pclmulqdq crc_0, crc_fold, 0x1
pclmulqdq crc_tmp0, crc_fold, 0x10
pxor crc_1, crc_tmp0
pxor crc_1, crc_0
movdqa crc_tmp0, crc_1
pclmulqdq crc_1, crc_fold, 0x1
pclmulqdq crc_tmp0, crc_fold, 0x10
pxor crc_2, crc_tmp0
pxor crc_2, crc_1
movdqa crc_tmp0, crc_2
pclmulqdq crc_2, crc_fold, 0x1
pclmulqdq crc_tmp0, crc_fold, 0x10
pxor crc_3, crc_tmp0
pxor crc_3, crc_2
; reduce the 128-bit value in crc_3 using rk5 (low qword) / rk6 (high qword)
movdqa crc_fold, [rk5 WRT_OPT]
movdqa crc_0, crc_3
; 64-bit fold: low qword times rk5, XORed with the original high qword
pclmulqdq crc_3, crc_fold, 0
psrldq crc_0, 8
pxor crc_3, crc_0
; 32-bit fold: shift left 4 bytes, low qword times rk6, XOR with the unshifted copy
movdqa crc_0, crc_3
pslldq crc_3, 4
pclmulqdq crc_3, crc_fold, 0x10
pxor crc_3, crc_0
; Barrett reduction with rk7 (low qword) / rk8 (high qword)
pand crc_3, [mask2 WRT_OPT]
movdqa crc_1, crc_3
movdqa crc_2, crc_3
movdqa crc_fold, [rk7 WRT_OPT]
pclmulqdq crc_3, crc_fold, 0
pxor crc_3, crc_2
pand crc_3, [mask WRT_OPT]
movdqa crc_2, crc_3
pclmulqdq crc_3, crc_fold, 0x10
pxor crc_3, crc_2
pxor crc_3, crc_1
; the result is dword 2 of crc_3, inverted before it is returned
pextrd eax, crc_3, 2
not eax
ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
section .data
align 16
; Constants for crc_512to32_01. Each rkN is one qword; they are loaded in
; adjacent pairs with a 16-byte movdqa ([rk1] = rk1:rk2, [rk5] = rk5:rk6,
; [rk7] = rk7:rk8), so the order and the 16-byte alignment must be kept.
rk1: dq 0x00000000ccaa009e
rk2: dq 0x00000001751997d0
rk5: dq 0x00000000ccaa009e
rk6: dq 0x0000000163cd6124
rk7: dq 0x00000001f7011640
rk8: dq 0x00000001db710640
; mask keeps the low 64 bits; mask2 clears the low 32 bits
mask: dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
mask2: dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF

194
igzip/crc_utils_04.asm Normal file
View File

@ -0,0 +1,194 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "options.asm"
%include "reg_sizes.asm"
; Functional versions of CRC macros
%include "igzip_buffer_utils_04.asm"
extern fold_4
%define crc_0 xmm0 ; in/out: crc state
%define crc_1 xmm1 ; in/out: crc state
%define crc_2 xmm2 ; in/out: crc state
%define crc_3 xmm3 ; in/out: crc state
%define crc_fold xmm4 ; in: (loaded from fold_4)
%define crc_tmp0 xmm5 ; tmp
%define crc_tmp1 xmm6 ; tmp
%define crc_tmp2 xmm7 ; tmp
%define crc_tmp3 xmm8 ; tmp
%define crc_tmp4 xmm9 ; tmp
%define tmp4 rax
; copy x bytes (rounded up to 16 bytes) from src to dst with crc
; src & dst are unaligned
; void copy_in_crc(uint8_t *dst, uint8_t *src, uint32_t size, uint32_t *crc)
; arg 1: rcx: pointer to dst
; arg 2: rdx: pointer to src
; arg 3: r8: size (in bytes)
; arg 4: r9: pointer to CRC
; VEX-encoded (vmovdqa) variant. The register names above are the Win64 ones;
; on elf64 the arguments are moved into those registers first. The CRC pointer
; addresses four 16-byte state words accessed with vmovdqa, so it must be
; 16-byte aligned. The copy/CRC work itself is done by the COPY_IN_CRC macro
; (presumably provided by igzip_buffer_utils_04.asm - confirm).
; NOTE(review): size is declared uint32_t but the full r8 is passed to the
; macro; whether the upper 32 bits matter depends on COPY_IN_CRC - confirm.
;; %if 0
global copy_in_crc_04
copy_in_crc_04:
%ifidn __OUTPUT_FORMAT__, elf64
; remap rdi, rsi, rdx, rcx -> rcx, rdx, r8, r9; the order matters because
; rcx and rdx are both a source and a destination
mov r9, rcx
mov r8, rdx
mov rdx, rsi
mov rcx, rdi
%endif
; Save xmm registers that need to be preserved.
; (done unconditionally, on elf64 as well as win64)
; 8 + 4*16 bytes: four 16-byte slots plus 8 so that, given a 16-byte aligned
; stack at the call site, rsp is 16-byte aligned for vmovdqa
sub rsp, 8 + 4*16
vmovdqa [rsp+0*16], xmm6
vmovdqa [rsp+1*16], xmm7
vmovdqa [rsp+2*16], xmm8
vmovdqa [rsp+3*16], xmm9
; load the 4 x 128-bit CRC state and the fold constant
vmovdqa crc_0, [r9 + 0*16]
vmovdqa crc_1, [r9 + 1*16]
vmovdqa crc_2, [r9 + 2*16]
vmovdqa crc_3, [r9 + 3*16]
vmovdqa crc_fold, [fold_4 WRT_OPT]
COPY_IN_CRC rcx, rdx, r8, tmp4, crc_0, crc_1, crc_2, crc_3, \
crc_fold, \
crc_tmp0, crc_tmp1, crc_tmp2, crc_tmp3, crc_tmp4
; store the updated CRC state back through the crc pointer
vmovdqa [r9 + 0*16], crc_0
vmovdqa [r9 + 1*16], crc_1
vmovdqa [r9 + 2*16], crc_2
vmovdqa [r9 + 3*16], crc_3
; restore the saved xmm registers and release the stack space
vmovdqa xmm9, [rsp+3*16]
vmovdqa xmm8, [rsp+2*16]
vmovdqa xmm7, [rsp+1*16]
vmovdqa xmm6, [rsp+0*16]
add rsp, 8 + 4*16
ret
; Convert 512-bit CRC data to real 32-bit value
; uint32_t crc_512to32(uint32_t *crc)
; arg 1: rcx: pointer to CRC
; returns: eax: 32 bit crc
; VEX-encoded variant. The pointer (rdi on elf64, moved to rcx below)
; addresses four 16-byte state words read with vmovdqa, so it must be 16-byte
; aligned. The state in memory is not modified. Only xmm0-xmm5 are used and
; no stack space is needed.
global crc_512to32_04
crc_512to32_04:
%ifidn __OUTPUT_FORMAT__, elf64
mov rcx, rdi
%endif
vmovdqa crc_0, [rcx + 0*16]
vmovdqa crc_1, [rcx + 1*16]
vmovdqa crc_2, [rcx + 2*16]
vmovdqa crc_3, [rcx + 3*16]
vmovdqa crc_fold, [rk1 WRT_OPT] ;k1
; fold the 4 xmm registers to 1 xmm register
; (crc_0 -> crc_1 -> crc_2 -> crc_3; the same rk1/rk2 pair is used at each step)
vmovdqa crc_tmp0, crc_0
vpclmulqdq crc_0, crc_fold, 0x1
vpclmulqdq crc_tmp0, crc_fold, 0x10
vpxor crc_1, crc_tmp0
vpxor crc_1, crc_0
vmovdqa crc_tmp0, crc_1
vpclmulqdq crc_1, crc_fold, 0x1
vpclmulqdq crc_tmp0, crc_fold, 0x10
vpxor crc_2, crc_tmp0
vpxor crc_2, crc_1
vmovdqa crc_tmp0, crc_2
vpclmulqdq crc_2, crc_fold, 0x1
vpclmulqdq crc_tmp0, crc_fold, 0x10
vpxor crc_3, crc_tmp0
vpxor crc_3, crc_2
; reduce the 128-bit value in crc_3 using rk5 (low qword) / rk6 (high qword)
vmovdqa crc_fold, [rk5 WRT_OPT]
vmovdqa crc_0, crc_3
; 64-bit fold: low qword times rk5, XORed with the original high qword
vpclmulqdq crc_3, crc_fold, 0
vpsrldq crc_0, 8
vpxor crc_3, crc_0
; 32-bit fold: shift left 4 bytes, low qword times rk6, XOR with the unshifted copy
vmovdqa crc_0, crc_3
vpslldq crc_3, 4
vpclmulqdq crc_3, crc_fold, 0x10
vpxor crc_3, crc_0
; Barrett reduction with rk7 (low qword) / rk8 (high qword)
vpand crc_3, [mask2 WRT_OPT]
vmovdqa crc_1, crc_3
vmovdqa crc_2, crc_3
vmovdqa crc_fold, [rk7 WRT_OPT]
vpclmulqdq crc_3, crc_fold, 0
vpxor crc_3, crc_2
vpand crc_3, [mask WRT_OPT]
vmovdqa crc_2, crc_3
vpclmulqdq crc_3, crc_fold, 0x10
vpxor crc_3, crc_2
vpxor crc_3, crc_1
; the result is dword 2 of crc_3, inverted before it is returned
vpextrd eax, crc_3, 2
not eax
ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
section .data
align 16
; Constants for crc_512to32_04. Each rkN is one qword; they are loaded in
; adjacent pairs with a 16-byte vmovdqa ([rk1] = rk1:rk2, [rk5] = rk5:rk6,
; [rk7] = rk7:rk8), so the order and the 16-byte alignment must be kept.
rk1: dq 0x00000000ccaa009e
rk2: dq 0x00000001751997d0
rk5: dq 0x00000000ccaa009e
rk6: dq 0x0000000163cd6124
rk7: dq 0x00000001f7011640
rk8: dq 0x00000001db710640
; mask keeps the low 64 bits; mask2 clears the low 32 bits
mask: dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
mask2: dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF

165
igzip/data_struct2.asm Normal file
View File

@ -0,0 +1,165 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; START_FIELDS
;; Begin a new structure description: resets the running field offset and
;; the largest field alignment seen so far to zero.
%macro START_FIELDS 0
%assign _FIELD_OFFSET 0
%assign _STRUCT_ALIGN 0
%endm
;; FIELD name size align
;; Append one field to the structure being described.
;;   %1 name  - symbol that is defined (equ) as the field's byte offset
;;   %2 size  - field size in bytes
;;   %3 align - required alignment; the mask arithmetic below assumes a power of two
;; Also raises _STRUCT_ALIGN to the largest alignment requested so far.
%macro FIELD 3
%define %%name %1
%define %%size %2
%define %%align %3
;; round the running offset up to the next multiple of align
%assign _FIELD_OFFSET (_FIELD_OFFSET + (%%align) - 1) & (~ ((%%align)-1))
%%name equ _FIELD_OFFSET
;; advance past this field
%assign _FIELD_OFFSET _FIELD_OFFSET + (%%size)
%if (%%align > _STRUCT_ALIGN)
%assign _STRUCT_ALIGN %%align
%endif
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Field offsets of the BitBuf2 structure (presumably mirrors the C struct of
;; the same name - keep both in sync).
;; Resulting offsets: _m_bits 0, _m_bit_count 8, _m_out_buf 16, _m_out_end 24,
;; _m_out_start 32; _BitBuf2_size = 40, _BitBuf2_align = 8.
START_FIELDS ;; BitBuf2
;; name size align
FIELD _m_bits, 8, 8
FIELD _m_bit_count, 4, 4
FIELD _m_out_buf, 8, 8
FIELD _m_out_end, 8, 8
FIELD _m_out_start, 8, 8
;; the size is the end of the last field; it is not rounded up to the alignment
%assign _BitBuf2_size _FIELD_OFFSET
%assign _BitBuf2_align _STRUCT_ALIGN
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Field offsets of the isal_zstate structure (presumably mirrors the C struct
;; of the same name - keep both in sync).
;; BSIZE and HASH_SIZE are not defined in this file and must be defined
;; before this point.
;; Notable offsets: _crc 16 (64 bytes = four 16-byte words, 16-byte aligned),
;; _bitbuf 80, _buffer 192 (32-byte aligned).
START_FIELDS ;; isal_zstate
;; name size align
FIELD _b_bytes_valid, 4, 4
FIELD _b_bytes_processed, 4, 4
FIELD _file_start, 8, 8
FIELD _crc, 64, 16
FIELD _bitbuf, _BitBuf2_size, _BitBuf2_align
FIELD _state, 4, 4
FIELD _count, 4, 4
FIELD _tmp_out_buff, 16, 1
FIELD _tmp_out_start, 4, 4
FIELD _tmp_out_end, 4, 4
FIELD _last_flush, 4, 4
FIELD _has_gzip_hdr, 4, 4
FIELD _has_eob, 4, 4
FIELD _has_eob_hdr, 4, 4
FIELD _left_over, 4, 4
FIELD _buffer, BSIZE+16, 32
FIELD _head, HASH_SIZE*2, 16
;; the size is the end of the last field; it is not rounded up to the alignment
%assign _isal_zstate_size _FIELD_OFFSET
%assign _isal_zstate_align _STRUCT_ALIGN
;; offsets of the embedded BitBuf2 members relative to the start of isal_zstate
_bitbuf_m_bits equ _bitbuf+_m_bits
_bitbuf_m_bit_count equ _bitbuf+_m_bit_count
_bitbuf_m_out_buf equ _bitbuf+_m_out_buf
_bitbuf_m_out_end equ _bitbuf+_m_out_end
_bitbuf_m_out_start equ _bitbuf+_m_out_start
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Field offsets of the isal_zstream structure (presumably mirrors the C
;; struct of the same name - keep both in sync).
;; Offsets: _next_in 0, _avail_in 8, _total_in 12, _next_out 16, _avail_out 24,
;; _total_out 28, _hufftables 32, _end_of_stream 40, _flush 44 and
;; _internal_state 64 (rounded up from 48 to the 32-byte alignment of isal_zstate).
START_FIELDS ;; isal_zstream
;; name size align
FIELD _next_in, 8, 8
FIELD _avail_in, 4, 4
FIELD _total_in, 4, 4
FIELD _next_out, 8, 8
FIELD _avail_out, 4, 4
FIELD _total_out, 4, 4
FIELD _hufftables, 8, 8
FIELD _end_of_stream, 4, 4
FIELD _flush, 4, 4
FIELD _internal_state, _isal_zstate_size, _isal_zstate_align
%assign _isal_zstream_size _FIELD_OFFSET
%assign _isal_zstream_align _STRUCT_ALIGN
;; offsets of the embedded isal_zstate members relative to the start of isal_zstream
_internal_state_b_bytes_valid equ _internal_state+_b_bytes_valid
_internal_state_b_bytes_processed equ _internal_state+_b_bytes_processed
_internal_state_file_start equ _internal_state+_file_start
_internal_state_crc equ _internal_state+_crc
_internal_state_bitbuf equ _internal_state+_bitbuf
_internal_state_state equ _internal_state+_state
_internal_state_count equ _internal_state+_count
_internal_state_tmp_out_buff equ _internal_state+_tmp_out_buff
_internal_state_tmp_out_start equ _internal_state+_tmp_out_start
_internal_state_tmp_out_end equ _internal_state+_tmp_out_end
_internal_state_last_flush equ _internal_state+_last_flush
_internal_state_has_gzip_hdr equ _internal_state+_has_gzip_hdr
_internal_state_has_eob equ _internal_state+_has_eob
_internal_state_has_eob_hdr equ _internal_state+_has_eob_hdr
_internal_state_left_over equ _internal_state+_left_over
_internal_state_buffer equ _internal_state+_buffer
_internal_state_head equ _internal_state+_head
;; offsets of the BitBuf2 members nested inside the embedded isal_zstate
_internal_state_bitbuf_m_bits equ _internal_state+_bitbuf_m_bits
_internal_state_bitbuf_m_bit_count equ _internal_state+_bitbuf_m_bit_count
_internal_state_bitbuf_m_out_buf equ _internal_state+_bitbuf_m_out_buf
_internal_state_bitbuf_m_out_end equ _internal_state+_bitbuf_m_out_end
_internal_state_bitbuf_m_out_start equ _internal_state+_bitbuf_m_out_start
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Values stored in the _state field of isal_zstate.
;; NOTE(review): presumably these mirror an enum on the C side; values 0 and 5
;; have no name in this file - confirm against the C header.
ZSTATE_HDR equ 1
ZSTATE_BODY equ 2
ZSTATE_FLUSH_READ_BUFFER equ 3
ZSTATE_SYNC_FLUSH equ 4
ZSTATE_TRL equ 6
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Flush mode values (presumably the values accepted in the _flush field of
;; isal_zstream - confirm against the C header).
_NO_FLUSH equ 0
_SYNC_FLUSH equ 1
_FULL_FLUSH equ 2
;; Stored-block constants.
;; NOTE(review): 65535 is the largest length a DEFLATE stored block can carry
;; in its 16-bit LEN field; presumably that is what _STORED_BLK_END encodes - confirm.
_STORED_BLK equ 0
%assign _STORED_BLK_END 65535
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@ -0,0 +1,81 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "reg_sizes.asm"
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; detect_repeated_char buf, size
;; Checks whether a buffer consists of one byte value repeated throughout.
;;
;; Arguments: buf  - pointer to the data (rdi on elf64, rcx on win64)
;;            size - number of bytes to check (rsi on elf64, rdx on win64)
;; Returns:   rax = the repeated byte value (0..255) when every 8-byte word
;;                  compared equals the replicated first byte,
;;            rax = -1 on the first mismatching word.
;; Clobbers:  r10, and advances the buf register.
;;
;; NOTE(review): the loop compares 8 bytes per iteration and only tests the
;; end pointer after each compare, so at least 8 bytes are always read and a
;; size that is not a multiple of 8 reads past buf + size. Callers presumably
;; pass a non-zero multiple of 8 -- confirm.
;; NOTE(review): buf and size are only defined for elf64 and win64 output
;; formats; there is no fallback for other formats.
%ifidn __OUTPUT_FORMAT__, elf64
%define buf rdi
%define size rsi
%elifidn __OUTPUT_FORMAT__, win64
%define buf rcx
%define size rdx
%endif ; output formats
%define tmp r10
global detect_repeated_char
detect_repeated_char:
;; replicate the 1st byte to 8 bytes
xor tmp, tmp
xor rax, rax
;; al = first byte, then ax = that byte twice
mov al, [buf]
mov ah, al
;; copy the 2-byte pattern into the upper half of eax -> byte x4
mov tmp %+ w, ax
shl tmp, 16
or eax, tmp %+ d
;; copy the 4-byte pattern into the upper half of rax -> byte x8
mov tmp %+ d, eax
shl tmp, 32
or rax, tmp
;; detect the 8K input
;; tmp = one past the last byte to check
lea tmp, [buf + size]
_loop:
;; compare the next 8 bytes of input against the replicated pattern
cmp rax, [buf]
jne _fail
add buf, 8
cmp buf, tmp
jb _loop
;; all words matched: reduce the pattern back to the single byte value
shr rax, 56
jmp _end
_fail:
;; mismatch: -1 cannot be confused with a byte value in 0..255
mov rax, -1
_end:
ret
%undef buf
%undef size
%undef tmp

View File

@ -0,0 +1,118 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdint.h>
#include <stdio.h>
#include "huff_codes.h"
#include "bitbuf2.h"
/* Size in bytes of the buffer that receives the generated deflate header. */
#define MAX_HEADER_SIZE 350
/* Size in bytes of the constant input block. Parenthesized so the macro
 * expands safely inside larger expressions (e.g. x / BLOCK_SIZE). */
#define BLOCK_SIZE (16 * 1024)
/**
 * @brief Prints a header as a C array initializer named "data".
 * @param outfile: the file the array is printed to.
 * @param header: the header bytes.
 * @param bit_count: length of the header in bits.
 *
 * Prints bit_count / 8 bytes followed by one more byte (the byte holding
 * the trailing partial bits), eight values per line.
 */
void fprint_header(FILE * outfile, uint8_t * header, uint64_t bit_count)
{
	int pos = 0;

	fputs("unsigned char data[] = {", outfile);

	for (;;) {
		/* Start a fresh line every eight values, otherwise separate
		 * with a single space. */
		fputs((pos & 7) ? " " : "\n\t", outfile);

		/* The byte at index bit_count / 8 is the last one and is
		 * printed below without a trailing comma. */
		if (!(pos < bit_count / 8))
			break;

		fprintf(outfile, "0x%02x,", header[pos]);
		pos++;
	}

	fprintf(outfile, "0x%02x", header[pos]);
	fputs("\t};\n\n", outfile);
}
/**
 * @brief Builds and prints a deflate header for a block of one repeated byte.
 *
 * Fills a BLOCK_SIZE buffer with a single byte value, builds its histogram,
 * overrides a few histogram entries by hand, derives literal/length and
 * distance huffman codes and prints the resulting header to stdout.
 *
 * @returns 0 on success, 1 if a huffman lookup table could not be created.
 */
int main(int argc, char **argv)
{
	/* Generates a header for a constant block, along with some manual
	 * twiddling to create a header with the desired properties*/
	uint8_t stream[BLOCK_SIZE];
	struct isal_huff_histogram histogram;
	uint64_t *lit_histogram = histogram.lit_len_histogram;
	uint64_t *dist_histogram = histogram.dist_histogram;
	uint8_t header[MAX_HEADER_SIZE];
	struct huff_tree lit_tree, dist_tree;
	struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1];
	struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
	uint64_t bit_count;
	uint8_t repeated_char = 0x00;

	memset(header, 0, sizeof(header));
	memset(&histogram, 0, sizeof(histogram));	/* Initialize histograms. */
	memset(stream, repeated_char, sizeof(stream));
	memset(lit_tree_array, 0, sizeof(lit_tree_array));
	memset(dist_tree_array, 0, sizeof(dist_tree_array));
	memset(lit_huff_table, 0, sizeof(lit_huff_table));
	memset(dist_huff_table, 0, sizeof(dist_huff_table));

	isal_update_histogram(stream, sizeof(stream), &histogram);

	/* These are set to manually change the histogram to create a header with the
	 * desired properties. In this case, the header is modified so that it is byte
	 * unaligned by 6 bits, so that 0 is a 2 bit code, so that the header plus the
	 * encoding of one 0 is byte aligned*/
	lit_histogram[repeated_char] = 20;
	lit_histogram[280] = 2;
	lit_histogram[264] = 5;
	lit_histogram[282] = 0;

	lit_tree = create_symbol_subset_huff_tree(lit_tree_array, lit_histogram, LIT_LEN);
	dist_tree = create_symbol_subset_huff_tree(dist_tree_array, dist_histogram, DIST_LEN);

	if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, 15) > 0) {
		printf("Error, code with invalid length for Deflate standard.\n");
		return 1;
	}

	if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, 15) > 0) {
		printf("Error, code with invalid length for Deflate standard.\n");
		return 1;
	}

	/* Remove literal symbol corresponding to the unoptimal look back
	 * distance of 258 found by gen_histogram*/
	dist_huff_table[16].length = 0;

	bit_count = create_header(header, sizeof(header), lit_huff_table, dist_huff_table, 1);

	printf("Header for %x\n", repeated_char);
	/* uint64_t is not necessarily unsigned long (e.g. 64-bit Windows and
	 * 32-bit targets), so print through unsigned long long. */
	fprintf(stdout, "Complete Bytes: %llu\n", (unsigned long long)(bit_count / 8));
	fprintf(stdout, "Byte Offset: %llu\n\n", (unsigned long long)(bit_count & 7));
	fprint_header(stdout, header, bit_count);
	printf("\n");

	return 0;
}

View File

@ -0,0 +1,425 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
/* This program can be used to generate a custom huffman encoding to get
* better data compression. This is most useful when the type of data being
* compressed is well known.
*
* To use generate_custom_hufftables, pass a sequence of files to the program
* that together form an accurate representation of the data that is being
* compressed. Generate_custom_hufftables will then produce the file
* hufftables_c.c, which should be moved to replace its counterpart in the igzip
* source folder. After recompiling the Isa-l library, the igzip compression
* functions will use the new hufftables.
*
* Generate_custom_hufftables should be compiled with the same compile time
* parameters as the igzip source code. Generating custom hufftables with
* different compile time parameters may cause igzip to produce invalid output
* for the reasons described below. The default parameters used by
* generate_custom_hufftables are the same as the default parameters used by
* igzip.
*
* *WARNING* generate custom hufftables must be compiled with a HIST_SIZE that
* is at least as large as the HIST_SIZE used by igzip. By default HIST_SIZE is
* 8, the maximum usable HIST_SIZE is 32. The reason for this is to generate
* better compression. Igzip cannot produce look back distances with sizes
* larger than the HIST_SIZE * 1024 igzip was compiled with, so look back
* distances with sizes larger than HIST_SIZE * 1024 are not assigned a huffman
* code.
*
* To improve compression ratio, the compile time option LIT_SUB is provided to
* allow generating custom hufftables which only use a subset of all possible
* literals. This can be useful for getting better compression when it is known
* that the data being compressed will never contain certain symbols, for
* example text files. If this option is used, it needs to be checked that every
* possible literal is in fact given a valid code in the output hufftable. This
* can be done by checking that every required literal has a positive value for
* the length of the code associated with that literal. Literals which have not
* been given codes will have a code length of zero. The compile time option
* PRINT_CODES (described below) can be used to help manually perform this
* check.
*
* The compile time parameter PRINT_CODES causes the literal/length huffman code
* and the distance huffman code created by generate_custom_hufftables to be
* printed out. This is printed out where each line corresponds to a different
* symbol. The first column is the symbol used to represent each literal (Lit),
* end of block symbol (EOB), length (Len) or distance (Dist), the second column
* is the associated code value, and the third column is the length in bits of
* that code.
*/
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>
#include "huff_codes.h"
#include "bitbuf2.h"
/*These max code lengths are limited by how the data is stored in
* hufftables.asm. The deflate standard max is 15.*/
#define LONG_DCODE_OFFSET 26
#define SHORT_DCODE_OFFSET 20
#define MAX_HEADER_SIZE IGZIP_MAX_DEF_HDR_SIZE
#define GZIP_HEADER_SIZE 10
#define GZIP_TRAILER_SIZE 8
/**
 * @brief Prints a table of uint8_t elements to a file.
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed. When 0, only the header
 *        and footer are printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint8_table(FILE * outfile, uint8_t * table, uint64_t length, char *header,
			char *footer, char *begin_line)
{
	/* Unsigned 64-bit index: matches the type of length and avoids the
	 * wrap-around of "length - 1" when length is 0. */
	uint64_t i;

	fprintf(outfile, "%s", header);

	for (i = 0; i < length; i++) {
		/* Eight values per line. */
		if ((i & 7) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");

		fprintf(outfile, "0x%02x", table[i]);

		/* Every element except the last is followed by a comma. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}

	fprintf(outfile, "%s", footer);
}
/**
 * @brief Prints a table of uint16_t elements to a file.
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed. When 0, only the header
 *        and footer are printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint16_table(FILE * outfile, uint16_t * table, uint64_t length, char *header,
			 char *footer, char *begin_line)
{
	/* Unsigned 64-bit index: matches the type of length and avoids the
	 * wrap-around of "length - 1" when length is 0. */
	uint64_t i;

	fprintf(outfile, "%s", header);

	for (i = 0; i < length; i++) {
		/* Eight values per line. */
		if ((i & 7) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");

		fprintf(outfile, "0x%04x", table[i]);

		/* Every element except the last is followed by a comma. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}

	fprintf(outfile, "%s", footer);
}
/**
 * @brief Prints a table of uint32_t elements to a file.
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed. When 0, only the header
 *        and footer are printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint32_table(FILE * outfile, uint32_t * table, uint64_t length, char *header,
			 char *footer, char *begin_line)
{
	/* Unsigned 64-bit index: matches the type of length and avoids the
	 * wrap-around of "length - 1" when length is 0. */
	uint64_t i;

	fprintf(outfile, "%s", header);

	for (i = 0; i < length; i++) {
		/* Four values per line. The last element gets the same
		 * newline + begin_line prefix as every other line start, so it
		 * is never glued to the previous line or to the header (which
		 * may be a preprocessor directive). */
		if ((i & 3) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");

		fprintf(outfile, "0x%08x", table[i]);

		/* Every element except the last is followed by a comma. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}

	fprintf(outfile, "%s", footer);
}
/**
 * @brief Prints a table of uint64_t elements to a file.
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed. When 0, only the header
 *        line and the footer are printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 */
void fprint_uint64_table(FILE * outfile, uint64_t * table, uint64_t length, char *header,
			 char *footer)
{
	/* Unsigned 64-bit index: matches the type of length and avoids the
	 * wrap-around of "length - 1" when length is 0. */
	uint64_t i;

	fprintf(outfile, "%s\n", header);

	/* One value per line; every value except the last ends in ",\n". */
	for (i = 0; i < length; i++) {
		fprintf(outfile, "\t0x%016" PRIx64, table[i]);
		if (i + 1 < length)
			fprintf(outfile, ",\n");
	}

	fprintf(outfile, "%s", footer);
}
/**
 * @brief Prints the initializer of "struct isal_hufftables hufftables_default".
 * @param output_file: the file the structure is printed to.
 * @param header: deflate header bytes.
 * @param bit_count: length of the deflate header in bits.
 * @param lit_code_table: literal codes.
 * @param lit_code_size_table: literal code lengths.
 * @param dcodes_code_table: distance codes (DIST_LEN entries).
 * @param dcodes_code_size_table: distance code lengths (DIST_LEN entries).
 * @param packed_len_table: packed length table (LEN_TABLE_SIZE entries).
 * @param packed_dist_table: packed distance table (LONG_DIST_TABLE_SIZE entries).
 *
 * The generated text contains LONGER_HUFFTABLE preprocessor conditionals so
 * that one generated file serves both table sizes.
 */
void fprint_hufftables(FILE * output_file, uint8_t * header, uint32_t bit_count,
		       uint16_t * lit_code_table, uint8_t * lit_code_size_table,
		       uint16_t * dcodes_code_table, uint8_t * dcodes_code_size_table,
		       uint32_t * packed_len_table, uint32_t * packed_dist_table)
{
	fprintf(output_file, "struct isal_hufftables hufftables_default = {\n\n");

	/* Header bytes, rounded up so the trailing partial byte is included. */
	fprint_uint8_table(output_file, header, (bit_count + 7) / 8,
			   "\t.deflate_hdr = {", "\t},\n\n", "\t\t");

	/* bit_count is unsigned, so print it with %u rather than %d. */
	fprintf(output_file, "\t.deflate_hdr_count = %u,\n", bit_count / 8);
	fprintf(output_file, "\t.deflate_hdr_extra_bits = %u,\n\n", bit_count & 7);

	/* The first SHORT_DIST_TABLE_SIZE entries are always emitted; the
	 * remainder up to LONG_DIST_TABLE_SIZE is wrapped in
	 * #ifdef LONGER_HUFFTABLE inside the same initializer. */
	fprint_uint32_table(output_file, packed_dist_table, SHORT_DIST_TABLE_SIZE,
			    "\t.dist_table = {", ",\n", "\t\t");
	fprint_uint32_table(output_file, &packed_dist_table[SHORT_DIST_TABLE_SIZE],
			    LONG_DIST_TABLE_SIZE - SHORT_DIST_TABLE_SIZE,
			    "#ifdef LONGER_HUFFTABLE",
			    "\n#endif /* LONGER_HUFFTABLE */\n\t},\n\n", "\t\t");

	fprint_uint32_table(output_file, packed_len_table, LEN_TABLE_SIZE, "\t.len_table = {",
			    "\t},\n\n", "\t\t");
	fprint_uint16_table(output_file, lit_code_table, LIT_TABLE_SIZE, "\t.lit_table = {",
			    "\t},\n\n", "\t\t");
	fprint_uint8_table(output_file, lit_code_size_table, LIT_TABLE_SIZE,
			   "\t.lit_table_sizes = {", "\t},\n\n", "\t\t");

	/* Distance codes: only the entries from the relevant offset onward
	 * are stored, and the offset depends on LONGER_HUFFTABLE. */
	fprintf(output_file, "#ifndef LONGER_HUFFTABLE\n");
	fprint_uint16_table(output_file, dcodes_code_table + SHORT_DCODE_OFFSET,
			    DIST_LEN - SHORT_DCODE_OFFSET, "\t.dcodes = {", "\t},\n\n",
			    "\t\t");
	fprint_uint8_table(output_file, dcodes_code_size_table + SHORT_DCODE_OFFSET,
			   DIST_LEN - SHORT_DCODE_OFFSET, "\t.dcodes_sizes = {", "\t}\n",
			   "\t\t");
	fprintf(output_file, "#else\n");
	fprint_uint16_table(output_file, dcodes_code_table + LONG_DCODE_OFFSET,
			    DIST_LEN - LONG_DCODE_OFFSET, "\t.dcodes = {", "\t},\n\n", "\t\t");
	fprint_uint8_table(output_file, dcodes_code_size_table + LONG_DCODE_OFFSET,
			   DIST_LEN - LONG_DCODE_OFFSET, "\t.dcodes_sizes = {", "\t}\n",
			   "\t\t");
	fprintf(output_file, "#endif\n");

	fprintf(output_file, "};\n");
}
/**
 * @brief Prints the complete generated hufftables source file.
 *
 * Emits the include lines, the fixed gzip header bytes, the gzip header and
 * trailer sizes, and then the hufftables structure via fprint_hufftables().
 * All table arguments are forwarded unchanged.
 */
void fprint_header(FILE * output_file, uint8_t * header, uint32_t bit_count,
		   uint16_t * lit_code_table, uint8_t * lit_code_size_table,
		   uint16_t * dcodes_code_table, uint8_t * dcodes_code_size_table,
		   uint32_t * packed_len_table, uint32_t * packed_dist_table)
{
	/* Preamble of the generated file. */
	fputs("#include <stdint.h>\n" "#include <igzip_lib.h>\n\n", output_file);

	/* Fixed gzip header bytes. */
	fputs("const uint8_t gzip_hdr[] = {\n"
	      "\t0x1f, 0x8b, 0x08, 0x00, 0x00,\n" "\t0x00, 0x00, 0x00, 0x00, 0xff\t};\n\n",
	      output_file);

	fprintf(output_file,
		"const uint32_t gzip_hdr_bytes = %d;\n" "const uint32_t gzip_trl_bytes = %d;\n\n",
		GZIP_HEADER_SIZE, GZIP_TRAILER_SIZE);

	fprint_hufftables(output_file, header, bit_count, lit_code_table, lit_code_size_table,
			  dcodes_code_table, dcodes_code_size_table, packed_len_table,
			  packed_dist_table);
}
/**
 * @brief Generates hufftables_c.c from the symbol statistics of the input files.
 *
 * Every command line argument is read as a sample file and added to a shared
 * histogram. Huffman codes are derived from the histogram and written, with
 * the matching deflate header, to the file hufftables_c.c in the current
 * directory.
 *
 * @returns 0 on success, 1 on any error (no input, I/O failure, allocation
 *          failure, or huffman codes that cannot be made usable).
 */
int main(int argc, char *argv[])
{
	long int file_length;
	size_t bytes_read;
	uint8_t *stream = NULL;
	struct isal_huff_histogram histogram;
	uint64_t *lit_histogram = histogram.lit_len_histogram;
	uint64_t *dist_histogram = histogram.dist_histogram;
	uint8_t header[MAX_HEADER_SIZE];
	FILE *file;
	struct huff_tree lit_tree, dist_tree;
	struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1];
	struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
	uint64_t bit_count;
	uint32_t packed_len_table[LEN_TABLE_SIZE];
	uint32_t packed_dist_table[LONG_DIST_TABLE_SIZE];
	uint16_t lit_code_table[LIT_TABLE_SIZE];
	uint16_t dcodes_code_table[DIST_LEN];
	uint8_t lit_code_size_table[LIT_TABLE_SIZE];
	uint8_t dcodes_code_size_table[DIST_LEN];
	int max_dist = convert_dist_to_dist_sym(D);

	if (argc == 1) {
		printf("Error, no input file.\n");
		return 1;
	}

	memset(&histogram, 0, sizeof(histogram));	/* Initialize histograms. */
	memset(lit_tree_array, 0, sizeof(lit_tree_array));
	memset(dist_tree_array, 0, sizeof(dist_tree_array));
	memset(lit_huff_table, 0, sizeof(lit_huff_table));
	memset(dist_huff_table, 0, sizeof(dist_huff_table));

	/* Input files are consumed from the last argument to the first. */
	while (argc > 1) {
		printf("Processing %s\n", argv[argc - 1]);
		/* Binary mode: the samples are raw data and must not go
		 * through text-mode newline translation. */
		file = fopen(argv[argc - 1], "rb");
		if (file == NULL) {
			printf("Error opening file\n");
			return 1;
		}

		/* Determine the file size, checking each positioning call. */
		if (fseek(file, 0, SEEK_END) != 0 || (file_length = ftell(file)) < 0
		    || fseek(file, 0, SEEK_SET) != 0) {
			printf("Error determining file size\n");
			fclose(file);
			return 1;
		}

		/* An empty file adds nothing to the histogram; skip it rather
		 * than depend on what malloc(0) returns. */
		if (file_length == 0) {
			fclose(file);
			argc--;
			continue;
		}

		stream = malloc(file_length);
		if (stream == NULL) {
			printf("Failed to allocate memory to read in file\n");
			fclose(file);
			return 1;
		}

		/* A short read would leave part of stream uninitialized. */
		bytes_read = fread(stream, 1, file_length, file);
		if (bytes_read != (size_t) file_length || ferror(file)) {
			printf("Error occurred when reading file");
			fclose(file);
			free(stream);
			return 1;
		}

		/* Create a histogram of frequency of symbols found in stream to
		 * generate the huffman tree.*/
		isal_update_histogram(stream, file_length, &histogram);

		fclose(file);
		free(stream);
		argc--;
	}

	/* Create a huffman tree corresponding to the histograms created in
	 * gen_histogram*/
#ifdef LIT_SUB
	int j;
	/* Guarantee every possible repeat length is given a symbol. It is hard
	 * to guarantee data will never have a repeat of a given length */
	for (j = LIT_TABLE_SIZE; j < LIT_LEN; j++)
		if (lit_histogram[j] == 0)
			lit_histogram[j]++;

	lit_tree = create_symbol_subset_huff_tree(lit_tree_array, lit_histogram, LIT_LEN);
#else
	lit_tree = create_huff_tree(lit_tree_array, lit_histogram, LIT_LEN);
#endif
	dist_tree = create_huff_tree(dist_tree_array, dist_histogram, max_dist + 1);

	/* Create a look up table to represent huffman tree above in deflate
	 * standard form after it has been modified to satisfy max depth
	 * criteria.*/
	if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, MAX_DEFLATE_CODE_LEN) > 0) {
		printf("Error, code with invalid length for Deflate standard.\n");
		return 1;
	}

	if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, MAX_DEFLATE_CODE_LEN) > 0) {
		printf("Error, code with invalid length for Deflate standard.\n");
		return 1;
	}

	/* A non-zero result means the tables are not usable as they are. Retry
	 * with the shorter "safe" code length limits. Each failure check is
	 * braced so that only a real failure exits. */
	if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
		if (create_huff_lookup
		    (lit_huff_table, LIT_LEN, lit_tree, MAX_SAFE_LIT_CODE_LEN) > 0) {
			printf("Error, code with invalid length for Deflate standard.\n");
			return 1;
		}

		if (create_huff_lookup
		    (dist_huff_table, DIST_LEN, dist_tree, MAX_SAFE_DIST_CODE_LEN) > 0) {
			printf("Error, code with invalid length for Deflate standard.\n");
			return 1;
		}

		if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
			printf("Error, hufftable is not usable\n");
			return 1;
		}
	}
#ifdef PRINT_CODES
	int i;
	printf("Lit/Len codes\n");
	for (i = 0; i < LIT_TABLE_SIZE - 1; i++)
		printf("Lit %3d: Code 0x%04x, Code_Len %d\n", i, lit_huff_table[i].code,
		       lit_huff_table[i].length);

	printf("EOB %3d: Code 0x%04x, Code_Len %d\n", 256, lit_huff_table[256].code,
	       lit_huff_table[256].length);

	for (i = LIT_TABLE_SIZE; i < LIT_LEN; i++)
		printf("Len %d: Code 0x%04x, Code_Len %d\n", i, lit_huff_table[i].code,
		       lit_huff_table[i].length);
	printf("\n");

	printf("Dist codes \n");
	for (i = 0; i < DIST_LEN; i++)
		printf("Dist %2d: Code 0x%04x, Code_Len %d\n", i, dist_huff_table[i].code,
		       dist_huff_table[i].length);
	printf("\n");
#endif

	/* Convert the huffman codes into the table layouts written out below. */
	create_code_tables(lit_code_table, lit_code_size_table, LIT_TABLE_SIZE,
			   lit_huff_table);
	create_code_tables(dcodes_code_table, dcodes_code_size_table, DIST_LEN,
			   dist_huff_table);
	create_packed_len_table(packed_len_table, lit_huff_table);
	create_packed_dist_table(packed_dist_table, LONG_DIST_TABLE_SIZE, dist_huff_table);

	bit_count =
	    create_header(header, sizeof(header), lit_huff_table, dist_huff_table, LAST_BLOCK);

	file = fopen("hufftables_c.c", "w");
	if (file == NULL) {
		printf("Error creating file hufftables_c.c\n");
		return 1;
	}

	fprint_header(file, header, bit_count, lit_code_table, lit_code_size_table,
		      dcodes_code_table, dcodes_code_size_table, packed_len_table,
		      packed_dist_table);

	fclose(file);

	return 0;
}

964
igzip/huff_codes.c Normal file
View File

@ -0,0 +1,964 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <immintrin.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "igzip_lib.h"
#include "huff_codes.h"
#include "huffman.h"
#define LENGTH_BITS 5
/* The order code length codes are written in the dynamic code header. This is
* defined in RFC 1951 page 13 */
static const uint8_t code_length_code_order[] =
{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
/**
 * Inserts element into the min-heap (ordered by frequency) and returns the
 * index at which it was stored. The heap must have room for one more entry.
 */
int heap_push(struct huff_tree element, struct histheap *heap)
{
	uint16_t slot;
	uint16_t above;

	assert(heap->size < MAX_HISTHEAP_SIZE);

	/* Start at the first free slot at the bottom of the heap. */
	slot = heap->size;
	heap->size += 1;

	/* Move larger ancestors down until the new element fits. */
	while (slot != 0) {
		above = (slot - 1) / 2;
		if (!(heap->tree[above].frequency > element.frequency))
			break;
		heap->tree[slot] = heap->tree[above];
		slot = above;
	}

	heap->tree[slot] = element;

	return slot;
}
struct huff_tree heap_pop(struct histheap *heap)
{
struct huff_tree root, temp;
uint16_t index = 0;
uint16_t child = 1;
assert(heap->size > 0);
root = heap->tree[index];
heap->size--;
heap->tree[index] = heap->tree[heap->size];
while (child + 1 < heap->size) {
if (heap->tree[child].frequency < heap->tree[index].frequency
|| heap->tree[child + 1].frequency < heap->tree[index].frequency) {
if (heap->tree[child].frequency > heap->tree[child + 1].frequency)
child += 1;
temp = heap->tree[index];
heap->tree[index] = heap->tree[child];
heap->tree[child] = temp;
index = child;
child = 2 * child + 1;
} else {
break;
}
}
if (child < heap->size) {
if (heap->tree[child].frequency < heap->tree[index].frequency) {
temp = heap->tree[index];
heap->tree[index] = heap->tree[child];
heap->tree[child] = temp;
}
}
return root;
}
struct linked_list_node *pop_from_front(struct linked_list *list)
{
struct linked_list_node *temp;
temp = list->start;
if (list->start != NULL) {
list->start = list->start->next;
if (list->start != NULL)
list->start->previous = NULL;
else
list->end = NULL;
list->length -= 1;
}
return temp;
}
/**
 * Links new_element in as the first node of the list.
 */
void append_to_front(struct linked_list *list, struct linked_list_node *new_element)
{
	struct linked_list_node *old_head = list->start;

	new_element->next = old_head;
	new_element->previous = NULL;

	if (old_head == NULL)
		list->end = new_element;	/* the list was empty */
	else
		old_head->previous = new_element;

	list->start = new_element;
	list->length += 1;
}
/**
 * Links new_element in as the last node of the list.
 */
void append_to_back(struct linked_list *list, struct linked_list_node *new_element)
{
	struct linked_list_node *old_tail = list->end;

	new_element->previous = old_tail;
	new_element->next = NULL;

	if (old_tail == NULL)
		list->start = new_element;	/* the list was empty */
	else
		old_tail->next = new_element;

	list->end = new_element;
	list->length += 1;
}
/* Scans the buffer with a hash-based match finder and adds the symbols it
 * would emit to the histogram: literal bytes and match length symbols go to
 * lit_len_histogram, match distance symbols to dist_histogram, and index 256
 * of lit_len_histogram is incremented once per call. Existing counts are
 * kept, so the function can be called repeatedly to accumulate statistics.
 * Does nothing when length <= 0. */
void isal_update_histogram(uint8_t * start_stream, int length,
struct isal_huff_histogram *histogram)
{
uint32_t literal = 0, hash;
/* Most recent position seen for each hash value. */
uint8_t *last_seen[HASH_SIZE];
uint8_t *current, *seen, *end_stream, *next_hash, *end;
uint32_t match_length;
uint32_t dist;
uint64_t *lit_len_histogram = histogram->lit_len_histogram;
uint64_t *dist_histogram = histogram->dist_histogram;
if (length <= 0)
return;
end_stream = start_stream + length;
memset(last_seen, 0, sizeof(last_seen)); /* Initialize last_seen to be 0. */
/* Main scan. Each step reads 4 bytes at current, so the loop stops 3 bytes
 * before the end of the buffer. */
for (current = start_stream; current < end_stream - 3; current++) {
literal = *(uint32_t *) current;
hash = compute_hash(literal) & HASH_MASK;
seen = last_seen[hash];
last_seen[hash] = current;
/* NOTE(review): unused table entries are NULL, so for a first occurrence
 * this is a difference against a null pointer truncated to 32 bits; the
 * code appears to rely on that value not being below D -- confirm. */
dist = current - seen;
if (dist < D) {
match_length = compare258(seen, current, end_stream - current);
if (match_length >= SHORTEST_MATCH) {
/* Record hashes for positions covered by the match (only the first
 * few when LIMIT_HASH_UPDATE is defined), never reading past the
 * last position where a 4-byte load is in bounds. */
next_hash = current;
#ifdef LIMIT_HASH_UPDATE
end = next_hash + 3;
#else
end = next_hash + match_length;
#endif
if (end > end_stream - 3)
end = end_stream - 3;
next_hash++;
for (; next_hash < end; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH_MASK;
last_seen[hash] = next_hash;
}
dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
lit_len_histogram[convert_length_to_len_sym(match_length)] +=
1;
/* Skip the matched bytes; the loop increment adds the final step. */
current += match_length - 1;
continue;
}
}
/* No usable match: count the byte at current as a literal. */
lit_len_histogram[literal & 0xFF] += 1;
}
/* Tail handling for the bytes the main loop does not visit.
 * NOTE(review): this works from whatever value literal last held. For
 * inputs shorter than 4 bytes the loop never runs and literal is still 0,
 * and after a match that skips past end_stream - 3 it holds an earlier
 * load -- confirm that the resulting counts are acceptable. */
literal = literal >> 8;
hash = compute_hash(literal) & HASH_MASK;
seen = last_seen[hash];
last_seen[hash] = current;
dist = current - seen;
if (dist < D) {
match_length = compare258(seen, current, end_stream - current);
if (match_length >= SHORTEST_MATCH) {
dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
lit_len_histogram[convert_length_to_len_sym(match_length)] += 1;
lit_len_histogram[256] += 1;
return;
}
/* NOTE(review): the else below pairs with "if (dist < D)", so when a
 * candidate within D yields a match shorter than SHORTEST_MATCH the
 * first tail byte is not counted -- confirm this is intended. */
} else
lit_len_histogram[literal & 0xFF] += 1;
lit_len_histogram[(literal >> 8) & 0xFF] += 1;
lit_len_histogram[(literal >> 16) & 0xFF] += 1;
lit_len_histogram[256] += 1;
return;
}
/**
 * Maps a look back distance in the range 1..32768 to its distance symbol.
 * Returns ~0 (an invalid distance code) for distances above 32768 when the
 * assertion is compiled out.
 */
uint32_t convert_dist_to_dist_sym(uint32_t dist)
{
	uint32_t extra_bits;

	assert(dist <= 32768 && dist > 0);

	/* Distances 1 and 2 map directly to symbols 0 and 1. */
	if (dist <= 2)
		return dist - 1;

	/* Beyond that, every doubling of the distance range adds two symbols,
	 * and each symbol covers 2^extra_bits consecutive distances. */
	for (extra_bits = 0; extra_bits < 14; extra_bits++) {
		if (dist <= ((uint32_t) 4 << extra_bits))
			return 2 * extra_bits + ((dist - 1) >> extra_bits);
	}

	return ~0;		/* ~0 is an invalid distance code */
}
/**
 * Maps a match length in the range 3..258 to its length symbol (257..285).
 */
uint32_t convert_length_to_len_sym(uint32_t length)
{
	uint32_t extra_bits;

	assert(length > 2 && length < 259);

	/* Based on tables on page 11 in RFC 1951 */

	/* Length 258 has its own symbol. */
	if (length >= 258)
		return 285;

	/* Each group of four symbols covers lengths below 3 + 8 * 2^extra_bits,
	 * with 2^extra_bits lengths per symbol. */
	for (extra_bits = 0; extra_bits < 6; extra_bits++) {
		if (length < 3 + ((uint32_t) 8 << extra_bits))
			return 257 + 4 * extra_bits + ((length - 3) >> extra_bits);
	}

	return 285;
}
struct huff_tree create_symbol_subset_huff_tree(struct huff_tree *tree_array,
uint64_t * histogram, uint32_t size)
{
/* Assumes there are at least 2 symbols. */
int i;
uint32_t node_index;
struct huff_tree tree;
struct histheap heap;
heap.size = 0;
tree.right = tree.left = NULL;
/* Intitializes heap for construction of the huffman tree */
for (i = 0; i < size; i++) {
tree.value = i;
tree.frequency = histogram[i];
tree_array[i] = tree;
/* If symbol does not appear (has frequency 0), ignore it. */
if (tree_array[i].frequency != 0)
heap_push(tree, &heap);
}
node_index = size;
/* Construct the huffman tree */
while (heap.size > 1) {
tree = heap_pop(&heap);
tree_array[node_index].frequency = tree.frequency;
tree_array[node_index].left = &tree_array[tree.value];
tree = heap_pop(&heap);
tree_array[node_index].frequency += tree.frequency;
tree_array[node_index].right = &tree_array[tree.value];
tree_array[node_index].value = node_index;
heap_push(tree_array[node_index], &heap);
node_index += 1;
}
return heap_pop(&heap);
}
struct huff_tree create_huff_tree(struct huff_tree *tree_array, uint64_t * histogram,
uint32_t size)
{
int i;
uint32_t node_index;
struct huff_tree tree;
struct histheap heap;
heap.size = 0;
tree.right = tree.left = NULL;
/* Intitializes heap for construction of the huffman tree */
for (i = 0; i < size; i++) {
tree.value = i;
tree.frequency = histogram[i];
tree_array[i] = tree;
heap_push(tree, &heap);
}
node_index = size;
/* Construct the huffman tree */
while (heap.size > 1) {
tree = heap_pop(&heap);
tree_array[node_index].frequency = tree.frequency;
tree_array[node_index].left = &tree_array[tree.value];
tree = heap_pop(&heap);
tree_array[node_index].frequency += tree.frequency;
tree_array[node_index].right = &tree_array[tree.value];
tree_array[node_index].value = node_index;
heap_push(tree_array[node_index], &heap);
node_index += 1;
}
return heap_pop(&heap);
}
/*
 * Fills huff_lookup_table with depth-limited code lengths and the matching
 * codes for the tree rooted at root. Returns 0 on success and 1 when the
 * code lengths could not be limited to max_depth.
 */
int create_huff_lookup(struct huff_code *huff_lookup_table, int table_length,
		       struct huff_tree root, uint8_t max_depth)
{
	/* length_hist[n] = number of symbols that were given code length n */
	uint16_t length_hist[MAX_HUFF_TREE_DEPTH + 1];

	memset(length_hist, 0, sizeof(length_hist));

	if (find_code_lengths(huff_lookup_table, length_hist, root, max_depth) == 0) {
		set_huff_codes(huff_lookup_table, table_length, length_hist);
		return 0;
	}

	return 1;
}
/*
* Assigns a code length of at most max_depth to every leaf of the huffman
* tree rooted at root.
*
* huff_lookup_table: output; [symbol].length is written for every leaf found
*                    in the tree. Entries of symbols that are not in the tree
*                    are not touched.
* count:             output histogram; count[n] is incremented once for each
*                    symbol given length n (create_huff_lookup zeroes it
*                    before calling).
* root:              root of the huffman tree being analyzed.
* max_depth:         longest code length allowed in the result.
*
* Returns 0 on success and 1 when the fix-up pass finds no shallower leaf
* left to push down.
*/
int find_code_lengths(struct huff_code *huff_lookup_table, uint16_t * count,
struct huff_tree root, uint8_t max_depth)
{
/* One list per depth 0..MAX_HUFF_TREE_DEPTH, plus a final overflow list for
* every leaf deeper than that (see huffman_tree_traversal). */
struct linked_list depth_array[MAX_HUFF_TREE_DEPTH + 2];
/* Node pool: node i stands for symbol i. */
struct linked_list_node linked_lists[MAX_HISTHEAP_SIZE];
struct linked_list_node *temp;
/* Number of internal nodes sitting exactly at max_depth; each of them leaves
* a hole at max_depth once its subtree has been removed. */
uint16_t extra_nodes = 0;
int i, j;
memset(depth_array, 0, sizeof(depth_array));
memset(linked_lists, 0, sizeof(linked_lists));
for (i = 0; i < MAX_HISTHEAP_SIZE; i++)
linked_lists[i].value = i;
/* Bin every leaf of the tree by its depth. */
huffman_tree_traversal(depth_array, linked_lists, &extra_nodes, max_depth, root, 0);
/* This for loop fixes up the huffman tree to have a maximum depth not exceeding
* max_depth. This algorithm works by removing all elements below max_depth,
* filling up the empty leafs which are created with elements from the huffman
* tree and then iteratively pushing down the least frequent leaf that is above
* max_depth to a depth 1 lower, and moving up a leaf below max_depth to that
* same depth.*/
for (i = MAX_HUFF_TREE_DEPTH + 1; i > max_depth; i--) {
/* find element to push up the tree */
while (depth_array[i].start != NULL) {
if (extra_nodes > 0) {
/* A hole is still available at max_depth: move the leaf straight into it. */
temp = pop_from_front(&depth_array[i]);
append_to_back(&depth_array[max_depth], temp);
extra_nodes -= 1;
} else {
assert(depth_array[max_depth].length % 2 == 0);
assert(extra_nodes == 0);
/* find element to push down in the tree */
for (j = max_depth - 1; j >= 0; j--)
if (depth_array[j].start != NULL)
break;
/* No element available to push down further. */
if (j < 0)
return 1;
/* The too-deep leaf and the leaf taken from depth j both end up
* at depth j + 1. */
temp = pop_from_front(&depth_array[i]);
append_to_front(&depth_array[j + 1], temp);
temp = pop_from_front(&depth_array[j]);
append_to_back(&depth_array[j + 1], temp);
}
}
}
/* Record the final length of every symbol and build the length histogram.
* NOTE(review): the loop bound reaches index MAX_HUFF_TREE_DEPTH + 1, but the
* loop above empties every list deeper than max_depth, so count[] is only
* written at indices <= max_depth -- confirm callers never pass a max_depth
* larger than the count array allows. */
for (i = 0; i < MAX_HUFF_TREE_DEPTH + 2; i++) {
temp = depth_array[i].start;
while (temp != NULL) {
huff_lookup_table[temp->value].length = i;
count[i] += 1;
temp = temp->next;
}
}
return 0;
}
void huffman_tree_traversal(struct linked_list *depth_array,
struct linked_list_node *linked_lists, uint16_t * extra_nodes,
uint8_t max_depth, struct huff_tree current_node,
uint16_t current_depth)
{
/* This algorithm performs a traversal of the huffman tree. It is setup
* to visit the leaves in order of frequency and bin elements into a
* linked list by depth.*/
if (current_node.left == NULL) {
if (current_depth < MAX_HUFF_TREE_DEPTH + 1)
append_to_front(&depth_array[current_depth],
&linked_lists[current_node.value]);
else
append_to_front(&depth_array[MAX_HUFF_TREE_DEPTH + 1],
&linked_lists[current_node.value]);
return;
} else if (current_depth == max_depth)
*extra_nodes += 1;
if (current_node.left->frequency < current_node.right->frequency) {
huffman_tree_traversal(depth_array, linked_lists, extra_nodes, max_depth,
*current_node.right, current_depth + 1);
huffman_tree_traversal(depth_array, linked_lists, extra_nodes, max_depth,
*current_node.left, current_depth + 1);
} else {
huffman_tree_traversal(depth_array, linked_lists, extra_nodes, max_depth,
*current_node.left, current_depth + 1);
huffman_tree_traversal(depth_array, linked_lists, extra_nodes, max_depth,
*current_node.right, current_depth + 1);
}
}
/*
 * Returns the low `length` bits of `bits` in reversed order, with all higher
 * bits of the result cleared. `length` is expected to be in the range 0..16.
 */
uint16_t bit_reverse(uint16_t bits, uint8_t length)
{
	uint16_t v = bits;

	/* Mirror the whole 16-bit word, working from coarse to fine. */
	v = (uint16_t) ((v >> 8) | (v << 8));				/* bytes */
	v = (uint16_t) (((v & 0xF0F0) >> 4) | ((v & 0x0F0F) << 4));	/* nibbles */
	v = (uint16_t) (((v & 0xCCCC) >> 2) | ((v & 0x3333) << 2));	/* pairs */
	v = (uint16_t) (((v & 0xAAAA) >> 1) | ((v & 0x5555) << 1));	/* single bits */

	/* The wanted bits are now at the top of the word; shift them down. */
	return v >> (16 - length);
}
/*
 * Assigns a code to every entry of huff_code_table that has a non-zero
 * length, following the canonical numbering scheme of RFC 1951: codes of one
 * length are consecutive and handed out in table order. Each code is stored
 * bit-reversed.
 *
 * count[n] must hold the number of entries with code length n.
 */
void set_huff_codes(struct huff_code *huff_code_table, int table_length, uint16_t * count)
{
	uint16_t next_code[MAX_HUFF_TREE_DEPTH + 1];
	uint8_t code_len;
	int len, sym;

	/* First code of every length. */
	next_code[0] = 0;
	for (len = 1; len <= MAX_HUFF_TREE_DEPTH; len++)
		next_code[len] = (next_code[len - 1] + count[len - 1]) << 1;

	for (sym = 0; sym < table_length; sym++) {
		code_len = huff_code_table[sym].length;
		if (code_len == 0)
			continue;	/* symbol has no code */

		huff_code_table[sym].code = bit_reverse(next_code[code_len], code_len);
		next_code[code_len] += 1;
	}
}
/*
 * Builds the header of a dynamic huffman block for the given literal/length
 * and distance tables and writes it to `header`.
 *
 * header:          output buffer for the header bits.
 * header_length:   size of `header`, handed on to create_huffman_header.
 * lit_huff_table:  literal/length table; at least LIT_LEN entries are read.
 * dist_huff_table: distance table; at least DIST_LEN entries are read.
 * end_of_block:    handed on to create_huffman_header, which selects the
 *                  leading header bits from it.
 *
 * Returns the header length in bits.
 */
int create_header(uint8_t * header, uint32_t header_length, struct huff_code *lit_huff_table,
		  struct huff_code *dist_huff_table, uint32_t end_of_block)
{
	int i;
	uint64_t histogram[HUFF_LEN];
	uint16_t huffman_rep[LIT_LEN + DIST_LEN];
	uint16_t extra_bits[LIT_LEN + DIST_LEN];
	uint16_t length;
	struct huff_tree root;
	struct huff_tree tree_array[2 * HUFF_LEN - 1];
	struct huff_code lookup_table[HUFF_LEN];
	struct huff_code combined_table[LIT_LEN + DIST_LEN];
	/* hlit, hdist, and hclen are defined in RFC 1951 page 13 */
	uint32_t hlit, hdist, hclen;
	uint64_t bit_count;

	memset(lookup_table, 0, sizeof(lookup_table));

	/* Calculate hlit: index of the last used literal/length code, minus 256 */
	for (i = LIT_LEN - 1; i > 256; i--)
		if (lit_huff_table[i].length != 0)
			break;
	hlit = i - 256;

	/* Calculate hdist: index of the last used distance code */
	for (i = DIST_LEN - 1; i > 0; i--)
		if (dist_huff_table[i].length != 0)
			break;
	hdist = i;

	/* Combine huffman tables for run length encoding */
	for (i = 0; i < 257 + hlit; i++)
		combined_table[i] = lit_huff_table[i];
	for (i = 0; i < 1 + hdist; i++)
		combined_table[i + hlit + 257] = dist_huff_table[i];

	/* Clear the whole array. The size must be in bytes: passing the element
	 * count would clear only half of the uint16_t entries. */
	memset(extra_bits, 0, sizeof(extra_bits));
	memset(histogram, 0, sizeof(histogram));

	/* Create a run length encoded representation of the literal/length and
	 * distance huffman trees. */
	length = create_huffman_rep(huffman_rep, histogram, extra_bits,
				    combined_table, hlit + 257 + hdist + 1);

	/* Create a huffman tree to encode run length encoded representation.
	 * NOTE(review): the result of create_huff_lookup is not checked here. */
	root = create_symbol_subset_huff_tree(tree_array, histogram, HUFF_LEN);
	create_huff_lookup(lookup_table, HUFF_LEN, root, 7);

	/* Calculate hclen */
	for (i = CODE_LEN_CODES - 1; i > 3; i--)	/* i must be at least 4 */
		if (lookup_table[code_length_code_order[i]].length != 0)
			break;
	hclen = i - 3;

	/* Generate actual header. */
	bit_count = create_huffman_header(header, header_length, lookup_table, huffman_rep,
					  extra_bits, length, end_of_block, hclen, hlit,
					  hdist);

	return bit_count;
}
/*
 * Run-length encodes the code lengths stored in huff_table[0..len-1]. Each
 * maximal run of equal lengths is handed to flush_repeats, which appends the
 * matching symbols to huffman_rep / extra_bits and updates histogram.
 * Returns the number of symbols written to huffman_rep.
 */
uint16_t create_huffman_rep(uint16_t * huffman_rep, uint64_t * histogram,
			    uint16_t * extra_bits, struct huff_code * huff_table, uint16_t len)
{
	uint16_t in_pos = 0, out_pos = 0;

	while (in_pos < len) {
		uint16_t run_start = in_pos;
		uint16_t code_len = huff_table[in_pos].length;

		/* Advance to the first entry whose length differs. */
		do {
			in_pos += 1;
		} while (in_pos < len && huff_table[in_pos].length == code_len);

		out_pos = flush_repeats(huffman_rep, histogram, extra_bits, code_len,
					in_pos - run_start, out_pos);
	}

	return out_pos;
}
/*
 * Appends the symbols describing `run_length` consecutive occurrences of the
 * code length `last_code` to huffman_rep, starting at current_index.
 *
 * Symbols 16, 17 and 18 are repeat codes; for those, the matching extra_bits
 * entry holds (value << 4) | number_of_extra_bits. histogram[] is incremented
 * for every symbol written. Returns the next free index in huffman_rep.
 */
uint16_t flush_repeats(uint16_t * huffman_rep, uint64_t * histogram, uint16_t * extra_bits,
		       uint16_t last_code, uint16_t run_length, uint16_t current_index)
{
	uint16_t out = current_index;
	uint16_t remaining = run_length;
	uint16_t k;

	/* A non-zero length is always written once literally; symbol 16 then
	 * repeats that previous length. */
	if (last_code != 0 && last_code < HUFF_LEN && remaining > 0) {
		huffman_rep[out++] = last_code;
		histogram[last_code] += 1;
		remaining -= 1;
	}

	if (remaining >= SHORTEST_MATCH) {
		if (last_code == 0) {
			/* Symbol 18 covers up to 138 zeros, symbol 17 up to 10. */
			while (remaining > 138) {
				huffman_rep[out] = 18;
				extra_bits[out] = 0x7F7;	/* (138 - 11) << 4 | 7 */
				out++;
				histogram[18] += 1;
				remaining -= 138;
			}

			if (remaining > 10) {
				huffman_rep[out] = 18;
				extra_bits[out] = ((remaining - 11) << 4) | 7;
				out++;
				histogram[18] += 1;
				remaining = 0;
			} else if (remaining >= SHORTEST_MATCH) {
				huffman_rep[out] = 17;
				extra_bits[out] = ((remaining - 3) << 4) | 3;
				out++;
				histogram[17] += 1;
				remaining = 0;
			}
		} else {
			/* Symbol 16 repeats the previous length up to 6 times. */
			while (remaining > 6) {
				huffman_rep[out] = 16;
				extra_bits[out] = 0x32;	/* (6 - 3) << 4 | 2 */
				out++;
				histogram[16] += 1;
				remaining -= 6;
			}

			if (remaining >= SHORTEST_MATCH) {
				huffman_rep[out] = 16;
				extra_bits[out] = ((remaining - 3) << 4) | 2;
				out++;
				histogram[16] += 1;
				remaining = 0;
			}
		}
	}

	/* Anything left is too short for a repeat code: write it literally. */
	for (k = 0; k < remaining; k++) {
		huffman_rep[out++] = last_code;
		histogram[last_code] += 1;
	}

	return out;
}
int create_huffman_header(uint8_t * header, uint32_t header_length,
struct huff_code *lookup_table, uint16_t * huffman_rep,
uint16_t * extra_bits, uint16_t huffman_rep_length,
uint32_t end_of_block, uint32_t hclen, uint32_t hlit, uint32_t hdist)
{
/* hlit, hdist, hclen are as defined in the deflate standard, head is the
* first three deflate header bits.*/
int i;
uint32_t head;
uint64_t bit_count;
struct huff_code huffman_value;
struct BitBuf2 header_bitbuf;
if (end_of_block)
head = 0x05;
else
head = 0x04;
set_buf(&header_bitbuf, header, header_length);
init(&header_bitbuf);
write_bits(&header_bitbuf, (head | (hlit << 3) | (hdist << 8) | (hclen << 13)),
DYN_HDR_START_LEN);
uint64_t tmp = 0;
for (i = hclen + 3; i >= 0; i--) {
tmp = (tmp << 3) | lookup_table[code_length_code_order[i]].length;
}
write_bits(&header_bitbuf, tmp, (hclen + 4) * 3);
for (i = 0; i < huffman_rep_length; i++) {
huffman_value = lookup_table[huffman_rep[i]];
write_bits(&header_bitbuf, (uint64_t) huffman_value.code,
(uint32_t) huffman_value.length);
if (huffman_rep[i] > 15) {
write_bits(&header_bitbuf, (uint64_t) extra_bits[i] >> 4,
(uint32_t) extra_bits[i] & 0xF);
}
}
bit_count = 8 * buffer_used(&header_bitbuf) + header_bitbuf.m_bit_count;
flush(&header_bitbuf);
return bit_count;
}
/*
 * Splits the first `length` entries of hufftable into two parallel arrays:
 * code_table receives the codes, code_length_table the code lengths.
 */
void create_code_tables(uint16_t * code_table, uint8_t * code_length_table, uint32_t length,
			struct huff_code *hufftable)
{
	uint32_t idx = 0;

	while (idx < length) {
		code_table[idx] = hufftable[idx].code;
		code_length_table[idx] = hufftable[idx].length;
		idx++;
	}
}
/*
 * Builds the packed length table. Entries 0..254 are produced from the length
 * symbols 257..LIT_LEN-2, one entry per symbol / extra-bit combination; the
 * next entry is produced from the last symbol, LIT_LEN-1, which has no extra
 * bits.
 *
 * Entry layout: the low LENGTH_BITS bits hold the total bit count (code length
 * plus extra bits), the huffman code sits above them, and the extra bits sit
 * above the code.
 */
void create_packed_len_table(uint32_t * packed_table, struct huff_code *lit_len_hufftable)
{
	int sym, slot = 0;
	uint16_t extra;
	uint16_t nbits = 0;
	/* Symbol after which the number of extra bits goes up by one. */
	uint16_t next_step = LEN_EXTRA_BITS_START;

	for (sym = 257; sym < LIT_LEN - 1; sym++) {
		const int code_len = lit_len_hufftable[sym].length;
		const int low = (lit_len_hufftable[sym].code << LENGTH_BITS) |
		    (code_len + nbits);

		/* Never write past entry 254 from this loop. */
		for (extra = 0; extra < (1 << nbits) && slot <= 254; extra++)
			packed_table[slot++] = (extra << (code_len + LENGTH_BITS)) | low;

		if (sym == next_step) {
			next_step += LEN_EXTRA_BITS_INTERVAL;
			nbits += 1;
		}
	}

	packed_table[slot] = (lit_len_hufftable[LIT_LEN - 1].code << LENGTH_BITS) |
	    (lit_len_hufftable[LIT_LEN - 1].length);
}
/*
 * Builds the packed distance table: one entry per distance symbol / extra-bit
 * combination, in increasing order, stopping once `length` entries have been
 * written.
 *
 * Entry layout: the low LENGTH_BITS bits hold the total bit count (code length
 * plus extra bits), the huffman code sits above them, and the extra bits sit
 * above the code.
 */
void create_packed_dist_table(uint32_t * packed_table, uint32_t length,
			      struct huff_code *dist_hufftable)
{
	int sym, slot = 0;
	uint16_t extra;
	uint16_t nbits = 0;
	/* Symbol after which the number of extra bits goes up by one. */
	uint16_t next_step = DIST_EXTRA_BITS_START;

	for (sym = 0; sym < DIST_LEN; sym++) {
		const int code_len = dist_hufftable[sym].length;
		const int low = (dist_hufftable[sym].code << LENGTH_BITS) | (code_len + nbits);

		for (extra = 0; extra < (1 << nbits); extra++) {
			if (slot >= length)
				return;	/* table is full */
			packed_table[slot++] = (extra << (code_len + LENGTH_BITS)) | low;
		}

		if (sym == next_step) {
			next_step += DIST_EXTRA_BITS_INTERVAL;
			nbits += 1;
		}
	}
}
/*
 * Checks whether the longest literal/length code, the longest length code
 * including its extra bits, and the longest distance code including its extra
 * bits together still fit into one bit buffer write.
 *
 * Returns 0 when the tables are usable and non-zero when the combined worst
 * case exceeds MAX_BITBUF_BIT_WRITE.
 */
int are_hufftables_useable(struct huff_code *lit_len_hufftable,
			   struct huff_code *dist_hufftable)
{
	int worst_lit = 0, worst_len = 0, worst_dist = 0;
	int extra, next_step;
	int sym, bits;

	/* Longest code anywhere in the literal/length table. */
	for (sym = 0; sym < LIT_LEN; sym++) {
		bits = lit_len_hufftable[sym].length;
		if (bits > worst_lit)
			worst_lit = bits;
	}

	/* Longest length code including its extra bits. */
	extra = 0;
	next_step = LEN_EXTRA_BITS_START;
	for (sym = 257; sym < LIT_LEN - 1; sym++) {
		bits = lit_len_hufftable[sym].length + extra;
		if (bits > worst_len)
			worst_len = bits;
		if (sym == next_step) {
			next_step += LEN_EXTRA_BITS_INTERVAL;
			extra += 1;
		}
	}

	/* Longest distance code including its extra bits. */
	extra = 0;
	next_step = DIST_EXTRA_BITS_START;
	for (sym = 0; sym < DIST_LEN; sym++) {
		bits = dist_hufftable[sym].length + extra;
		if (bits > worst_dist)
			worst_dist = bits;
		if (sym == next_step) {
			next_step += DIST_EXTRA_BITS_INTERVAL;
			extra += 1;
		}
	}

	/* Some versions of igzip can write up to one literal, one length and one
	 * distance code at the same time. This checks that such a write always
	 * fits in the bit buffer. */
	return (worst_lit + worst_len + worst_dist) > MAX_BITBUF_BIT_WRITE;
}
/*
 * Builds a complete set of igzip huffman tables from a histogram. Every
 * literal/length symbol and every distance symbol up to the one matching
 * IGZIP_D gets a code, whether or not it occurs in the histogram.
 *
 * hufftables: output; cleared first, then filled with the code tables, the
 *             packed length and distance tables and the deflate header.
 * histogram:  input literal/length and distance frequencies.
 *
 * Returns 0 on success, or one of the INVALID_*_HUFFCODE values when no
 * usable code could be produced.
 */
int isal_create_hufftables(struct isal_hufftables *hufftables,
			   struct isal_huff_histogram *histogram)
{
	struct huff_tree lit_tree, dist_tree;
	struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1];
	struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
	uint64_t bit_count;
	int max_dist = convert_dist_to_dist_sym(IGZIP_D);

	uint32_t *dist_table = hufftables->dist_table;
	uint32_t *len_table = hufftables->len_table;
	uint16_t *lit_table = hufftables->lit_table;
	uint16_t *dcodes = hufftables->dcodes;
	uint8_t *lit_table_sizes = hufftables->lit_table_sizes;
	uint8_t *dcodes_sizes = hufftables->dcodes_sizes;
	uint8_t *deflate_hdr = hufftables->deflate_hdr;
	uint64_t *lit_len_histogram = histogram->lit_len_histogram;
	uint64_t *dist_histogram = histogram->dist_histogram;

	memset(hufftables, 0, sizeof(struct isal_hufftables));
	memset(lit_tree_array, 0, sizeof(lit_tree_array));
	memset(dist_tree_array, 0, sizeof(dist_tree_array));
	memset(lit_huff_table, 0, sizeof(lit_huff_table));
	memset(dist_huff_table, 0, sizeof(dist_huff_table));

	lit_tree = create_huff_tree(lit_tree_array, lit_len_histogram, LIT_LEN);
	dist_tree = create_huff_tree(dist_tree_array, dist_histogram, max_dist + 1);

	/* First attempt: allow the longest code lengths deflate permits. */
	if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, MAX_DEFLATE_CODE_LEN) > 0)
		return INVALID_LIT_LEN_HUFFCODE;

	if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, MAX_DEFLATE_CODE_LEN) > 0)
		return INVALID_DIST_HUFFCODE;

	/* are_hufftables_useable returns non-zero when the codes are too long;
	 * in that case retry with the shorter "safe" limits. */
	if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
		if (create_huff_lookup
		    (lit_huff_table, LIT_LEN, lit_tree, MAX_SAFE_LIT_CODE_LEN) > 0)
			return INVALID_LIT_LEN_HUFFCODE;

		if (create_huff_lookup
		    (dist_huff_table, DIST_LEN, dist_tree, MAX_SAFE_DIST_CODE_LEN) > 0)
			return INVALID_DIST_HUFFCODE;

		if (are_hufftables_useable(lit_huff_table, dist_huff_table))
			return INVALID_HUFFCODE;
	}

	create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET,
			   dist_huff_table + DCODE_OFFSET);

	create_code_tables(lit_table, lit_table_sizes, LIT_TABLE_SIZE, lit_huff_table);

	create_packed_len_table(len_table, lit_huff_table);
	create_packed_dist_table(dist_table, DIST_TABLE_SIZE, dist_huff_table);

	/* Take the buffer size from the struct member: the local deflate_hdr is
	 * a plain pointer, so sizeof(deflate_hdr) would be the size of a pointer
	 * and not the size of the header buffer. */
	bit_count =
	    create_header(deflate_hdr, sizeof(hufftables->deflate_hdr), lit_huff_table,
			  dist_huff_table, LAST_BLOCK);

	/* The header length is stored as whole bytes plus leftover bits. */
	hufftables->deflate_hdr_count = bit_count / 8;
	hufftables->deflate_hdr_extra_bits = bit_count % 8;

	return 0;
}
/*
 * Builds a set of igzip huffman tables from a histogram, giving codes only to
 * the literal symbols that occur in it. Symbols from LIT_TABLE_SIZE up to
 * LIT_LEN - 1 and all distance symbols up to the one matching IGZIP_D always
 * get a code.
 *
 * hufftables: output; cleared first, then filled with the code tables, the
 *             packed length and distance tables and the deflate header.
 * histogram:  input literal/length and distance frequencies. NOTE: entries
 *             LIT_TABLE_SIZE..LIT_LEN-1 of lit_len_histogram that are zero are
 *             changed to one, so the caller's histogram is modified.
 *
 * Returns 0 on success, or one of the INVALID_*_HUFFCODE values when no
 * usable code could be produced.
 */
int isal_create_hufftables_subset(struct isal_hufftables *hufftables,
				  struct isal_huff_histogram *histogram)
{
	struct huff_tree lit_tree, dist_tree;
	struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1];
	struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
	uint64_t bit_count;
	int j, max_dist = convert_dist_to_dist_sym(IGZIP_D);

	uint32_t *dist_table = hufftables->dist_table;
	uint32_t *len_table = hufftables->len_table;
	uint16_t *lit_table = hufftables->lit_table;
	uint16_t *dcodes = hufftables->dcodes;
	uint8_t *lit_table_sizes = hufftables->lit_table_sizes;
	uint8_t *dcodes_sizes = hufftables->dcodes_sizes;
	uint8_t *deflate_hdr = hufftables->deflate_hdr;
	uint64_t *lit_len_histogram = histogram->lit_len_histogram;
	uint64_t *dist_histogram = histogram->dist_histogram;

	memset(hufftables, 0, sizeof(struct isal_hufftables));
	memset(lit_tree_array, 0, sizeof(lit_tree_array));
	memset(dist_tree_array, 0, sizeof(dist_tree_array));
	memset(lit_huff_table, 0, sizeof(lit_huff_table));
	memset(dist_huff_table, 0, sizeof(dist_huff_table));

	/* The subset tree drops symbols with a zero count, so give each of these
	 * symbols a count of at least one to keep it in the tree. */
	for (j = LIT_TABLE_SIZE; j < LIT_LEN; j++)
		if (lit_len_histogram[j] == 0)
			lit_len_histogram[j]++;

	lit_tree = create_symbol_subset_huff_tree(lit_tree_array, lit_len_histogram, LIT_LEN);
	dist_tree = create_huff_tree(dist_tree_array, dist_histogram, max_dist + 1);

	/* First attempt: allow the longest code lengths deflate permits. */
	if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, MAX_DEFLATE_CODE_LEN) > 0)
		return INVALID_LIT_LEN_HUFFCODE;

	if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, MAX_DEFLATE_CODE_LEN) > 0)
		return INVALID_DIST_HUFFCODE;

	/* are_hufftables_useable returns non-zero when the codes are too long;
	 * in that case retry with the shorter "safe" limits. */
	if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
		if (create_huff_lookup
		    (lit_huff_table, LIT_LEN, lit_tree, MAX_SAFE_LIT_CODE_LEN) > 0)
			return INVALID_LIT_LEN_HUFFCODE;

		if (create_huff_lookup
		    (dist_huff_table, DIST_LEN, dist_tree, MAX_SAFE_DIST_CODE_LEN) > 0)
			return INVALID_DIST_HUFFCODE;

		if (are_hufftables_useable(lit_huff_table, dist_huff_table))
			return INVALID_HUFFCODE;
	}

	create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET,
			   dist_huff_table + DCODE_OFFSET);

	create_code_tables(lit_table, lit_table_sizes, LIT_TABLE_SIZE, lit_huff_table);

	create_packed_len_table(len_table, lit_huff_table);
	create_packed_dist_table(dist_table, DIST_TABLE_SIZE, dist_huff_table);

	/* Take the buffer size from the struct member: the local deflate_hdr is
	 * a plain pointer, so sizeof(deflate_hdr) would be the size of a pointer
	 * and not the size of the header buffer. */
	bit_count =
	    create_header(deflate_hdr, sizeof(hufftables->deflate_hdr), lit_huff_table,
			  dist_huff_table, LAST_BLOCK);

	/* The header length is stored as whole bytes plus leftover bits. */
	hufftables->deflate_hdr_count = bit_count / 8;
	hufftables->deflate_hdr_extra_bits = bit_count % 8;

	return 0;
}

348
igzip/huff_codes.h Normal file
View File

@ -0,0 +1,348 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#ifndef HUFF_CODES_H
#define HUFF_CODES_H
#include <immintrin.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "igzip_lib.h"
#include "bitbuf2.h"
#ifdef _MSC_VER
# include <intrin.h>
#else
# include <x86intrin.h>
#endif
/* Short local names for the literal/length and distance alphabet sizes. */
#define LIT_LEN IGZIP_LIT_LEN
#define DIST_LEN IGZIP_DIST_LEN
/* Number of entries scanned in code_length_code_order when computing hclen. */
#define CODE_LEN_CODES 19
/* Size of the histogram and lookup table used for the header's own huffman code. */
#define HUFF_LEN 19
/* Index of the first distance code copied into the dcodes tables. */
#ifdef LONGER_HUFFTABLE
# define DCODE_OFFSET 26
#else
# define DCODE_OFFSET 20
#endif
/* Bit count of the first header write: 3 leading header bits plus hlit, hdist and hclen. */
#define DYN_HDR_START_LEN 17
/* Capacity of struct histheap and of the linked-list node pool. */
#define MAX_HISTHEAP_SIZE LIT_LEN
/* Deepest level tracked individually when binning leaves by depth. */
#define MAX_HUFF_TREE_DEPTH 15
#define D IGZIP_D /* Amount of history */
/* Code length limit used for the first table-building attempt. */
#define MAX_DEFLATE_CODE_LEN 15
/* Shorter limits used for the retry when the first attempt is not usable. */
#define MAX_SAFE_LIT_CODE_LEN 13
#define MAX_SAFE_DIST_CODE_LEN 12
#define LONG_DIST_TABLE_SIZE 8192
#define SHORT_DIST_TABLE_SIZE 1024
#define LEN_TABLE_SIZE 256
/* Number of literal/length codes copied into lit_table (symbols 0..256). */
#define LIT_TABLE_SIZE 257
/* end_of_block argument passed to create_header by the table builders. */
#define LAST_BLOCK 1
/* Symbol after which the extra-bit count first increases, and the number of
* symbols between later increases, for length and distance codes. */
#define LEN_EXTRA_BITS_START 264
#define LEN_EXTRA_BITS_INTERVAL 4
#define DIST_EXTRA_BITS_START 3
#define DIST_EXTRA_BITS_INTERVAL 2
/* Error return values of the table builders; all three share the value 1. */
#define INVALID_LIT_LEN_HUFFCODE 1
#define INVALID_DIST_HUFFCODE 1
#define INVALID_HUFFCODE 1
/**
* @brief Structure used to store huffman codes
* @details One entry describes the code of one symbol. A length of 0 marks a
* symbol without a code; set_huff_codes skips such entries.
*/
struct huff_code {
/* The code bits, stored bit-reversed by set_huff_codes. */
uint16_t code;
/* Code length in bits, filled in by find_code_lengths. */
uint8_t length;
};
/**
* @brief Binary tree used to store and create a huffman tree.
* @details Nodes live in a caller supplied array: the leaves come first, one
* per symbol, followed by the internal nodes.
*/
struct huff_tree {
/* Index of this node in the tree array; for a leaf this is the symbol. */
uint16_t value;
/* Symbol count for a leaf, sum of both children for an internal node. */
uint64_t frequency;
/* Children; both are NULL for a leaf. */
struct huff_tree *left;
struct huff_tree *right;
};
/**
* @brief Nodes in a doubly linked list.
*/
struct linked_list_node {
/* Symbol this node stands for; find_code_lengths sets it to the node's index. */
uint16_t value;
/* Neighbouring nodes; next is NULL on the last node of a list. */
struct linked_list_node *next;
struct linked_list_node *previous;
};
/**
* @brief This structure is a doubly linked list.
*/
struct linked_list {
/* Number of nodes in the list -- presumably maintained by the append/pop helpers. */
uint64_t length;
/* First node; NULL when the list is empty. */
struct linked_list_node *start;
/* Last node. */
struct linked_list_node *end;
};
/**
* @brief This is a binary minheap structure which stores huffman trees.
* @details The huffman trees are sorted by the frequency of the root.
* The structure is represented in a fixed sized array.
*/
struct histheap {
/* Heap storage; holds at most MAX_HISTHEAP_SIZE elements. */
struct huff_tree tree[MAX_HISTHEAP_SIZE];
/* Number of elements currently in the heap; callers set it to 0 before the first push. */
uint16_t size;
};
/**
* @brief Inserts a hufftree into a histheap.
* @param element: the hufftree to be inserted
* @param heap: the heap which element is being inserted into.
* @requires This function assumes the heap has enough allocated space.
* @returns Returns the index in heap of the inserted element
*/
int heap_push(struct huff_tree element, struct histheap *heap);
/**
* @brief Removes the top element from the heap and returns it.
*/
struct huff_tree heap_pop(struct histheap *heap);
/**
* @brief Removes the first element from list and returns it.
*/
struct linked_list_node *pop_from_front(struct linked_list *list);
/**
* @brief Adds new_element to the front of list.
*/
void append_to_front(struct linked_list *list, struct linked_list_node *new_element);
/**
* @brief Adds new_element to the end of list.
*/
void append_to_back(struct linked_list *list, struct linked_list_node *new_element);
/**
* @brief Returns the deflate symbol value for a repeat length.
*/
uint32_t convert_length_to_len_sym(uint32_t length);
/**
* @brief Returns the deflate symbol value for a look back distance.
*/
uint32_t convert_dist_to_dist_sym(uint32_t dist);
/**
* @brief Constructs a huffman tree on tree_array which only uses elements with non-zero frequency.
* @requires Assumes there will be at least two symbols in the produced tree.
* @requires tree_array must have length at least 2*size-1, and size must be less than 286.
* @param tree_array: array of huff_tree elements used to create a huffman tree, the first
* size elements of the array are the leaf elements in the huffman tree.
* @param histogram: a histogram of the frequency of elements in tree_array.
* @param size: the number of leaf elements in the huffman tree.
*/
struct huff_tree create_symbol_subset_huff_tree(struct huff_tree *tree_array,
uint64_t * histogram, uint32_t size);
/**
* @brief Construct a huffman tree on tree_array which uses every symbol.
* @requires tree_array must have length at least 2*size-1, and size must be less than 286.
* @param tree_array: array of huff_tree elements used to create a huffman tree, the first
* size elements of the array are the leaf elements in the huffman tree.
* @param histogram: a histogram of the frequency of elements in tree_array.
* @param size: the number of leaf elements in the huffman tree.
*/
struct huff_tree create_huff_tree(struct huff_tree *tree_array, uint64_t * histogram,
uint32_t size);
/**
* @brief Creates a deflate compliant huffman tree with maximum depth max_depth.
* @details The huffman tree is represented as a lookup table.
* @param huff_lookup_table: The output lookup table.
* @param table_length: The length of table.
* @param root: the input huffman tree the created tree is based on.
* @param max_depth: maximum depth the huffman tree can have
* @returns Returns 0 if successful and returns 1 otherwise.
*/
int create_huff_lookup(struct huff_code *huff_lookup_table, int table_length,
struct huff_tree root, uint8_t max_depth);
/**
* @brief Determines the code length for every value in a huffman tree.
* @param huff_lookup_table: An output lookup table used to store the code lengths
* corresponding to the possible values
* @param count: An output histogram representing code length versus number of occurrences.
* @param root: The root of the huffman tree being analyzed.
* @param max_depth: The maximum code length allowed in the output.
* @returns Returns 0 if successful and returns 1 otherwise.
*/
int find_code_lengths(struct huff_code *huff_lookup_table, uint16_t * count,
struct huff_tree root, uint8_t max_depth);
/**
* @brief Creates an array of linked lists.
* @details Each linked list contains all the elements with codes of a given length for
* lengths less than 16, and a list for all elements with codes at least 16. These lists
* are sorted by frequency from least frequent to most frequent within any given code length.
* @param depth_array: depth_array[i] is a linked list of elements with code length i
* @param linked_lists: An input structure the linked lists in depth array are built on.
* @param extra_nodes: Output counter, incremented for every non-leaf node found at max_depth.
* @param max_depth: The depth at which non-leaf nodes are counted in extra_nodes.
* @param current_node: the current node being visited in a huffman tree
* @param current_depth: the depth of current_node in a huffman tree
*/
void huffman_tree_traversal(struct linked_list *depth_array,
struct linked_list_node *linked_lists, uint16_t * extra_nodes,
uint8_t max_depth, struct huff_tree current_node,
uint16_t current_depth);
/**
* @brief Determines the code for each element of a deflate compliant huffman tree and
* stores it in a lookup table
* @requires table has been initialized to already contain the code length for each element.
* @param table: A lookup table used to store the codes.
* @param table_length: The length of table.
* @param count: a histogram representing the number of occurrences of codes of a given length
*/
void set_huff_codes(struct huff_code *table, int table_length, uint16_t * count);
/* Reverse the first length bits in bits and returns that value */
uint16_t bit_reverse(uint16_t bits, uint8_t length);
/**
* @brief Checks if a literal/length huffman table can be stored in the igzip hufftables files.
* @param huff_code_table: A literal/length huffman code lookup table.
* @returns index of the first symbol which fails and 0xFFFF otherwise.
*/
uint16_t valid_lit_huff_table(struct huff_code *huff_code_table);
/**
* @brief Checks if a distance huffman table can be stored in the igzip hufftables files.
* @param huff_code_table: A distance huffman code lookup table.
* @returns the index of the first symbol which fails and 0xFFFF otherwise.
*/
uint16_t valid_dist_huff_table(struct huff_code *huff_code_table);
/**
* @brief Creates the dynamic huffman deflate header.
* @returns Returns the length of header in bits.
* @requires This function requires header is large enough to store the whole header.
* @param header: The output header.
* @param lit_huff_table: A literal/length code huffman lookup table.
* @param dist_huff_table: A distance huffman code lookup table.
* @param end_of_block: Value determining whether end of block header is produced or not;
* 0 corresponds to not end of block and all other inputs correspond to end of block.
*/
int create_header(uint8_t *header, uint32_t header_length, struct huff_code *lit_huff_table,
struct huff_code *dist_huff_table, uint32_t end_of_block);
/**
* @brief Creates a run length encoded representation of huff_table.
* @details Also creates a histogram representing the frequency of each symbol
* @returns Returns the number of symbols written into huffman_rep.
* @param huffman_rep: The output run length encoded version of huff_table.
* @param histogram: The output histogram of frequencies of elements in huffman_rep.
* @param extra_bits: An output table storing extra bits associated with huffman_rep.
* @param huff_table: The input huffman_table or concatenation of huffman_tables.
* @param len: The length of huff_table.
*/
uint16_t create_huffman_rep(uint16_t * huffman_rep, uint64_t * histogram,
uint16_t * extra_bits, struct huff_code *huff_table, uint16_t len);
/**
* @brief Flushes the symbols for a repeat of last_code for length run_length into huffman_rep.
* @param huffman_rep: pointer to array containing the output huffman_rep.
* @param histogram: histogram of elements seen in huffman_rep.
* @param extra_bits: an array holding extra bits for the corresponding symbol in huffman_rep.
* @param last_code: the code length value that is repeated.
* @param run_length: the number of consecutive occurrences of last_code.
* @param current_index: The next spot elements will be written in huffman_rep.
* @returns Returns the next free spot in huffman_rep after the run has been written.
*/
uint16_t flush_repeats(uint16_t * huffman_rep, uint64_t * histogram, uint16_t * extra_bits,
uint16_t last_code, uint16_t run_length, uint16_t current_index);
/**
* @brief Creates the header for run length encoded huffman trees.
* @param header: the output header.
* @param header_length: the size of the header buffer, passed on to the bit buffer.
* @param lookup_table: a huffman lookup table.
* @param huffman_rep: a run length encoded huffman tree.
* @param extra_bits: extra bits associated with the corresponding spot in huffman_rep
* @param huffman_rep_length: the length of huffman_rep.
* @param end_of_block: Value determining whether end of block header is produced or not;
* 0 corresponds to not end of block and all other inputs correspond to end of block.
* @param hclen: Length of huffman code for huffman codes minus 4.
* @param hlit: Length of literal/length table minus 257.
* @param hdist: Length of distance table minus 1.
* @returns Returns the length of the header in bits.
*/
int create_huffman_header(uint8_t *header, uint32_t header_length, struct huff_code *lookup_table,
uint16_t * huffman_rep, uint16_t * extra_bits,
uint16_t huffman_rep_length, uint32_t end_of_block, uint32_t hclen,
uint32_t hlit, uint32_t hdist);
/**
* @brief Creates a two table representation of huffman codes.
* @param code_table: output table containing the code
* @param code_length_table: output table containing the code length
* @param length: the length of hufftable
* @param hufftable: a huffman lookup table
*/
void create_code_tables(uint16_t * code_table, uint8_t * code_length_table,
uint32_t length, struct huff_code *hufftable);
/**
* @brief Creates a packed representation of length huffman codes.
* @details In each packed_table entry, the low LENGTH_BITS bits contain the total bit
* count (code length plus number of extra bits) and the bits above them contain the
* huffman code followed by the extra bits.
* @param packed_table: the output table
* @param lit_len_hufftable: a literal/length huffman lookup table
void create_packed_len_table(uint32_t * packed_table, struct huff_code *lit_len_hufftable);
/**
* @brief Creates a packed representation of distance huffman codes.
* @details In each packed_table entry, the low LENGTH_BITS bits contain the total bit
* count (code length plus number of extra bits) and the bits above them contain the
* huffman code followed by the extra bits.
* @param packed_table: the output table
* @param length: the maximum number of entries written to packed_table
* @param dist_hufftable: a distance huffman lookup table
void create_packed_dist_table(uint32_t * packed_table, uint32_t length,
struct huff_code *dist_hufftable);
/**
* @brief Checks to see if the hufftable is usable by igzip
*
* @param lit_len_hufftable: literal/length huffman code
* @param dist_hufftable: distance huffman code
* @returns Returns 0 if the table is usable
*/
int are_hufftables_useable(struct huff_code *lit_len_hufftable,
struct huff_code *dist_hufftable);
#endif

208
igzip/huffman.asm Normal file
View File

@ -0,0 +1,208 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "options.asm"
%include "lz0a_const.asm"
; Macros for doing Huffman Encoding
;
; LONGER_HUFFTABLE selects an 8192-entry distance lookup table instead of the
; 1024-entry one. The build is rejected when the history size D exceeds 8192.
; DECODE_OFFSET is the bias applied to the dcodes tables (see the offsets below).
%ifdef LONGER_HUFFTABLE
%if (D > 8192)
%error History D is larger than 8K, cannot use %LONGER_HUFFTABLE
; NOTE(review): presumably a deliberately malformed directive to force a failure
; on assemblers that do not honor %error - confirm.
% error
%else
%define DIST_TABLE_SIZE 8192
%define DECODE_OFFSET 26
%endif
%else
%define DIST_TABLE_SIZE 1024
%define DECODE_OFFSET 20
%endif
; Entry counts of the length and literal tables
%define LEN_TABLE_SIZE 256
%define LIT_TABLE_SIZE 257
; Byte offsets into struct isal_hufftables (layout sketched in the comment below).
; The distance table starts after the header bytes and the two uint32_t header fields.
%define DIST_TABLE_START (IGZIP_MAX_DEF_HDR_SIZE + 8)
; Biased down by one 4-byte entry; presumably so a distance starting at 1 can be
; used directly as the index.
%define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1)
; Start of the length table biased down by three 4-byte entries; presumably so a
; match length starting at 3 can be used directly as the index.
%define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3)
; Literal codes (2 bytes each) follow the distance and length tables.
%define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
; Literal code lengths (1 byte each, see get_lit_code) follow the literal codes.
%define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
; Distance symbol codes (2 bytes each), biased down by DECODE_OFFSET entries so the
; symbol number indexes directly. NOTE(review): the "+ 1" looks like one byte of
; alignment padding after the 257 literal sizes - confirm against the C struct.
%define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2)
; Distance symbol code lengths (1 byte each), placed after the (30 - DECODE_OFFSET)
; dcodes entries and biased down by DECODE_OFFSET bytes.
%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
;; /** @brief Holds the huffman tree used to huffman encode the input stream **/
;; struct isal_hufftables {
;; // deflate huffman tree header
;; uint8_t deflate_huff_hdr[IGZIP_MAX_DEF_HDR_SIZE];
;;
;; //!< Number of whole bytes in deflate_huff_hdr
;; uint32_t deflate_huff_hdr_count;
;;
;; //!< Number of bits in the partial byte in header
;; uint32_t deflate_huff_hdr_extra_bits;
;;
;; //!< bits 4:0 are the code length, bits 31:5 are the code
;; uint32_t dist_table[DIST_TABLE_SIZE];
;;
;; //!< bits 4:0 are the code length, bits 31:5 are the code
;; uint32_t len_table[LEN_TABLE_SIZE];
;;
;; //!< bits 3:0 are the code length, bits 15:4 are the code
;; uint16_t lit_table[LIT_TABLE_SIZE];
;;
;; //!< bits 3:0 are the code length, bits 15:4 are the code
;; uint16_t dcodes[30 - DECODE_OFFSET];
;; };
%ifdef LONGER_HUFFTABLE
; Look up the huffman code and code length for a distance in the distance table
; (LONGER_HUFFTABLE variant: every distance is served from the table).
; Only the code and len registers are written by this variant.
; get_dist_code dist, code, len, hufftables
%macro get_dist_code 4
%define %%dist %1 ; 64-bit IN
%define %%code %2d ; 32-bit OUT
%define %%len %3d ; 32-bit OUT
%define %%hufftables %4 ; address of the hufftable
mov %%len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ] ; packed entry: (code << 5) | length
mov %%code, %%len
and %%len, 0x1F; len = low 5 bits of the packed entry
shr %%code, 5 ; code = remaining upper bits
%endm
; Load the packed distance-table entry (code and length still combined) for a
; distance; split it later with unpack_dist_code.
; get_packed_dist_code dist, code_len, hufftables
%macro get_packed_dist_code 3
%define %%dist %1 ; 64-bit IN
%define %%code_len %2d ; 32-bit OUT
%define %%hufftables %3 ; address of the hufftable
mov %%code_len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ]
%endm
; Split a packed table entry into its code and code length.
; On entry code holds the packed entry; on exit code holds the huffman code and
; len holds the code length.
; unpack_dist_code code, len
%macro unpack_dist_code 2
%define %%code %1d ; 32-bit IN (packed entry) / OUT (code)
%define %%len %2d ; 32-bit OUT
mov %%len, %%code
and %%len, 0x1F; len = low 5 bits of the packed entry
shr %%code, 5 ; code = remaining upper bits
%endm
%else
; Compute the huffman code for a distance arithmetically instead of by direct
; table lookup: derive the distance symbol and its extra bits, then fetch the
; symbol's code and code length from the dcodes tables.
; Assumes (dist != 0); get_dist_code only expands this path for
; dist > DIST_TABLE_SIZE.
; Uses RCX, clobbers dist
; code and len are used together with ecx, so callers pass 32-bit register names.
; void compute_dist_code dist, code, len, hufftables
; NOTE(review): the two table loads below reference the bare name `hufftables`
; rather than the %%hufftables parameter, so the fourth argument is effectively
; unused here - confirm this is intended.
%macro compute_dist_code 4
%define %%dist %1d ; IN, clobbered
%define %%distq %1
%define %%code %2 ; OUT
%define %%len %3 ; OUT
%define %%hufftables %4
dec %%dist ; dist -= 1
bsr ecx, %%dist ; ecx = msb = bsr(dist)
dec ecx ; ecx = num_extra_bits = msb - 1
mov %%code, 1
shl %%code, CL
dec %%code ; code = ((1 << num_extra_bits) - 1)
and %%code, %%dist ; code = extra_bits
shr %%dist, CL ; dist >>= num_extra_bits
lea %%dist, [%%dist + 2*ecx] ; dist = sym = dist + num_extra_bits*2
mov %%len, ecx ; len = num_extra_bits
movzx ecx, byte [hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT] ; ecx = code length of sym
movzx %%dist, word [hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT] ; dist = huffman code of sym
shl %%code, CL ; code = extra_bits << sym_len
or %%code, %%dist ; code = sym_code | (extra_bits << sym_len)
add %%len, ecx ; len = num_extra_bits + sym_len
%endm
; Get the huffman code and code length for a distance (short-table variant).
; Distances up to DIST_TABLE_SIZE are served from the lookup table; larger ones
; fall through to compute_dist_code.
; Uses RCX, clobbers dist (both only on the compute_dist_code path)
; get_dist_code dist, code, len, hufftables
; NOTE(review): the table load references the bare name `hufftables` rather than
; the %%hufftables parameter - confirm this is intended.
%macro get_dist_code 4
%define %%dist %1d ; 32-bit IN, clobbered
%define %%distq %1 ; 64-bit IN, clobbered
%define %%code %2d ; 32-bit OUT
%define %%len %3d ; 32-bit OUT
%define %%hufftables %4
cmp %%dist, DIST_TABLE_SIZE
jg %%do_compute ; dist > DIST_TABLE_SIZE: not in the table
mov %%len, [hufftables + DIST_TABLE_OFFSET + 4*%%distq WRT_OPT] ; packed entry: (code << 5) | length
mov %%code, %%len
and %%len, 0x1F; len = low 5 bits of the packed entry
shr %%code, 5 ; code = remaining upper bits
jmp %%done
%%do_compute:
compute_dist_code %%distq, %%code, %%len, %%hufftables
%%done:
%endm
; Short-table variant of get_packed_dist_code: it accepts the same three
; arguments as the LONGER_HUFFTABLE variant but expands to no instructions.
; NOTE(review): presumably a placeholder so shared code still assembles; callers
; must not rely on code_len being written in this configuration - confirm.
%macro get_packed_dist_code 3
%define %%dist %1 ; 64-bit IN
%define %%code_len %2d ; 32-bit OUT
%define %%hufftables %3 ; address of the hufftable
%endm
%endif
; "len" can be same register as "length"
; Look up the huffman code and code length for a match length in the length table.
; get_len_code length, code, len, hufftables
%macro get_len_code 4
%define %%length %1 ; 64-bit IN
%define %%code %2d ; 32-bit OUT
%define %%len %3d ; 32-bit OUT
%define %%hufftables %4 ; address of the hufftable
mov %%len, [%%hufftables + LEN_TABLE_OFFSET + 4 * %%length] ; packed entry: (code << 5) | length
mov %%code, %%len
and %%len, 0x1F ; len = low 5 bits of the packed entry
shr %%code, 5 ; code = remaining upper bits
%endm
; Look up the huffman code and code length for a literal symbol. The code comes
; from the 2-byte literal table and the length from the separate 1-byte sizes table.
; get_lit_code lit, code, len, hufftables
%macro get_lit_code 4
%define %%lit %1 ; 64-bit IN or CONST
%define %%code %2d ; 32-bit OUT
%define %%len %3d ; 32-bit OUT
%define %%hufftables %4 ; address of the hufftable
movzx %%len, byte [%%hufftables + LIT_TABLE_SIZES_OFFSET + %%lit]
movzx %%code, word [%%hufftables + LIT_TABLE_OFFSET + 2 * %%lit]
%endm
;; Compute hash of first 3 bytes of data
;; The top byte of data is masked off, then the remaining 24 bits are fed to the
;; crc32 instruction with a zero starting value.
;; compute_hash result, data
%macro compute_hash 2
%define %%result %1d ; 32-bit reg
%define %%data %2d ; 32-bit reg (top byte is cleared, low 3 bytes are kept)
and %%data, 0x00FFFFFF
xor %%result, %%result
crc32 %%result, %%data
%endm

226
igzip/huffman.h Normal file
View File

@ -0,0 +1,226 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
#include "igzip_lib.h"
/* Pull in the compiler intrinsics header; MSVC also gets `inline` mapped to __inline */
#ifdef _MSC_VER
# include <intrin.h>
# define inline __inline
#else
# include <x86intrin.h>
#endif
/* DEFLATE is defined unless IGZIP_USE_GZIP_FORMAT is requested */
#ifndef IGZIP_USE_GZIP_FORMAT
# define DEFLATE 1
#endif
/* CRC lookup table used by update_crc(); declared here, not defined in this header */
extern uint32_t CrcTable[256];
/**
 * @brief Returns the bit length of val (index of the highest set bit, plus one).
 *
 * Note that the result is 1-based, unlike the x86 BSR instruction: bsr(1) is 1
 * and bsr(0) is 0.
 *
 * @param val: value to inspect
 * @returns 0 when val is 0, otherwise the position of the highest set bit plus one
 */
static inline uint32_t bsr(uint32_t val)
{
	uint32_t msb;
#ifdef __LZCNT__
	/* Count over the full 32-bit operand so that this path agrees with the
	 * portable loop below for values above 0xFFFF as well */
	msb = 32 - __lzcnt32(val);
#else
	for (msb = 0; val > 0; val >>= 1)
		msb++;
#endif
	return msb;
}
/**
 * @brief Returns the number of whole zero bytes at the low end of val.
 *
 * @param val: 64-bit value to inspect
 * @returns count of trailing zero bytes, 0 through 7; 8 when val is 0
 */
static inline uint32_t tzcnt(uint64_t val)
{
	uint32_t cnt;
#ifdef __x86_64__
	/* __builtin_ctzll is undefined for 0; return 8 as the portable loop does */
	if (val == 0)
		return 8;
	cnt = __builtin_ctzll(val) / 8;
#else
	/* Shift one byte out of the top per step; the steps needed to empty val
	 * equal 8 minus the number of trailing zero bytes */
	for (cnt = 8; val > 0; val <<= 8)
		cnt -= 1;
#endif
	return cnt;
}
/**
 * @brief Computes the huffman code for a distance that is not in dist_table.
 *
 * The distance symbol and its extra bits are derived arithmetically from
 * (dist - 1); the symbol's code and code length are then read from the dcodes
 * tables, which are indexed relative to DECODE_OFFSET.
 *
 * @param hufftables: tables holding dcodes and dcodes_sizes
 * @param dist: match distance, must be greater than DIST_TABLE_SIZE
 * @param p_code: receives the symbol code with the extra bits placed above it
 * @param p_len: receives the total bit count (code length plus extra bits)
 */
static void compute_dist_code(struct isal_hufftables *hufftables, uint16_t dist, uint64_t *p_code, uint64_t *p_len)
{
	uint32_t zero_based;
	uint32_t top_bit;
	uint32_t n_extra;
	uint32_t extra_val;
	uint32_t sym;
	uint32_t sym_code;
	uint32_t sym_len;

	assert(dist > DIST_TABLE_SIZE);
	zero_based = (uint16_t) (dist - 1);

	top_bit = bsr(zero_based);
	assert(top_bit >= 1);

	/* The two highest bits select the symbol, everything below is extra bits */
	n_extra = top_bit - 2;
	extra_val = zero_based & ((1 << n_extra) - 1);
	sym = (zero_based >> n_extra) + 2 * n_extra;
	assert(sym < 30);

	sym_code = hufftables->dcodes[sym - DECODE_OFFSET];
	sym_len = hufftables->dcodes_sizes[sym - DECODE_OFFSET];

	*p_code = sym_code | (extra_val << sym_len);
	*p_len = sym_len + n_extra;
}
/**
 * @brief Gets the huffman code and bit count for a match distance.
 *
 * Distances up to DIST_TABLE_SIZE are read from the packed dist_table (low 5
 * bits are the length, the remaining bits are the code); larger distances are
 * handled by compute_dist_code().
 *
 * @param hufftables: tables to look the distance up in
 * @param dist: match distance, expected to be in 1..32768
 * @param code: receives the code bits
 * @param len: receives the number of bits in code
 */
static inline void get_dist_code(struct isal_hufftables *hufftables, uint32_t dist, uint64_t *code, uint64_t *len)
{
	/* The former "if (dist < 1) dist = 0;" was a no-op on an unsigned value
	 * and has been dropped; the precondition is still checked here */
	assert(dist >= 1);
	assert(dist <= 32768);
	if (dist <= DIST_TABLE_SIZE) {
		uint64_t code_len;
		code_len = hufftables->dist_table[dist - 1];
		*code = code_len >> 5;
		*len = code_len & 0x1F;
	} else {
		compute_dist_code(hufftables, dist, code, len);
	}
}
/**
 * @brief Gets the huffman code and bit count for a match length.
 *
 * @param hufftables: tables holding the packed len_table
 * @param length: match length, expected to be in 3..258
 * @param code: receives the code bits (packed entry shifted right by 5)
 * @param len: receives the number of bits (low 5 bits of the packed entry)
 */
static inline void get_len_code(struct isal_hufftables *hufftables, uint32_t length, uint64_t *code, uint64_t *len)
{
	uint64_t packed;

	assert(length >= 3);
	assert(length <= 258);

	/* len_table is indexed from the minimum match length */
	packed = hufftables->len_table[length - 3];
	*code = packed >> 5;
	*len = packed & 0x1F;
}
/**
 * @brief Gets the huffman code and bit count for a literal symbol.
 *
 * @param hufftables: tables holding lit_table and lit_table_sizes
 * @param lit: symbol to encode, at most 256
 * @param code: receives the code bits from lit_table
 * @param len: receives the number of bits from lit_table_sizes
 */
static inline void get_lit_code(struct isal_hufftables *hufftables, uint32_t lit, uint64_t *code, uint64_t *len)
{
	const uint32_t sym = lit;

	assert(sym <= 256);

	*code = hufftables->lit_table[sym];
	*len = hufftables->lit_table_sizes[sym];
}
/**
 * @brief Returns a hash of the first 3 bytes of input data.
 *
 * @param data: 32-bit word; its top byte is ignored
 * @returns 32-bit hash of the low three bytes of data
 */
static inline uint32_t compute_hash(uint32_t data)
{
	/* Only the low three bytes take part in the hash */
	data &= 0x00FFFFFF;
#ifdef __SSE4_2__
	/* _mm_crc32_u32 is an SSE4.2 intrinsic, so it must be gated on SSE4.2;
	 * gating on SSE4.1 breaks builds that enable SSE4.1 without SSE4.2 */
	return _mm_crc32_u32(0, data);
#else
	/* Portable fallback: multiplicative hash using the constant 0xB2D06057 */
	return ((uint64_t)data * 0xB2D06057) >> 16;
#endif /* __SSE4_2__ */
}
/**
 * @brief Returns how long str1 and str2 have the same symbols.
 *
 * The comparison is capped at 258 bytes. Whole 8-byte words are compared first;
 * on a mismatching word, tzcnt() of the XOR gives the number of equal bytes
 * within it (this relies on the first byte in memory being the low byte of the
 * loaded word). The remaining max_length % 8 bytes are compared one at a time.
 *
 * @param str1: First input string.
 * @param str2: Second input string.
 * @param max_length: length of the smaller string.
 * @returns number of leading bytes that are equal, at most min(max_length, 258)
 */
static inline int compare258(uint8_t * str1, uint8_t * str2, uint32_t max_length)
{
	uint32_t matched = 0;
	uint32_t limit = (max_length > 258) ? 258 : max_length;
	uint64_t word_bytes = limit & ~0x7;
	uint64_t diff;

	/* Word-at-a-time phase */
	while (matched < word_bytes) {
		diff = *(uint64_t *) str1 ^ *(uint64_t *) str2;
		if (diff != 0)
			return matched + tzcnt(diff);
		str1 += 8;
		str2 += 8;
		matched += 8;
	}

	/* Byte-at-a-time tail */
	for (; matched < limit; matched++) {
		if (*str1++ != *str2++)
			return matched;
	}

	return matched;
}
/**
 * @brief Folds a run of bytes into a running table-driven CRC.
 *
 * When DEFLATE is defined this function does nothing. Otherwise each input byte
 * is combined with the low byte of the running value to index CrcTable.
 *
 * @param crc: running CRC value, updated in place
 * @param start: first byte to process
 * @param length: number of bytes to process
 */
static inline void update_crc(uint32_t* crc, uint8_t * start, uint32_t length)
{
#ifdef DEFLATE
	/* No CRC is maintained in DEFLATE builds */
	return;
#else
	uint32_t i;

	for (i = 0; i < length; i++) {
		uint32_t running = *crc;
		*crc = (running >> 8) ^ CrcTable[(running & 0x000000FF) ^ start[i]];
	}
#endif
}

2528
igzip/hufftables_c.c Normal file

File diff suppressed because it is too large Load Diff

882
igzip/igzip.c Normal file
View File

@ -0,0 +1,882 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#define ASM
#include <assert.h>
#include <string.h>
#ifdef _WIN32
# include <intrin.h>
#endif
#ifndef IGZIP_USE_GZIP_FORMAT
# define DEFLATE 1
#endif
/* Size constants for the deflate output paths.
 * FORCE_FLUSH is the value handed to check_space() to force buffered bits out.
 * NOTE(review): the other values appear to be byte counts - confirm at their users. */
#define MAX_WRITE_BITS_SIZE 8
#define FORCE_FLUSH 64
#define MIN_OBUF_SIZE 224
#define NON_EMPTY_BLOCK_SIZE 6
/* Parenthesized so the sum keeps its value inside larger expressions */
#define MAX_SYNC_FLUSH_SIZE (NON_EMPTY_BLOCK_SIZE + MAX_WRITE_BITS_SIZE)
#include "huffman.h"
#include "bitbuf2.h"
#include "igzip_lib.h"
#include "repeated_char_result.h"
/* gzip container header bytes and the header/trailer sizes, defined elsewhere */
extern const uint8_t gzip_hdr[];
extern const uint32_t gzip_hdr_bytes;
extern const uint32_t gzip_trl_bytes;
/* Huffman tables installed by isal_deflate_init_01() */
extern const struct isal_hufftables hufftables_default;
/* CRC used for the gzip trailer in the stateless paths */
extern uint32_t crc32_gzip(uint32_t init_crc, const unsigned char *buf, uint64_t len);
/* Helpers for the stateless (single call) compression path */
static int write_stored_block_stateless(struct isal_zstream *stream, uint32_t stored_len,
uint32_t crc32);
#ifndef DEFLATE
static int write_gzip_header_stateless(struct isal_zstream *stream);
#endif
static int write_deflate_header_stateless(struct isal_zstream *stream);
static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream);
static int write_trailer_stateless(struct isal_zstream *stream, uint32_t avail_in,
uint32_t crc32);
void isal_deflate_body_stateless(struct isal_zstream *stream);
unsigned int detect_repeated_char(uint8_t * buf, uint32_t size);
/* Stored block framing: 5 header bytes per block and at most 65535 payload bytes
 * per block (see write_stored_block_stateless) */
#define STORED_BLK_HDR_BZ 5
#define STORED_BLK_MAX_BZ 65535
/* Stages of the stateful path, invoked from isal_deflate_int() */
void isal_deflate_body(struct isal_zstream *stream);
void isal_deflate_finish(struct isal_zstream *stream);
/* CRC reduction helper wrapped by get_crc_01() */
uint32_t crc_512to32_01(uint32_t * crc);
uint32_t get_crc(uint32_t * crc);
/*****************************************************************/
/* Forward declarations */
static inline void reset_match_history(struct isal_zstream *stream);
void write_header(struct isal_zstream *stream);
void write_deflate_header(struct isal_zstream *stream);
void write_trailer(struct isal_zstream *stream);
/* Version record attached to each exported entry point */
struct slver {
uint16_t snum; /* serial number of the function */
uint8_t ver; /* version field */
uint8_t core; /* core field */
};
/* Version info */
/* Each function gets a symbol whose name suffix spells core, ver and snum in hex,
 * followed by the initialized record { snum, ver, core } */
struct slver isal_deflate_init_slver_01030081;
struct slver isal_deflate_init_slver = { 0x0081, 0x03, 0x01 };
struct slver isal_deflate_slver_01030082;
struct slver isal_deflate_slver = { 0x0082, 0x03, 0x01 };
struct slver isal_deflate_stateless_slver_01010083;
struct slver isal_deflate_stateless_slver = { 0x0083, 0x01, 0x01 };
/*****************************************************************/
/**
 * @brief Returns the valid byte count plus the offset of buffer from file_start.
 *
 * @param state: compression state to inspect
 * @returns b_bytes_valid plus (buffer - file_start), the latter truncated to 32 bits
 */
uint32_t file_size(struct isal_zstate *state)
{
	const uint32_t buffer_offset = (uint32_t) (state->buffer - state->file_start);

	return state->b_bytes_valid + buffer_offset;
}
/**
 * @brief Ends the current block and emits an empty stored block as a flush marker.
 *
 * Does nothing, leaving the state unchanged, unless at least 8 bytes of output
 * space are available. Otherwise a single write_bits() call emits: the end of
 * block code (only when one has not been written yet), a 3-bit block header of
 * zeros, zero padding up to the next byte boundary, and the 32-bit value
 * 0xFFFF0000. On a FULL_FLUSH the match history is reset as well.
 *
 * @param stream: stream whose output buffer receives the marker
 */
static
void sync_flush(struct isal_zstream *stream)
{
struct isal_zstate *state = &stream->internal_state;
/* 0xFFFF0000 is the LEN = 0x0000 / NLEN = 0xFFFF pair of an empty stored block */
uint64_t bits_to_write = 0xFFFF0000, bits_len;
uint64_t code = 0, len = 0, bytes;
int flush_size;
if (stream->avail_out >= 8) {
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
/* Symbol 256 is fetched as the end of block code; len stays 0 when the
 * block has already been terminated */
if (!state->has_eob)
get_lit_code(stream->hufftables, 256, &code, &len);
/* Padding bits needed so that the pending bits, the end of block code and
 * the 3 header bits together end on a byte boundary */
flush_size = (-(state->bitbuf.m_bit_count + len + 3)) % 8;
/* Make room below LEN/NLEN for the 3 header bits and the padding (all zero) */
bits_to_write <<= flush_size + 3;
bits_len = 32 + len + flush_size + 3;
#ifdef USE_BITBUFB /* Write Bits Always */
state->state = ZSTATE_NEW_HDR;
#else /* Not Write Bits Always */
state->state = ZSTATE_FLUSH_WRITE_BUFFER;
#endif
state->has_eob = 0;
/* Place the end of block code in the lowest bits so it is written first */
if (len > 0)
bits_to_write = (bits_to_write << len) | code;
write_bits(&state->bitbuf, bits_to_write, bits_len);
bytes = buffer_used(&state->bitbuf);
stream->next_out = buffer_ptr(&state->bitbuf);
stream->avail_out -= bytes;
stream->total_out += bytes;
if (stream->flush == FULL_FLUSH) {
/* Clear match history so there are no cross
 * block length distance pairs */
reset_match_history(stream);
}
}
}
/**
 * @brief Flushes the bit buffer to the output and moves on to ZSTATE_NEW_HDR.
 *
 * Nothing happens, and the state is left unchanged, when fewer than 8 bytes of
 * output space are available.
 *
 * @param stream: stream whose bit buffer is flushed
 */
static void flush_write_buffer(struct isal_zstream *stream)
{
	struct isal_zstate *state = &stream->internal_state;
	int flushed = 0;

	if (stream->avail_out < 8)
		return;

	set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
	flush(&state->bitbuf);
	stream->next_out = buffer_ptr(&state->bitbuf);
	flushed = buffer_used(&state->bitbuf);
	stream->avail_out -= flushed;
	stream->total_out += flushed;
	state->state = ZSTATE_NEW_HDR;
}
/**
 * @brief Runs the stateful compression state machine for one pass.
 *
 * The checks are sequential on purpose: each stage may change the state, and a
 * later check then picks up the new state within the same call.
 *
 * @param stream: stream to advance
 */
static void isal_deflate_int(struct isal_zstream *stream)
{
	struct isal_zstate *zs = &stream->internal_state;

	if (zs->state == ZSTATE_HDR || zs->state == ZSTATE_NEW_HDR) {
		write_header(stream);
	}
	if (zs->state == ZSTATE_BODY) {
		isal_deflate_body(stream);
	}
	if (zs->state == ZSTATE_FLUSH_READ_BUFFER) {
		isal_deflate_finish(stream);
	}
	if (zs->state == ZSTATE_SYNC_FLUSH) {
		sync_flush(stream);
	}
	if (zs->state == ZSTATE_FLUSH_WRITE_BUFFER) {
		flush_write_buffer(stream);
	}
	if (zs->state == ZSTATE_TRL) {
		write_trailer(stream);
	}
}
/**
 * @brief Writes a complete block encoding a run of one repeated byte.
 *
 * A prebuilt header chosen by the low bit of repeated_char is copied out, then
 * the run is encoded as zero bits (two per full group of 258 bytes) followed by
 * a short sequence of length codes and literals for the remainder, and finally
 * the end of block code. The input pointers are advanced past the whole run.
 *
 * @param stream: stream providing the input run and the output buffer
 * @param repeated_char: the repeated byte; only 0 and 0xFF are supported
 * @param repeated_length: length of the run in bytes, at least 1
 * @param end_of_stream: non-zero to set the low bit of the first header byte
 * @returns COMP_OK, or STATELESS_OVERFLOW when the output buffer is too small
 *          (nothing is written in that case)
 */
static uint32_t write_constant_compressed_stateless(struct isal_zstream *stream,
uint32_t repeated_char,
uint32_t repeated_length,
uint32_t end_of_stream)
{
/* Assumes repeated_length is at least 1.
 * Assumes the input end_of_stream is either 0 or 1. */
struct isal_zstate *state = &stream->internal_state;
/* Two output bits are budgeted for every full group of 258 bytes after the first byte */
uint32_t rep_bits = ((repeated_length - 1) / 258) * 2;
uint32_t rep_bytes = rep_bits / 8;
/* Bytes left over after the full groups of 258 */
uint32_t rep_extra = (repeated_length - 1) % 258;
uint32_t bytes;
/* Guarantee there is enough space for the header even in the worst case */
if (stream->avail_out < HEADER_LENGTH + MAX_FIXUP_CODE_LENGTH + rep_bytes + 8)
return STATELESS_OVERFLOW;
/* Assumes the repeated char is either 0 or 0xFF. */
memcpy(stream->next_out, repeated_char_header[repeated_char & 1], HEADER_LENGTH);
if (end_of_stream > 0)
stream->next_out[0] |= 1;
/* The whole bytes of the group encoding are all zero bits */
memset(stream->next_out + HEADER_LENGTH, 0, rep_bytes);
stream->avail_out -= HEADER_LENGTH + rep_bytes;
stream->next_out += HEADER_LENGTH + rep_bytes;
stream->total_out += HEADER_LENGTH + rep_bytes;
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
/* These two lines are basically a modified version of init. */
/* The bit buffer starts with the leftover (rep_bits % 8) zero bits pending */
state->bitbuf.m_bits = 0;
state->bitbuf.m_bit_count = rep_bits % 8;
/* Add smaller repeat codes as necessary. Code280 can describe repeat
 * lengths of 115-130 bytes. Code10 describes a repeat length of 10
 * bytes. If 230 or more bytes remain, fill with two code280s. Else if
 * more than 115 bytes remain, fill with code10s until one code280 can
 * finish the rest of the repeats. Else, fill with code10s and
 * literals */
if (rep_extra > 115) {
while (rep_extra > 130 && rep_extra < 230) {
write_bits(&state->bitbuf, CODE_10, CODE_10_LENGTH);
rep_extra -= 10;
}
if (rep_extra >= 230) {
/* First code280 covers half of the remainder; its offset from 115 is
 * placed above the code */
write_bits(&state->bitbuf,
CODE_280 | ((rep_extra / 2 - 115) << CODE_280_LENGTH),
CODE_280_TOTAL_LENGTH);
rep_extra -= rep_extra / 2;
}
write_bits(&state->bitbuf,
CODE_280 | ((rep_extra - 115) << CODE_280_LENGTH),
CODE_280_TOTAL_LENGTH);
} else {
while (rep_extra >= 10) {
write_bits(&state->bitbuf, CODE_10, CODE_10_LENGTH);
rep_extra -= 10;
}
/* Fewer than 10 bytes left: emit them as individual literals */
for (; rep_extra > 0; rep_extra--)
write_bits(&state->bitbuf, CODE_LIT, CODE_LIT_LENGTH);
}
write_bits(&state->bitbuf, END_OF_BLOCK, END_OF_BLOCK_LEN);
stream->next_in += repeated_length;
stream->avail_in -= repeated_length;
stream->total_in += repeated_length;
bytes = buffer_used(&state->bitbuf);
stream->next_out = buffer_ptr(&state->bitbuf);
stream->avail_out -= bytes;
stream->total_out += bytes;
return COMP_OK;
}
/**
 * @brief Returns the length of the leading run that repeats the start of a buffer.
 *
 * For buffers of 8 bytes or more the scan first advances in 8-byte words while
 * each word equals the first word, then continues byte by byte while each byte
 * equals the first byte. The word scan assumes the first 8 bytes are the same
 * character, so it won't work effectively if the input does not start with such
 * a run. Buffers shorter than 8 bytes are scanned byte by byte only, so no read
 * ever goes past in + length.
 *
 * @param in: buffer to scan
 * @param length: number of readable bytes at in
 * @returns number of leading bytes in the run, 0 for an empty buffer
 */
int detect_repeated_char_length(uint8_t * in, uint32_t length)
{
	uint8_t *end = in + length;
	uint8_t *p_8 = in;
	uint8_t c;

	/* Nothing to read: avoid touching in[0] */
	if (length == 0)
		return 0;

	c = in[0];

	/* Only read whole words when at least one full word is available */
	if (length >= 8) {
		uint64_t *p_64 = (uint64_t *) in;
		uint64_t w = *p_64;

		for (; (p_64 <= (uint64_t *) (end - 8)) && (w == *p_64); p_64++) ;
		p_8 = (uint8_t *) p_64;
	}

	for (; (p_8 < end) && (c == *p_8); p_8++) ;

	return p_8 - in;
}
/**
 * @brief Compresses the whole input in one call using huffman coded blocks.
 *
 * A leading run of 0x00 or 0xFF bytes is written with
 * write_constant_compressed_stateless(): as the only block when the run covers
 * the entire input, or as a first block when the run is at least MIN_REPEAT_LEN
 * bytes long. Any remaining input is compressed by isal_deflate_body_stateless().
 *
 * @param stream: stream holding the input and output buffers
 * @param next_in: start of the original input, used for the gzip CRC
 * @param avail_in: length of the original input
 * @returns COMP_OK on success, STATELESS_OVERFLOW when the output does not fit
 */
static int isal_deflate_int_stateless(struct isal_zstream *stream, uint8_t * next_in,
				      const uint32_t avail_in)
{
	uint32_t crc32 = 0;
	uint32_t repeated_char_length;

#ifndef DEFLATE
	if (write_gzip_header_stateless(stream))
		return STATELESS_OVERFLOW;
#endif

	/* Only runs of 0x00 or 0xFF are recognized; the first word is tested here */
	if (avail_in >= 8
	    && (*(uint64_t *) stream->next_in == 0
		|| *(uint64_t *) stream->next_in == ~(uint64_t) 0))
		repeated_char_length =
		    detect_repeated_char_length(stream->next_in, stream->avail_in);
	else
		repeated_char_length = 0;

	/* The constant-run writer assumes a run of at least 1 byte and this code
	 * reads next_in[0], so the run paths are skipped when no run was found.
	 * In particular an empty input (avail_in == 0) must not be treated as a
	 * run that covers the whole input. */
	if (repeated_char_length > 0) {
		if (stream->avail_in == repeated_char_length) {
			if (write_constant_compressed_stateless(stream,
								stream->next_in[0],
								repeated_char_length, 1) != COMP_OK)
				return STATELESS_OVERFLOW;

#ifndef DEFLATE
			crc32 = crc32_gzip(0x0, next_in, avail_in);
#endif

			/* write_trailer_stateless is required because it flushes out the last of the output */
			if (write_trailer_stateless(stream, avail_in, crc32) != COMP_OK)
				return STATELESS_OVERFLOW;
			return COMP_OK;

		} else if (repeated_char_length >= MIN_REPEAT_LEN) {
			if (write_constant_compressed_stateless
			    (stream, stream->next_in[0], repeated_char_length, 0) != COMP_OK)
				return STATELESS_OVERFLOW;
		}
	}

	if (write_deflate_header_unaligned_stateless(stream) != COMP_OK)
		return STATELESS_OVERFLOW;
	if (stream->avail_out < 8)
		return STATELESS_OVERFLOW;

	isal_deflate_body_stateless(stream);

	/* The body is treated as having overflowed unless it wrote the end of block */
	if (!stream->internal_state.has_eob)
		return STATELESS_OVERFLOW;

#ifndef DEFLATE
	crc32 = crc32_gzip(0x0, next_in, avail_in);
#endif

	if (write_trailer_stateless(stream, avail_in, crc32) != COMP_OK)
		return STATELESS_OVERFLOW;

	return COMP_OK;
}
/**
 * @brief Writes the whole input as a sequence of stored (uncompressed) blocks.
 *
 * Each block carries a 5-byte header followed by up to STORED_BLK_MAX_BZ bytes
 * of input; the last block has the low bit of its header set. In gzip builds
 * the gzip header is written first and the CRC/size trailer last. The headers
 * and trailer are built as integers and copied out with memcpy, which relies
 * on little-endian byte order.
 *
 * @param stream: stream holding the input and output buffers
 * @param stored_len: total number of output bytes this call will produce
 * @param crc32: CRC of the input, used for the gzip trailer only
 * @returns COMP_OK, or STATELESS_OVERFLOW when avail_out is below stored_len
 */
static int write_stored_block_stateless(struct isal_zstream *stream,
					uint32_t stored_len, uint32_t crc32)
{
	uint64_t hdr_word;
	uint32_t chunk;
	uint32_t remaining;
#ifndef DEFLATE
	uint64_t trailer_word;
#endif

	if (stream->avail_out < stored_len)
		return STATELESS_OVERFLOW;

	/* Account for the whole output up front */
	stream->avail_out -= stored_len;
	stream->total_out += stored_len;

	remaining = stream->avail_in;

#ifndef DEFLATE
	memcpy(stream->next_out, gzip_hdr, gzip_hdr_bytes);
	stream->next_out += gzip_hdr_bytes;
#endif

	do {
		if (remaining >= STORED_BLK_MAX_BZ) {
			/* Full block: LEN = 0xFFFF in bytes 1-2, NLEN = 0x0000 in bytes 3-4 */
			chunk = STORED_BLK_MAX_BZ;
			hdr_word = 0xFFFF00;
		} else {
			/* Partial block: LEN in bytes 1-2, its complement in bytes 3-4 */
			chunk = remaining;
			hdr_word = ((uint64_t) ~remaining << 24) | ((remaining & 0xFFFF) << 8);
		}

		remaining -= chunk;

		/* Handle BFINAL bit */
		if (remaining == 0)
			hdr_word |= 0x1;

		memcpy(stream->next_out, &hdr_word, STORED_BLK_HDR_BZ);
		stream->next_out += STORED_BLK_HDR_BZ;

		memcpy(stream->next_out, stream->next_in, chunk);
		stream->next_out += chunk;
		stream->next_in += chunk;
		stream->total_in += chunk;
	} while (remaining != 0);

#ifndef DEFLATE
	/* Trailer: CRC in the low 32 bits, input size in the high 32 bits */
	trailer_word = stream->avail_in;
	trailer_word <<= 32;
	trailer_word |= crc32 & 0xFFFFFFFF;
	memcpy(stream->next_out, &trailer_word, gzip_trl_bytes);
	stream->next_out += gzip_trl_bytes;
#endif

	stream->avail_in = 0;
	return COMP_OK;
}
/**
 * @brief Overwrites every entry of the hash head table with one common value.
 *
 * The value is the current position relative to file_start, moved back by
 * IGZIP_D + 1 and truncated to 16 bits.
 * NOTE(review): presumably this puts every entry beyond the usable match
 * distance so no earlier data can be matched - confirm against the match finder.
 *
 * @param stream: stream whose internal head table is reset
 */
static inline void reset_match_history(struct isal_zstream *stream)
{
	struct isal_zstate *state = &stream->internal_state;
	uint16_t *slot = stream->internal_state.head;
	uint16_t *const stop = slot + sizeof(state->head) / 2;
	const uint16_t stale =
	    (uint16_t) (state->b_bytes_processed + state->buffer - state->file_start -
			(IGZIP_D + 1));

	while (slot < stop)
		*slot++ = stale;
}
/**
 * @brief Puts a stream into its initial state for a new compression job.
 *
 * Resets the byte totals, selects the default huffman tables, clears the flush
 * mode and all internal bookkeeping, seeds the CRC state and resets the match
 * history. The caller's input and output pointers are not touched.
 *
 * @param stream: stream to initialize
 */
void isal_deflate_init_01(struct isal_zstream *stream)
{
	struct isal_zstate *state = &stream->internal_state;

	/* Caller-visible fields */
	stream->total_in = 0;
	stream->total_out = 0;
	stream->hufftables = (struct isal_hufftables *)&hufftables_default;
	stream->flush = 0;

	/* Input window bookkeeping */
	state->b_bytes_valid = 0;
	state->b_bytes_processed = 0;

	/* Block, header and flush tracking */
	state->has_eob = 0;
	state->has_eob_hdr = 0;
	state->left_over = 0;
	state->last_flush = 0;
	state->has_gzip_hdr = 0;
	state->state = ZSTATE_NEW_HDR;
	state->count = 0;

	/* Temporary output staging is empty */
	state->tmp_out_start = 0;
	state->tmp_out_end = 0;

	state->file_start = state->buffer;

	init(&state->bitbuf);

	/* CRC state is zeroed except for the seed in its first word */
	memset(state->crc, 0, sizeof(state->crc));
	state->crc[0] = 0x9db42487;

	/* Must run after the position fields above have been set */
	reset_match_history(stream);
}
/**
 * @brief Compresses the entire input in a single call.
 *
 * Huffman coded output is tried first unless a stored block is already known
 * to be smaller than the minimum huffman coded output. If that attempt fails,
 * the stream is re-initialized, the caller's buffers are restored and the
 * input is written as stored blocks instead.
 *
 * @param stream: stream holding the input and output buffers
 * @returns COMP_OK on success, STATELESS_OVERFLOW when the output buffer is too
 *          small (it must be at least 8 bytes in every case)
 */
int isal_deflate_stateless(struct isal_zstream *stream)
{
	/* Snapshot of the caller's buffers, restored for the stored block fallback */
	uint8_t *in_start = stream->next_in;
	const uint32_t in_len = stream->avail_in;
	uint8_t *out_start = stream->next_out;
	const uint32_t out_len = stream->avail_out;

	uint32_t checksum = 0;
	uint32_t stored_bytes = STORED_BLK_HDR_BZ;
	uint32_t dyn_min_bytes;
	uint32_t needed;
	uint32_t prefer_stored;

	/* One 5-byte header per stored block plus the payload; an empty input
	 * still needs one (empty) block */
	if (in_len != 0)
		stored_bytes =
		    STORED_BLK_HDR_BZ * ((in_len + STORED_BLK_MAX_BZ - 1) /
					 STORED_BLK_MAX_BZ) + in_len;

	/*
	   at least 1 byte compressed data in the case of empty dynamic block which only
	   contains the EOB
	 */
	dyn_min_bytes = stream->hufftables->deflate_hdr_count + 1;
#ifndef DEFLATE
	dyn_min_bytes += gzip_hdr_bytes + gzip_trl_bytes + 1;
	stored_bytes += gzip_hdr_bytes + gzip_trl_bytes;
#endif

	prefer_stored = (stored_bytes < dyn_min_bytes) ? 1 : 0;
	needed = prefer_stored ? stored_bytes : dyn_min_bytes;

	/*
	   the output buffer should be no less than 8 bytes
	   while empty stored deflate block is 5 bytes only
	 */
	if (out_len < needed || stream->avail_out < 8)
		return STATELESS_OVERFLOW;

	if (!prefer_stored
	    && isal_deflate_int_stateless(stream, in_start, in_len) == COMP_OK)
		return COMP_OK;

	if (out_len < stored_bytes)
		return STATELESS_OVERFLOW;

	/* Start over: discard partial output and rewind to the caller's buffers */
	isal_deflate_init(stream);

	stream->next_in = in_start;
	stream->avail_in = in_len;
	stream->total_in = 0;

	stream->next_out = out_start;
	stream->avail_out = out_len;
	stream->total_out = 0;

#ifndef DEFLATE
	checksum = crc32_gzip(0x0, in_start, in_len);
#endif
	return write_stored_block_stateless(stream, stored_bytes, checksum);
}
/**
 * @brief Stateful compression entry point; consumes input and produces output.
 *
 * Output left over from a previous call in the internal temporary buffer is
 * drained first. The state machine then runs against the caller's buffer.
 * When fewer than 8 bytes of output space remain (but more than 0), the state
 * machine runs once more into the temporary buffer and as much of the result
 * as fits is copied out; any remainder is flagged by offsetting the state by
 * TMP_OFFSET_SIZE so the next call drains it.
 *
 * @param stream: stream to advance
 * @returns COMP_OK, or INVALID_FLUSH when stream->flush is not below 3
 */
int isal_deflate(struct isal_zstream *stream)
{
struct isal_zstate *state = &stream->internal_state;
uint32_t size;
int ret = COMP_OK;
/* Only flush values 0, 1 and 2 are accepted */
if (stream->flush < 3) {
state->last_flush = stream->flush;
/* A state at or above TMP_OFFSET_SIZE means staged output is still pending */
if (state->state >= TMP_OFFSET_SIZE) {
size = state->tmp_out_end - state->tmp_out_start;
if (size > stream->avail_out)
size = stream->avail_out;
memcpy(stream->next_out, state->tmp_out_buff + state->tmp_out_start,
size);
stream->next_out += size;
stream->avail_out -= size;
stream->total_out += size;
state->tmp_out_start += size;
/* Staging buffer drained: restore the real state */
if (state->tmp_out_start == state->tmp_out_end)
state->state -= TMP_OFFSET_SIZE;
if (stream->avail_out == 0 || state->state == ZSTATE_END)
return ret;
}
assert(state->tmp_out_start == state->tmp_out_end);
isal_deflate_int(stream);
if (stream->avail_out == 0)
return ret;
else if (stream->avail_out < 8) {
/* The stages require at least 8 bytes of output space, so run them against
 * the temporary buffer and copy out what fits */
uint8_t *next_out;
uint32_t avail_out;
uint32_t total_out;
next_out = stream->next_out;
avail_out = stream->avail_out;
total_out = stream->total_out;
stream->next_out = state->tmp_out_buff;
stream->avail_out = sizeof(state->tmp_out_buff);
/* total_out is zeroed so that it measures the bytes staged by this run */
stream->total_out = 0;
isal_deflate_int(stream);
state->tmp_out_start = 0;
state->tmp_out_end = stream->total_out;
/* Switch back to the caller's buffer and counters */
stream->next_out = next_out;
stream->avail_out = avail_out;
stream->total_out = total_out;
if (state->tmp_out_end) {
size = state->tmp_out_end;
if (size > stream->avail_out)
size = stream->avail_out;
memcpy(stream->next_out, state->tmp_out_buff, size);
stream->next_out += size;
stream->avail_out -= size;
stream->total_out += size;
state->tmp_out_start += size;
/* Mark the state so the next call drains the rest first */
if (state->tmp_out_start != state->tmp_out_end)
state->state += TMP_OFFSET_SIZE;
}
}
} else
ret = INVALID_FLUSH;
return ret;
}
#ifndef DEFLATE
/**
 * @brief Writes the complete gzip header to the output buffer.
 *
 * @param stream: stream whose output buffer receives the header
 * @returns COMP_OK, or STATELESS_OVERFLOW unless the output buffer is strictly
 *          larger than the header
 */
static int write_gzip_header_stateless(struct isal_zstream *stream)
{
	if (stream->avail_out <= gzip_hdr_bytes)
		return STATELESS_OVERFLOW;

	memcpy(stream->next_out, gzip_hdr, gzip_hdr_bytes);

	stream->next_out += gzip_hdr_bytes;
	stream->avail_out -= gzip_hdr_bytes;
	stream->total_out += gzip_hdr_bytes;

	return COMP_OK;
}
/**
 * @brief Writes as much of the gzip header as fits, resuming where it left off.
 *
 * state->count tracks how many header bytes have been written so far. Once the
 * whole header is out, count is reset to 0 and has_gzip_hdr is set.
 *
 * @param stream: stream whose output buffer receives the header bytes
 */
static void write_gzip_header(struct isal_zstream *stream)
{
	struct isal_zstate *state = &stream->internal_state;
	int bytes_to_write = gzip_hdr_bytes;

	/* Remaining header bytes, limited by the space available */
	bytes_to_write -= state->count;
	if (bytes_to_write > stream->avail_out)
		bytes_to_write = stream->avail_out;

	memcpy(stream->next_out, gzip_hdr + state->count, bytes_to_write);

	stream->next_out += bytes_to_write;
	stream->avail_out -= bytes_to_write;
	stream->total_out += bytes_to_write;

	state->count += bytes_to_write;
	if (state->count == gzip_hdr_bytes) {
		state->has_gzip_hdr = 1;
		state->count = 0;
	}
}
#endif
/**
 * @brief Writes the deflate block header starting at a byte boundary.
 *
 * The whole bytes of the header are copied directly; the trailing partial byte
 * (deflate_hdr_extra_bits bits) goes through the bit buffer.
 *
 * @param stream: stream whose output buffer receives the header
 * @returns COMP_OK, or STATELESS_OVERFLOW unless the output buffer is larger
 *          than the whole-byte header length plus 8
 */
static int write_deflate_header_stateless(struct isal_zstream *stream)
{
	struct isal_zstate *state = &stream->internal_state;
	struct isal_hufftables *tables = stream->hufftables;
	uint32_t written;

	if (stream->avail_out <= tables->deflate_hdr_count + 8)
		return STATELESS_OVERFLOW;

	/* Whole header bytes */
	memcpy(stream->next_out, tables->deflate_hdr, tables->deflate_hdr_count);
	stream->next_out += tables->deflate_hdr_count;
	stream->avail_out -= tables->deflate_hdr_count;
	stream->total_out += tables->deflate_hdr_count;

	/* Trailing partial byte */
	set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
	write_bits(&state->bitbuf, tables->deflate_hdr[tables->deflate_hdr_count],
		   tables->deflate_hdr_extra_bits);

	written = buffer_used(&state->bitbuf);
	stream->next_out = buffer_ptr(&state->bitbuf);
	stream->avail_out -= written;
	stream->total_out += written;

	return COMP_OK;
}
/**
 * @brief Writes the deflate block header when the bit buffer may hold pending bits.
 *
 * With no pending bits this defers to write_deflate_header_stateless().
 * Otherwise the header is pushed through write_bits() so that it follows the
 * pending bits without padding: 64-bit words are written as two 32-bit halves,
 * then the remaining bits of the header are written.
 *
 * @param stream: stream whose output buffer receives the header
 * @returns COMP_OK, or STATELESS_OVERFLOW when the output buffer is too small
 */
static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream)
{
struct isal_zstate *state = &stream->internal_state;
struct isal_hufftables *hufftables = stream->hufftables;
unsigned int count;
uint64_t bit_count;
uint64_t *header_next;
uint64_t *header_end;
uint64_t header_bits;
if (state->bitbuf.m_bit_count == 0)
return write_deflate_header_stateless(stream);
if (hufftables->deflate_hdr_count + 16 >= stream->avail_out)
return STATELESS_OVERFLOW;
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
/* Walk the header as 64-bit words; header_end marks the last complete word */
header_next = (uint64_t *) hufftables->deflate_hdr;
header_end = header_next + hufftables->deflate_hdr_count / 8;
/* Write out Complete Header bits */
for (; header_next < header_end; header_next++) {
header_bits = *header_next;
write_bits(&state->bitbuf, header_bits, 32);
header_bits >>= 32;
write_bits(&state->bitbuf, header_bits, 32);
}
/* NOTE(review): this loads a full 8 bytes even when fewer header bytes remain;
 * presumably deflate_hdr is large enough for the read - confirm. */
header_bits = *header_next;
/* Bits left over: the whole bytes beyond the last full word plus the partial byte */
bit_count =
(hufftables->deflate_hdr_count & 0x7) * 8 + hufftables->deflate_hdr_extra_bits;
/* Split the remainder when it exceeds what a single write_bits call may take */
if (bit_count > MAX_BITBUF_BIT_WRITE) {
write_bits(&state->bitbuf, header_bits, MAX_BITBUF_BIT_WRITE);
header_bits >>= MAX_BITBUF_BIT_WRITE;
bit_count -= MAX_BITBUF_BIT_WRITE;
}
write_bits(&state->bitbuf, header_bits, bit_count);
/* check_space flushes extra bytes in bitbuf. Required because
 * write_bits_always fails when the next commit makes the buffer
 * length exceed 64 bits */
check_space(&state->bitbuf, FORCE_FLUSH);
count = buffer_used(&state->bitbuf);
stream->next_out = buffer_ptr(&state->bitbuf);
stream->avail_out -= count;
stream->total_out += count;
return COMP_OK;
}
/**
 * @brief Writes the block header for the stateful path, resumable across calls.
 *
 * Sets the state to ZSTATE_HDR, flushes any pending bits, writes the gzip header
 * once (gzip builds only), copies the whole bytes of the deflate header as space
 * allows (progress is kept in state->count) and finally writes the trailing
 * partial byte, after which the state becomes ZSTATE_BODY. If output space runs
 * out the state stays ZSTATE_HDR and the next call continues.
 *
 * @param stream: stream whose output buffer receives the header
 */
void write_header(struct isal_zstream *stream)
{
struct isal_zstate *state = &stream->internal_state;
struct isal_hufftables *hufftables = stream->hufftables;
uint32_t count;
state->state = ZSTATE_HDR;
/* Pending bits must be flushed before header bytes can be copied directly */
if (state->bitbuf.m_bit_count != 0) {
if (stream->avail_out < 8)
return;
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
flush(&state->bitbuf);
count = buffer_used(&state->bitbuf);
stream->next_out = buffer_ptr(&state->bitbuf);
stream->avail_out -= count;
stream->total_out += count;
}
#ifndef DEFLATE
if (!state->has_gzip_hdr)
write_gzip_header(stream);
#endif
/* Whole header bytes that still have to be written */
count = hufftables->deflate_hdr_count - state->count;
if (count != 0) {
if (count > stream->avail_out)
count = stream->avail_out;
memcpy(stream->next_out, hufftables->deflate_hdr + state->count, count);
/* The first header byte was just written: clear its low bit unless this is
 * the end of the stream, in which case record that the final header is out.
 * NOTE(review): the low bit is presumably the BFINAL flag - confirm. */
if (state->count == 0 && count > 0) {
if (!stream->end_of_stream)
*stream->next_out &= 0xfe;
else
state->has_eob_hdr = 1;
}
stream->next_out += count;
stream->avail_out -= count;
stream->total_out += count;
state->count += count;
count = hufftables->deflate_hdr_count - state->count;
}
/* All whole bytes are out: write the trailing partial byte via the bit buffer */
if ((count == 0) && (stream->avail_out >= 8)) {
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
write_bits(&state->bitbuf,
hufftables->deflate_hdr[hufftables->deflate_hdr_count],
hufftables->deflate_hdr_extra_bits);
state->state = ZSTATE_BODY;
state->count = 0;
count = buffer_used(&state->bitbuf);
stream->next_out = buffer_ptr(&state->bitbuf);
stream->avail_out -= count;
stream->total_out += count;
}
}
/**
 * Produce the 32-bit CRC value from the CRC state used by the "01" code path.
 *
 * @param crc pointer to the CRC state, handed unchanged to crc_512to32_01().
 * @returns the value computed by crc_512to32_01().
 */
uint32_t get_crc_01(uint32_t * crc)
{
	uint32_t folded = crc_512to32_01(crc);

	return folded;
}
/**
 * Finish a stateful stream: emit a final block if none was written, pad to a
 * byte boundary and, unless built with DEFLATE defined, append the 8-byte
 * trailer holding the CRC (low 32 bits) and the file size (high 32 bits).
 *
 * Nothing is done when fewer than 8 bytes of output are available. The
 * function is resumable: if the bit buffer fills up before the trailer can be
 * written, the state is left unchanged so the caller can retry with more
 * output space. ZSTATE_END is set once the trailer stage is complete.
 *
 * @param stream compression stream; next_out, avail_out and total_out are
 *               advanced by the number of bytes produced.
 */
void write_trailer(struct isal_zstream *stream)
{
	struct isal_zstate *state = &stream->internal_state;
	unsigned int bytes;

	if (stream->avail_out >= 8) {
		set_buf(&state->bitbuf, stream->next_out, stream->avail_out);

		/* the flush() will pad to the next byte and write up to 8 bytes
		 * to the output stream/buffer.
		 */
		if (!state->has_eob_hdr) {
			/* If the final header has not been written, write a
			 * final block. This block is a static huffman block
			 * which only contains the end of block symbol. The code
			 * that happens to do this is the first 10 bits of
			 * 0x003 (BFINAL = 1, BTYPE = 01, then the 7-bit
			 * all-zero code of symbol 256). */
			state->has_eob_hdr = 1;
			write_bits(&state->bitbuf, 0x003, 10);
			if (is_full(&state->bitbuf)) {
				/* No room left: report progress and resume later */
				stream->next_out = buffer_ptr(&state->bitbuf);
				bytes = buffer_used(&state->bitbuf);
				stream->avail_out -= bytes;
				stream->total_out += bytes;
				return;
			}
		}

		flush(&state->bitbuf);
		stream->next_out = buffer_ptr(&state->bitbuf);
		bytes = buffer_used(&state->bitbuf);

#ifndef DEFLATE
		uint32_t *crc = state->crc;

		if (!is_full(&state->bitbuf)) {
			/* next_out is a byte pointer with arbitrary alignment,
			 * so the 64-bit trailer is stored with memcpy() rather
			 * than through a cast uint64_t pointer. */
			uint64_t trailer =
			    ((uint64_t) file_size(state) << 32) | get_crc(crc);

			memcpy(stream->next_out, &trailer, sizeof(trailer));
			stream->next_out += 8;
			bytes += 8;
			state->state = ZSTATE_END;
		}
#else
		state->state = ZSTATE_END;
#endif

		stream->avail_out -= bytes;
		stream->total_out += bytes;
	}
}
/**
 * Stateless trailer writer: pad the bit stream to a byte boundary and, unless
 * built with DEFLATE defined, append the 8-byte trailer made of crc32 (low
 * 32 bits) and avail_in (high 32 bits).
 *
 * @param stream   compression stream; next_out, avail_out and total_out are
 *                 advanced by the number of bytes produced.
 * @param avail_in value stored in the upper half of the trailer.
 * @param crc32    value stored in the lower half of the trailer.
 * @returns COMP_OK on success, STATELESS_OVERFLOW when fewer than 8 output
 *          bytes are available or the bit buffer is full after the flush.
 */
static int write_trailer_stateless(struct isal_zstream *stream, uint32_t avail_in,
				   uint32_t crc32)
{
	int ret = COMP_OK;
	struct isal_zstate *state = &stream->internal_state;
	unsigned int bytes;

	if (stream->avail_out < 8) {
		ret = STATELESS_OVERFLOW;
	} else {
		set_buf(&state->bitbuf, stream->next_out, stream->avail_out);

		/* the flush() will pad to the next byte and write up to 8 bytes
		 * to the output stream/buffer.
		 */
		flush(&state->bitbuf);
		stream->next_out = buffer_ptr(&state->bitbuf);
		bytes = buffer_used(&state->bitbuf);
#ifndef DEFLATE
		if (is_full(&state->bitbuf)) {
			ret = STATELESS_OVERFLOW;
		} else {
			/* next_out is a byte pointer with arbitrary alignment,
			 * so the 64-bit trailer is stored with memcpy() rather
			 * than through a cast uint64_t pointer. */
			uint64_t trailer = ((uint64_t) avail_in << 32) | crc32;

			memcpy(stream->next_out, &trailer, sizeof(trailer));
			stream->next_out += 8;
			bytes += 8;
		}
#endif
		/* The flushed bytes are accounted for even on overflow */
		stream->avail_out -= bytes;
		stream->total_out += bytes;
	}

	return ret;
}

292
igzip/igzip_base.c Normal file
View File

@ -0,0 +1,292 @@
#include <stdint.h>
#include "igzip_lib.h"
#include "huffman.h"
#include "huff_codes.h"
#include "bitbuf2.h"
extern const struct isal_hufftables hufftables_default;
/**
 * Reset a compression stream to its initial state (C implementation).
 *
 * Clears the stream totals and flush mode, selects the default Huffman
 * tables, resets the internal buffering/bit-buffer/CRC state and fills the
 * hash table with the value -(IGZIP_D + 1) truncated to 16 bits.
 *
 * @param stream stream to initialize.
 */
void isal_deflate_init_base(struct isal_zstream *stream)
{
	struct isal_zstate *zs = &stream->internal_state;
	const uint16_t empty_slot = (uint16_t) - (IGZIP_D + 1);
	int slot;

	/* Caller-visible stream fields */
	stream->flush = 0;
	stream->hufftables = (struct isal_hufftables *)&hufftables_default;
	stream->total_out = 0;
	stream->total_in = 0;

	/* State machine and header/trailer bookkeeping */
	zs->state = ZSTATE_NEW_HDR;
	zs->count = 0;
	zs->has_gzip_hdr = 0;
	zs->has_eob_hdr = 0;
	zs->has_eob = 0;
	zs->last_flush = 0;
	zs->left_over = 0;

	/* Internal history buffer and temporary output window */
	zs->b_bytes_processed = 0;
	zs->b_bytes_valid = 0;
	zs->file_start = zs->buffer;
	zs->tmp_out_end = 0;
	zs->tmp_out_start = 0;

	init(&zs->bitbuf);

	/* Only the first CRC word is seeded here */
	zs->crc[0] = ~0;

	for (slot = 0; slot < HASH_SIZE; slot++)
		zs->head[slot] = empty_slot;
}
/**
 * Return the final CRC value for the C implementation.
 *
 * @param crc pointer to the running CRC word.
 * @returns the bitwise complement of *crc.
 */
uint32_t get_crc_base(uint32_t * crc)
{
	uint32_t running = *crc;

	return ~running;
}
/**
 * Publish the progress made by a compression pass back to the stream.
 *
 * @param stream   stream whose total_in, total_out, next_out and avail_out
 *                 are updated.
 * @param state    internal state owning the bit buffer that was written to.
 * @param start_in value stream->next_in had when the pass started; the
 *                 distance to the current next_in is added to total_in.
 */
static inline void update_state(struct isal_zstream *stream, struct isal_zstate *state,
				uint8_t * start_in)
{
	const uint32_t produced = buffer_used(&state->bitbuf);

	stream->total_in += stream->next_in - start_in;

	stream->next_out += produced;
	stream->avail_out -= produced;
	stream->total_out += produced;
}
/**
 * C implementation of the deflate body stage (the "_base" variant).
 *
 * Input is copied from stream->next_in into state->buffer in chunks of at
 * most IGZIP_D bytes while the running CRC is updated. Each buffered
 * position (except the last IGZIP_LA bytes, which are kept as look-ahead) is
 * hashed on 4 bytes and looked up in state->head to find a candidate earlier
 * occurrence. If compare258() reports a match of at least SHORTEST_MATCH
 * bytes a length/distance pair is emitted, otherwise a single literal.
 *
 * The function returns early, after saving its progress, when the bit buffer
 * reports that the output is full. When all input has been consumed and
 * either end_of_stream or a flush is requested, the state is moved to
 * ZSTATE_FLUSH_READ_BUFFER.
 *
 * @param stream compression stream; next_in/avail_in/total_in and
 *               next_out/avail_out/total_out are updated.
 */
void isal_deflate_body_base(struct isal_zstream *stream)
{
uint32_t literal, hash;
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
uint16_t match_length;
uint32_t dist, bytes_to_buffer, offset;
uint64_t code, code_len, code2, code_len2;
struct isal_zstate *state = &stream->internal_state;
uint16_t *last_seen = state->head;
uint32_t *crc = state->crc;
/* No new input: only decide whether the buffered tail must be flushed */
if (stream->avail_in == 0) {
if (stream->end_of_stream || stream->flush != NO_FLUSH)
state->state = ZSTATE_FLUSH_READ_BUFFER;
return;
}
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
/* Remembered so update_state() can compute the number of bytes consumed */
start_in = stream->next_in;
while (stream->avail_in != 0) {
/* Aim for IGZIP_D + IGZIP_LA unprocessed bytes in the buffer, capped
 * at IGZIP_D per round. The subtraction is unsigned, so a backlog
 * larger than IGZIP_D + IGZIP_LA wraps and is caught by the cap. */
bytes_to_buffer =
IGZIP_D + IGZIP_LA - (state->b_bytes_valid - state->b_bytes_processed);
if (bytes_to_buffer > IGZIP_D)
bytes_to_buffer = IGZIP_D;
/* Short input: take exactly what is available */
if (stream->avail_in < IGZIP_D)
bytes_to_buffer = stream->avail_in;
if (bytes_to_buffer > BSIZE - state->b_bytes_valid) {
if (state->b_bytes_valid - state->b_bytes_processed > IGZIP_LA) {
/* There was an out buffer overflow last round,
 * complete the processing of data */
bytes_to_buffer = 0;
} else {
/* Not enough room in the buffer, shift the
 * buffer down to make space for the new data */
offset = state->b_bytes_processed - IGZIP_D; // state->b_bytes_valid - (IGZIP_D + IGZIP_LA);
memmove(state->buffer, state->buffer + offset,
IGZIP_D + IGZIP_LA);
state->b_bytes_processed -= offset;
state->b_bytes_valid -= offset;
/* file_start moves with the data so that
 * (pointer - file_start) stays a stable position */
state->file_start -= offset;
stream->avail_in -= bytes_to_buffer;
memcpy(state->buffer + state->b_bytes_valid, stream->next_in,
bytes_to_buffer);
update_crc(crc, stream->next_in, bytes_to_buffer);
stream->next_in += bytes_to_buffer;
}
} else {
/* There is enough space in the buffer, copy in the new data */
stream->avail_in -= bytes_to_buffer;
memcpy(state->buffer + state->b_bytes_valid, stream->next_in,
bytes_to_buffer);
update_crc(crc, stream->next_in, bytes_to_buffer);
stream->next_in += bytes_to_buffer;
}
state->b_bytes_valid += bytes_to_buffer;
/* The last IGZIP_LA bytes are kept back as look-ahead */
end_in = state->buffer + state->b_bytes_valid - IGZIP_LA;
next_in = state->b_bytes_processed + state->buffer;
while (next_in < end_in) {
/* Output full: record how far we got and hand back to the caller */
if (is_full(&state->bitbuf)) {
state->b_bytes_processed = next_in - state->buffer;
update_state(stream, state, start_in);
return;
}
/* Hash the 4 bytes at the current position */
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & HASH_MASK;
/* Distance to the previous position stored for this hash, modulo
 * 2^16; the table is then updated with the current position */
dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - state->file_start);
if (dist - 1 < IGZIP_D - 1) { /* The -1 are to handle the case when dist = 0 */
assert(next_in - dist >= state->buffer);
assert(dist != 0);
match_length = compare258(next_in - dist, next_in, 258);
if (match_length >= SHORTEST_MATCH) {
/* Insert the positions covered by the match into the hash
 * table (only the first few when LIMIT_HASH_UPDATE is set) */
next_hash = next_in;
#ifdef LIMIT_HASH_UPDATE
end = next_hash + 3;
#else
end = next_hash + match_length;
#endif
next_hash++;
for (; next_hash < end; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH_MASK;
last_seen[hash] =
(uint64_t) (next_hash - state->file_start);
}
/* Emit the length code followed by the distance code in a
 * single write_bits() call */
get_len_code(stream->hufftables, match_length, &code,
&code_len);
get_dist_code(stream->hufftables, dist, &code2,
&code_len2);
code |= code2 << code_len;
code_len += code_len2;
write_bits(&state->bitbuf, code, code_len);
next_in += match_length;
continue;
}
}
/* No usable match: emit the low byte of the 4 loaded bytes as a literal */
get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
write_bits(&state->bitbuf, code, code_len);
next_in++;
}
state->b_bytes_processed = next_in - state->buffer;
}
update_state(stream, state, start_in);
/* All input consumed: switch to flushing the buffered tail if required */
if (stream->avail_in == 0) {
if (stream->end_of_stream || stream->flush != NO_FLUSH)
state->state = ZSTATE_FLUSH_READ_BUFFER;
return;
}
return;
}
/**
 * C implementation of the stage that drains the internal buffer (the
 * "_base" variant).
 *
 * Compresses every byte still held in state->buffer, from b_bytes_processed
 * up to b_bytes_valid, with matches limited to the bytes that remain. Once
 * the buffer is drained, the end-of-block symbol (256) is written unless the
 * bit buffer is full or state->left_over is non-zero. The next state is
 * ZSTATE_TRL when end_of_stream == 1 and ZSTATE_SYNC_FLUSH otherwise.
 *
 * The function returns early, after saving its progress, when the bit buffer
 * reports that the output is full. No new input is read here.
 *
 * @param stream compression stream; next_out/avail_out/total_out are
 *               updated.
 */
void isal_deflate_finish_base(struct isal_zstream *stream)
{
uint32_t literal = 0, hash;
uint8_t *next_in, *end_in, *end, *next_hash;
uint16_t match_length;
uint32_t dist;
uint64_t code, code_len, code2, code_len2;
struct isal_zstate *state = &stream->internal_state;
uint16_t *last_seen = state->head;
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
/* Unlike the body stage, no look-ahead is held back here */
end_in = state->b_bytes_valid + (uint8_t *) state->buffer;
next_in = state->b_bytes_processed + state->buffer;
while (next_in < end_in) {
/* Output full: record how far we got and hand back to the caller.
 * Passing stream->next_in as start_in leaves total_in unchanged. */
if (is_full(&state->bitbuf)) {
state->b_bytes_processed = next_in - state->buffer;
update_state(stream, state, stream->next_in);
return;
}
/* NOTE(review): this 4-byte load can extend up to 3 bytes past end_in
 * near the end of the data; the match length below is still bounded
 * by end_in - next_in. */
literal = *(uint32_t *) next_in;
hash = compute_hash(literal) & HASH_MASK;
dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF;
last_seen[hash] = (uint64_t) (next_in - state->file_start);
if (dist - 1 < IGZIP_D - 1) { /* The -1 are to handle the case when dist = 0 */
assert(next_in - dist >= state->buffer);
/* Never match beyond the end of the valid data */
match_length = compare258(next_in - dist, next_in, end_in - next_in);
if (match_length >= SHORTEST_MATCH) {
/* Insert the positions covered by the match into the hash
 * table (only the first few when LIMIT_HASH_UPDATE is set) */
next_hash = next_in;
#ifdef LIMIT_HASH_UPDATE
end = next_hash + 3;
#else
end = next_hash + match_length;
#endif
next_hash++;
for (; next_hash < end; next_hash++) {
literal = *(uint32_t *) next_hash;
hash = compute_hash(literal) & HASH_MASK;
last_seen[hash] =
(uint64_t) (next_hash - state->file_start);
}
/* Emit the length code followed by the distance code in a
 * single write_bits() call */
get_len_code(stream->hufftables, match_length, &code,
&code_len);
get_dist_code(stream->hufftables, dist, &code2, &code_len2);
code |= code2 << code_len;
code_len += code_len2;
write_bits(&state->bitbuf, code, code_len);
next_in += match_length;
continue;
}
}
/* No usable match: emit the low byte of the 4 loaded bytes as a literal */
get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
write_bits(&state->bitbuf, code, code_len);
next_in++;
}
state->b_bytes_processed = next_in - state->buffer;
/* The end-of-block symbol is held back while the output is full or
 * state->left_over is non-zero */
if (is_full(&state->bitbuf) || state->left_over > 0) {
update_state(stream, state, stream->next_in);
return;
}
/* Symbol 256 is the end-of-block code */
get_lit_code(stream->hufftables, 256, &code, &code_len);
write_bits(&state->bitbuf, code, code_len);
state->has_eob = 1;
update_state(stream, state, stream->next_in);
if (stream->end_of_stream == 1)
state->state = ZSTATE_TRL;
else
state->state = ZSTATE_SYNC_FLUSH;
return;
}

751
igzip/igzip_body.asm Normal file
View File

@ -0,0 +1,751 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Shared options file; its contents are not visible from this file.
%include "options.asm"
; Everything from here to the final %endif of this file is assembled only
; when TEST is not defined.
%ifndef TEST
; fold_4: external constant that the routine below loads into crc_fold.
extern fold_4
%include "lz0a_const.asm"
%include "data_struct2.asm"
%include "bitbuf2.asm"
%include "huffman.asm"
%include "igzip_compare_types.asm"
%include "reg_sizes.asm"
%include "stdmac.asm"
; MOVDQA: aligned 128-bit move, VEX-encoded when ARCH is 04.
%if (ARCH == 04)
%define MOVDQA vmovdqa
%else
%define MOVDQA movdqa
%endif
; MARK <name>: in DEBUG builds emits a global label at the current position;
; otherwise it expands to nothing.
%ifdef DEBUG
%macro MARK 1
global %1
%1:
%endm
%else
%macro MARK 1
%endm
%endif
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Symbolic register names.
; Several names are aliases of the same physical register, so two names that
; share a register must never be live at the same time.
; rcx
%define tmp2 rcx
%define hash2 rcx
; rax
%define b_bytes_valid rax
%define curr_data rax
%define code rax
%define tmp5 rax
; rbx
%define tmp4 rbx
%define dist rbx
%define code2 rbx
; rdx
%define x rdx
%define len rdx
%define hash rdx
%define code_len3 rdx
; rsi
%define tmp1 rsi
%define code_len2 rsi
; rdi
%define blen rdi
%define file_start rdi
; rbp
%define m_bit_count rbp
; r8
%define in_buf r8
%define curr_data2 r8
%define len2 r8
%define tmp6 r8
; r9 .. r11
%define m_bits r9
%define f_i r10
%define m_out_buf r11
; r12
%define f_end_i r12
%define dist2 r12
%define tmp7 r12
%define code4 r12
; r13
%define tmp3 r13
%define code3 r13
; r14, r15: held for the whole routine
%define stream r14
%define hufftables r15
%define crc_0 xmm0 ; in/out: crc state
%define crc_1 xmm1 ; in/out: crc state
%define crc_2 xmm2 ; in/out: crc state
%define crc_3 xmm3 ; in/out: crc state
%define crc_fold xmm4 ; in: (loaded from fold_4)
%define xtmp0 xmm5 ; tmp
%define xtmp1 xmm6 ; tmp
%define xtmp2 xmm7 ; tmp
%define xtmp3 xmm8 ; tmp
%define xtmp4 xmm9 ; tmp
%define ytmp0 ymm5 ; tmp
%define ytmp1 ymm6 ; tmp
; vtmp*: vector temporaries, 256-bit wide when ARCH is 04, 128-bit otherwise.
; They overlap xtmp*/ytmp* (same register numbers 5..9).
%if (ARCH == 04)
%define vtmp0 ymm5 ; tmp
%define vtmp1 ymm6 ; tmp
%define vtmp2 ymm7 ; tmp
%define vtmp3 ymm8 ; tmp
%define vtmp4 ymm9 ; tmp
%else
%define vtmp0 xmm5 ; tmp
%define vtmp1 xmm6 ; tmp
%define vtmp2 xmm7 ; tmp
%define vtmp3 xmm8 ; tmp
%define vtmp4 xmm9 ; tmp
%endif
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; b_bytes_processed shares its register with f_i (r10).
%define b_bytes_processed f_i
; Stack frame layout, as offsets from rsp:
;   0   blen                 (8 bytes)
;   8   in_buf               (8 bytes)
;   16  f_end_i              (8 bytes)
;   24  empty_buffer_flag    (8 bytes)
;   32  saved GPRs           (8 * 8 bytes)
;   96  saved xmm6..xmm9     (4 * 16 bytes)
blen_mem_offset equ 0 ; local variable (8 bytes)
in_buf_mem_offset equ 8
f_end_i_mem_offset equ 16
empty_buffer_flag equ 24
gpr_save_mem_offset equ 32 ; gpr save area (8*8 bytes)
xmm_save_mem_offset equ 32 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
; 4 local slots + 8 GPR slots + 4 XMM slots + 8 bytes of padding
stack_size equ 4*8 + 8*8 + 4*16 + 8
;;; 8 because stack address is odd multiple of 8 after a function call and
;;; we want it aligned to 16 bytes
; void isal_deflate_body ( isal_zstream *stream )
; arg 1: rcx: addr of stream
;
; Assembly version of the deflate body stage; the exported symbol is
; isal_deflate_body_<ARCH>. Input is copied into the internal buffer while
; the CRC state is updated (COPY_IN_CRC), then the buffered data is encoded.
; Each pass of loop2 examines two consecutive positions (f_i and f_i + 1) and
; emits either two literals, one length/distance pair, or one literal followed
; by a length/distance pair.
; For elf64 output the argument arrives in rdi and is first moved to rcx.
global isal_deflate_body_ %+ ARCH
isal_deflate_body_ %+ ARCH %+ :
%ifidn __OUTPUT_FORMAT__, elf64
mov rcx, rdi
%endif
;; do nothing if (avail_in == 0)
cmp dword [rcx + _avail_in], 0
jne skip1
;; Set stream's next state
;; ZSTATE_FLUSH_READ_BUFFER if end_of_stream != 0 or flush != _NO_FLUSH,
;; ZSTATE_BODY otherwise
mov rdx, ZSTATE_FLUSH_READ_BUFFER
mov rax, ZSTATE_BODY
cmp dword [rcx + _end_of_stream], 0
cmovne rax, rdx
cmp dword [rcx + _flush], _NO_FLUSH
cmovne rax, rdx
mov dword [rcx + _internal_state_state], eax
ret
skip1:
;; Prologue: reserve the frame (optionally forcing 16-byte alignment) and
;; save rbx, rsi, rdi, rbp, r12-r15 and xmm6-xmm9
%ifdef ALIGN_STACK
push rbp
mov rbp, rsp
sub rsp, stack_size
and rsp, ~15
%else
sub rsp, stack_size
%endif
mov [rsp + gpr_save_mem_offset + 0*8], rbx
mov [rsp + gpr_save_mem_offset + 1*8], rsi
mov [rsp + gpr_save_mem_offset + 2*8], rdi
mov [rsp + gpr_save_mem_offset + 3*8], rbp
mov [rsp + gpr_save_mem_offset + 4*8], r12
mov [rsp + gpr_save_mem_offset + 5*8], r13
mov [rsp + gpr_save_mem_offset + 6*8], r14
mov [rsp + gpr_save_mem_offset + 7*8], r15
MOVDQA [rsp + xmm_save_mem_offset + 0*16], xmm6
MOVDQA [rsp + xmm_save_mem_offset + 1*16], xmm7
MOVDQA [rsp + xmm_save_mem_offset + 2*16], xmm8
MOVDQA [rsp + xmm_save_mem_offset + 3*16], xmm9
mov stream, rcx
;; Load the four 128-bit words of CRC state and the fold constant
MOVDQA crc_0, [stream + _internal_state_crc + 0*16]
MOVDQA crc_1, [stream + _internal_state_crc + 1*16]
MOVDQA crc_2, [stream + _internal_state_crc + 2*16]
MOVDQA crc_3, [stream + _internal_state_crc + 3*16]
MOVDQA crc_fold, [fold_4]
mov dword [stream + _internal_state_has_eob], 0
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
;; m_out_end = next_out + avail_out - SLOP
mov m_out_buf, [stream + _next_out]
mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf
mov tmp1 %+ d, [stream + _avail_out]
add tmp1, m_out_buf
sub tmp1, SLOP
skip_SLOP:
mov [stream + _internal_state_bitbuf_m_out_end], tmp1
mov m_bits, [stream + _internal_state_bitbuf_m_bits]
mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
mov hufftables, [stream + _hufftables]
; in_buf = stream->next_in
mov in_buf, [stream + _next_in]
mov blen %+ d, [stream + _avail_in]
;; empty_buffer_flag: byte 0 is set when flush == _FULL_FLUSH, byte 1 when
;; b_bytes_processed == 0. A non-zero dword sends the first position through
;; write_first_byte, which emits it as a plain literal.
mov dword [rsp + empty_buffer_flag], 0
cmp dword [stream + _flush], _FULL_FLUSH
sete byte [rsp + empty_buffer_flag]
cmp dword [stream + _internal_state_b_bytes_processed], 0
sete byte [rsp + empty_buffer_flag + 1]
; while (blen != 0)
MARK __Compute_X_ %+ ARCH
loop1:
; x = D + LA - (state->b_bytes_valid - state->b_bytes_processed);
mov b_bytes_valid %+ d, [stream + _internal_state_b_bytes_valid]
mov b_bytes_processed %+ d, [stream + _internal_state_b_bytes_processed]
lea x, [b_bytes_processed + D + LA]
sub x, b_bytes_valid
; if (x > D) x = D;
cmp x, D
cmova x, [const_D]
; if (blen < D) x = blen;
cmp blen, D
cmovb x, blen
;; process x bytes starting at in_buf
;; If there isn't enough room, shift buffer down
; if (x > BSIZE - state->b_bytes_valid) {
mov tmp1, BSIZE
sub tmp1, b_bytes_valid
cmp x, tmp1
jbe skip_move
; if (state->b_bytes_processed < state->b_bytes_valid - LA) {
mov tmp1, b_bytes_valid
sub tmp1, LA
cmp b_bytes_processed, tmp1
jae do_move
;; We need to move an odd amount, skip move for this copy of loop
;; (x = 0: no input is copied this round, buffered data is processed first)
xor x,x
mov [rsp + blen_mem_offset], blen
jmp skip_move_zero
MARK __shift_data_down_ %+ ARCH
do_move:
; offset = state->b_bytes_valid - (D + LA);
mov tmp4, b_bytes_valid
sub tmp4, D + LA
; copy_D_LA(state->buffer, state->buffer + offset);
lea tmp1, [stream + _internal_state_buffer]
lea tmp2, [tmp1 + tmp4]
copy_D_LA tmp1, tmp2, tmp3, vtmp0, vtmp1, vtmp2, vtmp3
; tmp1 clobbered
; state->file_start -= offset;
sub [stream + _internal_state_file_start], tmp4
; state->b_bytes_processed -= offset;
sub b_bytes_processed, tmp4
mov b_bytes_valid, D + LA
MARK __copy_in_ %+ ARCH
skip_move:
sub blen, x
mov [rsp + blen_mem_offset], blen
; copy_in(state->buffer + state->b_bytes_valid, in_buf, x);
lea tmp1, [stream + _internal_state_buffer + b_bytes_valid]
mov tmp2, in_buf
mov tmp3, x
COPY_IN_CRC tmp1, tmp2, tmp3, tmp4, crc_0, crc_1, crc_2, crc_3, crc_fold, \
xtmp0, xtmp1, xtmp2, xtmp3, xtmp4
; in_buf += x;
add in_buf, x
MARK __prepare_loop_ %+ ARCH
skip_move_zero:
;; blen and in_buf are spilled to the stack because their registers
;; (rdi, r8) are reused under other names inside the inner loop
mov [rsp + in_buf_mem_offset], in_buf
; state->b_bytes_valid += x;
add b_bytes_valid, x
mov [stream + _internal_state_b_bytes_valid], b_bytes_valid %+ d
; f_end_i = state->b_bytes_valid - LA;
%ifnidn f_end_i, b_bytes_valid
mov f_end_i, b_bytes_valid
%endif
sub f_end_i, LA
; if (f_end_i <= 0) continue;
cmp f_end_i, 0
jle continue_while
; f_start_i = state->b_bytes_processed;
;; f_i and b_bytes_processed are same register, just store b_bytes_proc
mov [stream + _internal_state_b_bytes_processed], b_bytes_processed %+ d
; f_start_i += (uint32_t)(state->buffer - state->file_start);
;; from here on f_i and f_end_i are offsets relative to file_start
mov file_start, [stream + _internal_state_file_start]
lea tmp1, [stream + _internal_state_buffer]
sub tmp1, file_start
add f_i, tmp1
add f_end_i, tmp1
mov [rsp + f_end_i_mem_offset], f_end_i
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
cmp f_i, f_end_i
jge end_loop_2
MARK __misc_compute_hash_lookup_ %+ ARCH
mov curr_data %+ d, [file_start + f_i]
cmp dword [rsp + empty_buffer_flag], 0
jne write_first_byte
mov curr_data2, curr_data
compute_hash hash, curr_data
jmp loop2
align 16
;; loop2: on entry curr_data/curr_data2 hold the 4 bytes at f_i and hash
;; holds the unmasked hash of position f_i
loop2:
;; hash2: hash of the bytes starting one position later (curr_data >> 8)
shr curr_data2, 8
xor hash2 %+ d, hash2 %+ d
crc32 hash2 %+ d, curr_data2 %+ d
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
; if (state->bitbuf.is_full()) {
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja bitbuf_full
xor dist, dist
xor dist2, dist2
xor tmp3, tmp3
lea tmp1, [file_start + f_i]
lea tmp6, [tmp1 - 1]
;; dist = (uint16_t)(f_i - head[hash]), computed in 16 bits
mov dist %+ w, f_i %+ w
sub dist %+ w, word [stream + _internal_state_head + 2 * hash]
; state->head[hash] = (uint16_t) f_i;
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
inc f_i
;; same lookup for the second position; dist2 is left holding dist - 1
mov dist2 %+ w, f_i %+ w
sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2]
dec dist2
; state->head[hash2] = (uint16_t) f_i;
mov [stream + _internal_state_head + 2 * hash2], f_i %+ w
mov tmp2, tmp1
sub tmp2, dist
dec dist
; if ((dist-1) < (D-1)) {
;; when the distance is out of range, fall back to distance 1:
;; tmp2 = tmp1 - 1 and dist = 0 + 1 (tmp3 is zero here)
cmp dist %+ d, (D-1)
cmovae tmp2, tmp6
cmovae dist, tmp3
inc dist
cmp dist2 %+ d, (D-1)
cmovae dist2, tmp3
inc dist2
MARK __compare_ %+ ARCH
; len = compare258(state->file_start + f_i,
; state->file_start + f_i - dist);
;; Speculatively load distance code (except for when large windows are used)
get_packed_dist_code dist, code2, hufftables
;; Check for long len/dist match (>7) with first literal
;; (the xor is zero when the first 8 bytes are identical)
mov len, [tmp1]
xor len, [tmp2]
jz compare_loop
%ifdef USE_HSWNI
;; tmp3 = mask up to and including the lowest differing bit, widened to
;; cover at least the low 3 bytes
blsmsk tmp3, len
or tmp3, 0xFFFFFF
%endif
lea tmp1, [file_start + f_i]
mov tmp2, tmp1
sub tmp2, dist2
;; Speculatively load distance code (except for when large windows are used)
get_packed_dist_code dist2, code4, hufftables
;; Check for len/dist match (>7) with second literal
mov len2, [tmp1]
xor len2, [tmp2]
jz compare_loop2
%ifdef USE_HSWNI
;; Check for len/dist match for first literal
test tmp3, len2
jz len_dist_lit_huffman_pre
cmp tmp3, 0xFFFFFF
je encode_2_literals
jmp len_dist_huffman_pre
MARK __len_dist_lit_huffman_ %+ ARCH
len_dist_lit_huffman_pre:
movzx tmp1, curr_data %+ b
get_lit_code tmp1, code3, code_len3, hufftables
%else
;; Speculatively load the code for the first literal
movzx tmp1, curr_data %+ b
get_lit_code tmp1, code3, rcx, hufftables
;; Check for len/dist match for first literal
test len, 0xFFFFFF
jz len_dist_huffman_pre
;; Speculatively load the code for the second literal
shr curr_data, 8
and curr_data, 0xff
get_lit_code curr_data, code2, code_len2, hufftables
shl code2, cl
or code2, code3
add code_len2, rcx
;; Check for len/dist match for second literal
test len2, 0xFFFFFF
jnz write_lit_bits
MARK __len_dist_lit_huffman_ %+ ARCH
len_dist_lit_huffman_pre:
mov code_len3, rcx
%endif
;; index of the first differing bit / 8 = number of matching bytes
bsf len2, len2
shr len2, 3
;; Encode: literal (code3) followed by a len/dist pair for the second position
len_dist_lit_huffman:
%ifndef LONGER_HUFFTABLE
mov tmp4, dist2
get_dist_code tmp4, code4, code_len2, hufftables ;; clobbers dist, rcx
%else
unpack_dist_code code4, code_len2
%endif
get_len_code len2, code, rcx, hufftables ;; rcx is code_len
%ifdef USE_HSWNI
shlx code4, code4, rcx
%else
shl code4, cl
%endif
or code4, code
add code_len2, rcx
;; prepend the literal code below the len/dist bits
mov rcx, code_len3
%ifdef USE_HSWNI
shlx code4, code4, rcx
%else
shl code4, cl
%endif
or code4, code3
add code_len2, rcx
mov code2, code4
;; Setup for updating hash
lea tmp3, [f_i + 1] ; tmp3 <= k
add f_i, len2
; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp5 %+ d, [file_start + tmp3]
mov tmp7, tmp5
shr tmp7, 8
compute_hash hash, tmp5
and hash %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
add tmp3,1
jmp update_hash_for_symbol
;; encode as dist/len
MARK __len_dist_huffman_ %+ ARCH
len_dist_huffman_pre:
;; index of the first differing bit / 8 = number of matching bytes
bsf len, len
shr len, 3
len_dist_huffman:
;; f_i was already advanced to the second position; step back to the first
dec f_i
; get_dist_code(dist, &code2, &code_len2);
%ifndef LONGER_HUFFTABLE
mov tmp3, dist ; since code2 and dist are rbx
get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx
%else
unpack_dist_code code2, code_len2
%endif
; get_len_code(len, &code, &code_len);
get_len_code len, code, rcx, hufftables ;; rcx is code_len
; code2 <<= code_len
; code2 |= code
; code_len2 += code_len
%ifdef USE_HSWNI
shlx code2, code2, rcx
%else
shl code2, cl
%endif
or code2, code
add code_len2, rcx
;; Setup for updating hash
lea tmp3, [f_i + 2] ; tmp3 <= k
add f_i, len
mov tmp7 %+ d, [file_start + tmp3]
MARK __update_hash_for_symbol_ %+ ARCH
update_hash_for_symbol:
;; Preload data and hash for the position following the match (used by the
;; next pass of loop2), then insert the skipped positions into the hash table
mov curr_data %+ d, [file_start + f_i]
mov curr_data2, curr_data
compute_hash hash, curr_data
%ifdef LIMIT_HASH_UPDATE
; only update hash twice, first hash was already calculated.
; hash = compute_hash(state->file_start + k) & HASH_MASK;
compute_hash hash2, tmp7
and hash2 %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
%else
loop3:
; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp7 %+ d, [file_start + tmp3]
compute_hash hash2, tmp7
and hash2 %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
add tmp3,1
cmp tmp3, f_i
jl loop3
%endif
MARK __write_len_dist_bits_ %+ ARCH
;; f_end_i shares r12 with dist2/tmp7/code4, so it is reloaded from the stack
mov f_end_i, [rsp + f_end_i_mem_offset]
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
; continue
cmp f_i, f_end_i
jl loop2
jmp end_loop_2
MARK __write_lit_bits_ %+ ARCH
%ifdef USE_HSWNI
encode_2_literals:
movzx tmp1, curr_data %+ b
get_lit_code tmp1, code3, rcx, hufftables
shr curr_data, 8
and curr_data, 0xff
get_lit_code curr_data, code2, code_len2, hufftables
;; Calculate code associated with both literals
shlx code2, code2, rcx
or code2, code3
add code_len2, rcx
%endif
;; Emit the literal code(s) in code2/code_len2 and preload data and hash for
;; the next position
write_lit_bits:
mov f_end_i, [rsp + f_end_i_mem_offset]
add f_i, 1
mov curr_data %+ d, [file_start + f_i]
mov curr_data2, curr_data
compute_hash hash, curr_data
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
; continue
cmp f_i, f_end_i
jl loop2
MARK __end_loops_ %+ ARCH
end_loop_2:
; state->b_bytes_processed = f_i - (state->buffer - state->file_start);
add f_i, [stream + _internal_state_file_start]
sub f_i, stream
sub f_i, _internal_state_buffer
mov [stream + _internal_state_b_bytes_processed], f_i %+ d
; continue
continue_while:
mov blen, [rsp + blen_mem_offset]
mov in_buf, [rsp + in_buf_mem_offset]
cmp blen, 0
jnz loop1
;; Common exit: write progress back to the stream, restore registers, return
end:
;; update input buffer
; stream->total_in += (uint32_t)(in_buf - stream->next_in); // bytes copied
mov tmp1 %+ d, [stream + _total_in]
mov in_buf, [rsp + in_buf_mem_offset]
add tmp1, in_buf
sub tmp1, [stream + _next_in]
mov [stream + _total_in], tmp1 %+ d
mov [stream + _next_in], in_buf
mov [stream + _avail_in], blen %+ d
cmp blen, 0
jne skip2
;; Set stream's next state
mov tmp1, ZSTATE_FLUSH_READ_BUFFER
mov tmp5, ZSTATE_BODY
cmp dword [stream + _end_of_stream], 0
cmovne tmp5, tmp1
cmp dword [stream + _flush], _NO_FLUSH
cmovne tmp5, tmp1
mov dword [stream + _internal_state_state], tmp5 %+ d
skip2:
mov [stream + _next_out], m_out_buf
; offset = state->bitbuf.buffer_used();
sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start]
sub [stream + _avail_out], m_out_buf %+ d
add [stream + _total_out], m_out_buf %+ d
mov [stream + _internal_state_bitbuf_m_bits], m_bits
mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
;; store the CRC state back
MOVDQA [stream + _internal_state_crc + 0*16], crc_0
MOVDQA [stream + _internal_state_crc + 1*16], crc_1
MOVDQA [stream + _internal_state_crc + 2*16], crc_2
MOVDQA [stream + _internal_state_crc + 3*16], crc_3
;; Epilogue: restore the registers saved in the prologue
mov rbx, [rsp + gpr_save_mem_offset + 0*8]
mov rsi, [rsp + gpr_save_mem_offset + 1*8]
mov rdi, [rsp + gpr_save_mem_offset + 2*8]
mov rbp, [rsp + gpr_save_mem_offset + 3*8]
mov r12, [rsp + gpr_save_mem_offset + 4*8]
mov r13, [rsp + gpr_save_mem_offset + 5*8]
mov r14, [rsp + gpr_save_mem_offset + 6*8]
mov r15, [rsp + gpr_save_mem_offset + 7*8]
MOVDQA xmm6, [rsp + xmm_save_mem_offset + 0*16]
MOVDQA xmm7, [rsp + xmm_save_mem_offset + 1*16]
MOVDQA xmm8, [rsp + xmm_save_mem_offset + 2*16]
MOVDQA xmm9, [rsp + xmm_save_mem_offset + 3*16]
%ifndef ALIGN_STACK
add rsp, stack_size
%else
mov rsp, rbp
pop rbp
%endif
ret
MARK __bitbuf_full_ %+ ARCH
;; Output buffer full: record how far the input was processed and leave
;; through the common exit
bitbuf_full:
mov blen, [rsp + blen_mem_offset]
; state->b_bytes_processed = f_i - (state->buffer - state->file_start);
add f_i, [stream + _internal_state_file_start]
sub f_i, stream
sub f_i, _internal_state_buffer
mov [stream + _internal_state_b_bytes_processed], f_i %+ d
jmp end
MARK __compare_loops_ %+ ARCH
;; First 8 bytes matched at the first position: measure the full match length
;; with the compare macro selected by COMPARE_TYPE
compare_loop:
%if (COMPARE_TYPE == 1)
compare250 tmp1, tmp2, len, tmp3
%elif (COMPARE_TYPE == 2)
compare250_x tmp1, tmp2, len, tmp3, xtmp0, xtmp1
%elif (COMPARE_TYPE == 3)
compare250_y tmp1, tmp2, len, tmp3, ytmp0, ytmp1
%else
%error Unknown Compare type COMPARE_TYPE
% error
%endif
jmp len_dist_huffman
;; Same for the second position; the first byte is then emitted as a literal
compare_loop2:
%if (COMPARE_TYPE == 1)
compare250 tmp1, tmp2, len2, tmp3
%elif (COMPARE_TYPE == 2)
compare250_x tmp1, tmp2, len2, tmp3, xtmp0, xtmp1
%elif (COMPARE_TYPE == 3)
compare250_y tmp1, tmp2, len2, tmp3, ytmp0, ytmp1
%else
%error Unknown Compare type COMPARE_TYPE
% error
%endif
and curr_data, 0xff
get_lit_code curr_data, code3, code_len3, hufftables
jmp len_dist_lit_huffman
MARK __write_first_byte_ %+ ARCH
;; Taken when empty_buffer_flag is non-zero: clear the flag, record the
;; position in the hash table and emit the byte as a literal with no match
;; lookup
write_first_byte:
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja bitbuf_full
mov dword [rsp + empty_buffer_flag], 0
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
and curr_data, 0xff
get_lit_code curr_data, code2, code_len2, hufftables
jmp write_lit_bits
section .data
align 4
;; 64-bit copy of D, used as the memory operand of the cmova in loop1
const_D: dq D
%endif ;; ifndef TEST

8
igzip/igzip_body_01.asm Normal file
View File

@ -0,0 +1,8 @@
; Build wrapper: assembles igzip_body.asm as the ARCH 01 variant, which
; exports isal_deflate_body_01.
%define ARCH 01
; COMPARE_TYPE may be predefined by the build; 2 selects the compare250_x
; macro (XMM temporaries) in igzip_body.asm.
%ifndef COMPARE_TYPE
%define COMPARE_TYPE 2
%endif
%include "igzip_buffer_utils_01.asm"
%include "igzip_body.asm"

9
igzip/igzip_body_04.asm Normal file
View File

@ -0,0 +1,9 @@
; Build wrapper: assembles igzip_body.asm as the ARCH 04 variant, which
; exports isal_deflate_body_04 and uses vmovdqa and YMM temporaries.
%define ARCH 04
; USE_HSWNI enables the blsmsk/shlx code paths in igzip_body.asm.
%define USE_HSWNI
; COMPARE_TYPE may be predefined by the build; 3 selects the compare250_y
; macro (YMM temporaries) in igzip_body.asm.
%ifndef COMPARE_TYPE
%define COMPARE_TYPE 3
%endif
%include "igzip_buffer_utils_04.asm"
%include "igzip_body.asm"

View File

@ -0,0 +1,543 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifndef BUFFER_UTILS
%define BUFFER_UTILS
%include "options.asm"
extern pshufb_shf_table
extern mask3
; Build-time switch for the loads used by COPY_IN_CRC below.
; With FIX_CACHE_READ defined, every movntdqa in the code that follows is
; textually replaced by movdqa.
%ifdef FIX_CACHE_READ
%define movntdqa movdqa
%else
; Without it, prefetchnta is redefined as an empty one-argument macro, so any
; "prefetchnta <operand>" line in the including code assembles to nothing.
%macro prefetchnta 1
%endm
%endif
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; code for doing the CRC calculation as part of copy-in, using pclmulqdq
; "shift" 4 input registers down 4 places
; macro FOLD4 xmm0, xmm1, xmm2, xmm3, const, tmp0, tmp1
;
; Folds all four state registers in place. For each state register x:
;     x = clmul(x.high64, const.low64) XOR clmul(x.low64, const.high64)
; pclmulqdq imm8 0x01 picks the high qword of the first operand and the low
; qword of the second; imm8 0x10 picks the low qword of the first and the
; high qword of the second.
; No new data is mixed in here: in COPY_IN_CRC the freshly loaded blocks are
; XORed into the state registers after this macro has run.
; tmp0 and tmp1 are clobbered; const is only read.
%macro FOLD4 7
%define %%xmm0 %1 ; xmm reg, in/out
%define %%xmm1 %2 ; xmm reg, in/out
%define %%xmm2 %3 ; xmm reg, in/out
%define %%xmm3 %4 ; xmm reg, in/out
%define %%const %5 ; xmm reg, in
%define %%tmp0 %6 ; xmm reg, tmp
%define %%tmp1 %7 ; xmm reg, tmp
; first pair: fold xmm0 and xmm1, using tmp0/tmp1 for the second product
movaps %%tmp0, %%xmm0
movaps %%tmp1, %%xmm1
pclmulqdq %%xmm0, %%const, 0x01
pclmulqdq %%xmm1, %%const, 0x01
pclmulqdq %%tmp0, %%const, 0x10
pclmulqdq %%tmp1, %%const, 0x10
xorps %%xmm0, %%tmp0
xorps %%xmm1, %%tmp1
; second pair: same sequence for xmm2 and xmm3, reusing the temporaries
movaps %%tmp0, %%xmm2
movaps %%tmp1, %%xmm3
pclmulqdq %%xmm2, %%const, 0x01
pclmulqdq %%xmm3, %%const, 0x01
pclmulqdq %%tmp0, %%const, 0x10
pclmulqdq %%tmp1, %%const, 0x10
xorps %%xmm2, %%tmp0
xorps %%xmm3, %%tmp1
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; "shift" 3 input registers down 4 places
; macro FOLD3 x0, x1, x2, x3, const, tmp0
; x0 x1 x2 x3
; In A B C D
; Out D A' B' C'
;
; X' denotes X folded with const: the XOR of the 0x01 and 0x10 pclmulqdq
; products of X and const. A, B and C are folded and moved up one register;
; D is rotated unchanged into x0. tmp0 is clobbered.
%macro FOLD3 6
%define %%x0 %1 ; xmm reg, in/out
%define %%x1 %2 ; xmm reg, in/out
%define %%x2 %3 ; xmm reg, in/out
%define %%x3 %4 ; xmm reg, in/out
%define %%const %5 ; xmm reg, in
%define %%tmp0 %6 ; xmm reg, tmp
movdqa %%tmp0, %%x3 ; save D, it is placed in x0 at the end
; x3 = C'
movaps %%x3, %%x2
pclmulqdq %%x2, %%const, 0x01
pclmulqdq %%x3, %%const, 0x10
xorps %%x3, %%x2
; x2 = B'
movaps %%x2, %%x1
pclmulqdq %%x1, %%const, 0x01
pclmulqdq %%x2, %%const, 0x10
xorps %%x2, %%x1
; x1 = A'
movaps %%x1, %%x0
pclmulqdq %%x0, %%const, 0x01
pclmulqdq %%x1, %%const, 0x10
xorps %%x1, %%x0
movdqa %%x0, %%tmp0 ; x0 = D
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; "shift" 2 input registers down 4 places
; macro FOLD2 x0, x1, x2, x3, const, tmp0
; x0 x1 x2 x3
; In A B C D
; Out C D A' B'
;
; X' denotes X folded with const: the XOR of the 0x01 and 0x10 pclmulqdq
; products of X and const. A and B are folded and moved up two registers;
; C and D are rotated unchanged into x0 and x1. tmp0 is clobbered.
%macro FOLD2 6
%define %%x0 %1 ; xmm reg, in/out
%define %%x1 %2 ; xmm reg, in/out
%define %%x2 %3 ; xmm reg, in/out
%define %%x3 %4 ; xmm reg, in/out
%define %%const %5 ; xmm reg, in
%define %%tmp0 %6 ; xmm reg, tmp
movdqa %%tmp0, %%x3 ; save D
; x3 = B'
movaps %%x3, %%x1
pclmulqdq %%x1, %%const, 0x01
pclmulqdq %%x3, %%const, 0x10
xorps %%x3, %%x1
movdqa %%x1, %%tmp0 ; x1 = D
movdqa %%tmp0, %%x2 ; save C
; x2 = A'
movaps %%x2, %%x0
pclmulqdq %%x0, %%const, 0x01
pclmulqdq %%x2, %%const, 0x10
xorps %%x2, %%x0
movdqa %%x0, %%tmp0 ; x0 = C
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; "shift" 1 input registers down 4 places
; macro FOLD1 x0, x1, x2, x3, const, tmp0
; x0 x1 x2 x3
; In A B C D
; Out B C D A'
;
; A' denotes A folded with const: the XOR of the 0x01 and 0x10 pclmulqdq
; products of A and const. Only A is folded (into x3); B, C and D each move
; down one register unchanged. tmp0 is clobbered.
%macro FOLD1 6
%define %%x0 %1 ; xmm reg, in/out
%define %%x1 %2 ; xmm reg, in/out
%define %%x2 %3 ; xmm reg, in/out
%define %%x3 %4 ; xmm reg, in/out
%define %%const %5 ; xmm reg, in
%define %%tmp0 %6 ; xmm reg, tmp
movdqa %%tmp0, %%x3 ; save D
; x3 = A'
movaps %%x3, %%x0
pclmulqdq %%x0, %%const, 0x01
pclmulqdq %%x3, %%const, 0x10
xorps %%x3, %%x0
movdqa %%x0, %%x1 ; x0 = B
movdqa %%x1, %%x2 ; x1 = C
movdqa %%x2, %%tmp0 ; x2 = D
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; macro PARTIAL_FOLD x0, x1, x2, x3, xp, size, xfold, xt0, xt1, xt2, xt3, gtmp
; XP X3 X2 X1 X0 tmp2
; Initial state xI HG FE DC BA
; after shift IH GF ED CB A0
; after fold ff GF ED CB ff = merge(IH, A0)
;
; Merges a partial block xp (size bytes, 1...15) into the four-register state:
; the state plus xp is shifted as one long value, and the part shifted out of
; x0 (tmp2 = A0) is folded with const and XORed into x3.
; The shuffle masks come from the external tables pshufb_shf_table and mask3.
; NOTE(review): the table contents are not visible here; the per-line
; "A0/0B/..." annotations below are the original author's description.
; Clobbers xp, size, gtmp and the four xmm temporaries.
%macro PARTIAL_FOLD 12
%define %%x0 %1 ; xmm reg, in/out
%define %%x1 %2 ; xmm reg, in/out
%define %%x2 %3 ; xmm reg, in/out
%define %%x3 %4 ; xmm reg, in/out
%define %%xp %5 ; xmm partial reg, in/clobbered
%define %%size %6 ; GPR, in/clobbered (1...15)
%define %%const %7 ; xmm reg, in
%define %%shl %8 ; xmm reg, tmp
%define %%shr %9 ; xmm reg, tmp
%define %%tmp2 %10 ; xmm reg, tmp
%define %%tmp3 %11 ; xmm reg, tmp
%define %%gtmp %12 ; GPR, tmp
; {XP X3 X2 X1 X0} = {xI HG FE DC BA}
shl %%size, 4 ; size *= 16
; table base is biased by -16 so that size == 1 selects the first 16-byte entry
lea %%gtmp, [pshufb_shf_table - 16 WRT_OPT]
movdqa %%shl, [%%gtmp + %%size] ; shl constant
movdqa %%shr, %%shl
pxor %%shr, [mask3 WRT_OPT] ; shr constant
movdqa %%tmp2, %%x0 ; tmp2 = BA
pshufb %%tmp2, %%shl ; tmp2 = A0
pshufb %%x0, %%shr ; x0 = 0B
movdqa %%tmp3, %%x1 ; tmp3 = DC
pshufb %%tmp3, %%shl ; tmp3 = C0
por %%x0, %%tmp3 ; x0 = CB
pshufb %%x1, %%shr ; x1 = 0D
movdqa %%tmp3, %%x2 ; tmp3 = FE
pshufb %%tmp3, %%shl ; tmp3 = E0
por %%x1, %%tmp3 ; x1 = ED
pshufb %%x2, %%shr ; x2 = 0F
movdqa %%tmp3, %%x3 ; tmp3 = HG
pshufb %%tmp3, %%shl ; tmp3 = G0
por %%x2, %%tmp3 ; x2 = GF
pshufb %%x3, %%shr ; x3 = 0H
pshufb %%xp, %%shl ; xp = I0
por %%x3, %%xp ; x3 = IH
; fold tmp2 into X3
movaps %%tmp3, %%tmp2
pclmulqdq %%tmp2, %%const, 0x01
pclmulqdq %%tmp3, %%const, 0x10
xorps %%x3, %%tmp2
xorps %%x3, %%tmp3
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; LOAD_FRACTIONAL_XMM: Packs xmm register with data when data input is less than 16 bytes.
; Returns 0 if data has length 0.
; Input: The input data (src), that data's length (size).
; Output: The packed xmm register (xmm_out).
; size is clobbered.
; src is advanced to the end of the data and walked back while loading, so it
; holds its original value again when the macro finishes.
; Only bytes inside [src, src + size) are read: the last 8 bytes are taken
; with one qword load when size >= 8, the rest one byte at a time.
; Flags are clobbered.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro LOAD_FRACTIONAL_XMM 3
%define %%xmm_out %1 ; %%xmm_out is an xmm register
%define %%src %2
%define %%size %3
pxor %%xmm_out, %%xmm_out
cmp %%size, 0
je %%_done
add %%src, %%size ; src = one past the last data byte
cmp %%size, 8
jl %%_byte_loop
sub %%src, 8
pinsrq %%xmm_out, [%%src], 0 ;Read in 8 bytes if they exists
sub %%size, 8
je %%_done
%%_byte_loop: ;Read in data 1 byte at a time while data is left
pslldq %%xmm_out, 1 ; make room in byte 0 for the next lower-addressed byte
dec %%src
pinsrb %%xmm_out, BYTE [%%src], 0
dec %%size
jg %%_byte_loop
%%_done:
%endmacro ; LOAD_FRACTIONAL_XMM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; copy x bytes (rounded up to 16 bytes) from src to dst
; src & dst are unaligned
; macro COPY_IN_CRC dst, src, size_in_bytes, tmp, x0, x1, x2, x3, xfold,
; xt0, xt1, xt2, xt3, xt4
;
; Structure:
;   1. If size >= 16 and src is not 16-byte aligned, copy one unaligned
;      16-byte block and advance by only the bytes needed to align src.
;   2. Main loop: 64 bytes per iteration using aligned movntdqa loads and
;      unaligned stores.
;   3. Tail: 0 to 3 remaining full 16-byte blocks, then a final partial block
;      of 1 to 15 bytes that is stored as a full 16 bytes (hence "rounded up").
; Every CRC step (FOLD*, PARTIAL_FOLD, pxor into x0..x3) is inside
; %ifndef DEFLATE, so with DEFLATE defined this macro is a plain copy.
; size is compared with signed branches throughout.
%macro COPY_IN_CRC 14
%define %%dst %1 ; reg, in/clobbered
%define %%src %2 ; reg, in/clobbered
%define %%size %3 ; reg, in/clobbered
%define %%tmp %4 ; reg, tmp
%define %%x0 %5 ; xmm, in/out: crc state
%define %%x1 %6 ; xmm, in/out: crc state
%define %%x2 %7 ; xmm, in/out: crc state
%define %%x3 %8 ; xmm, in/out: crc state
%define %%xfold %9 ; xmm, in: (loaded from fold4)
%define %%xtmp0 %10 ; xmm, tmp
%define %%xtmp1 %11 ; xmm, tmp
%define %%xtmp2 %12 ; xmm, tmp
%define %%xtmp3 %13 ; xmm, tmp
%define %%xtmp4 %14 ; xmm, tmp
cmp %%size, 16
jl %%lt_16
; align source
; tmp = (-src) & 15 = number of bytes up to the next 16-byte boundary
xor %%tmp, %%tmp
sub %%tmp, %%src
and %%tmp, 15
jz %%already_aligned
; need to align, tmp contains number of bytes to transfer
; a full 16 bytes are copied (size >= 16 here); only tmp of them are counted,
; the rest is written again by the stores that follow
movdqu %%xtmp0, [%%src]
movdqu [%%dst], %%xtmp0
add %%dst, %%tmp
add %%src, %%tmp
sub %%size, %%tmp
%ifndef DEFLATE
; dst doubles as the GPR scratch argument of PARTIAL_FOLD, so preserve it
push %%dst
PARTIAL_FOLD %%x0, %%x1, %%x2, %%x3, %%xtmp0, %%tmp, %%xfold, \
%%xtmp1, %%xtmp2, %%xtmp3, %%xtmp4, %%dst
pop %%dst
%endif
%%already_aligned:
; from here on size is kept biased by -64 while the main loop runs
sub %%size, 64
jl %%end_loop
jmp %%loop
align 16
%%loop:
movntdqa %%xtmp0, [%%src+0*16]
movntdqa %%xtmp1, [%%src+1*16]
movntdqa %%xtmp2, [%%src+2*16]
%ifndef DEFLATE
; xtmp3/xtmp4 are used as fold scratch here, so the 4th block is loaded after
FOLD4 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3, %%xtmp4
%endif
movntdqa %%xtmp3, [%%src+3*16]
movdqu [%%dst+0*16], %%xtmp0
movdqu [%%dst+1*16], %%xtmp1
movdqu [%%dst+2*16], %%xtmp2
movdqu [%%dst+3*16], %%xtmp3
%ifndef DEFLATE
; mix the four new blocks into the folded state
pxor %%x0, %%xtmp0
pxor %%x1, %%xtmp1
pxor %%x2, %%xtmp2
pxor %%x3, %%xtmp3
%endif
add %%src, 4*16
add %%dst, 4*16
sub %%size, 4*16
jge %%loop
%%end_loop:
; %%size contains (num bytes left - 64)
; add 16 at a time; the first add that makes size non-negative tells how many
; full blocks remain, and leaves size = bytes left after those blocks (0..15)
add %%size, 16
jge %%three_full_regs
add %%size, 16
jge %%two_full_regs
add %%size, 16
jge %%one_full_reg
add %%size, 16
%%no_full_regs: ; 0 <= %%size < 16, no full regs
; the jz below tests the flags of the add just above
jz %%done ; if no bytes left, we're done
jmp %%partial
;; Handle case where input is <16 bytes
%%lt_16:
test %%size, %%size
jz %%done ; if no bytes left, we're done
jmp %%partial
%%one_full_reg:
movntdqa %%xtmp0, [%%src+0*16]
%ifndef DEFLATE
FOLD1 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3
%endif
movdqu [%%dst+0*16], %%xtmp0
%ifndef DEFLATE
pxor %%x3, %%xtmp0
%endif
test %%size, %%size
jz %%done ; if no bytes left, we're done
add %%dst, 1*16
add %%src, 1*16
jmp %%partial
%%two_full_regs:
movntdqa %%xtmp0, [%%src+0*16]
movntdqa %%xtmp1, [%%src+1*16]
%ifndef DEFLATE
FOLD2 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3
%endif
movdqu [%%dst+0*16], %%xtmp0
movdqu [%%dst+1*16], %%xtmp1
%ifndef DEFLATE
pxor %%x2, %%xtmp0
pxor %%x3, %%xtmp1
%endif
test %%size, %%size
jz %%done ; if no bytes left, we're done
add %%dst, 2*16
add %%src, 2*16
jmp %%partial
%%three_full_regs:
movntdqa %%xtmp0, [%%src+0*16]
movntdqa %%xtmp1, [%%src+1*16]
movntdqa %%xtmp2, [%%src+2*16]
%ifndef DEFLATE
FOLD3 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3
%endif
movdqu [%%dst+0*16], %%xtmp0
movdqu [%%dst+1*16], %%xtmp1
movdqu [%%dst+2*16], %%xtmp2
%ifndef DEFLATE
pxor %%x1, %%xtmp0
pxor %%x2, %%xtmp1
pxor %%x3, %%xtmp2
%endif
test %%size, %%size
jz %%done ; if no bytes left, we're done
add %%dst, 3*16
add %%src, 3*16
; fall through to %%partial
%%partial: ; 0 <= %%size < 16
%ifndef DEFLATE
; LOAD_FRACTIONAL_XMM clobbers size, keep the byte count for PARTIAL_FOLD
mov %%tmp, %%size
%endif
LOAD_FRACTIONAL_XMM %%xtmp0, %%src, %%size
; stores a full 16 bytes; bytes past the data are zero from the macro above
movdqu [%%dst], %%xtmp0
%ifndef DEFLATE
; dst is not needed afterwards, so it is passed as the GPR scratch argument
PARTIAL_FOLD %%x0, %%x1, %%x2, %%x3, %%xtmp0, %%tmp, %%xfold, \
%%xtmp1, %%xtmp2, %%xtmp3, %%xtmp4, %%dst
%endif
%%done:
%endm
;%assign K 1024;
;%assign D 8 * K; ; Amount of history
;%assign LA 17 * 16; ; Max look-ahead, rounded up to 32 byte boundary
; copy D + LA bytes from src to dst
; dst is aligned
;void copy_D_LA(uint8_t *dst, uint8_t *src);
; arg 1: rcx : dst
; arg 2: rdx : src
; copy_D_LA dst, src, tmp, xtmp0, xtmp1, xtmp2, xtmp3
;
; D and LA are assemble-time constants defined outside this macro (the
; %assign lines above are commented out and only illustrate typical values).
; Loads are unaligned (movdqu); stores are aligned (movdqa), so dst must be
; 16-byte aligned. The copy is fully sized at assembly time: a 64-byte loop
; followed by an unrolled tail of 0 to 3 further 16-byte words.
; NOTE(review): the main loop is bottom-tested, so it assumes the 64-byte
; iteration count (SIZE4) is at least 1.
; NOTE(review): any remainder of (D + LA) modulo 16 is not copied.
%macro copy_D_LA 7
%define %%dst %1 ; reg, clobbered
%define %%src %2 ; reg, clobbered
%define %%tmp %3
%define %%xtmp0 %4
%define %%xtmp1 %5
%define %%xtmp2 %6
%define %%xtmp3 %7
%assign %%SIZE (D + LA) / 16 ; number of DQ words to be copied
%assign %%SIZE4 %%SIZE/4
; tmp = value of dst at which the 64-byte loop stops
lea %%tmp, [%%dst + 4 * 16 * %%SIZE4]
jmp %%copy_D_LA_1
align 16
%%copy_D_LA_1:
movdqu %%xtmp0, [%%src]
movdqu %%xtmp1, [%%src+16]
movdqu %%xtmp2, [%%src+32]
movdqu %%xtmp3, [%%src+48]
movdqa [%%dst], %%xtmp0
movdqa [%%dst+16], %%xtmp1
movdqa [%%dst+32], %%xtmp2
movdqa [%%dst+48], %%xtmp3
add %%src, 4*16
add %%dst, 4*16
cmp %%dst, %%tmp
jne %%copy_D_LA_1
; tail: load the remaining (SIZE mod 4) words, one per temporary register
%assign %%i 0
%rep (%%SIZE - 4 * %%SIZE4)
%if (%%i == 0)
movdqu %%xtmp0, [%%src + %%i*16]
%elif (%%i == 1)
movdqu %%xtmp1, [%%src + %%i*16]
%elif (%%i == 2)
movdqu %%xtmp2, [%%src + %%i*16]
%elif (%%i == 3)
movdqu %%xtmp3, [%%src + %%i*16]
%else
%error too many i
% error
%endif
%assign %%i %%i+1
%endrep
; tail: store those words; src and dst are not advanced any further
%assign %%i 0
%rep (%%SIZE - 4 * %%SIZE4)
%if (%%i == 0)
movdqa [%%dst + %%i*16], %%xtmp0
%elif (%%i == 1)
movdqa [%%dst + %%i*16], %%xtmp1
%elif (%%i == 2)
movdqa [%%dst + %%i*16], %%xtmp2
%elif (%%i == 3)
movdqa [%%dst + %%i*16], %%xtmp3
%else
%error too many i
% error
%endif
%assign %%i %%i+1
%endrep
%endm
%endif

View File

@ -0,0 +1,552 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifndef BUFFER_UTILS
%define BUFFER_UTILS
%include "options.asm"
extern pshufb_shf_table
extern mask3
; Build-time switch for the loads used by COPY_IN_CRC below.
; With FIX_CACHE_READ defined, every vmovntdqa in the code that follows is
; textually replaced by vmovdqa.
%ifdef FIX_CACHE_READ
%define vmovntdqa vmovdqa
%else
; Without it, prefetchnta is redefined as an empty one-argument macro, so any
; "prefetchnta <operand>" line in the including code assembles to nothing.
%macro prefetchnta 1
%endm
%endif
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; code for doing the CRC calculation as part of copy-in, using pclmulqdq
; "shift" 4 input registers down 4 places
; macro FOLD4 xmm0, xmm1, xmm2, xmm3, const, tmp0, tmp1
;
; VEX-encoded version. Folds all four state registers in place. For each
; state register x:
;     x = clmul(x.high64, const.low64) XOR clmul(x.low64, const.high64)
; imm8 0x01 picks the high qword of the first source and the low qword of
; the second; imm8 0x10 picks the low qword of the first and the high qword
; of the second. The instructions are written in two-operand form, i.e. the
; destination is also the first source.
; No new data is mixed in here: in COPY_IN_CRC the freshly loaded blocks are
; XORed into the state registers after this macro has run.
; tmp0 and tmp1 are clobbered; const is only read.
%macro FOLD4 7
%define %%xmm0 %1 ; xmm reg, in/out
%define %%xmm1 %2 ; xmm reg, in/out
%define %%xmm2 %3 ; xmm reg, in/out
%define %%xmm3 %4 ; xmm reg, in/out
%define %%const %5 ; xmm reg, in
%define %%tmp0 %6 ; xmm reg, tmp
%define %%tmp1 %7 ; xmm reg, tmp
; first pair: fold xmm0 and xmm1, using tmp0/tmp1 for the second product
vmovaps %%tmp0, %%xmm0
vmovaps %%tmp1, %%xmm1
vpclmulqdq %%xmm0, %%const, 0x01
vpclmulqdq %%xmm1, %%const, 0x01
vpclmulqdq %%tmp0, %%const, 0x10
vpclmulqdq %%tmp1, %%const, 0x10
vxorps %%xmm0, %%tmp0
vxorps %%xmm1, %%tmp1
; second pair: same sequence for xmm2 and xmm3, reusing the temporaries
vmovaps %%tmp0, %%xmm2
vmovaps %%tmp1, %%xmm3
vpclmulqdq %%xmm2, %%const, 0x01
vpclmulqdq %%xmm3, %%const, 0x01
vpclmulqdq %%tmp0, %%const, 0x10
vpclmulqdq %%tmp1, %%const, 0x10
vxorps %%xmm2, %%tmp0
vxorps %%xmm3, %%tmp1
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; "shift" 3 input registers down 4 places
; macro FOLD3 x0, x1, x2, x3, const, tmp0
; x0 x1 x2 x3
; In A B C D
; Out D A' B' C'
;
; VEX-encoded version. X' denotes X folded with const: the XOR of the 0x01
; and 0x10 vpclmulqdq products of X and const. A, B and C are folded and
; moved up one register; D is rotated unchanged into x0. tmp0 is clobbered.
%macro FOLD3 6
%define %%x0 %1 ; xmm reg, in/out
%define %%x1 %2 ; xmm reg, in/out
%define %%x2 %3 ; xmm reg, in/out
%define %%x3 %4 ; xmm reg, in/out
%define %%const %5 ; xmm reg, in
%define %%tmp0 %6 ; xmm reg, tmp
vmovdqa %%tmp0, %%x3 ; save D, it is placed in x0 at the end
; x3 = C'
vmovaps %%x3, %%x2
vpclmulqdq %%x2, %%const, 0x01
vpclmulqdq %%x3, %%const, 0x10
vxorps %%x3, %%x2
; x2 = B'
vmovaps %%x2, %%x1
vpclmulqdq %%x1, %%const, 0x01
vpclmulqdq %%x2, %%const, 0x10
vxorps %%x2, %%x1
; x1 = A'
vmovaps %%x1, %%x0
vpclmulqdq %%x0, %%const, 0x01
vpclmulqdq %%x1, %%const, 0x10
vxorps %%x1, %%x0
vmovdqa %%x0, %%tmp0 ; x0 = D
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; "shift" 2 input registers down 4 places
; macro FOLD2 x0, x1, x2, x3, const, tmp0
; x0 x1 x2 x3
; In A B C D
; Out C D A' B'
;
; VEX-encoded version. X' denotes X folded with const: the XOR of the 0x01
; and 0x10 vpclmulqdq products of X and const. A and B are folded and moved
; up two registers; C and D are rotated unchanged into x0 and x1.
; tmp0 is clobbered.
%macro FOLD2 6
%define %%x0 %1 ; xmm reg, in/out
%define %%x1 %2 ; xmm reg, in/out
%define %%x2 %3 ; xmm reg, in/out
%define %%x3 %4 ; xmm reg, in/out
%define %%const %5 ; xmm reg, in
%define %%tmp0 %6 ; xmm reg, tmp
vmovdqa %%tmp0, %%x3 ; save D
; x3 = B'
vmovaps %%x3, %%x1
vpclmulqdq %%x1, %%const, 0x01
vpclmulqdq %%x3, %%const, 0x10
vxorps %%x3, %%x1
vmovdqa %%x1, %%tmp0 ; x1 = D
vmovdqa %%tmp0, %%x2 ; save C
; x2 = A'
vmovaps %%x2, %%x0
vpclmulqdq %%x0, %%const, 0x01
vpclmulqdq %%x2, %%const, 0x10
vxorps %%x2, %%x0
vmovdqa %%x0, %%tmp0 ; x0 = C
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; "shift" 1 input registers down 4 places
; macro FOLD1 x0, x1, x2, x3, const, tmp0
; x0 x1 x2 x3
; In A B C D
; Out B C D A'
;
; VEX-encoded version. A' denotes A folded with const: the XOR of the 0x01
; and 0x10 vpclmulqdq products of A and const. Only A is folded (into x3);
; B, C and D each move down one register unchanged. tmp0 is clobbered.
%macro FOLD1 6
%define %%x0 %1 ; xmm reg, in/out
%define %%x1 %2 ; xmm reg, in/out
%define %%x2 %3 ; xmm reg, in/out
%define %%x3 %4 ; xmm reg, in/out
%define %%const %5 ; xmm reg, in
%define %%tmp0 %6 ; xmm reg, tmp
vmovdqa %%tmp0, %%x3 ; save D
; x3 = A'
vmovaps %%x3, %%x0
vpclmulqdq %%x0, %%const, 0x01
vpclmulqdq %%x3, %%const, 0x10
vxorps %%x3, %%x0
vmovdqa %%x0, %%x1 ; x0 = B
vmovdqa %%x1, %%x2 ; x1 = C
vmovdqa %%x2, %%tmp0 ; x2 = D
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; macro PARTIAL_FOLD x0, x1, x2, x3, xp, size, xfold, xt0, xt1, xt2, xt3, gtmp
; XP X3 X2 X1 X0 tmp2
; Initial state xI HG FE DC BA
; after shift IH GF ED CB A0
; after fold ff GF ED CB ff = merge(IH, A0)
;
; VEX-encoded version. Merges a partial block xp (size bytes, 1...15) into
; the four-register state: the state plus xp is shifted as one long value,
; and the part shifted out of x0 (tmp2 = A0) is folded with const and XORed
; into x3.
; The shuffle masks come from the external tables pshufb_shf_table and mask3.
; NOTE(review): the table contents are not visible here; the per-line
; "A0/0B/..." annotations below are the original author's description.
; Clobbers xp, size, gtmp and the four xmm temporaries.
%macro PARTIAL_FOLD 12
%define %%x0 %1 ; xmm reg, in/out
%define %%x1 %2 ; xmm reg, in/out
%define %%x2 %3 ; xmm reg, in/out
%define %%x3 %4 ; xmm reg, in/out
%define %%xp %5 ; xmm partial reg, in/clobbered
%define %%size %6 ; GPR, in/clobbered (1...15)
%define %%const %7 ; xmm reg, in
%define %%shl %8 ; xmm reg, tmp
%define %%shr %9 ; xmm reg, tmp
%define %%tmp2 %10 ; xmm reg, tmp
%define %%tmp3 %11 ; xmm reg, tmp
%define %%gtmp %12 ; GPR, tmp
; {XP X3 X2 X1 X0} = {xI HG FE DC BA}
shl %%size, 4 ; size *= 16
; table base is biased by -16 so that size == 1 selects the first 16-byte entry
lea %%gtmp, [pshufb_shf_table - 16 WRT_OPT]
vmovdqa %%shl, [%%gtmp + %%size] ; shl constant
vmovdqa %%shr, %%shl
vpxor %%shr, [mask3 WRT_OPT] ; shr constant
vmovdqa %%tmp2, %%x0 ; tmp2 = BA
vpshufb %%tmp2, %%shl ; tmp2 = A0
vpshufb %%x0, %%shr ; x0 = 0B
vmovdqa %%tmp3, %%x1 ; tmp3 = DC
vpshufb %%tmp3, %%shl ; tmp3 = C0
vpor %%x0, %%tmp3 ; x0 = CB
vpshufb %%x1, %%shr ; x1 = 0D
vmovdqa %%tmp3, %%x2 ; tmp3 = FE
vpshufb %%tmp3, %%shl ; tmp3 = E0
vpor %%x1, %%tmp3 ; x1 = ED
vpshufb %%x2, %%shr ; x2 = 0F
vmovdqa %%tmp3, %%x3 ; tmp3 = HG
vpshufb %%tmp3, %%shl ; tmp3 = G0
vpor %%x2, %%tmp3 ; x2 = GF
vpshufb %%x3, %%shr ; x3 = 0H
vpshufb %%xp, %%shl ; xp = I0
vpor %%x3, %%xp ; x3 = IH
; fold tmp2 into X3
vmovaps %%tmp3, %%tmp2
vpclmulqdq %%tmp2, %%const, 0x01
vpclmulqdq %%tmp3, %%const, 0x10
vxorps %%x3, %%tmp2
vxorps %%x3, %%tmp3
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; LOAD_FRACTIONAL_XMM: Packs xmm register with data when data input is less than 16 bytes.
; Returns 0 if data has length 0.
; Input: The input data (src), that data's length (size).
; Output: The packed xmm register (xmm_out).
; size is clobbered.
; src is advanced to the end of the data and walked back while loading, so it
; holds its original value again when the macro finishes.
; Only bytes inside [src, src + size) are read: the last 8 bytes are taken
; with one qword load when size >= 8, the rest one byte at a time.
; Flags are clobbered. This is the VEX-encoded version.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro LOAD_FRACTIONAL_XMM 3
%define %%xmm_out %1 ; %%xmm_out is an xmm register
%define %%src %2
%define %%size %3
vpxor %%xmm_out, %%xmm_out
cmp %%size, 0
je %%_done
add %%src, %%size ; src = one past the last data byte
cmp %%size, 8
jl %%_byte_loop
sub %%src, 8
vpinsrq %%xmm_out, [%%src], 0 ;Read in 8 bytes if they exists
sub %%size, 8
je %%_done
%%_byte_loop: ;Read in data 1 byte at a time while data is left
vpslldq %%xmm_out, 1 ; make room in byte 0 for the next lower-addressed byte
dec %%src
vpinsrb %%xmm_out, BYTE [%%src], 0
dec %%size
jg %%_byte_loop
%%_done:
%endmacro ; LOAD_FRACTIONAL_XMM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; copy x bytes (rounded up to 16 bytes) from src to dst
; src & dst are unaligned
; macro COPY_IN_CRC dst, src, size_in_bytes, tmp, x0, x1, x2, x3, xfold,
; xt0, xt1, xt2, xt3, xt4
;
; VEX-encoded version. Structure:
;   1. If size >= 16 and src is not 16-byte aligned, copy one unaligned
;      16-byte block and advance by only the bytes needed to align src.
;   2. Main loop: 64 bytes per iteration using aligned vmovntdqa loads and
;      unaligned stores.
;   3. Tail: 0 to 3 remaining full 16-byte blocks, then a final partial block
;      of 1 to 15 bytes that is stored as a full 16 bytes (hence "rounded up").
; Every CRC step (FOLD*, PARTIAL_FOLD, vpxor into x0..x3) is inside
; %ifndef DEFLATE, so with DEFLATE defined this macro is a plain copy.
; size is compared with signed branches throughout.
%macro COPY_IN_CRC 14
%define %%dst %1 ; reg, in/clobbered
%define %%src %2 ; reg, in/clobbered
%define %%size %3 ; reg, in/clobbered
%define %%tmp %4 ; reg, tmp
%define %%x0 %5 ; xmm, in/out: crc state
%define %%x1 %6 ; xmm, in/out: crc state
%define %%x2 %7 ; xmm, in/out: crc state
%define %%x3 %8 ; xmm, in/out: crc state
%define %%xfold %9 ; xmm, in: (loaded from fold4)
%define %%xtmp0 %10 ; xmm, tmp
%define %%xtmp1 %11 ; xmm, tmp
%define %%xtmp2 %12 ; xmm, tmp
%define %%xtmp3 %13 ; xmm, tmp
%define %%xtmp4 %14 ; xmm, tmp
cmp %%size, 16
jl %%lt_16
; align source
; tmp = (-src) & 15 = number of bytes up to the next 16-byte boundary
xor %%tmp, %%tmp
sub %%tmp, %%src
and %%tmp, 15
jz %%already_aligned
; need to align, tmp contains number of bytes to transfer
; a full 16 bytes are copied (size >= 16 here); only tmp of them are counted,
; the rest is written again by the stores that follow
vmovdqu %%xtmp0, [%%src]
vmovdqu [%%dst], %%xtmp0
add %%dst, %%tmp
add %%src, %%tmp
sub %%size, %%tmp
%ifndef DEFLATE
; dst doubles as the GPR scratch argument of PARTIAL_FOLD, so preserve it
push %%dst
PARTIAL_FOLD %%x0, %%x1, %%x2, %%x3, %%xtmp0, %%tmp, %%xfold, \
%%xtmp1, %%xtmp2, %%xtmp3, %%xtmp4, %%dst
pop %%dst
%endif
%%already_aligned:
; from here on size is kept biased by -64 while the main loop runs
sub %%size, 64
jl %%end_loop
jmp %%loop
align 16
%%loop:
vmovntdqa %%xtmp0, [%%src+0*16]
vmovntdqa %%xtmp1, [%%src+1*16]
vmovntdqa %%xtmp2, [%%src+2*16]
%ifndef DEFLATE
; xtmp3/xtmp4 are used as fold scratch here, so the 4th block is loaded after
FOLD4 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3, %%xtmp4
%endif
vmovntdqa %%xtmp3, [%%src+3*16]
vmovdqu [%%dst+0*16], %%xtmp0
vmovdqu [%%dst+1*16], %%xtmp1
vmovdqu [%%dst+2*16], %%xtmp2
vmovdqu [%%dst+3*16], %%xtmp3
%ifndef DEFLATE
; mix the four new blocks into the folded state
vpxor %%x0, %%xtmp0
vpxor %%x1, %%xtmp1
vpxor %%x2, %%xtmp2
vpxor %%x3, %%xtmp3
%endif
add %%src, 4*16
add %%dst, 4*16
sub %%size, 4*16
jge %%loop
%%end_loop:
; %%size contains (num bytes left - 64)
; add 16 at a time; the first add that makes size non-negative tells how many
; full blocks remain, and leaves size = bytes left after those blocks (0..15)
add %%size, 16
jge %%three_full_regs
add %%size, 16
jge %%two_full_regs
add %%size, 16
jge %%one_full_reg
add %%size, 16
%%no_full_regs: ; 0 <= %%size < 16, no full regs
; the jz below tests the flags of the add just above
jz %%done ; if no bytes left, we're done
jmp %%partial
;; Handle case where input is <16 bytes
%%lt_16:
test %%size, %%size
jz %%done ; if no bytes left, we're done
jmp %%partial
%%one_full_reg:
vmovntdqa %%xtmp0, [%%src+0*16]
%ifndef DEFLATE
FOLD1 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3
%endif
vmovdqu [%%dst+0*16], %%xtmp0
%ifndef DEFLATE
vpxor %%x3, %%xtmp0
%endif
test %%size, %%size
jz %%done ; if no bytes left, we're done
add %%dst, 1*16
add %%src, 1*16
jmp %%partial
%%two_full_regs:
vmovntdqa %%xtmp0, [%%src+0*16]
vmovntdqa %%xtmp1, [%%src+1*16]
%ifndef DEFLATE
FOLD2 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3
%endif
vmovdqu [%%dst+0*16], %%xtmp0
vmovdqu [%%dst+1*16], %%xtmp1
%ifndef DEFLATE
vpxor %%x2, %%xtmp0
vpxor %%x3, %%xtmp1
%endif
test %%size, %%size
jz %%done ; if no bytes left, we're done
add %%dst, 2*16
add %%src, 2*16
jmp %%partial
%%three_full_regs:
vmovntdqa %%xtmp0, [%%src+0*16]
vmovntdqa %%xtmp1, [%%src+1*16]
vmovntdqa %%xtmp2, [%%src+2*16]
%ifndef DEFLATE
FOLD3 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3
%endif
vmovdqu [%%dst+0*16], %%xtmp0
vmovdqu [%%dst+1*16], %%xtmp1
vmovdqu [%%dst+2*16], %%xtmp2
%ifndef DEFLATE
vpxor %%x1, %%xtmp0
vpxor %%x2, %%xtmp1
vpxor %%x3, %%xtmp2
%endif
test %%size, %%size
jz %%done ; if no bytes left, we're done
add %%dst, 3*16
add %%src, 3*16
; fall through to %%partial
%%partial: ; 0 <= %%size < 16
%ifndef DEFLATE
; LOAD_FRACTIONAL_XMM clobbers size, keep the byte count for PARTIAL_FOLD
mov %%tmp, %%size
%endif
LOAD_FRACTIONAL_XMM %%xtmp0, %%src, %%size
; stores a full 16 bytes; bytes past the data are zero from the macro above
vmovdqu [%%dst], %%xtmp0
%ifndef DEFLATE
; dst is not needed afterwards, so it is passed as the GPR scratch argument
PARTIAL_FOLD %%x0, %%x1, %%x2, %%x3, %%xtmp0, %%tmp, %%xfold, \
%%xtmp1, %%xtmp2, %%xtmp3, %%xtmp4, %%dst
%endif
%%done:
%endm
;%assign K 1024;
;%assign D 8 * K; ; Amount of history
;%assign LA 17 * 16; ; Max look-ahead, rounded up to 32 byte boundary
; copy D + LA bytes from src to dst
; dst is aligned
;void copy_D_LA(uint8_t *dst, uint8_t *src);
; arg 1: rcx : dst
; arg 2: rdx : src
; copy_D_LA dst, src, tmp, ytmp0, ytmp1, ytmp2, ytmp3
;
; AVX2 version working on 32-byte ymm words.
; D and LA are assemble-time constants defined outside this macro (the
; %assign lines above are commented out and only illustrate typical values).
; Loads are unaligned (vmovdqu); stores are aligned (vmovdqa), so dst must be
; 32-byte aligned for the ymm stores. The copy is fully sized at assembly
; time: a 128-byte loop, an unrolled tail of 0 to 3 further ymm words, and
; one final 16-byte xmm word when (D + LA) is an odd multiple of 16.
; NOTE(review): the main loop is bottom-tested, so it assumes the 128-byte
; iteration count (SIZE4) is at least 1.
; NOTE(review): any remainder of (D + LA) modulo 16 is not copied.
%macro copy_D_LA 7
%define %%dst %1 ; reg, clobbered
%define %%src %2 ; reg, clobbered
%define %%tmp %3
%define %%ytmp0 %4
%define %%ytmp1 %5
%define %%ytmp2 %6
%define %%ytmp3 %7
; 4th argument with an "x" appended. NOTE(review): presumably the caller
; passes a name for which an x-suffixed alias of the xmm half exists -- confirm.
%define %%xtmp0 %4x
%assign %%SIZE (D + LA) / 32 ; number of 32-byte (ymm) words to be copied
%assign %%SIZE4 %%SIZE/4
%assign %%MOD16 ((D + LA) - 32 * %%SIZE) / 16 ; 0 or 1 trailing 16-byte word
; tmp = value of dst at which the 128-byte loop stops
lea %%tmp, [%%dst + 4 * 32 * %%SIZE4]
jmp %%copy_D_LA_1
align 16
%%copy_D_LA_1:
vmovdqu %%ytmp0, [%%src]
vmovdqu %%ytmp1, [%%src + 1 * 32]
vmovdqu %%ytmp2, [%%src + 2 * 32]
vmovdqu %%ytmp3, [%%src + 3 * 32]
vmovdqa [%%dst], %%ytmp0
vmovdqa [%%dst + 1 * 32], %%ytmp1
vmovdqa [%%dst + 2 * 32], %%ytmp2
vmovdqa [%%dst + 3 * 32], %%ytmp3
add %%src, 4*32
add %%dst, 4*32
cmp %%dst, %%tmp
jne %%copy_D_LA_1
; tail: load the remaining (SIZE mod 4) ymm words, one per temporary register
%assign %%i 0
%rep (%%SIZE - 4 * %%SIZE4)
%if (%%i == 0)
vmovdqu %%ytmp0, [%%src + %%i*32]
%elif (%%i == 1)
vmovdqu %%ytmp1, [%%src + %%i*32]
%elif (%%i == 2)
vmovdqu %%ytmp2, [%%src + %%i*32]
%elif (%%i == 3)
vmovdqu %%ytmp3, [%%src + %%i*32]
%else
%error too many i
% error
%endif
%assign %%i %%i+1
%endrep
; tail: store those words; src and dst are not advanced any further
%assign %%i 0
%rep (%%SIZE - 4 * %%SIZE4)
%if (%%i == 0)
vmovdqa [%%dst + %%i*32], %%ytmp0
%elif (%%i == 1)
vmovdqa [%%dst + %%i*32], %%ytmp1
%elif (%%i == 2)
vmovdqa [%%dst + %%i*32], %%ytmp2
%elif (%%i == 3)
vmovdqa [%%dst + %%i*32], %%ytmp3
%else
%error too many i
% error
%endif
%assign %%i %%i+1
%endrep
; final 16-byte word, placed right after the ymm tail words copied above
%rep %%MOD16
vmovdqu %%xtmp0, [%%src + (%%SIZE - 4 * %%SIZE4)*32]
vmovdqa [%%dst + (%%SIZE - 4 * %%SIZE4)*32], %%xtmp0
%endrep
%endm
%endif

1285
igzip/igzip_check.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,416 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "options.asm"
%ifndef UTILS_ASM
%define UTILS_ASM
; compare macro
;; sttni2 is faster, but it can't be debugged
;; so following code is based on "mine5"
;; compare 258 bytes = 8 * 32 + 2
;; tmp16 is a 16-bit version of tmp
;; compare258 src1, src2, result, tmp
;;
;; On exit result holds the number of leading bytes (0..258) that are equal
;; at src1 and src2. The data is compared 8 bytes at a time with scalar
;; loads, two loads per loop iteration, followed by one 2-byte compare.
;; tmp and the flags are clobbered; src1 and src2 are only read.
;; NOTE(review): the 8-byte loads imply tmp is a 64-bit GPR -- confirm callers.
%macro compare258 4
%define %%src1 %1
%define %%src2 %2
%define %%result %3
%define %%tmp %4
%define %%tmp16 %4w ; tmp as a 16-bit register
xor %%result, %%result
%%loop1:
mov %%tmp, [%%src1 + %%result]
xor %%tmp, [%%src2 + %%result] ; non-zero bits mark differing bits
jnz %%miscompare
add %%result, 8
mov %%tmp, [%%src1 + %%result]
xor %%tmp, [%%src2 + %%result]
jnz %%miscompare
add %%result, 8
cmp %%result, 256
jb %%loop1
; compare last two bytes
; tmp is zero here (the last xor found no difference), so writing only its
; low 16 bits leaves the upper bits clear
mov %%tmp16, [%%src1 + %%result]
xor %%tmp16, [%%src2 + %%result]
jnz %%miscompare16
; no miscompares, return 258
add %%result, 2
jmp %%end
%%miscompare16:
and %%tmp, 0xFFFF
%%miscompare:
; lowest set bit = first differing bit; /8 turns it into a byte offset
bsf %%tmp, %%tmp
shr %%tmp, 3
add %%result, %%tmp
%%end:
%endm
;; compare up to 258 bytes, starting at offset 8: the first 8 bytes are not
;; examined and are counted as matching (250 = 258 - 8 bytes are compared)
;; tmp16 is a 16-bit version of tmp
;; compare250 src1, src2, result, tmp
;;
;; On exit result holds the match length (8..258): 8 plus the number of equal
;; bytes found from offset 8 on. Scalar 8-byte compares are used, followed by
;; one 2-byte compare at offset 256.
;; tmp and the flags are clobbered; src1 and src2 are only read.
;; NOTE(review): the 8-byte loads imply tmp is a 64-bit GPR -- confirm callers.
%macro compare250 4
%define %%src1 %1
%define %%src2 %2
%define %%result %3
%define %%tmp %4
%define %%tmp16 %4w ; tmp as a 16-bit register
mov %%result, 8
; bytes 8..15 are checked once up front so that the loop below, which
; advances 16 bytes per iteration, starts at 16 and ends exactly at 256
mov %%tmp, [%%src1 + 8]
xor %%tmp, [%%src2 + 8]
jnz %%miscompare
add %%result, 8
%%loop1:
mov %%tmp, [%%src1 + %%result]
xor %%tmp, [%%src2 + %%result] ; non-zero bits mark differing bits
jnz %%miscompare
add %%result, 8
mov %%tmp, [%%src1 + %%result]
xor %%tmp, [%%src2 + %%result]
jnz %%miscompare
add %%result, 8
cmp %%result, 256
jb %%loop1
; compare last two bytes
; tmp is zero here (the last xor found no difference), so writing only its
; low 16 bits leaves the upper bits clear
mov %%tmp16, [%%src1 + %%result]
xor %%tmp16, [%%src2 + %%result]
jnz %%miscompare16
; no miscompares, return 258
add %%result, 2
jmp %%end
%%miscompare16:
and %%tmp, 0xFFFF
%%miscompare:
; lowest set bit = first differing bit; /8 turns it into a byte offset
bsf %%tmp, %%tmp
shr %%tmp, 3
add %%result, %%tmp
%%end:
%endm
;; compare 258 bytes = 8 * 32 + 2
;; compares 16 bytes at a time, using pcmpeqb/pmovmskb
;; compare258_x src1, src2, result, tmp, xtmp1, xtmp2
;;
;; On exit result holds the number of leading bytes (0..258) that are equal
;; at src1 and src2. Sixteen 16-byte SSE compares (two per loop iteration)
;; cover bytes 0..255; the last two bytes are compared with a scalar 16-bit xor.
;; tmp, both xmm temporaries and the flags are clobbered.
%macro compare258_x 6
%define %%src1 %1
%define %%src2 %2
%define %%result %3
%define %%tmp %4
%define %%tmp32 %4d
%define %%tmp16 %4w ; tmp as a 16-bit register
%define %%xtmp %5
%define %%xtmp2 %6
xor %%result, %%result
%%loop1:
movdqu %%xtmp, [%%src1 + %%result]
movdqu %%xtmp2, [%%src2 + %%result]
pcmpeqb %%xtmp, %%xtmp2 ; 0xFF in every byte lane that is equal
pmovmskb %%tmp32, %%xtmp ; one mask bit per byte lane
xor %%tmp, 0xFFFF ; invert: set bits now mark mismatching bytes
jnz %%miscompare
add %%result, 16
movdqu %%xtmp, [%%src1 + %%result]
movdqu %%xtmp2, [%%src2 + %%result]
pcmpeqb %%xtmp, %%xtmp2
pmovmskb %%tmp32, %%xtmp
xor %%tmp, 0xFFFF
jnz %%miscompare
add %%result, 16
cmp %%result, 256
jb %%loop1
; compare last two bytes
; tmp is zero here (the last xor found no mismatch), so writing only its low
; 16 bits leaves the upper bits clear
mov %%tmp16, [%%src1 + %%result]
xor %%tmp16, [%%src2 + %%result]
jnz %%miscompare16
; no miscompares, return 258
add %%result, 2
jmp %%end
%%miscompare16:
; scalar path: tmp holds differing bits, so the bit index is divided by 8
and %%tmp, 0xFFFF
bsf %%tmp, %%tmp
shr %%tmp, 3
add %%result, %%tmp
jmp %%end
%%miscompare:
; vector path: tmp holds one bit per byte, so the bit index is the byte offset
bsf %%tmp, %%tmp
add %%result, %%tmp
%%end:
%endm
;; compare 258 bytes = 8 * 32 + 2, assuming first 8 bytes
;; were already checked
;; compares 16 bytes at a time, using pcmpeqb/pmovmskb
;; compare250_x src1, src2, result, tmp, xtmp1, xtmp2
;;
;; On exit result holds the match length (8..258): the first 8 bytes are not
;; examined and are counted as matching. 16-byte compares are done at offsets
;; 8, 24, 40, ... 232, and a final one at 248.
;; The final compare covers offsets 248..263, i.e. it reads up to 6 bytes past
;; offset 257; the result is clamped to 258 afterwards.
;; tmp, both xmm temporaries and the flags are clobbered.
%macro compare250_x 6
%define %%src1 %1
%define %%src2 %2
%define %%result %3
%define %%tmp %4
%define %%tmp32 %4d ; tmp as a 32-bit register
%define %%xtmp %5
%define %%xtmp2 %6
mov %%result, 8
movdqu %%xtmp, [%%src1 + 8]
movdqu %%xtmp2, [%%src2 + 8]
pcmpeqb %%xtmp, %%xtmp2 ; 0xFF in every byte lane that is equal
pmovmskb %%tmp32, %%xtmp ; one mask bit per byte lane
xor %%tmp, 0xFFFF ; invert: set bits now mark mismatching bytes
jnz %%miscompare
add %%result, 16
%%loop1:
movdqu %%xtmp, [%%src1 + %%result]
movdqu %%xtmp2, [%%src2 + %%result]
pcmpeqb %%xtmp, %%xtmp2
pmovmskb %%tmp32, %%xtmp
xor %%tmp, 0xFFFF
jnz %%miscompare
add %%result, 16
movdqu %%xtmp, [%%src1 + %%result]
movdqu %%xtmp2, [%%src2 + %%result]
pcmpeqb %%xtmp, %%xtmp2
pmovmskb %%tmp32, %%xtmp
xor %%tmp, 0xFFFF
jnz %%miscompare
add %%result, 16
; loop while another 16-byte compare still fits inside the 258 bytes
cmp %%result, 258 - 16
jb %%loop1
; last block: result is 248 here, so this compare overlaps the 258-byte limit
movdqu %%xtmp, [%%src1 + %%result]
movdqu %%xtmp2, [%%src2 + %%result]
pcmpeqb %%xtmp, %%xtmp2
pmovmskb %%tmp32, %%xtmp
xor %%tmp, 0xFFFF
jnz %%miscompare_last
; no miscompares, return 258
mov %%result, 258
jmp %%end
%%miscompare_last:
bsf %%tmp, %%tmp
add %%result, %%tmp
;; Guarantee the result has length at most 258.
mov %%tmp, 258
cmp %%result, 258
cmova %%result, %%tmp
jmp %%end
%%miscompare:
; tmp holds one bit per byte, so the bit index is the byte offset
bsf %%tmp, %%tmp
add %%result, %%tmp
%%end:
%endm
;; compare 258 bytes = 8 * 32 + 2
;; compares 32 bytes at a time, using vpcmpeqb/vpmovmskb
;; compare258_y src1, src2, result, tmp, ytmp1, ytmp2
;;
;; On exit result holds the number of leading bytes (0..258) that are equal
;; at src1 and src2. Eight 32-byte AVX2 compares (two per loop iteration)
;; cover bytes 0..255; the last two bytes are compared with a scalar 16-bit xor.
;; tmp, both ymm temporaries and the flags are clobbered.
%macro compare258_y 6
%define %%src1 %1
%define %%src2 %2
%define %%result %3
%define %%tmp %4
%define %%tmp16 %4w ; tmp as a 16-bit register
%define %%tmp32 %4d ; tmp as a 32-bit register
%define %%ytmp %5
%define %%ytmp2 %6
xor %%result, %%result
%%loop1:
vmovdqu %%ytmp, [%%src1 + %%result]
vmovdqu %%ytmp2, [%%src2 + %%result]
vpcmpeqb %%ytmp, %%ytmp, %%ytmp2 ; 0xFF in every byte lane that is equal
vpmovmskb %%tmp, %%ytmp ; one mask bit per byte lane (32 bits)
xor %%tmp32, 0xFFFFFFFF ; invert: set bits now mark mismatching bytes
jnz %%miscompare
add %%result, 32
vmovdqu %%ytmp, [%%src1 + %%result]
vmovdqu %%ytmp2, [%%src2 + %%result]
vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
vpmovmskb %%tmp, %%ytmp
xor %%tmp32, 0xFFFFFFFF
jnz %%miscompare
add %%result, 32
cmp %%result, 256
jb %%loop1
; compare last two bytes
; tmp is zero here (the last xor found no mismatch), so writing only its low
; 16 bits leaves the upper bits clear
mov %%tmp16, [%%src1 + %%result]
xor %%tmp16, [%%src2 + %%result]
jnz %%miscompare16
; no miscompares, return 258
add %%result, 2
jmp %%end
%%miscompare16:
; scalar path: tmp holds differing bits, so the bit index is divided by 8
and %%tmp, 0xFFFF
bsf %%tmp, %%tmp
shr %%tmp, 3
add %%result, %%tmp
jmp %%end
%%miscompare:
; vector path: tmp holds one bit per byte, so the bit index is the byte offset
bsf %%tmp, %%tmp
add %%result, %%tmp
%%end:
%endm
;; compare 258 bytes = 8 * 32 + 2, assuming first 8 bytes
;; were already checked
;; compares 32 bytes at a time, using vpcmpeqb/vpmovmskb
;; compare250_y src1, src2, result, tmp, ytmp1, ytmp2
;;
;; On exit result holds the match length (8..258): the first 8 bytes are not
;; examined and are counted as matching. 32-byte compares are done at offsets
;; 8, 40, 72, ... 200, and a final one at 232.
;; The final compare covers offsets 232..263, i.e. it reads up to 6 bytes past
;; offset 257; the result is clamped to 258 afterwards.
;; tmp, both ymm temporaries and the flags are clobbered.
%macro compare250_y 6
%define %%src1 %1
%define %%src2 %2
%define %%result %3
%define %%tmp %4
%define %%tmp16 %4w ; tmp as a 16-bit register
%define %%tmp32 %4d ; tmp as a 32-bit register
%define %%ytmp %5
%define %%ytmp2 %6
mov %%result, 8
vmovdqu %%ytmp, [%%src1 + 8]
vmovdqu %%ytmp2, [%%src2 + 8]
vpcmpeqb %%ytmp, %%ytmp, %%ytmp2 ; 0xFF in every byte lane that is equal
vpmovmskb %%tmp, %%ytmp ; one mask bit per byte lane (32 bits)
xor %%tmp32, 0xFFFFFFFF ; invert: set bits now mark mismatching bytes
jnz %%miscompare
add %%result, 32
%%loop1:
vmovdqu %%ytmp, [%%src1 + %%result]
vmovdqu %%ytmp2, [%%src2 + %%result]
vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
vpmovmskb %%tmp, %%ytmp
xor %%tmp32, 0xFFFFFFFF
jnz %%miscompare
add %%result, 32
vmovdqu %%ytmp, [%%src1 + %%result]
vmovdqu %%ytmp2, [%%src2 + %%result]
vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
vpmovmskb %%tmp, %%ytmp
xor %%tmp32, 0xFFFFFFFF
jnz %%miscompare
add %%result, 32
; loop while another 32-byte compare still fits inside the 258 bytes
cmp %%result, 258 - 32
jb %%loop1
; last block: result is 232 here, so this compare overlaps the 258-byte limit
vmovdqu %%ytmp, [%%src1 + %%result]
vmovdqu %%ytmp2, [%%src2 + %%result]
vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
vpmovmskb %%tmp, %%ytmp
xor %%tmp32, 0xFFFFFFFF
jnz %%miscompare_last
; no miscompares, return 258
mov %%result, 258
jmp %%end
%%miscompare_last:
bsf %%tmp, %%tmp
add %%result, %%tmp
;; Guarantee the result has length at most 258.
mov %%tmp, 258
cmp %%result, 258
cmova %%result, %%tmp
jmp %%end
%%miscompare:
; tmp holds one bit per byte, so the bit index is the byte offset
bsf %%tmp, %%tmp
add %%result, %%tmp
%%end:
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; compare size, src1, src2, result, tmp
;;
;; General-length compare: on exit result holds the number of leading bytes
;; (0..size) that are equal at src1 and src2. Full 8-byte groups are compared
;; with scalar 8-byte loads; the remaining 0..7 bytes are compared one at a
;; time, so no byte at or beyond offset size is read.
;; size is treated as signed and is clobbered, as are tmp and the flags.
;; NOTE(review): the 8-byte loads imply tmp is a 64-bit GPR -- confirm callers.
%macro compare 5
%define %%size %1
%define %%src1 %2
%define %%src2 %3
%define %%result %4
%define %%tmp %5
%define %%tmp8 %5b ; tmp as a 8-bit register
xor %%result, %%result
; bias size by -7 so "size > 0" means at least 8 bytes are still left
sub %%size, 7
jle %%lab2
%%loop1:
mov %%tmp, [%%src1 + %%result]
xor %%tmp, [%%src2 + %%result] ; non-zero bits mark differing bits
jnz %%miscompare
add %%result, 8
sub %%size, 8
jg %%loop1
%%lab2:
;; if we fall through from above, we have found no mismatches,
;; %%size+7 is the number of bytes left to look at, and %%result is the
;; number of bytes that have matched
add %%size, 7
jle %%end
%%loop3:
mov %%tmp8, [%%src1 + %%result]
cmp %%tmp8, [%%src2 + %%result]
jne %%end
inc %%result
dec %%size
jg %%loop3
jmp %%end
%%miscompare:
; lowest set bit = first differing bit; /8 turns it into a byte offset
bsf %%tmp, %%tmp
shr %%tmp, 3
add %%result, %%tmp
%%end:
%endm
%endif ;UTILS_ASM

86
igzip/igzip_example.c Normal file
View File

@ -0,0 +1,86 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "igzip_lib.h"
/* Size in bytes of both the input and the output staging buffers in main() */
#define BUF_SIZE 8192
/* Compression stream state used by main() */
struct isal_zstream stream;
/*
 * igzip_example: compress argv[1] into argv[2].
 *
 * The input is read in BUF_SIZE pieces; each piece is handed to
 * isal_deflate(), which is called repeatedly until it stops filling the
 * output buffer completely. The outer loop ends when the stream's internal
 * state reaches ZSTATE_END.
 *
 * Returns 0 on success. Usage errors, open failures and I/O errors terminate
 * the program with EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
	uint8_t inbuf[BUF_SIZE], outbuf[BUF_SIZE];
	FILE *in, *out;
	size_t produced;

	if (argc != 3) {
		fprintf(stderr, "Usage: igzip_example infile outfile\n");
		exit(EXIT_FAILURE);
	}
	in = fopen(argv[1], "rb");
	if (!in) {
		fprintf(stderr, "Can't open %s for reading\n", argv[1]);
		exit(EXIT_FAILURE);
	}
	out = fopen(argv[2], "wb");
	if (!out) {
		fprintf(stderr, "Can't open %s for writing\n", argv[2]);
		fclose(in);
		exit(EXIT_FAILURE);
	}

	printf("igzip_example\nWindow Size: %d K\n", HIST_SIZE);
	fflush(0);

	isal_deflate_init(&stream);
	stream.end_of_stream = 0;
	stream.flush = NO_FLUSH;

	do {
		stream.avail_in = (uint32_t) fread(inbuf, 1, BUF_SIZE, in);
		/* Without this check a read error (short read, EOF never set)
		 * would make the loop spin forever, because the stream would
		 * never be told that the input has ended. */
		if (ferror(in)) {
			fprintf(stderr, "Error reading %s\n", argv[1]);
			fclose(out);
			fclose(in);
			exit(EXIT_FAILURE);
		}
		/* feof() only promises "non-zero" at end of file, but the
		 * compression code compares end_of_stream against exactly 1,
		 * so normalize the value. */
		stream.end_of_stream = feof(in) ? 1 : 0;
		stream.next_in = inbuf;
		do {
			stream.avail_out = BUF_SIZE;
			stream.next_out = outbuf;
			isal_deflate(&stream);
			produced = BUF_SIZE - stream.avail_out;
			if (fwrite(outbuf, 1, produced, out) != produced) {
				fprintf(stderr, "Error writing %s\n", argv[2]);
				fclose(out);
				fclose(in);
				exit(EXIT_FAILURE);
			}
			/* A completely filled output buffer means more
			 * output may still be pending. */
		} while (stream.avail_out == 0);
		assert(stream.avail_in == 0);
	} while (stream.internal_state.state != ZSTATE_END);

	fclose(out);
	fclose(in);
	printf("End of igzip_example\n\n");
	return 0;
}

180
igzip/igzip_file_perf.c Normal file
View File

@ -0,0 +1,180 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include "igzip_lib.h"
#include "test.h"
/* Output buffer size used by main() when the input file is empty */
#define BUF_SIZE 1024
/* Lower bound on the number of timed compression iterations */
#define MIN_TEST_LOOPS 100
/* Approximate number of input bytes to process in total; main() divides it
 * by the file size to pick the iteration count. May be overridden at
 * compile time. */
#ifndef RUN_MEM_SIZE
# define RUN_MEM_SIZE 500000000
#endif
/* Compression stream state used by main() */
struct isal_zstream stream;
/*
 * Return the size in bytes of the open stream f, leaving the current file
 * position unchanged.
 *
 * Returns -1 if the position cannot be queried or changed (for example on a
 * non-seekable stream) or if the size does not fit in an int.
 */
int get_filesize(FILE * f)
{
	long curr, end;
	int size;

	/* ftell() returns long; keeping the saved position in a long means the
	 * restore below is exact even beyond the range of int. */
	curr = ftell(f);	/* Save current position */
	if (curr < 0)
		return -1;
	if (fseek(f, 0L, SEEK_END) != 0)
		return -1;
	end = ftell(f);
	if (fseek(f, curr, SEEK_SET) != 0)	/* Restore position */
		return -1;
	if (end < 0)
		return -1;

	size = (int)end;
	if ((long)size != end)	/* size not representable in the int result */
		return -1;
	return size;
}
/*
 * igzip_file_perf: time repeated one-shot compression of a whole file.
 *
 * argv[1] is read completely into memory and compressed "iterations" times
 * with the default tables while being timed. It is then compressed once more
 * with Huffman tables built from the file's own histogram, and the ratio of
 * that run is reported. If argv[2] is given, the output of this last run is
 * written to it.
 *
 * Returns 0 on success; every failure terminates with EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
	FILE *in, *out = NULL;
	unsigned char *inbuf, *outbuf;
	int i, infile_size, iterations, outbuf_size;
	struct isal_huff_histogram histogram;
	struct isal_hufftables hufftables_custom;
	struct perf start, stop;

	memset(&histogram, 0, sizeof(histogram));

	if (argc > 3 || argc < 2) {
		fprintf(stderr, "Usage: igzip_file_perf infile [outfile]\n"
			"\t - Runs multiple iterations of igzip on a file to "
			"get more accurate time results.\n");
		exit(EXIT_FAILURE);
	}
	in = fopen(argv[1], "rb");
	if (!in) {
		fprintf(stderr, "Can't open %s for reading\n", argv[1]);
		exit(EXIT_FAILURE);
	}
	if (argc > 2) {
		out = fopen(argv[2], "wb");
		if (!out) {
			fprintf(stderr, "Can't open %s for writing\n", argv[2]);
			exit(EXIT_FAILURE);
		}
		printf("outfile=%s\n", argv[2]);
	}
	printf("Window Size: %d K\n", HIST_SIZE);
	printf("igzip_file_perf: \n");
	fflush(0);

	/* Allocate space for entire input file and output
	 * (assuming some possible expansion on output size)
	 */
	infile_size = get_filesize(in);
	if (infile_size < 0) {
		fprintf(stderr, "Can't determine size of %s\n", argv[1]);
		exit(EXIT_FAILURE);
	}
	if (infile_size != 0) {
		/* NOTE(review): infile_size * 2 overflows int for inputs
		 * larger than INT_MAX / 2 bytes. */
		outbuf_size = infile_size * 2;
		iterations = RUN_MEM_SIZE / infile_size;
	} else {
		outbuf_size = BUF_SIZE;
		iterations = MIN_TEST_LOOPS;
	}
	if (iterations < MIN_TEST_LOOPS)
		iterations = MIN_TEST_LOOPS;

	/* malloc(0) is allowed to return NULL, which would be misreported as
	 * an allocation failure for an empty input file. */
	inbuf = malloc(infile_size ? (size_t) infile_size : 1);
	if (inbuf == NULL) {
		fprintf(stderr, "Can't allocate input buffer memory\n");
		exit(EXIT_FAILURE);
	}
	outbuf = malloc(outbuf_size);
	if (outbuf == NULL) {
		fprintf(stderr, "Can't allocate output buffer memory\n");
		exit(EXIT_FAILURE);
	}

	printf("igzip_file_perf: %s %d iterations\n", argv[1], iterations);

	/* Read complete input file into buffer */
	stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in);
	if (stream.avail_in != (uint32_t) infile_size) {
		fprintf(stderr, "Couldn't fit all of input file into buffer\n");
		exit(EXIT_FAILURE);
	}

	/* Timed section: compress the whole buffer "iterations" times */
	perf_start(&start);
	for (i = 0; i < iterations; i++) {
		isal_deflate_init(&stream);
		stream.end_of_stream = 1;	/* Do the entire file at once */
		stream.flush = NO_FLUSH;
		stream.next_in = inbuf;
		stream.avail_in = infile_size;
		stream.next_out = outbuf;
		stream.avail_out = outbuf_size;
		isal_deflate(&stream);
		if (stream.avail_in != 0)
			break;
	}
	perf_stop(&stop);

	if (stream.avail_in != 0) {
		fprintf(stderr, "Could not compress all of inbuf\n");
		exit(EXIT_FAILURE);
	}

	printf("  file %s - in_size=%d out_size=%d iter=%d ratio_default=%3.1f%%", argv[1],
	       infile_size, stream.total_out, i, 100.0 * stream.total_out / infile_size);

	/* Untimed run with Huffman tables derived from this file's histogram */
	isal_update_histogram(inbuf, infile_size, &histogram);
	isal_create_hufftables(&hufftables_custom, &histogram);

	isal_deflate_init(&stream);
	stream.end_of_stream = 1;	/* Do the entire file at once */
	stream.flush = NO_FLUSH;
	stream.next_in = inbuf;
	stream.avail_in = infile_size;
	stream.next_out = outbuf;
	stream.avail_out = outbuf_size;
	stream.hufftables = &hufftables_custom;
	isal_deflate(&stream);

	printf(" ratio_custom=%3.1f%%\n", 100.0 * stream.total_out / infile_size);

	if (stream.avail_in != 0) {
		fprintf(stderr, "Could not compress all of inbuf\n");
		exit(EXIT_FAILURE);
	}

	printf("igzip_file: ");
	perf_print(stop, start, (long long)infile_size * i);

	if (argc > 2 && out) {
		printf("writing %s\n", argv[2]);
		if (fwrite(outbuf, 1, stream.total_out, out) != stream.total_out)
			fprintf(stderr, "Error writing %s\n", argv[2]);
		fclose(out);
	}

	fclose(in);
	free(inbuf);
	free(outbuf);
	printf("End of igzip_file_perf\n\n");
	fflush(0);
	return 0;
}

311
igzip/igzip_finish.asm Normal file
View File

@ -0,0 +1,311 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "options.asm"
%include "lz0a_const.asm"
%include "data_struct2.asm"
%include "bitbuf2.asm"
%include "huffman.asm"
%include "igzip_compare_types.asm"
%include "stdmac.asm"
%include "reg_sizes.asm"
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Symbolic register names used by isal_deflate_finish_01.
; Several names share one physical register, so only one name of each group
; can hold a live value at any time:
;   rbx: f_index, code, tmp4, tmp5, tmp6
;   rcx: tmp2, hash
;   r12: code2, f_end_i   (f_end_i is therefore also kept in a stack slot)
%define tmp1 rax
%define f_index rbx
%define code rbx
%define tmp4 rbx
%define tmp5 rbx
%define tmp6 rbx
%define tmp2 rcx
%define hash rcx
%define tmp3 rdx
%define stream rsi
%define f_i rdi
%define code_len2 rbp
%define m_out_buf r8
%define m_bits r9
%define dist r10
%define m_bit_count r11
%define code2 r12
%define f_end_i r12
%define file_start r13
%define len r14
%define hufftables r15
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Stack frame layout: offsets are relative to rsp after "sub rsp, stack_size"
f_end_i_mem_offset equ 0 ; local variable (8 bytes)
stack_size equ 8
; void isal_deflate_finish ( isal_zstream *stream )
; arg 1: rcx: addr of stream
;        (when assembled for elf64 the argument is taken from rdi and copied
;         to rcx below)
;
; Encodes the bytes of the internal buffer between b_bytes_processed and
; b_bytes_valid one position at a time, as literals or length/distance
; matches, until the data is exhausted or the output bit buffer passes
; m_out_end. If all data was consumed and there is room, an end-of-block
; symbol (256) is written and the state becomes ZSTATE_TRL (end_of_stream
; == 1) or ZSTATE_SYNC_FLUSH (otherwise). next_out, avail_out, total_out,
; b_bytes_processed and the bit-buffer fields of the stream are updated
; before returning.
global isal_deflate_finish_01
isal_deflate_finish_01:
; PUSH_ALL / POP_ALL are macros defined outside this file; presumably they
; save and restore the listed registers -- confirm in stdmac.asm.
PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
sub rsp, stack_size
%ifidn __OUTPUT_FORMAT__, elf64
mov rcx, rdi
%endif
mov stream, rcx
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
mov m_out_buf, [stream + _next_out]
mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf
mov tmp1 %+ d, [stream + _avail_out]
add tmp1, m_out_buf
; m_out_end = next_out + avail_out - SLOP
sub tmp1, SLOP
; NOTE(review): no jump to skip_SLOP is visible in this function.
skip_SLOP:
mov [stream + _internal_state_bitbuf_m_out_end], tmp1
mov m_bits, [stream + _internal_state_bitbuf_m_bits]
mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
mov hufftables, [stream + _hufftables]
; f_i = state->b_bytes_processed;
; f_end_i = state->b_bytes_valid;
mov f_i %+ d, [stream + _internal_state_b_bytes_processed]
mov f_end_i %+ d, [stream + _internal_state_b_bytes_valid]
; f_i += (uint32_t)(state->buffer - state->file_start);
; f_end_i += (uint32_t)(state->buffer - state->file_start);
mov file_start, [stream + _internal_state_file_start]
lea tmp1, [stream + _internal_state_buffer]
sub tmp1, file_start
add f_i, tmp1
add f_end_i, tmp1
; f_end_i shares r12 with code2, so keep a copy on the stack; all later
; comparisons against f_end_i read this stack slot.
mov [rsp + f_end_i_mem_offset], f_end_i
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
cmp f_i, f_end_i
jge end_loop_2
; tmp1 carries the 4 bytes at the current position into loop2, where they
; are fed to compute_hash; every path that jumps back to loop2 reloads it.
mov tmp1 %+ d, [file_start + f_i]
loop2:
; if (state->bitbuf.is_full()) {
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end_loop_2
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
compute_hash hash, tmp1
and hash %+ d, HASH_MASK
; f_index = state->head[hash];
movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
; state->head[hash] = (uint16_t) f_i;
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
; dist = f_i - f_index; // mod 64k
mov dist %+ d, f_i %+ d
sub dist %+ d, f_index %+ d
and dist %+ d, 0xFFFF
; if ((dist-1) <= (D-1)) {
; NOTE(review): the unsigned "jae" below takes the literal path when
; dist-1 >= D-1, i.e. a match is only tried for dist-1 < D-1, which is
; stricter than the "<=" in the comment above -- confirm which is intended.
mov tmp1 %+ d, dist %+ d
sub tmp1 %+ d, 1
cmp tmp1 %+ d, (D-1)
jae encode_literal
; len = f_end_i - f_i;
mov tmp4, [rsp + f_end_i_mem_offset]
sub tmp4, f_i
; if (len > 258) len = 258;
cmp tmp4, 258
cmovg tmp4, [c258]
; len = compare(state->file_start + f_i,
; state->file_start + f_i - dist, len);
lea tmp1, [file_start + f_i]
mov tmp2, tmp1
sub tmp2, dist
compare tmp4, tmp1, tmp2, len, tmp3
; if (len >= SHORTEST_MATCH) {
cmp len, SHORTEST_MATCH
jb encode_literal
;; encode as dist/len
; get_dist_code(dist, &code2, &code_len2);
get_dist_code dist, code2, code_len2, hufftables ;; clobbers dist, rcx
; get_len_code(len, &code, &code_len);
get_len_code len, code, rcx, hufftables ;; rcx is code_len
; code2 <<= code_len
; code2 |= code
; code_len2 += code_len
%ifdef USE_HSWNI
shlx code2, code2, rcx
%else
shl code2, cl
%endif
or code2, code
add code_len2, rcx
; for (k = f_i+1, f_i += len-1; k <= f_i; k++) {
; Here f_i is advanced by the full len, and the non-limited loop below runs
; while k < f_i.
lea tmp3, [f_i + 1] ; tmp3 <= k
add f_i, len
%ifdef LIMIT_HASH_UPDATE
; only update hash twice
; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp6 %+ d, [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
add tmp3, 1
; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp6 %+ d, [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
%else
loop3:
; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp6 %+ d, [file_start + tmp3]
compute_hash hash, tmp6
and hash %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
inc tmp3
cmp tmp3, f_i
jl loop3
%endif
; Preload the 4 bytes at the new position for the next pass through loop2.
mov tmp1 %+ d, [file_start + f_i]
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
; continue
cmp f_i, [rsp + f_end_i_mem_offset]
jl loop2
jmp end_loop_2
encode_literal:
; Preload the 4 bytes at f_i + 1 for the next pass through loop2.
mov tmp1 %+ d, [file_start + f_i + 1]
; get_lit_code(state->file_start[f_i], &code2, &code_len2);
movzx tmp5, byte [file_start + f_i]
get_lit_code tmp5, code2, code_len2, hufftables
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
; continue
add f_i, 1
cmp f_i, [rsp + f_end_i_mem_offset]
jl loop2
end_loop_2:
; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
cmp f_i, [rsp + f_end_i_mem_offset]
jl not_end
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja not_end
; When end_of_stream == 1 the end-of-block symbol is only written if
; left_over is not positive; for any other end_of_stream value it is
; written unconditionally.
cmp dword [stream + _end_of_stream], 1
jne cont
cmp dword [stream + _internal_state_left_over], 0
jg not_end
cont:
; get_lit_code(256, &code2, &code_len2);
get_lit_code 256, code2, code_len2, hufftables
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp1
mov dword [stream + _internal_state_has_eob], 1
cmp dword [stream + _end_of_stream], 1
jne sync_flush
; state->state = ZSTATE_TRL;
mov dword [stream + _internal_state_state], ZSTATE_TRL
jmp not_end
sync_flush:
; state->state = ZSTATE_SYNC_FLUSH;
mov dword [stream + _internal_state_state], ZSTATE_SYNC_FLUSH
; }
not_end:
; state->b_bytes_processed = f_i - (state->buffer - state->file_start);
add f_i, [stream + _internal_state_file_start]
sub f_i, stream
sub f_i, _internal_state_buffer
mov [stream + _internal_state_b_bytes_processed], f_i %+ d
; // update output buffer
; stream->next_out = state->bitbuf.buffer_ptr();
mov [stream + _next_out], m_out_buf
; len = state->bitbuf.buffer_used();
sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start]
; stream->avail_out -= len;
sub [stream + _avail_out], m_out_buf %+ d
; stream->total_out += len;
add [stream + _total_out], m_out_buf %+ d
; Save the pending bits so a later call can continue the bit stream.
mov [stream + _internal_state_bitbuf_m_bits], m_bits
mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
add rsp, stack_size
POP_ALL
ret
section .data
; 64-bit constant 258, used as the memory operand of the cmovg that clamps
; the match length (cmov has no immediate form).
; NOTE(review): the datum is 8 bytes wide but only 4-byte alignment is requested.
align 4
c258: dq 258

151
igzip/igzip_inflate_perf.c Normal file
View File

@ -0,0 +1,151 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <zlib.h>
#include "huff_codes.h"
#include "igzip_inflate_ref.h"
#include "test.h"
/* NOTE(review): BUF_SIZE is not referenced by the code visible in this file */
#define BUF_SIZE 1024
/* Lower bound on the number of timed decompression iterations */
#define MIN_TEST_LOOPS 100
/* Approximate number of bytes to process in total; main() divides it by the
 * file size to pick the iteration count. May be overridden at compile time. */
#ifndef RUN_MEM_SIZE
# define RUN_MEM_SIZE 1000000000
#endif
/*
 * Return the size in bytes of the open stream f, leaving the current file
 * position unchanged.
 *
 * Returns -1 if the position cannot be queried or changed (for example on a
 * non-seekable stream) or if the size does not fit in an int.
 */
int get_filesize(FILE * f)
{
	long curr, end;
	int size;

	/* ftell() returns long; keeping the saved position in a long means the
	 * restore below is exact even beyond the range of int. */
	curr = ftell(f);	/* Save current position */
	if (curr < 0)
		return -1;
	if (fseek(f, 0L, SEEK_END) != 0)
		return -1;
	end = ftell(f);
	if (fseek(f, curr, SEEK_SET) != 0)	/* Restore position */
		return -1;
	if (end < 0)
		return -1;

	size = (int)end;
	if ((long)size != end)	/* size not representable in the int result */
		return -1;
	return size;
}
int main(int argc, char *argv[])
{
FILE *in, *out = NULL;
unsigned char *inbuf, *outbuf, *tempbuf;
int i, infile_size, iterations, outbuf_size, check;
uint64_t inbuf_size;
struct inflate_state state;
if (argc > 3 || argc < 2) {
fprintf(stderr, "Usage: igzip_inflate_file_perf infile\n"
"\t - Runs multiple iterations of igzip on a file to "
"get more accurate time results.\n");
exit(0);
}
in = fopen(argv[1], "rb");
if (!in) {
fprintf(stderr, "Can't open %s for reading\n", argv[1]);
exit(0);
}
if (argc > 2) {
out = fopen(argv[2], "wb");
if (!out) {
fprintf(stderr, "Can't open %s for writing\n", argv[2]);
exit(0);
}
printf("outfile=%s\n", argv[2]);
}
printf("igzip_inflate_perf: \n");
fflush(0);
/* Allocate space for entire input file and output
* (assuming some possible expansion on output size)
*/
infile_size = get_filesize(in);
if (infile_size != 0) {
outbuf_size = infile_size;
iterations = RUN_MEM_SIZE / infile_size;
} else {
printf("Error: input file has 0 size\n");
exit(0);
}
if (iterations < MIN_TEST_LOOPS)
iterations = MIN_TEST_LOOPS;
tempbuf = malloc(infile_size);
if (tempbuf == NULL) {
fprintf(stderr, "Can't allocate temp buffer memory\n");
exit(0);
}
inbuf_size = compressBound(infile_size);
inbuf = malloc(inbuf_size);
if (inbuf == NULL) {
fprintf(stderr, "Can't allocate input buffer memory\n");
exit(0);
}
outbuf = malloc(infile_size);
if (outbuf == NULL) {
fprintf(stderr, "Can't allocate output buffer memory\n");
exit(0);
}
fread(tempbuf, 1, infile_size, in);
i = compress2(inbuf, &inbuf_size, tempbuf, infile_size, 9);
if (i != Z_OK) {
printf("Compression of input file failed\n");
exit(0);
}
printf("igzip_inflate_perf: %s %d iterations\n", argv[1], iterations);
/* Read complete input file into buffer */
fclose(in);
struct perf start, stop;
perf_start(&start);
for (i = 0; i < iterations; i++) {
igzip_inflate_init(&state, inbuf + 2, inbuf_size - 2, outbuf, outbuf_size);
check = igzip_inflate(&state);
if (check) {
printf("Error in decompression with error %d\n", check);
break;
}
}
perf_stop(&stop);
printf(" file %s - in_size=%d out_size=%d iter=%d\n", argv[1],
infile_size, state.out_buffer.total_out, i);
printf("igzip_file: ");
perf_print(stop, start, (long long)infile_size * i);
printf("End of igzip_inflate_perf\n\n");
fflush(0);
free(inbuf);
free(outbuf);
free(tempbuf);
return 0;
}

668
igzip/igzip_inflate_ref.c Normal file
View File

@ -0,0 +1,668 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include "igzip_inflate_ref.h"
void inline byte_copy(uint8_t * dest, uint64_t lookback_distance, int repeat_length)
{
uint8_t *src = dest - lookback_distance;
for (; repeat_length > 0; repeat_length--)
*dest++ = *src++;
}
/*
* Returns integer with first length bits reversed and all higher bits zeroed
*/
uint16_t inline bit_reverse2(uint16_t bits, uint8_t length)
{
bits = ((bits >> 1) & 0x55555555) | ((bits & 0x55555555) << 1); // swap bits
bits = ((bits >> 2) & 0x33333333) | ((bits & 0x33333333) << 2); // swap pairs
bits = ((bits >> 4) & 0x0F0F0F0F) | ((bits & 0x0F0F0F0F) << 4); // swap nibbles
bits = ((bits >> 8) & 0x00FF00FF) | ((bits & 0x00FF00FF) << 8); // swap bytes
return bits >> (16 - length);
}
/*
 * Reset the bit accumulator of inflate_in: no bits are buffered afterwards.
 * The input pointer and byte count are not touched.
 */
void inline init_inflate_in_buffer(struct inflate_in_buffer *inflate_in)
{
	inflate_in->read_in_length = 0;
	inflate_in->read_in = 0;
}
/*
 * Attach an input stream of in_size bytes starting at in_stream to
 * inflate_in. Both the start marker and the read position are set to the
 * beginning of the stream.
 */
void inline set_inflate_in_buffer(struct inflate_in_buffer *inflate_in, uint8_t * in_stream,
				  uint32_t in_size)
{
	inflate_in->start = in_stream;
	inflate_in->next_in = in_stream;
	inflate_in->avail_in = in_size;
}
/*
 * Attach an output area of out_size bytes starting at out_stream to
 * inflate_out and reset its running output total to zero.
 */
void inline set_inflate_out_buffer(struct inflate_out_buffer *inflate_out,
				   uint8_t * out_stream, uint32_t out_size)
{
	inflate_out->total_out = 0;
	inflate_out->avail_out = out_size;
	inflate_out->next_out = out_stream;
}
/*
 * Empty the bit accumulator. Every whole byte still held in it is handed
 * back to the input (next_in moves backwards, avail_in grows); a remaining
 * partial byte is discarded.
 */
void inline inflate_in_clear_bits(struct inflate_in_buffer *inflate_in)
{
	uint8_t whole_bytes = inflate_in->read_in_length / 8;

	inflate_in->next_in -= whole_bytes;
	inflate_in->avail_in += whole_bytes;

	inflate_in->read_in_length = 0;
	inflate_in->read_in = 0;
}
/*
 * Refill the bit accumulator read_in from the input bytes.
 *
 * New bytes are placed above the read_in_length bits already held, so the
 * earliest input byte ends up in the least significant free bits.
 *
 * min_required is accepted for interface compatibility but is not used.
 */
void inline inflate_in_load(struct inflate_in_buffer *inflate_in, int min_required)
{
	uint64_t temp = 0;
	uint8_t new_bytes;

	(void)min_required;

	if (inflate_in->avail_in >= 8) {
		/* If there is enough space to load a 64 bits, load the data and use
		 * that to fill read_in */
		new_bytes = 8 - (inflate_in->read_in_length + 7) / 8;
		/* memcpy instead of dereferencing a cast pointer: next_in has
		 * no alignment guarantee, and reading a byte buffer through a
		 * uint64_t lvalue is not a valid access in C.
		 * NOTE(review): this word load only agrees with the bytewise
		 * branch below on a little-endian host. */
		memcpy(&temp, inflate_in->next_in, sizeof(temp));
		inflate_in->read_in |= temp << inflate_in->read_in_length;
		/* Only new_bytes whole bytes are accounted for; bits of temp
		 * beyond them are loaded again by the next refill. */
		inflate_in->next_in += new_bytes;
		inflate_in->avail_in -= new_bytes;
		inflate_in->read_in_length += new_bytes * 8;
	} else {
		/* Else fill the read_in buffer 1 byte at a time */
		while (inflate_in->read_in_length < 57 && inflate_in->avail_in > 0) {
			temp = *inflate_in->next_in;
			inflate_in->read_in |= temp << inflate_in->read_in_length;
			inflate_in->next_in++;
			inflate_in->avail_in--;
			inflate_in->read_in_length += 8;
		}
	}
}
/*
 * Return the next bit_count bits of the input without consuming them.
 * The accumulator is refilled first if it holds fewer than bit_count bits.
 * bit_count must be below 57 (asserted).
 */
uint64_t inline inflate_in_peek_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count)
{
	assert(bit_count < 57);

	/* Load inflate_in if not enough data is in the read_in buffer */
	if (inflate_in->read_in_length < bit_count)
		inflate_in_load(inflate_in, 0);

	/* Build the mask in 64 bits: shifting a plain int by 31 or more is
	 * undefined, although the assert above allows counts up to 56. */
	return inflate_in->read_in & (((uint64_t) 1 << bit_count) - 1);
}
/*
 * Consume bit_count bits: they are dropped from the low end of the bit
 * accumulator and the count of held bits is reduced accordingly.
 */
void inline inflate_in_shift_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count)
{
	inflate_in->read_in_length -= bit_count;
	inflate_in->read_in >>= bit_count;
}
/*
 * Return the next bit_count bits of the input and consume them.
 * The accumulator is refilled first if it holds fewer than bit_count bits.
 * bit_count must be below 57 (asserted).
 *
 * If the input runs out, read_in_length can drop below zero; the visible
 * callers test for that to detect the end of input.
 */
uint64_t inline inflate_in_read_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count)
{
	uint64_t ret;

	assert(bit_count < 57);

	/* Load inflate_in if not enough data is in the read_in buffer */
	if (inflate_in->read_in_length < bit_count)
		inflate_in_load(inflate_in, bit_count);

	/* Build the mask in 64 bits: shifting a plain int by 31 or more is
	 * undefined, although the assert above allows counts up to 56. */
	ret = inflate_in->read_in & (((uint64_t) 1 << bit_count) - 1);
	inflate_in->read_in >>= bit_count;
	inflate_in->read_in_length -= bit_count;

	return ret;
}
/*
 * Build the decode tables in state for the fixed Huffman codes of RFC 1951.
 * Always returns 0.
 *
 * The tables are generated at run time rather than copied from a
 * precomputed image, so DECODE_LOOKUP_SIZE can be changed without
 * regenerating any data.
 */
int inline setup_static_header(struct inflate_state *state)
{
	int sym;
	struct huff_code lit_code[LIT_LEN + 2];
	struct huff_code dist_code[DIST_LEN + 2];
	/* Number of codes per code length, indexed by length */
	uint16_t lit_count[16] = { 0 };
	uint16_t dist_count[16] = { 0 };

	lit_count[7] = 24;
	lit_count[8] = 152;
	lit_count[9] = 112;
	dist_count[5] = 32;

	/* Fixed literal/length code lengths: 0-143 use 8 bits, 144-255 use 9,
	 * 256-279 use 7 and everything from 280 upwards uses 8 again.
	 * (Like the lengths above, this relies on LIT_LEN + 2 >= 280.) */
	for (sym = 0; sym < LIT_LEN + 2; sym++) {
		if (sym < 144)
			lit_code[sym].length = 8;
		else if (sym < 256)
			lit_code[sym].length = 9;
		else if (sym < 280)
			lit_code[sym].length = 7;
		else
			lit_code[sym].length = 8;
	}

	/* Every fixed distance code is 5 bits long */
	for (sym = 0; sym < DIST_LEN + 2; sym++)
		dist_code[sym].length = 5;

	make_inflate_huff_code(&state->lit_huff_code, lit_code, LIT_LEN + 2, lit_count);
	make_inflate_huff_code(&state->dist_huff_code, dist_code, DIST_LEN + 2, dist_count);

	return 0;
}
/*
 * Build the decoding tables in *result from a table of code lengths.
 *
 *   result          - cleared, then filled with the lookup tables
 *   huff_code_table - table_length entries; .length must be set on entry
 *                     (0 = symbol unused). .code is written by this function
 *                     and is overwritten with 0xFFFF for long codes.
 *   table_length    - number of entries in huff_code_table
 *   count           - count[n] is the number of symbols with code length n
 *
 * Codes of at most DECODE_LOOKUP_SIZE bits are resolved by a single index
 * into small_code_lookup. Longer codes are grouped by their first
 * DECODE_LOOKUP_SIZE bits; each group gets a sub-table in long_code_lookup
 * and a small_code_lookup entry (flag 0x8000 set) that points to it.
 */
void inline make_inflate_huff_code(struct inflate_huff_code *result,
struct huff_code *huff_code_table, int table_length,
uint16_t * count)
{
int i, j;
uint16_t code = 0;
uint16_t next_code[MAX_HUFF_TREE_DEPTH + 1];
/* NOTE(review): holds LIT_LEN entries while one visible caller passes
 * table_length == LIT_LEN + 2; safe only if not every symbol can have a
 * long code -- confirm. */
uint16_t long_code_list[LIT_LEN];
uint32_t long_code_length = 0;
uint16_t temp_code_list[1 << (15 - DECODE_LOOKUP_SIZE)];
uint32_t temp_code_length;
uint32_t long_code_lookup_length = 0;
uint32_t max_length;
uint16_t first_bits;
uint32_t code_length;
uint16_t long_bits;
uint16_t min_increment;
memset(result, 0, sizeof(struct inflate_huff_code));
/* First code value for each length, derived from the per-length counts */
next_code[0] = code;
for (i = 1; i < MAX_HUFF_TREE_DEPTH + 1; i++)
next_code[i] = (next_code[i - 1] + count[i - 1]) << 1;
for (i = 0; i < table_length; i++) {
if (huff_code_table[i].length != 0) {
/* Determine the code for symbol i */
huff_code_table[i].code =
bit_reverse2(next_code[huff_code_table[i].length],
huff_code_table[i].length);
next_code[huff_code_table[i].length] += 1;
if (huff_code_table[i].length <= DECODE_LOOKUP_SIZE) {
/* Fill every small_code_lookup slot whose low
 * "length" bits equal this symbol's code. Each
 * entry is the symbol in bits 8:0 with the code
 * length stored from bit 9 upwards; bit 15 stays
 * clear, which marks the entry as a complete
 * symbol. */
for (j = 0; j < (1 << (DECODE_LOOKUP_SIZE -
huff_code_table[i].length)); j++)
result->small_code_lookup[(j <<
huff_code_table[i].length) +
huff_code_table[i].code]
= i | (huff_code_table[i].length) << 9;
} else {
/* Store the element in a list of elements with long codes. */
long_code_list[long_code_length] = i;
long_code_length++;
}
}
}
for (i = 0; i < long_code_length; i++) {
/* Build one long_code_lookup sub-table for each distinct prefix (the
 * first DECODE_LOOKUP_SIZE bits) found among the long codes, and make
 * the small_code_lookup entry of that prefix point to it. */
/* A code of 0xFFFF marks a symbol already placed with an earlier prefix */
if (huff_code_table[long_code_list[i]].code == 0xFFFF)
continue;
max_length = huff_code_table[long_code_list[i]].length;
first_bits =
huff_code_table[long_code_list[i]].code & ((1 << DECODE_LOOKUP_SIZE) - 1);
temp_code_list[0] = long_code_list[i];
temp_code_length = 1;
/* Collect the other long codes sharing this prefix and find the
 * longest code length among them */
for (j = i + 1; j < long_code_length; j++) {
if ((huff_code_table[long_code_list[j]].code &
((1 << DECODE_LOOKUP_SIZE) - 1)) == first_bits) {
if (max_length < huff_code_table[long_code_list[j]].length)
max_length = huff_code_table[long_code_list[j]].length;
temp_code_list[temp_code_length] = long_code_list[j];
temp_code_length++;
}
}
/* The sub-table is indexed by the code bits above the prefix and has
 * 1 << (max_length - DECODE_LOOKUP_SIZE) slots; a code shorter than
 * max_length fills every slot whose low bits match it. */
for (j = 0; j < temp_code_length; j++) {
code_length = huff_code_table[temp_code_list[j]].length;
long_bits =
huff_code_table[temp_code_list[j]].code >> DECODE_LOOKUP_SIZE;
min_increment = 1 << (code_length - DECODE_LOOKUP_SIZE);
for (; long_bits < (1 << (max_length - DECODE_LOOKUP_SIZE));
long_bits += min_increment) {
result->long_code_lookup[long_code_lookup_length + long_bits] =
temp_code_list[j] | (code_length << 9);
}
huff_code_table[temp_code_list[j]].code = 0xFFFF;
}
/* Pointer entry: sub-table offset in the low bits, longest code length
 * from bit 9 upwards, bit 15 set to flag "not a symbol".
 * NOTE(review): decode_next() masks the offset with 0x1FF, so offsets
 * of 512 or more would collide with the length field -- confirm this
 * cannot happen for the configured DECODE_LOOKUP_SIZE. */
result->small_code_lookup[first_bits] =
long_code_lookup_length | (max_length << 9) | 0x8000;
long_code_lookup_length += 1 << (max_length - DECODE_LOOKUP_SIZE);
}
}
/*
 * Decode one Huffman symbol from in_buffer using the tables in huff_code,
 * consume the bits of its code and return the symbol.
 *
 * Table entries keep the symbol (or, for pointer entries, a sub-table
 * offset) in bits 8:0 and a bit length from bit 9 upwards. An entry of
 * small_code_lookup with bit 15 set is a pointer entry: its length field is
 * the number of bits needed to index the sub-table in long_code_lookup.
 */
uint16_t inline decode_next(struct inflate_in_buffer *in_buffer,
			    struct inflate_huff_code *huff_code)
{
	uint16_t peeked;
	uint16_t entry;

	/* First try to resolve the code with the short lookup table */
	peeked = inflate_in_peek_bits(in_buffer, DECODE_LOOKUP_SIZE);
	entry = huff_code->small_code_lookup[peeked];

	if (entry >= 0x8000) {
		/* Pointer entry: look at as many bits as the longest code of
		 * this group needs and index the group's sub-table with the
		 * bits that follow the first DECODE_LOOKUP_SIZE ones. */
		peeked = inflate_in_peek_bits(in_buffer, (entry - 0x8000) >> 9);
		entry = huff_code->long_code_lookup[(entry & 0x1FF) +
						    (peeked >> DECODE_LOOKUP_SIZE)];
	}

	/* entry now describes a complete symbol: drop its code bits */
	inflate_in_shift_bits(in_buffer, entry >> 9);
	return entry & 0x1FF;
}
/* Parse a dynamic Huffman block header (RFC 1951 section 3.2.7) from
 * state->in_buffer and build state->lit_huff_code and state->dist_huff_code.
 *
 * Returns 0 on success, END_OF_INPUT if the input ran out while parsing, or
 * INVALID_BLOCK_HEADER if the header is malformed: an unknown code length
 * symbol, a "repeat previous" with nothing to repeat, more literal/length or
 * distance codes than the tables can hold, or a repeat that runs past the
 * last declared code length. */
int inline setup_dynamic_header(struct inflate_state *state)
{
	int i, j;
	struct huff_code code_huff[CODE_LEN_CODES];
	struct huff_code lit_and_dist_huff[LIT_LEN + DIST_LEN];
	struct huff_code *previous = NULL, *current, *end;
	struct inflate_huff_code inflate_code_huff;
	uint8_t hclen, hdist, hlit;
	uint16_t code_count[16], lit_count[16], dist_count[16];
	uint16_t *count;
	uint16_t symbol;

	/* This order is defined in RFC 1951 page 13 */
	const uint8_t code_length_code_order[CODE_LEN_CODES] = {
		0x10, 0x11, 0x12, 0x00, 0x08, 0x07, 0x09, 0x06,
		0x0a, 0x05, 0x0b, 0x04, 0x0c, 0x03, 0x0d, 0x02,
		0x0e, 0x01, 0x0f
	};

	memset(code_count, 0, sizeof(code_count));
	memset(lit_count, 0, sizeof(lit_count));
	memset(dist_count, 0, sizeof(dist_count));
	memset(code_huff, 0, sizeof(code_huff));
	memset(lit_and_dist_huff, 0, sizeof(lit_and_dist_huff));

	/* These variables are defined in the deflate standard, RFC 1951:
	 * the block carries 257 + hlit literal/length code lengths,
	 * hdist + 1 distance code lengths and hclen + 4 code length code
	 * lengths. */
	hlit = inflate_in_read_bits(&state->in_buffer, 5);
	hdist = inflate_in_read_bits(&state->in_buffer, 5);
	hclen = inflate_in_read_bits(&state->in_buffer, 4);

	/* Create the code huffman code for decoding the lit/len and dist huffman codes */
	for (i = 0; i < hclen + 4; i++) {
		code_huff[code_length_code_order[i]].length =
		    inflate_in_read_bits(&state->in_buffer, 3);

		code_count[code_huff[code_length_code_order[i]].length] += 1;
	}

	if (state->in_buffer.read_in_length < 0)
		return END_OF_INPUT;

	/* The 5-bit fields can describe more codes than lit_and_dist_huff has
	 * room for. Reject such headers before they are used as array bounds;
	 * otherwise the loop below would write past the end of the array. */
	if (257 + hlit > LIT_LEN || hdist + 1 > DIST_LEN)
		return INVALID_BLOCK_HEADER;

	make_inflate_huff_code(&inflate_code_huff, code_huff, CODE_LEN_CODES, code_count);

	/* Decode the lit/len and dist huffman codes using the code huffman code */
	count = lit_count;
	current = lit_and_dist_huff;
	/* One past the last distance code length declared by this header */
	end = lit_and_dist_huff + LIT_LEN + hdist + 1;

	while (current < end) {
		/* If finished decoding the lit/len huffman code, start decoding
		 * the distance code. These decodings are in the same loop
		 * because the lit/len and dist huffman codes are run length
		 * encoded together. */
		if (current == lit_and_dist_huff + 257 + hlit)
			current = lit_and_dist_huff + LIT_LEN;

		if (current == lit_and_dist_huff + LIT_LEN)
			count = dist_count;

		symbol = decode_next(&state->in_buffer, &inflate_code_huff);

		if (state->in_buffer.read_in_length < 0)
			return END_OF_INPUT;

		if (symbol < 16) {
			/* A literal code length: record it for the current
			 * lit/len/dist element */
			count[symbol]++;
			current->length = symbol;
			previous = current;
			current++;

		} else if (symbol == 16) {
			/* Repeat the previous code length 3 - 6 times */
			if (previous == NULL)	/* No elements available to be repeated */
				return INVALID_BLOCK_HEADER;

			i = 3 + inflate_in_read_bits(&state->in_buffer, 2);

			/* Do not act on a repeat count built from missing bits */
			if (state->in_buffer.read_in_length < 0)
				return END_OF_INPUT;

			for (j = 0; j < i; j++) {
				/* A run must not extend past the declared codes */
				if (current >= end)
					return INVALID_BLOCK_HEADER;

				*current = *previous;
				count[current->length]++;
				previous = current;

				/* Step from the last lit/len code to the first
				 * distance code when the run crosses over */
				if (current == lit_and_dist_huff + 256 + hlit) {
					current = lit_and_dist_huff + LIT_LEN;
					count = dist_count;
				} else
					current++;
			}

		} else if (symbol == 17 || symbol == 18) {
			/* Run of zero lengths: symbol 17 covers 3 - 10 entries
			 * (3 extra bits), symbol 18 covers 11 - 138 entries
			 * (7 extra bits). The entries are already zero from
			 * the memset above, so only the cursor moves. */
			if (symbol == 17)
				i = 3 + inflate_in_read_bits(&state->in_buffer, 3);
			else
				i = 11 + inflate_in_read_bits(&state->in_buffer, 7);

			/* Do not act on a repeat count built from missing bits */
			if (state->in_buffer.read_in_length < 0)
				return END_OF_INPUT;

			for (j = 0; j < i; j++) {
				/* A run must not extend past the declared codes */
				if (current >= end)
					return INVALID_BLOCK_HEADER;

				previous = current;

				if (current == lit_and_dist_huff + 256 + hlit) {
					current = lit_and_dist_huff + LIT_LEN;
					count = dist_count;
				} else
					current++;
			}

		} else
			/* The code length alphabet only has symbols 0 - 18 */
			return INVALID_BLOCK_HEADER;
	}

	if (state->in_buffer.read_in_length < 0)
		return END_OF_INPUT;

	make_inflate_huff_code(&state->lit_huff_code, lit_and_dist_huff, LIT_LEN, lit_count);
	make_inflate_huff_code(&state->dist_huff_code, &lit_and_dist_huff[LIT_LEN], DIST_LEN,
			       dist_count);

	return 0;
}
/* Read the 3-bit deflate block header (RFC 1951: 1 bit BFINAL, then 2 bits
 * BTYPE) into state and prepare state for decoding that block.
 *
 * Clears state->new_block. Returns END_OF_INPUT if the header bits could not
 * be read, INVALID_BLOCK_HEADER for the reserved block type 3, 0 for a stored
 * block, and otherwise whatever the static/dynamic table setup returns. */
int read_header(struct inflate_state *state)
{
	state->new_block = 0;

	state->bfinal = inflate_in_read_bits(&state->in_buffer, 1);
	state->btype = inflate_in_read_bits(&state->in_buffer, 2);

	if (state->in_buffer.read_in_length < 0)
		return END_OF_INPUT;

	switch (state->btype) {
	case 0:
		/* Stored block.
		 * NOTE(review): inflate_in_clear_bits presumably drops the bits
		 * left in the current byte so LEN/NLEN can be read byte aligned;
		 * confirm against its definition. */
		inflate_in_clear_bits(&state->in_buffer);
		return 0;
	case 1:
		/* Block compressed with the fixed Huffman codes */
		return setup_static_header(state);
	case 2:
		/* Block compressed with Huffman codes described in the block */
		return setup_dynamic_header(state);
	default:
		/* BTYPE 3 is reserved */
		return INVALID_BLOCK_HEADER;
	}
}
/* Prepare state for a fresh igzip_inflate() run that reads in_size bytes of
 * deflate data from in_stream and writes at most out_size bytes to out_stream.
 *
 * NOTE(review): out_size is 64 bits wide here, but set_inflate_out_buffer is
 * declared with a 32-bit size, so larger values are presumably truncated. */
void igzip_inflate_init(struct inflate_state *state, uint8_t * in_stream, uint32_t in_size,
			uint8_t * out_stream, uint64_t out_size)
{
	/* No block has been started yet: the first igzip_inflate() iteration
	 * must read a block header, and the final block has not been seen. */
	state->bfinal = 0;
	state->new_block = 1;

	/* Attach the caller's output buffer */
	set_inflate_out_buffer(&state->out_buffer, out_stream, out_size);

	/* Reset the input reader, then attach the caller's input buffer */
	init_inflate_in_buffer(&state->in_buffer);
	set_inflate_in_buffer(&state->in_buffer, in_stream, in_size);
}
int igzip_inflate(struct inflate_state *state)
{
/* The following tables are based on the tables in the deflate standard,
* RFC 1951 page 11. */
const uint16_t len_start[29] = {
0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
0x0b, 0x0d, 0x0f, 0x11, 0x13, 0x17, 0x1b, 0x1f,
0x23, 0x2b, 0x33, 0x3b, 0x43, 0x53, 0x63, 0x73,
0x83, 0xa3, 0xc3, 0xe3, 0x102
};
const uint8_t len_extra_bit_count[29] = {
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x1, 0x1, 0x1, 0x1, 0x2, 0x2, 0x2, 0x2,
0x3, 0x3, 0x3, 0x3, 0x4, 0x4, 0x4, 0x4,
0x5, 0x5, 0x5, 0x5, 0x0
};
const uint32_t dist_start[30] = {
0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d,
0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1,
0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01,
0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001
};
const uint8_t dist_extra_bit_count[30] = {
0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2,
0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6,
0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0xa, 0xa,
0xb, 0xb, 0xc, 0xc, 0xd, 0xd
};
uint16_t next_lit, len, nlen;
uint8_t next_dist;
uint32_t repeat_length;
uint32_t look_back_dist;
uint32_t tmp;
while (state->new_block == 0 || state->bfinal == 0) {
if (state->new_block != 0) {
tmp = read_header(state);
if (tmp)
return tmp;
}
if (state->btype == 0) {
/* If the block is uncompressed, perform a memcopy while
* updating state data */
if (state->in_buffer.avail_in < 4)
return END_OF_INPUT;
len = *(uint16_t *) state->in_buffer.next_in;
state->in_buffer.next_in += 2;
nlen = *(uint16_t *) state->in_buffer.next_in;
state->in_buffer.next_in += 2;
/* Check if len and nlen match */
if (len != (~nlen & 0xffff))
return INVALID_NON_COMPRESSED_BLOCK_LENGTH;
if (state->out_buffer.avail_out < len)
return OUT_BUFFER_OVERFLOW;
if (state->in_buffer.avail_in < len)
len = state->in_buffer.avail_in;
else
state->new_block = 1;
memcpy(state->out_buffer.next_out, state->in_buffer.next_in, len);
state->out_buffer.next_out += len;
state->out_buffer.avail_out -= len;
state->out_buffer.total_out += len;
state->in_buffer.next_in += len;
state->in_buffer.avail_in -= len + 4;
if (state->in_buffer.avail_in == 0 && state->new_block == 0)
return END_OF_INPUT;
} else {
/* Else decode a huffman encoded block */
while (state->new_block == 0) {
/* While not at the end of block, decode the next
* symbol */
next_lit =
decode_next(&state->in_buffer, &state->lit_huff_code);
if (state->in_buffer.read_in_length < 0)
return END_OF_INPUT;
if (next_lit < 256) {
/* If the next symbol is a literal,
* write out the symbol and update state
* data accordingly. */
if (state->out_buffer.avail_out < 1)
return OUT_BUFFER_OVERFLOW;
*state->out_buffer.next_out = next_lit;
state->out_buffer.next_out++;
state->out_buffer.avail_out--;
state->out_buffer.total_out++;
} else if (next_lit == 256) {
/* If the next symbol is the end of
* block, update the state data
* accordingly */
state->new_block = 1;
} else if (next_lit < 286) {
/* Else if the next symbol is a repeat
* length, read in the length extra
* bits, the distance code, the distance
* extra bits. Then write out the
* corresponding data and update the
* state data accordingly*/
repeat_length =
len_start[next_lit - 257] +
inflate_in_read_bits(&state->in_buffer,
len_extra_bit_count[next_lit -
257]);
if (state->out_buffer.avail_out < repeat_length)
return OUT_BUFFER_OVERFLOW;
next_dist = decode_next(&state->in_buffer,
&state->dist_huff_code);
look_back_dist = dist_start[next_dist] +
inflate_in_read_bits(&state->in_buffer,
dist_extra_bit_count
[next_dist]);
if (state->in_buffer.read_in_length < 0)
return END_OF_INPUT;
if (look_back_dist > state->out_buffer.total_out)
return INVALID_LOOK_BACK_DISTANCE;
if (look_back_dist > repeat_length) {
memcpy(state->out_buffer.next_out,
state->out_buffer.next_out -
look_back_dist, repeat_length);
} else
byte_copy(state->out_buffer.next_out,
look_back_dist, repeat_length);
state->out_buffer.next_out += repeat_length;
state->out_buffer.avail_out -= repeat_length;
state->out_buffer.total_out += repeat_length;
} else
/* Else the read in bits do not
* correspond to any valid symbol */
return INVALID_SYMBOL;
}
}
}
state->in_buffer.next_in -= state->in_buffer.read_in_length / 8;
state->in_buffer.avail_in += state->in_buffer.read_in_length / 8;
return DECOMPRESSION_FINISHED;
}

150
igzip/igzip_inflate_ref.h Normal file
View File

@ -0,0 +1,150 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#ifndef INFLATE_H
#define INFLATE_H
#include <stdint.h>
#include "huff_codes.h"
/* Return codes of igzip_inflate() and the header parsing helpers */
#define DECOMPRESSION_FINISHED 0
#define END_OF_INPUT 1
#define OUT_BUFFER_OVERFLOW 2
#define INVALID_BLOCK_HEADER 3
#define INVALID_SYMBOL 4
#define INVALID_NON_COMPRESSED_BLOCK_LENGTH 5
#define INVALID_LOOK_BACK_DISTANCE 6

/* Number of input bits used to index the first-level (small) decode table.
 * It is clamped to 15, the longest Huffman code deflate allows. */
#define DECODE_LOOKUP_SIZE 10

#if DECODE_LOOKUP_SIZE > 15
# undef DECODE_LOOKUP_SIZE
# define DECODE_LOOKUP_SIZE 15
#endif

/* Number of entries reserved for the second-level (long code) decode table.
 * The expansion is fully parenthesized so the macro behaves as one value
 * inside larger expressions such as MAX_LONG_CODE * 2 or sizeof arithmetic. */
#if DECODE_LOOKUP_SIZE > 7
# define MAX_LONG_CODE ((((2 << 8) + 1) * (2 << (15 - DECODE_LOOKUP_SIZE))) + 32)
#else
# define MAX_LONG_CODE ((2 << (15 - DECODE_LOOKUP_SIZE)) + (2 << (8 + DECODE_LOOKUP_SIZE)) + 32)
#endif
/* Buffer used to manage decompressed output */
struct inflate_out_buffer{
/* Where the next decompressed byte is written; advanced as output is produced */
uint8_t *next_out;
/* Bytes of space left at next_out; decremented as output is produced */
uint32_t avail_out;
/* Running count of bytes written so far; also bounds how far back a match
 * may reach */
uint32_t total_out;
};
/* Buffer used to manage compressed input */
struct inflate_in_buffer{
/* NOTE(review): presumably the first byte of the input stream; confirm
 * against set_inflate_in_buffer */
uint8_t *start;
/* Next input byte not yet consumed; advanced as input is read */
uint8_t *next_in;
/* Bytes remaining at next_in */
uint32_t avail_in;
/* NOTE(review): presumably the bit accumulator filled by inflate_in_load;
 * confirm */
uint64_t read_in;
/* NOTE(review): presumably the number of valid bits held in read_in. The
 * decoder treats a negative value as "ran out of input" and divides by 8 to
 * give unread whole bytes back to next_in/avail_in. */
int32_t read_in_length;
};
/* Data structure used to store a huffman code for fast look up */
struct inflate_huff_code{
/* First-level table, indexed by the next DECODE_LOOKUP_SIZE input bits.
 * Entries below 0x8000 are complete symbols; entries with bit 15 set point
 * into long_code_lookup. */
uint16_t small_code_lookup[ 1 << (DECODE_LOOKUP_SIZE)];
/* Second-level table for codes longer than DECODE_LOOKUP_SIZE bits */
uint16_t long_code_lookup[MAX_LONG_CODE];
};
/* Structure containing the current state of decompression of data */
struct inflate_state {
/* Destination for decompressed bytes */
struct inflate_out_buffer out_buffer;
/* Source of compressed bytes */
struct inflate_in_buffer in_buffer;
/* Decode tables for the literal/length alphabet of the current block */
struct inflate_huff_code lit_huff_code;
/* Decode tables for the distance alphabet of the current block */
struct inflate_huff_code dist_huff_code;
/* Non-zero when the next thing to read is a block header */
uint8_t new_block;
/* BFINAL bit of the current block (RFC 1951): non-zero on the last block */
uint8_t bfinal;
/* BTYPE of the current block (RFC 1951): 0 stored, 1 fixed, 2 dynamic */
uint8_t btype;
};
/* Performs a copy of repeat_length bytes starting at dest - lookback_distance
 * into dest. When the source and destination ranges overlap, bytes written
 * earlier in the same copy are read back and copied again. */
void byte_copy(uint8_t *dest, uint64_t lookback_distance, int repeat_length);
/* Initialize a struct inflate_in_buffer for use */
void init_inflate_in_buffer(struct inflate_in_buffer *inflate_in);
/* Set up the in_stream used for the in_buffer */
void set_inflate_in_buffer(struct inflate_in_buffer *inflate_in, uint8_t *in_stream,
uint32_t in_size);
/* Set up the out_stream used for the out_buffer */
void set_inflate_out_buffer(struct inflate_out_buffer *inflate_out, uint8_t *out_stream,
uint32_t out_size);
/* Load data from the in_stream into a buffer to allow for handling unaligned data */
void inflate_in_load(struct inflate_in_buffer *inflate_in, int min_load);
/* Returns the next bit_count bits from the in stream without consuming them */
uint64_t inflate_in_peek_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count);
/* Shifts the in stream over by bit_count bits */
void inflate_in_shift_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count);
/* Returns the next bit_count bits from the in stream and shifts the stream over
 * by bit_count bits */
uint64_t inflate_in_read_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count);
/* Sets the inflate_huff_codes in state to be the huffcodes corresponding to the
 * deflate static header */
int setup_static_header(struct inflate_state *state);
/* Sets result to the inflate_huff_code corresponding to the huffcode defined by
 * the lengths in huff_code_table, where count is a histogram of the appearance
 * of each code length */
void make_inflate_huff_code(struct inflate_huff_code *result, struct huff_code *huff_code_table,
int table_length, uint16_t * count);
/* Decodes the next symbol in in_buffer using the huff code defined by
 * huff_code, and consumes the bits of that symbol's code */
uint16_t decode_next(struct inflate_in_buffer *in_buffer, struct inflate_huff_code *huff_code);
/* Reads a dynamic block header from the in_buffer and sets the huff codes in
 * state to match it. Returns 0, END_OF_INPUT or INVALID_BLOCK_HEADER. */
int setup_dynamic_header(struct inflate_state *state);
/* Reads in the block header at the current input position and sets up state to
 * reflect that header information. Returns 0 on success, END_OF_INPUT or
 * INVALID_BLOCK_HEADER otherwise. */
int read_header(struct inflate_state *state);
/* Initialize a struct inflate_state for deflate compressed input data at in_stream and to output
 * data into out_stream.
 * NOTE(review): out_size is 64 bits wide here but set_inflate_out_buffer takes a
 * 32-bit size; confirm whether sizes above 4 GiB are meant to be supported. */
void igzip_inflate_init(struct inflate_state *state, uint8_t *in_stream, uint32_t in_size,
uint8_t *out_stream, uint64_t out_size);
/* Decompress deflate data. This function assumes a call to igzip_inflate_init
 * has been made to set up the state structure to allow for decompression.
 * Returns DECOMPRESSION_FINISHED on success or one of the error codes defined
 * at the top of this header. */
int igzip_inflate(struct inflate_state *state);
#endif //INFLATE_H

182
igzip/igzip_inflate_test.c Normal file
View File

@ -0,0 +1,182 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include "igzip_inflate_ref.h"
#include "huff_codes.h"
/* Don't use a file larger than memory can support, because compression and
 * decompression are done in a stateless manner (whole file in one buffer).
 * The value is 2 GiB. It is written as a parenthesized unsigned long long so
 * it cannot overflow a 32-bit long and behaves as a single value inside any
 * expression. */
#define MAX_INPUT_FILE_SIZE (2ULL * 1024 * 1024 * 1024)
/* Round-trip check: compress uncompressed_stream with zlib at level 9, inflate
 * the result with igzip_inflate and compare against the original data.
 *
 * compressed_length is in/out: on entry the capacity of compressed_stream, on
 * return the size zlib produced. The first 2 bytes of zlib's output (the zlib
 * stream header) are skipped so the reference inflate sees raw deflate data.
 *
 * Returns 0 on success, the zlib or igzip_inflate error code on failure, or -1
 * for an unknown error or a size/content mismatch. */
int test(uint8_t * compressed_stream, uint64_t * compressed_length,
	 uint8_t * uncompressed_stream, int uncompressed_length,
	 uint8_t * uncompressed_test_stream)
{
	struct inflate_state state;
	const char *failure_text;
	int status;

	status = compress2(compressed_stream, compressed_length, uncompressed_stream,
			   uncompressed_length, 9);
	if (status != 0) {
		printf("Failed compressing input with exit code %d", status);
		return status;
	}

	igzip_inflate_init(&state, compressed_stream + 2, *compressed_length - 2,
			   uncompressed_test_stream, uncompressed_length);
	status = igzip_inflate(&state);

	if (status != 0) {
		/* Map the inflate error to its message; known codes are
		 * returned unchanged, anything else becomes -1. */
		switch (status) {
		case END_OF_INPUT:
			failure_text = " did not decompress all input\n";
			break;
		case INVALID_BLOCK_HEADER:
			failure_text = " invalid header\n";
			break;
		case INVALID_SYMBOL:
			failure_text = " invalid symbol\n";
			break;
		case OUT_BUFFER_OVERFLOW:
			failure_text = " out buffer overflow\n";
			break;
		case INVALID_NON_COMPRESSED_BLOCK_LENGTH:
			failure_text = "Invalid length bits in non-compressed block\n";
			break;
		case INVALID_LOOK_BACK_DISTANCE:
			failure_text = "Invalid lookback distance";
			break;
		default:
			failure_text = " error\n";
			status = -1;
			break;
		}
		fputs(failure_text, stdout);
		return status;
	}

	/* Inflate reported success: the size and the bytes must both match */
	if (state.out_buffer.total_out != uncompressed_length) {
		printf("incorrect amount of data was decompressed from compressed data\n");
		printf("%d decompressed of %d compressed", state.out_buffer.total_out,
		       uncompressed_length);
		return -1;
	}

	if (memcmp(uncompressed_stream, uncompressed_test_stream, uncompressed_length) != 0) {
		printf(" decompressed data is not the same as the compressed data\n");
		return -1;
	}

	return 0;
}
/* Run the zlib-compress / igzip-inflate round trip on every file named on the
 * command line.
 *
 * Returns 0 when every file passes (or none was given), 1 if a file cannot be
 * opened, and the failing test() code for the first file that fails, after
 * dumping the compressed bytes for debugging. Exits with status 1 when the
 * working buffers cannot be allocated. Files larger than MAX_INPUT_FILE_SIZE
 * are skipped. */
int main(int argc, char **argv)
{
	int i, ret = 0, fin_ret = 0;
	uint64_t j;
	FILE *file;
	uint64_t compressed_length, file_length, uncompressed_length;
	uint8_t *uncompressed_stream, *compressed_stream, *uncompressed_test_stream;

	if (argc == 1)
		printf("Error, no input file\n");

	for (i = 1; i < argc; i++) {
		/* Binary mode: the input is arbitrary data and must not go
		 * through text-mode newline translation. */
		file = fopen(argv[i], "rb");
		if (file == NULL) {
			printf("Error opening file %s\n", argv[i]);
			return 1;
		} else
			printf("Starting file %s", argv[i]);

		/* Measure the file as the distance between its end and start */
		fseek(file, 0, SEEK_END);
		file_length = ftell(file);
		fseek(file, 0, SEEK_SET);
		file_length -= ftell(file);

		if (file_length > MAX_INPUT_FILE_SIZE) {
			printf("File too large to run on this test\n");
			fclose(file);
			continue;
		}

		compressed_length = compressBound(file_length);
		uncompressed_stream = malloc(file_length);
		compressed_stream = malloc(compressed_length);
		uncompressed_test_stream = malloc(file_length);

		/* The test cannot run without its buffers; report that as a
		 * failure rather than exiting with a success status. */
		if (uncompressed_stream == NULL || compressed_stream == NULL
		    || uncompressed_test_stream == NULL) {
			printf("Failed to allocate memory\n");
			exit(1);
		}

		uncompressed_length = fread(uncompressed_stream, 1, file_length, file);

		ret =
		    test(compressed_stream, &compressed_length, uncompressed_stream,
			 uncompressed_length, uncompressed_test_stream);

		if (ret) {
			/* Dump the compressed stream, 32 bytes per line, so a
			 * failing input can be reproduced */
			for (j = 0; j < compressed_length; j++) {
				if ((j & 31) == 0)
					printf("\n");
				else
					printf(" ");
				printf("0x%02x,", compressed_stream[j]);
			}
			printf("\n");
		}

		fclose(file);
		free(compressed_stream);
		free(uncompressed_stream);
		free(uncompressed_test_stream);

		if (ret) {
			printf(" ... Fail with exit code %d\n", ret);
			return ret;
		} else
			printf(" ... Pass\n");

		fin_ret |= ret;
	}
	return fin_ret;
}

View File

@ -0,0 +1,73 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Multibinary dispatch stubs for the igzip entry points. Each public symbol
; declared with mbin_interface is paired with an mbin_dispatch_init5 line that
; lists the candidate implementations for it.
; NOTE(review): the macros presumably come from multibinary.asm (included
; below) and presumably pick a candidate from CPU features at first call;
; confirm there, including what each of the five slots means.

; Use RIP-relative addressing by default and assemble for 64-bit mode
default rel
[bits 64]
; For ELF64 output, WRT_OPT expands to a PLT-relative reference; for any other
; output format it is empty.
; NOTE(review): WRT_OPT is not referenced directly in this file; presumably it
; is consumed by the multibinary macros.
%ifidn __OUTPUT_FORMAT__, elf64
%define WRT_OPT wrt ..plt
%else
%define WRT_OPT
%endif
%include "reg_sizes.asm"
; Candidate implementations, defined in other objects. The suffixes _base, _01
; and _04 name distinct variants of the same routine.
extern isal_deflate_body_stateless_base
extern isal_deflate_body_stateless_01
extern isal_deflate_body_stateless_04
extern isal_deflate_body_base
extern isal_deflate_body_01
extern isal_deflate_body_04
extern isal_deflate_finish_base
extern isal_deflate_finish_01
extern get_crc_base
extern get_crc_01
extern isal_deflate_init_base
extern isal_deflate_init_01
section .text
%include "multibinary.asm"
; Only the two deflate body routines have a _04 variant in their last slot;
; init, finish and get_crc reuse the _01 variant for every non-base slot.
mbin_interface isal_deflate_init
mbin_dispatch_init5 isal_deflate_init, isal_deflate_init_base, isal_deflate_init_01, isal_deflate_init_01, isal_deflate_init_01
mbin_interface isal_deflate_body_stateless
mbin_dispatch_init5 isal_deflate_body_stateless, isal_deflate_body_stateless_base, isal_deflate_body_stateless_01, isal_deflate_body_stateless_01, isal_deflate_body_stateless_04
mbin_interface isal_deflate_body
mbin_dispatch_init5 isal_deflate_body, isal_deflate_body_base, isal_deflate_body_01, isal_deflate_body_01, isal_deflate_body_04
mbin_interface isal_deflate_finish
mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01
mbin_interface get_crc
mbin_dispatch_init5 get_crc, get_crc_base, get_crc_01, get_crc_01, get_crc_01

92
igzip/igzip_perf.c Normal file
View File

@ -0,0 +1,92 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "igzip_lib.h"
#include "test.h"
#define TEST_LEN (1024*1024)
#define IBUF_SIZE (1024*1024)
#define OBUF_SIZE (1024*1024)
#define TEST_LOOPS 400
#define TEST_TYPE_STR "_warm"
/* Fill data[0 .. size-1] with the repeating lowercase sequence
 * 'b', 'c', ..., 'z', 'a', 'b', ... (the first byte written is 'b'). */
void create_data(unsigned char *data, int size)
{
	char letter = 'a';

	for (; size != 0; size--) {
		/* Step to the next letter, wrapping from 'z' back to 'a' */
		if (letter < 'z')
			letter = letter + 1;
		else
			letter = 'a';

		*data = letter;
		data++;
	}
}
/* Warm-cache throughput benchmark: compress the same TEST_LEN bytes of
 * generated text TEST_LOOPS times with isal_deflate and print the timing.
 *
 * Returns 0 on success, -1 if stream.end_of_stream is clear after the run. */
int main(int argc, char *argv[])
{
	int i = 1;
	struct isal_zstream stream;
	/* Static storage: together these buffers are 2 MiB, which is more
	 * than some platforms give a thread's stack by default. */
	static unsigned char inbuf[IBUF_SIZE], zbuf[OBUF_SIZE];
	struct perf start, stop;

	printf("Window Size: %d K\n", HIST_SIZE);
	printf("igzip_perf: \n");
	fflush(0);

	create_data(inbuf, TEST_LEN);

	perf_start(&start);

	for (i = 0; i < TEST_LOOPS; i++) {
		/* Fresh stream each iteration, given the whole input at once */
		isal_deflate_init(&stream);
		stream.avail_in = TEST_LEN;
		stream.end_of_stream = 1;
		stream.next_in = inbuf;
		stream.flush = NO_FLUSH;

		/* Keep offering the same output buffer while the compressor
		 * fills it completely; the output itself is discarded. */
		do {
			stream.avail_out = OBUF_SIZE;
			stream.next_out = zbuf;
			isal_deflate(&stream);
		} while (stream.avail_out == 0);
	}

	perf_stop(&stop);
	printf("igzip" TEST_TYPE_STR ": ");
	/* i equals TEST_LOOPS here, so this is the total number of input bytes */
	perf_print(stop, start, (long long)TEST_LEN * i);

	if (!stream.end_of_stream) {
		printf("error: compression test could not fit into allocated buffers\n");
		return -1;
	}

	printf("End of igzip_perf\n\n");
	fflush(0);
	return 0;
}

1614
igzip/igzip_rand_test.c Normal file

File diff suppressed because it is too large Load Diff

644
igzip/igzip_stateless.asm Normal file
View File

@ -0,0 +1,644 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "options.asm"
%include "lz0a_const.asm"
%include "data_struct2.asm"
%include "bitbuf2.asm"
%include "huffman.asm"
%include "igzip_compare_types.asm"
%include "reg_sizes.asm"
%include "stdmac.asm"
%define LAST_BYTES_COUNT 3 ; Bytes to prevent reading out of array bounds
%define LA_STATELESS 264 ; Max number of bytes read in loop2 rounded up to 8 byte boundary
;; MARK <label>: in DEBUG builds, exports <label> as a global symbol at the
;; current position; in all other builds it expands to nothing.
%ifdef DEBUG
%macro MARK 1
global %1
%1:
%endm
%else
%macro MARK 1
%endm
%endif
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Symbolic names for the registers used by the routine below.
;; Several names alias the same physical register (for example tmp2 and hash2
;; are both rcx, and dist and code2 are both rbx), so only one value of each
;; alias group can be live at any point in the code.
%define tmp2 rcx
%define hash2 rcx
%define curr_data rax
%define code rax
%define tmp5 rax
%define tmp4 rbx
%define dist rbx
%define code2 rbx
%define hash rdx
%define len rdx
%define code_len3 rdx
%define tmp1 rsi
%define code_len2 rsi
%define file_start rdi
%define m_bit_count rbp
%define curr_data2 r8
%define len2 r8
%define tmp6 r8
%define m_bits r9
%define f_i r10
%define m_out_buf r11
%define f_end_i r12
%define dist2 r12
%define tmp7 r12
%define code4 r12
%define tmp3 r13
%define code3 r13
%define stream r14
%define hufftables r15
;; GPR r8 & r15 can be used
;; NOTE(review): r8 and r15 are both assigned in the list above, so the
;; remark on the previous line looks stale -- confirm before relying on it.
;; SIMD temporaries handed to the compare250_x / compare250_y macros.
%define xtmp0 xmm0 ; tmp
%define xtmp1 xmm1 ; tmp
%define ytmp0 ymm0 ; tmp
%define ytmp1 ymm1 ; tmp
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Stack frame layout, expressed as offsets from rsp once the frame has been
;; reserved. f_end_i_mem_offset holds the spilled copy of f_end_i, which is
;; reloaded after r12 has been reused under one of its other aliases.
;; NOTE(review): blen_mem_offset and xmm_save_mem_offset are not referenced by
;; the code visible in this file (only general-purpose registers are saved);
;; confirm whether an included macro uses them before removing either.
blen_mem_offset equ 0 ; local variable (8 bytes)
f_end_i_mem_offset equ 8
gpr_save_mem_offset equ 16 ; gpr save area (8*8 bytes)
xmm_save_mem_offset equ 16 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
stack_size equ 2*8 + 8*8 + 4*16 + 8
;;; 8 because stack address is odd multiple of 8 after a function call and
;;; we want it aligned to 16 bytes
; void isal_deflate_body_stateless ( isal_zstream *stream )
; arg 1: rcx: addr of stream
;; The exported symbol name ends in the ARCH value supplied by the file that
;; includes this one. For elf64 output the argument arrives in rdi and is
;; copied to rcx, so the rest of the routine always reads it from rcx.
global isal_deflate_body_stateless_ %+ ARCH
isal_deflate_body_stateless_ %+ ARCH %+ :
%ifidn __OUTPUT_FORMAT__, elf64
mov rcx, rdi
%endif
;; do nothing if (avail_in == 0)
cmp dword [rcx + _avail_in], 0
jne skip1
ret
skip1:
;; Reserve the stack frame. With ALIGN_STACK the original rsp is kept in rbp
;; and rsp is additionally rounded down to a 16-byte boundary.
%ifdef ALIGN_STACK
push rbp
mov rbp, rsp
sub rsp, stack_size
and rsp, ~15
%else
sub rsp, stack_size
%endif
;; Save the registers that are restored again at label end.
mov [rsp + gpr_save_mem_offset + 0*8], rbx
mov [rsp + gpr_save_mem_offset + 1*8], rsi
mov [rsp + gpr_save_mem_offset + 2*8], rdi
mov [rsp + gpr_save_mem_offset + 3*8], rbp
mov [rsp + gpr_save_mem_offset + 4*8], r12
mov [rsp + gpr_save_mem_offset + 5*8], r13
mov [rsp + gpr_save_mem_offset + 6*8], r14
mov [rsp + gpr_save_mem_offset + 7*8], r15
mov stream, rcx
;; has_eob is cleared here and only set to 1 at write_eob.
mov dword [stream + _internal_state_has_eob], 0
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
;; m_out_end = next_out + avail_out - SLOP; every "is the output full" test
;; in this routine compares m_out_buf against that value.
mov m_out_buf, [stream + _next_out]
mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf
mov tmp1 %+ d, [stream + _avail_out]
add tmp1, m_out_buf
sub tmp1, SLOP
skip_SLOP:
mov [stream + _internal_state_bitbuf_m_out_end], tmp1
;; Load the bit-buffer contents, its bit count and the Huffman table pointer.
mov m_bits, [stream + _internal_state_bitbuf_m_bits]
mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
mov hufftables, [stream + _hufftables]
; state->b_bytes_valid = stream->avail_in;
mov f_end_i %+ d, [stream + _avail_in]
mov [stream + _internal_state_b_bytes_valid], f_end_i %+ d
;; f_i is the current input offset relative to file_start (= next_in).
mov f_i, 0
mov file_start, [stream + _next_in]
mov [stream + _internal_state_file_start], file_start
; f_end_i -= LA;
;; The main loop stops LA_STATELESS bytes before the end of the input; the
;; bound is also spilled to the stack because r12 is reused inside the loop.
sub f_end_i, LA_STATELESS
mov [rsp + f_end_i_mem_offset], f_end_i
; if (f_end_i <= 0) continue;
cmp f_end_i, 0
jle end_loop_2
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
MARK __stateless_compute_hash_ %+ ARCH
mov curr_data %+ d, [file_start + f_i]
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end
;; Encode first byte in the stream as a literal
;; Its hash is recorded in head[] first so later positions can refer to it.
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
and curr_data, 0xff
get_lit_code curr_data, code2, code_len2, hufftables
jmp write_lit_bits
align 16
;; Main loop. Each iteration examines two consecutive input positions, f_i and
;; f_i + 1. The code that jumps here has already loaded the 4 bytes at f_i
;; into curr_data and curr_data2 and computed the unmasked hash of curr_data.
loop2:
;; hash2 = crc32 (seed 0) of the loaded data shifted right by one byte, used
;; as the hash for position f_i + 1.
shr curr_data2, 8
xor hash2 %+ d, hash2 %+ d
crc32 hash2 %+ d, curr_data2 %+ d
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
and hash %+ d, HASH_MASK
and hash2 %+ d, HASH_MASK
; if (state->bitbuf.is_full()) {
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end
xor dist, dist
xor dist2, dist2
xor tmp3, tmp3
;; tmp1 = address of position f_i, tmp6 = the byte just before it.
lea tmp1, [file_start + f_i]
lea tmp6, [tmp1 - 1]
;; dist = (16-bit) f_i - head[hash], the look-back distance for position f_i.
mov dist %+ w, f_i %+ w
sub dist %+ w, word [stream + _internal_state_head + 2 * hash]
; state->head[hash] = (uint16_t) f_i;
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
;; Advance to the second position; dist2 is left holding (distance - 1).
inc f_i
mov dist2 %+ w, f_i %+ w
sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2]
dec dist2
; state->head[hash2] = (uint16_t) f_i;
mov [stream + _internal_state_head + 2 * hash2], f_i %+ w
;; tmp2 = candidate match address for the first position.
mov tmp2, tmp1
sub tmp2, dist
dec dist
; if ((dist-1) < (D-1)) {
;; Branch-free range check: when (dist - 1) is not below (D - 1) the candidate
;; is replaced by the previous byte (tmp6) and dist ends up as 1 after the inc.
cmp dist %+ d, (D-1)
cmovae tmp2, tmp6
cmovae dist, tmp3
inc dist
;; Same range check for the second position; an out-of-range dist2 becomes 1.
cmp dist2 %+ d, (D-1)
cmovae dist2, tmp3
inc dist2
MARK __stateless_compare_ %+ ARCH
; len = compare258(state->file_start + f_i,
; state->file_start + f_i - dist);
;; Speculatively load distance code (except for when large windows are used)
get_packed_dist_code dist, code2, hufftables
;; Check for long len/dist match (>7) with first literal
;; len = XOR of 8 bytes at the position and at the candidate; zero means all
;; 8 bytes are equal and the match is extended in compare_loop.
mov len, [tmp1]
xor len, [tmp2]
jz compare_loop
%ifdef USE_HSWNI
;; tmp3 = mask of every bit up to and including the lowest differing bit of
;; len, widened so that it always covers at least the low 24 bits (3 bytes).
blsmsk tmp3, len
or tmp3, 0xFFFFFF
%endif
;; Now the second position (f_i was already incremented above).
lea tmp1, [file_start + f_i]
mov tmp2, tmp1
sub tmp2, dist2
;; Speculatively load distance code (except for when large windows are used)
get_packed_dist_code dist2, code4, hufftables
;; Check for len/dist match (>7) with second literal
mov len2, [tmp1]
xor len2, [tmp2]
jz compare_loop2
%ifdef USE_HSWNI
;; Check for len/dist match for first literal
;; ZF set: the second position has no differing bit inside the mask, so emit
;; the first byte as a literal followed by the match at the second position.
test tmp3, len2
jz len_dist_lit_huffman_pre
;; The mask is exactly 0xFFFFFF only when the first position differs within
;; its first 3 bytes; neither position then has a usable match.
cmp tmp3, 0xFFFFFF
je encode_2_literals
jmp len_dist_huffman_pre
MARK __stateless_len_dist_lit_huffman_ %+ ARCH
len_dist_lit_huffman_pre:
movzx tmp1, curr_data %+ b
get_lit_code tmp1, code3, code_len3, hufftables
%else
;; Speculatively load the code for the first literal
movzx tmp1, curr_data %+ b
get_lit_code tmp1, code3, rcx, hufftables
;; Check for len/dist match for first literal
;; Low 24 bits of len all zero: the first 3 bytes match at the first position.
test len, 0xFFFFFF
jz len_dist_huffman_pre
;; Speculatively load the code for the second literal
shr curr_data, 8
and curr_data, 0xff
get_lit_code curr_data, code2, code_len2, hufftables
;; Combine both literal codes: the first literal occupies the low bits.
shl code2, cl
or code2, code3
add code_len2, rcx
;; Check for len/dist match for second literal
test len2, 0xFFFFFF
jnz write_lit_bits
MARK __stateless_len_dist_lit_huffman_ %+ ARCH
len_dist_lit_huffman_pre:
mov code_len3, rcx
%endif
;; Index of the lowest differing bit divided by 8 = number of matching bytes.
bsf len2, len2
shr len2, 3
;; Emit one literal (code3 / code_len3) followed by a length/distance pair for
;; the match that starts at the second position.
len_dist_lit_huffman:
%ifndef LONGER_HUFFTABLE
mov tmp4, dist2
get_dist_code tmp4, code4, code_len2, hufftables ;; clobbers dist, rcx
%else
unpack_dist_code code4, code_len2
%endif
get_len_code len2, code, rcx, hufftables ;; rcx is code_len
;; code4 = (distance code << length-code size) | length code
%ifdef USE_HSWNI
shlx code4, code4, rcx
%else
shl code4, cl
%endif
or code4, code
add code_len2, rcx
;; Then make room below it for the literal code.
mov rcx, code_len3
%ifdef USE_HSWNI
shlx code4, code4, rcx
%else
shl code4, cl
%endif
or code4, code3
add code_len2, rcx
mov code2, code4
;; Setup for updating hash
lea tmp3, [f_i + 1] ; tmp3 <= k
add f_i, len2
; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp5 %+ d, [file_start + tmp3]
mov tmp7, tmp5
shr tmp7, 8
compute_hash hash, tmp5
and hash %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
add tmp3,1
jmp update_hash_for_symbol
;; encode as dist/len
MARK __stateless_len_dist_huffman_ %+ ARCH
len_dist_huffman_pre:
;; Index of the lowest differing bit divided by 8 = number of matching bytes.
bsf len, len
shr len, 3
len_dist_huffman:
;; The match starts at the first position, so undo the inc done in loop2.
dec f_i
; get_dist_code(dist, &code2, &code_len2);
%ifndef LONGER_HUFFTABLE
mov tmp3, dist ; since code2 and dist are rbx
get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx
%else
unpack_dist_code code2, code_len2
%endif
; get_len_code(len, &code, &code_len);
get_len_code len, code, rcx, hufftables ;; rcx is code_len
; code2 <<= code_len
; code2 |= code
; code_len2 += code_len
%ifdef USE_HSWNI
shlx code2, code2, rcx
%else
shl code2, cl
%endif
or code2, code
add code_len2, rcx
;; Setup for updating hash
;; Positions f_i and f_i + 1 were already entered into head[] by loop2, so
;; hashing resumes at f_i + 2.
lea tmp3, [f_i + 2] ; tmp3 <= k
add f_i, len
mov tmp7 %+ d, [file_start + tmp3]
MARK __stateless_update_hash_for_symbol_ %+ ARCH
update_hash_for_symbol:
;; Preload the data and the (unmasked) hash for the next loop2 iteration.
mov curr_data %+ d, [file_start + f_i]
mov curr_data2, curr_data
compute_hash hash, curr_data
%ifdef LIMIT_HASH_UPDATE
; only update hash twice, first hash was already calculated.
; hash = compute_hash(state->file_start + k) & HASH_MASK;
compute_hash hash2, tmp7
and hash2 %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
%else
;; Enter every remaining position covered by the match into head[]. The
;; test is at the bottom, so the body runs at least once.
loop3:
; hash = compute_hash(state->file_start + k) & HASH_MASK;
mov tmp7 %+ d, [file_start + tmp3]
compute_hash hash2, tmp7
and hash2 %+ d, HASH_MASK
; state->head[hash] = k;
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
add tmp3,1
cmp tmp3, f_i
jl loop3
%endif
MARK __stateless_write_len_dist_bits_ %+ ARCH
;; r12 was reused as dist2 / tmp7 / code4, so reload the loop bound.
mov f_end_i, [rsp + f_end_i_mem_offset]
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
; continue
cmp f_i, f_end_i
jl loop2
jmp end_loop_2
MARK __stateless_write_lit_bits_ %+ ARCH
%ifdef USE_HSWNI
;; Neither position has a usable match: build one combined code holding the
;; literal for the low byte of curr_data and the literal for the next byte.
encode_2_literals:
movzx tmp1, curr_data %+ b
get_lit_code tmp1, code3, rcx, hufftables
shr curr_data, 8
and curr_data, 0xff
get_lit_code curr_data, code2, code_len2, hufftables
;; Calculate code associated with both literals
shlx code2, code2, rcx
or code2, code3
add code_len2, rcx
%endif
;; Write the literal code(s) held in code2 / code_len2 and advance one more
;; byte. The loop bound is reloaded because r12 may have been reused.
write_lit_bits:
mov f_end_i, [rsp + f_end_i_mem_offset]
add f_i, 1
;; Preload the data and the (unmasked) hash for the next loop2 iteration.
mov curr_data %+ d, [file_start + f_i]
mov curr_data2, curr_data
compute_hash hash, curr_data
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
; continue
cmp f_i, f_end_i
jl loop2
MARK __stateless_end_loops_ %+ ARCH
end_loop_2:
;; Handle the last bytes (at most LA_STATELESS bytes)
;; f_end_i held avail_in - LA_STATELESS, so it now becomes
;; avail_in - LAST_BYTES_COUNT.
add f_end_i, LA_STATELESS - LAST_BYTES_COUNT
cmp f_i, f_end_i
jge end_loop_2_finish
;; One position per iteration, with the match length bounded by the number of
;; input bytes that remain.
loop2_finish:
;; Check for space in out buffer
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end
mov curr_data %+ d, [file_start + f_i]
compute_hash hash, curr_data
and hash %+ d, HASH_MASK
;; Calculate possible distance for length/dist pair.
xor dist, dist
mov dist %+ w, f_i %+ w
sub dist %+ w, word [stream + _internal_state_head + 2 * hash]
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
;; Check if look back distance is valid (the dec is to handle when dist = 0)
dec dist
cmp dist %+ d, (D-1)
jae encode_literal_finish
inc dist
;; Check if look back distance is a match
;; tmp6 = number of input bytes remaining from f_i to the end of the input.
lea tmp6, [f_end_i + LAST_BYTES_COUNT]
sub tmp6, f_i
lea tmp1, [file_start + f_i]
mov tmp2, tmp1
sub tmp2, dist
compare tmp6, tmp1, tmp2, len, tmp3
;; Limit len to maximum value of 258
mov tmp2, 258
cmp len, 258
cmova len, tmp2
cmp len, SHORTEST_MATCH
jb encode_literal_finish
;; Encode len/dist pair
%ifndef LONGER_HUFFTABLE
mov tmp3, dist
get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx
%else
get_dist_code dist, code2, code_len2, hufftables ;; clobbers dist, rcx
%endif
get_len_code len, code, rcx, hufftables ;; rcx is code_len
;; Combine length and distance code for writing it out
%ifdef USE_HSWNI
shlx code2, code2, rcx
%else
shl code2, cl
%endif
or code2, code
add code_len2, rcx
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
;; Setup for next loop
add f_i, len
cmp f_i, f_end_i
jl loop2_finish
jmp end_loop_2_finish
encode_literal_finish:
;; Encode literal
and curr_data %+ d, 0xFF
get_lit_code curr_data, code2, code_len2, hufftables
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
;; Setup for next loop
add f_i, 1
cmp f_i, f_end_i
jl loop2_finish
end_loop_2_finish:
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end
;; Check if any bytes left (at most LAST_BYTES_COUNT bytes)
;; After this add f_end_i equals the total input length (avail_in).
add f_end_i, LAST_BYTES_COUNT
cmp f_i, f_end_i
jz write_eob
;; Handle encoding last few bytes by encoding them as literals
xor curr_data, curr_data
final_bytes:
movzx curr_data, byte [file_start + f_i]
get_lit_code curr_data, code2, code_len2, hufftables
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
;; Leave without an end-of-block symbol if the output space is exhausted.
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
ja end
inc f_i
cmp f_i, f_end_i
jl final_bytes
write_eob:
;; Write out end of block
;; Symbol 256 is the end-of-block code; has_eob records that it was emitted.
get_lit_code 256, code2, code_len2, hufftables
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
mov dword [stream + _internal_state_has_eob], 1
;; Common exit: publish progress to the stream structure, restore the saved
;; registers and return. f_i holds the number of input bytes consumed.
end:
;; update input buffer
add [stream + _total_in], f_i %+ d
add [stream + _next_in], f_i
sub [stream + _avail_in], f_i %+ d
;; update output buffer
;; After the sub, m_out_buf holds the number of output bytes produced.
mov [stream + _next_out], m_out_buf
sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start]
sub [stream + _avail_out], m_out_buf %+ d
add [stream + _total_out], m_out_buf %+ d
;; Store the bit-buffer contents and its bit count back into the state.
mov [stream + _internal_state_bitbuf_m_bits], m_bits
mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
mov rbx, [rsp + gpr_save_mem_offset + 0*8]
mov rsi, [rsp + gpr_save_mem_offset + 1*8]
mov rdi, [rsp + gpr_save_mem_offset + 2*8]
mov rbp, [rsp + gpr_save_mem_offset + 3*8]
mov r12, [rsp + gpr_save_mem_offset + 4*8]
mov r13, [rsp + gpr_save_mem_offset + 5*8]
mov r14, [rsp + gpr_save_mem_offset + 6*8]
mov r15, [rsp + gpr_save_mem_offset + 7*8]
;; Release the frame the same way the prologue reserved it.
%ifndef ALIGN_STACK
add rsp, stack_size
%else
mov rsp, rbp
pop rbp
%endif
ret
MARK __stateless_compare_loops_ %+ ARCH
;; Reached from loop2 when the first 8 bytes at tmp1 and tmp2 are identical
;; (first position). COMPARE_TYPE selects the scalar, xmm or ymm variant of
;; the comparison macro.
;; NOTE(review): presumably the macro leaves the total match length in len --
;; confirm against the macro definitions in the included files.
compare_loop:
%if (COMPARE_TYPE == 1)
compare250 tmp1, tmp2, len, tmp3
%elif (COMPARE_TYPE == 2)
compare250_x tmp1, tmp2, len, tmp3, xtmp0, xtmp1
%elif (COMPARE_TYPE == 3)
compare250_y tmp1, tmp2, len, tmp3, ytmp0, ytmp1
%else
%error Unknown Compare type COMPARE_TYPE
% error
%endif
jmp len_dist_huffman
;; Same as compare_loop, but for the second position: the result goes to len2
;; and the literal code for the first byte is fetched before the jump to
;; len_dist_lit_huffman.
compare_loop2:
%if (COMPARE_TYPE == 1)
compare250 tmp1, tmp2, len2, tmp3
%elif (COMPARE_TYPE == 2)
compare250_x tmp1, tmp2, len2, tmp3, xtmp0, xtmp1
%elif (COMPARE_TYPE == 3)
compare250_y tmp1, tmp2, len2, tmp3, ytmp0, ytmp1
%else
%error Unknown Compare type COMPARE_TYPE
% error
%endif
and curr_data, 0xff
get_lit_code curr_data, code3, code_len3, hufftables
jmp len_dist_lit_huffman
section .data
;; 64-bit constant holding the value D.
;; NOTE(review): const_D is not referenced by the code visible in this file,
;; and an 8-byte dq is placed after only 4-byte alignment -- confirm whether an
;; included macro relies on it.
align 4
const_D: dq D

View File

@ -0,0 +1,7 @@
;; Build variant 01 of the stateless deflate body. ARCH becomes the suffix of
;; the symbol exported by the included file, and COMPARE_TYPE (default 1,
;; overridable from outside) selects its compare250 match-extension macro.
%define ARCH 01
%ifndef COMPARE_TYPE
%define COMPARE_TYPE 1
%endif
%include "igzip_stateless.asm"

View File

@ -0,0 +1,8 @@
;; Build variant 04 of the stateless deflate body. ARCH becomes the suffix of
;; the symbol exported by the included file. USE_HSWNI switches that file to
;; its blsmsk / shlx code paths, and COMPARE_TYPE (default 3, overridable from
;; outside) selects the compare250_y macro, which uses ymm temporaries.
%define ARCH 04
%define USE_HSWNI
%ifndef COMPARE_TYPE
%define COMPARE_TYPE 3
%endif
%include "igzip_stateless.asm"

View File

@ -0,0 +1,151 @@
#include <stdint.h>
#include "igzip_lib.h"
#include "huffman.h"
#include "huff_codes.h"
#include "bitbuf2.h"
/*
 * Publish the progress made by the compression loop back to the stream.
 *
 * stream:   stream whose next_in has already been advanced by the caller.
 * state:    internal state owning the bit buffer that received the output.
 * end_in:   one past the last input byte of this call.
 * start_in: value next_in had when the call began.
 */
static inline void update_state(struct isal_zstream *stream, struct isal_zstate *state,
				uint8_t * end_in, uint8_t * start_in)
{
	uint32_t bytes_written;

	/* Input side: what is still unread, and what this call consumed. */
	stream->avail_in = end_in - stream->next_in;
	stream->total_in += stream->next_in - start_in;

	/* Output side: everything the bit buffer has emitted so far. */
	bytes_written = buffer_used(&state->bitbuf);
	stream->next_out = buffer_ptr(&state->bitbuf);
	stream->total_out += bytes_written;
	stream->avail_out -= bytes_written;
}
/*
 * Portable C implementation of the stateless deflate body.
 *
 * Compresses stream->avail_in bytes starting at stream->next_in into the
 * output buffer described by next_out / avail_out, using a hash table of
 * 16-bit positions (state->head) to find earlier occurrences of the next
 * 4 bytes. Returns immediately when avail_in is zero. When the bit buffer
 * reports it is full the stream counters are updated and the function
 * returns early; has_eob is set only when the end-of-block symbol was
 * written on a normal exit path.
 *
 * Multi-byte input words are interpreted with the first byte in the low
 * 8 bits (literal & 0xFF is the current byte).
 */
void isal_deflate_body_stateless_base(struct isal_zstream *stream)
{
	uint32_t literal = 0, hash;
	uint8_t *start_in, *end_in, *end, *next_hash;
	uint16_t match_length;
	uint32_t dist;
	uint64_t code, code_len, code2, code_len2, i, remaining;
	struct isal_zstate *state = &stream->internal_state;
	uint16_t *last_seen = state->head;

	if (stream->avail_in == 0)
		return;

	set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
	start_in = stream->next_in;
	end_in = stream->next_in + stream->avail_in;

	/*
	 * Main loop: runs while at least 4 input bytes remain. The bound is
	 * written as a length comparison so that no pointer before the start
	 * of the input is formed when fewer than 3 bytes were supplied.
	 */
	while (end_in - stream->next_in > 3) {
		if (is_full(&state->bitbuf)) {
			update_state(stream, state, end_in, start_in);
			return;
		}

		literal = *(uint32_t *) stream->next_in;
		hash = compute_hash(literal) & HASH_MASK;
		/* Positions are stored truncated to 16 bits, so the distance is
		 * recovered modulo 2^16. */
		dist = (uint64_t) (stream->next_in - last_seen[hash]) & 0xFFFF;
		last_seen[hash] = (uint64_t) stream->next_in;

		if (dist - 1 < IGZIP_D - 1 && stream->next_in - dist >= start_in) {	/* The -1 are to handle the case when dist = 0 */
			match_length =
			    compare258(stream->next_in - dist, stream->next_in,
				       end_in - stream->next_in);

			if (match_length >= SHORTEST_MATCH) {
				/* Enter the positions covered by the match into the
				 * hash table (only the first few when
				 * LIMIT_HASH_UPDATE is defined). */
				next_hash = stream->next_in;
#ifdef LIMIT_HASH_UPDATE
				end = next_hash + 3;
#else
				end = next_hash + match_length;
#endif
				/* Never hash a position with fewer than 4 bytes left. */
				if (end > end_in - 3)
					end = end_in - 3;
				next_hash++;
				for (; next_hash < end; next_hash++) {
					literal = *(uint32_t *) next_hash;
					hash = compute_hash(literal) & HASH_MASK;
					last_seen[hash] = (uint64_t) next_hash;
				}

				/* Length code in the low bits, distance code above it. */
				get_len_code(stream->hufftables, match_length, &code,
					     &code_len);
				get_dist_code(stream->hufftables, dist, &code2, &code_len2);
				code |= code2 << code_len;
				code_len += code_len2;
				write_bits(&state->bitbuf, code, code_len);

				stream->next_in += match_length;
				continue;
			}
		}

		get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
		write_bits(&state->bitbuf, code, code_len);
		stream->next_in++;
	}

	if (is_full(&state->bitbuf)) {
		update_state(stream, state, end_in, start_in);
		return;
	}

	/*
	 * Tail: at most 3 bytes remain. Assemble them into a zero-padded word,
	 * first byte lowest. The previous code loaded 4 bytes from end_in - 4
	 * and shifted, which read before the start of the input buffer whenever
	 * fewer than 4 bytes had been supplied; the resulting value is the same.
	 */
	remaining = end_in - stream->next_in;
	literal = 0;
	for (i = 0; i < remaining; i++)
		literal |= (uint32_t) stream->next_in[i] << (8 * i);

	hash = compute_hash(literal) & HASH_MASK;
	dist = (uint64_t) (stream->next_in - last_seen[hash]) & 0xFFFF;

	if (dist - 1 < IGZIP_D - 1 && stream->next_in - dist >= start_in) {
		match_length =
		    compare258(stream->next_in - dist, stream->next_in,
			       end_in - stream->next_in);

		if (match_length >= SHORTEST_MATCH) {
			get_len_code(stream->hufftables, match_length, &code, &code_len);
			get_dist_code(stream->hufftables, dist, &code2, &code_len2);
			code |= code2 << code_len;
			code_len += code_len2;
			write_bits(&state->bitbuf, code, code_len);
			/* At most 3 bytes remain here, so an accepted match
			 * consumes exactly 3. */
			stream->next_in += 3;

			if (is_full(&state->bitbuf)) {
				update_state(stream, state, end_in, start_in);
				return;
			}

			/* Symbol 256 is the end-of-block code. */
			get_lit_code(stream->hufftables, 256, &code, &code_len);
			write_bits(&state->bitbuf, code, code_len);

			/*
			 * NOTE(review): this path returns without setting has_eob
			 * when the buffer is full after the end-of-block write,
			 * whereas the final path below sets has_eob without such a
			 * check -- confirm which behaviour the caller expects.
			 */
			if (is_full(&state->bitbuf)) {
				update_state(stream, state, end_in, start_in);
				return;
			}

			state->has_eob = 1;
			update_state(stream, state, end_in, start_in);
			return;
		}
	}

	/* No usable match: emit the leftover bytes one literal at a time. */
	while (stream->next_in < end_in) {
		get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
		write_bits(&state->bitbuf, code, code_len);
		stream->next_in++;

		if (is_full(&state->bitbuf)) {
			update_state(stream, state, end_in, start_in);
			return;
		}
		literal >>= 8;
	}

	get_lit_code(stream->hufftables, 256, &code, &code_len);
	write_bits(&state->bitbuf, code, code_len);

	state->has_eob = 1;
	update_state(stream, state, end_in, start_in);
	return;
}

View File

@ -0,0 +1,155 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "igzip_lib.h"
#include "test.h"
/* Output buffer size used by main() when the input file is empty. */
#define BUF_SIZE 1024
/* Lower bound on the number of timed iterations. */
#define MIN_TEST_LOOPS 10
/*
 * Target number of input bytes to process over the whole run; main() divides
 * it by the file size to choose the iteration count. Can be overridden at
 * compile time.
 * NOTE(review): the default does not fit in a 32-bit int while main() stores
 * the quotient in an int -- confirm the behaviour for very small files.
 */
#ifndef RUN_MEM_SIZE
# define RUN_MEM_SIZE 5000000000
#endif
/* Stream object configured and reused by main() on every iteration. */
struct isal_zstream stream;
/*
 * Return the size of an open file in bytes, as reported by ftell() at the
 * end of the stream. The stream position in effect on entry is restored
 * before returning.
 */
int get_filesize(FILE * f)
{
	const int saved_pos = ftell(f);	/* where the caller left the stream */
	int size;

	fseek(f, 0L, SEEK_END);
	size = ftell(f);

	fseek(f, saved_pos, SEEK_SET);	/* put the position back */
	return size;
}
/*
 * Time repeated stateless compression of one file.
 *
 * Usage: igzip_file_perf infile [outfile]
 *
 * The whole input file is read into memory and compressed with
 * isal_deflate_stateless() "iterations" times into one output buffer; the
 * elapsed time is reported through perf_print(). When an output file is
 * named, the result of the last iteration is written to it.
 */
int main(int argc, char *argv[])
{
	FILE *in, *out = NULL;
	unsigned char *inbuf, *outbuf;
	int iter, infile_size, iterations, outbuf_size;
	struct perf start, stop;

	if (argc < 2 || argc > 3) {
		fprintf(stderr, "Usage: igzip_file_perf infile [outfile]\n"
			"\t - Runs multiple iterations of igzip on a file to "
			"get more accurate time results.\n");
		exit(0);
	}

	in = fopen(argv[1], "rb");
	if (in == NULL) {
		fprintf(stderr, "Can't open %s for reading\n", argv[1]);
		exit(0);
	}

	if (argc > 2) {
		out = fopen(argv[2], "wb");
		if (out == NULL) {
			fprintf(stderr, "Can't open %s for writing\n", argv[2]);
			exit(0);
		}
		printf("outfile=%s\n", argv[2]);
	}

	printf("Window Size: %d K\n", HIST_SIZE);
	printf("igzip_file_perf: \n");
	fflush(0);

	/*
	 * Size the buffers for the complete input and for the output, allowing
	 * for some expansion, and derive the iteration count from the file size.
	 */
	infile_size = get_filesize(in);
	if (infile_size == 0) {
		outbuf_size = BUF_SIZE;
		iterations = MIN_TEST_LOOPS;
	} else {
		outbuf_size = infile_size * 1.07;
		iterations = RUN_MEM_SIZE / infile_size;
	}
	if (iterations < MIN_TEST_LOOPS)
		iterations = MIN_TEST_LOOPS;

	inbuf = malloc(infile_size);
	if (inbuf == NULL) {
		fprintf(stderr, "Can't allocate input buffer memory\n");
		exit(0);
	}

	outbuf = malloc(outbuf_size);
	if (outbuf == NULL) {
		fprintf(stderr, "Can't allocate output buffer memory\n");
		exit(0);
	}

	printf("igzip_file_perf: %s %d iterations\n", argv[1], iterations);

	/* Load the entire input file before timing starts. */
	stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in);
	if (stream.avail_in != infile_size) {
		fprintf(stderr, "Couldn't fit all of input file into buffer\n");
		exit(0);
	}

	perf_start(&start);

	for (iter = 0; iter < iterations; iter++) {
		isal_deflate_init(&stream);

		/* Whole file in one call, into the start of the output buffer. */
		stream.next_in = inbuf;
		stream.avail_in = infile_size;
		stream.next_out = outbuf;
		stream.avail_out = outbuf_size;
		stream.end_of_stream = 1;
		stream.flush = NO_FLUSH;

		isal_deflate_stateless(&stream);

		/* Input left over means the output buffer was too small. */
		if (stream.avail_in != 0)
			break;
	}

	perf_stop(&stop);

	if (stream.avail_in != 0) {
		fprintf(stderr, "Could not compress all of inbuf\n");
		exit(0);
	}

	printf(" file %s - in_size=%d out_size=%d iter=%d ratio=%3.1f%%\n", argv[1],
	       infile_size, stream.total_out, iter, 100.0 * stream.total_out / infile_size);

	printf("igzip_file: ");
	perf_print(stop, start, (long long)infile_size * iter);

	/* out is non-NULL only when an output file was named and opened. */
	if (out != NULL) {
		printf("writing %s\n", argv[2]);
		fwrite(outbuf, 1, stream.total_out, out);
		fclose(out);
	}

	fclose(in);
	printf("End of igzip_file_perf\n\n");
	fflush(0);
	return 0;
}

View File

@ -0,0 +1,86 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "igzip_lib.h"
/* Size in bytes of each I/O buffer in main(). Parenthesized so the macro
 * expands correctly inside larger expressions such as x / BUF_SIZE. */
#define BUF_SIZE (8 * 1024)
struct isal_zstream stream;
/*
 * Example: compress a file with isal_deflate() using SYNC_FLUSH.
 *
 * Usage: igzip_sync_flush_example infile outfile
 *
 * Input is read one buffer at a time; a new buffer is only fetched when the
 * compressor state is ZSTATE_NEW_HDR. Each buffer is drained through a
 * fixed-size output buffer until the compressor stops filling it completely,
 * and the outer loop ends when the state reaches ZSTATE_END.
 */
int main(int argc, char *argv[])
{
	uint8_t inbuf[BUF_SIZE], outbuf[BUF_SIZE];
	FILE *in, *out;
	size_t produced;

	if (argc != 3) {
		fprintf(stderr, "Usage: igzip_sync_flush_example infile outfile\n");
		exit(0);
	}

	in = fopen(argv[1], "rb");
	if (in == NULL) {
		fprintf(stderr, "Can't open %s for reading\n", argv[1]);
		exit(0);
	}

	out = fopen(argv[2], "wb");
	if (out == NULL) {
		fprintf(stderr, "Can't open %s for writing\n", argv[2]);
		exit(0);
	}

	printf("igzip_sync_flush_example\nWindow Size: %d K\n", HIST_SIZE);
	fflush(0);

	isal_deflate_init(&stream);
	stream.flush = SYNC_FLUSH;
	stream.end_of_stream = 0;

	do {
		/* Refill the input only when the compressor asks for a new block. */
		if (stream.internal_state.state == ZSTATE_NEW_HDR) {
			stream.next_in = inbuf;
			stream.avail_in = (uint32_t) fread(inbuf, 1, sizeof(inbuf), in);
			stream.end_of_stream = feof(in);
		}

		/* Drain: keep going while the output buffer comes back full. */
		do {
			stream.next_out = outbuf;
			stream.avail_out = sizeof(outbuf);

			isal_deflate(&stream);

			produced = sizeof(outbuf) - stream.avail_out;
			fwrite(outbuf, 1, produced, out);
		} while (stream.avail_out == 0);

	} while (stream.internal_state.state != ZSTATE_END);

	fclose(out);
	fclose(in);

	printf("End of igzip_sync_flush_example\n\n");
	return 0;
}

View File

@ -0,0 +1,163 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "igzip_lib.h"
#include "test.h"
/* Output buffer size used by main() when the input file is empty. */
#define BUF_SIZE 1024
/* Lower bound on the number of timed iterations. */
#define MIN_TEST_LOOPS 100
/*
 * Target number of input bytes to process over the whole run; main() divides
 * it by the file size to choose the iteration count. Can be overridden at
 * compile time.
 */
#ifndef RUN_MEM_SIZE
# define RUN_MEM_SIZE 500000000
#endif
/* Stream object configured and reused by main() on every iteration. */
struct isal_zstream stream;
/*
 * Report the length of an open file in bytes by seeking to its end and
 * reading the position back; the caller's stream position is restored
 * afterwards.
 */
int get_filesize(FILE * f)
{
	int length;
	int original = ftell(f);	/* position to return to */

	fseek(f, 0L, SEEK_END);
	length = ftell(f);
	fseek(f, original, SEEK_SET);

	return length;
}
/*
 * Time repeated SYNC_FLUSH compression of one file.
 *
 * Usage: igzip_sync_flush_file_perf infile [outfile]
 *
 * The whole input file is read into memory. Each iteration compresses it
 * with two isal_deflate() calls: the first half of the input into the first
 * half of the output buffer, then the remainder (with end_of_stream set)
 * into the space that follows the output produced so far. The elapsed time
 * is reported through perf_print(). When an output file is named, the result
 * of the last iteration is written to it.
 */
int main(int argc, char *argv[])
{
	FILE *in, *out = NULL;
	unsigned char *inbuf, *outbuf;
	int i, infile_size, iterations, outbuf_size;

	if (argc > 3 || argc < 2) {
		fprintf(stderr, "Usage: igzip_sync_flush_file_perf infile [outfile]\n"
			"\t - Runs multiple iterations of igzip on a file to get more accurate time results.\n");
		exit(0);
	}

	in = fopen(argv[1], "rb");
	if (!in) {
		fprintf(stderr, "Can't open %s for reading\n", argv[1]);
		exit(0);
	}

	if (argc > 2) {
		out = fopen(argv[2], "wb");
		if (!out) {
			fprintf(stderr, "Can't open %s for writing\n", argv[2]);
			exit(0);
		}
		printf("outfile=%s\n", argv[2]);
	}

	printf("Window Size: %d K\n", HIST_SIZE);
	printf("igzip_sync_flush_file_perf: \n");
	fflush(0);

	/* Allocate space for entire input file and
	 * output (assuming 1:1 max output size)
	 */
	infile_size = get_filesize(in);
	if (infile_size != 0) {
		outbuf_size = infile_size;
		iterations = RUN_MEM_SIZE / infile_size;
	} else {
		outbuf_size = BUF_SIZE;
		iterations = MIN_TEST_LOOPS;
	}
	if (iterations < MIN_TEST_LOOPS)
		iterations = MIN_TEST_LOOPS;

	inbuf = malloc(infile_size);
	if (inbuf == NULL) {
		fprintf(stderr, "Can't allocate input buffer memory\n");
		exit(0);
	}

	outbuf = malloc(outbuf_size);
	if (outbuf == NULL) {
		fprintf(stderr, "Can't allocate output buffer memory\n");
		exit(0);
	}

	printf("igzip_sync_flush_file_perf: %s %d iterations\n", argv[1], iterations);

	/* Read complete input file into buffer */
	stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in);
	if (stream.avail_in != infile_size) {
		fprintf(stderr, "Couldn't fit all of input file into buffer\n");
		exit(0);
	}

	struct perf start, stop;
	perf_start(&start);

	for (i = 0; i < iterations; i++) {
		/* First call: first half of the input, first half of the output. */
		isal_deflate_init(&stream);
		stream.end_of_stream = 0;
		stream.flush = SYNC_FLUSH;
		stream.next_in = inbuf;
		stream.avail_in = infile_size / 2;
		stream.next_out = outbuf;
		stream.avail_out = outbuf_size / 2;
		isal_deflate(&stream);

		/* Nothing more to feed for an empty file. */
		if (infile_size == 0)
			continue;

		/* Second call: the rest of the input, marked as end of stream. */
		stream.avail_in = infile_size - infile_size / 2;
		stream.end_of_stream = 1;
		stream.next_in = inbuf + stream.total_in;
		stream.flush = SYNC_FLUSH;
		/*
		 * Remaining output capacity is the other half of the output
		 * buffer. This used to be derived from infile_size, which only
		 * gave the same number because outbuf_size equals infile_size on
		 * this path; using outbuf_size keeps it tied to the allocation.
		 */
		stream.avail_out = outbuf_size - outbuf_size / 2;
		stream.next_out = outbuf + stream.total_out;
		isal_deflate(&stream);

		/* Input left over means the output space ran out. */
		if (stream.avail_in != 0)
			break;
	}

	perf_stop(&stop);

	if (stream.avail_in != 0) {
		fprintf(stderr, "Could not compress all of inbuf\n");
		exit(0);
	}

	printf(" file %s - in_size=%d out_size=%d iter=%d ratio=%3.1f%%\n", argv[1],
	       infile_size, stream.total_out, i, 100.0 * stream.total_out / infile_size);

	printf("igzip_file: ");
	perf_print(stop, start, (long long)infile_size * i);

	if (argc > 2 && out) {
		printf("writing %s\n", argv[2]);
		fwrite(outbuf, 1, stream.total_out, out);
		fclose(out);
	}

	fclose(in);
	printf("End of igzip_sync_flush_file_perf\n\n");
	fflush(0);
	return 0;
}

View File

@ -0,0 +1,96 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "igzip_lib.h"
#include "test.h"
#define TEST_LEN (1024*1024)
#define IBUF_SIZE (1024*1024)
#define OBUF_SIZE (1024*1024)
#define TEST_LOOPS 400
#define TEST_TYPE_STR "_warm"
/*
 * Fill data[0 .. size-1] with a repeating lowercase-alphabet pattern.
 *
 * The running character starts at 'a' and is advanced *before* each store,
 * so the generated sequence is "bcd...zabcd...".
 *
 * A zero or negative size writes nothing.  (The previous `while (size--)`
 * form kept writing past the end of the buffer when size was negative.)
 */
void create_data(unsigned char *data, int size)
{
	char c = 'a';
	int i;

	for (i = 0; i < size; i++) {
		/* Step to the next letter, wrapping from 'z' back to 'a' */
		c = (c < 'z') ? c + 1 : 'a';
		data[i] = c;
	}
}
/*
 * Benchmark driver: compresses the same TEST_LEN-byte synthetic buffer
 * TEST_LOOPS times with SYNC_FLUSH and hands the start/stop timestamps and
 * the total number of input bytes to perf_print().
 *
 * Returns 0 on success, -1 if the final stream does not have end_of_stream
 * set after the timed loop.
 */
int main(int argc, char *argv[])
{
	/*
	 * Static storage: as automatic variables these two 1 MiB buffers need
	 * 2 MiB of stack (on top of the stream state), which is more than the
	 * default main-thread stack on some platforms.
	 */
	static unsigned char inbuf[IBUF_SIZE], zbuf[OBUF_SIZE];
	struct isal_zstream stream;
	struct perf start, stop;
	int i;

	create_data(inbuf, TEST_LEN);
	printf("Window Size: %d K\n", HIST_SIZE);
	printf("igzip_sync_flush_perf: \n");
	fflush(0);

	perf_start(&start);

	for (i = 0; i < TEST_LOOPS; i++) {
		/* Every iteration compresses the whole buffer from a fresh state */
		isal_deflate_init(&stream);
		stream.avail_in = TEST_LEN;
		/* Only the last iteration is flagged as the end of the stream */
		stream.end_of_stream = (i == (TEST_LOOPS - 1)) ? 1 : 0;
		stream.next_in = inbuf;
		stream.flush = SYNC_FLUSH;

		/*
		 * The output is discarded: zbuf is reused from its start on
		 * every call, and isal_deflate() is called again for as long
		 * as it fills the output buffer completely.
		 */
		do {
			stream.avail_out = OBUF_SIZE;
			stream.next_out = zbuf;
			isal_deflate(&stream);
		} while (stream.avail_out == 0);
	}

	perf_stop(&stop);

	printf("igzip_sync_flush_perf" TEST_TYPE_STR ": ");
	perf_print(stop, start, (long long)(TEST_LEN) * (i));

	/*
	 * NOTE(review): end_of_stream is set by this function on the last
	 * iteration; unless isal_deflate() clears it, this check cannot
	 * fail -- confirm the intended condition.
	 */
	if (!stream.end_of_stream) {
		printf("error: compression test could not fit into allocated buffers\n");
		return -1;
	}

	printf("End of igzip_sync_flush_perf\n\n");
	fflush(0);
	return 0;
}

44
igzip/lz0a_const.asm Normal file
View File

@ -0,0 +1,44 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Size constants for the compressor's history window, look-ahead and hash
;; table. HIST_SIZE (the window size in KiB) must already be defined when
;; this file is assembled; options.asm defines it as 8 or 32.
%assign K 1024
%assign D HIST_SIZE * K ;; Amount of history
;; NOTE(review): 17 * 16 = 272 is a multiple of 16 but not of 32, so the
;; "32 byte boundary" wording below looks inaccurate (LA_STATELESS gives the
;; unrounded value, 258) -- confirm.
%assign LA 17 * 16 ;; Max look-ahead, rounded up to 32 byte boundary
%assign BSIZE 2*HIST_SIZE*K + LA ;; Nominal buffer size
;; Constants for stateless compression
%define LAST_BYTES_COUNT 3 ;; Bytes to prevent reading out of array bounds
%define LA_STATELESS 258 ;; No round up since no data is copied to a buffer
;; The hash table has one entry per byte of history; HASH_SIZE is a power of
;; two for both supported HIST_SIZE values, so HASH_MASK is an all-ones mask.
%assign HASH_SIZE D
%assign HASH_MASK (HASH_SIZE - 1)
%assign SHORTEST_MATCH 3
%assign SLOP 8

87
igzip/options.asm Normal file
View File

@ -0,0 +1,87 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Build-time option selection shared by the igzip assembly sources.
;; "default rel" sits outside the include guard, so it is re-applied on
;; every inclusion of this file.
default rel
%ifndef __OPTIONS_ASM__
%define __OPTIONS_ASM__
; DEFLATE is defined unless the build requests IGZIP_USE_GZIP_FORMAT
%ifndef IGZIP_USE_GZIP_FORMAT
%define DEFLATE
%endif
; Options:dir
; m - reschedule mem reads
; e b - bitbuff style
; t s x - compare style
; h - limit hash updates
; l - use longer huffman table
; f - fix cache read
; History window size in KiB: 32 with LARGE_WINDOW, otherwise 8
%ifdef LARGE_WINDOW
%define HIST_SIZE 32
%else
%define HIST_SIZE 8
%endif
; Keep an explicitly selected bit-buffer style; default to USE_BITBUFB when
; none of the three USE_BITBUF* symbols is defined.
%ifdef USE_BITBUFB
%elifdef USE_BITBUF8
%elifdef USE_BITBUF_ELSE
%else
; bit buffer types
; BITBUFB: (b) Always write data
%define USE_BITBUFB
%endif
; (h) limit hash update
%define LIMIT_HASH_UPDATE
; (l) longer huffman table
%define LONGER_HUFFTABLE
; (f) fix cache read problem
%define FIX_CACHE_READ
; The longer huffman table is only used with the 8K window
%if (HIST_SIZE > 8)
%undef LONGER_HUFFTABLE
%endif
%define IGZIP_MAX_DEF_HDR_SIZE 328
; WRT_OPT expands to "wrt ..sym" only for elf64 output when __NASM_VER__ is
; not defined, and to nothing otherwise.
; NOTE(review): presumably this targets assemblers other than NASM (e.g.
; yasm) -- confirm.
%ifidn __OUTPUT_FORMAT__, elf64
%ifndef __NASM_VER__
%define WRT_OPT wrt ..sym
%else
%define WRT_OPT
%endif
%else
%define WRT_OPT
%endif
%endif ; ifndef __OPTIONS_ASM__

View File

@ -0,0 +1,68 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#ifndef _IGZIP_REPEATED_8K_CHAR_RESULT_H_
#define _IGZIP_REPEATED_8K_CHAR_RESULT_H_

/*
 * Precomputed codes and deflate headers used to encode long runs of a
 * single repeated byte value (0x00 or 0xFF).
 *
 * Macros whose replacement text is an expression are fully parenthesized
 * so they expand correctly inside larger expressions
 * (e.g. n * CODE_280_TOTAL_LENGTH, len / MIN_REPEAT_LEN).
 */

/* The code for the literal being encoded */
#define CODE_LIT 0x1
#define CODE_LIT_LENGTH 0x2

/* The code for repeat 10. The Length includes the distance code length*/
#define CODE_10 0x3
#define CODE_10_LENGTH 0x4

/* The code for repeat 115-130. The Length includes the distance code length*/
#define CODE_280 0x0f
#define CODE_280_LENGTH 0x4
#define CODE_280_TOTAL_LENGTH (CODE_280_LENGTH + 4 + 1)

/* Code representing the end of block. */
#define END_OF_BLOCK 0x7
#define END_OF_BLOCK_LEN 0x4

/* MIN_REPEAT_LEN currently optimizes storage space, another possibility is to
 * find the size which optimizes speed instead.*/
#define MIN_REPEAT_LEN (4 * 1024)

/* NOTE(review): presumably the byte length of the four header words listed
 * in each row of repeated_char_header -- confirm. */
#define HEADER_LENGTH 16

/* Maximum length of the portion of the header represented by repeat lengths
 * smaller than 258 */
#define MAX_FIXUP_CODE_LENGTH 8

/* Headers for constant 0x00 and 0xFF blocks
 * This also contains the first literal character.
 * Each row holds five words but only four are listed; the fifth is therefore
 * zero-initialized. */
const uint32_t repeated_char_header[2][5] = {
	{0x0121c0ec, 0xc30c0000, 0x7d57fab0, 0x49270938},	/* Deflate header for 0x00 */
	{0x0121c0ec, 0xc30c0000, 0x7baaff30, 0x49270938}	/* Deflate header for 0xFF */
};

#endif /*_IGZIP_REPEATED_8K_CHAR_RESULT_H_*/

207
igzip/stdmac.asm Normal file
View File

@ -0,0 +1,207 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; internal macro used by push_all
;; push args L to R
;; Takes one or more operands. The operand count (%0) is recorded in
;; _PUSH_ALL_REGS_COUNT_, which STACK_OFFSET reads. Each pass pushes the
;; current first operand, then %rotate 1 moves the next operand into %1.
%macro push_all_ 1-*
%xdefine _PUSH_ALL_REGS_COUNT_ %0
%rep %0
push %1
%rotate 1
%endrep
%endmacro
;; internal macro used by pop_all
;; pop args R to L
;; Rotating by -1 *before* each pop brings the last remaining operand into
;; %1, so the operands are popped in the reverse of the order in which
;; push_all_ pushed them.
%macro pop_all_ 1-*
%rep %0
%rotate -1
pop %1
%endrep
%endmacro
;; Initial bookkeeping state: nothing pushed, nothing allocated.
%xdefine _PUSH_ALL_REGS_COUNT_ 0
%xdefine _ALLOC_STACK_VAL_ 0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; STACK_OFFSET
;; Number of bytes subtracted from stack due to PUSH_ALL and ALLOC_STACK
;; Defined with %define (not %xdefine), so the two counters are looked up
;; each time STACK_OFFSET is used and reflect the most recent
;; PUSH_ALL/POP_ALL/ALLOC_STACK/RESTORE_STACK. Each pushed operand is
;; counted as 8 bytes.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define STACK_OFFSET (_PUSH_ALL_REGS_COUNT_ * 8 + _ALLOC_STACK_VAL_)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; PUSH_ALL reg1, reg2, ...
;; push args L to R, remember regs for pop_all
;; The greedy "1+" parameter receives the whole comma-separated operand
;; list as %1. That text is saved in _PUSH_ALL_REGS_ for POP_ALL and is
;; split back into individual operands when passed on to push_all_.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro PUSH_ALL 1+
%xdefine _PUSH_ALL_REGS_ %1
push_all_ %1
%endmacro
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; POP_ALL
;; pop args from prev "push_all" R to L
;; Uses the operand list saved in _PUSH_ALL_REGS_ by the most recent
;; PUSH_ALL, then resets the pushed-register count to 0 so STACK_OFFSET no
;; longer includes those pushes.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro POP_ALL 0
pop_all_ _PUSH_ALL_REGS_
%xdefine _PUSH_ALL_REGS_COUNT_ 0
%endmacro
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALLOC_STACK n
;; subtract n from the stack pointer and remember the value for restore_stack
;; The value is stored in _ALLOC_STACK_VAL_, replacing any earlier one, so
;; only the most recent ALLOC_STACK is undone by RESTORE_STACK.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro ALLOC_STACK 1
%xdefine _ALLOC_STACK_VAL_ %1
sub rsp, %1
%endmacro
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; RESTORE_STACK
;; add n to the stack pointer, where n is the arg to the previous alloc_stack
;; Afterwards the remembered value is reset to 0, so STACK_OFFSET no longer
;; includes the allocation.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro RESTORE_STACK 0
add rsp, _ALLOC_STACK_VAL_
%xdefine _ALLOC_STACK_VAL_ 0
%endmacro
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NOPN n
;; Create n bytes of NOP, using nops of up to 8 bytes each
;; Emits 8-byte NOPs while more than 8 bytes remain, then one final NOP of
;; the remaining 1..8 bytes.
;; Limits: n = 0 reaches "nopn 0", which raises nopn's %error. The loop is
;; bounded by %rep 200, so for n > 1600 only 1600 bytes are emitted and no
;; error is reported.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro NOPN 1
%assign %%i %1
%rep 200
%if (%%i < 9)
nopn %%i
%exitrep
%else
nopn 8
%assign %%i (%%i - 8)
%endif
%endrep
%endmacro
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; nopn n
;; Emit a single NOP instruction that is exactly n bytes long, 1 <= n <= 9.
;; Sizes 3..9 are written out as raw encoding bytes, one db list per size;
;; any other n stops assembly with an error.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro nopn 1
%if (%1 == 1)
	nop
%elif (%1 == 2)
	db 0x66				; operand-size prefix in front of ...
	nop				; ... the one-byte nop
%elif (%1 == 3)
	db 0x0F, 0x1F, 0x00
%elif (%1 == 4)
	db 0x0F, 0x1F, 0x40, 0x00
%elif (%1 == 5)
	db 0x0F, 0x1F, 0x44, 0x00, 0x00
%elif (%1 == 6)
	db 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00
%elif (%1 == 7)
	db 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00
%elif (%1 == 8)
	db 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
%elif (%1 == 9)
	db 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
%else
%error Invalid value to nopn
%endif
%endmacro
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rolx64 dst, src, amount
;; Emulate a rolx instruction using rorx, assuming data 64 bits wide
;; A left rotate by "amount" is issued as a right rotate by (64 - amount).
;; "amount" is pasted into the expression unparenthesized, so pass a plain
;; constant (or parenthesize a compound expression).
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro rolx64 3
rorx %1, %2, (64-%3)
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rolx32 dst, src, amount
;; Emulate a rolx instruction using rorx, assuming data 32 bits wide
;; A left rotate by "amount" is issued as a right rotate by (32 - amount);
;; this is only a 32-bit rotate when dst/src are 32-bit operands.
;; "amount" is pasted into the expression unparenthesized, so pass a plain
;; constant (or parenthesize a compound expression).
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro rolx32 3
rorx %1, %2, (32-%3)
%endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Define a function void ssc(uint64_t x)
;; Expands to a global function "ssc" that saves rbx in rax, copies rcx
;; into rbx, executes a nop carrying the two prefix bytes 0x64 0x67, and
;; restores rbx before returning. rax is overwritten.
;; NOTE(review): x is taken from rcx, the first integer argument register of
;; the Microsoft x64 convention (System V uses rdi) -- confirm intended ABI.
;; NOTE(review): the prefixed nop is presumably a marker recognised by a
;; simulator/tracing tool, with rbx carrying the marker value -- confirm.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro DEF_SSC 0
global ssc
ssc:
mov rax, rbx
mov rbx, rcx
db 0x64
db 0x67
nop
mov rbx, rax
ret
%endm

371
include/igzip_lib.h Normal file
View File

@ -0,0 +1,371 @@
/**********************************************************************
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#ifndef _IGZIP_H
#define _IGZIP_H
/**
* @file igzip_lib.h
*
* @brief This file defines the igzip compression interface, a high performance
* deflate compression interface for storage applications.
*
* Deflate is a widely used compression standard that can be used standalone, it
* also forms the basis of gzip and zlib compression formats. Igzip supports the
* following flush features:
*
* - No Flush: The default method where no flush is performed.
*
* - Sync flush: whereby isal_deflate() finishes the current deflate block at
* the end of each input buffer. The deflate block is byte aligned by
* appending an empty stored block.
*
* - Full flush: whereby isal_deflate() finishes and aligns the deflate block as
* in sync flush but also ensures that subsequent block's history does not
* look back beyond this point and new blocks are fully independent.
*
* Igzip's default configuration is:
*
* - 8K window size
*
* This option can be overridden to enable:
*
* - 32K window size, by adding \#define LARGE_WINDOW 1 in igzip_lib.h and
* \%define LARGE_WINDOW in options.asm, or via the command line with
* @verbatim gmake D="-D LARGE_WINDOW" @endverbatim on Linux and FreeBSD, or
* with @verbatim nmake -f Makefile.nmake D="-D LARGE_WINDOW" @endverbatim on
* Windows.
*
* KNOWN ISSUES:
* - If building the code on Windows with the 32K window enabled, the
* /LARGEADDRESSAWARE:NO link option must be added.
* - The 32K window isn't supported when used in a shared library.
*
*/
#include <stdint.h>
#include "types.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Compile-time configuration. The same option names and values appear in
 * options.asm and lz0a_const.asm for the assembly sources. */
// Options:dir
// m - reschedule mem reads
// e b - bitbuff style
// t s x - compare style
// h - limit hash updates
// l - use longer huffman table
// f - fix cache read
/* History window size in KiB: 32 when LARGE_WINDOW is defined, otherwise 8 */
#if defined(LARGE_WINDOW)
# define HIST_SIZE 32
#else
# define HIST_SIZE 8
#endif
/* bit buffer types
* BITBUF8: (e) Always write 8 bytes of data
* BITBUFB: (b) Always write data
*
* USE_BITBUFB is the default when no USE_BITBUF* symbol is predefined.
*/
#if !(defined(USE_BITBUFB) || defined(USE_BITBUF8) || defined(USE_BITBUF_ELSE))
# define USE_BITBUFB
#endif
/* compare types
* 1: ( ) original
* 2: (t) with CMOV
* 3: (s) with sttni
* 4: (x) with xmm / pmovbmsk
* 5: (y) with ymm / pmovbmsk (32-bytes at a time)
*/
/* (h) limit hash update */
# define LIMIT_HASH_UPDATE
/* (l) longer huffman table */
#define LONGER_HUFFTABLE
/* (f) fix cache read problem */
#define FIX_CACHE_READ
/* The longer huffman table is only used with the 8K window */
#if (HIST_SIZE > 8)
# undef LONGER_HUFFTABLE
#endif
#define IGZIP_K 1024
#define IGZIP_D (HIST_SIZE * IGZIP_K) /* Amount of history */
/* NOTE(review): 17 * 16 = 272 is a multiple of 16 but not of 32, so the
 * "32 byte boundary" wording below looks inaccurate -- confirm. */
#define IGZIP_LA (17 * 16) /* Max look-ahead, rounded up to 32 byte boundary */
#define BSIZE (2*IGZIP_D + IGZIP_LA) /* Nominal buffer size */
/* One hash entry per byte of history; HASH_SIZE is a power of two for both
 * supported HIST_SIZE values, so HASH_MASK is an all-ones mask. */
#define HASH_SIZE IGZIP_D
#define HASH_MASK (HASH_SIZE - 1)
#define SHORTEST_MATCH 3
/* Size in bytes of isal_hufftables.deflate_hdr */
#define IGZIP_MAX_DEF_HDR_SIZE 328
/* Sizes of the lookup tables in struct isal_hufftables. Distances up to
 * DIST_TABLE_SIZE are covered by dist_table; the distance codes from
 * DECODE_OFFSET up to 29 get their own entries in dcodes/dcodes_sizes. */
#ifdef LONGER_HUFFTABLE
enum {DIST_TABLE_SIZE = 8*1024};
/* DECODE_OFFSET is dist code index corresponding to DIST_TABLE_SIZE + 1 */
enum { DECODE_OFFSET = 26 };
#else
enum {DIST_TABLE_SIZE = 1024};
/* DECODE_OFFSET is dist code index corresponding to DIST_TABLE_SIZE + 1 */
enum { DECODE_OFFSET = 20 };
#endif
enum {LEN_TABLE_SIZE = 256};
enum {LIT_TABLE_SIZE = 257};
/* Number of bins in isal_huff_histogram.lit_len_histogram and
 * isal_huff_histogram.dist_histogram respectively */
#define IGZIP_LIT_LEN 286
#define IGZIP_DIST_LEN 30
/* Flush Flags: values for isal_zstream.flush */
#define NO_FLUSH 0 /* Default */
#define SYNC_FLUSH 1
#define FULL_FLUSH 2
#define FINISH_FLUSH 0 /* Deprecated */
/* Return values of isal_deflate() and isal_deflate_stateless().
 * Note that FINISH_FLUSH above has the same value as NO_FLUSH. */
#define COMP_OK 0
#define INVALID_FLUSH -7
#define INVALID_PARAM -8
#define STATELESS_OVERFLOW -1
#define DEFLATE_HDR_LEN 3
/**
 * @enum isal_zstate_state
 * @brief Compression State please note ZSTATE_TRL only applies for GZIP compression
 *
 * The ZSTATE_TMP_* values mirror the regular states in the same order, each
 * exactly TMP_OFFSET_SIZE above its regular counterpart.
 */

/* When the state is set to ZSTATE_NEW_HDR or ZSTATE_TMP_NEW_HDR, the
 * hufftable being used for compression may be swapped
 */
enum isal_zstate_state {
	ZSTATE_NEW_HDR,			//!< Header to be written
	ZSTATE_HDR,			//!< Header state
	ZSTATE_BODY,			//!< Body state
	ZSTATE_FLUSH_READ_BUFFER,	//!< Flush buffer
	ZSTATE_SYNC_FLUSH,		//!< Write sync flush block
	ZSTATE_FLUSH_WRITE_BUFFER,	//!< Flush bitbuf
	ZSTATE_TRL,			//!< Trailer state
	ZSTATE_END,			//!< End state
	ZSTATE_TMP_NEW_HDR,		//!< Temporary Header to be written
	ZSTATE_TMP_HDR,			//!< Temporary Header state
	ZSTATE_TMP_BODY,		//!< Temporary Body state
	ZSTATE_TMP_FLUSH_READ_BUFFER,	//!< Flush buffer
	ZSTATE_TMP_SYNC_FLUSH,		//!< Write sync flush block
	ZSTATE_TMP_FLUSH_WRITE_BUFFER,	//!< Flush bitbuf
	ZSTATE_TMP_TRL,			//!< Temporary Trailer state
	ZSTATE_TMP_END			//!< Temporary End state
};

/* Offset used to switch between TMP states and non-tmp states.
 * Parenthesized so that expressions such as (state - TMP_OFFSET_SIZE)
 * subtract the whole offset rather than its two terms separately. */
#define TMP_OFFSET_SIZE (ZSTATE_TMP_HDR - ZSTATE_HDR)
/** @brief Symbol frequency counts passed to isal_update_histogram() and the
 * isal_create_hufftables*() functions. Must be zeroed before first use. */
struct isal_huff_histogram {
uint64_t lit_len_histogram[IGZIP_LIT_LEN]; //!< counts for literal and repeat-length symbols
uint64_t dist_histogram[IGZIP_DIST_LEN]; //!< counts for lookback-distance symbols
};
/** @brief Holds Bit Buffer information
 *
 * NOTE(review): the field layout is presumably also relied on by the
 * assembly sources -- confirm before reordering or resizing members.
 */
struct BitBuf2 {
uint64_t m_bits; //!< bits in the bit buffer
uint32_t m_bit_count; //!< number of valid bits in the bit buffer
uint8_t *m_out_buf; //!< current index of buffer to write to
uint8_t *m_out_end; //!< end of buffer to write to
uint8_t *m_out_start; //!< start of buffer to write to
};
/* Variable prefixes:
* b_ : Measured wrt the start of the buffer
* f_ : Measured wrt the start of the file (aka file_start)
*/
/** @brief Holds the internal state information for input and output compression streams
 *
 * Embedded in struct isal_zstream as internal_state. The structure is large:
 * it contains the internal history buffer (BSIZE + 16 bytes) and the hash
 * array (HASH_SIZE 16-bit entries).
 * NOTE(review): the field layout is presumably also relied on by the
 * assembly sources -- confirm before reordering or resizing members.
 */
struct isal_zstate {
uint32_t b_bytes_valid; //!< number of bytes of valid data in buffer
uint32_t b_bytes_processed; //!< keeps track of the number of bytes processed in isal_zstate.buffer
uint8_t *file_start; //!< pointer to where file would logically start
DECLARE_ALIGNED(uint32_t crc[16], 16); //!< actually 4 128-bit integers
struct BitBuf2 bitbuf; //!< Bit Buffer
enum isal_zstate_state state; //!< Current state in processing the data stream
uint32_t count; //!< used for partial header/trailer writes
uint8_t tmp_out_buff[16]; //!< temporary array
uint32_t tmp_out_start; //!< temporary variable
uint32_t tmp_out_end; //!< temporary variable
uint32_t last_flush; //!< keeps track of last submitted flush
uint32_t has_gzip_hdr; //!< keeps track of if the gzip header has been written.
uint32_t has_eob; //!< keeps track of eob on the last deflate block
uint32_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set)
uint32_t left_over; //!< keeps track of overflow bytes
DECLARE_ALIGNED(uint8_t buffer[BSIZE + 16], 32); //!< Internal buffer
DECLARE_ALIGNED(uint16_t head[HASH_SIZE], 16); //!< Hash array
};
/** @brief Holds the huffman tree used to huffman encode the input stream
 *
 * Referenced from isal_zstream.hufftables and filled in by
 * isal_create_hufftables() / isal_create_hufftables_subset().
 **/
struct isal_hufftables {
uint8_t deflate_hdr[IGZIP_MAX_DEF_HDR_SIZE]; //!< deflate huffman tree header
uint32_t deflate_hdr_count; //!< Number of whole bytes in deflate_hdr
uint32_t deflate_hdr_extra_bits; //!< Number of bits in the partial byte in header
uint32_t dist_table[DIST_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are the code
uint32_t len_table[LEN_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are the code
uint16_t lit_table[LIT_TABLE_SIZE]; //!< literal code
uint8_t lit_table_sizes[LIT_TABLE_SIZE]; //!< literal code length
uint16_t dcodes[30 - DECODE_OFFSET]; //!< distance code, one entry per distance code from DECODE_OFFSET to 29
uint8_t dcodes_sizes[30 - DECODE_OFFSET]; //!< distance code length, same indexing as dcodes
};
/** @brief Holds stream information
 *
 * The caller sets next_in/avail_in, next_out/avail_out, end_of_stream and
 * flush before calling isal_deflate(); isal_deflate_init() must be called
 * first. The structure embeds the full internal compression state, so it is
 * large (see struct isal_zstate).
 */
struct isal_zstream {
uint8_t *next_in; //!< Next input byte
uint32_t avail_in; //!< number of bytes available at next_in
uint32_t total_in; //!< total number of bytes read so far
uint8_t *next_out; //!< Next output byte
uint32_t avail_out; //!< number of bytes available at next_out
uint32_t total_out; //!< total number of bytes written so far
struct isal_hufftables *hufftables; //!< Huffman encoding used when compressing
uint32_t end_of_stream; //!< non-zero if this is the last input buffer
uint32_t flush; //!< Flush type can be NO_FLUSH or SYNC_FLUSH
struct isal_zstate internal_state; //!< Internal state for this stream
};
/**
 * @brief Updates histograms to include the symbols found in the input
 * stream. Since this function only updates the histograms, it can be called on
 * multiple streams to get a histogram better representing the desired data
 * set. When first using histogram it must be initialized by zeroing the
 * structure.
 *
 * @param in_stream: Input stream of data.
 * @param length: The length of in_stream.
 * @param histogram: The returned histogram of lit/len/dist symbols.
 */
void isal_update_histogram(uint8_t * in_stream, int length, struct isal_huff_histogram * histogram);
/**
 * @brief Creates a custom huffman code for the given histograms in which
 * every literal and repeat length is assigned a code and all possible lookback
 * distances are assigned a code.
 *
 * @param hufftables: the output structure containing the huffman code
 * @param histogram: input histograms; lit_len_histogram holds the frequency
 * of literal symbols and repeat lengths, dist_histogram holds the frequency
 * of lookback distances
 * @returns Returns a non zero value if an invalid huffman code was created.
 */
int isal_create_hufftables(struct isal_hufftables * hufftables,
struct isal_huff_histogram * histogram);
/**
 * @brief Creates a custom huffman code for the given histograms like
 * isal_create_hufftables() except literals with 0 frequency in the histogram
 * are not assigned a code
 *
 * @param hufftables: the output structure containing the huffman code
 * @param histogram: input histograms; lit_len_histogram holds the frequency
 * of literal symbols and repeat lengths, dist_histogram holds the frequency
 * of lookback distances
 * @returns Returns a non zero value if an invalid huffman code was created.
 */
int isal_create_hufftables_subset(struct isal_hufftables * hufftables,
struct isal_huff_histogram * histogram);
/**
 * @brief Initialize compression stream data structure
 *
 * Call this before the first isal_deflate() on a stream; the buffer
 * pointers, sizes, end_of_stream and flush fields are then set by the
 * caller.
 *
 * @param stream Structure holding state information on the compression streams.
 * @returns none
 */
void isal_deflate_init(struct isal_zstream *stream);
/**
 * @brief Fast data (deflate) compression for storage applications.
 *
 * On entry to isal_deflate(), next_in points to an input buffer and avail_in
 * indicates the length of that buffer. Similarly next_out points to an empty
 * output buffer and avail_out indicates the size of that buffer.
 *
 * The fields total_in and total_out start at 0 and are updated by
 * isal_deflate(). These reflect the total number of bytes read or written so far.
 *
 * The call to isal_deflate() will take data from the input buffer (updating
 * next_in and avail_in) and write a compressed stream to the output buffer
 * (updating next_out and avail_out). The function returns when either the input
 * buffer is empty or the output buffer is full.
 *
 * When the last input buffer is passed in, signaled by setting the
 * end_of_stream, the routine will complete compression at the end of the input
 * buffer, as long as the output buffer is big enough.
 *
 * The equivalent of the zlib Z_SYNC_FLUSH operation is currently supported.
 * Flush types can be NO_FLUSH or SYNC_FLUSH. Default flush type is NO_FLUSH.
 * If SYNC_FLUSH is selected each input buffer is compressed and byte aligned
 * with a type 0 block appended to the end. Switching between NO_FLUSH and
 * SYNC_FLUSH is supported to select after which input buffer a SYNC_FLUSH is
 * performed.
 *
 * NOTE(review): FULL_FLUSH is also defined in this header and described in
 * the file overview, but is not listed here -- confirm whether it is accepted.
 *
 * @param stream Structure holding state information on the compression streams.
 * @return COMP_OK (if everything is ok),
 * INVALID_FLUSH (if an invalid FLUSH is selected).
 */
int isal_deflate(struct isal_zstream *stream);
/**
 * @brief Fast data (deflate) stateless compression for storage applications.
 *
 * Stateless (one shot) compression routine with a similar interface to
 * isal_deflate() but operates on the entire input buffer at one time. The
 * output buffer (avail_out bytes at next_out) must be large enough to fit the
 * entire compressed output. Max expansion is limited to the input size plus
 * the header size of a stored/raw block.
 *
 * @param stream Structure holding state information on the compression streams.
 * @return COMP_OK (if everything is ok),
 * STATELESS_OVERFLOW (if output buffer will not fit output).
 */
int isal_deflate_stateless(struct isal_zstream *stream);
#ifdef __cplusplus
}
#endif
#endif /* ifndef _IGZIP_H */

View File

@ -75,3 +75,9 @@ crc32_iscsi @71
crc16_t10dif_base @72
crc32_ieee_base @73
crc32_iscsi_base @74
isal_deflate_stateless @75
isal_deflate @76
isal_deflate_init @77
isal_update_histogram @78
isal_create_hufftables @79
isal_create_hufftables_subset @80