mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 09:23:50 +01:00
Add data compression unit
Include fast DEFLATE-compatible compression functions. Signed-off-by: Greg Tucker <greg.b.tucker@intel.com>
This commit is contained in:
parent
61164e105b
commit
660f49b02d
4
LICENSE
4
LICENSE
@ -1,7 +1,7 @@
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
@ -27,6 +27,7 @@ perf_tests32=
|
||||
include erasure_code/Makefile.am
|
||||
include raid/Makefile.am
|
||||
include crc/Makefile.am
|
||||
include igzip/Makefile.am
|
||||
|
||||
# LIB version info not necessarily the same as package version
|
||||
LIBISAL_CURRENT=2
|
||||
|
164
Makefile.nmake
164
Makefile.nmake
@ -1,5 +1,5 @@
|
||||
########################################################################
|
||||
# Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
# Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
@ -27,11 +27,91 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
########################################################################
|
||||
|
||||
objs = \
|
||||
bin\ec_base.obj \
|
||||
bin\ec_highlevel_func.obj \
|
||||
bin\ec_multibinary.obj \
|
||||
bin\gf_2vect_dot_prod_avx.obj \
|
||||
bin\gf_2vect_dot_prod_avx2.obj \
|
||||
bin\gf_2vect_dot_prod_avx512.obj \
|
||||
bin\gf_2vect_dot_prod_sse.obj \
|
||||
bin\gf_2vect_mad_avx.obj \
|
||||
bin\gf_2vect_mad_avx2.obj \
|
||||
bin\gf_2vect_mad_avx512.obj \
|
||||
bin\gf_2vect_mad_sse.obj \
|
||||
bin\gf_3vect_dot_prod_avx.obj \
|
||||
bin\gf_3vect_dot_prod_avx2.obj \
|
||||
bin\gf_3vect_dot_prod_avx512.obj \
|
||||
bin\gf_3vect_dot_prod_sse.obj \
|
||||
bin\gf_3vect_mad_avx.obj \
|
||||
bin\gf_3vect_mad_avx2.obj \
|
||||
bin\gf_3vect_mad_avx512.obj \
|
||||
bin\gf_3vect_mad_sse.obj \
|
||||
bin\gf_4vect_dot_prod_avx.obj \
|
||||
bin\gf_4vect_dot_prod_avx2.obj \
|
||||
bin\gf_4vect_dot_prod_avx512.obj \
|
||||
bin\gf_4vect_dot_prod_sse.obj \
|
||||
bin\gf_4vect_mad_avx.obj \
|
||||
bin\gf_4vect_mad_avx2.obj \
|
||||
bin\gf_4vect_mad_avx512.obj \
|
||||
bin\gf_4vect_mad_sse.obj \
|
||||
bin\gf_5vect_dot_prod_avx.obj \
|
||||
bin\gf_5vect_dot_prod_avx2.obj \
|
||||
bin\gf_5vect_dot_prod_sse.obj \
|
||||
bin\gf_5vect_mad_avx.obj \
|
||||
bin\gf_5vect_mad_avx2.obj \
|
||||
bin\gf_5vect_mad_sse.obj \
|
||||
bin\gf_6vect_dot_prod_avx.obj \
|
||||
bin\gf_6vect_dot_prod_avx2.obj \
|
||||
bin\gf_6vect_dot_prod_sse.obj \
|
||||
bin\gf_6vect_mad_avx.obj \
|
||||
bin\gf_6vect_mad_avx2.obj \
|
||||
bin\gf_6vect_mad_sse.obj \
|
||||
bin\gf_vect_dot_prod_avx.obj \
|
||||
bin\gf_vect_dot_prod_avx2.obj \
|
||||
bin\gf_vect_dot_prod_avx512.obj \
|
||||
bin\gf_vect_dot_prod_sse.obj \
|
||||
bin\gf_vect_mad_avx.obj \
|
||||
bin\gf_vect_mad_avx2.obj \
|
||||
bin\gf_vect_mad_avx512.obj \
|
||||
bin\gf_vect_mad_sse.obj \
|
||||
bin\gf_vect_mul_avx.obj \
|
||||
bin\gf_vect_mul_sse.obj \
|
||||
bin\pq_check_sse.obj \
|
||||
bin\pq_gen_avx.obj \
|
||||
bin\pq_gen_avx2.obj \
|
||||
bin\pq_gen_sse.obj \
|
||||
bin\raid_base.obj \
|
||||
bin\raid_multibinary.obj \
|
||||
bin\xor_check_sse.obj \
|
||||
bin\xor_gen_avx.obj \
|
||||
bin\xor_gen_sse.obj \
|
||||
bin\crc16_t10dif_01.obj \
|
||||
bin\crc16_t10dif_by4.obj \
|
||||
bin\crc32_gzip.obj \
|
||||
bin\crc32_ieee_01.obj \
|
||||
bin\crc32_ieee_by4.obj \
|
||||
bin\crc32_iscsi_00.obj \
|
||||
bin\crc32_iscsi_01.obj \
|
||||
bin\crc_base.obj \
|
||||
bin\crc_data.obj \
|
||||
bin\crc_multibinary.obj \
|
||||
bin\huff_codes.obj \
|
||||
bin\hufftables_c.obj \
|
||||
bin\igzip.obj \
|
||||
bin\igzip_base.obj \
|
||||
bin\igzip_body_01.obj \
|
||||
bin\igzip_body_04.obj \
|
||||
bin\igzip_finish.obj \
|
||||
bin\igzip_multibinary.obj \
|
||||
bin\igzip_stateless_01.obj \
|
||||
bin\igzip_stateless_04.obj \
|
||||
bin\igzip_stateless_base.obj \
|
||||
bin\crc_utils_01.obj \
|
||||
bin\crc_utils_04.obj \
|
||||
bin\detect_repeated_char.obj
|
||||
|
||||
objs = bin\ec_base.obj bin\ec_highlevel_func.obj bin\ec_multibinary.obj bin\gf_2vect_dot_prod_avx.obj bin\gf_2vect_dot_prod_avx2.obj bin\gf_2vect_dot_prod_avx512.obj bin\gf_2vect_dot_prod_sse.obj bin\gf_2vect_mad_avx.obj bin\gf_2vect_mad_avx2.obj bin\gf_2vect_mad_avx512.obj bin\gf_2vect_mad_sse.obj bin\gf_3vect_dot_prod_avx.obj bin\gf_3vect_dot_prod_avx2.obj bin\gf_3vect_dot_prod_avx512.obj bin\gf_3vect_dot_prod_sse.obj bin\gf_3vect_mad_avx.obj bin\gf_3vect_mad_avx2.obj bin\gf_3vect_mad_avx512.obj bin\gf_3vect_mad_sse.obj bin\gf_4vect_dot_prod_avx.obj bin\gf_4vect_dot_prod_avx2.obj bin\gf_4vect_dot_prod_avx512.obj bin\gf_4vect_dot_prod_sse.obj bin\gf_4vect_mad_avx.obj bin\gf_4vect_mad_avx2.obj bin\gf_4vect_mad_avx512.obj bin\gf_4vect_mad_sse.obj bin\gf_5vect_dot_prod_avx.obj bin\gf_5vect_dot_prod_avx2.obj bin\gf_5vect_dot_prod_sse.obj bin\gf_5vect_mad_avx.obj bin\gf_5vect_mad_avx2.obj bin\gf_5vect_mad_sse.obj bin\gf_6vect_dot_prod_avx.obj bin\gf_6vect_dot_prod_avx2.obj bin\gf_6vect_dot_prod_sse.obj bin\gf_6vect_mad_avx.obj bin\gf_6vect_mad_avx2.obj bin\gf_6vect_mad_sse.obj bin\gf_vect_dot_prod_avx.obj bin\gf_vect_dot_prod_avx2.obj bin\gf_vect_dot_prod_avx512.obj bin\gf_vect_dot_prod_sse.obj bin\gf_vect_mad_avx.obj bin\gf_vect_mad_avx2.obj bin\gf_vect_mad_avx512.obj bin\gf_vect_mad_sse.obj bin\gf_vect_mul_avx.obj bin\gf_vect_mul_sse.obj bin\pq_check_sse.obj bin\pq_gen_avx.obj bin\pq_gen_avx2.obj bin\pq_gen_sse.obj bin\raid_base.obj bin\raid_multibinary.obj bin\xor_check_sse.obj bin\xor_gen_avx.obj bin\xor_gen_sse.obj bin/crc16_t10dif_01.obj bin/crc16_t10dif_by4.obj bin/crc32_ieee_01.obj bin/crc32_ieee_by4.obj bin/crc32_iscsi_01.obj bin/crc32_iscsi_00.obj bin/crc_multibinary.obj bin/crc_base.obj
|
||||
|
||||
|
||||
INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iinclude/
|
||||
INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/
|
||||
LINKFLAGS = /nologo
|
||||
CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $(INCLUDES) $(D)
|
||||
AFLAGS = -f win64 $(INCLUDES) $(D)
|
||||
@ -65,9 +145,14 @@ isa-l.dll: $(objs)
|
||||
{crc}.asm.obj:
|
||||
$(AS) $(AFLAGS) -o $@ $?
|
||||
|
||||
{igzip}.c.obj:
|
||||
$(CC) $(CFLAGS) /c -Fo$@ $?
|
||||
{igzip}.asm.obj:
|
||||
$(AS) $(AFLAGS) -o $@ $?
|
||||
|
||||
|
||||
# Examples
|
||||
ex = xor_example.exe crc_simple_test.exe
|
||||
ex = xor_example.exe crc_simple_test.exe igzip_example.exe igzip_sync_flush_example.exe
|
||||
ex: lib $(ex)
|
||||
|
||||
$(ex): $(@B).obj
|
||||
@ -76,9 +161,19 @@ $(ex): $(@B).obj
|
||||
link /out:$@ $(LINKFLAGS) isa-l.lib $?
|
||||
|
||||
# Check tests
|
||||
checks = erasure_code_test.exe erasure_code_update_test.exe gf_inverse_test.exe gf_vect_mul_test.exe \
|
||||
pq_check_test.exe pq_gen_test.exe xor_check_test.exe xor_gen_test.exe \
|
||||
crc16_t10dif_test.exe crc32_ieee_test.exe crc32_iscsi_test.exe
|
||||
checks = \
|
||||
gf_vect_mul_test.exe \
|
||||
erasure_code_test.exe \
|
||||
gf_inverse_test.exe \
|
||||
erasure_code_update_test.exe \
|
||||
xor_gen_test.exe \
|
||||
pq_gen_test.exe \
|
||||
xor_check_test.exe \
|
||||
pq_check_test.exe \
|
||||
crc16_t10dif_test.exe \
|
||||
crc32_ieee_test.exe \
|
||||
crc32_iscsi_test.exe \
|
||||
igzip_check.exe
|
||||
|
||||
checks: lib $(checks)
|
||||
$(checks): $(@B).obj
|
||||
@ -86,13 +181,53 @@ check: $(checks)
|
||||
!$?
|
||||
|
||||
# Unit tests
|
||||
tests = erasure_code_base_test.exe erasure_code_sse_test.exe gf_2vect_dot_prod_sse_test.exe gf_3vect_dot_prod_sse_test.exe gf_4vect_dot_prod_sse_test.exe gf_5vect_dot_prod_sse_test.exe gf_6vect_dot_prod_sse_test.exe gf_vect_dot_prod_avx_test.exe gf_vect_dot_prod_base_test.exe gf_vect_dot_prod_sse_test.exe gf_vect_dot_prod_test.exe gf_vect_mad_test.exe gf_vect_mul_avx_test.exe gf_vect_mul_base_test.exe gf_vect_mul_sse_test.exe
|
||||
tests = \
|
||||
gf_vect_mul_sse_test.exe \
|
||||
gf_vect_mul_avx_test.exe \
|
||||
gf_vect_mul_base_test.exe \
|
||||
gf_vect_dot_prod_sse_test.exe \
|
||||
gf_vect_dot_prod_avx_test.exe \
|
||||
gf_2vect_dot_prod_sse_test.exe \
|
||||
gf_3vect_dot_prod_sse_test.exe \
|
||||
gf_4vect_dot_prod_sse_test.exe \
|
||||
gf_5vect_dot_prod_sse_test.exe \
|
||||
gf_6vect_dot_prod_sse_test.exe \
|
||||
gf_vect_dot_prod_base_test.exe \
|
||||
gf_vect_dot_prod_test.exe \
|
||||
gf_vect_mad_test.exe \
|
||||
erasure_code_base_test.exe \
|
||||
erasure_code_sse_test.exe \
|
||||
igzip_rand_test.exe
|
||||
|
||||
tests: lib $(tests)
|
||||
$(tests): $(@B).obj
|
||||
|
||||
# Performance tests
|
||||
perfs = erasure_code_base_perf.exe erasure_code_perf.exe erasure_code_sse_perf.exe erasure_code_update_perf.exe gf_2vect_dot_prod_sse_perf.exe gf_3vect_dot_prod_sse_perf.exe gf_4vect_dot_prod_sse_perf.exe gf_5vect_dot_prod_sse_perf.exe gf_6vect_dot_prod_sse_perf.exe gf_vect_dot_prod_1tbl.exe gf_vect_dot_prod_avx_perf.exe gf_vect_dot_prod_perf.exe gf_vect_dot_prod_sse_perf.exe gf_vect_mad_perf.exe gf_vect_mul_avx_perf.exe gf_vect_mul_perf.exe gf_vect_mul_sse_perf.exe pq_gen_perf.exe xor_gen_perf.exe crc16_t10dif_perf.exe crc32_ieee_perf.exe crc32_iscsi_perf.exe
|
||||
perfs = \
|
||||
gf_vect_mul_perf.exe \
|
||||
gf_vect_mul_sse_perf.exe \
|
||||
gf_vect_mul_avx_perf.exe \
|
||||
gf_vect_dot_prod_sse_perf.exe \
|
||||
gf_vect_dot_prod_avx_perf.exe \
|
||||
gf_2vect_dot_prod_sse_perf.exe \
|
||||
gf_3vect_dot_prod_sse_perf.exe \
|
||||
gf_4vect_dot_prod_sse_perf.exe \
|
||||
gf_5vect_dot_prod_sse_perf.exe \
|
||||
gf_6vect_dot_prod_sse_perf.exe \
|
||||
gf_vect_dot_prod_perf.exe \
|
||||
gf_vect_dot_prod_1tbl.exe \
|
||||
gf_vect_mad_perf.exe \
|
||||
erasure_code_perf.exe \
|
||||
erasure_code_base_perf.exe \
|
||||
erasure_code_sse_perf.exe \
|
||||
erasure_code_update_perf.exe \
|
||||
xor_gen_perf.exe \
|
||||
pq_gen_perf.exe \
|
||||
crc16_t10dif_perf.exe \
|
||||
crc32_ieee_perf.exe \
|
||||
crc32_iscsi_perf.exe \
|
||||
igzip_perf.exe \
|
||||
igzip_sync_flush_perf.exe
|
||||
|
||||
perfs: lib $(perfs)
|
||||
$(perfs): $(@B).obj
|
||||
@ -105,3 +240,10 @@ clean:
|
||||
-if exist isa-l.lib del isa-l.lib
|
||||
-if exist isa-l.dll del isa-l.dll
|
||||
|
||||
zlib.lib:
|
||||
igzip_rand_test.exe: igzip_inflate_ref.obj
|
||||
igzip_inflate_perf.exe: igzip_inflate_ref.obj
|
||||
igzip_inflate_perf.exe: zlib.lib
|
||||
igzip_inflate_test.exe: igzip_inflate_ref.obj
|
||||
igzip_inflate_test.exe: zlib.lib
|
||||
igzip_check.exe: igzip_inflate_ref.obj
|
||||
|
@ -27,7 +27,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
########################################################################
|
||||
|
||||
units = erasure_code raid crc
|
||||
units = erasure_code raid crc igzip
|
||||
|
||||
default: lib
|
||||
|
||||
|
95
igzip/Makefile.am
Normal file
95
igzip/Makefile.am
Normal file
@ -0,0 +1,95 @@
|
||||
########################################################################
|
||||
# Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
########################################################################
|
||||
|
||||
lsrc += igzip/igzip.c igzip/hufftables_c.c \
|
||||
igzip/crc_utils_01.asm \
|
||||
igzip/crc_utils_04.asm \
|
||||
igzip/igzip_body_01.asm igzip/igzip_body_04.asm igzip/igzip_finish.asm \
|
||||
igzip/igzip_stateless_01.asm igzip/igzip_stateless_04.asm \
|
||||
igzip/crc_data.asm \
|
||||
igzip/crc32_gzip.asm igzip/detect_repeated_char.asm \
|
||||
igzip/igzip_multibinary.asm \
|
||||
igzip/igzip_stateless_base.c \
|
||||
igzip/igzip_base.c
|
||||
|
||||
extern_hdrs += include/igzip_lib.h
|
||||
|
||||
pkginclude_HEADERS += include/types.h
|
||||
|
||||
unit_tests += igzip/igzip_rand_test
|
||||
|
||||
check_tests += igzip/igzip_check
|
||||
|
||||
perf_tests += igzip/igzip_perf igzip/igzip_sync_flush_perf
|
||||
|
||||
other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf
|
||||
|
||||
other_src += igzip/bitbuf2.asm igzip/data_struct2.asm \
|
||||
igzip/igzip_buffer_utils_01.asm \
|
||||
igzip/igzip_buffer_utils_04.asm \
|
||||
igzip/igzip_body.asm igzip/igzip_finish.asm \
|
||||
igzip/lz0a_const.asm igzip/options.asm igzip/stdmac.asm igzip/igzip_compare_types.asm \
|
||||
igzip/bitbuf2.h igzip/repeated_char_result.h \
|
||||
igzip/igzip_body.asm \
|
||||
igzip/igzip_stateless.asm \
|
||||
igzip/huffman.asm \
|
||||
include/reg_sizes.asm \
|
||||
include/multibinary.asm \
|
||||
include/test.h \
|
||||
igzip/huffman.h
|
||||
|
||||
|
||||
examples += igzip/igzip_example igzip/igzip_sync_flush_example
|
||||
|
||||
igzip_rand_test: igzip_inflate_ref.o
|
||||
igzip_igzip_rand_test_LDADD = igzip/igzip_inflate_ref.lo libisal.la
|
||||
|
||||
# Include tools to make custom Huffman tables based on sample data
|
||||
other_tests += igzip/generate_custom_hufftables
|
||||
other_tests += igzip/generate_constant_block_header
|
||||
other_src += igzip/huff_codes.h
|
||||
lsrc += igzip/huff_codes.c
|
||||
|
||||
# Include tools and tests using the reference inflate
|
||||
other_tests += igzip/igzip_inflate_perf
|
||||
other_tests += igzip/igzip_inflate_test
|
||||
other_src += igzip/igzip_inflate_ref.h
|
||||
other_src += igzip/igzip_inflate_ref.c
|
||||
other_src += igzip/crc_inflate.h
|
||||
|
||||
igzip_inflate_perf: igzip_inflate_ref.o
|
||||
igzip_inflate_perf: LDLIBS += -lz
|
||||
igzip_igzip_inflate_perf_LDADD = igzip/igzip_inflate_ref.lo libisal.la
|
||||
igzip_igzip_inflate_perf_LDFLAGS = -lz
|
||||
igzip_inflate_test: igzip_inflate_ref.o
|
||||
igzip_inflate_test: LDLIBS += -lz
|
||||
igzip_igzip_inflate_test_LDADD = igzip/igzip_inflate_ref.lo libisal.la
|
||||
igzip_igzip_inflate_test_LDFLAGS = -lz
|
||||
igzip_check: igzip_inflate_ref.o
|
||||
igzip_igzip_check_LDADD = igzip/igzip_inflate_ref.lo libisal.la
|
205
igzip/bitbuf2.asm
Normal file
205
igzip/bitbuf2.asm
Normal file
@ -0,0 +1,205 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%include "options.asm"
|
||||
|
||||
; write_bits_always: OR `code` into the bit accumulator at bit position
; m_bit_count, then store the accumulator to [m_out_buf] on every invocation
; and advance m_out_buf by the number of whole bytes it held.
; Assumes m_out_buf is a register
|
||||
; Clobbers RCX
|
||||
; code is clobbered
|
||||
; tmp1 is accepted as a sixth argument but is not referenced in this macro
; write_bits_always m_bits, m_bit_count, code, count, m_out_buf, tmp1
|
||||
%macro write_bits_always 6
|
||||
%define %%m_bits %1
|
||||
%define %%m_bit_count %2
|
||||
%define %%code %3
|
||||
%define %%count %4
|
||||
%define %%m_out_buf %5
|
||||
%define %%tmp1 %6
|
||||
|
||||
; code <<= m_bit_count (shlx form takes the count from any register, so rcx
; is not needed for this step)
%ifdef USE_HSWNI
|
||||
shlx %%code, %%code, %%m_bit_count
|
||||
%else
|
||||
mov rcx, %%m_bit_count
|
||||
shl %%code, cl
|
||||
%endif
|
||||
or %%m_bits, %%code
|
||||
add %%m_bit_count, %%count
|
||||
|
||||
; Store the whole accumulator; only the complete bytes are consumed below,
; the remaining (m_bit_count mod 8) bits stay in m_bits for the next call.
movnti [%%m_out_buf], %%m_bits
|
||||
mov rcx, %%m_bit_count
|
||||
shr rcx, 3 ; rcx = bytes
|
||||
add %%m_out_buf, rcx
|
||||
shl rcx, 3 ; rcx = bits
|
||||
sub %%m_bit_count, rcx
|
||||
; drop the bits that were just consumed from the accumulator
%ifdef USE_HSWNI
|
||||
shrx %%m_bits, %%m_bits, rcx
|
||||
%else
|
||||
shr %%m_bits, cl
|
||||
%endif
|
||||
%endm
|
||||
|
||||
; write_bits_safe: OR `code` into the bit accumulator at bit position
; m_bit_count; only when the accumulator reaches 64 bits is it stored to
; [m_out_buf] (m_out_buf advances by 8) and reloaded with the bits of `code`
; that did not fit.
; Assumes m_out_buf is a register
|
||||
; Clobbers RCX
|
||||
; code is preserved (it is copied into tmp1 before shifting); tmp1 is clobbered
|
||||
; write_bits_safe m_bits, m_bit_count, code, count, m_out_buf, tmp1
|
||||
%macro write_bits_safe 6
|
||||
%define %%m_bits %1
|
||||
%define %%m_bit_count %2
|
||||
%define %%code %3
|
||||
%define %%count %4
|
||||
%define %%m_out_buf %5
|
||||
%define %%tmp1 %6
|
||||
|
||||
; tmp1 = code << m_bit_count; code itself is needed again on the spill path
mov %%tmp1, %%code
|
||||
%ifdef USE_HSWNI
|
||||
shlx %%tmp1, %%tmp1, %%m_bit_count
|
||||
%else
|
||||
mov rcx, %%m_bit_count
|
||||
shl %%tmp1, cl
|
||||
%endif
|
||||
or %%m_bits, %%tmp1
|
||||
add %%m_bit_count, %%count
|
||||
; unsigned compare: fall through only when 64 or more bits are pending
cmp %%m_bit_count, 64
|
||||
jb %%not_full
|
||||
sub %%m_bit_count, 64
|
||||
movnti [%%m_out_buf], %%m_bits
|
||||
add %%m_out_buf, 8
|
||||
; rcx = count - (bits left over) = number of bits of code already written out
mov rcx, %%count
|
||||
sub rcx, %%m_bit_count
|
||||
; restart the accumulator with the upper, not yet written, bits of code
mov %%m_bits, %%code
|
||||
%ifdef USE_HSWNI
|
||||
shrx %%m_bits, %%m_bits, rcx
|
||||
%else
|
||||
shr %%m_bits, cl
|
||||
%endif
|
||||
%%not_full:
|
||||
%endm
|
||||
|
||||
; check_space: if fewer than num_bits bits fit below bit 63 of the
; accumulator, store the accumulator to [m_out_buf], advance m_out_buf by the
; whole bytes it held and keep only the leftover (m_bit_count mod 8) bits.
; Assumes m_out_buf is a register
|
||||
; Clobbers RCX
|
||||
; (rcx is only written on the path that stores; tmp1 is always overwritten)
;; check_space num_bits, m_bits, m_bit_count, m_out_buf, tmp1
|
||||
%macro check_space 5
|
||||
%define %%num_bits %1
|
||||
%define %%m_bits %2
|
||||
%define %%m_bit_count %3
|
||||
%define %%m_out_buf %4
|
||||
%define %%tmp1 %5
|
||||
|
||||
; tmp1 = 63 - m_bit_count; skip the store when tmp1 >= num_bits (unsigned)
mov %%tmp1, 63
|
||||
sub %%tmp1, %%m_bit_count
|
||||
cmp %%tmp1, %%num_bits
|
||||
jae %%space_ok
|
||||
|
||||
; if (63 - m_bit_count < num_bits)
|
||||
movnti [%%m_out_buf], %%m_bits
|
||||
mov rcx, %%m_bit_count
|
||||
shr rcx, 3 ; rcx = bytes
|
||||
add %%m_out_buf, rcx
|
||||
shl rcx, 3 ; rcx = bits
|
||||
sub %%m_bit_count, rcx
|
||||
; discard the bytes that were just accounted for
%ifdef USE_HSWNI
|
||||
shrx %%m_bits, %%m_bits, rcx
|
||||
%else
|
||||
shr %%m_bits, cl
|
||||
%endif
|
||||
%%space_ok:
|
||||
%endm
|
||||
|
||||
; write_bits_unsafe: OR `code` into the accumulator at bit position
; m_bit_count and add count to m_bit_count. Nothing is stored to memory and no
; capacity check is made here; write_bits runs check_space before it.
; rcx is clobbered (only when USE_HSWNI is not defined; the shlx path leaves it alone)
|
||||
; code is clobbered
|
||||
; write_bits_unsafe m_bits, m_bit_count, code, count
|
||||
%macro write_bits_unsafe 4
|
||||
%define %%m_bits %1
|
||||
%define %%m_bit_count %2
|
||||
%define %%code %3
|
||||
%define %%count %4
|
||||
; code <<= m_bit_count
%ifdef USE_HSWNI
|
||||
shlx %%code, %%code, %%m_bit_count
|
||||
%else
|
||||
mov rcx, %%m_bit_count
|
||||
shl %%code, cl
|
||||
%endif
|
||||
or %%m_bits, %%code
|
||||
add %%m_bit_count, %%count
|
||||
%endm
|
||||
|
||||
; pad_to_byte: round m_bit_count up to the next multiple of 8.
; On exit extra_bits holds the number of padding bits added (0..7).
; The accumulator itself is not touched by this macro.
; pad_to_byte m_bit_count, extra_bits
|
||||
%macro pad_to_byte 2
|
||||
%define %%m_bit_count %1
|
||||
%define %%extra_bits %2
|
||||
|
||||
; extra_bits = (-m_bit_count) & 7
mov %%extra_bits, %%m_bit_count
|
||||
neg %%extra_bits
|
||||
and %%extra_bits, 7
|
||||
add %%m_bit_count, %%extra_bits
|
||||
%endm
|
||||
|
||||
; flush: if any bits are pending, store the accumulator at the address held
; in m_out_buf and advance that address by ceil(m_bit_count / 8) bytes.
; m_bits and m_bit_count are zeroed on exit in either case.
; Assumes m_out_buf is a memory reference
|
||||
; tmp1 is clobbered when bits are pending (it carries the output pointer)
; flush m_bits, m_bit_count, m_out_buf, tmp1
|
||||
%macro flush 4
|
||||
%define %%m_bits %1
|
||||
%define %%m_bit_count %2
|
||||
%define %%m_out_buf %3
|
||||
%define %%tmp1 %4
|
||||
|
||||
test %%m_bit_count, %%m_bit_count
|
||||
jz %%bit_count_is_zero
|
||||
|
||||
; load the output pointer into a register so it can be used as an address
mov %%tmp1, %%m_out_buf
|
||||
movnti [%%tmp1], %%m_bits
|
||||
|
||||
; m_bit_count is reused as a byte count: (bits + 7) / 8
add %%m_bit_count, 7
|
||||
shr %%m_bit_count, 3 ; bytes
|
||||
add %%tmp1, %%m_bit_count
|
||||
mov %%m_out_buf, %%tmp1
|
||||
|
||||
%%bit_count_is_zero:
|
||||
xor %%m_bits, %%m_bits
|
||||
xor %%m_bit_count, %%m_bit_count
|
||||
%endm
|
||||
|
||||
; write_bits: front end that selects the bit-writing strategy at assembly time.
;   USE_BITBUF8 defined      -> write_bits_safe
;   else USE_BITBUFB defined -> write_bits_always
;   otherwise                -> check_space followed by write_bits_unsafe
; write_bits m_bits, m_bit_count, code, count, m_out_buf, tmp1
%macro write_bits 6
|
||||
%define %%m_bits %1
|
||||
%define %%m_bit_count %2
|
||||
%define %%code %3
|
||||
%define %%count %4
|
||||
%define %%m_out_buf %5
|
||||
%define %%tmp1 %6
|
||||
|
||||
%ifdef USE_BITBUF8
|
||||
write_bits_safe %%m_bits, %%m_bit_count, %%code, %%count, %%m_out_buf, %%tmp1
|
||||
%elifdef USE_BITBUFB
|
||||
write_bits_always %%m_bits, %%m_bit_count, %%code, %%count, %%m_out_buf, %%tmp1
|
||||
%else
|
||||
; state->bitbuf.check_space(code_len2);
|
||||
check_space %%count, %%m_bits, %%m_bit_count, %%m_out_buf, %%tmp1
|
||||
; state->bitbuf.write_bits(code2, code_len2);
|
||||
write_bits_unsafe %%m_bits, %%m_bit_count, %%code, %%count
|
||||
; code2 is clobbered, rcx is clobbered
|
||||
%endif
|
||||
%endm
|
161
igzip/bitbuf2.h
Normal file
161
igzip/bitbuf2.h
Normal file
@ -0,0 +1,161 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
#ifndef BITBUF2_H
|
||||
#define BITBUF2_H
|
||||
|
||||
#include "igzip_lib.h"
|
||||
|
||||
/*
 * _mm_stream_si64x(dst, src): store the 64-bit value src at address dst.
 *
 * On Unix-like and Apple toolchains <intrin.h> is not included, so the
 * intrinsic is emulated with a plain 64-bit store (no non-temporal hint).
 * Other toolchains get the real intrinsic from <intrin.h>.
 *
 * Both macro arguments are parenthesized so that pointer arithmetic in dst
 * (e.g. buf + 8 on a byte pointer) is evaluated before the cast rather than
 * being scaled by sizeof(uint64_t), and __APPLE__ is tested with defined()
 * instead of by value.
 */
#if defined(__unix__) || defined(__APPLE__)
#define _mm_stream_si64x(dst, src) (*((uint64_t *)(dst)) = (src))
#else
#include <intrin.h>
#endif
|
||||
|
||||
#ifdef _WIN64
|
||||
#pragma warning(disable: 4996)
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define inline __inline
|
||||
#endif
|
||||
|
||||
|
||||
/* MAX_BITBUF_BIT_WRITE is the maximum number of bits that can be safely written
|
||||
* by consecutive calls of write_bits. Note this assumes the bitbuf is in a
|
||||
* state that is possible at the exit of write_bits */
|
||||
#ifdef USE_BITBUF8 /*Write bits safe */
|
||||
# define MAX_BITBUF_BIT_WRITE 63
|
||||
#elif defined(USE_BITBUFB) /* Write bits always */
|
||||
# define MAX_BITBUF_BIT_WRITE 56
|
||||
#else /* USE_BITBUF_ELSE */
|
||||
# define MAX_BITBUF_BIT_WRITE 56
|
||||
#endif
|
||||
|
||||
|
||||
/* Put a BitBuf2 into its empty state: no pending bits and no output buffer
 * attached (m_out_buf, m_out_start and m_out_end are all set to NULL). */
static
|
||||
inline void construct(struct BitBuf2 *me)
|
||||
{
|
||||
me->m_bits = 0;
|
||||
me->m_bit_count = 0;
|
||||
me->m_out_buf = me->m_out_start = me->m_out_end = NULL;
|
||||
}
|
||||
|
||||
/* Discard any pending bits. Unlike construct(), the output buffer pointers
 * are left as they are. */
static inline void init(struct BitBuf2 *me)
|
||||
{
|
||||
me->m_bits = 0;
|
||||
me->m_bit_count = 0;
|
||||
}
|
||||
|
||||
/* Attach an output buffer of len bytes starting at buf.
 * m_out_end is placed slop (8) bytes before the end of the buffer.
 * NOTE(review): for len < 8 the computed m_out_end lies before buf; callers
 * presumably guarantee a larger buffer - confirm. */
static inline void set_buf(struct BitBuf2 *me, unsigned char *buf, unsigned int len)
|
||||
{
|
||||
unsigned int slop = 8;
|
||||
me->m_out_buf = me->m_out_start = buf;
|
||||
me->m_out_end = buf + len - slop;
|
||||
}
|
||||
|
||||
/* Returns nonzero once the write pointer has moved past m_out_end. */
static inline int is_full(struct BitBuf2 *me)
|
||||
{
|
||||
return (me->m_out_buf > me->m_out_end);
|
||||
}
|
||||
|
||||
/* Returns the current write position in the output buffer. */
static inline uint8_t * buffer_ptr(struct BitBuf2 *me)
|
||||
{
|
||||
return me->m_out_buf;
|
||||
}
|
||||
|
||||
/* Returns the distance in bytes from the start of the output buffer to the
 * current write position. Bits still held in m_bits are not counted. */
static inline uint32_t buffer_used(struct BitBuf2 *me)
|
||||
{
|
||||
return (uint32_t)(me->m_out_buf - me->m_out_start);
|
||||
}
|
||||
|
||||
/* Make room in the bit accumulator for num_bits more bits.
 * When a spill is needed the full 8-byte accumulator is stored at m_out_buf,
 * but the write pointer only advances by the whole bytes it held; the
 * remaining (m_bit_count % 8) bits stay in m_bits. */
static inline void check_space(struct BitBuf2 *me, uint32_t num_bits)
|
||||
{
|
||||
/* Checks if bitbuf has num_bits extra space and flushes the bytes in
|
||||
* the bitbuf if it doesn't. */
|
||||
uint32_t bytes;
|
||||
if (63 - me->m_bit_count < num_bits) {
|
||||
_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
|
||||
bytes = me->m_bit_count / 8;
|
||||
me->m_out_buf += bytes;
|
||||
/* from here on `bytes` holds a bit count (whole bytes * 8) */
bytes *= 8;
|
||||
me->m_bit_count -= bytes;
|
||||
me->m_bits >>= bytes;
|
||||
}
|
||||
}
|
||||
|
||||
/* OR code into the accumulator at bit position m_bit_count and add count to
 * m_bit_count. No capacity check and no store to the output buffer are done
 * here; write_bits() calls check_space() first. */
static inline void write_bits_unsafe(struct BitBuf2 *me, uint64_t code, uint32_t count)
|
||||
{
|
||||
me->m_bits |= code << me->m_bit_count;
|
||||
me->m_bit_count += count;
|
||||
}
|
||||
|
||||
/* Append the low `count` bits of `code` to the bit buffer. The strategy is
 * chosen at compile time:
 *   USE_BITBUF8 - store 8 bytes only when the accumulator reaches 64 bits;
 *   USE_BITBUFB - store whenever at least one whole byte is pending;
 *   otherwise   - check_space() followed by write_bits_unsafe(). */
static inline void write_bits(struct BitBuf2 *me, uint64_t code, uint32_t count)
|
||||
{
|
||||
#ifdef USE_BITBUF8 /*Write bits safe */
|
||||
me->m_bits |= code << me->m_bit_count;
|
||||
me->m_bit_count += count;
|
||||
if (me->m_bit_count >= 64) {
|
||||
_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
|
||||
me->m_out_buf += 8;
|
||||
me->m_bit_count -= 64;
|
||||
/* keep the upper bits of code that did not fit in the stored word */
me->m_bits = code >> (count - me->m_bit_count);
|
||||
}
|
||||
#elif defined(USE_BITBUFB) /* Write bits always */
|
||||
/* Assumes there is space to fit code into m_bits. */
|
||||
uint32_t bits;
|
||||
me->m_bits |= code << me->m_bit_count;
|
||||
me->m_bit_count += count;
|
||||
if (me->m_bit_count >= 8) {
|
||||
_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
|
||||
/* bits = pending bit count rounded down to a whole number of bytes */
bits = me->m_bit_count & ~7;
|
||||
me->m_bit_count -= bits;
|
||||
me->m_out_buf += bits/8;
|
||||
me->m_bits >>= bits;
|
||||
}
|
||||
#else /* USE_BITBUF_ELSE */
|
||||
check_space(me, count);
|
||||
write_bits_unsafe(me, code, count);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/* Write out any pending bits and reset the accumulator.
 * If bits are pending, the whole 8-byte accumulator is stored at m_out_buf
 * and the write pointer advances by the pending bit count rounded up to
 * whole bytes. m_bits and m_bit_count are zeroed in either case. */
/* Can write up to 8 bytes to output buffer */
|
||||
static inline void flush(struct BitBuf2 *me)
|
||||
{
|
||||
uint32_t bytes;
|
||||
if (me->m_bit_count) {
|
||||
_mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits);
|
||||
bytes = (me->m_bit_count + 7) / 8;
|
||||
me->m_out_buf += bytes;
|
||||
}
|
||||
me->m_bits = 0;
|
||||
me->m_bit_count = 0;
|
||||
}
|
||||
|
||||
#endif //BITBUF2_H
|
617
igzip/crc32_gzip.asm
Normal file
617
igzip/crc32_gzip.asm
Normal file
@ -0,0 +1,617 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Function API:
|
||||
; UINT32 crc32_gzip(
|
||||
; UINT32 init_crc, //initial CRC value, 32 bits
|
||||
; const unsigned char *buf, //buffer pointer to calculate CRC on
|
||||
; UINT64 len //buffer length in bytes (64-bit data)
|
||||
; );
|
||||
;
|
||||
; Authors:
|
||||
; Erdinc Ozturk
|
||||
; Vinodh Gopal
|
||||
; James Guilford
|
||||
;
|
||||
; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
|
||||
; URL: http://download.intel.com/design/intarch/papers/323102.pdf
|
||||
;
|
||||
;
|
||||
; sample yasm command line:
|
||||
; yasm -f x64 -f elf64 -X gnu -g dwarf2 crc32_gzip
|
||||
;
|
||||
; As explained here:
|
||||
; http://docs.oracle.com/javase/7/docs/api/java/util/zip/package-summary.html
|
||||
; CRC-32 checksum is described in RFC 1952
|
||||
; Implementing RFC 1952 CRC:
|
||||
; http://www.ietf.org/rfc/rfc1952.txt
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
[bits 64]
|
||||
default rel
|
||||
|
||||
section .text
|
||||
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%xdefine arg1 rcx
|
||||
%xdefine arg2 rdx
|
||||
%xdefine arg3 r8
|
||||
|
||||
%xdefine arg1_low32 ecx
|
||||
%else
|
||||
%xdefine arg1 rdi
|
||||
%xdefine arg2 rsi
|
||||
%xdefine arg3 rdx
|
||||
|
||||
%xdefine arg1_low32 edi
|
||||
%endif
|
||||
|
||||
%define TMP 16*0
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define XMM_SAVE 16*2
|
||||
%define VARIABLE_OFFSET 16*10+8
|
||||
%else
|
||||
%define VARIABLE_OFFSET 16*2+8
|
||||
%endif
|
||||
|
||||
align 16
|
||||
global crc32_gzip
|
||||
; Entry point. Per the "Function API" comment at the top of this file:
;   arg1_low32 = init_crc, arg2 = buf, arg3 = len (bytes).
; The result is returned in eax (see _cleanup).
crc32_gzip:
|
||||
|
||||
; unsigned long c = crc ^ 0xffffffffL;
|
||||
not arg1_low32 ; invert the initial crc; _cleanup inverts the result again
|
||||
|
||||
|
||||
; reserve the stack frame: scratch space at [rsp] plus, on win64, the xmm save area
sub rsp, VARIABLE_OFFSET
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
; save xmm6-xmm13 on the stack so that _cleanup can restore them before returning
|
||||
movdqa [rsp + XMM_SAVE + 16*0], xmm6
|
||||
movdqa [rsp + XMM_SAVE + 16*1], xmm7
|
||||
movdqa [rsp + XMM_SAVE + 16*2], xmm8
|
||||
movdqa [rsp + XMM_SAVE + 16*3], xmm9
|
||||
movdqa [rsp + XMM_SAVE + 16*4], xmm10
|
||||
movdqa [rsp + XMM_SAVE + 16*5], xmm11
|
||||
movdqa [rsp + XMM_SAVE + 16*6], xmm12
|
||||
movdqa [rsp + XMM_SAVE + 16*7], xmm13
|
||||
%endif
|
||||
|
||||
; check if smaller than 256B
|
||||
cmp arg3, 256
|
||||
|
||||
; for sizes less than 256, we can't fold 128B at a time...
; (jl is a signed comparison of the 64-bit length)
|
||||
jl _less_than_256
|
||||
|
||||
|
||||
; load the initial crc value
|
||||
movd xmm10, arg1_low32 ; initial crc
|
||||
|
||||
; load the initial 128B of data (8 x 16B); the initial crc value is xored in below
|
||||
movdqu xmm0, [arg2+16*0]
|
||||
movdqu xmm1, [arg2+16*1]
|
||||
movdqu xmm2, [arg2+16*2]
|
||||
movdqu xmm3, [arg2+16*3]
|
||||
movdqu xmm4, [arg2+16*4]
|
||||
movdqu xmm5, [arg2+16*5]
|
||||
movdqu xmm6, [arg2+16*6]
|
||||
movdqu xmm7, [arg2+16*7]
|
||||
|
||||
; XOR the initial_crc value
|
||||
pxor xmm0, xmm10
|
||||
movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
|
||||
;imm value of pclmulqdq instruction will determine which constant to use
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; we subtract 256 instead of 128 to save one instruction from the loop
|
||||
sub arg3, 256
|
||||
|
||||
; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop
|
||||
; loop will fold 128B at a time until we have 128+y Bytes of buffer
|
||||
|
||||
|
||||
; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
|
||||
_fold_128_B_loop:
|
||||
|
||||
; update the buffer pointer
|
||||
add arg2, 128
|
||||
|
||||
movdqu xmm9, [arg2+16*0]
|
||||
movdqu xmm12, [arg2+16*1]
|
||||
movdqa xmm8, xmm0
|
||||
movdqa xmm13, xmm1
|
||||
pclmulqdq xmm0, xmm10, 0x10
|
||||
pclmulqdq xmm8, xmm10 , 0x1
|
||||
pclmulqdq xmm1, xmm10, 0x10
|
||||
pclmulqdq xmm13, xmm10 , 0x1
|
||||
pxor xmm0, xmm9
|
||||
xorps xmm0, xmm8
|
||||
pxor xmm1, xmm12
|
||||
xorps xmm1, xmm13
|
||||
|
||||
movdqu xmm9, [arg2+16*2]
|
||||
movdqu xmm12, [arg2+16*3]
|
||||
movdqa xmm8, xmm2
|
||||
movdqa xmm13, xmm3
|
||||
pclmulqdq xmm2, xmm10, 0x10
|
||||
pclmulqdq xmm8, xmm10 , 0x1
|
||||
pclmulqdq xmm3, xmm10, 0x10
|
||||
pclmulqdq xmm13, xmm10 , 0x1
|
||||
pxor xmm2, xmm9
|
||||
xorps xmm2, xmm8
|
||||
pxor xmm3, xmm12
|
||||
xorps xmm3, xmm13
|
||||
|
||||
movdqu xmm9, [arg2+16*4]
|
||||
movdqu xmm12, [arg2+16*5]
|
||||
movdqa xmm8, xmm4
|
||||
movdqa xmm13, xmm5
|
||||
pclmulqdq xmm4, xmm10, 0x10
|
||||
pclmulqdq xmm8, xmm10 , 0x1
|
||||
pclmulqdq xmm5, xmm10, 0x10
|
||||
pclmulqdq xmm13, xmm10 , 0x1
|
||||
pxor xmm4, xmm9
|
||||
xorps xmm4, xmm8
|
||||
pxor xmm5, xmm12
|
||||
xorps xmm5, xmm13
|
||||
|
||||
movdqu xmm9, [arg2+16*6]
|
||||
movdqu xmm12, [arg2+16*7]
|
||||
movdqa xmm8, xmm6
|
||||
movdqa xmm13, xmm7
|
||||
pclmulqdq xmm6, xmm10, 0x10
|
||||
pclmulqdq xmm8, xmm10 , 0x1
|
||||
pclmulqdq xmm7, xmm10, 0x10
|
||||
pclmulqdq xmm13, xmm10 , 0x1
|
||||
pxor xmm6, xmm9
|
||||
xorps xmm6, xmm8
|
||||
pxor xmm7, xmm12
|
||||
xorps xmm7, xmm13
|
||||
|
||||
sub arg3, 128
|
||||
|
||||
; check if there is another 128B in the buffer to be able to fold
|
||||
jge _fold_128_B_loop
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
add arg2, 128
|
||||
; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
|
||||
; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
|
||||
|
||||
; fold the 8 xmm registers to 1 xmm register with different constants
|
||||
|
||||
movdqa xmm10, [rk9]
|
||||
movdqa xmm8, xmm0
|
||||
pclmulqdq xmm0, xmm10, 0x1
|
||||
pclmulqdq xmm8, xmm10, 0x10
|
||||
pxor xmm7, xmm8
|
||||
xorps xmm7, xmm0
|
||||
|
||||
movdqa xmm10, [rk11]
|
||||
movdqa xmm8, xmm1
|
||||
pclmulqdq xmm1, xmm10, 0x1
|
||||
pclmulqdq xmm8, xmm10, 0x10
|
||||
pxor xmm7, xmm8
|
||||
xorps xmm7, xmm1
|
||||
|
||||
movdqa xmm10, [rk13]
|
||||
movdqa xmm8, xmm2
|
||||
pclmulqdq xmm2, xmm10, 0x1
|
||||
pclmulqdq xmm8, xmm10, 0x10
|
||||
pxor xmm7, xmm8
|
||||
pxor xmm7, xmm2
|
||||
|
||||
movdqa xmm10, [rk15]
|
||||
movdqa xmm8, xmm3
|
||||
pclmulqdq xmm3, xmm10, 0x1
|
||||
pclmulqdq xmm8, xmm10, 0x10
|
||||
pxor xmm7, xmm8
|
||||
xorps xmm7, xmm3
|
||||
|
||||
movdqa xmm10, [rk17]
|
||||
movdqa xmm8, xmm4
|
||||
pclmulqdq xmm4, xmm10, 0x1
|
||||
pclmulqdq xmm8, xmm10, 0x10
|
||||
pxor xmm7, xmm8
|
||||
pxor xmm7, xmm4
|
||||
|
||||
movdqa xmm10, [rk19]
|
||||
movdqa xmm8, xmm5
|
||||
pclmulqdq xmm5, xmm10, 0x1
|
||||
pclmulqdq xmm8, xmm10, 0x10
|
||||
pxor xmm7, xmm8
|
||||
xorps xmm7, xmm5
|
||||
|
||||
movdqa xmm10, [rk1]
|
||||
movdqa xmm8, xmm6
|
||||
pclmulqdq xmm6, xmm10, 0x1
|
||||
pclmulqdq xmm8, xmm10, 0x10
|
||||
pxor xmm7, xmm8
|
||||
pxor xmm7, xmm6
|
||||
|
||||
|
||||
; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
|
||||
; instead of a cmp instruction, we use the negative flag with the jl instruction
|
||||
add arg3, 128-16
|
||||
jl _final_reduction_for_128
|
||||
|
||||
; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
|
||||
; we can fold 16 bytes at a time if y>=16
|
||||
; continue folding 16B at a time
|
||||
|
||||
; Fold the 16B running value in xmm7 over the next 16B of input.
; On entry: xmm7 = running value, xmm10 = [rk1] (low qword rk1, high qword rk2),
; arg2 = next unread input byte, arg3 = remaining length already biased so that
; the sign of (arg3 - 16) tells whether another full 16B block follows.
_16B_reduction_loop:
|
||||
movdqa xmm8, xmm7
|
||||
pclmulqdq xmm7, xmm10, 0x1 ; high qword of xmm7 times low qword of xmm10 (rk1)
|
||||
pclmulqdq xmm8, xmm10, 0x10 ; low qword of xmm8 times high qword of xmm10 (rk2)
|
||||
pxor xmm7, xmm8
|
||||
movdqu xmm0, [arg2] ; next 16B of input (unaligned load)
|
||||
pxor xmm7, xmm0
|
||||
add arg2, 16
|
||||
sub arg3, 16
|
||||
; instead of a cmp instruction, we utilize the flags with the jge instruction
|
||||
; equivalent of: cmp arg3, 16-16
|
||||
; check if there is any more 16B in the buffer to be able to fold
|
||||
jge _16B_reduction_loop
|
||||
|
||||
;now we have 16+z bytes left to reduce, where 0<= z < 16.
|
||||
;first, we reduce the data in the xmm7 register
|
||||
|
||||
|
||||
_final_reduction_for_128:
|
||||
add arg3, 16
|
||||
je _128_done
|
||||
|
||||
; here we are getting data that is less than 16 bytes.
|
||||
; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes.
|
||||
; after that the registers need to be adjusted.
|
||||
_get_last_two_xmms:
|
||||
|
||||
|
||||
movdqa xmm2, xmm7
|
||||
movdqu xmm1, [arg2 - 16 + arg3]
|
||||
|
||||
; get rid of the extra data that was loaded before
|
||||
; load the shift constant
|
||||
lea rax, [pshufb_shf_table]
|
||||
add rax, arg3
|
||||
movdqu xmm0, [rax]
|
||||
|
||||
|
||||
pshufb xmm7, xmm0
|
||||
pxor xmm0, [mask3]
|
||||
pshufb xmm2, xmm0
|
||||
|
||||
pblendvb xmm2, xmm1 ;xmm0 is implicit
|
||||
;;;;;;;;;;
|
||||
movdqa xmm8, xmm7
|
||||
pclmulqdq xmm7, xmm10, 0x1
|
||||
|
||||
pclmulqdq xmm8, xmm10, 0x10
|
||||
pxor xmm7, xmm8
|
||||
pxor xmm7, xmm2
|
||||
|
||||
_128_done:
|
||||
; compute crc of a 128-bit value
|
||||
movdqa xmm10, [rk5]
|
||||
movdqa xmm0, xmm7
|
||||
|
||||
;64b fold
|
||||
pclmulqdq xmm7, xmm10, 0
|
||||
psrldq xmm0, 8
|
||||
pxor xmm7, xmm0
|
||||
|
||||
;32b fold
|
||||
movdqa xmm0, xmm7
|
||||
pslldq xmm7, 4
|
||||
pclmulqdq xmm7, xmm10, 0x10
|
||||
|
||||
pxor xmm7, xmm0
|
||||
|
||||
|
||||
;barrett reduction
|
||||
_barrett:
|
||||
pand xmm7, [mask2]
|
||||
movdqa xmm1, xmm7
|
||||
movdqa xmm2, xmm7
|
||||
movdqa xmm10, [rk7]
|
||||
|
||||
pclmulqdq xmm7, xmm10, 0
|
||||
pxor xmm7, xmm2
|
||||
pand xmm7, [mask]
|
||||
movdqa xmm2, xmm7
|
||||
pclmulqdq xmm7, xmm10, 0x10
|
||||
pxor xmm7, xmm2
|
||||
pxor xmm7, xmm1
|
||||
pextrd eax, xmm7, 2
|
||||
|
||||
; Common epilogue. On entry eax holds the (still inverted) crc value.
_cleanup:
|
||||
; return c ^ 0xffffffffL;
|
||||
not eax
|
||||
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
; restore xmm6-xmm13 from the save area filled at function entry
movdqa xmm6, [rsp + XMM_SAVE + 16*0]
|
||||
movdqa xmm7, [rsp + XMM_SAVE + 16*1]
|
||||
movdqa xmm8, [rsp + XMM_SAVE + 16*2]
|
||||
movdqa xmm9, [rsp + XMM_SAVE + 16*3]
|
||||
movdqa xmm10, [rsp + XMM_SAVE + 16*4]
|
||||
movdqa xmm11, [rsp + XMM_SAVE + 16*5]
|
||||
movdqa xmm12, [rsp + XMM_SAVE + 16*6]
|
||||
movdqa xmm13, [rsp + XMM_SAVE + 16*7]
|
||||
%endif
|
||||
; release the stack frame reserved at function entry
add rsp, VARIABLE_OFFSET
|
||||
ret
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
align 16
|
||||
; Path for len < 256: skip the 128B folding and go straight to 16B folding
; when at least 32 bytes are available.
_less_than_256:
|
||||
|
||||
; check if there is enough buffer to be able to fold 16B at a time
|
||||
cmp arg3, 32
|
||||
jl _less_than_32
|
||||
|
||||
; if there is, load the constants
|
||||
movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
|
||||
|
||||
movd xmm0, arg1_low32 ; get the initial crc value
|
||||
movdqu xmm7, [arg2] ; load the first 16 bytes of input
|
||||
pxor xmm7, xmm0 ; xor the initial crc value into the low 32 bits
|
||||
|
||||
; update the buffer pointer
|
||||
add arg2, 16
|
||||
|
||||
; update the counter. subtract 32 instead of 16 to save one instruction from the loop
|
||||
sub arg3, 32
|
||||
|
||||
jmp _16B_reduction_loop
|
||||
|
||||
|
||||
align 16
|
||||
; Path for len < 32: dispatch on len == 0, len == 16, len < 16, or 16 < len < 32.
_less_than_32:
|
||||
; mov initial crc to the return value. this is necessary for zero-length buffers.
; (arg1_low32 was inverted at entry; _cleanup inverts eax back, so a zero-length
; buffer returns the caller's init_crc unchanged)
|
||||
mov eax, arg1_low32
|
||||
test arg3, arg3
|
||||
je _cleanup
|
||||
|
||||
movd xmm0, arg1_low32 ; get the initial crc value
|
||||
|
||||
cmp arg3, 16
|
||||
je _exact_16_left
|
||||
jl _less_than_16_left
|
||||
|
||||
; 16 < len < 32: take the first 16 bytes here, the tail is handled by _get_last_two_xmms
movdqu xmm7, [arg2] ; load the first 16 bytes of input
|
||||
pxor xmm7, xmm0 ; xor the initial crc value
|
||||
add arg2, 16
|
||||
sub arg3, 16
|
||||
movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
|
||||
jmp _get_last_two_xmms
|
||||
|
||||
|
||||
align 16
|
||||
; Path for 0 < len < 16. The input is copied piecewise (8, 4, 2, 1 bytes) into a
; zeroed 16B scratch slot at [rsp] so that nothing beyond the caller's buffer is read.
; On entry xmm0 holds the (inverted) initial crc value.
_less_than_16_left:
|
||||
; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
|
||||
|
||||
pxor xmm1, xmm1
|
||||
mov r11, rsp ; r11 = write cursor into the scratch slot
|
||||
movdqa [r11], xmm1
|
||||
|
||||
cmp arg3, 4
|
||||
jl _only_less_than_4 ; 1..3 bytes are handled separately and jump to _barrett
|
||||
|
||||
; backup the counter value
; (r9 keeps the original length; it selects the pshufb mask in _zero_left)
|
||||
mov r9, arg3
|
||||
cmp arg3, 8
|
||||
jl _less_than_8_left
|
||||
|
||||
; load 8 Bytes
|
||||
mov rax, [arg2]
|
||||
mov [r11], rax
|
||||
add r11, 8
|
||||
sub arg3, 8
|
||||
add arg2, 8
|
||||
_less_than_8_left:
|
||||
|
||||
cmp arg3, 4
|
||||
jl _less_than_4_left
|
||||
|
||||
; load 4 Bytes
|
||||
mov eax, [arg2]
|
||||
mov [r11], eax
|
||||
add r11, 4
|
||||
sub arg3, 4
|
||||
add arg2, 4
|
||||
_less_than_4_left:
|
||||
|
||||
cmp arg3, 2
|
||||
jl _less_than_2_left
|
||||
|
||||
; load 2 Bytes
|
||||
mov ax, [arg2]
|
||||
mov [r11], ax
|
||||
add r11, 2
|
||||
sub arg3, 2
|
||||
add arg2, 2
|
||||
_less_than_2_left:
|
||||
cmp arg3, 1
|
||||
jl _zero_left
|
||||
|
||||
; load 1 Byte
|
||||
mov al, [arg2]
|
||||
mov [r11], al
|
||||
|
||||
_zero_left:
|
||||
movdqa xmm7, [rsp] ; the copied input, zero-padded to 16 bytes
|
||||
pxor xmm7, xmm0 ; xor the initial crc value
|
||||
|
||||
; pshufb with the mask at pshufb_shf_table + r9 moves the r9 valid bytes to the
; top of xmm7 (a byte shift left by 16 - r9) and zero-fills the bytes below
lea rax,[pshufb_shf_table]
|
||||
movdqu xmm0, [rax + r9]
|
||||
pshufb xmm7,xmm0
|
||||
|
||||
|
||||
|
||||
jmp _128_done
|
||||
|
||||
align 16
|
||||
; Path for len == 16: the whole input fits one xmm register, so no folding is
; needed before the final 128-bit reduction. xmm0 holds the initial crc value.
_exact_16_left:
|
||||
movdqu xmm7, [arg2] ; load all 16 bytes of input
|
||||
pxor xmm7, xmm0 ; xor the initial crc value
|
||||
|
||||
jmp _128_done
|
||||
|
||||
; Paths for len = 3, 2 or 1. Reached from _less_than_16_left, which has already
; zeroed the 16B scratch slot at [rsp] and set r11 = rsp; xmm0 holds the initial
; crc value. The bytes are copied one at a time into the scratch slot, combined
; with the crc, shifted left by (8 - len) bytes and passed directly to _barrett.
_only_less_than_4:
|
||||
cmp arg3, 3
|
||||
jl _only_less_than_3
|
||||
|
||||
; load 3 Bytes
|
||||
mov al, [arg2]
|
||||
mov [r11], al
|
||||
|
||||
mov al, [arg2+1]
|
||||
mov [r11+1], al
|
||||
|
||||
mov al, [arg2+2]
|
||||
mov [r11+2], al
|
||||
|
||||
movdqa xmm7, [rsp]
|
||||
pxor xmm7, xmm0 ; xor the initial crc value
|
||||
|
||||
pslldq xmm7, 5 ; byte shift left by 8 - 3
|
||||
|
||||
jmp _barrett
|
||||
_only_less_than_3:
|
||||
cmp arg3, 2
|
||||
jl _only_less_than_2
|
||||
|
||||
; load 2 Bytes
|
||||
mov al, [arg2]
|
||||
mov [r11], al
|
||||
|
||||
mov al, [arg2+1]
|
||||
mov [r11+1], al
|
||||
|
||||
movdqa xmm7, [rsp]
|
||||
pxor xmm7, xmm0 ; xor the initial crc value
|
||||
|
||||
pslldq xmm7, 6 ; byte shift left by 8 - 2
|
||||
|
||||
jmp _barrett
|
||||
_only_less_than_2:
|
||||
|
||||
; load 1 Byte
|
||||
mov al, [arg2]
|
||||
mov [r11], al
|
||||
|
||||
movdqa xmm7, [rsp]
|
||||
pxor xmm7, xmm0 ; xor the initial crc value
|
||||
|
||||
pslldq xmm7, 7 ; byte shift left by 8 - 1
|
||||
|
||||
jmp _barrett
|
||||
|
||||
section .data
|
||||
|
||||
; precomputed constants
|
||||
align 16
|
||||
rk1 :
|
||||
DQ 0x00000000ccaa009e
|
||||
rk2 :
|
||||
DQ 0x00000001751997d0
|
||||
rk3 :
|
||||
DQ 0x000000014a7fe880
|
||||
rk4 :
|
||||
DQ 0x00000001e88ef372
|
||||
rk5 :
|
||||
DQ 0x00000000ccaa009e
|
||||
rk6 :
|
||||
DQ 0x0000000163cd6124
|
||||
rk7 :
|
||||
DQ 0x00000001f7011640
|
||||
rk8 :
|
||||
DQ 0x00000001db710640
|
||||
rk9 :
|
||||
DQ 0x00000001d7cfc6ac
|
||||
rk10 :
|
||||
DQ 0x00000001ea89367e
|
||||
rk11 :
|
||||
DQ 0x000000018cb44e58
|
||||
rk12 :
|
||||
DQ 0x00000000df068dc2
|
||||
rk13 :
|
||||
DQ 0x00000000ae0b5394
|
||||
rk14 :
|
||||
DQ 0x00000001c7569e54
|
||||
rk15 :
|
||||
DQ 0x00000001c6e41596
|
||||
rk16 :
|
||||
DQ 0x0000000154442bd4
|
||||
rk17 :
|
||||
DQ 0x0000000174359406
|
||||
rk18 :
|
||||
DQ 0x000000003db1ecdc
|
||||
rk19 :
|
||||
DQ 0x000000015a546366
|
||||
rk20 :
|
||||
DQ 0x00000000f1da05aa
|
||||
|
||||
|
||||
pshufb_shf_table:
|
||||
; use these values for shift constants for the pshufb instruction
|
||||
; different alignments result in values as shown:
|
||||
; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
|
||||
; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
|
||||
; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
|
||||
; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
|
||||
; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
|
||||
; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
|
||||
; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
|
||||
; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
|
||||
; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
|
||||
; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
|
||||
; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
|
||||
; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
|
||||
; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
|
||||
; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
|
||||
; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
|
||||
dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
|
||||
dq 0x0706050403020100, 0x000e0d0c0b0a0908
|
||||
|
||||
|
||||
mask:
|
||||
dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
|
||||
mask2:
|
||||
dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
|
||||
mask3:
|
||||
dq 0x8080808080808080, 0x8080808080808080
|
120
igzip/crc_data.asm
Normal file
120
igzip/crc_data.asm
Normal file
@ -0,0 +1,120 @@
|
||||
%ifndef CRC_DATA
|
||||
|
||||
%define CRC_DATA
|
||||
; precomputed constants
|
||||
section .data
|
||||
|
||||
align 32
|
||||
|
||||
global pshufb_shf_table:data internal
|
||||
; pshufb control masks, one 16-byte row per byte-shift amount.
; Row n (1..15) is annotated "shl (16-n) / shrn": used as-is it performs the
; left byte shift; per the mask3 comment in this file, XORing it with mask3
; gives the corresponding right-shift mask.
pshufb_shf_table:
|
||||
dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
|
||||
dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
|
||||
dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
|
||||
dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
|
||||
dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
|
||||
dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
|
||||
dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
|
||||
dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
|
||||
dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
|
||||
dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
|
||||
dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
|
||||
dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
|
||||
dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
|
||||
dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
|
||||
dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
|
||||
|
||||
;; ; MAGIC value, which when folded 4 times gives FFFFFF00000...0000
|
||||
;; global crc_init_4
|
||||
;; crc_init_4:
|
||||
;; dq 0x9db42487
|
||||
;; dq 0x0
|
||||
;; dq 0x0
|
||||
;; dq 0x0
|
||||
|
||||
; constant used to shift/fold one XMM reg down by 4 XMM widths
|
||||
global fold_4:data internal
|
||||
fold_4:
|
||||
dq 0x00000001c6e41596
|
||||
dq 0x0000000154442bd4
|
||||
|
||||
|
||||
;value, which when xored with pshufb_shf_table entry gives shr value
|
||||
global mask3:data internal
|
||||
mask3: dq 0x8080808080808080, 0x8080808080808080
|
||||
|
||||
%ifndef CRC_TABLE
|
||||
%define CRC_TABLE
|
||||
; Place marker in library to avoid linker warning
|
||||
align 4
|
||||
global CrcTable:data internal
|
||||
CrcTable:
|
||||
dd 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba
|
||||
dd 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3
|
||||
dd 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988
|
||||
dd 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91
|
||||
dd 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de
|
||||
dd 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7
|
||||
dd 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec
|
||||
dd 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5
|
||||
dd 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172
|
||||
dd 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b
|
||||
dd 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940
|
||||
dd 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59
|
||||
dd 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116
|
||||
dd 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f
|
||||
dd 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924
|
||||
dd 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d
|
||||
dd 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a
|
||||
dd 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433
|
||||
dd 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818
|
||||
dd 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01
|
||||
dd 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e
|
||||
dd 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457
|
||||
dd 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c
|
||||
dd 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65
|
||||
dd 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2
|
||||
dd 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb
|
||||
dd 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0
|
||||
dd 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9
|
||||
dd 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086
|
||||
dd 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f
|
||||
dd 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4
|
||||
dd 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad
|
||||
dd 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a
|
||||
dd 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683
|
||||
dd 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8
|
||||
dd 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1
|
||||
dd 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe
|
||||
dd 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7
|
||||
dd 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc
|
||||
dd 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5
|
||||
dd 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252
|
||||
dd 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b
|
||||
dd 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60
|
||||
dd 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79
|
||||
dd 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236
|
||||
dd 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f
|
||||
dd 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04
|
||||
dd 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d
|
||||
dd 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a
|
||||
dd 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713
|
||||
dd 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38
|
||||
dd 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21
|
||||
dd 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e
|
||||
dd 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777
|
||||
dd 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c
|
||||
dd 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45
|
||||
dd 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2
|
||||
dd 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db
|
||||
dd 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0
|
||||
dd 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9
|
||||
dd 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6
|
||||
dd 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf
|
||||
dd 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94
|
||||
dd 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
|
||||
End_CrcTable:
|
||||
|
||||
%endif ;; CRC_TABLE
|
||||
|
||||
%endif ;; CRC_DATA
|
81
igzip/crc_inflate.h
Normal file
81
igzip/crc_inflate.h
Normal file
@ -0,0 +1,81 @@
|
||||
#ifndef INFLATE_CRC_TABLE
|
||||
#define INFLATE_CRC_TABLE
|
||||
|
||||
uint32_t inflate_crc_table[256] = {
|
||||
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
|
||||
0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
|
||||
0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
|
||||
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
|
||||
0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
|
||||
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
|
||||
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
|
||||
0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
|
||||
0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
|
||||
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
|
||||
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
|
||||
0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
|
||||
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
|
||||
0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
|
||||
0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
|
||||
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
|
||||
0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
|
||||
0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
|
||||
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
|
||||
0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
|
||||
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
|
||||
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
|
||||
0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
|
||||
0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
|
||||
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
|
||||
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
|
||||
0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
|
||||
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
|
||||
0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
|
||||
0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
|
||||
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
|
||||
0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
|
||||
0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
|
||||
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
|
||||
0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
|
||||
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
|
||||
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
|
||||
0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
|
||||
0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
|
||||
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
|
||||
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
|
||||
0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
|
||||
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
|
||||
0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
|
||||
0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
|
||||
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
|
||||
0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
|
||||
0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
|
||||
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
|
||||
0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
|
||||
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
|
||||
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
|
||||
0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
|
||||
0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
|
||||
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
|
||||
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
|
||||
0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
|
||||
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
|
||||
0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
|
||||
0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
|
||||
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
|
||||
0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
|
||||
0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
|
||||
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d};
|
||||
|
||||
|
||||
uint32_t find_crc(uint8_t * start, uint32_t length)
|
||||
{
|
||||
uint32_t crc = ~0;
|
||||
uint8_t *end = start + length;
|
||||
|
||||
while (start < end)
|
||||
crc = (crc >> 8) ^ inflate_crc_table[(crc & 0x000000FF) ^ *start++];
|
||||
return ~crc;
|
||||
}
|
||||
|
||||
#endif
|
195
igzip/crc_utils_01.asm
Normal file
195
igzip/crc_utils_01.asm
Normal file
@ -0,0 +1,195 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%include "options.asm"
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
; Functional versions of CRC macros
|
||||
|
||||
%include "igzip_buffer_utils_01.asm"
|
||||
|
||||
extern fold_4
|
||||
|
||||
%define crc_0 xmm0 ; in/out: crc state
|
||||
%define crc_1 xmm1 ; in/out: crc state
|
||||
%define crc_2 xmm2 ; in/out: crc state
|
||||
%define crc_3 xmm3 ; in/out: crc state
|
||||
%define crc_fold xmm4 ; in: (loaded from fold_4)
|
||||
%define crc_tmp0 xmm5 ; tmp
|
||||
%define crc_tmp1 xmm6 ; tmp
|
||||
%define crc_tmp2 xmm7 ; tmp
|
||||
%define crc_tmp3 xmm8 ; tmp
|
||||
%define crc_tmp4 xmm9 ; tmp
|
||||
%define tmp4 rax
|
||||
|
||||
; copy_in_crc_01: copy size bytes (rounded up to 16 bytes) from src to dst
; while updating the CRC state.  src & dst may be unaligned.
; void copy_in_crc(uint8_t *dst, uint8_t *src, uint32_t size, uint32_t *crc)
;   arg 1: rcx: pointer to dst
;   arg 2: rdx: pointer to src
;   arg 3: r8:  size (in bytes)
;   arg 4: r9:  pointer to CRC state; 4 x 16 bytes accessed with movdqa,
;               so it has to be 16-byte aligned
; For elf64 output the arguments are first moved from rdi/rsi/rdx/rcx into
; the registers listed above.
global copy_in_crc_01
copy_in_crc_01:
%ifidn __OUTPUT_FORMAT__, elf64
	; rcx and rdx are read here before they are overwritten below.
	mov	r9, rcx
	mov	r8, rdx
	mov	rdx, rsi
	mov	rcx, rdi
%endif

	; Spill xmm6-xmm9 (used as crc_tmp1..crc_tmp4) so they can be restored
	; for the caller.  Four 16-byte slots plus 8 bytes of padding.
	; NOTE(review): the extra 8 presumably re-aligns rsp to 16 bytes after
	; the return address pushed by call, as movdqa requires - confirm.
	sub	rsp, 8 + 4*16
	movdqa	[rsp+3*16], xmm9
	movdqa	[rsp+2*16], xmm8
	movdqa	[rsp+1*16], xmm7
	movdqa	[rsp+0*16], xmm6

	; Load the fold constant and the four 128-bit lanes of CRC state.
	movdqa	crc_fold, [fold_4 WRT_OPT]
	movdqa	crc_0, [r9 + 0*16]
	movdqa	crc_1, [r9 + 1*16]
	movdqa	crc_2, [r9 + 2*16]
	movdqa	crc_3, [r9 + 3*16]

	; The copy + CRC update itself is done by the COPY_IN_CRC macro.
	COPY_IN_CRC	rcx, rdx, r8, tmp4, crc_0, crc_1, crc_2, crc_3, \
			crc_fold, \
			crc_tmp0, crc_tmp1, crc_tmp2, crc_tmp3, crc_tmp4

	; Write the updated CRC state back.
	movdqa	[r9 + 0*16], crc_0
	movdqa	[r9 + 1*16], crc_1
	movdqa	[r9 + 2*16], crc_2
	movdqa	[r9 + 3*16], crc_3

	; Restore the spilled registers and release the stack space.
	movdqa	xmm6, [rsp+0*16]
	movdqa	xmm7, [rsp+1*16]
	movdqa	xmm8, [rsp+2*16]
	movdqa	xmm9, [rsp+3*16]
	add	rsp, 8 + 4*16
	ret
|
||||
|
||||
; crc_512to32_01: convert the 512-bit CRC state to the real 32-bit value
; uint32_t crc_512to32(uint32_t *crc)
;   arg 1:   rcx: pointer to CRC state (4 x 16 bytes, read with movdqa)
;   returns: eax: 32 bit crc
; For elf64 output the argument is first moved from rdi into rcx.
; Only xmm0-xmm5 and eax are written.
global crc_512to32_01
crc_512to32_01:
%ifidn __OUTPUT_FORMAT__, elf64
	mov	rcx, rdi
%endif

	movdqa	crc_fold, [rk1 WRT_OPT]	; rk1 in low qword, rk2 in high qword
	movdqa	crc_0, [rcx + 0*16]
	movdqa	crc_1, [rcx + 1*16]
	movdqa	crc_2, [rcx + 2*16]
	movdqa	crc_3, [rcx + 3*16]

	; Fold the 4 xmm registers into crc_3, one lane at a time.  For each
	; lane: imm 0x1 multiplies its high qword by rk1, imm 0x10 multiplies
	; its low qword by rk2; both products are xored into the next lane.
	movdqa		crc_tmp0, crc_0
	pclmulqdq	crc_0, crc_fold, 0x1
	pclmulqdq	crc_tmp0, crc_fold, 0x10
	pxor		crc_1, crc_tmp0
	pxor		crc_1, crc_0

	movdqa		crc_tmp0, crc_1
	pclmulqdq	crc_1, crc_fold, 0x1
	pclmulqdq	crc_tmp0, crc_fold, 0x10
	pxor		crc_2, crc_tmp0
	pxor		crc_2, crc_1

	movdqa		crc_tmp0, crc_2
	pclmulqdq	crc_2, crc_fold, 0x1
	pclmulqdq	crc_tmp0, crc_fold, 0x10
	pxor		crc_3, crc_tmp0
	pxor		crc_3, crc_2

	; Reduce the remaining 128 bits with rk5 (low qword) / rk6 (high qword).
	movdqa		crc_fold, [rk5 WRT_OPT]
	movdqa		crc_0, crc_3
	pclmulqdq	crc_3, crc_fold, 0	; low qword * rk5
	psrldq		crc_0, 8		; bring the high qword down
	pxor		crc_3, crc_0

	movdqa		crc_0, crc_3
	pslldq		crc_3, 4
	pclmulqdq	crc_3, crc_fold, 0x10	; low qword * rk6
	pxor		crc_3, crc_0
	pand		crc_3, [mask2 WRT_OPT]	; clear the lowest dword

	; Final reduction to 32 bits with rk7 (low qword) / rk8 (high qword).
	movdqa		crc_2, crc_3
	movdqa		crc_1, crc_3
	movdqa		crc_fold, [rk7 WRT_OPT]

	pclmulqdq	crc_3, crc_fold, 0	; low qword * rk7
	pxor		crc_3, crc_2
	pand		crc_3, [mask WRT_OPT]	; keep the low qword only

	movdqa		crc_2, crc_3
	pclmulqdq	crc_3, crc_fold, 0x10	; low qword * rk8
	pxor		crc_3, crc_2
	pxor		crc_3, crc_1

	; The result is taken from dword 2 and complemented.
	pextrd	eax, crc_3, 2
	not	eax
	ret
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
section .data

; Constants for crc_512to32_01.  Each rkN/rkN+1 pair is loaded as a single
; 16-byte movdqa operand (rk1, rk5 and rk7 are the load addresses), and
; mask/mask2 are used as 16-byte pand operands, so the layout below must
; stay contiguous and 16-byte aligned.
align 16

rk1:	dq 0x00000000ccaa009e
rk2:	dq 0x00000001751997d0

rk5:	dq 0x00000000ccaa009e
rk6:	dq 0x0000000163cd6124

rk7:	dq 0x00000001f7011640
rk8:	dq 0x00000001db710640

mask:	dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000	; keeps the low qword
mask2:	dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF	; clears the low dword
|
194
igzip/crc_utils_04.asm
Normal file
194
igzip/crc_utils_04.asm
Normal file
@ -0,0 +1,194 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%include "options.asm"
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
; Functional versions of CRC macros
|
||||
|
||||
%include "igzip_buffer_utils_04.asm"
|
||||
|
||||
extern fold_4
|
||||
|
||||
%define crc_0 xmm0 ; in/out: crc state
|
||||
%define crc_1 xmm1 ; in/out: crc state
|
||||
%define crc_2 xmm2 ; in/out: crc state
|
||||
%define crc_3 xmm3 ; in/out: crc state
|
||||
%define crc_fold xmm4 ; in: (loaded from fold_4)
|
||||
%define crc_tmp0 xmm5 ; tmp
|
||||
%define crc_tmp1 xmm6 ; tmp
|
||||
%define crc_tmp2 xmm7 ; tmp
|
||||
%define crc_tmp3 xmm8 ; tmp
|
||||
%define crc_tmp4 xmm9 ; tmp
|
||||
%define tmp4 rax
|
||||
|
||||
; copy_in_crc_04: copy size bytes (rounded up to 16 bytes) from src to dst
; while updating the CRC state.  src & dst may be unaligned.
; void copy_in_crc(uint8_t *dst, uint8_t *src, uint32_t size, uint32_t *crc)
;   arg 1: rcx: pointer to dst
;   arg 2: rdx: pointer to src
;   arg 3: r8:  size (in bytes)
;   arg 4: r9:  pointer to CRC state; 4 x 16 bytes accessed with vmovdqa,
;               so it has to be 16-byte aligned
; For elf64 output the arguments are first moved from rdi/rsi/rdx/rcx into
; the registers listed above.
global copy_in_crc_04
copy_in_crc_04:
%ifidn __OUTPUT_FORMAT__, elf64
	; rcx and rdx are read here before they are overwritten below.
	mov	r9, rcx
	mov	r8, rdx
	mov	rdx, rsi
	mov	rcx, rdi
%endif

	; Spill xmm6-xmm9 (used as crc_tmp1..crc_tmp4) so they can be restored
	; for the caller.  Four 16-byte slots plus 8 bytes of padding.
	; NOTE(review): the extra 8 presumably re-aligns rsp to 16 bytes after
	; the return address pushed by call, as vmovdqa requires - confirm.
	sub	rsp, 8 + 4*16
	vmovdqa	[rsp+3*16], xmm9
	vmovdqa	[rsp+2*16], xmm8
	vmovdqa	[rsp+1*16], xmm7
	vmovdqa	[rsp+0*16], xmm6

	; Load the fold constant and the four 128-bit lanes of CRC state.
	vmovdqa	crc_fold, [fold_4 WRT_OPT]
	vmovdqa	crc_0, [r9 + 0*16]
	vmovdqa	crc_1, [r9 + 1*16]
	vmovdqa	crc_2, [r9 + 2*16]
	vmovdqa	crc_3, [r9 + 3*16]

	; The copy + CRC update itself is done by the COPY_IN_CRC macro.
	COPY_IN_CRC	rcx, rdx, r8, tmp4, crc_0, crc_1, crc_2, crc_3, \
			crc_fold, \
			crc_tmp0, crc_tmp1, crc_tmp2, crc_tmp3, crc_tmp4

	; Write the updated CRC state back.
	vmovdqa	[r9 + 0*16], crc_0
	vmovdqa	[r9 + 1*16], crc_1
	vmovdqa	[r9 + 2*16], crc_2
	vmovdqa	[r9 + 3*16], crc_3

	; Restore the spilled registers and release the stack space.
	vmovdqa	xmm6, [rsp+0*16]
	vmovdqa	xmm7, [rsp+1*16]
	vmovdqa	xmm8, [rsp+2*16]
	vmovdqa	xmm9, [rsp+3*16]
	add	rsp, 8 + 4*16
	ret
|
||||
|
||||
; crc_512to32_04: convert the 512-bit CRC state to the real 32-bit value
; (VEX-encoded variant)
; uint32_t crc_512to32(uint32_t *crc)
;   arg 1:   rcx: pointer to CRC state (4 x 16 bytes, read with vmovdqa)
;   returns: eax: 32 bit crc
; For elf64 output the argument is first moved from rdi into rcx.
; Only xmm0-xmm5 and eax are written.
global crc_512to32_04
crc_512to32_04:
%ifidn __OUTPUT_FORMAT__, elf64
	mov	rcx, rdi
%endif

	vmovdqa	crc_fold, [rk1 WRT_OPT]	; rk1 in low qword, rk2 in high qword
	vmovdqa	crc_0, [rcx + 0*16]
	vmovdqa	crc_1, [rcx + 1*16]
	vmovdqa	crc_2, [rcx + 2*16]
	vmovdqa	crc_3, [rcx + 3*16]

	; Fold the 4 xmm registers into crc_3, one lane at a time.  For each
	; lane: imm 0x1 multiplies its high qword by rk1, imm 0x10 multiplies
	; its low qword by rk2; both products are xored into the next lane.
	vmovdqa		crc_tmp0, crc_0
	vpclmulqdq	crc_0, crc_fold, 0x1
	vpclmulqdq	crc_tmp0, crc_fold, 0x10
	vpxor		crc_1, crc_tmp0
	vpxor		crc_1, crc_0

	vmovdqa		crc_tmp0, crc_1
	vpclmulqdq	crc_1, crc_fold, 0x1
	vpclmulqdq	crc_tmp0, crc_fold, 0x10
	vpxor		crc_2, crc_tmp0
	vpxor		crc_2, crc_1

	vmovdqa		crc_tmp0, crc_2
	vpclmulqdq	crc_2, crc_fold, 0x1
	vpclmulqdq	crc_tmp0, crc_fold, 0x10
	vpxor		crc_3, crc_tmp0
	vpxor		crc_3, crc_2

	; Reduce the remaining 128 bits with rk5 (low qword) / rk6 (high qword).
	vmovdqa		crc_fold, [rk5 WRT_OPT]
	vmovdqa		crc_0, crc_3
	vpclmulqdq	crc_3, crc_fold, 0	; low qword * rk5
	vpsrldq		crc_0, 8		; bring the high qword down
	vpxor		crc_3, crc_0

	vmovdqa		crc_0, crc_3
	vpslldq		crc_3, 4
	vpclmulqdq	crc_3, crc_fold, 0x10	; low qword * rk6
	vpxor		crc_3, crc_0
	vpand		crc_3, [mask2 WRT_OPT]	; clear the lowest dword

	; Final reduction to 32 bits with rk7 (low qword) / rk8 (high qword).
	vmovdqa		crc_2, crc_3
	vmovdqa		crc_1, crc_3
	vmovdqa		crc_fold, [rk7 WRT_OPT]

	vpclmulqdq	crc_3, crc_fold, 0	; low qword * rk7
	vpxor		crc_3, crc_2
	vpand		crc_3, [mask WRT_OPT]	; keep the low qword only

	vmovdqa		crc_2, crc_3
	vpclmulqdq	crc_3, crc_fold, 0x10	; low qword * rk8
	vpxor		crc_3, crc_2
	vpxor		crc_3, crc_1

	; The result is taken from dword 2 and complemented.
	vpextrd	eax, crc_3, 2
	not	eax
	ret
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
section .data

; Constants for crc_512to32_04.  Each rkN/rkN+1 pair is loaded as a single
; 16-byte vmovdqa operand (rk1, rk5 and rk7 are the load addresses), and
; mask/mask2 are used as 16-byte vpand operands, so the layout below must
; stay contiguous and 16-byte aligned.
align 16

rk1:	dq 0x00000000ccaa009e
rk2:	dq 0x00000001751997d0

rk5:	dq 0x00000000ccaa009e
rk6:	dq 0x0000000163cd6124

rk7:	dq 0x00000001f7011640
rk8:	dq 0x00000001db710640

mask:	dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000	; keeps the low qword
mask2:	dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF	; clears the low dword
|
165
igzip/data_struct2.asm
Normal file
165
igzip/data_struct2.asm
Normal file
@ -0,0 +1,165 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; START_FIELDS
;; Begins a new structure description: resets the running field offset and
;; the largest field alignment seen so far.
%macro START_FIELDS 0
%assign _STRUCT_ALIGN	0
%assign _FIELD_OFFSET	0
%endm
|
||||
|
||||
;; FIELD name, size, align
;; Rounds the running offset up to a multiple of <align>, defines <name> as
;; that offset, then advances the running offset by <size>.  _STRUCT_ALIGN
;; is raised to <align> when <align> is larger.  The rounding uses a bit
;; mask, so <align> has to be a power of two.
%macro FIELD 3
%assign _FIELD_OFFSET (_FIELD_OFFSET + (%3) - 1) & (~ ((%3)-1))
%1	equ	_FIELD_OFFSET
%assign _FIELD_OFFSET _FIELD_OFFSET + (%2)
%if (%3 > _STRUCT_ALIGN)
%assign _STRUCT_ALIGN %3
%endif
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; BitBuf2 layout.  With the FIELD rules the offsets come out as
;; 0, 8, 16, 24, 32 (4 bytes of padding after _m_bit_count), giving
;; _BitBuf2_size = 40 and _BitBuf2_align = 8.
START_FIELDS	;; BitBuf2

;;	name		size	align
FIELD	_m_bits,	8,	8
FIELD	_m_bit_count,	4,	4
FIELD	_m_out_buf,	8,	8
FIELD	_m_out_end,	8,	8
FIELD	_m_out_start,	8,	8

%assign _BitBuf2_size	_FIELD_OFFSET
%assign _BitBuf2_align	_STRUCT_ALIGN
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; isal_zstate layout.  BSIZE and HASH_SIZE must already be defined when
;; this is assembled.  The embedded BitBuf2 is placed with the size and
;; alignment computed for it earlier (_BitBuf2_size / _BitBuf2_align).
START_FIELDS	;; isal_zstate

;;	name			size		align
FIELD	_b_bytes_valid,		4,		4
FIELD	_b_bytes_processed,	4,		4
FIELD	_file_start,		8,		8
FIELD	_crc,			64,		16
FIELD	_bitbuf,		_BitBuf2_size,	_BitBuf2_align
FIELD	_state,			4,		4
FIELD	_count,			4,		4
FIELD	_tmp_out_buff,		16,		1
FIELD	_tmp_out_start,		4,		4
FIELD	_tmp_out_end,		4,		4
FIELD	_last_flush,		4,		4
FIELD	_has_gzip_hdr,		4,		4
FIELD	_has_eob,		4,		4
FIELD	_has_eob_hdr,		4,		4
FIELD	_left_over,		4,		4
FIELD	_buffer,		BSIZE+16,	32
FIELD	_head,			HASH_SIZE*2,	16

%assign _isal_zstate_size	_FIELD_OFFSET
%assign _isal_zstate_align	_STRUCT_ALIGN

;; Offsets of the BitBuf2 members relative to the start of isal_zstate.
_bitbuf_m_bits		equ	_bitbuf+_m_bits
_bitbuf_m_bit_count	equ	_bitbuf+_m_bit_count
_bitbuf_m_out_buf	equ	_bitbuf+_m_out_buf
_bitbuf_m_out_end	equ	_bitbuf+_m_out_end
_bitbuf_m_out_start	equ	_bitbuf+_m_out_start
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; isal_zstream layout.  The isal_zstate is embedded as the last member,
;; placed with the size and alignment computed for it earlier.
START_FIELDS	;; isal_zstream

;;	name			size			align
FIELD	_next_in,		8,			8
FIELD	_avail_in,		4,			4
FIELD	_total_in,		4,			4
FIELD	_next_out,		8,			8
FIELD	_avail_out,		4,			4
FIELD	_total_out,		4,			4
FIELD	_hufftables,		8,			8
FIELD	_end_of_stream,		4,			4
FIELD	_flush,			4,			4
FIELD	_internal_state,	_isal_zstate_size,	_isal_zstate_align

%assign _isal_zstream_size	_FIELD_OFFSET
%assign _isal_zstream_align	_STRUCT_ALIGN

;; Offsets of the isal_zstate members relative to the start of isal_zstream.
_internal_state_b_bytes_valid		equ	_internal_state+_b_bytes_valid
_internal_state_b_bytes_processed	equ	_internal_state+_b_bytes_processed
_internal_state_file_start		equ	_internal_state+_file_start
_internal_state_crc			equ	_internal_state+_crc
_internal_state_bitbuf			equ	_internal_state+_bitbuf
_internal_state_state			equ	_internal_state+_state
_internal_state_count			equ	_internal_state+_count
_internal_state_tmp_out_buff		equ	_internal_state+_tmp_out_buff
_internal_state_tmp_out_start		equ	_internal_state+_tmp_out_start
_internal_state_tmp_out_end		equ	_internal_state+_tmp_out_end
_internal_state_last_flush		equ	_internal_state+_last_flush
_internal_state_has_gzip_hdr		equ	_internal_state+_has_gzip_hdr
_internal_state_has_eob			equ	_internal_state+_has_eob
_internal_state_has_eob_hdr		equ	_internal_state+_has_eob_hdr
_internal_state_left_over		equ	_internal_state+_left_over
_internal_state_buffer			equ	_internal_state+_buffer
_internal_state_head			equ	_internal_state+_head

;; Offsets of the nested BitBuf2 members relative to the start of isal_zstream.
_internal_state_bitbuf_m_bits		equ	_internal_state+_bitbuf_m_bits
_internal_state_bitbuf_m_bit_count	equ	_internal_state+_bitbuf_m_bit_count
_internal_state_bitbuf_m_out_buf	equ	_internal_state+_bitbuf_m_out_buf
_internal_state_bitbuf_m_out_end	equ	_internal_state+_bitbuf_m_out_end
_internal_state_bitbuf_m_out_start	equ	_internal_state+_bitbuf_m_out_start
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Values stored in the _state field.
;; NOTE(review): the value 5 is not assigned here - confirm against the C
;; definition of the state enum.
ZSTATE_HDR			equ	1
ZSTATE_BODY			equ	2
ZSTATE_FLUSH_READ_BUFFER	equ	3
ZSTATE_SYNC_FLUSH		equ	4
ZSTATE_TRL			equ	6

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Flush modes and stored-block constants.
_NO_FLUSH	equ	0
_SYNC_FLUSH	equ	1
_FULL_FLUSH	equ	2
_STORED_BLK	equ	0
%assign _STORED_BLK_END 65535
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
81
igzip/detect_repeated_char.asm
Normal file
81
igzip/detect_repeated_char.asm
Normal file
@ -0,0 +1,81 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; detect_repeated_char buf, size
;;
;; Checks whether the memory from buf up to buf + size consists of a single
;; repeated byte value.  The data is compared 8 bytes at a time and at
;; least one compare is always done, so size is expected to be a non-zero
;; multiple of 8.
;; Returns (rax): the repeated byte value (0-255) if every 8-byte group
;; matches, otherwise -1.
%ifidn __OUTPUT_FORMAT__, elf64
 %define buf	rdi
 %define size	rsi
%elifidn __OUTPUT_FORMAT__, win64
 %define buf	rcx
 %define size	rdx
%endif	; output formats

%define tmp	r10

global detect_repeated_char
detect_repeated_char:

	;; replicate the 1st byte to 8 bytes: multiplying a value below 256
	;; by 0x0101010101010101 places a copy of it in every byte of rax
	movzx	eax, byte [buf]
	mov	tmp, 0x0101010101010101
	imul	rax, tmp

	;; scan the input; tmp marks the end of the buffer
	lea	tmp, [buf + size]
_loop:
	cmp	rax, [buf]
	jne	_fail
	add	buf, 8
	cmp	buf, tmp
	jb	_loop

	;; every group matched: return the byte value itself
	shr	rax, 56
	ret

_fail:
	mov	rax, -1

_end:
	ret

%undef buf
%undef size
%undef tmp
|
118
igzip/generate_constant_block_header.c
Normal file
118
igzip/generate_constant_block_header.c
Normal file
@ -0,0 +1,118 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include "huff_codes.h"
|
||||
#include "bitbuf2.h"
|
||||
|
||||
/* Size in bytes of the buffer that receives the generated block header. */
#define MAX_HEADER_SIZE 350
/* Size in bytes of the constant input block; parenthesized so the macro
 * expands safely inside larger expressions (e.g. x / BLOCK_SIZE). */
#define BLOCK_SIZE (16 * 1024)
|
||||
|
||||
/**
 * @brief Prints a header as the C array definition "unsigned char data[]".
 * @param outfile: the file the array is printed to.
 * @param header: the header bytes; bit_count / 8 + 1 bytes are read, i.e. all
 * complete bytes plus the byte holding the remaining bit_count % 8 bits.
 * @param bit_count: length of the header in bits.
 *
 * Eight bytes are printed per line, each line starting with a tab.
 */
void fprint_header(FILE * outfile, uint8_t * header, uint64_t bit_count)
{
	/* Same width as bit_count, so the loop bound is never compared
	 * against a narrower signed index. */
	uint64_t i;
	uint64_t complete_bytes = bit_count / 8;

	fprintf(outfile, "unsigned char data[] = {");
	for (i = 0; i <= complete_bytes; i++) {
		if ((i & 7) == 0)
			fprintf(outfile, "\n\t");
		else
			fprintf(outfile, " ");

		/* The last byte is printed without a trailing comma. */
		if (i < complete_bytes)
			fprintf(outfile, "0x%02x,", header[i]);
		else
			fprintf(outfile, "0x%02x", header[i]);
	}
	fprintf(outfile, "\t};\n\n");
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
/* Generates a header for a constant block, along with some manual
|
||||
* twiddling to create a header with the desired properties*/
|
||||
uint8_t stream[BLOCK_SIZE];
|
||||
struct isal_huff_histogram histogram;
|
||||
uint64_t *lit_histogram = histogram.lit_len_histogram;
|
||||
uint64_t *dist_histogram = histogram.dist_histogram;
|
||||
uint8_t header[MAX_HEADER_SIZE];
|
||||
struct huff_tree lit_tree, dist_tree;
|
||||
struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1];
|
||||
struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
|
||||
uint64_t bit_count;
|
||||
|
||||
uint8_t repeated_char = 0x00;
|
||||
|
||||
memset(header, 0, sizeof(header));
|
||||
memset(&histogram, 0, sizeof(histogram)); /* Initialize histograms. */
|
||||
memset(stream, repeated_char, sizeof(stream));
|
||||
memset(lit_tree_array, 0, sizeof(lit_tree_array));
|
||||
memset(dist_tree_array, 0, sizeof(dist_tree_array));
|
||||
memset(lit_huff_table, 0, sizeof(lit_huff_table));
|
||||
memset(dist_huff_table, 0, sizeof(dist_huff_table));
|
||||
|
||||
isal_update_histogram(stream, sizeof(stream), &histogram);
|
||||
|
||||
/* These are set to manually change the histogram to create a header with the
|
||||
* desired properties. In this case, the header is modified so that it is byte
|
||||
* unaligned by 6 bits, so that 0 is a 2 bit code, so that the header plus the
|
||||
* encoding of one 0 is byte aligned*/
|
||||
lit_histogram[repeated_char] = 20;
|
||||
lit_histogram[280] = 2;
|
||||
lit_histogram[264] = 5;
|
||||
lit_histogram[282] = 0;
|
||||
|
||||
lit_tree = create_symbol_subset_huff_tree(lit_tree_array, lit_histogram, LIT_LEN);
|
||||
dist_tree = create_symbol_subset_huff_tree(dist_tree_array, dist_histogram, DIST_LEN);
|
||||
if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, 15) > 0) {
|
||||
printf("Error, code with invalid length for Deflate standard.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, 15) > 0) {
|
||||
printf("Error, code with invalid length for Deflate standard.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Remove litral symbol corresponding to the unoptimal look back
|
||||
* distance of 258 found by gen_histogram*/
|
||||
dist_huff_table[16].length = 0;
|
||||
|
||||
bit_count = create_header(header, sizeof(header), lit_huff_table, dist_huff_table, 1);
|
||||
printf("Header for %x\n", repeated_char);
|
||||
fprintf(stdout, "Complete Bytes: %lu\n", bit_count / 8);
|
||||
fprintf(stdout, "Byte Offset: %lu\n\n", (bit_count) & 7);
|
||||
fprint_header(stdout, header, bit_count);
|
||||
printf("\n");
|
||||
|
||||
return 0;
|
||||
}
|
425
igzip/generate_custom_hufftables.c
Normal file
425
igzip/generate_custom_hufftables.c
Normal file
@ -0,0 +1,425 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
/* This program can be used to generate a custom huffman encoding to get
|
||||
* better data compression. This is most useful when the type of data being
|
||||
* compressed is well known.
|
||||
*
|
||||
* To use generate_custom_hufftables, pass a sequence of files to the program
|
||||
* that together form an accurate representation of the data that is being
|
||||
* compressed. Generate_custom_hufftables will then produce the file
|
||||
* hufftables_c.c, which should be moved to replace its counterpart in the igzip
|
||||
* source folder. After recompiling the Isa-l library, the igzip compression
|
||||
* functions will use the new hufftables.
|
||||
*
|
||||
* Generate_custom_hufftables should be compiled with the same compile time
|
||||
* parameters as the igzip source code. Generating custom hufftables with
|
||||
* different compile time parameters may cause igzip to produce invalid output
|
||||
* for the reasons described below. The default parameters used by
|
||||
* generate_custom_hufftables are the same as the default parameters used by
|
||||
* igzip.
|
||||
*
|
||||
* *WARNING* generate custom hufftables must be compiled with a HIST_SIZE that
|
||||
* is at least as large as the HIST_SIZE used by igzip. By default HIST_SIZE is
|
||||
* 8, the maximum usable HIST_SIZE is 32. The reason for this is to generate
|
||||
* better compression. Igzip cannot produce look back distances with sizes
|
||||
* larger than the HIST_SIZE * 1024 igzip was compiled with, so look back
|
||||
* distances with sizes larger than HIST_SIZE * 1024 are not assigned a huffman
|
||||
* code.
|
||||
*
|
||||
* To improve compression ratio, the compile time option LIT_SUB is provided to
|
||||
* allow generating custom hufftables which only use a subset of all possible
|
||||
* literals. This can be useful for getting better compression when it is known
|
||||
* that the data being compressed will never contain certain symbols, for
|
||||
* example text files. If this option is used, it needs to be checked that every
|
||||
* possible literal is in fact given a valid code in the output hufftable. This
|
||||
* can be done by checking that every required literal has a positive value for
|
||||
* the length of the code associated with that literal. Literals which have not
|
||||
* been given codes will have a code length of zero. The compile time option
|
||||
* PRINT_CODES (described below) can be used to help manually perform this
|
||||
* check.
|
||||
*
|
||||
* The compile time parameter PRINT_CODES causes the literal/length huffman code
|
||||
* and the distance huffman code created by generate_custom_hufftables to be
|
||||
* printed out. This is printed out where each line corresponds to a different
|
||||
* symbol. The first column is the symbol used to represent each literal (Lit),
|
||||
* end of block symbol (EOB), length (Len) or distance (Dist), the second column
|
||||
* is the associated code value, and the third column is the length in bits of
|
||||
* that code.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
#include "huff_codes.h"
|
||||
#include "bitbuf2.h"
|
||||
|
||||
/*These max code lengths are limited by how the data is stored in
|
||||
* hufftables.asm. The deflate standard max is 15.*/
|
||||
|
||||
#define LONG_DCODE_OFFSET 26
|
||||
#define SHORT_DCODE_OFFSET 20
|
||||
|
||||
#define MAX_HEADER_SIZE IGZIP_MAX_DEF_HDR_SIZE
|
||||
|
||||
#define GZIP_HEADER_SIZE 10
|
||||
#define GZIP_TRAILER_SIZE 8
|
||||
|
||||
/**
 * @brief Prints a table of uint8_t elements to a file.
 *
 * Elements are written as "0x%02x", separated by commas, eight per row. Every
 * row starts on a new line prefixed with begin_line. An empty table
 * (length == 0) prints only the header and the footer.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint8_table(FILE * outfile, uint8_t * table, uint64_t length, char *header,
			char *footer, char *begin_line)
{
	/* Same width as length so the comparison below is never signed/unsigned. */
	uint64_t i;

	fprintf(outfile, "%s", header);
	for (i = 0; i < length; i++) {
		if ((i & 7) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");
		fprintf(outfile, "0x%02x", (unsigned int)table[i]);
		/* No trailing comma after the final element. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}
	fprintf(outfile, "%s", footer);
}
|
||||
|
||||
/**
 * @brief Prints a table of uint16_t elements to a file.
 *
 * Elements are written as "0x%04x", separated by commas, eight per row. Every
 * row starts on a new line prefixed with begin_line. An empty table
 * (length == 0) prints only the header and the footer.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint16_table(FILE * outfile, uint16_t * table, uint64_t length, char *header,
			 char *footer, char *begin_line)
{
	/* Same width as length so the comparison below is never signed/unsigned. */
	uint64_t i;

	fprintf(outfile, "%s", header);
	for (i = 0; i < length; i++) {
		if ((i & 7) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");
		fprintf(outfile, "0x%04x", (unsigned int)table[i]);
		/* No trailing comma after the final element. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}
	fprintf(outfile, "%s", footer);
}
|
||||
|
||||
/**
 * @brief Prints a table of uint32_t elements to a file.
 *
 * Elements are written as "0x%08x", separated by commas, four per row. Every
 * row, including one that holds only the final element, starts on a new line
 * prefixed with begin_line. An empty table (length == 0) prints only the
 * header and the footer.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint32_table(FILE * outfile, uint32_t * table, uint64_t length, char *header,
			 char *footer, char *begin_line)
{
	/* Same width as length so the comparison below is never signed/unsigned. */
	uint64_t i;

	fprintf(outfile, "%s", header);
	for (i = 0; i < length; i++) {
		if ((i & 3) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");
		fprintf(outfile, "0x%08" PRIx32, table[i]);
		/* No trailing comma after the final element. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}
	fprintf(outfile, "%s", footer);
}
|
||||
|
||||
/**
 * @brief Prints a table of uint64_t elements to a file.
 *
 * The header is followed by a newline, then one tab-indented element per line
 * written as "0x%016x", with a comma after every element except the last. An
 * empty table (length == 0) prints the header, the newline and the footer.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 */
void fprint_uint64_table(FILE * outfile, uint64_t * table, uint64_t length, char *header,
			 char *footer)
{
	/* Same width as length so the comparison below is never signed/unsigned. */
	uint64_t i;

	fprintf(outfile, "%s\n", header);
	for (i = 0; i < length; i++) {
		fprintf(outfile, "\t0x%016" PRIx64, table[i]);
		/* Separator only between elements, never after the last one. */
		if (i + 1 < length)
			fprintf(outfile, ",\n");
	}
	fprintf(outfile, "%s", footer);
}
|
||||
|
||||
void fprint_hufftables(FILE * output_file, uint8_t * header, uint32_t bit_count,
|
||||
uint16_t * lit_code_table, uint8_t * lit_code_size_table,
|
||||
uint16_t * dcodes_code_table, uint8_t * dcodes_code_size_table,
|
||||
uint32_t * packed_len_table, uint32_t * packed_dist_table)
|
||||
{
|
||||
fprintf(output_file, "struct isal_hufftables hufftables_default = {\n\n");
|
||||
|
||||
fprint_uint8_table(output_file, header, (bit_count + 7) / 8,
|
||||
"\t.deflate_hdr = {", "\t},\n\n", "\t\t");
|
||||
fprintf(output_file, "\t.deflate_hdr_count = %d,\n", bit_count / 8);
|
||||
fprintf(output_file, "\t.deflate_hdr_extra_bits = %d,\n\n", bit_count & 7);
|
||||
|
||||
fprint_uint32_table(output_file, packed_dist_table, SHORT_DIST_TABLE_SIZE,
|
||||
"\t.dist_table = {", ",\n", "\t\t");
|
||||
fprint_uint32_table(output_file, &packed_dist_table[SHORT_DIST_TABLE_SIZE],
|
||||
LONG_DIST_TABLE_SIZE - SHORT_DIST_TABLE_SIZE,
|
||||
"#ifdef LONGER_HUFFTABLE",
|
||||
"\n#endif /* LONGER_HUFFTABLE */\n\t},\n\n", "\t\t");
|
||||
|
||||
fprint_uint32_table(output_file, packed_len_table, LEN_TABLE_SIZE, "\t.len_table = {",
|
||||
"\t},\n\n", "\t\t");
|
||||
fprint_uint16_table(output_file, lit_code_table, LIT_TABLE_SIZE, "\t.lit_table = {",
|
||||
"\t},\n\n", "\t\t");
|
||||
fprint_uint8_table(output_file, lit_code_size_table, LIT_TABLE_SIZE,
|
||||
"\t.lit_table_sizes = {", "\t},\n\n", "\t\t");
|
||||
|
||||
fprintf(output_file, "#ifndef LONGER_HUFFTABLE\n");
|
||||
fprint_uint16_table(output_file, dcodes_code_table + SHORT_DCODE_OFFSET,
|
||||
DIST_LEN - SHORT_DCODE_OFFSET, "\t.dcodes = {", "\t},\n\n",
|
||||
"\t\t");
|
||||
fprint_uint8_table(output_file, dcodes_code_size_table + SHORT_DCODE_OFFSET,
|
||||
DIST_LEN - SHORT_DCODE_OFFSET, "\t.dcodes_sizes = {", "\t}\n",
|
||||
"\t\t");
|
||||
fprintf(output_file, "#else\n");
|
||||
fprint_uint16_table(output_file, dcodes_code_table + LONG_DCODE_OFFSET,
|
||||
DIST_LEN - LONG_DCODE_OFFSET, "\t.dcodes = {", "\t},\n\n", "\t\t");
|
||||
fprint_uint8_table(output_file, dcodes_code_size_table + LONG_DCODE_OFFSET,
|
||||
DIST_LEN - LONG_DCODE_OFFSET, "\t.dcodes_sizes = {", "\t}\n",
|
||||
"\t\t");
|
||||
fprintf(output_file, "#endif\n");
|
||||
fprintf(output_file, "};\n");
|
||||
}
|
||||
|
||||
void fprint_header(FILE * output_file, uint8_t * header, uint32_t bit_count,
|
||||
uint16_t * lit_code_table, uint8_t * lit_code_size_table,
|
||||
uint16_t * dcodes_code_table, uint8_t * dcodes_code_size_table,
|
||||
uint32_t * packed_len_table, uint32_t * packed_dist_table)
|
||||
{
|
||||
fprintf(output_file, "#include <stdint.h>\n");
|
||||
fprintf(output_file, "#include <igzip_lib.h>\n\n");
|
||||
|
||||
fprintf(output_file, "const uint8_t gzip_hdr[] = {\n"
|
||||
"\t0x1f, 0x8b, 0x08, 0x00, 0x00,\n" "\t0x00, 0x00, 0x00, 0x00, 0xff\t};\n\n");
|
||||
|
||||
fprintf(output_file, "const uint32_t gzip_hdr_bytes = %d;\n", GZIP_HEADER_SIZE);
|
||||
fprintf(output_file, "const uint32_t gzip_trl_bytes = %d;\n\n", GZIP_TRAILER_SIZE);
|
||||
|
||||
fprint_hufftables(output_file, header, bit_count, lit_code_table, lit_code_size_table,
|
||||
dcodes_code_table, dcodes_code_size_table, packed_len_table,
|
||||
packed_dist_table);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
long int file_length;
|
||||
uint8_t *stream = NULL;
|
||||
struct isal_huff_histogram histogram;
|
||||
uint64_t *lit_histogram = histogram.lit_len_histogram;
|
||||
uint64_t *dist_histogram = histogram.dist_histogram;
|
||||
uint8_t header[MAX_HEADER_SIZE];
|
||||
FILE *file;
|
||||
struct huff_tree lit_tree, dist_tree;
|
||||
struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1];
|
||||
struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
|
||||
uint64_t bit_count;
|
||||
uint32_t packed_len_table[LEN_TABLE_SIZE];
|
||||
uint32_t packed_dist_table[LONG_DIST_TABLE_SIZE];
|
||||
uint16_t lit_code_table[LIT_TABLE_SIZE];
|
||||
uint16_t dcodes_code_table[DIST_LEN];
|
||||
uint8_t lit_code_size_table[LIT_TABLE_SIZE];
|
||||
uint8_t dcodes_code_size_table[DIST_LEN];
|
||||
int max_dist = convert_dist_to_dist_sym(D);
|
||||
|
||||
if (argc == 1) {
|
||||
printf("Error, no input file.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(&histogram, 0, sizeof(histogram)); /* Initialize histograms. */
|
||||
memset(lit_tree_array, 0, sizeof(lit_tree_array));
|
||||
memset(dist_tree_array, 0, sizeof(dist_tree_array));
|
||||
memset(lit_huff_table, 0, sizeof(lit_huff_table));
|
||||
memset(dist_huff_table, 0, sizeof(dist_huff_table));
|
||||
|
||||
while (argc > 1) {
|
||||
printf("Processing %s\n", argv[argc - 1]);
|
||||
file = fopen(argv[argc - 1], "r");
|
||||
if (file == NULL) {
|
||||
printf("Error opening file\n");
|
||||
return 1;
|
||||
}
|
||||
fseek(file, 0, SEEK_END);
|
||||
file_length = ftell(file);
|
||||
fseek(file, 0, SEEK_SET);
|
||||
file_length -= ftell(file);
|
||||
stream = malloc(file_length);
|
||||
if (stream == NULL) {
|
||||
printf("Failed to allocate memory to read in file\n");
|
||||
fclose(file);
|
||||
return 1;
|
||||
}
|
||||
fread(stream, 1, file_length, file);
|
||||
if (ferror(file)) {
|
||||
printf("Error occurred when reading file");
|
||||
fclose(file);
|
||||
free(stream);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Create a histogram of frequency of symbols found in stream to
|
||||
* generate the huffman tree.*/
|
||||
isal_update_histogram(stream, file_length, &histogram);
|
||||
|
||||
fclose(file);
|
||||
free(stream);
|
||||
argc--;
|
||||
}
|
||||
|
||||
/* Create a huffman tree corresponding to the histograms created in
|
||||
* gen_histogram*/
|
||||
#ifdef LIT_SUB
|
||||
int j;
|
||||
/* Guarantee every possible repeat length is given a symbol. It is hard
|
||||
* to guarantee data will never have a repeat of a given length */
|
||||
for (j = LIT_TABLE_SIZE; j < LIT_LEN; j++)
|
||||
if (lit_histogram[j] == 0)
|
||||
lit_histogram[j]++;
|
||||
|
||||
lit_tree = create_symbol_subset_huff_tree(lit_tree_array, lit_histogram, LIT_LEN);
|
||||
#else
|
||||
lit_tree = create_huff_tree(lit_tree_array, lit_histogram, LIT_LEN);
|
||||
#endif
|
||||
dist_tree = create_huff_tree(dist_tree_array, dist_histogram, max_dist + 1);
|
||||
|
||||
/* Create a look up table to represent huffman tree above in deflate
|
||||
* standard form after it has been modified to satisfy max depth
|
||||
* criteria.*/
|
||||
if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, MAX_DEFLATE_CODE_LEN) > 0) {
|
||||
printf("Error, code with invalid length for Deflate standard.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, MAX_DEFLATE_CODE_LEN) > 0) {
|
||||
printf("Error, code with invalid length for Deflate standard.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
|
||||
if (create_huff_lookup
|
||||
(lit_huff_table, LIT_LEN, lit_tree, MAX_SAFE_LIT_CODE_LEN) > 0)
|
||||
printf("Error, code with invalid length for Deflate standard.\n");
|
||||
return 1;
|
||||
|
||||
if (create_huff_lookup
|
||||
(dist_huff_table, DIST_LEN, dist_tree, MAX_SAFE_DIST_CODE_LEN) > 0)
|
||||
printf("Error, code with invalid length for Deflate standard.\n");
|
||||
return 1;
|
||||
|
||||
if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
|
||||
printf("Error, hufftable is not usable\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
#ifdef PRINT_CODES
|
||||
int i;
|
||||
printf("Lit/Len codes\n");
|
||||
for (i = 0; i < LIT_TABLE_SIZE - 1; i++)
|
||||
printf("Lit %3d: Code 0x%04x, Code_Len %d\n", i, lit_huff_table[i].code,
|
||||
lit_huff_table[i].length);
|
||||
|
||||
printf("EOB %3d: Code 0x%04x, Code_Len %d\n", 256, lit_huff_table[256].code,
|
||||
lit_huff_table[256].length);
|
||||
|
||||
for (i = LIT_TABLE_SIZE; i < LIT_LEN; i++)
|
||||
printf("Len %d: Code 0x%04x, Code_Len %d\n", i, lit_huff_table[i].code,
|
||||
lit_huff_table[i].length);
|
||||
printf("\n");
|
||||
|
||||
printf("Dist codes \n");
|
||||
for (i = 0; i < DIST_LEN; i++)
|
||||
printf("Dist %2d: Code 0x%04x, Code_Len %d\n", i, dist_huff_table[i].code,
|
||||
dist_huff_table[i].length);
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
create_code_tables(lit_code_table, lit_code_size_table, LIT_TABLE_SIZE,
|
||||
lit_huff_table);
|
||||
create_code_tables(dcodes_code_table, dcodes_code_size_table, DIST_LEN,
|
||||
dist_huff_table);
|
||||
create_packed_len_table(packed_len_table, lit_huff_table);
|
||||
create_packed_dist_table(packed_dist_table, LONG_DIST_TABLE_SIZE, dist_huff_table);
|
||||
|
||||
bit_count =
|
||||
create_header(header, sizeof(header), lit_huff_table, dist_huff_table, LAST_BLOCK);
|
||||
|
||||
file = fopen("hufftables_c.c", "w");
|
||||
if (file == NULL) {
|
||||
printf("Error creating file hufftables_c.c\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
fprint_header(file, header, bit_count, lit_code_table, lit_code_size_table,
|
||||
dcodes_code_table, dcodes_code_size_table, packed_len_table,
|
||||
packed_dist_table);
|
||||
|
||||
fclose(file);
|
||||
|
||||
return 0;
|
||||
}
|
964
igzip/huff_codes.c
Normal file
964
igzip/huff_codes.c
Normal file
@ -0,0 +1,964 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "igzip_lib.h"
|
||||
#include "huff_codes.h"
|
||||
#include "huffman.h"
|
||||
|
||||
#define LENGTH_BITS 5
|
||||
|
||||
/* The order code length codes are written in the dynamic code header. This is
|
||||
* defined in RFC 1951 page 13 */
|
||||
static const uint8_t code_length_code_order[] =
|
||||
{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
|
||||
|
||||
int heap_push(struct huff_tree element, struct histheap *heap)
|
||||
{
|
||||
uint16_t index;
|
||||
uint16_t parent;
|
||||
assert(heap->size < MAX_HISTHEAP_SIZE);
|
||||
index = heap->size;
|
||||
heap->size += 1;
|
||||
parent = (index - 1) / 2;
|
||||
while ((index != 0) && (heap->tree[parent].frequency > element.frequency)) {
|
||||
heap->tree[index] = heap->tree[parent];
|
||||
index = parent;
|
||||
parent = (index - 1) / 2;
|
||||
|
||||
}
|
||||
heap->tree[index] = element;
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
struct huff_tree heap_pop(struct histheap *heap)
|
||||
{
|
||||
struct huff_tree root, temp;
|
||||
uint16_t index = 0;
|
||||
uint16_t child = 1;
|
||||
assert(heap->size > 0);
|
||||
root = heap->tree[index];
|
||||
heap->size--;
|
||||
heap->tree[index] = heap->tree[heap->size];
|
||||
|
||||
while (child + 1 < heap->size) {
|
||||
if (heap->tree[child].frequency < heap->tree[index].frequency
|
||||
|| heap->tree[child + 1].frequency < heap->tree[index].frequency) {
|
||||
if (heap->tree[child].frequency > heap->tree[child + 1].frequency)
|
||||
child += 1;
|
||||
temp = heap->tree[index];
|
||||
heap->tree[index] = heap->tree[child];
|
||||
heap->tree[child] = temp;
|
||||
index = child;
|
||||
child = 2 * child + 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (child < heap->size) {
|
||||
if (heap->tree[child].frequency < heap->tree[index].frequency) {
|
||||
temp = heap->tree[index];
|
||||
heap->tree[index] = heap->tree[child];
|
||||
heap->tree[child] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
return root;
|
||||
|
||||
}
|
||||
|
||||
struct linked_list_node *pop_from_front(struct linked_list *list)
|
||||
{
|
||||
struct linked_list_node *temp;
|
||||
|
||||
temp = list->start;
|
||||
if (list->start != NULL) {
|
||||
list->start = list->start->next;
|
||||
if (list->start != NULL)
|
||||
list->start->previous = NULL;
|
||||
else
|
||||
list->end = NULL;
|
||||
list->length -= 1;
|
||||
}
|
||||
return temp;
|
||||
}
|
||||
|
||||
void append_to_front(struct linked_list *list, struct linked_list_node *new_element)
|
||||
{
|
||||
new_element->next = list->start;
|
||||
new_element->previous = NULL;
|
||||
if (list->start != NULL)
|
||||
list->start->previous = new_element;
|
||||
else
|
||||
list->end = new_element;
|
||||
list->start = new_element;
|
||||
list->length += 1;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void append_to_back(struct linked_list *list, struct linked_list_node *new_element)
|
||||
{
|
||||
new_element->previous = list->end;
|
||||
new_element->next = NULL;
|
||||
if (list->end != NULL)
|
||||
list->end->next = new_element;
|
||||
else
|
||||
list->start = new_element;
|
||||
list->end = new_element;
|
||||
list->length += 1;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void isal_update_histogram(uint8_t * start_stream, int length,
|
||||
struct isal_huff_histogram *histogram)
|
||||
{
|
||||
uint32_t literal = 0, hash;
|
||||
uint8_t *last_seen[HASH_SIZE];
|
||||
uint8_t *current, *seen, *end_stream, *next_hash, *end;
|
||||
uint32_t match_length;
|
||||
uint32_t dist;
|
||||
uint64_t *lit_len_histogram = histogram->lit_len_histogram;
|
||||
uint64_t *dist_histogram = histogram->dist_histogram;
|
||||
|
||||
if (length <= 0)
|
||||
return;
|
||||
|
||||
end_stream = start_stream + length;
|
||||
memset(last_seen, 0, sizeof(last_seen)); /* Initialize last_seen to be 0. */
|
||||
for (current = start_stream; current < end_stream - 3; current++) {
|
||||
literal = *(uint32_t *) current;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
seen = last_seen[hash];
|
||||
last_seen[hash] = current;
|
||||
dist = current - seen;
|
||||
if (dist < D) {
|
||||
match_length = compare258(seen, current, end_stream - current);
|
||||
if (match_length >= SHORTEST_MATCH) {
|
||||
next_hash = current;
|
||||
#ifdef LIMIT_HASH_UPDATE
|
||||
end = next_hash + 3;
|
||||
#else
|
||||
end = next_hash + match_length;
|
||||
#endif
|
||||
if (end > end_stream - 3)
|
||||
end = end_stream - 3;
|
||||
next_hash++;
|
||||
for (; next_hash < end; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
last_seen[hash] = next_hash;
|
||||
}
|
||||
|
||||
dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
|
||||
lit_len_histogram[convert_length_to_len_sym(match_length)] +=
|
||||
1;
|
||||
current += match_length - 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
lit_len_histogram[literal & 0xFF] += 1;
|
||||
}
|
||||
literal = literal >> 8;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
seen = last_seen[hash];
|
||||
last_seen[hash] = current;
|
||||
dist = current - seen;
|
||||
if (dist < D) {
|
||||
match_length = compare258(seen, current, end_stream - current);
|
||||
if (match_length >= SHORTEST_MATCH) {
|
||||
dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
|
||||
lit_len_histogram[convert_length_to_len_sym(match_length)] += 1;
|
||||
lit_len_histogram[256] += 1;
|
||||
return;
|
||||
}
|
||||
} else
|
||||
lit_len_histogram[literal & 0xFF] += 1;
|
||||
lit_len_histogram[(literal >> 8) & 0xFF] += 1;
|
||||
lit_len_histogram[(literal >> 16) & 0xFF] += 1;
|
||||
lit_len_histogram[256] += 1;
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * @brief Maps a look back distance to its deflate distance symbol.
 *
 * Distances 1 and 2 map to symbols 0 and 1. Above that, each range
 * (2^(k+1), 2^(k+2)] holds two symbols that each cover 2^k distances.
 *
 * @param dist: distance in the range 1..32768 (checked with assert).
 * @returns the distance symbol (0..29), or ~0 for a distance above 32768 when
 * asserts are compiled out.
 */
uint32_t convert_dist_to_dist_sym(uint32_t dist)
{
	uint32_t shift;

	assert(dist <= 32768 && dist > 0);

	if (dist <= 2)
		return dist - 1;

	/* shift is the log2 of the number of distances covered per symbol. */
	for (shift = 0; shift <= 13; shift++) {
		if (dist <= ((uint32_t) 4 << shift))
			return 2 * shift + ((dist - 1) >> shift);
	}

	return ~0;		/* ~0 is an invalid distance code */
}
|
||||
|
||||
/**
 * @brief Maps a match length to its deflate length symbol.
 *
 * Based on the tables on page 11 of RFC 1951: lengths 3..10 each have their
 * own symbol (257..264), then every group of four symbols covers lengths in
 * steps of 2, 4, 8, 16 and 32, and length 258 has the dedicated symbol 285.
 *
 * @param length: match length in the range 3..258 (checked with assert).
 * @returns the length symbol (257..285).
 */
uint32_t convert_length_to_len_sym(uint32_t length)
{
	uint32_t shift;

	assert(length > 2 && length < 259);

	/* One symbol per length. */
	if (length < 11)
		return 257 + length - 3;

	/* The longest length has a symbol of its own. */
	if (length >= 258)
		return 285;

	/* shift is the log2 of the number of lengths covered per symbol. */
	for (shift = 1; shift <= 5; shift++) {
		if (length < ((uint32_t) 8 << shift) + 3)
			return 257 + 4 * shift + ((length - 3) >> shift);
	}

	return 285;
}
|
||||
|
||||
struct huff_tree create_symbol_subset_huff_tree(struct huff_tree *tree_array,
|
||||
uint64_t * histogram, uint32_t size)
|
||||
{
|
||||
/* Assumes there are at least 2 symbols. */
|
||||
int i;
|
||||
uint32_t node_index;
|
||||
struct huff_tree tree;
|
||||
struct histheap heap;
|
||||
|
||||
heap.size = 0;
|
||||
|
||||
tree.right = tree.left = NULL;
|
||||
|
||||
/* Intitializes heap for construction of the huffman tree */
|
||||
for (i = 0; i < size; i++) {
|
||||
tree.value = i;
|
||||
tree.frequency = histogram[i];
|
||||
tree_array[i] = tree;
|
||||
|
||||
/* If symbol does not appear (has frequency 0), ignore it. */
|
||||
if (tree_array[i].frequency != 0)
|
||||
heap_push(tree, &heap);
|
||||
}
|
||||
|
||||
node_index = size;
|
||||
|
||||
/* Construct the huffman tree */
|
||||
while (heap.size > 1) {
|
||||
|
||||
tree = heap_pop(&heap);
|
||||
tree_array[node_index].frequency = tree.frequency;
|
||||
tree_array[node_index].left = &tree_array[tree.value];
|
||||
|
||||
tree = heap_pop(&heap);
|
||||
tree_array[node_index].frequency += tree.frequency;
|
||||
tree_array[node_index].right = &tree_array[tree.value];
|
||||
|
||||
tree_array[node_index].value = node_index;
|
||||
heap_push(tree_array[node_index], &heap);
|
||||
|
||||
node_index += 1;
|
||||
}
|
||||
|
||||
return heap_pop(&heap);
|
||||
}
|
||||
|
||||
struct huff_tree create_huff_tree(struct huff_tree *tree_array, uint64_t * histogram,
|
||||
uint32_t size)
|
||||
{
|
||||
int i;
|
||||
uint32_t node_index;
|
||||
struct huff_tree tree;
|
||||
struct histheap heap;
|
||||
|
||||
heap.size = 0;
|
||||
|
||||
tree.right = tree.left = NULL;
|
||||
|
||||
/* Intitializes heap for construction of the huffman tree */
|
||||
for (i = 0; i < size; i++) {
|
||||
tree.value = i;
|
||||
tree.frequency = histogram[i];
|
||||
tree_array[i] = tree;
|
||||
heap_push(tree, &heap);
|
||||
}
|
||||
|
||||
node_index = size;
|
||||
|
||||
/* Construct the huffman tree */
|
||||
while (heap.size > 1) {
|
||||
|
||||
tree = heap_pop(&heap);
|
||||
tree_array[node_index].frequency = tree.frequency;
|
||||
tree_array[node_index].left = &tree_array[tree.value];
|
||||
|
||||
tree = heap_pop(&heap);
|
||||
tree_array[node_index].frequency += tree.frequency;
|
||||
tree_array[node_index].right = &tree_array[tree.value];
|
||||
|
||||
tree_array[node_index].value = node_index;
|
||||
heap_push(tree_array[node_index], &heap);
|
||||
|
||||
node_index += 1;
|
||||
}
|
||||
|
||||
return heap_pop(&heap);
|
||||
}
|
||||
|
||||
int create_huff_lookup(struct huff_code *huff_lookup_table, int table_length,
|
||||
struct huff_tree root, uint8_t max_depth)
|
||||
{
|
||||
/* Used to create a count of number of elements with a given code length */
|
||||
uint16_t count[MAX_HUFF_TREE_DEPTH + 1];
|
||||
|
||||
memset(count, 0, sizeof(count));
|
||||
|
||||
if (find_code_lengths(huff_lookup_table, count, root, max_depth) != 0)
|
||||
return 1;
|
||||
|
||||
set_huff_codes(huff_lookup_table, table_length, count);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int find_code_lengths(struct huff_code *huff_lookup_table, uint16_t * count,
|
||||
struct huff_tree root, uint8_t max_depth)
|
||||
{
|
||||
struct linked_list depth_array[MAX_HUFF_TREE_DEPTH + 2];
|
||||
struct linked_list_node linked_lists[MAX_HISTHEAP_SIZE];
|
||||
struct linked_list_node *temp;
|
||||
uint16_t extra_nodes = 0;
|
||||
int i, j;
|
||||
|
||||
memset(depth_array, 0, sizeof(depth_array));
|
||||
memset(linked_lists, 0, sizeof(linked_lists));
|
||||
for (i = 0; i < MAX_HISTHEAP_SIZE; i++)
|
||||
linked_lists[i].value = i;
|
||||
|
||||
huffman_tree_traversal(depth_array, linked_lists, &extra_nodes, max_depth, root, 0);
|
||||
|
||||
/* This for loop fixes up the huffman tree to have a maximum depth not exceeding
|
||||
* max_depth. This algorithm works by removing all elements below max_depth,
|
||||
* filling up the empty leafs which are created with elements form the huffman
|
||||
* tree and then iteratively pushing down the least frequent leaf that is above
|
||||
* max_depth to a depth 1 lower, and moving up a leaf below max_depth to that
|
||||
* same depth.*/
|
||||
for (i = MAX_HUFF_TREE_DEPTH + 1; i > max_depth; i--) {
|
||||
|
||||
/* find element to push up the tree */
|
||||
while (depth_array[i].start != NULL) {
|
||||
if (extra_nodes > 0) {
|
||||
temp = pop_from_front(&depth_array[i]);
|
||||
append_to_back(&depth_array[max_depth], temp);
|
||||
extra_nodes -= 1;
|
||||
|
||||
} else {
|
||||
assert(depth_array[max_depth].length % 2 == 0);
|
||||
assert(extra_nodes == 0);
|
||||
|
||||
/* find element to push down in the tree */
|
||||
for (j = max_depth - 1; j >= 0; j--)
|
||||
if (depth_array[j].start != NULL)
|
||||
break;
|
||||
|
||||
/* No element available to push down further. */
|
||||
if (j < 0)
|
||||
return 1;
|
||||
|
||||
temp = pop_from_front(&depth_array[i]);
|
||||
append_to_front(&depth_array[j + 1], temp);
|
||||
|
||||
temp = pop_from_front(&depth_array[j]);
|
||||
append_to_back(&depth_array[j + 1], temp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_HUFF_TREE_DEPTH + 2; i++) {
|
||||
temp = depth_array[i].start;
|
||||
|
||||
while (temp != NULL) {
|
||||
huff_lookup_table[temp->value].length = i;
|
||||
count[i] += 1;
|
||||
temp = temp->next;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
void huffman_tree_traversal(struct linked_list *depth_array,
|
||||
struct linked_list_node *linked_lists, uint16_t * extra_nodes,
|
||||
uint8_t max_depth, struct huff_tree current_node,
|
||||
uint16_t current_depth)
|
||||
{
|
||||
/* This algorithm performs a traversal of the huffman tree. It is setup
|
||||
* to visit the leaves in order of frequency and bin elements into a
|
||||
* linked list by depth.*/
|
||||
if (current_node.left == NULL) {
|
||||
if (current_depth < MAX_HUFF_TREE_DEPTH + 1)
|
||||
append_to_front(&depth_array[current_depth],
|
||||
&linked_lists[current_node.value]);
|
||||
else
|
||||
append_to_front(&depth_array[MAX_HUFF_TREE_DEPTH + 1],
|
||||
&linked_lists[current_node.value]);
|
||||
return;
|
||||
|
||||
} else if (current_depth == max_depth)
|
||||
*extra_nodes += 1;
|
||||
|
||||
if (current_node.left->frequency < current_node.right->frequency) {
|
||||
huffman_tree_traversal(depth_array, linked_lists, extra_nodes, max_depth,
|
||||
*current_node.right, current_depth + 1);
|
||||
huffman_tree_traversal(depth_array, linked_lists, extra_nodes, max_depth,
|
||||
*current_node.left, current_depth + 1);
|
||||
|
||||
} else {
|
||||
huffman_tree_traversal(depth_array, linked_lists, extra_nodes, max_depth,
|
||||
*current_node.left, current_depth + 1);
|
||||
huffman_tree_traversal(depth_array, linked_lists, extra_nodes, max_depth,
|
||||
*current_node.right, current_depth + 1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
 * Reverses the order of the lowest length bits of bits.
 *
 * All 16 bits are mirrored first and the result is then shifted right so that
 * only the reversed low length bits remain. length is expected to be in the
 * range 0..16.
 */
uint16_t bit_reverse(uint16_t bits, uint8_t length)
{
	uint16_t mirrored = 0;
	int pos;

	/* Move the bits over one at a time, lowest bit first. */
	for (pos = 0; pos < 16; pos++) {
		mirrored = (uint16_t) ((mirrored << 1) | (bits & 1));
		bits >>= 1;
	}

	return mirrored >> (16 - length);
}
|
||||
|
||||
void set_huff_codes(struct huff_code *huff_code_table, int table_length, uint16_t * count)
|
||||
{
|
||||
/* Uses the algorithm mentioned in the deflate standard, Rfc 1951. */
|
||||
int i;
|
||||
uint16_t code = 0;
|
||||
uint16_t next_code[MAX_HUFF_TREE_DEPTH + 1];
|
||||
|
||||
next_code[0] = code;
|
||||
|
||||
for (i = 1; i < MAX_HUFF_TREE_DEPTH + 1; i++)
|
||||
next_code[i] = (next_code[i - 1] + count[i - 1]) << 1;
|
||||
|
||||
for (i = 0; i < table_length; i++) {
|
||||
if (huff_code_table[i].length != 0) {
|
||||
huff_code_table[i].code =
|
||||
bit_reverse(next_code[huff_code_table[i].length],
|
||||
huff_code_table[i].length);
|
||||
next_code[huff_code_table[i].length] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
int create_header(uint8_t * header, uint32_t header_length, struct huff_code *lit_huff_table,
|
||||
struct huff_code *dist_huff_table, uint32_t end_of_block)
|
||||
{
|
||||
int i;
|
||||
uint64_t histogram[HUFF_LEN];
|
||||
uint16_t huffman_rep[LIT_LEN + DIST_LEN];
|
||||
uint16_t extra_bits[LIT_LEN + DIST_LEN];
|
||||
uint16_t length;
|
||||
struct huff_tree root;
|
||||
struct huff_tree tree_array[2 * HUFF_LEN - 1];
|
||||
struct huff_code lookup_table[HUFF_LEN];
|
||||
struct huff_code combined_table[LIT_LEN + DIST_LEN];
|
||||
|
||||
/* hlit, hdist, and hclen are defined in RFC 1951 page 13 */
|
||||
uint32_t hlit, hdist, hclen;
|
||||
uint64_t bit_count;
|
||||
|
||||
memset(lookup_table, 0, sizeof(lookup_table));
|
||||
|
||||
/* Calculate hlit */
|
||||
for (i = LIT_LEN - 1; i > 256; i--)
|
||||
if (lit_huff_table[i].length != 0)
|
||||
break;
|
||||
|
||||
hlit = i - 256;
|
||||
|
||||
/* Calculate hdist */
|
||||
for (i = DIST_LEN - 1; i > 0; i--)
|
||||
if (dist_huff_table[i].length != 0)
|
||||
break;
|
||||
|
||||
hdist = i;
|
||||
|
||||
/* Combine huffman tables for run length encoding */
|
||||
for (i = 0; i < 257 + hlit; i++)
|
||||
combined_table[i] = lit_huff_table[i];
|
||||
for (i = 0; i < 1 + hdist; i++)
|
||||
combined_table[i + hlit + 257] = dist_huff_table[i];
|
||||
|
||||
memset(extra_bits, 0, LIT_LEN + DIST_LEN);
|
||||
memset(histogram, 0, sizeof(histogram));
|
||||
|
||||
/* Create a run length encoded representation of the literal/lenght and
|
||||
* distance huffman trees. */
|
||||
length = create_huffman_rep(huffman_rep, histogram, extra_bits,
|
||||
combined_table, hlit + 257 + hdist + 1);
|
||||
|
||||
/* Create a huffman tree to encode run length encoded representation. */
|
||||
root = create_symbol_subset_huff_tree(tree_array, histogram, HUFF_LEN);
|
||||
create_huff_lookup(lookup_table, HUFF_LEN, root, 7);
|
||||
|
||||
/* Calculate hclen */
|
||||
for (i = CODE_LEN_CODES - 1; i > 3; i--) /* i must be at least 4 */
|
||||
if (lookup_table[code_length_code_order[i]].length != 0)
|
||||
break;
|
||||
|
||||
hclen = i - 3;
|
||||
|
||||
/* Generate actual header. */
|
||||
bit_count = create_huffman_header(header, header_length, lookup_table, huffman_rep,
|
||||
extra_bits, length, end_of_block, hclen, hlit,
|
||||
hdist);
|
||||
|
||||
return bit_count;
|
||||
}
|
||||
|
||||
uint16_t create_huffman_rep(uint16_t * huffman_rep, uint64_t * histogram,
|
||||
uint16_t * extra_bits, struct huff_code * huff_table, uint16_t len)
|
||||
{
|
||||
uint16_t current_in_index = 0, current_out_index = 0, run_length, last_code;
|
||||
|
||||
while (current_in_index < len) {
|
||||
last_code = huff_table[current_in_index].length;
|
||||
run_length = 0;
|
||||
|
||||
while (current_in_index < len
|
||||
&& last_code == huff_table[current_in_index].length) {
|
||||
run_length += 1;
|
||||
current_in_index += 1;
|
||||
}
|
||||
|
||||
current_out_index = flush_repeats(huffman_rep, histogram, extra_bits,
|
||||
last_code, run_length, current_out_index);
|
||||
}
|
||||
return current_out_index;
|
||||
}
|
||||
|
||||
/*
 * Append the code-length symbols for run_length consecutive entries of length
 * last_code to huffman_rep, starting at current_index.
 *
 * Symbols 0..15 are literal code lengths.  Symbol 16 repeats the previous
 * non-zero length 3..6 times, symbol 17 encodes 3..10 zeros and symbol 18
 * encodes 11..138 zeros.  For symbols 16..18 the matching extra_bits slot is
 * set to (value << 4) | number_of_extra_bits.  histogram[] is incremented
 * once per emitted symbol.
 *
 * Returns the index following the last symbol written.
 */
uint16_t flush_repeats(uint16_t * huffman_rep, uint64_t * histogram, uint16_t * extra_bits,
		       uint16_t last_code, uint16_t run_length, uint16_t current_index)
{
	uint16_t out = current_index;
	uint16_t remaining = run_length;
	int k;

	/* A non-zero length is written out once before any repeat symbol */
	if (last_code != 0 && last_code < HUFF_LEN && remaining > 0) {
		huffman_rep[out++] = last_code;
		histogram[last_code]++;
		remaining--;
	}

	if (remaining >= SHORTEST_MATCH) {
		if (last_code == 0) {
			/* 138 is the longest zero run symbol 18 can express */
			while (remaining > 138) {
				huffman_rep[out] = 18;
				extra_bits[out] = 0x7F7;
				out++;
				histogram[18]++;
				remaining -= 138;
			}

			/* 10 is the longest zero run symbol 17 can express */
			if (remaining > 10) {
				huffman_rep[out] = 18;
				extra_bits[out] = ((remaining - 11) << 4) | 7;
				histogram[18]++;
				return out + 1;
			}

			if (remaining >= SHORTEST_MATCH) {
				huffman_rep[out] = 17;
				extra_bits[out] = ((remaining - 3) << 4) | 3;
				histogram[17]++;
				return out + 1;
			}
		} else {
			/* Symbol 16 repeats the previous length at most 6 times */
			while (remaining > 6) {
				huffman_rep[out] = 16;
				extra_bits[out] = 0x32;
				out++;
				histogram[16]++;
				remaining -= 6;
			}

			if (remaining >= SHORTEST_MATCH) {
				huffman_rep[out] = 16;
				extra_bits[out] = ((remaining - 3) << 4) | 2;
				histogram[16]++;
				return out + 1;
			}
		}
	}

	/* What is left is too short for a repeat symbol: emit it literally */
	for (k = 0; k < remaining; k++) {
		huffman_rep[out++] = last_code;
		histogram[last_code]++;
	}

	return out;
}
|
||||
|
||||
int create_huffman_header(uint8_t * header, uint32_t header_length,
|
||||
struct huff_code *lookup_table, uint16_t * huffman_rep,
|
||||
uint16_t * extra_bits, uint16_t huffman_rep_length,
|
||||
uint32_t end_of_block, uint32_t hclen, uint32_t hlit, uint32_t hdist)
|
||||
{
|
||||
/* hlit, hdist, hclen are as defined in the deflate standard, head is the
|
||||
* first three deflate header bits.*/
|
||||
int i;
|
||||
uint32_t head;
|
||||
uint64_t bit_count;
|
||||
struct huff_code huffman_value;
|
||||
struct BitBuf2 header_bitbuf;
|
||||
|
||||
if (end_of_block)
|
||||
head = 0x05;
|
||||
else
|
||||
head = 0x04;
|
||||
|
||||
set_buf(&header_bitbuf, header, header_length);
|
||||
init(&header_bitbuf);
|
||||
|
||||
write_bits(&header_bitbuf, (head | (hlit << 3) | (hdist << 8) | (hclen << 13)),
|
||||
DYN_HDR_START_LEN);
|
||||
|
||||
uint64_t tmp = 0;
|
||||
for (i = hclen + 3; i >= 0; i--) {
|
||||
tmp = (tmp << 3) | lookup_table[code_length_code_order[i]].length;
|
||||
}
|
||||
|
||||
write_bits(&header_bitbuf, tmp, (hclen + 4) * 3);
|
||||
|
||||
for (i = 0; i < huffman_rep_length; i++) {
|
||||
huffman_value = lookup_table[huffman_rep[i]];
|
||||
|
||||
write_bits(&header_bitbuf, (uint64_t) huffman_value.code,
|
||||
(uint32_t) huffman_value.length);
|
||||
|
||||
if (huffman_rep[i] > 15) {
|
||||
write_bits(&header_bitbuf, (uint64_t) extra_bits[i] >> 4,
|
||||
(uint32_t) extra_bits[i] & 0xF);
|
||||
}
|
||||
}
|
||||
bit_count = 8 * buffer_used(&header_bitbuf) + header_bitbuf.m_bit_count;
|
||||
flush(&header_bitbuf);
|
||||
|
||||
return bit_count;
|
||||
}
|
||||
|
||||
void create_code_tables(uint16_t * code_table, uint8_t * code_length_table, uint32_t length,
|
||||
struct huff_code *hufftable)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < length; i++) {
|
||||
code_table[i] = hufftable[i].code;
|
||||
code_length_table[i] = hufftable[i].length;
|
||||
}
|
||||
}
|
||||
|
||||
void create_packed_len_table(uint32_t * packed_table, struct huff_code *lit_len_hufftable)
|
||||
{
|
||||
int i, count = 0;
|
||||
uint16_t extra_bits;
|
||||
uint16_t extra_bits_count = 0;
|
||||
|
||||
/* Gain extra bits is the next place where the number of extra bits in
|
||||
* lenght codes increases. */
|
||||
uint16_t gain_extra_bits = LEN_EXTRA_BITS_START;
|
||||
|
||||
for (i = 257; i < LIT_LEN - 1; i++) {
|
||||
for (extra_bits = 0; extra_bits < (1 << extra_bits_count); extra_bits++) {
|
||||
if (count > 254)
|
||||
break;
|
||||
packed_table[count++] =
|
||||
(extra_bits << (lit_len_hufftable[i].length + LENGTH_BITS)) |
|
||||
(lit_len_hufftable[i].code << LENGTH_BITS) |
|
||||
(lit_len_hufftable[i].length + extra_bits_count);
|
||||
}
|
||||
|
||||
if (i == gain_extra_bits) {
|
||||
gain_extra_bits += LEN_EXTRA_BITS_INTERVAL;
|
||||
extra_bits_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
packed_table[count] = (lit_len_hufftable[LIT_LEN - 1].code << LENGTH_BITS) |
|
||||
(lit_len_hufftable[LIT_LEN - 1].length);
|
||||
}
|
||||
|
||||
void create_packed_dist_table(uint32_t * packed_table, uint32_t length,
|
||||
struct huff_code *dist_hufftable)
|
||||
{
|
||||
int i, count = 0;
|
||||
uint16_t extra_bits;
|
||||
uint16_t extra_bits_count = 0;
|
||||
|
||||
/* Gain extra bits is the next place where the number of extra bits in
|
||||
* distance codes increases. */
|
||||
uint16_t gain_extra_bits = DIST_EXTRA_BITS_START;
|
||||
|
||||
for (i = 0; i < DIST_LEN; i++) {
|
||||
for (extra_bits = 0; extra_bits < (1 << extra_bits_count); extra_bits++) {
|
||||
if (count >= length)
|
||||
return;
|
||||
|
||||
packed_table[count++] =
|
||||
(extra_bits << (dist_hufftable[i].length + LENGTH_BITS)) |
|
||||
(dist_hufftable[i].code << LENGTH_BITS) |
|
||||
(dist_hufftable[i].length + extra_bits_count);
|
||||
|
||||
}
|
||||
|
||||
if (i == gain_extra_bits) {
|
||||
gain_extra_bits += DIST_EXTRA_BITS_INTERVAL;
|
||||
extra_bits_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int are_hufftables_useable(struct huff_code *lit_len_hufftable,
|
||||
struct huff_code *dist_hufftable)
|
||||
{
|
||||
int max_lit_code_len = 0, max_len_code_len = 0, max_dist_code_len = 0;
|
||||
int dist_extra_bits = 0, len_extra_bits = 0;
|
||||
int gain_dist_extra_bits = DIST_EXTRA_BITS_START;
|
||||
int gain_len_extra_bits = LEN_EXTRA_BITS_START;
|
||||
int max_code_len;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < LIT_LEN; i++)
|
||||
if (lit_len_hufftable[i].length > max_lit_code_len)
|
||||
max_lit_code_len = lit_len_hufftable[i].length;
|
||||
|
||||
for (i = 257; i < LIT_LEN - 1; i++) {
|
||||
if (lit_len_hufftable[i].length + len_extra_bits > max_len_code_len)
|
||||
max_len_code_len = lit_len_hufftable[i].length + len_extra_bits;
|
||||
|
||||
if (i == gain_len_extra_bits) {
|
||||
gain_len_extra_bits += LEN_EXTRA_BITS_INTERVAL;
|
||||
len_extra_bits += 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < DIST_LEN; i++) {
|
||||
if (dist_hufftable[i].length + dist_extra_bits > max_dist_code_len)
|
||||
max_dist_code_len = dist_hufftable[i].length + dist_extra_bits;
|
||||
|
||||
if (i == gain_dist_extra_bits) {
|
||||
gain_dist_extra_bits += DIST_EXTRA_BITS_INTERVAL;
|
||||
dist_extra_bits += 1;
|
||||
}
|
||||
}
|
||||
|
||||
max_code_len = max_lit_code_len + max_len_code_len + max_dist_code_len;
|
||||
|
||||
/* Some versions of igzip can write upto one literal, one length and one
|
||||
* distance code at the same time. This checks to make sure that is
|
||||
* always writeable in bitbuf*/
|
||||
return (max_code_len > MAX_BITBUF_BIT_WRITE);
|
||||
}
|
||||
|
||||
int isal_create_hufftables(struct isal_hufftables *hufftables,
|
||||
struct isal_huff_histogram *histogram)
|
||||
{
|
||||
struct huff_tree lit_tree, dist_tree;
|
||||
struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1];
|
||||
struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
|
||||
uint64_t bit_count;
|
||||
int max_dist = convert_dist_to_dist_sym(IGZIP_D);
|
||||
|
||||
uint32_t *dist_table = hufftables->dist_table;
|
||||
uint32_t *len_table = hufftables->len_table;
|
||||
uint16_t *lit_table = hufftables->lit_table;
|
||||
uint16_t *dcodes = hufftables->dcodes;
|
||||
uint8_t *lit_table_sizes = hufftables->lit_table_sizes;
|
||||
uint8_t *dcodes_sizes = hufftables->dcodes_sizes;
|
||||
uint8_t *deflate_hdr = hufftables->deflate_hdr;
|
||||
uint64_t *lit_len_histogram = histogram->lit_len_histogram;
|
||||
uint64_t *dist_histogram = histogram->dist_histogram;
|
||||
|
||||
memset(hufftables, 0, sizeof(struct isal_hufftables));
|
||||
memset(lit_tree_array, 0, sizeof(lit_tree_array));
|
||||
memset(dist_tree_array, 0, sizeof(dist_tree_array));
|
||||
memset(lit_huff_table, 0, sizeof(lit_huff_table));
|
||||
memset(dist_huff_table, 0, sizeof(dist_huff_table));
|
||||
|
||||
lit_tree = create_huff_tree(lit_tree_array, lit_len_histogram, LIT_LEN);
|
||||
dist_tree = create_huff_tree(dist_tree_array, dist_histogram, max_dist + 1);
|
||||
|
||||
if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, MAX_DEFLATE_CODE_LEN) > 0)
|
||||
return INVALID_LIT_LEN_HUFFCODE;
|
||||
|
||||
if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, MAX_DEFLATE_CODE_LEN) > 0)
|
||||
return INVALID_DIST_HUFFCODE;
|
||||
|
||||
if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
|
||||
if (create_huff_lookup
|
||||
(lit_huff_table, LIT_LEN, lit_tree, MAX_SAFE_LIT_CODE_LEN) > 0)
|
||||
return INVALID_LIT_LEN_HUFFCODE;
|
||||
|
||||
if (create_huff_lookup
|
||||
(dist_huff_table, DIST_LEN, dist_tree, MAX_SAFE_DIST_CODE_LEN) > 0)
|
||||
return INVALID_DIST_HUFFCODE;
|
||||
|
||||
if (are_hufftables_useable(lit_huff_table, dist_huff_table))
|
||||
return INVALID_HUFFCODE;
|
||||
}
|
||||
|
||||
create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET,
|
||||
dist_huff_table + DCODE_OFFSET);
|
||||
|
||||
create_code_tables(lit_table, lit_table_sizes, LIT_TABLE_SIZE, lit_huff_table);
|
||||
|
||||
create_packed_len_table(len_table, lit_huff_table);
|
||||
create_packed_dist_table(dist_table, DIST_TABLE_SIZE, dist_huff_table);
|
||||
|
||||
bit_count =
|
||||
create_header(deflate_hdr, sizeof(deflate_hdr), lit_huff_table, dist_huff_table,
|
||||
LAST_BLOCK);
|
||||
|
||||
hufftables->deflate_hdr_count = bit_count / 8;
|
||||
hufftables->deflate_hdr_extra_bits = bit_count % 8;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int isal_create_hufftables_subset(struct isal_hufftables *hufftables,
|
||||
struct isal_huff_histogram *histogram)
|
||||
{
|
||||
struct huff_tree lit_tree, dist_tree;
|
||||
struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1];
|
||||
struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
|
||||
uint64_t bit_count;
|
||||
int j, max_dist = convert_dist_to_dist_sym(IGZIP_D);
|
||||
|
||||
uint32_t *dist_table = hufftables->dist_table;
|
||||
uint32_t *len_table = hufftables->len_table;
|
||||
uint16_t *lit_table = hufftables->lit_table;
|
||||
uint16_t *dcodes = hufftables->dcodes;
|
||||
uint8_t *lit_table_sizes = hufftables->lit_table_sizes;
|
||||
uint8_t *dcodes_sizes = hufftables->dcodes_sizes;
|
||||
uint8_t *deflate_hdr = hufftables->deflate_hdr;
|
||||
uint64_t *lit_len_histogram = histogram->lit_len_histogram;
|
||||
uint64_t *dist_histogram = histogram->dist_histogram;
|
||||
|
||||
memset(hufftables, 0, sizeof(struct isal_hufftables));
|
||||
memset(lit_tree_array, 0, sizeof(lit_tree_array));
|
||||
memset(dist_tree_array, 0, sizeof(dist_tree_array));
|
||||
memset(lit_huff_table, 0, sizeof(lit_huff_table));
|
||||
memset(dist_huff_table, 0, sizeof(dist_huff_table));
|
||||
|
||||
for (j = LIT_TABLE_SIZE; j < LIT_LEN; j++)
|
||||
if (lit_len_histogram[j] == 0)
|
||||
lit_len_histogram[j]++;
|
||||
|
||||
lit_tree = create_symbol_subset_huff_tree(lit_tree_array, lit_len_histogram, LIT_LEN);
|
||||
dist_tree = create_huff_tree(dist_tree_array, dist_histogram, max_dist + 1);
|
||||
|
||||
if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, MAX_DEFLATE_CODE_LEN) > 0)
|
||||
return INVALID_LIT_LEN_HUFFCODE;
|
||||
|
||||
if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, MAX_DEFLATE_CODE_LEN) > 0)
|
||||
return INVALID_DIST_HUFFCODE;
|
||||
|
||||
if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
|
||||
if (create_huff_lookup
|
||||
(lit_huff_table, LIT_LEN, lit_tree, MAX_SAFE_LIT_CODE_LEN) > 0)
|
||||
return INVALID_LIT_LEN_HUFFCODE;
|
||||
|
||||
if (create_huff_lookup
|
||||
(dist_huff_table, DIST_LEN, dist_tree, MAX_SAFE_DIST_CODE_LEN) > 0)
|
||||
return INVALID_DIST_HUFFCODE;
|
||||
|
||||
if (are_hufftables_useable(lit_huff_table, dist_huff_table))
|
||||
return INVALID_HUFFCODE;
|
||||
}
|
||||
|
||||
create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET,
|
||||
dist_huff_table + DCODE_OFFSET);
|
||||
|
||||
create_code_tables(lit_table, lit_table_sizes, LIT_TABLE_SIZE, lit_huff_table);
|
||||
|
||||
create_packed_len_table(len_table, lit_huff_table);
|
||||
create_packed_dist_table(dist_table, DIST_TABLE_SIZE, dist_huff_table);
|
||||
|
||||
bit_count =
|
||||
create_header(deflate_hdr, sizeof(deflate_hdr), lit_huff_table, dist_huff_table,
|
||||
LAST_BLOCK);
|
||||
|
||||
hufftables->deflate_hdr_count = bit_count / 8;
|
||||
hufftables->deflate_hdr_extra_bits = bit_count % 8;
|
||||
|
||||
return 0;
|
||||
}
|
348
igzip/huff_codes.h
Normal file
348
igzip/huff_codes.h
Normal file
@ -0,0 +1,348 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef HUFF_CODES_H
|
||||
#define HUFF_CODES_H
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "igzip_lib.h"
|
||||
#include "bitbuf2.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
#else
|
||||
# include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#define LIT_LEN IGZIP_LIT_LEN /* size of the literal/length code tables */
|
||||
#define DIST_LEN IGZIP_DIST_LEN /* size of the distance code tables */
|
||||
#define CODE_LEN_CODES 19 /* entries of code_length_code_order scanned for hclen */
|
||||
#define HUFF_LEN 19 /* size of the code-length histogram and lookup table */
|
||||
#ifdef LONGER_HUFFTABLE
|
||||
# define DCODE_OFFSET 26 /* first distance symbol copied into dcodes */
|
||||
#else
|
||||
# define DCODE_OFFSET 20 /* first distance symbol copied into dcodes */
|
||||
#endif
|
||||
#define DYN_HDR_START_LEN 17 /* 3 block header bits + 5 hlit + 5 hdist + 4 hclen */
|
||||
#define MAX_HISTHEAP_SIZE LIT_LEN /* capacity of struct histheap */
|
||||
#define MAX_HUFF_TREE_DEPTH 15 /* longest code length handled by set_huff_codes */
|
||||
#define D IGZIP_D /* Amount of history */
|
||||
|
||||
#define MAX_DEFLATE_CODE_LEN 15 /* code length limit used on the first attempt */
|
||||
#define MAX_SAFE_LIT_CODE_LEN 13 /* literal/length limit used on the retry */
|
||||
#define MAX_SAFE_DIST_CODE_LEN 12 /* distance limit used on the retry */
|
||||
|
||||
#define LONG_DIST_TABLE_SIZE 8192
|
||||
#define SHORT_DIST_TABLE_SIZE 1024
|
||||
#define LEN_TABLE_SIZE 256
|
||||
#define LIT_TABLE_SIZE 257 /* entries copied into lit_table / lit_table_sizes */
|
||||
#define LAST_BLOCK 1 /* end_of_block value passed to create_header */
|
||||
|
||||
#define LEN_EXTRA_BITS_START 264 /* last length symbol without extra bits */
|
||||
#define LEN_EXTRA_BITS_INTERVAL 4 /* length symbols per extra bit count */
|
||||
#define DIST_EXTRA_BITS_START 3 /* last distance symbol without extra bits */
|
||||
#define DIST_EXTRA_BITS_INTERVAL 2 /* distance symbols per extra bit count */
|
||||
|
||||
#define INVALID_LIT_LEN_HUFFCODE 1 /* NOTE: all three error codes share the value 1, */
|
||||
#define INVALID_DIST_HUFFCODE 1 /* so callers cannot tell them apart by value */
|
||||
#define INVALID_HUFFCODE 1
|
||||
|
||||
/**
|
||||
* @brief Structure used to store huffman codes
|
||||
*/
|
||||
struct huff_code {
|
||||
uint16_t code; /* code bits; set_huff_codes() stores them bit-reversed */
|
||||
uint8_t length; /* code length in bits; 0 means the symbol has no code */
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Binary tree used to store and create a huffman tree.
|
||||
*/
|
||||
struct huff_tree {
|
||||
uint16_t value; /* presumably the symbol held by this node - TODO confirm */
|
||||
uint64_t frequency; /* weight of the node; histheap orders trees by this field */
|
||||
struct huff_tree *left; /* child subtree */
|
||||
struct huff_tree *right; /* child subtree */
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Nodes in a doubly linked list.
|
||||
*/
|
||||
struct linked_list_node {
|
||||
uint16_t value; /* payload; presumably a symbol index - TODO confirm */
|
||||
struct linked_list_node *next; /* following node in the list */
|
||||
struct linked_list_node *previous; /* preceding node in the list */
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief This structure is a doubly linked list.
|
||||
*/
|
||||
struct linked_list {
|
||||
uint64_t length; /* presumably the number of nodes in the list - TODO confirm */
|
||||
struct linked_list_node *start; /* first node */
|
||||
struct linked_list_node *end; /* last node */
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief This is a binary minheap structure which stores huffman trees.
|
||||
* @details The huffman trees are sorted by the frequency of the root.
|
||||
* The structure is represented in a fixed sized array.
|
||||
*/
|
||||
struct histheap {
|
||||
struct huff_tree tree[MAX_HISTHEAP_SIZE]; /* fixed-size heap storage */
|
||||
uint16_t size; /* presumably the number of entries in use - TODO confirm */
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Inserts a hufftree into a histheap.
|
||||
* @param element: the hufftree to be inserted
|
||||
* @param heap: the heap which element is being inserted into.
|
||||
* @requires This function assumes the heap has enough allocated space.
|
||||
* @returns Returns the index in heap of the inserted element
|
||||
*/
|
||||
int heap_push(struct huff_tree element, struct histheap *heap);
|
||||
|
||||
/**
|
||||
* @brief Removes the top element from the heap and returns it.
|
||||
*/
|
||||
struct huff_tree heap_pop(struct histheap *heap);
|
||||
|
||||
/**
|
||||
* @brief Removes the first element from list and returns it.
|
||||
*/
|
||||
struct linked_list_node *pop_from_front(struct linked_list *list);
|
||||
|
||||
/**
|
||||
* @brief Adds new_element to the front of list.
|
||||
*/
|
||||
void append_to_front(struct linked_list *list, struct linked_list_node *new_element);
|
||||
|
||||
/**
|
||||
* @brief Adds new_element to the end of list.
|
||||
*/
|
||||
void append_to_back(struct linked_list *list, struct linked_list_node *new_element);
|
||||
|
||||
/**
|
||||
* @brief Returns the deflate symbol value for a repeat length.
|
||||
*/
|
||||
uint32_t convert_length_to_len_sym(uint32_t length);
|
||||
|
||||
/**
|
||||
* @brief Returns the deflate symbol value for a look back distance.
|
||||
*/
|
||||
uint32_t convert_dist_to_dist_sym(uint32_t dist);
|
||||
|
||||
/**
|
||||
* Constructs a huffman tree on tree_array which only uses elements with non-zero frequency.
|
||||
* @requires Assumes there will be at least two symbols in the produced tree.
|
||||
* @requires tree_array must have length at least 2*size-1, and size must be less than 286.
|
||||
* @param tree_array: array of huff_tree elements used to create a huffman tree, the first
|
||||
* size elements of the array are the leaf elements in the huffman tree.
|
||||
* @param histogram: a histogram of the frequency of elements in tree_array.
|
||||
* @param size: the number of leaf elements in the huffman tree.
|
||||
*/
|
||||
struct huff_tree create_symbol_subset_huff_tree(struct huff_tree *tree_array,
|
||||
uint64_t * histogram, uint32_t size);
|
||||
|
||||
/**
|
||||
* @brief Construct a huffman tree on tree_array which uses every symbol.
|
||||
* @requires tree_array must have length at least 2*size-1, and size must be less than 286.
|
||||
* @param tree_array: array of huff_tree elements used to create a huffman tree, the first
|
||||
* size elements of the array are the leaf elements in the huffman tree.
|
||||
* @param histogram: a histogram of the frequency of elements in tree_array.
|
||||
* @param size: the number of leaf elements in the huffman tree.
|
||||
*/
|
||||
struct huff_tree create_huff_tree(struct huff_tree *tree_array, uint64_t * histogram,
|
||||
uint32_t size);
|
||||
|
||||
/**
|
||||
* @brief Creates a deflate compliant huffman tree with maximum depth max_depth.
|
||||
* @details The huffman tree is represented as a lookup table.
|
||||
* @param huff_lookup_table: The output lookup table.
|
||||
* @param table_length: The length of table.
|
||||
* @param root: the input huffman tree the created tree is based on.
|
||||
* @param max_depth: maximum depth the huffman tree can have
|
||||
* @returns Returns 0 if successful and returns 1 otherwise.
|
||||
*/
|
||||
int create_huff_lookup(struct huff_code *huff_lookup_table, int table_length,
|
||||
struct huff_tree root, uint8_t max_depth);
|
||||
|
||||
/**
|
||||
* @brief Determines the code length for every value in a huffman tree.
|
||||
* @param huff_lookup_table: An output lookup table used to store the code lengths
|
||||
* corresponding to the possible values
|
||||
* @param count: An output histogram representing code length versus number of occurrences.
|
||||
* @param current_node: A node of the huffman tree being analyzed currently.
|
||||
* @param current_depth: The depth of the current node in the huffman tree.
|
||||
* @returns Returns 0 if successful and returns 1 otherwise.
|
||||
*/
|
||||
int find_code_lengths(struct huff_code *huff_lookup_table, uint16_t * count,
|
||||
struct huff_tree root, uint8_t max_depth);
|
||||
|
||||
/**
|
||||
* @brief Creates an array of linked lists.
|
||||
* @details Each linked list contains all the elements with codes of a given length for
|
||||
* lengths less than 16, and a list for all elements with codes at least 16. These lists
|
||||
* are sorted by frequency from least frequent to most frequent within any given code length.
|
||||
* @param depth_array: depth_array[i] is a linked list of elements with code length i
|
||||
* @param linked_lists: An input structure the linked lists in depth array are built on.
|
||||
* @param current_node: the current node being visited in a huffman tree
|
||||
* @param current_depth: the depth of current_node in a huffman tree
|
||||
*/
|
||||
void huffman_tree_traversal(struct linked_list *depth_array,
|
||||
struct linked_list_node *linked_lists, uint16_t * extra_nodes,
|
||||
uint8_t max_depth, struct huff_tree current_node,
|
||||
uint16_t current_depth);
|
||||
|
||||
/**
|
||||
* @brief Determines the code of each element of a deflate compliant huffman tree and stores
|
||||
* it in a lookup table
|
||||
* @requires table has been initialized to already contain the code length for each element.
|
||||
* @param table: A lookup table used to store the codes.
|
||||
* @param table_length: The length of table.
|
||||
* @param count: a histogram representing the number of occurrences of codes of a given length
|
||||
*/
|
||||
void set_huff_codes(struct huff_code *table, int table_length, uint16_t * count);
|
||||
|
||||
/* Reverse the first length bits in bits and returns that value */
|
||||
uint16_t bit_reverse(uint16_t bits, uint8_t length);
|
||||
|
||||
/**
|
||||
* @brief Checks if a literal/length huffman table can be stored in the igzip hufftables files.
|
||||
* @param table: A literal/length huffman code lookup table.
|
||||
* @returns index of the first symbol which fails and 0xFFFF otherwise.
|
||||
*/
|
||||
uint16_t valid_lit_huff_table(struct huff_code *huff_code_table);
|
||||
|
||||
/**
|
||||
* @brief Checks if a distance huffman table can be stored in the igzip hufftables files.
|
||||
* @param table: A distance huffman code lookup table.
|
||||
* @returns the index of the first symbol which fails and 0xFFFF otherwise.
|
||||
*/
|
||||
uint16_t valid_dist_huff_table(struct huff_code *huff_code_table);
|
||||
|
||||
/**
|
||||
* @brief Creates the dynamic huffman deflate header.
|
||||
* @returns Returns the length of header in bits.
|
||||
* @requires This function requires header is large enough to store the whole header.
|
||||
* @param header: The output header.
|
||||
* @param lit_huff_table: A literal/length code huffman lookup table.
|
||||
* @param dist_huff_table: A distance huffman code lookup table.
|
||||
* @param end_of_block: Value determining whether end of block header is produced or not;
|
||||
* 0 corresponds to not end of block and all other inputs correspond to end of block.
|
||||
*/
|
||||
int create_header(uint8_t *header, uint32_t header_length, struct huff_code *lit_huff_table,
|
||||
struct huff_code *dist_huff_table, uint32_t end_of_block);
|
||||
|
||||
/**
|
||||
* @brief Creates a run length encoded representation of huff_table.
|
||||
* @details Also creates a histogram representing the frequency of each symbols
|
||||
* @returns Returns the number of symbols written into huffman_rep.
|
||||
* @param huffman_rep: The output run length encoded version of huff_table.
|
||||
* @param histogram: The output histogram of frequencies of elements in huffman_rep.
|
||||
* @param extra_bits: An output table storing extra bits associated with huffman_rep.
|
||||
* @param huff_table: The input huffman_table or concatenation of huffman_tables.
|
||||
* @param len: The length of huff_table.
|
||||
*/
|
||||
uint16_t create_huffman_rep(uint16_t * huffman_rep, uint64_t * histogram,
|
||||
uint16_t * extra_bits, struct huff_code *huff_table, uint16_t len);
|
||||
|
||||
/**
|
||||
* @brief Flushes the symbols for a repeat of last_code for length run_length into huffman_rep.
|
||||
* @param huffman_rep: pointer to array containing the output huffman_rep.
|
||||
* @param histogram: histogram of elements seen in huffman_rep.
|
||||
* @param extra_bits: an array holding extra bits for the corresponding symbol in huffman_rep.
|
||||
* @param huff_table: a concatenated list of huffman lookup tables.
|
||||
* @param current_index: The next spot elements will be written in huffman_rep.
|
||||
*/
|
||||
uint16_t flush_repeats(uint16_t * huffman_rep, uint64_t * histogram, uint16_t * extra_bits,
|
||||
uint16_t last_code, uint16_t run_length, uint16_t current_index);
|
||||
|
||||
/**
|
||||
* @brief Creates the header for run length encoded huffman trees.
|
||||
* @param header: the output header.
|
||||
* @param lookup_table: a huffman lookup table.
|
||||
* @param huffman_rep: a run length encoded huffman tree.
|
||||
* @param extra_bits: extra bits associated with the corresponding spot in huffman_rep
|
||||
* @param huffman_rep_length: the length of huffman_rep.
|
||||
* @param end_of_block: Value determining whether end of block header is produced or not;
|
||||
* 0 corresponds to not end of block and all other inputs correspond to end of block.
|
||||
* @param hclen: Length of huffman code for huffman codes minus 4.
|
||||
* @param hlit: Length of literal/length table minus 257.
|
||||
* @param hdist: Length of distance table minus 1.
|
||||
*/
|
||||
int create_huffman_header(uint8_t *header, uint32_t header_length, struct huff_code *lookup_table,
|
||||
uint16_t * huffman_rep, uint16_t * extra_bits,
|
||||
uint16_t huffman_rep_length, uint32_t end_of_block, uint32_t hclen,
|
||||
uint32_t hlit, uint32_t hdist);
|
||||
|
||||
/**
|
||||
* @brief Creates a two table representation of huffman codes.
|
||||
* @param code_table: output table containing the code
|
||||
* @param code_size_table: output table containing the code length
|
||||
* @param length: the length of hufftable
|
||||
* @param hufftable: a huffman lookup table
|
||||
*/
|
||||
void create_code_tables(uint16_t * code_table, uint8_t * code_length_table,
|
||||
uint32_t length, struct huff_code *hufftable);
|
||||
|
||||
/**
|
||||
* @brief Creates a packed representation of length huffman codes.
|
||||
* @details In packed_table, bits 32:8 contain the extra bits appended to the huffman
|
||||
* code and bits 8:0 contain the code length.
|
||||
* @param packed_table: the output table
|
||||
* @param length: the length of lit_len_hufftable
|
||||
* @param lit_len_hufftable: a literal/length huffman lookup table
|
||||
*/
|
||||
void create_packed_len_table(uint32_t * packed_table, struct huff_code *lit_len_hufftable);
|
||||
|
||||
/**
|
||||
* @brief Creates a packed representation of distance huffman codes.
|
||||
* @details In packed_table, bits 32:8 contain the extra bits appended to the huffman
|
||||
* code and bits 8:0 contain the code length.
|
||||
* @param packed_table: the output table
|
||||
* @param length: the maximum number of entries written to packed_table
|
||||
* @param dist_hufftable: a distance huffman lookup table
|
||||
*/
|
||||
void create_packed_dist_table(uint32_t * packed_table, uint32_t length,
|
||||
struct huff_code *dist_hufftable);
|
||||
|
||||
/**
|
||||
* @brief Checks to see if the hufftable is usable by igzip
|
||||
*
|
||||
* @param lit_len_hufftable: literal/length huffman code
|
||||
* @param dist_hufftable: distance huffman code
|
||||
* @returns Returns 0 if the table is usable
|
||||
*/
|
||||
int are_hufftables_useable(struct huff_code *lit_len_hufftable,
|
||||
struct huff_code *dist_hufftable);
|
||||
#endif
|
208
igzip/huffman.asm
Normal file
208
igzip/huffman.asm
Normal file
@ -0,0 +1,208 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%include "options.asm"
|
||||
%include "lz0a_const.asm"
|
||||
|
||||
; Macros for doing Huffman Encoding
|
||||
|
||||
%ifdef LONGER_HUFFTABLE
|
||||
%if (D > 8192)
|
||||
%error History D is larger than 8K, cannot use %LONGER_HUFFTABLE
|
||||
% error
|
||||
%else
|
||||
%define DIST_TABLE_SIZE 8192
|
||||
%define DECODE_OFFSET 26
|
||||
%endif
|
||||
%else
|
||||
%define DIST_TABLE_SIZE 1024
|
||||
%define DECODE_OFFSET 20
|
||||
%endif
|
||||
|
||||
%define LEN_TABLE_SIZE 256
|
||||
%define LIT_TABLE_SIZE 257
|
||||
|
||||
%define DIST_TABLE_START (IGZIP_MAX_DEF_HDR_SIZE + 8)
|
||||
%define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1)
|
||||
%define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3)
|
||||
%define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
|
||||
%define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
|
||||
%define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2)
|
||||
%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
|
||||
;; /** @brief Holds the huffman tree used to huffman encode the input stream **/
|
||||
;; struct isal_hufftables {
|
||||
;; // deflate huffman tree header
|
||||
;; uint8_t deflate_huff_hdr[IGZIP_MAX_DEF_HDR_SIZE];
|
||||
;;
|
||||
;; //!< Number of whole bytes in deflate_huff_hdr
|
||||
;; uint32_t deflate_huff_hdr_count;
|
||||
;;
|
||||
;; //!< Number of bits in the partial byte in header
|
||||
;; uint32_t deflate_huff_hdr_extra_bits;
|
||||
;;
|
||||
;; //!< bits 4:0 are the code length, bits 31:5 are the code
|
||||
;; uint32_t dist_table[DIST_TABLE_SIZE];
|
||||
;;
|
||||
;; //!< bits 4:0 are the code length, bits 31:5 are the code
|
||||
;; uint32_t len_table[LEN_TABLE_SIZE];
|
||||
;;
|
||||
;; //!< huffman code of each literal; the code lengths are read from a separate byte table (see LIT_TABLE_SIZES_OFFSET)
|
||||
;; uint16_t lit_table[LIT_TABLE_SIZE];
|
||||
;;
|
||||
;; //!< huffman code of each distance symbol; the code lengths are read from a separate byte table (see DCODE_TABLE_SIZE_OFFSET)
|
||||
;; uint16_t dcodes[30 - DECODE_OFFSET];
|
||||
|
||||
;; };
|
||||
|
||||
|
||||
%ifdef LONGER_HUFFTABLE
|
||||
; Uses RCX, clobbers dist
|
||||
; get_dist_code dist, code, len
|
||||
%macro get_dist_code 4
|
||||
%define %%dist %1 ; 64-bit IN
|
||||
%define %%code %2d ; 32-bit OUT
|
||||
%define %%len %3d ; 32-bit OUT
|
||||
%define %%hufftables %4 ; address of the hufftable
|
||||
|
||||
mov %%len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ]
|
||||
mov %%code, %%len
|
||||
and %%len, 0x1F;
|
||||
shr %%code, 5
|
||||
%endm
|
||||
|
||||
%macro get_packed_dist_code 3
|
||||
%define %%dist %1 ; 64-bit IN
|
||||
%define %%code_len %2d ; 32-bit OUT
|
||||
%define %%hufftables %3 ; address of the hufftable
|
||||
mov %%code_len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ]
|
||||
%endm
|
||||
|
||||
%macro unpack_dist_code 2
|
||||
%define %%code %1d ; 32-bit OUT
|
||||
%define %%len %2d ; 32-bit OUT
|
||||
|
||||
mov %%len, %%code
|
||||
and %%len, 0x1F;
|
||||
shr %%code, 5
|
||||
%endm
|
||||
|
||||
%else
|
||||
; Assumes (dist != 0)
|
||||
; Uses RCX, clobbers dist
|
||||
; void compute_dist_code dist, code, len
|
||||
%macro compute_dist_code 4
|
||||
%define %%dist %1d ; IN, clobbered
|
||||
%define %%distq %1
|
||||
%define %%code %2 ; OUT
|
||||
%define %%len %3 ; OUT
|
||||
%define %%hufftables %4
|
||||
|
||||
dec %%dist
|
||||
bsr ecx, %%dist ; ecx = msb = bsr(dist)
|
||||
dec ecx ; ecx = num_extra_bits = msb - N
|
||||
mov %%code, 1
|
||||
shl %%code, CL
|
||||
dec %%code ; code = ((1 << num_extra_bits) - 1)
|
||||
and %%code, %%dist ; code = extra_bits
|
||||
shr %%dist, CL ; dist >>= num_extra_bits
|
||||
lea %%dist, [%%dist + 2*ecx] ; dist = sym = dist + num_extra_bits*2
|
||||
mov %%len, ecx ; len = num_extra_bits
|
||||
movzx ecx, byte [hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT]
|
||||
movzx %%dist, word [hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT]
|
||||
shl %%code, CL ; code = extra_bits << (sym & 0xF)
|
||||
or %%code, %%dist ; code = (sym >> 4) | (extra_bits << (sym & 0xF))
|
||||
add %%len, ecx ; len = num_extra_bits + (sym & 0xF)
|
||||
%endm
|
||||
|
||||
; Uses RCX, clobbers dist
|
||||
; get_dist_code dist, code, len
|
||||
%macro get_dist_code 4
|
||||
%define %%dist %1d ; 32-bit IN, clobbered
|
||||
%define %%distq %1 ; 64-bit IN, clobbered
|
||||
%define %%code %2d ; 32-bit OUT
|
||||
%define %%len %3d ; 32-bit OUT
|
||||
%define %%hufftables %4
|
||||
|
||||
cmp %%dist, DIST_TABLE_SIZE
|
||||
jg %%do_compute
|
||||
mov %%len, [hufftables + DIST_TABLE_OFFSET + 4*%%distq WRT_OPT]
|
||||
mov %%code, %%len
|
||||
and %%len, 0x1F;
|
||||
shr %%code, 5
|
||||
jmp %%done
|
||||
%%do_compute:
|
||||
compute_dist_code %%distq, %%code, %%len, %%hufftables
|
||||
%%done:
|
||||
%endm
|
||||
|
||||
%macro get_packed_dist_code 3
|
||||
%define %%dist %1 ; 64-bit IN
|
||||
%define %%code_len %2d ; 32-bit OUT
|
||||
%define %%hufftables %3 ; address of the hufftable
|
||||
%endm
|
||||
|
||||
%endif
|
||||
|
||||
|
||||
; "len" can be same register as "length"
|
||||
; get_len_code length, code, len
|
||||
%macro get_len_code 4
|
||||
%define %%length %1 ; 64-bit IN
|
||||
%define %%code %2d ; 32-bit OUT
|
||||
%define %%len %3d ; 32-bit OUT
|
||||
%define %%hufftables %4
|
||||
|
||||
mov %%len, [%%hufftables + LEN_TABLE_OFFSET + 4 * %%length]
|
||||
mov %%code, %%len
|
||||
and %%len, 0x1F
|
||||
shr %%code, 5
|
||||
%endm
|
||||
|
||||
|
||||
%macro get_lit_code 4
|
||||
%define %%lit %1 ; 64-bit IN or CONST
|
||||
%define %%code %2d ; 32-bit OUT
|
||||
%define %%len %3d ; 32-bit OUT
|
||||
%define %%hufftables %4
|
||||
|
||||
movzx %%len, byte [%%hufftables + LIT_TABLE_SIZES_OFFSET + %%lit]
|
||||
movzx %%code, word [%%hufftables + LIT_TABLE_OFFSET + 2 * %%lit]
|
||||
|
||||
%endm
|
||||
|
||||
|
||||
;; Compute hash of first 3 bytes of data
|
||||
%macro compute_hash 2
|
||||
%define %%result %1d ; 32-bit reg
|
||||
%define %%data %2d ; 32-bit reg (low byte not clobbered)
|
||||
|
||||
and %%data, 0x00FFFFFF
|
||||
xor %%result, %%result
|
||||
crc32 %%result, %%data
|
||||
%endm
|
226
igzip/huffman.h
Normal file
226
igzip/huffman.h
Normal file
@ -0,0 +1,226 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "igzip_lib.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
# define inline __inline
|
||||
#else
|
||||
# include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#ifndef IGZIP_USE_GZIP_FORMAT
|
||||
# define DEFLATE 1
|
||||
#endif
|
||||
|
||||
|
||||
extern uint32_t CrcTable[256];
|
||||
|
||||
/**
 * @brief Returns the number of significant bits in val.
 *
 * The result is the 1-based position of the highest set bit, so bsr(1) == 1,
 * bsr(0x8000) == 16 and bsr(0) == 0.
 *
 * @param val: value to examine (full 32-bit range).
 * @returns bit length of val, 0 when val is 0.
 */
static inline uint32_t bsr(uint32_t val)
{
	uint32_t msb;
#ifdef __LZCNT__
	/* Use the 32-bit count so the result matches the portable loop for
	 * every uint32_t input; a 16-bit count would drop the upper half. */
	msb = 32 - _lzcnt_u32(val);
#else
	for (msb = 0; val > 0; val >>= 1)
		msb++;
#endif
	return msb;
}
|
||||
|
||||
/**
 * @brief Counts the whole zero bytes below the lowest set bit of val.
 *
 * Despite the name, the result is a byte count (0..7 for non-zero input),
 * not a bit count.
 *
 * @param val: value to examine. On the __x86_64__ path the result for
 *             val == 0 is undefined (__builtin_ctzll); the portable path
 *             returns 8 in that case.
 * @returns number of trailing zero bytes.
 */
static inline uint32_t tzcnt(uint64_t val)
{
#ifdef __x86_64__
	/* Index of the lowest set bit, scaled down from bits to bytes. */
	return (uint32_t) (__builtin_ctzll(val) / 8);
#else
	uint32_t zero_bytes = 8;

	/* Each shift pushes one byte out of the top; the number of shifts
	 * needed to empty val is 8 minus the trailing zero byte count. */
	while (val > 0) {
		val <<= 8;
		zero_bytes--;
	}
	return zero_bytes;
#endif
}
|
||||
|
||||
static void compute_dist_code(struct isal_hufftables *hufftables, uint16_t dist, uint64_t *p_code, uint64_t *p_len)
|
||||
{
|
||||
assert(dist > DIST_TABLE_SIZE);
|
||||
|
||||
dist -= 1;
|
||||
uint32_t msb;
|
||||
uint32_t num_extra_bits;
|
||||
uint32_t extra_bits;
|
||||
uint32_t sym;
|
||||
uint32_t len;
|
||||
uint32_t code;
|
||||
|
||||
msb = bsr(dist);
|
||||
assert(msb >= 1);
|
||||
num_extra_bits = msb - 2;
|
||||
extra_bits = dist & ((1 << num_extra_bits) - 1);
|
||||
dist >>= num_extra_bits;
|
||||
sym = dist + 2 * num_extra_bits;
|
||||
assert(sym < 30);
|
||||
code = hufftables->dcodes[sym - DECODE_OFFSET];
|
||||
len = hufftables->dcodes_sizes[sym - DECODE_OFFSET];
|
||||
*p_code = code | (extra_bits << len);
|
||||
*p_len = len + num_extra_bits;
|
||||
}
|
||||
|
||||
static inline void get_dist_code(struct isal_hufftables *hufftables, uint32_t dist, uint64_t *code, uint64_t *len)
|
||||
{
|
||||
if (dist < 1)
|
||||
dist = 0;
|
||||
assert(dist >= 1);
|
||||
assert(dist <= 32768);
|
||||
if (dist <= DIST_TABLE_SIZE) {
|
||||
uint64_t code_len;
|
||||
code_len = hufftables->dist_table[dist - 1];
|
||||
*code = code_len >> 5;
|
||||
*len = code_len & 0x1F;
|
||||
} else {
|
||||
compute_dist_code(hufftables, dist, code, len);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void get_len_code(struct isal_hufftables *hufftables, uint32_t length, uint64_t *code, uint64_t *len)
|
||||
{
|
||||
assert(length >= 3);
|
||||
assert(length <= 258);
|
||||
|
||||
uint64_t code_len;
|
||||
code_len = hufftables->len_table[length - 3];
|
||||
*code = code_len >> 5;
|
||||
*len = code_len & 0x1F;
|
||||
}
|
||||
|
||||
static inline void get_lit_code(struct isal_hufftables *hufftables, uint32_t lit, uint64_t *code, uint64_t *len)
|
||||
{
|
||||
assert(lit <= 256);
|
||||
|
||||
*code = hufftables->lit_table[lit];
|
||||
*len = hufftables->lit_table_sizes[lit];
|
||||
}
|
||||
|
||||
/**
 * @brief Returns a hash of the first 3 bytes of input data.
 *
 * Only the low 24 bits of data contribute; the top byte is masked off.
 *
 * @param data: 32-bit word whose low three bytes are hashed.
 * @returns 32-bit hash value. The value differs between the crc32 and the
 *          multiplicative implementation, so it is only comparable within
 *          one build.
 */
static inline uint32_t compute_hash(uint32_t data)
{
	data &= 0x00FFFFFF;
#ifdef __SSE4_2__
	/* The crc32 instruction belongs to SSE4.2, so the intrinsic is only
	 * usable when that extension (not merely SSE4.1) is enabled. */
	return _mm_crc32_u32(0, data);
#else
	/* Multiplicative hash: multiply by the constant 0xB2D06057 and
	 * discard the low 16 bits of the 64-bit product. */
	return ((uint64_t)data * 0xB2D06057) >> 16;
#endif /* __SSE4_2__ */
}
|
||||
|
||||
|
||||
/**
 * @brief Returns how long str1 and str2 have the same symbols.
 *
 * At most 258 bytes are compared. Whole 8-byte words are compared first and
 * any remaining 0..7 bytes are compared one at a time.
 *
 * @param str1: First input string.
 * @param str2: Second input string.
 * @param max_length: length of the smaller string; values above 258 are
 *                    clamped to 258.
 * @returns number of leading bytes that are equal in both strings.
 */
static inline int compare258(uint8_t * str1, uint8_t * str2, uint32_t max_length)
{
	uint32_t count;
	uint32_t tail;
	uint64_t word1;
	uint64_t word2;
	uint64_t diff;
	uint64_t loop_length;

	if (max_length > 258)
		max_length = 258;

	loop_length = max_length & ~0x7;

	for (count = 0; count < loop_length; count += 8) {
		/* memcpy keeps the 8-byte loads valid for any alignment and
		 * avoids reading the bytes through an incompatible pointer. */
		memcpy(&word1, str1, sizeof(word1));
		memcpy(&word2, str2, sizeof(word2));
		diff = word1 ^ word2;
		if (diff != 0) {
			/* tzcnt() gives the byte index of the lowest differing
			 * byte of the word (first in memory on little-endian). */
			return count + tzcnt(diff);
		}
		str1 += 8;
		str2 += 8;
	}

	/* Compare the 0..7 bytes that did not fill a whole word. */
	for (tail = max_length % 8; tail > 0; tail--) {
		if (*str1++ != *str2++)
			return count;
		count++;
	}

	return count;
}
|
||||
|
||||
static inline void update_crc(uint32_t* crc, uint8_t * start, uint32_t length)
|
||||
{
|
||||
#ifndef DEFLATE
|
||||
uint8_t *end = start + length;
|
||||
|
||||
while (start < end)
|
||||
*crc = (*crc >> 8) ^ CrcTable[(*crc & 0x000000FF) ^ *start++];
|
||||
#else
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
2528
igzip/hufftables_c.c
Normal file
2528
igzip/hufftables_c.c
Normal file
File diff suppressed because it is too large
Load Diff
882
igzip/igzip.c
Normal file
882
igzip/igzip.c
Normal file
@ -0,0 +1,882 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#define ASM
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#ifdef _WIN32
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
#ifndef IGZIP_USE_GZIP_FORMAT
|
||||
# define DEFLATE 1
|
||||
#endif
|
||||
|
||||
#define MAX_WRITE_BITS_SIZE 8
#define FORCE_FLUSH 64
#define MIN_OBUF_SIZE 224
#define NON_EMPTY_BLOCK_SIZE 6
/* Parenthesized so the sum stays a single operand wherever the macro is
 * expanded (for example inside a multiplication or comparison). */
#define MAX_SYNC_FLUSH_SIZE (NON_EMPTY_BLOCK_SIZE + MAX_WRITE_BITS_SIZE)
|
||||
|
||||
#include "huffman.h"
|
||||
#include "bitbuf2.h"
|
||||
#include "igzip_lib.h"
|
||||
#include "repeated_char_result.h"
|
||||
|
||||
extern const uint8_t gzip_hdr[];
|
||||
extern const uint32_t gzip_hdr_bytes;
|
||||
extern const uint32_t gzip_trl_bytes;
|
||||
extern const struct isal_hufftables hufftables_default;
|
||||
|
||||
extern uint32_t crc32_gzip(uint32_t init_crc, const unsigned char *buf, uint64_t len);
|
||||
|
||||
static int write_stored_block_stateless(struct isal_zstream *stream, uint32_t stored_len,
|
||||
uint32_t crc32);
|
||||
#ifndef DEFLATE
|
||||
static int write_gzip_header_stateless(struct isal_zstream *stream);
|
||||
#endif
|
||||
static int write_deflate_header_stateless(struct isal_zstream *stream);
|
||||
static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream);
|
||||
static int write_trailer_stateless(struct isal_zstream *stream, uint32_t avail_in,
|
||||
uint32_t crc32);
|
||||
|
||||
void isal_deflate_body_stateless(struct isal_zstream *stream);
|
||||
|
||||
unsigned int detect_repeated_char(uint8_t * buf, uint32_t size);
|
||||
|
||||
#define STORED_BLK_HDR_BZ 5
|
||||
#define STORED_BLK_MAX_BZ 65535
|
||||
|
||||
void isal_deflate_body(struct isal_zstream *stream);
|
||||
void isal_deflate_finish(struct isal_zstream *stream);
|
||||
uint32_t crc_512to32_01(uint32_t * crc);
|
||||
uint32_t get_crc(uint32_t * crc);
|
||||
|
||||
/*****************************************************************/
|
||||
|
||||
/* Forward declarations */
|
||||
static inline void reset_match_history(struct isal_zstream *stream);
|
||||
void write_header(struct isal_zstream *stream);
|
||||
void write_deflate_header(struct isal_zstream *stream);
|
||||
void write_trailer(struct isal_zstream *stream);
|
||||
|
||||
struct slver {
|
||||
uint16_t snum;
|
||||
uint8_t ver;
|
||||
uint8_t core;
|
||||
};
|
||||
|
||||
/* Version info */
|
||||
struct slver isal_deflate_init_slver_01030081;
|
||||
struct slver isal_deflate_init_slver = { 0x0081, 0x03, 0x01 };
|
||||
|
||||
struct slver isal_deflate_slver_01030082;
|
||||
struct slver isal_deflate_slver = { 0x0082, 0x03, 0x01 };
|
||||
|
||||
struct slver isal_deflate_stateless_slver_01010083;
|
||||
struct slver isal_deflate_stateless_slver = { 0x0083, 0x01, 0x01 };
|
||||
|
||||
/*****************************************************************/
|
||||
|
||||
uint32_t file_size(struct isal_zstate *state)
|
||||
{
|
||||
return state->b_bytes_valid + (uint32_t) (state->buffer - state->file_start);
|
||||
}
|
||||
|
||||
static
|
||||
void sync_flush(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
uint64_t bits_to_write = 0xFFFF0000, bits_len;
|
||||
uint64_t code = 0, len = 0, bytes;
|
||||
int flush_size;
|
||||
|
||||
if (stream->avail_out >= 8) {
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
|
||||
if (!state->has_eob)
|
||||
get_lit_code(stream->hufftables, 256, &code, &len);
|
||||
|
||||
flush_size = (-(state->bitbuf.m_bit_count + len + 3)) % 8;
|
||||
|
||||
bits_to_write <<= flush_size + 3;
|
||||
bits_len = 32 + len + flush_size + 3;
|
||||
|
||||
#ifdef USE_BITBUFB /* Write Bits Always */
|
||||
state->state = ZSTATE_NEW_HDR;
|
||||
#else /* Not Write Bits Always */
|
||||
state->state = ZSTATE_FLUSH_WRITE_BUFFER;
|
||||
#endif
|
||||
state->has_eob = 0;
|
||||
|
||||
if (len > 0)
|
||||
bits_to_write = (bits_to_write << len) | code;
|
||||
|
||||
write_bits(&state->bitbuf, bits_to_write, bits_len);
|
||||
|
||||
bytes = buffer_used(&state->bitbuf);
|
||||
stream->next_out = buffer_ptr(&state->bitbuf);
|
||||
stream->avail_out -= bytes;
|
||||
stream->total_out += bytes;
|
||||
|
||||
if (stream->flush == FULL_FLUSH) {
|
||||
/* Clear match history so there are no cross
|
||||
* block length distance pairs */
|
||||
reset_match_history(stream);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void flush_write_buffer(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
int bytes = 0;
|
||||
if (stream->avail_out >= 8) {
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
flush(&state->bitbuf);
|
||||
stream->next_out = buffer_ptr(&state->bitbuf);
|
||||
bytes = buffer_used(&state->bitbuf);
|
||||
stream->avail_out -= bytes;
|
||||
stream->total_out += bytes;
|
||||
state->state = ZSTATE_NEW_HDR;
|
||||
}
|
||||
}
|
||||
|
||||
static void isal_deflate_int(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR)
|
||||
write_header(stream);
|
||||
|
||||
if (state->state == ZSTATE_BODY)
|
||||
isal_deflate_body(stream);
|
||||
|
||||
if (state->state == ZSTATE_FLUSH_READ_BUFFER)
|
||||
isal_deflate_finish(stream);
|
||||
|
||||
if (state->state == ZSTATE_SYNC_FLUSH)
|
||||
sync_flush(stream);
|
||||
|
||||
if (state->state == ZSTATE_FLUSH_WRITE_BUFFER)
|
||||
flush_write_buffer(stream);
|
||||
|
||||
if (state->state == ZSTATE_TRL)
|
||||
write_trailer(stream);
|
||||
}
|
||||
|
||||
static uint32_t write_constant_compressed_stateless(struct isal_zstream *stream,
|
||||
uint32_t repeated_char,
|
||||
uint32_t repeated_length,
|
||||
uint32_t end_of_stream)
|
||||
{
|
||||
/* Assumes repeated_length is at least 1.
|
||||
* Assumes the input end_of_stream is either 0 or 1. */
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
uint32_t rep_bits = ((repeated_length - 1) / 258) * 2;
|
||||
uint32_t rep_bytes = rep_bits / 8;
|
||||
uint32_t rep_extra = (repeated_length - 1) % 258;
|
||||
uint32_t bytes;
|
||||
|
||||
/* Guarantee there is enough space for the header even in the worst case */
|
||||
if (stream->avail_out < HEADER_LENGTH + MAX_FIXUP_CODE_LENGTH + rep_bytes + 8)
|
||||
return STATELESS_OVERFLOW;
|
||||
|
||||
/* Assumes the repeated char is either 0 or 0xFF. */
|
||||
memcpy(stream->next_out, repeated_char_header[repeated_char & 1], HEADER_LENGTH);
|
||||
|
||||
if (end_of_stream > 0)
|
||||
stream->next_out[0] |= 1;
|
||||
|
||||
memset(stream->next_out + HEADER_LENGTH, 0, rep_bytes);
|
||||
stream->avail_out -= HEADER_LENGTH + rep_bytes;
|
||||
stream->next_out += HEADER_LENGTH + rep_bytes;
|
||||
stream->total_out += HEADER_LENGTH + rep_bytes;
|
||||
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
|
||||
/* These two lines are basically a modified version of init. */
|
||||
state->bitbuf.m_bits = 0;
|
||||
state->bitbuf.m_bit_count = rep_bits % 8;
|
||||
|
||||
/* Add smaller repeat codes as necessary. Code280 can describe repeat
|
||||
* lengths of 115-130 bits. Code10 can describe repeat lengths of 10
|
||||
* bits. If more than 230 bits, fill code with two code280s. Else if
|
||||
* more than 115 repeates, fill with code10s until one code280 can
|
||||
* finish the rest of the repeats. Else, fill with code10s and
|
||||
* literals */
|
||||
if (rep_extra > 115) {
|
||||
while (rep_extra > 130 && rep_extra < 230) {
|
||||
write_bits(&state->bitbuf, CODE_10, CODE_10_LENGTH);
|
||||
rep_extra -= 10;
|
||||
}
|
||||
|
||||
if (rep_extra >= 230) {
|
||||
write_bits(&state->bitbuf,
|
||||
CODE_280 | ((rep_extra / 2 - 115) << CODE_280_LENGTH),
|
||||
CODE_280_TOTAL_LENGTH);
|
||||
rep_extra -= rep_extra / 2;
|
||||
}
|
||||
|
||||
write_bits(&state->bitbuf,
|
||||
CODE_280 | ((rep_extra - 115) << CODE_280_LENGTH),
|
||||
CODE_280_TOTAL_LENGTH);
|
||||
|
||||
} else {
|
||||
while (rep_extra >= 10) {
|
||||
|
||||
write_bits(&state->bitbuf, CODE_10, CODE_10_LENGTH);
|
||||
rep_extra -= 10;
|
||||
}
|
||||
|
||||
for (; rep_extra > 0; rep_extra--)
|
||||
write_bits(&state->bitbuf, CODE_LIT, CODE_LIT_LENGTH);
|
||||
}
|
||||
|
||||
write_bits(&state->bitbuf, END_OF_BLOCK, END_OF_BLOCK_LEN);
|
||||
|
||||
stream->next_in += repeated_length;
|
||||
stream->avail_in -= repeated_length;
|
||||
stream->total_in += repeated_length;
|
||||
|
||||
bytes = buffer_used(&state->bitbuf);
|
||||
stream->next_out = buffer_ptr(&state->bitbuf);
|
||||
stream->avail_out -= bytes;
|
||||
stream->total_out += bytes;
|
||||
|
||||
return COMP_OK;
|
||||
}
|
||||
|
||||
/*
 * Returns how many bytes at the start of in repeat the leading pattern.
 *
 * The scan first steps over 8-byte words equal to the first 8 bytes and then
 * over single bytes equal to the first byte. It is meant for inputs whose
 * first 8 bytes are all the same character; for other inputs the word scan
 * matches repeats of the first 8-byte pattern before the byte scan runs.
 *
 * in:     start of the buffer.
 * length: number of readable bytes in the buffer; may be less than 8, in
 *         which case only the byte scan runs, and may be 0.
 *
 * Returns the length of the run, 0 when length is 0.
 */
int detect_repeated_char_length(uint8_t * in, uint32_t length)
{
	uint8_t *cur = in;
	uint8_t *end = in + length;
	uint64_t first_word;
	uint64_t word;
	uint8_t c;

	if (length == 0)
		return 0;

	c = in[0];

	/* Word scan only when a full word is available, so no byte outside
	 * [in, end) is read. memcpy keeps the loads valid for any alignment. */
	if (length >= 8) {
		memcpy(&first_word, in, sizeof(first_word));
		while (end - cur >= 8) {
			memcpy(&word, cur, sizeof(word));
			if (word != first_word)
				break;
			cur += 8;
		}
	}

	/* Finish byte by byte up to the first mismatch or the end. */
	while (cur < end && *cur == c)
		cur++;

	return (int) (cur - in);
}
|
||||
|
||||
static int isal_deflate_int_stateless(struct isal_zstream *stream, uint8_t * next_in,
|
||||
const uint32_t avail_in)
|
||||
{
|
||||
uint32_t crc32 = 0;
|
||||
uint32_t repeated_char_length;
|
||||
|
||||
#ifndef DEFLATE
|
||||
if (write_gzip_header_stateless(stream))
|
||||
return STATELESS_OVERFLOW;
|
||||
#endif
|
||||
|
||||
if (avail_in >= 8
|
||||
&& (*(uint64_t *) stream->next_in == 0
|
||||
|| *(uint64_t *) stream->next_in == ~(uint64_t) 0))
|
||||
repeated_char_length =
|
||||
detect_repeated_char_length(stream->next_in, stream->avail_in);
|
||||
else
|
||||
repeated_char_length = 0;
|
||||
|
||||
if (stream->avail_in == repeated_char_length) {
|
||||
if (write_constant_compressed_stateless(stream,
|
||||
stream->next_in[0],
|
||||
repeated_char_length, 1) != COMP_OK)
|
||||
return STATELESS_OVERFLOW;
|
||||
|
||||
#ifndef DEFLATE
|
||||
crc32 = crc32_gzip(0x0, next_in, avail_in);
|
||||
#endif
|
||||
|
||||
/* write_trailer_stateless is required because if flushes out the last of the output */
|
||||
if (write_trailer_stateless(stream, avail_in, crc32) != COMP_OK)
|
||||
return STATELESS_OVERFLOW;
|
||||
return COMP_OK;
|
||||
|
||||
} else if (repeated_char_length >= MIN_REPEAT_LEN) {
|
||||
if (write_constant_compressed_stateless
|
||||
(stream, stream->next_in[0], repeated_char_length, 0) != COMP_OK)
|
||||
return STATELESS_OVERFLOW;
|
||||
}
|
||||
|
||||
if (write_deflate_header_unaligned_stateless(stream) != COMP_OK)
|
||||
return STATELESS_OVERFLOW;
|
||||
if (stream->avail_out < 8)
|
||||
return STATELESS_OVERFLOW;
|
||||
|
||||
isal_deflate_body_stateless(stream);
|
||||
|
||||
if (!stream->internal_state.has_eob)
|
||||
return STATELESS_OVERFLOW;
|
||||
|
||||
#ifndef DEFLATE
|
||||
crc32 = crc32_gzip(0x0, next_in, avail_in);
|
||||
#endif
|
||||
|
||||
if (write_trailer_stateless(stream, avail_in, crc32) != COMP_OK)
|
||||
return STATELESS_OVERFLOW;
|
||||
|
||||
return COMP_OK;
|
||||
}
|
||||
|
||||
static int write_stored_block_stateless(struct isal_zstream *stream,
|
||||
uint32_t stored_len, uint32_t crc32)
|
||||
{
|
||||
uint64_t stored_blk_hdr;
|
||||
uint32_t copy_size;
|
||||
uint32_t avail_in;
|
||||
|
||||
#ifndef DEFLATE
|
||||
uint64_t gzip_trl;
|
||||
#endif
|
||||
|
||||
if (stream->avail_out < stored_len)
|
||||
return STATELESS_OVERFLOW;
|
||||
|
||||
stream->avail_out -= stored_len;
|
||||
stream->total_out += stored_len;
|
||||
avail_in = stream->avail_in;
|
||||
|
||||
#ifndef DEFLATE
|
||||
memcpy(stream->next_out, gzip_hdr, gzip_hdr_bytes);
|
||||
stream->next_out += gzip_hdr_bytes;
|
||||
#endif
|
||||
|
||||
do {
|
||||
if (avail_in >= STORED_BLK_MAX_BZ) {
|
||||
stored_blk_hdr = 0xFFFF00;
|
||||
copy_size = STORED_BLK_MAX_BZ;
|
||||
} else {
|
||||
stored_blk_hdr = ~avail_in;
|
||||
stored_blk_hdr <<= 24;
|
||||
stored_blk_hdr |= (avail_in & 0xFFFF) << 8;
|
||||
copy_size = avail_in;
|
||||
}
|
||||
|
||||
avail_in -= copy_size;
|
||||
|
||||
/* Handle BFINAL bit */
|
||||
if (avail_in == 0)
|
||||
stored_blk_hdr |= 0x1;
|
||||
|
||||
memcpy(stream->next_out, &stored_blk_hdr, STORED_BLK_HDR_BZ);
|
||||
stream->next_out += STORED_BLK_HDR_BZ;
|
||||
|
||||
memcpy(stream->next_out, stream->next_in, copy_size);
|
||||
stream->next_out += copy_size;
|
||||
stream->next_in += copy_size;
|
||||
stream->total_in += copy_size;
|
||||
} while (avail_in != 0);
|
||||
|
||||
#ifndef DEFLATE
|
||||
gzip_trl = stream->avail_in;
|
||||
gzip_trl <<= 32;
|
||||
gzip_trl |= crc32 & 0xFFFFFFFF;
|
||||
memcpy(stream->next_out, &gzip_trl, gzip_trl_bytes);
|
||||
stream->next_out += gzip_trl_bytes;
|
||||
#endif
|
||||
|
||||
stream->avail_in = 0;
|
||||
return COMP_OK;
|
||||
}
|
||||
|
||||
static inline void reset_match_history(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
uint16_t *head = stream->internal_state.head;
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i < sizeof(state->head) / 2; i++)
|
||||
head[i] =
|
||||
(uint16_t) (state->b_bytes_processed + state->buffer - state->file_start -
|
||||
(IGZIP_D + 1));
|
||||
}
|
||||
|
||||
void isal_deflate_init_01(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
|
||||
stream->total_in = 0;
|
||||
stream->total_out = 0;
|
||||
stream->hufftables = (struct isal_hufftables *)&hufftables_default;
|
||||
stream->flush = 0;
|
||||
|
||||
state->b_bytes_valid = 0;
|
||||
state->b_bytes_processed = 0;
|
||||
state->has_eob = 0;
|
||||
state->has_eob_hdr = 0;
|
||||
state->left_over = 0;
|
||||
state->last_flush = 0;
|
||||
state->has_gzip_hdr = 0;
|
||||
state->state = ZSTATE_NEW_HDR;
|
||||
state->count = 0;
|
||||
|
||||
state->tmp_out_start = 0;
|
||||
state->tmp_out_end = 0;
|
||||
|
||||
state->file_start = state->buffer;
|
||||
|
||||
init(&state->bitbuf);
|
||||
|
||||
memset(state->crc, 0, sizeof(state->crc));
|
||||
*state->crc = 0x9db42487;
|
||||
|
||||
reset_match_history(stream);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
int isal_deflate_stateless(struct isal_zstream *stream)
|
||||
{
|
||||
uint8_t *next_in = stream->next_in;
|
||||
const uint32_t avail_in = stream->avail_in;
|
||||
|
||||
uint8_t *next_out = stream->next_out;
|
||||
const uint32_t avail_out = stream->avail_out;
|
||||
|
||||
uint32_t crc32 = 0;
|
||||
uint32_t stored_len;
|
||||
uint32_t dyn_min_len;
|
||||
uint32_t min_len;
|
||||
uint32_t select_stored_blk = 0;
|
||||
|
||||
if (avail_in == 0)
|
||||
stored_len = STORED_BLK_HDR_BZ;
|
||||
else
|
||||
stored_len =
|
||||
STORED_BLK_HDR_BZ * ((avail_in + STORED_BLK_MAX_BZ - 1) /
|
||||
STORED_BLK_MAX_BZ) + avail_in;
|
||||
|
||||
/*
|
||||
at least 1 byte compressed data in the case of empty dynamic block which only
|
||||
contains the EOB
|
||||
*/
|
||||
|
||||
dyn_min_len = stream->hufftables->deflate_hdr_count + 1;
|
||||
#ifndef DEFLATE
|
||||
dyn_min_len += gzip_hdr_bytes + gzip_trl_bytes + 1;
|
||||
stored_len += gzip_hdr_bytes + gzip_trl_bytes;
|
||||
#endif
|
||||
|
||||
min_len = dyn_min_len;
|
||||
|
||||
if (stored_len < dyn_min_len) {
|
||||
min_len = stored_len;
|
||||
select_stored_blk = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
the output buffer should be no less than 8 bytes
|
||||
while empty stored deflate block is 5 bytes only
|
||||
*/
|
||||
if (avail_out < min_len || stream->avail_out < 8)
|
||||
return STATELESS_OVERFLOW;
|
||||
|
||||
if (!select_stored_blk) {
|
||||
if (isal_deflate_int_stateless(stream, next_in, avail_in) == COMP_OK)
|
||||
return COMP_OK;
|
||||
}
|
||||
if (avail_out < stored_len)
|
||||
return STATELESS_OVERFLOW;
|
||||
|
||||
isal_deflate_init(stream);
|
||||
|
||||
stream->next_in = next_in;
|
||||
stream->avail_in = avail_in;
|
||||
stream->total_in = 0;
|
||||
|
||||
stream->next_out = next_out;
|
||||
stream->avail_out = avail_out;
|
||||
stream->total_out = 0;
|
||||
|
||||
#ifndef DEFLATE
|
||||
crc32 = crc32_gzip(0x0, next_in, avail_in);
|
||||
#endif
|
||||
return write_stored_block_stateless(stream, stored_len, crc32);
|
||||
}
|
||||
|
||||
int isal_deflate(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
uint32_t size;
|
||||
int ret = COMP_OK;
|
||||
|
||||
if (stream->flush < 3) {
|
||||
|
||||
state->last_flush = stream->flush;
|
||||
|
||||
if (state->state >= TMP_OFFSET_SIZE) {
|
||||
size = state->tmp_out_end - state->tmp_out_start;
|
||||
if (size > stream->avail_out)
|
||||
size = stream->avail_out;
|
||||
memcpy(stream->next_out, state->tmp_out_buff + state->tmp_out_start,
|
||||
size);
|
||||
stream->next_out += size;
|
||||
stream->avail_out -= size;
|
||||
stream->total_out += size;
|
||||
state->tmp_out_start += size;
|
||||
|
||||
if (state->tmp_out_start == state->tmp_out_end)
|
||||
state->state -= TMP_OFFSET_SIZE;
|
||||
|
||||
if (stream->avail_out == 0 || state->state == ZSTATE_END)
|
||||
return ret;
|
||||
}
|
||||
assert(state->tmp_out_start == state->tmp_out_end);
|
||||
|
||||
isal_deflate_int(stream);
|
||||
|
||||
if (stream->avail_out == 0)
|
||||
return ret;
|
||||
|
||||
else if (stream->avail_out < 8) {
|
||||
uint8_t *next_out;
|
||||
uint32_t avail_out;
|
||||
uint32_t total_out;
|
||||
|
||||
next_out = stream->next_out;
|
||||
avail_out = stream->avail_out;
|
||||
total_out = stream->total_out;
|
||||
|
||||
stream->next_out = state->tmp_out_buff;
|
||||
stream->avail_out = sizeof(state->tmp_out_buff);
|
||||
stream->total_out = 0;
|
||||
|
||||
isal_deflate_int(stream);
|
||||
|
||||
state->tmp_out_start = 0;
|
||||
state->tmp_out_end = stream->total_out;
|
||||
|
||||
stream->next_out = next_out;
|
||||
stream->avail_out = avail_out;
|
||||
stream->total_out = total_out;
|
||||
if (state->tmp_out_end) {
|
||||
size = state->tmp_out_end;
|
||||
if (size > stream->avail_out)
|
||||
size = stream->avail_out;
|
||||
memcpy(stream->next_out, state->tmp_out_buff, size);
|
||||
stream->next_out += size;
|
||||
stream->avail_out -= size;
|
||||
stream->total_out += size;
|
||||
state->tmp_out_start += size;
|
||||
if (state->tmp_out_start != state->tmp_out_end)
|
||||
state->state += TMP_OFFSET_SIZE;
|
||||
|
||||
}
|
||||
}
|
||||
} else
|
||||
ret = INVALID_FLUSH;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifndef DEFLATE
|
||||
static int write_gzip_header_stateless(struct isal_zstream *stream)
|
||||
{
|
||||
if (gzip_hdr_bytes >= stream->avail_out)
|
||||
return STATELESS_OVERFLOW;
|
||||
|
||||
stream->avail_out -= gzip_hdr_bytes;
|
||||
stream->total_out += gzip_hdr_bytes;
|
||||
|
||||
memcpy(stream->next_out, gzip_hdr, gzip_hdr_bytes);
|
||||
|
||||
stream->next_out += gzip_hdr_bytes;
|
||||
|
||||
return COMP_OK;
|
||||
}
|
||||
|
||||
static void write_gzip_header(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
int bytes_to_write = gzip_hdr_bytes;
|
||||
|
||||
bytes_to_write -= state->count;
|
||||
|
||||
if (bytes_to_write > stream->avail_out)
|
||||
bytes_to_write = stream->avail_out;
|
||||
|
||||
memcpy(stream->next_out, gzip_hdr + state->count, bytes_to_write);
|
||||
state->count += bytes_to_write;
|
||||
|
||||
if (state->count == gzip_hdr_bytes) {
|
||||
state->count = 0;
|
||||
state->has_gzip_hdr = 1;
|
||||
}
|
||||
|
||||
stream->avail_out -= bytes_to_write;
|
||||
stream->total_out += bytes_to_write;
|
||||
stream->next_out += bytes_to_write;
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
static int write_deflate_header_stateless(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct isal_hufftables *hufftables = stream->hufftables;
|
||||
uint32_t count;
|
||||
|
||||
if (hufftables->deflate_hdr_count + 8 >= stream->avail_out)
|
||||
return STATELESS_OVERFLOW;
|
||||
|
||||
memcpy(stream->next_out, hufftables->deflate_hdr, hufftables->deflate_hdr_count);
|
||||
|
||||
stream->avail_out -= hufftables->deflate_hdr_count;
|
||||
stream->total_out += hufftables->deflate_hdr_count;
|
||||
stream->next_out += hufftables->deflate_hdr_count;
|
||||
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
|
||||
write_bits(&state->bitbuf, hufftables->deflate_hdr[hufftables->deflate_hdr_count],
|
||||
hufftables->deflate_hdr_extra_bits);
|
||||
|
||||
count = buffer_used(&state->bitbuf);
|
||||
stream->next_out = buffer_ptr(&state->bitbuf);
|
||||
stream->avail_out -= count;
|
||||
stream->total_out += count;
|
||||
|
||||
return COMP_OK;
|
||||
}
|
||||
|
||||
static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct isal_hufftables *hufftables = stream->hufftables;
|
||||
unsigned int count;
|
||||
uint64_t bit_count;
|
||||
uint64_t *header_next;
|
||||
uint64_t *header_end;
|
||||
uint64_t header_bits;
|
||||
|
||||
if (state->bitbuf.m_bit_count == 0)
|
||||
return write_deflate_header_stateless(stream);
|
||||
|
||||
if (hufftables->deflate_hdr_count + 16 >= stream->avail_out)
|
||||
return STATELESS_OVERFLOW;
|
||||
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
|
||||
header_next = (uint64_t *) hufftables->deflate_hdr;
|
||||
header_end = header_next + hufftables->deflate_hdr_count / 8;
|
||||
|
||||
/* Write out Complete Header bits */
|
||||
for (; header_next < header_end; header_next++) {
|
||||
header_bits = *header_next;
|
||||
write_bits(&state->bitbuf, header_bits, 32);
|
||||
header_bits >>= 32;
|
||||
write_bits(&state->bitbuf, header_bits, 32);
|
||||
}
|
||||
|
||||
header_bits = *header_next;
|
||||
bit_count =
|
||||
(hufftables->deflate_hdr_count & 0x7) * 8 + hufftables->deflate_hdr_extra_bits;
|
||||
|
||||
if (bit_count > MAX_BITBUF_BIT_WRITE) {
|
||||
write_bits(&state->bitbuf, header_bits, MAX_BITBUF_BIT_WRITE);
|
||||
header_bits >>= MAX_BITBUF_BIT_WRITE;
|
||||
bit_count -= MAX_BITBUF_BIT_WRITE;
|
||||
|
||||
}
|
||||
|
||||
write_bits(&state->bitbuf, header_bits, bit_count);
|
||||
|
||||
/* check_space flushes extra bytes in bitbuf. Required because
|
||||
* write_bits_always fails when the next commit makes the buffer
|
||||
* length exceed 64 bits */
|
||||
check_space(&state->bitbuf, FORCE_FLUSH);
|
||||
|
||||
count = buffer_used(&state->bitbuf);
|
||||
stream->next_out = buffer_ptr(&state->bitbuf);
|
||||
stream->avail_out -= count;
|
||||
stream->total_out += count;
|
||||
|
||||
return COMP_OK;
|
||||
}
|
||||
|
||||
void write_header(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
struct isal_hufftables *hufftables = stream->hufftables;
|
||||
uint32_t count;
|
||||
|
||||
state->state = ZSTATE_HDR;
|
||||
|
||||
if (state->bitbuf.m_bit_count != 0) {
|
||||
if (stream->avail_out < 8)
|
||||
return;
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
flush(&state->bitbuf);
|
||||
count = buffer_used(&state->bitbuf);
|
||||
stream->next_out = buffer_ptr(&state->bitbuf);
|
||||
stream->avail_out -= count;
|
||||
stream->total_out += count;
|
||||
}
|
||||
#ifndef DEFLATE
|
||||
if (!state->has_gzip_hdr)
|
||||
write_gzip_header(stream);
|
||||
#endif
|
||||
|
||||
count = hufftables->deflate_hdr_count - state->count;
|
||||
|
||||
if (count != 0) {
|
||||
if (count > stream->avail_out)
|
||||
count = stream->avail_out;
|
||||
|
||||
memcpy(stream->next_out, hufftables->deflate_hdr + state->count, count);
|
||||
|
||||
if (state->count == 0 && count > 0) {
|
||||
if (!stream->end_of_stream)
|
||||
*stream->next_out &= 0xfe;
|
||||
else
|
||||
state->has_eob_hdr = 1;
|
||||
}
|
||||
|
||||
stream->next_out += count;
|
||||
stream->avail_out -= count;
|
||||
stream->total_out += count;
|
||||
state->count += count;
|
||||
|
||||
count = hufftables->deflate_hdr_count - state->count;
|
||||
}
|
||||
|
||||
if ((count == 0) && (stream->avail_out >= 8)) {
|
||||
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
|
||||
write_bits(&state->bitbuf,
|
||||
hufftables->deflate_hdr[hufftables->deflate_hdr_count],
|
||||
hufftables->deflate_hdr_extra_bits);
|
||||
|
||||
state->state = ZSTATE_BODY;
|
||||
state->count = 0;
|
||||
|
||||
count = buffer_used(&state->bitbuf);
|
||||
stream->next_out = buffer_ptr(&state->bitbuf);
|
||||
stream->avail_out -= count;
|
||||
stream->total_out += count;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
 * Produce the final 32-bit CRC from the CRC state `crc` by handing it to
 * crc_512to32_01().
 */
uint32_t get_crc_01(uint32_t * crc)
{
	const uint32_t reduced = crc_512to32_01(crc);

	return reduced;
}
|
||||
|
||||
void write_trailer(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
unsigned int bytes;
|
||||
|
||||
if (stream->avail_out >= 8) {
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
|
||||
/* the flush() will pad to the next byte and write up to 8 bytes
|
||||
* to the output stream/buffer.
|
||||
*/
|
||||
if (!state->has_eob_hdr) {
|
||||
/* If the final header has not been written, write a
|
||||
* final block. This block is a static huffman block
|
||||
* which only contains the end of block symbol. The code
|
||||
* that happens to do this is the fist 10 bits of
|
||||
* 0x003 */
|
||||
state->has_eob_hdr = 1;
|
||||
write_bits(&state->bitbuf, 0x003, 10);
|
||||
if (is_full(&state->bitbuf)) {
|
||||
stream->next_out = buffer_ptr(&state->bitbuf);
|
||||
bytes = buffer_used(&state->bitbuf);
|
||||
stream->avail_out -= bytes;
|
||||
stream->total_out += bytes;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
flush(&state->bitbuf);
|
||||
stream->next_out = buffer_ptr(&state->bitbuf);
|
||||
bytes = buffer_used(&state->bitbuf);
|
||||
|
||||
#ifndef DEFLATE
|
||||
uint32_t *crc = state->crc;
|
||||
|
||||
if (!is_full(&state->bitbuf)) {
|
||||
*(uint64_t *) stream->next_out =
|
||||
((uint64_t) file_size(state) << 32) | get_crc(crc);
|
||||
stream->next_out += 8;
|
||||
bytes += 8;
|
||||
state->state = ZSTATE_END;
|
||||
}
|
||||
#else
|
||||
state->state = ZSTATE_END;
|
||||
#endif
|
||||
|
||||
stream->avail_out -= bytes;
|
||||
stream->total_out += bytes;
|
||||
}
|
||||
}
|
||||
|
||||
static int write_trailer_stateless(struct isal_zstream *stream, uint32_t avail_in,
|
||||
uint32_t crc32)
|
||||
{
|
||||
int ret = COMP_OK;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
unsigned int bytes;
|
||||
|
||||
if (stream->avail_out < 8) {
|
||||
ret = STATELESS_OVERFLOW;
|
||||
} else {
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
|
||||
/* the flush() will pad to the next byte and write up to 8 bytes
|
||||
* to the output stream/buffer.
|
||||
*/
|
||||
flush(&state->bitbuf);
|
||||
stream->next_out = buffer_ptr(&state->bitbuf);
|
||||
bytes = buffer_used(&state->bitbuf);
|
||||
#ifndef DEFLATE
|
||||
if (is_full(&state->bitbuf)) {
|
||||
ret = STATELESS_OVERFLOW;
|
||||
} else {
|
||||
*(uint64_t *) stream->next_out = ((uint64_t) avail_in << 32) | crc32;
|
||||
stream->next_out += 8;
|
||||
bytes += 8;
|
||||
}
|
||||
#endif
|
||||
stream->avail_out -= bytes;
|
||||
stream->total_out += bytes;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
292
igzip/igzip_base.c
Normal file
292
igzip/igzip_base.c
Normal file
@ -0,0 +1,292 @@
|
||||
#include <stdint.h>
|
||||
#include "igzip_lib.h"
|
||||
#include "huffman.h"
|
||||
#include "huff_codes.h"
|
||||
#include "bitbuf2.h"
|
||||
|
||||
extern const struct isal_hufftables hufftables_default;
|
||||
|
||||
void isal_deflate_init_base(struct isal_zstream *stream)
|
||||
{
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
int i;
|
||||
|
||||
uint32_t *crc = state->crc;
|
||||
|
||||
stream->total_in = 0;
|
||||
stream->total_out = 0;
|
||||
stream->hufftables = (struct isal_hufftables *)&hufftables_default;
|
||||
stream->flush = 0;
|
||||
state->b_bytes_valid = 0;
|
||||
state->b_bytes_processed = 0;
|
||||
state->has_eob = 0;
|
||||
state->has_eob_hdr = 0;
|
||||
state->left_over = 0;
|
||||
state->last_flush = 0;
|
||||
state->has_gzip_hdr = 0;
|
||||
state->state = ZSTATE_NEW_HDR;
|
||||
state->count = 0;
|
||||
|
||||
state->tmp_out_start = 0;
|
||||
state->tmp_out_end = 0;
|
||||
|
||||
state->file_start = state->buffer;
|
||||
|
||||
init(&state->bitbuf);
|
||||
|
||||
*crc = ~0;
|
||||
|
||||
for (i = 0; i < HASH_SIZE; i++)
|
||||
state->head[i] = (uint16_t) - (IGZIP_D + 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
 * Return the bitwise complement of the first word of the CRC state `crc`.
 * The state itself is not modified.
 */
uint32_t get_crc_base(uint32_t * crc)
{
	const uint32_t running = crc[0];

	return ~running;
}
|
||||
|
||||
static inline void update_state(struct isal_zstream *stream, struct isal_zstate *state,
|
||||
uint8_t * start_in)
|
||||
{
|
||||
uint32_t bytes_written;
|
||||
|
||||
stream->total_in += stream->next_in - start_in;
|
||||
|
||||
bytes_written = buffer_used(&state->bitbuf);
|
||||
stream->total_out += bytes_written;
|
||||
stream->next_out += bytes_written;
|
||||
stream->avail_out -= bytes_written;
|
||||
|
||||
}
|
||||
|
||||
void isal_deflate_body_base(struct isal_zstream *stream)
|
||||
{
|
||||
uint32_t literal, hash;
|
||||
uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
|
||||
uint16_t match_length;
|
||||
uint32_t dist, bytes_to_buffer, offset;
|
||||
uint64_t code, code_len, code2, code_len2;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
uint16_t *last_seen = state->head;
|
||||
uint32_t *crc = state->crc;
|
||||
|
||||
if (stream->avail_in == 0) {
|
||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||
state->state = ZSTATE_FLUSH_READ_BUFFER;
|
||||
return;
|
||||
}
|
||||
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
start_in = stream->next_in;
|
||||
|
||||
while (stream->avail_in != 0) {
|
||||
bytes_to_buffer =
|
||||
IGZIP_D + IGZIP_LA - (state->b_bytes_valid - state->b_bytes_processed);
|
||||
|
||||
if (bytes_to_buffer > IGZIP_D)
|
||||
bytes_to_buffer = IGZIP_D;
|
||||
|
||||
if (stream->avail_in < IGZIP_D)
|
||||
bytes_to_buffer = stream->avail_in;
|
||||
|
||||
if (bytes_to_buffer > BSIZE - state->b_bytes_valid) {
|
||||
if (state->b_bytes_valid - state->b_bytes_processed > IGZIP_LA) {
|
||||
/* There was an out buffer overflow last round,
|
||||
* complete the processing of data */
|
||||
bytes_to_buffer = 0;
|
||||
|
||||
} else {
|
||||
/* Not enough room in the buffer, shift the
|
||||
* buffer down to make space for the new data */
|
||||
offset = state->b_bytes_processed - IGZIP_D; // state->b_bytes_valid - (IGZIP_D + IGZIP_LA);
|
||||
memmove(state->buffer, state->buffer + offset,
|
||||
IGZIP_D + IGZIP_LA);
|
||||
|
||||
state->b_bytes_processed -= offset;
|
||||
state->b_bytes_valid -= offset;
|
||||
state->file_start -= offset;
|
||||
|
||||
stream->avail_in -= bytes_to_buffer;
|
||||
memcpy(state->buffer + state->b_bytes_valid, stream->next_in,
|
||||
bytes_to_buffer);
|
||||
update_crc(crc, stream->next_in, bytes_to_buffer);
|
||||
stream->next_in += bytes_to_buffer;
|
||||
}
|
||||
} else {
|
||||
/* There is enough space in the buffer, copy in the new data */
|
||||
stream->avail_in -= bytes_to_buffer;
|
||||
memcpy(state->buffer + state->b_bytes_valid, stream->next_in,
|
||||
bytes_to_buffer);
|
||||
update_crc(crc, stream->next_in, bytes_to_buffer);
|
||||
stream->next_in += bytes_to_buffer;
|
||||
}
|
||||
|
||||
state->b_bytes_valid += bytes_to_buffer;
|
||||
|
||||
end_in = state->buffer + state->b_bytes_valid - IGZIP_LA;
|
||||
|
||||
next_in = state->b_bytes_processed + state->buffer;
|
||||
|
||||
while (next_in < end_in) {
|
||||
|
||||
if (is_full(&state->bitbuf)) {
|
||||
state->b_bytes_processed = next_in - state->buffer;
|
||||
update_state(stream, state, start_in);
|
||||
return;
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - state->file_start);
|
||||
|
||||
if (dist - 1 < IGZIP_D - 1) { /* The -1 are to handle the case when dist = 0 */
|
||||
assert(next_in - dist >= state->buffer);
|
||||
assert(dist != 0);
|
||||
|
||||
match_length = compare258(next_in - dist, next_in, 258);
|
||||
|
||||
if (match_length >= SHORTEST_MATCH) {
|
||||
next_hash = next_in;
|
||||
#ifdef LIMIT_HASH_UPDATE
|
||||
end = next_hash + 3;
|
||||
#else
|
||||
end = next_hash + match_length;
|
||||
#endif
|
||||
next_hash++;
|
||||
|
||||
for (; next_hash < end; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
last_seen[hash] =
|
||||
(uint64_t) (next_hash - state->file_start);
|
||||
}
|
||||
|
||||
get_len_code(stream->hufftables, match_length, &code,
|
||||
&code_len);
|
||||
get_dist_code(stream->hufftables, dist, &code2,
|
||||
&code_len2);
|
||||
|
||||
code |= code2 << code_len;
|
||||
code_len += code_len2;
|
||||
|
||||
write_bits(&state->bitbuf, code, code_len);
|
||||
|
||||
next_in += match_length;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
|
||||
write_bits(&state->bitbuf, code, code_len);
|
||||
next_in++;
|
||||
}
|
||||
|
||||
state->b_bytes_processed = next_in - state->buffer;
|
||||
|
||||
}
|
||||
|
||||
update_state(stream, state, start_in);
|
||||
|
||||
if (stream->avail_in == 0) {
|
||||
if (stream->end_of_stream || stream->flush != NO_FLUSH)
|
||||
state->state = ZSTATE_FLUSH_READ_BUFFER;
|
||||
return;
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
void isal_deflate_finish_base(struct isal_zstream *stream)
|
||||
{
|
||||
uint32_t literal = 0, hash;
|
||||
uint8_t *next_in, *end_in, *end, *next_hash;
|
||||
uint16_t match_length;
|
||||
uint32_t dist;
|
||||
uint64_t code, code_len, code2, code_len2;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
uint16_t *last_seen = state->head;
|
||||
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
|
||||
end_in = state->b_bytes_valid + (uint8_t *) state->buffer;
|
||||
|
||||
next_in = state->b_bytes_processed + state->buffer;
|
||||
|
||||
while (next_in < end_in) {
|
||||
|
||||
if (is_full(&state->bitbuf)) {
|
||||
state->b_bytes_processed = next_in - state->buffer;
|
||||
update_state(stream, state, stream->next_in);
|
||||
return;
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) next_in;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) (next_in - state->file_start);
|
||||
|
||||
if (dist - 1 < IGZIP_D - 1) { /* The -1 are to handle the case when dist = 0 */
|
||||
assert(next_in - dist >= state->buffer);
|
||||
match_length = compare258(next_in - dist, next_in, end_in - next_in);
|
||||
|
||||
if (match_length >= SHORTEST_MATCH) {
|
||||
next_hash = next_in;
|
||||
#ifdef LIMIT_HASH_UPDATE
|
||||
end = next_hash + 3;
|
||||
#else
|
||||
end = next_hash + match_length;
|
||||
#endif
|
||||
next_hash++;
|
||||
|
||||
for (; next_hash < end; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
last_seen[hash] =
|
||||
(uint64_t) (next_hash - state->file_start);
|
||||
}
|
||||
|
||||
get_len_code(stream->hufftables, match_length, &code,
|
||||
&code_len);
|
||||
get_dist_code(stream->hufftables, dist, &code2, &code_len2);
|
||||
|
||||
code |= code2 << code_len;
|
||||
code_len += code_len2;
|
||||
|
||||
write_bits(&state->bitbuf, code, code_len);
|
||||
|
||||
next_in += match_length;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
|
||||
write_bits(&state->bitbuf, code, code_len);
|
||||
next_in++;
|
||||
|
||||
}
|
||||
|
||||
state->b_bytes_processed = next_in - state->buffer;
|
||||
|
||||
if (is_full(&state->bitbuf) || state->left_over > 0) {
|
||||
update_state(stream, state, stream->next_in);
|
||||
return;
|
||||
}
|
||||
|
||||
get_lit_code(stream->hufftables, 256, &code, &code_len);
|
||||
write_bits(&state->bitbuf, code, code_len);
|
||||
state->has_eob = 1;
|
||||
|
||||
update_state(stream, state, stream->next_in);
|
||||
|
||||
if (stream->end_of_stream == 1)
|
||||
state->state = ZSTATE_TRL;
|
||||
else
|
||||
state->state = ZSTATE_SYNC_FLUSH;
|
||||
|
||||
return;
|
||||
}
|
751
igzip/igzip_body.asm
Normal file
751
igzip/igzip_body.asm
Normal file
@ -0,0 +1,751 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%include "options.asm"
|
||||
%ifndef TEST
|
||||
|
||||
extern fold_4
|
||||
|
||||
%include "lz0a_const.asm"
|
||||
%include "data_struct2.asm"
|
||||
%include "bitbuf2.asm"
|
||||
%include "huffman.asm"
|
||||
%include "igzip_compare_types.asm"
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
%include "stdmac.asm"
|
||||
|
||||
%if (ARCH == 04)
|
||||
%define MOVDQA vmovdqa
|
||||
%else
|
||||
%define MOVDQA movdqa
|
||||
%endif
|
||||
|
||||
%ifdef DEBUG
|
||||
%macro MARK 1
|
||||
global %1
|
||||
%1:
|
||||
%endm
|
||||
%else
|
||||
%macro MARK 1
|
||||
%endm
|
||||
%endif
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%define tmp2 rcx
|
||||
%define hash2 rcx
|
||||
|
||||
%define b_bytes_valid rax
|
||||
%define curr_data rax
|
||||
%define code rax
|
||||
%define tmp5 rax
|
||||
|
||||
%define tmp4 rbx
|
||||
%define dist rbx
|
||||
%define code2 rbx
|
||||
|
||||
%define x rdx
|
||||
%define len rdx
|
||||
%define hash rdx
|
||||
%define code_len3 rdx
|
||||
|
||||
%define tmp1 rsi
|
||||
%define code_len2 rsi
|
||||
|
||||
%define blen rdi
|
||||
%define file_start rdi
|
||||
|
||||
%define m_bit_count rbp
|
||||
|
||||
%define in_buf r8
|
||||
%define curr_data2 r8
|
||||
%define len2 r8
|
||||
%define tmp6 r8
|
||||
|
||||
%define m_bits r9
|
||||
|
||||
%define f_i r10
|
||||
|
||||
%define m_out_buf r11
|
||||
|
||||
%define f_end_i r12
|
||||
%define dist2 r12
|
||||
%define tmp7 r12
|
||||
%define code4 r12
|
||||
|
||||
%define tmp3 r13
|
||||
%define code3 r13
|
||||
|
||||
%define stream r14
|
||||
|
||||
%define hufftables r15
|
||||
|
||||
%define crc_0 xmm0 ; in/out: crc state
|
||||
%define crc_1 xmm1 ; in/out: crc state
|
||||
%define crc_2 xmm2 ; in/out: crc state
|
||||
%define crc_3 xmm3 ; in/out: crc state
|
||||
%define crc_fold xmm4 ; in: (loaded from fold_4)
|
||||
|
||||
%define xtmp0 xmm5 ; tmp
|
||||
%define xtmp1 xmm6 ; tmp
|
||||
%define xtmp2 xmm7 ; tmp
|
||||
%define xtmp3 xmm8 ; tmp
|
||||
%define xtmp4 xmm9 ; tmp
|
||||
|
||||
%define ytmp0 ymm5 ; tmp
|
||||
%define ytmp1 ymm6 ; tmp
|
||||
|
||||
%if (ARCH == 04)
|
||||
%define vtmp0 ymm5 ; tmp
|
||||
%define vtmp1 ymm6 ; tmp
|
||||
%define vtmp2 ymm7 ; tmp
|
||||
%define vtmp3 ymm8 ; tmp
|
||||
%define vtmp4 ymm9 ; tmp
|
||||
%else
|
||||
%define vtmp0 xmm5 ; tmp
|
||||
%define vtmp1 xmm6 ; tmp
|
||||
%define vtmp2 xmm7 ; tmp
|
||||
%define vtmp3 xmm8 ; tmp
|
||||
%define vtmp4 xmm9 ; tmp
|
||||
%endif
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define b_bytes_processed f_i
|
||||
|
||||
blen_mem_offset equ 0 ; local variable (8 bytes)
|
||||
in_buf_mem_offset equ 8
|
||||
f_end_i_mem_offset equ 16
|
||||
empty_buffer_flag equ 24
|
||||
gpr_save_mem_offset equ 32 ; gpr save area (8*8 bytes)
|
||||
xmm_save_mem_offset equ 32 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
|
||||
stack_size equ 4*8 + 8*8 + 4*16 + 8
|
||||
;;; 8 because stack address is odd multiple of 8 after a function call and
|
||||
;;; we want it aligned to 16 bytes
|
||||
|
||||
; void isal_deflate_body ( isal_zstream *stream )
|
||||
; arg 1: rcx: addr of stream
|
||||
global isal_deflate_body_ %+ ARCH
|
||||
isal_deflate_body_ %+ ARCH %+ :
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
mov rcx, rdi
|
||||
%endif
|
||||
|
||||
;; do nothing if (avail_in == 0)
|
||||
cmp dword [rcx + _avail_in], 0
|
||||
jne skip1
|
||||
|
||||
;; Set stream's next state
|
||||
mov rdx, ZSTATE_FLUSH_READ_BUFFER
|
||||
mov rax, ZSTATE_BODY
|
||||
cmp dword [rcx + _end_of_stream], 0
|
||||
cmovne rax, rdx
|
||||
cmp dword [rcx + _flush], _NO_FLUSH
|
||||
cmovne rax, rdx
|
||||
mov dword [rcx + _internal_state_state], eax
|
||||
ret
|
||||
skip1:
|
||||
|
||||
%ifdef ALIGN_STACK
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
sub rsp, stack_size
|
||||
and rsp, ~15
|
||||
%else
|
||||
sub rsp, stack_size
|
||||
%endif
|
||||
|
||||
mov [rsp + gpr_save_mem_offset + 0*8], rbx
|
||||
mov [rsp + gpr_save_mem_offset + 1*8], rsi
|
||||
mov [rsp + gpr_save_mem_offset + 2*8], rdi
|
||||
mov [rsp + gpr_save_mem_offset + 3*8], rbp
|
||||
mov [rsp + gpr_save_mem_offset + 4*8], r12
|
||||
mov [rsp + gpr_save_mem_offset + 5*8], r13
|
||||
mov [rsp + gpr_save_mem_offset + 6*8], r14
|
||||
mov [rsp + gpr_save_mem_offset + 7*8], r15
|
||||
MOVDQA [rsp + xmm_save_mem_offset + 0*16], xmm6
|
||||
MOVDQA [rsp + xmm_save_mem_offset + 1*16], xmm7
|
||||
MOVDQA [rsp + xmm_save_mem_offset + 2*16], xmm8
|
||||
MOVDQA [rsp + xmm_save_mem_offset + 3*16], xmm9
|
||||
|
||||
mov stream, rcx
|
||||
|
||||
MOVDQA crc_0, [stream + _internal_state_crc + 0*16]
|
||||
MOVDQA crc_1, [stream + _internal_state_crc + 1*16]
|
||||
MOVDQA crc_2, [stream + _internal_state_crc + 2*16]
|
||||
MOVDQA crc_3, [stream + _internal_state_crc + 3*16]
|
||||
MOVDQA crc_fold, [fold_4]
|
||||
mov dword [stream + _internal_state_has_eob], 0
|
||||
|
||||
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
|
||||
mov m_out_buf, [stream + _next_out]
|
||||
mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf
|
||||
mov tmp1 %+ d, [stream + _avail_out]
|
||||
add tmp1, m_out_buf
|
||||
sub tmp1, SLOP
|
||||
skip_SLOP:
|
||||
mov [stream + _internal_state_bitbuf_m_out_end], tmp1
|
||||
|
||||
mov m_bits, [stream + _internal_state_bitbuf_m_bits]
|
||||
mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
|
||||
|
||||
mov hufftables, [stream + _hufftables]
|
||||
; in_buf = stream->next_in
|
||||
mov in_buf, [stream + _next_in]
|
||||
mov blen %+ d, [stream + _avail_in]
|
||||
|
||||
mov dword [rsp + empty_buffer_flag], 0
|
||||
cmp dword [stream + _flush], _FULL_FLUSH
|
||||
sete byte [rsp + empty_buffer_flag]
|
||||
cmp dword [stream + _internal_state_b_bytes_processed], 0
|
||||
sete byte [rsp + empty_buffer_flag + 1]
|
||||
|
||||
; while (blen != 0)
|
||||
MARK __Compute_X_ %+ ARCH
|
||||
loop1:
|
||||
; x = D + LA - (state->b_bytes_valid - state->b_bytes_processed);
|
||||
mov b_bytes_valid %+ d, [stream + _internal_state_b_bytes_valid]
|
||||
mov b_bytes_processed %+ d, [stream + _internal_state_b_bytes_processed]
|
||||
lea x, [b_bytes_processed + D + LA]
|
||||
sub x, b_bytes_valid
|
||||
|
||||
; if (x > D) x = D;
|
||||
cmp x, D
|
||||
cmova x, [const_D]
|
||||
|
||||
; if (blen < D) x = blen;
|
||||
cmp blen, D
|
||||
cmovb x, blen
|
||||
|
||||
;; process x bytes starting at in_buf
|
||||
|
||||
;; If there isn't enough room, shift buffer down
|
||||
; if (x > BSIZE - state->b_bytes_valid) {
|
||||
mov tmp1, BSIZE
|
||||
sub tmp1, b_bytes_valid
|
||||
cmp x, tmp1
|
||||
jbe skip_move
|
||||
|
||||
; if (state->b_bytes_processed < state->b_bytes_valid - LA) {
|
||||
mov tmp1, b_bytes_valid
|
||||
sub tmp1, LA
|
||||
cmp b_bytes_processed, tmp1
|
||||
jae do_move
|
||||
|
||||
;; We need to move an odd amount, skip move for this copy of loop
|
||||
xor x,x
|
||||
mov [rsp + blen_mem_offset], blen
|
||||
jmp skip_move_zero
|
||||
|
||||
MARK __shift_data_down_ %+ ARCH
|
||||
do_move:
|
||||
; offset = state->b_bytes_valid - (D + LA);
|
||||
mov tmp4, b_bytes_valid
|
||||
sub tmp4, D + LA
|
||||
; copy_D_LA(state->buffer, state->buffer + offset);
|
||||
lea tmp1, [stream + _internal_state_buffer]
|
||||
lea tmp2, [tmp1 + tmp4]
|
||||
copy_D_LA tmp1, tmp2, tmp3, vtmp0, vtmp1, vtmp2, vtmp3
|
||||
; tmp1 clobbered
|
||||
|
||||
; state->file_start -= offset;
|
||||
sub [stream + _internal_state_file_start], tmp4
|
||||
; state->b_bytes_processed -= offset;
|
||||
sub b_bytes_processed, tmp4
|
||||
mov b_bytes_valid, D + LA
|
||||
|
||||
MARK __copy_in_ %+ ARCH
|
||||
skip_move:
|
||||
sub blen, x
|
||||
|
||||
mov [rsp + blen_mem_offset], blen
|
||||
|
||||
; copy_in(state->buffer + state->b_bytes_valid, in_buf, x);
|
||||
lea tmp1, [stream + _internal_state_buffer + b_bytes_valid]
|
||||
mov tmp2, in_buf
|
||||
mov tmp3, x
|
||||
|
||||
|
||||
COPY_IN_CRC tmp1, tmp2, tmp3, tmp4, crc_0, crc_1, crc_2, crc_3, crc_fold, \
|
||||
xtmp0, xtmp1, xtmp2, xtmp3, xtmp4
|
||||
|
||||
; in_buf += x;
|
||||
add in_buf, x
|
||||
MARK __prepare_loop_ %+ ARCH
|
||||
skip_move_zero:
|
||||
mov [rsp + in_buf_mem_offset], in_buf
|
||||
; state->b_bytes_valid += x;
|
||||
add b_bytes_valid, x
|
||||
mov [stream + _internal_state_b_bytes_valid], b_bytes_valid %+ d
|
||||
|
||||
; f_end_i = state->b_bytes_valid - LA;
|
||||
%ifnidn f_end_i, b_bytes_valid
|
||||
mov f_end_i, b_bytes_valid
|
||||
%endif
|
||||
sub f_end_i, LA
|
||||
; if (f_end_i <= 0) continue;
|
||||
cmp f_end_i, 0
|
||||
jle continue_while
|
||||
|
||||
; f_start_i = state->b_bytes_processed;
|
||||
;; f_i and b_bytes_processed are same register, just store b_bytes_proc
|
||||
mov [stream + _internal_state_b_bytes_processed], b_bytes_processed %+ d
|
||||
|
||||
; f_start_i += (uint32_t)(state->buffer - state->file_start);
|
||||
mov file_start, [stream + _internal_state_file_start]
|
||||
lea tmp1, [stream + _internal_state_buffer]
|
||||
sub tmp1, file_start
|
||||
add f_i, tmp1
|
||||
add f_end_i, tmp1
|
||||
mov [rsp + f_end_i_mem_offset], f_end_i
|
||||
|
||||
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
|
||||
cmp f_i, f_end_i
|
||||
jge end_loop_2
|
||||
|
||||
MARK __misc_compute_hash_lookup_ %+ ARCH
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
|
||||
cmp dword [rsp + empty_buffer_flag], 0
|
||||
jne write_first_byte
|
||||
|
||||
mov curr_data2, curr_data
|
||||
|
||||
compute_hash hash, curr_data
|
||||
jmp loop2
|
||||
|
||||
align 16
|
||||
|
||||
loop2:
|
||||
shr curr_data2, 8
|
||||
xor hash2 %+ d, hash2 %+ d
|
||||
crc32 hash2 %+ d, curr_data2 %+ d
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
|
||||
; if (state->bitbuf.is_full()) {
|
||||
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||
ja bitbuf_full
|
||||
|
||||
xor dist, dist
|
||||
xor dist2, dist2
|
||||
xor tmp3, tmp3
|
||||
|
||||
lea tmp1, [file_start + f_i]
|
||||
lea tmp6, [tmp1 - 1]
|
||||
|
||||
mov dist %+ w, f_i %+ w
|
||||
sub dist %+ w, word [stream + _internal_state_head + 2 * hash]
|
||||
|
||||
; state->head[hash] = (uint16_t) f_i;
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
|
||||
inc f_i
|
||||
|
||||
mov dist2 %+ w, f_i %+ w
|
||||
sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2]
|
||||
dec dist2
|
||||
|
||||
; state->head[hash2] = (uint16_t) f_i;
|
||||
mov [stream + _internal_state_head + 2 * hash2], f_i %+ w
|
||||
|
||||
mov tmp2, tmp1
|
||||
sub tmp2, dist
|
||||
dec dist
|
||||
|
||||
; if ((dist-1) < (D-1)) {
|
||||
cmp dist %+ d, (D-1)
|
||||
cmovae tmp2, tmp6
|
||||
cmovae dist, tmp3
|
||||
inc dist
|
||||
|
||||
cmp dist2 %+ d, (D-1)
|
||||
cmovae dist2, tmp3
|
||||
inc dist2
|
||||
|
||||
MARK __compare_ %+ ARCH
|
||||
; len = compare258(state->file_start + f_i,
|
||||
; state->file_start + f_i - dist);
|
||||
|
||||
;; Speculatively load distance code (except for when large windows are used)
|
||||
get_packed_dist_code dist, code2, hufftables
|
||||
|
||||
;; Check for long len/dist match (>7) with first literal
|
||||
mov len, [tmp1]
|
||||
xor len, [tmp2]
|
||||
jz compare_loop
|
||||
|
||||
%ifdef USE_HSWNI
|
||||
blsmsk tmp3, len
|
||||
or tmp3, 0xFFFFFF
|
||||
%endif
|
||||
|
||||
lea tmp1, [file_start + f_i]
|
||||
mov tmp2, tmp1
|
||||
sub tmp2, dist2
|
||||
|
||||
;; Speculatively load distance code (except for when large windows are used)
|
||||
get_packed_dist_code dist2, code4, hufftables
|
||||
|
||||
;; Check for len/dist match (>7) with second literal
|
||||
mov len2, [tmp1]
|
||||
xor len2, [tmp2]
|
||||
jz compare_loop2
|
||||
|
||||
%ifdef USE_HSWNI
|
||||
;; Check for len/dist match for first literal
|
||||
test tmp3, len2
|
||||
jz len_dist_lit_huffman_pre
|
||||
|
||||
cmp tmp3, 0xFFFFFF
|
||||
je encode_2_literals
|
||||
jmp len_dist_huffman_pre
|
||||
|
||||
|
||||
MARK __len_dist_lit_huffman_ %+ ARCH
|
||||
len_dist_lit_huffman_pre:
|
||||
movzx tmp1, curr_data %+ b
|
||||
get_lit_code tmp1, code3, code_len3, hufftables
|
||||
%else
|
||||
;; Speculatively load the code for the first literal
|
||||
movzx tmp1, curr_data %+ b
|
||||
get_lit_code tmp1, code3, rcx, hufftables
|
||||
|
||||
;; Check for len/dist match for first literal
|
||||
test len, 0xFFFFFF
|
||||
jz len_dist_huffman_pre
|
||||
|
||||
;; Speculatively load the code for the second literal
|
||||
shr curr_data, 8
|
||||
and curr_data, 0xff
|
||||
get_lit_code curr_data, code2, code_len2, hufftables
|
||||
|
||||
shl code2, cl
|
||||
or code2, code3
|
||||
add code_len2, rcx
|
||||
|
||||
;; Check for len/dist match for second literal
|
||||
test len2, 0xFFFFFF
|
||||
jnz write_lit_bits
|
||||
|
||||
MARK __len_dist_lit_huffman_ %+ ARCH
|
||||
len_dist_lit_huffman_pre:
|
||||
mov code_len3, rcx
|
||||
%endif
|
||||
bsf len2, len2
|
||||
shr len2, 3
|
||||
|
||||
len_dist_lit_huffman:
|
||||
%ifndef LONGER_HUFFTABLE
|
||||
mov tmp4, dist2
|
||||
get_dist_code tmp4, code4, code_len2, hufftables ;; clobbers dist, rcx
|
||||
%else
|
||||
unpack_dist_code code4, code_len2
|
||||
%endif
|
||||
get_len_code len2, code, rcx, hufftables ;; rcx is code_len
|
||||
|
||||
%ifdef USE_HSWNI
|
||||
shlx code4, code4, rcx
|
||||
%else
|
||||
shl code4, cl
|
||||
%endif
|
||||
or code4, code
|
||||
add code_len2, rcx
|
||||
|
||||
mov rcx, code_len3
|
||||
|
||||
%ifdef USE_HSWNI
|
||||
shlx code4, code4, rcx
|
||||
%else
|
||||
shl code4, cl
|
||||
%endif
|
||||
or code4, code3
|
||||
add code_len2, rcx
|
||||
|
||||
mov code2, code4
|
||||
;; Setup for updating hash
|
||||
lea tmp3, [f_i + 1] ; tmp3 <= k
|
||||
add f_i, len2
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
mov tmp5 %+ d, [file_start + tmp3]
|
||||
mov tmp7, tmp5
|
||||
shr tmp7, 8
|
||||
|
||||
compute_hash hash, tmp5
|
||||
and hash %+ d, HASH_MASK
|
||||
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
add tmp3,1
|
||||
|
||||
jmp update_hash_for_symbol
|
||||
;; encode as dist/len
|
||||
|
||||
MARK __len_dist_huffman_ %+ ARCH
|
||||
len_dist_huffman_pre:
|
||||
bsf len, len
|
||||
shr len, 3
|
||||
len_dist_huffman:
|
||||
dec f_i
|
||||
|
||||
; get_dist_code(dist, &code2, &code_len2);
|
||||
%ifndef LONGER_HUFFTABLE
|
||||
mov tmp3, dist ; since code2 and dist are rbx
|
||||
get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx
|
||||
%else
|
||||
unpack_dist_code code2, code_len2
|
||||
%endif
|
||||
; get_len_code(len, &code, &code_len);
|
||||
get_len_code len, code, rcx, hufftables ;; rcx is code_len
|
||||
|
||||
; code2 <<= code_len
|
||||
; code2 |= code
|
||||
; code_len2 += code_len
|
||||
%ifdef USE_HSWNI
|
||||
shlx code2, code2, rcx
|
||||
%else
|
||||
shl code2, cl
|
||||
%endif
|
||||
or code2, code
|
||||
add code_len2, rcx
|
||||
|
||||
;; Setup for updating hash
|
||||
lea tmp3, [f_i + 2] ; tmp3 <= k
|
||||
add f_i, len
|
||||
mov tmp7 %+ d, [file_start + tmp3]
|
||||
|
||||
MARK __update_hash_for_symbol_ %+ ARCH
|
||||
update_hash_for_symbol:
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
mov curr_data2, curr_data
|
||||
compute_hash hash, curr_data
|
||||
%ifdef LIMIT_HASH_UPDATE
|
||||
; only update hash twice, first hash was already calculated.
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
compute_hash hash2, tmp7
|
||||
and hash2 %+ d, HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
||||
|
||||
%else
|
||||
loop3:
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
mov tmp7 %+ d, [file_start + tmp3]
|
||||
compute_hash hash2, tmp7
|
||||
and hash2 %+ d, HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
||||
add tmp3,1
|
||||
cmp tmp3, f_i
|
||||
jl loop3
|
||||
%endif
|
||||
|
||||
|
||||
MARK __write_len_dist_bits_ %+ ARCH
|
||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
|
||||
|
||||
; continue
|
||||
cmp f_i, f_end_i
|
||||
jl loop2
|
||||
jmp end_loop_2
|
||||
|
||||
|
||||
MARK __write_lit_bits_ %+ ARCH
|
||||
%ifdef USE_HSWNI
|
||||
encode_2_literals:
|
||||
movzx tmp1, curr_data %+ b
|
||||
get_lit_code tmp1, code3, rcx, hufftables
|
||||
|
||||
shr curr_data, 8
|
||||
and curr_data, 0xff
|
||||
get_lit_code curr_data, code2, code_len2, hufftables
|
||||
|
||||
;; Calculate code associated with both literals
|
||||
shlx code2, code2, rcx
|
||||
or code2, code3
|
||||
add code_len2, rcx
|
||||
%endif
|
||||
write_lit_bits:
|
||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||
add f_i, 1
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
mov curr_data2, curr_data
|
||||
|
||||
compute_hash hash, curr_data
|
||||
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
|
||||
|
||||
; continue
|
||||
cmp f_i, f_end_i
|
||||
jl loop2
|
||||
|
||||
|
||||
MARK __end_loops_ %+ ARCH
|
||||
end_loop_2:
|
||||
|
||||
; state->b_bytes_processed = f_i - (state->buffer - state->file_start);
|
||||
add f_i, [stream + _internal_state_file_start]
|
||||
sub f_i, stream
|
||||
sub f_i, _internal_state_buffer
|
||||
mov [stream + _internal_state_b_bytes_processed], f_i %+ d
|
||||
|
||||
; continue
|
||||
continue_while:
|
||||
mov blen, [rsp + blen_mem_offset]
|
||||
mov in_buf, [rsp + in_buf_mem_offset]
|
||||
cmp blen, 0
|
||||
jnz loop1
|
||||
|
||||
end:
|
||||
;; update input buffer
|
||||
; stream->total_in += (uint32_t)(in_buf - stream->next_in); // bytes copied
|
||||
mov tmp1 %+ d, [stream + _total_in]
|
||||
mov in_buf, [rsp + in_buf_mem_offset]
|
||||
add tmp1, in_buf
|
||||
sub tmp1, [stream + _next_in]
|
||||
mov [stream + _total_in], tmp1 %+ d
|
||||
|
||||
mov [stream + _next_in], in_buf
|
||||
mov [stream + _avail_in], blen %+ d
|
||||
|
||||
cmp blen, 0
|
||||
jne skip2
|
||||
|
||||
;; Set stream's next state
|
||||
mov tmp1, ZSTATE_FLUSH_READ_BUFFER
|
||||
mov tmp5, ZSTATE_BODY
|
||||
cmp dword [stream + _end_of_stream], 0
|
||||
cmovne tmp5, tmp1
|
||||
cmp dword [stream + _flush], _NO_FLUSH
|
||||
cmovne tmp5, tmp1
|
||||
mov dword [stream + _internal_state_state], tmp5 %+ d
|
||||
skip2:
|
||||
mov [stream + _next_out], m_out_buf
|
||||
; offset = state->bitbuf.buffer_used();
|
||||
sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start]
|
||||
sub [stream + _avail_out], m_out_buf %+ d
|
||||
add [stream + _total_out], m_out_buf %+ d
|
||||
|
||||
mov [stream + _internal_state_bitbuf_m_bits], m_bits
|
||||
mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
|
||||
|
||||
|
||||
MOVDQA [stream + _internal_state_crc + 0*16], crc_0
|
||||
MOVDQA [stream + _internal_state_crc + 1*16], crc_1
|
||||
MOVDQA [stream + _internal_state_crc + 2*16], crc_2
|
||||
MOVDQA [stream + _internal_state_crc + 3*16], crc_3
|
||||
|
||||
mov rbx, [rsp + gpr_save_mem_offset + 0*8]
|
||||
mov rsi, [rsp + gpr_save_mem_offset + 1*8]
|
||||
mov rdi, [rsp + gpr_save_mem_offset + 2*8]
|
||||
mov rbp, [rsp + gpr_save_mem_offset + 3*8]
|
||||
mov r12, [rsp + gpr_save_mem_offset + 4*8]
|
||||
mov r13, [rsp + gpr_save_mem_offset + 5*8]
|
||||
mov r14, [rsp + gpr_save_mem_offset + 6*8]
|
||||
mov r15, [rsp + gpr_save_mem_offset + 7*8]
|
||||
MOVDQA xmm6, [rsp + xmm_save_mem_offset + 0*16]
|
||||
MOVDQA xmm7, [rsp + xmm_save_mem_offset + 1*16]
|
||||
MOVDQA xmm8, [rsp + xmm_save_mem_offset + 2*16]
|
||||
MOVDQA xmm9, [rsp + xmm_save_mem_offset + 3*16]
|
||||
|
||||
%ifndef ALIGN_STACK
|
||||
add rsp, stack_size
|
||||
%else
|
||||
mov rsp, rbp
|
||||
pop rbp
|
||||
%endif
|
||||
ret
|
||||
|
||||
MARK __bitbuf_full_ %+ ARCH
|
||||
bitbuf_full:
|
||||
mov blen, [rsp + blen_mem_offset]
|
||||
; state->b_bytes_processed = f_i - (state->buffer - state->file_start);
|
||||
add f_i, [stream + _internal_state_file_start]
|
||||
sub f_i, stream
|
||||
sub f_i, _internal_state_buffer
|
||||
mov [stream + _internal_state_b_bytes_processed], f_i %+ d
|
||||
jmp end
|
||||
|
||||
MARK __compare_loops_ %+ ARCH
|
||||
compare_loop:
|
||||
%if (COMPARE_TYPE == 1)
|
||||
compare250 tmp1, tmp2, len, tmp3
|
||||
%elif (COMPARE_TYPE == 2)
|
||||
compare250_x tmp1, tmp2, len, tmp3, xtmp0, xtmp1
|
||||
%elif (COMPARE_TYPE == 3)
|
||||
compare250_y tmp1, tmp2, len, tmp3, ytmp0, ytmp1
|
||||
%else
|
||||
%error Unknown Compare type COMPARE_TYPE
|
||||
% error
|
||||
%endif
|
||||
jmp len_dist_huffman
|
||||
|
||||
compare_loop2:
|
||||
%if (COMPARE_TYPE == 1)
|
||||
compare250 tmp1, tmp2, len2, tmp3
|
||||
%elif (COMPARE_TYPE == 2)
|
||||
compare250_x tmp1, tmp2, len2, tmp3, xtmp0, xtmp1
|
||||
%elif (COMPARE_TYPE == 3)
|
||||
compare250_y tmp1, tmp2, len2, tmp3, ytmp0, ytmp1
|
||||
%else
|
||||
%error Unknown Compare type COMPARE_TYPE
|
||||
% error
|
||||
%endif
|
||||
and curr_data, 0xff
|
||||
get_lit_code curr_data, code3, code_len3, hufftables
|
||||
jmp len_dist_lit_huffman
|
||||
|
||||
MARK __write_first_byte_ %+ ARCH
|
||||
write_first_byte:
|
||||
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||
ja bitbuf_full
|
||||
|
||||
mov dword [rsp + empty_buffer_flag], 0
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
and curr_data, 0xff
|
||||
get_lit_code curr_data, code2, code_len2, hufftables
|
||||
jmp write_lit_bits
|
||||
|
||||
section .data
|
||||
align 4
|
||||
const_D: dq D
|
||||
|
||||
%endif ;; ifndef TEST
|
8
igzip/igzip_body_01.asm
Normal file
8
igzip/igzip_body_01.asm
Normal file
@ -0,0 +1,8 @@
|
||||
; Build wrapper for the "01" variant of the igzip body: it only sets
; preprocessor symbols and then includes the shared sources.
; ARCH supplies the suffix value used by code that pastes it with %+ ARCH.
%define ARCH 01
|
||||
|
||||
; COMPARE_TYPE can be predefined by the build; 2 is only the fallback.
; NOTE(review): 2 appears to select the xmm-based compare250_x path in the
; included body - confirm against igzip_body.asm.
%ifndef COMPARE_TYPE
|
||||
%define COMPARE_TYPE 2
|
||||
%endif
|
||||
|
||||
; The buffer utilities must come first so their macros exist when the body
; is assembled.
%include "igzip_buffer_utils_01.asm"
|
||||
%include "igzip_body.asm"
|
9
igzip/igzip_body_04.asm
Normal file
9
igzip/igzip_body_04.asm
Normal file
@ -0,0 +1,9 @@
|
||||
; Build wrapper for the "04" variant of the igzip body: it only sets
; preprocessor symbols and then includes the shared sources.
; ARCH supplies the suffix value used by code that pastes it with %+ ARCH.
%define ARCH 04
|
||||
; USE_HSWNI is tested with %ifdef by the included code.
; NOTE(review): it appears to enable the blsmsk/shlx code paths in the body -
; confirm against igzip_body.asm.
%define USE_HSWNI
|
||||
|
||||
; COMPARE_TYPE can be predefined by the build; 3 is only the fallback.
; NOTE(review): 3 appears to select the ymm-based compare250_y path in the
; included body - confirm against igzip_body.asm.
%ifndef COMPARE_TYPE
|
||||
%define COMPARE_TYPE 3
|
||||
%endif
|
||||
|
||||
; The buffer utilities must come first so their macros exist when the body
; is assembled.
%include "igzip_buffer_utils_04.asm"
|
||||
%include "igzip_body.asm"
|
543
igzip/igzip_buffer_utils_01.asm
Normal file
543
igzip/igzip_buffer_utils_01.asm
Normal file
@ -0,0 +1,543 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%ifndef BUFFER_UTILS
|
||||
%define BUFFER_UTILS
|
||||
|
||||
%include "options.asm"
|
||||
|
||||
extern pshufb_shf_table
|
||||
extern mask3
|
||||
|
||||
; Build-time switch for how the copy macros read their source:
;  - FIX_CACHE_READ defined: every "movntdqa" written below is assembled as
;    "movdqa" instead.
;  - otherwise: "prefetchnta" is redefined as a one-argument macro with an
;    empty body, so any use of it in this build expands to nothing.
%ifdef FIX_CACHE_READ
|
||||
%define movntdqa movdqa
|
||||
%else
|
||||
%macro prefetchnta 1
|
||||
%endm
|
||||
%endif
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; code for doing the CRC calculation as part of copy-in, using pclmulqdq
|
||||
|
||||
; "shift" 4 input registers down 4 places
|
||||
; macro FOLD4 xmm0, xmm1, xmm2, xmm3, const, tmp0, tmp1
; Advances all four state registers in place. Each register R becomes
;     clmul(R.high64, const.low64) XOR clmul(R.low64, const.high64)
; (pclmulqdq selectors 0x01 and 0x10 respectively).
; No new data is mixed in here; the caller XORs freshly loaded blocks into
; the registers after invoking this macro.
; Registers are processed two at a time because only two temporaries exist.
|
||||
%macro FOLD4 7
|
||||
%define %%xmm0 %1 ; xmm reg, in/out
|
||||
%define %%xmm1 %2 ; xmm reg, in/out
|
||||
%define %%xmm2 %3 ; xmm reg, in/out
|
||||
%define %%xmm3 %4 ; xmm reg, in/out
|
||||
%define %%const %5 ; xmm reg, in
|
||||
%define %%tmp0 %6 ; xmm reg, tmp
|
||||
%define %%tmp1 %7 ; xmm reg, tmp
|
||||
|
||||
; first pair: keep copies, since pclmulqdq overwrites its destination
movaps %%tmp0, %%xmm0
|
||||
movaps %%tmp1, %%xmm1
|
||||
|
||||
; high qword of state * low qword of const
pclmulqdq %%xmm0, %%const, 0x01
|
||||
pclmulqdq %%xmm1, %%const, 0x01
|
||||
|
||||
; low qword of state (from the copy) * high qword of const
pclmulqdq %%tmp0, %%const, 0x10
|
||||
pclmulqdq %%tmp1, %%const, 0x10
|
||||
|
||||
; combine the two partial products
xorps %%xmm0, %%tmp0
|
||||
xorps %%xmm1, %%tmp1
|
||||
|
||||
|
||||
; second pair: same sequence for xmm2/xmm3, reusing the temporaries
movaps %%tmp0, %%xmm2
|
||||
movaps %%tmp1, %%xmm3
|
||||
|
||||
pclmulqdq %%xmm2, %%const, 0x01
|
||||
pclmulqdq %%xmm3, %%const, 0x01
|
||||
|
||||
pclmulqdq %%tmp0, %%const, 0x10
|
||||
pclmulqdq %%tmp1, %%const, 0x10
|
||||
|
||||
xorps %%xmm2, %%tmp0
|
||||
xorps %%xmm3, %%tmp1
|
||||
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; "shift" 3 input registers down 4 places
|
||||
; macro FOLD3 x0, x1, x2, x3, const, tmp0
; Folds the three oldest state values (A, B, C) with const and rotates the
; registers so the unfolded value D ends up in x0. X' below means
;     clmul(X.high64, const.low64) XOR clmul(X.low64, const.high64).
; x0 x1 x2 x3
|
||||
; In A B C D
|
||||
; Out D A' B' C'
|
||||
%macro FOLD3 6
|
||||
%define %%x0 %1 ; xmm reg, in/out
|
||||
%define %%x1 %2 ; xmm reg, in/out
|
||||
%define %%x2 %3 ; xmm reg, in/out
|
||||
%define %%x3 %4 ; xmm reg, in/out
|
||||
%define %%const %5 ; xmm reg, in
|
||||
%define %%tmp0 %6 ; xmm reg, tmp
|
||||
|
||||
; save D; x3 is about to receive C'
movdqa %%tmp0, %%x3
|
||||
|
||||
; x3 = C' (x2 is used as scratch for the second product)
movaps %%x3, %%x2
|
||||
pclmulqdq %%x2, %%const, 0x01
|
||||
pclmulqdq %%x3, %%const, 0x10
|
||||
xorps %%x3, %%x2
|
||||
|
||||
; x2 = B' (x1 is used as scratch)
movaps %%x2, %%x1
|
||||
pclmulqdq %%x1, %%const, 0x01
|
||||
pclmulqdq %%x2, %%const, 0x10
|
||||
xorps %%x2, %%x1
|
||||
|
||||
; x1 = A' (x0 is used as scratch)
movaps %%x1, %%x0
|
||||
pclmulqdq %%x0, %%const, 0x01
|
||||
pclmulqdq %%x1, %%const, 0x10
|
||||
xorps %%x1, %%x0
|
||||
|
||||
; x0 = D, unfolded
movdqa %%x0, %%tmp0
|
||||
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; "shift" 2 input registers down 4 places
|
||||
; macro FOLD2 x0, x1, x2, x3, const, tmp0
; Folds the two oldest state values (A, B) with const and rotates the
; registers so the unfolded values C and D end up in x0 and x1. X' means
;     clmul(X.high64, const.low64) XOR clmul(X.low64, const.high64).
; x0 x1 x2 x3
|
||||
; In A B C D
|
||||
; Out C D A' B'
|
||||
%macro FOLD2 6
|
||||
%define %%x0 %1 ; xmm reg, in/out
|
||||
%define %%x1 %2 ; xmm reg, in/out
|
||||
%define %%x2 %3 ; xmm reg, in/out
|
||||
%define %%x3 %4 ; xmm reg, in/out
|
||||
%define %%const %5 ; xmm reg, in
|
||||
%define %%tmp0 %6 ; xmm reg, tmp
|
||||
|
||||
; save D; x3 is about to receive B'
movdqa %%tmp0, %%x3
|
||||
|
||||
; x3 = B' (x1 is used as scratch)
movaps %%x3, %%x1
|
||||
pclmulqdq %%x1, %%const, 0x01
|
||||
pclmulqdq %%x3, %%const, 0x10
|
||||
xorps %%x3, %%x1
|
||||
|
||||
; x1 = D, then reuse tmp0 to save C before x2 is overwritten
movdqa %%x1, %%tmp0
|
||||
movdqa %%tmp0, %%x2
|
||||
|
||||
; x2 = A' (x0 is used as scratch)
movaps %%x2, %%x0
|
||||
pclmulqdq %%x0, %%const, 0x01
|
||||
pclmulqdq %%x2, %%const, 0x10
|
||||
xorps %%x2, %%x0
|
||||
|
||||
; x0 = C, unfolded
movdqa %%x0, %%tmp0
|
||||
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; "shift" 1 input registers down 4 places
|
||||
; macro FOLD1 x0, x1, x2, x3, const, tmp0
; Folds only the oldest state value (A) with const and rotates the registers
; so the unfolded values B, C, D move down one slot. A' means
;     clmul(A.high64, const.low64) XOR clmul(A.low64, const.high64).
; x0 x1 x2 x3
|
||||
; In A B C D
|
||||
; Out B C D A'
|
||||
%macro FOLD1 6
|
||||
%define %%x0 %1 ; xmm reg, in/out
|
||||
%define %%x1 %2 ; xmm reg, in/out
|
||||
%define %%x2 %3 ; xmm reg, in/out
|
||||
%define %%x3 %4 ; xmm reg, in/out
|
||||
%define %%const %5 ; xmm reg, in
|
||||
%define %%tmp0 %6 ; xmm reg, tmp
|
||||
|
||||
; save D; x3 is about to receive A'
movdqa %%tmp0, %%x3
|
||||
|
||||
; x3 = A' (x0 is used as scratch)
movaps %%x3, %%x0
|
||||
pclmulqdq %%x0, %%const, 0x01
|
||||
pclmulqdq %%x3, %%const, 0x10
|
||||
xorps %%x3, %%x0
|
||||
|
||||
; shift the unfolded values down: x0 = B, x1 = C, x2 = D
movdqa %%x0, %%x1
|
||||
movdqa %%x1, %%x2
|
||||
movdqa %%x2, %%tmp0
|
||||
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; macro PARTIAL_FOLD x0, x1, x2, x3, xp, size, xfold, xt0, xt1, xt2, xt3, gtmp
; Mixes a partial block of "size" bytes (held in xp) into the four-register
; state. xt0..xt3 are xmm scratch registers (named shl, shr, tmp2, tmp3
; inside the macro) and gtmp is a scratch general-purpose register.
; The diagram uses one letter per register fragment; the bytes shifted out of
; X0 (A0) are folded with the constant and XORed into the new X3.
|
||||
|
||||
; XP X3 X2 X1 X0 tmp2
|
||||
; Initial state xI HG FE DC BA
|
||||
; after shift IH GF ED CB A0
|
||||
; after fold ff GF ED CB ff = merge(IH, A0)
|
||||
;
|
||||
%macro PARTIAL_FOLD 12
|
||||
%define %%x0 %1 ; xmm reg, in/out
|
||||
%define %%x1 %2 ; xmm reg, in/out
|
||||
%define %%x2 %3 ; xmm reg, in/out
|
||||
%define %%x3 %4 ; xmm reg, in/out
|
||||
%define %%xp %5 ; xmm partial reg, in/clobbered
|
||||
%define %%size %6 ; GPR, in/clobbered (1...15)
|
||||
%define %%const %7 ; xmm reg, in
|
||||
%define %%shl %8 ; xmm reg, tmp
|
||||
%define %%shr %9 ; xmm reg, tmp
|
||||
%define %%tmp2 %10 ; xmm reg, tmp
|
||||
%define %%tmp3 %11 ; xmm reg, tmp
|
||||
%define %%gtmp %12 ; GPR, tmp
|
||||
|
||||
; {XP X3 X2 X1 X0} = {xI HG FE DC BA}
|
||||
; Select the shuffle mask for this size: entries are 16 bytes each and the
; table base is biased by -16, so size == 1 picks the first entry.
; NOTE(review): pshufb_shf_table, mask3 and WRT_OPT are defined outside this
; file; their contents are assumed from how they are used here - confirm.
shl %%size, 4 ; size *= 16
|
||||
lea %%gtmp, [pshufb_shf_table - 16 WRT_OPT]
|
||||
movdqa %%shl, [%%gtmp + %%size] ; shl constant
|
||||
movdqa %%shr, %%shl
|
||||
pxor %%shr, [mask3 WRT_OPT] ; shr constant
|
||||
|
||||
; keep the part that falls off the end of X0 for the final fold
movdqa %%tmp2, %%x0 ; tmp2 = BA
|
||||
pshufb %%tmp2, %%shl ; tmp2 = A0
|
||||
|
||||
; each register takes its own remainder plus the spill from the next one
pshufb %%x0, %%shr ; x0 = 0B
|
||||
movdqa %%tmp3, %%x1 ; tmp3 = DC
|
||||
pshufb %%tmp3, %%shl ; tmp3 = C0
|
||||
por %%x0, %%tmp3 ; x0 = CB
|
||||
|
||||
pshufb %%x1, %%shr ; x1 = 0D
|
||||
movdqa %%tmp3, %%x2 ; tmp3 = FE
|
||||
pshufb %%tmp3, %%shl ; tmp3 = E0
|
||||
por %%x1, %%tmp3 ; x1 = ED
|
||||
|
||||
pshufb %%x2, %%shr ; x2 = 0F
|
||||
movdqa %%tmp3, %%x3 ; tmp3 = HG
|
||||
pshufb %%tmp3, %%shl ; tmp3 = G0
|
||||
por %%x2, %%tmp3 ; x2 = GF
|
||||
|
||||
; the partial input block supplies the bytes shifted into X3
pshufb %%x3, %%shr ; x3 = 0H
|
||||
pshufb %%xp, %%shl ; xp = I0
|
||||
por %%x3, %%xp ; x3 = IH
|
||||
|
||||
; fold tmp2 into X3
|
||||
; both partial products of tmp2 * const are XORed into x3
movaps %%tmp3, %%tmp2
|
||||
pclmulqdq %%tmp2, %%const, 0x01
|
||||
pclmulqdq %%tmp3, %%const, 0x10
|
||||
xorps %%x3, %%tmp2
|
||||
xorps %%x3, %%tmp3
|
||||
%endm
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; LOAD_FRACTIONAL_XMM: Packs xmm register with data when data input is less than 16 bytes.
|
||||
; Returns 0 if data has length 0.
|
||||
; Input: The input data (src), that data's length (size).
|
||||
; Output: The packed xmm register (xmm_out).
|
||||
; size is clobbered.
; src is moved to the end of the data and walked backwards; every increment
; is matched by an equal decrement, so it holds its original value at %%_done.
; Only bytes inside [src, src + size) are read. The data ends up in the low
; bytes of xmm_out in memory order, with the remaining bytes zero.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%macro LOAD_FRACTIONAL_XMM 3
|
||||
%define %%xmm_out %1 ; %%xmm_out is an xmm register
|
||||
%define %%src %2
|
||||
%define %%size %3
|
||||
|
||||
pxor %%xmm_out, %%xmm_out
|
||||
|
||||
cmp %%size, 0
|
||||
je %%_done
|
||||
|
||||
; point src one past the last data byte
add %%src, %%size
|
||||
|
||||
cmp %%size, 8
|
||||
jl %%_byte_loop
|
||||
|
||||
; at least 8 bytes: take the last 8 in one load
sub %%src, 8
|
||||
pinsrq %%xmm_out, [%%src], 0 ;Read in 8 bytes if they exist
|
||||
sub %%size, 8
|
||||
|
||||
; flags come from the "sub" above: exactly 8 bytes means nothing is left
je %%_done
|
||||
|
||||
%%_byte_loop: ;Read in data 1 byte at a time while data is left
|
||||
; make room at byte 0 for the next earlier byte
pslldq %%xmm_out, 1
|
||||
|
||||
dec %%src
|
||||
pinsrb %%xmm_out, BYTE [%%src], 0
|
||||
dec %%size
|
||||
|
||||
; flags come from "dec %%size": loop while the remaining count is > 0
jg %%_byte_loop
|
||||
|
||||
%%_done:
|
||||
|
||||
%endmacro ; LOAD_FRACTIONAL_XMM
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; copy x bytes (rounded up to 16 bytes) from src to dst
|
||||
; src & dst are unaligned
|
||||
; macro COPY_IN_CRC dst, src, size_in_bytes, tmp, x0, x1, x2, x3, xfold,
|
||||
; xt0, xt1, xt2, xt3, xt4
; Copies the data and, unless DEFLATE is defined, folds it into the CRC state
; x0..x3 as it goes. With DEFLATE defined every FOLDn / PARTIAL_FOLD / pxor
; step is assembled out and only the copy remains.
; Stores are always whole 16-byte registers, so up to 15 bytes beyond the
; requested size can be written at dst.
; Outline: align src to 16 bytes, copy 64 bytes per loop iteration, then
; handle 3/2/1 remaining full registers and a final partial register.
|
||||
%macro COPY_IN_CRC 14
|
||||
%define %%dst %1 ; reg, in/clobbered
|
||||
%define %%src %2 ; reg, in/clobbered
|
||||
%define %%size %3 ; reg, in/clobbered
|
||||
%define %%tmp %4 ; reg, tmp
|
||||
%define %%x0 %5 ; xmm, in/out: crc state
|
||||
%define %%x1 %6 ; xmm, in/out: crc state
|
||||
%define %%x2 %7 ; xmm, in/out: crc state
|
||||
%define %%x3 %8 ; xmm, in/out: crc state
|
||||
%define %%xfold %9 ; xmm, in: (loaded from fold4)
|
||||
%define %%xtmp0 %10 ; xmm, tmp
|
||||
%define %%xtmp1 %11 ; xmm, tmp
|
||||
%define %%xtmp2 %12 ; xmm, tmp
|
||||
%define %%xtmp3 %13 ; xmm, tmp
|
||||
%define %%xtmp4 %14 ; xmm, tmp
|
||||
|
||||
; inputs shorter than 16 bytes skip alignment and go to the partial path
cmp %%size, 16
|
||||
jl %%lt_16
|
||||
|
||||
; align source
|
||||
; tmp = (-src) & 15 = bytes needed to reach the next 16-byte boundary
xor %%tmp, %%tmp
|
||||
sub %%tmp, %%src
|
||||
and %%tmp, 15
|
||||
jz %%already_aligned
|
||||
|
||||
; need to align, tmp contains number of bytes to transfer
|
||||
; a full 16 bytes are copied but the pointers advance by tmp only; the
; overlapping bytes are copied again by the code that follows
movdqu %%xtmp0, [%%src]
|
||||
movdqu [%%dst], %%xtmp0
|
||||
add %%dst, %%tmp
|
||||
add %%src, %%tmp
|
||||
sub %%size, %%tmp
|
||||
|
||||
%ifndef DEFLATE
|
||||
; dst is lent to PARTIAL_FOLD as its scratch GPR, so preserve it
push %%dst
|
||||
|
||||
PARTIAL_FOLD %%x0, %%x1, %%x2, %%x3, %%xtmp0, %%tmp, %%xfold, \
|
||||
%%xtmp1, %%xtmp2, %%xtmp3, %%xtmp4, %%dst
|
||||
pop %%dst
|
||||
%endif
|
||||
|
||||
%%already_aligned:
|
||||
; from here size is biased by -64 so the loop can test the sign directly
sub %%size, 64
|
||||
jl %%end_loop
|
||||
; jump over any padding emitted by "align 16"
jmp %%loop
|
||||
align 16
|
||||
%%loop:
|
||||
; src is 16-byte aligned at this point
movntdqa %%xtmp0, [%%src+0*16]
|
||||
movntdqa %%xtmp1, [%%src+1*16]
|
||||
movntdqa %%xtmp2, [%%src+2*16]
|
||||
|
||||
%ifndef DEFLATE
|
||||
; xtmp3/xtmp4 are FOLD4's scratch registers, which is why the fourth
; block is loaded only after the fold
FOLD4 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3, %%xtmp4
|
||||
%endif
|
||||
movntdqa %%xtmp3, [%%src+3*16]
|
||||
|
||||
movdqu [%%dst+0*16], %%xtmp0
|
||||
movdqu [%%dst+1*16], %%xtmp1
|
||||
movdqu [%%dst+2*16], %%xtmp2
|
||||
movdqu [%%dst+3*16], %%xtmp3
|
||||
|
||||
%ifndef DEFLATE
|
||||
; mix the four new blocks into the folded state
pxor %%x0, %%xtmp0
|
||||
pxor %%x1, %%xtmp1
|
||||
pxor %%x2, %%xtmp2
|
||||
pxor %%x3, %%xtmp3
|
||||
%endif
|
||||
add %%src, 4*16
|
||||
add %%dst, 4*16
|
||||
sub %%size, 4*16
|
||||
; continue while at least another 64 bytes remain
jge %%loop
|
||||
|
||||
%%end_loop:
|
||||
; %%size contains (num bytes left - 64)
|
||||
; each "add 16" that brings size to >= 0 tells how many whole 16-byte
; blocks are left (3, 2 or 1); size then holds the leftover byte count
add %%size, 16
|
||||
jge %%three_full_regs
|
||||
add %%size, 16
|
||||
jge %%two_full_regs
|
||||
add %%size, 16
|
||||
jge %%one_full_reg
|
||||
add %%size, 16
|
||||
|
||||
%%no_full_regs: ; 0 <= %%size < 16, no full regs
|
||||
; the zero flag tested here comes from the last "add %%size, 16"
jz %%done ; if no bytes left, we're done
|
||||
jmp %%partial
|
||||
|
||||
;; Handle case where input is <16 bytes
|
||||
%%lt_16:
|
||||
test %%size, %%size
|
||||
jz %%done ; if no bytes left, we're done
|
||||
jmp %%partial
|
||||
|
||||
|
||||
; one whole block left: fold one state register, then mix the block into x3
%%one_full_reg:
|
||||
movntdqa %%xtmp0, [%%src+0*16]
|
||||
|
||||
%ifndef DEFLATE
|
||||
FOLD1 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3
|
||||
%endif
|
||||
movdqu [%%dst+0*16], %%xtmp0
|
||||
|
||||
%ifndef DEFLATE
|
||||
pxor %%x3, %%xtmp0
|
||||
%endif
|
||||
test %%size, %%size
|
||||
jz %%done ; if no bytes left, we're done
|
||||
|
||||
add %%dst, 1*16
|
||||
add %%src, 1*16
|
||||
jmp %%partial
|
||||
|
||||
|
||||
; two whole blocks left: fold two state registers, mix into x2/x3
%%two_full_regs:
|
||||
movntdqa %%xtmp0, [%%src+0*16]
|
||||
movntdqa %%xtmp1, [%%src+1*16]
|
||||
|
||||
%ifndef DEFLATE
|
||||
FOLD2 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3
|
||||
%endif
|
||||
movdqu [%%dst+0*16], %%xtmp0
|
||||
movdqu [%%dst+1*16], %%xtmp1
|
||||
|
||||
%ifndef DEFLATE
|
||||
pxor %%x2, %%xtmp0
|
||||
pxor %%x3, %%xtmp1
|
||||
%endif
|
||||
test %%size, %%size
|
||||
jz %%done ; if no bytes left, we're done
|
||||
|
||||
add %%dst, 2*16
|
||||
add %%src, 2*16
|
||||
jmp %%partial
|
||||
|
||||
|
||||
; three whole blocks left: fold three state registers, mix into x1/x2/x3
%%three_full_regs:
|
||||
movntdqa %%xtmp0, [%%src+0*16]
|
||||
movntdqa %%xtmp1, [%%src+1*16]
|
||||
movntdqa %%xtmp2, [%%src+2*16]
|
||||
|
||||
%ifndef DEFLATE
|
||||
FOLD3 %%x0, %%x1, %%x2, %%x3, %%xfold, %%xtmp3
|
||||
%endif
|
||||
movdqu [%%dst+0*16], %%xtmp0
|
||||
movdqu [%%dst+1*16], %%xtmp1
|
||||
movdqu [%%dst+2*16], %%xtmp2
|
||||
|
||||
%ifndef DEFLATE
|
||||
pxor %%x1, %%xtmp0
|
||||
pxor %%x2, %%xtmp1
|
||||
pxor %%x3, %%xtmp2
|
||||
%endif
|
||||
test %%size, %%size
|
||||
jz %%done ; if no bytes left, we're done
|
||||
|
||||
add %%dst, 3*16
|
||||
add %%src, 3*16
|
||||
|
||||
; fall through to %%partial
|
||||
%%partial: ; 0 <= %%size < 16
|
||||
|
||||
%ifndef DEFLATE
|
||||
; keep the byte count for PARTIAL_FOLD; LOAD_FRACTIONAL_XMM clobbers size
mov %%tmp, %%size
|
||||
%endif
|
||||
|
||||
LOAD_FRACTIONAL_XMM %%xtmp0, %%src, %%size
|
||||
|
||||
; writes a full 16 bytes; bytes past the valid data are zero
movdqu [%%dst], %%xtmp0
|
||||
|
||||
%ifndef DEFLATE
|
||||
; dst is no longer needed, so it serves as PARTIAL_FOLD's scratch GPR
PARTIAL_FOLD %%x0, %%x1, %%x2, %%x3, %%xtmp0, %%tmp, %%xfold, \
|
||||
%%xtmp1, %%xtmp2, %%xtmp3, %%xtmp4, %%dst
|
||||
%endif
|
||||
|
||||
%%done:
|
||||
%endm
|
||||
|
||||
|
||||
;%assign K 1024;
|
||||
;%assign D 8 * K; ; Amount of history
|
||||
;%assign LA 17 * 16; ; Max look-ahead, rounded up to 32 byte boundary
|
||||
|
||||
; copy D + LA bytes from src to dst
|
||||
; dst is aligned
|
||||
; NOTE(review): the prototype and the rcx/rdx lines below look like leftovers
; from a function version; as a macro the registers are whatever the caller
; passes - confirm and consider removing them.
;void copy_D_LA(uint8_t *dst, uint8_t *src);
|
||||
; arg 1: rcx : dst
|
||||
; arg 2: rdx : src
|
||||
; copy_D_LA dst, src, tmp, xtmp0, xtmp1, xtmp2, xtmp3
; The byte count is fixed at assembly time from D and LA. Loads are unaligned
; (movdqu); stores use movdqa, so dst must be 16-byte aligned. tmp and the
; four xmm temporaries are overwritten.
|
||||
%macro copy_D_LA 7
|
||||
%define %%dst %1 ; reg, clobbered
|
||||
%define %%src %2 ; reg, clobbered
|
||||
%define %%tmp %3
|
||||
%define %%xtmp0 %4
|
||||
%define %%xtmp1 %5
|
||||
%define %%xtmp2 %6
|
||||
%define %%xtmp3 %7
|
||||
|
||||
%assign %%SIZE (D + LA) / 16 ; number of DQ words to be copied
|
||||
; number of 64-byte (4 DQ word) groups handled by the run-time loop
%assign %%SIZE4 %%SIZE/4
|
||||
|
||||
; tmp = dst value at which the 64-byte loop stops
lea %%tmp, [%%dst + 4 * 16 * %%SIZE4]
|
||||
; jump over any padding emitted by "align 16"
jmp %%copy_D_LA_1
|
||||
align 16
|
||||
; The body runs before the first comparison, so this assumes SIZE4 >= 1.
%%copy_D_LA_1:
|
||||
movdqu %%xtmp0, [%%src]
|
||||
movdqu %%xtmp1, [%%src+16]
|
||||
movdqu %%xtmp2, [%%src+32]
|
||||
movdqu %%xtmp3, [%%src+48]
|
||||
movdqa [%%dst], %%xtmp0
|
||||
movdqa [%%dst+16], %%xtmp1
|
||||
movdqa [%%dst+32], %%xtmp2
|
||||
movdqa [%%dst+48], %%xtmp3
|
||||
add %%src, 4*16
|
||||
add %%dst, 4*16
|
||||
cmp %%dst, %%tmp
|
||||
jne %%copy_D_LA_1
|
||||
; Remaining SIZE - 4*SIZE4 (0..3) DQ words, unrolled at assembly time:
; first all the loads, one per temporary ...
%assign %%i 0
|
||||
%rep (%%SIZE - 4 * %%SIZE4)
|
||||
|
||||
%if (%%i == 0)
|
||||
movdqu %%xtmp0, [%%src + %%i*16]
|
||||
%elif (%%i == 1)
|
||||
movdqu %%xtmp1, [%%src + %%i*16]
|
||||
%elif (%%i == 2)
|
||||
movdqu %%xtmp2, [%%src + %%i*16]
|
||||
%elif (%%i == 3)
|
||||
movdqu %%xtmp3, [%%src + %%i*16]
|
||||
%else
|
||||
%error too many i
|
||||
% error
|
||||
%endif
|
||||
|
||||
%assign %%i %%i+1
|
||||
%endrep
|
||||
; ... then the matching stores from the same temporaries
%assign %%i 0
|
||||
%rep (%%SIZE - 4 * %%SIZE4)
|
||||
|
||||
%if (%%i == 0)
|
||||
movdqa [%%dst + %%i*16], %%xtmp0
|
||||
%elif (%%i == 1)
|
||||
movdqa [%%dst + %%i*16], %%xtmp1
|
||||
%elif (%%i == 2)
|
||||
movdqa [%%dst + %%i*16], %%xtmp2
|
||||
%elif (%%i == 3)
|
||||
movdqa [%%dst + %%i*16], %%xtmp3
|
||||
%else
|
||||
%error too many i
|
||||
% error
|
||||
%endif
|
||||
|
||||
%assign %%i %%i+1
|
||||
%endrep
|
||||
%endm
|
||||
%endif
|
552
igzip/igzip_buffer_utils_04.asm
Normal file
552
igzip/igzip_buffer_utils_04.asm
Normal file
@ -0,0 +1,552 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%ifndef BUFFER_UTILS
|
||||
%define BUFFER_UTILS
|
||||
|
||||
%include "options.asm"
|
||||
|
||||
extern pshufb_shf_table
|
||||
extern mask3
|
||||
|
||||
%ifdef FIX_CACHE_READ
|
||||
%define vmovntdqa vmovdqa
|
||||
%else
|
||||
%macro prefetchnta 1
|
||||
%endm
|
||||
%endif
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; code for doing the CRC calculation as part of copy-in, using pclmulqdq
|
||||
|
||||
; FOLD4: "shift" all 4 CRC state registers down 4 places.
; Every state register s becomes
;	clmul(s.high64, const.low64) xor clmul(s.low64, const.high64)
; (vpclmulqdq selectors 0x01 and 0x10).  The two-operand AVX forms used
; here take the destination as the first source.
; macro FOLD4 xmm0, xmm1, xmm2, xmm3, const, tmp0, tmp1
%macro FOLD4 7
%define %%s0	%1	; xmm reg, in/out: state word 0
%define %%s1	%2	; xmm reg, in/out: state word 1
%define %%s2	%3	; xmm reg, in/out: state word 2
%define %%s3	%4	; xmm reg, in/out: state word 3
%define %%k	%5	; xmm reg, in: fold constant
%define %%ta	%6	; xmm reg, scratch
%define %%tb	%7	; xmm reg, scratch

	; ---- state words 0 and 1 ----
	vmovaps		%%ta, %%s0
	vmovaps		%%tb, %%s1

	vpclmulqdq	%%s0, %%k, 0x01
	vpclmulqdq	%%s1, %%k, 0x01

	vpclmulqdq	%%ta, %%k, 0x10
	vpclmulqdq	%%tb, %%k, 0x10

	vxorps		%%s0, %%ta
	vxorps		%%s1, %%tb

	; ---- state words 2 and 3 ----
	vmovaps		%%ta, %%s2
	vmovaps		%%tb, %%s3

	vpclmulqdq	%%s2, %%k, 0x01
	vpclmulqdq	%%s3, %%k, 0x01

	vpclmulqdq	%%ta, %%k, 0x10
	vpclmulqdq	%%tb, %%k, 0x10

	vxorps		%%s2, %%ta
	vxorps		%%s3, %%tb
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; FOLD3: "shift" 3 input registers down 4 places.
; The three oldest words are folded (primed below) and rotated one slot
; up; the untouched word D moves to the front.
; macro FOLD3 x0, x1, x2, x3, const, tmp0
;	x0	x1	x2	x3
; In	A	B	C	D
; Out	D	A'	B'	C'
%macro FOLD3 6
%define %%w0	%1	; xmm reg, in/out
%define %%w1	%2	; xmm reg, in/out
%define %%w2	%3	; xmm reg, in/out
%define %%w3	%4	; xmm reg, in/out
%define %%k	%5	; xmm reg, in: fold constant
%define %%hold	%6	; xmm reg, scratch: keeps D while w3 is overwritten

	vmovdqa		%%hold, %%w3		; hold = D

	; w3 = C'
	vmovaps		%%w3, %%w2
	vpclmulqdq	%%w2, %%k, 0x01
	vpclmulqdq	%%w3, %%k, 0x10
	vxorps		%%w3, %%w2

	; w2 = B'
	vmovaps		%%w2, %%w1
	vpclmulqdq	%%w1, %%k, 0x01
	vpclmulqdq	%%w2, %%k, 0x10
	vxorps		%%w2, %%w1

	; w1 = A'
	vmovaps		%%w1, %%w0
	vpclmulqdq	%%w0, %%k, 0x01
	vpclmulqdq	%%w1, %%k, 0x10
	vxorps		%%w1, %%w0

	vmovdqa		%%w0, %%hold		; w0 = D
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; FOLD2: "shift" 2 input registers down 4 places.
; A and B are folded (primed below) into the last two slots; C and D
; move unchanged to the first two.
; macro FOLD2 x0, x1, x2, x3, const, tmp0
;	x0	x1	x2	x3
; In	A	B	C	D
; Out	C	D	A'	B'
%macro FOLD2 6
%define %%w0	%1	; xmm reg, in/out
%define %%w1	%2	; xmm reg, in/out
%define %%w2	%3	; xmm reg, in/out
%define %%w3	%4	; xmm reg, in/out
%define %%k	%5	; xmm reg, in: fold constant
%define %%hold	%6	; xmm reg, scratch

	vmovdqa		%%hold, %%w3		; hold = D

	; w3 = B'
	vmovaps		%%w3, %%w1
	vpclmulqdq	%%w1, %%k, 0x01
	vpclmulqdq	%%w3, %%k, 0x10
	vxorps		%%w3, %%w1

	vmovdqa		%%w1, %%hold		; w1 = D
	vmovdqa		%%hold, %%w2		; hold = C

	; w2 = A'
	vmovaps		%%w2, %%w0
	vpclmulqdq	%%w0, %%k, 0x01
	vpclmulqdq	%%w2, %%k, 0x10
	vxorps		%%w2, %%w0

	vmovdqa		%%w0, %%hold		; w0 = C
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; FOLD1: "shift" 1 input register down 4 places.
; A is folded (A') into the last slot; B, C and D each move one slot
; towards the front.
; macro FOLD1 x0, x1, x2, x3, const, tmp0
;	x0	x1	x2	x3
; In	A	B	C	D
; Out	B	C	D	A'
%macro FOLD1 6
%define %%w0	%1	; xmm reg, in/out
%define %%w1	%2	; xmm reg, in/out
%define %%w2	%3	; xmm reg, in/out
%define %%w3	%4	; xmm reg, in/out
%define %%k	%5	; xmm reg, in: fold constant
%define %%hold	%6	; xmm reg, scratch

	vmovdqa		%%hold, %%w3		; hold = D

	; w3 = A'
	vmovaps		%%w3, %%w0
	vpclmulqdq	%%w0, %%k, 0x01
	vpclmulqdq	%%w3, %%k, 0x10
	vxorps		%%w3, %%w0

	; slide the unfolded words forward
	vmovdqa		%%w0, %%w1		; w0 = B
	vmovdqa		%%w1, %%w2		; w1 = C
	vmovdqa		%%w2, %%hold		; w2 = D
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; PARTIAL_FOLD: merge a partial (1..15 byte) block into the 4-word CRC state.
; The five registers {xp x3 x2 x1 x0} are byte-shifted as one long value
; using pshufb masks looked up by size; the bytes shifted out of x0 are
; then folded into x3 with the fold constant.
;
; macro PARTIAL_FOLD x0, x1, x2, x3, xp, size, xfold, xt0, xt1, xt2, xt3, gtmp
;
;		XP	X3	X2	X1	X0	tmp2
; Initial state	xI	HG	FE	DC	BA
; after shift		IH	GF	ED	CB	A0
; after fold		ff	GF	ED	CB		ff = merge(IH, A0)
;
%macro PARTIAL_FOLD 12
%define %%w0	%1	; xmm reg, in/out
%define %%w1	%2	; xmm reg, in/out
%define %%w2	%3	; xmm reg, in/out
%define %%w3	%4	; xmm reg, in/out
%define %%part	%5	; xmm partial reg, in/clobbered
%define %%nbytes %6	; GPR, in/clobbered (1...15)
%define %%k	%7	; xmm reg, in: fold constant
%define %%shufl	%8	; xmm reg, scratch: "shift left" pshufb mask
%define %%shufr	%9	; xmm reg, scratch: "shift right" pshufb mask
%define %%spill	%10	; xmm reg, scratch: bytes shifted out of w0
%define %%scr	%11	; xmm reg, scratch
%define %%tblp	%12	; GPR, scratch: table base address

	; {part w3 w2 w1 w0} = {xI HG FE DC BA}
	shl	%%nbytes, 4			; nbytes *= 16 (table entries are 16 bytes)
	lea	%%tblp, [pshufb_shf_table - 16 WRT_OPT]
	vmovdqa	%%shufl, [%%tblp + %%nbytes]	; left-shift mask for this size
	vmovdqa	%%shufr, %%shufl
	vpxor	%%shufr, [mask3 WRT_OPT]	; right-shift mask = left mask xor mask3

	vmovdqa	%%spill, %%w0			; spill = BA
	vpshufb	%%spill, %%shufl		; spill = A0

	vpshufb	%%w0, %%shufr			; w0 = 0B
	vmovdqa	%%scr, %%w1			; scr = DC
	vpshufb	%%scr, %%shufl			; scr = C0
	vpor	%%w0, %%scr			; w0 = CB

	vpshufb	%%w1, %%shufr			; w1 = 0D
	vmovdqa	%%scr, %%w2			; scr = FE
	vpshufb	%%scr, %%shufl			; scr = E0
	vpor	%%w1, %%scr			; w1 = ED

	vpshufb	%%w2, %%shufr			; w2 = 0F
	vmovdqa	%%scr, %%w3			; scr = HG
	vpshufb	%%scr, %%shufl			; scr = G0
	vpor	%%w2, %%scr			; w2 = GF

	vpshufb	%%w3, %%shufr			; w3 = 0H
	vpshufb	%%part, %%shufl			; part = I0
	vpor	%%w3, %%part			; w3 = IH

	; fold the spilled bytes (A0) into w3
	vmovaps	%%scr, %%spill
	vpclmulqdq	%%spill, %%k, 0x01
	vpclmulqdq	%%scr, %%k, 0x10
	vxorps	%%w3, %%spill
	vxorps	%%w3, %%scr
%endm
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; LOAD_FRACTIONAL_XMM: pack fewer than 16 bytes of memory into an xmm register.
; Input:  src  - address of the data
;         size - number of bytes to load
; Output: xmm_out - the bytes in memory order starting at byte 0, with
;         the remaining upper bytes zero (all zero when size is 0).
; The data is read back to front: one 8-byte load of the last 8 bytes
; when size >= 8, then single bytes.  No byte outside [src, src+size)
; is read.
; size is clobbered.  src is moved during the macro but ends at its
; entry value.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro LOAD_FRACTIONAL_XMM 3
%define %%packed	%1	; xmm register receiving the data
%define %%ptr		%2	; GPR: source address
%define %%count		%3	; GPR: byte count

	vpxor	%%packed, %%packed

	cmp	%%count, 0
	je	%%finished

	add	%%ptr, %%count		; point one past the last byte

	cmp	%%count, 8
	jl	%%bytewise

	sub	%%ptr, 8
	vpinsrq	%%packed, [%%ptr], 0	; last 8 bytes in one load
	sub	%%count, 8

	je	%%finished		; exactly 8 bytes: nothing left

%%bytewise:				; prepend the remaining bytes one at a time
	vpslldq	%%packed, 1		; make room at byte 0

	dec	%%ptr
	vpinsrb	%%packed, BYTE [%%ptr], 0
	dec	%%count

	jg	%%bytewise

%%finished:

%endmacro ; LOAD_FRACTIONAL_XMM
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; COPY_IN_CRC: copy size bytes from src to dst and, unless DEFLATE is
; defined, fold the copied data into the 4-register CRC state.
; src & dst are unaligned on entry.  For inputs of 16 bytes or more the
; source is first advanced to a 16-byte boundary so the remaining loads
; can use vmovntdqa.  Stores are always whole 16-byte vectors, so the
; copy is effectively rounded up to 16 bytes at dst.
; macro COPY_IN_CRC dst, src, size_in_bytes, tmp, x0, x1, x2, x3, xfold,
;                   xt0, xt1, xt2, xt3, xt4
%macro COPY_IN_CRC 14
%define %%out	%1	; reg, in/clobbered: destination pointer
%define %%in	%2	; reg, in/clobbered: source pointer
%define %%len	%3	; reg, in/clobbered: byte count
%define %%gpr	%4	; reg, scratch
%define %%c0	%5	; xmm, in/out: crc state
%define %%c1	%6	; xmm, in/out: crc state
%define %%c2	%7	; xmm, in/out: crc state
%define %%c3	%8	; xmm, in/out: crc state
%define %%kfold	%9	; xmm, in: (loaded from fold4)
%define %%t0	%10	; xmm, scratch
%define %%t1	%11	; xmm, scratch
%define %%t2	%12	; xmm, scratch
%define %%t3	%13	; xmm, scratch
%define %%t4	%14	; xmm, scratch

	cmp	%%len, 16
	jl	%%short_input

	; gpr = (-in) & 15 = bytes needed to reach a 16-byte boundary
	xor	%%gpr, %%gpr
	sub	%%gpr, %%in
	and	%%gpr, 15
	jz	%%src_aligned

	; copy a full vector but only advance by the misalignment; the
	; overlapping bytes are copied again by the aligned code below
	vmovdqu	%%t0, [%%in]
	vmovdqu	[%%out], %%t0
	add	%%out, %%gpr
	add	%%in, %%gpr
	sub	%%len, %%gpr

%ifndef DEFLATE
	; out doubles as PARTIAL_FOLD's GPR scratch, so preserve it
	push	%%out

	PARTIAL_FOLD	%%c0, %%c1, %%c2, %%c3, %%t0, %%gpr, %%kfold, %%t1, %%t2, %%t3, %%t4, %%out
	pop	%%out
%endif

%%src_aligned:
	sub	%%len, 64
	jl	%%tail
	jmp	%%block64
align 16
%%block64:				; 64 bytes per iteration
	vmovntdqa	%%t0, [%%in+0*16]
	vmovntdqa	%%t1, [%%in+1*16]
	vmovntdqa	%%t2, [%%in+2*16]

%ifndef DEFLATE
	; FOLD4 uses t3/t4 as scratch, hence the 4th load comes after it
	FOLD4	%%c0, %%c1, %%c2, %%c3, %%kfold, %%t3, %%t4
%endif
	vmovntdqa	%%t3, [%%in+3*16]

	vmovdqu	[%%out+0*16], %%t0
	vmovdqu	[%%out+1*16], %%t1
	vmovdqu	[%%out+2*16], %%t2
	vmovdqu	[%%out+3*16], %%t3

%ifndef DEFLATE
	vpxor	%%c0, %%t0
	vpxor	%%c1, %%t1
	vpxor	%%c2, %%t2
	vpxor	%%c3, %%t3
%endif
	add	%%in, 4*16
	add	%%out, 4*16
	sub	%%len, 4*16
	jge	%%block64

%%tail:
	; len holds (bytes left - 64); add 16 at a time to find how many
	; whole vectors remain
	add	%%len, 16
	jge	%%tail48
	add	%%len, 16
	jge	%%tail32
	add	%%len, 16
	jge	%%tail16
	add	%%len, 16

%%tail0:		; 0 <= len < 16, no full vectors; ZF from the add above
	jz	%%exit			; nothing left
	jmp	%%fragment

	;; input shorter than 16 bytes
%%short_input:
	test	%%len, %%len
	jz	%%exit			; nothing to copy
	jmp	%%fragment


%%tail16:				; one full vector left
	vmovntdqa	%%t0, [%%in+0*16]

%ifndef DEFLATE
	FOLD1	%%c0, %%c1, %%c2, %%c3, %%kfold, %%t3
%endif
	vmovdqu	[%%out+0*16], %%t0

%ifndef DEFLATE
	vpxor	%%c3, %%t0
%endif
	test	%%len, %%len
	jz	%%exit			; nothing left

	add	%%out, 1*16
	add	%%in, 1*16
	jmp	%%fragment


%%tail32:				; two full vectors left
	vmovntdqa	%%t0, [%%in+0*16]
	vmovntdqa	%%t1, [%%in+1*16]

%ifndef DEFLATE
	FOLD2	%%c0, %%c1, %%c2, %%c3, %%kfold, %%t3
%endif
	vmovdqu	[%%out+0*16], %%t0
	vmovdqu	[%%out+1*16], %%t1

%ifndef DEFLATE
	vpxor	%%c2, %%t0
	vpxor	%%c3, %%t1
%endif
	test	%%len, %%len
	jz	%%exit			; nothing left

	add	%%out, 2*16
	add	%%in, 2*16
	jmp	%%fragment


%%tail48:				; three full vectors left
	vmovntdqa	%%t0, [%%in+0*16]
	vmovntdqa	%%t1, [%%in+1*16]
	vmovntdqa	%%t2, [%%in+2*16]

%ifndef DEFLATE
	FOLD3	%%c0, %%c1, %%c2, %%c3, %%kfold, %%t3
%endif
	vmovdqu	[%%out+0*16], %%t0
	vmovdqu	[%%out+1*16], %%t1
	vmovdqu	[%%out+2*16], %%t2

%ifndef DEFLATE
	vpxor	%%c1, %%t0
	vpxor	%%c2, %%t1
	vpxor	%%c3, %%t2
%endif
	test	%%len, %%len
	jz	%%exit			; nothing left

	add	%%out, 3*16
	add	%%in, 3*16

	; falls into the fragment handler
%%fragment:				; 0 <= len < 16

%ifndef DEFLATE
	mov	%%gpr, %%len		; LOAD_FRACTIONAL_XMM clobbers len
%endif

	LOAD_FRACTIONAL_XMM %%t0, %%in, %%len

	vmovdqu	[%%out], %%t0		; stores 16 bytes (data + zero padding)

%ifndef DEFLATE
	; out is used as GPR scratch here and is not restored
	PARTIAL_FOLD	%%c0, %%c1, %%c2, %%c3, %%t0, %%gpr, %%kfold, %%t1, %%t2, %%t3, %%t4, %%out
%endif

%%exit:
%endm
|
||||
|
||||
|
||||
;%assign K 1024;
|
||||
;%assign D 8 * K; ; Amount of history
|
||||
;%assign LA 17 * 16; ; Max look-ahead, rounded up to 32 byte boundary
|
||||
|
||||
; copy D + LA bytes from src to dst, 32 bytes (one YMM register) per move
; src may be unaligned (vmovdqu loads); dst must be 32-byte aligned
; (vmovdqa stores of YMM registers).  D and LA are assemble-time
; constants defined outside this macro.  A trailing 16-byte piece is
; copied through the XMM view of the first scratch register; any
; remainder of (D + LA) modulo 16 is left untouched.
;void copy_D_LA(uint8_t *dst, uint8_t *src);
; arg 1: rcx : dst
; arg 2: rdx : src
; copy_D_LA dst, src, tmp, ytmp0, ytmp1, ytmp2, ytmp3
%macro	copy_D_LA 7
%define %%dst	%1	; GPR, in: destination pointer, clobbered
%define %%src	%2	; GPR, in: source pointer, clobbered
%define %%tmp	%3	; GPR, scratch: end address of the 128-byte main loop
%define %%ytmp0 %4	; ymm, scratch
%define %%ytmp1 %5	; ymm, scratch
%define %%ytmp2 %6	; ymm, scratch
%define %%ytmp3 %7	; ymm, scratch

; 128-bit name for %4, formed by appending "x" to the operand text.
; NOTE(review): relies on <reg>x aliases being defined elsewhere - confirm.
%define %%xtmp0 %4x

%assign %%SIZE	(D + LA) / 32	; number of 32-byte (YMM) words to be copied
%assign %%SIZE4 %%SIZE/4	; number of 4-word (128-byte) groups
%assign %%MOD16 ((D + LA) - 32 * %%SIZE) / 16	; 1 if a 16-byte piece follows, else 0

	; The main loop below tests its exit condition only after the first
	; pass, so it must have at least one 128-byte group to copy.
%if (%%SIZE4 < 1)
	%error copy_D_LA needs D + LA of at least 128 bytes
%endif

	lea	%%tmp, [%%dst + 4 * 32 * %%SIZE4]	; dst value at which the loop stops
	jmp	%%copy_D_LA_1
align 16
%%copy_D_LA_1:
	vmovdqu	%%ytmp0, [%%src]
	vmovdqu	%%ytmp1, [%%src + 1 * 32]
	vmovdqu	%%ytmp2, [%%src + 2 * 32]
	vmovdqu	%%ytmp3, [%%src + 3 * 32]
	vmovdqa	[%%dst], %%ytmp0
	vmovdqa	[%%dst + 1 * 32], %%ytmp1
	vmovdqa	[%%dst + 2 * 32], %%ytmp2
	vmovdqa	[%%dst + 3 * 32], %%ytmp3
	add	%%src, 4*32
	add	%%dst, 4*32
	cmp	%%dst, %%tmp
	jne	%%copy_D_LA_1

	; Tail: the 0..3 YMM words left after the 128-byte groups.
	; All loads are issued first, then all stores.
%assign %%i 0
%rep (%%SIZE - 4 * %%SIZE4)

%if (%%i == 0)
	vmovdqu	%%ytmp0, [%%src + %%i*32]
%elif (%%i == 1)
	vmovdqu	%%ytmp1, [%%src + %%i*32]
%elif (%%i == 2)
	vmovdqu	%%ytmp2, [%%src + %%i*32]
%elif (%%i == 3)
	vmovdqu	%%ytmp3, [%%src + %%i*32]
%else
	%error too many i
%endif

%assign %%i %%i+1
%endrep
%assign %%i 0
%rep (%%SIZE - 4 * %%SIZE4)

%if (%%i == 0)
	vmovdqa	[%%dst + %%i*32], %%ytmp0
%elif (%%i == 1)
	vmovdqa	[%%dst + %%i*32], %%ytmp1
%elif (%%i == 2)
	vmovdqa	[%%dst + %%i*32], %%ytmp2
%elif (%%i == 3)
	vmovdqa	[%%dst + %%i*32], %%ytmp3
%else
	%error too many i
%endif

%assign %%i %%i+1
%endrep

	; Trailing 16-byte piece (emitted once when MOD16 is 1, otherwise
	; not at all), placed right after the YMM tail words.
%rep %%MOD16
	vmovdqu	%%xtmp0, [%%src + (%%SIZE - 4 * %%SIZE4)*32]
	vmovdqa	[%%dst + (%%SIZE - 4 * %%SIZE4)*32], %%xtmp0
%endrep

%endm
|
||||
%endif
|
1285
igzip/igzip_check.c
Normal file
1285
igzip/igzip_check.c
Normal file
File diff suppressed because it is too large
Load Diff
416
igzip/igzip_compare_types.asm
Normal file
416
igzip/igzip_compare_types.asm
Normal file
@ -0,0 +1,416 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%include "options.asm"
|
||||
%ifndef UTILS_ASM
|
||||
%define UTILS_ASM
|
||||
; compare macro
|
||||
|
||||
;; sttni2 is faster, but it can't be debugged
|
||||
;; so following code is based on "mine5"
|
||||
|
||||
;; compare258: length of the common prefix of two buffers, capped at
;; 258 bytes = 8 * 32 + 2
;; Compares 8 bytes at a time with a GPR, then the final 2 bytes.
;; On exit result holds the number of leading bytes that match
;; (0..258); tmp is clobbered.
;; tmp is expected to be a 64-bit GPR (each step advances 8 bytes);
;; the 16-bit view is formed by appending "w" to its name.
;; compare258 src1, src2, result, tmp
%macro compare258 4
%define %%bufa		%1
%define %%bufb		%2
%define %%matched	%3
%define %%diff		%4
%define %%diff16	%4w	; diff as a 16-bit register

	xor	%%matched, %%matched
%%scan:
	; two 8-byte probes per loop iteration
%rep 2
	mov	%%diff, [%%bufa + %%matched]
	xor	%%diff, [%%bufb + %%matched]	; nonzero bits mark differing bytes
	jnz	%%mismatch
	add	%%matched, 8

%endrep
	cmp	%%matched, 256
	jb	%%scan

	; bytes 256 and 257
	mov	%%diff16, [%%bufa + %%matched]
	xor	%%diff16, [%%bufb + %%matched]
	jnz	%%mismatch16

	; everything matched: result = 258
	add	%%matched, 2
	jmp	%%finish

%%mismatch16:
	and	%%diff, 0xFFFF		; discard stale upper bits of diff
%%mismatch:
	bsf	%%diff, %%diff		; index of lowest differing bit
	shr	%%diff, 3		; bit index -> byte index
	add	%%matched, %%diff
%%finish:
%endm
|
||||
|
||||
;; compare250: like compare258, but the scan starts at offset 8.
;; Bytes 0..7 are not examined and are counted as matching, so result
;; is in the range 8..258 (258 = 8 * 32 + 2).
;; Compares 8 bytes at a time with a GPR, then the final 2 bytes.
;; tmp is clobbered; its 16-bit view is formed by appending "w".
;; compare250 src1, src2, result, tmp
%macro compare250 4
%define %%bufa		%1
%define %%bufb		%2
%define %%matched	%3
%define %%diff		%4
%define %%diff16	%4w	; diff as a 16-bit register

	; first probe at the fixed offset 8, bringing the count to 16
	mov	%%matched, 8
	mov	%%diff, [%%bufa + 8]
	xor	%%diff, [%%bufb + 8]
	jnz	%%mismatch
	add	%%matched, 8

%%scan:
	; two 8-byte probes per loop iteration
%rep 2
	mov	%%diff, [%%bufa + %%matched]
	xor	%%diff, [%%bufb + %%matched]	; nonzero bits mark differing bytes
	jnz	%%mismatch
	add	%%matched, 8

%endrep
	cmp	%%matched, 256
	jb	%%scan

	; bytes 256 and 257
	mov	%%diff16, [%%bufa + %%matched]
	xor	%%diff16, [%%bufb + %%matched]
	jnz	%%mismatch16

	; everything matched: result = 258
	add	%%matched, 2
	jmp	%%finish

%%mismatch16:
	and	%%diff, 0xFFFF		; discard stale upper bits of diff
%%mismatch:
	bsf	%%diff, %%diff		; index of lowest differing bit
	shr	%%diff, 3		; bit index -> byte index
	add	%%matched, %%diff
%%finish:
%endm
|
||||
|
||||
;; compare258_x: length of the common prefix of two buffers, capped at
;; 258 bytes = 8 * 32 + 2
;; Compares 16 bytes at a time using pcmpeqb/pmovmskb, then the final
;; 2 bytes with a GPR.  On exit result holds the number of leading
;; bytes that match (0..258); tmp and both xmm scratch registers are
;; clobbered.
;; compare258_x src1, src2, result, tmp, xtmp1, xtmp2
%macro compare258_x 6
%define %%bufa		%1
%define %%bufb		%2
%define %%matched	%3
%define %%mask		%4
%define %%mask32	%4d	; mask as a 32-bit register
%define %%mask16	%4w	; mask as a 16-bit register
%define %%va		%5
%define %%vb		%6

	xor	%%matched, %%matched
%%scan:
	; two 16-byte probes per loop iteration
%rep 2
	movdqu		%%va, [%%bufa + %%matched]
	movdqu		%%vb, [%%bufb + %%matched]
	pcmpeqb		%%va, %%vb
	pmovmskb	%%mask32, %%va		; one bit per byte, 1 = equal
	xor		%%mask, 0xFFFF		; invert: 1 = differs
	jnz		%%mismatch
	add		%%matched, 16

%endrep
	cmp	%%matched, 256
	jb	%%scan

	; bytes 256 and 257
	mov	%%mask16, [%%bufa + %%matched]
	xor	%%mask16, [%%bufb + %%matched]
	jnz	%%mismatch16

	; everything matched: result = 258
	add	%%matched, 2
	jmp	%%finish

%%mismatch16:
	; here mask holds xor'ed data bits, so bit index / 8 = byte index
	and	%%mask, 0xFFFF
	bsf	%%mask, %%mask
	shr	%%mask, 3
	add	%%matched, %%mask
	jmp	%%finish
%%mismatch:
	; here mask holds one bit per byte, so the bit index is the byte index
	bsf	%%mask, %%mask
	add	%%matched, %%mask
%%finish:
%endm
|
||||
|
||||
;; compare250_x: common-prefix length capped at 258 bytes = 8 * 32 + 2,
;; assuming first 8 bytes were already checked (the scan starts at
;; offset 8 and those bytes are counted as matching).
;; Compares 16 bytes at a time using pcmpeqb/pmovmskb.  The last
;; 16-byte probe starts at offset 248 and therefore reads up to offset
;; 263 of each buffer; the result is clamped to 258.
;; compare250_x src1, src2, result, tmp, xtmp1, xtmp2
%macro compare250_x 6
%define %%bufa		%1
%define %%bufb		%2
%define %%matched	%3
%define %%mask		%4
%define %%mask32	%4d	; mask as a 32-bit register
%define %%va		%5
%define %%vb		%6

	; first probe at the fixed offset 8, bringing the count to 24
	mov	%%matched, 8
	movdqu		%%va, [%%bufa + 8]
	movdqu		%%vb, [%%bufb + 8]
	pcmpeqb		%%va, %%vb
	pmovmskb	%%mask32, %%va		; one bit per byte, 1 = equal
	xor		%%mask, 0xFFFF		; invert: 1 = differs
	jnz		%%mismatch
	add		%%matched, 16
%%scan:
	; two 16-byte probes per loop iteration
%rep 2
	movdqu		%%va, [%%bufa + %%matched]
	movdqu		%%vb, [%%bufb + %%matched]
	pcmpeqb		%%va, %%vb
	pmovmskb	%%mask32, %%va
	xor		%%mask, 0xFFFF
	jnz		%%mismatch
	add		%%matched, 16

%endrep
	cmp	%%matched, 258 - 16
	jb	%%scan

	; final probe; it extends past byte 257
	movdqu		%%va, [%%bufa + %%matched]
	movdqu		%%vb, [%%bufb + %%matched]
	pcmpeqb		%%va, %%vb
	pmovmskb	%%mask32, %%va
	xor		%%mask, 0xFFFF
	jnz		%%mismatch_final
	; everything matched: result = 258
	mov	%%matched, 258
	jmp	%%finish

%%mismatch_final:
	bsf	%%mask, %%mask
	add	%%matched, %%mask

	;; a mismatch beyond byte 257 still counts as a full 258-byte match
	mov	%%mask, 258
	cmp	%%matched, 258
	cmova	%%matched, %%mask
	jmp	%%finish
%%mismatch:
	bsf	%%mask, %%mask		; bit index = byte index within the probe
	add	%%matched, %%mask
%%finish:
%endm
|
||||
|
||||
;; compare258_y: length of the common prefix of two buffers, capped at
;; 258 bytes = 8 * 32 + 2
;; Compares 32 bytes at a time using vpcmpeqb/vpmovmskb, then the final
;; 2 bytes with a GPR.  On exit result holds the number of leading
;; bytes that match (0..258); tmp and both ymm scratch registers are
;; clobbered.
;; compare258_y src1, src2, result, tmp, ytmp1, ytmp2
%macro compare258_y 6
%define %%bufa		%1
%define %%bufb		%2
%define %%matched	%3
%define %%mask		%4
%define %%mask16	%4w	; mask as a 16-bit register
%define %%mask32	%4d	; mask as a 32-bit register
%define %%va		%5
%define %%vb		%6

	xor	%%matched, %%matched
%%scan:
	; two 32-byte probes per loop iteration
%rep 2
	vmovdqu		%%va, [%%bufa + %%matched]
	vmovdqu		%%vb, [%%bufb + %%matched]
	vpcmpeqb	%%va, %%va, %%vb
	vpmovmskb	%%mask, %%va		; one bit per byte, 1 = equal
	xor		%%mask32, 0xFFFFFFFF	; invert: 1 = differs
	jnz		%%mismatch
	add		%%matched, 32

%endrep
	cmp	%%matched, 256
	jb	%%scan

	; bytes 256 and 257
	mov	%%mask16, [%%bufa + %%matched]
	xor	%%mask16, [%%bufb + %%matched]
	jnz	%%mismatch16

	; everything matched: result = 258
	add	%%matched, 2
	jmp	%%finish

%%mismatch16:
	; here mask holds xor'ed data bits, so bit index / 8 = byte index
	and	%%mask, 0xFFFF
	bsf	%%mask, %%mask
	shr	%%mask, 3
	add	%%matched, %%mask
	jmp	%%finish
%%mismatch:
	; here mask holds one bit per byte, so the bit index is the byte index
	bsf	%%mask, %%mask
	add	%%matched, %%mask
%%finish:
%endm
|
||||
|
||||
|
||||
;; compare250_y: common-prefix length capped at 258 bytes = 8 * 32 + 2,
;; assuming first 8 bytes were already checked (the scan starts at
;; offset 8 and those bytes are counted as matching).
;; Compares 32 bytes at a time using vpcmpeqb/vpmovmskb.  The last
;; 32-byte probe starts at offset 232 and therefore reads up to offset
;; 263 of each buffer; the result is clamped to 258.
;; compare250_y src1, src2, result, tmp, ytmp1, ytmp2
%macro compare250_y 6
%define %%bufa		%1
%define %%bufb		%2
%define %%matched	%3
%define %%mask		%4
%define %%mask16	%4w	; mask as a 16-bit register (not used below)
%define %%mask32	%4d	; mask as a 32-bit register
%define %%va		%5
%define %%vb		%6

	; first probe at the fixed offset 8, bringing the count to 40
	mov	%%matched, 8
	vmovdqu		%%va, [%%bufa + 8]
	vmovdqu		%%vb, [%%bufb + 8]
	vpcmpeqb	%%va, %%va, %%vb
	vpmovmskb	%%mask, %%va		; one bit per byte, 1 = equal
	xor		%%mask32, 0xFFFFFFFF	; invert: 1 = differs
	jnz		%%mismatch
	add		%%matched, 32
%%scan:
	; two 32-byte probes per loop iteration
%rep 2
	vmovdqu		%%va, [%%bufa + %%matched]
	vmovdqu		%%vb, [%%bufb + %%matched]
	vpcmpeqb	%%va, %%va, %%vb
	vpmovmskb	%%mask, %%va
	xor		%%mask32, 0xFFFFFFFF
	jnz		%%mismatch
	add		%%matched, 32

%endrep
	cmp	%%matched, 258 - 32
	jb	%%scan

	; final probe; it extends past byte 257
	vmovdqu		%%va, [%%bufa + %%matched]
	vmovdqu		%%vb, [%%bufb + %%matched]
	vpcmpeqb	%%va, %%va, %%vb
	vpmovmskb	%%mask, %%va
	xor		%%mask32, 0xFFFFFFFF
	jnz		%%mismatch_final
	; everything matched: result = 258
	mov	%%matched, 258
	jmp	%%finish

%%mismatch_final:
	bsf	%%mask, %%mask
	add	%%matched, %%mask

	;; a mismatch beyond byte 257 still counts as a full 258-byte match
	mov	%%mask, 258
	cmp	%%matched, 258
	cmova	%%matched, %%mask
	jmp	%%finish

%%mismatch:
	bsf	%%mask, %%mask		; bit index = byte index within the probe
	add	%%matched, %%mask
%%finish:
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; compare: length of the common prefix of two buffers, examining at
;; most size bytes.  Scans 8 bytes at a time while 8 or more bytes
;; remain, then byte by byte.  On exit result holds the number of
;; leading bytes that match (0..size); size and tmp are clobbered.
;; tmp is expected to be a 64-bit GPR; its 8-bit view is formed by
;; appending "b" to its name.
;; compare size, src1, src2, result, tmp
%macro compare 5
%define %%remain	%1
%define %%bufa		%2
%define %%bufb		%3
%define %%matched	%4
%define %%diff		%5
%define %%diff8		%5b	; diff as a 8-bit register

	xor	%%matched, %%matched
	sub	%%remain, 7		; remain > 0 now means "at least 8 bytes left"
	jle	%%bytes
%%qwords:
	mov	%%diff, [%%bufa + %%matched]
	xor	%%diff, [%%bufb + %%matched]	; nonzero bits mark differing bytes
	jnz	%%mismatch
	add	%%matched, 8
	sub	%%remain, 8
	jg	%%qwords
%%bytes:
	;; no mismatch so far: remain+7 is the number of bytes still to
	;; check and matched is the number of bytes known to be equal
	add	%%remain, 7
	jle	%%finish
%%bytewise:
	mov	%%diff8, [%%bufa + %%matched]
	cmp	%%diff8, [%%bufb + %%matched]
	jne	%%finish
	inc	%%matched
	dec	%%remain
	jg	%%bytewise
	jmp	%%finish
%%mismatch:
	bsf	%%diff, %%diff		; index of lowest differing bit
	shr	%%diff, 3		; bit index -> byte index
	add	%%matched, %%diff
%%finish:
%endm
|
||||
|
||||
%endif ;UTILS_ASM
|
86
igzip/igzip_example.c
Normal file
86
igzip/igzip_example.c
Normal file
@ -0,0 +1,86 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "igzip_lib.h"
|
||||
|
||||
#define BUF_SIZE 8192
|
||||
|
||||
struct isal_zstream stream;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
uint8_t inbuf[BUF_SIZE], outbuf[BUF_SIZE];
|
||||
FILE *in, *out;
|
||||
|
||||
if (argc != 3) {
|
||||
fprintf(stderr, "Usage: igzip_example infile outfile\n");
|
||||
exit(0);
|
||||
}
|
||||
in = fopen(argv[1], "rb");
|
||||
if (!in) {
|
||||
fprintf(stderr, "Can't open %s for reading\n", argv[1]);
|
||||
exit(0);
|
||||
}
|
||||
out = fopen(argv[2], "wb");
|
||||
if (!out) {
|
||||
fprintf(stderr, "Can't open %s for writing\n", argv[2]);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
printf("igzip_example\nWindow Size: %d K\n", HIST_SIZE);
|
||||
fflush(0);
|
||||
|
||||
isal_deflate_init(&stream);
|
||||
stream.end_of_stream = 0;
|
||||
stream.flush = NO_FLUSH;
|
||||
|
||||
do {
|
||||
stream.avail_in = (uint32_t) fread(inbuf, 1, BUF_SIZE, in);
|
||||
stream.end_of_stream = feof(in);
|
||||
stream.next_in = inbuf;
|
||||
do {
|
||||
stream.avail_out = BUF_SIZE;
|
||||
stream.next_out = outbuf;
|
||||
|
||||
isal_deflate(&stream);
|
||||
|
||||
fwrite(outbuf, 1, BUF_SIZE - stream.avail_out, out);
|
||||
} while (stream.avail_out == 0);
|
||||
|
||||
assert(stream.avail_in == 0);
|
||||
} while (stream.internal_state.state != ZSTATE_END);
|
||||
|
||||
fclose(out);
|
||||
fclose(in);
|
||||
|
||||
printf("End of igzip_example\n\n");
|
||||
return 0;
|
||||
}
|
180
igzip/igzip_file_perf.c
Normal file
180
igzip/igzip_file_perf.c
Normal file
@ -0,0 +1,180 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "igzip_lib.h"
|
||||
#include "test.h"
|
||||
|
||||
#define BUF_SIZE 1024
|
||||
#define MIN_TEST_LOOPS 100
|
||||
#ifndef RUN_MEM_SIZE
|
||||
# define RUN_MEM_SIZE 500000000
|
||||
#endif
|
||||
|
||||
struct isal_zstream stream;
|
||||
|
||||
/*
 * Return the size in bytes of the open stream f, leaving the stream's
 * current position where it was.
 *
 * The size is found by seeking to the end and reading the offset back.
 * If any ftell()/fseek() call fails, 0 is returned so that callers never see
 * a negative size. The result is narrowed to int (the historical return
 * type), so files larger than INT_MAX bytes are not representable.
 */
int get_filesize(FILE * f)
{
	long curr, end;

	curr = ftell(f);	/* Save current position */
	if (curr < 0)
		return 0;

	if (fseek(f, 0L, SEEK_END) != 0)
		return 0;
	end = ftell(f);

	/* Restore position even when the size query itself failed */
	if (fseek(f, curr, SEEK_SET) != 0 || end < 0)
		return 0;

	return (int)end;
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
FILE *in, *out = NULL;
|
||||
unsigned char *inbuf, *outbuf;
|
||||
int i, infile_size, iterations, outbuf_size;
|
||||
struct isal_huff_histogram histogram;
|
||||
struct isal_hufftables hufftables_custom;
|
||||
|
||||
memset(&histogram, 0, sizeof(histogram));
|
||||
|
||||
if (argc > 3 || argc < 2) {
|
||||
fprintf(stderr, "Usage: igzip_file_perf infile [outfile]\n"
|
||||
"\t - Runs multiple iterations of igzip on a file to "
|
||||
"get more accurate time results.\n");
|
||||
exit(0);
|
||||
}
|
||||
in = fopen(argv[1], "rb");
|
||||
if (!in) {
|
||||
fprintf(stderr, "Can't open %s for reading\n", argv[1]);
|
||||
exit(0);
|
||||
}
|
||||
if (argc > 2) {
|
||||
out = fopen(argv[2], "wb");
|
||||
if (!out) {
|
||||
fprintf(stderr, "Can't open %s for writing\n", argv[2]);
|
||||
exit(0);
|
||||
}
|
||||
printf("outfile=%s\n", argv[2]);
|
||||
}
|
||||
printf("Window Size: %d K\n", HIST_SIZE);
|
||||
printf("igzip_file_perf: \n");
|
||||
fflush(0);
|
||||
/* Allocate space for entire input file and output
|
||||
* (assuming some possible expansion on output size)
|
||||
*/
|
||||
infile_size = get_filesize(in);
|
||||
|
||||
if (infile_size != 0) {
|
||||
outbuf_size = infile_size * 2;
|
||||
iterations = RUN_MEM_SIZE / infile_size;
|
||||
} else {
|
||||
outbuf_size = BUF_SIZE;
|
||||
iterations = MIN_TEST_LOOPS;
|
||||
}
|
||||
if (iterations < MIN_TEST_LOOPS)
|
||||
iterations = MIN_TEST_LOOPS;
|
||||
|
||||
inbuf = malloc(infile_size);
|
||||
if (inbuf == NULL) {
|
||||
fprintf(stderr, "Can't allocate input buffer memory\n");
|
||||
exit(0);
|
||||
}
|
||||
outbuf = malloc(outbuf_size);
|
||||
if (outbuf == NULL) {
|
||||
fprintf(stderr, "Can't allocate output buffer memory\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
printf("igzip_file_perf: %s %d iterations\n", argv[1], iterations);
|
||||
/* Read complete input file into buffer */
|
||||
stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in);
|
||||
if (stream.avail_in != infile_size) {
|
||||
fprintf(stderr, "Couldn't fit all of input file into buffer\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
struct perf start, stop;
|
||||
perf_start(&start);
|
||||
|
||||
for (i = 0; i < iterations; i++) {
|
||||
isal_deflate_init(&stream);
|
||||
stream.end_of_stream = 1; /* Do the entire file at once */
|
||||
stream.flush = NO_FLUSH;
|
||||
stream.next_in = inbuf;
|
||||
stream.avail_in = infile_size;
|
||||
stream.next_out = outbuf;
|
||||
stream.avail_out = outbuf_size;
|
||||
isal_deflate(&stream);
|
||||
if (stream.avail_in != 0)
|
||||
break;
|
||||
}
|
||||
perf_stop(&stop);
|
||||
|
||||
if (stream.avail_in != 0) {
|
||||
fprintf(stderr, "Could not compress all of inbuf\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
printf(" file %s - in_size=%d out_size=%d iter=%d ratio_default=%3.1f%%", argv[1],
|
||||
infile_size, stream.total_out, i, 100.0 * stream.total_out / infile_size);
|
||||
|
||||
isal_update_histogram(inbuf, infile_size, &histogram);
|
||||
isal_create_hufftables(&hufftables_custom, &histogram);
|
||||
|
||||
isal_deflate_init(&stream);
|
||||
stream.end_of_stream = 1; /* Do the entire file at once */
|
||||
stream.flush = NO_FLUSH;
|
||||
stream.next_in = inbuf;
|
||||
stream.avail_in = infile_size;
|
||||
stream.next_out = outbuf;
|
||||
stream.avail_out = outbuf_size;
|
||||
stream.hufftables = &hufftables_custom;
|
||||
isal_deflate(&stream);
|
||||
|
||||
printf(" ratio_custom=%3.1f%%\n", 100.0 * stream.total_out / infile_size);
|
||||
|
||||
if (stream.avail_in != 0) {
|
||||
fprintf(stderr, "Could not compress all of inbuf\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
printf("igzip_file: ");
|
||||
perf_print(stop, start, (long long)infile_size * i);
|
||||
|
||||
if (argc > 2 && out) {
|
||||
printf("writing %s\n", argv[2]);
|
||||
fwrite(outbuf, 1, stream.total_out, out);
|
||||
fclose(out);
|
||||
}
|
||||
|
||||
fclose(in);
|
||||
printf("End of igzip_file_perf\n\n");
|
||||
fflush(0);
|
||||
return 0;
|
||||
}
|
311
igzip/igzip_finish.asm
Normal file
311
igzip/igzip_finish.asm
Normal file
@ -0,0 +1,311 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%include "options.asm"
|
||||
%include "lz0a_const.asm"
|
||||
%include "data_struct2.asm"
|
||||
%include "bitbuf2.asm"
|
||||
%include "huffman.asm"
|
||||
%include "igzip_compare_types.asm"
|
||||
|
||||
%include "stdmac.asm"
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define tmp1 rax
|
||||
|
||||
%define f_index rbx
|
||||
%define code rbx
|
||||
%define tmp4 rbx
|
||||
%define tmp5 rbx
|
||||
%define tmp6 rbx
|
||||
|
||||
%define tmp2 rcx
|
||||
%define hash rcx
|
||||
|
||||
%define tmp3 rdx
|
||||
|
||||
%define stream rsi
|
||||
|
||||
%define f_i rdi
|
||||
|
||||
%define code_len2 rbp
|
||||
|
||||
%define m_out_buf r8
|
||||
|
||||
%define m_bits r9
|
||||
|
||||
%define dist r10
|
||||
|
||||
%define m_bit_count r11
|
||||
|
||||
%define code2 r12
|
||||
|
||||
%define f_end_i r12
|
||||
|
||||
%define file_start r13
|
||||
|
||||
%define len r14
|
||||
|
||||
%define hufftables r15
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
f_end_i_mem_offset equ 0 ; local variable (8 bytes)
|
||||
stack_size equ 8
|
||||
; void isal_deflate_finish ( isal_zstream *stream )
|
||||
; arg 1: rcx: addr of stream
|
||||
global isal_deflate_finish_01
|
||||
; isal_deflate_finish_01: encode the bytes still held in the stream's internal
; buffer, i.e. the range [b_bytes_processed, b_bytes_valid), as literal or
; length/distance codes. When all of them were consumed and the output is not
; full, the end-of-block symbol (256) is written and the state advances to
; ZSTATE_TRL (end_of_stream == 1) or ZSTATE_SYNC_FLUSH (otherwise). On exit
; b_bytes_processed, next_out, avail_out, total_out and the bit buffer
; (m_bits, m_bit_count) are written back to the stream.
;
; The stream pointer is taken from rcx; for elf64 output it is first copied
; from rdi. Callee-saved registers are preserved with PUSH_ALL / POP_ALL.
isal_deflate_finish_01:
|
||||
PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
|
||||
sub rsp, stack_size
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
mov rcx, rdi
|
||||
%endif
|
||||
|
||||
mov stream, rcx
|
||||
|
||||
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
|
||||
mov m_out_buf, [stream + _next_out]
|
||||
mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf
|
||||
mov tmp1 %+ d, [stream + _avail_out]
|
||||
add tmp1, m_out_buf
|
||||
; m_out_end = next_out + avail_out - SLOP
|
||||
; NOTE(review): SLOP is presumably a safety margin so the bit writer may
|
||||
; overshoot slightly; its value is defined outside this file - confirm.
|
||||
sub tmp1, SLOP
|
||||
; NOTE(review): no jump to skip_SLOP is visible in this function.
skip_SLOP:
|
||||
mov [stream + _internal_state_bitbuf_m_out_end], tmp1
|
||||
|
||||
mov m_bits, [stream + _internal_state_bitbuf_m_bits]
|
||||
mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
|
||||
|
||||
mov hufftables, [stream + _hufftables]
|
||||
|
||||
; f_i = state->b_bytes_processed;
|
||||
; f_end_i = state->b_bytes_valid;
|
||||
mov f_i %+ d, [stream + _internal_state_b_bytes_processed]
|
||||
mov f_end_i %+ d, [stream + _internal_state_b_bytes_valid]
|
||||
|
||||
; f_i += (uint32_t)(state->buffer - state->file_start);
|
||||
; f_end_i += (uint32_t)(state->buffer - state->file_start);
|
||||
mov file_start, [stream + _internal_state_file_start]
|
||||
lea tmp1, [stream + _internal_state_buffer]
|
||||
sub tmp1, file_start
|
||||
add f_i, tmp1
|
||||
add f_end_i, tmp1
|
||||
; f_end_i shares r12 with code2 (see the %defines above), so the end index
; is spilled to the stack slot and re-read from there inside the loop.
mov [rsp + f_end_i_mem_offset], f_end_i
|
||||
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
|
||||
cmp f_i, f_end_i
|
||||
jge end_loop_2
|
||||
|
||||
; tmp1 holds the 4 bytes at the current position on entry to loop2; both
; loop tails reload it for the next position before jumping back.
mov tmp1 %+ d, [file_start + f_i]
|
||||
|
||||
loop2:
|
||||
; if (state->bitbuf.is_full()) {
|
||||
; (leave the loop once the output pointer has passed m_out_end)
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||
ja end_loop_2
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
compute_hash hash, tmp1
|
||||
and hash %+ d, HASH_MASK
|
||||
|
||||
; f_index = state->head[hash];
|
||||
movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
|
||||
|
||||
; state->head[hash] = (uint16_t) f_i;
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
|
||||
; dist = f_i - f_index; // mod 64k
|
||||
mov dist %+ d, f_i %+ d
|
||||
sub dist %+ d, f_index %+ d
|
||||
and dist %+ d, 0xFFFF
|
||||
|
||||
; if ((dist-1) < (D-1)) {   (unsigned compare: dist == 0 wraps and is rejected)
|
||||
mov tmp1 %+ d, dist %+ d
|
||||
sub tmp1 %+ d, 1
|
||||
cmp tmp1 %+ d, (D-1)
|
||||
jae encode_literal
|
||||
|
||||
; len = f_end_i - f_i;
|
||||
mov tmp4, [rsp + f_end_i_mem_offset]
|
||||
sub tmp4, f_i
|
||||
|
||||
; if (len > 258) len = 258;
|
||||
cmp tmp4, 258
|
||||
cmovg tmp4, [c258]
|
||||
|
||||
; len = compare(state->file_start + f_i,
|
||||
; state->file_start + f_i - dist, len);
|
||||
lea tmp1, [file_start + f_i]
|
||||
mov tmp2, tmp1
|
||||
sub tmp2, dist
|
||||
compare tmp4, tmp1, tmp2, len, tmp3
|
||||
|
||||
; if (len >= SHORTEST_MATCH) {
|
||||
cmp len, SHORTEST_MATCH
|
||||
jb encode_literal
|
||||
|
||||
;; encode as dist/len
|
||||
|
||||
; get_dist_code(dist, &code2, &code_len2);
|
||||
get_dist_code dist, code2, code_len2, hufftables ;; clobbers dist, rcx
|
||||
|
||||
; get_len_code(len, &code, &code_len);
|
||||
get_len_code len, code, rcx, hufftables ;; rcx is code_len
|
||||
|
||||
; code2 <<= code_len
|
||||
; code2 |= code
|
||||
; code_len2 += code_len
|
||||
%ifdef USE_HSWNI
|
||||
shlx code2, code2, rcx
|
||||
%else
|
||||
shl code2, cl
|
||||
%endif
|
||||
or code2, code
|
||||
add code_len2, rcx
|
||||
|
||||
; for (k = f_i+1, f_i += len-1; k <= f_i; k++) {
|
||||
; (as coded: f_i advances by len and loop3 runs while k < f_i)
lea tmp3, [f_i + 1] ; tmp3 <= k
|
||||
add f_i, len
|
||||
%ifdef LIMIT_HASH_UPDATE
|
||||
; only update hash twice
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
mov tmp6 %+ d, [file_start + tmp3]
|
||||
compute_hash hash, tmp6
|
||||
and hash %+ d, HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
add tmp3, 1
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
mov tmp6 %+ d, [file_start + tmp3]
|
||||
compute_hash hash, tmp6
|
||||
and hash %+ d, HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
%else
|
||||
loop3:
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
mov tmp6 %+ d, [file_start + tmp3]
|
||||
compute_hash hash, tmp6
|
||||
and hash %+ d, HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
inc tmp3
|
||||
cmp tmp3, f_i
|
||||
jl loop3
|
||||
%endif
|
||||
|
||||
; preload the 4 bytes at the new position for the next loop2 iteration
mov tmp1 %+ d, [file_start + f_i]
|
||||
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
|
||||
|
||||
; continue
|
||||
cmp f_i, [rsp + f_end_i_mem_offset]
|
||||
jl loop2
|
||||
jmp end_loop_2
|
||||
|
||||
encode_literal:
|
||||
; preload the 4 bytes at f_i + 1 for the next loop2 iteration
mov tmp1 %+ d, [file_start + f_i + 1]
|
||||
|
||||
; get_lit_code(state->file_start[f_i], &code2, &code_len2);
|
||||
movzx tmp5, byte [file_start + f_i]
|
||||
get_lit_code tmp5, code2, code_len2, hufftables
|
||||
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp5
|
||||
|
||||
; continue
|
||||
add f_i, 1
|
||||
cmp f_i, [rsp + f_end_i_mem_offset]
|
||||
jl loop2
|
||||
|
||||
end_loop_2:
|
||||
|
||||
; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
|
||||
cmp f_i, [rsp + f_end_i_mem_offset]
|
||||
jl not_end
|
||||
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||
ja not_end
|
||||
|
||||
; end_of_stream is tested against exactly 1; when it is 1 and left_over is
; still positive, the end-of-block symbol is not written in this call.
cmp dword [stream + _end_of_stream], 1
|
||||
jne cont
|
||||
cmp dword [stream + _internal_state_left_over], 0
|
||||
jg not_end
|
||||
|
||||
cont:
|
||||
; get_lit_code(256, &code2, &code_len2);
|
||||
get_lit_code 256, code2, code_len2, hufftables
|
||||
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp1
|
||||
|
||||
; state->has_eob = 1;
mov dword [stream + _internal_state_has_eob], 1
|
||||
cmp dword [stream + _end_of_stream], 1
|
||||
jne sync_flush
|
||||
; state->state = ZSTATE_TRL;
|
||||
mov dword [stream + _internal_state_state], ZSTATE_TRL
|
||||
jmp not_end
|
||||
|
||||
sync_flush:
|
||||
; state->state = ZSTATE_SYNC_FLUSH;
|
||||
mov dword [stream + _internal_state_state], ZSTATE_SYNC_FLUSH
|
||||
; }
|
||||
not_end:
|
||||
|
||||
; state->b_bytes_processed = f_i - (state->buffer - state->file_start);
|
||||
add f_i, [stream + _internal_state_file_start]
|
||||
sub f_i, stream
|
||||
sub f_i, _internal_state_buffer
|
||||
mov [stream + _internal_state_b_bytes_processed], f_i %+ d
|
||||
|
||||
; // update output buffer
|
||||
; stream->next_out = state->bitbuf.buffer_ptr();
|
||||
mov [stream + _next_out], m_out_buf
|
||||
; len = state->bitbuf.buffer_used();
|
||||
sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start]
|
||||
|
||||
; stream->avail_out -= len;
|
||||
sub [stream + _avail_out], m_out_buf %+ d
|
||||
; stream->total_out += len;
|
||||
add [stream + _total_out], m_out_buf %+ d
|
||||
|
||||
mov [stream + _internal_state_bitbuf_m_bits], m_bits
|
||||
mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
|
||||
add rsp, stack_size
|
||||
POP_ALL
|
||||
ret
|
||||
|
||||
section .data
|
||||
align 4
|
||||
c258: dq 258
|
151
igzip/igzip_inflate_perf.c
Normal file
151
igzip/igzip_inflate_perf.c
Normal file
@ -0,0 +1,151 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <zlib.h>
|
||||
#include "huff_codes.h"
|
||||
#include "igzip_inflate_ref.h"
|
||||
#include "test.h"
|
||||
|
||||
#define BUF_SIZE 1024
|
||||
#define MIN_TEST_LOOPS 100
|
||||
#ifndef RUN_MEM_SIZE
|
||||
# define RUN_MEM_SIZE 1000000000
|
||||
#endif
|
||||
|
||||
/*
 * Return the size in bytes of the open stream f, leaving the stream's
 * current position where it was.
 *
 * The size is found by seeking to the end and reading the offset back.
 * If any ftell()/fseek() call fails, 0 is returned so that callers never see
 * a negative size. The result is narrowed to int (the historical return
 * type), so files larger than INT_MAX bytes are not representable.
 */
int get_filesize(FILE * f)
{
	long curr, end;

	curr = ftell(f);	/* Save current position */
	if (curr < 0)
		return 0;

	if (fseek(f, 0L, SEEK_END) != 0)
		return 0;
	end = ftell(f);

	/* Restore position even when the size query itself failed */
	if (fseek(f, curr, SEEK_SET) != 0 || end < 0)
		return 0;

	return (int)end;
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
FILE *in, *out = NULL;
|
||||
unsigned char *inbuf, *outbuf, *tempbuf;
|
||||
int i, infile_size, iterations, outbuf_size, check;
|
||||
uint64_t inbuf_size;
|
||||
struct inflate_state state;
|
||||
|
||||
if (argc > 3 || argc < 2) {
|
||||
fprintf(stderr, "Usage: igzip_inflate_file_perf infile\n"
|
||||
"\t - Runs multiple iterations of igzip on a file to "
|
||||
"get more accurate time results.\n");
|
||||
exit(0);
|
||||
}
|
||||
in = fopen(argv[1], "rb");
|
||||
if (!in) {
|
||||
fprintf(stderr, "Can't open %s for reading\n", argv[1]);
|
||||
exit(0);
|
||||
}
|
||||
if (argc > 2) {
|
||||
out = fopen(argv[2], "wb");
|
||||
if (!out) {
|
||||
fprintf(stderr, "Can't open %s for writing\n", argv[2]);
|
||||
exit(0);
|
||||
}
|
||||
printf("outfile=%s\n", argv[2]);
|
||||
}
|
||||
printf("igzip_inflate_perf: \n");
|
||||
fflush(0);
|
||||
/* Allocate space for entire input file and output
|
||||
* (assuming some possible expansion on output size)
|
||||
*/
|
||||
infile_size = get_filesize(in);
|
||||
|
||||
if (infile_size != 0) {
|
||||
outbuf_size = infile_size;
|
||||
iterations = RUN_MEM_SIZE / infile_size;
|
||||
} else {
|
||||
printf("Error: input file has 0 size\n");
|
||||
exit(0);
|
||||
}
|
||||
if (iterations < MIN_TEST_LOOPS)
|
||||
iterations = MIN_TEST_LOOPS;
|
||||
|
||||
tempbuf = malloc(infile_size);
|
||||
if (tempbuf == NULL) {
|
||||
fprintf(stderr, "Can't allocate temp buffer memory\n");
|
||||
exit(0);
|
||||
}
|
||||
inbuf_size = compressBound(infile_size);
|
||||
inbuf = malloc(inbuf_size);
|
||||
if (inbuf == NULL) {
|
||||
fprintf(stderr, "Can't allocate input buffer memory\n");
|
||||
exit(0);
|
||||
}
|
||||
outbuf = malloc(infile_size);
|
||||
if (outbuf == NULL) {
|
||||
fprintf(stderr, "Can't allocate output buffer memory\n");
|
||||
exit(0);
|
||||
}
|
||||
fread(tempbuf, 1, infile_size, in);
|
||||
i = compress2(inbuf, &inbuf_size, tempbuf, infile_size, 9);
|
||||
if (i != Z_OK) {
|
||||
printf("Compression of input file failed\n");
|
||||
exit(0);
|
||||
}
|
||||
printf("igzip_inflate_perf: %s %d iterations\n", argv[1], iterations);
|
||||
/* Read complete input file into buffer */
|
||||
fclose(in);
|
||||
struct perf start, stop;
|
||||
perf_start(&start);
|
||||
|
||||
for (i = 0; i < iterations; i++) {
|
||||
igzip_inflate_init(&state, inbuf + 2, inbuf_size - 2, outbuf, outbuf_size);
|
||||
|
||||
check = igzip_inflate(&state);
|
||||
if (check) {
|
||||
printf("Error in decompression with error %d\n", check);
|
||||
break;
|
||||
}
|
||||
}
|
||||
perf_stop(&stop);
|
||||
|
||||
printf(" file %s - in_size=%d out_size=%d iter=%d\n", argv[1],
|
||||
infile_size, state.out_buffer.total_out, i);
|
||||
|
||||
printf("igzip_file: ");
|
||||
perf_print(stop, start, (long long)infile_size * i);
|
||||
|
||||
printf("End of igzip_inflate_perf\n\n");
|
||||
fflush(0);
|
||||
|
||||
free(inbuf);
|
||||
free(outbuf);
|
||||
free(tempbuf);
|
||||
|
||||
return 0;
|
||||
}
|
668
igzip/igzip_inflate_ref.c
Normal file
668
igzip/igzip_inflate_ref.c
Normal file
@ -0,0 +1,668 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include "igzip_inflate_ref.h"
|
||||
|
||||
/*
 * Copy repeat_length bytes to dest, reading from the position
 * lookback_distance bytes before dest.
 *
 * The copy is done one byte at a time in ascending order on purpose: when
 * lookback_distance < repeat_length the source and destination overlap and
 * bytes written earlier in this call are read again, which replicates the
 * pattern. memcpy()/memmove() would not give that result.
 * A repeat_length of zero or less copies nothing.
 *
 * Defined without the `inline` specifier: under C99/C11 rules a non-static
 * inline definition provides no external symbol, so calls that are not
 * inlined fail to link.
 */
void byte_copy(uint8_t * dest, uint64_t lookback_distance, int repeat_length)
{
	const uint8_t *src = dest - lookback_distance;

	for (; repeat_length > 0; repeat_length--)
		*dest++ = *src++;
}
|
||||
|
||||
/*
 * Returns integer with first length bits reversed and all higher bits zeroed.
 *
 * All 16 bits of `bits` are reversed with the usual swap ladder (adjacent
 * bits, pairs, nibbles, bytes) and the result is shifted right so that only
 * the reversed low `length` bits remain. length must be in the range 0..16;
 * length == 0 yields 0.
 *
 * Defined without the `inline` specifier: under C99/C11 rules a non-static
 * inline definition provides no external symbol, so calls that are not
 * inlined fail to link.
 */
uint16_t bit_reverse2(uint16_t bits, uint8_t length)
{
	uint16_t rev = bits;

	rev = (uint16_t) (((rev >> 1) & 0x5555) | ((rev & 0x5555) << 1));	// swap bits
	rev = (uint16_t) (((rev >> 2) & 0x3333) | ((rev & 0x3333) << 2));	// swap pairs
	rev = (uint16_t) (((rev >> 4) & 0x0F0F) | ((rev & 0x0F0F) << 4));	// swap nibbles
	rev = (uint16_t) (((rev >> 8) & 0x00FF) | ((rev & 0x00FF) << 8));	// swap bytes

	return (uint16_t) (rev >> (16 - length));
}
|
||||
|
||||
void inline init_inflate_in_buffer(struct inflate_in_buffer *inflate_in)
|
||||
{
|
||||
inflate_in->read_in = 0;
|
||||
inflate_in->read_in_length = 0;
|
||||
}
|
||||
|
||||
void inline set_inflate_in_buffer(struct inflate_in_buffer *inflate_in, uint8_t * in_stream,
|
||||
uint32_t in_size)
|
||||
{
|
||||
inflate_in->next_in = inflate_in->start = in_stream;
|
||||
inflate_in->avail_in = in_size;
|
||||
}
|
||||
|
||||
void inline set_inflate_out_buffer(struct inflate_out_buffer *inflate_out,
|
||||
uint8_t * out_stream, uint32_t out_size)
|
||||
{
|
||||
inflate_out->next_out = out_stream;
|
||||
inflate_out->avail_out = out_size;
|
||||
inflate_out->total_out = 0;
|
||||
}
|
||||
|
||||
void inline inflate_in_clear_bits(struct inflate_in_buffer *inflate_in)
|
||||
{
|
||||
uint8_t bytes;
|
||||
|
||||
bytes = inflate_in->read_in_length / 8;
|
||||
|
||||
inflate_in->read_in = 0;
|
||||
inflate_in->read_in_length = 0;
|
||||
inflate_in->next_in -= bytes;
|
||||
inflate_in->avail_in += bytes;
|
||||
}
|
||||
|
||||
void inline inflate_in_load(struct inflate_in_buffer *inflate_in, int min_required)
|
||||
{
|
||||
uint64_t temp = 0;
|
||||
uint8_t new_bytes;
|
||||
|
||||
if (inflate_in->avail_in >= 8) {
|
||||
/* If there is enough space to load a 64 bits, load the data and use
|
||||
* that to fill read_in */
|
||||
new_bytes = 8 - (inflate_in->read_in_length + 7) / 8;
|
||||
temp = *(uint64_t *) inflate_in->next_in;
|
||||
|
||||
inflate_in->read_in |= temp << inflate_in->read_in_length;
|
||||
inflate_in->next_in += new_bytes;
|
||||
inflate_in->avail_in -= new_bytes;
|
||||
inflate_in->read_in_length += new_bytes * 8;
|
||||
|
||||
} else {
|
||||
/* Else fill the read_in buffer 1 byte at a time */
|
||||
while (inflate_in->read_in_length < 57 && inflate_in->avail_in > 0) {
|
||||
temp = *inflate_in->next_in;
|
||||
inflate_in->read_in |= temp << inflate_in->read_in_length;
|
||||
inflate_in->next_in++;
|
||||
inflate_in->avail_in--;
|
||||
inflate_in->read_in_length += 8;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
uint64_t inline inflate_in_peek_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count)
|
||||
{
|
||||
assert(bit_count < 57);
|
||||
|
||||
/* Load inflate_in if not enough data is in the read_in buffer */
|
||||
if (inflate_in->read_in_length < bit_count)
|
||||
inflate_in_load(inflate_in, 0);
|
||||
|
||||
return (inflate_in->read_in) & ((1 << bit_count) - 1);
|
||||
}
|
||||
|
||||
void inline inflate_in_shift_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count)
|
||||
{
|
||||
|
||||
inflate_in->read_in >>= bit_count;
|
||||
inflate_in->read_in_length -= bit_count;
|
||||
}
|
||||
|
||||
uint64_t inline inflate_in_read_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count)
|
||||
{
|
||||
uint64_t ret;
|
||||
assert(bit_count < 57);
|
||||
|
||||
/* Load inflate_in if not enough data is in the read_in buffer */
|
||||
if (inflate_in->read_in_length < bit_count)
|
||||
inflate_in_load(inflate_in, bit_count);
|
||||
|
||||
ret = (inflate_in->read_in) & ((1 << bit_count) - 1);
|
||||
inflate_in->read_in >>= bit_count;
|
||||
inflate_in->read_in_length -= bit_count;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int inline setup_static_header(struct inflate_state *state)
|
||||
{
|
||||
/* This could be turned into a memcpy of this functions output for
|
||||
* higher speed, but then DECODE_LOOKUP_SIZE couldn't be changed without
|
||||
* regenerating the table. */
|
||||
|
||||
int i;
|
||||
struct huff_code lit_code[LIT_LEN + 2];
|
||||
struct huff_code dist_code[DIST_LEN + 2];
|
||||
|
||||
/* These tables are based on the static huffman tree described in RFC
|
||||
* 1951 */
|
||||
uint16_t lit_count[16] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 24, 152, 112, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
uint16_t dist_count[16] = {
|
||||
0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
/* These for loops set the code lengths for the static literal/length
|
||||
* and distance codes defined in the deflate standard RFC 1951 */
|
||||
for (i = 0; i < 144; i++)
|
||||
lit_code[i].length = 8;
|
||||
|
||||
for (i = 144; i < 256; i++)
|
||||
lit_code[i].length = 9;
|
||||
|
||||
for (i = 256; i < 280; i++)
|
||||
lit_code[i].length = 7;
|
||||
|
||||
for (i = 280; i < LIT_LEN + 2; i++)
|
||||
lit_code[i].length = 8;
|
||||
|
||||
for (i = 0; i < DIST_LEN + 2; i++)
|
||||
dist_code[i].length = 5;
|
||||
|
||||
make_inflate_huff_code(&state->lit_huff_code, lit_code, LIT_LEN + 2, lit_count);
|
||||
make_inflate_huff_code(&state->dist_huff_code, dist_code, DIST_LEN + 2, dist_count);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void inline make_inflate_huff_code(struct inflate_huff_code *result,
|
||||
struct huff_code *huff_code_table, int table_length,
|
||||
uint16_t * count)
|
||||
{
|
||||
int i, j;
|
||||
uint16_t code = 0;
|
||||
uint16_t next_code[MAX_HUFF_TREE_DEPTH + 1];
|
||||
uint16_t long_code_list[LIT_LEN];
|
||||
uint32_t long_code_length = 0;
|
||||
uint16_t temp_code_list[1 << (15 - DECODE_LOOKUP_SIZE)];
|
||||
uint32_t temp_code_length;
|
||||
uint32_t long_code_lookup_length = 0;
|
||||
uint32_t max_length;
|
||||
uint16_t first_bits;
|
||||
uint32_t code_length;
|
||||
uint16_t long_bits;
|
||||
uint16_t min_increment;
|
||||
|
||||
memset(result, 0, sizeof(struct inflate_huff_code));
|
||||
|
||||
next_code[0] = code;
|
||||
|
||||
for (i = 1; i < MAX_HUFF_TREE_DEPTH + 1; i++)
|
||||
next_code[i] = (next_code[i - 1] + count[i - 1]) << 1;
|
||||
|
||||
for (i = 0; i < table_length; i++) {
|
||||
if (huff_code_table[i].length != 0) {
|
||||
/* Determine the code for symbol i */
|
||||
huff_code_table[i].code =
|
||||
bit_reverse2(next_code[huff_code_table[i].length],
|
||||
huff_code_table[i].length);
|
||||
|
||||
next_code[huff_code_table[i].length] += 1;
|
||||
|
||||
if (huff_code_table[i].length <= DECODE_LOOKUP_SIZE) {
|
||||
/* Set lookup table to return the current symbol
|
||||
* concatenated with the code length when the
|
||||
* first DECODE_LENGTH bits of the address are
|
||||
* the same as the code for the current
|
||||
* symbol. The first 9 bits are the code, bits
|
||||
* 14:10 are the code length, bit 15 is a flag
|
||||
* representing this is a symbol*/
|
||||
for (j = 0; j < (1 << (DECODE_LOOKUP_SIZE -
|
||||
huff_code_table[i].length)); j++)
|
||||
|
||||
result->small_code_lookup[(j <<
|
||||
huff_code_table[i].length) +
|
||||
huff_code_table[i].code]
|
||||
= i | (huff_code_table[i].length) << 9;
|
||||
|
||||
} else {
|
||||
/* Store the element in a list of elements with long codes. */
|
||||
long_code_list[long_code_length] = i;
|
||||
long_code_length++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < long_code_length; i++) {
|
||||
/*Set the look up table to point to a hint where the symbol can be found
|
||||
* in the list of long codes and add the current symbol to the list of
|
||||
* long codes. */
|
||||
if (huff_code_table[long_code_list[i]].code == 0xFFFF)
|
||||
continue;
|
||||
|
||||
max_length = huff_code_table[long_code_list[i]].length;
|
||||
first_bits =
|
||||
huff_code_table[long_code_list[i]].code & ((1 << DECODE_LOOKUP_SIZE) - 1);
|
||||
|
||||
temp_code_list[0] = long_code_list[i];
|
||||
temp_code_length = 1;
|
||||
|
||||
for (j = i + 1; j < long_code_length; j++) {
|
||||
if ((huff_code_table[long_code_list[j]].code &
|
||||
((1 << DECODE_LOOKUP_SIZE) - 1)) == first_bits) {
|
||||
if (max_length < huff_code_table[long_code_list[j]].length)
|
||||
max_length = huff_code_table[long_code_list[j]].length;
|
||||
temp_code_list[temp_code_length] = long_code_list[j];
|
||||
temp_code_length++;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < temp_code_length; j++) {
|
||||
code_length = huff_code_table[temp_code_list[j]].length;
|
||||
long_bits =
|
||||
huff_code_table[temp_code_list[j]].code >> DECODE_LOOKUP_SIZE;
|
||||
min_increment = 1 << (code_length - DECODE_LOOKUP_SIZE);
|
||||
for (; long_bits < (1 << (max_length - DECODE_LOOKUP_SIZE));
|
||||
long_bits += min_increment) {
|
||||
result->long_code_lookup[long_code_lookup_length + long_bits] =
|
||||
temp_code_list[j] | (code_length << 9);
|
||||
}
|
||||
huff_code_table[temp_code_list[j]].code = 0xFFFF;
|
||||
}
|
||||
result->small_code_lookup[first_bits] =
|
||||
long_code_lookup_length | (max_length << 9) | 0x8000;
|
||||
long_code_lookup_length += 1 << (max_length - DECODE_LOOKUP_SIZE);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
uint16_t inline decode_next(struct inflate_in_buffer *in_buffer,
|
||||
struct inflate_huff_code *huff_code)
|
||||
{
|
||||
uint16_t next_bits;
|
||||
uint16_t next_sym;
|
||||
|
||||
next_bits = inflate_in_peek_bits(in_buffer, DECODE_LOOKUP_SIZE);
|
||||
|
||||
/* next_sym is a possible symbol decoded from next_bits. If bit 15 is 0,
|
||||
* next_code is a symbol. Bits 9:0 represent the symbol, and bits 14:10
|
||||
* represent the length of that symbols huffman code. If next_sym is not
|
||||
* a symbol, it provides a hint of where the large symbols containin
|
||||
* this code are located. Note the hint is at largest the location the
|
||||
* first actual symbol in the long code list.*/
|
||||
next_sym = huff_code->small_code_lookup[next_bits];
|
||||
|
||||
if (next_sym < 0x8000) {
|
||||
/* Return symbol found if next_code is a complete huffman code
|
||||
* and shift in buffer over by the length of the next_code */
|
||||
inflate_in_shift_bits(in_buffer, next_sym >> 9);
|
||||
|
||||
return next_sym & 0x1FF;
|
||||
|
||||
} else {
|
||||
/* If a symbol is not found, perform a linear search of the long code
|
||||
* list starting from the hint in next_sym */
|
||||
next_bits = inflate_in_peek_bits(in_buffer, (next_sym - 0x8000) >> 9);
|
||||
next_sym =
|
||||
huff_code->long_code_lookup[(next_sym & 0x1FF) +
|
||||
(next_bits >> DECODE_LOOKUP_SIZE)];
|
||||
inflate_in_shift_bits(in_buffer, next_sym >> 9);
|
||||
return next_sym & 0x1FF;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
int inline setup_dynamic_header(struct inflate_state *state)
|
||||
{
|
||||
int i, j;
|
||||
struct huff_code code_huff[CODE_LEN_CODES];
|
||||
struct huff_code lit_and_dist_huff[LIT_LEN + DIST_LEN];
|
||||
struct huff_code *previous = NULL, *current;
|
||||
struct inflate_huff_code inflate_code_huff;
|
||||
uint8_t hclen, hdist, hlit;
|
||||
uint16_t code_count[16], lit_count[16], dist_count[16];
|
||||
uint16_t *count;
|
||||
uint16_t symbol;
|
||||
|
||||
/* This order is defined in RFC 1951 page 13 */
|
||||
const uint8_t code_length_code_order[CODE_LEN_CODES] = {
|
||||
0x10, 0x11, 0x12, 0x00, 0x08, 0x07, 0x09, 0x06,
|
||||
0x0a, 0x05, 0x0b, 0x04, 0x0c, 0x03, 0x0d, 0x02,
|
||||
0x0e, 0x01, 0x0f
|
||||
};
|
||||
|
||||
memset(code_count, 0, sizeof(code_count));
|
||||
memset(lit_count, 0, sizeof(lit_count));
|
||||
memset(dist_count, 0, sizeof(dist_count));
|
||||
memset(code_huff, 0, sizeof(code_huff));
|
||||
memset(lit_and_dist_huff, 0, sizeof(lit_and_dist_huff));
|
||||
|
||||
/* These variables are defined in the deflate standard, RFC 1951 */
|
||||
hlit = inflate_in_read_bits(&state->in_buffer, 5);
|
||||
hdist = inflate_in_read_bits(&state->in_buffer, 5);
|
||||
hclen = inflate_in_read_bits(&state->in_buffer, 4);
|
||||
|
||||
/* Create the code huffman code for decoding the lit/len and dist huffman codes */
|
||||
for (i = 0; i < hclen + 4; i++) {
|
||||
code_huff[code_length_code_order[i]].length =
|
||||
inflate_in_read_bits(&state->in_buffer, 3);
|
||||
|
||||
code_count[code_huff[code_length_code_order[i]].length] += 1;
|
||||
}
|
||||
|
||||
if (state->in_buffer.read_in_length < 0)
|
||||
return END_OF_INPUT;
|
||||
|
||||
make_inflate_huff_code(&inflate_code_huff, code_huff, CODE_LEN_CODES, code_count);
|
||||
|
||||
/* Decode the lit/len and dist huffman codes using the code huffman code */
|
||||
count = lit_count;
|
||||
current = lit_and_dist_huff;
|
||||
|
||||
while (current < lit_and_dist_huff + LIT_LEN + hdist + 1) {
|
||||
/* If finished decoding the lit/len huffman code, start decoding
|
||||
* the distance code these decodings are in the same loop
|
||||
* because the len/lit and dist huffman codes are run length
|
||||
* encoded together. */
|
||||
if (current == lit_and_dist_huff + 257 + hlit)
|
||||
current = lit_and_dist_huff + LIT_LEN;
|
||||
|
||||
if (current == lit_and_dist_huff + LIT_LEN)
|
||||
count = dist_count;
|
||||
|
||||
symbol = decode_next(&state->in_buffer, &inflate_code_huff);
|
||||
|
||||
if (state->in_buffer.read_in_length < 0)
|
||||
return END_OF_INPUT;
|
||||
|
||||
if (symbol < 16) {
|
||||
/* If a length is found, update the current lit/len/dist
|
||||
* to have length symbol */
|
||||
count[symbol]++;
|
||||
current->length = symbol;
|
||||
previous = current;
|
||||
current++;
|
||||
|
||||
} else if (symbol == 16) {
|
||||
/* If a repeat length is found, update the next repeat
|
||||
* length lit/len/dist elements to have the value of the
|
||||
* repeated length */
|
||||
if (previous == NULL) /* No elements available to be repeated */
|
||||
return INVALID_BLOCK_HEADER;
|
||||
|
||||
i = 3 + inflate_in_read_bits(&state->in_buffer, 2);
|
||||
for (j = 0; j < i; j++) {
|
||||
*current = *previous;
|
||||
count[current->length]++;
|
||||
previous = current;
|
||||
|
||||
if (current == lit_and_dist_huff + 256 + hlit) {
|
||||
current = lit_and_dist_huff + LIT_LEN;
|
||||
count = dist_count;
|
||||
|
||||
} else
|
||||
current++;
|
||||
}
|
||||
|
||||
} else if (symbol == 17) {
|
||||
/* If a repeat zeroes if found, update then next
|
||||
* repeated zeroes length lit/len/dist elements to have
|
||||
* length 0. */
|
||||
i = 3 + inflate_in_read_bits(&state->in_buffer, 3);
|
||||
|
||||
for (j = 0; j < i; j++) {
|
||||
previous = current;
|
||||
|
||||
if (current == lit_and_dist_huff + 256 + hlit) {
|
||||
current = lit_and_dist_huff + LIT_LEN;
|
||||
count = dist_count;
|
||||
|
||||
} else
|
||||
current++;
|
||||
}
|
||||
|
||||
} else if (symbol == 18) {
|
||||
/* If a repeat zeroes if found, update then next
|
||||
* repeated zeroes length lit/len/dist elements to have
|
||||
* length 0. */
|
||||
i = 11 + inflate_in_read_bits(&state->in_buffer, 7);
|
||||
|
||||
for (j = 0; j < i; j++) {
|
||||
previous = current;
|
||||
|
||||
if (current == lit_and_dist_huff + 256 + hlit) {
|
||||
current = lit_and_dist_huff + LIT_LEN;
|
||||
count = dist_count;
|
||||
|
||||
} else
|
||||
current++;
|
||||
}
|
||||
} else
|
||||
return INVALID_BLOCK_HEADER;
|
||||
|
||||
}
|
||||
|
||||
if (state->in_buffer.read_in_length < 0)
|
||||
return END_OF_INPUT;
|
||||
|
||||
make_inflate_huff_code(&state->lit_huff_code, lit_and_dist_huff, LIT_LEN, lit_count);
|
||||
make_inflate_huff_code(&state->dist_huff_code, &lit_and_dist_huff[LIT_LEN], DIST_LEN,
|
||||
dist_count);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int read_header(struct inflate_state *state)
|
||||
{
|
||||
state->new_block = 0;
|
||||
|
||||
/* btype and bfinal are defined in RFC 1951, bfinal represents whether
|
||||
* the current block is the end of block, and btype represents the
|
||||
* encoding method on the current block. */
|
||||
state->bfinal = inflate_in_read_bits(&state->in_buffer, 1);
|
||||
state->btype = inflate_in_read_bits(&state->in_buffer, 2);
|
||||
|
||||
if (state->in_buffer.read_in_length < 0)
|
||||
return END_OF_INPUT;
|
||||
|
||||
if (state->btype == 0) {
|
||||
inflate_in_clear_bits(&state->in_buffer);
|
||||
return 0;
|
||||
|
||||
} else if (state->btype == 1)
|
||||
return setup_static_header(state);
|
||||
|
||||
else if (state->btype == 2)
|
||||
return setup_dynamic_header(state);
|
||||
|
||||
return INVALID_BLOCK_HEADER;
|
||||
}
|
||||
|
||||
void igzip_inflate_init(struct inflate_state *state, uint8_t * in_stream, uint32_t in_size,
|
||||
uint8_t * out_stream, uint64_t out_size)
|
||||
{
|
||||
|
||||
init_inflate_in_buffer(&state->in_buffer);
|
||||
|
||||
set_inflate_in_buffer(&state->in_buffer, in_stream, in_size);
|
||||
set_inflate_out_buffer(&state->out_buffer, out_stream, out_size);
|
||||
|
||||
state->new_block = 1;
|
||||
state->bfinal = 0;
|
||||
}
|
||||
|
||||
int igzip_inflate(struct inflate_state *state)
|
||||
{
|
||||
/* The following tables are based on the tables in the deflate standard,
|
||||
* RFC 1951 page 11. */
|
||||
const uint16_t len_start[29] = {
|
||||
0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
|
||||
0x0b, 0x0d, 0x0f, 0x11, 0x13, 0x17, 0x1b, 0x1f,
|
||||
0x23, 0x2b, 0x33, 0x3b, 0x43, 0x53, 0x63, 0x73,
|
||||
0x83, 0xa3, 0xc3, 0xe3, 0x102
|
||||
};
|
||||
const uint8_t len_extra_bit_count[29] = {
|
||||
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x1, 0x1, 0x1, 0x2, 0x2, 0x2, 0x2,
|
||||
0x3, 0x3, 0x3, 0x3, 0x4, 0x4, 0x4, 0x4,
|
||||
0x5, 0x5, 0x5, 0x5, 0x0
|
||||
};
|
||||
const uint32_t dist_start[30] = {
|
||||
0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d,
|
||||
0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1,
|
||||
0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01,
|
||||
0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001
|
||||
};
|
||||
const uint8_t dist_extra_bit_count[30] = {
|
||||
0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2,
|
||||
0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6,
|
||||
0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0xa, 0xa,
|
||||
0xb, 0xb, 0xc, 0xc, 0xd, 0xd
|
||||
};
|
||||
|
||||
uint16_t next_lit, len, nlen;
|
||||
uint8_t next_dist;
|
||||
uint32_t repeat_length;
|
||||
uint32_t look_back_dist;
|
||||
uint32_t tmp;
|
||||
|
||||
while (state->new_block == 0 || state->bfinal == 0) {
|
||||
if (state->new_block != 0) {
|
||||
tmp = read_header(state);
|
||||
|
||||
if (tmp)
|
||||
return tmp;
|
||||
}
|
||||
|
||||
if (state->btype == 0) {
|
||||
/* If the block is uncompressed, perform a memcopy while
|
||||
* updating state data */
|
||||
if (state->in_buffer.avail_in < 4)
|
||||
return END_OF_INPUT;
|
||||
|
||||
len = *(uint16_t *) state->in_buffer.next_in;
|
||||
state->in_buffer.next_in += 2;
|
||||
nlen = *(uint16_t *) state->in_buffer.next_in;
|
||||
state->in_buffer.next_in += 2;
|
||||
|
||||
/* Check if len and nlen match */
|
||||
if (len != (~nlen & 0xffff))
|
||||
return INVALID_NON_COMPRESSED_BLOCK_LENGTH;
|
||||
|
||||
if (state->out_buffer.avail_out < len)
|
||||
return OUT_BUFFER_OVERFLOW;
|
||||
|
||||
if (state->in_buffer.avail_in < len)
|
||||
len = state->in_buffer.avail_in;
|
||||
|
||||
else
|
||||
state->new_block = 1;
|
||||
|
||||
memcpy(state->out_buffer.next_out, state->in_buffer.next_in, len);
|
||||
|
||||
state->out_buffer.next_out += len;
|
||||
state->out_buffer.avail_out -= len;
|
||||
state->out_buffer.total_out += len;
|
||||
state->in_buffer.next_in += len;
|
||||
state->in_buffer.avail_in -= len + 4;
|
||||
|
||||
if (state->in_buffer.avail_in == 0 && state->new_block == 0)
|
||||
return END_OF_INPUT;
|
||||
|
||||
} else {
|
||||
/* Else decode a huffman encoded block */
|
||||
while (state->new_block == 0) {
|
||||
/* While not at the end of block, decode the next
|
||||
* symbol */
|
||||
|
||||
next_lit =
|
||||
decode_next(&state->in_buffer, &state->lit_huff_code);
|
||||
|
||||
if (state->in_buffer.read_in_length < 0)
|
||||
return END_OF_INPUT;
|
||||
|
||||
if (next_lit < 256) {
|
||||
/* If the next symbol is a literal,
|
||||
* write out the symbol and update state
|
||||
* data accordingly. */
|
||||
if (state->out_buffer.avail_out < 1)
|
||||
return OUT_BUFFER_OVERFLOW;
|
||||
|
||||
*state->out_buffer.next_out = next_lit;
|
||||
state->out_buffer.next_out++;
|
||||
state->out_buffer.avail_out--;
|
||||
state->out_buffer.total_out++;
|
||||
|
||||
} else if (next_lit == 256) {
|
||||
/* If the next symbol is the end of
|
||||
* block, update the state data
|
||||
* accordingly */
|
||||
state->new_block = 1;
|
||||
|
||||
} else if (next_lit < 286) {
|
||||
/* Else if the next symbol is a repeat
|
||||
* length, read in the length extra
|
||||
* bits, the distance code, the distance
|
||||
* extra bits. Then write out the
|
||||
* corresponding data and update the
|
||||
* state data accordingly*/
|
||||
repeat_length =
|
||||
len_start[next_lit - 257] +
|
||||
inflate_in_read_bits(&state->in_buffer,
|
||||
len_extra_bit_count[next_lit -
|
||||
257]);
|
||||
|
||||
if (state->out_buffer.avail_out < repeat_length)
|
||||
return OUT_BUFFER_OVERFLOW;
|
||||
|
||||
next_dist = decode_next(&state->in_buffer,
|
||||
&state->dist_huff_code);
|
||||
|
||||
look_back_dist = dist_start[next_dist] +
|
||||
inflate_in_read_bits(&state->in_buffer,
|
||||
dist_extra_bit_count
|
||||
[next_dist]);
|
||||
|
||||
if (state->in_buffer.read_in_length < 0)
|
||||
return END_OF_INPUT;
|
||||
|
||||
if (look_back_dist > state->out_buffer.total_out)
|
||||
return INVALID_LOOK_BACK_DISTANCE;
|
||||
|
||||
if (look_back_dist > repeat_length) {
|
||||
memcpy(state->out_buffer.next_out,
|
||||
state->out_buffer.next_out -
|
||||
look_back_dist, repeat_length);
|
||||
} else
|
||||
byte_copy(state->out_buffer.next_out,
|
||||
look_back_dist, repeat_length);
|
||||
|
||||
state->out_buffer.next_out += repeat_length;
|
||||
state->out_buffer.avail_out -= repeat_length;
|
||||
state->out_buffer.total_out += repeat_length;
|
||||
|
||||
} else
|
||||
/* Else the read in bits do not
|
||||
* correspond to any valid symbol */
|
||||
return INVALID_SYMBOL;
|
||||
}
|
||||
}
|
||||
}
|
||||
state->in_buffer.next_in -= state->in_buffer.read_in_length / 8;
|
||||
state->in_buffer.avail_in += state->in_buffer.read_in_length / 8;
|
||||
|
||||
return DECOMPRESSION_FINISHED;
|
||||
}
|
150
igzip/igzip_inflate_ref.h
Normal file
150
igzip/igzip_inflate_ref.h
Normal file
@ -0,0 +1,150 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef INFLATE_H
|
||||
#define INFLATE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "huff_codes.h"
|
||||
|
||||
/* Return codes of igzip_inflate and its helpers */
#define DECOMPRESSION_FINISHED 0
#define END_OF_INPUT 1
#define OUT_BUFFER_OVERFLOW 2
#define INVALID_BLOCK_HEADER 3
#define INVALID_SYMBOL 4
#define INVALID_NON_COMPRESSED_BLOCK_LENGTH 5
#define INVALID_LOOK_BACK_DISTANCE 6

/* Number of input bits used to index the first level decode table;
 * capped at 15. */
#define DECODE_LOOKUP_SIZE 10

#if DECODE_LOOKUP_SIZE > 15
# undef DECODE_LOOKUP_SIZE
# define DECODE_LOOKUP_SIZE 15
#endif

/* Number of entries reserved for the second level (long code) decode table.
 * The expansion is parenthesized so the macro can be used safely inside
 * larger expressions. */
#if DECODE_LOOKUP_SIZE > 7
# define MAX_LONG_CODE (((2 << 8) + 1) * (2 << (15 - DECODE_LOOKUP_SIZE)) + 32)
#else
# define MAX_LONG_CODE ((2 << (15 - DECODE_LOOKUP_SIZE)) + (2 << (8 + DECODE_LOOKUP_SIZE)) + 32)
#endif
|
||||
|
||||
/* Bookkeeping for the decompressed output */
struct inflate_out_buffer {
	uint8_t *next_out;		/* where the next output byte is written */
	uint32_t avail_out, total_out;	/* bytes still free / bytes written so far */
};
|
||||
|
||||
/* Bookkeeping for the compressed input */
struct inflate_in_buffer {
	uint8_t *start, *next_in;	/* start: presumably the beginning of the
					 * input stream (TODO confirm);
					 * next_in: next unread input byte */
	uint32_t avail_in;		/* input bytes remaining at next_in */
	uint64_t read_in;		/* bits loaded from the input but not
					 * yet consumed */
	int32_t read_in_length;		/* number of valid bits in read_in;
					 * callers treat a negative value as
					 * "input exhausted" */
};
|
||||
|
||||
/* Data structure used to store a huffman code for fast look up */
|
||||
struct inflate_huff_code{
|
||||
uint16_t small_code_lookup[ 1 << (DECODE_LOOKUP_SIZE)];
|
||||
uint16_t long_code_lookup[MAX_LONG_CODE];
|
||||
};
|
||||
|
||||
/* Structure contained current state of decompression of data */
|
||||
struct inflate_state {
|
||||
struct inflate_out_buffer out_buffer;
|
||||
struct inflate_in_buffer in_buffer;
|
||||
struct inflate_huff_code lit_huff_code;
|
||||
struct inflate_huff_code dist_huff_code;
|
||||
uint8_t new_block;
|
||||
uint8_t bfinal;
|
||||
uint8_t btype;
|
||||
};
|
||||
|
||||
/* Copies repeat_length bytes from dest - lookback_distance to dest. When the
 * source and destination ranges overlap, bytes written earlier by this same
 * copy are copied again. */
void byte_copy(uint8_t *dest, uint64_t lookback_distance, int repeat_length);

/* Initializes a struct inflate_in_buffer for use */
void init_inflate_in_buffer(struct inflate_in_buffer *inflate_in);

/* Sets the input stream (in_stream, in_size bytes) of an in buffer */
void set_inflate_in_buffer(struct inflate_in_buffer *inflate_in, uint8_t *in_stream,
			   uint32_t in_size);

/* Sets the output stream (out_stream, out_size bytes) of an out buffer */
void set_inflate_out_buffer(struct inflate_out_buffer *inflate_out, uint8_t *out_stream,
			    uint32_t out_size);

/* Loads data from the input stream into the bit buffer so that data which is
 * not byte aligned can be handled */
void inflate_in_load(struct inflate_in_buffer *inflate_in, int min_load);

/* Returns the next bit_count bits of the input without consuming them */
uint64_t inflate_in_peek_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count);

/* Consumes bit_count bits of the input */
void inflate_in_shift_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count);

/* Returns the next bit_count bits of the input and consumes them */
uint64_t inflate_in_read_bits(struct inflate_in_buffer *inflate_in, uint8_t bit_count);

/* Sets the huffman codes in state to the ones of the deflate static header */
int setup_static_header(struct inflate_state *state);

/* Sets result to the decode tables of the huffman code defined by the code
 * lengths in huff_code_table, where count is a histogram of how often each
 * code length appears */
void make_inflate_huff_code(struct inflate_huff_code *result,
			    struct huff_code *huff_code_table, int table_length,
			    uint16_t *count);

/* Decodes the next symbol in in_buffer using the huffman code huff_code */
uint16_t decode_next(struct inflate_in_buffer *in_buffer,
		     struct inflate_huff_code *huff_code);

/* Reads a dynamic block header from the input and sets the huffman codes in
 * state accordingly */
int setup_dynamic_header(struct inflate_state *state);

/* Reads the block header at the current input position and sets up state to
 * reflect that header information */
int read_header(struct inflate_state *state);

/* Initializes a struct inflate_state for deflate compressed input data at
 * in_stream, with the decompressed data going to out_stream */
void igzip_inflate_init(struct inflate_state *state, uint8_t *in_stream, uint32_t in_size,
			uint8_t *out_stream, uint64_t out_size);

/* Decompresses deflate data. igzip_inflate_init must have been called first
 * to set up the state structure. */
int igzip_inflate(struct inflate_state *state);
|
||||
|
||||
#endif //INFLATE_H
|
182
igzip/igzip_inflate_test.c
Normal file
182
igzip/igzip_inflate_test.c
Normal file
@ -0,0 +1,182 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <zlib.h>
|
||||
#include "igzip_inflate_ref.h"
|
||||
#include "huff_codes.h"
|
||||
|
||||
/* Don't use files larger than memory can support, because compression and
 * decompression are done in a stateless manner. The value is 2 GiB; it is
 * computed as unsigned long long so it does not overflow where long is
 * 32 bits, and parenthesized so it is safe inside larger expressions. */
#define MAX_INPUT_FILE_SIZE (2ULL * 1024 * 1024 * 1024)
|
||||
|
||||
int test(uint8_t * compressed_stream, uint64_t * compressed_length,
|
||||
uint8_t * uncompressed_stream, int uncompressed_length,
|
||||
uint8_t * uncompressed_test_stream)
|
||||
{
|
||||
struct inflate_state state;
|
||||
int ret;
|
||||
ret =
|
||||
compress2(compressed_stream, compressed_length, uncompressed_stream,
|
||||
uncompressed_length, 9);
|
||||
if (ret) {
|
||||
printf("Failed compressing input with exit code %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
igzip_inflate_init(&state, compressed_stream + 2, *compressed_length - 2,
|
||||
uncompressed_test_stream, uncompressed_length);
|
||||
ret = igzip_inflate(&state);
|
||||
|
||||
switch (ret) {
|
||||
case 0:
|
||||
break;
|
||||
case END_OF_INPUT:
|
||||
printf(" did not decompress all input\n");
|
||||
return END_OF_INPUT;
|
||||
break;
|
||||
case INVALID_BLOCK_HEADER:
|
||||
printf(" invalid header\n");
|
||||
return INVALID_BLOCK_HEADER;
|
||||
break;
|
||||
case INVALID_SYMBOL:
|
||||
printf(" invalid symbol\n");
|
||||
return INVALID_SYMBOL;
|
||||
break;
|
||||
case OUT_BUFFER_OVERFLOW:
|
||||
printf(" out buffer overflow\n");
|
||||
return OUT_BUFFER_OVERFLOW;
|
||||
break;
|
||||
case INVALID_NON_COMPRESSED_BLOCK_LENGTH:
|
||||
printf("Invalid length bits in non-compressed block\n");
|
||||
return INVALID_NON_COMPRESSED_BLOCK_LENGTH;
|
||||
break;
|
||||
case INVALID_LOOK_BACK_DISTANCE:
|
||||
printf("Invalid lookback distance");
|
||||
return INVALID_LOOK_BACK_DISTANCE;
|
||||
break;
|
||||
default:
|
||||
printf(" error\n");
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (state.out_buffer.total_out != uncompressed_length) {
|
||||
printf("incorrect amount of data was decompressed from compressed data\n");
|
||||
printf("%d decompressed of %d compressed", state.out_buffer.total_out,
|
||||
uncompressed_length);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(uncompressed_stream, uncompressed_test_stream, uncompressed_length)) {
|
||||
printf(" decompressed data is not the same as the compressed data\n");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int i, j, ret = 0, fin_ret = 0;
|
||||
FILE *file;
|
||||
uint64_t compressed_length, file_length, uncompressed_length;
|
||||
uint8_t *uncompressed_stream, *compressed_stream, *uncompressed_test_stream;
|
||||
|
||||
if (argc == 1)
|
||||
printf("Error, no input file\n");
|
||||
|
||||
for (i = 1; i < argc; i++) {
|
||||
file = fopen(argv[i], "r");
|
||||
if (file == NULL) {
|
||||
printf("Error opening file %s\n", argv[i]);
|
||||
return 1;
|
||||
} else
|
||||
printf("Starting file %s", argv[i]);
|
||||
|
||||
fseek(file, 0, SEEK_END);
|
||||
file_length = ftell(file);
|
||||
fseek(file, 0, SEEK_SET);
|
||||
file_length -= ftell(file);
|
||||
if (file_length > MAX_INPUT_FILE_SIZE) {
|
||||
printf("File too large to run on this test\n");
|
||||
fclose(file);
|
||||
continue;
|
||||
}
|
||||
compressed_length = compressBound(file_length);
|
||||
uncompressed_stream = malloc(file_length);
|
||||
compressed_stream = malloc(compressed_length);
|
||||
uncompressed_test_stream = malloc(file_length);
|
||||
|
||||
if (uncompressed_stream == NULL) {
|
||||
printf("Failed to allocate memory\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (compressed_stream == NULL) {
|
||||
printf("Failed to allocate memory\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (uncompressed_test_stream == NULL) {
|
||||
printf("Failed to allocate memory\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
uncompressed_length = fread(uncompressed_stream, 1, file_length, file);
|
||||
ret =
|
||||
test(compressed_stream, &compressed_length, uncompressed_stream,
|
||||
uncompressed_length, uncompressed_test_stream);
|
||||
if (ret) {
|
||||
for (j = 0; j < compressed_length; j++) {
|
||||
if ((j & 31) == 0)
|
||||
printf("\n");
|
||||
else
|
||||
printf(" ");
|
||||
printf("0x%02x,", compressed_stream[j]);
|
||||
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
free(compressed_stream);
|
||||
free(uncompressed_stream);
|
||||
free(uncompressed_test_stream);
|
||||
|
||||
if (ret) {
|
||||
printf(" ... Fail with exit code %d\n", ret);
|
||||
return ret;
|
||||
} else
|
||||
printf(" ... Pass\n");
|
||||
|
||||
fin_ret |= ret;
|
||||
}
|
||||
return fin_ret;
|
||||
}
|
73
igzip/igzip_multibinary.asm
Normal file
73
igzip/igzip_multibinary.asm
Normal file
@ -0,0 +1,73 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; Multibinary dispatch stubs for the igzip entry points. Each public symbol
; is declared with mbin_interface and paired with an mbin_dispatch_init5 line
; that lists its candidate implementations.
default rel
|
||||
[bits 64]
|
||||
|
||||
; WRT_OPT expands to `wrt ..plt` for elf64 output and to nothing otherwise.
; It is not referenced directly in this file; presumably it is consumed by
; the macros pulled in from multibinary.asm -- TODO confirm.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define WRT_OPT wrt ..plt
|
||||
%else
|
||||
%define WRT_OPT
|
||||
%endif
|
||||
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
; Implementations provided by other objects. Every entry point has a _base
; and a _01 variant; only the two body routines also have a _04 variant.
extern isal_deflate_body_stateless_base
|
||||
extern isal_deflate_body_stateless_01
|
||||
extern isal_deflate_body_stateless_04
|
||||
|
||||
extern isal_deflate_body_base
|
||||
extern isal_deflate_body_01
|
||||
extern isal_deflate_body_04
|
||||
extern isal_deflate_finish_base
|
||||
extern isal_deflate_finish_01
|
||||
|
||||
extern get_crc_base
|
||||
extern get_crc_01
|
||||
|
||||
extern isal_deflate_init_base
|
||||
extern isal_deflate_init_01
|
||||
|
||||
section .text
|
||||
|
||||
%include "multibinary.asm"
|
||||
|
||||
; mbin_dispatch_init5 arguments: the interface name followed by four
; implementations, _base first.
; NOTE(review): how the four slots map to CPU feature levels is defined in
; multibinary.asm, which is not visible here -- confirm before reordering.
; init, finish and get_crc pass their _01 variant in all three non-base slots.
mbin_interface isal_deflate_init
|
||||
mbin_dispatch_init5 isal_deflate_init, isal_deflate_init_base, isal_deflate_init_01, isal_deflate_init_01, isal_deflate_init_01
|
||||
|
||||
; The stateless body uses its _04 variant in the last slot only.
mbin_interface isal_deflate_body_stateless
|
||||
mbin_dispatch_init5 isal_deflate_body_stateless, isal_deflate_body_stateless_base, isal_deflate_body_stateless_01, isal_deflate_body_stateless_01, isal_deflate_body_stateless_04
|
||||
|
||||
; Same slot layout as the stateless body: _04 in the last slot only.
mbin_interface isal_deflate_body
|
||||
mbin_dispatch_init5 isal_deflate_body, isal_deflate_body_base, isal_deflate_body_01, isal_deflate_body_01, isal_deflate_body_04
|
||||
mbin_interface isal_deflate_finish
|
||||
mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01
|
||||
|
||||
mbin_interface get_crc
|
||||
mbin_dispatch_init5 get_crc, get_crc_base, get_crc_01, get_crc_01, get_crc_01
|
92
igzip/igzip_perf.c
Normal file
92
igzip/igzip_perf.c
Normal file
@ -0,0 +1,92 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "igzip_lib.h"
|
||||
#include "test.h"
|
||||
|
||||
#define TEST_LEN (1024*1024)
|
||||
#define IBUF_SIZE (1024*1024)
|
||||
#define OBUF_SIZE (1024*1024)
|
||||
|
||||
#define TEST_LOOPS 400
|
||||
#define TEST_TYPE_STR "_warm"
|
||||
|
||||
/*
 * Fill a buffer with a repeating lowercase-alphabet pattern.
 *
 * The sequence starts at 'b' and cycles b, c, ..., z, a, b, ... with a
 * period of 26 bytes.
 *
 * data: destination buffer; must hold at least size bytes.
 * size: number of bytes to write. Zero or a negative value writes nothing
 *       (the former `while (size--)` loop ran past the end of the buffer
 *       when handed a negative size).
 */
void create_data(unsigned char *data, int size)
{
	unsigned char c = 'a';
	int k;

	for (k = 0; k < size; k++) {
		/* Advance first, then store: the very first byte is 'b'. */
		c = (c < 'z') ? (unsigned char)(c + 1) : 'a';
		data[k] = c;
	}
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i = 1;
|
||||
struct isal_zstream stream;
|
||||
unsigned char inbuf[IBUF_SIZE], zbuf[OBUF_SIZE];
|
||||
|
||||
printf("Window Size: %d K\n", HIST_SIZE);
|
||||
printf("igzip_perf: \n");
|
||||
fflush(0);
|
||||
create_data(inbuf, TEST_LEN);
|
||||
|
||||
struct perf start, stop;
|
||||
perf_start(&start);
|
||||
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
isal_deflate_init(&stream);
|
||||
|
||||
stream.avail_in = TEST_LEN;
|
||||
stream.end_of_stream = 1;
|
||||
stream.next_in = inbuf;
|
||||
stream.flush = NO_FLUSH;
|
||||
|
||||
do {
|
||||
stream.avail_out = OBUF_SIZE;
|
||||
stream.next_out = zbuf;
|
||||
isal_deflate(&stream);
|
||||
} while (stream.avail_out == 0);
|
||||
}
|
||||
|
||||
perf_stop(&stop);
|
||||
|
||||
printf("igzip" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * i);
|
||||
|
||||
if (!stream.end_of_stream) {
|
||||
printf("error: compression test could not fit into allocated buffers\n");
|
||||
return -1;
|
||||
}
|
||||
printf("End of igzip_perf\n\n");
|
||||
fflush(0);
|
||||
return 0;
|
||||
}
|
1614
igzip/igzip_rand_test.c
Normal file
1614
igzip/igzip_rand_test.c
Normal file
File diff suppressed because it is too large
Load Diff
644
igzip/igzip_stateless.asm
Normal file
644
igzip/igzip_stateless.asm
Normal file
@ -0,0 +1,644 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%include "options.asm"
|
||||
|
||||
%include "lz0a_const.asm"
|
||||
%include "data_struct2.asm"
|
||||
%include "bitbuf2.asm"
|
||||
%include "huffman.asm"
|
||||
%include "igzip_compare_types.asm"
|
||||
%include "reg_sizes.asm"
|
||||
|
||||
%include "stdmac.asm"
|
||||
|
||||
%define LAST_BYTES_COUNT 3 ; Bytes to prevent reading out of array bounds
|
||||
%define LA_STATELESS 264 ; Max number of bytes read in loop2 rounded up to 8 byte boundary
|
||||
|
||||
%ifdef DEBUG
|
||||
%macro MARK 1
|
||||
global %1
|
||||
%1:
|
||||
%endm
|
||||
%else
|
||||
%macro MARK 1
|
||||
%endm
|
||||
%endif
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%define tmp2 rcx
|
||||
%define hash2 rcx
|
||||
|
||||
%define curr_data rax
|
||||
%define code rax
|
||||
%define tmp5 rax
|
||||
|
||||
%define tmp4 rbx
|
||||
%define dist rbx
|
||||
%define code2 rbx
|
||||
|
||||
%define hash rdx
|
||||
%define len rdx
|
||||
%define code_len3 rdx
|
||||
|
||||
%define tmp1 rsi
|
||||
%define code_len2 rsi
|
||||
|
||||
%define file_start rdi
|
||||
|
||||
%define m_bit_count rbp
|
||||
|
||||
%define curr_data2 r8
|
||||
%define len2 r8
|
||||
%define tmp6 r8
|
||||
|
||||
%define m_bits r9
|
||||
|
||||
%define f_i r10
|
||||
|
||||
%define m_out_buf r11
|
||||
|
||||
%define f_end_i r12
|
||||
%define dist2 r12
|
||||
%define tmp7 r12
|
||||
%define code4 r12
|
||||
|
||||
%define tmp3 r13
|
||||
%define code3 r13
|
||||
|
||||
%define stream r14
|
||||
|
||||
%define hufftables r15
|
||||
|
||||
;; GPR r8 & r15 can be used
|
||||
|
||||
%define xtmp0 xmm0 ; tmp
|
||||
%define xtmp1 xmm1 ; tmp
|
||||
|
||||
%define ytmp0 ymm0 ; tmp
|
||||
%define ytmp1 ymm1 ; tmp
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
blen_mem_offset equ 0 ; local variable (8 bytes)
|
||||
f_end_i_mem_offset equ 8
|
||||
gpr_save_mem_offset equ 16 ; gpr save area (8*8 bytes)
|
||||
xmm_save_mem_offset equ 16 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
|
||||
stack_size equ 2*8 + 8*8 + 4*16 + 8
|
||||
;;; 8 because stack address is odd multiple of 8 after a function call and
|
||||
;;; we want it aligned to 16 bytes
|
||||
|
||||
; void isal_deflate_body_stateless ( isal_zstream *stream )
|
||||
; arg 1: rcx: addr of stream
|
||||
global isal_deflate_body_stateless_ %+ ARCH
|
||||
isal_deflate_body_stateless_ %+ ARCH %+ :
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
mov rcx, rdi
|
||||
%endif
|
||||
|
||||
;; do nothing if (avail_in == 0)
|
||||
cmp dword [rcx + _avail_in], 0
|
||||
jne skip1
|
||||
ret
|
||||
skip1:
|
||||
|
||||
%ifdef ALIGN_STACK
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
sub rsp, stack_size
|
||||
and rsp, ~15
|
||||
%else
|
||||
sub rsp, stack_size
|
||||
%endif
|
||||
|
||||
mov [rsp + gpr_save_mem_offset + 0*8], rbx
|
||||
mov [rsp + gpr_save_mem_offset + 1*8], rsi
|
||||
mov [rsp + gpr_save_mem_offset + 2*8], rdi
|
||||
mov [rsp + gpr_save_mem_offset + 3*8], rbp
|
||||
mov [rsp + gpr_save_mem_offset + 4*8], r12
|
||||
mov [rsp + gpr_save_mem_offset + 5*8], r13
|
||||
mov [rsp + gpr_save_mem_offset + 6*8], r14
|
||||
mov [rsp + gpr_save_mem_offset + 7*8], r15
|
||||
|
||||
mov stream, rcx
|
||||
mov dword [stream + _internal_state_has_eob], 0
|
||||
|
||||
; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
|
||||
mov m_out_buf, [stream + _next_out]
|
||||
mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf
|
||||
mov tmp1 %+ d, [stream + _avail_out]
|
||||
add tmp1, m_out_buf
|
||||
sub tmp1, SLOP
|
||||
|
||||
skip_SLOP:
|
||||
mov [stream + _internal_state_bitbuf_m_out_end], tmp1
|
||||
|
||||
mov m_bits, [stream + _internal_state_bitbuf_m_bits]
|
||||
mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
|
||||
mov hufftables, [stream + _hufftables]
|
||||
; state->b_bytes_valid = stream->avail_in;
|
||||
mov f_end_i %+ d, [stream + _avail_in]
|
||||
mov [stream + _internal_state_b_bytes_valid], f_end_i %+ d
|
||||
|
||||
mov f_i, 0
|
||||
mov file_start, [stream + _next_in]
|
||||
mov [stream + _internal_state_file_start], file_start
|
||||
|
||||
; f_end_i -= LA;
|
||||
sub f_end_i, LA_STATELESS
|
||||
mov [rsp + f_end_i_mem_offset], f_end_i
|
||||
; if (f_end_i <= 0) continue;
|
||||
cmp f_end_i, 0
|
||||
jle end_loop_2
|
||||
|
||||
; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
|
||||
MARK __stateless_compute_hash_ %+ ARCH
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
|
||||
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||
ja end
|
||||
|
||||
;; Encode first byte in the stream as a literal
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
and curr_data, 0xff
|
||||
get_lit_code curr_data, code2, code_len2, hufftables
|
||||
jmp write_lit_bits
|
||||
|
||||
align 16
|
||||
|
||||
loop2:
|
||||
shr curr_data2, 8
|
||||
xor hash2 %+ d, hash2 %+ d
|
||||
crc32 hash2 %+ d, curr_data2 %+ d
|
||||
|
||||
; hash = compute_hash(state->file_start + f_i) & HASH_MASK;
|
||||
and hash %+ d, HASH_MASK
|
||||
and hash2 %+ d, HASH_MASK
|
||||
|
||||
; if (state->bitbuf.is_full()) {
|
||||
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||
ja end
|
||||
|
||||
xor dist, dist
|
||||
xor dist2, dist2
|
||||
xor tmp3, tmp3
|
||||
|
||||
lea tmp1, [file_start + f_i]
|
||||
lea tmp6, [tmp1 - 1]
|
||||
|
||||
mov dist %+ w, f_i %+ w
|
||||
sub dist %+ w, word [stream + _internal_state_head + 2 * hash]
|
||||
|
||||
; state->head[hash] = (uint16_t) f_i;
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
|
||||
inc f_i
|
||||
|
||||
mov dist2 %+ w, f_i %+ w
|
||||
sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2]
|
||||
dec dist2
|
||||
|
||||
; state->head[hash2] = (uint16_t) f_i;
|
||||
mov [stream + _internal_state_head + 2 * hash2], f_i %+ w
|
||||
|
||||
mov tmp2, tmp1
|
||||
sub tmp2, dist
|
||||
dec dist
|
||||
|
||||
; if ((dist-1) < (D-1)) {
|
||||
cmp dist %+ d, (D-1)
|
||||
cmovae tmp2, tmp6
|
||||
cmovae dist, tmp3
|
||||
inc dist
|
||||
|
||||
cmp dist2 %+ d, (D-1)
|
||||
cmovae dist2, tmp3
|
||||
inc dist2
|
||||
|
||||
MARK __stateless_compare_ %+ ARCH
|
||||
; len = compare258(state->file_start + f_i,
|
||||
; state->file_start + f_i - dist);
|
||||
|
||||
;; Speculatively load distance code (except for when large windows are used)
|
||||
get_packed_dist_code dist, code2, hufftables
|
||||
|
||||
;; Check for long len/dist match (>7) with first literal
|
||||
mov len, [tmp1]
|
||||
xor len, [tmp2]
|
||||
jz compare_loop
|
||||
|
||||
%ifdef USE_HSWNI
|
||||
blsmsk tmp3, len
|
||||
or tmp3, 0xFFFFFF
|
||||
%endif
|
||||
|
||||
lea tmp1, [file_start + f_i]
|
||||
mov tmp2, tmp1
|
||||
sub tmp2, dist2
|
||||
|
||||
;; Speculatively load distance code (except for when large windows are used)
|
||||
get_packed_dist_code dist2, code4, hufftables
|
||||
|
||||
;; Check for len/dist match (>7) with second literal
|
||||
mov len2, [tmp1]
|
||||
xor len2, [tmp2]
|
||||
jz compare_loop2
|
||||
|
||||
%ifdef USE_HSWNI
|
||||
;; Check for len/dist match for first literal
|
||||
test tmp3, len2
|
||||
jz len_dist_lit_huffman_pre
|
||||
|
||||
cmp tmp3, 0xFFFFFF
|
||||
je encode_2_literals
|
||||
jmp len_dist_huffman_pre
|
||||
|
||||
|
||||
MARK __stateless_len_dist_lit_huffman_ %+ ARCH
|
||||
len_dist_lit_huffman_pre:
|
||||
movzx tmp1, curr_data %+ b
|
||||
get_lit_code tmp1, code3, code_len3, hufftables
|
||||
%else
|
||||
;; Speculatively load the code for the first literal
|
||||
movzx tmp1, curr_data %+ b
|
||||
get_lit_code tmp1, code3, rcx, hufftables
|
||||
|
||||
;; Check for len/dist match for first literal
|
||||
test len, 0xFFFFFF
|
||||
jz len_dist_huffman_pre
|
||||
|
||||
;; Speculatively load the code for the second literal
|
||||
shr curr_data, 8
|
||||
and curr_data, 0xff
|
||||
get_lit_code curr_data, code2, code_len2, hufftables
|
||||
|
||||
shl code2, cl
|
||||
or code2, code3
|
||||
add code_len2, rcx
|
||||
|
||||
;; Check for len/dist match for second literal
|
||||
test len2, 0xFFFFFF
|
||||
jnz write_lit_bits
|
||||
|
||||
MARK __stateless_len_dist_lit_huffman_ %+ ARCH
|
||||
len_dist_lit_huffman_pre:
|
||||
mov code_len3, rcx
|
||||
%endif
|
||||
bsf len2, len2
|
||||
shr len2, 3
|
||||
|
||||
|
||||
len_dist_lit_huffman:
|
||||
%ifndef LONGER_HUFFTABLE
|
||||
mov tmp4, dist2
|
||||
get_dist_code tmp4, code4, code_len2, hufftables ;; clobbers dist, rcx
|
||||
%else
|
||||
unpack_dist_code code4, code_len2
|
||||
%endif
|
||||
get_len_code len2, code, rcx, hufftables ;; rcx is code_len
|
||||
|
||||
%ifdef USE_HSWNI
|
||||
shlx code4, code4, rcx
|
||||
%else
|
||||
shl code4, cl
|
||||
%endif
|
||||
or code4, code
|
||||
add code_len2, rcx
|
||||
|
||||
mov rcx, code_len3
|
||||
|
||||
%ifdef USE_HSWNI
|
||||
shlx code4, code4, rcx
|
||||
%else
|
||||
shl code4, cl
|
||||
%endif
|
||||
or code4, code3
|
||||
add code_len2, rcx
|
||||
|
||||
mov code2, code4
|
||||
;; Setup for updating hash
|
||||
lea tmp3, [f_i + 1] ; tmp3 <= k
|
||||
add f_i, len2
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
mov tmp5 %+ d, [file_start + tmp3]
|
||||
mov tmp7, tmp5
|
||||
shr tmp7, 8
|
||||
|
||||
compute_hash hash, tmp5
|
||||
and hash %+ d, HASH_MASK
|
||||
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
|
||||
|
||||
add tmp3,1
|
||||
|
||||
jmp update_hash_for_symbol
|
||||
;; encode as dist/len
|
||||
|
||||
MARK __stateless_len_dist_huffman_ %+ ARCH
|
||||
len_dist_huffman_pre:
|
||||
bsf len, len
|
||||
shr len, 3
|
||||
|
||||
len_dist_huffman:
|
||||
dec f_i
|
||||
|
||||
; get_dist_code(dist, &code2, &code_len2);
|
||||
%ifndef LONGER_HUFFTABLE
|
||||
mov tmp3, dist ; since code2 and dist are rbx
|
||||
get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx
|
||||
%else
|
||||
unpack_dist_code code2, code_len2
|
||||
%endif
|
||||
; get_len_code(len, &code, &code_len);
|
||||
get_len_code len, code, rcx, hufftables ;; rcx is code_len
|
||||
|
||||
; code2 <<= code_len
|
||||
; code2 |= code
|
||||
; code_len2 += code_len
|
||||
%ifdef USE_HSWNI
|
||||
shlx code2, code2, rcx
|
||||
%else
|
||||
shl code2, cl
|
||||
%endif
|
||||
or code2, code
|
||||
add code_len2, rcx
|
||||
|
||||
;; Setup for updating hash
|
||||
lea tmp3, [f_i + 2] ; tmp3 <= k
|
||||
add f_i, len
|
||||
mov tmp7 %+ d, [file_start + tmp3]
|
||||
|
||||
MARK __stateless_update_hash_for_symbol_ %+ ARCH
|
||||
update_hash_for_symbol:
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
mov curr_data2, curr_data
|
||||
compute_hash hash, curr_data
|
||||
%ifdef LIMIT_HASH_UPDATE
|
||||
; only update hash twice, first hash was already calculated.
|
||||
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
compute_hash hash2, tmp7
|
||||
and hash2 %+ d, HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
||||
|
||||
%else
|
||||
loop3:
|
||||
; hash = compute_hash(state->file_start + k) & HASH_MASK;
|
||||
mov tmp7 %+ d, [file_start + tmp3]
|
||||
compute_hash hash2, tmp7
|
||||
and hash2 %+ d, HASH_MASK
|
||||
; state->head[hash] = k;
|
||||
mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
|
||||
add tmp3,1
|
||||
cmp tmp3, f_i
|
||||
jl loop3
|
||||
%endif
|
||||
|
||||
|
||||
MARK __stateless_write_len_dist_bits_ %+ ARCH
|
||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
|
||||
|
||||
; continue
|
||||
cmp f_i, f_end_i
|
||||
jl loop2
|
||||
jmp end_loop_2
|
||||
|
||||
|
||||
MARK __stateless_write_lit_bits_ %+ ARCH
|
||||
%ifdef USE_HSWNI
|
||||
encode_2_literals:
|
||||
movzx tmp1, curr_data %+ b
|
||||
get_lit_code tmp1, code3, rcx, hufftables
|
||||
|
||||
shr curr_data, 8
|
||||
and curr_data, 0xff
|
||||
get_lit_code curr_data, code2, code_len2, hufftables
|
||||
|
||||
;; Calculate code associated with both literals
|
||||
shlx code2, code2, rcx
|
||||
or code2, code3
|
||||
add code_len2, rcx
|
||||
%endif
|
||||
write_lit_bits:
|
||||
mov f_end_i, [rsp + f_end_i_mem_offset]
|
||||
add f_i, 1
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
mov curr_data2, curr_data
|
||||
|
||||
compute_hash hash, curr_data
|
||||
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
|
||||
|
||||
; continue
|
||||
cmp f_i, f_end_i
|
||||
jl loop2
|
||||
|
||||
MARK __stateless_end_loops_ %+ ARCH
|
||||
end_loop_2:
|
||||
;; Handle the last bytes (at most LA_STATELESS bytes)
|
||||
add f_end_i, LA_STATELESS - LAST_BYTES_COUNT
|
||||
cmp f_i, f_end_i
|
||||
jge end_loop_2_finish
|
||||
|
||||
loop2_finish:
|
||||
;; Check for space in out buffer
|
||||
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||
ja end
|
||||
|
||||
mov curr_data %+ d, [file_start + f_i]
|
||||
compute_hash hash, curr_data
|
||||
and hash %+ d, HASH_MASK
|
||||
|
||||
;; Calculate possible distance for length/dist pair.
|
||||
xor dist, dist
|
||||
mov dist %+ w, f_i %+ w
|
||||
sub dist %+ w, word [stream + _internal_state_head + 2 * hash]
|
||||
mov [stream + _internal_state_head + 2 * hash], f_i %+ w
|
||||
|
||||
;; Check if look back distance is valid (the dec is to handle when dist = 0)
|
||||
dec dist
|
||||
cmp dist %+ d, (D-1)
|
||||
jae encode_literal_finish
|
||||
inc dist
|
||||
|
||||
;; Check if look back distance is a match
|
||||
lea tmp6, [f_end_i + LAST_BYTES_COUNT]
|
||||
sub tmp6, f_i
|
||||
lea tmp1, [file_start + f_i]
|
||||
mov tmp2, tmp1
|
||||
sub tmp2, dist
|
||||
compare tmp6, tmp1, tmp2, len, tmp3
|
||||
|
||||
;; Limit len to maximum value of 258
|
||||
mov tmp2, 258
|
||||
cmp len, 258
|
||||
cmova len, tmp2
|
||||
cmp len, SHORTEST_MATCH
|
||||
jb encode_literal_finish
|
||||
|
||||
;; Encode len/dist pair
|
||||
%ifndef LONGER_HUFFTABLE
|
||||
mov tmp3, dist
|
||||
get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx
|
||||
%else
|
||||
get_dist_code dist, code2, code_len2, hufftables ;; clobbers dist, rcx
|
||||
%endif
|
||||
get_len_code len, code, rcx, hufftables ;; rcx is code_len
|
||||
|
||||
;; Combine length and distance code for writing it out
|
||||
%ifdef USE_HSWNI
|
||||
shlx code2, code2, rcx
|
||||
%else
|
||||
shl code2, cl
|
||||
%endif
|
||||
or code2, code
|
||||
add code_len2, rcx
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
|
||||
|
||||
;; Setup for next loop
|
||||
add f_i, len
|
||||
cmp f_i, f_end_i
|
||||
jl loop2_finish
|
||||
jmp end_loop_2_finish
|
||||
|
||||
encode_literal_finish:
|
||||
;; Encode literal
|
||||
and curr_data %+ d, 0xFF
|
||||
get_lit_code curr_data, code2, code_len2, hufftables
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
|
||||
|
||||
;; Setup for next loop
|
||||
add f_i, 1
|
||||
cmp f_i, f_end_i
|
||||
jl loop2_finish
|
||||
end_loop_2_finish:
|
||||
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||
ja end
|
||||
|
||||
;; Check if any bytes left (at most LAST_BYTES_COUNT bytes)
|
||||
add f_end_i, LAST_BYTES_COUNT
|
||||
cmp f_i, f_end_i
|
||||
jz write_eob
|
||||
|
||||
;; Handle encoding last few bytes by encoding them as literals
|
||||
xor curr_data, curr_data
|
||||
final_bytes:
|
||||
movzx curr_data, byte [file_start + f_i]
|
||||
get_lit_code curr_data, code2, code_len2, hufftables
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
|
||||
|
||||
cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
|
||||
ja end
|
||||
|
||||
inc f_i
|
||||
cmp f_i, f_end_i
|
||||
jl final_bytes
|
||||
|
||||
write_eob:
|
||||
;; Write out end of block
|
||||
get_lit_code 256, code2, code_len2, hufftables
|
||||
write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp3
|
||||
mov dword [stream + _internal_state_has_eob], 1
|
||||
|
||||
end:
|
||||
;; update input buffer
|
||||
add [stream + _total_in], f_i %+ d
|
||||
add [stream + _next_in], f_i
|
||||
sub [stream + _avail_in], f_i %+ d
|
||||
|
||||
;; update output buffer
|
||||
mov [stream + _next_out], m_out_buf
|
||||
sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start]
|
||||
sub [stream + _avail_out], m_out_buf %+ d
|
||||
add [stream + _total_out], m_out_buf %+ d
|
||||
|
||||
mov [stream + _internal_state_bitbuf_m_bits], m_bits
|
||||
mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
|
||||
|
||||
mov rbx, [rsp + gpr_save_mem_offset + 0*8]
|
||||
mov rsi, [rsp + gpr_save_mem_offset + 1*8]
|
||||
mov rdi, [rsp + gpr_save_mem_offset + 2*8]
|
||||
mov rbp, [rsp + gpr_save_mem_offset + 3*8]
|
||||
mov r12, [rsp + gpr_save_mem_offset + 4*8]
|
||||
mov r13, [rsp + gpr_save_mem_offset + 5*8]
|
||||
mov r14, [rsp + gpr_save_mem_offset + 6*8]
|
||||
mov r15, [rsp + gpr_save_mem_offset + 7*8]
|
||||
|
||||
%ifndef ALIGN_STACK
|
||||
add rsp, stack_size
|
||||
%else
|
||||
mov rsp, rbp
|
||||
pop rbp
|
||||
%endif
|
||||
ret
|
||||
|
||||
MARK __stateless_compare_loops_ %+ ARCH
|
||||
compare_loop:
|
||||
%if (COMPARE_TYPE == 1)
|
||||
compare250 tmp1, tmp2, len, tmp3
|
||||
%elif (COMPARE_TYPE == 2)
|
||||
compare250_x tmp1, tmp2, len, tmp3, xtmp0, xtmp1
|
||||
%elif (COMPARE_TYPE == 3)
|
||||
compare250_y tmp1, tmp2, len, tmp3, ytmp0, ytmp1
|
||||
%else
|
||||
%error Unknown Compare type COMPARE_TYPE
|
||||
% error
|
||||
%endif
|
||||
jmp len_dist_huffman
|
||||
|
||||
compare_loop2:
|
||||
%if (COMPARE_TYPE == 1)
|
||||
compare250 tmp1, tmp2, len2, tmp3
|
||||
%elif (COMPARE_TYPE == 2)
|
||||
compare250_x tmp1, tmp2, len2, tmp3, xtmp0, xtmp1
|
||||
%elif (COMPARE_TYPE == 3)
|
||||
compare250_y tmp1, tmp2, len2, tmp3, ytmp0, ytmp1
|
||||
%else
|
||||
%error Unknown Compare type COMPARE_TYPE
|
||||
% error
|
||||
%endif
|
||||
and curr_data, 0xff
|
||||
get_lit_code curr_data, code3, code_len3, hufftables
|
||||
jmp len_dist_lit_huffman
|
||||
|
||||
section .data
|
||||
align 4
|
||||
const_D: dq D
|
7
igzip/igzip_stateless_01.asm
Normal file
7
igzip/igzip_stateless_01.asm
Normal file
@ -0,0 +1,7 @@
|
||||
; Build configuration for the "01" variant of the stateless deflate body.
; ARCH is pasted onto the exported symbol name by igzip_stateless.asm
; (isal_deflate_body_stateless_ %+ ARCH), giving isal_deflate_body_stateless_01.
%define ARCH 01
|
||||
|
||||
; COMPARE_TYPE 1 selects the compare250 macro in the included file's compare
; loops. The %ifndef guard lets a definition made before this point win.
%ifndef COMPARE_TYPE
|
||||
%define COMPARE_TYPE 1
|
||||
%endif
|
||||
|
||||
%include "igzip_stateless.asm"
|
8
igzip/igzip_stateless_04.asm
Normal file
8
igzip/igzip_stateless_04.asm
Normal file
@ -0,0 +1,8 @@
|
||||
; Build configuration for the "04" variant of the stateless deflate body.
; ARCH is pasted onto the exported symbol name by igzip_stateless.asm
; (isal_deflate_body_stateless_ %+ ARCH), giving isal_deflate_body_stateless_04.
%define ARCH 04
|
||||
; USE_HSWNI switches the included file to its blsmsk / shlx code paths.
%define USE_HSWNI
|
||||
|
||||
; COMPARE_TYPE 3 selects the compare250_y macro (ymm temporaries) in the
; included file's compare loops. The %ifndef guard lets a definition made
; before this point win.
%ifndef COMPARE_TYPE
|
||||
%define COMPARE_TYPE 3
|
||||
%endif
|
||||
|
||||
%include "igzip_stateless.asm"
|
151
igzip/igzip_stateless_base.c
Normal file
151
igzip/igzip_stateless_base.c
Normal file
@ -0,0 +1,151 @@
|
||||
#include <stdint.h>
|
||||
#include "igzip_lib.h"
|
||||
#include "huffman.h"
|
||||
#include "huff_codes.h"
|
||||
#include "bitbuf2.h"
|
||||
|
||||
static inline void update_state(struct isal_zstream *stream, struct isal_zstate *state,
|
||||
uint8_t * end_in, uint8_t * start_in)
|
||||
{
|
||||
uint32_t count;
|
||||
stream->avail_in = end_in - stream->next_in;
|
||||
stream->total_in += stream->next_in - start_in;
|
||||
count = buffer_used(&state->bitbuf);
|
||||
stream->next_out = buffer_ptr(&state->bitbuf);
|
||||
stream->avail_out -= count;
|
||||
stream->total_out += count;
|
||||
|
||||
}
|
||||
|
||||
void isal_deflate_body_stateless_base(struct isal_zstream *stream)
|
||||
{
|
||||
uint32_t literal = 0, hash;
|
||||
uint8_t *start_in, *end_in, *end, *next_hash;
|
||||
uint16_t match_length;
|
||||
uint32_t dist;
|
||||
uint64_t code, code_len, code2, code_len2, i;
|
||||
struct isal_zstate *state = &stream->internal_state;
|
||||
uint16_t *last_seen = state->head;
|
||||
|
||||
if (stream->avail_in == 0)
|
||||
return;
|
||||
|
||||
set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
|
||||
start_in = stream->next_in;
|
||||
end_in = stream->next_in + stream->avail_in;
|
||||
|
||||
while (stream->next_in < end_in - 3) {
|
||||
if (is_full(&state->bitbuf)) {
|
||||
update_state(stream, state, end_in, start_in);
|
||||
return;
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) stream->next_in;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
dist = (uint64_t) (stream->next_in - last_seen[hash]) & 0xFFFF;
|
||||
last_seen[hash] = (uint64_t) stream->next_in;
|
||||
|
||||
if (dist - 1 < IGZIP_D - 1 && stream->next_in - dist >= start_in) { /* The -1 are to handle the case when dist = 0 */
|
||||
match_length =
|
||||
compare258(stream->next_in - dist, stream->next_in,
|
||||
end_in - stream->next_in);
|
||||
|
||||
if (match_length >= SHORTEST_MATCH) {
|
||||
next_hash = stream->next_in;
|
||||
#ifdef LIMIT_HASH_UPDATE
|
||||
end = next_hash + 3;
|
||||
#else
|
||||
end = next_hash + match_length;
|
||||
#endif
|
||||
if (end > end_in - 3)
|
||||
end = end_in - 3;
|
||||
next_hash++;
|
||||
for (; next_hash < end; next_hash++) {
|
||||
literal = *(uint32_t *) next_hash;
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
last_seen[hash] = (uint64_t) next_hash;
|
||||
}
|
||||
|
||||
get_len_code(stream->hufftables, match_length, &code,
|
||||
&code_len);
|
||||
get_dist_code(stream->hufftables, dist, &code2, &code_len2);
|
||||
|
||||
code |= code2 << code_len;
|
||||
code_len += code_len2;
|
||||
|
||||
write_bits(&state->bitbuf, code, code_len);
|
||||
|
||||
stream->next_in += match_length;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
|
||||
write_bits(&state->bitbuf, code, code_len);
|
||||
stream->next_in++;
|
||||
}
|
||||
|
||||
if (is_full(&state->bitbuf)) {
|
||||
update_state(stream, state, end_in, start_in);
|
||||
return;
|
||||
}
|
||||
|
||||
literal = *(uint32_t *) (end_in - 4);
|
||||
|
||||
for (i = 4; i > end_in - stream->next_in; i--)
|
||||
literal = literal >> 8;
|
||||
|
||||
hash = compute_hash(literal) & HASH_MASK;
|
||||
dist = (uint64_t) (stream->next_in - last_seen[hash]) & 0xFFFF;
|
||||
|
||||
if (dist - 1 < IGZIP_D - 1 && stream->next_in - dist >= start_in) {
|
||||
match_length =
|
||||
compare258(stream->next_in - dist, stream->next_in,
|
||||
end_in - stream->next_in);
|
||||
if (match_length >= SHORTEST_MATCH) {
|
||||
get_len_code(stream->hufftables, match_length, &code, &code_len);
|
||||
get_dist_code(stream->hufftables, dist, &code2, &code_len2);
|
||||
code |= code2 << code_len;
|
||||
code_len += code_len2;
|
||||
write_bits(&state->bitbuf, code, code_len);
|
||||
stream->next_in += 3;
|
||||
|
||||
if (is_full(&state->bitbuf)) {
|
||||
update_state(stream, state, end_in, start_in);
|
||||
return;
|
||||
}
|
||||
|
||||
get_lit_code(stream->hufftables, 256, &code, &code_len);
|
||||
write_bits(&state->bitbuf, code, code_len);
|
||||
|
||||
if (is_full(&state->bitbuf)) {
|
||||
update_state(stream, state, end_in, start_in);
|
||||
return;
|
||||
}
|
||||
|
||||
state->has_eob = 1;
|
||||
update_state(stream, state, end_in, start_in);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
while (stream->next_in < end_in) {
|
||||
get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
|
||||
write_bits(&state->bitbuf, code, code_len);
|
||||
stream->next_in++;
|
||||
|
||||
if (is_full(&state->bitbuf)) {
|
||||
update_state(stream, state, end_in, start_in);
|
||||
return;
|
||||
}
|
||||
literal >>= 8;
|
||||
}
|
||||
|
||||
get_lit_code(stream->hufftables, 256, &code, &code_len);
|
||||
write_bits(&state->bitbuf, code, code_len);
|
||||
|
||||
state->has_eob = 1;
|
||||
update_state(stream, state, end_in, start_in);
|
||||
return;
|
||||
}
|
155
igzip/igzip_stateless_file_perf.c
Normal file
155
igzip/igzip_stateless_file_perf.c
Normal file
@ -0,0 +1,155 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "igzip_lib.h"
|
||||
#include "test.h"
|
||||
|
||||
#define BUF_SIZE 1024
|
||||
#define MIN_TEST_LOOPS 10
|
||||
#ifndef RUN_MEM_SIZE
|
||||
# define RUN_MEM_SIZE 5000000000
|
||||
#endif
|
||||
|
||||
struct isal_zstream stream;
|
||||
|
||||
/*
 * get_filesize() - report the size in bytes of an open, seekable stream.
 *
 * The current file position is saved, the stream is moved to its end to
 * read the size, and the saved position is then restored.
 *
 * Returns the size as an int, or -1 if the position could not be queried,
 * moved or restored.  Sizes that do not fit in an int are not detected here.
 */
int get_filesize(FILE * f)
{
	/* ftell() yields a long; holding it in an int could truncate the
	 * saved offset and restore the wrong position. */
	long curr, end;

	curr = ftell(f);	/* Save current position */
	if (curr < 0)
		return -1;

	if (fseek(f, 0L, SEEK_END) != 0)
		return -1;
	end = ftell(f);

	if (fseek(f, curr, SEEK_SET) != 0)	/* Restore position */
		return -1;

	if (end < 0)
		return -1;
	return (int)end;
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
FILE *in, *out = NULL;
|
||||
unsigned char *inbuf, *outbuf;
|
||||
int i, infile_size, iterations, outbuf_size;
|
||||
|
||||
if (argc > 3 || argc < 2) {
|
||||
fprintf(stderr, "Usage: igzip_file_perf infile [outfile]\n"
|
||||
"\t - Runs multiple iterations of igzip on a file to "
|
||||
"get more accurate time results.\n");
|
||||
exit(0);
|
||||
}
|
||||
in = fopen(argv[1], "rb");
|
||||
if (!in) {
|
||||
fprintf(stderr, "Can't open %s for reading\n", argv[1]);
|
||||
exit(0);
|
||||
}
|
||||
if (argc > 2) {
|
||||
out = fopen(argv[2], "wb");
|
||||
if (!out) {
|
||||
fprintf(stderr, "Can't open %s for writing\n", argv[2]);
|
||||
exit(0);
|
||||
}
|
||||
printf("outfile=%s\n", argv[2]);
|
||||
}
|
||||
printf("Window Size: %d K\n", HIST_SIZE);
|
||||
printf("igzip_file_perf: \n");
|
||||
fflush(0);
|
||||
/* Allocate space for entire input file and output
|
||||
* (assuming some possible expansion on output size)
|
||||
*/
|
||||
infile_size = get_filesize(in);
|
||||
|
||||
if (infile_size != 0) {
|
||||
outbuf_size = infile_size * 1.07;
|
||||
iterations = RUN_MEM_SIZE / infile_size;
|
||||
} else {
|
||||
outbuf_size = BUF_SIZE;
|
||||
iterations = MIN_TEST_LOOPS;
|
||||
}
|
||||
if (iterations < MIN_TEST_LOOPS)
|
||||
iterations = MIN_TEST_LOOPS;
|
||||
|
||||
inbuf = malloc(infile_size);
|
||||
if (inbuf == NULL) {
|
||||
fprintf(stderr, "Can't allocate input buffer memory\n");
|
||||
exit(0);
|
||||
}
|
||||
outbuf = malloc(outbuf_size);
|
||||
if (outbuf == NULL) {
|
||||
fprintf(stderr, "Can't allocate output buffer memory\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
printf("igzip_file_perf: %s %d iterations\n", argv[1], iterations);
|
||||
/* Read complete input file into buffer */
|
||||
stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in);
|
||||
if (stream.avail_in != infile_size) {
|
||||
fprintf(stderr, "Couldn't fit all of input file into buffer\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
struct perf start, stop;
|
||||
perf_start(&start);
|
||||
|
||||
for (i = 0; i < iterations; i++) {
|
||||
isal_deflate_init(&stream);
|
||||
stream.end_of_stream = 1; /* Do the entire file at once */
|
||||
stream.flush = NO_FLUSH;
|
||||
stream.next_in = inbuf;
|
||||
stream.avail_in = infile_size;
|
||||
stream.next_out = outbuf;
|
||||
stream.avail_out = outbuf_size;
|
||||
isal_deflate_stateless(&stream);
|
||||
if (stream.avail_in != 0)
|
||||
break;
|
||||
}
|
||||
perf_stop(&stop);
|
||||
|
||||
if (stream.avail_in != 0) {
|
||||
fprintf(stderr, "Could not compress all of inbuf\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
printf(" file %s - in_size=%d out_size=%d iter=%d ratio=%3.1f%%\n", argv[1],
|
||||
infile_size, stream.total_out, i, 100.0 * stream.total_out / infile_size);
|
||||
|
||||
printf("igzip_file: ");
|
||||
perf_print(stop, start, (long long)infile_size * i);
|
||||
|
||||
if (argc > 2 && out) {
|
||||
printf("writing %s\n", argv[2]);
|
||||
fwrite(outbuf, 1, stream.total_out, out);
|
||||
fclose(out);
|
||||
}
|
||||
|
||||
fclose(in);
|
||||
printf("End of igzip_file_perf\n\n");
|
||||
fflush(0);
|
||||
return 0;
|
||||
}
|
86
igzip/igzip_sync_flush_example.c
Normal file
86
igzip/igzip_sync_flush_example.c
Normal file
@ -0,0 +1,86 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "igzip_lib.h"
|
||||
|
||||
/* Size in bytes of each I/O buffer; parenthesized so the macro expands
 * safely inside larger expressions such as division. */
#define BUF_SIZE (8 * 1024)
|
||||
|
||||
struct isal_zstream stream;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
uint8_t inbuf[BUF_SIZE], outbuf[BUF_SIZE];
|
||||
FILE *in, *out;
|
||||
|
||||
if (argc != 3) {
|
||||
fprintf(stderr, "Usage: igzip_sync_flush_example infile outfile\n");
|
||||
exit(0);
|
||||
}
|
||||
in = fopen(argv[1], "rb");
|
||||
if (!in) {
|
||||
fprintf(stderr, "Can't open %s for reading\n", argv[1]);
|
||||
exit(0);
|
||||
}
|
||||
out = fopen(argv[2], "wb");
|
||||
if (!out) {
|
||||
fprintf(stderr, "Can't open %s for writing\n", argv[2]);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
printf("igzip_sync_flush_example\nWindow Size: %d K\n", HIST_SIZE);
|
||||
fflush(0);
|
||||
|
||||
isal_deflate_init(&stream);
|
||||
stream.end_of_stream = 0;
|
||||
stream.flush = SYNC_FLUSH;
|
||||
|
||||
do {
|
||||
if (stream.internal_state.state == ZSTATE_NEW_HDR) {
|
||||
stream.avail_in = (uint32_t) fread(inbuf, 1, BUF_SIZE, in);
|
||||
stream.end_of_stream = feof(in);
|
||||
stream.next_in = inbuf;
|
||||
}
|
||||
do {
|
||||
stream.avail_out = BUF_SIZE;
|
||||
stream.next_out = outbuf;
|
||||
isal_deflate(&stream);
|
||||
fwrite(outbuf, 1, BUF_SIZE - stream.avail_out, out);
|
||||
} while (stream.avail_out == 0);
|
||||
|
||||
} while (stream.internal_state.state != ZSTATE_END);
|
||||
|
||||
fclose(out);
|
||||
fclose(in);
|
||||
|
||||
printf("End of igzip_sync_flush_example\n\n");
|
||||
return 0;
|
||||
}
|
163
igzip/igzip_sync_flush_file_perf.c
Normal file
163
igzip/igzip_sync_flush_file_perf.c
Normal file
@ -0,0 +1,163 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "igzip_lib.h"
|
||||
#include "test.h"
|
||||
|
||||
#define BUF_SIZE 1024
|
||||
#define MIN_TEST_LOOPS 100
|
||||
#ifndef RUN_MEM_SIZE
|
||||
# define RUN_MEM_SIZE 500000000
|
||||
#endif
|
||||
|
||||
struct isal_zstream stream;
|
||||
|
||||
/*
 * get_filesize() - report the size in bytes of an open, seekable stream.
 *
 * The current file position is saved, the stream is moved to its end to
 * read the size, and the saved position is then restored.
 *
 * Returns the size as an int, or -1 if the position could not be queried,
 * moved or restored.  Sizes that do not fit in an int are not detected here.
 */
int get_filesize(FILE * f)
{
	/* ftell() yields a long; holding it in an int could truncate the
	 * saved offset and restore the wrong position. */
	long curr, end;

	curr = ftell(f);	/* Save current position */
	if (curr < 0)
		return -1;

	if (fseek(f, 0L, SEEK_END) != 0)
		return -1;
	end = ftell(f);

	if (fseek(f, curr, SEEK_SET) != 0)	/* Restore position */
		return -1;

	if (end < 0)
		return -1;
	return (int)end;
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
FILE *in, *out = NULL;
|
||||
unsigned char *inbuf, *outbuf;
|
||||
int i, infile_size, iterations, outbuf_size;
|
||||
|
||||
if (argc > 3 || argc < 2) {
|
||||
fprintf(stderr, "Usage: igzip_sync_flush_file_perf infile [outfile]\n"
|
||||
"\t - Runs multiple iterations of igzip on a file to get more accurate time results.\n");
|
||||
exit(0);
|
||||
}
|
||||
in = fopen(argv[1], "rb");
|
||||
if (!in) {
|
||||
fprintf(stderr, "Can't open %s for reading\n", argv[1]);
|
||||
exit(0);
|
||||
}
|
||||
if (argc > 2) {
|
||||
out = fopen(argv[2], "wb");
|
||||
if (!out) {
|
||||
fprintf(stderr, "Can't open %s for writing\n", argv[2]);
|
||||
exit(0);
|
||||
}
|
||||
printf("outfile=%s\n", argv[2]);
|
||||
}
|
||||
printf("Window Size: %d K\n", HIST_SIZE);
|
||||
printf("igzip_sync_flush_file_perf: \n");
|
||||
fflush(0);
|
||||
/* Allocate space for entire input file and
|
||||
* output (assuming 1:1 max output size)
|
||||
*/
|
||||
infile_size = get_filesize(in);
|
||||
|
||||
if (infile_size != 0) {
|
||||
outbuf_size = infile_size;
|
||||
iterations = RUN_MEM_SIZE / infile_size;
|
||||
} else {
|
||||
outbuf_size = BUF_SIZE;
|
||||
iterations = MIN_TEST_LOOPS;
|
||||
}
|
||||
if (iterations < MIN_TEST_LOOPS)
|
||||
iterations = MIN_TEST_LOOPS;
|
||||
|
||||
inbuf = malloc(infile_size);
|
||||
if (inbuf == NULL) {
|
||||
fprintf(stderr, "Can't allocate input buffer memory\n");
|
||||
exit(0);
|
||||
}
|
||||
outbuf = malloc(outbuf_size);
|
||||
if (outbuf == NULL) {
|
||||
fprintf(stderr, "Can't allocate output buffer memory\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
printf("igzip_sync_flush_file_perf: %s %d iterations\n", argv[1], iterations);
|
||||
/* Read complete input file into buffer */
|
||||
stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in);
|
||||
if (stream.avail_in != infile_size) {
|
||||
fprintf(stderr, "Couldn't fit all of input file into buffer\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
struct perf start, stop;
|
||||
perf_start(&start);
|
||||
|
||||
for (i = 0; i < iterations; i++) {
|
||||
isal_deflate_init(&stream);
|
||||
stream.end_of_stream = 0;
|
||||
stream.flush = SYNC_FLUSH;
|
||||
stream.next_in = inbuf;
|
||||
stream.avail_in = infile_size / 2;
|
||||
stream.next_out = outbuf;
|
||||
stream.avail_out = outbuf_size / 2;
|
||||
isal_deflate(&stream);
|
||||
if (infile_size == 0)
|
||||
continue;
|
||||
stream.avail_in = infile_size - infile_size / 2;
|
||||
stream.end_of_stream = 1;
|
||||
stream.next_in = inbuf + stream.total_in;
|
||||
stream.flush = SYNC_FLUSH;
|
||||
stream.avail_out = infile_size - outbuf_size / 2;
|
||||
stream.next_out = outbuf + stream.total_out;
|
||||
isal_deflate(&stream);
|
||||
if (stream.avail_in != 0)
|
||||
break;
|
||||
}
|
||||
perf_stop(&stop);
|
||||
|
||||
if (stream.avail_in != 0) {
|
||||
fprintf(stderr, "Could not compress all of inbuf\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
printf(" file %s - in_size=%d out_size=%d iter=%d ratio=%3.1f%%\n", argv[1],
|
||||
infile_size, stream.total_out, i, 100.0 * stream.total_out / infile_size);
|
||||
|
||||
printf("igzip_file: ");
|
||||
perf_print(stop, start, (long long)infile_size * i);
|
||||
|
||||
if (argc > 2 && out) {
|
||||
printf("writing %s\n", argv[2]);
|
||||
fwrite(outbuf, 1, stream.total_out, out);
|
||||
fclose(out);
|
||||
}
|
||||
|
||||
fclose(in);
|
||||
printf("End of igzip_sync_flush_file_perf\n\n");
|
||||
fflush(0);
|
||||
return 0;
|
||||
}
|
96
igzip/igzip_sync_flush_perf.c
Normal file
96
igzip/igzip_sync_flush_perf.c
Normal file
@ -0,0 +1,96 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "igzip_lib.h"
|
||||
#include "test.h"
|
||||
|
||||
#define TEST_LEN (1024*1024)
|
||||
#define IBUF_SIZE (1024*1024)
|
||||
#define OBUF_SIZE (1024*1024)
|
||||
|
||||
#define TEST_LOOPS 400
|
||||
#define TEST_TYPE_STR "_warm"
|
||||
|
||||
/*
 * create_data() - fill a buffer with a repeating lowercase alphabet.
 *
 * Writes `size` bytes to `data`.  The sequence starts at 'b', runs up to
 * 'z', wraps to 'a' and repeats.  A zero or negative size writes nothing.
 */
void create_data(unsigned char *data, int size)
{
	char c = 'a';

	/* "> 0" keeps a negative size from counting down past zero and
	 * writing far beyond the buffer. */
	while (size-- > 0)
		*data++ = c = c < 'z' ? c + 1 : 'a';
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i = 1;
|
||||
struct isal_zstream stream;
|
||||
unsigned char inbuf[IBUF_SIZE], zbuf[OBUF_SIZE];
|
||||
struct perf start, stop;
|
||||
|
||||
create_data(inbuf, TEST_LEN);
|
||||
printf("Window Size: %d K\n", HIST_SIZE);
|
||||
printf("igzip_sync_flush_perf: \n");
|
||||
fflush(0);
|
||||
|
||||
perf_start(&start);
|
||||
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
isal_deflate_init(&stream);
|
||||
|
||||
stream.avail_in = TEST_LEN;
|
||||
if (i == (TEST_LOOPS - 1))
|
||||
stream.end_of_stream = 1;
|
||||
else
|
||||
stream.end_of_stream = 0;
|
||||
stream.next_in = inbuf;
|
||||
stream.flush = SYNC_FLUSH;
|
||||
|
||||
do {
|
||||
stream.avail_out = OBUF_SIZE;
|
||||
stream.next_out = zbuf;
|
||||
isal_deflate(&stream);
|
||||
} while (stream.avail_out == 0);
|
||||
|
||||
}
|
||||
|
||||
perf_stop(&stop);
|
||||
|
||||
printf("igzip_sync_flush_perf" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)(TEST_LEN) * (i));
|
||||
|
||||
if (!stream.end_of_stream) {
|
||||
printf("error: compression test could not fit into allocated buffers\n");
|
||||
return -1;
|
||||
}
|
||||
printf("End of igzip_sync_flush_perf\n\n");
|
||||
fflush(0);
|
||||
return 0;
|
||||
}
|
44
igzip/lz0a_const.asm
Normal file
44
igzip/lz0a_const.asm
Normal file
@ -0,0 +1,44 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Sizing constants for the igzip compression code.
;; HIST_SIZE is not defined in this file and must already be defined
;; when these lines are assembled.
%assign K 1024
|
||||
;; D is HIST_SIZE scaled by K, i.e. HIST_SIZE counts units of 1024 bytes
%assign D HIST_SIZE * K ;; Amount of history
|
||||
;; NOTE(review): 17 * 16 = 272 is a multiple of 16 but not of 32 --
;; confirm which boundary the rounding comment below refers to.
%assign LA 17 * 16 ;; Max look-ahead, rounded up to 32 byte boundary
|
||||
%assign BSIZE 2*HIST_SIZE*K + LA ;; Nominal buffer size
|
||||
|
||||
;; Constants for stateless compression
|
||||
%define LAST_BYTES_COUNT 3 ;; Bytes to prevent reading out of array bounds
|
||||
%define LA_STATELESS 258 ;; No round up since no data is copied to a buffer
|
||||
|
||||
;; The hash table has as many entries as there are bytes of history (D)
%assign HASH_SIZE D
|
||||
;; Usable as a bit mask only while HASH_SIZE is a power of two
%assign HASH_MASK (HASH_SIZE - 1)
|
||||
|
||||
%assign SHORTEST_MATCH 3
|
||||
|
||||
%assign SLOP 8
|
87
igzip/options.asm
Normal file
87
igzip/options.asm
Normal file
@ -0,0 +1,87 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
default rel
|
||||
|
||||
%ifndef __OPTIONS_ASM__
|
||||
%define __OPTIONS_ASM__
|
||||
|
||||
%ifndef IGZIP_USE_GZIP_FORMAT
|
||||
%define DEFLATE
|
||||
%endif
|
||||
|
||||
; Options:dir
|
||||
; m - reschedule mem reads
|
||||
; e b - bitbuff style
|
||||
; t s x - compare style
|
||||
; h - limit hash updates
|
||||
; l - use longer huffman table
|
||||
; f - fix cache read
|
||||
|
||||
; Select the history size: 32 when LARGE_WINDOW is defined, otherwise 8.
; NOTE(review): the unit appears to be KiB (the C programs print this
; value as "Window Size: %d K") -- confirm against the users of HIST_SIZE.
%ifdef LARGE_WINDOW
|
||||
%define HIST_SIZE 32
|
||||
%else
|
||||
%define HIST_SIZE 8
|
||||
%endif
|
||||
|
||||
; Bit-buffer style selection.  The first three branches are intentionally
; empty: if USE_BITBUFB, USE_BITBUF8 or USE_BITBUF_ELSE is already
; defined, that choice is left untouched.  Only when none of them is
; defined does the final branch default to USE_BITBUFB.
%ifdef USE_BITBUFB
|
||||
%elifdef USE_BITBUF8
|
||||
%elifdef USE_BITBUF_ELSE
|
||||
%else
|
||||
; bit buffer types
|
||||
; BITBUFB: (b) Always write data
|
||||
%define USE_BITBUFB
|
||||
%endif
|
||||
|
||||
; (h) limit hash update
|
||||
%define LIMIT_HASH_UPDATE
|
||||
|
||||
; (l) longer huffman table
|
||||
%define LONGER_HUFFTABLE
|
||||
|
||||
; (f) fix cache read problem
|
||||
%define FIX_CACHE_READ
|
||||
|
||||
%if (HIST_SIZE > 8)
|
||||
%undef LONGER_HUFFTABLE
|
||||
%endif
|
||||
|
||||
%define IGZIP_MAX_DEF_HDR_SIZE 328
|
||||
|
||||
; WRT_OPT expands to "wrt ..sym" only when the output format is elf64 and
; __NASM_VER__ is not defined; in every other case it expands to nothing.
; NOTE(review): an undefined __NASM_VER__ presumably means a non-NASM
; assembler such as yasm is in use -- confirm.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%ifndef __NASM_VER__
|
||||
%define WRT_OPT wrt ..sym
|
||||
%else
|
||||
%define WRT_OPT
|
||||
%endif
|
||||
%else
|
||||
%define WRT_OPT
|
||||
%endif
|
||||
|
||||
%endif ; ifndef __OPTIONS_ASM__
|
68
igzip/repeated_char_result.h
Normal file
68
igzip/repeated_char_result.h
Normal file
@ -0,0 +1,68 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
#ifndef _IGZIP_REPEATED_8K_CHAR_RESULT_H_
#define _IGZIP_REPEATED_8K_CHAR_RESULT_H_

/*
 * Precomputed codes and headers for blocks made of one repeated byte.
 * This header uses uint32_t, so <stdint.h> (or a header that provides
 * it) must be included before this file.
 */

/* The code for the literal being encoded */
#define CODE_LIT 0x1
#define CODE_LIT_LENGTH 0x2

/* The code for repeat 10. The length includes the distance code length */
#define CODE_10 0x3
#define CODE_10_LENGTH 0x4

/* The code for repeat 115-130. The length includes the distance code length */
#define CODE_280 0x0f
#define CODE_280_LENGTH 0x4
/* Parenthesized so the sum survives use inside a larger expression.
 * NOTE(review): the added 4 and 1 are presumably the extra length bits
 * and the distance code bit -- confirm. */
#define CODE_280_TOTAL_LENGTH (CODE_280_LENGTH + 4 + 1)

/* Code representing the end of block. */
#define END_OF_BLOCK 0x7
#define END_OF_BLOCK_LEN 0x4

/* MIN_REPEAT_LEN currently optimizes storage space, another possibility is to
 * find the size which optimizes speed instead. */
#define MIN_REPEAT_LEN (4*1024)

#define HEADER_LENGTH 16

/* Maximum length of the portion of the header represented by repeat lengths
 * smaller than 258 */
#define MAX_FIXUP_CODE_LENGTH 8

/* Headers for constant 0x00 and 0xFF blocks
 * This also contains the first literal character.
 * Each row lists four words; the fifth element of each row is left to be
 * zero-initialized. */
const uint32_t repeated_char_header[2][5] = {
	{ 0x0121c0ec, 0xc30c0000, 0x7d57fab0, 0x49270938},	/* Deflate header for 0x00 */
	{ 0x0121c0ec, 0xc30c0000, 0x7baaff30, 0x49270938}	/* Deflate header for 0xFF */

};

#endif /*_IGZIP_REPEATED_8K_CHAR_RESULT_H_*/
|
207
igzip/stdmac.asm
Normal file
207
igzip/stdmac.asm
Normal file
@ -0,0 +1,207 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
;; internal macro used by push_all
|
||||
;; push args L to R
|
||||
%macro push_all_ 1-*
|
||||
%xdefine _PUSH_ALL_REGS_COUNT_ %0
|
||||
%rep %0
|
||||
push %1
|
||||
%rotate 1
|
||||
%endrep
|
||||
%endmacro
|
||||
|
||||
;; internal macro used by pop_all
|
||||
;; pop args R to L
|
||||
%macro pop_all_ 1-*
|
||||
%rep %0
|
||||
%rotate -1
|
||||
pop %1
|
||||
%endrep
|
||||
%endmacro
|
||||
|
||||
%xdefine _PUSH_ALL_REGS_COUNT_ 0
|
||||
%xdefine _ALLOC_STACK_VAL_ 0
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; STACK_OFFSET
|
||||
;; Number of bytes subtracted from stack due to PUSH_ALL and ALLOC_STACK
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%define STACK_OFFSET (_PUSH_ALL_REGS_COUNT_ * 8 + _ALLOC_STACK_VAL_)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; PUSH_ALL reg1, reg2, ...
|
||||
;; push args L to R, remember regs for pop_all
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%macro PUSH_ALL 1+
|
||||
%xdefine _PUSH_ALL_REGS_ %1
|
||||
push_all_ %1
|
||||
%endmacro
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; POP_ALL
|
||||
;; pop args from prev "push_all" R to L
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%macro POP_ALL 0
|
||||
pop_all_ _PUSH_ALL_REGS_
|
||||
%xdefine _PUSH_ALL_REGS_COUNT_ 0
|
||||
%endmacro
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; ALLOC_STACK n
|
||||
;; subtract n from the stack pointer and remember the value for restore_stack
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%macro ALLOC_STACK 1
|
||||
%xdefine _ALLOC_STACK_VAL_ %1
|
||||
sub rsp, %1
|
||||
%endmacro
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; RESTORE_STACK
|
||||
;; add n to the stack pointer, where n is the arg to the previous alloc_stack
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%macro RESTORE_STACK 0
|
||||
add rsp, _ALLOC_STACK_VAL_
|
||||
%xdefine _ALLOC_STACK_VAL_ 0
|
||||
%endmacro
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; NOPN n
|
||||
;; Create n bytes of NOP, using nops of up to 8 bytes each
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%macro NOPN 1
|
||||
|
||||
%assign %%i %1
|
||||
%rep 200
|
||||
%if (%%i < 9)
|
||||
nopn %%i
|
||||
%exitrep
|
||||
%else
|
||||
nopn 8
|
||||
%assign %%i (%%i - 8)
|
||||
%endif
|
||||
%endrep
|
||||
%endmacro
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; nopn n
|
||||
;; Create n bytes of NOP, where n is between 1 and 9
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%macro nopn 1
|
||||
%if (%1 == 1)
|
||||
nop
|
||||
%elif (%1 == 2)
|
||||
db 0x66
|
||||
nop
|
||||
%elif (%1 == 3)
|
||||
db 0x0F
|
||||
db 0x1F
|
||||
db 0x00
|
||||
%elif (%1 == 4)
|
||||
db 0x0F
|
||||
db 0x1F
|
||||
db 0x40
|
||||
db 0x00
|
||||
%elif (%1 == 5)
|
||||
db 0x0F
|
||||
db 0x1F
|
||||
db 0x44
|
||||
db 0x00
|
||||
db 0x00
|
||||
%elif (%1 == 6)
|
||||
db 0x66
|
||||
db 0x0F
|
||||
db 0x1F
|
||||
db 0x44
|
||||
db 0x00
|
||||
db 0x00
|
||||
%elif (%1 == 7)
|
||||
db 0x0F
|
||||
db 0x1F
|
||||
db 0x80
|
||||
db 0x00
|
||||
db 0x00
|
||||
db 0x00
|
||||
db 0x00
|
||||
%elif (%1 == 8)
|
||||
db 0x0F
|
||||
db 0x1F
|
||||
db 0x84
|
||||
db 0x00
|
||||
db 0x00
|
||||
db 0x00
|
||||
db 0x00
|
||||
db 0x00
|
||||
%elif (%1 == 9)
|
||||
db 0x66
|
||||
db 0x0F
|
||||
db 0x1F
|
||||
db 0x84
|
||||
db 0x00
|
||||
db 0x00
|
||||
db 0x00
|
||||
db 0x00
|
||||
db 0x00
|
||||
%else
|
||||
%error Invalid value to nopn
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rolx64 dst, src, amount
|
||||
;; Emulate a rolx instruction using rorx, assuming data 64 bits wide
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%macro rolx64 3
|
||||
rorx %1, %2, (64-%3)
|
||||
%endm
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rolx32 dst, src, amount
|
||||
;; Emulate a rolx instruction using rorx, assuming data 32 bits wide
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%macro rolx32 3
|
||||
rorx %1, %2, (32-%3)
|
||||
%endm
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Define a function void ssc(uint64_t x)
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
%macro DEF_SSC 0
|
||||
global ssc
|
||||
ssc:
|
||||
mov rax, rbx
|
||||
mov rbx, rcx
|
||||
db 0x64
|
||||
db 0x67
|
||||
nop
|
||||
mov rbx, rax
|
||||
ret
|
||||
%endm
|
371
include/igzip_lib.h
Normal file
371
include/igzip_lib.h
Normal file
@ -0,0 +1,371 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef _IGZIP_H
|
||||
#define _IGZIP_H
|
||||
|
||||
/**
|
||||
* @file igzip_lib.h
|
||||
*
|
||||
* @brief This file defines the igzip compression interface, a high performance
|
||||
* deflate compression interface for storage applications.
|
||||
*
|
||||
* Deflate is a widely used compression standard that can be used standalone, it
|
||||
* also forms the basis of gzip and zlib compression formats. Igzip supports the
|
||||
* following flush features:
|
||||
*
|
||||
* - No Flush: The default method where no flush is performed.
|
||||
*
|
||||
* - Sync flush: whereby isal_deflate() finishes the current deflate block at
|
||||
* the end of each input buffer. The deflate block is byte aligned by
|
||||
* appending an empty stored block.
|
||||
*
|
||||
* - Full flush: whereby isal_deflate() finishes and aligns the deflate block as
|
||||
* in sync flush but also ensures that subsequent block's history does not
|
||||
* look back beyond this point and new blocks are fully independent.
|
||||
*
|
||||
* Igzip's default configuration is:
|
||||
*
|
||||
* - 8K window size
|
||||
*
|
||||
* This option can be overridden to enable:
|
||||
*
|
||||
* - 32K window size, by adding \#define LARGE_WINDOW 1 in igzip_lib.h and
|
||||
* \%define LARGE_WINDOW in options.asm, or via the command line with
|
||||
* @verbatim gmake D="-D LARGE_WINDOW" @endverbatim on Linux and FreeBSD, or
|
||||
* with @verbatim nmake -f Makefile.nmake D="-D LARGE_WINDOW" @endverbatim on
|
||||
* Windows.
|
||||
*
|
||||
* KNOWN ISSUES:
|
||||
* - If building the code on Windows with the 32K window enabled, the
|
||||
* /LARGEADDRESSAWARE:NO link option must be added.
|
||||
* - The 32K window isn't supported when used in a shared library.
|
||||
*
|
||||
*/
|
||||
#include <stdint.h>
|
||||
#include "types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Options:dir
|
||||
// m - reschedule mem reads
|
||||
// e b - bitbuff style
|
||||
// t s x - compare style
|
||||
// h - limit hash updates
|
||||
// l - use longer huffman table
|
||||
// f - fix cache read
|
||||
|
||||
#if defined(LARGE_WINDOW)
|
||||
# define HIST_SIZE 32
|
||||
#else
|
||||
# define HIST_SIZE 8
|
||||
#endif
|
||||
|
||||
/* bit buffer types
|
||||
* BITBUF8: (e) Always write 8 bytes of data
|
||||
* BITBUFB: (b) Always write data
|
||||
*/
|
||||
#if !(defined(USE_BITBUFB) || defined(USE_BITBUF8) || defined(USE_BITBUF_ELSE))
|
||||
# define USE_BITBUFB
|
||||
#endif
|
||||
|
||||
/* compare types
|
||||
* 1: ( ) original
|
||||
* 2: (t) with CMOV
|
||||
* 3: (s) with sttni
|
||||
* 4: (x) with xmm / pmovbmsk
|
||||
* 5: (y) with ymm / pmovbmsk (32-bytes at a time)
|
||||
*/
|
||||
# define LIMIT_HASH_UPDATE
|
||||
|
||||
/* (l) longer huffman table */
|
||||
#define LONGER_HUFFTABLE
|
||||
|
||||
/* (f) fix cache read problem */
|
||||
#define FIX_CACHE_READ
|
||||
|
||||
#if (HIST_SIZE > 8)
|
||||
# undef LONGER_HUFFTABLE
|
||||
#endif
|
||||
|
||||
#define IGZIP_K 1024
|
||||
#define IGZIP_D (HIST_SIZE * IGZIP_K) /* Amount of history */
|
||||
#define IGZIP_LA (17 * 16) /* Max look-ahead in bytes. NOTE(review): 17 * 16 = 272 is a multiple of 16, not of 32 -- confirm the intended alignment */
|
||||
#define BSIZE (2*IGZIP_D + IGZIP_LA) /* Nominal buffer size */
|
||||
|
||||
#define HASH_SIZE IGZIP_D
|
||||
#define HASH_MASK (HASH_SIZE - 1)
|
||||
|
||||
#define SHORTEST_MATCH 3
|
||||
|
||||
#define IGZIP_MAX_DEF_HDR_SIZE 328
|
||||
|
||||
#ifdef LONGER_HUFFTABLE
|
||||
enum {DIST_TABLE_SIZE = 8*1024};
|
||||
|
||||
/* DECODE_OFFSET is dist code index corresponding to DIST_TABLE_SIZE + 1 */
|
||||
enum { DECODE_OFFSET = 26 };
|
||||
#else
|
||||
enum {DIST_TABLE_SIZE = 1024};
|
||||
/* DECODE_OFFSET is dist code index corresponding to DIST_TABLE_SIZE + 1 */
|
||||
enum { DECODE_OFFSET = 20 };
|
||||
#endif
|
||||
enum {LEN_TABLE_SIZE = 256};
|
||||
enum {LIT_TABLE_SIZE = 257};
|
||||
|
||||
#define IGZIP_LIT_LEN 286
|
||||
#define IGZIP_DIST_LEN 30
|
||||
|
||||
/* Flush Flags */
|
||||
#define NO_FLUSH 0 /* Default */
|
||||
#define SYNC_FLUSH 1
|
||||
#define FULL_FLUSH 2
|
||||
#define FINISH_FLUSH 0 /* Deprecated */
|
||||
|
||||
/* Return values */
|
||||
#define COMP_OK 0
|
||||
#define INVALID_FLUSH -7
|
||||
#define INVALID_PARAM -8
|
||||
#define STATELESS_OVERFLOW -1
|
||||
#define DEFLATE_HDR_LEN 3
|
||||
/**
|
||||
* @enum isal_zstate_state
|
||||
* @brief Compression state. Note that ZSTATE_TRL only applies to GZIP compression.
|
||||
*/
|
||||
|
||||
|
||||
/* When the state is set to ZSTATE_NEW_HDR or ZSTATE_TMP_NEW_HDR, the
|
||||
* hufftable being used for compression may be swapped
|
||||
*/
|
||||
enum isal_zstate_state {
|
||||
ZSTATE_NEW_HDR, //!< Header to be written
|
||||
ZSTATE_HDR, //!< Header state
|
||||
ZSTATE_BODY, //!< Body state
|
||||
ZSTATE_FLUSH_READ_BUFFER, //!< Flush buffer
|
||||
ZSTATE_SYNC_FLUSH, //!< Write sync flush block
|
||||
ZSTATE_FLUSH_WRITE_BUFFER, //!< Flush bitbuf
|
||||
ZSTATE_TRL, //!< Trailer state
|
||||
ZSTATE_END, //!< End state
|
||||
ZSTATE_TMP_NEW_HDR, //!< Temporary Header to be written
|
||||
ZSTATE_TMP_HDR, //!< Temporary Header state
|
||||
ZSTATE_TMP_BODY, //!< Temporary Body state
|
||||
ZSTATE_TMP_FLUSH_READ_BUFFER, //!< Flush buffer
|
||||
ZSTATE_TMP_SYNC_FLUSH, //!< Write sync flush block
|
||||
ZSTATE_TMP_FLUSH_WRITE_BUFFER, //!< Flush bitbuf
|
||||
ZSTATE_TMP_TRL, //!< Temporary Trailer state
|
||||
ZSTATE_TMP_END //!< Temporary End state
|
||||
};
|
||||
|
||||
/* Offset used to switch between TMP states and non-tmp states */
|
||||
#define TMP_OFFSET_SIZE (ZSTATE_TMP_HDR - ZSTATE_HDR) /* parenthesized so the difference stays intact inside larger expressions */
|
||||
|
||||
struct isal_huff_histogram {
|
||||
uint64_t lit_len_histogram[IGZIP_LIT_LEN];
|
||||
uint64_t dist_histogram[IGZIP_DIST_LEN];
|
||||
};
|
||||
|
||||
/** @brief Holds Bit Buffer information*/
|
||||
struct BitBuf2 {
|
||||
uint64_t m_bits; //!< bits in the bit buffer
|
||||
uint32_t m_bit_count; //!< number of valid bits in the bit buffer
|
||||
uint8_t *m_out_buf; //!< current index of buffer to write to
|
||||
uint8_t *m_out_end; //!< end of buffer to write to
|
||||
uint8_t *m_out_start; //!< start of buffer to write to
|
||||
};
|
||||
|
||||
/* Variable prefixes:
|
||||
* b_ : Measured wrt the start of the buffer
|
||||
* f_ : Measured wrt the start of the file (aka file_start)
|
||||
*/
|
||||
|
||||
/** @brief Holds the internal state information for input and output compression streams*/
|
||||
struct isal_zstate {
|
||||
uint32_t b_bytes_valid; //!< number of bytes of valid data in buffer
|
||||
uint32_t b_bytes_processed; //!< keeps track of the number of bytes processed in isal_zstate.buffer
|
||||
uint8_t *file_start; //!< pointer to where file would logically start
|
||||
DECLARE_ALIGNED(uint32_t crc[16], 16); //!< actually 4 128-bit integers
|
||||
struct BitBuf2 bitbuf; //!< Bit Buffer
|
||||
enum isal_zstate_state state; //!< Current state in processing the data stream
|
||||
uint32_t count; //!< used for partial header/trailer writes
|
||||
uint8_t tmp_out_buff[16]; //!< temporary array
|
||||
uint32_t tmp_out_start; //!< temporary variable
|
||||
uint32_t tmp_out_end; //!< temporary variable
|
||||
uint32_t last_flush; //!< keeps track of last submitted flush
|
||||
uint32_t has_gzip_hdr; //!< keeps track of if the gzip header has been written.
|
||||
uint32_t has_eob; //!< keeps track of eob on the last deflate block
|
||||
uint32_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set)
|
||||
uint32_t left_over; //!< keeps track of overflow bytes
|
||||
|
||||
|
||||
|
||||
DECLARE_ALIGNED(uint8_t buffer[BSIZE + 16], 32); //!< Internal buffer
|
||||
|
||||
DECLARE_ALIGNED(uint16_t head[HASH_SIZE], 16); //!< Hash array
|
||||
|
||||
};
|
||||
|
||||
/** @brief Holds the huffman tree used to huffman encode the input stream **/
|
||||
struct isal_hufftables {
|
||||
|
||||
uint8_t deflate_hdr[IGZIP_MAX_DEF_HDR_SIZE]; //!< deflate huffman tree header
|
||||
uint32_t deflate_hdr_count; //!< Number of whole bytes in deflate_hdr
|
||||
uint32_t deflate_hdr_extra_bits; //!< Number of bits in the partial byte in header
|
||||
uint32_t dist_table[DIST_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are the code
|
||||
uint32_t len_table[LEN_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are the code
|
||||
uint16_t lit_table[LIT_TABLE_SIZE]; //!< literal code
|
||||
uint8_t lit_table_sizes[LIT_TABLE_SIZE]; //!< literal code length
|
||||
uint16_t dcodes[30 - DECODE_OFFSET]; //!< distance code
|
||||
uint8_t dcodes_sizes[30 - DECODE_OFFSET]; //!< distance code length
|
||||
|
||||
};
|
||||
|
||||
/** @brief Holds stream information*/
|
||||
struct isal_zstream {
|
||||
uint8_t *next_in; //!< Next input byte
|
||||
uint32_t avail_in; //!< number of bytes available at next_in
|
||||
uint32_t total_in; //!< total number of bytes read so far
|
||||
|
||||
uint8_t *next_out; //!< Next output byte
|
||||
uint32_t avail_out; //!< number of bytes available at next_out
|
||||
uint32_t total_out; //!< total number of bytes written so far
|
||||
|
||||
struct isal_hufftables *hufftables; //!< Huffman encoding used when compressing
|
||||
uint32_t end_of_stream; //!< non-zero if this is the last input buffer
|
||||
uint32_t flush; //!< Flush type can be NO_FLUSH or SYNC_FLUSH
|
||||
|
||||
struct isal_zstate internal_state; //!< Internal state for this stream
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Updates histograms to include the symbols found in the input
|
||||
* stream. Since this function only updates the histograms, it can be called on
|
||||
* multiple streams to get a histogram better representing the desired data
|
||||
* set. When first using histogram it must be initialized by zeroing the
|
||||
* structure.
|
||||
*
|
||||
* @param in_stream: Input stream of data.
|
||||
* @param length: The length of in_stream.
|
||||
* @param histogram: The returned histogram of lit/len/dist symbols.
|
||||
*/
|
||||
void isal_update_histogram(uint8_t * in_stream, int length, struct isal_huff_histogram * histogram);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Creates a custom huffman code for the given histograms in which
|
||||
* every literal and repeat length is assigned a code and all possible lookback
|
||||
* distances are assigned a code.
|
||||
*
|
||||
* @param hufftables: the output structure containing the huffman code
|
||||
* @param histogram: histograms containing the frequency of literal symbols,
|
||||
* repeat lengths and lookback distances
|
||||
* @returns Returns a non zero value if an invalid huffman code was created.
|
||||
*/
|
||||
int isal_create_hufftables(struct isal_hufftables * hufftables,
|
||||
struct isal_huff_histogram * histogram);
|
||||
|
||||
/**
|
||||
* @brief Creates a custom huffman code for the given histograms like
|
||||
* isal_create_hufftables() except literals with 0 frequency in the histogram
|
||||
* are not assigned a code
|
||||
*
|
||||
* @param hufftables: the output structure containing the huffman code
|
||||
* @param histogram: histograms containing the frequency of literal symbols,
|
||||
* repeat lengths and lookback distances
|
||||
* @returns Returns a non zero value if an invalid huffman code was created.
|
||||
*/
|
||||
int isal_create_hufftables_subset(struct isal_hufftables * hufftables,
|
||||
struct isal_huff_histogram * histogram);
|
||||
|
||||
/**
|
||||
* @brief Initialize compression stream data structure
|
||||
*
|
||||
* @param stream Structure holding state information on the compression streams.
|
||||
* @returns none
|
||||
*/
|
||||
void isal_deflate_init(struct isal_zstream *stream);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Fast data (deflate) compression for storage applications.
|
||||
*
|
||||
* On entry to isal_deflate(), next_in points to an input buffer and avail_in
|
||||
* indicates the length of that buffer. Similarly next_out points to an empty
|
||||
* output buffer and avail_out indicates the size of that buffer.
|
||||
*
|
||||
* The fields total_in and total_out start at 0 and are updated by
|
||||
* isal_deflate(). These reflect the total number of bytes read or written so far.
|
||||
*
|
||||
* The call to isal_deflate() will take data from the input buffer (updating
|
||||
* next_in and avail_in) and write a compressed stream to the output buffer
|
||||
* (updating next_out and avail_out). The function returns when either the input
|
||||
* buffer is empty or the output buffer is full.
|
||||
*
|
||||
* When the last input buffer is passed in, signaled by setting the
|
||||
* end_of_stream, the routine will complete compression at the end of the input
|
||||
* buffer, as long as the output buffer is big enough.
|
||||
*
|
||||
* The equivalent of the zlib Z_SYNC_FLUSH operation is currently supported.
|
||||
* Flush types can be NO_FLUSH or SYNC_FLUSH. Default flush type is NO_FLUSH.
|
||||
* If SYNC_FLUSH is selected each input buffer is compressed and byte aligned
|
||||
* with a type 0 block appended to the end. Switching between NO_FLUSH and
|
||||
* SYNC_FLUSH is supported to select after which input buffer a SYNC_FLUSH is
|
||||
* performed.
|
||||
*
|
||||
* @param stream Structure holding state information on the compression streams.
|
||||
* @return COMP_OK (if everything is ok),
|
||||
* INVALID_FLUSH (if an invalid FLUSH is selected),
|
||||
*/
|
||||
int isal_deflate(struct isal_zstream *stream);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Fast data (deflate) stateless compression for storage applications.
|
||||
*
|
||||
* Stateless (one shot) compression routine with a similar interface to
|
||||
* isal_deflate() but operates on entire input buffer at one time. Parameter
|
||||
* avail_out must be large enough to fit the entire compressed output. Max
|
||||
* expansion is limited to the input size plus the header size of a stored/raw
|
||||
* block.
|
||||
*
|
||||
* @param stream Structure holding state information on the compression streams.
|
||||
* @return COMP_OK (if everything is ok),
|
||||
* STATELESS_OVERFLOW (if output buffer will not fit output).
|
||||
*/
|
||||
int isal_deflate_stateless(struct isal_zstream *stream);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* ifndef _IGZIP_H */
|
Loading…
Reference in New Issue
Block a user