mirror of
https://github.com/intel/isa-l.git
synced 2024-12-12 17:33:50 +01:00
01dfbcc484
Change-Id: I9564b2da251a02197b39cab5f141e7aff1ae8439 Signed-off-by: Roy Oursler <roy.j.oursler@intel.com>
249 lines
7.5 KiB
NASM
249 lines
7.5 KiB
NASM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
|
|
;
|
|
; Redistribution and use in source and binary forms, with or without
|
|
; modification, are permitted provided that the following conditions
|
|
; are met:
|
|
; * Redistributions of source code must retain the above copyright
|
|
; notice, this list of conditions and the following disclaimer.
|
|
; * Redistributions in binary form must reproduce the above copyright
|
|
; notice, this list of conditions and the following disclaimer in
|
|
; the documentation and/or other materials provided with the
|
|
; distribution.
|
|
; * Neither the name of Intel Corporation nor the names of its
|
|
; contributors may be used to endorse or promote products derived
|
|
; from this software without specific prior written permission.
|
|
;
|
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
%include "options.asm"
|
|
%include "lz0a_const.asm"
|
|
|
|
; Macros for doing Huffman Encoding
|
|
|
|
%ifdef LONGER_HUFFTABLE
|
|
%if (D > 8192)
|
|
%error History D is larger than 8K, cannot use %LONGER_HUFFTABLE
|
|
% error
|
|
%else
|
|
%define DIST_TABLE_SIZE 8192
|
|
%define DECODE_OFFSET 26
|
|
%endif
|
|
%else
|
|
%define DIST_TABLE_SIZE 2
|
|
%define DECODE_OFFSET 0
|
|
%endif
|
|
|
|
%define LEN_TABLE_SIZE 256
|
|
%define LIT_TABLE_SIZE 257
|
|
|
|
%define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8)
|
|
%define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1)
|
|
%define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3)
|
|
%define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
|
|
%define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
|
|
%define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2)
|
|
%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
|
|
;; /** @brief Holds the huffman tree used to huffman encode the input stream **/
|
|
;; struct isal_hufftables {
|
|
;; // deflate huffman tree header
|
|
;; uint8_t deflate_huff_hdr[ISAL_DEF_MAX_HDR_SIZE];
|
|
;;
|
|
;; //!< Number of whole bytes in deflate_huff_hdr
|
|
;; uint32_t deflate_huff_hdr_count;
|
|
;;
|
|
;; //!< Number of bits in the partial byte in header
|
|
;; uint32_t deflate_huff_hdr_extra_bits;
|
|
;;
|
|
;; //!< bits 7:0 are the code length, bits 31:8 are the code
|
|
;; uint32_t dist_table[DIST_TABLE_SIZE];
|
|
;;
|
|
;; //!< bits 7:0 are the code length, bits 31:8 are the code
|
|
;; uint32_t len_table[LEN_TABLE_SIZE];
|
|
;;
|
|
;; //!< bits 3:0 are the code length, bits 15:4 are the code
|
|
;; uint16_t lit_table[LIT_TABLE_SIZE];
|
|
;;
|
|
;; //!< bits 3:0 are the code length, bits 15:4 are the code
|
|
;; uint16_t dcodes[30 - DECODE_OFFSET];
|
|
|
|
;; };
|
|
|
|
|
|
%ifdef LONGER_HUFFTABLE
|
|
; Uses RCX, clobbers dist
|
|
; get_dist_code dist, code, len
|
|
%macro get_dist_code 4
|
|
%define %%dist %1 ; 64-bit IN
|
|
%define %%code %2d ; 32-bit OUT
|
|
%define %%len %3d ; 32-bit OUT
|
|
%define %%hufftables %4 ; address of the hufftable
|
|
|
|
mov %%len, [%%hufftables + DIST_TABLE_OFFSET + 4*(%%dist + 1) ]
|
|
mov %%code, %%len
|
|
and %%len, 0x1F;
|
|
shr %%code, 5
|
|
%endm
|
|
|
|
%macro get_packed_dist_code 3
|
|
%define %%dist %1 ; 64-bit IN
|
|
%define %%code_len %2d ; 32-bit OUT
|
|
%define %%hufftables %3 ; address of the hufftable
|
|
mov %%code_len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ]
|
|
%endm
|
|
|
|
%macro unpack_dist_code 2
|
|
%define %%code %1d ; 32-bit OUT
|
|
%define %%len %2d ; 32-bit OUT
|
|
|
|
mov %%len, %%code
|
|
and %%len, 0x1F;
|
|
shr %%code, 5
|
|
%endm
|
|
|
|
%else
|
|
; Assumes (dist != 0)
|
|
; Uses RCX, clobbers dist
|
|
; void compute_dist_code dist, code, len
|
|
%macro compute_dist_code 4
|
|
%define %%dist %1 ; IN, clobbered
|
|
%define %%distq %1
|
|
%define %%code %2 ; OUT
|
|
%define %%len %3 ; OUT
|
|
%define %%hufftables %4
|
|
|
|
bsr rcx, %%dist ; ecx = msb = bsr(dist)
|
|
dec rcx ; ecx = num_extra_bits = msb - N
|
|
BZHI %%code, %%dist, rcx, %%len
|
|
SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits
|
|
lea %%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2
|
|
mov %%len, rcx ; len = num_extra_bits
|
|
movzx rcx, byte [hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT]
|
|
movzx %%dist, word [hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT]
|
|
SHLX %%code, %%code, rcx ; code = extra_bits << (sym & 0xF)
|
|
or %%code, %%dist ; code = (sym >> 4) | (extra_bits << (sym & 0xF))
|
|
add %%len, rcx ; len = num_extra_bits + (sym & 0xF)
|
|
%endm
|
|
|
|
; Uses RCX, clobbers dist
|
|
; get_dist_code dist, code, len
|
|
%macro get_dist_code 4
|
|
%define %%dist %1 ; 32-bit IN, clobbered
|
|
%define %%distq %1 ; 64-bit IN, clobbered
|
|
%define %%code %2 ; 32-bit OUT
|
|
%define %%len %3 ; 32-bit OUT
|
|
%define %%hufftables %4
|
|
|
|
cmp %%dist, DIST_TABLE_SIZE - 1
|
|
jg %%do_compute
|
|
%ifndef IACA
|
|
mov %%len %+ d, dword [hufftables + DIST_TABLE_OFFSET + 4*(%%distq + 1) WRT_OPT]
|
|
mov %%code, %%len
|
|
and %%len, 0x1F;
|
|
shr %%code, 5
|
|
jmp %%done
|
|
%endif
|
|
%%do_compute:
|
|
compute_dist_code %%distq, %%code, %%len, %%hufftables
|
|
%%done:
|
|
%endm
|
|
|
|
%macro get_packed_dist_code 3
|
|
%define %%dist %1 ; 64-bit IN
|
|
%define %%code_len %2d ; 32-bit OUT
|
|
%define %%hufftables %3 ; address of the hufftable
|
|
%endm
|
|
|
|
%endif
|
|
|
|
|
|
; Macros for doing Huffman Encoding
|
|
|
|
; Assumes (dist != 0)
|
|
; Uses RCX, clobbers dist
|
|
; void compute_dist_code dist, code, len
|
|
%macro compute_dist_icf_code 3
|
|
%define %%dist %1 ; IN, clobbered
|
|
%define %%distq %1
|
|
%define %%code %2 ; OUT
|
|
%define %%tmp1 %3
|
|
|
|
bsr rcx, %%dist ; ecx = msb = bsr(dist)
|
|
dec rcx ; ecx = num_extra_bits = msb - N
|
|
BZHI %%code, %%dist, rcx, %%tmp1
|
|
SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits
|
|
lea %%dist, [%%dist + 2*rcx] ; code = sym = dist + num_extra_bits*2
|
|
shl %%code, EXTRA_BITS_OFFSET - DIST_OFFSET
|
|
add %%code, %%dist ; code = extra_bits | sym
|
|
|
|
%endm
|
|
|
|
; Uses RCX, clobbers dist
|
|
; get_dist_code dist, code, len
|
|
%macro get_dist_icf_code 3
|
|
%define %%dist %1 ; 32-bit IN, clobbered
|
|
%define %%distq %1 ; 64-bit IN, clobbered
|
|
%define %%code %2 ; 32-bit OUT
|
|
%define %%tmp1 %3
|
|
|
|
cmp %%dist, 1
|
|
jg %%do_compute
|
|
|
|
%ifnidn %%code, %%dist
|
|
mov %%code, %%dist
|
|
%endif
|
|
jmp %%done
|
|
%%do_compute:
|
|
compute_dist_icf_code %%distq, %%code, %%tmp1
|
|
%%done:
|
|
shl %%code, DIST_OFFSET
|
|
%endm
|
|
|
|
|
|
; "len" can be same register as "length"
|
|
; get_len_code length, code, len
|
|
%macro get_len_code 4
|
|
%define %%length %1 ; 64-bit IN
|
|
%define %%code %2d ; 32-bit OUT
|
|
%define %%len %3d ; 32-bit OUT
|
|
%define %%hufftables %4
|
|
|
|
mov %%len, [%%hufftables + LEN_TABLE_OFFSET + 4 * %%length]
|
|
mov %%code, %%len
|
|
and %%len, 0x1F
|
|
shr %%code, 5
|
|
%endm
|
|
|
|
|
|
%macro get_lit_code 4
|
|
%define %%lit %1 ; 64-bit IN or CONST
|
|
%define %%code %2d ; 32-bit OUT
|
|
%define %%len %3d ; 32-bit OUT
|
|
%define %%hufftables %4
|
|
|
|
movzx %%len, byte [%%hufftables + LIT_TABLE_SIZES_OFFSET + %%lit]
|
|
movzx %%code, word [%%hufftables + LIT_TABLE_OFFSET + 2 * %%lit]
|
|
|
|
%endm
|
|
|
|
|
|
;; Compute hash of first 3 bytes of data
|
|
%macro compute_hash 2
|
|
%define %%result %1d ; 32-bit reg
|
|
%define %%data %2d ; 32-bit reg (low byte not clobbered)
|
|
|
|
xor %%result, %%result
|
|
crc32 %%result, %%data
|
|
%endm
|