mirror of
https://github.com/intel/isa-l.git
synced 2025-01-07 15:22:25 +01:00
9f75defd57
The relic slver is no longer used for individual versioning on functions and is confusing tools looking for data in text sections. This removes all instances instead of fixing since its usefulness is waning. Fixes #221 Change-Id: Ife0b9f105950a90337c58e8a41ac2cffc0f67d99 Signed-off-by: Greg Tucker <greg.b.tucker@intel.com>
323 lines
8.1 KiB
NASM
323 lines
8.1 KiB
NASM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
|
;
|
|
; Redistribution and use in source and binary forms, with or without
|
|
; modification, are permitted provided that the following conditions
|
|
; are met:
|
|
; * Redistributions of source code must retain the above copyright
|
|
; notice, this list of conditions and the following disclaimer.
|
|
; * Redistributions in binary form must reproduce the above copyright
|
|
; notice, this list of conditions and the following disclaimer in
|
|
; the documentation and/or other materials provided with the
|
|
; distribution.
|
|
; * Neither the name of Intel Corporation nor the names of its
|
|
; contributors may be used to endorse or promote products derived
|
|
; from this software without specific prior written permission.
|
|
;
|
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
default rel
|
|
[bits 64]
|
|
|
|
%include "reg_sizes.asm"
|
|
|
|
extern crc32_iscsi_00
|
|
extern crc32_iscsi_01
|
|
extern crc32_iscsi_base
|
|
|
|
extern crc32_ieee_01
|
|
extern crc32_ieee_by4 ;; Optimized for SLM
|
|
extern crc32_ieee_02
|
|
extern crc32_ieee_base
|
|
|
|
extern crc16_t10dif_01
|
|
extern crc16_t10dif_by4 ;; Optimized for SLM
|
|
extern crc16_t10dif_02
|
|
extern crc16_t10dif_base
|
|
|
|
extern crc32_gzip_refl_by8
|
|
extern crc32_gzip_refl_by8_02
|
|
extern crc32_gzip_refl_base
|
|
|
|
extern crc16_t10dif_copy_by4
|
|
extern crc16_t10dif_copy_by4_02
|
|
extern crc16_t10dif_copy_base
|
|
|
|
%if (AS_FEATURE_LEVEL) >= 10
|
|
extern crc32_gzip_refl_by16_10
|
|
extern crc32_ieee_by16_10
|
|
extern crc32_iscsi_by16_10
|
|
extern crc16_t10dif_by16_10
|
|
%endif
|
|
|
|
%include "multibinary.asm"
|
|
|
|
section .data
;;; Dispatch pointers, one per hand-written multibinary entry point below.
;;; Each pointer initially holds the address of its *_mbinit stub and is
;;; overwritten by the matching *_dispatch_init with the address of the
;;; selected implementation.  After that first call the entry point jumps
;;; straight to the selected implementation, so *_dispatch_init is only
;;; executed on the first call.

crc32_iscsi_dispatched:         dq      crc32_iscsi_mbinit
crc32_ieee_dispatched:          dq      crc32_ieee_mbinit
crc16_t10dif_dispatched:        dq      crc16_t10dif_mbinit
|
|
|
|
section .text
|
|
;;;;
; crc32_iscsi multibinary function
;
; crc32_iscsi            exported entry point; jumps through the pointer
;                        stored in crc32_iscsi_dispatched.
; crc32_iscsi_mbinit     initial value of that pointer; runs the selection
;                        routine once and then falls through into crc32_iscsi.
; crc32_iscsi_dispatch_init
;                        probes the CPU and stores the address of the chosen
;                        implementation in crc32_iscsi_dispatched.
;
; Selection order (each step requires all previous ones):
;   default                                   -> crc32_iscsi_base
;   SSE4.2                                    -> crc32_iscsi_00
;   CLMUL                                     -> crc32_iscsi_01
;   AS_FEATURE_LEVEL >= 10 builds only:
;   OSXSAVE, XMM/YMM state, AVX, AVX2,
;   ZMM state, AVX512 G1 and G2 feature sets  -> crc32_iscsi_by16_10
;;;;
mk_global crc32_iscsi, function
crc32_iscsi_mbinit:
        endbranch
        call    crc32_iscsi_dispatch_init       ; resolve once, then fall through
crc32_iscsi:
        endbranch
        jmp     qword [crc32_iscsi_dispatched]

; Every register written below is saved and restored, so the registers of the
; call that triggered the initialisation are unchanged when control falls
; through to the jmp above.
crc32_iscsi_dispatch_init:
        push    rax
        push    rbx
        push    rcx
        push    rdx
        push    rsi
        push    rdi

        lea     rsi, [crc32_iscsi_base WRT_OPT] ; rsi = current choice (default)

        mov     eax, 1
        cpuid
        mov     ebx, ecx                        ; keep cpuid(1).ecx across xgetbv
        test    ecx, FLAG_CPUID1_ECX_SSE4_2
        jz      .select_done                    ; no SSE4.2: stay on crc32_iscsi_base
        lea     rsi, [crc32_iscsi_00 WRT_OPT]
        test    ecx, FLAG_CPUID1_ECX_CLMUL
        jz      .select_done                    ; no CLMUL: stay on crc32_iscsi_00
        lea     rsi, [crc32_iscsi_01 WRT_OPT]

        ;; XMM/YMM state enabled by the OS, and AVX present?
        ;; There is no separate AVX (02) variant for iscsi; these checks only
        ;; gate the AVX512 probe below.
        test    ecx, FLAG_CPUID1_ECX_OSXSAVE
        jz      .select_done
        xor     ecx, ecx
        xgetbv                                  ; xcr -> edx:eax
        mov     edi, eax                        ; keep xgetbv.eax for the ZMM check

        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        jne     .select_done
        test    ebx, FLAG_CPUID1_ECX_AVX
        jz      .select_done

%if AS_FEATURE_LEVEL >= 10
        ;; AVX2 (cpuid leaf 7, subleaf 0)
        mov     eax, 7
        xor     ecx, ecx
        cpuid
        test    ebx, FLAG_CPUID7_EBX_AVX2
        jz      .select_done                    ; no AVX2, so no AVX512 either

        ;; AVX512: ZMM/opmask state enabled plus both feature groups
        and     edi, FLAG_XGETBV_EAX_ZMM_OPM
        cmp     edi, FLAG_XGETBV_EAX_ZMM_OPM
        jne     .select_done
        and     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        cmp     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        jne     .select_done

        lea     rbx, [crc32_iscsi_by16_10 WRT_OPT]      ; candidate: AVX512/10 opt
        and     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmp     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmove   rsi, rbx                        ; taken only if every G2 bit is set
%endif

.select_done:
        mov     [crc32_iscsi_dispatched], rsi   ; publish the choice
        pop     rdi
        pop     rsi
        pop     rdx
        pop     rcx
        pop     rbx
        pop     rax
        ret
|
|
|
|
;;;;
; crc32_ieee multibinary function
;
; crc32_ieee             exported entry point; jumps through the pointer
;                        stored in crc32_ieee_dispatched.
; crc32_ieee_mbinit      initial value of that pointer; runs the selection
;                        routine once and then falls through into crc32_ieee.
; crc32_ieee_dispatch_init
;                        probes the CPU and stores the address of the chosen
;                        implementation in crc32_ieee_dispatched.
;
; Selection order (each step requires all previous ones):
;   default                                   -> crc32_ieee_base
;   SSE3 and CLMUL                            -> crc32_ieee_01
;     (cpuid(1).eax matches Avoton            -> crc32_ieee_by4 instead)
;   OSXSAVE, XMM/YMM state, AVX               -> crc32_ieee_02
;   AS_FEATURE_LEVEL >= 10 builds only:
;   AVX2, ZMM state, AVX512 G1 and G2 sets    -> crc32_ieee_by16_10
;;;;
mk_global crc32_ieee, function
crc32_ieee_mbinit:
        endbranch
        call    crc32_ieee_dispatch_init        ; resolve once, then fall through
crc32_ieee:
        endbranch
        jmp     qword [crc32_ieee_dispatched]

; Every register written below is saved and restored, so the registers of the
; call that triggered the initialisation are unchanged when control falls
; through to the jmp above.
crc32_ieee_dispatch_init:
        push    rax
        push    rbx
        push    rcx
        push    rdx
        push    rsi
        push    rdi

        lea     rsi, [crc32_ieee_base WRT_OPT]  ; rsi = current choice (default)

        mov     eax, 1
        cpuid
        mov     ebx, ecx                        ; keep cpuid(1).ecx across xgetbv
        test    ecx, FLAG_CPUID1_ECX_SSE3
        jz      .select_done                    ; no SSE3: stay on crc32_ieee_base
        test    ecx, FLAG_CPUID1_ECX_CLMUL
        jz      .select_done                    ; no CLMUL: stay on crc32_ieee_base
        lea     rsi, [crc32_ieee_01 WRT_OPT]

        ;; Avoton: compare the masked cpuid(1).eax value and switch to the
        ;; by4 variant on a match.
        lea     rdx, [crc32_ieee_by4 WRT_OPT]
        and     eax, FLAG_CPUID1_EAX_STEP_MASK
        cmp     eax, FLAG_CPUID1_EAX_AVOTON
        cmove   rsi, rdx

        ;; XMM/YMM state enabled by the OS, and AVX present?
        test    ecx, FLAG_CPUID1_ECX_OSXSAVE
        jz      .select_done
        xor     ecx, ecx
        xgetbv                                  ; xcr -> edx:eax
        mov     edi, eax                        ; keep xgetbv.eax for the ZMM check

        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        jne     .select_done
        test    ebx, FLAG_CPUID1_ECX_AVX
        jz      .select_done
        lea     rsi, [crc32_ieee_02 WRT_OPT]    ; AVX/02 opt

%if AS_FEATURE_LEVEL >= 10
        ;; AVX2 (cpuid leaf 7, subleaf 0)
        mov     eax, 7
        xor     ecx, ecx
        cpuid
        test    ebx, FLAG_CPUID7_EBX_AVX2
        jz      .select_done                    ; no AVX2, so no AVX512 either

        ;; AVX512: ZMM/opmask state enabled plus both feature groups
        and     edi, FLAG_XGETBV_EAX_ZMM_OPM
        cmp     edi, FLAG_XGETBV_EAX_ZMM_OPM
        jne     .select_done
        and     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        cmp     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        jne     .select_done

        lea     rbx, [crc32_ieee_by16_10 WRT_OPT]       ; candidate: AVX512/10 opt
        and     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmp     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmove   rsi, rbx                        ; taken only if every G2 bit is set
%endif

.select_done:
        mov     [crc32_ieee_dispatched], rsi    ; publish the choice
        pop     rdi
        pop     rsi
        pop     rdx
        pop     rcx
        pop     rbx
        pop     rax
        ret
|
|
|
|
;;;;
; crc16_t10dif multibinary function
;
; crc16_t10dif           exported entry point; jumps through the pointer
;                        stored in crc16_t10dif_dispatched.
; crc16_t10dif_mbinit    initial value of that pointer; runs the selection
;                        routine once and then falls through into crc16_t10dif.
; crc16_t10dif_dispatch_init
;                        probes the CPU and stores the address of the chosen
;                        implementation in crc16_t10dif_dispatched.
;
; Selection order (each step requires all previous ones):
;   default                                   -> crc16_t10dif_base
;   SSE3 and CLMUL                            -> crc16_t10dif_01
;     (cpuid(1).eax matches Avoton            -> crc16_t10dif_by4 instead)
;   OSXSAVE, XMM/YMM state, AVX               -> crc16_t10dif_02
;   AS_FEATURE_LEVEL >= 10 builds only:
;   AVX2, ZMM state, AVX512 G1 and G2 sets    -> crc16_t10dif_by16_10
;;;;
mk_global crc16_t10dif, function
crc16_t10dif_mbinit:
        endbranch
        call    crc16_t10dif_dispatch_init      ; resolve once, then fall through
crc16_t10dif:
        endbranch
        jmp     qword [crc16_t10dif_dispatched]

; Every register written below is saved and restored, so the registers of the
; call that triggered the initialisation are unchanged when control falls
; through to the jmp above.
crc16_t10dif_dispatch_init:
        push    rax
        push    rbx
        push    rcx
        push    rdx
        push    rsi
        push    rdi

        lea     rsi, [crc16_t10dif_base WRT_OPT]        ; rsi = current choice (default)

        mov     eax, 1
        cpuid
        mov     ebx, ecx                        ; keep cpuid(1).ecx across xgetbv
        test    ecx, FLAG_CPUID1_ECX_SSE3
        jz      .select_done                    ; no SSE3: stay on crc16_t10dif_base
        test    ecx, FLAG_CPUID1_ECX_CLMUL
        jz      .select_done                    ; no CLMUL: stay on crc16_t10dif_base
        lea     rsi, [crc16_t10dif_01 WRT_OPT]

        ;; Avoton: compare the masked cpuid(1).eax value and switch to the
        ;; by4 variant on a match.
        lea     rdx, [crc16_t10dif_by4 WRT_OPT]
        and     eax, FLAG_CPUID1_EAX_STEP_MASK
        cmp     eax, FLAG_CPUID1_EAX_AVOTON
        cmove   rsi, rdx

        ;; XMM/YMM state enabled by the OS, and AVX present?
        test    ecx, FLAG_CPUID1_ECX_OSXSAVE
        jz      .select_done
        xor     ecx, ecx
        xgetbv                                  ; xcr -> edx:eax
        mov     edi, eax                        ; keep xgetbv.eax for the ZMM check

        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        jne     .select_done
        test    ebx, FLAG_CPUID1_ECX_AVX
        jz      .select_done
        lea     rsi, [crc16_t10dif_02 WRT_OPT]  ; AVX/02 opt

%if AS_FEATURE_LEVEL >= 10
        ;; AVX2 (cpuid leaf 7, subleaf 0)
        mov     eax, 7
        xor     ecx, ecx
        cpuid
        test    ebx, FLAG_CPUID7_EBX_AVX2
        jz      .select_done                    ; no AVX2, so no AVX512 either

        ;; AVX512: ZMM/opmask state enabled plus both feature groups
        and     edi, FLAG_XGETBV_EAX_ZMM_OPM
        cmp     edi, FLAG_XGETBV_EAX_ZMM_OPM
        jne     .select_done
        and     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        cmp     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        jne     .select_done

        lea     rbx, [crc16_t10dif_by16_10 WRT_OPT]     ; candidate: AVX512/10 opt
        and     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmp     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmove   rsi, rbx                        ; taken only if every G2 bit is set
%endif

.select_done:
        mov     [crc16_t10dif_dispatched], rsi  ; publish the choice
        pop     rdi
        pop     rsi
        pop     rdx
        pop     rcx
        pop     rbx
        pop     rax
        ret
|
|
|
|
;;;;
; crc32_gzip_refl and crc16_t10dif_copy multibinary functions
;
; Unlike the three hand-written dispatchers earlier in this file, these two
; entry points are produced by macro invocations.  mbin_interface and
; mbin_dispatch_init_clmul are not defined in this file (presumably they come
; from the included multibinary.asm -- TODO confirm).
;
; The arguments to mbin_dispatch_init_clmul are the function name followed by
; four candidate implementations; judging by the symbol names they run from
; the base version to the most specialised one -- verify against the macro
; definition.
;
; NOTE(review): crc32_gzip_refl_by16_10 is declared extern only when
; AS_FEATURE_LEVEL >= 10; confirm the macro references its last argument only
; at that feature level.
;;;;
mbin_interface crc32_gzip_refl
|
|
mbin_dispatch_init_clmul crc32_gzip_refl, crc32_gzip_refl_base, crc32_gzip_refl_by8, crc32_gzip_refl_by8_02, crc32_gzip_refl_by16_10
|
|
|
|
; crc16_t10dif_copy: the last two candidates are the same symbol
; (crc16_t10dif_copy_by4_02); no separate by16_10 symbol is declared for the
; copy variant in this file.
mbin_interface crc16_t10dif_copy
|
|
mbin_dispatch_init_clmul crc16_t10dif_copy, crc16_t10dif_copy_base, crc16_t10dif_copy_by4, crc16_t10dif_copy_by4_02, crc16_t10dif_copy_by4_02
|