mem: Add zero detect memory functions

This patch introduces the base, AVX and SSE optimized zero detect memory functions.
The zero detect memory function tests if a memory region is all zeroes. If all the
bytes in the memory region are zero, the function returns zero. Otherwise, if the
memory region has non-zero bytes, the zero detect function returns a non-zero value
(1 from the SSE and AVX versions, -1 from the base version).

Change-Id: If965badf750377124d0067d09f888d0419554998
Signed-off-by: John Kariuki <John.K.Kariuki@intel.com>
This commit is contained in:
John Kariuki 2016-03-26 02:44:32 -07:00 committed by Greg Tucker
parent c872426b1c
commit 6e2013391a
13 changed files with 940 additions and 9 deletions

View File

@ -36,6 +36,7 @@ include igzip/Makefile.am
include tests/fuzz/Makefile.am
include examples/ec/Makefile.am
include programs/Makefile.am
include mem/Makefile.am
# LIB version info not necessarily the same as package version
LIBISAL_CURRENT=2

View File

@ -135,9 +135,13 @@ objs = \
bin\igzip_gen_icf_map_lh1_06.obj \
bin\igzip_set_long_icf_fg_04.obj \
bin\igzip_set_long_icf_fg_06.obj \
bin\igzip_icf_body.obj
bin\igzip_icf_body.obj \
bin\mem_zero_detect_avx.obj \
bin\mem_zero_detect_base.obj \
bin\mem_multibinary.obj \
bin\mem_zero_detect_sse.obj
INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/
INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/ -Imem/
LINKFLAGS = /nologo
CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $(INCLUDES) $(D)
AFLAGS = -f win64 $(INCLUDES) $(D)
@ -180,6 +184,10 @@ $?
{igzip}.asm.obj:
$(AS) $(AFLAGS) -o $@ $?
{mem}.c.obj:
$(CC) $(CFLAGS) /c -Fo$@ $?
{mem}.asm.obj:
$(AS) $(AFLAGS) -o $@ $?
# Examples
ex = xor_example.exe crc_simple_test.exe crc64_example.exe igzip_example.exe igzip_sync_flush_example.exe
@ -205,7 +213,8 @@ checks = \
crc32_funcs_test.exe \
crc64_funcs_test.exe \
igzip_wrapper_hdr_test.exe \
igzip_rand_test.exe
igzip_rand_test.exe \
mem_zero_detect_test.exe
checks: lib $(checks)
$(checks): $(@B).obj
@ -249,7 +258,8 @@ perfs = \
crc32_iscsi_perf.exe \
igzip_perf.exe \
igzip_sync_flush_perf.exe \
crc32_gzip_refl_perf.exe
crc32_gzip_refl_perf.exe \
mem_zero_detect_perf.exe
perfs: lib $(perfs)
$(perfs): $(@B).obj

View File

@ -27,7 +27,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
units = erasure_code raid crc igzip programs
units = erasure_code raid crc igzip programs mem
default: lib

64
include/mem_routines.h Normal file
View File

@ -0,0 +1,64 @@
/**********************************************************************
Copyright(c) 2011-2018 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stddef.h>
/**
* @file mem_routines.h
* @brief Interface to storage mem operations
*
* Defines the interface for vector versions of common memory functions.
*/
#ifndef _MEM_ROUTINES_H_
#define _MEM_ROUTINES_H_
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief Detect if a memory region is all zero
 *
 * Zero detect function with optimizations for large blocks > 128 bytes.
 * A region of length 0 is reported as all zero.
 *
 * The exact non-zero value returned for a region that contains non-zero
 * bytes depends on the implementation selected (the base C version
 * returns -1, the SSE and AVX versions return 1), so callers should only
 * compare the result against 0.
 *
 * @param mem Pointer to memory region to test
 * @param len Length of region in bytes
 * @returns 0 - region is all zeros
 *          other - region has non zero bytes
 */
int isal_zero_detect(void *mem, size_t len);
#ifdef __cplusplus
}
#endif
#endif // _MEM_ROUTINES_H_

View File

@ -105,7 +105,8 @@ isal_deflate_reset @101
isal_inflate_set_dict @102
isal_inflate_reset @103
crc16_t10dif_copy @104
isal_read_gzip_header @105
isal_read_zlib_header @106
isal_write_gzip_header @107
isal_write_zlib_header @108
isal_read_gzip_header @105
isal_read_zlib_header @106
isal_write_gzip_header @107
isal_write_zlib_header @108
isal_zero_detect @109

45
mem/Makefile.am Normal file
View File

@ -0,0 +1,45 @@
########################################################################
# Copyright(c) 2011-2018 Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
# C sources: the base implementation and its alias wrapper
lsrc              += mem/mem_zero_detect_base.c
lsrc_base_aliases += mem/mem_zero_detect_base_aliases.c

# x86_64 assembly sources: AVX and SSE versions plus the multibinary dispatcher
lsrc_x86_64 += mem/mem_zero_detect_avx.asm
lsrc_x86_64 += mem/mem_zero_detect_sse.asm
lsrc_x86_64 += mem/mem_multibinary.asm

# Headers
extern_hdrs += include/mem_routines.h
other_src   += include/test.h
other_src   += include/types.h

# Unit test and performance test programs
check_tests += mem/mem_zero_detect_test
perf_tests  += mem/mem_zero_detect_perf

42
mem/mem_multibinary.asm Normal file
View File

@ -0,0 +1,42 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "reg_sizes.asm"
%include "multibinary.asm"
default rel
[bits 64]
; Implementations of the zero detect function that the dispatcher can select.
extern mem_zero_detect_avx
extern mem_zero_detect_sse
extern mem_zero_detect_base
; Declare the public entry point isal_zero_detect.
; NOTE(review): mbin_interface is defined in multibinary.asm (not visible
; here) - presumably it emits the indirect-call stub; confirm there.
mbin_interface isal_zero_detect
; Register the candidate implementations for isal_zero_detect.
; NOTE(review): the argument order is presumably base, SSE-class, AVX and two
; higher ISA levels; the AVX version is passed for both of the last two
; slots. Confirm the slot meaning against mbin_dispatch_init5.
mbin_dispatch_init5 isal_zero_detect, mem_zero_detect_base, mem_zero_detect_sse, mem_zero_detect_avx, mem_zero_detect_avx

189
mem/mem_zero_detect_avx.asm Normal file
View File

@ -0,0 +1,189 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "reg_sizes.asm"
; Register aliases used when assembling for elf64: arguments arrive in
; rdi, rsi, rdx, rcx, r8, r9 and the result is returned in rax.
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
; Scratch registers: tmp/tmpb are r11 and its low byte, tmp3 reuses arg4 (r8).
%define tmp r11
%define tmpb r11b
%define tmp3 arg4
%define return rax
; On elf64 func(x) is a plain label and no prologue/epilogue is emitted.
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
; Register aliases used when assembling for win64: arguments arrive in
; rcx, rdx, r8, r9 and the result is returned in rax.
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
; Scratch registers: tmp/tmpb are r11 and its low byte, tmp3 is r10.
%define tmp r11
%define tmpb r11b
%define tmp3 r10
%define return rax
; On win64 the function is opened with proc_frame; FUNC_SAVE only closes the
; prologue (nothing is pushed) and FUNC_RESTORE is empty.
%define func(x) proc_frame x
%macro FUNC_SAVE 0
end_prolog
%endmacro
%macro FUNC_RESTORE 0
%endmacro
%endif
; Function-level names: src = first argument (buffer pointer),
; len = second argument (length in bytes).
%define src arg0
%define len arg1
%define ptr arg2
; pos shares rax with the return value, so it is only valid as an offset
; until the result is written.
%define pos return
default rel
[bits 64]
section .text
align 16
global mem_zero_detect_avx:function
;; int mem_zero_detect_avx(void *src, size_t len)
;;
;; Returns 0 in rax when all len bytes at src are zero and 1 otherwise.
;; len == 0 returns 0.
;;
;; Strategy: buffers longer than 128 bytes are scanned in 128-byte steps
;; followed by one (possibly overlapping) check of the final 128 bytes.
;; Shorter buffers are covered by two overlapping halves of decreasing
;; width (64, 32, 16, 8 bytes); fewer than 8 bytes use a byte loop.
;;
;; vzeroupper is executed on both exit paths so the upper halves of the ymm
;; registers written here are clean when control returns to callers that
;; use non-VEX SSE code.
func(mem_zero_detect_avx)
	FUNC_SAVE
	mov	pos, 0
	sub	len, 4*32		; len = length - 128
	jle	.mem_z_small_block	; length <= 128: handled separately

	; Main loop: test 128 bytes per iteration while pos < length - 128.
.mem_z_loop:
	vmovdqu	ymm0, [src+pos]
	vmovdqu	ymm1, [src+pos+1*32]
	vmovdqu	ymm2, [src+pos+2*32]
	vmovdqu	ymm3, [src+pos+3*32]
	vptest	ymm0, ymm0		; ZF is clear when any bit is set
	jnz	.return_fail
	vptest	ymm1, ymm1
	jnz	.return_fail
	vptest	ymm2, ymm2
	jnz	.return_fail
	vptest	ymm3, ymm3
	jnz	.return_fail
	add	pos, 4*32
	cmp	pos, len
	jl	.mem_z_loop

	; Final 128 bytes of the buffer: len still holds length - 128, so this
	; block ends exactly at the end of the region and may overlap bytes
	; already tested by the loop.
.mem_z_last_block:
	vmovdqu	ymm0, [src+len]
	vmovdqu	ymm1, [src+len+1*32]
	vmovdqu	ymm2, [src+len+2*32]
	vmovdqu	ymm3, [src+len+3*32]
	vptest	ymm0, ymm0
	jnz	.return_fail
	vptest	ymm1, ymm1
	jnz	.return_fail
	vptest	ymm2, ymm2
	jnz	.return_fail
	vptest	ymm3, ymm3
	jnz	.return_fail

.return_pass:
	mov	return, 0
	vzeroupper			; leave no dirty upper ymm state behind
	FUNC_RESTORE
	ret

	; length <= 128: restore the real length, then pick the widest pair of
	; overlapping loads that fits.
.mem_z_small_block:
	add	len, 4*32
	cmp	len, 2*32
	jl	.mem_z_lt64
	; 64 <= length <= 128: first 64 bytes and last 64 bytes
	vmovdqu	ymm0, [src]
	vmovdqu	ymm1, [src+32]
	vmovdqu	ymm2, [src+len-2*32]
	vmovdqu	ymm3, [src+len-1*32]
	vptest	ymm0, ymm0
	jnz	.return_fail
	vptest	ymm1, ymm1
	jnz	.return_fail
	vptest	ymm2, ymm2
	jnz	.return_fail
	vptest	ymm3, ymm3
	jnz	.return_fail
	jmp	.return_pass

.mem_z_lt64:
	cmp	len, 32
	jl	.mem_z_lt32
	; 32 <= length < 64: first 32 bytes and last 32 bytes
	vmovdqu	ymm0, [src]
	vmovdqu	ymm1, [src+len-32]
	vptest	ymm0, ymm0
	jnz	.return_fail
	vptest	ymm1, ymm1
	jnz	.return_fail
	jmp	.return_pass

.mem_z_lt32:
	cmp	len, 16
	jl	.mem_z_lt16
	; 16 <= length < 32: first 16 bytes and last 16 bytes
	vmovdqu	xmm0, [src]
	vmovdqu	xmm1, [src+len-16]
	vptest	xmm0, xmm0
	jnz	.return_fail
	vptest	xmm1, xmm1
	jnz	.return_fail
	jmp	.return_pass

.mem_z_lt16:
	cmp	len, 8
	jl	.mem_z_lt8
	; 8 <= length < 16: OR the first and last 8 bytes together
	mov	tmp, [src]
	mov	tmp3,[src+len-8]
	or	tmp, tmp3
	test	tmp, tmp
	jnz	.return_fail
	jmp	.return_pass

.mem_z_lt8:
	cmp	len, 0
	je	.return_pass		; empty region counts as all zero
	; 1 <= length < 8: byte-by-byte; pos is still 0 on this path
.mem_z_1byte_loop:
	mov	tmpb, [src+pos]
	cmp	tmpb, 0
	jnz	.return_fail
	add	pos, 1
	cmp	pos, len
	jl	.mem_z_1byte_loop
	jmp	.return_pass

.return_fail:
	mov	return, 1
	vzeroupper			; leave no dirty upper ymm state behind
	FUNC_RESTORE
	ret
endproc_frame

View File

@ -0,0 +1,73 @@
/**********************************************************************
Copyright(c) 2011-2018 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdint.h>
#include <stddef.h>
/**
 * @brief Portable check that a memory region is all zero
 *
 * The bulk of the region is compared one machine word (size_t) at a time;
 * the remaining tail, shorter than one word, is checked byte by byte.
 *
 * @param buf Pointer to memory region to test
 * @param n   Length of region in bytes; 0 is treated as all zero
 * @returns 0 - region is all zeros
 *          -1 - region has non zero bytes
 */
int mem_zero_detect_base(void *buf, size_t n)
{
	const size_t *word = buf;
	const unsigned char *byte;
	unsigned char acc = 0;

	// Check buffer in native machine width comparisons. The stride is the
	// size of the object being read, sizeof(*word), not the size of the
	// pointer, so the remaining count and the pointer always advance by
	// the same number of bytes whatever the platform's type widths are.
	while (n >= sizeof(*word)) {
		n -= sizeof(*word);
		if (*word++ != 0)
			return -1;
	}

	// Check remaining bytes (fewer than sizeof(*word)) one at a time so
	// that no access is wider than the tail or needs extra alignment.
	byte = (const unsigned char *)word;
	while (n-- > 0)
		acc |= *byte++;

	return (acc == 0) ? 0 : -1;
}

View File

@ -0,0 +1,38 @@
/**********************************************************************
Copyright(c) 2011-2018 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdint.h>
#include "mem_routines.h"
/* Base implementation, defined in mem_zero_detect_base.c */
int mem_zero_detect_base(void *buf, size_t n);

/*
 * Alias that binds the public isal_zero_detect() entry point directly to
 * the base C implementation. The arguments are forwarded unchanged and the
 * base function's result is returned as is.
 */
int isal_zero_detect(void *mem, size_t len)
{
	const int status = mem_zero_detect_base(mem, len);

	return status;
}

View File

@ -0,0 +1,66 @@
/**********************************************************************
Copyright(c) 2011-2018 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mem_routines.h"
#include "test.h"
#include "types.h"
// Size in bytes of the buffer scanned on every call. Parenthesized so the
// macro expands safely inside larger expressions.
#define TEST_LEN (8*1024)
// Number of timed calls
#define TEST_LOOPS 10000000
// Suffix appended to the test name in the printed result line
#define TEST_TYPE_STR "_warm"
/*
 * Performance test: times TEST_LOOPS calls of isal_zero_detect() over a
 * TEST_LEN byte, 64-byte aligned, all-zero buffer and prints the result
 * through perf_print().
 *
 * Returns 0 on completion, -1 if the buffer cannot be allocated.
 */
int main(int argc, char *argv[])
{
	int i;
	int val = 0;
	void *buf;
	struct perf start, stop;

	printf("Test mem_zero_detect_perf %d bytes\n", TEST_LEN);

	if (posix_memalign(&buf, 64, TEST_LEN)) {
		printf("alloc error: Fail");
		return -1;
	}

	// posix_memalign() does not initialize the memory. Zero it so every
	// call scans the whole buffer instead of returning at the first
	// non-zero byte that happens to be there.
	memset(buf, 0, TEST_LEN);

	// Warm up
	isal_zero_detect(buf, TEST_LEN);

	perf_start(&start);
	for (i = 0; i < TEST_LOOPS; i++)
		val |= isal_zero_detect(buf, TEST_LEN);	// accumulate so the result is used
	perf_stop(&stop);

	printf("mem_zero_detect_perf" TEST_TYPE_STR ": ");
	perf_print(stop, start, (long long)TEST_LEN * i);

	free(buf);
	return 0;
}

176
mem/mem_zero_detect_sse.asm Normal file
View File

@ -0,0 +1,176 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "reg_sizes.asm"
; Register aliases used when assembling for elf64: arguments arrive in
; rdi, rsi, rdx, rcx, r8, r9 and the result is returned in rax.
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
; Scratch registers: tmp/tmpb are r11 and its low byte, tmp3 reuses arg4 (r8).
%define tmp r11
%define tmpb r11b
%define tmp3 arg4
%define return rax
; On elf64 func(x) is a plain label and no prologue/epilogue is emitted.
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
; Register aliases used when assembling for win64: arguments arrive in
; rcx, rdx, r8, r9 and the result is returned in rax.
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
; Scratch registers: tmp/tmpb are r11 and its low byte, tmp3 is r10.
%define tmp r11
%define tmpb r11b
%define tmp3 r10
%define return rax
; On win64 the function is opened with proc_frame; FUNC_SAVE only closes the
; prologue (nothing is pushed) and FUNC_RESTORE is empty.
%define func(x) proc_frame x
%macro FUNC_SAVE 0
end_prolog
%endmacro
%macro FUNC_RESTORE 0
%endmacro
%endif
; Function-level names: src = first argument (buffer pointer),
; len = second argument (length in bytes).
%define src arg0
%define len arg1
%define ptr arg2
; pos shares rax with the return value, so it is only valid as an offset
; until the result is written.
%define pos return
default rel
[bits 64]
section .text
align 16
global mem_zero_detect_sse:function
;; int mem_zero_detect_sse(void *src, size_t len)
;;
;; Returns 0 in rax when all len bytes at src are zero and 1 otherwise.
;; len == 0 returns 0.
;;
;; Buffers longer than 64 bytes are scanned in 64-byte steps followed by one
;; (possibly overlapping) check of the final 64 bytes. Shorter buffers are
;; covered by two overlapping halves (32, 16 or 8 bytes each); fewer than
;; 8 bytes use a byte loop.
func(mem_zero_detect_sse)
FUNC_SAVE
mov pos, 0
; len = length - 64; lengths <= 64 take the small-block path
sub len, 4*16
jle .mem_z_small_block
; Main loop: test 64 bytes per iteration while pos < length - 64
.mem_z_loop:
movdqu xmm0, [src+pos]
movdqu xmm1, [src+pos+1*16]
movdqu xmm2, [src+pos+2*16]
movdqu xmm3, [src+pos+3*16]
; ptest of a register with itself leaves ZF clear when any bit is set
ptest xmm0, xmm0
jnz .return_fail
ptest xmm1, xmm1
jnz .return_fail
ptest xmm2, xmm2
jnz .return_fail
ptest xmm3, xmm3
jnz .return_fail
add pos, 4*16
cmp pos, len
jl .mem_z_loop
; Final 64 bytes: len still holds length - 64, so this block ends exactly at
; the end of the region and may overlap bytes already tested by the loop
.mem_z_last_block:
movdqu xmm0, [src+len]
movdqu xmm1, [src+len+1*16]
movdqu xmm2, [src+len+2*16]
movdqu xmm3, [src+len+3*16]
ptest xmm0, xmm0
jnz .return_fail
ptest xmm1, xmm1
jnz .return_fail
ptest xmm2, xmm2
jnz .return_fail
ptest xmm3, xmm3
jnz .return_fail
; Every tested byte was zero
.return_pass:
mov return, 0
FUNC_RESTORE
ret
; length <= 64: restore the real length, then pick the widest pair of
; overlapping loads that fits
.mem_z_small_block:
add len, 4*16
cmp len, 2*16
jl .mem_z_lt32
; 32 <= length <= 64: first 32 bytes and last 32 bytes
movdqu xmm0, [src]
movdqu xmm1, [src+16]
movdqu xmm2, [src+len-2*16]
movdqu xmm3, [src+len-1*16]
ptest xmm0, xmm0
jnz .return_fail
ptest xmm1, xmm1
jnz .return_fail
ptest xmm2, xmm2
jnz .return_fail
ptest xmm3, xmm3
jnz .return_fail
jmp .return_pass
.mem_z_lt32:
cmp len, 16
jl .mem_z_lt16
; 16 <= length < 32: first 16 bytes and last 16 bytes
movdqu xmm0, [src]
movdqu xmm1, [src+len-16]
ptest xmm0, xmm0
jnz .return_fail
ptest xmm1, xmm1
jnz .return_fail
jmp .return_pass
.mem_z_lt16:
cmp len, 8
jl .mem_z_lt8
; 8 <= length < 16: OR the first and last 8 bytes together
mov tmp, [src]
mov tmp3,[src+len-8]
or tmp, tmp3
test tmp, tmp
jnz .return_fail
jmp .return_pass
.mem_z_lt8:
; An empty region counts as all zero
cmp len, 0
je .return_pass
; 1 <= length < 8: byte-by-byte; pos is still 0 on this path
.mem_z_1byte_loop:
mov tmpb, [src+pos]
cmp tmpb, 0
jnz .return_fail
add pos, 1
cmp pos, len
jl .mem_z_1byte_loop
jmp .return_pass
; A non-zero byte was found
.return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame

226
mem/mem_zero_detect_test.c Normal file
View File

@ -0,0 +1,226 @@
/**********************************************************************
Copyright(c) 2011-2018 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include "mem_routines.h"
#include "types.h"
// Sizes are parenthesized so that they expand as a single value inside
// larger expressions such as `r % TEST_LEN`; without the parentheses that
// expression parses as `(r % 8) * 1024`.
#define TEST_MEM (10*1024*1024)	// size in bytes of the allocated test buffer
#define TEST_LEN (8*1024)	// upper bound for the tested lengths and offsets
#define RAND_ALIGN 32
#define BORDER_BYTES (5*RAND_ALIGN + 7)
// Number of iterations of each randomized test; may be overridden at build time
#ifndef RANDOMS
# define RANDOMS 2000
#endif
// Seed passed to srand(); may be overridden at build time
#ifndef TEST_SEED
# define TEST_SEED 0x1234
#endif
// Functional test for isal_zero_detect().
//
// Allocates a 64-byte aligned buffer of TEST_MEM bytes, clears it, and then
// checks that all-zero regions are reported as zero and that a single
// non-zero byte is detected at fixed, border and pseudo-random positions.
// A '.' is printed after each completed stage.
//
// Returns 0 when every stage passes. On the first failure a message is
// printed and the program returns either the non-zero value produced by
// isal_zero_detect() or -1.
int main(int argc, char *argv[])
{
int i, j, l, sign;
unsigned long long r;
void *buf;
unsigned char *a;
int failures = 0, ret_neg = 1;
printf("mem_zero_detect_test %d bytes, %d randoms, seed=0x%x ", TEST_MEM, RANDOMS,
TEST_SEED);
if (posix_memalign(&buf, 64, TEST_MEM)) {
printf("alloc error: Fail");
return -1;
}
// Fixed seed so the pseudo-random stages are reproducible
srand(TEST_SEED);
// Test full zero buffer
memset(buf, 0, TEST_MEM);
failures = isal_zero_detect(buf, TEST_MEM);
if (failures) {
printf("Fail large buf test\n");
return failures;
}
putchar('.');
// Test small buffers: every length from 0 to TEST_LEN - 1, starting at the
// beginning of the buffer
for (i = 0; i < TEST_LEN; i++) {
failures |= isal_zero_detect(buf, i);
if (failures) {
printf("Fail len=%d\n", i);
return failures;
}
}
putchar('.');
// Test small buffers of every length i that end at offset TEST_LEN, so the
// start address a[TEST_LEN - i] moves with the length
a = buf;
for (i = 0; i < TEST_LEN; i++)
failures |= isal_zero_detect(&a[TEST_LEN - i], i);
if (failures) {
printf("Fail:\n");
return failures;
}
putchar('.');
// Test for detect non zero: a single non-zero byte in the middle of the
// buffer must be reported
a[TEST_MEM / 2] = 1;
ret_neg = isal_zero_detect(a, TEST_MEM);
if (ret_neg == 0) {
printf("Fail on not detect\n");
return -1;
}
a[TEST_MEM / 2] = 0;
putchar('.');
// Test various non-zero offsets: each single bit of each of the first
// BORDER_BYTES bytes must be detected; the byte is cleared again afterwards
for (i = 0; i < BORDER_BYTES; i++) {
for (j = 0; j < CHAR_BIT; j++) {
a[i] = 1 << j;
ret_neg = isal_zero_detect(a, TEST_MEM);
if (ret_neg == 0) {
printf("Fail on not detect offsets %d, %d\n", i, j);
return -1;
}
a[i] = 0;
}
}
putchar('.');
fflush(0);
// Test random non-zero offsets: r is derived from rand(), TEST_LEN and
// RAND_ALIGN; values at or beyond TEST_LEN are skipped
for (i = 0; i < RANDOMS; i++) {
r = rand();
r = (r % TEST_LEN) ^ (r & (RAND_ALIGN - 1));
if (r >= TEST_LEN)
continue;
a[r] = 1 << (r & (CHAR_BIT - 1));
ret_neg = isal_zero_detect(a, TEST_MEM);
if (ret_neg == 0) {
printf("Fail on not detect rand %d, e=%lld\n", i, r);
return -1;
}
a[r] = 0;
}
putchar('.');
fflush(0);
// Test putting non-zero byte at end of buffer: each single bit of each of
// the last BORDER_BYTES - 1 bytes must be detected
for (i = 1; i < BORDER_BYTES; i++) {
for (j = 0; j < CHAR_BIT; j++) {
a[TEST_MEM - i] = 1 << j;
ret_neg = isal_zero_detect(a, TEST_MEM);
if (ret_neg == 0) {
printf("Fail on not detect rand offset=%d, idx=%d\n", i, j);
return -1;
}
a[TEST_MEM - i] = 0;
}
}
putchar('.');
// Test various size buffers and non-zero offsets: for every length l, a
// non-zero byte at index i is placed inside the region (i < l, must be
// detected) and up to BORDER_BYTES past its end (i >= l, must be ignored)
for (l = 1; l < TEST_LEN; l++) {
for (i = 0; i < l + BORDER_BYTES; i++) {
// The region must read as all zero before the byte is set
failures = isal_zero_detect(a, l);
if (failures) {
printf("Fail on detect non-zero with l=%d\n", l);
return -1;
}
a[i] = 1;
ret_neg = isal_zero_detect(a, l);
if ((i < l) && (ret_neg == 0)) {
printf("Fail on non-zero buffer l=%d err=%d\n", l, i);
return -1;
}
if ((i >= l) && (ret_neg != 0)) {
printf("Fail on bad pass detect l=%d err=%d\n", l, i);
return -1;
}
a[i] = 0;
}
}
putchar('.');
// Test random test size and non-zero error offsets: the length l always
// extends past the non-zero byte at r, so it must be detected
for (i = 0; i < RANDOMS; i++) {
r = rand();
r = (r % TEST_LEN) ^ (r & (RAND_ALIGN - 1));
l = r + 1 + (rand() & (CHAR_BIT - 1));
a[r] = 1 << (r & (CHAR_BIT - 1));
ret_neg = isal_zero_detect(a, l);
if (ret_neg == 0) {
printf("Fail on not detect rand %d, l=%d, e=%lld\n", i, l, r);
return -1;
}
a[r] = 0;
}
putchar('.');
fflush(0);
// Test combinations of zero and non-zero buffers: the length l is chosen
// within 2 * RAND_ALIGN - 1 bytes either side of the non-zero byte at r, so
// the byte may fall inside (must be detected) or outside (must be ignored)
// the tested region
for (i = 0; i < RANDOMS; i++) {
r = rand();
r = (r % TEST_LEN) ^ (r & (RAND_ALIGN - 1));
sign = rand() & 1 ? 1 : -1;
l = r + sign * (rand() & (2 * RAND_ALIGN - 1));
// Skip combinations that fall outside the tested range
if ((l >= TEST_LEN) || (l < 0) || (r >= TEST_LEN))
continue;
a[r] = 1 << (r & (CHAR_BIT - 1));
ret_neg = isal_zero_detect(a, l);
if ((r < l) && (ret_neg == 0)) {
printf("Fail on non-zero rand buffer %d, l=%d, e=%lld\n", i, l, r);
return -1;
}
if ((r >= l) && (ret_neg != 0)) {
printf("Fail on bad pass zero detect rand %d, l=%d, e=%lld\n", i, l,
r);
return -1;
}
a[r] = 0;
}
putchar('.');
fflush(0);
// Every failing path above returns early, so failures is 0 when this
// point is reached
printf(failures == 0 ? " Pass\n" : " Fail\n");
return failures;
}