From 6e2013391ae9ac498c8ef35f8f819c08a9873c32 Mon Sep 17 00:00:00 2001 From: John Kariuki Date: Sat, 26 Mar 2016 02:44:32 -0700 Subject: [PATCH] mem: Add zero detect memory functions This patch introduces the base, avx and sse optimized zero detect memory functions. The zero detect memory function tests if a memory region is all zeroes. If all the bytes in the memory region are zero, the function returns zero. Otherwise, if the memory region has non zero bytes, the zero detect function returns a non-zero value (1 from the avx and sse versions, -1 from the base version). Change-Id: If965badf750377124d0067d09f888d0419554998 Signed-off-by: John Kariuki --- Makefile.am | 1 + Makefile.nmake | 18 ++- Makefile.unx | 2 +- include/mem_routines.h | 64 ++++++++ isa-l.def | 9 +- mem/Makefile.am | 45 ++++++ mem/mem_multibinary.asm | 42 ++++++ mem/mem_zero_detect_avx.asm | 189 ++++++++++++++++++++++++ mem/mem_zero_detect_base.c | 73 ++++++++++ mem/mem_zero_detect_base_aliases.c | 38 +++++ mem/mem_zero_detect_perf.c | 66 +++++++++ mem/mem_zero_detect_sse.asm | 176 ++++++++++++++++++++++ mem/mem_zero_detect_test.c | 226 +++++++++++++++++++++++++++++ 13 files changed, 940 insertions(+), 9 deletions(-) create mode 100644 include/mem_routines.h create mode 100644 mem/Makefile.am create mode 100644 mem/mem_multibinary.asm create mode 100644 mem/mem_zero_detect_avx.asm create mode 100644 mem/mem_zero_detect_base.c create mode 100644 mem/mem_zero_detect_base_aliases.c create mode 100644 mem/mem_zero_detect_perf.c create mode 100644 mem/mem_zero_detect_sse.asm create mode 100644 mem/mem_zero_detect_test.c diff --git a/Makefile.am b/Makefile.am index aa0b34e..6a14873 100644 --- a/Makefile.am +++ b/Makefile.am @@ -36,6 +36,7 @@ include igzip/Makefile.am include tests/fuzz/Makefile.am include examples/ec/Makefile.am include programs/Makefile.am +include mem/Makefile.am # LIB version info not necessarily the same as package version LIBISAL_CURRENT=2 diff --git a/Makefile.nmake b/Makefile.nmake index 8b2fee8..56ae888 100644 --- a/Makefile.nmake +++ 
b/Makefile.nmake @@ -135,9 +135,13 @@ objs = \ bin\igzip_gen_icf_map_lh1_06.obj \ bin\igzip_set_long_icf_fg_04.obj \ bin\igzip_set_long_icf_fg_06.obj \ - bin\igzip_icf_body.obj + bin\igzip_icf_body.obj \ + bin\mem_zero_detect_avx.obj \ + bin\mem_zero_detect_base.obj \ + bin\mem_multibinary.obj \ + bin\mem_zero_detect_sse.obj -INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/ +INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/ -Imem/ LINKFLAGS = /nologo CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $(INCLUDES) $(D) AFLAGS = -f win64 $(INCLUDES) $(D) @@ -180,6 +184,10 @@ $? {igzip}.asm.obj: $(AS) $(AFLAGS) -o $@ $? +{mem}.c.obj: + $(CC) $(CFLAGS) /c -Fo$@ $? +{mem}.asm.obj: + $(AS) $(AFLAGS) -o $@ $? # Examples ex = xor_example.exe crc_simple_test.exe crc64_example.exe igzip_example.exe igzip_sync_flush_example.exe @@ -205,7 +213,8 @@ checks = \ crc32_funcs_test.exe \ crc64_funcs_test.exe \ igzip_wrapper_hdr_test.exe \ - igzip_rand_test.exe + igzip_rand_test.exe \ + mem_zero_detect_test.exe checks: lib $(checks) $(checks): $(@B).obj @@ -249,7 +258,8 @@ perfs = \ crc32_iscsi_perf.exe \ igzip_perf.exe \ igzip_sync_flush_perf.exe \ - crc32_gzip_refl_perf.exe + crc32_gzip_refl_perf.exe \ + mem_zero_detect_perf.exe perfs: lib $(perfs) $(perfs): $(@B).obj diff --git a/Makefile.unx b/Makefile.unx index 4960b63..16ad1ff 100644 --- a/Makefile.unx +++ b/Makefile.unx @@ -27,7 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################## -units = erasure_code raid crc igzip programs +units = erasure_code raid crc igzip programs mem default: lib diff --git a/include/mem_routines.h b/include/mem_routines.h new file mode 100644 index 0000000..3d23522 --- /dev/null +++ b/include/mem_routines.h @@ -0,0 +1,64 @@ +/********************************************************************** + Copyright(c) 2011-2018 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include + +/** + * @file mem_routines.h + * @brief Interface to storage mem operations + * + * Defines the interface for vector versions of common memory functions. 
+ */ + + +#ifndef _MEM_ROUTINES_H_ +#define _MEM_ROUTINES_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Detect if a memory region is all zero + * + * Zero detect function with optimizations for large blocks > 128 bytes + * + * @param mem Pointer to memory region to test + * @param len Length of region in bytes + * @returns 0 - region is all zeros (a zero-length region also returns 0) + * other - region has non zero bytes; the value is non-zero but implementation specific (1 or -1), so compare against 0 only + */ +int isal_zero_detect(void *mem, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif // _MEM_ROUTINES_H_ + diff --git a/isa-l.def b/isa-l.def index 1a64232..e7d735c 100644 --- a/isa-l.def +++ b/isa-l.def @@ -105,7 +105,8 @@ isal_deflate_reset @101 isal_inflate_set_dict @102 isal_inflate_reset @103 crc16_t10dif_copy @104 -isal_read_gzip_header @105 -isal_read_zlib_header @106 -isal_write_gzip_header @107 -isal_write_zlib_header @108 +isal_read_gzip_header @105 +isal_read_zlib_header @106 +isal_write_gzip_header @107 +isal_write_zlib_header @108 +isal_zero_detect @109 diff --git a/mem/Makefile.am b/mem/Makefile.am new file mode 100644 index 0000000..4fc82a7 --- /dev/null +++ b/mem/Makefile.am @@ -0,0 +1,45 @@ +######################################################################## +# Copyright(c) 2011-2018 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +lsrc += mem/mem_zero_detect_base.c + +lsrc_base_aliases += mem/mem_zero_detect_base_aliases.c + +lsrc_x86_64 += mem/mem_zero_detect_avx.asm \ + mem/mem_zero_detect_sse.asm \ + mem/mem_multibinary.asm + +extern_hdrs += include/mem_routines.h + +other_src += include/test.h \ + include/types.h + +check_tests += mem/mem_zero_detect_test + +perf_tests += mem/mem_zero_detect_perf diff --git a/mem/mem_multibinary.asm b/mem/mem_multibinary.asm new file mode 100644 index 0000000..38f63e2 --- /dev/null +++ b/mem/mem_multibinary.asm @@ -0,0 +1,42 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2018 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "reg_sizes.asm" +%include "multibinary.asm" + +default rel +[bits 64] + +extern mem_zero_detect_avx +extern mem_zero_detect_sse +extern mem_zero_detect_base + +mbin_interface isal_zero_detect + +mbin_dispatch_init5 isal_zero_detect, mem_zero_detect_base, mem_zero_detect_sse, mem_zero_detect_avx, mem_zero_detect_avx diff --git a/mem/mem_zero_detect_avx.asm b/mem/mem_zero_detect_avx.asm new file mode 100644 index 0000000..1ecd786 --- /dev/null +++ b/mem/mem_zero_detect_avx.asm @@ -0,0 +1,189 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2018 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmpb r11b + %define tmp3 arg4 + %define return rax + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define tmp r11 + %define tmpb r11b + %define tmp3 r10 + %define return rax + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + end_prolog + %endmacro + %macro FUNC_RESTORE 0 + %endmacro +%endif + +%define src arg0 +%define len arg1 +%define ptr arg2 +%define pos return + +default rel + +[bits 64] +section .text + +align 16 +global mem_zero_detect_avx:function ; in: src = region start, len = length in bytes; out: rax = 0 if every byte is zero, 1 otherwise +func(mem_zero_detect_avx) + FUNC_SAVE + mov pos, 0 + sub len, 4*32 ; len becomes the offset of the final 4*32-byte block + jle .mem_z_small_block ; 4*32 bytes or fewer: no full-block loop + +.mem_z_loop: ; test 4*32 bytes per pass; any non-zero vector exits through .return_fail + vmovdqu ymm0, [src+pos] + vmovdqu ymm1, [src+pos+1*32] + vmovdqu ymm2, [src+pos+2*32] + vmovdqu ymm3, [src+pos+3*32] + vptest ymm0, ymm0 + jnz .return_fail + vptest ymm1, ymm1 + jnz .return_fail + vptest ymm2, ymm2 + jnz .return_fail + vptest ymm3, ymm3 + jnz .return_fail + add pos, 4*32 + cmp pos, len + jl .mem_z_loop + +.mem_z_last_block: ; final 4*32 bytes of the region, read at src+len; may overlap bytes the loop already tested + vmovdqu ymm0, [src+len] + vmovdqu ymm1, [src+len+1*32] + vmovdqu ymm2, [src+len+2*32] + vmovdqu ymm3, [src+len+3*32] + vptest ymm0, ymm0 + jnz .return_fail + vptest ymm1, ymm1 + jnz .return_fail + vptest ymm2, ymm2 + jnz .return_fail + vptest ymm3, ymm3 + jnz .return_fail + +.return_pass: ; every byte tested was zero: return 0 + mov return, 0 + FUNC_RESTORE + ret + + +.mem_z_small_block: ; restore len, then cover the region with overlapping head and tail loads + add len, 4*32 + cmp len, 2*32 + jl .mem_z_lt64 + vmovdqu ymm0, [src] + vmovdqu ymm1, [src+32] + vmovdqu ymm2, [src+len-2*32] + vmovdqu ymm3, [src+len-1*32] + vptest ymm0, ymm0 + jnz .return_fail + vptest ymm1, ymm1 + jnz .return_fail + vptest ymm2, ymm2 + jnz .return_fail + vptest ymm3, ymm3 + jnz .return_fail + 
jmp .return_pass + +.mem_z_lt64: ; fewer than 64 bytes: first and last 32 bytes when len >= 32 + cmp len, 32 + jl .mem_z_lt32 + vmovdqu ymm0, [src] + vmovdqu ymm1, [src+len-32] + vptest ymm0, ymm0 + jnz .return_fail + vptest ymm1, ymm1 + jnz .return_fail + jmp .return_pass + + +.mem_z_lt32: ; fewer than 32 bytes: first and last 16 bytes when len >= 16 + cmp len, 16 + jl .mem_z_lt16 + vmovdqu xmm0, [src] + vmovdqu xmm1, [src+len-16] + vptest xmm0, xmm0 + jnz .return_fail + vptest xmm1, xmm1 + jnz .return_fail + jmp .return_pass + + +.mem_z_lt16: ; fewer than 16 bytes: first and last 8 bytes OR-ed together when len >= 8 + cmp len, 8 + jl .mem_z_lt8 + mov tmp, [src] + mov tmp3,[src+len-8] + or tmp, tmp3 + test tmp, tmp + jnz .return_fail + jmp .return_pass + +.mem_z_lt8: ; fewer than 8 bytes: a zero length passes, otherwise test one byte at a time + cmp len, 0 + je .return_pass +.mem_z_1byte_loop: ; pos still holds the 0 written at function entry + mov tmpb, [src+pos] + cmp tmpb, 0 + jnz .return_fail + add pos, 1 + cmp pos, len + jl .mem_z_1byte_loop + jmp .return_pass + +.return_fail: ; a non-zero byte was found: return 1 + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame diff --git a/mem/mem_zero_detect_base.c b/mem/mem_zero_detect_base.c new file mode 100644 index 0000000..9f67e0c --- /dev/null +++ b/mem/mem_zero_detect_base.c @@ -0,0 +1,73 @@ +/********************************************************************** + Copyright(c) 2011-2018 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +// Portable zero detect: returns 0 when all n bytes at buf are zero (or n is 0), -1 otherwise +int mem_zero_detect_base(void *buf, size_t n) +{ + unsigned char *c; +#if __WORDSIZE == 64 + unsigned long long a = 0, *p = buf; +#else + unsigned int a = 0, *p = buf; +#endif + + // Check buffer in word-sized steps; stride by the pointee size sizeof(*p), which can be narrower than a pointer when __WORDSIZE is not defined + while (n >= sizeof(*p)) { + n -= sizeof(*p); + if (*p++ != 0) + return -1; + } + + // Check the remaining n < sizeof(*p) bytes + c = (unsigned char *)p; + + switch (n) { + case 7: + a |= *c++; // fall through to case 6,5,4 + case 6: + a |= *c++; // fall through to case 5,4 + case 5: + a |= *c++; // fall through to case 4 + case 4: + a |= *((unsigned int *)c); + break; + case 3: + a |= *c++; // fall through to case 2 + case 2: + a |= *((unsigned short *)c); + break; + case 1: + a |= *c; + break; + } + + return (a == 0) ? 0 : -1; +} diff --git a/mem/mem_zero_detect_base_aliases.c b/mem/mem_zero_detect_base_aliases.c new file mode 100644 index 0000000..8c75b06 --- /dev/null +++ b/mem/mem_zero_detect_base_aliases.c @@ -0,0 +1,38 @@ +/********************************************************************** + Copyright(c) 2011-2018 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include "mem_routines.h" + +int mem_zero_detect_base(void *buf, size_t n); +// Base alias: forwards the public isal_zero_detect() entry point to mem_zero_detect_base() unchanged +int isal_zero_detect(void *mem, size_t len) +{ + return mem_zero_detect_base(mem, len); +} diff --git a/mem/mem_zero_detect_perf.c b/mem/mem_zero_detect_perf.c new file mode 100644 index 0000000..1295ae1 --- /dev/null +++ b/mem/mem_zero_detect_perf.c @@ -0,0 +1,66 @@ +/********************************************************************** + Copyright(c) 2011-2018 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include "mem_routines.h" +#include "test.h" +#include "types.h" + +#define TEST_LEN 8*1024 +#define TEST_LOOPS 10000000 +#define TEST_TYPE_STR "_warm" + +int main(int argc, char *argv[]) +{ + int i; + int val = 0; + void *buf; + struct perf start, stop; + + printf("Test mem_zero_detect_perf %d bytes\n", TEST_LEN); + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + // Warm up + isal_zero_detect(buf, TEST_LEN); + + perf_start(&start); + + for (i = 0; i < TEST_LOOPS; i++) + val |= isal_zero_detect(buf, TEST_LEN); + + perf_stop(&stop); + printf("mem_zero_detect_perf" TEST_TYPE_STR ": "); + perf_print(stop, start, (long long)TEST_LEN * i); + + return 0; +} diff --git a/mem/mem_zero_detect_sse.asm b/mem/mem_zero_detect_sse.asm new file mode 100644 index 0000000..5f1c7eb --- /dev/null +++ b/mem/mem_zero_detect_sse.asm @@ -0,0 +1,176 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2018 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmpb r11b + %define tmp3 arg4 + %define return rax + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define tmp r11 + %define tmpb r11b + %define tmp3 r10 + %define return rax + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + end_prolog + %endmacro + %macro FUNC_RESTORE 0 + %endmacro +%endif + +%define src arg0 +%define len arg1 +%define ptr arg2 +%define pos return + +default rel + +[bits 64] +section .text + +align 16 +global mem_zero_detect_sse:function ; in: src = region start, len = length in bytes; out: rax = 0 if every byte is zero, 1 otherwise +func(mem_zero_detect_sse) + FUNC_SAVE + mov pos, 0 + sub len, 4*16 ; len becomes the offset of the final 4*16-byte block + jle .mem_z_small_block ; 4*16 bytes or fewer: no full-block loop + +.mem_z_loop: ; test 4*16 bytes per pass; any non-zero vector exits through .return_fail + movdqu xmm0, [src+pos] + movdqu xmm1, [src+pos+1*16] + movdqu xmm2, [src+pos+2*16] + movdqu xmm3, [src+pos+3*16] + ptest xmm0, xmm0 + jnz .return_fail + ptest xmm1, xmm1 + jnz .return_fail + ptest xmm2, xmm2 + jnz .return_fail + ptest xmm3, xmm3 + jnz .return_fail + add pos, 4*16 + cmp pos, len + jl .mem_z_loop + +.mem_z_last_block: ; final 4*16 bytes of the region, read at src+len; may overlap bytes the loop already tested + movdqu xmm0, [src+len] + movdqu xmm1, [src+len+1*16] + movdqu xmm2, [src+len+2*16] + movdqu xmm3, [src+len+3*16] + ptest xmm0, xmm0 + jnz .return_fail + ptest xmm1, xmm1 + jnz .return_fail + ptest xmm2, xmm2 + jnz .return_fail + ptest xmm3, xmm3 + jnz .return_fail + +.return_pass: ; every byte tested was zero: return 0 + mov return, 0 + FUNC_RESTORE + ret + + +.mem_z_small_block: ; restore len, then cover the region with overlapping head and tail loads + add len, 4*16 + cmp len, 2*16 + jl .mem_z_lt32 + movdqu xmm0, [src] + movdqu xmm1, [src+16] + movdqu xmm2, [src+len-2*16] + movdqu xmm3, [src+len-1*16] + ptest xmm0, xmm0 + jnz .return_fail + ptest xmm1, xmm1 + jnz .return_fail + ptest xmm2, xmm2 + jnz .return_fail + ptest xmm3, xmm3 + jnz .return_fail + jmp .return_pass + 
+.mem_z_lt32: ; fewer than 32 bytes: first and last 16 bytes when len >= 16 + cmp len, 16 + jl .mem_z_lt16 + movdqu xmm0, [src] + movdqu xmm1, [src+len-16] + ptest xmm0, xmm0 + jnz .return_fail + ptest xmm1, xmm1 + jnz .return_fail + jmp .return_pass + +.mem_z_lt16: ; fewer than 16 bytes: first and last 8 bytes OR-ed together when len >= 8 + cmp len, 8 + jl .mem_z_lt8 + mov tmp, [src] + mov tmp3,[src+len-8] + or tmp, tmp3 + test tmp, tmp + jnz .return_fail + jmp .return_pass + +.mem_z_lt8: ; fewer than 8 bytes: a zero length passes, otherwise test one byte at a time + cmp len, 0 + je .return_pass +.mem_z_1byte_loop: ; pos still holds the 0 written at function entry + mov tmpb, [src+pos] + cmp tmpb, 0 + jnz .return_fail + add pos, 1 + cmp pos, len + jl .mem_z_1byte_loop + jmp .return_pass + +.return_fail: ; a non-zero byte was found: return 1 + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame diff --git a/mem/mem_zero_detect_test.c b/mem/mem_zero_detect_test.c new file mode 100644 index 0000000..1336c97 --- /dev/null +++ b/mem/mem_zero_detect_test.c @@ -0,0 +1,226 @@ +/********************************************************************** + Copyright(c) 2011-2018 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include +#include +#include "mem_routines.h" +#include "types.h" + +#define TEST_MEM 10*1024*1024 +#define TEST_LEN 8*1024 +#define RAND_ALIGN 32 +#define BORDER_BYTES (5*RAND_ALIGN + 7) + +#ifndef RANDOMS +# define RANDOMS 2000 +#endif +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +int main(int argc, char *argv[]) +{ + int i, j, l, sign; + unsigned long long r; + void *buf; + unsigned char *a; + int failures = 0, ret_neg = 1; + + printf("mem_zero_detect_test %d bytes, %d randoms, seed=0x%x ", TEST_MEM, RANDOMS, + TEST_SEED); + if (posix_memalign(&buf, 64, TEST_MEM)) { + printf("alloc error: Fail"); + return -1; + } + + srand(TEST_SEED); + + // Test full zero buffer + memset(buf, 0, TEST_MEM); + failures = isal_zero_detect(buf, TEST_MEM); + + if (failures) { + printf("Fail large buf test\n"); + return failures; + } + putchar('.'); + + // Test small buffers + for (i = 0; i < TEST_LEN; i++) { + failures |= isal_zero_detect(buf, i); + if (failures) { + printf("Fail len=%d\n", i); + return failures; + } + } + putchar('.'); + + // Test small buffers near end of alloc region + a = buf; + for (i = 0; i < TEST_LEN; i++) + failures |= isal_zero_detect(&a[TEST_LEN - i], i); + + if (failures) { + printf("Fail:\n"); + return failures; + } + putchar('.'); + + // Test for detect non zero + a[TEST_MEM / 2] = 1; + ret_neg = isal_zero_detect(a, TEST_MEM); + 
if (ret_neg == 0) { + printf("Fail on not detect\n"); + return -1; + } + a[TEST_MEM / 2] = 0; + putchar('.'); + + // Test various non-zero offsets + for (i = 0; i < BORDER_BYTES; i++) { + for (j = 0; j < CHAR_BIT; j++) { + a[i] = 1 << j; + ret_neg = isal_zero_detect(a, TEST_MEM); + if (ret_neg == 0) { + printf("Fail on not detect offsets %d, %d\n", i, j); + return -1; + } + a[i] = 0; + } + } + putchar('.'); + fflush(0); + + // Test random non-zero offsets + for (i = 0; i < RANDOMS; i++) { + r = rand(); + r = (r % TEST_LEN) ^ (r & (RAND_ALIGN - 1)); + if (r >= TEST_LEN) + continue; + + a[r] = 1 << (r & (CHAR_BIT - 1)); + ret_neg = isal_zero_detect(a, TEST_MEM); + if (ret_neg == 0) { + printf("Fail on not detect rand %d, e=%lld\n", i, r); + return -1; + } + a[r] = 0; + } + putchar('.'); + fflush(0); + + // Test putting non-zero byte at end of buffer + for (i = 1; i < BORDER_BYTES; i++) { + for (j = 0; j < CHAR_BIT; j++) { + a[TEST_MEM - i] = 1 << j; + ret_neg = isal_zero_detect(a, TEST_MEM); + if (ret_neg == 0) { + printf("Fail on not detect rand offset=%d, idx=%d\n", i, j); + return -1; + } + a[TEST_MEM - i] = 0; + } + } + putchar('.'); + + // Test various size buffers and non-zero offsets + for (l = 1; l < TEST_LEN; l++) { + for (i = 0; i < l + BORDER_BYTES; i++) { + failures = isal_zero_detect(a, l); + + if (failures) { + printf("Fail on detect non-zero with l=%d\n", l); + return -1; + } + + a[i] = 1; + ret_neg = isal_zero_detect(a, l); + + if ((i < l) && (ret_neg == 0)) { + printf("Fail on non-zero buffer l=%d err=%d\n", l, i); + return -1; + } + if ((i >= l) && (ret_neg != 0)) { + printf("Fail on bad pass detect l=%d err=%d\n", l, i); + return -1; + } + a[i] = 0; + } + } + putchar('.'); + + // Test random test size and non-zero error offsets + for (i = 0; i < RANDOMS; i++) { + r = rand(); + r = (r % TEST_LEN) ^ (r & (RAND_ALIGN - 1)); + l = r + 1 + (rand() & (CHAR_BIT - 1)); + a[r] = 1 << (r & (CHAR_BIT - 1)); + ret_neg = isal_zero_detect(a, l); + if (ret_neg == 
0) { + printf("Fail on not detect rand %d, l=%d, e=%lld\n", i, l, r); + return -1; + } + a[r] = 0; + } + putchar('.'); + fflush(0); + + // Test combinations of zero and non-zero buffers + for (i = 0; i < RANDOMS; i++) { + r = rand(); + r = (r % TEST_LEN) ^ (r & (RAND_ALIGN - 1)); + sign = rand() & 1 ? 1 : -1; + l = r + sign * (rand() & (2 * RAND_ALIGN - 1)); + + if ((l >= TEST_LEN) || (l < 0) || (r >= TEST_LEN)) + continue; + + a[r] = 1 << (r & (CHAR_BIT - 1)); + ret_neg = isal_zero_detect(a, l); + + if ((r < l) && (ret_neg == 0)) { + printf("Fail on non-zero rand buffer %d, l=%d, e=%lld\n", i, l, r); + return -1; + } + if ((r >= l) && (ret_neg != 0)) { + printf("Fail on bad pass zero detect rand %d, l=%d, e=%lld\n", i, l, + r); + return -1; + } + + a[r] = 0; + } + putchar('.'); + fflush(0); + + printf(failures == 0 ? " Pass\n" : " Fail\n"); + return failures; +}