am 1abb1b93: Merge "Add 64-bit Silvermont-optimized string/memory functions."
* commit '1abb1b939d318513066ef580377cc3659ceac8ef': Add 64-bit Silvermont-optimized string/memory functions.
This commit is contained in:
commit
199e830a88
36
libc/arch-x86_64/string/cache.h
Normal file
36
libc/arch-x86_64/string/cache.h
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Values are optimized for Silvermont */
|
||||||
|
#define SHARED_CACHE_SIZE (1024*1024) /* Silvermont L2 Cache */
|
||||||
|
#define DATA_CACHE_SIZE (24*1024) /* Silvermont L1 Data Cache */
|
||||||
|
|
||||||
|
#define SHARED_CACHE_SIZE_HALF (SHARED_CACHE_SIZE / 2)
|
||||||
|
#define DATA_CACHE_SIZE_HALF (DATA_CACHE_SIZE / 2)
|
33
libc/arch-x86_64/string/sse2-bcopy-slm.S
Normal file
33
libc/arch-x86_64/string/sse2-bcopy-slm.S
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define USE_AS_BCOPY
|
||||||
|
#define MEMMOVE bcopy
|
||||||
|
#include "sse2-memmove-slm.S"
|
33
libc/arch-x86_64/string/sse2-bzero-slm.S
Normal file
33
libc/arch-x86_64/string/sse2-bzero-slm.S
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define USE_AS_BZERO_P
|
||||||
|
#define MEMSET bzero
|
||||||
|
#include "sse2-memset-slm.S"
|
299
libc/arch-x86_64/string/sse2-memcpy-slm.S
Normal file
299
libc/arch-x86_64/string/sse2-memcpy-slm.S
Normal file
@ -0,0 +1,299 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cache.h"
|
||||||
|
|
||||||
|
#ifndef MEMCPY
|
||||||
|
# define MEMCPY memcpy
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef L
|
||||||
|
# define L(label) .L##label
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef cfi_startproc
|
||||||
|
# define cfi_startproc .cfi_startproc
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef cfi_endproc
|
||||||
|
# define cfi_endproc .cfi_endproc
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef cfi_rel_offset
|
||||||
|
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef cfi_restore
|
||||||
|
# define cfi_restore(reg) .cfi_restore reg
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef cfi_adjust_cfa_offset
|
||||||
|
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef ENTRY
|
||||||
|
# define ENTRY(name) \
|
||||||
|
.type name, @function; \
|
||||||
|
.globl name; \
|
||||||
|
.p2align 4; \
|
||||||
|
name: \
|
||||||
|
cfi_startproc
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef END
|
||||||
|
# define END(name) \
|
||||||
|
cfi_endproc; \
|
||||||
|
.size name, .-name
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define CFI_PUSH(REG) \
|
||||||
|
cfi_adjust_cfa_offset (4); \
|
||||||
|
cfi_rel_offset (REG, 0)
|
||||||
|
|
||||||
|
#define CFI_POP(REG) \
|
||||||
|
cfi_adjust_cfa_offset (-4); \
|
||||||
|
cfi_restore (REG)
|
||||||
|
|
||||||
|
#define PUSH(REG) push REG;
|
||||||
|
#define POP(REG) pop REG;
|
||||||
|
|
||||||
|
#define ENTRANCE PUSH (%rbx);
|
||||||
|
#define RETURN_END POP (%rbx); ret
|
||||||
|
#define RETURN RETURN_END;
|
||||||
|
|
||||||
|
.section .text.sse2,"ax",@progbits
|
||||||
|
ENTRY (MEMCPY)
|
||||||
|
ENTRANCE
|
||||||
|
cmp %rsi, %rdi
|
||||||
|
je L(return)
|
||||||
|
|
||||||
|
cmp $16, %rdx
|
||||||
|
jbe L(len_0_16_bytes)
|
||||||
|
|
||||||
|
cmp $SHARED_CACHE_SIZE_HALF, %rdx
|
||||||
|
jae L(large_page)
|
||||||
|
|
||||||
|
movdqu (%rsi), %xmm0
|
||||||
|
movdqu -16(%rsi, %rdx), %xmm1
|
||||||
|
cmp $32, %rdx
|
||||||
|
movdqu %xmm0, (%rdi)
|
||||||
|
movdqu %xmm1, -16(%rdi, %rdx)
|
||||||
|
jbe L(return)
|
||||||
|
|
||||||
|
movdqu 16(%rsi), %xmm0
|
||||||
|
movdqu -32(%rsi, %rdx), %xmm1
|
||||||
|
cmp $64, %rdx
|
||||||
|
movdqu %xmm0, 16(%rdi)
|
||||||
|
movdqu %xmm1, -32(%rdi, %rdx)
|
||||||
|
jbe L(return)
|
||||||
|
|
||||||
|
movdqu 32(%rsi), %xmm0
|
||||||
|
movdqu 48(%rsi), %xmm1
|
||||||
|
movdqu -48(%rsi, %rdx), %xmm2
|
||||||
|
movdqu -64(%rsi, %rdx), %xmm3
|
||||||
|
cmp $128, %rdx
|
||||||
|
movdqu %xmm0, 32(%rdi)
|
||||||
|
movdqu %xmm1, 48(%rdi)
|
||||||
|
movdqu %xmm2, -48(%rdi, %rdx)
|
||||||
|
movdqu %xmm3, -64(%rdi, %rdx)
|
||||||
|
jbe L(return)
|
||||||
|
|
||||||
|
/* Now the main loop: we align the address of the destination. */
|
||||||
|
lea 64(%rdi), %r8
|
||||||
|
and $-64, %r8
|
||||||
|
|
||||||
|
add %rdi, %rdx
|
||||||
|
and $-64, %rdx
|
||||||
|
|
||||||
|
sub %rdi, %rsi
|
||||||
|
|
||||||
|
/* We should stop two iterations before the termination
|
||||||
|
(in order not to misprefetch). */
|
||||||
|
sub $64, %rdx
|
||||||
|
cmp %r8, %rdx
|
||||||
|
je L(main_loop_just_one_iteration)
|
||||||
|
|
||||||
|
sub $64, %rdx
|
||||||
|
cmp %r8, %rdx
|
||||||
|
je L(main_loop_last_two_iterations)
|
||||||
|
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
L(main_loop_cache):
|
||||||
|
|
||||||
|
prefetcht0 128(%r8, %rsi)
|
||||||
|
|
||||||
|
movdqu (%r8, %rsi), %xmm0
|
||||||
|
movdqu 16(%r8, %rsi), %xmm1
|
||||||
|
movdqu 32(%r8, %rsi), %xmm2
|
||||||
|
movdqu 48(%r8, %rsi), %xmm3
|
||||||
|
movdqa %xmm0, (%r8)
|
||||||
|
movdqa %xmm1, 16(%r8)
|
||||||
|
movdqa %xmm2, 32(%r8)
|
||||||
|
movdqa %xmm3, 48(%r8)
|
||||||
|
lea 64(%r8), %r8
|
||||||
|
cmp %r8, %rdx
|
||||||
|
jne L(main_loop_cache)
|
||||||
|
|
||||||
|
L(main_loop_last_two_iterations):
|
||||||
|
movdqu (%r8, %rsi), %xmm0
|
||||||
|
movdqu 16(%r8, %rsi), %xmm1
|
||||||
|
movdqu 32(%r8, %rsi), %xmm2
|
||||||
|
movdqu 48(%r8, %rsi), %xmm3
|
||||||
|
movdqu 64(%r8, %rsi), %xmm4
|
||||||
|
movdqu 80(%r8, %rsi), %xmm5
|
||||||
|
movdqu 96(%r8, %rsi), %xmm6
|
||||||
|
movdqu 112(%r8, %rsi), %xmm7
|
||||||
|
movdqa %xmm0, (%r8)
|
||||||
|
movdqa %xmm1, 16(%r8)
|
||||||
|
movdqa %xmm2, 32(%r8)
|
||||||
|
movdqa %xmm3, 48(%r8)
|
||||||
|
movdqa %xmm4, 64(%r8)
|
||||||
|
movdqa %xmm5, 80(%r8)
|
||||||
|
movdqa %xmm6, 96(%r8)
|
||||||
|
movdqa %xmm7, 112(%r8)
|
||||||
|
jmp L(return)
|
||||||
|
|
||||||
|
L(main_loop_just_one_iteration):
|
||||||
|
movdqu (%r8, %rsi), %xmm0
|
||||||
|
movdqu 16(%r8, %rsi), %xmm1
|
||||||
|
movdqu 32(%r8, %rsi), %xmm2
|
||||||
|
movdqu 48(%r8, %rsi), %xmm3
|
||||||
|
movdqa %xmm0, (%r8)
|
||||||
|
movdqa %xmm1, 16(%r8)
|
||||||
|
movdqa %xmm2, 32(%r8)
|
||||||
|
movdqa %xmm3, 48(%r8)
|
||||||
|
jmp L(return)
|
||||||
|
|
||||||
|
L(large_page):
|
||||||
|
movdqu (%rsi), %xmm0
|
||||||
|
movdqu 16(%rsi), %xmm1
|
||||||
|
movdqu 32(%rsi), %xmm2
|
||||||
|
movdqu 48(%rsi), %xmm3
|
||||||
|
movdqu -64(%rsi, %rdx), %xmm4
|
||||||
|
movdqu -48(%rsi, %rdx), %xmm5
|
||||||
|
movdqu -32(%rsi, %rdx), %xmm6
|
||||||
|
movdqu -16(%rsi, %rdx), %xmm7
|
||||||
|
movdqu %xmm0, (%rdi)
|
||||||
|
movdqu %xmm1, 16(%rdi)
|
||||||
|
movdqu %xmm2, 32(%rdi)
|
||||||
|
movdqu %xmm3, 48(%rdi)
|
||||||
|
movdqu %xmm4, -64(%rdi, %rdx)
|
||||||
|
movdqu %xmm5, -48(%rdi, %rdx)
|
||||||
|
movdqu %xmm6, -32(%rdi, %rdx)
|
||||||
|
movdqu %xmm7, -16(%rdi, %rdx)
|
||||||
|
|
||||||
|
movdqu 64(%rsi), %xmm0
|
||||||
|
movdqu 80(%rsi), %xmm1
|
||||||
|
movdqu 96(%rsi), %xmm2
|
||||||
|
movdqu 112(%rsi), %xmm3
|
||||||
|
movdqu -128(%rsi, %rdx), %xmm4
|
||||||
|
movdqu -112(%rsi, %rdx), %xmm5
|
||||||
|
movdqu -96(%rsi, %rdx), %xmm6
|
||||||
|
movdqu -80(%rsi, %rdx), %xmm7
|
||||||
|
movdqu %xmm0, 64(%rdi)
|
||||||
|
movdqu %xmm1, 80(%rdi)
|
||||||
|
movdqu %xmm2, 96(%rdi)
|
||||||
|
movdqu %xmm3, 112(%rdi)
|
||||||
|
movdqu %xmm4, -128(%rdi, %rdx)
|
||||||
|
movdqu %xmm5, -112(%rdi, %rdx)
|
||||||
|
movdqu %xmm6, -96(%rdi, %rdx)
|
||||||
|
movdqu %xmm7, -80(%rdi, %rdx)
|
||||||
|
|
||||||
|
/* Now the main loop with non temporal stores. We align
|
||||||
|
the address of the destination. */
|
||||||
|
lea 128(%rdi), %r8
|
||||||
|
and $-128, %r8
|
||||||
|
|
||||||
|
add %rdi, %rdx
|
||||||
|
and $-128, %rdx
|
||||||
|
|
||||||
|
sub %rdi, %rsi
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
L(main_loop_large_page):
|
||||||
|
movdqu (%r8, %rsi), %xmm0
|
||||||
|
movdqu 16(%r8, %rsi), %xmm1
|
||||||
|
movdqu 32(%r8, %rsi), %xmm2
|
||||||
|
movdqu 48(%r8, %rsi), %xmm3
|
||||||
|
movdqu 64(%r8, %rsi), %xmm4
|
||||||
|
movdqu 80(%r8, %rsi), %xmm5
|
||||||
|
movdqu 96(%r8, %rsi), %xmm6
|
||||||
|
movdqu 112(%r8, %rsi), %xmm7
|
||||||
|
movntdq %xmm0, (%r8)
|
||||||
|
movntdq %xmm1, 16(%r8)
|
||||||
|
movntdq %xmm2, 32(%r8)
|
||||||
|
movntdq %xmm3, 48(%r8)
|
||||||
|
movntdq %xmm4, 64(%r8)
|
||||||
|
movntdq %xmm5, 80(%r8)
|
||||||
|
movntdq %xmm6, 96(%r8)
|
||||||
|
movntdq %xmm7, 112(%r8)
|
||||||
|
lea 128(%r8), %r8
|
||||||
|
cmp %r8, %rdx
|
||||||
|
jne L(main_loop_large_page)
|
||||||
|
sfence
|
||||||
|
jmp L(return)
|
||||||
|
|
||||||
|
L(len_0_16_bytes):
|
||||||
|
testb $24, %dl
|
||||||
|
jne L(len_9_16_bytes)
|
||||||
|
testb $4, %dl
|
||||||
|
.p2align 4,,5
|
||||||
|
jne L(len_5_8_bytes)
|
||||||
|
test %rdx, %rdx
|
||||||
|
.p2align 4,,2
|
||||||
|
je L(return)
|
||||||
|
movzbl (%rsi), %ebx
|
||||||
|
testb $2, %dl
|
||||||
|
movb %bl, (%rdi)
|
||||||
|
je L(return)
|
||||||
|
movzwl -2(%rsi,%rdx), %ebx
|
||||||
|
movw %bx, -2(%rdi,%rdx)
|
||||||
|
jmp L(return)
|
||||||
|
|
||||||
|
L(len_9_16_bytes):
|
||||||
|
movq (%rsi), %xmm0
|
||||||
|
movq -8(%rsi, %rdx), %xmm1
|
||||||
|
movq %xmm0, (%rdi)
|
||||||
|
movq %xmm1, -8(%rdi, %rdx)
|
||||||
|
jmp L(return)
|
||||||
|
|
||||||
|
L(len_5_8_bytes):
|
||||||
|
movl (%rsi), %ebx
|
||||||
|
movl %ebx, (%rdi)
|
||||||
|
movl -4(%rsi,%rdx), %ebx
|
||||||
|
movl %ebx, -4(%rdi,%rdx)
|
||||||
|
jmp L(return)
|
||||||
|
|
||||||
|
L(return):
|
||||||
|
mov %rdi, %rax
|
||||||
|
RETURN
|
||||||
|
|
||||||
|
END (MEMCPY)
|
635
libc/arch-x86_64/string/sse2-memmove-slm.S
Normal file
635
libc/arch-x86_64/string/sse2-memmove-slm.S
Normal file
@ -0,0 +1,635 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cache.h"
|
||||||
|
|
||||||
|
#ifndef MEMMOVE
|
||||||
|
# define MEMMOVE memmove
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef L
|
||||||
|
# define L(label) .L##label
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef cfi_startproc
|
||||||
|
# define cfi_startproc .cfi_startproc
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef cfi_endproc
|
||||||
|
# define cfi_endproc .cfi_endproc
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef cfi_rel_offset
|
||||||
|
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef cfi_restore
|
||||||
|
# define cfi_restore(reg) .cfi_restore reg
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef cfi_adjust_cfa_offset
|
||||||
|
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef ENTRY
|
||||||
|
# define ENTRY(name) \
|
||||||
|
.type name, @function; \
|
||||||
|
.globl name; \
|
||||||
|
.p2align 4; \
|
||||||
|
name: \
|
||||||
|
cfi_startproc
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef END
|
||||||
|
# define END(name) \
|
||||||
|
cfi_endproc; \
|
||||||
|
.size name, .-name
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define CFI_PUSH(REG) \
|
||||||
|
cfi_adjust_cfa_offset (4); \
|
||||||
|
cfi_rel_offset (REG, 0)
|
||||||
|
|
||||||
|
#define CFI_POP(REG) \
|
||||||
|
cfi_adjust_cfa_offset (-4); \
|
||||||
|
cfi_restore (REG)
|
||||||
|
|
||||||
|
#define PUSH(REG) push REG;
|
||||||
|
#define POP(REG) pop REG;
|
||||||
|
|
||||||
|
#define ENTRANCE PUSH (%rbx);
|
||||||
|
#define RETURN_END POP (%rbx); ret
|
||||||
|
#define RETURN RETURN_END;
|
||||||
|
|
||||||
|
.section .text.sse2,"ax",@progbits
|
||||||
|
ENTRY (MEMMOVE)
|
||||||
|
ENTRANCE
|
||||||
|
#ifdef USE_AS_BCOPY
|
||||||
|
xchg %rsi, %rdi
|
||||||
|
#endif
|
||||||
|
mov %rdi, %rax
|
||||||
|
|
||||||
|
/* Check whether we should copy backward or forward. */
|
||||||
|
cmp %rsi, %rdi
|
||||||
|
je L(mm_return)
|
||||||
|
ja L(mm_len_0_or_more_backward)
|
||||||
|
|
||||||
|
/* Now do checks for lengths. We do [0..16], [0..32], [0..64], [0..128]
|
||||||
|
separately. */
|
||||||
|
cmp $16, %rdx
|
||||||
|
jbe L(mm_len_0_16_bytes_forward)
|
||||||
|
|
||||||
|
cmp $32, %rdx
|
||||||
|
jg L(mm_len_32_or_more_forward)
|
||||||
|
|
||||||
|
/* Copy [0..32] and return. */
|
||||||
|
movdqu (%rsi), %xmm0
|
||||||
|
movdqu -16(%rsi, %rdx), %xmm1
|
||||||
|
movdqu %xmm0, (%rdi)
|
||||||
|
movdqu %xmm1, -16(%rdi, %rdx)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_len_32_or_more_forward):
|
||||||
|
cmp $64, %rdx
|
||||||
|
jg L(mm_len_64_or_more_forward)
|
||||||
|
|
||||||
|
/* Copy [0..64] and return. */
|
||||||
|
movdqu (%rsi), %xmm0
|
||||||
|
movdqu 16(%rsi), %xmm1
|
||||||
|
movdqu -16(%rsi, %rdx), %xmm2
|
||||||
|
movdqu -32(%rsi, %rdx), %xmm3
|
||||||
|
movdqu %xmm0, (%rdi)
|
||||||
|
movdqu %xmm1, 16(%rdi)
|
||||||
|
movdqu %xmm2, -16(%rdi, %rdx)
|
||||||
|
movdqu %xmm3, -32(%rdi, %rdx)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_len_64_or_more_forward):
|
||||||
|
cmp $128, %rdx
|
||||||
|
jg L(mm_len_128_or_more_forward)
|
||||||
|
|
||||||
|
/* Copy [0..128] and return. */
|
||||||
|
movdqu (%rsi), %xmm0
|
||||||
|
movdqu 16(%rsi), %xmm1
|
||||||
|
movdqu 32(%rsi), %xmm2
|
||||||
|
movdqu 48(%rsi), %xmm3
|
||||||
|
movdqu -64(%rsi, %rdx), %xmm4
|
||||||
|
movdqu -48(%rsi, %rdx), %xmm5
|
||||||
|
movdqu -32(%rsi, %rdx), %xmm6
|
||||||
|
movdqu -16(%rsi, %rdx), %xmm7
|
||||||
|
movdqu %xmm0, (%rdi)
|
||||||
|
movdqu %xmm1, 16(%rdi)
|
||||||
|
movdqu %xmm2, 32(%rdi)
|
||||||
|
movdqu %xmm3, 48(%rdi)
|
||||||
|
movdqu %xmm4, -64(%rdi, %rdx)
|
||||||
|
movdqu %xmm5, -48(%rdi, %rdx)
|
||||||
|
movdqu %xmm6, -32(%rdi, %rdx)
|
||||||
|
movdqu %xmm7, -16(%rdi, %rdx)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_len_128_or_more_forward):
|
||||||
|
|
||||||
|
cmp $SHARED_CACHE_SIZE_HALF, %rdx
|
||||||
|
jae L(mm_large_page_forward)
|
||||||
|
|
||||||
|
mov %rsi, %r8 // copy src to r8
|
||||||
|
mov %rdi, %r9 // copy dst to r9
|
||||||
|
|
||||||
|
/* Aligning the address of destination. */
|
||||||
|
/* save first unaligned 64 bytes */
|
||||||
|
movdqu (%rsi), %xmm0
|
||||||
|
movdqu 16(%rsi), %xmm1
|
||||||
|
movdqu 32(%rsi), %xmm2
|
||||||
|
movdqu 48(%rsi), %xmm3
|
||||||
|
|
||||||
|
lea 64(%r9), %rdi
|
||||||
|
and $-64, %rdi /* rdi now aligned to next 64 byte boundary */
|
||||||
|
|
||||||
|
sub %r9, %rsi /* rsi = src - dst = diff */
|
||||||
|
|
||||||
|
movdqu (%rdi, %rsi), %xmm4
|
||||||
|
movdqu 16(%rdi, %rsi), %xmm5
|
||||||
|
movdqu 32(%rdi, %rsi), %xmm6
|
||||||
|
movdqu 48(%rdi, %rsi), %xmm7
|
||||||
|
|
||||||
|
movdqu %xmm0, (%r9)
|
||||||
|
movdqu %xmm1, 16(%r9)
|
||||||
|
movdqu %xmm2, 32(%r9)
|
||||||
|
movdqu %xmm3, 48(%r9)
|
||||||
|
movdqa %xmm4, (%rdi)
|
||||||
|
movdqa %xmm5, 16(%rdi)
|
||||||
|
movdqa %xmm6, 32(%rdi)
|
||||||
|
movdqa %xmm7, 48(%rdi)
|
||||||
|
add $64, %rdi
|
||||||
|
|
||||||
|
lea (%r9, %rdx), %rbx
|
||||||
|
and $-64, %rbx
|
||||||
|
|
||||||
|
cmp %rdi, %rbx
|
||||||
|
jbe L(mm_copy_remaining_forward)
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
L(mm_main_loop_forward):
|
||||||
|
|
||||||
|
prefetcht0 128(%rdi, %rsi)
|
||||||
|
|
||||||
|
movdqu (%rdi, %rsi), %xmm0
|
||||||
|
movdqu 16(%rdi, %rsi), %xmm1
|
||||||
|
movdqu 32(%rdi, %rsi), %xmm2
|
||||||
|
movdqu 48(%rdi, %rsi), %xmm3
|
||||||
|
movdqa %xmm0, (%rdi)
|
||||||
|
movdqa %xmm1, 16(%rdi)
|
||||||
|
movdqa %xmm2, 32(%rdi)
|
||||||
|
movdqa %xmm3, 48(%rdi)
|
||||||
|
lea 64(%rdi), %rdi
|
||||||
|
cmp %rdi, %rbx
|
||||||
|
ja L(mm_main_loop_forward)
|
||||||
|
|
||||||
|
L(mm_copy_remaining_forward):
|
||||||
|
add %r9, %rdx
|
||||||
|
sub %rdi, %rdx
|
||||||
|
/* We copied all up till %rdi position in the dst.
|
||||||
|
In %rdx now is how many bytes are left to copy.
|
||||||
|
Now we need to advance %r8. */
|
||||||
|
lea (%rdi, %rsi), %r8
|
||||||
|
|
||||||
|
L(mm_remaining_0_64_bytes_forward):
|
||||||
|
cmp $32, %rdx
|
||||||
|
ja L(mm_remaining_33_64_bytes_forward)
|
||||||
|
cmp $16, %rdx
|
||||||
|
ja L(mm_remaining_17_32_bytes_forward)
|
||||||
|
test %rdx, %rdx
|
||||||
|
.p2align 4,,2
|
||||||
|
je L(mm_return)
|
||||||
|
|
||||||
|
cmpb $8, %dl
|
||||||
|
ja L(mm_remaining_9_16_bytes_forward)
|
||||||
|
cmpb $4, %dl
|
||||||
|
.p2align 4,,5
|
||||||
|
ja L(mm_remaining_5_8_bytes_forward)
|
||||||
|
cmpb $2, %dl
|
||||||
|
.p2align 4,,1
|
||||||
|
ja L(mm_remaining_3_4_bytes_forward)
|
||||||
|
movzbl -1(%r8,%rdx), %esi
|
||||||
|
movzbl (%r8), %ebx
|
||||||
|
movb %sil, -1(%rdi,%rdx)
|
||||||
|
movb %bl, (%rdi)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_remaining_33_64_bytes_forward):
|
||||||
|
movdqu (%r8), %xmm0
|
||||||
|
movdqu 16(%r8), %xmm1
|
||||||
|
movdqu -32(%r8, %rdx), %xmm2
|
||||||
|
movdqu -16(%r8, %rdx), %xmm3
|
||||||
|
movdqu %xmm0, (%rdi)
|
||||||
|
movdqu %xmm1, 16(%rdi)
|
||||||
|
movdqu %xmm2, -32(%rdi, %rdx)
|
||||||
|
movdqu %xmm3, -16(%rdi, %rdx)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_remaining_17_32_bytes_forward):
|
||||||
|
movdqu (%r8), %xmm0
|
||||||
|
movdqu -16(%r8, %rdx), %xmm1
|
||||||
|
movdqu %xmm0, (%rdi)
|
||||||
|
movdqu %xmm1, -16(%rdi, %rdx)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_remaining_3_4_bytes_forward):
|
||||||
|
movzwl -2(%r8,%rdx), %esi
|
||||||
|
movzwl (%r8), %ebx
|
||||||
|
movw %si, -2(%rdi,%rdx)
|
||||||
|
movw %bx, (%rdi)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_remaining_5_8_bytes_forward):
|
||||||
|
movl (%r8), %esi
|
||||||
|
movl -4(%r8,%rdx), %ebx
|
||||||
|
movl %esi, (%rdi)
|
||||||
|
movl %ebx, -4(%rdi,%rdx)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_remaining_9_16_bytes_forward):
|
||||||
|
mov (%r8), %rsi
|
||||||
|
mov -8(%r8, %rdx), %rbx
|
||||||
|
mov %rsi, (%rdi)
|
||||||
|
mov %rbx, -8(%rdi, %rdx)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_len_0_16_bytes_forward):
|
||||||
|
testb $24, %dl
|
||||||
|
jne L(mm_len_9_16_bytes_forward)
|
||||||
|
testb $4, %dl
|
||||||
|
.p2align 4,,5
|
||||||
|
jne L(mm_len_5_8_bytes_forward)
|
||||||
|
test %rdx, %rdx
|
||||||
|
.p2align 4,,2
|
||||||
|
je L(mm_return)
|
||||||
|
testb $2, %dl
|
||||||
|
.p2align 4,,1
|
||||||
|
jne L(mm_len_2_4_bytes_forward)
|
||||||
|
movzbl -1(%rsi,%rdx), %ebx
|
||||||
|
movzbl (%rsi), %esi
|
||||||
|
movb %bl, -1(%rdi,%rdx)
|
||||||
|
movb %sil, (%rdi)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_len_2_4_bytes_forward):
|
||||||
|
movzwl -2(%rsi,%rdx), %ebx
|
||||||
|
movzwl (%rsi), %esi
|
||||||
|
movw %bx, -2(%rdi,%rdx)
|
||||||
|
movw %si, (%rdi)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_len_5_8_bytes_forward):
|
||||||
|
movl (%rsi), %ebx
|
||||||
|
movl -4(%rsi,%rdx), %esi
|
||||||
|
movl %ebx, (%rdi)
|
||||||
|
movl %esi, -4(%rdi,%rdx)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_len_9_16_bytes_forward):
|
||||||
|
mov (%rsi), %rbx
|
||||||
|
mov -8(%rsi, %rdx), %rsi
|
||||||
|
mov %rbx, (%rdi)
|
||||||
|
mov %rsi, -8(%rdi, %rdx)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
/* The code for copying backwards. */
|
||||||
|
L(mm_len_0_or_more_backward):
|
||||||
|
|
||||||
|
/* Now do checks for lengths. We do [0..16], [0..32], [0..64], [0..128]
|
||||||
|
separately. */
|
||||||
|
cmp $16, %rdx
|
||||||
|
jbe L(mm_len_0_16_bytes_backward)
|
||||||
|
|
||||||
|
cmp $32, %rdx
|
||||||
|
jg L(mm_len_32_or_more_backward)
|
||||||
|
|
||||||
|
/* Copy [0..32] and return. */
|
||||||
|
movdqu (%rsi), %xmm0
|
||||||
|
movdqu -16(%rsi, %rdx), %xmm1
|
||||||
|
movdqu %xmm0, (%rdi)
|
||||||
|
movdqu %xmm1, -16(%rdi, %rdx)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_len_32_or_more_backward):
|
||||||
|
cmp $64, %rdx
|
||||||
|
jg L(mm_len_64_or_more_backward)
|
||||||
|
|
||||||
|
/* Copy [0..64] and return. */
|
||||||
|
movdqu (%rsi), %xmm0
|
||||||
|
movdqu 16(%rsi), %xmm1
|
||||||
|
movdqu -16(%rsi, %rdx), %xmm2
|
||||||
|
movdqu -32(%rsi, %rdx), %xmm3
|
||||||
|
movdqu %xmm0, (%rdi)
|
||||||
|
movdqu %xmm1, 16(%rdi)
|
||||||
|
movdqu %xmm2, -16(%rdi, %rdx)
|
||||||
|
movdqu %xmm3, -32(%rdi, %rdx)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_len_64_or_more_backward):
|
||||||
|
cmp $128, %rdx
|
||||||
|
jg L(mm_len_128_or_more_backward)
|
||||||
|
|
||||||
|
/* Copy [0..128] and return. */
|
||||||
|
movdqu (%rsi), %xmm0
|
||||||
|
movdqu 16(%rsi), %xmm1
|
||||||
|
movdqu 32(%rsi), %xmm2
|
||||||
|
movdqu 48(%rsi), %xmm3
|
||||||
|
movdqu -64(%rsi, %rdx), %xmm4
|
||||||
|
movdqu -48(%rsi, %rdx), %xmm5
|
||||||
|
movdqu -32(%rsi, %rdx), %xmm6
|
||||||
|
movdqu -16(%rsi, %rdx), %xmm7
|
||||||
|
movdqu %xmm0, (%rdi)
|
||||||
|
movdqu %xmm1, 16(%rdi)
|
||||||
|
movdqu %xmm2, 32(%rdi)
|
||||||
|
movdqu %xmm3, 48(%rdi)
|
||||||
|
movdqu %xmm4, -64(%rdi, %rdx)
|
||||||
|
movdqu %xmm5, -48(%rdi, %rdx)
|
||||||
|
movdqu %xmm6, -32(%rdi, %rdx)
|
||||||
|
movdqu %xmm7, -16(%rdi, %rdx)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_len_128_or_more_backward):
|
||||||
|
|
||||||
|
cmp $SHARED_CACHE_SIZE_HALF, %rdx
|
||||||
|
jae L(mm_large_page_backward)
|
||||||
|
|
||||||
|
/* Aligning the address of destination. We need to save
|
||||||
|
16 bits from the source in order not to overwrite them. */
|
||||||
|
movdqu -16(%rsi, %rdx), %xmm0
|
||||||
|
movdqu -32(%rsi, %rdx), %xmm1
|
||||||
|
movdqu -48(%rsi, %rdx), %xmm2
|
||||||
|
movdqu -64(%rsi, %rdx), %xmm3
|
||||||
|
|
||||||
|
lea (%rdi, %rdx), %r9
|
||||||
|
and $-64, %r9 /* r9 = aligned dst */
|
||||||
|
|
||||||
|
mov %rsi, %r8
|
||||||
|
sub %rdi, %r8 /* r8 = src - dst, diff */
|
||||||
|
|
||||||
|
movdqu -16(%r9, %r8), %xmm4
|
||||||
|
movdqu -32(%r9, %r8), %xmm5
|
||||||
|
movdqu -48(%r9, %r8), %xmm6
|
||||||
|
movdqu -64(%r9, %r8), %xmm7
|
||||||
|
|
||||||
|
movdqu %xmm0, -16(%rdi, %rdx)
|
||||||
|
movdqu %xmm1, -32(%rdi, %rdx)
|
||||||
|
movdqu %xmm2, -48(%rdi, %rdx)
|
||||||
|
movdqu %xmm3, -64(%rdi, %rdx)
|
||||||
|
movdqa %xmm4, -16(%r9)
|
||||||
|
movdqa %xmm5, -32(%r9)
|
||||||
|
movdqa %xmm6, -48(%r9)
|
||||||
|
movdqa %xmm7, -64(%r9)
|
||||||
|
lea -64(%r9), %r9
|
||||||
|
|
||||||
|
lea 64(%rdi), %rbx
|
||||||
|
and $-64, %rbx
|
||||||
|
|
||||||
|
/* Compute in %rdx how many bytes are left to copy after
|
||||||
|
the main loop stops. */
|
||||||
|
mov %rbx, %rdx
|
||||||
|
sub %rdi, %rdx
|
||||||
|
|
||||||
|
cmp %r9, %rbx
|
||||||
|
jb L(mm_main_loop_backward)
|
||||||
|
jmp L(mm_len_0_or_more_backward)
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
L(mm_main_loop_backward):
|
||||||
|
|
||||||
|
prefetcht0 -128(%r9, %r8)
|
||||||
|
|
||||||
|
movdqu -64(%r9, %r8), %xmm0
|
||||||
|
movdqu -48(%r9, %r8), %xmm1
|
||||||
|
movdqu -32(%r9, %r8), %xmm2
|
||||||
|
movdqu -16(%r9, %r8), %xmm3
|
||||||
|
movdqa %xmm0, -64(%r9)
|
||||||
|
movdqa %xmm1, -48(%r9)
|
||||||
|
movdqa %xmm2, -32(%r9)
|
||||||
|
movdqa %xmm3, -16(%r9)
|
||||||
|
lea -64(%r9), %r9
|
||||||
|
cmp %r9, %rbx
|
||||||
|
jb L(mm_main_loop_backward)
|
||||||
|
jmp L(mm_len_0_or_more_backward)
|
||||||
|
|
||||||
|
/* Copy [0..16] and return. */
|
||||||
|
L(mm_len_0_16_bytes_backward):
|
||||||
|
testb $24, %dl
|
||||||
|
jnz L(mm_len_9_16_bytes_backward)
|
||||||
|
testb $4, %dl
|
||||||
|
.p2align 4,,5
|
||||||
|
jnz L(mm_len_5_8_bytes_backward)
|
||||||
|
test %rdx, %rdx
|
||||||
|
.p2align 4,,2
|
||||||
|
je L(mm_return)
|
||||||
|
testb $2, %dl
|
||||||
|
.p2align 4,,1
|
||||||
|
jne L(mm_len_3_4_bytes_backward)
|
||||||
|
movzbl -1(%rsi,%rdx), %ebx
|
||||||
|
movzbl (%rsi), %ecx
|
||||||
|
movb %bl, -1(%rdi,%rdx)
|
||||||
|
movb %cl, (%rdi)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_len_3_4_bytes_backward):
|
||||||
|
movzwl -2(%rsi,%rdx), %ebx
|
||||||
|
movzwl (%rsi), %ecx
|
||||||
|
movw %bx, -2(%rdi,%rdx)
|
||||||
|
movw %cx, (%rdi)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
L(mm_len_9_16_bytes_backward):
|
||||||
|
movl -4(%rsi,%rdx), %ebx
|
||||||
|
movl -8(%rsi,%rdx), %ecx
|
||||||
|
movl %ebx, -4(%rdi,%rdx)
|
||||||
|
movl %ecx, -8(%rdi,%rdx)
|
||||||
|
sub $8, %rdx
|
||||||
|
jmp L(mm_len_0_16_bytes_backward)
|
||||||
|
|
||||||
|
L(mm_len_5_8_bytes_backward):
|
||||||
|
movl (%rsi), %ebx
|
||||||
|
movl -4(%rsi,%rdx), %ecx
|
||||||
|
movl %ebx, (%rdi)
|
||||||
|
movl %ecx, -4(%rdi,%rdx)
|
||||||
|
|
||||||
|
L(mm_return):
|
||||||
|
RETURN
|
||||||
|
|
||||||
|
/* Big length copy forward part. */
|
||||||
|
|
||||||
|
L(mm_large_page_forward):
|
||||||
|
/* Aligning the address of destination. We need to save
|
||||||
|
16 bits from the source in order not to overwrite them. */
|
||||||
|
|
||||||
|
mov %rsi, %r8
|
||||||
|
mov %rdi, %r9
|
||||||
|
|
||||||
|
movdqu (%rsi), %xmm0
|
||||||
|
movdqu 16(%rsi), %xmm1
|
||||||
|
movdqu 32(%rsi), %xmm2
|
||||||
|
movdqu 48(%rsi), %xmm3
|
||||||
|
|
||||||
|
lea 64(%r9), %rdi
|
||||||
|
and $-64, %rdi /* rdi = aligned dst */
|
||||||
|
|
||||||
|
sub %r9, %rsi /* rsi = diff */
|
||||||
|
|
||||||
|
movdqu (%rdi, %rsi), %xmm4
|
||||||
|
movdqu 16(%rdi, %rsi), %xmm5
|
||||||
|
movdqu 32(%rdi, %rsi), %xmm6
|
||||||
|
movdqu 48(%rdi, %rsi), %xmm7
|
||||||
|
|
||||||
|
movdqu %xmm0, (%r9)
|
||||||
|
movdqu %xmm1, 16(%r9)
|
||||||
|
movdqu %xmm2, 32(%r9)
|
||||||
|
movdqu %xmm3, 48(%r9)
|
||||||
|
movntdq %xmm4, (%rdi)
|
||||||
|
movntdq %xmm5, 16(%rdi)
|
||||||
|
movntdq %xmm6, 32(%rdi)
|
||||||
|
movntdq %xmm7, 48(%rdi)
|
||||||
|
add $64, %rdi
|
||||||
|
|
||||||
|
lea (%r9, %rdx), %rbx
|
||||||
|
and $-128, %rbx
|
||||||
|
|
||||||
|
cmp %rdi, %rbx
|
||||||
|
jbe L(mm_copy_remaining_forward)
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
L(mm_large_page_loop_forward):
|
||||||
|
movdqu (%rdi, %rsi), %xmm0
|
||||||
|
movdqu 16(%rdi, %rsi), %xmm1
|
||||||
|
movdqu 32(%rdi, %rsi), %xmm2
|
||||||
|
movdqu 48(%rdi, %rsi), %xmm3
|
||||||
|
movdqu 64(%rdi, %rsi), %xmm4
|
||||||
|
movdqu 80(%rdi, %rsi), %xmm5
|
||||||
|
movdqu 96(%rdi, %rsi), %xmm6
|
||||||
|
movdqu 112(%rdi, %rsi), %xmm7
|
||||||
|
movntdq %xmm0, (%rdi)
|
||||||
|
movntdq %xmm1, 16(%rdi)
|
||||||
|
movntdq %xmm2, 32(%rdi)
|
||||||
|
movntdq %xmm3, 48(%rdi)
|
||||||
|
movntdq %xmm4, 64(%rdi)
|
||||||
|
movntdq %xmm5, 80(%rdi)
|
||||||
|
movntdq %xmm6, 96(%rdi)
|
||||||
|
movntdq %xmm7, 112(%rdi)
|
||||||
|
lea 128(%rdi), %rdi
|
||||||
|
cmp %rdi, %rbx
|
||||||
|
ja L(mm_large_page_loop_forward)
|
||||||
|
sfence
|
||||||
|
|
||||||
|
add %r9, %rdx
|
||||||
|
sub %rdi, %rdx
|
||||||
|
/* We copied all up till %rdi position in the dst.
|
||||||
|
In %rdx now is how many bytes are left to copy.
|
||||||
|
Now we need to advance %r8. */
|
||||||
|
lea (%rdi, %rsi), %r8
|
||||||
|
|
||||||
|
cmp $64, %rdx
|
||||||
|
jb L(mm_remaining_0_64_bytes_forward)
|
||||||
|
|
||||||
|
movdqu (%r8), %xmm0
|
||||||
|
movdqu 16(%r8), %xmm1
|
||||||
|
movdqu 32(%r8), %xmm2
|
||||||
|
movdqu 48(%r8), %xmm3
|
||||||
|
movdqu -64(%r8, %rdx), %xmm4
|
||||||
|
movdqu -48(%r8, %rdx), %xmm5
|
||||||
|
movdqu -32(%r8, %rdx), %xmm6
|
||||||
|
movdqu -16(%r8, %rdx), %xmm7
|
||||||
|
movdqu %xmm0, (%rdi)
|
||||||
|
movdqu %xmm1, 16(%rdi)
|
||||||
|
movdqu %xmm2, 32(%rdi)
|
||||||
|
movdqu %xmm3, 48(%rdi)
|
||||||
|
movdqu %xmm4, -64(%rdi, %rdx)
|
||||||
|
movdqu %xmm5, -48(%rdi, %rdx)
|
||||||
|
movdqu %xmm6, -32(%rdi, %rdx)
|
||||||
|
movdqu %xmm7, -16(%rdi, %rdx)
|
||||||
|
jmp L(mm_return)
|
||||||
|
|
||||||
|
|
||||||
|
/* Big length copy backward part. */
|
||||||
|
L(mm_large_page_backward):
|
||||||
|
/* Aligning the address of destination. We need to save
|
||||||
|
16 bits from the source in order not to overwrite them. */
|
||||||
|
|
||||||
|
movdqu -16(%rsi, %rdx), %xmm0
|
||||||
|
movdqu -32(%rsi, %rdx), %xmm1
|
||||||
|
movdqu -48(%rsi, %rdx), %xmm2
|
||||||
|
movdqu -64(%rsi, %rdx), %xmm3
|
||||||
|
|
||||||
|
lea (%rdi, %rdx), %r9
|
||||||
|
and $-64, %r9
|
||||||
|
|
||||||
|
mov %rsi, %r8
|
||||||
|
sub %rdi, %r8
|
||||||
|
|
||||||
|
movdqu -16(%r9, %r8), %xmm4
|
||||||
|
movdqu -32(%r9, %r8), %xmm5
|
||||||
|
movdqu -48(%r9, %r8), %xmm6
|
||||||
|
movdqu -64(%r9, %r8), %xmm7
|
||||||
|
|
||||||
|
movdqu %xmm0, -16(%rdi, %rdx)
|
||||||
|
movdqu %xmm1, -32(%rdi, %rdx)
|
||||||
|
movdqu %xmm2, -48(%rdi, %rdx)
|
||||||
|
movdqu %xmm3, -64(%rdi, %rdx)
|
||||||
|
movntdq %xmm4, -16(%r9)
|
||||||
|
movntdq %xmm5, -32(%r9)
|
||||||
|
movntdq %xmm6, -48(%r9)
|
||||||
|
movntdq %xmm7, -64(%r9)
|
||||||
|
lea -64(%r9), %r9
|
||||||
|
|
||||||
|
lea 128(%rdi), %rbx
|
||||||
|
and $-64, %rbx
|
||||||
|
|
||||||
|
/* Compute in %rdx how many bytes are left to copy after
|
||||||
|
the main loop stops. */
|
||||||
|
mov %rbx, %rdx
|
||||||
|
sub %rdi, %rdx
|
||||||
|
|
||||||
|
cmp %r9, %rbx
|
||||||
|
jae L(mm_len_0_or_more_backward)
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
L(mm_large_page_loop_backward):
|
||||||
|
movdqu -64(%r9, %r8), %xmm0
|
||||||
|
movdqu -48(%r9, %r8), %xmm1
|
||||||
|
movdqu -32(%r9, %r8), %xmm2
|
||||||
|
movdqu -16(%r9, %r8), %xmm3
|
||||||
|
movntdq %xmm0, -64(%r9)
|
||||||
|
movntdq %xmm1, -48(%r9)
|
||||||
|
movntdq %xmm2, -32(%r9)
|
||||||
|
movntdq %xmm3, -16(%r9)
|
||||||
|
lea -64(%r9), %r9
|
||||||
|
cmp %r9, %rbx
|
||||||
|
jb L(mm_large_page_loop_backward)
|
||||||
|
jmp L(mm_len_0_or_more_backward)
|
||||||
|
|
||||||
|
END (MEMMOVE)
|
173
libc/arch-x86_64/string/sse2-memset-slm.S
Normal file
173
libc/arch-x86_64/string/sse2-memset-slm.S
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cache.h"

#ifndef MEMSET
# define MEMSET		memset
#endif

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef ENTRY
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
name:				\
	cfi_startproc
#endif

#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif

	.section .text.sse2,"ax",@progbits
/* void *memset(void *dst (%rdi), int c (%rsi), size_t n (%rdx));
   returns dst in %rax.  With USE_AS_BZERO_P: bzero(dst (%rdi), n (%rsi)).  */
ENTRY (MEMSET)
	movq	%rdi, %rax		/* return value is always dst */
#ifdef USE_AS_BZERO_P
	mov	%rsi, %rdx		/* bzero: length is the 2nd argument */
	xor	%rcx, %rcx		/* fill pattern is zero */
#else
	and	$0xff, %rsi		/* only the low byte of c is used */
	mov	$0x0101010101010101, %rcx
	imul	%rsi, %rcx		/* replicate the byte into all 8 lanes */
#endif
	cmpq	$16, %rdx
	jae	L(16bytesormore)
	/* n < 16: dispatch on the bits of n using GPR stores.  */
	testb	$8, %dl
	jnz	L(8_15bytes)
	testb	$4, %dl
	jnz	L(4_7bytes)
	testb	$2, %dl
	jnz	L(2_3bytes)
	testb	$1, %dl
	jz	L(return)
	movb	%cl, (%rdi)
L(return):
	ret

L(8_15bytes):
	/* Two possibly-overlapping 8-byte stores cover 8..15 bytes.  */
	movq	%rcx, (%rdi)
	movq	%rcx, -8(%rdi, %rdx)
	ret

L(4_7bytes):
	movl	%ecx, (%rdi)
	movl	%ecx, -4(%rdi, %rdx)
	ret

L(2_3bytes):
	movw	%cx, (%rdi)
	movw	%cx, -2(%rdi, %rdx)
	ret

	ALIGN (4)
L(16bytesormore):
#ifdef USE_AS_BZERO_P
	pxor	%xmm0, %xmm0
#else
	/* Use "movq" here, not "movd": the source is a 64-bit register and
	   some assemblers (e.g. clang's integrated assembler) reject the
	   movd mnemonic with a 64-bit GPR operand.  The encoding/behavior
	   is the same move the original intended.  */
	movq	%rcx, %xmm0
	pshufd	$0, %xmm0, %xmm0	/* broadcast the pattern dword */
#endif
	/* Overlapping head/tail stores are fine: the same byte is written.  */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm0, -16(%rdi, %rdx)
	cmpq	$32, %rdx
	jbe	L(32bytesless)
	movdqu	%xmm0, 16(%rdi)
	movdqu	%xmm0, -32(%rdi, %rdx)
	cmpq	$64, %rdx
	jbe	L(64bytesless)
	movdqu	%xmm0, 32(%rdi)
	movdqu	%xmm0, 48(%rdi)
	movdqu	%xmm0, -64(%rdi, %rdx)
	movdqu	%xmm0, -48(%rdi, %rdx)
	cmpq	$128, %rdx
	ja	L(128bytesmore)
L(32bytesless):
L(64bytesless):
	ret

	ALIGN (4)
L(128bytesmore):
	/* %rcx = first 64-byte-aligned address >= dst;
	   %rdx = last 64-byte-aligned address <= dst + n.
	   The unaligned head/tail were already written above, so only the
	   aligned middle [%rcx, %rdx) remains.  */
	leaq	64(%rdi), %rcx
	andq	$-64, %rcx
	movq	%rdx, %r8		/* keep the original n for the size test */
	addq	%rdi, %rdx
	andq	$-64, %rdx
	cmpq	%rcx, %rdx
	je	L(return)		/* aligned middle is empty */

	/* Beyond the shared cache size use non-temporal stores to avoid
	   polluting the cache.  */
#ifdef SHARED_CACHE_SIZE
	cmp	$SHARED_CACHE_SIZE, %r8
#else
	cmp	__x86_64_shared_cache_size(%rip), %r8
#endif
	ja	L(128bytesmore_nt)

	ALIGN (4)
L(128bytesmore_normal):
	movdqa	%xmm0, (%rcx)
	movaps	%xmm0, 0x10(%rcx)
	movaps	%xmm0, 0x20(%rcx)
	movaps	%xmm0, 0x30(%rcx)
	addq	$64, %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_normal)
	ret

	ALIGN (4)
L(128bytesmore_nt):
	movntdq	%xmm0, (%rcx)
	movntdq	%xmm0, 0x10(%rcx)
	movntdq	%xmm0, 0x20(%rcx)
	movntdq	%xmm0, 0x30(%rcx)
	leaq	64(%rcx), %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_nt)
	sfence				/* make NT stores visible before return */
	ret

END (MEMSET)
|
33
libc/arch-x86_64/string/sse2-stpcpy-slm.S
Normal file
33
libc/arch-x86_64/string/sse2-stpcpy-slm.S
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* stpcpy(dst, src): like strcpy, but returns a pointer to the terminating
   NUL in dst.  Implemented by the shared SSE2 strcpy with USE_AS_STPCPY.  */
#define USE_AS_STPCPY
#define STRCPY stpcpy
#include "sse2-strcpy-slm.S"
|
34
libc/arch-x86_64/string/sse2-stpncpy-slm.S
Normal file
34
libc/arch-x86_64/string/sse2-stpncpy-slm.S
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* stpncpy(dst, src, n): bounded stpcpy.  Implemented by the shared SSE2
   strcpy with both the bounded (STRNCPY) and end-pointer (STPCPY) modes.  */
#define USE_AS_STRNCPY
#define USE_AS_STPCPY
#define STRCPY stpncpy
#include "sse2-strcpy-slm.S"
|
87
libc/arch-x86_64/string/sse2-strcat-slm.S
Normal file
87
libc/arch-x86_64/string/sse2-strcat-slm.S
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* strcat(dst (%rdi), src (%rsi)): append src to dst, return dst.
   Built by inlining the SSE2 strlen (to find the end of dst) followed by
   the SSE2 strcpy (to copy src there).  With USE_AS_STRNCAT this becomes
   strncat(dst, src, n (%rdx)).  */

#ifndef STRCAT
# define STRCAT strcat
#endif

#ifndef L
# define L(label) .L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif

#ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
#endif

#ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
#endif

#define USE_AS_STRCAT

	.text
ENTRY (STRCAT)
	mov	%rdi, %r9	/* %r9 = dst; preserved as the return value */
#ifdef USE_AS_STRNCAT
	mov	%rdx, %r8	/* %r8 = n (strncat bound) */
#endif

/* Inline strlen(dst).  Its RETURN macro is rebound so that instead of
   returning it falls through to L(Strcpy) with strlen(dst) in %rax.  */
#define RETURN jmp L(Strcpy)
#include "sse2-strlen-slm.S"

#undef RETURN
#define RETURN ret

L(Strcpy):
	lea	(%r9, %rax), %rdi	/* copy destination = dst + strlen(dst) */
	mov	%rsi, %rcx		/* src pointer for the inlined strcpy */
	mov	%r9, %rax	/* save result */

#ifdef USE_AS_STRNCAT
	test	%r8, %r8
	jz	L(ExitZero)	/* n == 0: append nothing (label in strcpy) */
# define USE_AS_STRNCPY
#endif
#include "sse2-strcpy-slm.S"
|
1921
libc/arch-x86_64/string/sse2-strcpy-slm.S
Normal file
1921
libc/arch-x86_64/string/sse2-strcpy-slm.S
Normal file
File diff suppressed because it is too large
Load Diff
294
libc/arch-x86_64/string/sse2-strlen-slm.S
Normal file
294
libc/arch-x86_64/string/sse2-strlen-slm.S
Normal file
@ -0,0 +1,294 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* size_t strlen(const char *s (%rdi)): SSE2 Silvermont-tuned strlen.
   Returns the length in %rax.  When included from sse2-strcat-slm.S
   (USE_AS_STRCAT defined), the ENTRY/END and the prologue macros are
   supplied by the includer and RETURN is rebound to fall through into
   the append code instead of returning.  */
#ifndef USE_AS_STRCAT

#ifndef STRLEN
# define STRLEN strlen
#endif

#ifndef L
# define L(label) .L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif

#ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
#endif

#ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
#endif
#define RETURN ret
	.section .text.sse2,"ax",@progbits
ENTRY (STRLEN)
/* end ifndef USE_AS_STRCAT */
#endif
	xor	%rax, %rax
	mov	%edi, %ecx
	and	$0x3f, %ecx		/* %ecx = s mod 64 */
	pxor	%xmm0, %xmm0		/* %xmm0 = zero, the NUL comparand */
	cmp	$0x30, %ecx
	ja	L(next)			/* within 16B of a 64B boundary: an
					   unaligned 16B load could cross a
					   page; take the aligned path */
	/* Safe to read 16 unaligned bytes at s.  */
	movdqu	(%rdi), %xmm1
	pcmpeqb	%xmm1, %xmm0		/* 0xFF in each lane where byte == 0 */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit_less16)		/* NUL within the first 16 bytes */
	mov	%rdi, %rax
	and	$-16, %rax		/* %rax = 16B-aligned base pointer */
	jmp	L(align16_start)
L(next):
	/* Aligned first load; mask off the bytes before s.  */
	mov	%rdi, %rax
	and	$-16, %rax
	pcmpeqb	(%rax), %xmm0
	mov	$-1, %r10d
	sub	%rax, %rcx		/* %cl = s - aligned base */
	shl	%cl, %r10d		/* mask: ignore bytes before s */
	pmovmskb %xmm0, %edx
	and	%r10d, %edx
	jnz	L(exit)
L(align16_start):
	/* Unrolled scan of 16B chunks at offsets 16/32/48/64 from %rax.
	   pcmpeqb leaves the register all-zero again whenever no NUL was
	   found, so %xmm0..%xmm3 stay valid zero comparands on fallthrough.  */
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1
	pxor	%xmm2, %xmm2
	pxor	%xmm3, %xmm3
	pcmpeqb	16(%rax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	pcmpeqb	80(%rax), %xmm0
	add	$64, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	pcmpeqb	80(%rax), %xmm0
	add	$64, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	pcmpeqb	80(%rax), %xmm0
	add	$64, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	/* Step 16B at a time until %rax is 64-byte aligned, then enter
	   the 64B-per-iteration main loop.  */

	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	80(%rax), %xmm0
	add	$80, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit)

	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	16(%rax), %xmm1
	add	$16, %rax
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit)

	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	16(%rax), %xmm2
	add	$16, %rax
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit)

	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	16(%rax), %xmm3
	add	$16, %rax
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit)

	add	$16, %rax
	.p2align 4
L(align64_loop):
	/* Main loop: pminub folds 64 bytes into one register; a zero byte
	   anywhere makes the minimum zero, which pcmpeqb then detects.  */
	movaps	(%rax), %xmm4
	pminub	16(%rax), %xmm4
	movaps	32(%rax), %xmm5
	pminub	48(%rax), %xmm5
	add	$64, %rax
	pminub	%xmm4, %xmm5
	pcmpeqb	%xmm0, %xmm5
	pmovmskb %xmm5, %edx
	test	%edx, %edx
	jz	L(align64_loop)

	/* A NUL is somewhere in the previous 64 bytes; re-scan the four
	   16B chunks to locate it.  %rax is rewound so the exitNN labels'
	   fixed offsets apply.  */
	pcmpeqb	-64(%rax), %xmm0
	sub	$80, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	sub	%rdi, %rax
	bsf	%rdx, %rdx		/* index of first NUL in the chunk */
	add	%rdx, %rax
	add	$64, %rax
	RETURN

	/* Exit stubs: length = (chunk base - s) + bit index + chunk offset.  */
	.p2align 4
L(exit):
	sub	%rdi, %rax
L(exit_less16):
	bsf	%rdx, %rdx
	add	%rdx, %rax
	RETURN
	.p2align 4
L(exit16):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$16, %rax
	RETURN
	.p2align 4
L(exit32):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$32, %rax
	RETURN
	.p2align 4
L(exit48):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$48, %rax
	RETURN
	.p2align 4
L(exit64):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$64, %rax
#ifndef USE_AS_STRCAT
	RETURN

END (STRLEN)
#endif
|
33
libc/arch-x86_64/string/sse2-strncat-slm.S
Normal file
33
libc/arch-x86_64/string/sse2-strncat-slm.S
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* strncat(dst, src, n): bounded strcat.  Implemented by the shared SSE2
   strcat with USE_AS_STRNCAT enabling the length-bound handling.  */
#define USE_AS_STRNCAT
#define STRCAT strncat
#include "sse2-strcat-slm.S"
|
33
libc/arch-x86_64/string/sse2-strncpy-slm.S
Normal file
33
libc/arch-x86_64/string/sse2-strncpy-slm.S
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define USE_AS_STRNCPY
|
||||||
|
#define STRCPY strncpy
|
||||||
|
#include "sse2-strcpy-slm.S"
|
1799
libc/arch-x86_64/string/sse4-memcmp-slm.S
Normal file
1799
libc/arch-x86_64/string/sse4-memcmp-slm.S
Normal file
File diff suppressed because it is too large
Load Diff
1925
libc/arch-x86_64/string/ssse3-strcmp-slm.S
Normal file
1925
libc/arch-x86_64/string/ssse3-strcmp-slm.S
Normal file
File diff suppressed because it is too large
Load Diff
33
libc/arch-x86_64/string/ssse3-strncmp-slm.S
Normal file
33
libc/arch-x86_64/string/ssse3-strncmp-slm.S
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2014, Intel Corporation
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define USE_AS_STRNCMP
|
||||||
|
#define STRCMP strncmp
|
||||||
|
#include "ssse3-strcmp-slm.S"
|
@ -3,11 +3,7 @@
|
|||||||
libc_common_src_files_x86_64 := \
|
libc_common_src_files_x86_64 := \
|
||||||
bionic/index.cpp \
|
bionic/index.cpp \
|
||||||
bionic/memchr.c \
|
bionic/memchr.c \
|
||||||
bionic/memcmp.c \
|
|
||||||
bionic/memcpy.cpp \
|
|
||||||
bionic/memmove.c \
|
|
||||||
bionic/memrchr.c \
|
bionic/memrchr.c \
|
||||||
bionic/memset.c \
|
|
||||||
bionic/strchr.cpp \
|
bionic/strchr.cpp \
|
||||||
bionic/strnlen.c \
|
bionic/strnlen.c \
|
||||||
bionic/strrchr.cpp \
|
bionic/strrchr.cpp \
|
||||||
@ -18,18 +14,8 @@ libc_common_src_files_x86_64 := \
|
|||||||
upstream-freebsd/lib/libc/string/wcslen.c \
|
upstream-freebsd/lib/libc/string/wcslen.c \
|
||||||
upstream-freebsd/lib/libc/string/wcsrchr.c \
|
upstream-freebsd/lib/libc/string/wcsrchr.c \
|
||||||
upstream-freebsd/lib/libc/string/wmemcmp.c \
|
upstream-freebsd/lib/libc/string/wmemcmp.c \
|
||||||
upstream-openbsd/lib/libc/string/bcopy.c \
|
|
||||||
upstream-openbsd/lib/libc/string/stpcpy.c \
|
|
||||||
upstream-openbsd/lib/libc/string/stpncpy.c \
|
|
||||||
upstream-openbsd/lib/libc/string/strcat.c \
|
|
||||||
upstream-openbsd/lib/libc/string/strcmp.c \
|
|
||||||
upstream-openbsd/lib/libc/string/strcpy.c \
|
|
||||||
upstream-openbsd/lib/libc/string/strlcat.c \
|
upstream-openbsd/lib/libc/string/strlcat.c \
|
||||||
upstream-openbsd/lib/libc/string/strlcpy.c \
|
upstream-openbsd/lib/libc/string/strlcpy.c \
|
||||||
upstream-openbsd/lib/libc/string/strlen.c \
|
|
||||||
upstream-openbsd/lib/libc/string/strncat.c \
|
|
||||||
upstream-openbsd/lib/libc/string/strncmp.c \
|
|
||||||
upstream-openbsd/lib/libc/string/strncpy.c \
|
|
||||||
|
|
||||||
# Fortify implementations of libc functions.
|
# Fortify implementations of libc functions.
|
||||||
libc_common_src_files_x86_64 += \
|
libc_common_src_files_x86_64 += \
|
||||||
@ -55,6 +41,23 @@ libc_bionic_src_files_x86_64 := \
|
|||||||
arch-x86_64/bionic/vfork.S \
|
arch-x86_64/bionic/vfork.S \
|
||||||
bionic/__memcmp16.cpp \
|
bionic/__memcmp16.cpp \
|
||||||
|
|
||||||
|
libc_bionic_src_files_x86_64 += \
|
||||||
|
arch-x86_64/string/sse2-bcopy-slm.S \
|
||||||
|
arch-x86_64/string/sse2-bzero-slm.S \
|
||||||
|
arch-x86_64/string/sse2-memcpy-slm.S \
|
||||||
|
arch-x86_64/string/sse2-memmove-slm.S \
|
||||||
|
arch-x86_64/string/sse2-memset-slm.S \
|
||||||
|
arch-x86_64/string/sse2-stpcpy-slm.S \
|
||||||
|
arch-x86_64/string/sse2-stpncpy-slm.S \
|
||||||
|
arch-x86_64/string/sse2-strcat-slm.S \
|
||||||
|
arch-x86_64/string/sse2-strcpy-slm.S \
|
||||||
|
arch-x86_64/string/sse2-strlen-slm.S \
|
||||||
|
arch-x86_64/string/sse2-strncat-slm.S \
|
||||||
|
arch-x86_64/string/sse2-strncpy-slm.S \
|
||||||
|
arch-x86_64/string/sse4-memcmp-slm.S \
|
||||||
|
arch-x86_64/string/ssse3-strcmp-slm.S \
|
||||||
|
arch-x86_64/string/ssse3-strncmp-slm.S \
|
||||||
|
|
||||||
libc_crt_target_cflags_x86_64 += \
|
libc_crt_target_cflags_x86_64 += \
|
||||||
-m64 \
|
-m64 \
|
||||||
-I$(LOCAL_PATH)/arch-x86_64/include
|
-I$(LOCAL_PATH)/arch-x86_64/include
|
||||||
|
Loading…
x
Reference in New Issue
Block a user