bionic/libc/arch-x86/string/ssse3-strcpy-atom.S
Liubov Dmitrieva 0a490665a3 bionic/x86: Optimization for string routines
Optimized strcpy, strcat,
strncpy, strncat, strlcpy, strlcat,
memchr, memrchr, strchr, strrchr, index,
strnlen, strlen, wcslen, wmemcmp, wcscmp,
wcschr, wcsrchr, wcscpy, wcscat

Change-Id: I82b29132edf9a2e144e0bb3ee4ff5217df8d2a6d
Signed-off-by: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
2013-05-31 13:37:03 +04:00

3956 lines
71 KiB
ArmAsm

/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Everything in this conditional is skipped when USE_AS_STRCAT is defined
   (presumably because a strcat wrapper that includes this file supplies
   its own prologue and helper macros -- confirm against the includer). */
#ifndef USE_AS_STRCAT
/* L(name) expands to an assembler-local label with the ".L" prefix. */
# ifndef L
# define L(label) .L##label
# endif
/* Fallbacks that map the cfi_* helper names onto the assembler's .cfi_*
   directives when nothing earlier has defined them. */
# ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
# endif
# ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
# endif
# ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
# endif
# ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
# endif
# ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
# endif
/* ENTRY(name): declare name as a global function symbol, align it to
   16 bytes, emit its label and open the CFI region. */
# ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
# endif
/* END(name): close the CFI region and record the symbol size. */
# ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
# endif
/* CFI_PUSH / CFI_POP emit unwind annotations only (no instructions):
   a 4-byte CFA adjustment plus the save slot / restore of REG. */
# define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
# define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
/* PUSH / POP: the real pushl / popl followed by the matching annotation. */
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
/* Exported symbol name; defaults to strcpy unless the includer set it. */
# ifndef STRCPY
# define STRCPY strcpy
# endif
/* PARMS is the offset from %esp to the first argument once ENTRANCE has
   run: 4 bytes of return address, plus 4 more when %ebx is pushed.
   RETURN / RETURN1 undo the pushes and return; the CFI_PUSH annotations
   placed after the ret re-establish the unwind state for the code that
   follows the expansion.  RETURN1 is the variant that also pops %edi. */
# ifdef USE_AS_STRNCPY
# define PARMS 8
# define ENTRANCE PUSH (%ebx)
# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx);
# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
# else
# define PARMS 4
# define ENTRANCE
# define RETURN ret
# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
# endif
/* SAVE_RESULT(n) / SAVE_RESULT_TAIL(n) load the return value into %eax.
   With USE_AS_STPCPY both yield %edx + n; otherwise SAVE_RESULT copies
   %edi and SAVE_RESULT_TAIL copies %edx, and n is ignored. */
# ifdef USE_AS_STPCPY
# define SAVE_RESULT(n) lea n(%edx), %eax
# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax
# else
# define SAVE_RESULT(n) movl %edi, %eax
# define SAVE_RESULT_TAIL(n) movl %edx, %eax
# endif
/* Stack offsets of the three arguments.  The bodies are unparenthesised,
   so they are only meant to be used as displacements, e.g. STR2(%esp). */
# define STR1 PARMS
# define STR2 STR1+4
# define LEN STR2+4
/* In this code the following instructions are used for copying:
movb - 1 byte
movw - 2 bytes
movl - 4 bytes
movlpd - 8 bytes
movaps - 16 bytes - requires 16-byte alignment
of source and destination addresses.
*/
.text
/* Entry point.  Loads the first stack argument into %edx (written to
   below, i.e. the destination) and the second into %ecx (read from, i.e.
   the source).  In the USE_AS_STRNCPY builds the third argument goes to
   %ebx.  The first 16 source bytes are then probed one at a time; a NUL at
   index i branches to L(ExitTail<i+1>), which is not shown in this part of
   the file. */
ENTRY (STRCPY)
ENTRANCE
mov STR1(%esp), %edx
mov STR2(%esp), %ecx
# ifdef USE_AS_STRNCPY
/* Lengths of 8 or less are handled entirely by a separate exit path. */
movl LEN(%esp), %ebx
cmp $8, %ebx
jbe L(StrncpyExit8Bytes)
# endif
cmpb $0, (%ecx)
jz L(ExitTail1)
cmpb $0, 1(%ecx)
jz L(ExitTail2)
cmpb $0, 2(%ecx)
jz L(ExitTail3)
cmpb $0, 3(%ecx)
jz L(ExitTail4)
cmpb $0, 4(%ecx)
jz L(ExitTail5)
cmpb $0, 5(%ecx)
jz L(ExitTail6)
cmpb $0, 6(%ecx)
jz L(ExitTail7)
cmpb $0, 7(%ecx)
jz L(ExitTail8)
# ifdef USE_AS_STRNCPY
/* No NUL in bytes 0..7; lengths below 16 leave here before bytes 8..15
   are examined. */
cmp $16, %ebx
jb L(StrncpyExit15Bytes)
# endif
cmpb $0, 8(%ecx)
jz L(ExitTail9)
cmpb $0, 9(%ecx)
jz L(ExitTail10)
cmpb $0, 10(%ecx)
jz L(ExitTail11)
cmpb $0, 11(%ecx)
jz L(ExitTail12)
cmpb $0, 12(%ecx)
jz L(ExitTail13)
cmpb $0, 13(%ecx)
jz L(ExitTail14)
cmpb $0, 14(%ecx)
jz L(ExitTail15)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRLCPY
/* strncpy-style (not strlcpy): a length of exactly 16 exits before byte
   15 is tested. */
cmp $16, %ebx
je L(ExitTail16)
# endif
cmpb $0, 15(%ecx)
jz L(ExitTail16)
# if defined USE_AS_STRNCPY && defined USE_AS_STRLCPY
/* strlcpy: byte 15 is tested first; only then is a length of exactly 16
   sent to its dedicated exit. */
cmp $16, %ebx
je L(StrlcpyExitTail16)
# endif
/* Long-string path: save %edi and use it to remember the original
   destination pointer, or the original source pointer in the strlcpy
   build (presumably for computing that function's return value). */
PUSH (%edi)
# ifndef USE_AS_STRLCPY
mov %edx, %edi
# else
mov %ecx, %edi
# endif
#endif
/* Alignment set-up and dispatch.  This code is outside the USE_AS_STRCAT
   guard, so it is assembled in every build.  On arrival the first 16
   source bytes are known to contain no NUL (when entered from the probe
   sequence above).  The matching POP of %esi is not in this part of the
   file. */
PUSH (%esi)
#ifdef USE_AS_STRNCPY
/* %ebx = %ebx - 16 + (source address & 15). */
mov %ecx, %esi
sub $16, %ebx
and $0xf, %esi
/* add 16 bytes ecx_offset to ebx */
add %esi, %ebx
#endif
/* %esi = first 16-byte boundary strictly above %ecx.  %xmm0 is cleared
   and then receives the NUL-compare mask of that aligned block, while the
   first 16 source bytes are copied with two 8-byte moves. */
lea 16(%ecx), %esi
and $-16, %esi
pxor %xmm0, %xmm0
movlpd (%ecx), %xmm1
movlpd %xmm1, (%edx)
pcmpeqb (%esi), %xmm0
movlpd 8(%ecx), %xmm1
movlpd %xmm1, 8(%edx)
pmovmskb %xmm0, %eax
/* %esi becomes the distance (1..16) from %ecx to that aligned block. */
sub %ecx, %esi
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase2OrCase3)
#endif
/* A non-zero mask means the aligned block holds a NUL.  When the mask is
   zero %xmm0 is itself all-zero again and is reused as the zero operand
   by the code that follows. */
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
/* Round %edx up to the next 16-byte boundary; %eax = old - new, a value
   in -16..-1. */
mov %edx, %eax
lea 16(%edx), %edx
and $-16, %edx
sub %edx, %eax
#ifdef USE_AS_STRNCPY
/* If %esi + %eax - 1 is non-negative, add 16 back to %ebx.  The and
   already sets ZF, so the test that follows it is redundant but
   harmless. */
add %eax, %esi
lea -1(%esi), %esi
and $1<<31, %esi
test %esi, %esi
jnz L(ContinueCopy)
lea 16(%ebx), %ebx
L(ContinueCopy):
#endif
/* Advance %ecx by the same amount %edx moved, then dispatch on the low
   four bits of %ecx.  The mov $0 (rather than xor) leaves the flags of
   the preceding and intact for the jz. */
sub %eax, %ecx
mov %ecx, %eax
and $0xf, %eax
mov $0, %esi
/* case: ecx_offset == edx_offset */
jz L(Align16Both)
cmp $8, %eax
jae L(ShlHigh8)
cmp $1, %eax
je L(Shl1)
cmp $2, %eax
je L(Shl2)
cmp $3, %eax
je L(Shl3)
cmp $4, %eax
je L(Shl4)
cmp $5, %eax
je L(Shl5)
cmp $6, %eax
je L(Shl6)
jmp L(Shl7)
L(ShlHigh8):
/* Flags are still those of cmp $8, %eax, so this je catches offset 8. */
je L(Shl8)
cmp $9, %eax
je L(Shl9)
cmp $10, %eax
je L(Shl10)
cmp $11, %eax
je L(Shl11)
cmp $12, %eax
je L(Shl12)
cmp $13, %eax
je L(Shl13)
cmp $14, %eax
je L(Shl14)
jmp L(Shl15)
/* Align16Both: %ecx and %edx are both 16-byte aligned, so whole blocks
   move with movaps.  %esi is the running byte offset (0 on entry).  The
   block at (%ecx) is stored without a test; after that each step loads
   the next source block, stores the one loaded before it, and tests the
   new block for NUL against %xmm0.  %xmm0 is zero whenever a test found
   nothing, so it is never reloaded.  On a hit, %esi is the offset of the
   block holding the NUL when control passes to L(CopyFrom1To16Bytes)
   (not shown in this part of the file).  In the USE_AS_STRNCPY builds
   %ebx is reduced by 16 per step, and reaching zero or wrapping leaves
   through L(CopyFrom1To16BytesCase2OrCase3). */
L(Align16Both):
movaps (%ecx), %xmm1
movaps 16(%ecx), %xmm2
movaps %xmm1, (%edx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase2OrCase3)
#endif
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase2OrCase3)
#endif
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm4
movaps %xmm3, (%edx, %esi)
pcmpeqb %xmm4, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase2OrCase3)
#endif
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm1
movaps %xmm4, (%edx, %esi)
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase2OrCase3)
#endif
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm2
movaps %xmm1, (%edx, %esi)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase2OrCase3)
#endif
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase2OrCase3)
#endif
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
/* Six tests passed.  Store the last checked block, then round the source
   position down to a 64-byte boundary and shift %edx by the same delta
   (%eax = old %ecx - new %ecx), ready for the 64-byte loop that follows.
   Bytes between the new position and the old one may be copied again. */
movaps %xmm3, (%edx, %esi)
mov %ecx, %eax
lea 16(%ecx, %esi), %ecx
and $-0x40, %ecx
sub %ecx, %eax
sub %eax, %edx
#ifdef USE_AS_STRNCPY
/* Re-base the remaining-length counter for the new position:
   %ebx = %ebx + %eax + 112. */
lea 112(%ebx, %eax), %ebx
#endif
/* The loop advances both pointers before it tests, so -64 is the offset
   of the first block of an iteration relative to the advanced pointers. */
mov $-0x40, %esi
/* Aligned64Loop: main loop for the case where source and destination are
   both aligned, 64 bytes per iteration.  The four source blocks are
   loaded into %xmm4..%xmm7 (via copies), folded together with pminub (a
   zero byte in any block gives a zero byte in the fold), and compared
   once against the all-zero %xmm0.  Both pointers are advanced before
   the test, which is why the stores use negative displacements.  Nothing
   is stored when a NUL is detected; L(Aligned64Leave) handles that. */
L(Aligned64Loop):
movaps (%ecx), %xmm2
movaps 32(%ecx), %xmm3
movaps %xmm2, %xmm4
movaps 16(%ecx), %xmm5
movaps %xmm3, %xmm6
movaps 48(%ecx), %xmm7
pminub %xmm5, %xmm2
pminub %xmm7, %xmm3
pminub %xmm2, %xmm3
lea 64(%edx), %edx
pcmpeqb %xmm0, %xmm3
lea 64(%ecx), %ecx
pmovmskb %xmm3, %eax
#ifdef USE_AS_STRNCPY
/* Length counter reached zero or wrapped within this 64-byte group. */
sub $64, %ebx
jbe L(StrncpyLeaveCase2OrCase3)
#endif
test %eax, %eax
jnz L(Aligned64Leave)
movaps %xmm4, -64(%edx)
movaps %xmm5, -48(%edx)
movaps %xmm6, -32(%edx)
movaps %xmm7, -16(%edx)
jmp L(Aligned64Loop)
/* Aligned64Leave: the 64 bytes held in %xmm4..%xmm7 contain a NUL.  The
   blocks are tested in order; each NUL-free block is stored, and %esi
   (starting at -64) advances by 16 so that it is the offset of the block
   holding the NUL, relative to the already advanced %ecx / %edx, when
   control reaches L(CopyFrom1To16Bytes) (not shown in this part of the
   file).  The fourth block needs no test: the folded mask was non-zero
   and the first three blocks were clean.  %xmm0 is zero after every
   test that found nothing. */
L(Aligned64Leave):
#ifdef USE_AS_STRNCPY
/* Add 48 back to %ebx, then take 16 off for each block stepped past. */
lea 48(%ebx), %ebx
#endif
pcmpeqb %xmm4, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
pcmpeqb %xmm5, %xmm0
#ifdef USE_AS_STRNCPY
lea -16(%ebx), %ebx
#endif
pmovmskb %xmm0, %eax
movaps %xmm4, -64(%edx)
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
pcmpeqb %xmm6, %xmm0
#ifdef USE_AS_STRNCPY
lea -16(%ebx), %ebx
#endif
pmovmskb %xmm0, %eax
movaps %xmm5, -48(%edx)
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps %xmm6, -32(%edx)
pcmpeqb %xmm7, %xmm0
#ifdef USE_AS_STRNCPY
lea -16(%ebx), %ebx
#endif
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
jmp L(CopyFrom1To16Bytes)
/* Shl1: copy path taken when %edx is 16-byte aligned and %ecx lies 1 byte
   past a 16-byte boundary.  Source data is read as aligned blocks and
   every 16 output bytes are assembled from two adjacent blocks with
   palignr $1.  %xmm0 is all-zero before each NUL test and stays zero
   whenever the test finds nothing.  In the USE_AS_STRNCPY builds %ebx is
   the remaining-length counter; when it reaches zero or wraps the code
   leaves through L(StrncpyExit1Case2OrCase3) or L(StrncpyLeave1), neither
   of which is shown in this part of the file. */
.p2align 4
L(Shl1):
/* %xmm1 = aligned block containing (%ecx); %xmm2 = the block after it. */
movaps -1(%ecx), %xmm1
movaps 15(%ecx), %xmm2
L(Shl1Start):
/* Test %xmm2 for NUL; %xmm3 keeps an unshifted copy of it. */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit1Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl1LoopExit)
/* No NUL: %xmm2 becomes the 16 source bytes starting at (%ecx); store
   them, then fetch and test the next aligned block. */
palignr $1, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 31(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit1Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl1LoopExit)
palignr $1, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 31(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit1Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl1LoopExit)
/* This step does not refresh %xmm3, so %xmm3 still holds the previous
   block and is the low operand of the final palignr below. */
palignr $1, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 31(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit1Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl1LoopExit)
palignr $1, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Four 16-byte steps found no NUL.  Round the next aligned source block
   down to a 64-byte boundary and pull %edx back (and push %ebx up) by the
   same distance in %eax; some bytes may be copied a second time. */
lea 31(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -15(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -1(%ecx), %xmm1
L(Shl1LoopStart):
/* 64 bytes per iteration: load four aligned blocks, fold them with pminub
   so one pcmpeqb detects a NUL in any of them, and pre-shift the upper
   two outputs.  %xmm7 keeps the unshifted last block for the next pass. */
movaps 15(%ecx), %xmm2
movaps 31(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 47(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 63(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $1, %xmm4, %xmm5
palignr $1, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: %xmm1 and %xmm2 are still unshifted,
   so finish 16 bytes at a time from L(Shl1Start). */
test %eax, %eax
jnz L(Shl1Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave1)
#endif
palignr $1, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $1, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl1LoopStart)
L(Shl1LoopExit):
/* The NUL is in the aligned block at 15(%ecx).  Copy the 15 bytes before
   it with two overlapping 8-byte moves, set %esi = 15 and continue at
   L(CopyFrom1To16Bytes) (not shown in this part of the file). */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
mov $15, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl2: copy path taken when %edx is 16-byte aligned and %ecx lies 2 bytes
   past a 16-byte boundary.  Source data is read as aligned blocks and
   every 16 output bytes are assembled from two adjacent blocks with
   palignr $2.  %xmm0 is all-zero before each NUL test and stays zero
   whenever the test finds nothing.  In the USE_AS_STRNCPY builds %ebx is
   the remaining-length counter; when it reaches zero or wraps the code
   leaves through L(StrncpyExit2Case2OrCase3) or L(StrncpyLeave2), neither
   of which is shown in this part of the file. */
.p2align 4
L(Shl2):
/* %xmm1 = aligned block containing (%ecx); %xmm2 = the block after it. */
movaps -2(%ecx), %xmm1
movaps 14(%ecx), %xmm2
L(Shl2Start):
/* Test %xmm2 for NUL; %xmm3 keeps an unshifted copy of it. */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit2Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl2LoopExit)
/* No NUL: %xmm2 becomes the 16 source bytes starting at (%ecx); store
   them, then fetch and test the next aligned block. */
palignr $2, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 30(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit2Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl2LoopExit)
palignr $2, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 30(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit2Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl2LoopExit)
/* This step does not refresh %xmm3, so %xmm3 still holds the previous
   block and is the low operand of the final palignr below. */
palignr $2, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 30(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit2Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl2LoopExit)
palignr $2, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Four 16-byte steps found no NUL.  Round the next aligned source block
   down to a 64-byte boundary and pull %edx back (and push %ebx up) by the
   same distance in %eax; some bytes may be copied a second time. */
lea 30(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -14(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -2(%ecx), %xmm1
L(Shl2LoopStart):
/* 64 bytes per iteration: load four aligned blocks, fold them with pminub
   so one pcmpeqb detects a NUL in any of them, and pre-shift the upper
   two outputs.  %xmm7 keeps the unshifted last block for the next pass. */
movaps 14(%ecx), %xmm2
movaps 30(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 46(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 62(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $2, %xmm4, %xmm5
palignr $2, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: %xmm1 and %xmm2 are still unshifted,
   so finish 16 bytes at a time from L(Shl2Start). */
test %eax, %eax
jnz L(Shl2Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave2)
#endif
palignr $2, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $2, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl2LoopStart)
L(Shl2LoopExit):
/* The NUL is in the aligned block at 14(%ecx).  Copy the 14 bytes before
   it with two overlapping 8-byte moves, set %esi = 14 and continue at
   L(CopyFrom1To16Bytes) (not shown in this part of the file). */
movlpd (%ecx), %xmm0
movlpd 6(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 6(%edx)
mov $14, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl3: copy path taken when %edx is 16-byte aligned and %ecx lies 3 bytes
   past a 16-byte boundary.  Source data is read as aligned blocks and
   every 16 output bytes are assembled from two adjacent blocks with
   palignr $3.  %xmm0 is all-zero before each NUL test and stays zero
   whenever the test finds nothing.  In the USE_AS_STRNCPY builds %ebx is
   the remaining-length counter; when it reaches zero or wraps the code
   leaves through L(StrncpyExit3Case2OrCase3) or L(StrncpyLeave3), neither
   of which is shown in this part of the file. */
.p2align 4
L(Shl3):
/* %xmm1 = aligned block containing (%ecx); %xmm2 = the block after it. */
movaps -3(%ecx), %xmm1
movaps 13(%ecx), %xmm2
L(Shl3Start):
/* Test %xmm2 for NUL; %xmm3 keeps an unshifted copy of it. */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit3Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl3LoopExit)
/* No NUL: %xmm2 becomes the 16 source bytes starting at (%ecx); store
   them, then fetch and test the next aligned block. */
palignr $3, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 29(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit3Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl3LoopExit)
palignr $3, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 29(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit3Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl3LoopExit)
/* This step does not refresh %xmm3, so %xmm3 still holds the previous
   block and is the low operand of the final palignr below. */
palignr $3, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 29(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit3Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl3LoopExit)
palignr $3, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Four 16-byte steps found no NUL.  Round the next aligned source block
   down to a 64-byte boundary and pull %edx back (and push %ebx up) by the
   same distance in %eax; some bytes may be copied a second time. */
lea 29(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -13(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -3(%ecx), %xmm1
L(Shl3LoopStart):
/* 64 bytes per iteration: load four aligned blocks, fold them with pminub
   so one pcmpeqb detects a NUL in any of them, and pre-shift the upper
   two outputs.  %xmm7 keeps the unshifted last block for the next pass. */
movaps 13(%ecx), %xmm2
movaps 29(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 45(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 61(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $3, %xmm4, %xmm5
palignr $3, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: %xmm1 and %xmm2 are still unshifted,
   so finish 16 bytes at a time from L(Shl3Start). */
test %eax, %eax
jnz L(Shl3Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave3)
#endif
palignr $3, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $3, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl3LoopStart)
L(Shl3LoopExit):
/* The NUL is in the aligned block at 13(%ecx).  Copy the 13 bytes before
   it with two overlapping 8-byte moves, set %esi = 13 and continue at
   L(CopyFrom1To16Bytes) (not shown in this part of the file). */
movlpd (%ecx), %xmm0
movlpd 5(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 5(%edx)
mov $13, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl4: copy path taken when %edx is 16-byte aligned and %ecx lies 4 bytes
   past a 16-byte boundary.  Source data is read as aligned blocks and
   every 16 output bytes are assembled from two adjacent blocks with
   palignr $4.  %xmm0 is all-zero before each NUL test and stays zero
   whenever the test finds nothing.  In the USE_AS_STRNCPY builds %ebx is
   the remaining-length counter; when it reaches zero or wraps the code
   leaves through L(StrncpyExit4Case2OrCase3) or L(StrncpyLeave4), neither
   of which is shown in this part of the file. */
.p2align 4
L(Shl4):
/* %xmm1 = aligned block containing (%ecx); %xmm2 = the block after it. */
movaps -4(%ecx), %xmm1
movaps 12(%ecx), %xmm2
L(Shl4Start):
/* Test %xmm2 for NUL; %xmm3 keeps an unshifted copy of it. */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit4Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl4LoopExit)
/* No NUL: %xmm2 becomes the 16 source bytes starting at (%ecx); store
   them, then fetch and test the next aligned block. */
palignr $4, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit4Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit4Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl4LoopExit)
/* This step does not refresh %xmm3, so %xmm3 still holds the previous
   block and is the low operand of the final palignr below. */
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit4Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Four 16-byte steps found no NUL.  Round the next aligned source block
   down to a 64-byte boundary and pull %edx back (and push %ebx up) by the
   same distance in %eax; some bytes may be copied a second time. */
lea 28(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -12(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -4(%ecx), %xmm1
L(Shl4LoopStart):
/* 64 bytes per iteration: load four aligned blocks, fold them with pminub
   so one pcmpeqb detects a NUL in any of them, and pre-shift the upper
   two outputs.  %xmm7 keeps the unshifted last block for the next pass. */
movaps 12(%ecx), %xmm2
movaps 28(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 44(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 60(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $4, %xmm4, %xmm5
palignr $4, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: %xmm1 and %xmm2 are still unshifted,
   so finish 16 bytes at a time from L(Shl4Start). */
test %eax, %eax
jnz L(Shl4Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave4)
#endif
palignr $4, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
/* The NUL is in the aligned block at 12(%ecx).  Copy the 12 bytes before
   it (8 + 4, with %esi as a scratch register), set %esi = 12 and continue
   at L(CopyFrom1To16Bytes) (not shown in this part of the file). */
movlpd (%ecx), %xmm0
movl 8(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 8(%edx)
mov $12, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl5: copy path taken when %edx is 16-byte aligned and %ecx lies 5 bytes
   past a 16-byte boundary.  Source data is read as aligned blocks and
   every 16 output bytes are assembled from two adjacent blocks with
   palignr $5.  %xmm0 is all-zero before each NUL test and stays zero
   whenever the test finds nothing.  In the USE_AS_STRNCPY builds %ebx is
   the remaining-length counter; when it reaches zero or wraps the code
   leaves through L(StrncpyExit5Case2OrCase3) or L(StrncpyLeave5), neither
   of which is shown in this part of the file. */
.p2align 4
L(Shl5):
/* %xmm1 = aligned block containing (%ecx); %xmm2 = the block after it. */
movaps -5(%ecx), %xmm1
movaps 11(%ecx), %xmm2
L(Shl5Start):
/* Test %xmm2 for NUL; %xmm3 keeps an unshifted copy of it. */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit5Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl5LoopExit)
/* No NUL: %xmm2 becomes the 16 source bytes starting at (%ecx); store
   them, then fetch and test the next aligned block. */
palignr $5, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 27(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit5Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl5LoopExit)
palignr $5, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 27(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit5Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl5LoopExit)
/* This step does not refresh %xmm3, so %xmm3 still holds the previous
   block and is the low operand of the final palignr below. */
palignr $5, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 27(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit5Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl5LoopExit)
palignr $5, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Four 16-byte steps found no NUL.  Round the next aligned source block
   down to a 64-byte boundary and pull %edx back (and push %ebx up) by the
   same distance in %eax; some bytes may be copied a second time. */
lea 27(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -11(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -5(%ecx), %xmm1
L(Shl5LoopStart):
/* 64 bytes per iteration: load four aligned blocks, fold them with pminub
   so one pcmpeqb detects a NUL in any of them, and pre-shift the upper
   two outputs.  %xmm7 keeps the unshifted last block for the next pass. */
movaps 11(%ecx), %xmm2
movaps 27(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 43(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 59(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $5, %xmm4, %xmm5
palignr $5, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: %xmm1 and %xmm2 are still unshifted,
   so finish 16 bytes at a time from L(Shl5Start). */
test %eax, %eax
jnz L(Shl5Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave5)
#endif
palignr $5, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $5, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl5LoopStart)
L(Shl5LoopExit):
/* The NUL is in the aligned block at 11(%ecx).  Copy the 11 bytes before
   it (8 bytes plus an overlapping 4 at offset 7, with %esi as scratch),
   set %esi = 11 and continue at L(CopyFrom1To16Bytes) (not shown in this
   part of the file). */
movlpd (%ecx), %xmm0
movl 7(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 7(%edx)
mov $11, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl6: copy path taken when %edx is 16-byte aligned and %ecx lies 6 bytes
   past a 16-byte boundary.  Source data is read as aligned blocks and
   every 16 output bytes are assembled from two adjacent blocks with
   palignr $6.  %xmm0 is all-zero before each NUL test and stays zero
   whenever the test finds nothing.  In the USE_AS_STRNCPY builds %ebx is
   the remaining-length counter; when it reaches zero or wraps the code
   leaves through L(StrncpyExit6Case2OrCase3) or L(StrncpyLeave6), neither
   of which is shown in this part of the file. */
.p2align 4
L(Shl6):
/* %xmm1 = aligned block containing (%ecx); %xmm2 = the block after it. */
movaps -6(%ecx), %xmm1
movaps 10(%ecx), %xmm2
L(Shl6Start):
/* Test %xmm2 for NUL; %xmm3 keeps an unshifted copy of it. */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit6Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl6LoopExit)
/* No NUL: %xmm2 becomes the 16 source bytes starting at (%ecx); store
   them, then fetch and test the next aligned block. */
palignr $6, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 26(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit6Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl6LoopExit)
palignr $6, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 26(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit6Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl6LoopExit)
/* This step does not refresh %xmm3, so %xmm3 still holds the previous
   block and is the low operand of the final palignr below. */
palignr $6, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 26(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit6Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl6LoopExit)
palignr $6, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Four 16-byte steps found no NUL.  Round the next aligned source block
   down to a 64-byte boundary and pull %edx back (and push %ebx up) by the
   same distance in %eax; some bytes may be copied a second time. */
lea 26(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -10(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -6(%ecx), %xmm1
L(Shl6LoopStart):
/* 64 bytes per iteration: load four aligned blocks, fold them with pminub
   so one pcmpeqb detects a NUL in any of them, and pre-shift the upper
   two outputs.  %xmm7 keeps the unshifted last block for the next pass. */
movaps 10(%ecx), %xmm2
movaps 26(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 42(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 58(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $6, %xmm4, %xmm5
palignr $6, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: %xmm1 and %xmm2 are still unshifted,
   so finish 16 bytes at a time from L(Shl6Start). */
test %eax, %eax
jnz L(Shl6Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave6)
#endif
palignr $6, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $6, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl6LoopStart)
L(Shl6LoopExit):
/* The NUL is in the aligned block at 10(%ecx).  Copy the 10 bytes before
   it (8 bytes plus an overlapping 4 at offset 6, with %esi as scratch),
   set %esi = 10 and continue at L(CopyFrom1To16Bytes) (not shown in this
   part of the file). */
movlpd (%ecx), %xmm0
movl 6(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 6(%edx)
mov $10, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl7: copy path taken when %edx is 16-byte aligned and %ecx lies 7 bytes
   past a 16-byte boundary.  Source data is read as aligned blocks and
   every 16 output bytes are assembled from two adjacent blocks with
   palignr $7.  %xmm0 is all-zero before each NUL test and stays zero
   whenever the test finds nothing.  In the USE_AS_STRNCPY builds %ebx is
   the remaining-length counter; when it reaches zero or wraps the code
   leaves through L(StrncpyExit7Case2OrCase3) or L(StrncpyLeave7), neither
   of which is shown in this part of the file. */
.p2align 4
L(Shl7):
/* %xmm1 = aligned block containing (%ecx); %xmm2 = the block after it. */
movaps -7(%ecx), %xmm1
movaps 9(%ecx), %xmm2
L(Shl7Start):
/* Test %xmm2 for NUL; %xmm3 keeps an unshifted copy of it. */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit7Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl7LoopExit)
/* No NUL: %xmm2 becomes the 16 source bytes starting at (%ecx); store
   them, then fetch and test the next aligned block. */
palignr $7, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 25(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit7Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl7LoopExit)
palignr $7, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 25(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit7Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl7LoopExit)
/* This step does not refresh %xmm3, so %xmm3 still holds the previous
   block and is the low operand of the final palignr below. */
palignr $7, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 25(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit7Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl7LoopExit)
palignr $7, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Four 16-byte steps found no NUL.  Round the next aligned source block
   down to a 64-byte boundary and pull %edx back (and push %ebx up) by the
   same distance in %eax; some bytes may be copied a second time. */
lea 25(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -9(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -7(%ecx), %xmm1
L(Shl7LoopStart):
/* 64 bytes per iteration: load four aligned blocks, fold them with pminub
   so one pcmpeqb detects a NUL in any of them, and pre-shift the upper
   two outputs.  %xmm7 keeps the unshifted last block for the next pass. */
movaps 9(%ecx), %xmm2
movaps 25(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 41(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 57(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $7, %xmm4, %xmm5
palignr $7, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: %xmm1 and %xmm2 are still unshifted,
   so finish 16 bytes at a time from L(Shl7Start). */
test %eax, %eax
jnz L(Shl7Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave7)
#endif
palignr $7, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $7, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl7LoopStart)
L(Shl7LoopExit):
/* The NUL is in the aligned block at 9(%ecx).  Copy the 9 bytes before
   it (8 bytes plus an overlapping 4 at offset 5, with %esi as scratch),
   set %esi = 9 and continue at L(CopyFrom1To16Bytes) (not shown in this
   part of the file). */
movlpd (%ecx), %xmm0
movl 5(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 5(%edx)
mov $9, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl8: copy path taken when %edx is 16-byte aligned and %ecx lies 8 bytes
   past a 16-byte boundary.  Source data is read as aligned blocks and
   every 16 output bytes are assembled from two adjacent blocks with
   palignr $8.  %xmm0 is all-zero before each NUL test and stays zero
   whenever the test finds nothing.  In the USE_AS_STRNCPY builds %ebx is
   the remaining-length counter; when it reaches zero or wraps the code
   leaves through L(StrncpyExit8Case2OrCase3) or L(StrncpyLeave8), neither
   of which is shown in this part of the file. */
.p2align 4
L(Shl8):
/* %xmm1 = aligned block containing (%ecx); %xmm2 = the block after it. */
movaps -8(%ecx), %xmm1
movaps 8(%ecx), %xmm2
L(Shl8Start):
/* Test %xmm2 for NUL; %xmm3 keeps an unshifted copy of it. */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit8Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl8LoopExit)
/* No NUL: %xmm2 becomes the 16 source bytes starting at (%ecx); store
   them, then fetch and test the next aligned block. */
palignr $8, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit8Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit8Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl8LoopExit)
/* This step does not refresh %xmm3, so %xmm3 still holds the previous
   block and is the low operand of the final palignr below. */
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit8Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Four 16-byte steps found no NUL.  Round the next aligned source block
   down to a 64-byte boundary and pull %edx back (and push %ebx up) by the
   same distance in %eax; some bytes may be copied a second time. */
lea 24(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -8(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -8(%ecx), %xmm1
L(Shl8LoopStart):
/* 64 bytes per iteration: load four aligned blocks, fold them with pminub
   so one pcmpeqb detects a NUL in any of them, and pre-shift the upper
   two outputs.  %xmm7 keeps the unshifted last block for the next pass. */
movaps 8(%ecx), %xmm2
movaps 24(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 40(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 56(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $8, %xmm4, %xmm5
palignr $8, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: %xmm1 and %xmm2 are still unshifted,
   so finish 16 bytes at a time from L(Shl8Start). */
test %eax, %eax
jnz L(Shl8Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave8)
#endif
palignr $8, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
/* The NUL is in the aligned block at 8(%ecx).  Copy the 8 bytes before
   it with one 8-byte move, set %esi = 8 and continue at
   L(CopyFrom1To16Bytes) (not shown in this part of the file). */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
mov $8, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl9: copy path taken when %edx is 16-byte aligned and %ecx lies 9 bytes
   past a 16-byte boundary.  Source data is read as aligned blocks and
   every 16 output bytes are assembled from two adjacent blocks with
   palignr $9.  %xmm0 is all-zero before each NUL test and stays zero
   whenever the test finds nothing.  In the USE_AS_STRNCPY builds %ebx is
   the remaining-length counter; when it reaches zero or wraps the code
   leaves through L(StrncpyExit9Case2OrCase3) or L(StrncpyLeave9), neither
   of which is shown in this part of the file. */
.p2align 4
L(Shl9):
/* %xmm1 = aligned block containing (%ecx); %xmm2 = the block after it. */
movaps -9(%ecx), %xmm1
movaps 7(%ecx), %xmm2
L(Shl9Start):
/* Test %xmm2 for NUL; %xmm3 keeps an unshifted copy of it. */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit9Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl9LoopExit)
/* No NUL: %xmm2 becomes the 16 source bytes starting at (%ecx); store
   them, then fetch and test the next aligned block. */
palignr $9, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 23(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit9Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl9LoopExit)
palignr $9, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 23(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit9Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl9LoopExit)
/* This step does not refresh %xmm3, so %xmm3 still holds the previous
   block and is the low operand of the final palignr below. */
palignr $9, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 23(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit9Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl9LoopExit)
palignr $9, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Four 16-byte steps found no NUL.  Round the next aligned source block
   down to a 64-byte boundary and pull %edx back (and push %ebx up) by the
   same distance in %eax; some bytes may be copied a second time. */
lea 23(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -7(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -9(%ecx), %xmm1
L(Shl9LoopStart):
/* 64 bytes per iteration: load four aligned blocks, fold them with pminub
   so one pcmpeqb detects a NUL in any of them, and pre-shift the upper
   two outputs.  %xmm7 keeps the unshifted last block for the next pass. */
movaps 7(%ecx), %xmm2
movaps 23(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 39(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 55(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $9, %xmm4, %xmm5
palignr $9, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: %xmm1 and %xmm2 are still unshifted,
   so finish 16 bytes at a time from L(Shl9Start). */
test %eax, %eax
jnz L(Shl9Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave9)
#endif
palignr $9, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $9, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl9LoopStart)
L(Shl9LoopExit):
/* The NUL is in the aligned block at 7(%ecx).  One 8-byte move starting
   one byte before (%ecx) covers the 7 bytes ahead of that block (the
   extra leading byte is written again with the same value); then set
   %esi = 7 and continue at L(CopyFrom1To16Bytes) (not shown in this part
   of the file). */
movlpd -1(%ecx), %xmm0
movlpd %xmm0, -1(%edx)
mov $7, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl10: copy path taken when %edx is 16-byte aligned and %ecx lies 10
   bytes past a 16-byte boundary.  Source data is read as aligned blocks
   and every 16 output bytes are assembled from two adjacent blocks with
   palignr $10.  %xmm0 is all-zero before each NUL test and stays zero
   whenever the test finds nothing.  In the USE_AS_STRNCPY builds %ebx is
   the remaining-length counter; when it reaches zero or wraps the code
   leaves through L(StrncpyExit10Case2OrCase3) or L(StrncpyLeave10),
   neither of which is shown in this part of the file. */
.p2align 4
L(Shl10):
/* %xmm1 = aligned block containing (%ecx); %xmm2 = the block after it. */
movaps -10(%ecx), %xmm1
movaps 6(%ecx), %xmm2
L(Shl10Start):
/* Test %xmm2 for NUL; %xmm3 keeps an unshifted copy of it. */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit10Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl10LoopExit)
/* No NUL: %xmm2 becomes the 16 source bytes starting at (%ecx); store
   them, then fetch and test the next aligned block. */
palignr $10, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 22(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit10Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl10LoopExit)
palignr $10, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 22(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit10Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl10LoopExit)
/* This step does not refresh %xmm3, so %xmm3 still holds the previous
   block and is the low operand of the final palignr below. */
palignr $10, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 22(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit10Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl10LoopExit)
palignr $10, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Four 16-byte steps found no NUL.  Round the next aligned source block
   down to a 64-byte boundary and pull %edx back (and push %ebx up) by the
   same distance in %eax; some bytes may be copied a second time. */
lea 22(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -6(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -10(%ecx), %xmm1
L(Shl10LoopStart):
/* 64 bytes per iteration: load four aligned blocks, fold them with pminub
   so one pcmpeqb detects a NUL in any of them, and pre-shift the upper
   two outputs.  %xmm7 keeps the unshifted last block for the next pass. */
movaps 6(%ecx), %xmm2
movaps 22(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 38(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 54(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $10, %xmm4, %xmm5
palignr $10, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: %xmm1 and %xmm2 are still unshifted,
   so finish 16 bytes at a time from L(Shl10Start). */
test %eax, %eax
jnz L(Shl10Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave10)
#endif
palignr $10, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $10, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl10LoopStart)
L(Shl10LoopExit):
/* The NUL is in the aligned block at 6(%ecx).  One 8-byte move starting
   two bytes before (%ecx) covers the 6 bytes ahead of that block (the
   extra leading bytes are written again with the same values); then set
   %esi = 6 and continue at L(CopyFrom1To16Bytes) (not shown in this part
   of the file). */
movlpd -2(%ecx), %xmm0
movlpd %xmm0, -2(%edx)
mov $6, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl11: copy path taken when %edx is 16-byte aligned and %ecx lies 11
   bytes past a 16-byte boundary.  Source data is read as aligned blocks
   and every 16 output bytes are assembled from two adjacent blocks with
   palignr $11.  %xmm0 is all-zero before each NUL test and stays zero
   whenever the test finds nothing.  In the USE_AS_STRNCPY builds %ebx is
   the remaining-length counter; when it reaches zero or wraps the code
   leaves through L(StrncpyExit11Case2OrCase3) or L(StrncpyLeave11),
   neither of which is shown in this part of the file. */
.p2align 4
L(Shl11):
/* %xmm1 = aligned block containing (%ecx); %xmm2 = the block after it. */
movaps -11(%ecx), %xmm1
movaps 5(%ecx), %xmm2
L(Shl11Start):
/* Test %xmm2 for NUL; %xmm3 keeps an unshifted copy of it. */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit11Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl11LoopExit)
/* No NUL: %xmm2 becomes the 16 source bytes starting at (%ecx); store
   them, then fetch and test the next aligned block. */
palignr $11, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 21(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit11Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl11LoopExit)
palignr $11, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 21(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit11Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl11LoopExit)
/* This step does not refresh %xmm3, so %xmm3 still holds the previous
   block and is the low operand of the final palignr below. */
palignr $11, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 21(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit11Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl11LoopExit)
palignr $11, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Four 16-byte steps found no NUL.  Round the next aligned source block
   down to a 64-byte boundary and pull %edx back (and push %ebx up) by the
   same distance in %eax; some bytes may be copied a second time. */
lea 21(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -5(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -11(%ecx), %xmm1
L(Shl11LoopStart):
/* 64 bytes per iteration: load four aligned blocks, fold them with pminub
   so one pcmpeqb detects a NUL in any of them, and pre-shift the upper
   two outputs.  %xmm7 keeps the unshifted last block for the next pass. */
movaps 5(%ecx), %xmm2
movaps 21(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 37(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 53(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $11, %xmm4, %xmm5
palignr $11, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: %xmm1 and %xmm2 are still unshifted,
   so finish 16 bytes at a time from L(Shl11Start). */
test %eax, %eax
jnz L(Shl11Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave11)
#endif
palignr $11, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $11, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl11LoopStart)
L(Shl11LoopExit):
/* The NUL is in the aligned block at 5(%ecx).  One 8-byte move starting
   three bytes before (%ecx) covers the 5 bytes ahead of that block (the
   extra leading bytes are written again with the same values); then set
   %esi = 5 and continue at L(CopyFrom1To16Bytes) (not shown in this part
   of the file). */
movlpd -3(%ecx), %xmm0
movlpd %xmm0, -3(%edx)
mov $5, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl12: copy path taken when %edx is 16-byte aligned and %ecx lies 12
   bytes past a 16-byte boundary.  Source data is read as aligned blocks
   and every 16 output bytes are assembled from two adjacent blocks with
   palignr $12.  %xmm0 is all-zero before each NUL test and stays zero
   whenever the test finds nothing.  In the USE_AS_STRNCPY builds %ebx is
   the remaining-length counter; when it reaches zero or wraps the code
   leaves through L(StrncpyExit12Case2OrCase3) or L(StrncpyLeave12),
   neither of which is shown in this part of the file. */
.p2align 4
L(Shl12):
/* %xmm1 = aligned block containing (%ecx); %xmm2 = the block after it. */
movaps -12(%ecx), %xmm1
movaps 4(%ecx), %xmm2
L(Shl12Start):
/* Test %xmm2 for NUL; %xmm3 keeps an unshifted copy of it. */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit12Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl12LoopExit)
/* No NUL: %xmm2 becomes the 16 source bytes starting at (%ecx); store
   them, then fetch and test the next aligned block. */
palignr $12, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit12Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit12Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl12LoopExit)
/* This step does not refresh %xmm3, so %xmm3 still holds the previous
   block and is the low operand of the final palignr below. */
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit12Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Four 16-byte steps found no NUL.  Round the next aligned source block
   down to a 64-byte boundary and pull %edx back (and push %ebx up) by the
   same distance in %eax; some bytes may be copied a second time. */
lea 20(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -4(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -12(%ecx), %xmm1
L(Shl12LoopStart):
/* 64 bytes per iteration: load four aligned blocks, fold them with pminub
   so one pcmpeqb detects a NUL in any of them, and pre-shift the upper
   two outputs.  %xmm7 keeps the unshifted last block for the next pass. */
movaps 4(%ecx), %xmm2
movaps 20(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 36(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 52(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $12, %xmm4, %xmm5
palignr $12, %xmm3, %xmm4
/* NUL somewhere in these 64 bytes: %xmm1 and %xmm2 are still unshifted,
   so finish 16 bytes at a time from L(Shl12Start). */
test %eax, %eax
jnz L(Shl12Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave12)
#endif
palignr $12, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
/* The NUL is in the aligned block at 4(%ecx).  Copy the 4 bytes before
   it with one 32-bit move (using %esi as scratch), set %esi = 4 and
   continue at L(CopyFrom1To16Bytes) (not shown in this part of the
   file). */
movl (%ecx), %esi
movl %esi, (%edx)
mov $4, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl13: same scheme as the other ShlN paths, for a source that is read
   with aligned 16-byte loads at %ecx-13, %ecx+3, ... and realigned with
   palignr $13 before aligned stores at %edx.
     %xmm1 previous block, %xmm2 current block, %xmm3 copy of current
     %xmm0 pcmpeqb operand/result; assumed all-zero on entry -- TODO confirm
     %eax  pmovmskb mask; %ebx (USE_AS_STRNCPY) length budget.  */
.p2align 4
L(Shl13):
movaps -13(%ecx), %xmm1
movaps 3(%ecx), %xmm2
L(Shl13Start):
/* Step 1 of 4: test the current block, then emit one shifted block.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit13Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl13LoopExit)
palignr $13, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 19(%ecx), %xmm2
/* Step 2 of 4.  */
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit13Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl13LoopExit)
palignr $13, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 19(%ecx), %xmm2
movaps %xmm3, %xmm1
/* Step 3 of 4.  */
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit13Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl13LoopExit)
palignr $13, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 19(%ecx), %xmm2
/* Step 4 of 4.  %xmm3 still holds the block loaded in the previous step
   and is used as the low half below.  */
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit13Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl13LoopExit)
palignr $13, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Prepare the 64-byte loop: round the load address (%ecx+19) down to a
   64-byte boundary, move %edx back by the same distance and, for strncpy,
   give the same number of bytes back to %ebx.  */
lea 19(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -3(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -13(%ecx), %xmm1
/* Bulk loop: four blocks (64 bytes) per pass.  */
L(Shl13LoopStart):
movaps 3(%ecx), %xmm2
movaps 19(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 35(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 51(%ecx), %xmm5
/* Fold the four blocks with a byte-wise minimum and test the fold for a
   zero byte.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
/* Unshifted copy of the last block; becomes %xmm1 for the next pass.  */
movaps %xmm5, %xmm7
palignr $13, %xmm4, %xmm5
palignr $13, %xmm3, %xmm4
test %eax, %eax
/* Zero byte within these 64 bytes: redo them 16 bytes at a time.  */
jnz L(Shl13Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave13)
#endif
palignr $13, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $13, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl13LoopStart)
/* A zero byte is in the block at %ecx+3: copy 4 bytes starting 1 byte
   before the cursors (covers %ecx .. %ecx+2) and continue in
   CopyFrom1To16Bytes with %esi = 3.  */
L(Shl13LoopExit):
movl -1(%ecx), %esi
movl %esi, -1(%edx)
mov $3, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl14: same scheme as the other ShlN paths, for a source that is read
   with aligned 16-byte loads at %ecx-14, %ecx+2, ... and realigned with
   palignr $14 before aligned stores at %edx.
     %xmm1 previous block, %xmm2 current block, %xmm3 copy of current
     %xmm0 pcmpeqb operand/result; assumed all-zero on entry -- TODO confirm
     %eax  pmovmskb mask; %ebx (USE_AS_STRNCPY) length budget.  */
.p2align 4
L(Shl14):
movaps -14(%ecx), %xmm1
movaps 2(%ecx), %xmm2
L(Shl14Start):
/* Step 1 of 4: test the current block, then emit one shifted block.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit14Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl14LoopExit)
palignr $14, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 18(%ecx), %xmm2
/* Step 2 of 4.  */
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit14Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl14LoopExit)
palignr $14, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 18(%ecx), %xmm2
movaps %xmm3, %xmm1
/* Step 3 of 4.  */
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit14Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl14LoopExit)
palignr $14, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 18(%ecx), %xmm2
/* Step 4 of 4.  %xmm3 still holds the block loaded in the previous step
   and is used as the low half below.  */
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit14Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl14LoopExit)
palignr $14, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Prepare the 64-byte loop: round the load address (%ecx+18) down to a
   64-byte boundary, move %edx back by the same distance and, for strncpy,
   give the same number of bytes back to %ebx.  */
lea 18(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -2(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -14(%ecx), %xmm1
/* Bulk loop: four blocks (64 bytes) per pass.  */
L(Shl14LoopStart):
movaps 2(%ecx), %xmm2
movaps 18(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 34(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 50(%ecx), %xmm5
/* Fold the four blocks with a byte-wise minimum and test the fold for a
   zero byte.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
/* Unshifted copy of the last block; becomes %xmm1 for the next pass.  */
movaps %xmm5, %xmm7
palignr $14, %xmm4, %xmm5
palignr $14, %xmm3, %xmm4
test %eax, %eax
/* Zero byte within these 64 bytes: redo them 16 bytes at a time.  */
jnz L(Shl14Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave14)
#endif
palignr $14, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $14, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl14LoopStart)
/* A zero byte is in the block at %ecx+2: copy 4 bytes starting 2 bytes
   before the cursors (covers %ecx .. %ecx+1) and continue in
   CopyFrom1To16Bytes with %esi = 2.  */
L(Shl14LoopExit):
movl -2(%ecx), %esi
movl %esi, -2(%edx)
mov $2, %esi
jmp L(CopyFrom1To16Bytes)
/* Shl15: same scheme as the other ShlN paths, for a source that is read
   with aligned 16-byte loads at %ecx-15, %ecx+1, ... and realigned with
   palignr $15 before aligned stores at %edx.
     %xmm1 previous block, %xmm2 current block, %xmm3 copy of current
     %xmm0 pcmpeqb operand/result; assumed all-zero on entry -- TODO confirm
     %eax  pmovmskb mask; %ebx (USE_AS_STRNCPY) length budget.  */
.p2align 4
L(Shl15):
movaps -15(%ecx), %xmm1
movaps 1(%ecx), %xmm2
L(Shl15Start):
/* Step 1 of 4: test the current block, then emit one shifted block.  */
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit15Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl15LoopExit)
palignr $15, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 17(%ecx), %xmm2
/* Step 2 of 4.  */
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit15Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl15LoopExit)
palignr $15, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 17(%ecx), %xmm2
movaps %xmm3, %xmm1
/* Step 3 of 4.  */
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit15Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl15LoopExit)
palignr $15, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 17(%ecx), %xmm2
/* Step 4 of 4.  %xmm3 still holds the block loaded in the previous step
   and is used as the low half below.  */
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit15Case2OrCase3)
#endif
test %eax, %eax
jnz L(Shl15LoopExit)
palignr $15, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Prepare the 64-byte loop: round the load address (%ecx+17) down to a
   64-byte boundary, move %edx back by the same distance and, for strncpy,
   give the same number of bytes back to %ebx.  */
lea 17(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -1(%ecx), %ecx
sub %eax, %edx
#ifdef USE_AS_STRNCPY
add %eax, %ebx
#endif
movaps -15(%ecx), %xmm1
/* Bulk loop: four blocks (64 bytes) per pass.  */
L(Shl15LoopStart):
movaps 1(%ecx), %xmm2
movaps 17(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 33(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 49(%ecx), %xmm5
/* Fold the four blocks with a byte-wise minimum and test the fold for a
   zero byte.  */
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
/* Unshifted copy of the last block; becomes %xmm1 for the next pass.  */
movaps %xmm5, %xmm7
palignr $15, %xmm4, %xmm5
palignr $15, %xmm3, %xmm4
test %eax, %eax
/* Zero byte within these 64 bytes: redo them 16 bytes at a time.  */
jnz L(Shl15Start)
#ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave15)
#endif
palignr $15, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $15, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl15LoopStart)
/* A zero byte is in the block at %ecx+1: copy 4 bytes starting 3 bytes
   before the cursors (covers the byte at %ecx) and continue in
   CopyFrom1To16Bytes with %esi = 1.  */
L(Shl15LoopExit):
movl -3(%ecx), %esi
movl %esi, -3(%edx)
mov $1, %esi
/* In the strcat/strlcpy builds CopyFrom1To16Bytes is not the next label
   in this file, so an explicit jump is needed; otherwise control simply
   falls through into it.  */
#if defined USE_AS_STRCAT || defined USE_AS_STRLCPY
jmp L(CopyFrom1To16Bytes)
#endif
#if !defined USE_AS_STRCAT && !defined USE_AS_STRLCPY
/* CopyFrom1To16Bytes: copy the final 1..16 bytes up to and including the
   first zero byte.
   In:  %eax  pmovmskb mask of the 16-byte block (bit i set = byte i is 0)
        %esi  offset added to both %ecx and %edx to reach that block
        %ebx  (USE_AS_STRNCPY) length budget; 16 is added back first,
              presumably undoing the caller's last "sub $16" -- TODO confirm
   The saved %esi is restored with the POP macro (defined outside this
   chunk).  Dispatch is on the lowest set bit of the mask: L(ExitN) copies
   N bytes.  */
.p2align 4
L(CopyFrom1To16Bytes):
# ifdef USE_AS_STRNCPY
add $16, %ebx
# endif
add %esi, %edx
add %esi, %ecx
POP (%esi)
/* Low mask byte empty: the zero byte is among bytes 8..15.  */
test %al, %al
jz L(ExitHigh8)
L(CopyFrom1To16BytesLess8):
/* Low nibble empty: the zero byte is among bytes 4..7.  */
mov %al, %ah
and $15, %ah
jz L(ExitHigh4)
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
/* Copy 4 bytes.  SAVE_RESULT and RETURN1 are macros defined outside this
   chunk.  For strncpy, a non-zero remainder in %ebx is zero-filled
   starting at %ecx = %edx+4.  */
.p2align 4
L(Exit4):
movl (%ecx), %eax
movl %eax, (%edx)
SAVE_RESULT (3)
# ifdef USE_AS_STRNCPY
sub $4, %ebx
lea 4(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero: cmpb sets CF only for a
   zero byte, and sbb $-1 adds 1 - CF.  */
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
/* ExitHigh4: the first zero byte is among bytes 4..7 of the block; test
   mask bits 4..6 and fall through to Exit8 when none of them is set.  */
.p2align 4
L(ExitHigh4):
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
jnz L(Exit6)
test $0x40, %al
jnz L(Exit7)
/* Copy 8 bytes.  For strncpy, a non-zero remainder in %ebx is zero-filled
   starting at %ecx = %edx+8.  */
.p2align 4
L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
SAVE_RESULT (7)
# ifdef USE_AS_STRNCPY
sub $8, %ebx
lea 8(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero.  */
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
/* ExitHigh8: the low mask byte was empty, so the first zero byte is among
   bytes 8..15; %ah holds the mask bits for those bytes.  An empty low
   nibble of %ah sends control to ExitHigh12; otherwise bits 0..2 select
   Exit9..Exit11 and the fall-through is Exit12.  */
.p2align 4
L(ExitHigh8):
mov %ah, %al
and $15, %al
jz L(ExitHigh12)
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
/* Copy 12 bytes (8 + 4).  For strncpy, a non-zero remainder in %ebx is
   zero-filled starting at %ecx = %edx+12.  */
.p2align 4
L(Exit12):
movlpd (%ecx), %xmm0
movl 8(%ecx), %eax
movlpd %xmm0, (%edx)
movl %eax, 8(%edx)
SAVE_RESULT (11)
# ifdef USE_AS_STRNCPY
sub $12, %ebx
lea 12(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero.  */
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
/* ExitHigh12: the first zero byte is among bytes 12..15; test bits 4..6
   of %ah and fall through to Exit16 when none of them is set.  */
.p2align 4
L(ExitHigh12):
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
jnz L(Exit14)
test $0x40, %ah
jnz L(Exit15)
/* Copy 16 bytes with unaligned load/store.  For strncpy, a non-zero
   remainder in %ebx is zero-filled starting at %ecx = %edx+16.  */
.p2align 4
L(Exit16):
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
SAVE_RESULT (15)
# ifdef USE_AS_STRNCPY
sub $16, %ebx
lea 16(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
/* %eax += 1 unless the byte at (%eax) is zero.  */
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
# ifdef USE_AS_STRNCPY
/* CopyFrom1To16BytesCase2 (strncpy only): the current 16-byte block has a
   zero byte (%eax mask non-zero) and the length budget also ends within
   it.  After the adjustments below %ebx is the number of bytes still
   allowed, and the copy length N is whichever comes first: the position
   of the zero byte or %ebx.  The checks alternate "zero at byte N-1?" /
   "budget == N?" for increasing N.  */
CFI_PUSH(%esi)
.p2align 4
L(CopyFrom1To16BytesCase2):
add $16, %ebx
add %esi, %ecx
add %esi, %edx
POP (%esi)
test %al, %al
jz L(ExitHighCase2)
/* Zero byte is in the low 8 bytes and more than 8 bytes are allowed:
   the budget cannot come first, so use the mask-only dispatch.  */
cmp $8, %ebx
ja L(CopyFrom1To16BytesLess8)
test $0x01, %al
jnz L(Exit1)
cmp $1, %ebx
je L(Exit1)
test $0x02, %al
jnz L(Exit2)
cmp $2, %ebx
je L(Exit2)
test $0x04, %al
jnz L(Exit3)
cmp $3, %ebx
je L(Exit3)
test $0x08, %al
jnz L(Exit4)
cmp $4, %ebx
je L(Exit4)
test $0x10, %al
jnz L(Exit5)
cmp $5, %ebx
je L(Exit5)
test $0x20, %al
jnz L(Exit6)
cmp $6, %ebx
je L(Exit6)
test $0x40, %al
jnz L(Exit7)
cmp $7, %ebx
je L(Exit7)
jmp L(Exit8)
/* Zero byte is among bytes 8..15 (low mask byte empty).  If at most 8
   bytes are allowed the budget comes first, so only the budget matters
   (the Case3 dispatch).  */
.p2align 4
L(ExitHighCase2):
cmp $8, %ebx
jbe L(CopyFrom1To16BytesLess8Case3)
test $0x01, %ah
jnz L(Exit9)
cmp $9, %ebx
je L(Exit9)
test $0x02, %ah
jnz L(Exit10)
cmp $10, %ebx
je L(Exit10)
test $0x04, %ah
jnz L(Exit11)
cmp $11, %ebx
je L(Exit11)
test $0x8, %ah
jnz L(Exit12)
cmp $12, %ebx
je L(Exit12)
test $0x10, %ah
jnz L(Exit13)
cmp $13, %ebx
je L(Exit13)
test $0x20, %ah
jnz L(Exit14)
cmp $14, %ebx
je L(Exit14)
test $0x40, %ah
jnz L(Exit15)
cmp $15, %ebx
je L(Exit15)
jmp L(Exit16)
/* CopyFrom1To16BytesCase2OrCase3 (strncpy only): the length budget ends
   within the current block.  A non-zero mask in %eax selects Case2;
   otherwise control falls into Case3, where no zero byte was seen and the
   copy length is decided by %ebx alone.  */
CFI_PUSH(%esi)
.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
.p2align 4
L(CopyFrom1To16BytesCase3):
add $16, %ebx
add %esi, %edx
add %esi, %ecx
POP (%esi)
/* Dispatch on the number of bytes still allowed.  The ExitN targets
   subtract N from %ebx themselves; the inline 4/8/12/16-byte copies below
   return directly without touching %ebx.  */
cmp $8, %ebx
ja L(ExitHigh8Case3)
L(CopyFrom1To16BytesLess8Case3):
cmp $4, %ebx
ja L(ExitHigh4Case3)
cmp $1, %ebx
je L(Exit1)
cmp $2, %ebx
je L(Exit2)
cmp $3, %ebx
je L(Exit3)
/* Remaining case on this path: copy 4 bytes.  */
movl (%ecx), %eax
movl %eax, (%edx)
SAVE_RESULT (4)
RETURN1
.p2align 4
L(ExitHigh4Case3):
cmp $5, %ebx
je L(Exit5)
cmp $6, %ebx
je L(Exit6)
cmp $7, %ebx
je L(Exit7)
/* Remaining case on this path: copy 8 bytes.  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
SAVE_RESULT (8)
RETURN1
.p2align 4
L(ExitHigh8Case3):
cmp $12, %ebx
ja L(ExitHigh12Case3)
cmp $9, %ebx
je L(Exit9)
cmp $10, %ebx
je L(Exit10)
cmp $11, %ebx
je L(Exit11)
/* Remaining case on this path: copy 12 bytes.  */
movlpd (%ecx), %xmm0
movl 8(%ecx), %eax
movlpd %xmm0, (%edx)
movl %eax, 8(%edx)
SAVE_RESULT (12)
RETURN1
.p2align 4
L(ExitHigh12Case3):
cmp $13, %ebx
je L(Exit13)
cmp $14, %ebx
je L(Exit14)
cmp $15, %ebx
je L(Exit15)
/* Remaining case on this path: copy 16 bytes as two 8-byte halves.  */
movlpd (%ecx), %xmm0
movlpd 8(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 8(%edx)
SAVE_RESULT (16)
RETURN1
# endif
/* Exit1 .. Exit3: copy exactly N bytes from (%ecx) to (%edx).
   SAVE_RESULT and RETURN1 are macros defined outside this chunk.
   strncpy builds: N is subtracted from %ebx and %ecx is set to %edx+N;
   a non-zero remainder is zero-filled via StrncpyFillTailWithZero1.
   stpncpy builds: "cmpb $1,(%eax); sbb $-1,%eax" adds 1 to %eax unless
   the byte at (%eax) is zero.  */
.p2align 4
L(Exit1):
movb (%ecx), %al
movb %al, (%edx)
SAVE_RESULT (0)
# ifdef USE_AS_STRNCPY
sub $1, %ebx
lea 1(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
.p2align 4
L(Exit2):
movw (%ecx), %ax
movw %ax, (%edx)
SAVE_RESULT (1)
# ifdef USE_AS_STRNCPY
sub $2, %ebx
lea 2(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
.p2align 4
L(Exit3):
/* 3 bytes = 2 + 1.  */
movw (%ecx), %ax
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
SAVE_RESULT (2)
# ifdef USE_AS_STRNCPY
sub $3, %ebx
lea 3(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
/* Exit5 .. Exit7: copy exactly N bytes from (%ecx) to (%edx).
   SAVE_RESULT and RETURN1 are macros defined outside this chunk.
   strncpy builds: N is subtracted from %ebx and %ecx is set to %edx+N;
   a non-zero remainder is zero-filled via StrncpyFillTailWithZero1.
   stpncpy builds: "cmpb $1,(%eax); sbb $-1,%eax" adds 1 to %eax unless
   the byte at (%eax) is zero.  */
.p2align 4
L(Exit5):
/* 5 bytes = 4 + 1.  */
movl (%ecx), %eax
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
SAVE_RESULT (4)
# ifdef USE_AS_STRNCPY
sub $5, %ebx
lea 5(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
.p2align 4
L(Exit6):
/* 6 bytes = 4 + 2.  */
movl (%ecx), %eax
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
SAVE_RESULT (5)
# ifdef USE_AS_STRNCPY
sub $6, %ebx
lea 6(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
.p2align 4
L(Exit7):
/* 7 bytes as two 4-byte moves overlapping at offset 3.  */
movl (%ecx), %eax
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
SAVE_RESULT (6)
# ifdef USE_AS_STRNCPY
sub $7, %ebx
lea 7(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
/* Exit9 .. Exit11: copy exactly N bytes from (%ecx) to (%edx) as one
   8-byte move plus a 1-, 2- or 4-byte move.
   SAVE_RESULT and RETURN1 are macros defined outside this chunk.
   strncpy builds: N is subtracted from %ebx and %ecx is set to %edx+N;
   a non-zero remainder is zero-filled via StrncpyFillTailWithZero1.
   stpncpy builds: "cmpb $1,(%eax); sbb $-1,%eax" adds 1 to %eax unless
   the byte at (%eax) is zero.  */
.p2align 4
L(Exit9):
movlpd (%ecx), %xmm0
movb 8(%ecx), %al
movlpd %xmm0, (%edx)
movb %al, 8(%edx)
SAVE_RESULT (8)
# ifdef USE_AS_STRNCPY
sub $9, %ebx
lea 9(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
.p2align 4
L(Exit10):
movlpd (%ecx), %xmm0
movw 8(%ecx), %ax
movlpd %xmm0, (%edx)
movw %ax, 8(%edx)
SAVE_RESULT (9)
# ifdef USE_AS_STRNCPY
sub $10, %ebx
lea 10(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
.p2align 4
L(Exit11):
/* The 4-byte move at offset 7 overlaps the 8-byte move by one byte.  */
movlpd (%ecx), %xmm0
movl 7(%ecx), %eax
movlpd %xmm0, (%edx)
movl %eax, 7(%edx)
SAVE_RESULT (10)
# ifdef USE_AS_STRNCPY
sub $11, %ebx
lea 11(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
/* Exit13 .. Exit15: copy exactly N bytes from (%ecx) to (%edx) as two
   overlapping 8-byte moves (the second one at offset N-8).
   SAVE_RESULT and RETURN1 are macros defined outside this chunk.
   strncpy builds: N is subtracted from %ebx and %ecx is set to %edx+N;
   a non-zero remainder is zero-filled via StrncpyFillTailWithZero1.
   stpncpy builds: "cmpb $1,(%eax); sbb $-1,%eax" adds 1 to %eax unless
   the byte at (%eax) is zero.  */
.p2align 4
L(Exit13):
movlpd (%ecx), %xmm0
movlpd 5(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 5(%edx)
SAVE_RESULT (12)
# ifdef USE_AS_STRNCPY
sub $13, %ebx
lea 13(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
.p2align 4
L(Exit14):
movlpd (%ecx), %xmm0
movlpd 6(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 6(%edx)
SAVE_RESULT (13)
# ifdef USE_AS_STRNCPY
sub $14, %ebx
lea 14(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
.p2align 4
L(Exit15):
movlpd (%ecx), %xmm0
movlpd 7(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 7(%edx)
SAVE_RESULT (14)
# ifdef USE_AS_STRNCPY
sub $15, %ebx
lea 15(%edx), %ecx
jnz L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN1
/* Unwind-info bookkeeping for the code that follows (CFI_POP is a macro
   defined outside this chunk).  */
CFI_POP (%edi)
# ifdef USE_AS_STRNCPY
/* Fill0 .. Fill16: store exactly N zero bytes at (%ecx) and return.
   The zeros come from %edx (%dl/%dx) and %xmm0; both are cleared in
   StrncpyFillTailWithZero before these stubs are reached.  Sizes that
   are not a power of two use two stores, overlapping where needed.
   RETURN is a macro defined outside this chunk.  */
.p2align 4
L(Fill0):
RETURN
.p2align 4
L(Fill1):
movb %dl, (%ecx)
RETURN
.p2align 4
L(Fill2):
movw %dx, (%ecx)
RETURN
.p2align 4
L(Fill3):
movw %dx, (%ecx)
movb %dl, 2(%ecx)
RETURN
.p2align 4
L(Fill4):
movl %edx, (%ecx)
RETURN
.p2align 4
L(Fill5):
movl %edx, (%ecx)
movb %dl, 4(%ecx)
RETURN
.p2align 4
L(Fill6):
movl %edx, (%ecx)
movw %dx, 4(%ecx)
RETURN
.p2align 4
L(Fill7):
/* Two 4-byte stores overlapping at offset 3.  */
movl %edx, (%ecx)
movl %edx, 3(%ecx)
RETURN
.p2align 4
L(Fill8):
movlpd %xmm0, (%ecx)
RETURN
.p2align 4
L(Fill9):
movlpd %xmm0, (%ecx)
movb %dl, 8(%ecx)
RETURN
.p2align 4
L(Fill10):
movlpd %xmm0, (%ecx)
movw %dx, 8(%ecx)
RETURN
.p2align 4
L(Fill11):
/* 8-byte store plus a 4-byte store overlapping at offset 7.  */
movlpd %xmm0, (%ecx)
movl %edx, 7(%ecx)
RETURN
.p2align 4
L(Fill12):
movlpd %xmm0, (%ecx)
movl %edx, 8(%ecx)
RETURN
.p2align 4
L(Fill13):
/* Two 8-byte stores, the second at offset N-8.  */
movlpd %xmm0, (%ecx)
movlpd %xmm0, 5(%ecx)
RETURN
.p2align 4
L(Fill14):
movlpd %xmm0, (%ecx)
movlpd %xmm0, 6(%ecx)
RETURN
.p2align 4
L(Fill15):
movlpd %xmm0, (%ecx)
movlpd %xmm0, 7(%ecx)
RETURN
.p2align 4
L(Fill16):
movlpd %xmm0, (%ecx)
movlpd %xmm0, 8(%ecx)
RETURN
/* StrncpyFillExit1: re-add the 16 that the caller subtracted from %ebx
   without a matching store, then fall into the dispatcher.
   FillFrom1To16Bytes: %ebx is the number of zero bytes (0..16) still to
   be written at (%ecx); branch to the matching L(FillN) stub.  The signed
   branches (jl/jg) are applied to values in this small range only.  */
.p2align 4
L(StrncpyFillExit1):
lea 16(%ebx), %ebx
L(FillFrom1To16Bytes):
test %ebx, %ebx
jz L(Fill0)
cmp $16, %ebx
je L(Fill16)
cmp $8, %ebx
je L(Fill8)
jg L(FillMore8)
cmp $4, %ebx
je L(Fill4)
jg L(FillMore4)
/* Here %ebx is 1, 2 or 3.  */
cmp $2, %ebx
jl L(Fill1)
je L(Fill2)
jg L(Fill3)
L(FillMore8): /* but less than 16 */
cmp $12, %ebx
je L(Fill12)
jl L(FillLess12)
/* Here %ebx is 13, 14 or 15.  */
cmp $14, %ebx
jl L(Fill13)
je L(Fill14)
jg L(Fill15)
L(FillMore4): /* but less than 8 */
cmp $6, %ebx
jl L(Fill5)
je L(Fill6)
jg L(Fill7)
L(FillLess12): /* but more than 8 */
cmp $10, %ebx
jl L(Fill9)
je L(Fill10)
jmp L(Fill11)
/* StrncpyFillTailWithZero1 / StrncpyFillTailWithZero: write %ebx zero
   bytes starting at (%ecx).
   The "...1" entry first restores %edi with the POP macro (defined
   outside this chunk); the plain entry is used by the ExitTailN stubs.
   %xmm0 and %edx are cleared here and serve as the zero sources for this
   routine and for the L(FillN) stubs it ends in.  */
CFI_PUSH(%edi)
.p2align 4
L(StrncpyFillTailWithZero1):
POP (%edi)
L(StrncpyFillTailWithZero):
pxor %xmm0, %xmm0
xor %edx, %edx
/* 16 bytes or fewer: hand over to the small-size dispatcher.  */
sub $16, %ebx
jbe L(StrncpyFillExit1)
/* Write 16 unaligned zero bytes, then round %ecx+16 down to a 16-byte
   boundary and add the bytes stepped back to %ebx, so that the aligned
   stores below partly overlap what was just written.  */
movlpd %xmm0, (%ecx)
movlpd %xmm0, 8(%ecx)
lea 16(%ecx), %ecx
mov %ecx, %edx
and $0xf, %edx
sub %edx, %ecx
add %edx, %ebx
/* %edx must be zero again for the L(FillN) stubs.  */
xor %edx, %edx
sub $64, %ebx
jb L(StrncpyFillLess64)
/* Bulk zeroing, 64 bytes per pass, with aligned stores.  */
L(StrncpyFillLoopMovdqa):
movdqa %xmm0, (%ecx)
movdqa %xmm0, 16(%ecx)
movdqa %xmm0, 32(%ecx)
movdqa %xmm0, 48(%ecx)
lea 64(%ecx), %ecx
sub $64, %ebx
jae L(StrncpyFillLoopMovdqa)
/* %ebx is negative here (remaining - 64).  Peel off 32 and then 16 bytes
   where they still fit; the final 0..16 bytes go through
   FillFrom1To16Bytes.  */
L(StrncpyFillLess64):
add $32, %ebx
jl L(StrncpyFillLess32)
movdqa %xmm0, (%ecx)
movdqa %xmm0, 16(%ecx)
lea 32(%ecx), %ecx
sub $16, %ebx
jl L(StrncpyFillExit1)
movdqa %xmm0, (%ecx)
lea 16(%ecx), %ecx
jmp L(FillFrom1To16Bytes)
L(StrncpyFillLess32):
add $16, %ebx
jl L(StrncpyFillExit1)
movdqa %xmm0, (%ecx)
lea 16(%ecx), %ecx
jmp L(FillFrom1To16Bytes)
# endif
/* ExitTail1 .. ExitTail3: copy exactly N bytes from (%ecx) to (%edx) and
   return through RETURN.  These stubs use SAVE_RESULT_TAIL and branch to
   StrncpyFillTailWithZero, the entry that does not pop %edi, unlike the
   L(ExitN) stubs.  SAVE_RESULT_TAIL and RETURN are macros defined outside
   this chunk.
   strncpy builds: N is subtracted from %ebx and %ecx is set to %edx+N;
   a non-zero remainder is zero-filled.
   stpncpy builds: "cmpb $1,(%eax); sbb $-1,%eax" adds 1 to %eax unless
   the byte at (%eax) is zero.  */
.p2align 4
L(ExitTail1):
movb (%ecx), %al
movb %al, (%edx)
SAVE_RESULT_TAIL (0)
# ifdef USE_AS_STRNCPY
sub $1, %ebx
lea 1(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
.p2align 4
L(ExitTail2):
movw (%ecx), %ax
movw %ax, (%edx)
SAVE_RESULT_TAIL (1)
# ifdef USE_AS_STRNCPY
sub $2, %ebx
lea 2(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
.p2align 4
L(ExitTail3):
/* 3 bytes = 2 + 1.  */
movw (%ecx), %ax
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
SAVE_RESULT_TAIL (2)
# ifdef USE_AS_STRNCPY
sub $3, %ebx
lea 3(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
/* ExitTail4 .. ExitTail7: copy exactly N bytes from (%ecx) to (%edx) and
   return through RETURN.  SAVE_RESULT_TAIL and RETURN are macros defined
   outside this chunk.
   strncpy builds: N is subtracted from %ebx and %ecx is set to %edx+N;
   a non-zero remainder is zero-filled via StrncpyFillTailWithZero.
   stpncpy builds: "cmpb $1,(%eax); sbb $-1,%eax" adds 1 to %eax unless
   the byte at (%eax) is zero.  */
.p2align 4
L(ExitTail4):
movl (%ecx), %eax
movl %eax, (%edx)
SAVE_RESULT_TAIL (3)
# ifdef USE_AS_STRNCPY
sub $4, %ebx
lea 4(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
.p2align 4
L(ExitTail5):
/* 5 bytes = 4 + 1.  */
movl (%ecx), %eax
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
SAVE_RESULT_TAIL (4)
# ifdef USE_AS_STRNCPY
sub $5, %ebx
lea 5(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
.p2align 4
L(ExitTail6):
/* 6 bytes = 4 + 2.  */
movl (%ecx), %eax
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
SAVE_RESULT_TAIL (5)
# ifdef USE_AS_STRNCPY
sub $6, %ebx
lea 6(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
.p2align 4
L(ExitTail7):
/* 7 bytes as two 4-byte moves overlapping at offset 3.  */
movl (%ecx), %eax
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
SAVE_RESULT_TAIL (6)
# ifdef USE_AS_STRNCPY
sub $7, %ebx
lea 7(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
/* ExitTail8 .. ExitTail11: copy exactly N bytes from (%ecx) to (%edx) and
   return through RETURN.  SAVE_RESULT_TAIL and RETURN are macros defined
   outside this chunk.
   strncpy builds: N is subtracted from %ebx and %ecx is set to %edx+N;
   a non-zero remainder is zero-filled via StrncpyFillTailWithZero.
   stpncpy builds: "cmpb $1,(%eax); sbb $-1,%eax" adds 1 to %eax unless
   the byte at (%eax) is zero.  */
.p2align 4
L(ExitTail8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
SAVE_RESULT_TAIL (7)
# ifdef USE_AS_STRNCPY
sub $8, %ebx
lea 8(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# endif
/* NOTE(review): unlike its siblings this stub has no stpncpy fix-up of
   %eax.  That looks intentional, since StrncpyExit8Bytes handles the
   "length is exactly 8" case inline, but the callers outside this chunk
   should be checked to confirm.  */
RETURN
.p2align 4
L(ExitTail9):
movlpd (%ecx), %xmm0
movb 8(%ecx), %al
movlpd %xmm0, (%edx)
movb %al, 8(%edx)
SAVE_RESULT_TAIL (8)
# ifdef USE_AS_STRNCPY
sub $9, %ebx
lea 9(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
.p2align 4
L(ExitTail10):
movlpd (%ecx), %xmm0
movw 8(%ecx), %ax
movlpd %xmm0, (%edx)
movw %ax, 8(%edx)
SAVE_RESULT_TAIL (9)
# ifdef USE_AS_STRNCPY
sub $10, %ebx
lea 10(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
.p2align 4
L(ExitTail11):
/* The 4-byte move at offset 7 overlaps the 8-byte move by one byte.  */
movlpd (%ecx), %xmm0
movl 7(%ecx), %eax
movlpd %xmm0, (%edx)
movl %eax, 7(%edx)
SAVE_RESULT_TAIL (10)
# ifdef USE_AS_STRNCPY
sub $11, %ebx
lea 11(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
/* ExitTail12 .. ExitTail16: copy exactly N bytes from (%ecx) to (%edx)
   and return through RETURN.  SAVE_RESULT_TAIL and RETURN are macros
   defined outside this chunk.
   strncpy builds: N is subtracted from %ebx and %ecx is set to %edx+N;
   a non-zero remainder is zero-filled via StrncpyFillTailWithZero.
   stpncpy builds: "cmpb $1,(%eax); sbb $-1,%eax" adds 1 to %eax unless
   the byte at (%eax) is zero.  */
.p2align 4
L(ExitTail12):
movlpd (%ecx), %xmm0
movl 8(%ecx), %eax
movlpd %xmm0, (%edx)
movl %eax, 8(%edx)
SAVE_RESULT_TAIL (11)
# ifdef USE_AS_STRNCPY
sub $12, %ebx
lea 12(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
.p2align 4
L(ExitTail13):
/* Two 8-byte moves, the second at offset N-8.  */
movlpd (%ecx), %xmm0
movlpd 5(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 5(%edx)
SAVE_RESULT_TAIL (12)
# ifdef USE_AS_STRNCPY
sub $13, %ebx
lea 13(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
.p2align 4
L(ExitTail14):
movlpd (%ecx), %xmm0
movlpd 6(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 6(%edx)
SAVE_RESULT_TAIL (13)
# ifdef USE_AS_STRNCPY
sub $14, %ebx
lea 14(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
.p2align 4
L(ExitTail15):
movlpd (%ecx), %xmm0
movlpd 7(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 7(%edx)
SAVE_RESULT_TAIL (14)
# ifdef USE_AS_STRNCPY
sub $15, %ebx
lea 15(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# endif
/* NOTE(review): no stpncpy fix-up of %eax here, unlike the siblings.
   StrncpyExit15Bytes handles the "length is exactly 15" case inline, so
   this looks intentional; confirm against the callers outside this
   chunk.  */
RETURN
.p2align 4
L(ExitTail16):
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
SAVE_RESULT_TAIL (15)
# ifdef USE_AS_STRNCPY
sub $16, %ebx
lea 16(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
# endif
RETURN
#endif
#ifdef USE_AS_STRNCPY
/* StrncpyLeaveCase2OrCase3 (strncpy only): leave path that works on four
   blocks held in %xmm4..%xmm7 and stores them at -64(%edx), -48(%edx) and
   -32(%edx).  The code that jumps here is outside this chunk; presumably
   it is the aligned 64-byte loop, left when the length budget in %ebx
   runs out -- TODO confirm.
   %eax non-zero means one of the four blocks holds a zero byte (Case2);
   otherwise only the budget limits the copy (Case3).  %esi accumulates
   the offset of the block at which the CopyFrom1To16Bytes* routine will
   finish.  */
# ifndef USE_AS_STRCAT
CFI_PUSH (%esi)
CFI_PUSH (%edi)
# endif
.p2align 4
L(StrncpyLeaveCase2OrCase3):
test %eax, %eax
jnz L(Aligned64LeaveCase2)
L(Aligned64LeaveCase3):
/* Re-bias %ebx by 48, then store whole blocks while more than 16 bytes
   remain; the last partial block is finished by Case3.  */
add $48, %ebx
jle L(CopyFrom1To16BytesCase3)
movaps %xmm4, -64(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase3)
movaps %xmm5, -48(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase3)
movaps %xmm6, -32(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
jmp L(CopyFrom1To16BytesCase3)
L(Aligned64LeaveCase2):
/* Test the blocks one at a time.  %xmm0 is compared in place; it remains
   all-zero as long as no zero byte has been found (assuming it was zero
   on entry -- TODO confirm).  */
pcmpeqb %xmm4, %xmm0
pmovmskb %xmm0, %eax
add $48, %ebx
jle L(CopyFrom1To16BytesCase2OrCase3)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm4, -64(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase2OrCase3)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm5, -48(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase2OrCase3)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
/* The first three blocks are clean, so the zero byte is in the fourth.  */
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm6, -32(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
jmp L(CopyFrom1To16BytesCase2)
/*--------------------------------------------------*/
/* StrncpyExitNCase2OrCase3, N = 1..7 (strncpy only): reached from the
   16-byte stepping code of the corresponding ShlN path when the length
   budget ends within the current block.  Each stub copies the 16-N bytes
   in front of the aligned block, sets %esi = 16-N, and then picks Case2
   (zero byte seen, %eax non-zero) or Case3 (budget only).  The test of
   %eax follows only mov instructions, which leave the flags alone.
   Stubs 4..7 use %esi as a scratch register before loading the offset.  */
.p2align 4
L(StrncpyExit1Case2OrCase3):
/* 15 bytes as two overlapping 8-byte moves.  */
movlpd (%ecx), %xmm0
movlpd 7(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 7(%edx)
mov $15, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
.p2align 4
L(StrncpyExit2Case2OrCase3):
movlpd (%ecx), %xmm0
movlpd 6(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 6(%edx)
mov $14, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
.p2align 4
L(StrncpyExit3Case2OrCase3):
movlpd (%ecx), %xmm0
movlpd 5(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 5(%edx)
mov $13, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
.p2align 4
L(StrncpyExit4Case2OrCase3):
/* 12 bytes = 8 + 4.  */
movlpd (%ecx), %xmm0
movl 8(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 8(%edx)
mov $12, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
.p2align 4
L(StrncpyExit5Case2OrCase3):
/* 11 bytes: 8 + 4, overlapping at offset 7.  */
movlpd (%ecx), %xmm0
movl 7(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 7(%edx)
mov $11, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
.p2align 4
L(StrncpyExit6Case2OrCase3):
/* 10 bytes: 8 + 4, overlapping at offset 6.  */
movlpd (%ecx), %xmm0
movl 6(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 6(%edx)
mov $10, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
.p2align 4
L(StrncpyExit7Case2OrCase3):
/* 9 bytes: 8 + 4, overlapping at offset 5.  */
movlpd (%ecx), %xmm0
movl 5(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 5(%edx)
mov $9, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExitNCase2OrCase3, N = 8..15 (strncpy only): reached from the
   16-byte stepping code of the corresponding ShlN path when the length
   budget ends within the current block.  Each stub copies the 16-N bytes
   in front of the aligned block with a single 8- or 4-byte move, sets
   %esi = 16-N, and then picks Case2 (%eax non-zero) or Case3.
   Note that stubs 9..11 move 8 bytes ending one byte past the 16-N byte
   gap; that extra byte lies inside the block that Case2/Case3 copies
   next (assuming they copy at least one byte -- TODO confirm).  Stubs
   13..15 instead start the 4-byte move before %ecx/%edx.  */
.p2align 4
L(StrncpyExit8Case2OrCase3):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
mov $8, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
.p2align 4
L(StrncpyExit9Case2OrCase3):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
mov $7, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
.p2align 4
L(StrncpyExit10Case2OrCase3):
movlpd -1(%ecx), %xmm0
movlpd %xmm0, -1(%edx)
mov $6, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
.p2align 4
L(StrncpyExit11Case2OrCase3):
movlpd -2(%ecx), %xmm0
movlpd %xmm0, -2(%edx)
mov $5, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
.p2align 4
L(StrncpyExit12Case2OrCase3):
movl (%ecx), %esi
movl %esi, (%edx)
mov $4, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
.p2align 4
L(StrncpyExit13Case2OrCase3):
movl -1(%ecx), %esi
movl %esi, -1(%edx)
mov $3, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
.p2align 4
L(StrncpyExit14Case2OrCase3):
movl -2(%ecx), %esi
movl %esi, -2(%edx)
mov $2, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
.p2align 4
L(StrncpyExit15Case2OrCase3):
movl -3(%ecx), %esi
movl %esi, -3(%edx)
mov $1, %esi
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeaveN / StrncpyExitN, N = 1..3 (strncpy only).
   StrncpyLeaveN is the leave path of the ShlN 64-byte loop for the case
   where "sub $64, %ebx" used up the length budget.  In the ShlN loops
   visible in this file that check comes after the zero-byte test, so no
   zero byte is in the four loaded blocks; the Shl1..Shl3 loops themselves
   are outside this chunk and are assumed to follow the same pattern.
   State on entry, by analogy with those loops: %xmm1 previous block,
   %xmm2 first block, %xmm4/%xmm5 third and fourth output blocks already
   shifted.  %ebx is re-biased by 48 and whole 16-byte output blocks are
   stored while more than 16 bytes remain; %esi counts the bytes stored
   (assumed 0 on entry -- TODO confirm).
   StrncpyExitN then advances both pointers by %esi + (16-N), copies the
   bytes in front of the new position (16 bytes, i.e. one byte more than
   needed for N = 1) and finishes in CopyFrom1To16BytesCase3 with
   %esi = 0.  */
L(StrncpyLeave1):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit1)
palignr $1, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 31(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit1)
palignr $1, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit1)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit1)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit1):
lea 15(%edx, %esi), %edx
lea 15(%ecx, %esi), %ecx
movdqu -16(%ecx), %xmm0
xor %esi, %esi
movdqu %xmm0, -16(%edx)
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave2):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit2)
palignr $2, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 30(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit2)
palignr $2, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit2)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit2)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit2):
lea 14(%edx, %esi), %edx
lea 14(%ecx, %esi), %ecx
movdqu -16(%ecx), %xmm0
xor %esi, %esi
movdqu %xmm0, -16(%edx)
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave3):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit3)
palignr $3, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 29(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit3)
palignr $3, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit3)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit3)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit3):
lea 13(%edx, %esi), %edx
lea 13(%ecx, %esi), %ecx
movdqu -16(%ecx), %xmm0
xor %esi, %esi
movdqu %xmm0, -16(%edx)
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeaveN / StrncpyExitN, N = 4..7 (strncpy only).
   StrncpyLeaveN is the leave path of the ShlN 64-byte loop for the case
   where "sub $64, %ebx" used up the length budget.  In the ShlN loops
   visible in this file that check comes after the zero-byte test, so no
   zero byte is in the four loaded blocks; the Shl4..Shl7 loops themselves
   are outside this chunk and are assumed to follow the same pattern.
   State on entry, by analogy with those loops: %xmm1 previous block,
   %xmm2 first block, %xmm4/%xmm5 third and fourth output blocks already
   shifted.  %ebx is re-biased by 48 and whole 16-byte output blocks are
   stored while more than 16 bytes remain; %esi counts the bytes stored
   (assumed 0 on entry -- TODO confirm).
   StrncpyExitN then advances both pointers by %esi + (16-N), copies the
   16-N bytes in front of the new position and finishes in
   CopyFrom1To16BytesCase3 with %esi = 0.  */
L(StrncpyLeave4):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit4)
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit4)
palignr $4, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit4)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit4)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit4):
/* 12 bytes = 8 + 4.  */
lea 12(%edx, %esi), %edx
lea 12(%ecx, %esi), %ecx
movlpd -12(%ecx), %xmm0
movl -4(%ecx), %eax
movlpd %xmm0, -12(%edx)
movl %eax, -4(%edx)
xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave5):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit5)
palignr $5, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 27(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit5)
palignr $5, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit5)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit5)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit5):
/* 11 bytes: 8 + 4, overlapping by one byte.  */
lea 11(%edx, %esi), %edx
lea 11(%ecx, %esi), %ecx
movlpd -11(%ecx), %xmm0
movl -4(%ecx), %eax
movlpd %xmm0, -11(%edx)
movl %eax, -4(%edx)
xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave6):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit6)
palignr $6, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 26(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit6)
palignr $6, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit6)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit6)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit6):
/* 10 bytes = 8 + 2.  */
lea 10(%edx, %esi), %edx
lea 10(%ecx, %esi), %ecx
movlpd -10(%ecx), %xmm0
movw -2(%ecx), %ax
movlpd %xmm0, -10(%edx)
movw %ax, -2(%edx)
xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave7):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit7)
palignr $7, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 25(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit7)
palignr $7, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit7)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit7)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit7):
/* 9 bytes = 8 + 1; the single byte travels through %ah.  */
lea 9(%edx, %esi), %edx
lea 9(%ecx, %esi), %ecx
movlpd -9(%ecx), %xmm0
movb -1(%ecx), %ah
movlpd %xmm0, -9(%edx)
movb %ah, -1(%edx)
xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeaveN / StrncpyExitN, N = 8..11 (strncpy only).
   StrncpyLeaveN is the leave path of the ShlN 64-byte loop for the case
   where "sub $64, %ebx" used up the length budget.  In the ShlN loops
   visible in this file that check comes after the zero-byte test, so no
   zero byte is in the four loaded blocks; the Shl8..Shl10 loops and the
   start of Shl11 are outside this chunk and are assumed to follow the
   same pattern.
   State on entry, by analogy with those loops: %xmm1 previous block,
   %xmm2 first block, %xmm4/%xmm5 third and fourth output blocks already
   shifted.  %ebx is re-biased by 48 and whole 16-byte output blocks are
   stored while more than 16 bytes remain; %esi counts the bytes stored
   (assumed 0 on entry -- TODO confirm).
   StrncpyExitN then advances both pointers by %esi + (16-N), copies the
   bytes in front of the new position and finishes in
   CopyFrom1To16BytesCase3 with %esi = 0.  For N = 9 and 10 the 8-byte
   move starts before the gap, re-copying one or two earlier bytes.  */
L(StrncpyLeave8):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit8)
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit8)
palignr $8, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit8)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit8)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit8):
lea 8(%edx, %esi), %edx
lea 8(%ecx, %esi), %ecx
movlpd -8(%ecx), %xmm0
movlpd %xmm0, -8(%edx)
xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave9):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit9)
palignr $9, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 23(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit9)
palignr $9, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit9)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit9)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit9):
lea 7(%edx, %esi), %edx
lea 7(%ecx, %esi), %ecx
movlpd -8(%ecx), %xmm0
movlpd %xmm0, -8(%edx)
xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave10):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit10)
palignr $10, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 22(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit10)
palignr $10, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit10)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit10)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit10):
lea 6(%edx, %esi), %edx
lea 6(%ecx, %esi), %ecx
movlpd -8(%ecx), %xmm0
movlpd %xmm0, -8(%edx)
xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave11):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit11)
palignr $11, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 21(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit11)
palignr $11, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit11)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit11)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit11):
/* 5 bytes = 4 + 1.  %esi is free after the two lea instructions and is
   used as scratch before being cleared; the single byte travels
   through %ah.  */
lea 5(%edx, %esi), %edx
lea 5(%ecx, %esi), %ecx
movl -5(%ecx), %esi
movb -1(%ecx), %ah
movl %esi, -5(%edx)
movb %ah, -1(%edx)
xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
/* StrncpyLeaveN / StrncpyExitN, N = 12..15 (strncpy only).
   StrncpyLeaveN is entered from L(ShlNLoopStart) when "sub $64, %ebx"
   uses up the length budget; that check follows the zero-byte test, so
   the four loaded blocks hold no zero byte.
   State on entry (see the loops): %xmm1 previous block, %xmm2 first
   block, %xmm4/%xmm5 third and fourth output blocks already shifted.
   %ebx is re-biased by 48 and whole 16-byte output blocks are stored
   while more than 16 bytes remain; %esi counts the bytes stored (assumed
   0 on entry -- TODO confirm).
   StrncpyExitN then advances both pointers by %esi + (16-N), copies the
   bytes in front of the new position and finishes in
   CopyFrom1To16BytesCase3 with %esi = 0.  For N = 13 the 4-byte move
   starts one byte before the 3-byte gap.  */
L(StrncpyLeave12):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit12)
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit12)
palignr $12, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit12)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit12)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit12):
lea 4(%edx, %esi), %edx
lea 4(%ecx, %esi), %ecx
movl -4(%ecx), %eax
movl %eax, -4(%edx)
xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave13):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit13)
palignr $13, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 19(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit13)
palignr $13, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit13)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit13)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit13):
lea 3(%edx, %esi), %edx
lea 3(%ecx, %esi), %ecx
movl -4(%ecx), %eax
movl %eax, -4(%edx)
xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave14):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit14)
palignr $14, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 18(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit14)
palignr $14, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit14)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit14)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit14):
lea 2(%edx, %esi), %edx
lea 2(%ecx, %esi), %ecx
movw -2(%ecx), %ax
movw %ax, -2(%edx)
xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave15):
movaps %xmm2, %xmm3
add $48, %ebx
jle L(StrncpyExit15)
palignr $15, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 17(%ecx), %xmm2
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit15)
palignr $15, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit15)
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit15)
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
L(StrncpyExit15):
/* Single byte, moved through %ah.  */
lea 1(%edx, %esi), %edx
lea 1(%ecx, %esi), %ecx
movb -1(%ecx), %ah
movb %ah, -1(%edx)
xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
#endif
#if !defined USE_AS_STRCAT && ! defined USE_AS_STRLCPY
# ifdef USE_AS_STRNCPY
/* CFI_POP is a macro defined outside this view.  No pop instruction is
   visible here, so these lines presumably only update the unwind
   description to reflect that %esi and %edi are not saved on the stack in
   the tail code below -- TODO confirm against the macro definition.  */
CFI_POP (%esi)
CFI_POP (%edi)
.p2align 4
/* L(ExitTail0): return path that copies nothing; reached from
   L(StrncpyExit4Bytes) when %ebx is zero.  The destination pointer in %edx
   is placed in %eax before RETURN (macro defined outside this view,
   presumably the function epilogue).  */
L(ExitTail0):
movl %edx, %eax
RETURN
.p2align 4
/* L(StrncpyExit15Bytes): short-copy dispatcher.
   %ecx = source, %edx = destination, %ebx = length limit compared below.
   Source bytes 0..7 are not tested here; presumably the caller has already
   checked them -- TODO confirm.  The L(ExitTailN) targets are defined outside
   this view; judging by their names each finishes an N-byte copy.  */
L(StrncpyExit15Bytes):
/* Limits of 12 or less are handled by the smaller dispatcher.  */
cmp $12, %ebx
jbe L(StrncpyExit12Bytes)
/* The limit is above 12 here, so bytes 8..11 are tested for NUL without
   consulting %ebx.  A NUL at index i selects L(ExitTail(i+1)).  */
cmpb $0, 8(%ecx)
jz L(ExitTail9)
cmpb $0, 9(%ecx)
jz L(ExitTail10)
cmpb $0, 10(%ecx)
jz L(ExitTail11)
cmpb $0, 11(%ecx)
jz L(ExitTail12)
/* From here each step first checks whether the limit equals N, then
   whether source byte N-1 is NUL; either way L(ExitTailN) is taken.  */
cmp $13, %ebx
je L(ExitTail13)
cmpb $0, 12(%ecx)
jz L(ExitTail13)
cmp $14, %ebx
je L(ExitTail14)
cmpb $0, 13(%ecx)
jz L(ExitTail14)
/* Bytes 8..13 are non-NUL and the limit is neither <= 12, 13 nor 14.
   Copy 15 bytes with two overlapping 8-byte moves (offsets 0..7 and
   7..14).  */
movlpd (%ecx), %xmm0
movlpd 7(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 7(%edx)
# ifdef USE_AS_STPCPY
/* %eax = address of the last copied byte.  cmpb $1 sets the carry flag only
   when that byte is 0; sbb $-1 then computes %eax + 1 - CF, so %eax stays
   on the byte when it is NUL and moves one past it otherwise.  */
lea 14(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
# else
/* Result is the destination pointer.  */
movl %edx, %eax
# endif
RETURN
.p2align 4
/* L(StrncpyExit12Bytes): short-copy dispatcher entered from
   L(StrncpyExit15Bytes) when %ebx <= 12.
   %ecx = source, %edx = destination, %ebx = length limit.
   For N = 9, 10, 11: take L(ExitTailN) (defined outside this view) when the
   limit equals N or when source byte N-1 is NUL.  Bytes 0..7 are not tested
   here; presumably they were checked by the caller -- TODO confirm.  */
L(StrncpyExit12Bytes):
cmp $9, %ebx
je L(ExitTail9)
cmpb $0, 8(%ecx)
jz L(ExitTail9)
cmp $10, %ebx
je L(ExitTail10)
cmpb $0, 9(%ecx)
jz L(ExitTail10)
cmp $11, %ebx
je L(ExitTail11)
cmpb $0, 10(%ecx)
jz L(ExitTail11)
/* No early exit taken: copy 12 bytes as one 8-byte move (offsets 0..7)
   plus one 4-byte move (offsets 8..11).  */
movlpd (%ecx), %xmm0
movl 8(%ecx), %eax
movlpd %xmm0, (%edx)
movl %eax, 8(%edx)
/* SAVE_RESULT_TAIL is a macro defined outside this view; presumably it
   loads the return value into %eax (for STPCPY, the address of byte 11,
   which the code below dereferences) -- TODO confirm.  */
SAVE_RESULT_TAIL (11)
# ifdef USE_AS_STPCPY
/* Carry is set only when the byte at %eax is 0; sbb $-1 yields
   %eax + 1 - CF, i.e. step past the byte unless it is NUL.  */
cmpb $1, (%eax)
sbb $-1, %eax
# endif
RETURN
.p2align 4
/* L(StrncpyExit8Bytes): short-copy dispatcher.
   %ecx = source, %edx = destination, %ebx = length limit.
   The L(ExitTailN) targets are defined outside this view; judging by their
   names each finishes an N-byte copy.  */
L(StrncpyExit8Bytes):
/* Limits of 4 or less are handled by the smaller dispatcher.  */
cmp $4, %ebx
jbe L(StrncpyExit4Bytes)
/* The limit is above 4 here, so bytes 0..3 are tested for NUL without
   consulting %ebx.  A NUL at index i selects L(ExitTail(i+1)).  */
cmpb $0, (%ecx)
jz L(ExitTail1)
cmpb $0, 1(%ecx)
jz L(ExitTail2)
cmpb $0, 2(%ecx)
jz L(ExitTail3)
cmpb $0, 3(%ecx)
jz L(ExitTail4)
/* For N = 5, 6, 7: take L(ExitTailN) when the limit equals N or when source
   byte N-1 is NUL.  */
cmp $5, %ebx
je L(ExitTail5)
cmpb $0, 4(%ecx)
jz L(ExitTail5)
cmp $6, %ebx
je L(ExitTail6)
cmpb $0, 5(%ecx)
jz L(ExitTail6)
cmp $7, %ebx
je L(ExitTail7)
cmpb $0, 6(%ecx)
jz L(ExitTail7)
/* No early exit taken: copy 8 bytes with a single 8-byte move.  */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
# ifdef USE_AS_STPCPY
/* %eax = address of the last copied byte; cmpb $1 / sbb $-1 computes
   %eax + 1 - CF with CF set only when that byte is 0, so %eax stays on
   a NUL byte and moves one past a non-NUL byte.  */
lea 7(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
# else
/* Result is the destination pointer.  */
movl %edx, %eax
# endif
RETURN
.p2align 4
/* L(StrncpyExit4Bytes): short-copy dispatcher entered from
   L(StrncpyExit8Bytes) when %ebx <= 4.
   %ecx = source, %edx = destination, %ebx = length limit.
   L(ExitTail1) .. L(ExitTail3) are defined outside this view.  */
L(StrncpyExit4Bytes):
/* A zero limit copies nothing.  */
test %ebx, %ebx
jz L(ExitTail0)
/* For N = 1, 2, 3: take L(ExitTailN) when the limit equals N or when source
   byte N-1 is NUL.  The limit test comes first, so byte N-1 is read only
   when the limit is greater than N.  */
cmp $1, %ebx
je L(ExitTail1)
cmpb $0, (%ecx)
jz L(ExitTail1)
cmp $2, %ebx
je L(ExitTail2)
cmpb $0, 1(%ecx)
jz L(ExitTail2)
cmp $3, %ebx
je L(ExitTail3)
cmpb $0, 2(%ecx)
jz L(ExitTail3)
/* No early exit taken: copy 4 bytes with a single 32-bit move.  */
movl (%ecx), %eax
movl %eax, (%edx)
/* SAVE_RESULT_TAIL is a macro defined outside this view; presumably it
   loads the return value into %eax (for STPCPY, the address of byte 3,
   which the code below dereferences) -- TODO confirm.  */
SAVE_RESULT_TAIL (3)
# ifdef USE_AS_STPCPY
/* Carry is set only when the byte at %eax is 0; sbb $-1 yields
   %eax + 1 - CF, i.e. step past the byte unless it is NUL.  */
cmpb $1, (%eax)
sbb $-1, %eax
# endif
RETURN
# endif
END (STRCPY)
#endif