5a92284167
Add following functions: bcopy, memcpy, memmove, memset, bzero, memcmp, wmemcmp, strlen, strcpy, strncpy, stpcpy, stpncpy. Create new directories inside arch-x86 to specify architecture: atom, silvermont and generic (non atom or silvermont architectures are treated like generic). Due to introducing optimized versions of stpcpy and stpncpy, c-implementations of these functions are moved from common for architectures makefile to arm and mips specific makefiles. Change-Id: I990f8061c3e9bca1f154119303da9e781c5d086e Signed-off-by: Varvara Rainchik <varvara.rainchik@intel.com>
2158 lines
41 KiB
ArmAsm
Executable File
2158 lines
41 KiB
ArmAsm
Executable File
/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef L
/* L(label): paste the `.L` prefix onto LABEL to form the local label name
   used throughout this file.  Only defined if the includer did not.  */
# define L(label)	.L##label
#endif

/* Thin wrappers mapping cfi_* names onto the assembler's .cfi_* directives.
   Each one is only defined when the includer has not already provided it.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
/* ENTRY(name): declare NAME as a global function symbol, align it to
   16 bytes (.p2align 4), emit its label and open a CFI region.  */
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

#ifndef END
/* END(name): close the CFI region and record the size of NAME.  */
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif

/* CFI bookkeeping for a 4-byte push of REG: the CFA offset grows by 4 and
   REG is recorded as saved at offset 0 from the CFA register.  */
#define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* CFI bookkeeping for a 4-byte pop of REG: the CFA offset shrinks by 4 and
   REG is marked as restored.  */
#define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* pushl/popl paired with the matching CFI annotation.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Name of the function emitted by this file; defaults to strcpy unless the
   includer defines STRCPY first.  */
#ifndef STRCPY
# define STRCPY	strcpy
#endif

/* The stpncpy variant enables both the strncpy and the stpcpy code paths.  */
#ifdef USE_AS_STPNCPY
# define USE_AS_STRNCPY
# define USE_AS_STPCPY
#endif

/* Prologue/epilogue macros and argument offsets.

   PARMS is the %esp-relative offset of the first argument once ENTRANCE has
   run: 4 bytes of return address plus 4 bytes per register pushed
   (three registers for the strncpy variants, two otherwise).

   RETURN pops the saved registers in reverse order and returns.  The
   CFI_PUSH sequence after `ret` emits no instructions; it re-establishes the
   unwind state for the code that follows the `ret` in the file, so it has to
   list the same registers, in the same order, as ENTRANCE.  */
#ifdef USE_AS_STRNCPY
# define PARMS	16
# define ENTRANCE	PUSH(%ebx); PUSH(%esi); PUSH(%edi)
# define RETURN	POP(%edi); POP(%esi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
#else
# define PARMS	12
# define ENTRANCE	PUSH(%esi); PUSH(%edi)
# define RETURN	POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
#endif

/* Stack offsets of the three arguments, 4 bytes apart.  The entry code loads
   STR1 into %edi, STR2 into %esi and (strncpy variants only) LEN into %ebx.  */
#define STR1	PARMS
#define STR2	STR1+4
#define LEN	STR2+4

#if (defined SHARED || defined __PIC__)
/* Position-independent build: a jump-table entry is the offset of label I
   from base B.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into ECX and branch to it.  TABLE is a
   jump table with relative offsets.  INDEX is a register that contains the
   index into the jump table.  SCALE is the scale of INDEX.  Clobbers ECX.  */

# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
	/* We first load PC into ECX.  */	\
	call	__x86.get_pc_thunk.cx;	\
	/* Get the address of the jump table.  */	\
	addl	$(TABLE - .), %ecx;	\
	/* Get the entry and convert the relative offset to the	\
	   absolute address.  */	\
	addl	(%ecx,INDEX,SCALE), %ecx;	\
	/* We loaded the jump table and adjusted ECX.  Go.  */	\
	jmp	*%ecx
#else
/* Non-PIC build: a jump-table entry is the label I itself.  */
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  INDEX is a register that contains the index into the
   jump table.  SCALE is the scale of INDEX.  */

# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
	jmp	*TABLE(,INDEX,SCALE)
#endif

/* Function entry.
   Register roles set up here:
     %edi = first argument (STR1), the address written to
     %esi = second argument (STR2), the address read from
     %ebx = third argument (LEN), strncpy variants only
     %eax = copy of %edi as return value, unless built as stpcpy
     %ecx = %esi & 15, the source misalignment within its 16-byte block
   If the source is not 16-byte aligned, the first two aligned 16-byte
   source blocks are scanned for a zero byte, 16 bytes are copied with
   unaligned moves, and control falls through to L(Unalign16Both) or jumps
   to L(Align16Both) depending on the alignment of the adjusted %edi.  */
	.text
ENTRY (STRCPY)
	ENTRANCE
	mov	STR1(%esp), %edi
	mov	STR2(%esp), %esi
#ifdef USE_AS_STRNCPY
	movl	LEN(%esp), %ebx
	test	%ebx, %ebx
	jz	L(ExitZero)		/* length 0 */
#endif

	mov	%esi, %ecx
#ifndef USE_AS_STPCPY
	mov	%edi, %eax		/* save result */
#endif
	and	$15, %ecx
	jz	L(SourceStringAlignmentZero)

	and	$-16, %esi		/* round source down to 16 bytes */
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1

	pcmpeqb	(%esi), %xmm1		/* 0xff in each byte lane that is zero */
#ifdef USE_AS_STRNCPY
	add	%ecx, %ebx
#endif
	pmovmskb	%xmm1, %edx
	shr	%cl, %edx		/* drop mask bits below the real start */
#ifdef USE_AS_STRNCPY
#ifdef USE_AS_STPCPY
	cmp	$16, %ebx
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
#else
	cmp	$17, %ebx
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
#endif
#endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTail)

	pcmpeqb	16(%esi), %xmm0		/* scan the next aligned 16 bytes */
	pmovmskb	%xmm0, %edx
#ifdef USE_AS_STRNCPY
#ifdef USE_AS_STPCPY
	cmp	$32, %ebx
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
#else
	cmp	$33, %ebx
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
#endif
#endif
	test	%edx, %edx
	jnz	L(CopyFrom1To32Bytes)

	movdqu	(%esi, %ecx), %xmm1	/* copy 16 bytes */
	movdqu	%xmm1, (%edi)

	sub	%ecx, %edi		/* bias %edi by the source misalignment */
	mov	%edi, %edx
	mov	$16, %ecx
	and	$15, %edx
	jz	L(Align16Both)

/* If source address alignment != destination address alignment */
/* Six unrolled 16-byte steps.  Each step loads the next aligned source
   block (offset 16 + %ecx from %esi), stores the previously loaded block to
   (%edi, %ecx) with an unaligned move, tests the new block for a zero byte
   via pcmpeqb against %xmm0 + pmovmskb, and advances %ecx by 16.  When no
   zero byte is found, %esi is rounded down to a 64-byte boundary, %edi is
   moved by the same distance, and control falls through to
   L(Unaligned64Loop).  In the strncpy variants %ebx is decremented each step
   and a result <= 0 (unsigned jbe) leaves via
   L(CopyFrom1To16BytesCase2OrCase3).  */
	.p2align 4
L(Unalign16Both):
	movdqa	(%esi, %ecx), %xmm1
	movaps	16(%esi, %ecx), %xmm2
	movdqu	%xmm1, (%edi, %ecx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb	%xmm0, %edx
	add	$16, %ecx
#ifdef USE_AS_STRNCPY
	sub	$48, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
#else
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%esi, %ecx), %xmm3
	movdqu	%xmm2, (%edi, %ecx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %edx
	add	$16, %ecx
#ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
#else
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%esi, %ecx), %xmm4
	movdqu	%xmm3, (%edi, %ecx)
	pcmpeqb	%xmm4, %xmm0
	pmovmskb	%xmm0, %edx
	add	$16, %ecx
#ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
#else
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%esi, %ecx), %xmm1
	movdqu	%xmm4, (%edi, %ecx)
	pcmpeqb	%xmm1, %xmm0
	pmovmskb	%xmm0, %edx
	add	$16, %ecx
#ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm1)
#else
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%esi, %ecx), %xmm2
	movdqu	%xmm1, (%edi, %ecx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb	%xmm0, %edx
	add	$16, %ecx
#ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
#else
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%esi, %ecx), %xmm3
	movdqu	%xmm2, (%edi, %ecx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %edx
	add	$16, %ecx
#ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
#else
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
#endif

	movdqu	%xmm3, (%edi, %ecx)
	mov	%esi, %edx
	lea	16(%esi, %ecx), %esi
	and	$-0x40, %esi		/* round source down to 64 bytes */
	sub	%esi, %edx		/* %edx = old %esi - new %esi */
	sub	%edx, %edi		/* move %edi by the same distance */
#ifdef USE_AS_STRNCPY
	lea	64+64(%ebx, %edx), %ebx
#endif
/* 64-bytes-per-iteration loop; loads from %esi are aligned (movaps/movdqa),
   stores to %edi are unaligned (movdqu).  The four 16-byte chunks are kept
   in %xmm4..%xmm7.  pminub folds them into %xmm3 so that any zero byte in
   the 64 bytes gives a zero byte lane, which pcmpeqb against %xmm0 (used as
   the all-zero vector) and pmovmskb turn into a non-zero %edx.  The chunks
   of one iteration are stored at the start of the next, after the pointers
   have advanced, hence the negative store offsets.  */
L(Unaligned64Loop):
	movaps	(%esi), %xmm2
	movaps	%xmm2, %xmm4
	movaps	16(%esi), %xmm5
	movaps	32(%esi), %xmm3
	movaps	%xmm3, %xmm6
	movaps	48(%esi), %xmm7
	pminub	%xmm5, %xmm2
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb	%xmm3, %edx
#ifdef USE_AS_STRNCPY
	sub	$64, %ebx
	jbe	L(UnalignedLeaveCase2OrCase3)
#endif
	test	%edx, %edx
	jnz	L(Unaligned64Leave)

L(Unaligned64Loop_start):
	add	$64, %edi
	add	$64, %esi
	movdqu	%xmm4, -64(%edi)
	movaps	(%esi), %xmm2
	movdqa	%xmm2, %xmm4
	movdqu	%xmm5, -48(%edi)
	movaps	16(%esi), %xmm5
	pminub	%xmm5, %xmm2
	movaps	32(%esi), %xmm3
	movdqu	%xmm6, -32(%edi)
	movaps	%xmm3, %xmm6
	movdqu	%xmm7, -16(%edi)
	movaps	48(%esi), %xmm7
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %edx
#ifdef USE_AS_STRNCPY
	sub	$64, %ebx
	jbe	L(UnalignedLeaveCase2OrCase3)
#endif
	test	%edx, %edx
	jz	L(Unaligned64Loop_start)

/* A zero byte was seen somewhere in the 64 bytes held in %xmm4..%xmm7.
   Re-test the chunks one by one to find which holds it and dispatch.  The
   fall-through case is the last chunk (%xmm7): the first three chunks are
   stored here and %edx becomes the bit index of the zero byte within it.  */
L(Unaligned64Leave):
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1

	pcmpeqb	%xmm4, %xmm0
	pcmpeqb	%xmm5, %xmm1
	pmovmskb	%xmm0, %edx
	pmovmskb	%xmm1, %ecx
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnaligned_0)
	test	%ecx, %ecx
	jnz	L(CopyFrom1To16BytesUnaligned_16)

	pcmpeqb	%xmm6, %xmm0
	pcmpeqb	%xmm7, %xmm1
	pmovmskb	%xmm0, %edx
	pmovmskb	%xmm1, %ecx
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnaligned_32)

	bsf	%ecx, %edx
	movdqu	%xmm4, (%edi)
	movdqu	%xmm5, 16(%edi)
	movdqu	%xmm6, 32(%edi)
#ifdef USE_AS_STRNCPY
#ifdef USE_AS_STPCPY
	lea	48(%edi, %edx), %eax	/* return value: address of the zero byte */
#endif
	movdqu	%xmm7, 48(%edi)
	add	$15, %ebx
	sub	%edx, %ebx		/* %ebx += 15 - index */
	lea	49(%edi, %edx), %edi	/* first byte after the zero byte */
	jmp	L(StrncpyFillTailWithZero)
#else
	add	$48, %esi
	add	$48, %edi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
#endif

/* If source address alignment == destination address alignment */
/* Reached from the entry code when %esi is already 16-byte aligned
   (%ecx == 0).  Scans the first two 16-byte source blocks for a zero byte,
   copies the first block, then enters L(Unalign16Both) or falls through to
   L(Align16Both) depending on whether %edi is 16-byte aligned.  */

L(SourceStringAlignmentZero):
	pxor	%xmm0, %xmm0
	movdqa	(%esi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb	%xmm0, %edx

#ifdef USE_AS_STRNCPY
#ifdef USE_AS_STPCPY
	cmp	$16, %ebx
	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
#else
	cmp	$17, %ebx
	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
#endif
#endif
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTail1)

	pcmpeqb	16(%esi), %xmm0
	movdqu	%xmm1, (%edi)
	pmovmskb	%xmm0, %edx

#ifdef USE_AS_STRNCPY
#ifdef USE_AS_STPCPY
	cmp	$32, %ebx
	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
#else
	cmp	$33, %ebx
	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
#endif
#endif
	test	%edx, %edx
	jnz	L(CopyFrom1To32Bytes1)

	mov	%edi, %edx
	mov	$16, %ecx
	and	$15, %edx		/* destination 16-byte aligned? */
	jnz	L(Unalign16Both)

/* Six unrolled 16-byte steps using aligned loads and aligned stores
   (movdqa on both sides).  Each step loads the next source block (offset
   16 + %ecx from %esi), stores the previously loaded block to (%edi, %ecx),
   tests the new block for a zero byte via pcmpeqb against %xmm0 + pmovmskb,
   and advances %ecx by 16.  When no zero byte is found, %esi is rounded
   down to a 64-byte boundary, %edi is moved by the same distance, and
   control falls through to L(Aligned64Loop).  In the strncpy variants %ebx
   is decremented each step and a result <= 0 (unsigned jbe) leaves via
   L(CopyFrom1To16BytesCase2OrCase3).  */
L(Align16Both):
	movdqa	(%esi, %ecx), %xmm1
	movdqa	16(%esi, %ecx), %xmm2
	movdqa	%xmm1, (%edi, %ecx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb	%xmm0, %edx
	add	$16, %ecx
#ifdef USE_AS_STRNCPY
	sub	$48, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesXmm2)
#else
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
#endif

	movdqa	16(%esi, %ecx), %xmm3
	movdqa	%xmm2, (%edi, %ecx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %edx
	lea	16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesXmm3)
#else
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
#endif

	movdqa	16(%esi, %ecx), %xmm4
	movdqa	%xmm3, (%edi, %ecx)
	pcmpeqb	%xmm4, %xmm0
	pmovmskb	%xmm0, %edx
	lea	16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesXmm4)
#else
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
#endif

	movdqa	16(%esi, %ecx), %xmm1
	movdqa	%xmm4, (%edi, %ecx)
	pcmpeqb	%xmm1, %xmm0
	pmovmskb	%xmm0, %edx
	lea	16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesXmm1)
#else
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
#endif

	movdqa	16(%esi, %ecx), %xmm2
	movdqa	%xmm1, (%edi, %ecx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb	%xmm0, %edx
	lea	16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesXmm2)
#else
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
#endif

	movdqa	16(%esi, %ecx), %xmm3
	movdqa	%xmm2, (%edi, %ecx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %edx
	lea	16(%ecx), %ecx
#ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesXmm3)
#else
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes)
#endif

	movdqa	%xmm3, (%edi, %ecx)
	mov	%esi, %edx
	lea	16(%esi, %ecx), %esi
	and	$-0x40, %esi		/* round source down to 64 bytes */
	sub	%esi, %edx		/* %edx = old %esi - new %esi */
	sub	%edx, %edi		/* move %edi by the same distance */
#ifdef USE_AS_STRNCPY
	lea	64+64(%ebx, %edx), %ebx
#endif
/* 64-bytes-per-iteration loop using aligned loads from %esi and aligned
   stores to %edi.  The four 16-byte chunks are kept in %xmm4..%xmm7.
   pminub folds them into %xmm3 so that any zero byte in the 64 bytes gives
   a zero byte lane, which pcmpeqb against %xmm0 (used as the all-zero
   vector) and pmovmskb turn into a non-zero %edx.  The chunks of one
   iteration are stored at the start of the next, after the pointers have
   advanced, hence the negative store offsets.  */
L(Aligned64Loop):
	movdqa	(%esi), %xmm2
	movdqa	%xmm2, %xmm4
	movaps	16(%esi), %xmm5
	movdqa	32(%esi), %xmm3
	movdqa	%xmm3, %xmm6
	movaps	48(%esi), %xmm7
	pminub	%xmm5, %xmm2
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb	%xmm3, %edx
#ifdef USE_AS_STRNCPY
	sub	$64, %ebx
	jbe	L(AlignedLeaveCase2OrCase3)
#endif
	test	%edx, %edx
	jnz	L(Aligned64Leave)

L(Aligned64Loop_start):
	add	$64, %esi
	add	$64, %edi
	movaps	%xmm4, -64(%edi)
	movdqa	(%esi), %xmm2
	movdqa	%xmm2, %xmm4
	movaps	%xmm5, -48(%edi)
	movaps	16(%esi), %xmm5
	pminub	%xmm5, %xmm2
	movaps	32(%esi), %xmm3
	movaps	%xmm6, -32(%edi)
	movdqa	%xmm3, %xmm6
	movaps	%xmm7, -16(%edi)
	movaps	48(%esi), %xmm7
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %edx
#ifdef USE_AS_STRNCPY
	sub	$64, %ebx
	jbe	L(AlignedLeaveCase2OrCase3)
#endif
	test	%edx, %edx
	jz	L(Aligned64Loop_start)

/* A zero byte was seen somewhere in the 64 bytes held in %xmm4..%xmm7.
   Re-test the chunks one by one to find which holds it and dispatch.  The
   fall-through case is the last chunk (%xmm7): the first three chunks are
   stored here and %edx becomes the bit index of the zero byte within it.  */
L(Aligned64Leave):
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1

	pcmpeqb	%xmm4, %xmm0
	pcmpeqb	%xmm5, %xmm1
	pmovmskb	%xmm0, %edx
	pmovmskb	%xmm1, %ecx
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes_0)
	test	%ecx, %ecx
	jnz	L(CopyFrom1To16Bytes_16)

	pcmpeqb	%xmm6, %xmm0
	pcmpeqb	%xmm7, %xmm1
	pmovmskb	%xmm0, %edx
	pmovmskb	%xmm1, %ecx
	test	%edx, %edx
	jnz	L(CopyFrom1To16Bytes_32)

	bsf	%ecx, %edx
	movdqa	%xmm4, (%edi)
	movdqa	%xmm5, 16(%edi)
	movdqa	%xmm6, 32(%edi)
#ifdef USE_AS_STRNCPY
#ifdef USE_AS_STPCPY
	lea	48(%edi, %edx), %eax	/* return value: address of the zero byte */
#endif
	movdqa	%xmm7, 48(%edi)
	add	$15, %ebx
	sub	%edx, %ebx		/* %ebx += 15 - index */
	lea	49(%edi, %edx), %edi	/* first byte after the zero byte */
	jmp	L(StrncpyFillTailWithZero)
#else
	add	$48, %esi
	add	$48, %edi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
#endif

/*----------------------------------------------------*/

/* Case1 */
/* Non-strncpy builds only: advance both pointers by the running offset
   %ecx, turn the zero-byte mask in %edx into a bit index, and dispatch
   through L(ExitTable) on that index.  */
#ifndef USE_AS_STRNCPY
	.p2align 4
L(CopyFrom1To16Bytes):
	add	%ecx, %edi
	add	%ecx, %esi
	bsf	%edx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
#endif
/* Zero byte found in the first (rounded-down) source block: add %ecx back
   to %esi (and subtract it from %ebx in the strncpy variants), then
   dispatch through L(ExitTable) on the bit index of the zero byte.  */
	.p2align 4
L(CopyFrom1To16BytesTail):
#ifdef USE_AS_STRNCPY
	sub	%ecx, %ebx
#endif
	add	%ecx, %esi
	bsf	%edx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

/* L(CopyFrom1To32Bytes1) steps both pointers forward by 16 bytes
   (and %ebx back by 16 in the strncpy variants), then shares the tail with
   L(CopyFrom1To16BytesTail1), which dispatches through L(ExitTable) on the
   bit index of the zero byte in %edx.  */
	.p2align 4
L(CopyFrom1To32Bytes1):
	add	$16, %esi
	add	$16, %edi
#ifdef USE_AS_STRNCPY
	sub	$16, %ebx
#endif
L(CopyFrom1To16BytesTail1):
	bsf	%edx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

/* Zero byte found in the second aligned source block while %esi is still
   rounded down: add %ecx back to %esi, and form the table index as
   bit index + 16 - %ecx before dispatching through L(ExitTable).  */
	.p2align 4
L(CopyFrom1To32Bytes):
#ifdef USE_AS_STRNCPY
	sub	%ecx, %ebx
#endif
	bsf	%edx, %edx
	add	%ecx, %esi
	add	$16, %edx
	sub	%ecx, %edx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

/* Aligned 64-byte loop exit: the zero byte is in the first chunk (%xmm4).
   %edx becomes its bit index.  strncpy variants store the chunk, adjust
   %ebx by 63 - index, point %edi just past the zero byte and go on to
   zero-fill; other variants dispatch through L(ExitTable).  */
	.p2align 4
L(CopyFrom1To16Bytes_0):
	bsf	%edx, %edx
#ifdef USE_AS_STRNCPY
#ifdef USE_AS_STPCPY
	lea	(%edi, %edx), %eax	/* return value: address of the zero byte */
#endif
	movdqa	%xmm4, (%edi)
	add	$63, %ebx
	sub	%edx, %ebx
	lea	1(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)
#else
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
#endif

/* Aligned 64-byte loop exit: the zero byte is in the second chunk (%xmm5).
   %edx becomes its bit index (taken from the mask in %ecx) and the first
   chunk is stored.  strncpy variants also store the second chunk, adjust
   %ebx by 47 - index, point %edi just past the zero byte and go on to
   zero-fill; other variants step both pointers by 16 and dispatch through
   L(ExitTable).  */
	.p2align 4
L(CopyFrom1To16Bytes_16):
	bsf	%ecx, %edx
	movdqa	%xmm4, (%edi)
#ifdef USE_AS_STRNCPY
#ifdef USE_AS_STPCPY
	lea	16(%edi, %edx), %eax	/* return value: address of the zero byte */
#endif
	movdqa	%xmm5, 16(%edi)
	add	$47, %ebx
	sub	%edx, %ebx
	lea	17(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)
#else
	add	$16, %esi
	add	$16, %edi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
#endif

/* Aligned 64-byte loop exit: the zero byte is in the third chunk (%xmm6).
   %edx becomes its bit index and the first two chunks are stored.  strncpy
   variants also store the third chunk, adjust %ebx by 31 - index, point
   %edi just past the zero byte and go on to zero-fill; other variants step
   both pointers by 32 and dispatch through L(ExitTable).  */
	.p2align 4
L(CopyFrom1To16Bytes_32):
	bsf	%edx, %edx
	movdqa	%xmm4, (%edi)
	movdqa	%xmm5, 16(%edi)
#ifdef USE_AS_STRNCPY
#ifdef USE_AS_STPCPY
	lea	32(%edi, %edx), %eax	/* return value: address of the zero byte */
#endif
	movdqa	%xmm6, 32(%edi)
	add	$31, %ebx
	sub	%edx, %ebx
	lea	33(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)
#else
	add	$32, %esi
	add	$32, %edi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
#endif

/* Unaligned-destination 64-byte loop exit: the zero byte is in the first
   chunk (%xmm4).  %edx becomes its bit index.  strncpy variants store the
   chunk with movdqu, adjust %ebx by 63 - index, point %edi just past the
   zero byte and go on to zero-fill; other variants dispatch through
   L(ExitTable).  */
	.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
	bsf	%edx, %edx
#ifdef USE_AS_STRNCPY
#ifdef USE_AS_STPCPY
	lea	(%edi, %edx), %eax	/* return value: address of the zero byte */
#endif
	movdqu	%xmm4, (%edi)
	add	$63, %ebx
	sub	%edx, %ebx
	lea	1(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)
#else
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
#endif

/* Unaligned-destination 64-byte loop exit: the zero byte is in the second
   chunk (%xmm5).  %edx becomes its bit index (taken from the mask in %ecx)
   and the first chunk is stored.  strncpy variants also store the second
   chunk, adjust %ebx by 47 - index, point %edi just past the zero byte and
   go on to zero-fill; other variants step both pointers by 16 and dispatch
   through L(ExitTable).  */
	.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
	bsf	%ecx, %edx
	movdqu	%xmm4, (%edi)
#ifdef USE_AS_STRNCPY
#ifdef USE_AS_STPCPY
	lea	16(%edi, %edx), %eax	/* return value: address of the zero byte */
#endif
	movdqu	%xmm5, 16(%edi)
	add	$47, %ebx
	sub	%edx, %ebx
	lea	17(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)
#else
	add	$16, %esi
	add	$16, %edi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
#endif

/* Unaligned-destination 64-byte loop exit: the zero byte is in the third
   chunk (%xmm6).  %edx becomes its bit index and the first two chunks are
   stored.  strncpy variants also store the third chunk, adjust %ebx by
   31 - index, point %edi just past the zero byte and go on to zero-fill;
   other variants step both pointers by 32 and dispatch through
   L(ExitTable).  */
	.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
	bsf	%edx, %edx
	movdqu	%xmm4, (%edi)
	movdqu	%xmm5, 16(%edi)
#ifdef USE_AS_STRNCPY
#ifdef USE_AS_STPCPY
	lea	32(%edi, %edx), %eax	/* return value: address of the zero byte */
#endif
	movdqu	%xmm6, 32(%edi)
	add	$31, %ebx
	sub	%edx, %ebx
	lea	33(%edi, %edx), %edi
	jmp	L(StrncpyFillTailWithZero)
#else
	add	$32, %esi
	add	$32, %edi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
#endif

#ifdef USE_AS_STRNCPY
/* strncpy-only store stubs (this #ifdef region continues past this group).
   Each stub stores one pending 16-byte register to (%edi, %ecx) -- movdqa
   in the Xmm<N> stubs, movdqu in the UnalignedXmm<N> stubs -- and then
   jumps to the shared L(CopyFrom1To16BytesXmmExit).  */
	.p2align 4
L(CopyFrom1To16BytesXmm6):
	movdqa	%xmm6, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesXmm5):
	movdqa	%xmm5, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesXmm4):
	movdqa	%xmm4, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesXmm3):
	movdqa	%xmm3, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesXmm2):
	movdqa	%xmm2, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesXmm1):
	movdqa	%xmm1, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm6):
	movdqu	%xmm6, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm5):
	movdqu	%xmm5, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm4):
	movdqu	%xmm4, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm3):
	movdqu	%xmm3, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm1):
	movdqu	%xmm1, (%edi, %ecx)
	jmp	L(CopyFrom1To16BytesXmmExit)

/* Dispatch through L(ExitTable) on the index already held in %edx.  */
	.p2align 4
L(CopyFrom1To16BytesExit):
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)

/* Case2 */
/* strncpy variants: a zero byte was found (mask in %edx) and the count in
   %ebx is close to running out.  Each handler below fixes up the pointers
   and %ebx for its call site, converts the mask to an index, and compares
   it with %ebx (unsigned): index < %ebx goes to L(CopyFrom1To16BytesExit),
   otherwise it dispatches through L(ExitStrncpyTable) on %ebx.  */

	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %ebx
	add	%ecx, %edi
	add	%ecx, %esi
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2):
	sub	%ecx, %ebx
	add	%ecx, %esi
	bsf	%edx, %edx
	add	$16, %edx
	sub	%ecx, %edx		/* index = bit index + 16 - %ecx */
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

L(CopyFrom1To16BytesTailCase2):
	sub	%ecx, %ebx
	add	%ecx, %esi
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

L(CopyFrom1To16BytesTail1Case2):
	bsf	%edx, %edx
	cmp	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

/* Case2 or Case3,  Case3 */
/* strncpy variants: reached when the count check fired.  If the zero-byte
   mask in %edx is non-zero, hand over to the matching Case2 handler;
   otherwise (Case3) fix up the pointers and %ebx for the call site and
   dispatch through L(ExitStrncpyTable) on %ebx.  The #endif at the end
   closes the USE_AS_STRNCPY region opened before the store stubs.  */

	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesCase2)
L(CopyFrom1To16BytesCase3):
	add	$16, %ebx
	add	%ecx, %edi
	add	%ecx, %esi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To32BytesCase2)
	sub	%ecx, %ebx
	add	%ecx, %esi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

	.p2align 4
L(CopyFrom1To16BytesTailCase2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTailCase2)
	sub	%ecx, %ebx
	add	%ecx, %esi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

	.p2align 4
L(CopyFrom1To32Bytes1Case2OrCase3):
	add	$16, %edi
	add	$16, %esi
	sub	$16, %ebx
L(CopyFrom1To16BytesTail1Case2OrCase3):
	test	%edx, %edx
	jnz	L(CopyFrom1To16BytesTail1Case2)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

#endif

/*-----------------------------------------------------------------*/
/* Exit0: nothing to copy.  stpcpy variants return %edi; then pop the saved
   registers and return.  */
	.p2align 4
L(Exit0):
#ifdef USE_AS_STPCPY
	mov	%edi, %eax
#endif
	RETURN

/* Exit1: write 1 byte to (%edi).  The byte comes from %dh rather than from
   the source.  NOTE(review): presumably %dh is zero here because %edx holds
   the small jump-table index -- confirm against L(ExitTable).
   stpcpy variants return %edi.  strncpy variants take 1 off %ebx, advance
   %edi by 1 and zero-fill if %ebx is non-zero (lea leaves the flags from
   sub untouched).  */
	.p2align 4
L(Exit1):
	movb	%dh, (%edi)
#ifdef USE_AS_STPCPY
	lea	(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$1, %ebx
	lea	1(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit2: copy 2 bytes from (%esi) to (%edi).  stpcpy variants return
   %edi + 1.  strncpy variants take 2 off %ebx, advance %edi by 2 and
   zero-fill if %ebx is non-zero (lea leaves the flags from sub untouched).  */
	.p2align 4
L(Exit2):
	movw	(%esi), %dx
	movw	%dx, (%edi)
#ifdef USE_AS_STPCPY
	lea	1(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$2, %ebx
	lea	2(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit3: copy 2 bytes from (%esi), then store %dh as the third byte.
   NOTE(review): presumably %dh is zero here because %edx holds the small
   jump-table index -- confirm against L(ExitTable).
   stpcpy variants return %edi + 2.  strncpy variants take 3 off %ebx,
   advance %edi by 3 and zero-fill if %ebx is non-zero.  */
	.p2align 4
L(Exit3):
	movw	(%esi), %cx
	movw	%cx, (%edi)
	movb	%dh, 2(%edi)
#ifdef USE_AS_STPCPY
	lea	2(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$3, %ebx
	lea	3(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit4: copy 4 bytes from (%esi) to (%edi).  stpcpy variants return
   %edi + 3.  strncpy variants take 4 off %ebx, advance %edi by 4 and
   zero-fill if %ebx is non-zero (lea leaves the flags from sub untouched).  */
	.p2align 4
L(Exit4):
	movl	(%esi), %edx
	movl	%edx, (%edi)
#ifdef USE_AS_STPCPY
	lea	3(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$4, %ebx
	lea	4(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit5: copy 4 bytes from (%esi) and store %dh as the fifth byte.
   NOTE(review): presumably %dh is zero here because %edx holds the small
   jump-table index -- confirm against L(ExitTable).
   stpcpy variants return %edi + 4.  strncpy variants take 5 off %ebx,
   advance %edi by 5 and zero-fill if %ebx is non-zero.  */
	.p2align 4
L(Exit5):
	movl	(%esi), %ecx
	movb	%dh, 4(%edi)
	movl	%ecx, (%edi)
#ifdef USE_AS_STPCPY
	lea	4(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$5, %ebx
	lea	5(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit6: copy 6 bytes (4 + 2) from (%esi) to (%edi).  stpcpy variants
   return %edi + 5.  strncpy variants take 6 off %ebx, advance %edi by 6
   and zero-fill if %ebx is non-zero.  */
	.p2align 4
L(Exit6):
	movl	(%esi), %ecx
	movw	4(%esi), %dx
	movl	%ecx, (%edi)
	movw	%dx, 4(%edi)
#ifdef USE_AS_STPCPY
	lea	5(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$6, %ebx
	lea	6(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit7: copy 7 bytes using two overlapping 4-byte moves (offsets 0 and 3).
   stpcpy variants return %edi + 6.  strncpy variants take 7 off %ebx,
   advance %edi by 7 and zero-fill if %ebx is non-zero.  */
	.p2align 4
L(Exit7):
	movl	(%esi), %ecx
	movl	3(%esi), %edx
	movl	%ecx, (%edi)
	movl	%edx, 3(%edi)
#ifdef USE_AS_STPCPY
	lea	6(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$7, %ebx
	lea	7(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit8: copy 8 bytes through the low half of %xmm0.  stpcpy variants
   return %edi + 7.  strncpy variants take 8 off %ebx, advance %edi by 8
   and zero-fill if %ebx is non-zero.  */
	.p2align 4
L(Exit8):
	movlpd	(%esi), %xmm0
	movlpd	%xmm0, (%edi)
#ifdef USE_AS_STPCPY
	lea	7(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$8, %ebx
	lea	8(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit9: copy 8 bytes through %xmm0 and store %dh as the ninth byte.
   NOTE(review): presumably %dh is zero here because %edx holds the small
   jump-table index -- confirm against L(ExitTable).
   stpcpy variants return %edi + 8.  strncpy variants take 9 off %ebx,
   advance %edi by 9 and zero-fill if %ebx is non-zero.  */
	.p2align 4
L(Exit9):
	movlpd	(%esi), %xmm0
	movb	%dh, 8(%edi)
	movlpd	%xmm0, (%edi)
#ifdef USE_AS_STPCPY
	lea	8(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$9, %ebx
	lea	9(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit10: copy 10 bytes (8 + 2).  stpcpy variants return %edi + 9.
   strncpy variants take 10 off %ebx, advance %edi by 10 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit10):
	movlpd	(%esi), %xmm0
	movw	8(%esi), %dx
	movlpd	%xmm0, (%edi)
	movw	%dx, 8(%edi)
#ifdef USE_AS_STPCPY
	lea	9(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$10, %ebx
	lea	10(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit11: copy 11 bytes using an 8-byte move at offset 0 and an
   overlapping 4-byte move at offset 7.  stpcpy variants return %edi + 10.
   strncpy variants take 11 off %ebx, advance %edi by 11 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit11):
	movlpd	(%esi), %xmm0
	movl	7(%esi), %edx
	movlpd	%xmm0, (%edi)
	movl	%edx, 7(%edi)
#ifdef USE_AS_STPCPY
	lea	10(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$11, %ebx
	lea	11(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit12: copy 12 bytes (8 + 4).  stpcpy variants return %edi + 11.
   strncpy variants take 12 off %ebx, advance %edi by 12 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit12):
	movlpd	(%esi), %xmm0
	movl	8(%esi), %edx
	movlpd	%xmm0, (%edi)
	movl	%edx, 8(%edi)
#ifdef USE_AS_STPCPY
	lea	11(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$12, %ebx
	lea	12(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit13: copy 13 bytes using two overlapping 8-byte moves (offsets 0
   and 5).  stpcpy variants return %edi + 12.  strncpy variants take 13 off
   %ebx, advance %edi by 13 and zero-fill if %ebx is non-zero.  */
	.p2align 4
L(Exit13):
	movlpd	(%esi), %xmm0
	movlpd	5(%esi), %xmm1
	movlpd	%xmm0, (%edi)
	movlpd	%xmm1, 5(%edi)
#ifdef USE_AS_STPCPY
	lea	12(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$13, %ebx
	lea	13(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit14: copy 14 bytes using two overlapping 8-byte moves (offsets 0
   and 6).  stpcpy variants return %edi + 13.  strncpy variants take 14 off
   %ebx, advance %edi by 14 and zero-fill if %ebx is non-zero.  */
	.p2align 4
L(Exit14):
	movlpd	(%esi), %xmm0
	movlpd	6(%esi), %xmm1
	movlpd	%xmm0, (%edi)
	movlpd	%xmm1, 6(%edi)
#ifdef USE_AS_STPCPY
	lea	13(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$14, %ebx
	lea	14(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit15: copy 15 bytes using two overlapping 8-byte moves (offsets 0
   and 7).  stpcpy variants return %edi + 14.  strncpy variants take 15 off
   %ebx, advance %edi by 15 and zero-fill if %ebx is non-zero.  */
	.p2align 4
L(Exit15):
	movlpd	(%esi), %xmm0
	movlpd	7(%esi), %xmm1
	movlpd	%xmm0, (%edi)
	movlpd	%xmm1, 7(%edi)
#ifdef USE_AS_STPCPY
	lea	14(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$15, %ebx
	lea	15(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit16: copy 16 bytes with one unaligned 16-byte move.  stpcpy variants
   return %edi + 15.  strncpy variants take 16 off %ebx, advance %edi by 16
   and zero-fill if %ebx is non-zero.  */
	.p2align 4
L(Exit16):
	movdqu	(%esi), %xmm0
	movdqu	%xmm0, (%edi)
#ifdef USE_AS_STPCPY
	lea	15(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	lea	16(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit17: copy 16 bytes, then store a zero byte (from %cl, cleared with
   xor) at offset 16.  stpcpy variants return %edi + 16.  strncpy variants
   take 17 off %ebx, advance %edi by 17 and zero-fill if %ebx is non-zero.  */
	.p2align 4
L(Exit17):
	movdqu	(%esi), %xmm0
	xor	%cl, %cl
	movdqu	%xmm0, (%edi)
	movb	%cl, 16(%edi)
#ifdef USE_AS_STPCPY
	lea	16(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$17, %ebx
	lea	17(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit18: copy 18 bytes (16 + 2).  stpcpy variants return %edi + 17.
   strncpy variants take 18 off %ebx, advance %edi by 18 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit18):
	movdqu	(%esi), %xmm0
	movw	16(%esi), %cx
	movdqu	%xmm0, (%edi)
	movw	%cx, 16(%edi)
#ifdef USE_AS_STPCPY
	lea	17(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$18, %ebx
	lea	18(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit19: copy 19 bytes using a 16-byte move at offset 0 and an
   overlapping 4-byte move at offset 15.  stpcpy variants return %edi + 18.
   strncpy variants take 19 off %ebx, advance %edi by 19 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit19):
	movdqu	(%esi), %xmm0
	movl	15(%esi), %ecx
	movdqu	%xmm0, (%edi)
	movl	%ecx, 15(%edi)
#ifdef USE_AS_STPCPY
	lea	18(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$19, %ebx
	lea	19(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit20: copy 20 bytes (16 + 4).  stpcpy variants return %edi + 19.
   strncpy variants take 20 off %ebx, advance %edi by 20 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit20):
	movdqu	(%esi), %xmm0
	movl	16(%esi), %ecx
	movdqu	%xmm0, (%edi)
	movl	%ecx, 16(%edi)
#ifdef USE_AS_STPCPY
	lea	19(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$20, %ebx
	lea	20(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit21: copy 20 bytes (16 + 4), then store a zero byte (from %dl,
   cleared with xor) at offset 20.  stpcpy variants return %edi + 20.
   strncpy variants take 21 off %ebx, advance %edi by 21 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit21):
	movdqu	(%esi), %xmm0
	movl	16(%esi), %ecx
	xor	%dl, %dl
	movdqu	%xmm0, (%edi)
	movl	%ecx, 16(%edi)
	movb	%dl, 20(%edi)
#ifdef USE_AS_STPCPY
	lea	20(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$21, %ebx
	lea	21(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit22: copy 22 bytes using a 16-byte move at offset 0 and an
   overlapping 8-byte move at offset 14.  stpcpy variants return %edi + 21.
   strncpy variants take 22 off %ebx, advance %edi by 22 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit22):
	movdqu	(%esi), %xmm0
	movlpd	14(%esi), %xmm3
	movdqu	%xmm0, (%edi)
	movlpd	%xmm3, 14(%edi)
#ifdef USE_AS_STPCPY
	lea	21(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$22, %ebx
	lea	22(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit23: copy 23 bytes using a 16-byte move at offset 0 and an
   overlapping 8-byte move at offset 15.  stpcpy variants return %edi + 22.
   strncpy variants take 23 off %ebx, advance %edi by 23 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit23):
	movdqu	(%esi), %xmm0
	movlpd	15(%esi), %xmm3
	movdqu	%xmm0, (%edi)
	movlpd	%xmm3, 15(%edi)
#ifdef USE_AS_STPCPY
	lea	22(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$23, %ebx
	lea	23(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit24: copy 24 bytes (16 + 8).  stpcpy variants return %edi + 23.
   strncpy variants take 24 off %ebx, advance %edi by 24 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit24):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
#ifdef USE_AS_STPCPY
	lea	23(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$24, %ebx
	lea	24(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit25: copy 24 bytes (16 + 8), then store a zero byte (from %cl,
   cleared with xor) at offset 24.  stpcpy variants return %edi + 24.
   strncpy variants take 25 off %ebx, advance %edi by 25 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit25):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	xor	%cl, %cl
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
	movb	%cl, 24(%edi)
#ifdef USE_AS_STPCPY
	lea	24(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$25, %ebx
	lea	25(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit26: copy 26 bytes (16 + 8 + 2).  stpcpy variants return %edi + 25.
   strncpy variants take 26 off %ebx, advance %edi by 26 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit26):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movw	24(%esi), %cx
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
	movw	%cx, 24(%edi)
#ifdef USE_AS_STPCPY
	lea	25(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$26, %ebx
	lea	26(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit27: copy 27 bytes using 16 + 8 bytes at offsets 0 and 16 plus an
   overlapping 4-byte move at offset 23.  stpcpy variants return %edi + 26.
   strncpy variants take 27 off %ebx, advance %edi by 27 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit27):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movl	23(%esi), %ecx
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
	movl	%ecx, 23(%edi)
#ifdef USE_AS_STPCPY
	lea	26(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$27, %ebx
	lea	27(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit28: copy 28 bytes (16 + 8 + 4).  stpcpy variants return %edi + 27.
   strncpy variants take 28 off %ebx, advance %edi by 28 and zero-fill if
   %ebx is non-zero.  */
	.p2align 4
L(Exit28):
	movdqu	(%esi), %xmm0
	movlpd	16(%esi), %xmm2
	movl	24(%esi), %ecx
	movdqu	%xmm0, (%edi)
	movlpd	%xmm2, 16(%edi)
	movl	%ecx, 24(%edi)
#ifdef USE_AS_STPCPY
	lea	27(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$28, %ebx
	lea	28(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit29: copy 29 bytes using two overlapping 16-byte moves (offsets 0
   and 13).  stpcpy variants return %edi + 28.  strncpy variants take 29
   off %ebx, advance %edi by 29 and zero-fill if %ebx is non-zero.  */
	.p2align 4
L(Exit29):
	movdqu	(%esi), %xmm0
	movdqu	13(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movdqu	%xmm2, 13(%edi)
#ifdef USE_AS_STPCPY
	lea	28(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$29, %ebx
	lea	29(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

/* Exit30: copy 30 bytes using two overlapping 16-byte moves (offsets 0
   and 14).  stpcpy variants return %edi + 29.  strncpy variants take 30
   off %ebx, advance %edi by 30 and zero-fill if %ebx is non-zero.  */
	.p2align 4
L(Exit30):
	movdqu	(%esi), %xmm0
	movdqu	14(%esi), %xmm2
	movdqu	%xmm0, (%edi)
	movdqu	%xmm2, 14(%edi)
#ifdef USE_AS_STPCPY
	lea	29(%edi), %eax
#endif
#ifdef USE_AS_STRNCPY
	sub	$30, %ebx
	lea	30(%edi), %edi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN


.p2align 4
|
|
/* Exit31: copy 31 bytes from (%esi) to (%edi) using two overlapping
   unaligned 16-byte moves at offsets 0 and 15 (bytes 0-30).
   USE_AS_STPCPY: %eax = %edi + 30 (the last byte copied).
   USE_AS_STRNCPY: subtract 31 from %ebx, advance %edi by 31 and, when
   %ebx is then non-zero, branch to L(StrncpyFillTailWithZero).  lea does
   not modify flags, so the jnz tests the result of the sub.  */
L(Exit31):
|
|
movdqu (%esi), %xmm0
|
|
movdqu 15(%esi), %xmm2
|
|
movdqu %xmm0, (%edi)
|
|
movdqu %xmm2, 15(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 30(%edi), %eax
|
|
#endif
|
|
#ifdef USE_AS_STRNCPY
|
|
sub $31, %ebx
|
|
lea 31(%edi), %edi
|
|
jnz L(StrncpyFillTailWithZero)
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Exit32: copy 32 bytes from (%esi) to (%edi) using two unaligned
   16-byte moves at offsets 0 and 16.
   USE_AS_STPCPY: %eax = %edi + 31 (the last byte copied).
   USE_AS_STRNCPY: subtract 32 from %ebx, advance %edi by 32 and, when
   %ebx is then non-zero, branch to L(StrncpyFillTailWithZero).  lea does
   not modify flags, so the jnz tests the result of the sub.  */
L(Exit32):
|
|
movdqu (%esi), %xmm0
|
|
movdqu 16(%esi), %xmm2
|
|
movdqu %xmm0, (%edi)
|
|
movdqu %xmm2, 16(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 31(%edi), %eax
|
|
#endif
|
|
#ifdef USE_AS_STRNCPY
|
|
sub $32, %ebx
|
|
lea 32(%edi), %edi
|
|
jnz L(StrncpyFillTailWithZero)
|
|
#endif
|
|
RETURN
|
|
|
|
#ifdef USE_AS_STRNCPY
|
|
|
|
.p2align 4
|
|
/* StrncpyExit1: copy exactly 1 byte from (%esi) to (%edi); nothing is
   zero-filled afterwards.  USE_AS_STPCPY: %eax = %edi + 1.  */
L(StrncpyExit1):
|
|
movb (%esi), %dl
|
|
movb %dl, (%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 1(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit2: copy exactly 2 bytes from (%esi) to (%edi) with one
   16-bit move.  USE_AS_STPCPY: %eax = %edi + 2.  */
L(StrncpyExit2):
|
|
movw (%esi), %dx
|
|
movw %dx, (%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 2(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
.p2align 4
|
|
/* StrncpyExit3: copy exactly 3 bytes from (%esi) to (%edi) as a 16-bit
   move plus a byte move at offset 2.  USE_AS_STPCPY: %eax = %edi + 3.  */
L(StrncpyExit3):
|
|
movw (%esi), %cx
|
|
movb 2(%esi), %dl
|
|
movw %cx, (%edi)
|
|
movb %dl, 2(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 3(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit4: copy exactly 4 bytes from (%esi) to (%edi) with one
   32-bit move.  USE_AS_STPCPY: %eax = %edi + 4.  */
L(StrncpyExit4):
|
|
movl (%esi), %edx
|
|
movl %edx, (%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 4(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit5: copy exactly 5 bytes from (%esi) to (%edi) as a 32-bit
   move plus a byte move at offset 4.  USE_AS_STPCPY: %eax = %edi + 5.  */
L(StrncpyExit5):
|
|
movl (%esi), %ecx
|
|
movb 4(%esi), %dl
|
|
movl %ecx, (%edi)
|
|
movb %dl, 4(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 5(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit6: copy exactly 6 bytes from (%esi) to (%edi) as a 32-bit
   move plus a 16-bit move at offset 4.  USE_AS_STPCPY: %eax = %edi + 6.  */
L(StrncpyExit6):
|
|
movl (%esi), %ecx
|
|
movw 4(%esi), %dx
|
|
movl %ecx, (%edi)
|
|
movw %dx, 4(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 6(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit7: copy exactly 7 bytes from (%esi) to (%edi) with two
   32-bit moves at offsets 0 and 3; they overlap on byte 3.
   USE_AS_STPCPY: %eax = %edi + 7.  */
L(StrncpyExit7):
|
|
movl (%esi), %ecx
|
|
movl 3(%esi), %edx
|
|
movl %ecx, (%edi)
|
|
movl %edx, 3(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 7(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit8: copy exactly 8 bytes from (%esi) to (%edi) through the
   low half of %xmm0.  USE_AS_STPCPY: %eax = %edi + 8.  */
L(StrncpyExit8):
|
|
movlpd (%esi), %xmm0
|
|
movlpd %xmm0, (%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 8(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit9: copy exactly 9 bytes from (%esi) to (%edi) as an 8-byte
   move plus a byte move at offset 8.  USE_AS_STPCPY: %eax = %edi + 9.  */
L(StrncpyExit9):
|
|
movlpd (%esi), %xmm0
|
|
movb 8(%esi), %dl
|
|
movlpd %xmm0, (%edi)
|
|
movb %dl, 8(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 9(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit10: copy exactly 10 bytes from (%esi) to (%edi) as an
   8-byte move plus a 16-bit move at offset 8.
   USE_AS_STPCPY: %eax = %edi + 10.  */
L(StrncpyExit10):
|
|
movlpd (%esi), %xmm0
|
|
movw 8(%esi), %dx
|
|
movlpd %xmm0, (%edi)
|
|
movw %dx, 8(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 10(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit11: copy exactly 11 bytes from (%esi) to (%edi) as an
   8-byte move plus a 32-bit move at offset 7 (overlapping byte 7).
   USE_AS_STPCPY: %eax = %edi + 11.  */
L(StrncpyExit11):
|
|
movlpd (%esi), %xmm0
|
|
movl 7(%esi), %edx
|
|
movlpd %xmm0, (%edi)
|
|
movl %edx, 7(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 11(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit12: copy exactly 12 bytes from (%esi) to (%edi) as an
   8-byte move plus a 32-bit move at offset 8.
   USE_AS_STPCPY: %eax = %edi + 12.  */
L(StrncpyExit12):
|
|
movlpd (%esi), %xmm0
|
|
movl 8(%esi), %edx
|
|
movlpd %xmm0, (%edi)
|
|
movl %edx, 8(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 12(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit13: copy exactly 13 bytes from (%esi) to (%edi) with two
   overlapping 8-byte moves at offsets 0 and 5.
   USE_AS_STPCPY: %eax = %edi + 13.  */
L(StrncpyExit13):
|
|
movlpd (%esi), %xmm0
|
|
movlpd 5(%esi), %xmm1
|
|
movlpd %xmm0, (%edi)
|
|
movlpd %xmm1, 5(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 13(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit14: copy exactly 14 bytes from (%esi) to (%edi) with two
   overlapping 8-byte moves at offsets 0 and 6.
   USE_AS_STPCPY: %eax = %edi + 14.  */
L(StrncpyExit14):
|
|
movlpd (%esi), %xmm0
|
|
movlpd 6(%esi), %xmm1
|
|
movlpd %xmm0, (%edi)
|
|
movlpd %xmm1, 6(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 14(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit15: copy exactly 15 bytes from (%esi) to (%edi) with two
   overlapping 8-byte moves at offsets 0 and 7.
   USE_AS_STPCPY: %eax = %edi + 15.  */
L(StrncpyExit15):
|
|
movlpd (%esi), %xmm0
|
|
movlpd 7(%esi), %xmm1
|
|
movlpd %xmm0, (%edi)
|
|
movlpd %xmm1, 7(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 15(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit16: copy exactly 16 bytes from (%esi) to (%edi) with one
   unaligned 16-byte move.  USE_AS_STPCPY: %eax = %edi + 16.  */
L(StrncpyExit16):
|
|
movdqu (%esi), %xmm0
|
|
movdqu %xmm0, (%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 16(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit17: copy exactly 17 bytes from (%esi) to (%edi) as a
   16-byte move plus a byte move at offset 16.
   USE_AS_STPCPY: %eax = %edi + 17.  */
L(StrncpyExit17):
|
|
movdqu (%esi), %xmm0
|
|
movb 16(%esi), %cl
|
|
movdqu %xmm0, (%edi)
|
|
movb %cl, 16(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 17(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit18: copy exactly 18 bytes from (%esi) to (%edi) as a
   16-byte move plus a 16-bit move at offset 16.
   USE_AS_STPCPY: %eax = %edi + 18.  */
L(StrncpyExit18):
|
|
movdqu (%esi), %xmm0
|
|
movw 16(%esi), %cx
|
|
movdqu %xmm0, (%edi)
|
|
movw %cx, 16(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 18(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit19: copy exactly 19 bytes from (%esi) to (%edi) as a
   16-byte move plus a 32-bit move at offset 15 (overlapping byte 15).
   USE_AS_STPCPY: %eax = %edi + 19.  */
L(StrncpyExit19):
|
|
movdqu (%esi), %xmm0
|
|
movl 15(%esi), %ecx
|
|
movdqu %xmm0, (%edi)
|
|
movl %ecx, 15(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 19(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit20: copy exactly 20 bytes from (%esi) to (%edi) as a
   16-byte move plus a 32-bit move at offset 16.
   USE_AS_STPCPY: %eax = %edi + 20.  */
L(StrncpyExit20):
|
|
movdqu (%esi), %xmm0
|
|
movl 16(%esi), %ecx
|
|
movdqu %xmm0, (%edi)
|
|
movl %ecx, 16(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 20(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit21: copy exactly 21 bytes from (%esi) to (%edi) as a
   16-byte move, a 32-bit move at offset 16 and a byte move at offset 20.
   USE_AS_STPCPY: %eax = %edi + 21.  */
L(StrncpyExit21):
|
|
movdqu (%esi), %xmm0
|
|
movl 16(%esi), %ecx
|
|
movb 20(%esi), %dl
|
|
movdqu %xmm0, (%edi)
|
|
movl %ecx, 16(%edi)
|
|
movb %dl, 20(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 21(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit22: copy exactly 22 bytes from (%esi) to (%edi) as a
   16-byte move plus an 8-byte move at offset 14 (overlapping bytes
   14-15).  USE_AS_STPCPY: %eax = %edi + 22.  */
L(StrncpyExit22):
|
|
movdqu (%esi), %xmm0
|
|
movlpd 14(%esi), %xmm3
|
|
movdqu %xmm0, (%edi)
|
|
movlpd %xmm3, 14(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 22(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit23: copy exactly 23 bytes from (%esi) to (%edi) as a
   16-byte move plus an 8-byte move at offset 15 (overlapping byte 15).
   USE_AS_STPCPY: %eax = %edi + 23.  */
L(StrncpyExit23):
|
|
movdqu (%esi), %xmm0
|
|
movlpd 15(%esi), %xmm3
|
|
movdqu %xmm0, (%edi)
|
|
movlpd %xmm3, 15(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 23(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit24: copy exactly 24 bytes from (%esi) to (%edi) as a
   16-byte move plus an 8-byte move at offset 16.
   USE_AS_STPCPY: %eax = %edi + 24.  */
L(StrncpyExit24):
|
|
movdqu (%esi), %xmm0
|
|
movlpd 16(%esi), %xmm2
|
|
movdqu %xmm0, (%edi)
|
|
movlpd %xmm2, 16(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 24(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit25: copy exactly 25 bytes from (%esi) to (%edi) as a
   16-byte move, an 8-byte move at offset 16 and a byte move at offset
   24 (here the byte is loaded from the source, unlike L(Exit25)).
   USE_AS_STPCPY: %eax = %edi + 25.  */
L(StrncpyExit25):
|
|
movdqu (%esi), %xmm0
|
|
movlpd 16(%esi), %xmm2
|
|
movb 24(%esi), %cl
|
|
movdqu %xmm0, (%edi)
|
|
movlpd %xmm2, 16(%edi)
|
|
movb %cl, 24(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 25(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit26: copy exactly 26 bytes from (%esi) to (%edi) as a
   16-byte move, an 8-byte move at offset 16 and a 16-bit move at
   offset 24.  USE_AS_STPCPY: %eax = %edi + 26.  */
L(StrncpyExit26):
|
|
movdqu (%esi), %xmm0
|
|
movlpd 16(%esi), %xmm2
|
|
movw 24(%esi), %cx
|
|
movdqu %xmm0, (%edi)
|
|
movlpd %xmm2, 16(%edi)
|
|
movw %cx, 24(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 26(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit27: copy exactly 27 bytes from (%esi) to (%edi) as a
   16-byte move, an 8-byte move at offset 16 and a 32-bit move at
   offset 23 (overlapping byte 23).  USE_AS_STPCPY: %eax = %edi + 27.  */
L(StrncpyExit27):
|
|
movdqu (%esi), %xmm0
|
|
movlpd 16(%esi), %xmm2
|
|
movl 23(%esi), %ecx
|
|
movdqu %xmm0, (%edi)
|
|
movlpd %xmm2, 16(%edi)
|
|
movl %ecx, 23(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 27(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit28: copy exactly 28 bytes from (%esi) to (%edi) as a
   16-byte move, an 8-byte move at offset 16 and a 32-bit move at
   offset 24.  USE_AS_STPCPY: %eax = %edi + 28.  */
L(StrncpyExit28):
|
|
movdqu (%esi), %xmm0
|
|
movlpd 16(%esi), %xmm2
|
|
movl 24(%esi), %ecx
|
|
movdqu %xmm0, (%edi)
|
|
movlpd %xmm2, 16(%edi)
|
|
movl %ecx, 24(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 28(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit29: copy exactly 29 bytes from (%esi) to (%edi) with two
   overlapping unaligned 16-byte moves at offsets 0 and 13.
   USE_AS_STPCPY: %eax = %edi + 29.  */
L(StrncpyExit29):
|
|
movdqu (%esi), %xmm0
|
|
movdqu 13(%esi), %xmm2
|
|
movdqu %xmm0, (%edi)
|
|
movdqu %xmm2, 13(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 29(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit30: copy exactly 30 bytes from (%esi) to (%edi) with two
   overlapping unaligned 16-byte moves at offsets 0 and 14.
   USE_AS_STPCPY: %eax = %edi + 30.  */
L(StrncpyExit30):
|
|
movdqu (%esi), %xmm0
|
|
movdqu 14(%esi), %xmm2
|
|
movdqu %xmm0, (%edi)
|
|
movdqu %xmm2, 14(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 30(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit31: copy exactly 31 bytes from (%esi) to (%edi) with two
   overlapping unaligned 16-byte moves at offsets 0 and 15.
   USE_AS_STPCPY: %eax = %edi + 31.  */
L(StrncpyExit31):
|
|
movdqu (%esi), %xmm0
|
|
movdqu 15(%esi), %xmm2
|
|
movdqu %xmm0, (%edi)
|
|
movdqu %xmm2, 15(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 31(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit32: copy exactly 32 bytes from (%esi) to (%edi) with two
   unaligned 16-byte moves at offsets 0 and 16.
   USE_AS_STPCPY: %eax = %edi + 32.  */
L(StrncpyExit32):
|
|
movdqu (%esi), %xmm0
|
|
movdqu 16(%esi), %xmm2
|
|
movdqu %xmm0, (%edi)
|
|
movdqu %xmm2, 16(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 32(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* StrncpyExit33: copy exactly 33 bytes from (%esi) to (%edi) as two
   unaligned 16-byte moves plus a byte move at offset 32.
   NOTE(review): unlike L(StrncpyExit1)..L(StrncpyExit32) this entry has
   no USE_AS_STPCPY block setting %eax -- confirm that the stpncpy build
   never dispatches here, or that %eax is set by the caller.  */
L(StrncpyExit33):
|
|
movdqu (%esi), %xmm0
|
|
movdqu 16(%esi), %xmm2
|
|
movb 32(%esi), %cl
|
|
movdqu %xmm0, (%edi)
|
|
movdqu %xmm2, 16(%edi)
|
|
movb %cl, 32(%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill0: L(FillTable) entry 0 -- nothing is stored, just return.  */
L(Fill0):
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill1: store 1 byte from %dl at (%edi).  Assumes %edx is zero, as set
   by L(StrncpyFillTailWithZero) before it dispatches via L(FillTable).  */
L(Fill1):
|
|
movb %dl, (%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill2: store 2 bytes from %dx at (%edi).  Assumes %edx is zero, as set
   by L(StrncpyFillTailWithZero) before it dispatches via L(FillTable).  */
L(Fill2):
|
|
movw %dx, (%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill3: cover bytes 0-2 at (%edi) with a single 32-bit store that
   starts one byte early, at -1(%edi).  Assumes %edx is zero (set by
   L(StrncpyFillTailWithZero)) and that the byte at -1(%edi) may be
   rewritten with zero -- presumably it already is zero; confirm.  */
L(Fill3):
|
|
movl %edx, -1(%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill4: store 4 bytes from %edx at (%edi).  Assumes %edx is zero, as
   set by L(StrncpyFillTailWithZero) before it dispatches via
   L(FillTable).  */
L(Fill4):
|
|
movl %edx, (%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill5: store 5 bytes at (%edi): 4 from %edx plus 1 from %dl at offset
   4.  Assumes %edx is zero, as set by L(StrncpyFillTailWithZero).  */
L(Fill5):
|
|
movl %edx, (%edi)
|
|
movb %dl, 4(%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill6: store 6 bytes at (%edi): 4 from %edx plus 2 from %dx at offset
   4.  Assumes %edx is zero, as set by L(StrncpyFillTailWithZero).  */
L(Fill6):
|
|
movl %edx, (%edi)
|
|
movw %dx, 4(%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill7: cover bytes 0-6 at (%edi) with a single 8-byte store that
   starts one byte early, at -1(%edi).  Assumes %xmm0 is zero (set by
   L(StrncpyFillTailWithZero)) and that the byte at -1(%edi) may be
   rewritten with zero -- presumably it already is zero; confirm.  */
L(Fill7):
|
|
movlpd %xmm0, -1(%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill8: store 8 bytes from the low half of %xmm0 at (%edi).  Assumes
   %xmm0 is zero, as set by L(StrncpyFillTailWithZero).  */
L(Fill8):
|
|
movlpd %xmm0, (%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill9: store 9 bytes at (%edi): 8 from %xmm0 plus 1 from %dl at offset
   8.  Assumes %xmm0 and %edx are zero, as set by
   L(StrncpyFillTailWithZero).  */
L(Fill9):
|
|
movlpd %xmm0, (%edi)
|
|
movb %dl, 8(%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill10: store 10 bytes at (%edi): 8 from %xmm0 plus 2 from %dx at
   offset 8.  Assumes %xmm0 and %edx are zero, as set by
   L(StrncpyFillTailWithZero).  */
L(Fill10):
|
|
movlpd %xmm0, (%edi)
|
|
movw %dx, 8(%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill11: store 11 bytes at (%edi): 8 from %xmm0 plus 4 from %edx at
   offset 7 (overlapping byte 7).  Assumes %xmm0 and %edx are zero, as
   set by L(StrncpyFillTailWithZero).  */
L(Fill11):
|
|
movlpd %xmm0, (%edi)
|
|
movl %edx, 7(%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill12: store 12 bytes at (%edi): 8 from %xmm0 plus 4 from %edx at
   offset 8.  Assumes %xmm0 and %edx are zero, as set by
   L(StrncpyFillTailWithZero).  */
L(Fill12):
|
|
movlpd %xmm0, (%edi)
|
|
movl %edx, 8(%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill13: store 13 bytes at (%edi) with two overlapping 8-byte stores
   from %xmm0 at offsets 0 and 5.  Assumes %xmm0 is zero, as set by
   L(StrncpyFillTailWithZero).  */
L(Fill13):
|
|
movlpd %xmm0, (%edi)
|
|
movlpd %xmm0, 5(%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill14: store 14 bytes at (%edi) with two overlapping 8-byte stores
   from %xmm0 at offsets 0 and 6.  Assumes %xmm0 is zero, as set by
   L(StrncpyFillTailWithZero).  */
L(Fill14):
|
|
movlpd %xmm0, (%edi)
|
|
movlpd %xmm0, 6(%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill15: cover bytes 0-14 at (%edi) with a single unaligned 16-byte
   store that starts one byte early, at -1(%edi).  Assumes %xmm0 is zero
   (set by L(StrncpyFillTailWithZero)) and that the byte at -1(%edi) may
   be rewritten with zero -- presumably it already is zero; confirm.  */
L(Fill15):
|
|
movdqu %xmm0, -1(%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Fill16: store 16 bytes from %xmm0 at (%edi) with one unaligned move.
   Assumes %xmm0 is zero, as set by L(StrncpyFillTailWithZero).  */
L(Fill16):
|
|
movdqu %xmm0, (%edi)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* CopyFrom1To16BytesUnalignedXmm2: store the 16 bytes held in %xmm2 at
   (%edi, %ecx) with an unaligned move.  There is no RETURN or jump:
   execution falls through the alignment padding into
   L(CopyFrom1To16BytesXmmExit).  */
L(CopyFrom1To16BytesUnalignedXmm2):
|
|
movdqu %xmm2, (%edi, %ecx)
|
|
|
|
.p2align 4
|
|
/* CopyFrom1To16BytesXmmExit: turn the bit mask in %edx into a byte
   index and set up %edi/%ebx for the zero-fill code that follows.
     %edx = index of the lowest set bit of the incoming %edx (presumably
            the position of the NUL in the block just stored; %edx must
            be non-zero for bsf to give a defined result)
     %ebx = %ebx + 15 - index
     %edi = %edi + %ecx + index + 1   (first byte after that position)
     USE_AS_STPCPY: %eax = %edi + %ecx + index
   There is no RETURN: execution falls through into
   L(StrncpyFillTailWithZero).  */
L(CopyFrom1To16BytesXmmExit):
|
|
bsf %edx, %edx
|
|
add $15, %ebx
|
|
add %ecx, %edi
|
|
#ifdef USE_AS_STPCPY
|
|
lea (%edi, %edx), %eax
|
|
#endif
|
|
sub %edx, %ebx
|
|
lea 1(%edi, %edx), %edi
|
|
|
|
.p2align 4
|
|
/* StrncpyFillTailWithZero: store zero bytes starting at (%edi), with the
   byte count taken from %ebx (presumably the part of the strncpy count
   not yet written -- confirm against the callers).
   Zeroes %xmm0 and %edx, which the L(FillN) entries store from, and
   overwrites %esi.  Counts of 16 or less go straight to the L(FillTable)
   dispatch.  Larger counts store 16 bytes unaligned, round %edi down to a
   16-byte boundary (adding the rounded-off bytes back to %ebx), then use
   aligned 64/32/16-byte stores and finish through L(FillTable) with
   %ebx in the range 0..15.
   BRANCH_TO_JMPTBL_ENTRY is defined outside this chunk; presumably it
   jumps to the table entry selected by %ebx with scale 4.  */
L(StrncpyFillTailWithZero):
|
|
pxor %xmm0, %xmm0
|
|
xor %edx, %edx
|
|
/* %ebx <= 16 (unsigned): the final dispatch alone covers it.  */
sub $16, %ebx
|
|
jbe L(StrncpyFillExit)
|
|
|
|
movdqu %xmm0, (%edi)
|
|
add $16, %edi
|
|
|
|
/* Align %edi down to 16 bytes; %esi = misalignment, re-added to %ebx so
   the bytes between the aligned address and the old %edi are counted
   (they are zeroed a second time).  */
mov %edi, %esi
|
|
and $0xf, %esi
|
|
sub %esi, %edi
|
|
add %esi, %ebx
|
|
sub $64, %ebx
|
|
jb L(StrncpyFillLess64)
|
|
|
|
/* Main loop: 64 zero bytes per iteration using aligned stores, repeated
   while the sub does not borrow.  */
L(StrncpyFillLoopMovdqa):
|
|
movdqa %xmm0, (%edi)
|
|
movdqa %xmm0, 16(%edi)
|
|
movdqa %xmm0, 32(%edi)
|
|
movdqa %xmm0, 48(%edi)
|
|
add $64, %edi
|
|
sub $64, %ebx
|
|
jae L(StrncpyFillLoopMovdqa)
|
|
|
|
/* Here %ebx = (bytes left) - 64, i.e. negative.  */
L(StrncpyFillLess64):
|
|
add $32, %ebx
|
|
jl L(StrncpyFillLess32)
|
|
movdqa %xmm0, (%edi)
|
|
movdqa %xmm0, 16(%edi)
|
|
add $32, %edi
|
|
sub $16, %ebx
|
|
jl L(StrncpyFillExit)
|
|
movdqa %xmm0, (%edi)
|
|
add $16, %edi
|
|
BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
|
|
|
|
/* Here %ebx = (bytes left) - 32, i.e. negative.  */
L(StrncpyFillLess32):
|
|
add $16, %ebx
|
|
jl L(StrncpyFillExit)
|
|
movdqa %xmm0, (%edi)
|
|
add $16, %edi
|
|
BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
|
|
|
|
/* Here %ebx = (bytes left) - 16; undo the bias and dispatch.  */
L(StrncpyFillExit):
|
|
add $16, %ebx
|
|
BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
|
|
|
|
.p2align 4
|
|
/* AlignedLeaveCase2OrCase3: leave path for the aligned-store variant,
   with four 16-byte blocks held in %xmm4..%xmm7.
   If %edx is non-zero, go to L(Aligned64LeaveCase2).  Otherwise fall
   into L(Aligned64LeaveCase3), which stores %xmm4..%xmm7 one at a time
   with aligned moves at (%edi), 16, 32 and 48, adjusting %ebx before
   each store and branching to L(CopyFrom1To16BytesCase3) (defined
   outside this chunk) as soon as the adjustment goes negative/borrows.
   %ecx is set to (%ebx + 64) rounded down to a multiple of 16 before
   %ebx is modified; presumably L(CopyFrom1To16BytesCase3) uses it as an
   offset -- confirm there.
   If all four blocks are stored: USE_AS_STPCPY sets %eax = %edi + 64,
   then RETURN.  */
L(AlignedLeaveCase2OrCase3):
|
|
test %edx, %edx
|
|
jnz L(Aligned64LeaveCase2)
|
|
L(Aligned64LeaveCase3):
|
|
lea 64(%ebx), %ecx
|
|
and $-16, %ecx
|
|
add $48, %ebx
|
|
jl L(CopyFrom1To16BytesCase3)
|
|
movdqa %xmm4, (%edi)
|
|
sub $16, %ebx
|
|
jb L(CopyFrom1To16BytesCase3)
|
|
movdqa %xmm5, 16(%edi)
|
|
sub $16, %ebx
|
|
jb L(CopyFrom1To16BytesCase3)
|
|
movdqa %xmm6, 32(%edi)
|
|
sub $16, %ebx
|
|
jb L(CopyFrom1To16BytesCase3)
|
|
movdqa %xmm7, 48(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 64(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Aligned64LeaveCase2: reached from L(AlignedLeaveCase2OrCase3) when
   %edx was non-zero.  Walks %xmm4..%xmm7 in order; for each block it
   builds a mask of zero bytes in %edx (pcmpeqb against zero, then
   pmovmskb), adjusts %ebx, and
     - branches to L(CopyFrom1To16BytesCase2OrCase3) when the %ebx
       adjustment gives a result <= 0, else
     - branches to L(CopyFrom1To16BytesXmmN) when the mask is non-zero,
     - otherwise stores the block with an aligned move and continues.
   %ecx tracks the byte offset of the current block (0, 16, 32).
   For the last block (%xmm7) %edi/%esi are advanced to that block, %edx
   becomes the index of the first zero byte, and the code either goes to
   L(CopyFrom1To16BytesExit) (index < %ebx, unsigned) or dispatches on
   %ebx through L(ExitStrncpyTable).
   All branch targets other than L(ExitStrncpyTable) are defined outside
   this chunk.  */
L(Aligned64LeaveCase2):
|
|
pxor %xmm0, %xmm0
|
|
xor %ecx, %ecx
|
|
pcmpeqb %xmm4, %xmm0
|
|
pmovmskb %xmm0, %edx
|
|
add $48, %ebx
|
|
jle L(CopyFrom1To16BytesCase2OrCase3)
|
|
test %edx, %edx
|
|
jnz L(CopyFrom1To16BytesXmm4)
|
|
|
|
/* The mask was zero, so %xmm0 is all-zero again and can be reused as
   the comparison operand without another pxor.  */
pcmpeqb %xmm5, %xmm0
|
|
pmovmskb %xmm0, %edx
|
|
movdqa %xmm4, (%edi)
|
|
add $16, %ecx
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
test %edx, %edx
|
|
jnz L(CopyFrom1To16BytesXmm5)
|
|
|
|
pcmpeqb %xmm6, %xmm0
|
|
pmovmskb %xmm0, %edx
|
|
movdqa %xmm5, 16(%edi)
|
|
add $16, %ecx
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
test %edx, %edx
|
|
jnz L(CopyFrom1To16BytesXmm6)
|
|
|
|
pcmpeqb %xmm7, %xmm0
|
|
pmovmskb %xmm0, %edx
|
|
movdqa %xmm6, 32(%edi)
|
|
/* %ecx is 32 here, so both pointers move to the fourth block (+48).  */
lea 16(%edi, %ecx), %edi
|
|
lea 16(%esi, %ecx), %esi
|
|
/* Presumably %edx is non-zero here because the first three blocks had
   no zero byte while the entry mask did -- confirm against the caller.  */
bsf %edx, %edx
|
|
cmp %ebx, %edx
|
|
jb L(CopyFrom1To16BytesExit)
|
|
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
|
|
|
|
.p2align 4
|
|
/* UnalignedLeaveCase2OrCase3: leave path for the unaligned-store
   variant, with four 16-byte blocks held in %xmm4..%xmm7.
   If %edx is non-zero, go to L(Unaligned64LeaveCase2).  Otherwise fall
   into L(Unaligned64LeaveCase3), which stores %xmm4..%xmm7 one at a
   time with unaligned moves at (%edi), 16, 32 and 48, adjusting %ebx
   before each store and branching to L(CopyFrom1To16BytesCase3)
   (defined outside this chunk) as soon as the adjustment goes
   negative/borrows.
   %ecx is set to (%ebx + 64) rounded down to a multiple of 16 before
   %ebx is modified; presumably L(CopyFrom1To16BytesCase3) uses it as an
   offset -- confirm there.
   If all four blocks are stored: USE_AS_STPCPY sets %eax = %edi + 64,
   then RETURN.  */
L(UnalignedLeaveCase2OrCase3):
|
|
test %edx, %edx
|
|
jnz L(Unaligned64LeaveCase2)
|
|
L(Unaligned64LeaveCase3):
|
|
lea 64(%ebx), %ecx
|
|
and $-16, %ecx
|
|
add $48, %ebx
|
|
jl L(CopyFrom1To16BytesCase3)
|
|
movdqu %xmm4, (%edi)
|
|
sub $16, %ebx
|
|
jb L(CopyFrom1To16BytesCase3)
|
|
movdqu %xmm5, 16(%edi)
|
|
sub $16, %ebx
|
|
jb L(CopyFrom1To16BytesCase3)
|
|
movdqu %xmm6, 32(%edi)
|
|
sub $16, %ebx
|
|
jb L(CopyFrom1To16BytesCase3)
|
|
movdqu %xmm7, 48(%edi)
|
|
#ifdef USE_AS_STPCPY
|
|
lea 64(%edi), %eax
|
|
#endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Unaligned64LeaveCase2: reached from L(UnalignedLeaveCase2OrCase3)
   when %edx was non-zero.  Walks %xmm4..%xmm7 in order; for each block
   it builds a mask of zero bytes in %edx (pcmpeqb against zero, then
   pmovmskb), adjusts %ebx, and
     - branches to L(CopyFrom1To16BytesCase2OrCase3) when the %ebx
       adjustment gives a result <= 0, else
     - branches to L(CopyFrom1To16BytesUnalignedXmmN) when the mask is
       non-zero,
     - otherwise stores the block with an unaligned move and continues.
   %ecx tracks the byte offset of the current block (0, 16, 32).
   For the last block (%xmm7) %edi/%esi are advanced to that block, %edx
   becomes the index of the first zero byte, and the code either goes to
   L(CopyFrom1To16BytesExit) (index < %ebx, unsigned) or dispatches on
   %ebx through L(ExitStrncpyTable).
   All branch targets other than L(ExitStrncpyTable) are defined outside
   this chunk.  */
L(Unaligned64LeaveCase2):
|
|
pxor %xmm0, %xmm0
|
|
xor %ecx, %ecx
|
|
pcmpeqb %xmm4, %xmm0
|
|
pmovmskb %xmm0, %edx
|
|
add $48, %ebx
|
|
jle L(CopyFrom1To16BytesCase2OrCase3)
|
|
test %edx, %edx
|
|
jnz L(CopyFrom1To16BytesUnalignedXmm4)
|
|
|
|
/* The mask was zero, so %xmm0 is all-zero again and can be reused as
   the comparison operand without another pxor.  */
pcmpeqb %xmm5, %xmm0
|
|
pmovmskb %xmm0, %edx
|
|
movdqu %xmm4, (%edi)
|
|
add $16, %ecx
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
test %edx, %edx
|
|
jnz L(CopyFrom1To16BytesUnalignedXmm5)
|
|
|
|
pcmpeqb %xmm6, %xmm0
|
|
pmovmskb %xmm0, %edx
|
|
movdqu %xmm5, 16(%edi)
|
|
add $16, %ecx
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
test %edx, %edx
|
|
jnz L(CopyFrom1To16BytesUnalignedXmm6)
|
|
|
|
pcmpeqb %xmm7, %xmm0
|
|
pmovmskb %xmm0, %edx
|
|
movdqu %xmm6, 32(%edi)
|
|
/* %ecx is 32 here, so both pointers move to the fourth block (+48).  */
lea 16(%edi, %ecx), %edi
|
|
lea 16(%esi, %ecx), %esi
|
|
/* Presumably %edx is non-zero here because the first three blocks had
   no zero byte while the entry mask did -- confirm against the caller.  */
bsf %edx, %edx
|
|
cmp %ebx, %edx
|
|
jb L(CopyFrom1To16BytesExit)
|
|
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
|
|
|
|
.p2align 4
|
|
/* ExitZero: return with %eax = %edi and no bytes stored (presumably the
   zero-length strncpy case; the branch to this label is outside this
   chunk).  */
L(ExitZero):
|
|
movl %edi, %eax
|
|
RETURN
|
|
#endif
|
|
|
|
END (STRCPY)
|
|
|
|
.p2align 4
|
|
.section .rodata
|
|
/* ExitTable: jump table with 32 entries; entry i (0-based) targets
   L(Exit<i+1>), i.e. the handler that writes i+1 bytes.
   JMPTBL is defined outside this chunk; presumably it emits each target
   as an offset relative to the table label.
   NOTE(review): the .p2align 4 directive above is issued before the
   switch to .rodata, so it pads the preceding section rather than
   aligning this table -- confirm that is intended.  */
L(ExitTable):
|
|
.int JMPTBL(L(Exit1), L(ExitTable))
|
|
.int JMPTBL(L(Exit2), L(ExitTable))
|
|
.int JMPTBL(L(Exit3), L(ExitTable))
|
|
.int JMPTBL(L(Exit4), L(ExitTable))
|
|
.int JMPTBL(L(Exit5), L(ExitTable))
|
|
.int JMPTBL(L(Exit6), L(ExitTable))
|
|
.int JMPTBL(L(Exit7), L(ExitTable))
|
|
.int JMPTBL(L(Exit8), L(ExitTable))
|
|
.int JMPTBL(L(Exit9), L(ExitTable))
|
|
.int JMPTBL(L(Exit10), L(ExitTable))
|
|
.int JMPTBL(L(Exit11), L(ExitTable))
|
|
.int JMPTBL(L(Exit12), L(ExitTable))
|
|
.int JMPTBL(L(Exit13), L(ExitTable))
|
|
.int JMPTBL(L(Exit14), L(ExitTable))
|
|
.int JMPTBL(L(Exit15), L(ExitTable))
|
|
.int JMPTBL(L(Exit16), L(ExitTable))
|
|
.int JMPTBL(L(Exit17), L(ExitTable))
|
|
.int JMPTBL(L(Exit18), L(ExitTable))
|
|
.int JMPTBL(L(Exit19), L(ExitTable))
|
|
.int JMPTBL(L(Exit20), L(ExitTable))
|
|
.int JMPTBL(L(Exit21), L(ExitTable))
|
|
.int JMPTBL(L(Exit22), L(ExitTable))
|
|
.int JMPTBL(L(Exit23), L(ExitTable))
|
|
.int JMPTBL(L(Exit24), L(ExitTable))
|
|
.int JMPTBL(L(Exit25), L(ExitTable))
|
|
.int JMPTBL(L(Exit26), L(ExitTable))
|
|
.int JMPTBL(L(Exit27), L(ExitTable))
|
|
.int JMPTBL(L(Exit28), L(ExitTable))
|
|
.int JMPTBL(L(Exit29), L(ExitTable))
|
|
.int JMPTBL(L(Exit30), L(ExitTable))
|
|
.int JMPTBL(L(Exit31), L(ExitTable))
|
|
.int JMPTBL(L(Exit32), L(ExitTable))
|
|
#ifdef USE_AS_STRNCPY
|
|
/* ExitStrncpyTable: jump table with 34 entries, built only under
   USE_AS_STRNCPY.  Entry 0 targets L(Exit0) (not defined in this chunk);
   entry n for n = 1..33 targets L(StrncpyExit<n>), the handler that
   copies exactly n bytes.
   JMPTBL is defined outside this chunk; presumably it emits each target
   as an offset relative to the table label.  */
L(ExitStrncpyTable):
|
|
.int JMPTBL(L(Exit0), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
|
|
.int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
|
|
|
|
.p2align 4
|
|
/* FillTable: jump table with 17 entries, built only under
   USE_AS_STRNCPY.  Entry n (0..16) targets L(Fill<n>), the handler that
   covers n remaining bytes at (%edi).  It is indexed by %ebx from
   L(StrncpyFillTailWithZero) and its sub-labels.
   JMPTBL is defined outside this chunk; presumably it emits each target
   as an offset relative to the table label.  */
L(FillTable):
|
|
.int JMPTBL(L(Fill0), L(FillTable))
|
|
.int JMPTBL(L(Fill1), L(FillTable))
|
|
.int JMPTBL(L(Fill2), L(FillTable))
|
|
.int JMPTBL(L(Fill3), L(FillTable))
|
|
.int JMPTBL(L(Fill4), L(FillTable))
|
|
.int JMPTBL(L(Fill5), L(FillTable))
|
|
.int JMPTBL(L(Fill6), L(FillTable))
|
|
.int JMPTBL(L(Fill7), L(FillTable))
|
|
.int JMPTBL(L(Fill8), L(FillTable))
|
|
.int JMPTBL(L(Fill9), L(FillTable))
|
|
.int JMPTBL(L(Fill10), L(FillTable))
|
|
.int JMPTBL(L(Fill11), L(FillTable))
|
|
.int JMPTBL(L(Fill12), L(FillTable))
|
|
.int JMPTBL(L(Fill13), L(FillTable))
|
|
.int JMPTBL(L(Fill14), L(FillTable))
|
|
.int JMPTBL(L(Fill15), L(FillTable))
|
|
.int JMPTBL(L(Fill16), L(FillTable))
|
|
#endif
|