/* Copyright (c) 2011, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

/* Everything from here down to the matching #endif (which lies further
   down, inside the function body) is compiled only when USE_AS_STRCAT is
   not defined.  */
#ifndef USE_AS_STRCAT

/* L(label) pastes ".L" in front of the label name; in GAS a ".L" prefix
   marks a symbol as local (not exported).  */
# ifndef L
# define L(label) .L##label
# endif

/* Thin wrappers around the assembler's .cfi_* unwind directives.  Each one
   is defined only if the including file has not already provided it.  */
# ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
# endif
# ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
# endif
# ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
# endif
# ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
# endif
# ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
# endif

/* ENTRY(name): declare `name` as a global function symbol, align it with
   .p2align 4, emit its label and open a CFI region.  */
# ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
	name: \
	cfi_startproc
# endif

/* END(name): close the CFI region opened by ENTRY and record the symbol
   size as the distance from `name` to the current location.  */
# ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
# endif

/* CFI bookkeeping for a 4-byte push/pop of REG: move the CFA offset by
   +4 / -4 and record REG as saved at offset 0 / as restored.  */
# define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)
# define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

/* pushl/popl paired with the matching CFI annotation.  */
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)

/* Name of the generated function; an including file may pre-define STRCPY
   to build a differently named variant.  */
# ifndef STRCPY
# define STRCPY strcpy
# endif

/* PARMS is the %esp-relative offset of the first argument as seen right
   after ENTRANCE has run (the function body loads STR1(%esp) immediately
   after ENTRANCE).  In the USE_AS_STRNCPY build ENTRANCE pushes %ebx, so
   PARMS is 8 instead of 4 and RETURN/RETURN1 pop %ebx before `ret`.
   RETURN1 additionally pops %edi first.
   The CFI_PUSH directives placed after `ret` emit no instructions; they
   re-record the registers as pushed for whatever code follows the macro
   -- presumably so unwind info stays correct for the other code paths
   (NOTE(review): intent inferred, confirm).  */
# ifdef USE_AS_STRNCPY
# define PARMS 8
# define ENTRANCE PUSH (%ebx)
# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx);
# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
# else
# define PARMS 4
# define ENTRANCE
# define RETURN ret
# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
# endif

/* SAVE_RESULT(n) / SAVE_RESULT_TAIL(n) load %eax (presumably the value
   the function returns -- confirm against the exit paths):
     USE_AS_STPCPY : %eax = %edx + n   (both macros)
     otherwise     : %eax = %edi       (SAVE_RESULT, n unused)
                     %eax = %edx       (SAVE_RESULT_TAIL, n unused)  */
# ifdef USE_AS_STPCPY
# define SAVE_RESULT(n) lea n(%edx), %eax
# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax
# else
# define SAVE_RESULT(n) movl %edi, %eax
# define SAVE_RESULT_TAIL(n) movl %edx, %eax
# endif

/* Stack offsets of the three arguments, 4 bytes apart.  The function body
   loads STR1 into %edx, STR2 into %ecx and (USE_AS_STRNCPY only) LEN into
   %ebx.  */
# define STR1 PARMS
# define STR2 STR1+4
# define LEN STR2+4

/* In this code the following instructions are used for copying:
	movb	- 1 byte
	movw	- 2 byte
	movl	- 4 byte
	movlpd	- 8 byte
	movaps	- 16 byte - requires 16 byte alignment
	of source and destination addresses.
*/ .text ENTRY (STRCPY) ENTRANCE mov STR1(%esp), %edx mov STR2(%esp), %ecx # ifdef USE_AS_STRNCPY movl LEN(%esp), %ebx cmp $8, %ebx jbe L(StrncpyExit8Bytes) # endif cmpb $0, (%ecx) jz L(ExitTail1) cmpb $0, 1(%ecx) jz L(ExitTail2) cmpb $0, 2(%ecx) jz L(ExitTail3) cmpb $0, 3(%ecx) jz L(ExitTail4) cmpb $0, 4(%ecx) jz L(ExitTail5) cmpb $0, 5(%ecx) jz L(ExitTail6) cmpb $0, 6(%ecx) jz L(ExitTail7) cmpb $0, 7(%ecx) jz L(ExitTail8) # ifdef USE_AS_STRNCPY cmp $16, %ebx jb L(StrncpyExit15Bytes) # endif cmpb $0, 8(%ecx) jz L(ExitTail9) cmpb $0, 9(%ecx) jz L(ExitTail10) cmpb $0, 10(%ecx) jz L(ExitTail11) cmpb $0, 11(%ecx) jz L(ExitTail12) cmpb $0, 12(%ecx) jz L(ExitTail13) cmpb $0, 13(%ecx) jz L(ExitTail14) cmpb $0, 14(%ecx) jz L(ExitTail15) # if defined USE_AS_STRNCPY && !defined USE_AS_STRLCPY cmp $16, %ebx je L(ExitTail16) # endif cmpb $0, 15(%ecx) jz L(ExitTail16) # if defined USE_AS_STRNCPY && defined USE_AS_STRLCPY cmp $16, %ebx je L(StrlcpyExitTail16) # endif PUSH (%edi) # ifndef USE_AS_STRLCPY mov %edx, %edi # else mov %ecx, %edi # endif #endif PUSH (%esi) #ifdef USE_AS_STRNCPY mov %ecx, %esi sub $16, %ebx and $0xf, %esi /* add 16 bytes ecx_offset to ebx */ add %esi, %ebx #endif lea 16(%ecx), %esi and $-16, %esi pxor %xmm0, %xmm0 movlpd (%ecx), %xmm1 movlpd %xmm1, (%edx) pcmpeqb (%esi), %xmm0 movlpd 8(%ecx), %xmm1 movlpd %xmm1, 8(%edx) pmovmskb %xmm0, %eax sub %ecx, %esi #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) #endif test %eax, %eax jnz L(CopyFrom1To16Bytes) mov %edx, %eax lea 16(%edx), %edx and $-16, %edx sub %edx, %eax #ifdef USE_AS_STRNCPY add %eax, %esi lea -1(%esi), %esi and $1<<31, %esi test %esi, %esi jnz L(ContinueCopy) lea 16(%ebx), %ebx L(ContinueCopy): #endif sub %eax, %ecx mov %ecx, %eax and $0xf, %eax mov $0, %esi /* case: ecx_offset == edx_offset */ jz L(Align16Both) cmp $8, %eax jae L(ShlHigh8) cmp $1, %eax je L(Shl1) cmp $2, %eax je L(Shl2) cmp $3, %eax je L(Shl3) cmp $4, %eax je L(Shl4) cmp $5, %eax je L(Shl5) cmp $6, 
%eax je L(Shl6) jmp L(Shl7) L(ShlHigh8): je L(Shl8) cmp $9, %eax je L(Shl9) cmp $10, %eax je L(Shl10) cmp $11, %eax je L(Shl11) cmp $12, %eax je L(Shl12) cmp $13, %eax je L(Shl13) cmp $14, %eax je L(Shl14) jmp L(Shl15) L(Align16Both): movaps (%ecx), %xmm1 movaps 16(%ecx), %xmm2 movaps %xmm1, (%edx) pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax lea 16(%esi), %esi #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) #endif test %eax, %eax jnz L(CopyFrom1To16Bytes) movaps 16(%ecx, %esi), %xmm3 movaps %xmm2, (%edx, %esi) pcmpeqb %xmm3, %xmm0 pmovmskb %xmm0, %eax lea 16(%esi), %esi #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) #endif test %eax, %eax jnz L(CopyFrom1To16Bytes) movaps 16(%ecx, %esi), %xmm4 movaps %xmm3, (%edx, %esi) pcmpeqb %xmm4, %xmm0 pmovmskb %xmm0, %eax lea 16(%esi), %esi #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) #endif test %eax, %eax jnz L(CopyFrom1To16Bytes) movaps 16(%ecx, %esi), %xmm1 movaps %xmm4, (%edx, %esi) pcmpeqb %xmm1, %xmm0 pmovmskb %xmm0, %eax lea 16(%esi), %esi #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) #endif test %eax, %eax jnz L(CopyFrom1To16Bytes) movaps 16(%ecx, %esi), %xmm2 movaps %xmm1, (%edx, %esi) pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax lea 16(%esi), %esi #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) #endif test %eax, %eax jnz L(CopyFrom1To16Bytes) movaps 16(%ecx, %esi), %xmm3 movaps %xmm2, (%edx, %esi) pcmpeqb %xmm3, %xmm0 pmovmskb %xmm0, %eax lea 16(%esi), %esi #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) #endif test %eax, %eax jnz L(CopyFrom1To16Bytes) movaps %xmm3, (%edx, %esi) mov %ecx, %eax lea 16(%ecx, %esi), %ecx and $-0x40, %ecx sub %ecx, %eax sub %eax, %edx #ifdef USE_AS_STRNCPY lea 112(%ebx, %eax), %ebx #endif mov $-0x40, %esi L(Aligned64Loop): movaps (%ecx), %xmm2 movaps 32(%ecx), %xmm3 movaps %xmm2, %xmm4 movaps 16(%ecx), %xmm5 movaps %xmm3, %xmm6 
movaps 48(%ecx), %xmm7 pminub %xmm5, %xmm2 pminub %xmm7, %xmm3 pminub %xmm2, %xmm3 lea 64(%edx), %edx pcmpeqb %xmm0, %xmm3 lea 64(%ecx), %ecx pmovmskb %xmm3, %eax #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeaveCase2OrCase3) #endif test %eax, %eax jnz L(Aligned64Leave) movaps %xmm4, -64(%edx) movaps %xmm5, -48(%edx) movaps %xmm6, -32(%edx) movaps %xmm7, -16(%edx) jmp L(Aligned64Loop) L(Aligned64Leave): #ifdef USE_AS_STRNCPY lea 48(%ebx), %ebx #endif pcmpeqb %xmm4, %xmm0 pmovmskb %xmm0, %eax test %eax, %eax jnz L(CopyFrom1To16Bytes) pcmpeqb %xmm5, %xmm0 #ifdef USE_AS_STRNCPY lea -16(%ebx), %ebx #endif pmovmskb %xmm0, %eax movaps %xmm4, -64(%edx) lea 16(%esi), %esi test %eax, %eax jnz L(CopyFrom1To16Bytes) pcmpeqb %xmm6, %xmm0 #ifdef USE_AS_STRNCPY lea -16(%ebx), %ebx #endif pmovmskb %xmm0, %eax movaps %xmm5, -48(%edx) lea 16(%esi), %esi test %eax, %eax jnz L(CopyFrom1To16Bytes) movaps %xmm6, -32(%edx) pcmpeqb %xmm7, %xmm0 #ifdef USE_AS_STRNCPY lea -16(%ebx), %ebx #endif pmovmskb %xmm0, %eax lea 16(%esi), %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl1): movaps -1(%ecx), %xmm1 movaps 15(%ecx), %xmm2 L(Shl1Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit1Case2OrCase3) #endif test %eax, %eax jnz L(Shl1LoopExit) palignr $1, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 31(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit1Case2OrCase3) #endif test %eax, %eax jnz L(Shl1LoopExit) palignr $1, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 31(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit1Case2OrCase3) #endif test %eax, %eax jnz L(Shl1LoopExit) palignr $1, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 31(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 
lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit1Case2OrCase3) #endif test %eax, %eax jnz L(Shl1LoopExit) palignr $1, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 31(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -15(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -1(%ecx), %xmm1 L(Shl1LoopStart): movaps 15(%ecx), %xmm2 movaps 31(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 47(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 63(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $1, %xmm4, %xmm5 palignr $1, %xmm3, %xmm4 test %eax, %eax jnz L(Shl1Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave1) #endif palignr $1, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $1, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl1LoopStart) L(Shl1LoopExit): movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) movlpd 7(%ecx), %xmm0 movlpd %xmm0, 7(%edx) mov $15, %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl2): movaps -2(%ecx), %xmm1 movaps 14(%ecx), %xmm2 L(Shl2Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit2Case2OrCase3) #endif test %eax, %eax jnz L(Shl2LoopExit) palignr $2, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 30(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit2Case2OrCase3) #endif test %eax, %eax jnz L(Shl2LoopExit) palignr $2, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 30(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit2Case2OrCase3) 
#endif test %eax, %eax jnz L(Shl2LoopExit) palignr $2, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 30(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit2Case2OrCase3) #endif test %eax, %eax jnz L(Shl2LoopExit) palignr $2, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 30(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -14(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -2(%ecx), %xmm1 L(Shl2LoopStart): movaps 14(%ecx), %xmm2 movaps 30(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 46(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 62(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $2, %xmm4, %xmm5 palignr $2, %xmm3, %xmm4 test %eax, %eax jnz L(Shl2Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave2) #endif palignr $2, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $2, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl2LoopStart) L(Shl2LoopExit): movlpd (%ecx), %xmm0 movlpd 6(%ecx), %xmm1 movlpd %xmm0, (%edx) movlpd %xmm1, 6(%edx) mov $14, %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl3): movaps -3(%ecx), %xmm1 movaps 13(%ecx), %xmm2 L(Shl3Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit3Case2OrCase3) #endif test %eax, %eax jnz L(Shl3LoopExit) palignr $3, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 29(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit3Case2OrCase3) #endif test %eax, %eax jnz L(Shl3LoopExit) palignr $3, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 29(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx 
pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit3Case2OrCase3) #endif test %eax, %eax jnz L(Shl3LoopExit) palignr $3, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 29(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit3Case2OrCase3) #endif test %eax, %eax jnz L(Shl3LoopExit) palignr $3, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 29(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -13(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -3(%ecx), %xmm1 L(Shl3LoopStart): movaps 13(%ecx), %xmm2 movaps 29(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 45(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 61(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $3, %xmm4, %xmm5 palignr $3, %xmm3, %xmm4 test %eax, %eax jnz L(Shl3Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave3) #endif palignr $3, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $3, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl3LoopStart) L(Shl3LoopExit): movlpd (%ecx), %xmm0 movlpd 5(%ecx), %xmm1 movlpd %xmm0, (%edx) movlpd %xmm1, 5(%edx) mov $13, %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl4): movaps -4(%ecx), %xmm1 movaps 12(%ecx), %xmm2 L(Shl4Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit4Case2OrCase3) #endif test %eax, %eax jnz L(Shl4LoopExit) palignr $4, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 28(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit4Case2OrCase3) #endif test %eax, %eax jnz L(Shl4LoopExit) 
palignr $4, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 28(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit4Case2OrCase3) #endif test %eax, %eax jnz L(Shl4LoopExit) palignr $4, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 28(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit4Case2OrCase3) #endif test %eax, %eax jnz L(Shl4LoopExit) palignr $4, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 28(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -12(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -4(%ecx), %xmm1 L(Shl4LoopStart): movaps 12(%ecx), %xmm2 movaps 28(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 44(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 60(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $4, %xmm4, %xmm5 palignr $4, %xmm3, %xmm4 test %eax, %eax jnz L(Shl4Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave4) #endif palignr $4, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $4, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl4LoopStart) L(Shl4LoopExit): movlpd (%ecx), %xmm0 movl 8(%ecx), %esi movlpd %xmm0, (%edx) movl %esi, 8(%edx) mov $12, %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl5): movaps -5(%ecx), %xmm1 movaps 11(%ecx), %xmm2 L(Shl5Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit5Case2OrCase3) #endif test %eax, %eax jnz L(Shl5LoopExit) palignr $5, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 27(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps 
%xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit5Case2OrCase3) #endif test %eax, %eax jnz L(Shl5LoopExit) palignr $5, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 27(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit5Case2OrCase3) #endif test %eax, %eax jnz L(Shl5LoopExit) palignr $5, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 27(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit5Case2OrCase3) #endif test %eax, %eax jnz L(Shl5LoopExit) palignr $5, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 27(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -11(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -5(%ecx), %xmm1 L(Shl5LoopStart): movaps 11(%ecx), %xmm2 movaps 27(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 43(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 59(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $5, %xmm4, %xmm5 palignr $5, %xmm3, %xmm4 test %eax, %eax jnz L(Shl5Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave5) #endif palignr $5, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $5, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl5LoopStart) L(Shl5LoopExit): movlpd (%ecx), %xmm0 movl 7(%ecx), %esi movlpd %xmm0, (%edx) movl %esi, 7(%edx) mov $11, %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl6): movaps -6(%ecx), %xmm1 movaps 10(%ecx), %xmm2 L(Shl6Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit6Case2OrCase3) #endif test %eax, %eax jnz L(Shl6LoopExit) palignr $6, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps 
%xmm2, (%edx) movaps 26(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit6Case2OrCase3) #endif test %eax, %eax jnz L(Shl6LoopExit) palignr $6, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 26(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit6Case2OrCase3) #endif test %eax, %eax jnz L(Shl6LoopExit) palignr $6, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 26(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit6Case2OrCase3) #endif test %eax, %eax jnz L(Shl6LoopExit) palignr $6, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 26(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -10(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -6(%ecx), %xmm1 L(Shl6LoopStart): movaps 10(%ecx), %xmm2 movaps 26(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 42(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 58(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $6, %xmm4, %xmm5 palignr $6, %xmm3, %xmm4 test %eax, %eax jnz L(Shl6Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave6) #endif palignr $6, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $6, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl6LoopStart) L(Shl6LoopExit): movlpd (%ecx), %xmm0 movl 6(%ecx), %esi movlpd %xmm0, (%edx) movl %esi, 6(%edx) mov $10, %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl7): movaps -7(%ecx), %xmm1 movaps 9(%ecx), %xmm2 L(Shl7Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe 
L(StrncpyExit7Case2OrCase3) #endif test %eax, %eax jnz L(Shl7LoopExit) palignr $7, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 25(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit7Case2OrCase3) #endif test %eax, %eax jnz L(Shl7LoopExit) palignr $7, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 25(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit7Case2OrCase3) #endif test %eax, %eax jnz L(Shl7LoopExit) palignr $7, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 25(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit7Case2OrCase3) #endif test %eax, %eax jnz L(Shl7LoopExit) palignr $7, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 25(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -9(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -7(%ecx), %xmm1 L(Shl7LoopStart): movaps 9(%ecx), %xmm2 movaps 25(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 41(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 57(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $7, %xmm4, %xmm5 palignr $7, %xmm3, %xmm4 test %eax, %eax jnz L(Shl7Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave7) #endif palignr $7, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $7, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl7LoopStart) L(Shl7LoopExit): movlpd (%ecx), %xmm0 movl 5(%ecx), %esi movlpd %xmm0, (%edx) movl %esi, 5(%edx) mov $9, %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl8): movaps -8(%ecx), %xmm1 movaps 8(%ecx), 
%xmm2 L(Shl8Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit8Case2OrCase3) #endif test %eax, %eax jnz L(Shl8LoopExit) palignr $8, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 24(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit8Case2OrCase3) #endif test %eax, %eax jnz L(Shl8LoopExit) palignr $8, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 24(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit8Case2OrCase3) #endif test %eax, %eax jnz L(Shl8LoopExit) palignr $8, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 24(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit8Case2OrCase3) #endif test %eax, %eax jnz L(Shl8LoopExit) palignr $8, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 24(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -8(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -8(%ecx), %xmm1 L(Shl8LoopStart): movaps 8(%ecx), %xmm2 movaps 24(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 40(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 56(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $8, %xmm4, %xmm5 palignr $8, %xmm3, %xmm4 test %eax, %eax jnz L(Shl8Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave8) #endif palignr $8, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $8, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl8LoopStart) L(Shl8LoopExit): movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) mov $8, %esi jmp 
L(CopyFrom1To16Bytes) .p2align 4 L(Shl9): movaps -9(%ecx), %xmm1 movaps 7(%ecx), %xmm2 L(Shl9Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit9Case2OrCase3) #endif test %eax, %eax jnz L(Shl9LoopExit) palignr $9, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 23(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit9Case2OrCase3) #endif test %eax, %eax jnz L(Shl9LoopExit) palignr $9, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 23(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit9Case2OrCase3) #endif test %eax, %eax jnz L(Shl9LoopExit) palignr $9, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 23(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit9Case2OrCase3) #endif test %eax, %eax jnz L(Shl9LoopExit) palignr $9, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 23(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -7(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -9(%ecx), %xmm1 L(Shl9LoopStart): movaps 7(%ecx), %xmm2 movaps 23(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 39(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 55(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $9, %xmm4, %xmm5 palignr $9, %xmm3, %xmm4 test %eax, %eax jnz L(Shl9Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave9) #endif palignr $9, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $9, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp 
L(Shl9LoopStart) L(Shl9LoopExit): movlpd -1(%ecx), %xmm0 movlpd %xmm0, -1(%edx) mov $7, %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl10): movaps -10(%ecx), %xmm1 movaps 6(%ecx), %xmm2 L(Shl10Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit10Case2OrCase3) #endif test %eax, %eax jnz L(Shl10LoopExit) palignr $10, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 22(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit10Case2OrCase3) #endif test %eax, %eax jnz L(Shl10LoopExit) palignr $10, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 22(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit10Case2OrCase3) #endif test %eax, %eax jnz L(Shl10LoopExit) palignr $10, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 22(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit10Case2OrCase3) #endif test %eax, %eax jnz L(Shl10LoopExit) palignr $10, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 22(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -6(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -10(%ecx), %xmm1 L(Shl10LoopStart): movaps 6(%ecx), %xmm2 movaps 22(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 38(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 54(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $10, %xmm4, %xmm5 palignr $10, %xmm3, %xmm4 test %eax, %eax jnz L(Shl10Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave10) #endif palignr $10, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $10, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps 
%xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl10LoopStart) L(Shl10LoopExit): movlpd -2(%ecx), %xmm0 movlpd %xmm0, -2(%edx) mov $6, %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl11): movaps -11(%ecx), %xmm1 movaps 5(%ecx), %xmm2 L(Shl11Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit11Case2OrCase3) #endif test %eax, %eax jnz L(Shl11LoopExit) palignr $11, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 21(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit11Case2OrCase3) #endif test %eax, %eax jnz L(Shl11LoopExit) palignr $11, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 21(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit11Case2OrCase3) #endif test %eax, %eax jnz L(Shl11LoopExit) palignr $11, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 21(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit11Case2OrCase3) #endif test %eax, %eax jnz L(Shl11LoopExit) palignr $11, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 21(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -5(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -11(%ecx), %xmm1 L(Shl11LoopStart): movaps 5(%ecx), %xmm2 movaps 21(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 37(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 53(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $11, %xmm4, %xmm5 palignr $11, %xmm3, %xmm4 test %eax, %eax jnz L(Shl11Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe 
L(StrncpyLeave11) #endif palignr $11, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $11, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl11LoopStart) L(Shl11LoopExit): movlpd -3(%ecx), %xmm0 movlpd %xmm0, -3(%edx) mov $5, %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl12): movaps -12(%ecx), %xmm1 movaps 4(%ecx), %xmm2 L(Shl12Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit12Case2OrCase3) #endif test %eax, %eax jnz L(Shl12LoopExit) palignr $12, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 20(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit12Case2OrCase3) #endif test %eax, %eax jnz L(Shl12LoopExit) palignr $12, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 20(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit12Case2OrCase3) #endif test %eax, %eax jnz L(Shl12LoopExit) palignr $12, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 20(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit12Case2OrCase3) #endif test %eax, %eax jnz L(Shl12LoopExit) palignr $12, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 20(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -4(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -12(%ecx), %xmm1 L(Shl12LoopStart): movaps 4(%ecx), %xmm2 movaps 20(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 36(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 52(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $12, 
%xmm4, %xmm5 palignr $12, %xmm3, %xmm4 test %eax, %eax jnz L(Shl12Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave12) #endif palignr $12, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $12, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl12LoopStart) L(Shl12LoopExit): movl (%ecx), %esi movl %esi, (%edx) mov $4, %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl13): movaps -13(%ecx), %xmm1 movaps 3(%ecx), %xmm2 L(Shl13Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit13Case2OrCase3) #endif test %eax, %eax jnz L(Shl13LoopExit) palignr $13, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 19(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit13Case2OrCase3) #endif test %eax, %eax jnz L(Shl13LoopExit) palignr $13, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 19(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit13Case2OrCase3) #endif test %eax, %eax jnz L(Shl13LoopExit) palignr $13, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 19(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit13Case2OrCase3) #endif test %eax, %eax jnz L(Shl13LoopExit) palignr $13, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 19(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -3(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -13(%ecx), %xmm1 L(Shl13LoopStart): movaps 3(%ecx), %xmm2 movaps 19(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 35(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 51(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, 
%xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $13, %xmm4, %xmm5 palignr $13, %xmm3, %xmm4 test %eax, %eax jnz L(Shl13Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave13) #endif palignr $13, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $13, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl13LoopStart) L(Shl13LoopExit): movl -1(%ecx), %esi movl %esi, -1(%edx) mov $3, %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl14): movaps -14(%ecx), %xmm1 movaps 2(%ecx), %xmm2 L(Shl14Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit14Case2OrCase3) #endif test %eax, %eax jnz L(Shl14LoopExit) palignr $14, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 18(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit14Case2OrCase3) #endif test %eax, %eax jnz L(Shl14LoopExit) palignr $14, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 18(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit14Case2OrCase3) #endif test %eax, %eax jnz L(Shl14LoopExit) palignr $14, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 18(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit14Case2OrCase3) #endif test %eax, %eax jnz L(Shl14LoopExit) palignr $14, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 18(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -2(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif movaps -14(%ecx), %xmm1 L(Shl14LoopStart): movaps 2(%ecx), %xmm2 movaps 18(%ecx), %xmm3 movaps %xmm3, 
%xmm6 movaps 34(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 50(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $14, %xmm4, %xmm5 palignr $14, %xmm3, %xmm4 test %eax, %eax jnz L(Shl14Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave14) #endif palignr $14, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $14, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl14LoopStart) L(Shl14LoopExit): movl -2(%ecx), %esi movl %esi, -2(%edx) mov $2, %esi jmp L(CopyFrom1To16Bytes) .p2align 4 L(Shl15): movaps -15(%ecx), %xmm1 movaps 1(%ecx), %xmm2 L(Shl15Start): pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %eax movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit15Case2OrCase3) #endif test %eax, %eax jnz L(Shl15LoopExit) palignr $15, %xmm1, %xmm2 movaps %xmm3, %xmm1 movaps %xmm2, (%edx) movaps 17(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit15Case2OrCase3) #endif test %eax, %eax jnz L(Shl15LoopExit) palignr $15, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 17(%ecx), %xmm2 movaps %xmm3, %xmm1 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx movaps %xmm2, %xmm3 #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit15Case2OrCase3) #endif test %eax, %eax jnz L(Shl15LoopExit) palignr $15, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 17(%ecx), %xmm2 pcmpeqb %xmm2, %xmm0 lea 16(%edx), %edx pmovmskb %xmm0, %eax lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(StrncpyExit15Case2OrCase3) #endif test %eax, %eax jnz L(Shl15LoopExit) palignr $15, %xmm3, %xmm2 movaps %xmm2, (%edx) lea 17(%ecx), %ecx lea 16(%edx), %edx mov %ecx, %eax and $-0x40, %ecx sub %ecx, %eax lea -1(%ecx), %ecx sub %eax, %edx #ifdef USE_AS_STRNCPY add %eax, %ebx #endif 
movaps -15(%ecx), %xmm1 L(Shl15LoopStart): movaps 1(%ecx), %xmm2 movaps 17(%ecx), %xmm3 movaps %xmm3, %xmm6 movaps 33(%ecx), %xmm4 movaps %xmm4, %xmm7 movaps 49(%ecx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %eax movaps %xmm5, %xmm7 palignr $15, %xmm4, %xmm5 palignr $15, %xmm3, %xmm4 test %eax, %eax jnz L(Shl15Start) #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(StrncpyLeave15) #endif palignr $15, %xmm2, %xmm3 lea 64(%ecx), %ecx palignr $15, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) movaps %xmm2, (%edx) lea 64(%edx), %edx jmp L(Shl15LoopStart) L(Shl15LoopExit): movl -3(%ecx), %esi movl %esi, -3(%edx) mov $1, %esi #if defined USE_AS_STRCAT || defined USE_AS_STRLCPY jmp L(CopyFrom1To16Bytes) #endif #if !defined USE_AS_STRCAT && !defined USE_AS_STRLCPY .p2align 4 L(CopyFrom1To16Bytes): # ifdef USE_AS_STRNCPY add $16, %ebx # endif add %esi, %edx add %esi, %ecx POP (%esi) test %al, %al jz L(ExitHigh8) L(CopyFrom1To16BytesLess8): mov %al, %ah and $15, %ah jz L(ExitHigh4) test $0x01, %al jnz L(Exit1) test $0x02, %al jnz L(Exit2) test $0x04, %al jnz L(Exit3) .p2align 4 L(Exit4): movl (%ecx), %eax movl %eax, (%edx) SAVE_RESULT (3) # ifdef USE_AS_STRNCPY sub $4, %ebx lea 4(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 .p2align 4 L(ExitHigh4): test $0x10, %al jnz L(Exit5) test $0x20, %al jnz L(Exit6) test $0x40, %al jnz L(Exit7) .p2align 4 L(Exit8): movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) SAVE_RESULT (7) # ifdef USE_AS_STRNCPY sub $8, %ebx lea 8(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 .p2align 4 L(ExitHigh8): mov %ah, %al and $15, %al jz L(ExitHigh12) test $0x01, %ah jnz L(Exit9) test $0x02, %ah jnz L(Exit10) test $0x04, %ah jnz L(Exit11) .p2align 4 L(Exit12): movlpd (%ecx), %xmm0 movl 8(%ecx), %eax movlpd 
%xmm0, (%edx) movl %eax, 8(%edx) SAVE_RESULT (11) # ifdef USE_AS_STRNCPY sub $12, %ebx lea 12(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 .p2align 4 L(ExitHigh12): test $0x10, %ah jnz L(Exit13) test $0x20, %ah jnz L(Exit14) test $0x40, %ah jnz L(Exit15) .p2align 4 L(Exit16): movdqu (%ecx), %xmm0 movdqu %xmm0, (%edx) SAVE_RESULT (15) # ifdef USE_AS_STRNCPY sub $16, %ebx lea 16(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 # ifdef USE_AS_STRNCPY CFI_PUSH(%esi) .p2align 4 L(CopyFrom1To16BytesCase2): add $16, %ebx add %esi, %ecx add %esi, %edx POP (%esi) test %al, %al jz L(ExitHighCase2) cmp $8, %ebx ja L(CopyFrom1To16BytesLess8) test $0x01, %al jnz L(Exit1) cmp $1, %ebx je L(Exit1) test $0x02, %al jnz L(Exit2) cmp $2, %ebx je L(Exit2) test $0x04, %al jnz L(Exit3) cmp $3, %ebx je L(Exit3) test $0x08, %al jnz L(Exit4) cmp $4, %ebx je L(Exit4) test $0x10, %al jnz L(Exit5) cmp $5, %ebx je L(Exit5) test $0x20, %al jnz L(Exit6) cmp $6, %ebx je L(Exit6) test $0x40, %al jnz L(Exit7) cmp $7, %ebx je L(Exit7) jmp L(Exit8) .p2align 4 L(ExitHighCase2): cmp $8, %ebx jbe L(CopyFrom1To16BytesLess8Case3) test $0x01, %ah jnz L(Exit9) cmp $9, %ebx je L(Exit9) test $0x02, %ah jnz L(Exit10) cmp $10, %ebx je L(Exit10) test $0x04, %ah jnz L(Exit11) cmp $11, %ebx je L(Exit11) test $0x8, %ah jnz L(Exit12) cmp $12, %ebx je L(Exit12) test $0x10, %ah jnz L(Exit13) cmp $13, %ebx je L(Exit13) test $0x20, %ah jnz L(Exit14) cmp $14, %ebx je L(Exit14) test $0x40, %ah jnz L(Exit15) cmp $15, %ebx je L(Exit15) jmp L(Exit16) CFI_PUSH(%esi) .p2align 4 L(CopyFrom1To16BytesCase2OrCase3): test %eax, %eax jnz L(CopyFrom1To16BytesCase2) .p2align 4 L(CopyFrom1To16BytesCase3): add $16, %ebx add %esi, %edx add %esi, %ecx POP (%esi) cmp $8, %ebx ja L(ExitHigh8Case3) L(CopyFrom1To16BytesLess8Case3): cmp $4, %ebx ja L(ExitHigh4Case3) cmp $1, %ebx je L(Exit1) cmp $2, 
%ebx je L(Exit2) cmp $3, %ebx je L(Exit3) movl (%ecx), %eax movl %eax, (%edx) SAVE_RESULT (4) RETURN1 .p2align 4 L(ExitHigh4Case3): cmp $5, %ebx je L(Exit5) cmp $6, %ebx je L(Exit6) cmp $7, %ebx je L(Exit7) movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) SAVE_RESULT (8) RETURN1 .p2align 4 L(ExitHigh8Case3): cmp $12, %ebx ja L(ExitHigh12Case3) cmp $9, %ebx je L(Exit9) cmp $10, %ebx je L(Exit10) cmp $11, %ebx je L(Exit11) movlpd (%ecx), %xmm0 movl 8(%ecx), %eax movlpd %xmm0, (%edx) movl %eax, 8(%edx) SAVE_RESULT (12) RETURN1 .p2align 4 L(ExitHigh12Case3): cmp $13, %ebx je L(Exit13) cmp $14, %ebx je L(Exit14) cmp $15, %ebx je L(Exit15) movlpd (%ecx), %xmm0 movlpd 8(%ecx), %xmm1 movlpd %xmm0, (%edx) movlpd %xmm1, 8(%edx) SAVE_RESULT (16) RETURN1 # endif .p2align 4 L(Exit1): movb (%ecx), %al movb %al, (%edx) SAVE_RESULT (0) # ifdef USE_AS_STRNCPY sub $1, %ebx lea 1(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 .p2align 4 L(Exit2): movw (%ecx), %ax movw %ax, (%edx) SAVE_RESULT (1) # ifdef USE_AS_STRNCPY sub $2, %ebx lea 2(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 .p2align 4 L(Exit3): movw (%ecx), %ax movw %ax, (%edx) movb 2(%ecx), %al movb %al, 2(%edx) SAVE_RESULT (2) # ifdef USE_AS_STRNCPY sub $3, %ebx lea 3(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 .p2align 4 L(Exit5): movl (%ecx), %eax movl %eax, (%edx) movb 4(%ecx), %al movb %al, 4(%edx) SAVE_RESULT (4) # ifdef USE_AS_STRNCPY sub $5, %ebx lea 5(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 .p2align 4 L(Exit6): movl (%ecx), %eax movl %eax, (%edx) movw 4(%ecx), %ax movw %ax, 4(%edx) SAVE_RESULT (5) # ifdef USE_AS_STRNCPY sub $6, %ebx lea 6(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb 
$-1, %eax # endif # endif RETURN1 .p2align 4 L(Exit7): movl (%ecx), %eax movl %eax, (%edx) movl 3(%ecx), %eax movl %eax, 3(%edx) SAVE_RESULT (6) # ifdef USE_AS_STRNCPY sub $7, %ebx lea 7(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 .p2align 4 L(Exit9): movlpd (%ecx), %xmm0 movb 8(%ecx), %al movlpd %xmm0, (%edx) movb %al, 8(%edx) SAVE_RESULT (8) # ifdef USE_AS_STRNCPY sub $9, %ebx lea 9(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 .p2align 4 L(Exit10): movlpd (%ecx), %xmm0 movw 8(%ecx), %ax movlpd %xmm0, (%edx) movw %ax, 8(%edx) SAVE_RESULT (9) # ifdef USE_AS_STRNCPY sub $10, %ebx lea 10(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 .p2align 4 L(Exit11): movlpd (%ecx), %xmm0 movl 7(%ecx), %eax movlpd %xmm0, (%edx) movl %eax, 7(%edx) SAVE_RESULT (10) # ifdef USE_AS_STRNCPY sub $11, %ebx lea 11(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 .p2align 4 L(Exit13): movlpd (%ecx), %xmm0 movlpd 5(%ecx), %xmm1 movlpd %xmm0, (%edx) movlpd %xmm1, 5(%edx) SAVE_RESULT (12) # ifdef USE_AS_STRNCPY sub $13, %ebx lea 13(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 .p2align 4 L(Exit14): movlpd (%ecx), %xmm0 movlpd 6(%ecx), %xmm1 movlpd %xmm0, (%edx) movlpd %xmm1, 6(%edx) SAVE_RESULT (13) # ifdef USE_AS_STRNCPY sub $14, %ebx lea 14(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN1 .p2align 4 L(Exit15): movlpd (%ecx), %xmm0 movlpd 7(%ecx), %xmm1 movlpd %xmm0, (%edx) movlpd %xmm1, 7(%edx) SAVE_RESULT (14) # ifdef USE_AS_STRNCPY sub $15, %ebx lea 15(%edx), %ecx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # 
endif RETURN1 CFI_POP (%edi) # ifdef USE_AS_STRNCPY .p2align 4 L(Fill0): RETURN .p2align 4 L(Fill1): movb %dl, (%ecx) RETURN .p2align 4 L(Fill2): movw %dx, (%ecx) RETURN .p2align 4 L(Fill3): movw %dx, (%ecx) movb %dl, 2(%ecx) RETURN .p2align 4 L(Fill4): movl %edx, (%ecx) RETURN .p2align 4 L(Fill5): movl %edx, (%ecx) movb %dl, 4(%ecx) RETURN .p2align 4 L(Fill6): movl %edx, (%ecx) movw %dx, 4(%ecx) RETURN .p2align 4 L(Fill7): movl %edx, (%ecx) movl %edx, 3(%ecx) RETURN .p2align 4 L(Fill8): movlpd %xmm0, (%ecx) RETURN .p2align 4 L(Fill9): movlpd %xmm0, (%ecx) movb %dl, 8(%ecx) RETURN .p2align 4 L(Fill10): movlpd %xmm0, (%ecx) movw %dx, 8(%ecx) RETURN .p2align 4 L(Fill11): movlpd %xmm0, (%ecx) movl %edx, 7(%ecx) RETURN .p2align 4 L(Fill12): movlpd %xmm0, (%ecx) movl %edx, 8(%ecx) RETURN .p2align 4 L(Fill13): movlpd %xmm0, (%ecx) movlpd %xmm0, 5(%ecx) RETURN .p2align 4 L(Fill14): movlpd %xmm0, (%ecx) movlpd %xmm0, 6(%ecx) RETURN .p2align 4 L(Fill15): movlpd %xmm0, (%ecx) movlpd %xmm0, 7(%ecx) RETURN .p2align 4 L(Fill16): movlpd %xmm0, (%ecx) movlpd %xmm0, 8(%ecx) RETURN .p2align 4 L(StrncpyFillExit1): lea 16(%ebx), %ebx L(FillFrom1To16Bytes): test %ebx, %ebx jz L(Fill0) cmp $16, %ebx je L(Fill16) cmp $8, %ebx je L(Fill8) jg L(FillMore8) cmp $4, %ebx je L(Fill4) jg L(FillMore4) cmp $2, %ebx jl L(Fill1) je L(Fill2) jg L(Fill3) L(FillMore8): /* but less than 16 */ cmp $12, %ebx je L(Fill12) jl L(FillLess12) cmp $14, %ebx jl L(Fill13) je L(Fill14) jg L(Fill15) L(FillMore4): /* but less than 8 */ cmp $6, %ebx jl L(Fill5) je L(Fill6) jg L(Fill7) L(FillLess12): /* but more than 8 */ cmp $10, %ebx jl L(Fill9) je L(Fill10) jmp L(Fill11) CFI_PUSH(%edi) .p2align 4 L(StrncpyFillTailWithZero1): POP (%edi) L(StrncpyFillTailWithZero): pxor %xmm0, %xmm0 xor %edx, %edx sub $16, %ebx jbe L(StrncpyFillExit1) movlpd %xmm0, (%ecx) movlpd %xmm0, 8(%ecx) lea 16(%ecx), %ecx mov %ecx, %edx and $0xf, %edx sub %edx, %ecx add %edx, %ebx xor %edx, %edx sub $64, %ebx jb L(StrncpyFillLess64) 
L(StrncpyFillLoopMovdqa): movdqa %xmm0, (%ecx) movdqa %xmm0, 16(%ecx) movdqa %xmm0, 32(%ecx) movdqa %xmm0, 48(%ecx) lea 64(%ecx), %ecx sub $64, %ebx jae L(StrncpyFillLoopMovdqa) L(StrncpyFillLess64): add $32, %ebx jl L(StrncpyFillLess32) movdqa %xmm0, (%ecx) movdqa %xmm0, 16(%ecx) lea 32(%ecx), %ecx sub $16, %ebx jl L(StrncpyFillExit1) movdqa %xmm0, (%ecx) lea 16(%ecx), %ecx jmp L(FillFrom1To16Bytes) L(StrncpyFillLess32): add $16, %ebx jl L(StrncpyFillExit1) movdqa %xmm0, (%ecx) lea 16(%ecx), %ecx jmp L(FillFrom1To16Bytes) # endif .p2align 4 L(ExitTail1): movb (%ecx), %al movb %al, (%edx) SAVE_RESULT_TAIL (0) # ifdef USE_AS_STRNCPY sub $1, %ebx lea 1(%edx), %ecx jnz L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN .p2align 4 L(ExitTail2): movw (%ecx), %ax movw %ax, (%edx) SAVE_RESULT_TAIL (1) # ifdef USE_AS_STRNCPY sub $2, %ebx lea 2(%edx), %ecx jnz L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN .p2align 4 L(ExitTail3): movw (%ecx), %ax movw %ax, (%edx) movb 2(%ecx), %al movb %al, 2(%edx) SAVE_RESULT_TAIL (2) # ifdef USE_AS_STRNCPY sub $3, %ebx lea 3(%edx), %ecx jnz L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN .p2align 4 L(ExitTail4): movl (%ecx), %eax movl %eax, (%edx) SAVE_RESULT_TAIL (3) # ifdef USE_AS_STRNCPY sub $4, %ebx lea 4(%edx), %ecx jnz L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN .p2align 4 L(ExitTail5): movl (%ecx), %eax movl %eax, (%edx) movb 4(%ecx), %al movb %al, 4(%edx) SAVE_RESULT_TAIL (4) # ifdef USE_AS_STRNCPY sub $5, %ebx lea 5(%edx), %ecx jnz L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN .p2align 4 L(ExitTail6): movl (%ecx), %eax movl %eax, (%edx) movw 4(%ecx), %ax movw %ax, 4(%edx) SAVE_RESULT_TAIL (5) # ifdef USE_AS_STRNCPY sub $6, %ebx lea 6(%edx), %ecx jnz 
L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN .p2align 4 L(ExitTail7): movl (%ecx), %eax movl %eax, (%edx) movl 3(%ecx), %eax movl %eax, 3(%edx) SAVE_RESULT_TAIL (6) # ifdef USE_AS_STRNCPY sub $7, %ebx lea 7(%edx), %ecx jnz L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN .p2align 4 L(ExitTail8): movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) SAVE_RESULT_TAIL (7) # ifdef USE_AS_STRNCPY sub $8, %ebx lea 8(%edx), %ecx jnz L(StrncpyFillTailWithZero) # endif RETURN .p2align 4 L(ExitTail9): movlpd (%ecx), %xmm0 movb 8(%ecx), %al movlpd %xmm0, (%edx) movb %al, 8(%edx) SAVE_RESULT_TAIL (8) # ifdef USE_AS_STRNCPY sub $9, %ebx lea 9(%edx), %ecx jnz L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN .p2align 4 L(ExitTail10): movlpd (%ecx), %xmm0 movw 8(%ecx), %ax movlpd %xmm0, (%edx) movw %ax, 8(%edx) SAVE_RESULT_TAIL (9) # ifdef USE_AS_STRNCPY sub $10, %ebx lea 10(%edx), %ecx jnz L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN .p2align 4 L(ExitTail11): movlpd (%ecx), %xmm0 movl 7(%ecx), %eax movlpd %xmm0, (%edx) movl %eax, 7(%edx) SAVE_RESULT_TAIL (10) # ifdef USE_AS_STRNCPY sub $11, %ebx lea 11(%edx), %ecx jnz L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN .p2align 4 L(ExitTail12): movlpd (%ecx), %xmm0 movl 8(%ecx), %eax movlpd %xmm0, (%edx) movl %eax, 8(%edx) SAVE_RESULT_TAIL (11) # ifdef USE_AS_STRNCPY sub $12, %ebx lea 12(%edx), %ecx jnz L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN .p2align 4 L(ExitTail13): movlpd (%ecx), %xmm0 movlpd 5(%ecx), %xmm1 movlpd %xmm0, (%edx) movlpd %xmm1, 5(%edx) SAVE_RESULT_TAIL (12) # ifdef USE_AS_STRNCPY sub $13, %ebx lea 13(%edx), %ecx jnz L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # 
endif # endif RETURN .p2align 4 L(ExitTail14): movlpd (%ecx), %xmm0 movlpd 6(%ecx), %xmm1 movlpd %xmm0, (%edx) movlpd %xmm1, 6(%edx) SAVE_RESULT_TAIL (13) # ifdef USE_AS_STRNCPY sub $14, %ebx lea 14(%edx), %ecx jnz L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN .p2align 4 L(ExitTail15): movlpd (%ecx), %xmm0 movlpd 7(%ecx), %xmm1 movlpd %xmm0, (%edx) movlpd %xmm1, 7(%edx) SAVE_RESULT_TAIL (14) # ifdef USE_AS_STRNCPY sub $15, %ebx lea 15(%edx), %ecx jnz L(StrncpyFillTailWithZero) # endif RETURN .p2align 4 L(ExitTail16): movdqu (%ecx), %xmm0 movdqu %xmm0, (%edx) SAVE_RESULT_TAIL (15) # ifdef USE_AS_STRNCPY sub $16, %ebx lea 16(%edx), %ecx jnz L(StrncpyFillTailWithZero) # ifdef USE_AS_STPCPY cmpb $1, (%eax) sbb $-1, %eax # endif # endif RETURN #endif #ifdef USE_AS_STRNCPY # ifndef USE_AS_STRCAT CFI_PUSH (%esi) CFI_PUSH (%edi) # endif .p2align 4 L(StrncpyLeaveCase2OrCase3): test %eax, %eax jnz L(Aligned64LeaveCase2) L(Aligned64LeaveCase3): add $48, %ebx jle L(CopyFrom1To16BytesCase3) movaps %xmm4, -64(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(CopyFrom1To16BytesCase3) movaps %xmm5, -48(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(CopyFrom1To16BytesCase3) movaps %xmm6, -32(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx jmp L(CopyFrom1To16BytesCase3) L(Aligned64LeaveCase2): pcmpeqb %xmm4, %xmm0 pmovmskb %xmm0, %eax add $48, %ebx jle L(CopyFrom1To16BytesCase2OrCase3) test %eax, %eax jnz L(CopyFrom1To16Bytes) pcmpeqb %xmm5, %xmm0 pmovmskb %xmm0, %eax movaps %xmm4, -64(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %eax, %eax jnz L(CopyFrom1To16Bytes) pcmpeqb %xmm6, %xmm0 pmovmskb %xmm0, %eax movaps %xmm5, -48(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %eax, %eax jnz L(CopyFrom1To16Bytes) pcmpeqb %xmm7, %xmm0 pmovmskb %xmm0, %eax movaps %xmm6, -32(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx jmp L(CopyFrom1To16BytesCase2) 
/*--------------------------------------------------*/ .p2align 4 L(StrncpyExit1Case2OrCase3): movlpd (%ecx), %xmm0 movlpd 7(%ecx), %xmm1 movlpd %xmm0, (%edx) movlpd %xmm1, 7(%edx) mov $15, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit2Case2OrCase3): movlpd (%ecx), %xmm0 movlpd 6(%ecx), %xmm1 movlpd %xmm0, (%edx) movlpd %xmm1, 6(%edx) mov $14, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit3Case2OrCase3): movlpd (%ecx), %xmm0 movlpd 5(%ecx), %xmm1 movlpd %xmm0, (%edx) movlpd %xmm1, 5(%edx) mov $13, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit4Case2OrCase3): movlpd (%ecx), %xmm0 movl 8(%ecx), %esi movlpd %xmm0, (%edx) movl %esi, 8(%edx) mov $12, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit5Case2OrCase3): movlpd (%ecx), %xmm0 movl 7(%ecx), %esi movlpd %xmm0, (%edx) movl %esi, 7(%edx) mov $11, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit6Case2OrCase3): movlpd (%ecx), %xmm0 movl 6(%ecx), %esi movlpd %xmm0, (%edx) movl %esi, 6(%edx) mov $10, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit7Case2OrCase3): movlpd (%ecx), %xmm0 movl 5(%ecx), %esi movlpd %xmm0, (%edx) movl %esi, 5(%edx) mov $9, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit8Case2OrCase3): movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) mov $8, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit9Case2OrCase3): movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) mov $7, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit10Case2OrCase3): movlpd -1(%ecx), %xmm0 movlpd %xmm0, -1(%edx) mov $6, 
%esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit11Case2OrCase3): movlpd -2(%ecx), %xmm0 movlpd %xmm0, -2(%edx) mov $5, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit12Case2OrCase3): movl (%ecx), %esi movl %esi, (%edx) mov $4, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit13Case2OrCase3): movl -1(%ecx), %esi movl %esi, -1(%edx) mov $3, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit14Case2OrCase3): movl -2(%ecx), %esi movl %esi, -2(%edx) mov $2, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) .p2align 4 L(StrncpyExit15Case2OrCase3): movl -3(%ecx), %esi movl %esi, -3(%edx) mov $1, %esi test %eax, %eax jnz L(CopyFrom1To16BytesCase2) jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave1): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit1) palignr $1, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 31(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit1) palignr $1, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit1) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit1) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit1): lea 15(%edx, %esi), %edx lea 15(%ecx, %esi), %ecx movdqu -16(%ecx), %xmm0 xor %esi, %esi movdqu %xmm0, -16(%edx) jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave2): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit2) palignr $2, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 30(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit2) palignr $2, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit2) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit2) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit2): lea 
14(%edx, %esi), %edx lea 14(%ecx, %esi), %ecx movdqu -16(%ecx), %xmm0 xor %esi, %esi movdqu %xmm0, -16(%edx) jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave3): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit3) palignr $3, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 29(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit3) palignr $3, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit3) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit3) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit3): lea 13(%edx, %esi), %edx lea 13(%ecx, %esi), %ecx movdqu -16(%ecx), %xmm0 xor %esi, %esi movdqu %xmm0, -16(%edx) jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave4): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit4) palignr $4, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 28(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit4) palignr $4, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit4) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit4) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit4): lea 12(%edx, %esi), %edx lea 12(%ecx, %esi), %ecx movlpd -12(%ecx), %xmm0 movl -4(%ecx), %eax movlpd %xmm0, -12(%edx) movl %eax, -4(%edx) xor %esi, %esi jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave5): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit5) palignr $5, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 27(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit5) palignr $5, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit5) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit5) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit5): lea 11(%edx, %esi), %edx lea 11(%ecx, %esi), %ecx movlpd -11(%ecx), %xmm0 movl -4(%ecx), %eax movlpd %xmm0, -11(%edx) movl %eax, -4(%edx) xor %esi, %esi jmp 
L(CopyFrom1To16BytesCase3) L(StrncpyLeave6): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit6) palignr $6, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 26(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit6) palignr $6, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit6) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit6) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit6): lea 10(%edx, %esi), %edx lea 10(%ecx, %esi), %ecx movlpd -10(%ecx), %xmm0 movw -2(%ecx), %ax movlpd %xmm0, -10(%edx) movw %ax, -2(%edx) xor %esi, %esi jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave7): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit7) palignr $7, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 25(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit7) palignr $7, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit7) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit7) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit7): lea 9(%edx, %esi), %edx lea 9(%ecx, %esi), %ecx movlpd -9(%ecx), %xmm0 movb -1(%ecx), %ah movlpd %xmm0, -9(%edx) movb %ah, -1(%edx) xor %esi, %esi jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave8): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit8) palignr $8, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 24(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit8) palignr $8, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit8) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit8) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit8): lea 8(%edx, %esi), %edx lea 8(%ecx, %esi), %ecx movlpd -8(%ecx), %xmm0 movlpd %xmm0, -8(%edx) xor %esi, %esi jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave9): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit9) palignr $9, %xmm1, %xmm2 movaps %xmm2, (%edx) 
movaps 23(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit9) palignr $9, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit9) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit9) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit9): lea 7(%edx, %esi), %edx lea 7(%ecx, %esi), %ecx movlpd -8(%ecx), %xmm0 movlpd %xmm0, -8(%edx) xor %esi, %esi jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave10): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit10) palignr $10, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 22(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit10) palignr $10, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit10) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit10) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit10): lea 6(%edx, %esi), %edx lea 6(%ecx, %esi), %ecx movlpd -8(%ecx), %xmm0 movlpd %xmm0, -8(%edx) xor %esi, %esi jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave11): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit11) palignr $11, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 21(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit11) palignr $11, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit11) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit11) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit11): lea 5(%edx, %esi), %edx lea 5(%ecx, %esi), %ecx movl -5(%ecx), %esi movb -1(%ecx), %ah movl %esi, -5(%edx) movb %ah, -1(%edx) xor %esi, %esi jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave12): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit12) palignr $12, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 20(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit12) palignr $12, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe 
L(StrncpyExit12) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit12) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit12): lea 4(%edx, %esi), %edx lea 4(%ecx, %esi), %ecx movl -4(%ecx), %eax movl %eax, -4(%edx) xor %esi, %esi jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave13): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit13) palignr $13, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 19(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit13) palignr $13, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit13) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit13) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit13): lea 3(%edx, %esi), %edx lea 3(%ecx, %esi), %ecx movl -4(%ecx), %eax movl %eax, -4(%edx) xor %esi, %esi jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave14): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit14) palignr $14, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 18(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit14) palignr $14, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit14) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit14) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit14): lea 2(%edx, %esi), %edx lea 2(%ecx, %esi), %ecx movw -2(%ecx), %ax movw %ax, -2(%edx) xor %esi, %esi jmp L(CopyFrom1To16BytesCase3) L(StrncpyLeave15): movaps %xmm2, %xmm3 add $48, %ebx jle L(StrncpyExit15) palignr $15, %xmm1, %xmm2 movaps %xmm2, (%edx) movaps 17(%ecx), %xmm2 lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit15) palignr $15, %xmm3, %xmm2 movaps %xmm2, 16(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit15) movaps %xmm4, 32(%edx) lea 16(%esi), %esi sub $16, %ebx jbe L(StrncpyExit15) movaps %xmm5, 48(%edx) lea 16(%esi), %esi lea -16(%ebx), %ebx L(StrncpyExit15): lea 1(%edx, %esi), %edx lea 1(%ecx, %esi), %ecx 
/* Remainder of L(StrncpyExit15): copy the single byte just below the
   adjusted %ecx to the same position below %edx through %ah, clear
   %esi and continue at L(CopyFrom1To16BytesCase3), which is defined
   outside this excerpt.  */
	movb	-1(%ecx), %ah
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
#endif

#if !defined USE_AS_STRCAT && ! defined USE_AS_STRLCPY
# ifdef USE_AS_STRNCPY
/* Short-length tails, assembled only when USE_AS_STRNCPY is defined.
   In all of them %ecx is the address read from, %edx the address
   written to, and %ebx is compared against small byte counts.  The
   L(ExitTailN) targets other than L(ExitTail0) are defined outside
   this excerpt.
   NOTE(review): %ebx is presumably the strncpy length argument and the
   L(ExitTailN) labels presumably finish a copy of N bytes - confirm
   against the rest of the file.

   The two CFI_POP macros emit unwind annotations only (CFA offset
   adjusted by -4, register marked restored); they generate no
   instructions.  */
	CFI_POP (%esi)
	CFI_POP (%edi)

/* Nothing to copy: return %edx in %eax.  With USE_AS_STRNCPY, RETURN
   also pops %ebx before the ret.  */
	.p2align 4
L(ExitTail0):
	movl	%edx, %eax
	RETURN

/* Counts of at most 12 are handed to L(StrncpyExit12Bytes).  Otherwise
   source bytes 8..13 are tested for NUL, interleaved with tests for
   the count being exactly 13 or 14; the first hit selects the matching
   L(ExitTailN).  If nothing hits, 15 bytes are copied with two
   overlapping 8-byte moves (offsets 0 and 7).  Source bytes 0..7 are
   not examined here.
   NOTE(review): presumably they were checked before branching here -
   confirm against the callers.  */
	.p2align 4
L(StrncpyExit15Bytes):
	cmp	$12, %ebx
	jbe	L(StrncpyExit12Bytes)
	cmpb	$0, 8(%ecx)
	jz	L(ExitTail9)
	cmpb	$0, 9(%ecx)
	jz	L(ExitTail10)
	cmpb	$0, 10(%ecx)
	jz	L(ExitTail11)
	cmpb	$0, 11(%ecx)
	jz	L(ExitTail12)
	cmp	$13, %ebx
	je	L(ExitTail13)
	cmpb	$0, 12(%ecx)
	jz	L(ExitTail13)
	cmp	$14, %ebx
	je	L(ExitTail14)
	cmpb	$0, 13(%ecx)
	jz	L(ExitTail14)
	movlpd	(%ecx), %xmm0		/* bytes 0..7 */
	movlpd	7(%ecx), %xmm1		/* bytes 7..14 */
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
# ifdef USE_AS_STPCPY
	/* %eax = address of the last byte stored.  cmpb $1 sets carry only
	   when that byte is 0, and sbb $-1 adds 1 - carry, so %eax moves
	   one further unless the byte is NUL.  */
	lea	14(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# else
	movl	%edx, %eax
# endif
	RETURN

/* Tests for the count being exactly 9, 10 or 11, interleaved with NUL
   tests on source bytes 8, 9 and 10; the first hit selects the matching
   L(ExitTailN).  Otherwise 12 bytes are copied (8 via %xmm0, 4 via
   %eax).  SAVE_RESULT_TAIL (11) then sets %eax to 11(%edx) when
   USE_AS_STPCPY is defined and to %edx otherwise.  */
	.p2align 4
L(StrncpyExit12Bytes):
	cmp	$9, %ebx
	je	L(ExitTail9)
	cmpb	$0, 8(%ecx)
	jz	L(ExitTail9)
	cmp	$10, %ebx
	je	L(ExitTail10)
	cmpb	$0, 9(%ecx)
	jz	L(ExitTail10)
	cmp	$11, %ebx
	je	L(ExitTail11)
	cmpb	$0, 10(%ecx)
	jz	L(ExitTail11)
	movlpd	(%ecx), %xmm0
	movl	8(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 8(%edx)
	SAVE_RESULT_TAIL (11)
# ifdef USE_AS_STPCPY
	/* Advance %eax by one unless the byte it points at is NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# endif
	RETURN

/* Counts of at most 4 are handed to L(StrncpyExit4Bytes).  Otherwise
   source bytes 0..3 are tested for NUL, then bytes 4..6 interleaved
   with tests for the count being exactly 5, 6 or 7.  If nothing hits,
   8 bytes are copied with a single movlpd.  */
	.p2align 4
L(StrncpyExit8Bytes):
	cmp	$4, %ebx
	jbe	L(StrncpyExit4Bytes)
	cmpb	$0, (%ecx)
	jz	L(ExitTail1)
	cmpb	$0, 1(%ecx)
	jz	L(ExitTail2)
	cmpb	$0, 2(%ecx)
	jz	L(ExitTail3)
	cmpb	$0, 3(%ecx)
	jz	L(ExitTail4)
	cmp	$5, %ebx
	je	L(ExitTail5)
	cmpb	$0, 4(%ecx)
	jz	L(ExitTail5)
	cmp	$6, %ebx
	je	L(ExitTail6)
	cmpb	$0, 5(%ecx)
	jz	L(ExitTail6)
	cmp	$7, %ebx
	je	L(ExitTail7)
	cmpb	$0, 6(%ecx)
	jz	L(ExitTail7)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
# ifdef USE_AS_STPCPY
	/* %eax = address of the last byte stored, plus one unless that
	   byte is NUL.  */
	lea	7(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# else
	movl	%edx, %eax
# endif
	RETURN

/* A zero count goes to L(ExitTail0).  Then tests for the count being
   exactly 1, 2 or 3 are interleaved with NUL tests on source bytes 0,
   1 and 2.  If nothing hits, 4 bytes are copied through %eax, and
   SAVE_RESULT_TAIL (3) sets %eax to 3(%edx) when USE_AS_STPCPY is
   defined and to %edx otherwise.  */
	.p2align 4
L(StrncpyExit4Bytes):
	test	%ebx, %ebx
	jz	L(ExitTail0)
	cmp	$1, %ebx
	je	L(ExitTail1)
	cmpb	$0, (%ecx)
	jz	L(ExitTail1)
	cmp	$2, %ebx
	je	L(ExitTail2)
	cmpb	$0, 1(%ecx)
	jz	L(ExitTail2)
	cmp	$3, %ebx
	je	L(ExitTail3)
	cmpb	$0, 2(%ecx)
	jz	L(ExitTail3)
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	SAVE_RESULT_TAIL (3)
# ifdef USE_AS_STPCPY
	/* Advance %eax by one unless the byte it points at is NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
# endif
	RETURN
# endif

/* Closes the function: END emits cfi_endproc and the .size directive
   unless END was already defined before this file's own definition.  */
END (STRCPY)
#endif