5a92284167
Add following functions: bcopy, memcpy, memmove, memset, bzero, memcmp, wmemcmp, strlen, strcpy, strncpy, stpcpy, stpncpy. Create new directories inside arch-x86 to specify architecture: atom, silvermont and generic (non atom or silvermont architectures are treated like generic). Due to introducing optimized versions of stpcpy and stpncpy, c-implementations of these functions are moved from common for architectures makefile to arm and mips specific makefiles. Change-Id: I990f8061c3e9bca1f154119303da9e781c5d086e Signed-off-by: Varvara Rainchik <varvara.rainchik@intel.com>
2497 lines
43 KiB
ArmAsm
2497 lines
43 KiB
ArmAsm
/*
|
|
Copyright (c) 2010, 2011, 2012, 2013 Intel Corporation
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
|
|
* Neither the name of Intel Corporation nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#ifndef L
|
|
/* L(label) pastes ".L" in front of the given name, so L(zero) becomes
   .Lzero.  The ".L" prefix marks the label as local (non-exported) for
   the GNU assembler.  */
# define L(label) .L##label
|
|
#endif
|
|
|
|
#ifndef cfi_startproc
|
|
/* Fallback used only when cfi_startproc is not already defined: map it
   to the assembler's .cfi_startproc directive.  */
# define cfi_startproc .cfi_startproc
|
|
#endif
|
|
|
|
#ifndef cfi_endproc
|
|
/* Fallback used only when cfi_endproc is not already defined: map it to
   the assembler's .cfi_endproc directive.  */
# define cfi_endproc .cfi_endproc
|
|
#endif
|
|
|
|
#ifndef cfi_rel_offset
|
|
/* Fallback used only when cfi_rel_offset is not already defined: forward
   (reg, off) to the assembler's .cfi_rel_offset directive.  */
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
|
#endif
|
|
|
|
#ifndef cfi_restore
|
|
/* Fallback used only when cfi_restore is not already defined: forward
   reg to the assembler's .cfi_restore directive.  */
# define cfi_restore(reg) .cfi_restore reg
|
|
#endif
|
|
|
|
#ifndef cfi_adjust_cfa_offset
|
|
/* Fallback used only when cfi_adjust_cfa_offset is not already defined:
   forward off to the assembler's .cfi_adjust_cfa_offset directive.  */
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
|
#endif
|
|
|
|
#ifndef cfi_remember_state
|
|
/* Fallback used only when cfi_remember_state is not already defined: map
   it to the assembler's .cfi_remember_state directive.  */
# define cfi_remember_state .cfi_remember_state
|
|
#endif
|
|
|
|
#ifndef cfi_restore_state
|
|
/* Fallback used only when cfi_restore_state is not already defined: map
   it to the assembler's .cfi_restore_state directive.  */
# define cfi_restore_state .cfi_restore_state
|
|
#endif
|
|
|
|
#ifndef ENTRY
|
|
/* ENTRY(name): open a function.  Marks `name` as a function symbol,
   makes it global, aligns the location counter to 2^4 = 16 bytes,
   defines the label itself and starts a CFI region.  Close the
   function with END(name).  */
# define ENTRY(name) \
|

.type name, @function; \
|

.globl name; \
|

.p2align 4; \
|

name: \
|

cfi_startproc
|
|
#endif
|
|
|
|
#ifndef END
|
|
/* END(name): close a function opened with ENTRY(name).  Ends the CFI
   region and sets the symbol size to the distance from the `name`
   label to the current location (.-name).  */
# define END(name) \
|

cfi_endproc; \
|

.size name, .-name
|
|
#endif
|
|
|
|
#ifndef MEMCMP
|
|
/* Default symbol name for the routine defined with ENTRY (MEMCMP)
   below.  Because of the surrounding #ifndef, a build can define MEMCMP
   beforehand to emit the same code under another name.  */
# define MEMCMP memcmp
|
|
#endif
|
|
|
|
/* CFI_PUSH(REG): unwind annotations that accompany a 4-byte push of
   REG.  The CFA offset is increased by 4 and REG is recorded as saved
   at relative offset 0.  Emits no machine instruction by itself.  */
#define CFI_PUSH(REG) \
|

cfi_adjust_cfa_offset (4); \
|

cfi_rel_offset (REG, 0)
|
|
|
|
/* CFI_POP(REG): unwind annotations that accompany a 4-byte pop of REG.
   The CFA offset is decreased by 4 and REG is marked as restored.
   Emits no machine instruction by itself.  */
#define CFI_POP(REG) \
|

cfi_adjust_cfa_offset (-4); \
|

cfi_restore (REG)
|
|
|
|
/* PUSH(REG): pushl REG followed by the matching CFI_PUSH annotations,
   so the unwind information stays in step with the stack.  */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|

/* POP(REG): popl REG followed by the matching CFI_POP annotations.  */
#define POP(REG) popl REG; CFI_POP (REG)
|
|
|
|
/* Byte offsets of the three stack arguments: BLK1 = 4, BLK2 = 8 and
   LEN = 12.  They are used as displacements off %esp (e.g. LEN(%esp)),
   so they are only valid before the first PUSH moves %esp; the visible
   loads at function entry all happen before any PUSH.
   NOTE(review): PARMS = 4 presumably skips the 4-byte return address
   at (%esp) -- confirm against the calling convention in use.  */
#define PARMS 4
|

#define BLK1 PARMS
|

#define BLK2 BLK1+4
|

#define LEN BLK2+4
|
|
/* RETURN_END: epilogue for paths that have %ebx, %esi and %edi on the
   stack.  Pops them in the order %edi, %esi, %ebx (each POP also emits
   its unwind annotations) and returns.  */
#define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret
|

/* RETURN: RETURN_END for use in the middle of the function.  After the
   ret it restores the CFI state saved by the most recent
   cfi_remember_state and immediately remembers it again, so the code
   placed after this return is described with that saved state rather
   than with the popped-everything state left by the epilogue.  */
#define RETURN RETURN_END; cfi_restore_state; cfi_remember_state
|
|
|
|
/* Warning!
|
|
wmemcmp has to use SIGNED comparison for elements.
|
|
memcmp has to use UNSIGNED comparison for elements.
|
|
*/
|
|
|
|
.text
|
|
ENTRY (MEMCMP)
|
|
movl LEN(%esp), %ecx
|
|
|
|
#ifdef USE_WCHAR
|
|
shl $2, %ecx
|
|
jz L(zero)
|
|
#elif defined USE_UTF16
|
|
shl $1, %ecx
|
|
jz L(zero)
|
|
#endif
|
|
|
|
movl BLK1(%esp), %eax
|
|
cmp $48, %ecx
|
|
movl BLK2(%esp), %edx
|
|
jae L(48bytesormore)
|
|
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
cmp $1, %ecx
|
|
jbe L(less1bytes)
|
|
#endif
|
|
|
|
PUSH (%ebx)
|
|
add %ecx, %edx
|
|
add %ecx, %eax
|
|
jmp L(less48bytes)
|
|
|
|
CFI_POP (%ebx)
|
|
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
.p2align 4
|
|
L(less1bytes):
|
|
jb L(zero)
|
|
movb (%eax), %cl
|
|
cmp (%edx), %cl
|
|
je L(zero)
|
|
mov $1, %eax
|
|
ja L(1bytesend)
|
|
neg %eax
|
|
L(1bytesend):
|
|
ret
|
|
#endif
|
|
|
|
.p2align 4
|
|
L(zero):
|
|
xor %eax, %eax
|
|
ret
|
|
|
|
.p2align 4
|
|
L(48bytesormore):
|
|
PUSH (%ebx)
|
|
PUSH (%esi)
|
|
PUSH (%edi)
|
|
cfi_remember_state
|
|
movdqu (%eax), %xmm3
|
|
movdqu (%edx), %xmm0
|
|
movl %eax, %edi
|
|
movl %edx, %esi
|
|
pcmpeqb %xmm0, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 16(%edi), %edi
|
|
|
|
sub $0xffff, %edx
|
|
lea 16(%esi), %esi
|
|
jnz L(less16bytes)
|
|
mov %edi, %edx
|
|
and $0xf, %edx
|
|
xor %edx, %edi
|
|
sub %edx, %esi
|
|
add %edx, %ecx
|
|
mov %esi, %edx
|
|
and $0xf, %edx
|
|
jz L(shr_0)
|
|
xor %edx, %esi
|
|
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
cmp $8, %edx
|
|
jae L(next_unaligned_table)
|
|
cmp $0, %edx
|
|
je L(shr_0)
|
|
cmp $1, %edx
|
|
je L(shr_1)
|
|
cmp $2, %edx
|
|
je L(shr_2)
|
|
cmp $3, %edx
|
|
je L(shr_3)
|
|
cmp $4, %edx
|
|
je L(shr_4)
|
|
cmp $5, %edx
|
|
je L(shr_5)
|
|
cmp $6, %edx
|
|
je L(shr_6)
|
|
jmp L(shr_7)
|
|
|
|
.p2align 2
|
|
L(next_unaligned_table):
|
|
cmp $8, %edx
|
|
je L(shr_8)
|
|
cmp $9, %edx
|
|
je L(shr_9)
|
|
cmp $10, %edx
|
|
je L(shr_10)
|
|
cmp $11, %edx
|
|
je L(shr_11)
|
|
cmp $12, %edx
|
|
je L(shr_12)
|
|
cmp $13, %edx
|
|
je L(shr_13)
|
|
cmp $14, %edx
|
|
je L(shr_14)
|
|
jmp L(shr_15)
|
|
#elif defined(USE_WCHAR)
|
|
cmp $0, %edx
|
|
je L(shr_0)
|
|
cmp $4, %edx
|
|
je L(shr_4)
|
|
cmp $8, %edx
|
|
je L(shr_8)
|
|
jmp L(shr_12)
|
|
#elif defined(USE_UTF16)
|
|
cmp $0, %edx
|
|
je L(shr_0)
|
|
cmp $2, %edx
|
|
je L(shr_2)
|
|
cmp $4, %edx
|
|
je L(shr_4)
|
|
cmp $6, %edx
|
|
je L(shr_6)
|
|
cmp $8, %edx
|
|
je L(shr_8)
|
|
cmp $10, %edx
|
|
je L(shr_10)
|
|
cmp $12, %edx
|
|
je L(shr_12)
|
|
jmp L(shr_14)
|
|
#endif
|
|
|
|
.p2align 4
|
|
L(shr_0):
|
|
cmp $80, %ecx
|
|
jae L(shr_0_gobble)
|
|
lea -48(%ecx), %ecx
|
|
xor %eax, %eax
|
|
movaps (%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
movaps 16(%esi), %xmm2
|
|
pcmpeqb 16(%edi), %xmm2
|
|
pand %xmm1, %xmm2
|
|
pmovmskb %xmm2, %edx
|
|
add $32, %edi
|
|
add $32, %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea (%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_0_gobble):
|
|
lea -48(%ecx), %ecx
|
|
movdqa (%esi), %xmm0
|
|
xor %eax, %eax
|
|
pcmpeqb (%edi), %xmm0
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm2
|
|
pcmpeqb 16(%edi), %xmm2
|
|
L(shr_0_gobble_loop):
|
|
pand %xmm0, %xmm2
|
|
sub $32, %ecx
|
|
pmovmskb %xmm2, %edx
|
|
movdqa %xmm0, %xmm1
|
|
movdqa 32(%esi), %xmm0
|
|
movdqa 48(%esi), %xmm2
|
|
sbb $0xffff, %edx
|
|
pcmpeqb 32(%edi), %xmm0
|
|
pcmpeqb 48(%edi), %xmm2
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
jz L(shr_0_gobble_loop)
|
|
|
|
pand %xmm0, %xmm2
|
|
cmp $0, %ecx
|
|
jge L(shr_0_gobble_loop_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_0_gobble_loop_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm2, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea (%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_1):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_1_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $1,(%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $1,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 1(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_1_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $1,(%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $1,16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_1_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $1,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $1,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_1_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_1_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_1_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 1(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
#endif
|
|
|
|
|
|
#if !defined(USE_WCHAR)
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_2):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_2_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $2,(%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $2,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 2(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_2_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $2,(%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $2,16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_2_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $2,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $2,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_2_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_2_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_2_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 2(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
#endif
|
|
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_3):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_3_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $3,(%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $3,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 3(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_3_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $3,(%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $3,16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_3_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $3,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $3,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_3_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_3_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_3_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 3(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
#endif
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_4):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_4_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $4,(%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $4,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 4(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_4_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $4,(%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $4,16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_4_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $4,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $4,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_4_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_4_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_4_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 4(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_5):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_5_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $5,(%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $5,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 5(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_5_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $5,(%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $5,16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_5_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $5,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $5,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_5_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_5_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_5_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 5(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
#endif
|
|
|
|
#if !defined(USE_WCHAR)
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_6):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_6_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $6,(%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $6,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 6(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_6_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $6,(%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $6,16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_6_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $6,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $6,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_6_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_6_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_6_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 6(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
#endif
|
|
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_7):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_7_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $7,(%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $7,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 7(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_7_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $7,(%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $7,16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_7_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $7,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $7,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_7_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_7_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_7_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 7(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
#endif
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_8):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_8_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $8,(%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $8,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 8(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_8_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $8,(%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $8,16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_8_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $8,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $8,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_8_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_8_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_8_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 8(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_9):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_9_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $9,(%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $9,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 9(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_9_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $9,(%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $9,16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_9_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $9,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $9,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_9_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_9_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_9_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 9(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
#endif
|
|
|
|
#if !defined(USE_WCHAR)
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_10):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_10_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $10, (%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $10,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 10(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_10_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $10, (%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $10, 16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_10_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $10,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $10,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_10_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_10_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_10_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 10(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
#endif
|
|
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_11):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_11_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $11, (%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $11, %xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 11(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_11_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $11, (%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $11, 16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_11_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $11,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $11,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_11_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_11_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_11_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 11(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
#endif
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_12):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_12_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $12, (%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $12, %xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 12(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_12_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $12, (%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $12, 16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_12_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $12,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $12,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_12_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_12_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_12_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 12(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_13):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_13_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $13, (%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $13, %xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 13(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_13_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $13, (%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $13, 16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_13_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $13,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $13,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_13_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_13_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_13_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 13(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
#endif
|
|
|
|
#if !defined(USE_WCHAR)
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_14):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_14_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $14, (%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $14, %xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 14(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
L(shr_14_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $14, (%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $14, 16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_14_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $14,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $14,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_14_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_14_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_14_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 14(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
#endif
|
|
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
/* Source is 15 bytes past a 16-byte boundary: compare 32 bytes at (%edi)
   against the 32 bytes starting at 15(%esi), using aligned loads from
   %esi stitched together with palignr $15.
   In:  %edi, %esi = 16-byte-aligned pointers, %ecx = length counter,
        %edx = value later added to %esi at L(first16bytes) -- presumably
        the source misalignment set up outside this view.  */
L(shr_15):
|
|
cmp $80, %ecx
|
|
/* lea and mov leave EFLAGS alone, so the jae below still tests the cmp.  */
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_15_gobble)
|
|
|
|
/* xmm1 = bytes 15..30 from %esi, compared with bytes 0..15 at %edi.  */
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $15, (%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
/* xmm3 = bytes 31..46 from %esi, compared with bytes 16..31 at %edi.  */
movdqa 32(%esi), %xmm3
|
|
palignr $15, %xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
/* Combined mask is 0xffff only when all 32 bytes matched.  */
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
/* All equal: %eax / %edx = end of the remaining %ecx bytes in each
   buffer (the +15 re-applies the source shift).  */
lea (%ecx, %edi,1), %eax
|
|
lea 15(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
/* Long-input path for a source shift of 15: software-pipelined loop that
   compares 32 bytes per iteration.  xmm0 / xmm3 hold the byte-equality
   results for the two 16-byte halves of the current 32-byte group.  */
L(shr_15_gobble):
|
|
sub $32, %ecx
|
|
/* Prime the pipeline with the first 32-byte group.  */
movdqa 16(%esi), %xmm0
|
|
palignr $15, (%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $15, 16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_15_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
/* CF from this sub is consumed by the sbb below; the SSE instructions
   in between do not modify EFLAGS.  */
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
/* Keep the first-half result in xmm1 for L(exit).  */
movdqa %xmm0, %xmm1
|
|
|
|
/* Start the comparisons for the next 32-byte group.  */
movdqa 64(%esi), %xmm3
|
|
palignr $15,48(%esi), %xmm3
|
|
/* %edx = mask - 0xffff - CF: zero only if all 32 bytes matched AND the
   counter did not borrow.  lea / pcmpeqb below preserve ZF for the jz.  */
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $15,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_15_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
/* Loop left either on a mismatch or because %ecx went negative.  In the
   latter case the sbb subtracted an extra 1 (the borrow): undo it and
   give the 32 back to %ecx.  */
cmp $0, %ecx
|
|
jge L(shr_15_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_15_gobble_next):
|
|
/* Non-zero %edx now means a real mismatch in the group just tested.  */
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
/* Check the group whose comparison was already started in the loop.  */
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
/* Everything matched so far: hand the remaining %ecx bytes to the
   short-length code with %eax / %edx pointing at the buffer ends.  */
lea (%ecx, %edi,1), %eax
|
|
lea 15(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
#endif
|
|
|
|
cfi_restore_state
|
|
cfi_remember_state
|
|
.p2align 4
|
|
/* A 32-byte group contained a mismatch.
   In:  xmm1 = byte-equality result of the group's first 16 bytes,
        %edx = (combined equality mask - 0xffff), %edi / %esi already
        advanced past the group, %eax = amount to add to %esi.
   Decide which 16-byte half holds the mismatch and leave %edi / %esi
   just past that half with the half's mask difference in %edx.  */
L(exit):
|
|
pmovmskb %xmm1, %ebx
|
|
sub $0xffff, %ebx
|
|
/* First half fully equal: the mismatch is in the second half and %edx
   already describes it.  */
jz L(first16bytes)
|
|
/* Mismatch in the first half: step both pointers back 16 bytes and use
   the first half's mask difference instead.  */
lea -16(%esi), %esi
|
|
lea -16(%edi), %edi
|
|
mov %ebx, %edx
|
|
|
|
L(first16bytes):
|
|
/* Re-apply the offset saved in %eax by the shr_N entry code
   (presumably the source misalignment) so %esi addresses the real
   source bytes.  */
add %eax, %esi
|
|
/* Locate the first differing element inside a 16-byte block.
   In:  %edi / %esi point just past the block, %edx = (16-bit equality
        mask - 0xffff); the tests below look for its lowest set bit.  */
L(less16bytes):
|
|
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
/* Byte variant.  Low mask byte zero => mismatch is in bytes 8..15.  */
test %dl, %dl
|
|
jz L(next_24_bytes)
|
|
|
|
/* Bit k of %dl set first => byte k of the block differs; L(Byte16+k)
   reads it at offset -(16-k) from the end pointers.  */
test $0x01, %dl
|
|
jnz L(Byte16)
|
|
|
|
test $0x02, %dl
|
|
jnz L(Byte17)
|
|
|
|
test $0x04, %dl
|
|
jnz L(Byte18)
|
|
|
|
test $0x08, %dl
|
|
jnz L(Byte19)
|
|
|
|
test $0x10, %dl
|
|
jnz L(Byte20)
|
|
|
|
test $0x20, %dl
|
|
jnz L(Byte21)
|
|
|
|
test $0x40, %dl
|
|
jnz L(Byte22)
|
|
/* Only bit 7 is left: byte 7 of the block (offset -9).  Result is the
   difference of the two bytes, zero-extended (unsigned compare).  */
L(Byte23):
|
|
movzbl -9(%edi), %eax
|
|
movzbl -9(%esi), %edx
|
|
sub %edx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* L(Byte16) .. L(Byte22): return the difference of the zero-extended
   bytes at offsets -16 .. -10 from %edi and %esi.  Reached from the bit
   tests on %dl, and again from L(next_24_bytes) after both pointers
   have been advanced by 8.  RETURN is a macro defined outside this
   view.  */
L(Byte16):
|
|
movzbl -16(%edi), %eax
|
|
movzbl -16(%esi), %edx
|
|
sub %edx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Byte17):
|
|
movzbl -15(%edi), %eax
|
|
movzbl -15(%esi), %edx
|
|
sub %edx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Byte18):
|
|
movzbl -14(%edi), %eax
|
|
movzbl -14(%esi), %edx
|
|
sub %edx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Byte19):
|
|
movzbl -13(%edi), %eax
|
|
movzbl -13(%esi), %edx
|
|
sub %edx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Byte20):
|
|
movzbl -12(%edi), %eax
|
|
movzbl -12(%esi), %edx
|
|
sub %edx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Byte21):
|
|
movzbl -11(%edi), %eax
|
|
movzbl -11(%esi), %edx
|
|
sub %edx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Byte22):
|
|
movzbl -10(%edi), %eax
|
|
movzbl -10(%esi), %edx
|
|
sub %edx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Mismatch is in the upper 8 bytes of the 16-byte block (%dl was zero).
   Advance both pointers by 8 so the L(Byte16)..L(Byte22) handlers,
   which use offsets -16..-10, address bytes 8..14 of the block, then
   repeat the bit tests on %dh.  */
L(next_24_bytes):
|
|
lea 8(%edi), %edi
|
|
lea 8(%esi), %esi
|
|
test $0x01, %dh
|
|
jnz L(Byte16)
|
|
|
|
test $0x02, %dh
|
|
jnz L(Byte17)
|
|
|
|
test $0x04, %dh
|
|
jnz L(Byte18)
|
|
|
|
test $0x08, %dh
|
|
jnz L(Byte19)
|
|
|
|
test $0x10, %dh
|
|
jnz L(Byte20)
|
|
|
|
test $0x20, %dh
|
|
jnz L(Byte21)
|
|
|
|
test $0x40, %dh
|
|
jnz L(Byte22)
|
|
|
|
.p2align 4
|
|
/* None of bits 0..6 of %dh set: the last byte of the block differs
   (offset -9 after the +8 adjustment above).  */
L(Byte31):
|
|
movzbl -9(%edi), %eax
|
|
movzbl -9(%esi), %edx
|
|
sub %edx, %eax
|
|
RETURN_END
|
|
#elif defined(USE_AS_WMEMCMP)
|
|
|
|
/* special for wmemcmp */
|
|
/* wmemcmp variant: find which of the four 32-bit elements of the
   16-byte block differs (4 mask bits per element) and return +1 or -1
   from a SIGNED comparison of that element (jg).  */
test %dl, %dl
|
|
jz L(next_two_double_words)
|
|
/* Low nibble of %dl zero => first element equal, second one differs.  */
and $15, %dl
|
|
jz L(second_double_word)
|
|
mov -16(%edi), %ecx
|
|
cmp -16(%esi), %ecx
|
|
/* mov does not touch EFLAGS, so jg still tests the cmp above.  */
mov $1, %eax
|
|
jg L(nequal_bigger)
|
|
neg %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(second_double_word):
|
|
mov -12(%edi), %ecx
|
|
cmp -12(%esi), %ecx
|
|
mov $1, %eax
|
|
jg L(nequal_bigger)
|
|
neg %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Low mask byte was zero: the mismatch is in the third or fourth
   element; the low nibble of %dh tells which.  */
L(next_two_double_words):
|
|
and $15, %dh
|
|
jz L(fourth_double_word)
|
|
mov -8(%edi), %ecx
|
|
cmp -8(%esi), %ecx
|
|
mov $1, %eax
|
|
jg L(nequal_bigger)
|
|
neg %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(fourth_double_word):
|
|
mov -4(%edi), %ecx
|
|
cmp -4(%esi), %ecx
|
|
mov $1, %eax
|
|
jg L(nequal_bigger)
|
|
neg %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Element at %edi is greater: return with %eax == 1.  */
L(nequal_bigger):
|
|
RETURN_END
|
|
|
|
#elif defined(USE_AS_MEMCMP16)
|
|
|
|
/* special for __memcmp16 */
|
|
/* __memcmp16 variant: find which of the eight 16-bit elements of the
   16-byte block differs (2 mask bits per element) and return the
   difference of the two zero-extended elements.  %ebx is used as
   scratch for the %esi-side element.  */
test %dl, %dl
|
|
jz L(next_four_words)
|
|
/* Bits 0..3 clear => elements 0 and 1 equal.  */
test $15, %dl
|
|
jz L(second_two_words)
|
|
/* Bits 0..1 clear => element 0 equal, element 1 differs.  */
test $3, %dl
|
|
jz L(second_word)
|
|
movzwl -16(%edi), %eax
|
|
movzwl -16(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(second_word):
|
|
movzwl -14(%edi), %eax
|
|
movzwl -14(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Mismatch is in element 2 or 3; bits 0..5 clear => element 3.  */
L(second_two_words):
|
|
test $63, %dl
|
|
jz L(fourth_word)
|
|
movzwl -12(%edi), %eax
|
|
movzwl -12(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(fourth_word):
|
|
movzwl -10(%edi), %eax
|
|
movzwl -10(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Low mask byte was zero: repeat the same search on %dh for
   elements 4..7.  */
L(next_four_words):
|
|
test $15, %dh
|
|
jz L(fourth_two_words)
|
|
test $3, %dh
|
|
jz L(sixth_word)
|
|
movzwl -8(%edi), %eax
|
|
movzwl -8(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(sixth_word):
|
|
movzwl -6(%edi), %eax
|
|
movzwl -6(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Mismatch is in element 6 or 7; bits 0..5 of %dh clear => element 7.  */
L(fourth_two_words):
|
|
test $63, %dh
|
|
jz L(eighth_word)
|
|
movzwl -4(%edi), %eax
|
|
movzwl -4(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* __memcmp16: the last (eighth) 16-bit element of the block differs;
   return the difference of the two zero-extended elements.
   This is the final exit of the memcmp16 branch, so it uses RETURN_END
   like the final exits of the byte and wmemcmp branches (L(Byte31),
   L(nequal_bigger)).  The CFI_PUSH (%ebx) that follows the #endif must
   start from the post-pop unwind state; RETURN would presumably
   re-establish the remembered three-register state first (macro
   definitions are outside this view -- confirm).  The executed
   instructions are unchanged.  */
L(eighth_word):
|
|
movzwl -2(%edi), %eax
|
|
movzwl -2(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN_END
|
|
#else
|
|
# error Unreachable preprocessor case
|
|
#endif
|
|
|
|
CFI_PUSH (%ebx)
|
|
|
|
.p2align 4
|
|
/* Dispatch for 8 <= %ecx < 16 remaining bytes (larger counts are passed
   on to L(more16bytes)).  Jumps to the L(Nbytes) entry matching %ecx;
   those entries read at negative offsets from the end pointers
   %eax / %edx.  The last candidate is reached by an unconditional jmp
   rather than a compare.  */
L(more8bytes):
|
|
cmp $16, %ecx
|
|
jae L(more16bytes)
|
|
cmp $8, %ecx
|
|
je L(8bytes)
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
/* Byte elements: any count 9..15.  */
cmp $9, %ecx
|
|
je L(9bytes)
|
|
cmp $10, %ecx
|
|
je L(10bytes)
|
|
cmp $11, %ecx
|
|
je L(11bytes)
|
|
cmp $12, %ecx
|
|
je L(12bytes)
|
|
cmp $13, %ecx
|
|
je L(13bytes)
|
|
cmp $14, %ecx
|
|
je L(14bytes)
|
|
jmp L(15bytes)
|
|
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
/* 4-byte elements: the only other count in range is 12.  */
jmp L(12bytes)
|
|
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
|
|
/* 2-byte elements: 10, 12 or 14.  */
cmp $10, %ecx
|
|
je L(10bytes)
|
|
cmp $12, %ecx
|
|
je L(12bytes)
|
|
jmp L(14bytes)
|
|
#else
|
|
# error Unreachable preprocessor case
|
|
#endif
|
|
|
|
.p2align 4
|
|
/* Dispatch for 16 <= %ecx < 24 remaining bytes (larger counts are passed
   on to L(more24bytes)).  Same scheme as L(more8bytes): jump to the
   L(Nbytes) entry matching %ecx, with the last candidate taken by an
   unconditional jmp.  */
L(more16bytes):
|
|
cmp $24, %ecx
|
|
jae L(more24bytes)
|
|
cmp $16, %ecx
|
|
je L(16bytes)
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
/* Byte elements: any count 17..23.  */
cmp $17, %ecx
|
|
je L(17bytes)
|
|
cmp $18, %ecx
|
|
je L(18bytes)
|
|
cmp $19, %ecx
|
|
je L(19bytes)
|
|
cmp $20, %ecx
|
|
je L(20bytes)
|
|
cmp $21, %ecx
|
|
je L(21bytes)
|
|
cmp $22, %ecx
|
|
je L(22bytes)
|
|
jmp L(23bytes)
|
|
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
/* 4-byte elements: the only other count in range is 20.  */
jmp L(20bytes)
|
|
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
|
|
/* 2-byte elements: 18, 20 or 22.  */
cmp $18, %ecx
|
|
je L(18bytes)
|
|
cmp $20, %ecx
|
|
je L(20bytes)
|
|
jmp L(22bytes)
|
|
#else
|
|
# error Unreachable preprocessor case
|
|
#endif
|
|
|
|
.p2align 4
|
|
/* Dispatch for 24 <= %ecx < 32 remaining bytes (larger counts are passed
   on to L(more32bytes)).  Same scheme as L(more8bytes): jump to the
   L(Nbytes) entry matching %ecx, with the last candidate taken by an
   unconditional jmp.  */
L(more24bytes):
|
|
cmp $32, %ecx
|
|
jae L(more32bytes)
|
|
cmp $24, %ecx
|
|
je L(24bytes)
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
/* Byte elements: any count 25..31.  */
cmp $25, %ecx
|
|
je L(25bytes)
|
|
cmp $26, %ecx
|
|
je L(26bytes)
|
|
cmp $27, %ecx
|
|
je L(27bytes)
|
|
cmp $28, %ecx
|
|
je L(28bytes)
|
|
cmp $29, %ecx
|
|
je L(29bytes)
|
|
cmp $30, %ecx
|
|
je L(30bytes)
|
|
jmp L(31bytes)
|
|
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
/* 4-byte elements: the only other count in range is 28.  */
jmp L(28bytes)
|
|
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
|
|
/* 2-byte elements: 26, 28 or 30.  */
cmp $26, %ecx
|
|
je L(26bytes)
|
|
cmp $28, %ecx
|
|
je L(28bytes)
|
|
jmp L(30bytes)
|
|
#else
|
|
# error Unreachable preprocessor case
|
|
#endif
|
|
|
|
.p2align 4
|
|
/* Dispatch for 32 <= %ecx < 40 remaining bytes (larger counts are passed
   on to L(more40bytes)).  Same scheme as L(more8bytes): jump to the
   L(Nbytes) entry matching %ecx, with the last candidate taken by an
   unconditional jmp.  */
L(more32bytes):
|
|
cmp $40, %ecx
|
|
jae L(more40bytes)
|
|
cmp $32, %ecx
|
|
je L(32bytes)
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
/* Byte elements: any count 33..39.  */
cmp $33, %ecx
|
|
je L(33bytes)
|
|
cmp $34, %ecx
|
|
je L(34bytes)
|
|
cmp $35, %ecx
|
|
je L(35bytes)
|
|
cmp $36, %ecx
|
|
je L(36bytes)
|
|
cmp $37, %ecx
|
|
je L(37bytes)
|
|
cmp $38, %ecx
|
|
je L(38bytes)
|
|
jmp L(39bytes)
|
|
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
/* 4-byte elements: the only other count in range is 36.  */
jmp L(36bytes)
|
|
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
|
|
/* 2-byte elements: 34, 36 or 38.  */
cmp $34, %ecx
|
|
je L(34bytes)
|
|
cmp $36, %ecx
|
|
je L(36bytes)
|
|
jmp L(38bytes)
|
|
#else
|
|
# error Unreachable preprocessor case
|
|
#endif
|
|
|
|
.p2align 4
|
|
/* Entry point of the short-length comparison.
   In:  %ecx = number of bytes left, %eax / %edx = pointers just past
        the end of the two regions (the L(Nbytes) code reads -N(%eax)
        and -N(%edx)).  The code jumped to pops %ebx, so it is expected
        to be on the stack here.
   Counts of 8 and above go to L(more8bytes); smaller ones are handled
   below.
   NOTE(review): counts not matched by an explicit compare (including 0
   and 1 in the byte variant) take the final unconditional jmp and so
   re-read bytes in front of the remaining region -- presumably bytes
   the caller has already verified equal; confirm against the callers
   outside this view.  */
L(less48bytes):
|
|
cmp $8, %ecx
|
|
jae L(more8bytes)
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
cmp $2, %ecx
|
|
je L(2bytes)
|
|
cmp $3, %ecx
|
|
je L(3bytes)
|
|
cmp $4, %ecx
|
|
je L(4bytes)
|
|
cmp $5, %ecx
|
|
je L(5bytes)
|
|
cmp $6, %ecx
|
|
je L(6bytes)
|
|
jmp L(7bytes)
|
|
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
jmp L(4bytes)
|
|
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
|
|
cmp $2, %ecx
|
|
je L(2bytes)
|
|
cmp $4, %ecx
|
|
je L(4bytes)
|
|
jmp L(6bytes)
|
|
#else
|
|
# error Unreachable preprocessor case
|
|
#endif
|
|
|
|
.p2align 4
|
|
/* Dispatch for %ecx >= 40 remaining bytes (reached from L(more32bytes)).
   When neither preprocessor branch below is active (the USE_WCHAR
   build), a count other than 40 falls straight through into the code
   that follows this block -- L(44bytes) in the wmemcmp ladder, assuming
   USE_WCHAR and USE_AS_WMEMCMP are defined together (confirm at the top
   of the file).  */
L(more40bytes):
|
|
cmp $40, %ecx
|
|
je L(40bytes)
|
|
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
|
/* Byte elements: any count 41..47.  */
cmp $41, %ecx
|
|
je L(41bytes)
|
|
cmp $42, %ecx
|
|
je L(42bytes)
|
|
cmp $43, %ecx
|
|
je L(43bytes)
|
|
cmp $44, %ecx
|
|
je L(44bytes)
|
|
cmp $45, %ecx
|
|
je L(45bytes)
|
|
cmp $46, %ecx
|
|
je L(46bytes)
|
|
jmp L(47bytes)
|
|
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
|
|
/* 2-byte elements: 42, 44 or 46.  */
cmp $42, %ecx
|
|
je L(42bytes)
|
|
cmp $44, %ecx
|
|
je L(44bytes)
|
|
jmp L(46bytes)
|
|
#endif
|
|
|
|
#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
|
|
.p2align 4
|
|
/* Byte variant, remaining length a multiple of 4: fall-through ladder.
   Entering at L(Nbytes) compares the last N bytes, one 32-bit word per
   rung, at negative offsets from the end pointers %eax / %edx.  The
   first unequal word is passed in %ecx / %ebx to L(find_diff).  */
L(44bytes):
|
|
mov -44(%eax), %ecx
|
|
mov -44(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(40bytes):
|
|
mov -40(%eax), %ecx
|
|
mov -40(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(36bytes):
|
|
mov -36(%eax), %ecx
|
|
mov -36(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(32bytes):
|
|
mov -32(%eax), %ecx
|
|
mov -32(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(28bytes):
|
|
mov -28(%eax), %ecx
|
|
mov -28(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(24bytes):
|
|
mov -24(%eax), %ecx
|
|
mov -24(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(20bytes):
|
|
mov -20(%eax), %ecx
|
|
mov -20(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(16bytes):
|
|
mov -16(%eax), %ecx
|
|
mov -16(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(12bytes):
|
|
mov -12(%eax), %ecx
|
|
mov -12(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(8bytes):
|
|
mov -8(%eax), %ecx
|
|
mov -8(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(4bytes):
|
|
mov -4(%eax), %ecx
|
|
mov -4(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
/* Preload the "equal" result; mov (unlike xor) leaves the flags of the
   cmp above intact for the jne.  */
mov $0, %eax
|
|
jne L(find_diff)
|
|
POP (%ebx)
|
|
ret
|
|
/* Unwind annotation for the code that follows the ret, which is entered
   with %ebx still pushed.  */
CFI_PUSH (%ebx)
|
|
#elif defined(USE_AS_WMEMCMP)
|
|
|
|
.p2align 4
|
|
/* wmemcmp variant: fall-through ladder.  Entering at L(Nbytes) compares
   the last N bytes, one 32-bit element per rung, at negative offsets
   from the end pointers %eax / %edx.  On the first unequal element
   control goes to L(find_diff) with the flags of the cmp still live.  */
L(44bytes):
|
|
mov -44(%eax), %ecx
|
|
cmp -44(%edx), %ecx
|
|
jne L(find_diff)
|
|
L(40bytes):
|
|
mov -40(%eax), %ecx
|
|
cmp -40(%edx), %ecx
|
|
jne L(find_diff)
|
|
L(36bytes):
|
|
mov -36(%eax), %ecx
|
|
cmp -36(%edx), %ecx
|
|
jne L(find_diff)
|
|
L(32bytes):
|
|
mov -32(%eax), %ecx
|
|
cmp -32(%edx), %ecx
|
|
jne L(find_diff)
|
|
L(28bytes):
|
|
mov -28(%eax), %ecx
|
|
cmp -28(%edx), %ecx
|
|
jne L(find_diff)
|
|
L(24bytes):
|
|
mov -24(%eax), %ecx
|
|
cmp -24(%edx), %ecx
|
|
jne L(find_diff)
|
|
L(20bytes):
|
|
mov -20(%eax), %ecx
|
|
cmp -20(%edx), %ecx
|
|
jne L(find_diff)
|
|
L(16bytes):
|
|
mov -16(%eax), %ecx
|
|
cmp -16(%edx), %ecx
|
|
jne L(find_diff)
|
|
L(12bytes):
|
|
mov -12(%eax), %ecx
|
|
cmp -12(%edx), %ecx
|
|
jne L(find_diff)
|
|
L(8bytes):
|
|
mov -8(%eax), %ecx
|
|
cmp -8(%edx), %ecx
|
|
jne L(find_diff)
|
|
L(4bytes):
|
|
mov -4(%eax), %ecx
|
|
/* Clear the result before the final cmp: %eax is no longer needed as a
   pointer, and xor would destroy the flags if placed after the cmp.  */
xor %eax, %eax
|
|
cmp -4(%edx), %ecx
|
|
jne L(find_diff)
|
|
POP (%ebx)
|
|
ret
|
|
/* Unwind annotation for the code that follows the ret, which is entered
   with %ebx still pushed.  */
CFI_PUSH (%ebx)
|
|
#elif defined USE_AS_MEMCMP16
|
|
|
|
.p2align 4
|
|
/* __memcmp16 variant: fall-through ladder.  Entering at L(Nbytes)
   compares the last N bytes, one zero-extended 16-bit element per rung,
   at negative offsets from the end pointers %eax / %edx.  Each rung
   leaves (element at %eax side - element at %edx side) in %ecx; a
   non-zero difference is returned through L(memcmp16_exit).  */
L(46bytes):
|
|
movzwl -46(%eax), %ecx
|
|
movzwl -46(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(44bytes):
|
|
movzwl -44(%eax), %ecx
|
|
movzwl -44(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(42bytes):
|
|
movzwl -42(%eax), %ecx
|
|
movzwl -42(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(40bytes):
|
|
movzwl -40(%eax), %ecx
|
|
movzwl -40(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(38bytes):
|
|
movzwl -38(%eax), %ecx
|
|
movzwl -38(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(36bytes):
|
|
movzwl -36(%eax), %ecx
|
|
movzwl -36(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(34bytes):
|
|
movzwl -34(%eax), %ecx
|
|
movzwl -34(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(32bytes):
|
|
movzwl -32(%eax), %ecx
|
|
movzwl -32(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(30bytes):
|
|
movzwl -30(%eax), %ecx
|
|
movzwl -30(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(28bytes):
|
|
movzwl -28(%eax), %ecx
|
|
movzwl -28(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(26bytes):
|
|
movzwl -26(%eax), %ecx
|
|
movzwl -26(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(24bytes):
|
|
movzwl -24(%eax), %ecx
|
|
movzwl -24(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(22bytes):
|
|
movzwl -22(%eax), %ecx
|
|
movzwl -22(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(20bytes):
|
|
movzwl -20(%eax), %ecx
|
|
movzwl -20(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(18bytes):
|
|
movzwl -18(%eax), %ecx
|
|
movzwl -18(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(16bytes):
|
|
movzwl -16(%eax), %ecx
|
|
movzwl -16(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(14bytes):
|
|
movzwl -14(%eax), %ecx
|
|
movzwl -14(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(12bytes):
|
|
movzwl -12(%eax), %ecx
|
|
movzwl -12(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(10bytes):
|
|
movzwl -10(%eax), %ecx
|
|
movzwl -10(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(8bytes):
|
|
movzwl -8(%eax), %ecx
|
|
movzwl -8(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(6bytes):
|
|
movzwl -6(%eax), %ecx
|
|
movzwl -6(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(4bytes):
|
|
movzwl -4(%eax), %ecx
|
|
movzwl -4(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
/* Last element: its difference (zero when equal) is computed directly
   in %eax and returned.  */
L(2bytes):
|
|
movzwl -2(%eax), %eax
|
|
movzwl -2(%edx), %ebx
|
|
subl %ebx, %eax
|
|
POP (%ebx)
|
|
ret
|
|
/* Unwind annotation for the code that follows the ret.  */
CFI_PUSH (%ebx)
|
|
#else
|
|
# error Unreachable preprocessor case
|
|
#endif
|
|
|
|
#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
|
|
|
|
.p2align 4
|
|
/* Byte variant, remaining length == 1 (mod 4): fall-through ladder of
   32-bit word compares at negative offsets from the end pointers
   %eax / %edx, finished by a single-byte compare at offset -1.  */
L(45bytes):
|
|
mov -45(%eax), %ecx
|
|
mov -45(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(41bytes):
|
|
mov -41(%eax), %ecx
|
|
mov -41(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(37bytes):
|
|
mov -37(%eax), %ecx
|
|
mov -37(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(33bytes):
|
|
mov -33(%eax), %ecx
|
|
mov -33(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(29bytes):
|
|
mov -29(%eax), %ecx
|
|
mov -29(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(25bytes):
|
|
mov -25(%eax), %ecx
|
|
mov -25(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(21bytes):
|
|
mov -21(%eax), %ecx
|
|
mov -21(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(17bytes):
|
|
mov -17(%eax), %ecx
|
|
mov -17(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(13bytes):
|
|
mov -13(%eax), %ecx
|
|
mov -13(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(9bytes):
|
|
mov -9(%eax), %ecx
|
|
mov -9(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(5bytes):
|
|
mov -5(%eax), %ecx
|
|
mov -5(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
/* Final odd byte.  */
movzbl -1(%eax), %ecx
|
|
cmp -1(%edx), %cl
|
|
/* Preload the "equal" result; mov keeps the flags of the cmp above, which
   L(end) also relies on to pick the sign.  */
mov $0, %eax
|
|
jne L(end)
|
|
POP (%ebx)
|
|
ret
|
|
/* Unwind annotation for the code that follows the ret.  */
CFI_PUSH (%ebx)
|
|
|
|
.p2align 4
|
|
/* Byte variant, remaining length == 2 (mod 4): fall-through ladder of
   32-bit word compares at negative offsets from the end pointers
   %eax / %edx, finished by a byte-by-byte compare of the last two
   bytes.  */
L(46bytes):
|
|
mov -46(%eax), %ecx
|
|
mov -46(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(42bytes):
|
|
mov -42(%eax), %ecx
|
|
mov -42(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(38bytes):
|
|
mov -38(%eax), %ecx
|
|
mov -38(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(34bytes):
|
|
mov -34(%eax), %ecx
|
|
mov -34(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(30bytes):
|
|
mov -30(%eax), %ecx
|
|
mov -30(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(26bytes):
|
|
mov -26(%eax), %ecx
|
|
mov -26(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(22bytes):
|
|
mov -22(%eax), %ecx
|
|
mov -22(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(18bytes):
|
|
mov -18(%eax), %ecx
|
|
mov -18(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(14bytes):
|
|
mov -14(%eax), %ecx
|
|
mov -14(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(10bytes):
|
|
mov -10(%eax), %ecx
|
|
mov -10(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(6bytes):
|
|
mov -6(%eax), %ecx
|
|
mov -6(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
/* Last two bytes: compare the lower-addressed byte first (%cl / %bl),
   then the higher one (%ch / %bh), so the first difference in memory
   order decides the result.  */
L(2bytes):
|
|
movzwl -2(%eax), %ecx
|
|
movzwl -2(%edx), %ebx
|
|
cmp %bl, %cl
|
|
jne L(end)
|
|
cmp %bh, %ch
|
|
/* mov keeps the flags of the cmp above for the jne and for L(end).  */
mov $0, %eax
|
|
jne L(end)
|
|
POP (%ebx)
|
|
ret
|
|
/* Unwind annotation for the code that follows the ret.  */
CFI_PUSH (%ebx)
|
|
|
|
.p2align 4
|
|
/* Byte variant, remaining length == 3 (mod 4): fall-through ladder of
   32-bit word compares at negative offsets from the end pointers
   %eax / %edx, finished by a compare of the last three bytes.  */
L(47bytes):
|
|
movl -47(%eax), %ecx
|
|
movl -47(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(43bytes):
|
|
movl -43(%eax), %ecx
|
|
movl -43(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(39bytes):
|
|
movl -39(%eax), %ecx
|
|
movl -39(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(35bytes):
|
|
movl -35(%eax), %ecx
|
|
movl -35(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(31bytes):
|
|
movl -31(%eax), %ecx
|
|
movl -31(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(27bytes):
|
|
movl -27(%eax), %ecx
|
|
movl -27(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(23bytes):
|
|
movl -23(%eax), %ecx
|
|
movl -23(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(19bytes):
|
|
movl -19(%eax), %ecx
|
|
movl -19(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(15bytes):
|
|
movl -15(%eax), %ecx
|
|
movl -15(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(11bytes):
|
|
movl -11(%eax), %ecx
|
|
movl -11(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
L(7bytes):
|
|
movl -7(%eax), %ecx
|
|
movl -7(%edx), %ebx
|
|
cmp %ebx, %ecx
|
|
jne L(find_diff)
|
|
/* Last three bytes: the 16-bit word at -3 is checked low byte first,
   then as a whole (only the high byte can differ at that point), and
   finally the single byte at -1.  */
L(3bytes):
|
|
movzwl -3(%eax), %ecx
|
|
movzwl -3(%edx), %ebx
|
|
cmpb %bl, %cl
|
|
jne L(end)
|
|
cmp %bx, %cx
|
|
jne L(end)
|
|
movzbl -1(%eax), %eax
|
|
cmpb -1(%edx), %al
|
|
/* mov keeps the flags of the cmpb above for the jne and for L(end).  */
mov $0, %eax
|
|
jne L(end)
|
|
POP (%ebx)
|
|
ret
|
|
/* Unwind annotation for the code that follows the ret.  */
CFI_PUSH (%ebx)
|
|
|
|
.p2align 4
|
|
/* Byte variant: %ecx and %ebx hold two unequal 32-bit words loaded from
   the first and second buffer.  Find the first differing byte in
   memory (little-endian) order and leave the flags of that comparison
   for L(end).  */
L(find_diff):
|
|
/* Byte 0.  */
cmpb %bl, %cl
|
|
jne L(end)
|
|
/* Byte 0 is equal, so a 16-bit difference is decided by byte 1.  */
cmp %bx, %cx
|
|
jne L(end)
|
|
/* Bring bytes 2 and 3 down into the low halves and repeat.  */
shr $16,%ecx
|
|
shr $16,%ebx
|
|
cmp %bl, %cl
|
|
jne L(end)
|
|
cmp %bx, %cx
|
|
|
|
.p2align 4
|
|
/* In:  flags from an unsigned compare of the first differing bytes.
   Returns +1 if the first buffer's byte is above the second's (ja),
   otherwise -1.  POP (macro defined outside this view, presumably a
   popl plus CFI bookkeeping) and mov leave the flags untouched.  */
L(end):
|
|
POP (%ebx)
|
|
mov $1, %eax
|
|
ja L(bigger)
|
|
neg %eax
|
|
L(bigger):
|
|
ret
|
|
#elif defined(USE_AS_WMEMCMP)
|
|
|
|
.p2align 4
|
|
/* wmemcmp variant: entered with the flags of a cmp between two unequal
   32-bit elements.  Returns +1 if the first buffer's element is greater
   in a SIGNED comparison (jg), otherwise -1.  POP and mov are relied on
   to keep those flags intact.  */
L(find_diff):
|
|
POP (%ebx)
|
|
mov $1, %eax
|
|
jg L(find_diff_bigger)
|
|
neg %eax
|
|
ret
|
|
|
|
.p2align 4
|
|
/* Greater: return with %eax == 1.  */
L(find_diff_bigger):
|
|
ret
|
|
|
|
#elif defined(USE_AS_MEMCMP16)
|
|
|
|
.p2align 4
|
|
/* __memcmp16 variant: %ecx holds the non-zero difference of the first
   unequal pair of 16-bit elements; restore %ebx and return it.  */
L(memcmp16_exit):
|
|
POP (%ebx)
|
|
mov %ecx, %eax
|
|
ret
|
|
#else
|
|
# error Unreachable preprocessor case
|
|
#endif
|
|
END (MEMCMP)
|