Add 32-bit Silvermont-optimized string/memory functions.
Add following functions:
bcopy, memcpy, memmove, memset, bzero, memcmp, wmemcmp, strlen,
strcpy, strncpy, stpcpy, stpncpy.
Create new directories inside arch-x86 to specify architecture: atom,
silvermont and generic (non atom or silvermont architectures are treated like generic).
Due to introducing optimized versions of stpcpy and stpncpy,
c-implementations of these functions are moved from
common for architectures makefile to arm and mips specific makefiles.
Change-Id: I990f8061c3e9bca1f154119303da9e781c5d086e
Signed-off-by: Varvara Rainchik <varvara.rainchik@intel.com>
2014-04-24 15:41:20 +04:00
|
|
|
/*
|
|
|
|
Copyright (c) 2014, Intel Corporation
|
|
|
|
All rights reserved.
|
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer.
|
|
|
|
|
|
|
|
* Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
|
|
* and/or other materials provided with the distribution.
|
|
|
|
|
|
|
|
* Neither the name of Intel Corporation nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
|
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
|
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
2014-07-07 15:42:06 -07:00
|
|
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
|
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
Add 32-bit Silvermont-optimized string/memory functions.
Add following functions:
bcopy, memcpy, memmove, memset, bzero, memcmp, wmemcmp, strlen,
strcpy, strncpy, stpcpy, stpncpy.
Create new directories inside arch-x86 to specify architecture: atom,
silvermont and generic (non atom or silvermont architectures are treated like generic).
Due to introducing optimized versions of stpcpy and stpncpy,
c-implementations of these functions are moved from
common for architectures makefile to arm and mips specific makefiles.
Change-Id: I990f8061c3e9bca1f154119303da9e781c5d086e
Signed-off-by: Varvara Rainchik <varvara.rainchik@intel.com>
2014-04-24 15:41:20 +04:00
|
|
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
|
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
|
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* L(label): forms the label name ".L<label>" by token pasting, unless the
   including file has already defined L. */
#ifndef L
|
|
|
|
# define L(label) .L##label
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Thin wrappers that map each cfi_* name onto the assembler directive of
   the same name (.cfi_*). Each one is defined only if the including file
   has not already provided it. */
#ifndef cfi_startproc
|
|
|
|
# define cfi_startproc .cfi_startproc
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef cfi_endproc
|
|
|
|
# define cfi_endproc .cfi_endproc
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef cfi_rel_offset
|
|
|
|
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef cfi_restore
|
|
|
|
# define cfi_restore(reg) .cfi_restore reg
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef cfi_adjust_cfa_offset
|
|
|
|
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef cfi_remember_state
|
|
|
|
# define cfi_remember_state .cfi_remember_state
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef cfi_restore_state
|
|
|
|
# define cfi_restore_state .cfi_restore_state
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* ENTRY(name): opens a function. Marks name as a function symbol, exports
   it with .globl, aligns it with .p2align 4, emits the label and starts
   the CFI region. Must be paired with END(name). */
#ifndef ENTRY
|
|
|
|
# define ENTRY(name) \
|
|
|
|
.type name, @function; \
|
|
|
|
.globl name; \
|
|
|
|
.p2align 4; \
|
|
|
|
name: \
|
|
|
|
cfi_startproc
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* END(name): closes a function opened with ENTRY(name). Ends the CFI
   region and records the symbol size as the distance from name to here. */
#ifndef END
|
|
|
|
# define END(name) \
|
|
|
|
cfi_endproc; \
|
|
|
|
.size name, .-name
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Name of the exported function. Defaults to memcmp; an including file may
   predefine MEMCMP to emit the same code under another symbol name. */
#ifndef MEMCMP
|
|
|
|
# define MEMCMP memcmp
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* CFI_PUSH(REG): unwind bookkeeping for a 4-byte push of REG. Grows the
   CFA offset by 4 and records REG as saved at offset 0 from the stack
   pointer. Emits no instructions. */
#define CFI_PUSH(REG) \
|
|
|
|
cfi_adjust_cfa_offset (4); \
|
|
|
|
cfi_rel_offset (REG, 0)
|
|
|
|
|
|
|
|
/* CFI_POP(REG): unwind bookkeeping for a 4-byte pop of REG. Shrinks the
   CFA offset by 4 and marks REG as restored. Emits no instructions. */
#define CFI_POP(REG) \
|
|
|
|
cfi_adjust_cfa_offset (-4); \
|
|
|
|
cfi_restore (REG)
|
|
|
|
|
|
|
|
/* PUSH/POP: pushl/popl of REG combined with the matching CFI annotation,
   so the unwind description stays in step with the stack. */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
|
|
|
#define POP(REG) popl REG; CFI_POP (REG)
|
|
|
|
|
|
|
|
/* Stack offsets of the three arguments relative to %esp. They are read at
   function entry, before anything has been pushed:
   BLK1 = 4, BLK2 = 8, LEN = 12. */
#define PARMS 4
|
|
|
|
#define BLK1 PARMS
|
|
|
|
#define BLK2 BLK1 + 4
|
|
|
|
#define LEN BLK2 + 4
|
|
|
|
/* RETURN: pop the saved %ebx and return. The trailing CFI_PUSH emits no
   code; it re-declares %ebx as pushed in the unwind info for whatever code
   is assembled after this ret. */
#define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
|
|
|
|
|
|
|
|
|
|
|
|
#if (defined SHARED || defined __PIC__)
|
|
|
|
# define JMPTBL(I, B) I - B
|
|
|
|
|
|
|
|
/* Load an entry in a jump table into EBX and branch to it. TABLE is a
|
|
|
|
jump table with relative offsets. INDEX is a register containing the
|
|
|
|
index into the jump table. SCALE is the scale of INDEX. */
|
|
|
|
|
|
|
|
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
|
|
|
/* We first load PC into EBX. */ \
|
|
|
|
call __x86.get_pc_thunk.bx; \
|
|
|
|
/* Get the address of the jump table. */ \
|
|
|
|
addl $(TABLE - .), %ebx; \
|
|
|
|
/* Get the entry and convert the relative offset to the \
|
|
|
|
absolute address. */ \
|
|
|
|
addl (%ebx,INDEX,SCALE), %ebx; \
|
|
|
|
/* EBX now holds the absolute address of the target. Go. */ \
|
|
|
|
jmp *%ebx
|
|
|
|
#else
|
|
|
|
# define JMPTBL(I, B) I
|
|
|
|
|
|
|
|
/* Branch through an entry in a jump table. In this non-PIC variant TABLE
|
|
|
|
holds absolute addresses. INDEX is a register containing the
|
|
|
|
index into the jump table. SCALE is the scale of INDEX. */
|
|
|
|
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
|
|
|
jmp *TABLE(,INDEX,SCALE)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/* Warning!
|
|
|
|
wmemcmp has to use SIGNED comparison for elements.
|
|
|
|
memcmp has to use UNSIGNED comparison for elements.
|
|
|
|
*/
|
|
|
|
|
|
|
|
.section .text.sse4.2,"ax",@progbits
|
|
|
|
/* Entry and length dispatch.
   Loads the two buffer pointers into eax/edx and the length into ecx,
   then picks a path by length:
     - memcmp, length 0 or 1  -> L(less1bytes)
     - wmemcmp, length 0      -> L(return0)
     - fewer than 8 bytes     -> L(less8bytes)
     - more than 64 bytes     -> L(64bytesormore)
     - 8..64 bytes            -> jump table, indexed by the byte count.
   xmm0 is cleared here and serves as the all-zero operand of every later
   ptest. */
ENTRY (MEMCMP)
|
|
|
|
movl BLK1(%esp), %eax /* eax = first buffer */
|
|
|
|
movl BLK2(%esp), %edx /* edx = second buffer */
|
|
|
|
movl LEN(%esp), %ecx /* ecx = length argument */
|
|
|
|
|
|
|
|
#ifdef USE_AS_WMEMCMP
|
|
|
|
shl $2, %ecx /* length * 4: element count -> byte count */
|
|
|
|
test %ecx, %ecx
|
|
|
|
jz L(return0) /* nothing to compare */
|
|
|
|
#else
|
|
|
|
cmp $1, %ecx
|
|
|
|
jbe L(less1bytes) /* length 0 or 1; flags are reused there */
|
|
|
|
#endif
|
|
|
|
|
|
|
|
pxor %xmm0, %xmm0 /* xmm0 = 0 for the ptest checks */
|
|
|
|
cmp $64, %ecx
|
|
|
|
ja L(64bytesormore)
|
|
|
|
cmp $8, %ecx /* flags consumed by the jb below */
|
|
|
|
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
PUSH (%ebx) /* memcmp: L(less8bytes) uses %bl and pops ebx */
|
|
|
|
jb L(less8bytes)
|
|
|
|
#else
|
|
|
|
jb L(less8bytes) /* wmemcmp: that path returns without popping */
|
|
|
|
PUSH (%ebx)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
add %ecx, %edx /* point both buffers just past their end; */
|
|
|
|
add %ecx, %eax /* the tail routines use negative offsets */
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
|
|
|
|
|
|
|
|
/* memcmp only: lengths 2..7 (ecx), with ebx already pushed by the entry
   code. Compares one byte at a time in ascending address order, with
   eax/edx still pointing at the start of the buffers. The first mismatch
   goes to L(nonzero) with the flags of that byte compare; running out of
   length goes to L(0bytes). */
#ifndef USE_AS_WMEMCMP
|
|
|
|
.p2align 4
|
|
|
|
L(less8bytes):
|
|
|
|
mov (%eax), %bl
|
|
|
|
cmpb (%edx), %bl
|
|
|
|
jne L(nonzero)
|
|
|
|
|
|
|
|
mov 1(%eax), %bl
|
|
|
|
cmpb 1(%edx), %bl
|
|
|
|
jne L(nonzero)
|
|
|
|
|
|
|
|
cmp $2, %ecx
|
|
|
|
jz L(0bytes) /* both bytes matched */
|
|
|
|
|
|
|
|
mov 2(%eax), %bl
|
|
|
|
cmpb 2(%edx), %bl
|
|
|
|
jne L(nonzero)
|
|
|
|
|
|
|
|
cmp $3, %ecx
|
|
|
|
jz L(0bytes)
|
|
|
|
|
|
|
|
mov 3(%eax), %bl
|
|
|
|
cmpb 3(%edx), %bl
|
|
|
|
jne L(nonzero)
|
|
|
|
|
|
|
|
cmp $4, %ecx
|
|
|
|
jz L(0bytes)
|
|
|
|
|
|
|
|
mov 4(%eax), %bl
|
|
|
|
cmpb 4(%edx), %bl
|
|
|
|
jne L(nonzero)
|
|
|
|
|
|
|
|
cmp $5, %ecx
|
|
|
|
jz L(0bytes)
|
|
|
|
|
|
|
|
mov 5(%eax), %bl
|
|
|
|
cmpb 5(%edx), %bl
|
|
|
|
jne L(nonzero)
|
|
|
|
|
|
|
|
cmp $6, %ecx
|
|
|
|
jz L(0bytes)
|
|
|
|
|
|
|
|
mov 6(%eax), %bl
|
|
|
|
cmpb 6(%edx), %bl
|
|
|
|
je L(0bytes) /* last byte equal; otherwise fall through */
|
|
|
|
|
|
|
|
/* Mismatch: return +1 if the first buffer's byte is greater (unsigned),
   otherwise -1. popl and mov leave the compare flags intact for the ja. */
L(nonzero):
|
|
|
|
POP (%ebx)
|
|
|
|
mov $1, %eax
|
|
|
|
ja L(above)
|
|
|
|
neg %eax
|
|
|
|
L(above):
|
|
|
|
ret
|
|
|
|
CFI_PUSH (%ebx) /* unwind info only: following code has ebx pushed */
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Buffers are equal: restore ebx and return 0. This is jump-table entry 0
   and, in the memcmp build, the exit used by L(less8bytes). */
.p2align 4
|
|
|
|
L(0bytes):
|
|
|
|
POP (%ebx)
|
|
|
|
xor %eax, %eax
|
|
|
|
ret
|
|
|
|
|
|
|
|
#ifdef USE_AS_WMEMCMP
|
|
|
|
|
|
|
|
/* for wmemcmp, case N == 1: the byte count is non-zero and below 8, so
   exactly one 4-byte element is compared. ebx has not been pushed on this
   path, so the returns below do not pop it. */
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(less8bytes):
|
|
|
|
mov (%eax), %ecx
|
|
|
|
cmp (%edx), %ecx
|
|
|
|
je L(return0)
|
|
|
|
mov $1, %eax /* mov keeps the compare flags */
|
|
|
|
jg L(find_diff_bigger) /* signed: first element greater -> +1 */
|
|
|
|
neg %eax /* otherwise -1 */
|
|
|
|
ret
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(find_diff_bigger):
|
|
|
|
ret
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(return0):
|
|
|
|
xor %eax, %eax
|
|
|
|
ret
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* memcmp only: length 0 or 1. Entered straight from the "cmp $1, %ecx" at
   function entry, whose flags are still live, with nothing pushed.
   Length 1 returns the difference of the two bytes, zero-extended;
   length 0 returns 0. */
#ifndef USE_AS_WMEMCMP
|
|
|
|
.p2align 4
|
|
|
|
L(less1bytes):
|
|
|
|
jb L(0bytesend) /* ecx < 1: empty buffers */
|
|
|
|
movzbl (%eax), %eax
|
|
|
|
movzbl (%edx), %edx
|
|
|
|
sub %edx, %eax
|
|
|
|
ret
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(0bytesend):
|
|
|
|
xor %eax, %eax
|
|
|
|
ret
|
|
|
|
#endif
|
|
|
|
/* Main loop for more than 64 bytes.
   ecx = 64 (stride); ebx = bytes not yet compared minus 64.
   Each iteration compares four unaligned 16-byte chunks. pxor leaves the
   XOR of the two chunks in xmm2, and ptest against the all-zero xmm0 sets
   CF only when that XOR is zero, so "jnc" means the chunk differs. When
   fewer than 64 bytes remain, the remainder (0..63) goes through the same
   jump table as the short lengths. */
.p2align 4
|
|
|
|
L(64bytesormore):
|
|
|
|
PUSH (%ebx)
|
|
|
|
mov %ecx, %ebx
|
|
|
|
mov $64, %ecx
|
|
|
|
sub $64, %ebx
|
|
|
|
L(64bytesormore_loop):
|
|
|
|
movdqu (%eax), %xmm1
|
|
|
|
movdqu (%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(find_16diff) /* bytes 0..15 differ */
|
|
|
|
|
|
|
|
movdqu 16(%eax), %xmm1
|
|
|
|
movdqu 16(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(find_32diff) /* bytes 16..31 differ */
|
|
|
|
|
|
|
|
movdqu 32(%eax), %xmm1
|
|
|
|
movdqu 32(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(find_48diff) /* bytes 32..47 differ */
|
|
|
|
|
|
|
|
movdqu 48(%eax), %xmm1
|
|
|
|
movdqu 48(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(find_64diff) /* bytes 48..63 differ */
|
|
|
|
add %ecx, %eax /* advance both buffers by 64 */
|
|
|
|
add %ecx, %edx
|
|
|
|
sub %ecx, %ebx
|
|
|
|
jae L(64bytesormore_loop) /* no borrow: at least 64 bytes remain */
|
|
|
|
add %ebx, %ecx /* ecx = 64 + (remaining - 64) = remaining */
|
|
|
|
add %ecx, %edx /* point just past the end of each buffer */
|
|
|
|
add %ecx, %eax
|
|
|
|
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
|
|
|
|
|
|
|
|
#ifdef USE_AS_WMEMCMP
|
|
|
|
|
|
|
|
/* This label exists only to fill the table_64bytes slots for byte counts
   that are not a multiple of 4. */
|
|
|
|
L(unreal_case):
|
|
|
|
/* no code here */
|
|
|
|
|
|
|
|
#endif
|
|
|
|
/* Reached from the 64-byte loop, where ecx is 64. The labels fall through
   one into the next, so ecx ends up as 16, 32, 48 or 64: the offset just
   past the 16-byte chunk that differed. Both pointers are moved to that
   position and execution continues into the code assembled after this
   block, which re-compares the preceding 16 bytes one dword at a time. */
.p2align 4
|
|
|
|
L(find_16diff):
|
|
|
|
sub $16, %ecx
|
|
|
|
L(find_32diff):
|
|
|
|
sub $16, %ecx
|
|
|
|
L(find_48diff):
|
|
|
|
sub $16, %ecx
|
|
|
|
L(find_64diff):
|
|
|
|
add %ecx, %edx
|
|
|
|
add %ecx, %eax
|
|
|
|
|
|
|
|
/* Tails of 16/12/8/4 bytes, compared one dword at a time at negative
   offsets from the end pointers in eax/edx. Each label falls through to
   the next shorter tail.
   memcmp build: both dwords are loaded (ecx from the first buffer, ebx
   from the second) because L(find_diff) inspects their bytes.
   wmemcmp build: the dword is compared directly against memory, and
   L(find_diff) uses the resulting flags. */
#ifndef USE_AS_WMEMCMP
|
|
|
|
.p2align 4
|
|
|
|
L(16bytes):
|
|
|
|
mov -16(%eax), %ecx
|
|
|
|
mov -16(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(12bytes):
|
|
|
|
mov -12(%eax), %ecx
|
|
|
|
mov -12(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(8bytes):
|
|
|
|
mov -8(%eax), %ecx
|
|
|
|
mov -8(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(4bytes):
|
|
|
|
mov -4(%eax), %ecx
|
|
|
|
mov -4(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
mov $0, %eax /* result if equal; mov (unlike xor) keeps the flags */
|
|
|
|
jne L(find_diff)
|
|
|
|
RETURN
|
|
|
|
#else
|
|
|
|
.p2align 4
|
|
|
|
L(16bytes):
|
|
|
|
mov -16(%eax), %ecx
|
|
|
|
cmp -16(%edx), %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(12bytes):
|
|
|
|
mov -12(%eax), %ecx
|
|
|
|
cmp -12(%edx), %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(8bytes):
|
|
|
|
mov -8(%eax), %ecx
|
|
|
|
cmp -8(%edx), %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(4bytes):
|
|
|
|
mov -4(%eax), %ecx
|
|
|
|
cmp -4(%edx), %ecx
|
|
|
|
mov $0, %eax /* result if equal; mov (unlike xor) keeps the flags */
|
|
|
|
jne L(find_diff)
|
|
|
|
RETURN
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
/* Tails of 49/33/17/13/9/5 bytes. eax/edx point just past the end of the
   buffers; each label falls through to the next shorter tail. 16-byte
   steps use pxor + ptest against the zero xmm0 and, on a difference, pass
   the chunk's offset in ebx to L(less16bytes). Dword steps hand ecx/ebx to
   L(find_diff); the final byte compare goes to L(end). */
.p2align 4
|
|
|
|
L(49bytes):
|
|
|
|
movdqu -49(%eax), %xmm1
|
|
|
|
movdqu -49(%edx), %xmm2
|
|
|
|
mov $-49, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes) /* CF clear: the chunk differs */
|
|
|
|
L(33bytes):
|
|
|
|
movdqu -33(%eax), %xmm1
|
|
|
|
movdqu -33(%edx), %xmm2
|
|
|
|
mov $-33, %ebx
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(17bytes):
|
|
|
|
mov -17(%eax), %ecx
|
|
|
|
mov -17(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(13bytes):
|
|
|
|
mov -13(%eax), %ecx
|
|
|
|
mov -13(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(9bytes):
|
|
|
|
mov -9(%eax), %ecx
|
|
|
|
mov -9(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(5bytes):
|
|
|
|
mov -5(%eax), %ecx
|
|
|
|
mov -5(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
movzbl -1(%eax), %ecx /* last byte */
|
|
|
|
cmp -1(%edx), %cl
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(end)
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
/* Tails of 50/34/18/14/10/6/2 bytes. Same scheme as the other tail chains:
   16-byte ptest steps, then dword steps, each label falling through to the
   next. The last two bytes are loaded as one word and compared byte by
   byte, lower address first. */
.p2align 4
|
|
|
|
L(50bytes):
|
|
|
|
mov $-50, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
movdqu -50(%eax), %xmm1
|
|
|
|
movdqu -50(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(34bytes):
|
|
|
|
mov $-34, %ebx
|
|
|
|
movdqu -34(%eax), %xmm1
|
|
|
|
movdqu -34(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(18bytes):
|
|
|
|
mov -18(%eax), %ecx
|
|
|
|
mov -18(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(14bytes):
|
|
|
|
mov -14(%eax), %ecx
|
|
|
|
mov -14(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(10bytes):
|
|
|
|
mov -10(%eax), %ecx
|
|
|
|
mov -10(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(6bytes):
|
|
|
|
mov -6(%eax), %ecx
|
|
|
|
mov -6(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(2bytes):
|
|
|
|
movzwl -2(%eax), %ecx
|
|
|
|
movzwl -2(%edx), %ebx
|
|
|
|
cmp %bl, %cl /* byte at offset -2 */
|
|
|
|
jne L(end)
|
|
|
|
cmp %bh, %ch /* byte at offset -1 */
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(end)
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
/* Tails of 51/35/19/15/11/7/3/1 bytes. 16-byte ptest steps, then dword
   steps, then a word at offset -3 and finally the byte at offset -1. Each
   label falls through to the next shorter tail. */
.p2align 4
|
|
|
|
L(51bytes):
|
|
|
|
mov $-51, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
movdqu -51(%eax), %xmm1
|
|
|
|
movdqu -51(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(35bytes):
|
|
|
|
mov $-35, %ebx
|
|
|
|
movdqu -35(%eax), %xmm1
|
|
|
|
movdqu -35(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(19bytes):
|
|
|
|
movl -19(%eax), %ecx
|
|
|
|
movl -19(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(15bytes):
|
|
|
|
movl -15(%eax), %ecx
|
|
|
|
movl -15(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(11bytes):
|
|
|
|
movl -11(%eax), %ecx
|
|
|
|
movl -11(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(7bytes):
|
|
|
|
movl -7(%eax), %ecx
|
|
|
|
movl -7(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
L(3bytes):
|
|
|
|
movzwl -3(%eax), %ecx
|
|
|
|
movzwl -3(%edx), %ebx
|
|
|
|
cmpb %bl, %cl /* byte at offset -3 */
|
|
|
|
jne L(end)
|
|
|
|
cmp %bx, %cx /* low bytes equal, so the byte at -2 decides */
|
|
|
|
jne L(end)
|
|
|
|
L(1bytes):
|
|
|
|
movzbl -1(%eax), %eax
|
|
|
|
cmpb -1(%edx), %al
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(end)
|
|
|
|
RETURN
|
|
|
|
#endif
|
|
|
|
/* Tails of 52/36/20 bytes, shared by the memcmp and wmemcmp builds. Three
   16-byte ptest steps followed by the final dword at offset -4. */
.p2align 4
|
|
|
|
L(52bytes):
|
|
|
|
movdqu -52(%eax), %xmm1
|
|
|
|
movdqu -52(%edx), %xmm2
|
|
|
|
mov $-52, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(36bytes):
|
|
|
|
movdqu -36(%eax), %xmm1
|
|
|
|
movdqu -36(%edx), %xmm2
|
|
|
|
mov $-36, %ebx
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(20bytes):
|
|
|
|
movdqu -20(%eax), %xmm1
|
|
|
|
movdqu -20(%edx), %xmm2
|
|
|
|
mov $-20, %ebx
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
mov -4(%eax), %ecx
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
mov -4(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
#else
|
|
|
|
cmp -4(%edx), %ecx
|
|
|
|
#endif
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(find_diff)
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
/* Tails of 53/37/21 bytes: three 16-byte ptest steps, the dword at offset
   -5, then the last byte. */
.p2align 4
|
|
|
|
L(53bytes):
|
|
|
|
movdqu -53(%eax), %xmm1
|
|
|
|
movdqu -53(%edx), %xmm2
|
|
|
|
mov $-53, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(37bytes):
|
|
|
|
mov $-37, %ebx
|
|
|
|
movdqu -37(%eax), %xmm1
|
|
|
|
movdqu -37(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(21bytes):
|
|
|
|
mov $-21, %ebx
|
|
|
|
movdqu -21(%eax), %xmm1
|
|
|
|
movdqu -21(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
mov -5(%eax), %ecx
|
|
|
|
mov -5(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
movzbl -1(%eax), %ecx /* last byte */
|
|
|
|
cmp -1(%edx), %cl
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(end)
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
/* Tails of 54/38/22 bytes: three 16-byte ptest steps, the dword at offset
   -6, then the last two bytes compared individually. */
.p2align 4
|
|
|
|
L(54bytes):
|
|
|
|
movdqu -54(%eax), %xmm1
|
|
|
|
movdqu -54(%edx), %xmm2
|
|
|
|
mov $-54, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(38bytes):
|
|
|
|
mov $-38, %ebx
|
|
|
|
movdqu -38(%eax), %xmm1
|
|
|
|
movdqu -38(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(22bytes):
|
|
|
|
mov $-22, %ebx
|
|
|
|
movdqu -22(%eax), %xmm1
|
|
|
|
movdqu -22(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
|
|
|
|
mov -6(%eax), %ecx
|
|
|
|
mov -6(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
movzwl -2(%eax), %ecx
|
|
|
|
movzwl -2(%edx), %ebx
|
|
|
|
cmp %bl, %cl /* byte at offset -2 */
|
|
|
|
jne L(end)
|
|
|
|
cmp %bh, %ch /* byte at offset -1 */
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(end)
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
/* Tails of 55/39/23 bytes: three 16-byte ptest steps, the dword at offset
   -7, the word at offset -3, then the last byte. */
.p2align 4
|
|
|
|
L(55bytes):
|
|
|
|
movdqu -55(%eax), %xmm1
|
|
|
|
movdqu -55(%edx), %xmm2
|
|
|
|
mov $-55, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(39bytes):
|
|
|
|
mov $-39, %ebx
|
|
|
|
movdqu -39(%eax), %xmm1
|
|
|
|
movdqu -39(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(23bytes):
|
|
|
|
mov $-23, %ebx
|
|
|
|
movdqu -23(%eax), %xmm1
|
|
|
|
movdqu -23(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
movl -7(%eax), %ecx
|
|
|
|
movl -7(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
movzwl -3(%eax), %ecx
|
|
|
|
movzwl -3(%edx), %ebx
|
|
|
|
cmpb %bl, %cl /* byte at offset -3 */
|
|
|
|
jne L(end)
|
|
|
|
cmp %bx, %cx /* low bytes equal, so the byte at -2 decides */
|
|
|
|
jne L(end)
|
|
|
|
movzbl -1(%eax), %eax /* last byte */
|
|
|
|
cmpb -1(%edx), %al
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(end)
|
|
|
|
RETURN
|
|
|
|
#endif
|
|
|
|
/* Tails of 56/40/24 bytes, shared by the memcmp and wmemcmp builds. Three
   16-byte ptest steps followed by the dwords at offsets -8 and -4. */
.p2align 4
|
|
|
|
L(56bytes):
|
|
|
|
movdqu -56(%eax), %xmm1
|
|
|
|
movdqu -56(%edx), %xmm2
|
|
|
|
mov $-56, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(40bytes):
|
|
|
|
mov $-40, %ebx
|
|
|
|
movdqu -40(%eax), %xmm1
|
|
|
|
movdqu -40(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(24bytes):
|
|
|
|
mov $-24, %ebx
|
|
|
|
movdqu -24(%eax), %xmm1
|
|
|
|
movdqu -24(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
|
|
|
|
mov -8(%eax), %ecx
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
mov -8(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
#else
|
|
|
|
cmp -8(%edx), %ecx
|
|
|
|
#endif
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov -4(%eax), %ecx
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
mov -4(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
#else
|
|
|
|
cmp -4(%edx), %ecx
|
|
|
|
#endif
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(find_diff)
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
/* Tails of 57/41/25 bytes: three 16-byte ptest steps, the dwords at
   offsets -9 and -5, then the last byte. */
.p2align 4
|
|
|
|
L(57bytes):
|
|
|
|
movdqu -57(%eax), %xmm1
|
|
|
|
movdqu -57(%edx), %xmm2
|
|
|
|
mov $-57, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(41bytes):
|
|
|
|
mov $-41, %ebx
|
|
|
|
movdqu -41(%eax), %xmm1
|
|
|
|
movdqu -41(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(25bytes):
|
|
|
|
mov $-25, %ebx
|
|
|
|
movdqu -25(%eax), %xmm1
|
|
|
|
movdqu -25(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
mov -9(%eax), %ecx
|
|
|
|
mov -9(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
mov -5(%eax), %ecx
|
|
|
|
mov -5(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
movzbl -1(%eax), %ecx /* last byte */
|
|
|
|
cmp -1(%edx), %cl
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(end)
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
/* Tails of 58/42/26 bytes: three 16-byte ptest steps, the dwords at
   offsets -10 and -6, then the last two bytes compared individually. */
.p2align 4
|
|
|
|
L(58bytes):
|
|
|
|
movdqu -58(%eax), %xmm1
|
|
|
|
movdqu -58(%edx), %xmm2
|
|
|
|
mov $-58, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(42bytes):
|
|
|
|
mov $-42, %ebx
|
|
|
|
movdqu -42(%eax), %xmm1
|
|
|
|
movdqu -42(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(26bytes):
|
|
|
|
mov $-26, %ebx
|
|
|
|
movdqu -26(%eax), %xmm1
|
|
|
|
movdqu -26(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
|
|
|
|
mov -10(%eax), %ecx
|
|
|
|
mov -10(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov -6(%eax), %ecx
|
|
|
|
mov -6(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
movzwl -2(%eax), %ecx
|
|
|
|
movzwl -2(%edx), %ebx
|
|
|
|
cmp %bl, %cl /* byte at offset -2 */
|
|
|
|
jne L(end)
|
|
|
|
cmp %bh, %ch /* byte at offset -1 */
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(end)
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
/* Tails of 59/43/27 bytes: three 16-byte ptest steps, the dwords at
   offsets -11 and -7, the word at offset -3, then the last byte. */
.p2align 4
|
|
|
|
L(59bytes):
|
|
|
|
movdqu -59(%eax), %xmm1
|
|
|
|
movdqu -59(%edx), %xmm2
|
|
|
|
mov $-59, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(43bytes):
|
|
|
|
mov $-43, %ebx
|
|
|
|
movdqu -43(%eax), %xmm1
|
|
|
|
movdqu -43(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(27bytes):
|
|
|
|
mov $-27, %ebx
|
|
|
|
movdqu -27(%eax), %xmm1
|
|
|
|
movdqu -27(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
movl -11(%eax), %ecx
|
|
|
|
movl -11(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
movl -7(%eax), %ecx
|
|
|
|
movl -7(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
movzwl -3(%eax), %ecx
|
|
|
|
movzwl -3(%edx), %ebx
|
|
|
|
cmpb %bl, %cl /* byte at offset -3 */
|
|
|
|
jne L(end)
|
|
|
|
cmp %bx, %cx /* low bytes equal, so the byte at -2 decides */
|
|
|
|
jne L(end)
|
|
|
|
movzbl -1(%eax), %eax /* last byte */
|
|
|
|
cmpb -1(%edx), %al
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(end)
|
|
|
|
RETURN
|
|
|
|
#endif
|
|
|
|
/* Tails of 60/44/28 bytes, shared by the memcmp and wmemcmp builds. Three
   16-byte ptest steps followed by the dwords at offsets -12, -8 and -4. */
.p2align 4
|
|
|
|
L(60bytes):
|
|
|
|
movdqu -60(%eax), %xmm1
|
|
|
|
movdqu -60(%edx), %xmm2
|
|
|
|
mov $-60, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(44bytes):
|
|
|
|
mov $-44, %ebx
|
|
|
|
movdqu -44(%eax), %xmm1
|
|
|
|
movdqu -44(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(28bytes):
|
|
|
|
mov $-28, %ebx
|
|
|
|
movdqu -28(%eax), %xmm1
|
|
|
|
movdqu -28(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
|
|
|
|
mov -12(%eax), %ecx
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
mov -12(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
#else
|
|
|
|
cmp -12(%edx), %ecx
|
|
|
|
#endif
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov -8(%eax), %ecx
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
mov -8(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
#else
|
|
|
|
cmp -8(%edx), %ecx
|
|
|
|
#endif
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov -4(%eax), %ecx
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
mov -4(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
#else
|
|
|
|
cmp -4(%edx), %ecx
|
|
|
|
#endif
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(find_diff)
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
/* Tails of 61/45/29 bytes: three 16-byte ptest steps, the dwords at
   offsets -13, -9 and -5, then the last byte. */
.p2align 4
|
|
|
|
L(61bytes):
|
|
|
|
movdqu -61(%eax), %xmm1
|
|
|
|
movdqu -61(%edx), %xmm2
|
|
|
|
mov $-61, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(45bytes):
|
|
|
|
mov $-45, %ebx
|
|
|
|
movdqu -45(%eax), %xmm1
|
|
|
|
movdqu -45(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(29bytes):
|
|
|
|
mov $-29, %ebx
|
|
|
|
movdqu -29(%eax), %xmm1
|
|
|
|
movdqu -29(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
|
|
|
|
mov -13(%eax), %ecx
|
|
|
|
mov -13(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov -9(%eax), %ecx
|
|
|
|
mov -9(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov -5(%eax), %ecx
|
|
|
|
mov -5(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
movzbl -1(%eax), %ecx /* last byte */
|
|
|
|
cmp -1(%edx), %cl
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(end)
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
/* Tails of 62/46/30 bytes: three 16-byte ptest steps, the dwords at
   offsets -14, -10 and -6, then the last two bytes compared individually. */
.p2align 4
|
|
|
|
L(62bytes):
|
|
|
|
movdqu -62(%eax), %xmm1
|
|
|
|
movdqu -62(%edx), %xmm2
|
|
|
|
mov $-62, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(46bytes):
|
|
|
|
mov $-46, %ebx
|
|
|
|
movdqu -46(%eax), %xmm1
|
|
|
|
movdqu -46(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(30bytes):
|
|
|
|
mov $-30, %ebx
|
|
|
|
movdqu -30(%eax), %xmm1
|
|
|
|
movdqu -30(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
mov -14(%eax), %ecx
|
|
|
|
mov -14(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
mov -10(%eax), %ecx
|
|
|
|
mov -10(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
mov -6(%eax), %ecx
|
|
|
|
mov -6(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
movzwl -2(%eax), %ecx
|
|
|
|
movzwl -2(%edx), %ebx
|
|
|
|
cmp %bl, %cl /* byte at offset -2 */
|
|
|
|
jne L(end)
|
|
|
|
cmp %bh, %ch /* byte at offset -1 */
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(end)
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
/* Tails of 63/47/31 bytes: three 16-byte ptest steps, the dwords at
   offsets -15, -11 and -7, the word at offset -3, then the last byte. */
.p2align 4
|
|
|
|
L(63bytes):
|
|
|
|
movdqu -63(%eax), %xmm1
|
|
|
|
movdqu -63(%edx), %xmm2
|
|
|
|
mov $-63, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(47bytes):
|
|
|
|
mov $-47, %ebx
|
|
|
|
movdqu -47(%eax), %xmm1
|
|
|
|
movdqu -47(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(31bytes):
|
|
|
|
mov $-31, %ebx
|
|
|
|
movdqu -31(%eax), %xmm1
|
|
|
|
movdqu -31(%edx), %xmm2
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
|
|
|
|
movl -15(%eax), %ecx
|
|
|
|
movl -15(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
movl -11(%eax), %ecx
|
|
|
|
movl -11(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
movl -7(%eax), %ecx
|
|
|
|
movl -7(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
movzwl -3(%eax), %ecx
|
|
|
|
movzwl -3(%edx), %ebx
|
|
|
|
cmpb %bl, %cl /* byte at offset -3 */
|
|
|
|
jne L(end)
|
|
|
|
cmp %bx, %cx /* low bytes equal, so the byte at -2 decides */
|
|
|
|
jne L(end)
|
|
|
|
movzbl -1(%eax), %eax /* last byte */
|
|
|
|
cmpb -1(%edx), %al
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(end)
|
|
|
|
RETURN
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Tails of 64/48/32 bytes, shared by the memcmp and wmemcmp builds. Three
   16-byte ptest steps followed by the dwords at offsets -16, -12, -8
   and -4. */
.p2align 4
|
|
|
|
L(64bytes):
|
|
|
|
movdqu -64(%eax), %xmm1
|
|
|
|
movdqu -64(%edx), %xmm2
|
|
|
|
mov $-64, %ebx /* chunk offset for L(less16bytes) */
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(48bytes):
|
|
|
|
movdqu -48(%eax), %xmm1
|
|
|
|
movdqu -48(%edx), %xmm2
|
|
|
|
mov $-48, %ebx
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
L(32bytes):
|
|
|
|
movdqu -32(%eax), %xmm1
|
|
|
|
movdqu -32(%edx), %xmm2
|
|
|
|
mov $-32, %ebx
|
|
|
|
pxor %xmm1, %xmm2
|
|
|
|
ptest %xmm2, %xmm0
|
|
|
|
jnc L(less16bytes)
|
|
|
|
|
|
|
|
mov -16(%eax), %ecx
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
mov -16(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
#else
|
|
|
|
cmp -16(%edx), %ecx
|
|
|
|
#endif
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov -12(%eax), %ecx
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
mov -12(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
#else
|
|
|
|
cmp -12(%edx), %ecx
|
|
|
|
#endif
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov -8(%eax), %ecx
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
mov -8(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
#else
|
|
|
|
cmp -8(%edx), %ecx
|
|
|
|
#endif
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov -4(%eax), %ecx
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
mov -4(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
#else
|
|
|
|
cmp -4(%edx), %ecx
|
|
|
|
#endif
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(find_diff)
|
|
|
|
RETURN
|
|
|
|
|
|
|
|
/* Narrow a differing 16-byte chunk down to the differing dword.
   On entry ebx holds the (negative) offset of the chunk relative to the
   end pointers in eax/edx. Both pointers are moved to the start of the
   chunk and its four dwords are compared in ascending order.
   NOTE(review): callers branch here only after ptest reported a
   difference, so the final RETURN with eax = 0 looks unreachable in
   practice - confirm before relying on it. */
#ifndef USE_AS_WMEMCMP
|
|
|
|
.p2align 4
|
|
|
|
L(less16bytes):
|
|
|
|
add %ebx, %eax
|
|
|
|
add %ebx, %edx
|
|
|
|
|
|
|
|
mov (%eax), %ecx
|
|
|
|
mov (%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov 4(%eax), %ecx
|
|
|
|
mov 4(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov 8(%eax), %ecx
|
|
|
|
mov 8(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov 12(%eax), %ecx
|
|
|
|
mov 12(%edx), %ebx
|
|
|
|
cmp %ebx, %ecx
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(find_diff)
|
|
|
|
RETURN
|
|
|
|
#else
|
|
|
|
.p2align 4
|
|
|
|
L(less16bytes):
|
|
|
|
add %ebx, %eax
|
|
|
|
add %ebx, %edx
|
|
|
|
|
|
|
|
mov (%eax), %ecx
|
|
|
|
cmp (%edx), %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov 4(%eax), %ecx
|
|
|
|
cmp 4(%edx), %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov 8(%eax), %ecx
|
|
|
|
cmp 8(%edx), %ecx
|
|
|
|
jne L(find_diff)
|
|
|
|
|
|
|
|
mov 12(%eax), %ecx
|
|
|
|
cmp 12(%edx), %ecx
|
|
|
|
|
|
|
|
mov $0, %eax /* result if equal; flags are preserved */
|
|
|
|
jne L(find_diff)
|
|
|
|
RETURN
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Turn a detected difference into the return value (+1 or -1), with ebx
   still pushed.
   memcmp build: ecx and ebx hold the differing dwords from the first and
   second buffer. The bytes are checked from the lowest address upwards
   (low byte of the register first); the flags of the first unequal byte
   compare reach L(end), which returns +1 when the first buffer's byte is
   greater (unsigned) and -1 otherwise. L(end) is also entered directly by
   the tail routines after their own byte compares.
   wmemcmp build: the caller's dword compare flags are used as they are,
   with a signed test (jg). */
.p2align 4
|
|
|
|
L(find_diff):
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
cmpb %bl, %cl /* byte 0 */
|
|
|
|
jne L(end)
|
|
|
|
cmp %bx, %cx /* byte 0 equal, so byte 1 decides */
|
|
|
|
jne L(end)
|
|
|
|
shr $16,%ecx /* move the upper halves down */
|
|
|
|
shr $16,%ebx
|
|
|
|
cmp %bl, %cl /* byte 2 */
|
|
|
|
jne L(end)
|
|
|
|
cmp %bx, %cx /* byte 3 */
|
|
|
|
L(end):
|
|
|
|
POP (%ebx) /* popl and mov leave the compare flags intact */
|
|
|
|
mov $1, %eax
|
|
|
|
ja L(bigger)
|
|
|
|
neg %eax
|
|
|
|
L(bigger):
|
|
|
|
ret
|
|
|
|
#else
|
|
|
|
POP (%ebx) /* popl and mov leave the compare flags intact */
|
|
|
|
mov $1, %eax
|
|
|
|
jg L(bigger)
|
|
|
|
neg %eax
|
|
|
|
ret
|
|
|
|
|
|
|
|
.p2align 4
|
|
|
|
L(bigger):
|
|
|
|
ret
|
|
|
|
#endif
|
|
|
|
END (MEMCMP)
|
|
|
|
|
|
|
|
.section .rodata.sse4.2,"a",@progbits
|
|
|
|
.p2align 2
|
|
|
|
.type L(table_64bytes), @object
|
|
|
|
#ifndef USE_AS_WMEMCMP
|
|
|
|
L(table_64bytes):
|
|
|
|
.int JMPTBL (L(0bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(1bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(2bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(3bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(4bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(5bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(6bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(7bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(8bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(9bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(10bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(11bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(12bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(13bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(14bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(15bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(16bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(17bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(18bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(19bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(20bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(21bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(22bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(23bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(24bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(25bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(26bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(27bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(28bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(29bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(30bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(31bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(32bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(33bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(34bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(35bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(36bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(37bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(38bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(39bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(40bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(41bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(42bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(43bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(44bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(45bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(46bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(47bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(48bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(49bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(50bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(51bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(52bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(53bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(54bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(55bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(56bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(57bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(58bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(59bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(60bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(61bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(62bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(63bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(64bytes), L(table_64bytes))
|
|
|
|
#else
|
|
|
|
/* L(table_64bytes), #else variant: a 65-slot table of `.int` entries, one
   per index 0..64.  Each slot is JMPTBL (target, L(table_64bytes)).
   Slot N names L(Nbytes) only when N is a multiple of 4 (0, 4, 8, ... 64);
   every other slot names L(unreal_case).
   NOTE(review): the #if condition selecting this variant, the JMPTBL macro
   and the code that indexes the table are outside this view.  Presumably
   JMPTBL yields the target's offset relative to the table base, and this
   variant serves a build that compares 4-byte elements, so lengths that
   are not a multiple of 4 are not expected -- confirm against the rest of
   the file.
   The slot order is significant: do not reorder, insert or drop entries.  */
L(table_64bytes):
|
|
|
|
.int JMPTBL (L(0bytes), L(table_64bytes))
|
|
|
|
/* Slots 1-3 (and likewise every index that is not a multiple of 4 below)
   are fillers that keep slot N at position N; they all name
   L(unreal_case).  */
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(4bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(8bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(12bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(16bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(20bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(24bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(28bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(32bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(36bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(40bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(44bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(48bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(52bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(56bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(60bytes), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
|
|
|
.int JMPTBL (L(64bytes), L(table_64bytes))
|
|
|
|
#endif
|