Merge "Add ssse3 implementation of __memcmp16."
This commit is contained in:
commit
3287dccf2e
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010, 2011 Intel Corporation
|
||||
Copyright (c) 2010, 2011, 2012, 2013 Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -106,9 +106,12 @@ name: \
|
||||
ENTRY (MEMCMP)
|
||||
movl LEN(%esp), %ecx
|
||||
|
||||
#ifdef USE_AS_WMEMCMP
|
||||
#ifdef USE_WCHAR
|
||||
shl $2, %ecx
|
||||
jz L(zero)
|
||||
#elif defined USE_UTF16
|
||||
shl $1, %ecx
|
||||
jz L(zero)
|
||||
#endif
|
||||
|
||||
movl BLK1(%esp), %eax
|
||||
@ -116,7 +119,7 @@ ENTRY (MEMCMP)
|
||||
movl BLK2(%esp), %edx
|
||||
jae L(48bytesormore)
|
||||
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cmp $1, %ecx
|
||||
jbe L(less1bytes)
|
||||
#endif
|
||||
@ -128,7 +131,7 @@ ENTRY (MEMCMP)
|
||||
|
||||
CFI_POP (%ebx)
|
||||
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
.p2align 4
|
||||
L(less1bytes):
|
||||
jb L(zero)
|
||||
@ -174,7 +177,7 @@ L(48bytesormore):
|
||||
jz L(shr_0)
|
||||
xor %edx, %esi
|
||||
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cmp $8, %edx
|
||||
jae L(next_unaligned_table)
|
||||
cmp $0, %edx
|
||||
@ -210,7 +213,7 @@ L(next_unaligned_table):
|
||||
cmp $14, %edx
|
||||
je L(shr_14)
|
||||
jmp L(shr_15)
|
||||
#else
|
||||
#elif defined(USE_WCHAR)
|
||||
cmp $0, %edx
|
||||
je L(shr_0)
|
||||
cmp $4, %edx
|
||||
@ -218,6 +221,22 @@ L(next_unaligned_table):
|
||||
cmp $8, %edx
|
||||
je L(shr_8)
|
||||
jmp L(shr_12)
|
||||
#elif defined(USE_UTF16)
|
||||
cmp $0, %edx
|
||||
je L(shr_0)
|
||||
cmp $2, %edx
|
||||
je L(shr_2)
|
||||
cmp $4, %edx
|
||||
je L(shr_4)
|
||||
cmp $6, %edx
|
||||
je L(shr_6)
|
||||
cmp $8, %edx
|
||||
je L(shr_8)
|
||||
cmp $10, %edx
|
||||
je L(shr_10)
|
||||
cmp $12, %edx
|
||||
je L(shr_12)
|
||||
jmp L(shr_14)
|
||||
#endif
|
||||
|
||||
.p2align 4
|
||||
@ -289,7 +308,7 @@ L(shr_0_gobble_loop_next):
|
||||
POP (%esi)
|
||||
jmp L(less48bytes)
|
||||
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cfi_restore_state
|
||||
cfi_remember_state
|
||||
.p2align 4
|
||||
@ -372,8 +391,10 @@ L(shr_1_gobble_next):
|
||||
POP (%edi)
|
||||
POP (%esi)
|
||||
jmp L(less48bytes)
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(USE_WCHAR)
|
||||
cfi_restore_state
|
||||
cfi_remember_state
|
||||
.p2align 4
|
||||
@ -456,7 +477,9 @@ L(shr_2_gobble_next):
|
||||
POP (%edi)
|
||||
POP (%esi)
|
||||
jmp L(less48bytes)
|
||||
#endif
|
||||
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cfi_restore_state
|
||||
cfi_remember_state
|
||||
.p2align 4
|
||||
@ -624,7 +647,7 @@ L(shr_4_gobble_next):
|
||||
POP (%esi)
|
||||
jmp L(less48bytes)
|
||||
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cfi_restore_state
|
||||
cfi_remember_state
|
||||
.p2align 4
|
||||
@ -707,7 +730,9 @@ L(shr_5_gobble_next):
|
||||
POP (%edi)
|
||||
POP (%esi)
|
||||
jmp L(less48bytes)
|
||||
#endif
|
||||
|
||||
#if !defined(USE_WCHAR)
|
||||
cfi_restore_state
|
||||
cfi_remember_state
|
||||
.p2align 4
|
||||
@ -790,7 +815,9 @@ L(shr_6_gobble_next):
|
||||
POP (%edi)
|
||||
POP (%esi)
|
||||
jmp L(less48bytes)
|
||||
#endif
|
||||
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cfi_restore_state
|
||||
cfi_remember_state
|
||||
.p2align 4
|
||||
@ -958,7 +985,7 @@ L(shr_8_gobble_next):
|
||||
POP (%esi)
|
||||
jmp L(less48bytes)
|
||||
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cfi_restore_state
|
||||
cfi_remember_state
|
||||
.p2align 4
|
||||
@ -1041,7 +1068,9 @@ L(shr_9_gobble_next):
|
||||
POP (%edi)
|
||||
POP (%esi)
|
||||
jmp L(less48bytes)
|
||||
#endif
|
||||
|
||||
#if !defined(USE_WCHAR)
|
||||
cfi_restore_state
|
||||
cfi_remember_state
|
||||
.p2align 4
|
||||
@ -1124,7 +1153,9 @@ L(shr_10_gobble_next):
|
||||
POP (%edi)
|
||||
POP (%esi)
|
||||
jmp L(less48bytes)
|
||||
#endif
|
||||
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cfi_restore_state
|
||||
cfi_remember_state
|
||||
.p2align 4
|
||||
@ -1292,7 +1323,7 @@ L(shr_12_gobble_next):
|
||||
POP (%esi)
|
||||
jmp L(less48bytes)
|
||||
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cfi_restore_state
|
||||
cfi_remember_state
|
||||
.p2align 4
|
||||
@ -1375,7 +1406,9 @@ L(shr_13_gobble_next):
|
||||
POP (%edi)
|
||||
POP (%esi)
|
||||
jmp L(less48bytes)
|
||||
#endif
|
||||
|
||||
#if !defined(USE_WCHAR)
|
||||
cfi_restore_state
|
||||
cfi_remember_state
|
||||
.p2align 4
|
||||
@ -1458,7 +1491,9 @@ L(shr_14_gobble_next):
|
||||
POP (%edi)
|
||||
POP (%esi)
|
||||
jmp L(less48bytes)
|
||||
#endif
|
||||
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cfi_restore_state
|
||||
cfi_remember_state
|
||||
.p2align 4
|
||||
@ -1558,7 +1593,7 @@ L(first16bytes):
|
||||
add %eax, %esi
|
||||
L(less16bytes):
|
||||
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
test %dl, %dl
|
||||
jz L(next_24_bytes)
|
||||
|
||||
@ -1668,7 +1703,7 @@ L(Byte31):
|
||||
movzbl -9(%esi), %edx
|
||||
sub %edx, %eax
|
||||
RETURN_END
|
||||
#else
|
||||
#elif defined(USE_AS_WMEMCMP)
|
||||
|
||||
/* special for wmemcmp */
|
||||
test %dl, %dl
|
||||
@ -1682,7 +1717,6 @@ L(Byte31):
|
||||
neg %eax
|
||||
RETURN
|
||||
|
||||
|
||||
.p2align 4
|
||||
L(second_double_word):
|
||||
mov -12(%edi), %ecx
|
||||
@ -1691,7 +1725,7 @@ L(second_double_word):
|
||||
jg L(nequal_bigger)
|
||||
neg %eax
|
||||
RETURN
|
||||
|
||||
|
||||
.p2align 4
|
||||
L(next_two_double_words):
|
||||
and $15, %dh
|
||||
@ -1715,6 +1749,79 @@ L(fourth_double_word):
|
||||
.p2align 4
|
||||
L(nequal_bigger):
|
||||
RETURN_END
|
||||
|
||||
#elif defined(USE_AS_MEMCMP16)
|
||||
|
||||
/* special for __memcmp16 */
|
||||
test %dl, %dl
|
||||
jz L(next_four_words)
|
||||
test $15, %dl
|
||||
jz L(second_two_words)
|
||||
test $3, %dl
|
||||
jz L(second_word)
|
||||
movzwl -16(%edi), %eax
|
||||
movzwl -16(%esi), %ebx
|
||||
subl %ebx, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(second_word):
|
||||
movzwl -14(%edi), %eax
|
||||
movzwl -14(%esi), %ebx
|
||||
subl %ebx, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(second_two_words):
|
||||
test $63, %dl
|
||||
jz L(fourth_word)
|
||||
movzwl -12(%edi), %eax
|
||||
movzwl -12(%esi), %ebx
|
||||
subl %ebx, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(fourth_word):
|
||||
movzwl -10(%edi), %eax
|
||||
movzwl -10(%esi), %ebx
|
||||
subl %ebx, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(next_four_words):
|
||||
test $15, %dh
|
||||
jz L(fourth_two_words)
|
||||
test $3, %dh
|
||||
jz L(sixth_word)
|
||||
movzwl -8(%edi), %eax
|
||||
movzwl -8(%esi), %ebx
|
||||
subl %ebx, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(sixth_word):
|
||||
movzwl -6(%edi), %eax
|
||||
movzwl -6(%esi), %ebx
|
||||
subl %ebx, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(fourth_two_words):
|
||||
test $63, %dh
|
||||
jz L(eighth_word)
|
||||
movzwl -4(%edi), %eax
|
||||
movzwl -4(%esi), %ebx
|
||||
subl %ebx, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(eighth_word):
|
||||
movzwl -2(%edi), %eax
|
||||
movzwl -2(%esi), %ebx
|
||||
subl %ebx, %eax
|
||||
RETURN
|
||||
#else
|
||||
# error Unreachable preprocessor case
|
||||
#endif
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
@ -1725,7 +1832,7 @@ L(more8bytes):
|
||||
jae L(more16bytes)
|
||||
cmp $8, %ecx
|
||||
je L(8bytes)
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cmp $9, %ecx
|
||||
je L(9bytes)
|
||||
cmp $10, %ecx
|
||||
@ -1739,8 +1846,16 @@ L(more8bytes):
|
||||
cmp $14, %ecx
|
||||
je L(14bytes)
|
||||
jmp L(15bytes)
|
||||
#else
|
||||
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
jmp L(12bytes)
|
||||
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
|
||||
cmp $10, %ecx
|
||||
je L(10bytes)
|
||||
cmp $12, %ecx
|
||||
je L(12bytes)
|
||||
jmp L(14bytes)
|
||||
#else
|
||||
# error Unreachable preprocessor case
|
||||
#endif
|
||||
|
||||
.p2align 4
|
||||
@ -1749,7 +1864,7 @@ L(more16bytes):
|
||||
jae L(more24bytes)
|
||||
cmp $16, %ecx
|
||||
je L(16bytes)
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cmp $17, %ecx
|
||||
je L(17bytes)
|
||||
cmp $18, %ecx
|
||||
@ -1763,8 +1878,16 @@ L(more16bytes):
|
||||
cmp $22, %ecx
|
||||
je L(22bytes)
|
||||
jmp L(23bytes)
|
||||
#else
|
||||
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
jmp L(20bytes)
|
||||
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
|
||||
cmp $18, %ecx
|
||||
je L(18bytes)
|
||||
cmp $20, %ecx
|
||||
je L(20bytes)
|
||||
jmp L(22bytes)
|
||||
#else
|
||||
# error Unreachable preprocessor case
|
||||
#endif
|
||||
|
||||
.p2align 4
|
||||
@ -1773,7 +1896,7 @@ L(more24bytes):
|
||||
jae L(more32bytes)
|
||||
cmp $24, %ecx
|
||||
je L(24bytes)
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cmp $25, %ecx
|
||||
je L(25bytes)
|
||||
cmp $26, %ecx
|
||||
@ -1787,8 +1910,16 @@ L(more24bytes):
|
||||
cmp $30, %ecx
|
||||
je L(30bytes)
|
||||
jmp L(31bytes)
|
||||
#else
|
||||
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
jmp L(28bytes)
|
||||
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
|
||||
cmp $26, %ecx
|
||||
je L(26bytes)
|
||||
cmp $28, %ecx
|
||||
je L(28bytes)
|
||||
jmp L(30bytes)
|
||||
#else
|
||||
# error Unreachable preprocessor case
|
||||
#endif
|
||||
|
||||
.p2align 4
|
||||
@ -1797,7 +1928,7 @@ L(more32bytes):
|
||||
jae L(more40bytes)
|
||||
cmp $32, %ecx
|
||||
je L(32bytes)
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cmp $33, %ecx
|
||||
je L(33bytes)
|
||||
cmp $34, %ecx
|
||||
@ -1811,15 +1942,23 @@ L(more32bytes):
|
||||
cmp $38, %ecx
|
||||
je L(38bytes)
|
||||
jmp L(39bytes)
|
||||
#else
|
||||
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
jmp L(36bytes)
|
||||
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
|
||||
cmp $34, %ecx
|
||||
je L(34bytes)
|
||||
cmp $36, %ecx
|
||||
je L(36bytes)
|
||||
jmp L(38bytes)
|
||||
#else
|
||||
# error Unreachable preprocessor case
|
||||
#endif
|
||||
|
||||
.p2align 4
|
||||
L(less48bytes):
|
||||
cmp $8, %ecx
|
||||
jae L(more8bytes)
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cmp $2, %ecx
|
||||
je L(2bytes)
|
||||
cmp $3, %ecx
|
||||
@ -1831,15 +1970,23 @@ L(less48bytes):
|
||||
cmp $6, %ecx
|
||||
je L(6bytes)
|
||||
jmp L(7bytes)
|
||||
#else
|
||||
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
jmp L(4bytes)
|
||||
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
|
||||
cmp $2, %ecx
|
||||
je L(2bytes)
|
||||
cmp $4, %ecx
|
||||
je L(4bytes)
|
||||
jmp L(6bytes)
|
||||
#else
|
||||
# error Unreachable preprocessor case
|
||||
#endif
|
||||
|
||||
.p2align 4
|
||||
L(more40bytes):
|
||||
cmp $40, %ecx
|
||||
je L(40bytes)
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
|
||||
cmp $41, %ecx
|
||||
je L(41bytes)
|
||||
cmp $42, %ecx
|
||||
@ -1853,7 +2000,15 @@ L(more40bytes):
|
||||
cmp $46, %ecx
|
||||
je L(46bytes)
|
||||
jmp L(47bytes)
|
||||
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
|
||||
cmp $42, %ecx
|
||||
je L(42bytes)
|
||||
cmp $44, %ecx
|
||||
je L(44bytes)
|
||||
jmp L(46bytes)
|
||||
#endif
|
||||
|
||||
#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
|
||||
.p2align 4
|
||||
L(44bytes):
|
||||
mov -44(%eax), %ecx
|
||||
@ -1914,7 +2069,8 @@ L(4bytes):
|
||||
POP (%ebx)
|
||||
ret
|
||||
CFI_PUSH (%ebx)
|
||||
#else
|
||||
#elif defined(USE_AS_WMEMCMP)
|
||||
|
||||
.p2align 4
|
||||
L(44bytes):
|
||||
mov -44(%eax), %ecx
|
||||
@ -1964,9 +2120,131 @@ L(4bytes):
|
||||
POP (%ebx)
|
||||
ret
|
||||
CFI_PUSH (%ebx)
|
||||
#elif defined USE_AS_MEMCMP16
|
||||
|
||||
.p2align 4
|
||||
L(46bytes):
|
||||
movzwl -46(%eax), %ecx
|
||||
movzwl -46(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(44bytes):
|
||||
movzwl -44(%eax), %ecx
|
||||
movzwl -44(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(42bytes):
|
||||
movzwl -42(%eax), %ecx
|
||||
movzwl -42(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(40bytes):
|
||||
movzwl -40(%eax), %ecx
|
||||
movzwl -40(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(38bytes):
|
||||
movzwl -38(%eax), %ecx
|
||||
movzwl -38(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(36bytes):
|
||||
movzwl -36(%eax), %ecx
|
||||
movzwl -36(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(34bytes):
|
||||
movzwl -34(%eax), %ecx
|
||||
movzwl -34(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(32bytes):
|
||||
movzwl -32(%eax), %ecx
|
||||
movzwl -32(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(30bytes):
|
||||
movzwl -30(%eax), %ecx
|
||||
movzwl -30(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(28bytes):
|
||||
movzwl -28(%eax), %ecx
|
||||
movzwl -28(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(26bytes):
|
||||
movzwl -26(%eax), %ecx
|
||||
movzwl -26(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(24bytes):
|
||||
movzwl -24(%eax), %ecx
|
||||
movzwl -24(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(22bytes):
|
||||
movzwl -22(%eax), %ecx
|
||||
movzwl -22(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(20bytes):
|
||||
movzwl -20(%eax), %ecx
|
||||
movzwl -20(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(18bytes):
|
||||
movzwl -18(%eax), %ecx
|
||||
movzwl -18(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(16bytes):
|
||||
movzwl -16(%eax), %ecx
|
||||
movzwl -16(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(14bytes):
|
||||
movzwl -14(%eax), %ecx
|
||||
movzwl -14(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(12bytes):
|
||||
movzwl -12(%eax), %ecx
|
||||
movzwl -12(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(10bytes):
|
||||
movzwl -10(%eax), %ecx
|
||||
movzwl -10(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(8bytes):
|
||||
movzwl -8(%eax), %ecx
|
||||
movzwl -8(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(6bytes):
|
||||
movzwl -6(%eax), %ecx
|
||||
movzwl -6(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(4bytes):
|
||||
movzwl -4(%eax), %ecx
|
||||
movzwl -4(%edx), %ebx
|
||||
subl %ebx, %ecx
|
||||
jne L(memcmp16_exit)
|
||||
L(2bytes):
|
||||
movzwl -2(%eax), %eax
|
||||
movzwl -2(%edx), %ebx
|
||||
subl %ebx, %eax
|
||||
POP (%ebx)
|
||||
ret
|
||||
CFI_PUSH (%ebx)
|
||||
#else
|
||||
# error Unreachable preprocessor case
|
||||
#endif
|
||||
|
||||
#ifndef USE_AS_WMEMCMP
|
||||
#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
|
||||
|
||||
.p2align 4
|
||||
L(45bytes):
|
||||
@ -2191,9 +2469,8 @@ L(end):
|
||||
neg %eax
|
||||
L(bigger):
|
||||
ret
|
||||
#else
|
||||
#elif defined(USE_AS_WMEMCMP)
|
||||
|
||||
/* for wmemcmp */
|
||||
.p2align 4
|
||||
L(find_diff):
|
||||
POP (%ebx)
|
||||
@ -2206,5 +2483,14 @@ L(find_diff):
|
||||
L(find_diff_bigger):
|
||||
ret
|
||||
|
||||
#elif defined(USE_AS_MEMCMP16)
|
||||
|
||||
.p2align 4
|
||||
L(memcmp16_exit):
|
||||
POP (%ebx)
|
||||
mov %ecx, %eax
|
||||
ret
|
||||
#else
|
||||
# error Unreachable preprocessor case
|
||||
#endif
|
||||
END (MEMCMP)
|
||||
|
37
libc/arch-x86/string/ssse3-memcmp16-atom.S
Normal file
37
libc/arch-x86/string/ssse3-memcmp16-atom.S
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
Copyright (c) 2013, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Name under which the routine assembled from the shared source is emitted
   (the shared source wraps its body in ENTRY (MEMCMP) ... END (MEMCMP)). */
#define MEMCMP __memcmp16
|
||||
|
||||
/* int __memcmp16(const unsigned short *ptr1, const unsigned short *ptr2, size_t n); */
|
||||
|
||||
/* Selects the USE_UTF16 branches of the shared source; among other things
   the length argument is shifted left by one (shl $1) on entry there. */
#define USE_UTF16
|
||||
/* Selects the "special for __memcmp16" result code and the movzwl/subl
   tail comparisons that exit through L(memcmp16_exit). */
#define USE_AS_MEMCMP16 1
|
||||
/* The comparison code itself comes from the shared SSSE3 memcmp source,
   specialised by the macros defined above. */
#include "ssse3-memcmp-atom.S"
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
Copyright (c) 2011, 2012, 2013 Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -29,5 +29,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define MEMCMP wmemcmp
|
||||
|
||||
#define USE_WCHAR
|
||||
#define USE_AS_WMEMCMP 1
|
||||
#include "ssse3-memcmp-atom.S"
|
||||
|
@ -27,6 +27,7 @@ _LIBC_ARCH_COMMON_SRC_FILES += \
|
||||
arch-x86/string/ssse3-strcpy-atom.S \
|
||||
arch-x86/string/ssse3-memcmp-atom.S \
|
||||
arch-x86/string/ssse3-wmemcmp-atom.S \
|
||||
arch-x86/string/ssse3-memcmp16-atom.S \
|
||||
arch-x86/string/ssse3-wcscat-atom.S \
|
||||
arch-x86/string/ssse3-wcscpy-atom.S
|
||||
else
|
||||
@ -38,6 +39,7 @@ _LIBC_ARCH_COMMON_SRC_FILES += \
|
||||
arch-x86/string/strncmp.S \
|
||||
arch-x86/string/strcat.S \
|
||||
arch-x86/string/memcmp.S \
|
||||
string/memcmp16.c \
|
||||
string/strcpy.c \
|
||||
string/strncat.c \
|
||||
string/strncpy.c \
|
||||
|
45
libc/string/memcmp16.c
Normal file
45
libc/string/memcmp16.c
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
Copyright (c) 2013 Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/*
 * Plain C fallback for __memcmp16 (no hand-tuned assembly).
 *
 * Compares the first n 16-bit units of ptr1 and ptr2 in order.
 * Returns the ptr1 unit minus the ptr2 unit at the first position where
 * the two buffers disagree, or 0 when all n units match (including n == 0).
 */
int __memcmp16(const unsigned short *ptr1, const unsigned short *ptr2, size_t n)
{
  /* Count the remaining units down instead of keeping a separate index. */
  for (; n != 0; --n, ++ptr1, ++ptr2) {
    const int delta = *ptr1 - *ptr2;
    /* Operands are promoted before subtracting, so a zero delta means equal units. */
    if (delta != 0)
      return delta;
  }
  return 0;
}
|
@ -131,14 +131,15 @@ TEST(string, strsignal_concurrent) {
|
||||
// expected result and then run function and compare what we got.
|
||||
// These tests contributed by Intel Corporation.
|
||||
// TODO: make these tests more intention-revealing and less random.
|
||||
template<class Character>
|
||||
struct StringTestState {
|
||||
StringTestState(size_t MAX_LEN) : MAX_LEN(MAX_LEN) {
|
||||
int max_alignment = 64;
|
||||
|
||||
// TODO: fix the tests to not sometimes use twice their specified "MAX_LEN".
|
||||
glob_ptr = reinterpret_cast<char*>(valloc(2 * MAX_LEN + max_alignment));
|
||||
glob_ptr1 = reinterpret_cast<char*>(valloc(2 * MAX_LEN + max_alignment));
|
||||
glob_ptr2 = reinterpret_cast<char*>(valloc(2 * MAX_LEN + max_alignment));
|
||||
glob_ptr = reinterpret_cast<Character*>(valloc(2 * sizeof(Character) * MAX_LEN + max_alignment));
|
||||
glob_ptr1 = reinterpret_cast<Character*>(valloc(2 * sizeof(Character) * MAX_LEN + max_alignment));
|
||||
glob_ptr2 = reinterpret_cast<Character*>(valloc(2 * sizeof(Character) * MAX_LEN + max_alignment));
|
||||
|
||||
InitLenArray();
|
||||
|
||||
@ -163,12 +164,12 @@ struct StringTestState {
|
||||
}
|
||||
|
||||
const size_t MAX_LEN;
|
||||
char *ptr, *ptr1, *ptr2;
|
||||
Character *ptr, *ptr1, *ptr2;
|
||||
size_t n;
|
||||
int len[ITER + 1];
|
||||
|
||||
private:
|
||||
char *glob_ptr, *glob_ptr1, *glob_ptr2;
|
||||
Character *glob_ptr, *glob_ptr1, *glob_ptr2;
|
||||
|
||||
// Calculate input lengths and fill state.len with them.
|
||||
// Test small lengths with more density than big ones. Manually push
|
||||
@ -188,7 +189,7 @@ struct StringTestState {
|
||||
};
|
||||
|
||||
TEST(string, strcat) {
|
||||
StringTestState state(SMALL);
|
||||
StringTestState<char> state(SMALL);
|
||||
for (size_t i = 1; i < state.n; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
@ -380,7 +381,7 @@ TEST(string, strchr_with_0) {
|
||||
TEST(string, strchr) {
|
||||
int seek_char = random() & 255;
|
||||
|
||||
StringTestState state(SMALL);
|
||||
StringTestState<char> state(SMALL);
|
||||
for (size_t i = 1; i < state.n; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
@ -411,7 +412,7 @@ TEST(string, strchr) {
|
||||
}
|
||||
|
||||
TEST(string, strcmp) {
|
||||
StringTestState state(SMALL);
|
||||
StringTestState<char> state(SMALL);
|
||||
for (size_t i = 1; i < state.n; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
@ -448,7 +449,7 @@ TEST(string, strcmp) {
|
||||
}
|
||||
|
||||
TEST(string, strcpy) {
|
||||
StringTestState state(SMALL);
|
||||
StringTestState<char> state(SMALL);
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
|
||||
@ -476,7 +477,7 @@ TEST(string, strcpy) {
|
||||
|
||||
#if __BIONIC__
|
||||
TEST(string, strlcat) {
|
||||
StringTestState state(SMALL);
|
||||
StringTestState<char> state(SMALL);
|
||||
for (size_t i = 0; i < state.n; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
@ -505,7 +506,7 @@ TEST(string, strlcat) {
|
||||
|
||||
#if __BIONIC__
|
||||
TEST(string, strlcpy) {
|
||||
StringTestState state(SMALL);
|
||||
StringTestState<char> state(SMALL);
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
|
||||
@ -539,7 +540,7 @@ TEST(string, strlcpy) {
|
||||
#endif
|
||||
|
||||
TEST(string, strncat) {
|
||||
StringTestState state(SMALL);
|
||||
StringTestState<char> state(SMALL);
|
||||
for (size_t i = 1; i < state.n; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
@ -565,7 +566,7 @@ TEST(string, strncat) {
|
||||
}
|
||||
|
||||
TEST(string, strncmp) {
|
||||
StringTestState state(SMALL);
|
||||
StringTestState<char> state(SMALL);
|
||||
for (size_t i = 1; i < state.n; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
@ -602,7 +603,7 @@ TEST(string, strncmp) {
|
||||
}
|
||||
|
||||
TEST(string, strncpy) {
|
||||
StringTestState state(SMALL);
|
||||
StringTestState<char> state(SMALL);
|
||||
for (size_t j = 0; j < ITER; j++) {
|
||||
state.NewIteration();
|
||||
|
||||
@ -630,7 +631,7 @@ TEST(string, strncpy) {
|
||||
|
||||
TEST(string, strrchr) {
|
||||
int seek_char = random() & 255;
|
||||
StringTestState state(SMALL);
|
||||
StringTestState<char> state(SMALL);
|
||||
for (size_t i = 1; i < state.n; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
@ -662,7 +663,7 @@ TEST(string, strrchr) {
|
||||
|
||||
TEST(string, memchr) {
|
||||
int seek_char = random() & 255;
|
||||
StringTestState state(SMALL);
|
||||
StringTestState<char> state(SMALL);
|
||||
for (size_t i = 0; i < state.n; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
@ -685,7 +686,7 @@ TEST(string, memchr) {
|
||||
|
||||
TEST(string, memrchr) {
|
||||
int seek_char = random() & 255;
|
||||
StringTestState state(SMALL);
|
||||
StringTestState<char> state(SMALL);
|
||||
for (size_t i = 0; i < state.n; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
@ -707,7 +708,7 @@ TEST(string, memrchr) {
|
||||
}
|
||||
|
||||
TEST(string, memcmp) {
|
||||
StringTestState state(SMALL);
|
||||
StringTestState<char> state(SMALL);
|
||||
for (size_t i = 0; i < state.n; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
@ -728,8 +729,61 @@ TEST(string, memcmp) {
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__BIONIC__)
// __memcmp16 is declared here by hand, with C linkage, for the test below.
extern "C" int __memcmp16(const unsigned short *ptr1, const unsigned short *ptr2, size_t n);

// Fills both buffers with one random 16-bit value, overwrites a single
// position of the second buffer with another random value, and checks that
// __memcmp16 returns exactly the difference of the two values.
TEST(string, __memcmp16) {
  StringTestState<unsigned short> state(SMALL);

  for (size_t len_idx = 0; len_idx < state.n; ++len_idx) {
    for (size_t iter = 0; iter < POS_ITER; ++iter) {
      state.NewIteration();

      unsigned short mask = 0xffff;
      unsigned short fill_value = rand() & mask;
      unsigned short odd_value = rand() & mask;

      std::fill(state.ptr1, state.ptr1 + state.MAX_LEN, fill_value);
      std::fill(state.ptr2, state.ptr2 + state.MAX_LEN, fill_value);

      // Position of the (possibly) differing unit; a zero length pins it to index 0.
      int pos = 0;
      if (state.len[len_idx] != 0) {
        pos = random() % state.len[len_idx];
      }
      state.ptr2[pos] = odd_value;

      // Both operands are promoted to int, so the subtraction is exact.
      int expected = fill_value - odd_value;
      int actual = __memcmp16(state.ptr1, state.ptr2, state.MAX_LEN);

      ASSERT_EQ(expected, actual);
    }
  }
}
#endif
|
||||
|
||||
// Fills both wide buffers with one random value, overwrites a single
// position of the second buffer with another random value, and checks that
// the sign of wmemcmp's result matches the sign of the value difference.
TEST(string, wmemcmp) {
  StringTestState<wchar_t> state(SMALL);

  // All-ones mask as wide as wchar_t; it never changes, so build it once.
  const long long mask = (1LL << 8 * sizeof(wchar_t)) - 1;

  for (size_t len_idx = 0; len_idx < state.n; ++len_idx) {
    for (size_t iter = 0; iter < POS_ITER; ++iter) {
      state.NewIteration();

      int fill_value = rand() & mask;
      int odd_value = rand() & mask;
      wmemset(state.ptr1, fill_value, state.MAX_LEN);
      wmemset(state.ptr2, fill_value, state.MAX_LEN);

      // Position of the (possibly) differing element; a zero length pins it to index 0.
      int pos = 0;
      if (state.len[len_idx] != 0) {
        pos = random() % state.len[len_idx];
      }
      state.ptr2[pos] = odd_value;

      int expected = fill_value - odd_value;
      int actual = wmemcmp(state.ptr1, state.ptr2, state.MAX_LEN);

      // Only the sign is compared, not the magnitude.
      ASSERT_EQ(signum(expected), signum(actual));
    }
  }
}
|
||||
|
||||
TEST(string, memcpy) {
|
||||
StringTestState state(LARGE);
|
||||
StringTestState<char> state(LARGE);
|
||||
int rand = random() & 255;
|
||||
for (size_t i = 0; i < state.n - 1; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
@ -751,7 +805,7 @@ TEST(string, memcpy) {
|
||||
}
|
||||
|
||||
TEST(string, memset) {
|
||||
StringTestState state(LARGE);
|
||||
StringTestState<char> state(LARGE);
|
||||
char ch = random () & 255;
|
||||
for (size_t i = 0; i < state.n - 1; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
@ -773,7 +827,7 @@ TEST(string, memset) {
|
||||
}
|
||||
|
||||
TEST(string, memmove) {
|
||||
StringTestState state(LARGE);
|
||||
StringTestState<char> state(LARGE);
|
||||
for (size_t i = 0; i < state.n - 1; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
@ -794,7 +848,7 @@ TEST(string, memmove) {
|
||||
}
|
||||
|
||||
TEST(string, bcopy) {
|
||||
StringTestState state(LARGE);
|
||||
StringTestState<char> state(LARGE);
|
||||
for (size_t i = 0; i < state.n; i++) {
|
||||
for (size_t j = 0; j < POS_ITER; j++) {
|
||||
state.NewIteration();
|
||||
@ -813,7 +867,7 @@ TEST(string, bcopy) {
|
||||
}
|
||||
|
||||
TEST(string, bzero) {
|
||||
StringTestState state(LARGE);
|
||||
StringTestState<char> state(LARGE);
|
||||
for (size_t j = 0; j < ITER; j++) {
|
||||
state.NewIteration();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user