diff --git a/libc/Android.mk b/libc/Android.mk index a034c5069..e6192671b 100644 --- a/libc/Android.mk +++ b/libc/Android.mk @@ -357,7 +357,7 @@ libc_common_src_files += \ arch-x86/string/memset_wrapper.S \ arch-x86/string/strcmp_wrapper.S \ arch-x86/string/strncmp_wrapper.S \ - arch-x86/string/strlen.S \ + arch-x86/string/strlen_wrapper.S \ bionic/pthread.c \ bionic/pthread-atfork.c \ bionic/pthread-timers.c \ diff --git a/libc/arch-x86/string/memcmp_wrapper.S b/libc/arch-x86/string/memcmp_wrapper.S index 7e28c1e7b..fa0c67259 100644 --- a/libc/arch-x86/string/memcmp_wrapper.S +++ b/libc/arch-x86/string/memcmp_wrapper.S @@ -31,7 +31,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(USE_SSSE3) # define MEMCMP memcmp -# include "ssse3-memcmp3.S" +# include "ssse3-memcmp3-new.S" #else diff --git a/libc/arch-x86/string/sse2-memset5-atom.S b/libc/arch-x86/string/sse2-memset5-atom.S index 59a598c36..4b7f71bca 100644 --- a/libc/arch-x86/string/sse2-memset5-atom.S +++ b/libc/arch-x86/string/sse2-memset5-atom.S @@ -49,7 +49,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifndef cfi_restore -# define cfi_restore(reg) .cfi_restore (reg) +# define cfi_restore(reg) .cfi_restore reg #endif #ifndef cfi_adjust_cfa_offset @@ -285,7 +285,6 @@ L(32bytesormore): pxor %xmm0, %xmm0 #else movd %eax, %xmm0 - punpcklbw %xmm0, %xmm0 pshufd $0, %xmm0, %xmm0 #endif testl $0xf, %edx @@ -329,14 +328,17 @@ L(128bytesormore): #ifdef DATA_CACHE_SIZE POP (%ebx) +# define RESTORE_EBX_STATE CFI_PUSH (%ebx) cmp $DATA_CACHE_SIZE, %ecx #else # ifdef SHARED +# define RESTORE_EBX_STATE call __i686.get_pc_thunk.bx add $_GLOBAL_OFFSET_TABLE_, %ebx cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx # else POP (%ebx) +# define RESTORE_EBX_STATE CFI_PUSH (%ebx) cmp __x86_data_cache_size, %ecx # endif #endif @@ -370,7 +372,7 @@ L(128bytesormore_normal): jae L(128bytesormore_normal) L(128bytesless_normal): - lea 128(%ecx), %ecx + add $128, %ecx BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) ALIGN (4) @@ -393,8 +395,13 @@ L(128bytes_L2_normal): L(128bytesless_L2_normal): BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) + RESTORE_EBX_STATE L(128bytesormore_nt_start): sub %ebx, %ecx + mov %ebx, %eax + and $0x7f, %eax + add %eax, %ecx + movd %xmm0, %eax ALIGN (4) L(128bytesormore_shared_cache_loop): prefetcht0 0x3c0(%edx) diff --git a/libc/arch-x86/string/sse2-strlen-atom.S b/libc/arch-x86/string/sse2-strlen-atom.S new file mode 100644 index 000000000..891186822 --- /dev/null +++ b/libc/arch-x86/string/sse2-strlen-atom.S @@ -0,0 +1,369 @@ +#define STRLEN sse2_strlen_atom + +#ifndef L +# define L(label) .L##label +#endif + +#ifndef cfi_startproc +# define cfi_startproc .cfi_startproc +#endif + +#ifndef cfi_endproc +# define cfi_endproc .cfi_endproc +#endif + +#ifndef cfi_rel_offset +# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off +#endif + +#ifndef cfi_restore +# define cfi_restore(reg) .cfi_restore reg +#endif + +#ifndef cfi_adjust_cfa_offset +# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off +#endif + +#ifndef cfi_remember_state +# define cfi_remember_state .cfi_remember_state +#endif + +#ifndef cfi_restore_state +# define cfi_restore_state .cfi_restore_state +#endif + +#ifndef ENTRY +# define ENTRY(name) \ + .type name, @function; \ + .globl name; \ + .p2align 4; \ +name: \ + cfi_startproc +#endif + +#ifndef END +# define END(name) \ + cfi_endproc; \ + .size name, .-name +#endif + +#define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +#define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +#define PUSH(REG) pushl REG; CFI_PUSH (REG) +#define POP(REG) popl REG; CFI_POP (REG) +#define PARMS 4 +#define STR PARMS +#define ENTRANCE +#define RETURN ret + + .text +ENTRY (STRLEN) + ENTRANCE + mov STR(%esp), %edx + xor %eax, %eax + cmpb $0, (%edx) + jz L(exit_tail0) + cmpb $0, 1(%edx) + jz L(exit_tail1) + cmpb $0, 2(%edx) + jz L(exit_tail2) + cmpb $0, 3(%edx) + jz L(exit_tail3) + cmpb $0, 4(%edx) + jz L(exit_tail4) + cmpb $0, 5(%edx) + jz L(exit_tail5) + cmpb $0, 6(%edx) + jz L(exit_tail6) + cmpb $0, 7(%edx) + jz L(exit_tail7) + cmpb $0, 8(%edx) + jz L(exit_tail8) + cmpb $0, 9(%edx) + jz L(exit_tail9) + cmpb $0, 10(%edx) + jz L(exit_tail10) + cmpb $0, 11(%edx) + jz L(exit_tail11) + cmpb $0, 12(%edx) + jz L(exit_tail12) + cmpb $0, 13(%edx) + jz L(exit_tail13) + cmpb $0, 14(%edx) + jz L(exit_tail14) + cmpb $0, 15(%edx) + jz L(exit_tail15) + pxor %xmm0, %xmm0 + mov %edx, %eax + mov %edx, %ecx + and $-16, %eax + add $16, %ecx + add $16, %eax + + pcmpeqb (%eax), %xmm0 + pmovmskb %xmm0, %edx + pxor %xmm1, %xmm1 + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm1 + pmovmskb %xmm1, %edx + pxor %xmm2, %xmm2 + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + + pcmpeqb (%eax), %xmm2 + pmovmskb %xmm2, %edx + pxor %xmm3, %xmm3 + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + pcmpeqb (%eax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea 16(%eax), %eax + jnz L(exit) + + and $-0x40, %eax + PUSH (%esi) + PUSH (%edi) + PUSH (%ebx) + PUSH (%ebp) + xor %ebp, %ebp +L(aligned_64): + pcmpeqb (%eax), %xmm0 + pcmpeqb 16(%eax), %xmm1 + pcmpeqb 32(%eax), %xmm2 + pcmpeqb 48(%eax), %xmm3 + pmovmskb %xmm0, %edx + pmovmskb %xmm1, %esi + pmovmskb %xmm2, %edi + pmovmskb %xmm3, %ebx + or %edx, %ebp + or %esi, %ebp + or %edi, %ebp + or %ebx, %ebp + lea 64(%eax), %eax + jz L(aligned_64) +L(48leave): + test %edx, %edx + jnz L(aligned_64_exit_16) + test %esi, %esi + jnz L(aligned_64_exit_32) + test %edi, %edi + jnz L(aligned_64_exit_48) + mov %ebx, %edx + lea (%eax), %eax + jmp L(aligned_64_exit) +L(aligned_64_exit_48): + lea -16(%eax), %eax + mov %edi, %edx + jmp L(aligned_64_exit) +L(aligned_64_exit_32): + lea -32(%eax), %eax + mov %esi, %edx + jmp L(aligned_64_exit) +L(aligned_64_exit_16): + lea -48(%eax), %eax +L(aligned_64_exit): + POP (%ebp) + POP (%ebx) + POP (%edi) + POP (%esi) +L(exit): + sub %ecx, %eax + test %dl, %dl + jz L(exit_high) + test $0x01, %dl + jnz L(exit_tail0) + + test $0x02, %dl + jnz L(exit_tail1) + + test $0x04, %dl + jnz L(exit_tail2) + + test $0x08, %dl + jnz L(exit_tail3) + + test $0x10, %dl + jnz L(exit_tail4) + + test $0x20, %dl + jnz L(exit_tail5) + + test $0x40, %dl + jnz L(exit_tail6) + add $7, %eax +L(exit_tail0): + RETURN + +L(exit_high): + add $8, %eax + test $0x01, %dh + jnz L(exit_tail0) + + test $0x02, %dh + jnz L(exit_tail1) + + test $0x04, %dh + jnz L(exit_tail2) + + test $0x08, %dh + jnz L(exit_tail3) + + test $0x10, %dh + jnz L(exit_tail4) + + test $0x20, %dh + jnz L(exit_tail5) + + test $0x40, %dh + jnz L(exit_tail6) + add $7, %eax + RETURN + + .p2align 4 +L(exit_tail1): + add $1, %eax + RETURN + +L(exit_tail2): + add $2, %eax + RETURN + +L(exit_tail3): + add $3, %eax + RETURN + +L(exit_tail4): + add $4, %eax + RETURN + +L(exit_tail5): + add $5, %eax + RETURN + +L(exit_tail6): + add $6, %eax + RETURN + +L(exit_tail7): + add $7, %eax + RETURN + +L(exit_tail8): + add $8, %eax + RETURN + +L(exit_tail9): + add $9, %eax + RETURN + +L(exit_tail10): + add $10, %eax + RETURN + +L(exit_tail11): + add $11, %eax + RETURN + +L(exit_tail12): + add $12, %eax + RETURN + +L(exit_tail13): + add $13, %eax + RETURN + +L(exit_tail14): + add $14, %eax + RETURN + +L(exit_tail15): + add $15, %eax + ret + +END (STRLEN) diff --git a/libc/arch-x86/string/ssse3-memcmp3.S b/libc/arch-x86/string/ssse3-memcmp3-new.S similarity index 95% rename from libc/arch-x86/string/ssse3-memcmp3.S rename to libc/arch-x86/string/ssse3-memcmp3-new.S index a7ce819ff..5ad879150 100644 --- a/libc/arch-x86/string/ssse3-memcmp3.S +++ b/libc/arch-x86/string/ssse3-memcmp3-new.S @@ -53,13 +53,21 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifndef cfi_restore -# define cfi_restore(reg) .cfi_restore (reg) +# define cfi_restore(reg) .cfi_restore reg #endif #ifndef cfi_adjust_cfa_offset # define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off #endif +#ifndef cfi_remember_state +# define cfi_remember_state .cfi_remember_state +#endif + +#ifndef cfi_restore_state +# define cfi_restore_state .cfi_restore_state +#endif + #ifndef ENTRY # define ENTRY(name) \ .type name, @function; \ @@ -91,8 +99,7 @@ name: \ #define BLK2 BLK1+4 #define LEN BLK2+4 #define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret -#define RETURN RETURN_END; CFI_PUSH (%ebx); CFI_PUSH (%edi); \ - CFI_PUSH (%esi) +#define RETURN RETURN_END; cfi_restore_state; cfi_remember_state .section .text.ssse3,"ax",@progbits ENTRY (MEMCMP) @@ -131,6 +138,7 @@ L(48bytesormore): PUSH (%ebx) PUSH (%esi) PUSH (%edi) + cfi_remember_state movdqu (%eax), %xmm3 movdqu (%edx), %xmm0 movl %eax, %edi @@ -211,8 +219,8 @@ L(shr_0): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_0_gobble): lea -48(%ecx), %ecx @@ -257,8 +265,8 @@ L(shr_0_gobble_loop_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_1): cmp $80, %ecx @@ -287,8 +295,8 @@ L(shr_1): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_1_gobble): sub $32, %ecx @@ -340,8 +348,8 @@ L(shr_1_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_2): cmp $80, %ecx @@ -370,8 +378,8 @@ L(shr_2): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_2_gobble): sub $32, %ecx @@ -423,8 +431,8 @@ L(shr_2_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_3): cmp $80, %ecx @@ -453,8 +461,8 @@ L(shr_3): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_3_gobble): sub $32, %ecx @@ -506,8 +514,8 @@ L(shr_3_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_4): cmp $80, %ecx @@ -536,8 +544,8 @@ L(shr_4): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_4_gobble): sub $32, %ecx @@ -589,8 +597,8 @@ L(shr_4_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_5): cmp $80, %ecx @@ -619,8 +627,8 @@ L(shr_5): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_5_gobble): sub $32, %ecx @@ -672,8 +680,8 @@ L(shr_5_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_6): cmp $80, %ecx @@ -702,8 +710,8 @@ L(shr_6): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_6_gobble): sub $32, %ecx @@ -755,8 +763,8 @@ L(shr_6_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_7): cmp $80, %ecx @@ -785,8 +793,8 @@ L(shr_7): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_7_gobble): sub $32, %ecx @@ -838,8 +846,8 @@ L(shr_7_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_8): cmp $80, %ecx @@ -868,8 +876,8 @@ L(shr_8): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_8_gobble): sub $32, %ecx @@ -921,8 +929,8 @@ L(shr_8_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_9): cmp $80, %ecx @@ -951,8 +959,8 @@ L(shr_9): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_9_gobble): sub $32, %ecx @@ -1004,8 +1012,8 @@ L(shr_9_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_10): cmp $80, %ecx @@ -1034,8 +1042,8 @@ L(shr_10): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_10_gobble): sub $32, %ecx @@ -1087,8 +1095,8 @@ L(shr_10_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_11): cmp $80, %ecx @@ -1117,8 +1125,8 @@ L(shr_11): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_11_gobble): sub $32, %ecx @@ -1170,8 +1178,8 @@ L(shr_11_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_12): cmp $80, %ecx @@ -1200,8 +1208,8 @@ L(shr_12): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_12_gobble): sub $32, %ecx @@ -1253,8 +1261,8 @@ L(shr_12_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_13): cmp $80, %ecx @@ -1283,8 +1291,8 @@ L(shr_13): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_13_gobble): sub $32, %ecx @@ -1336,8 +1344,8 @@ L(shr_13_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_14): cmp $80, %ecx @@ -1366,8 +1374,8 @@ L(shr_14): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_14_gobble): sub $32, %ecx @@ -1419,8 +1427,8 @@ L(shr_14_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_15): cmp $80, %ecx @@ -1449,8 +1457,8 @@ L(shr_15): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shr_15_gobble): sub $32, %ecx @@ -1502,8 +1510,8 @@ L(shr_15_gobble_next): POP (%esi) jmp L(less48bytes) - CFI_PUSH (%esi) - CFI_PUSH (%edi) + cfi_restore_state + cfi_remember_state ALIGN (4) L(exit): pmovmskb %xmm1, %ebx diff --git a/libc/arch-x86/string/ssse3-memcpy5.S b/libc/arch-x86/string/ssse3-memcpy5.S index 6b9040266..b4773dfb1 100644 --- a/libc/arch-x86/string/ssse3-memcpy5.S +++ b/libc/arch-x86/string/ssse3-memcpy5.S @@ -53,13 +53,21 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifndef cfi_restore -# define cfi_restore(reg) .cfi_restore (reg) +# define cfi_restore(reg) .cfi_restore reg #endif #ifndef cfi_adjust_cfa_offset # define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off #endif +#ifndef cfi_remember_state +# define cfi_remember_state .cfi_remember_state +#endif + +#ifndef cfi_restore_state +# define cfi_restore_state .cfi_restore_state +#endif + #ifndef ENTRY # define ENTRY(name) \ .type name, @function; \ @@ -118,8 +126,8 @@ name: \ jmp *%ebx # define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \ - addl $(TABLE - .), %ebx - + addl $(TABLE - .), %ebx + # define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \ addl (%ebx,INDEX,SCALE), %ebx; \ /* We loaded the jump table. Go. */ \ @@ -146,7 +154,7 @@ __i686.get_pc_thunk.bx: # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ jmp *TABLE(,INDEX,SCALE) -# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) +# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) # define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \ jmp *TABLE(,INDEX,SCALE) @@ -198,6 +206,7 @@ L(48bytesormore): movl %edx, %edi and $-16, %edx PUSH (%esi) + cfi_remember_state add $16, %edx movl %edi, %esi sub %edx, %edi @@ -223,6 +232,8 @@ L(48bytesormore): BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_0): movdqu %xmm0, (%esi) @@ -270,6 +281,7 @@ L(shl_0_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) + CFI_PUSH (%edi) L(shl_0_gobble): #ifdef DATA_CACHE_SIZE_HALF @@ -419,7 +431,8 @@ L(shl_0_mem_less_16bytes): add %ecx, %eax BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_1): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -463,6 +476,8 @@ L(shl_1_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_2): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -506,6 +521,8 @@ L(shl_2_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_3): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -549,6 +566,8 @@ L(shl_3_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_4): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -592,6 +611,8 @@ L(shl_4_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_5): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -635,7 +656,8 @@ L(shl_5_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_6): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -679,6 +701,8 @@ L(shl_6_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_7): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -722,6 +746,8 @@ L(shl_7_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_8): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -765,6 +791,8 @@ L(shl_8_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_9): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -808,6 +836,8 @@ L(shl_9_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_10): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -851,6 +881,8 @@ L(shl_10_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_11): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -894,6 +926,8 @@ L(shl_11_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_12): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -937,6 +971,8 @@ L(shl_12_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_13): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -980,6 +1016,8 @@ L(shl_13_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_14): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -1023,7 +1061,8 @@ L(shl_14_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_15): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -1264,8 +1303,10 @@ L(fwd_write_3bytes): movl DEST(%esp), %eax # endif #endif - RETURN + RETURN_END + cfi_restore_state + cfi_remember_state ALIGN (4) L(large_page): movdqu (%eax), %xmm1 @@ -1688,6 +1729,7 @@ L(bk_write_less32bytes): L(bk_write_less32bytes_2): BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) + CFI_PUSH (%esi) ALIGN (4) L(bk_align): cmp $8, %ecx diff --git a/libc/arch-x86/string/ssse3-strcmp.S b/libc/arch-x86/string/ssse3-strcmp-latest.S similarity index 99% rename from libc/arch-x86/string/ssse3-strcmp.S rename to libc/arch-x86/string/ssse3-strcmp-latest.S index cfb2e9ff2..69c6425be 100644 --- a/libc/arch-x86/string/ssse3-strcmp.S +++ b/libc/arch-x86/string/ssse3-strcmp-latest.S @@ -45,13 +45,21 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifndef cfi_restore -# define cfi_restore(reg) .cfi_restore (reg) +# define cfi_restore(reg) .cfi_restore reg #endif #ifndef cfi_adjust_cfa_offset # define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off #endif +#ifndef cfi_remember_state +# define cfi_remember_state .cfi_remember_state +#endif + +#ifndef cfi_restore_state +# define cfi_restore_state .cfi_restore_state +#endif + #ifndef ENTRY # define ENTRY(name) \ .type name, @function; \ @@ -201,6 +209,9 @@ L(crosspage): PUSH (%ebx) PUSH (%edi) PUSH (%esi) +#ifdef USE_AS_STRNCMP + cfi_remember_state +#endif movl %edx, %edi movl %eax, %ecx @@ -1521,17 +1532,18 @@ L(gobble_ashr_12): sub $0xffff, %esi jnz L(exit) +#ifdef USE_AS_STRNCMP + cmp $16, %ebp + lea -16(%ebp), %ebp + jbe L(more8byteseq) +#endif + add $16, %ecx movdqa %xmm4, %xmm3 add $16, %edi jg L(nibble_ashr_12) -#ifdef USE_AS_STRNCMP - cmp $16, %ebp - lea -16(%ebp), %ebp - jbe L(more8byteseq) -#endif movdqa (%eax, %ecx), %xmm1 movdqa (%edx, %ecx), %xmm2 movdqa %xmm2, %xmm4 @@ -2087,10 +2099,7 @@ L(neq_bigger): RETURN #ifdef USE_AS_STRNCMP - CFI_PUSH (%ebx) - CFI_PUSH (%edi) - CFI_PUSH (%esi) - + cfi_restore_state .p2align 4 L(more8byteseq): POP (%esi) diff --git a/libc/arch-x86/string/strcmp_wrapper.S b/libc/arch-x86/string/strcmp_wrapper.S index 69b7f0bed..20f3064e5 100644 --- a/libc/arch-x86/string/strcmp_wrapper.S +++ b/libc/arch-x86/string/strcmp_wrapper.S @@ -31,7 +31,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(USE_SSSE3) # define ssse3_strcmp_latest strcmp -# include "ssse3-strcmp.S" +# include "ssse3-strcmp-latest.S" #else diff --git a/libc/arch-x86/string/strlen_wrapper.S b/libc/arch-x86/string/strlen_wrapper.S new file mode 100644 index 000000000..e62786b9d --- /dev/null +++ b/libc/arch-x86/string/strlen_wrapper.S @@ -0,0 +1,40 @@ +/* +Copyright (c) 2010, Intel Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if defined(USE_SSE2) + +# define sse2_strlen_atom strlen +# include "sse2-strlen-atom.S" + +#else + +# include "strlen.S" + +#endif diff --git a/libc/arch-x86/string/strncmp_wrapper.S b/libc/arch-x86/string/strncmp_wrapper.S index 205018420..191d7555e 100644 --- a/libc/arch-x86/string/strncmp_wrapper.S +++ b/libc/arch-x86/string/strncmp_wrapper.S @@ -32,7 +32,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # define USE_AS_STRNCMP # define ssse3_strcmp_latest strncmp -# include "ssse3-strcmp.S" +# include "ssse3-strcmp-latest.S" #else