/*
 * sse2_strlen_atom -- SSE2 string-length routine for 32-bit x86.
 *
 * Syntax:  GNU as, AT&T operand order, run through the C preprocessor.
 * C view:  size_t sse2_strlen_atom(const char *s);
 * In:      4(%esp) = s (pointer to a NUL-terminated byte string)
 * Out:     %eax    = number of bytes before the first zero byte
 * Saved:   %esi, %edi, %ebx, %ebp are pushed/popped around the 64-byte loop
 * Clobbers: %ecx, %edx, %xmm0-%xmm3, flags
 *
 * Strategy:
 *   1. Test the first 16 bytes one at a time (no alignment needed).
 *   2. Probe sixteen 16-byte aligned chunks, starting at (s & -16) + 16.
 *   3. Round the cursor down to a 64-byte boundary and scan 64 bytes per
 *      iteration until any of the four compare masks is non-zero.
 *   4. Convert the mask of the first matching chunk into a byte index.
 *
 * All vector loads use 16-byte aligned addresses.
 *
 * NOTE(review): the "atom" suffix suggests the instruction scheduling was
 * tuned for Intel Atom; nothing in this file confirms that.
 */

#define STRLEN	sse2_strlen_atom

/* File-local label helper: L(foo) becomes .Lfoo, which is not exported. */
#ifndef L
# define L(label)	.L##label
#endif

/*
 * CFI wrappers.  Each one falls back to the plain gas directive unless the
 * including environment has already supplied its own definition.
 */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state		.cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state		.cfi_restore_state
#endif

/* Function prologue: global, typed as a function, 16-byte aligned entry. */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* Function epilogue: close the CFI region and record the symbol size. */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind bookkeeping for one 4-byte push / pop of REG. */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offset of the first stack argument from %esp at function entry. */
#define PARMS		4
#define STR		PARMS
#define ENTRANCE
#define RETURN		ret

	.text
ENTRY (STRLEN)
	ENTRANCE
	mov	STR(%esp), %edx		/* edx = s */
	xor	%eax, %eax		/* eax = 0; the exit_tailN stubs add N */

	/* Phase 1: check bytes s[0] .. s[15] individually. */
	cmpb	$0, (%edx)
	jz	L(exit_tail0)
	cmpb	$0, 1(%edx)
	jz	L(exit_tail1)
	cmpb	$0, 2(%edx)
	jz	L(exit_tail2)
	cmpb	$0, 3(%edx)
	jz	L(exit_tail3)
	cmpb	$0, 4(%edx)
	jz	L(exit_tail4)
	cmpb	$0, 5(%edx)
	jz	L(exit_tail5)
	cmpb	$0, 6(%edx)
	jz	L(exit_tail6)
	cmpb	$0, 7(%edx)
	jz	L(exit_tail7)
	cmpb	$0, 8(%edx)
	jz	L(exit_tail8)
	cmpb	$0, 9(%edx)
	jz	L(exit_tail9)
	cmpb	$0, 10(%edx)
	jz	L(exit_tail10)
	cmpb	$0, 11(%edx)
	jz	L(exit_tail11)
	cmpb	$0, 12(%edx)
	jz	L(exit_tail12)
	cmpb	$0, 13(%edx)
	jz	L(exit_tail13)
	cmpb	$0, 14(%edx)
	jz	L(exit_tail14)
	cmpb	$0, 15(%edx)
	jz	L(exit_tail15)

	/*
	 * Phase 2: sixteen aligned 16-byte probes.
	 *
	 *   eax = read cursor, first chunk is (s & -16) + 16
	 *   ecx = s + 16, kept until L(exit)
	 *
	 * Every probe advances eax by 16 before branching (lea leaves the
	 * flags from test intact), so at L(exit) eax is the address of the
	 * matching chunk plus 16 and eax - ecx is that chunk's offset from s.
	 *
	 * An xmm register that found no zero byte holds all-zero compare
	 * results, so it can be reused as the zero operand without clearing.
	 */
	pxor	%xmm0, %xmm0
	mov	%edx, %eax
	mov	%edx, %ecx
	and	$-16, %eax
	add	$16, %ecx
	add	$16, %eax

	pcmpeqb	(%eax), %xmm0		/* probe 1 */
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1		/* probe 2 */
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2		/* probe 3 */
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3		/* probe 4 */
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0		/* probe 5 */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1		/* probe 6 */
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2		/* probe 7 */
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3		/* probe 8 */
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0		/* probe 9 */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1		/* probe 10 */
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2		/* probe 11 */
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3		/* probe 12 */
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0		/* probe 13 */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1		/* probe 14 */
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2		/* probe 15 */
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3		/* probe 16 */
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/*
	 * Phase 3: 64 bytes per iteration.
	 *
	 * Rounding eax down to a multiple of 64 may revisit chunks that
	 * phase 2 already cleared; those bytes are known to be non-zero.
	 * esi/edi/ebx/ebp receive masks below, so they are saved first.
	 */
	and	$-0x40, %eax
	PUSH (%esi)
	PUSH (%edi)
	PUSH (%ebx)
	PUSH (%ebp)
	xor	%ebp, %ebp		/* ebp = OR of the four masks */
L(aligned_64):
	pcmpeqb	(%eax), %xmm0
	pcmpeqb	16(%eax), %xmm1
	pcmpeqb	32(%eax), %xmm2
	pcmpeqb	48(%eax), %xmm3
	pmovmskb %xmm0, %edx		/* mask for bytes  0..15 */
	pmovmskb %xmm1, %esi		/* mask for bytes 16..31 */
	pmovmskb %xmm2, %edi		/* mask for bytes 32..47 */
	pmovmskb %xmm3, %ebx		/* mask for bytes 48..63 */
	or	%edx, %ebp
	or	%esi, %ebp
	or	%edi, %ebp
	or	%ebx, %ebp
	lea	64(%eax), %eax		/* advance; ZF from the last or survives */
	jz	L(aligned_64)

	/*
	 * A zero byte lies in the 64 bytes ending at eax.  Pick the first
	 * non-zero mask into edx and rewind eax to that chunk's address + 16,
	 * the same convention the phase 2 probes use on entry to L(exit).
	 */
	test	%edx, %edx
	jnz	L(aligned_64_exit_16)
	test	%esi, %esi
	jnz	L(aligned_64_exit_32)
	test	%edi, %edi
	jnz	L(aligned_64_exit_48)
	mov	%ebx, %edx		/* match is in the fourth chunk */
	lea	(%eax), %eax		/* no-op: eax is already chunk + 16 */
	jmp	L(aligned_64_exit)
L(aligned_64_exit_48):
	lea	-16(%eax), %eax		/* match is in the third chunk */
	mov	%edi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_32):
	lea	-32(%eax), %eax		/* match is in the second chunk */
	mov	%esi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_16):
	lea	-48(%eax), %eax		/* match is in the first chunk; edx is set */
L(aligned_64_exit):
	POP (%ebp)
	POP (%ebx)
	POP (%edi)
	POP (%esi)

	/*
	 * Phase 4: turn (chunk, mask) into the length.
	 *
	 *   eax = chunk address + 16, ecx = s + 16, edx = non-zero 16-bit mask
	 *
	 * Bit i of the mask is set when byte i of the chunk is zero, so the
	 * lowest set bit marks the terminator.  Bits are tested low to high.
	 */
L(exit):
	sub	%ecx, %eax		/* eax = chunk address - s */
	test	%dl, %dl
	jz	L(exit_high)		/* no hit in bytes 0..7 */
	test	$0x01, %dl
	jnz	L(exit_tail0)
	test	$0x02, %dl
	jnz	L(exit_tail1)
	test	$0x04, %dl
	jnz	L(exit_tail2)
	test	$0x08, %dl
	jnz	L(exit_tail3)
	test	$0x10, %dl
	jnz	L(exit_tail4)
	test	$0x20, %dl
	jnz	L(exit_tail5)
	test	$0x40, %dl
	jnz	L(exit_tail6)
	add	$7, %eax		/* dl != 0 and bits 0..6 clear: bit 7 */
L(exit_tail0):
	RETURN

L(exit_high):
	add	$8, %eax		/* hit is in bytes 8..15; examine dh */
	test	$0x01, %dh
	jnz	L(exit_tail0)
	test	$0x02, %dh
	jnz	L(exit_tail1)
	test	$0x04, %dh
	jnz	L(exit_tail2)
	test	$0x08, %dh
	jnz	L(exit_tail3)
	test	$0x10, %dh
	jnz	L(exit_tail4)
	test	$0x20, %dh
	jnz	L(exit_tail5)
	test	$0x40, %dh
	jnz	L(exit_tail6)
	add	$7, %eax		/* only bit 7 of dh is left */
	RETURN

	/* exit_tailN: add N to the running count in eax and return. */
	.p2align 4
L(exit_tail1):
	add	$1, %eax
	RETURN

L(exit_tail2):
	add	$2, %eax
	RETURN

L(exit_tail3):
	add	$3, %eax
	RETURN

L(exit_tail4):
	add	$4, %eax
	RETURN

L(exit_tail5):
	add	$5, %eax
	RETURN

L(exit_tail6):
	add	$6, %eax
	RETURN

L(exit_tail7):
	add	$7, %eax
	RETURN

L(exit_tail8):
	add	$8, %eax
	RETURN

L(exit_tail9):
	add	$9, %eax
	RETURN

L(exit_tail10):
	add	$10, %eax
	RETURN

L(exit_tail11):
	add	$11, %eax
	RETURN

L(exit_tail12):
	add	$12, %eax
	RETURN

L(exit_tail13):
	add	$13, %eax
	RETURN

L(exit_tail14):
	add	$14, %eax
	RETURN

L(exit_tail15):
	add	$15, %eax
	RETURN

END (STRLEN)

/*
 * Declare that this object does not need an executable stack.  Without
 * this empty note GNU ld assumes the opposite for i386 objects.  Keep this
 * directive last: it switches the current section away from .text.
 */
#ifdef __ELF__
	.section .note.GNU-stack,"",@progbits
#endif