/*
 * bionic/libc/arch-x86/string/sse2-strlen-atom.S
 * Bruce Beare 124a542aa4 Update ATOM string routines to latest
 * Change-Id: I22a600e7f172681cfd38ff73a64e3fd07b284959
 * Signed-off-by: Lu, Hongjiu <hongjiu.lu@intel.com>
 * Signed-off-by: Bruce Beare <bruce.j.beare@intel.com>
 * 2010-10-11 12:33:58 -07:00
 *
 * 370 lines
 * 5.5 KiB
 * ArmAsm
 */

/* Symbol name under which the routine in this file is emitted. */
#define STRLEN			sse2_strlen_atom

/*
 * Argument access and entry/exit hooks.
 *   PARMS    offset from %esp, at function entry, of the first stack argument
 *   STR      offset of the string pointer argument (the first argument)
 *   ENTRANCE expands to nothing; it is written at the top of the function
 *   RETURN   instruction sequence used to leave the function
 */
#define PARMS			4
#define STR			PARMS
#define ENTRANCE
#define RETURN			ret

/* L(name) spells a local label; the .L prefix marks it as non-exported. */
#ifndef L
# define L(label)		.L##label
#endif

/*
 * Fallback definitions of the cfi_* helpers.  Each one maps onto the
 * assembler directive of the same name and is only supplied when no
 * definition is already in scope.
 */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif
#ifndef cfi_remember_state
# define cfi_remember_state		.cfi_remember_state
#endif
#ifndef cfi_restore_state
# define cfi_restore_state		.cfi_restore_state
#endif

/*
 * ENTRY(name): declare `name` as a global function symbol, align it to
 * 16 bytes (.p2align 4), place the label and open the CFI region.
 */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* END(name): close the CFI region and record the size of `name`. */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/*
 * Unwind bookkeeping for a 4-byte push/pop of REG: move the CFA by the
 * size of the slot and record (or drop) the save location of REG.
 */
#define CFI_PUSH(REG)	cfi_adjust_cfa_offset (4); cfi_rel_offset (REG, 0)
#define CFI_POP(REG)	cfi_adjust_cfa_offset (-4); cfi_restore (REG)

/* pushl/popl paired with the matching unwind annotation. */
#define PUSH(REG)			\
	pushl REG;			\
	CFI_PUSH (REG)
#define POP(REG)			\
	popl REG;			\
	CFI_POP (REG)
/*
 * STRLEN: length of a zero-terminated byte string (IA-32, SSE2).
 *
 * In:    STR(%esp) at entry = pointer to the string (first stack argument).
 *        Presumably called from C as size_t STRLEN (const char *s);
 *        confirm against the callers.
 * Out:   %eax = number of bytes that precede the first zero byte.
 * Clobb: %ecx, %edx, %xmm0-%xmm3, flags.
 *        %esi, %edi, %ebx, %ebp are used by the 64-byte loop only and are
 *        pushed before / popped after it.
 *
 * Outline:
 *   1. Test bytes 0..15 of the string one at a time.
 *   2. Starting at the first 16-byte boundary above the string pointer,
 *      test sixteen 16-byte blocks with pcmpeqb/pmovmskb (unrolled).
 *   3. Round the scan pointer down to a 64-byte boundary and loop over
 *      64 bytes (four blocks) per iteration.
 *   4. L(exit): turn the lowest set bit of the 16-bit match mask into a
 *      byte offset and add it to the offset of the matching block.
 *
 * Invariants after step 1:
 *   %ecx = s + 16 (never changes).
 *   %eax = address of the block just tested + 16 whenever L(exit) is
 *          reached, so %eax - %ecx = (block address) - s.
 *   %edx = pmovmskb mask of the block that contains the zero byte.
 *   %xmm0-%xmm3 are all-zero before every pcmpeqb: each is cleared once
 *          with pxor, and a compare that finds no zero byte leaves an
 *          all-zero result, which then serves as the next zero operand.
 */
	.text
ENTRY (STRLEN)
	ENTRANCE
	mov	STR(%esp), %edx		/* edx = s */
	xor	%eax, %eax		/* eax = 0, so exit_tailN returns N */

	/* Step 1: byte-wise check of s[0..15]. */
	cmpb	$0, (%edx)
	jz	L(exit_tail0)
	cmpb	$0, 1(%edx)
	jz	L(exit_tail1)
	cmpb	$0, 2(%edx)
	jz	L(exit_tail2)
	cmpb	$0, 3(%edx)
	jz	L(exit_tail3)
	cmpb	$0, 4(%edx)
	jz	L(exit_tail4)
	cmpb	$0, 5(%edx)
	jz	L(exit_tail5)
	cmpb	$0, 6(%edx)
	jz	L(exit_tail6)
	cmpb	$0, 7(%edx)
	jz	L(exit_tail7)
	cmpb	$0, 8(%edx)
	jz	L(exit_tail8)
	cmpb	$0, 9(%edx)
	jz	L(exit_tail9)
	cmpb	$0, 10(%edx)
	jz	L(exit_tail10)
	cmpb	$0, 11(%edx)
	jz	L(exit_tail11)
	cmpb	$0, 12(%edx)
	jz	L(exit_tail12)
	cmpb	$0, 13(%edx)
	jz	L(exit_tail13)
	cmpb	$0, 14(%edx)
	jz	L(exit_tail14)
	cmpb	$0, 15(%edx)
	jz	L(exit_tail15)

	/*
	 * Step 2 setup.  eax = (s & -16) + 16 is the first 16-byte boundary
	 * above s; it is at most s + 16, so every byte below it was already
	 * tested in step 1.  ecx = s + 16 is the bias subtracted at L(exit).
	 */
	pxor	%xmm0, %xmm0
	mov	%edx, %eax
	mov	%edx, %ecx
	and	$-16, %eax
	add	$16, %ecx
	add	$16, %eax

	/*
	 * Sixteen unrolled 16-byte blocks.  In each block lea advances eax
	 * between test and jnz; lea leaves the flags alone, so jnz still
	 * acts on the result of test.
	 */

	/* block 1 (also clears xmm1 for block 2) */
	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 2 (also clears xmm2 for block 3) */
	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 3 (also clears xmm3 for block 4) */
	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 4 */
	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 5 */
	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 6 */
	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 7 */
	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 8 */
	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 9 */
	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 10 */
	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 11 */
	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 12 */
	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 13 */
	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 14 */
	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 15 */
	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* block 16 */
	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/*
	 * Step 3.  Round eax down to a 64-byte boundary.  This can only move
	 * the pointer back over blocks that were just found free of zero
	 * bytes, so re-testing them is harmless.  ebp accumulates the OR of
	 * the four masks; it starts at 0 and is still 0 whenever the loop
	 * repeats.
	 */
	and	$-0x40, %eax
	PUSH (%esi)
	PUSH (%edi)
	PUSH (%ebx)
	PUSH (%ebp)
	xor	%ebp, %ebp
L(aligned_64):
	pcmpeqb	(%eax), %xmm0
	pcmpeqb	16(%eax), %xmm1
	pcmpeqb	32(%eax), %xmm2
	pcmpeqb	48(%eax), %xmm3
	pmovmskb %xmm0, %edx		/* mask of bytes  0..15 */
	pmovmskb %xmm1, %esi		/* mask of bytes 16..31 */
	pmovmskb %xmm2, %edi		/* mask of bytes 32..47 */
	pmovmskb %xmm3, %ebx		/* mask of bytes 48..63 */
	or	%edx, %ebp
	or	%esi, %ebp
	or	%edi, %ebp
	or	%ebx, %ebp		/* ZF set iff all four masks are 0 */
	lea	64(%eax), %eax		/* advance; flags untouched */
	jz	L(aligned_64)

	/*
	 * A zero byte lies in the 64 bytes just tested.  Pick the first
	 * non-empty mask, move it to edx and rewind eax so that it equals
	 * (matching block address) + 16, as L(exit) expects.
	 * L(48leave) is not referenced anywhere in the visible source.
	 */
L(48leave):
	test	%edx, %edx
	jnz	L(aligned_64_exit_16)
	test	%esi, %esi
	jnz	L(aligned_64_exit_32)
	test	%edi, %edi
	jnz	L(aligned_64_exit_48)
	mov	%ebx, %edx		/* match in bytes 48..63 */
	lea	(%eax), %eax		/* no-op (eax = eax), kept from the original */
	jmp	L(aligned_64_exit)
L(aligned_64_exit_48):
	lea	-16(%eax), %eax		/* match in bytes 32..47 */
	mov	%edi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_32):
	lea	-32(%eax), %eax		/* match in bytes 16..31 */
	mov	%esi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_16):
	lea	-48(%eax), %eax		/* match in bytes 0..15, mask already in edx */
L(aligned_64_exit):
	POP (%ebp)
	POP (%ebx)
	POP (%edi)
	POP (%esi)

	/*
	 * Step 4.  eax - ecx = (matching block address) - s.  The lowest set
	 * bit of the 16-bit mask in dx gives the offset inside the block:
	 * bits 0..7 are tested in dl, bits 8..15 in dh after adding 8.
	 */
L(exit):
	sub	%ecx, %eax
	test	%dl, %dl
	jz	L(exit_high)
	test	$0x01, %dl
	jnz	L(exit_tail0)
	test	$0x02, %dl
	jnz	L(exit_tail1)
	test	$0x04, %dl
	jnz	L(exit_tail2)
	test	$0x08, %dl
	jnz	L(exit_tail3)
	test	$0x10, %dl
	jnz	L(exit_tail4)
	test	$0x20, %dl
	jnz	L(exit_tail5)
	test	$0x40, %dl
	jnz	L(exit_tail6)
	add	$7, %eax		/* dl != 0 and bits 0..6 clear: bit 7 */
L(exit_tail0):
	RETURN

L(exit_high):
	add	$8, %eax
	test	$0x01, %dh
	jnz	L(exit_tail0)
	test	$0x02, %dh
	jnz	L(exit_tail1)
	test	$0x04, %dh
	jnz	L(exit_tail2)
	test	$0x08, %dh
	jnz	L(exit_tail3)
	test	$0x10, %dh
	jnz	L(exit_tail4)
	test	$0x20, %dh
	jnz	L(exit_tail5)
	test	$0x40, %dh
	jnz	L(exit_tail6)
	add	$7, %eax		/* only bit 15 is left */
	RETURN

	/* exit_tailN: add N to eax and return. */
	.p2align 4
L(exit_tail1):
	add	$1, %eax
	RETURN
L(exit_tail2):
	add	$2, %eax
	RETURN
L(exit_tail3):
	add	$3, %eax
	RETURN
L(exit_tail4):
	add	$4, %eax
	RETURN
L(exit_tail5):
	add	$5, %eax
	RETURN
L(exit_tail6):
	add	$6, %eax
	RETURN
L(exit_tail7):
	add	$7, %eax
	RETURN
L(exit_tail8):
	add	$8, %eax
	RETURN
L(exit_tail9):
	add	$9, %eax
	RETURN
L(exit_tail10):
	add	$10, %eax
	RETURN
L(exit_tail11):
	add	$11, %eax
	RETURN
L(exit_tail12):
	add	$12, %eax
	RETURN
L(exit_tail13):
	add	$13, %eax
	RETURN
L(exit_tail14):
	add	$14, %eax
	RETURN
L(exit_tail15):
	add	$15, %eax
	/* Use RETURN like every other exit path, so that a redefinition of
	   the macro reaches this path as well. */
	RETURN
END (STRLEN)