Change-Id: I22a600e7f172681cfd38ff73a64e3fd07b284959 Signed-off-by: Lu, Hongjiu <hongjiu.lu@intel.com> Signed-off-by: Bruce Beare <bruce.j.beare@intel.com>
		
			
				
	
	
		
			370 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			370 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
/* Symbol name under which the routine in this file is emitted
   (used by ENTRY (STRLEN) / END (STRLEN) below).  */
#define STRLEN sse2_strlen_atom

/* L(label) pastes the ".L" prefix onto LABEL, giving an assembler-local
   label name.  Guarded so that an earlier definition takes precedence.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Thin wrappers around the assembler's .cfi_* (call frame information)
   directives.  Each one is guarded with #ifndef so that a definition
   made before this point is kept instead.  */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state		.cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state		.cfi_restore_state
#endif

/* ENTRY (name): open the function NAME.  Marks the symbol as a function,
   makes it global, aligns it with .p2align 4 (16 bytes), emits the label
   itself and starts its CFI region.  Guarded so that an earlier
   definition takes precedence.  */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name,  @function; 	\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* END (name): close the function NAME.  Ends its CFI region and records
   the symbol size as the distance from the label to this point.
   Guarded so that an earlier definition takes precedence.  */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* CFI bookkeeping for a 4-byte register push: the CFA offset grows by 4
   and REG is recorded as saved at offset 0 from the current stack top.  */
#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

/* CFI bookkeeping for the matching pop: the CFA offset shrinks by 4 and
   REG is marked as holding its original value again.  */
#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

/* Push / pop a 32-bit register together with its CFI annotation.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Stack layout at function entry, relative to %esp.  PARMS is the offset
   of the first stack argument (4 bytes above %esp, i.e. just past the
   return address pushed by the caller's call instruction); STR is the
   offset of the string pointer argument.  */
#define PARMS		4
#define	STR		PARMS

/* ENTRANCE expands to nothing here; it is placed at the top of the
   routine.  RETURN is the instruction sequence used to leave it.  */
#define ENTRANCE
#define RETURN		ret

	.text
/*
 * STRLEN -- length of a zero-terminated byte string (i386, AT&T syntax,
 * SSE2).  Presumably called as: size_t STRLEN (const char *s).
 *
 * In:       STR(%esp) = pointer to the string
 * Out:      %eax      = number of bytes before the first zero byte
 * Clobbers: %ecx, %edx, %xmm0-%xmm3, flags.
 *           %esi, %edi, %ebx and %ebp are used by the 64-byte loop but
 *           are pushed before it and popped on the way out.
 *
 * Strategy:
 *   1. Test the first 16 bytes one at a time.
 *   2. Round the pointer up to the next 16-byte boundary and test 16
 *      consecutive aligned 16-byte blocks, one block at a time.
 *   3. Round the pointer down to a 64-byte boundary and loop, testing
 *      four 16-byte blocks (64 bytes) per iteration.
 *   4. L(exit): turn the pmovmskb bit mask of the block that contained
 *      the zero byte into a byte index and add it to the block offset.
 *
 * Each block test is pcmpeqb against an all-zero XMM register followed by
 * pmovmskb.  When a block contains no zero byte the compare result is
 * itself all zero, so the register is reused for a later block without
 * another pxor.
 */
ENTRY (STRLEN)
	ENTRANCE
	mov	STR(%esp), %edx		/* edx = s */
	xor	%eax, %eax		/* eax = 0, so exit_tailN returns N */

	/* Phase 1: bytes 0..15, one at a time.  */
	cmpb	$0, (%edx)
	jz	L(exit_tail0)
	cmpb	$0, 1(%edx)
	jz	L(exit_tail1)
	cmpb	$0, 2(%edx)
	jz	L(exit_tail2)
	cmpb	$0, 3(%edx)
	jz	L(exit_tail3)
	cmpb	$0, 4(%edx)
	jz	L(exit_tail4)
	cmpb	$0, 5(%edx)
	jz	L(exit_tail5)
	cmpb	$0, 6(%edx)
	jz	L(exit_tail6)
	cmpb	$0, 7(%edx)
	jz	L(exit_tail7)
	cmpb	$0, 8(%edx)
	jz	L(exit_tail8)
	cmpb	$0, 9(%edx)
	jz	L(exit_tail9)
	cmpb	$0, 10(%edx)
	jz	L(exit_tail10)
	cmpb	$0, 11(%edx)
	jz	L(exit_tail11)
	cmpb	$0, 12(%edx)
	jz	L(exit_tail12)
	cmpb	$0, 13(%edx)
	jz	L(exit_tail13)
	cmpb	$0, 14(%edx)
	jz	L(exit_tail14)
	cmpb	$0, 15(%edx)
	jz	L(exit_tail15)

	/* Phase 2 setup.
	   eax = (s & -16) + 16 : first 16-byte boundary above s.
	   ecx = s + 16         : bias subtracted again at L(exit).
	   Every block test below advances eax by 16 before branching, so
	   at L(exit) eax is the matching block address + 16 and
	   eax - ecx is the offset of that block from s.  */
	pxor	%xmm0, %xmm0
	mov	%edx, %eax
	mov	%edx, %ecx
	and	$-16, %eax
	add	$16, %ecx
	add	$16, %eax

	/* Phase 2: 16 aligned blocks, tested individually.  lea is used to
	   advance eax because it leaves the flags set by test intact.
	   The first three blocks also zero xmm1..xmm3 for later use.  */
	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* Phase 3: round eax down to a 64-byte boundary.  This can step
	   back over blocks already tested above; they held no zero byte,
	   so testing them again cannot produce a match.  */
	and	$-0x40, %eax
	PUSH (%esi)
	PUSH (%edi)
	PUSH (%ebx)
	PUSH (%ebp)
	xor	%ebp, %ebp		/* ebp = OR of the four block masks */
L(aligned_64):
	pcmpeqb	(%eax), %xmm0
	pcmpeqb	16(%eax), %xmm1
	pcmpeqb	32(%eax), %xmm2
	pcmpeqb	48(%eax), %xmm3
	pmovmskb %xmm0, %edx		/* mask for bytes  0..15 */
	pmovmskb %xmm1, %esi		/* mask for bytes 16..31 */
	pmovmskb %xmm2, %edi		/* mask for bytes 32..47 */
	pmovmskb %xmm3, %ebx		/* mask for bytes 48..63 */
	or	%edx, %ebp
	or	%esi, %ebp
	or	%edi, %ebp
	or	%ebx, %ebp
	lea	64(%eax), %eax		/* advance; flags of last or kept */
	jz	L(aligned_64)		/* all four masks zero: next 64 */

	/* A zero byte lies in the 64 bytes ending at eax.  Pick the first
	   block with a non-zero mask, copy its mask to edx and set eax to
	   that block address + 16, as L(exit) expects.  */
L(48leave):
	test	%edx, %edx
	jnz	L(aligned_64_exit_16)
	test	%esi, %esi
	jnz	L(aligned_64_exit_32)
	test	%edi, %edi
	jnz	L(aligned_64_exit_48)
	mov	%ebx, %edx		/* fourth block: eax already correct */
	jmp	L(aligned_64_exit)
L(aligned_64_exit_48):
	lea	-16(%eax), %eax		/* third block */
	mov	%edi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_32):
	lea	-32(%eax), %eax		/* second block */
	mov	%esi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_16):
	lea	-48(%eax), %eax		/* first block; mask already in edx */
L(aligned_64_exit):
	POP (%ebp)
	POP (%ebx)
	POP (%edi)
	POP (%esi)

	/* Phase 4: eax = matching block + 16, ecx = s + 16, edx = 16-bit
	   mask with bit i set when byte i of the block is zero.  After the
	   sub, eax is the block offset from s; the index of the lowest set
	   mask bit is then added.  */
L(exit):
	sub	%ecx, %eax
	test	%dl, %dl
	jz	L(exit_high)		/* no zero in bytes 0..7 */
	test	$0x01, %dl
	jnz	L(exit_tail0)

	test	$0x02, %dl
	jnz	L(exit_tail1)

	test	$0x04, %dl
	jnz	L(exit_tail2)

	test	$0x08, %dl
	jnz	L(exit_tail3)

	test	$0x10, %dl
	jnz	L(exit_tail4)

	test	$0x20, %dl
	jnz	L(exit_tail5)

	test	$0x40, %dl
	jnz	L(exit_tail6)
	add	$7, %eax		/* dl != 0, bits 0..6 clear: bit 7 */
L(exit_tail0):
	RETURN

	/* Zero byte is among bytes 8..15 of the block: bias eax by 8 and
	   scan the high mask byte the same way.  */
L(exit_high):
	add	$8, %eax
	test	$0x01, %dh
	jnz	L(exit_tail0)

	test	$0x02, %dh
	jnz	L(exit_tail1)

	test	$0x04, %dh
	jnz	L(exit_tail2)

	test	$0x08, %dh
	jnz	L(exit_tail3)

	test	$0x10, %dh
	jnz	L(exit_tail4)

	test	$0x20, %dh
	jnz	L(exit_tail5)

	test	$0x40, %dh
	jnz	L(exit_tail6)
	add	$7, %eax		/* only bit 7 of dh is left */
	RETURN

	/* Shared tails: add the byte index N to eax and return.  */
	.p2align 4
L(exit_tail1):
	add	$1, %eax
	RETURN

L(exit_tail2):
	add	$2, %eax
	RETURN

L(exit_tail3):
	add	$3, %eax
	RETURN

L(exit_tail4):
	add	$4, %eax
	RETURN

L(exit_tail5):
	add	$5, %eax
	RETURN

L(exit_tail6):
	add	$6, %eax
	RETURN

L(exit_tail7):
	add	$7, %eax
	RETURN

L(exit_tail8):
	add	$8, %eax
	RETURN

L(exit_tail9):
	add	$9, %eax
	RETURN

L(exit_tail10):
	add	$10, %eax
	RETURN

L(exit_tail11):
	add	$11, %eax
	RETURN

L(exit_tail12):
	add	$12, %eax
	RETURN

L(exit_tail13):
	add	$13, %eax
	RETURN

L(exit_tail14):
	add	$14, %eax
	RETURN

L(exit_tail15):
	add	$15, %eax
	RETURN

END (STRLEN)