Our <machine/asm.h> files were modified from upstream, to the extent that no architecture was actually using the upstream ENTRY or END macros, assuming that architecture even had such a macro upstream. This patch moves everyone to the same macros, with just a few tweaks remaining in the <machine/asm.h> files, which no one should now use directly. I've removed most of the unused cruft from the <machine/asm.h> files, though there's still rather a lot in the mips/mips64 ones. Bug: 12229603 Change-Id: I2fff287dc571ac1087abe9070362fb9420d85d6d
		
			
				
	
	
		
			424 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			424 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
/*
 | 
						|
 * Copyright (c) 2009
 | 
						|
 *      MIPS Technologies, Inc., California.
 | 
						|
 *
 | 
						|
 * Redistribution and use in source and binary forms, with or without
 | 
						|
 * modification, are permitted provided that the following conditions
 | 
						|
 * are met:
 | 
						|
 * 1. Redistributions of source code must retain the above copyright
 | 
						|
 *    notice, this list of conditions and the following disclaimer.
 | 
						|
 * 2. Redistributions in binary form must reproduce the above copyright
 | 
						|
 *    notice, this list of conditions and the following disclaimer in the
 | 
						|
 *    documentation and/or other materials provided with the distribution.
 | 
						|
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 | 
						|
 *    contributors may be used to endorse or promote products derived from
 | 
						|
 *    this software without specific prior written permission.
 | 
						|
 *
 | 
						|
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 | 
						|
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
						|
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
						|
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 | 
						|
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
						|
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 | 
						|
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 | 
						|
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 | 
						|
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 | 
						|
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 | 
						|
 * SUCH DAMAGE.
 | 
						|
 */
 | 
						|
 | 
						|
/************************************************************************
 | 
						|
 *
 | 
						|
 *  memcpy.S
 | 
						|
 *  Version: "043009"
 | 
						|
 *
 | 
						|
 ************************************************************************/
 | 
						|
 | 
						|
 | 
						|
/************************************************************************
 | 
						|
 *  Include files
 | 
						|
 ************************************************************************/
 | 
						|
 | 
						|
#include <private/bionic_asm.h>
 | 
						|
 | 
						|
 | 
						|
/*
 | 
						|
 * This routine could be optimized for MIPS64. The current code only
 | 
						|
 * uses MIPS32 instructions.
 | 
						|
 */
 | 
						|
/*
 * Endian-neutral names for the unaligned word access instructions.
 *
 * In this file the "HI" forms are always issued with the address of the
 * first (lowest-addressed) byte of the word, e.g. "LWHI reg,0(a1)", and the
 * "LO" forms with the address of its last byte, e.g. "LWLO reg,3(a1)".
 * Which of lwl/lwr (swl/swr) plays which role depends on the byte order.
 */
#if defined(__MIPSEB__)
#  define LWHI	lwl		/* high part is left in big-endian	*/
#  define SWHI	swl		/* high part is left in big-endian	*/
#  define LWLO	lwr		/* low part is right in big-endian	*/
#  define SWLO	swr		/* low part is right in big-endian	*/
#elif defined(__MIPSEL__)
#  define LWHI	lwr		/* high part is right in little-endian	*/
#  define SWHI	swr		/* high part is right in little-endian	*/
#  define LWLO	lwl		/* low part is left in little-endian	*/
#  define SWLO	swl		/* low part is left in little-endian	*/
#else
#  error "memcpy.S: neither __MIPSEB__ nor __MIPSEL__ is defined"
#endif
 | 
						|
 | 
						|
/*
 * void *memcpy(void *dst, const void *src, size_t n)
 *
 * In:    a0 = dst, a1 = src, a2 = n (byte count)
 * Out:   v0 = original dst
 * Uses:  AT, v1, a3, t0-t9 as scratch; a0/a1/a2 are advanced/consumed.
 *
 * Structure:
 *   1. If the two buffers overlap, tail-jump to memmove.
 *   2. n < 8: plain byte loop.
 *   3. dst and src share the same alignment within a word: align dst, then
 *      copy 64-byte chunks, one optional 32-byte chunk, whole words, and
 *      finally the trailing bytes using lw/sw.
 *   4. Otherwise (".Lunaligned"): same shape, but every source word is
 *      fetched with an LWHI/LWLO pair while stores stay aligned sw.
 *
 * The whole body is hand-scheduled under ".set noreorder": the instruction
 * that follows each branch/jump (marked with one extra leading space) is in
 * the delay slot and executes whether or not the branch is taken.
 */
LEAF(memcpy,0)

	.set	noreorder
	.set	noat
/*
 * Below we handle the case where memcpy is called with overlapping src and dst.
 * Although memcpy is not required to handle this case, some parts of Android like Skia
 * rely on such usage. We call memmove to handle such cases.
 *
 * t0 = |dst - src| (sra/xor/subu is the branch-free absolute value);
 * the buffers overlap when that distance is below n (unsigned compare).
 * a0/a1/a2 and ra are still untouched here, so memmove returns straight
 * to our caller.
 */
	subu	t0,a0,a1
	sra	AT,t0,31	# AT = 0 or -1 (sign mask of dst - src)
	xor	t1,t0,AT
	subu	t0,t1,AT	# t0 = |dst - src|
	sltu	AT,t0,a2	# AT = 1 when the distance is smaller than n
	beq	AT,zero,.Lmemcpy
	/*
	 * NOTE(review): "la" is an assembler macro sitting in a delay slot; if
	 * it expands to more than one instruction only the first one is in the
	 * slot. Confirm the expansion for the ABI this file is built for.
	 */
	 la	t9,memmove
	jr	t9
	 nop
.Lmemcpy:
	slti	AT,a2,8		# fewer than 8 bytes: go straight to the byte loop
	bne	AT,zero,.Llast8
	 move	v0,a0		# memcpy returns the dst pointer

/* Test if the src and dst are word-aligned, or can be made word-aligned */
	xor	t8,a1,a0
	andi	t8,t8,0x3	# t8 is a0/a1 word-displacement

	bne	t8,zero,.Lunaligned
	 negu	a3,a0		# (delay slot) a3 = -dst, also used by .Lunaligned

	andi	a3,a3,0x3	# we need to copy a3 bytes to make a0/a1 aligned
	beq	a3,zero,.Lchk16w # when a3=0 then the dst (a0) is word-aligned
	 subu	a2,a2,a3	# now a2 is the remaining bytes count

	/* Copy the 1..3 leading bytes up to the next word boundary. */
	LWHI	t8,0(a1)
	addu	a1,a1,a3
	SWHI	t8,0(a0)
	addu	a0,a0,a3

/* Now the dst/src are mutually word-aligned with word-aligned addresses */
.Lchk16w:
	andi	t8,a2,0x3f	# any whole 64-byte chunks?
				# t8 is the byte count after 64-byte chunks

	beq	a2,t8,.Lchk8w	# if a2==t8, no 64-byte chunks
				# There will be at most 1 32-byte chunk after it
	 subu	a3,a2,t8	# subtract from a2 the remainder
				# Here a3 counts bytes in 16w chunks
	addu	a3,a0,a3	# Now a3 is the final dst after 64-byte chunks

	addu	t0,a0,a2	# t0 is the "past the end" address

/*
 * "pref 30" is the PrepareForStore hint of the MIPS32 PREF instruction; it
 * must only ever be applied to lines that this copy will fully overwrite.
 * When in the loop we exercise "pref 30,x(a0)", the a0+x should not be past
 * the "t0-32" address.
 * This means: for x=128 the last "safe" a0 address is "t0-160".
 * Alternatively, for x=64 the last "safe" a0 address is "t0-96".
 * In the current version we will use "pref 30,128(a0)", so "t0-160" is the limit.
 */
	subu	t9,t0,160	# t9 is the "last safe pref 30,128(a0)" address

	pref	0,0(a1)		# bring the first line of src, addr 0
	pref	0,32(a1)	# bring the second line of src, addr 32
	pref	0,64(a1)	# bring the third line of src, addr 64
	pref	30,32(a0)	# safe, as we have at least 64 bytes ahead
/* In case the a0 > t9 don't use "pref 30" at all */
	sgtu	v1,a0,t9	# v1 = 1 once dst has passed the safe limit
	bgtz	v1,.Lloop16w	# skip "pref 30,64(a0)" for too short arrays
	 nop
/* otherwise, start with using pref30 */
	pref	30,64(a0)
.Lloop16w:
	/* One iteration copies 64 bytes as two 8-word load/store bursts. */
	pref	0,96(a1)
	lw	t0,0(a1)
	bgtz	v1,.Lskip_pref30_96	# skip "pref 30,96(a0)"
	 lw	t1,4(a1)
	pref	30,96(a0)	# continue setting up the dest, addr 96
.Lskip_pref30_96:
	lw	t2,8(a1)
	lw	t3,12(a1)
	lw	t4,16(a1)
	lw	t5,20(a1)
	lw	t6,24(a1)
	lw	t7,28(a1)
	pref	0,128(a1)	# bring the next lines of src, addr 128

	sw	t0,0(a0)
	sw	t1,4(a0)
	sw	t2,8(a0)
	sw	t3,12(a0)
	sw	t4,16(a0)
	sw	t5,20(a0)
	sw	t6,24(a0)
	sw	t7,28(a0)

	lw	t0,32(a1)
	bgtz	v1,.Lskip_pref30_128	# skip "pref 30,128(a0)"
	 lw	t1,36(a1)
	pref	30,128(a0)	# continue setting up the dest, addr 128
.Lskip_pref30_128:
	lw	t2,40(a1)
	lw	t3,44(a1)
	lw	t4,48(a1)
	lw	t5,52(a1)
	lw	t6,56(a1)
	lw	t7,60(a1)
	pref	0,160(a1)	# bring the next lines of src, addr 160

	sw	t0,32(a0)
	sw	t1,36(a0)
	sw	t2,40(a0)
	sw	t3,44(a0)
	sw	t4,48(a0)
	sw	t5,52(a0)
	sw	t6,56(a0)
	sw	t7,60(a0)

	addiu	a0,a0,64	# adding 64 to dest
	sgtu	v1,a0,t9	# refresh the "pref 30 no longer safe" flag
	bne	a0,a3,.Lloop16w
	 addiu	a1,a1,64	# adding 64 to src
	move	a2,t8		# a2 = bytes left after the 64-byte chunks

/* Here we have src and dest word-aligned but less than 64-bytes to go */

.Lchk8w:
	pref	0,0x0(a1)
	andi	t8,a2,0x1f	# is there a 32-byte chunk?
				# the t8 is the remainder count past 32-bytes
	beq	a2,t8,.Lchk1w	# when a2=t8, no 32-byte chunk
	 nop

	lw	t0,0(a1)
	lw	t1,4(a1)
	lw	t2,8(a1)
	lw	t3,12(a1)
	lw	t4,16(a1)
	lw	t5,20(a1)
	lw	t6,24(a1)
	lw	t7,28(a1)
	addiu	a1,a1,32

	sw	t0,0(a0)
	sw	t1,4(a0)
	sw	t2,8(a0)
	sw	t3,12(a0)
	sw	t4,16(a0)
	sw	t5,20(a0)
	sw	t6,24(a0)
	sw	t7,28(a0)
	addiu	a0,a0,32

.Lchk1w:
	andi	a2,t8,0x3	# now a2 is the remainder past 1w chunks
	beq	a2,t8,.Llast8	# no whole word left
	 subu	a3,t8,a2	# a3 is count of bytes in 1w chunks
	addu	a3,a0,a3	# now a3 is the dst address past the 1w chunks

/* copying in words (4-byte chunks) */
.LwordCopy_loop:
	lw	t3,0(a1)	# the first t3 may be equal t0 ... optimize?
	addiu	a1,a1,4
	addiu	a0,a0,4
	bne	a0,a3,.LwordCopy_loop
	 sw	t3,-4(a0)	# (delay slot) a0 was already advanced, hence -4

/*
 * For the last (<8) bytes. Also the direct target for calls with n < 8,
 * so a2 may be zero here; blez then returns without copying anything.
 */
.Llast8:
	blez	a2,.Lleave
	 addu	a3,a0,a2	# a3 is the last dst address
.Llast8loop:
	lb	v1,0(a1)
	addiu	a1,a1,1
	addiu	a0,a0,1
	bne	a0,a3,.Llast8loop
	 sb	v1,-1(a0)	# (delay slot) a0 was already advanced, hence -1

.Lleave:
	j	ra
	 nop

/*
 * UNALIGNED case: dst and src differ in their low two address bits, so
 * once dst is word-aligned src is not. Source words are assembled with
 * LWHI/LWLO pairs; all bulk stores are aligned sw.
 */

.Lunaligned:
	/* got here with a3="negu a0" */
	andi	a3,a3,0x3	# test if the a0 is word aligned
	beqz	a3,.Lua_chk16w
	 subu	a2,a2,a3	# bytes left after initial a3 bytes

	LWHI	v1,0(a1)
	LWLO	v1,3(a1)
	addu	a1,a1,a3	# a3 may be here 1, 2 or 3
	SWHI	v1,0(a0)
	addu	a0,a0,a3	# below the dst will be word aligned (NOTE1)

.Lua_chk16w:
	andi	t8,a2,0x3f	# any whole 64-byte chunks?
				# t8 is the byte count after 64-byte chunks
	beq	a2,t8,.Lua_chk8w # if a2==t8, no 64-byte chunks
				# There will be at most 1 32-byte chunk after it
	 subu	a3,a2,t8	# subtract from a2 the remainder
				# Here a3 counts bytes in 16w chunks
	addu	a3,a0,a3	# Now a3 is the final dst after 64-byte chunks

	addu	t0,a0,a2	# t0 is the "past the end" address

	subu	t9,t0,160	# t9 is the "last safe pref 30,128(a0)" address

	pref	0,0(a1)		# bring the first line of src, addr 0
	pref	0,32(a1)	# bring the second line of src, addr 32
	pref	0,64(a1)	# bring the third line of src, addr 64
	pref	30,32(a0)	# safe, as we have at least 64 bytes ahead
/* In case the a0 > t9 don't use "pref 30" at all */
	sgtu	v1,a0,t9	# v1 = 1 once dst has passed the safe limit
	bgtz	v1,.Lua_loop16w	# skip "pref 30,64(a0)" for too short arrays
	 nop
/* otherwise, start with using pref30 */
	pref	30,64(a0)
.Lua_loop16w:
	/* Same 64-byte iteration as .Lloop16w, with unaligned source loads. */
	pref	0,96(a1)
	LWHI	t0,0(a1)
	LWLO	t0,3(a1)
	LWHI	t1,4(a1)
	bgtz	v1,.Lua_skip_pref30_96	# skip "pref 30,96(a0)"
	 LWLO	t1,7(a1)
	pref	30,96(a0)	# continue setting up the dest, addr 96
.Lua_skip_pref30_96:
	LWHI	t2,8(a1)
	LWLO	t2,11(a1)
	LWHI	t3,12(a1)
	LWLO	t3,15(a1)
	LWHI	t4,16(a1)
	LWLO	t4,19(a1)
	LWHI	t5,20(a1)
	LWLO	t5,23(a1)
	LWHI	t6,24(a1)
	LWLO	t6,27(a1)
	LWHI	t7,28(a1)
	LWLO	t7,31(a1)
	pref	0,128(a1)	# bring the next lines of src, addr 128

	sw	t0,0(a0)
	sw	t1,4(a0)
	sw	t2,8(a0)
	sw	t3,12(a0)
	sw	t4,16(a0)
	sw	t5,20(a0)
	sw	t6,24(a0)
	sw	t7,28(a0)

	LWHI	t0,32(a1)
	LWLO	t0,35(a1)
	LWHI	t1,36(a1)
	bgtz	v1,.Lua_skip_pref30_128	# skip "pref 30,128(a0)"
	 LWLO	t1,39(a1)	# (delay slot) completes t1 on both paths
	pref	30,128(a0)	# continue setting up the dest, addr 128
.Lua_skip_pref30_128:
	LWHI	t2,40(a1)
	LWLO	t2,43(a1)
	LWHI	t3,44(a1)
	LWLO	t3,47(a1)
	LWHI	t4,48(a1)
	LWLO	t4,51(a1)
	LWHI	t5,52(a1)
	LWLO	t5,55(a1)
	LWHI	t6,56(a1)
	LWLO	t6,59(a1)
	LWHI	t7,60(a1)
	LWLO	t7,63(a1)
	pref	0,160(a1)	# bring the next lines of src, addr 160

	sw	t0,32(a0)
	sw	t1,36(a0)
	sw	t2,40(a0)
	sw	t3,44(a0)
	sw	t4,48(a0)
	sw	t5,52(a0)
	sw	t6,56(a0)
	sw	t7,60(a0)

	addiu	a0,a0,64	# adding 64 to dest
	sgtu	v1,a0,t9	# refresh the "pref 30 no longer safe" flag
	bne	a0,a3,.Lua_loop16w
	 addiu	a1,a1,64	# adding 64 to src
	move	a2,t8		# a2 = bytes left after the 64-byte chunks

/* Here dst is word-aligned (src is not) and less than 64-bytes remain */

.Lua_chk8w:
	pref	0,0x0(a1)
	andi	t8,a2,0x1f	# is there a 32-byte chunk?
				# the t8 is the remainder count
	beq	a2,t8,.Lua_chk1w # when a2=t8, no 32-byte chunk
	 nop

	LWHI	t0,0(a1)
	LWLO	t0,3(a1)
	LWHI	t1,4(a1)
	LWLO	t1,7(a1)
	LWHI	t2,8(a1)
	LWLO	t2,11(a1)
	LWHI	t3,12(a1)
	LWLO	t3,15(a1)
	LWHI	t4,16(a1)
	LWLO	t4,19(a1)
	LWHI	t5,20(a1)
	LWLO	t5,23(a1)
	LWHI	t6,24(a1)
	LWLO	t6,27(a1)
	LWHI	t7,28(a1)
	LWLO	t7,31(a1)
	addiu	a1,a1,32

	sw	t0,0(a0)
	sw	t1,4(a0)
	sw	t2,8(a0)
	sw	t3,12(a0)
	sw	t4,16(a0)
	sw	t5,20(a0)
	sw	t6,24(a0)
	sw	t7,28(a0)
	addiu	a0,a0,32

.Lua_chk1w:
	andi	a2,t8,0x3	# now a2 is the remainder past 1w chunks
	beq	a2,t8,.Lua_smallCopy	# no whole word left
	 subu	a3,t8,a2	# a3 is count of bytes in 1w chunks
	addu	a3,a0,a3	# now a3 is the dst address past the 1w chunks

/* copying in words (4-byte chunks) */
.Lua_wordCopy_loop:
	LWHI	v1,0(a1)
	LWLO	v1,3(a1)
	addiu	a1,a1,4
	addiu	a0,a0,4		# note: dst=a0 is word aligned here, see NOTE1
	bne	a0,a3,.Lua_wordCopy_loop
	 sw	v1,-4(a0)	# (delay slot) a0 was already advanced, hence -4

/* Now less than 4 bytes (value in a2) left to copy */
.Lua_smallCopy:
	beqz	a2,.Lleave
	 addu	a3,a0,a2	# (delay slot) a3 is the last dst address
.Lua_smallCopy_loop:
	lb	v1,0(a1)
	addiu	a1,a1,1
	addiu	a0,a0,1
	bne	a0,a3,.Lua_smallCopy_loop
	 sb	v1,-1(a0)	# (delay slot) a0 was already advanced, hence -1

	j	ra
	 nop

	.set	at
	.set	reorder

END(memcpy)
 | 
						|
 | 
						|
 | 
						|
/************************************************************************
 | 
						|
 *  Implementation : Static functions
 | 
						|
 ************************************************************************/
 |