Merge "Add stack unwinding directives to memcpy."

This commit is contained in:
Ben Cheng 2013-03-05 21:04:38 +00:00 committed by Gerrit Code Review
commit 133d97e4b0

View File

@ -26,12 +26,6 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
(!(defined (__ARM_ARCH_7A__))))
/* Do nothing here. See memcpy-stub.c in the same directory. */
#else
/* Prototype: void *memcpy (void *dst, const void *src, size_t count). */
/* Use the version of memcpy implemented using LDRD and STRD.
@ -50,16 +44,12 @@
destination register must be even and the second consecutive in
ARM state, but not in Thumb state. */
#include <machine/cpu-features.h>
#include <machine/asm.h>
.syntax unified
#if defined (__thumb__)
.thumb
.thumb_func
#endif
.global memcpy
.type memcpy, %function
memcpy:
ENTRY(memcpy)
/* Assumes that n >= 0, and dst, src are valid pointers.
If there are at least 8 bytes to copy, use LDRD/STRD.
@ -69,12 +59,16 @@ memcpy:
When less than 8 left, copy a word and then byte by byte. */
/* Save registers (r0 holds the return value):
optimized push {r0, r4, r5, lr}.
optimized push {r0, r4, r5, r6, r7, lr}.
To try and improve performance, stack layout changed,
i.e., not keeping the stack looking like users expect
(highest numbered register at highest address). */
.save {r0, lr}
push {r0, lr}
.save {r4, r5}
strd r4, r5, [sp, #-8]!
.save {r6, r7}
strd r6, r7, [sp, #-8]!
/* TODO: Add debug frame directives.
We don't need exception unwind directives, because the code below
@ -194,9 +188,11 @@ copy_less_than_4:
strbcs r5, [r0]
return:
/* Restore registers: optimized pop {r0, r4, r5, pc} */
/* Restore registers: optimized pop {r0, r4, r5, r6, r7, pc} */
/* This is the only return point of memcpy. */
ldrd r6, r7, [sp], #8
ldrd r4, r5, [sp], #8
pop {r0, pc} /* This is the only return point of memcpy. */
pop {r0, pc}
#ifndef __ARM_FEATURE_UNALIGNED
@ -223,12 +219,6 @@ return:
/* Get here if there are more than 8 bytes to copy.
The number of bytes to copy is r2+8, r2 >= 0. */
/* Save registers: push { r6, r7 }.
We need additional registers for LDRD and STRD, because in ARM state
the first destination register must be even and the second
consecutive. */
strd r6, r7, [sp, #-8]!
subs r2, r2, #56
blt 4f /* Go to misaligned copy of less than 64 bytes. */
@ -259,10 +249,6 @@ return:
/* Restore the count if there are more than 7 bytes to copy. */
adds r2, r2, #56
/* If less than 8 bytes to copy,
restore registers saved for this loop: optimized poplt { r6, r7 }. */
itt lt
ldrdlt r6, r7, [sp], #8
blt 6f /* Go to misaligned copy of less than 8 bytes. */
5:
@ -278,9 +264,6 @@ return:
subs r2, r2, #8
bge 5b /* If there is more to copy. */
/* Restore registers saved for this loop: optimized pop { r6, r7 }. */
ldrd r6, r7, [sp], #8
6:
/* Get here if there are fewer than 8 bytes to copy (-8 <= r2 < 0)
and they are misaligned. */
@ -420,4 +403,4 @@ miscopy_24_8: miscopy pull=24 push=8 shiftleft=lsl shiftright=lsr
#endif /* not __ARM_FEATURE_UNALIGNED */
#endif /* memcpy */
END(memcpy)