Merge "Add stack unwinding directives to memcpy."

This commit is contained in:
Ben Cheng 2013-03-05 21:04:38 +00:00 committed by Gerrit Code Review
commit 133d97e4b0

View File

@ -26,12 +26,6 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
(!(defined (__ARM_ARCH_7A__))))
/* Do nothing here. See memcpy-stub.c in the same directory. */
#else
/* Prototype: void *memcpy (void *dst, const void *src, size_t count). */ /* Prototype: void *memcpy (void *dst, const void *src, size_t count). */
/* Use the version of memcpy implemented using LDRD and STRD. /* Use the version of memcpy implemented using LDRD and STRD.
@ -50,16 +44,12 @@
destination register must be even and the second consecutive in destination register must be even and the second consecutive in
ARM state, but not in Thumb state. */ ARM state, but not in Thumb state. */
#include <machine/cpu-features.h>
#include <machine/asm.h>
.syntax unified .syntax unified
#if defined (__thumb__) ENTRY(memcpy)
.thumb
.thumb_func
#endif
.global memcpy
.type memcpy, %function
memcpy:
/* Assumes that n >= 0, and dst, src are valid pointers. /* Assumes that n >= 0, and dst, src are valid pointers.
If there are at least 8 bytes to copy, use LDRD/STRD. If there are at least 8 bytes to copy, use LDRD/STRD.
@ -69,12 +59,16 @@ memcpy:
When less than 8 left, copy a word and then byte by byte. */ When less than 8 left, copy a word and then byte by byte. */
/* Save registers (r0 holds the return value): /* Save registers (r0 holds the return value):
optimized push {r0, r4, r5, lr}. optimized push {r0, r4, r5, r6, r7, lr}.
To try and improve performance, stack layout changed, To try and improve performance, stack layout changed,
i.e., not keeping the stack looking like users expect i.e., not keeping the stack looking like users expect
(highest numbered register at highest address). */ (highest numbered register at highest address). */
push {r0, lr} .save {r0, lr}
strd r4, r5, [sp, #-8]! push {r0, lr}
.save {r4, r5}
strd r4, r5, [sp, #-8]!
.save {r6, r7}
strd r6, r7, [sp, #-8]!
/* TODO: Add debug frame directives. /* TODO: Add debug frame directives.
We don't need exception unwind directives, because the code below We don't need exception unwind directives, because the code below
@ -194,9 +188,11 @@ copy_less_than_4:
strbcs r5, [r0] strbcs r5, [r0]
return: return:
/* Restore registers: optimized pop {r0, r4, r5, pc} */ /* Restore registers: optimized pop {r0, r4, r5, r6, r7, pc} */
/* This is the only return point of memcpy. */
ldrd r6, r7, [sp], #8
ldrd r4, r5, [sp], #8 ldrd r4, r5, [sp], #8
pop {r0, pc} /* This is the only return point of memcpy. */ pop {r0, pc}
#ifndef __ARM_FEATURE_UNALIGNED #ifndef __ARM_FEATURE_UNALIGNED
@ -223,12 +219,6 @@ return:
/* Get here if there are at least 8 bytes to copy. /* Get here if there are at least 8 bytes to copy.
The number of bytes to copy is r2+8, r2 >= 0. */ The number of bytes to copy is r2+8, r2 >= 0. */
/* Save registers: push { r6, r7 }.
We need additional registers for LDRD and STRD, because in ARM state
the first destination register must be even and the second
consecutive. */
strd r6, r7, [sp, #-8]!
subs r2, r2, #56 subs r2, r2, #56
blt 4f /* Go to misaligned copy of less than 64 bytes. */ blt 4f /* Go to misaligned copy of less than 64 bytes. */
@ -259,10 +249,6 @@ return:
/* Restore the count if there are more than 7 bytes to copy. */ /* Restore the count if there are more than 7 bytes to copy. */
adds r2, r2, #56 adds r2, r2, #56
/* If less than 8 bytes to copy,
restore registers saved for this loop: optimized poplt { r6, r7 }. */
itt lt
ldrdlt r6, r7, [sp], #8
blt 6f /* Go to misaligned copy of less than 8 bytes. */ blt 6f /* Go to misaligned copy of less than 8 bytes. */
5: 5:
@ -278,9 +264,6 @@ return:
subs r2, r2, #8 subs r2, r2, #8
bge 5b /* If there is more to copy. */ bge 5b /* If there is more to copy. */
/* Restore registers saved for this loop: optimized pop { r6, r7 }. */
ldrd r6, r7, [sp], #8
6: 6:
/* Get here if there are fewer than 8 bytes to copy (-8 <= r2 < 0) /* Get here if there are fewer than 8 bytes to copy (-8 <= r2 < 0)
and they are misaligned. */ and they are misaligned. */
@ -420,4 +403,4 @@ miscopy_24_8: miscopy pull=24 push=8 shiftleft=lsl shiftright=lsr
#endif /* not __ARM_FEATURE_UNALIGNED */ #endif /* not __ARM_FEATURE_UNALIGNED */
#endif /* memcpy */ END(memcpy)