Merge "Add stack unwinding directives to memcpy."
This commit is contained in:
commit
133d97e4b0
@ -26,12 +26,6 @@
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
|
||||
(!(defined (__ARM_ARCH_7A__))))
|
||||
|
||||
/* Do nothing here. See memcpy-stub.c in the same directory. */
|
||||
|
||||
#else
|
||||
/* Prototype: void *memcpy (void *dst, const void *src, size_t count). */
|
||||
|
||||
/* Use the version of memcpy implemented using LDRD and STRD.
|
||||
@ -50,16 +44,12 @@
|
||||
destination register must be even and the second consecutive in
|
||||
ARM state, but not in Thumb state. */
|
||||
|
||||
#include <machine/cpu-features.h>
|
||||
#include <machine/asm.h>
|
||||
|
||||
.syntax unified
|
||||
|
||||
#if defined (__thumb__)
|
||||
.thumb
|
||||
.thumb_func
|
||||
#endif
|
||||
|
||||
.global memcpy
|
||||
.type memcpy, %function
|
||||
memcpy:
|
||||
ENTRY(memcpy)
|
||||
|
||||
/* Assumes that n >= 0, and dst, src are valid pointers.
|
||||
If there are at least 8 bytes to copy, use LDRD/STRD.
|
||||
@ -69,12 +59,16 @@ memcpy:
|
||||
When less than 8 left, copy a word and then byte by byte. */
|
||||
|
||||
/* Save registers (r0 holds the return value):
|
||||
optimized push {r0, r4, r5, lr}.
|
||||
optimized push {r0, r4, r5, r6, r7, lr}.
|
||||
To try and improve performance, stack layout changed,
|
||||
i.e., not keeping the stack looking like users expect
|
||||
(highest numbered register at highest address). */
|
||||
.save {r0, lr}
|
||||
push {r0, lr}
|
||||
.save {r4, r5}
|
||||
strd r4, r5, [sp, #-8]!
|
||||
.save {r6, r7}
|
||||
strd r6, r7, [sp, #-8]!
|
||||
|
||||
/* TODO: Add debug frame directives.
|
||||
We don't need exception unwind directives, because the code below
|
||||
@ -194,9 +188,11 @@ copy_less_than_4:
|
||||
strbcs r5, [r0]
|
||||
|
||||
return:
|
||||
/* Restore registers: optimized pop {r0, r4, r5, pc} */
|
||||
/* Restore registers: optimized pop {r0, r4, r5, r6, r7, pc} */
|
||||
/* This is the only return point of memcpy. */
|
||||
ldrd r6, r7, [sp], #8
|
||||
ldrd r4, r5, [sp], #8
|
||||
pop {r0, pc} /* This is the only return point of memcpy. */
|
||||
pop {r0, pc}
|
||||
|
||||
#ifndef __ARM_FEATURE_UNALIGNED
|
||||
|
||||
@ -223,12 +219,6 @@ return:
|
||||
/* Get here if there are more than 8 bytes to copy.
|
||||
The number of bytes to copy is r2+8, r2 >= 0. */
|
||||
|
||||
/* Save registers: push { r6, r7 }.
|
||||
We need additional registers for LDRD and STRD, because in ARM state
|
||||
the first destination register must be even and the second
|
||||
consecutive. */
|
||||
strd r6, r7, [sp, #-8]!
|
||||
|
||||
subs r2, r2, #56
|
||||
blt 4f /* Go to misaligned copy of less than 64 bytes. */
|
||||
|
||||
@ -259,10 +249,6 @@ return:
|
||||
/* Restore the count if there are more than 7 bytes to copy. */
|
||||
adds r2, r2, #56
|
||||
|
||||
/* If less than 8 bytes to copy,
|
||||
restore registers saved for this loop: optimized poplt { r6, r7 }. */
|
||||
itt lt
|
||||
ldrdlt r6, r7, [sp], #8
|
||||
blt 6f /* Go to misaligned copy of less than 8 bytes. */
|
||||
|
||||
5:
|
||||
@ -278,9 +264,6 @@ return:
|
||||
subs r2, r2, #8
|
||||
bge 5b /* If there is more to copy. */
|
||||
|
||||
/* Restore registers saved for this loop: optimized pop { r6, r7 }. */
|
||||
ldrd r6, r7, [sp], #8
|
||||
|
||||
6:
|
||||
/* Get here if there are fewer than 8 bytes to copy (-8 <= r2 < 0)
|
||||
and they are misaligned. */
|
||||
@ -420,4 +403,4 @@ miscopy_24_8: miscopy pull=24 push=8 shiftleft=lsl shiftright=lsr
|
||||
|
||||
#endif /* not __ARM_FEATURE_UNALIGNED */
|
||||
|
||||
#endif /* memcpy */
|
||||
END(memcpy)
|
||||
|
Loading…
Reference in New Issue
Block a user