Merge "Add stack unwinding directives to memcpy."
This commit is contained in:
commit
133d97e4b0
@ -26,12 +26,6 @@
|
|||||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
|
|
||||||
(!(defined (__ARM_ARCH_7A__))))
|
|
||||||
|
|
||||||
/* Do nothing here. See memcpy-stub.c in the same directory. */
|
|
||||||
|
|
||||||
#else
|
|
||||||
/* Prototype: void *memcpy (void *dst, const void *src, size_t count). */
|
/* Prototype: void *memcpy (void *dst, const void *src, size_t count). */
|
||||||
|
|
||||||
/* Use the version of memcpy implemented using LDRD and STRD.
|
/* Use the version of memcpy implemented using LDRD and STRD.
|
||||||
@ -50,16 +44,12 @@
|
|||||||
destination register must be even and the second consecutive in
|
destination register must be even and the second consecutive in
|
||||||
ARM state, but not in Thumb state. */
|
ARM state, but not in Thumb state. */
|
||||||
|
|
||||||
|
#include <machine/cpu-features.h>
|
||||||
|
#include <machine/asm.h>
|
||||||
|
|
||||||
.syntax unified
|
.syntax unified
|
||||||
|
|
||||||
#if defined (__thumb__)
|
ENTRY(memcpy)
|
||||||
.thumb
|
|
||||||
.thumb_func
|
|
||||||
#endif
|
|
||||||
|
|
||||||
.global memcpy
|
|
||||||
.type memcpy, %function
|
|
||||||
memcpy:
|
|
||||||
|
|
||||||
/* Assumes that n >= 0, and dst, src are valid pointers.
|
/* Assumes that n >= 0, and dst, src are valid pointers.
|
||||||
If there are at least 8 bytes to copy, use LDRD/STRD.
|
If there are at least 8 bytes to copy, use LDRD/STRD.
|
||||||
@ -69,12 +59,16 @@ memcpy:
|
|||||||
When less than 8 left, copy a word and then byte by byte. */
|
When less than 8 left, copy a word and then byte by byte. */
|
||||||
|
|
||||||
/* Save registers (r0 holds the return value):
|
/* Save registers (r0 holds the return value):
|
||||||
optimized push {r0, r4, r5, lr}.
|
optimized push {r0, r4, r5, r6, r7, lr}.
|
||||||
To try and improve performance, stack layout changed,
|
To try and improve performance, stack layout changed,
|
||||||
i.e., not keeping the stack looking like users expect
|
i.e., not keeping the stack looking like users expect
|
||||||
(highest numbered register at highest address). */
|
(highest numbered register at highest address). */
|
||||||
push {r0, lr}
|
.save {r0, lr}
|
||||||
strd r4, r5, [sp, #-8]!
|
push {r0, lr}
|
||||||
|
.save {r4, r5}
|
||||||
|
strd r4, r5, [sp, #-8]!
|
||||||
|
.save {r6, r7}
|
||||||
|
strd r6, r7, [sp, #-8]!
|
||||||
|
|
||||||
/* TODO: Add debug frame directives.
|
/* TODO: Add debug frame directives.
|
||||||
We don't need exception unwind directives, because the code below
|
We don't need exception unwind directives, because the code below
|
||||||
@ -194,9 +188,11 @@ copy_less_than_4:
|
|||||||
strbcs r5, [r0]
|
strbcs r5, [r0]
|
||||||
|
|
||||||
return:
|
return:
|
||||||
/* Restore registers: optimized pop {r0, r4, r5, pc} */
|
/* Restore registers: optimized pop {r0, r4, r5, r6, r7, pc} */
|
||||||
|
/* This is the only return point of memcpy. */
|
||||||
|
ldrd r6, r7, [sp], #8
|
||||||
ldrd r4, r5, [sp], #8
|
ldrd r4, r5, [sp], #8
|
||||||
pop {r0, pc} /* This is the only return point of memcpy. */
|
pop {r0, pc}
|
||||||
|
|
||||||
#ifndef __ARM_FEATURE_UNALIGNED
|
#ifndef __ARM_FEATURE_UNALIGNED
|
||||||
|
|
||||||
@ -223,12 +219,6 @@ return:
|
|||||||
/* Get here if there are more than 8 bytes to copy.
|
/* Get here if there are more than 8 bytes to copy.
|
||||||
The number of bytes to copy is r2+8, r2 >= 0. */
|
The number of bytes to copy is r2+8, r2 >= 0. */
|
||||||
|
|
||||||
/* Save registers: push { r6, r7 }.
|
|
||||||
We need additional registers for LDRD and STRD, because in ARM state
|
|
||||||
the first destination register must be even and the second
|
|
||||||
consecutive. */
|
|
||||||
strd r6, r7, [sp, #-8]!
|
|
||||||
|
|
||||||
subs r2, r2, #56
|
subs r2, r2, #56
|
||||||
blt 4f /* Go to misaligned copy of less than 64 bytes. */
|
blt 4f /* Go to misaligned copy of less than 64 bytes. */
|
||||||
|
|
||||||
@ -259,10 +249,6 @@ return:
|
|||||||
/* Restore the count if there are more than 7 bytes to copy. */
|
/* Restore the count if there are more than 7 bytes to copy. */
|
||||||
adds r2, r2, #56
|
adds r2, r2, #56
|
||||||
|
|
||||||
/* If less than 8 bytes to copy,
|
|
||||||
restore registers saved for this loop: optimized poplt { r6, r7 }. */
|
|
||||||
itt lt
|
|
||||||
ldrdlt r6, r7, [sp], #8
|
|
||||||
blt 6f /* Go to misaligned copy of less than 8 bytes. */
|
blt 6f /* Go to misaligned copy of less than 8 bytes. */
|
||||||
|
|
||||||
5:
|
5:
|
||||||
@ -278,9 +264,6 @@ return:
|
|||||||
subs r2, r2, #8
|
subs r2, r2, #8
|
||||||
bge 5b /* If there is more to copy. */
|
bge 5b /* If there is more to copy. */
|
||||||
|
|
||||||
/* Restore registers saved for this loop: optimized pop { r6, r7 }. */
|
|
||||||
ldrd r6, r7, [sp], #8
|
|
||||||
|
|
||||||
6:
|
6:
|
||||||
/* Get here if there are fewer than 8 bytes to copy (-8 <= r2 < 0)
|
/* Get here if there are fewer than 8 bytes to copy (-8 <= r2 < 0)
|
||||||
and they are misaligned. */
|
and they are misaligned. */
|
||||||
@ -420,4 +403,4 @@ miscopy_24_8: miscopy pull=24 push=8 shiftleft=lsl shiftright=lsr
|
|||||||
|
|
||||||
#endif /* not __ARM_FEATURE_UNALIGNED */
|
#endif /* not __ARM_FEATURE_UNALIGNED */
|
||||||
|
|
||||||
#endif /* memcpy */
|
END(memcpy)
|
||||||
|
Loading…
Reference in New Issue
Block a user