Merge "Fix generic __memcpy_chk implementation."

Christopher Ferris, 2014-10-20 18:21:21 +00:00 (committed by Gerrit Code Review)
commit 8f41d3d3bc


@@ -39,7 +39,7 @@
 ENTRY(__memcpy_chk)
         cmp         r2, r3
-        bgt         fortify_check_failed
+        bhi         __memcpy_chk_fail

         // Fall through to memcpy...
 END(__memcpy_chk)
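The functional fix is in this hunk: both the copy length in r2 and the destination size in r3 are size_t values, so the bounds check must use an unsigned comparison (bhi) rather than the signed bgt, which a length of 0x80000000 or more would slip past. A minimal C sketch of the intended check, assuming the usual __memcpy_chk(dst, src, count, dst_len) argument order:

    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical C equivalent of the assembly entry point above.  Both
     * sizes are unsigned, so "count > dst_len" is an unsigned comparison;
     * the old signed "bgt" would have treated large counts as negative. */
    static void* memcpy_chk_sketch(void* dst, const void* src,
                                   size_t count, size_t dst_len) {
        if (count > dst_len) {
            abort();    /* the assembly branches to __memcpy_chk_fail instead */
        }
        return memcpy(dst, src, count);
    }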
@@ -49,11 +49,14 @@ ENTRY(memcpy)
          * ARM ABI. Since we have to save R0, we might as well save R4
          * which we can use for better pipelining of the reads below
          */
-        .save       {r0, r4, lr}
         stmfd       sp!, {r0, r4, lr}
+        .cfi_def_cfa_offset 12
+        .cfi_rel_offset r0, 0
+        .cfi_rel_offset r4, 4
+        .cfi_rel_offset lr, 8
         /* Making room for r5-r11 which will be spilled later */
-        .pad        #28
         sub         sp, sp, #28
+        .cfi_adjust_cfa_offset 28

         // preload the destination because we'll align it to a cache line
         // with small writes. Also start the source "pump".
@@ -63,14 +66,14 @@ ENTRY(memcpy)

         /* it simplifies things to take care of len<4 early */
         cmp         r2, #4
-        blo         copy_last_3_and_return
+        blo         .Lcopy_last_3_and_return

         /* compute the offset to align the source
          * offset = (4-(src&3))&3 = -src & 3
          */
         rsb         r3, r1, #0
         ands        r3, r3, #3
-        beq         src_aligned
+        beq         .Lsrc_aligned

         /* align source to 32 bits. We need to insert 2 instructions between
          * a ldr[b|h] and str[b|h] because byte and half-word instructions
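The comment above relies on the identity (4 - (src & 3)) & 3 == (-src) & 3, which is exactly what the rsb/ands pair computes. A small self-contained C check of that arithmetic (illustrative only):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Verify that the byte count needed to reach the next 4-byte boundary,
     * (4 - (src & 3)) & 3, equals (-src) & 3 in two's complement, i.e. what
     * "rsb r3, r1, #0; ands r3, r3, #3" leaves in r3. */
    int main(void) {
        for (uint32_t src = 0; src < 8; ++src) {
            uint32_t a = (4u - (src & 3u)) & 3u;
            uint32_t b = (0u - src) & 3u;
            assert(a == b);
            printf("src %% 4 = %u -> offset %u\n", src & 3u, a);
        }
        return 0;
    }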
@@ -85,12 +88,12 @@ ENTRY(memcpy)
         strcsb      r4, [r0], #1
         strcsb      r12,[r0], #1

-src_aligned:
+.Lsrc_aligned:

         /* see if src and dst are aligned together (congruent) */
         eor         r12, r0, r1
         tst         r12, #3
-        bne         non_congruent
+        bne         .Lnon_congruent

         /* Use post-incriment mode for stm to spill r5-r11 to reserved stack
          * frame. Don't update sp.
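The eor/tst pair above is the congruence test: once the source is word-aligned, source and destination can only be copied word-for-word together if their addresses agree in the low two bits. The same test in C (a sketch, not bionic code):

    #include <stdbool.h>
    #include <stdint.h>

    /* "eor r12, r0, r1; tst r12, #3" in C: the pointers are congruent
     * (can share word alignment) iff their low two bits match. */
    static bool congruent(uintptr_t dst, uintptr_t src) {
        return ((dst ^ src) & 3) == 0;
    }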
@@ -100,7 +103,7 @@ src_aligned:

         /* align the destination to a cache-line */
         rsb         r3, r0, #0
         ands        r3, r3, #0x1C
-        beq         congruent_aligned32
+        beq         .Lcongruent_aligned32
         cmp         r3, r2
         andhi       r3, r2, #0x1C
@@ -115,14 +118,14 @@ src_aligned:
         strne       r10,[r0], #4
         sub         r2, r2, r3

-congruent_aligned32:
+.Lcongruent_aligned32:
         /*
          * here source is aligned to 32 bytes.
          */

-cached_aligned32:
+.Lcached_aligned32:
         subs        r2, r2, #32
-        blo         less_than_32_left
+        blo         .Lless_than_32_left

         /*
          * We preload a cache-line up to 64 bytes ahead. On the 926, this will
@@ -160,10 +163,7 @@ cached_aligned32:
         add         r2, r2, #32

-
-
-less_than_32_left:
+.Lless_than_32_left:
         /*
          * less than 32 bytes left at this point (length in r2)
          */
@@ -197,7 +197,7 @@ less_than_32_left:

         /********************************************************************/

-non_congruent:
+.Lnon_congruent:
         /*
          * here source is aligned to 4 bytes
          * but destination is not.
@@ -207,9 +207,9 @@ non_congruent:
          * partial words in the shift queue)
          */
         cmp         r2, #4
-        blo         copy_last_3_and_return
+        blo         .Lcopy_last_3_and_return

-        /* Use post-incriment mode for stm to spill r5-r11 to reserved stack
+        /* Use post-increment mode for stm to spill r5-r11 to reserved stack
          * frame. Don't update sp.
          */
         stmea       sp, {r5-r11}
@@ -236,7 +236,7 @@ non_congruent:
         movcs       r3, r3, lsr #8
         cmp         r2, #4
-        blo         partial_word_tail
+        blo         .Lpartial_word_tail

         /* Align destination to 32 bytes (cache line boundary) */
 1:      tst         r0, #0x1c
@@ -248,11 +248,11 @@ non_congruent:
         str         r4, [r0], #4
         cmp         r2, #4
         bhs         1b
-        blo         partial_word_tail
+        blo         .Lpartial_word_tail

         /* copy 32 bytes at a time */
 2:      subs        r2, r2, #32
-        blo         less_than_thirtytwo
+        blo         .Lless_than_thirtytwo

         /* Use immediate mode for the shifts, because there is an extra cycle
          * for register shifts, which could account for up to 50% of
@@ -260,11 +260,11 @@ non_congruent:
          */
         cmp         r12, #24
-        beq         loop24
+        beq         .Lloop24
         cmp         r12, #8
-        beq         loop8
+        beq         .Lloop8

-loop16:
+.Lloop16:
         ldr         r12, [r1], #4
 1:      mov         r4, r12
         ldmia       r1!, { r5,r6,r7, r8,r9,r10,r11}
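The .Lloop8/.Lloop16/.Lloop24 bodies handle the non-congruent case by reading aligned source words and stitching each output word together from two neighbours with a fixed shift pair; because that shift amount is constant for the whole copy, it can be an immediate operand, which is what the comment above about the extra cycle for register shifts refers to. A rough little-endian C model of that inner loop (hypothetical helper, not bionic code):

    #include <stddef.h>
    #include <stdint.h>

    /* Read words from the word-aligned source and stitch each destination
     * word from two neighbouring source words.  byte_offset (1..3) is fixed
     * for the whole copy, so rshift/lshift are effectively immediates.
     * Reads one word beyond the last output word, like the assembly's
     * lookahead. */
    static void copy_shifted_words(uint32_t* dst, const uint32_t* src_aligned,
                                   unsigned byte_offset, size_t nwords) {
        unsigned rshift = 8 * byte_offset;
        unsigned lshift = 32 - rshift;
        uint32_t carry = src_aligned[0] >> rshift;   /* partial word in flight */
        for (size_t i = 0; i < nwords; ++i) {
            uint32_t next = src_aligned[i + 1];
            dst[i] = carry | (next << lshift);
            carry = next >> rshift;
        }
    }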
@@ -289,9 +289,9 @@ loop16:
         stmia       r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
         mov         r3, r11, lsr #16
         bhs         1b
-        b           less_than_thirtytwo
+        b           .Lless_than_thirtytwo

-loop8:
+.Lloop8:
         ldr         r12, [r1], #4
 1:      mov         r4, r12
         ldmia       r1!, { r5,r6,r7, r8,r9,r10,r11}
@@ -316,9 +316,9 @@ loop8:
         stmia       r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
         mov         r3, r11, lsr #8
         bhs         1b
-        b           less_than_thirtytwo
+        b           .Lless_than_thirtytwo

-loop24:
+.Lloop24:
         ldr         r12, [r1], #4
 1:      mov         r4, r12
         ldmia       r1!, { r5,r6,r7, r8,r9,r10,r11}
@@ -345,12 +345,12 @@ loop24:
         bhs         1b

-less_than_thirtytwo:
+.Lless_than_thirtytwo:
         /* copy the last 0 to 31 bytes of the source */
         rsb         r12, lr, #32         /* we corrupted r12, recompute it */
         add         r2, r2, #32
         cmp         r2, #4
-        blo         partial_word_tail
+        blo         .Lpartial_word_tail

 1:      ldr         r5, [r1], #4
         sub         r2, r2, #4
@@ -360,7 +360,7 @@ less_than_thirtytwo:
         cmp         r2, #4
         bhs         1b

-partial_word_tail:
+.Lpartial_word_tail:
         /* we have a partial word in the input buffer */
         movs        r5, lr, lsl #(31-3)
         strmib      r3, [r0], #1
@@ -372,7 +372,7 @@ partial_word_tail:
         /* Refill spilled registers from the stack. Don't update sp. */
         ldmfd       sp, {r5-r11}

-copy_last_3_and_return:
+.Lcopy_last_3_and_return:
         movs        r2, r2, lsl #31      /* copy remaining 0, 1, 2 or 3 bytes */
         ldrmib      r2, [r1], #1
         ldrcsb      r3, [r1], #1
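The tail above uses a flag trick: "movs r2, r2, lsl #31" leaves bit 0 of the remaining count in the N flag and bit 1 in the C flag, so the mi-conditional load/store copies one trailing byte and the cs-conditional pair copies two. In C terms (sketch only):

    /* Equivalent of the .Lcopy_last_3_and_return tail for count in 0..3. */
    static void copy_last_3(unsigned char* d, const unsigned char* s,
                            unsigned count) {
        if (count & 1) { *d++ = *s++; }               /* ldrmib / strmib path */
        if (count & 2) { *d++ = *s++; *d++ = *s++; }  /* ldrcsb / strcsb path */
    }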
@@ -385,9 +385,15 @@ copy_last_3_and_return:
         add         sp,  sp, #28
         ldmfd       sp!, {r0, r4, lr}
         bx          lr
+END(memcpy)

         // Only reached when the __memcpy_chk check fails.
-fortify_check_failed:
+ENTRY_PRIVATE(__memcpy_chk_fail)
+        // Preserve lr for backtrace.
+        push        {lr}
+        .cfi_def_cfa_offset 4
+        .cfi_rel_offset lr, 0
+
         ldr         r0, error_message
         ldr         r1, error_code
 1:
@@ -397,7 +403,7 @@ error_code:
         .word       BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
 error_message:
         .word       error_string-(1b+8)
-END(memcpy)
+END(__memcpy_chk_fail)

         .data
 error_string: