bionic: Add ARM optimized strcpy()

Reference results of the experiments on Qualcomm MSM7x25 (524MHz):

[original C code]
             prc thr   usecs/call      samples   errors cnt/samp     size
strcpy_1k      1   1     14.56159           99        0     1000     1024

[ARM optimized code]
             prc thr   usecs/call      samples   errors cnt/samp     size
strcpy_1k      1   1      3.46653           99        0     1000     1024

This work is derived from code by ARM Ltd.

Change-Id: I906ac53bb7a7285e14693c77d3ce8d4ed6f98bfd
This commit is contained in:
Jim Huang 2010-08-10 17:23:39 +08:00 committed by Jean-Baptiste Queru
parent ea0fe8563d
commit 73c04b3269
2 changed files with 141 additions and 1 deletions

View File

@ -186,7 +186,6 @@ libc_common_src_files := \
string/strcat.c \
string/strchr.c \
string/strcoll.c \
string/strcpy.c \
string/strcspn.c \
string/strdup.c \
string/strerror.c \
@ -309,6 +308,7 @@ libc_common_src_files += \
arch-arm/bionic/setjmp.S \
arch-arm/bionic/sigsetjmp.S \
arch-arm/bionic/strlen.c.arm \
arch-arm/bionic/strcpy.S \
arch-arm/bionic/syscall.S \
arch-arm/bionic/sigaction.c \
arch-arm/bionic/__sig_restorer.S \
@ -357,6 +357,7 @@ libc_common_src_files += \
arch-x86/string/strcmp_wrapper.S \
arch-x86/string/strncmp_wrapper.S \
arch-x86/string/strlen_wrapper.S \
string/strcpy.c \
bionic/pthread.c \
bionic/pthread-atfork.c \
bionic/pthread-timers.c \
@ -394,6 +395,7 @@ libc_common_src_files += \
string/strncmp.c \
string/memcmp.c \
string/strlen.c \
string/strcpy.c \
bionic/pthread.c \
bionic/pthread-atfork.c \
bionic/pthread-timers.c \

View File

@ -0,0 +1,138 @@
/*
* Copyright (C) 2010 The Android Open Source Project
* Copyright (c) 2008 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Android adaptation and tweak by Jim Huang <jserv@0xlab.org>.
*/
#include <machine/cpu-features.h>
/*
 * char *strcpy(char *dst, const char *src)
 *
 * Target: 32-bit ARM, GNU as, preprocessed by cpp.
 *
 * In:      r0 = dst, r1 = src
 * Out:     r0 = dst (r0 is never written by this routine)
 * Scratch: r2, r3, ip (ip is the running destination pointer)
 * Saved:   r4, r5 are pushed before use on the word-copy path and
 *          popped again before returning.
 *
 * Strategy:
 *   - If src and dst differ in their low two address bits, copy byte by
 *     byte (label 4).
 *   - Otherwise copy a leading byte and/or halfword until both pointers
 *     are word aligned (label 3), then copy a word at a time (labels 5
 *     and 2) and finish the word containing the NUL byte by byte (first
 *     label 1).
 *
 * NUL detection on a word w uses r5 = 0x01010101:
 *     ((w - 0x01010101) & ~w) & 0x80808080
 * which is non-zero exactly when some byte of w is zero.
 *
 * PLD() is not defined in this file; it is expected to be supplied by
 * <machine/cpu-features.h>.
 */
    .text
    .global strcpy
    .type strcpy, %function
    .align 4
strcpy:
    .fnstart
    PLD(r1, #0)
    eor r2, r0, r1              /* r2 = dst ^ src                        */
    mov ip, r0                  /* ip walks dst; r0 stays as the result  */
    tst r2, #3
    bne 4f                      /* low two bits differ: byte copy        */
    tst r1, #3
    bne 3f                      /* same offset, but not word aligned     */
    /* Word-aligned copy.  Save r5/r4 and build the 0x01010101 mask. */
5:
    str r5, [sp, #-4]!
    mov r5, #0x01
    orr r5, r5, r5, lsl #8
    orr r5, r5, r5, lsl #16     /* r5 = 0x01010101                       */
    str r4, [sp, #-4]!
    tst r1, #4                  /* is src on an 8-byte boundary?         */
    ldr r3, [r1], #4            /* ldr leaves the flags from tst intact  */
    beq 2f                      /* yes: go straight to the paired loop   */
    /* src was only 4-byte aligned: handle one word on its own first. */
    sub r2, r3, r5
    bics r2, r2, r3             /* r2 = (r3 - 0x01010101) & ~r3          */
    tst r2, r5, lsl #7          /* ... & 0x80808080                      */
    itt eq
    streq r3, [ip], #4          /* no NUL: store it and fetch the next   */
    ldreq r3, [r1], #4
    bne 1f                      /* NUL inside r3: finish byte by byte    */
    /* Inner loop. We now know that r1 is 64-bit aligned, so we
       can safely fetch up to two words. This allows us to avoid
       load stalls. */
    .p2align 2
2:
    PLD(r1, #8)
    ldr r4, [r1], #4            /* second word of the pair               */
    sub r2, r3, r5
    bics r2, r2, r3
    tst r2, r5, lsl #7          /* NUL in r3?                            */
    sub r2, r4, r5              /* no S suffix: flags from tst survive   */
    bne 1f                      /* NUL in r3 (r3 has not been stored)    */
    str r3, [ip], #4
    bics r2, r2, r4             /* r2 = (r4 - 0x01010101) & ~r4          */
    tst r2, r5, lsl #7          /* NUL in r4?                            */
    itt eq
    ldreq r3, [r1], #4
    streq r4, [ip], #4
    beq 2b
    mov r3, r4                  /* NUL is in r4: let the tail emit it    */
    /* Tail: store the bytes of r3 in memory order, up to and including
       the NUL, then restore r4/r5 and return. */
1:
#ifdef __ARMEB__
    rors r3, r3, #24            /* bring the next (most significant) byte
                                   down to bits 7:0                      */
#endif
    strb r3, [ip], #1
    tst r3, #0xff               /* was the byte just stored the NUL?     */
#ifdef __ARMEL__
    ror r3, r3, #8              /* next byte into bits 7:0; no S suffix,
                                   so the flags from tst are kept        */
#endif
    bne 1b
    ldr r4, [sp], #4
    ldr r5, [sp], #4
    bx lr
    /* Strings have the same offset from word alignment, but it's
       not zero. */
3:
    tst r1, #1
    beq 1f                      /* even address: skip the single byte    */
    ldrb r2, [r1], #1
    strb r2, [ip], #1
    cmp r2, #0
    it eq
    bxeq lr                     /* that byte was the terminator          */
1:
    tst r1, #2
    beq 5b                      /* already word aligned                  */
    ldrh r2, [r1], #2
    /* Store the halfword if its first byte (in memory order) is
       non-zero, otherwise store only the NUL byte.  The flags end up NE
       only when both bytes are non-zero. */
#ifdef __ARMEB__
    tst r2, #0xff00
    iteet ne
    strneh r2, [ip], #2
    lsreq r2, r2, #8
    streqb r2, [ip]
    tstne r2, #0xff
#else
    tst r2, #0xff
    itet ne
    strneh r2, [ip], #2
    streqb r2, [ip]
    tstne r2, #0xff00
#endif
    bne 5b                      /* no NUL yet: continue with word copy   */
    bx lr
    /* src and dst do not have a common word-alignment. Fall back to
       byte copying. */
4:
    ldrb r2, [r1], #1
    strb r2, [ip], #1
    cmp r2, #0
    bne 4b
    bx lr
    .fnend                      /* closes the .fnstart unwind region     */
    .size strcpy, .-strcpy      /* record the function size in the ELF
                                   symbol table                          */