Compare commits


2 Commits

Author SHA1 Message Date
Baligh Uddin e4dda1f1e9 Merge "resolved conflicts for merge of 5d0ad38c to jb-mr2-dev" into jb-mr2-dev 2013-09-25 02:31:37 +00:00
Elliott Hughes 5a6f55a3c4 resolved conflicts for merge of 5d0ad38c to jb-mr2-dev (Change-Id: I606f6d86aad46c5eda0dc0d907fe691ca0eea826) 2013-09-24 15:53:34 -07:00
321 changed files with 4978 additions and 28036 deletions

View File

@@ -6,33 +6,67 @@ include $(LOCAL_PATH)/arch-$(TARGET_ARCH)/syscalls.mk
# =========================================================
libc_common_src_files := \
$(syscall_src) \
unistd/abort.c \
unistd/alarm.c \
unistd/exec.c \
unistd/fnmatch.c \
unistd/getopt_long.c \
unistd/syslog.c \
unistd/system.c \
unistd/time.c \
stdio/asprintf.c \
stdio/clrerr.c \
stdio/fclose.c \
stdio/fdopen.c \
stdio/feof.c \
stdio/ferror.c \
stdio/fflush.c \
stdio/fgetc.c \
stdio/fgetln.c \
stdio/fgetpos.c \
stdio/fgets.c \
stdio/fileno.c \
stdio/findfp.c \
stdio/flags.c \
stdio/fopen.c \
stdio/fprintf.c \
stdio/fpurge.c \
stdio/fputc.c \
stdio/fputs.c \
stdio/fread.c \
stdio/freopen.c \
stdio/fscanf.c \
stdio/fseek.c \
stdio/fsetpos.c \
stdio/ftell.c \
stdio/funopen.c \
stdio/fvwrite.c \
stdio/fwalk.c \
stdio/fwrite.c \
stdio/getc.c \
stdio/getchar.c \
stdio/gets.c \
stdio/makebuf.c \
stdio/mktemp.c \
stdio/printf.c \
stdio/putc.c \
stdio/putchar.c \
stdio/puts.c \
stdio/putw.c \
stdio/refill.c \
stdio/remove.c \
stdio/rewind.c \
stdio/rget.c \
stdio/scanf.c \
stdio/setbuf.c \
stdio/setbuffer.c \
stdio/setvbuf.c \
stdio/snprintf.c\
stdio/sprintf.c \
stdio/sscanf.c \
stdio/stdio.c \
stdio/tempnam.c \
stdio/tmpnam.c \
stdio/ungetc.c \
stdio/vasprintf.c \
stdio/vfprintf.c \
@@ -43,11 +77,13 @@ libc_common_src_files := \
stdio/vscanf.c \
stdio/vsscanf.c \
stdio/wbuf.c \
stdio/wsetup.c \
stdlib/atexit.c \
stdlib/ctype_.c \
stdlib/exit.c \
stdlib/getenv.c \
stdlib/putenv.c \
stdlib/qsort.c \
stdlib/setenv.c \
stdlib/strtod.c \
stdlib/strtoimax.c \
@@ -58,16 +94,29 @@ libc_common_src_files := \
stdlib/strtoumax.c \
stdlib/tolower_.c \
stdlib/toupper_.c \
string/index.c \
string/strcasecmp.c \
string/strcat.c \
string/strchr.c \
string/strcspn.c \
string/strdup.c \
string/strlcat.c \
string/strlcpy.c \
string/strncat.c \
string/strncpy.c \
string/strpbrk.c \
string/strrchr.c \
string/strsep.c \
string/strspn.c \
string/strstr.c \
string/strtok.c \
wchar/wcswidth.c \
wchar/wcsxfrm.c \
tzcode/asctime.c \
tzcode/difftime.c \
tzcode/localtime.c \
tzcode/strftime.c \
tzcode/strptime.c \
bionic/arc4random.c \
bionic/atoi.c \
bionic/atol.c \
@@ -102,9 +151,11 @@ libc_common_src_files := \
bionic/ldexp.c \
bionic/lseek64.c \
bionic/md5.c \
bionic/memchr.c \
bionic/memmem.c \
bionic/memrchr.c \
bionic/memswap.c \
bionic/name_mem.c \
bionic/mmap.c \
bionic/openat.c \
bionic/open.c \
bionic/pathconf.c \
@@ -119,6 +170,7 @@ libc_common_src_files := \
bionic/recv.c \
bionic/sched_cpualloc.c \
bionic/sched_cpucount.c \
bionic/sched_getaffinity.c \
bionic/sched_getcpu.c \
bionic/semaphore.c \
bionic/send.c \
@@ -138,11 +190,11 @@ libc_common_src_files := \
bionic/sleep.c \
bionic/statfs.c \
bionic/strndup.c \
bionic/strnlen.c \
bionic/strntoimax.c \
bionic/strntoumax.c \
bionic/strtotimeval.c \
bionic/system_properties.c \
bionic/system_properties_compat.c \
bionic/tcgetpgrp.c \
bionic/tcsetpgrp.c \
bionic/thread_atexit.c \
@@ -181,40 +233,22 @@ libc_common_src_files := \
netbsd/nameser/ns_print.c \
netbsd/nameser/ns_samedomain.c \
# Fortify implementations of libc functions.
libc_common_src_files += \
bionic/__fgets_chk.cpp \
bionic/__memcpy_chk.cpp \
bionic/__memmove_chk.cpp \
bionic/__memset_chk.cpp \
bionic/__strcat_chk.cpp \
bionic/__strchr_chk.cpp \
bionic/__strcpy_chk.cpp \
bionic/__strlcat_chk.cpp \
bionic/__strlcpy_chk.cpp \
bionic/__strlen_chk.cpp \
bionic/__strncat_chk.cpp \
bionic/__strncpy_chk.cpp \
bionic/__strrchr_chk.cpp \
bionic/__umask_chk.cpp \
bionic/__vsnprintf_chk.cpp \
bionic/__vsprintf_chk.cpp \
libc_bionic_src_files := \
bionic/abort.cpp \
bionic/assert.cpp \
bionic/brk.cpp \
bionic/dirent.cpp \
bionic/__errno.c \
bionic/eventfd_read.cpp \
bionic/eventfd_write.cpp \
bionic/futimens.cpp \
bionic/__fgets_chk.cpp \
bionic/getauxval.cpp \
bionic/getcwd.cpp \
bionic/libc_init_common.cpp \
bionic/libc_logging.cpp \
bionic/libgen.cpp \
bionic/mmap.cpp \
bionic/__memcpy_chk.cpp \
bionic/__memmove_chk.cpp \
bionic/__memset_chk.cpp \
bionic/pthread_attr.cpp \
bionic/pthread_detach.cpp \
bionic/pthread_equal.cpp \
@@ -230,88 +264,56 @@ libc_bionic_src_files := \
bionic/raise.cpp \
bionic/sbrk.cpp \
bionic/scandir.cpp \
bionic/sched_getaffinity.cpp \
bionic/__set_errno.cpp \
bionic/setlocale.cpp \
bionic/signalfd.cpp \
bionic/sigwait.cpp \
bionic/statvfs.cpp \
bionic/__strcat_chk.cpp \
bionic/__strcpy_chk.cpp \
bionic/strerror.cpp \
bionic/strerror_r.cpp \
bionic/__strlcat_chk.cpp \
bionic/__strlcpy_chk.cpp \
bionic/__strlen_chk.cpp \
bionic/__strncat_chk.cpp \
bionic/__strncpy_chk.cpp \
bionic/strsignal.cpp \
bionic/stubs.cpp \
bionic/sysconf.cpp \
bionic/tdestroy.cpp \
bionic/tmpfile.cpp \
bionic/__umask_chk.cpp \
bionic/__vsnprintf_chk.cpp \
bionic/__vsprintf_chk.cpp \
bionic/wait.cpp \
bionic/wchar.cpp \
libc_tzcode_src_files := \
tzcode/asctime.c \
tzcode/difftime.c \
tzcode/localtime.c \
tzcode/strftime.c \
tzcode/strptime.c \
libc_upstream_freebsd_src_files := \
upstream-freebsd/lib/libc/stdio/clrerr.c \
upstream-freebsd/lib/libc/stdio/fclose.c \
upstream-freebsd/lib/libc/stdio/fdopen.c \
upstream-freebsd/lib/libc/stdio/feof.c \
upstream-freebsd/lib/libc/stdio/ferror.c \
upstream-freebsd/lib/libc/stdio/fgetln.c \
upstream-freebsd/lib/libc/stdio/fgetpos.c \
upstream-freebsd/lib/libc/stdio/fgets.c \
upstream-freebsd/lib/libc/stdio/fileno.c \
upstream-freebsd/lib/libc/stdio/flags.c \
upstream-freebsd/lib/libc/stdio/fopen.c \
upstream-freebsd/lib/libc/stdio/fpurge.c \
upstream-freebsd/lib/libc/stdio/fputs.c \
upstream-freebsd/lib/libc/stdio/fsetpos.c \
upstream-freebsd/lib/libc/stdio/funopen.c \
upstream-freebsd/lib/libc/stdio/fwalk.c \
upstream-freebsd/lib/libc/stdio/fwrite.c \
upstream-freebsd/lib/libc/stdio/getc.c \
upstream-freebsd/lib/libc/stdio/getchar.c \
upstream-freebsd/lib/libc/stdio/makebuf.c \
upstream-freebsd/lib/libc/stdio/mktemp.c \
upstream-freebsd/lib/libc/stdio/putc.c \
upstream-freebsd/lib/libc/stdio/putchar.c \
upstream-freebsd/lib/libc/stdio/puts.c \
upstream-freebsd/lib/libc/stdio/putw.c \
upstream-freebsd/lib/libc/stdio/remove.c \
upstream-freebsd/lib/libc/stdio/rget.c \
upstream-freebsd/lib/libc/stdio/setbuf.c \
upstream-freebsd/lib/libc/stdio/setbuffer.c \
upstream-freebsd/lib/libc/stdio/setvbuf.c \
upstream-freebsd/lib/libc/stdio/tempnam.c \
upstream-freebsd/lib/libc/stdio/tmpnam.c \
upstream-freebsd/lib/libc/stdio/wsetup.c \
upstream-freebsd/lib/libc/stdlib/abs.c \
upstream-freebsd/lib/libc/stdlib/getopt_long.c \
upstream-freebsd/lib/libc/stdlib/imaxabs.c \
upstream-freebsd/lib/libc/stdlib/imaxdiv.c \
upstream-freebsd/lib/libc/stdlib/labs.c \
upstream-freebsd/lib/libc/stdlib/llabs.c \
upstream-freebsd/lib/libc/stdlib/qsort.c \
upstream-freebsd/lib/libc/stdlib/realpath.c \
upstream-freebsd/lib/libc/string/wcpcpy.c \
upstream-freebsd/lib/libc/string/wcpncpy.c \
upstream-freebsd/lib/libc/string/wcscasecmp.c \
upstream-freebsd/lib/libc/string/wcscat.c \
upstream-freebsd/lib/libc/string/wcschr.c \
upstream-freebsd/lib/libc/string/wcscmp.c \
upstream-freebsd/lib/libc/string/wcscpy.c \
upstream-freebsd/lib/libc/string/wcscspn.c \
upstream-freebsd/lib/libc/string/wcsdup.c \
upstream-freebsd/lib/libc/string/wcslcat.c \
upstream-freebsd/lib/libc/string/wcslcpy.c \
upstream-freebsd/lib/libc/string/wcslen.c \
upstream-freebsd/lib/libc/string/wcsncasecmp.c \
upstream-freebsd/lib/libc/string/wcsncat.c \
upstream-freebsd/lib/libc/string/wcsncmp.c \
upstream-freebsd/lib/libc/string/wcsncpy.c \
upstream-freebsd/lib/libc/string/wcsnlen.c \
upstream-freebsd/lib/libc/string/wcspbrk.c \
upstream-freebsd/lib/libc/string/wcsrchr.c \
upstream-freebsd/lib/libc/string/wcsspn.c \
upstream-freebsd/lib/libc/string/wcsstr.c \
upstream-freebsd/lib/libc/string/wcstok.c \
upstream-freebsd/lib/libc/string/wmemchr.c \
upstream-freebsd/lib/libc/string/wmemcmp.c \
upstream-freebsd/lib/libc/string/wmemcpy.c \
upstream-freebsd/lib/libc/string/wmemmove.c \
upstream-freebsd/lib/libc/string/wmemset.c \
@@ -367,23 +369,6 @@ libc_common_src_files += \
bionic/memmove.c.arm \
string/bcopy.c \
string/strncmp.c \
string/strncat.c \
string/strncpy.c \
bionic/strchr.cpp \
string/strrchr.c \
bionic/memchr.c \
bionic/memrchr.c \
string/index.c \
bionic/strnlen.c \
string/strlcat.c \
string/strlcpy.c \
upstream-freebsd/lib/libc/string/wcschr.c \
upstream-freebsd/lib/libc/string/wcsrchr.c \
upstream-freebsd/lib/libc/string/wcscmp.c \
upstream-freebsd/lib/libc/string/wcscpy.c \
upstream-freebsd/lib/libc/string/wmemcmp.c \
upstream-freebsd/lib/libc/string/wcslen.c \
upstream-freebsd/lib/libc/string/wcscat.c
# These files need to be arm so that gdbserver
# can set breakpoints in them without messing
@@ -407,6 +392,7 @@ libc_common_src_files += \
bionic/pthread-rwlocks.c \
bionic/pthread-timers.c \
bionic/ptrace.c \
string/strcpy.c \
libc_static_common_src_files += \
bionic/pthread.c \
@@ -421,25 +407,7 @@ libc_common_src_files += \
string/bcopy.c \
string/strcmp.c \
string/strcpy.c \
string/strncmp.c \
string/strcat.c \
string/strncat.c \
string/strncpy.c \
bionic/strchr.cpp \
string/strrchr.c \
bionic/memchr.c \
bionic/memrchr.c \
string/index.c \
bionic/strnlen.c \
string/strlcat.c \
string/strlcpy.c \
upstream-freebsd/lib/libc/string/wcschr.c \
upstream-freebsd/lib/libc/string/wcsrchr.c \
upstream-freebsd/lib/libc/string/wcscmp.c \
upstream-freebsd/lib/libc/string/wcscpy.c \
upstream-freebsd/lib/libc/string/wmemcmp.c \
upstream-freebsd/lib/libc/string/wcslen.c \
upstream-freebsd/lib/libc/string/wcscat.c
string/strncmp.c
libc_common_src_files += \
bionic/pthread-atfork.c \
@@ -501,6 +469,14 @@ libc_common_cflags := \
-DLOG_ON_HEAP_ERROR \
-Wall -Wextra
# these macro definitions are required to implement the
# 'timezone' and 'daylight' global variables, as well as
# properly update the 'tm_gmtoff' field in 'struct tm'.
#
libc_common_cflags += \
-DTM_GMTOFF=tm_gmtoff \
-DUSG_COMPAT=1
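
As a hedged aside, here is a minimal C sketch of what those two definitions enable, assuming a libc compiled with them: tm_gmtoff is the BSD extension field in struct tm, and timezone/daylight are the SysV globals. Nothing below is bionic-specific code; on glibc you may need _DEFAULT_SOURCE for tm_gmtoff.

#include <stdio.h>
#include <time.h>

int main(void) {
    time_t now = time(NULL);
    struct tm tm;
    tzset();                /* populates the 'timezone' and 'daylight' globals */
    localtime_r(&now, &tm); /* fills tm.tm_gmtoff when built with -DTM_GMTOFF */
    printf("tm_gmtoff=%ld timezone=%ld daylight=%d\n",
           (long) tm.tm_gmtoff, (long) timezone, daylight);
    return 0;
}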
ifeq ($(strip $(DEBUG_BIONIC_LIBC)),true)
libc_common_cflags += -DDEBUG
endif
@@ -578,17 +554,23 @@ libc_crt_target_cflags += \
# static C++ destructors are properly called on dlclose().
#
ifeq ($(TARGET_ARCH),arm)
libc_crtbegin_extension := c
libc_crt_target_so_cflags :=
endif
ifeq ($(TARGET_ARCH),mips)
libc_crtbegin_extension := S
libc_crt_target_so_cflags := -fPIC
endif
ifeq ($(TARGET_ARCH),x86)
libc_crtbegin_extension := c
libc_crt_target_so_cflags := -fPIC
endif
ifeq ($(libc_crtbegin_extension),)
$(error $(TARGET_ARCH) not supported)
endif
libc_crt_target_so_cflags += $(libc_crt_target_cflags)
libc_crt_target_crtbegin_file := $(LOCAL_PATH)/arch-$(TARGET_ARCH)/bionic/crtbegin.c
libc_crt_target_crtbegin_so_file := $(LOCAL_PATH)/arch-$(TARGET_ARCH)/bionic/crtbegin_so.c
libc_crt_target_crtbegin_file := $(LOCAL_PATH)/arch-$(TARGET_ARCH)/bionic/crtbegin.$(libc_crtbegin_extension)
libc_crt_target_crtbegin_so_file := $(LOCAL_PATH)/arch-$(TARGET_ARCH)/bionic/crtbegin_so.$(libc_crtbegin_extension)
# See the comment in crtbrand.c for the reason why we need to generate
# crtbrand.s before generating crtbrand.o.
@@ -704,28 +686,6 @@ LOCAL_SYSTEM_SHARED_LIBRARIES :=
include $(BUILD_STATIC_LIBRARY)
# ========================================================
# libc_tzcode.a - upstream 'tzcode' code
# ========================================================
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(libc_tzcode_src_files)
LOCAL_CFLAGS := \
$(libc_common_cflags) \
-std=gnu99 \
-DSTD_INSPIRED=1 \
-DTZDIR=\"/system/usr/share/zoneinfo\" \
-DTM_GMTOFF=tm_gmtoff \
-DUSG_COMPAT=1
LOCAL_C_INCLUDES := $(libc_common_c_includes)
LOCAL_MODULE := libc_tzcode
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_SYSTEM_SHARED_LIBRARIES :=
include $(BUILD_STATIC_LIBRARY)
# ========================================================
# libc_freebsd.a - upstream FreeBSD C library code
# ========================================================
@@ -801,12 +761,7 @@ LOCAL_CFLAGS := $(libc_common_cflags) \
LOCAL_C_INCLUDES := $(libc_common_c_includes)
LOCAL_MODULE := libc_common
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_WHOLE_STATIC_LIBRARIES := \
libbionic_ssp \
libc_bionic \
libc_freebsd \
libc_netbsd \
libc_tzcode
LOCAL_WHOLE_STATIC_LIBRARIES := libbionic_ssp libc_bionic libc_freebsd libc_netbsd
LOCAL_SYSTEM_SHARED_LIBRARIES :=
# TODO: split out the asflags.

View File

@@ -1673,6 +1673,38 @@ SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 1990, 1993
The Regents of the University of California. All rights reserved.
This code is derived from software contributed to Berkeley by
Chris Torek.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
4. Neither the name of the University nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 1990, 1993
The Regents of the University of California. All rights reserved.
@@ -2820,7 +2852,7 @@ are met:
2. Redistributions in binary form must reproduce the above copyright
notices, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors
4. Neither the name of the University nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
@@ -3060,6 +3092,13 @@ are met:
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. All advertising materials mentioning features or use of this software
must display the following acknowledgement:
This product includes software developed by the NetBSD
Foundation, Inc. and its contributors.
4. Neither the name of The NetBSD Foundation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
@@ -3877,36 +3916,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2010, 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2010, Intel Corporation
All rights reserved.
@@ -3937,36 +3946,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2011 The Android Open Source Project
Copyright (c) 2008 ARM Ltd
All rights reserved.
@@ -3996,36 +3975,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2011, VMware, Inc.
All rights reserved.
@@ -4109,38 +4058,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2013, Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
Neither the name of Linaro Limited nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c)1999 Citrus Project,
All rights reserved.

View File

@@ -201,8 +201,6 @@ int removexattr(const char *, const char *) 1
int lremovexattr(const char *, const char *) 1
int __statfs64:statfs64(const char *, size_t, struct statfs *) 1
long unshare(unsigned long) 1
int swapon(const char *, int) 1
int swapoff(const char *) 1
# time
int pause () 1
@@ -223,9 +221,6 @@ int __timer_getoverrun:timer_getoverrun(timer_t)
int __timer_delete:timer_delete(timer_t) 1
int utimes(const char*, const struct timeval tvp[2]) 1
int utimensat(int, const char *, const struct timespec times[2], int) 1
int timerfd_create(clockid_t, int) 1
int timerfd_settime(int, int, const struct itimerspec *, struct itimerspec *) 1
int timerfd_gettime(int, struct itimerspec *) 1
# signals
int sigaction(int, const struct sigaction *, struct sigaction *) 1

View File

@@ -14,6 +14,8 @@ _LIBC_ARCH_COMMON_SRC_FILES := \
arch-arm/bionic/_setjmp.S \
arch-arm/bionic/setjmp.S \
arch-arm/bionic/sigsetjmp.S \
arch-arm/bionic/strcpy.S \
arch-arm/bionic/strlen.c.arm \
arch-arm/bionic/syscall.S \
arch-arm/bionic/tgkill.S \
arch-arm/bionic/tkill.S \
@@ -26,19 +28,8 @@ _LIBC_ARCH_STATIC_SRC_FILES := \
_LIBC_ARCH_DYNAMIC_SRC_FILES := \
arch-arm/bionic/exidx_dynamic.c
# Remove the C++ fortify function implementations for which there is an
# arm assembler version.
_LIBC_FORTIFY_FILES_TO_REMOVE := \
bionic/__memcpy_chk.cpp \
bionic/__memset_chk.cpp \
bionic/__strcpy_chk.cpp \
bionic/__strcat_chk.cpp \
libc_common_src_files := \
$(filter-out $(_LIBC_FORTIFY_FILES_TO_REMOVE),$(libc_common_src_files))
ifeq ($(strip $(wildcard bionic/libc/arch-arm/$(TARGET_CPU_VARIANT)/$(TARGET_CPU_VARIANT).mk)),)
$(error "TARGET_CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.")
$(error "TARGET_CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a9, cortex-a15, krait. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.")
endif
include bionic/libc/arch-arm/$(TARGET_CPU_VARIANT)/$(TARGET_CPU_VARIANT).mk

View File

@@ -38,5 +38,5 @@
ENTRY(abort)
.save {r3, r14}
stmfd sp!, {r3, r14}
bl PIC_SYM(_C_LABEL(__libc_android_abort), PLT)
blx PIC_SYM(_C_LABEL(__libc_android_abort), PLT)
END(abort)

View File

@@ -32,33 +32,33 @@
// int __pthread_clone(void* (*fn)(void*), void* child_stack, int flags, void* arg);
ENTRY(__pthread_clone)
# Push 'fn' and 'arg' onto 'child_stack'.
# Copy the args onto the new stack.
stmdb r1!, {r0, r3}
# The sys_clone system call only takes two arguments: 'flags' and 'child_stack'.
# 'child_stack' is already in r1, but we need to move 'flags' into position.
mov r0, r2
stmfd sp!, {r4, r7}
# System call.
mov ip, r7
ldr r7, =__NR_clone
swi #0
# Child?
movs r0, r0
beq 1f
# Parent.
mov r7, ip
# In parent, reload saved registers then either return or set errno.
ldmfd sp!, {r4, r7}
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno
1: # Child.
# Pop 'fn' and 'arg' back off the stack and call __thread_entry.
1: # The child.
# pick the function arg and call address off the stack and jump
# to the C __thread_entry function which does some setup and then
# calls the thread's start function
pop {r0, r1}
# __thread_entry also needs our stack pointer.
# __thread_entry needs the TLS pointer
mov r2, sp
b __thread_entry
END(__pthread_clone)

View File

@@ -34,10 +34,11 @@
// __futex_syscall3(*ftx, op, val)
ENTRY(__futex_syscall3)
mov ip, r7
stmdb sp!, {r4, r7}
.save {r4, r7}
ldr r7, =__NR_futex
swi #0
mov r7, ip
ldmia sp!, {r4, r7}
bx lr
END(__futex_syscall3)
@@ -48,23 +49,25 @@ END(__futex_syscall4)
// __futex_wait(*ftx, val, *timespec)
ENTRY(__futex_wait)
mov ip, r7
stmdb sp!, {r4, r7}
.save {r4, r7}
mov r3, r2
mov r2, r1
mov r1, #FUTEX_WAIT
ldr r7, =__NR_futex
swi #0
mov r7, ip
ldmia sp!, {r4, r7}
bx lr
END(__futex_wait)
// __futex_wake(*ftx, counter)
ENTRY(__futex_wake)
mov ip, r7
.save {r4, r7}
stmdb sp!, {r4, r7}
mov r2, r1
mov r1, #FUTEX_WAKE
ldr r7, =__NR_futex
swi #0
mov r7, ip
ldmia sp!, {r4, r7}
bx lr
END(__futex_wake)

View File

@@ -30,8 +30,8 @@
* dynamic linker to copy their definition into the final libc.so binary.
*
* They are required to ensure backwards binary compatibility with
* libc.so provided by the platform and binaries built with the NDK or
* different versions/configurations of toolchains.
* Android 1.5, 1.6 and even 3.0 system images. Some applications built
* using the NDK require them to be here.
*
* Now, for a more elaborate description of the issue:
*
@@ -48,9 +48,7 @@
* gcc <options> -o libfoo.so foo.a libgcc.a -lc -lm
*
* This ensures that any helper function needed by the code in foo.a is copied
* into the final libfoo.so. However, doing so will link a bunch of other __cxa
* functions from libgcc.a into each .so and executable, causing 4k+ increase
* in every binary. Therefore the Android platform build system has been
* into the final libfoo.so. Unfortunately, the Android build system has been
* using this instead:
*
* gcc <options> -o libfoo.so foo.a -lc -lm libgcc.a
@@ -60,10 +58,9 @@
* into libfoo.so. Instead, a symbol import definition will be added to it
* so libfoo.so can directly call the one in libc.so at runtime.
*
* When refreshing toolchains for new versions or using different architecture
* flags, the set of helper functions copied to libc.so may change, which
* resulted in some native shared libraries generated with the NDK or prebuilts
* from vendors to fail to load properly.
* When changing toolchains for 2.0, the set of helper functions copied to
* libc.so changed, which resulted in some native shared libraries generated
* with the NDK to fail to load properly.
*
* The NDK has been fixed after 1.6_r1 to use the correct link command, so
* any native shared library generated with it should now be safe from that
@@ -76,11 +73,6 @@
* but it is easier to add a single function here than asking several app
* developers to fix their build.
*
* The __aeabi_idiv function is added to the list since cortex-a15 supports
* HW idiv instructions so the system libc.so doesn't pull in the reference to
* __aeabi_idiv but legacy libraries built against cortex-a9 targets still need
* it.
*
* Final note: some of the functions below should really be in libm.so to
* completely reflect the state of 1.5/1.6 system images. However,
* since libm.so depends on libc.so, it's easier to put all of
@@ -121,7 +113,6 @@
XX(__aeabi_idiv) \
XX(__aeabi_l2d) \
XX(__aeabi_l2f) \
XX(__aeabi_lasr) \
XX(__aeabi_lmul) \
XX(__aeabi_llsl) \
XX(__aeabi_llsr) \
@@ -132,7 +123,7 @@
XX(__cmpdf2) \
XX(__divdf3) \
XX(__divsf3) \
XX(__eqdf2) \
XX(__eqdf2) \
XX(__extendsfdf2) \
XX(__fixdfsi) \
XX(__fixsfsi) \

View File

@@ -1,614 +0,0 @@
/* Copyright (c) 2013, Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Linaro Limited nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
This memcpy routine is optimised for Cortex-A15 cores and takes advantage
of VFP or NEON when built with the appropriate flags.
Assumptions:
ARMv6 (ARMv7-a if using Neon)
ARM state
Unaligned accesses
LDRD/STRD support unaligned word accesses
*/
#include <machine/cpu-features.h>
#include <machine/asm.h>
.syntax unified
/* This implementation requires ARM state. */
.arm
#ifdef __ARM_NEON__
.fpu neon
.arch armv7-a
# define FRAME_SIZE 4
# define USE_VFP
# define USE_NEON
#elif !defined (__SOFTFP__)
.arch armv6
.fpu vfpv2
# define FRAME_SIZE 32
# define USE_VFP
#else
.arch armv6
# define FRAME_SIZE 32
#endif
/* Old versions of GAS incorrectly implement the NEON align semantics. */
#ifdef BROKEN_ASM_NEON_ALIGN
#define ALIGN(addr, align) addr,:align
#else
#define ALIGN(addr, align) addr:align
#endif
#define PC_OFFSET 8 /* PC pipeline compensation. */
#define INSN_SIZE 4
/* Call parameters. */
#define dstin r0
#define src r1
#define count r2
/* Locals. */
#define tmp1 r3
#define dst ip
#define tmp2 r10
#ifndef USE_NEON
/* For bulk copies using GP registers. */
#define A_l r2 /* Call-clobbered. */
#define A_h r3 /* Call-clobbered. */
#define B_l r4
#define B_h r5
#define C_l r6
#define C_h r7
#define D_l r8
#define D_h r9
#endif
/* Number of lines ahead to pre-fetch data. If you change this the code
below will need adjustment to compensate. */
#define prefetch_lines 5
#ifdef USE_VFP
.macro cpy_line_vfp vreg, base
vstr \vreg, [dst, #\base]
vldr \vreg, [src, #\base]
vstr d0, [dst, #\base + 8]
vldr d0, [src, #\base + 8]
vstr d1, [dst, #\base + 16]
vldr d1, [src, #\base + 16]
vstr d2, [dst, #\base + 24]
vldr d2, [src, #\base + 24]
vstr \vreg, [dst, #\base + 32]
vldr \vreg, [src, #\base + prefetch_lines * 64 - 32]
vstr d0, [dst, #\base + 40]
vldr d0, [src, #\base + 40]
vstr d1, [dst, #\base + 48]
vldr d1, [src, #\base + 48]
vstr d2, [dst, #\base + 56]
vldr d2, [src, #\base + 56]
.endm
.macro cpy_tail_vfp vreg, base
vstr \vreg, [dst, #\base]
vldr \vreg, [src, #\base]
vstr d0, [dst, #\base + 8]
vldr d0, [src, #\base + 8]
vstr d1, [dst, #\base + 16]
vldr d1, [src, #\base + 16]
vstr d2, [dst, #\base + 24]
vldr d2, [src, #\base + 24]
vstr \vreg, [dst, #\base + 32]
vstr d0, [dst, #\base + 40]
vldr d0, [src, #\base + 40]
vstr d1, [dst, #\base + 48]
vldr d1, [src, #\base + 48]
vstr d2, [dst, #\base + 56]
vldr d2, [src, #\base + 56]
.endm
#endif
.p2align 6
ENTRY(memcpy)
mov dst, dstin /* Preserve dstin, we need to return it. */
cmp count, #64
bge .Lcpy_not_short
/* Deal with small copies quickly by dropping straight into the
exit block. */
.Ltail63unaligned:
#ifdef USE_NEON
and tmp1, count, #0x38
rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
add pc, pc, tmp1
vld1.8 {d0}, [src]! /* 14 words to go. */
vst1.8 {d0}, [dst]!
vld1.8 {d0}, [src]! /* 12 words to go. */
vst1.8 {d0}, [dst]!
vld1.8 {d0}, [src]! /* 10 words to go. */
vst1.8 {d0}, [dst]!
vld1.8 {d0}, [src]! /* 8 words to go. */
vst1.8 {d0}, [dst]!
vld1.8 {d0}, [src]! /* 6 words to go. */
vst1.8 {d0}, [dst]!
vld1.8 {d0}, [src]! /* 4 words to go. */
vst1.8 {d0}, [dst]!
vld1.8 {d0}, [src]! /* 2 words to go. */
vst1.8 {d0}, [dst]!
tst count, #4
ldrne tmp1, [src], #4
strne tmp1, [dst], #4
#else
/* Copy up to 15 full words of data. May not be aligned. */
/* Cannot use VFP for unaligned data. */
and tmp1, count, #0x3c
add dst, dst, tmp1
add src, src, tmp1
rsb tmp1, tmp1, #(60 - PC_OFFSET/2 + INSN_SIZE/2)
/* Jump directly into the sequence below at the correct offset. */
add pc, pc, tmp1, lsl #1
ldr tmp1, [src, #-60] /* 15 words to go. */
str tmp1, [dst, #-60]
ldr tmp1, [src, #-56] /* 14 words to go. */
str tmp1, [dst, #-56]
ldr tmp1, [src, #-52]
str tmp1, [dst, #-52]
ldr tmp1, [src, #-48] /* 12 words to go. */
str tmp1, [dst, #-48]
ldr tmp1, [src, #-44]
str tmp1, [dst, #-44]
ldr tmp1, [src, #-40] /* 10 words to go. */
str tmp1, [dst, #-40]
ldr tmp1, [src, #-36]
str tmp1, [dst, #-36]
ldr tmp1, [src, #-32] /* 8 words to go. */
str tmp1, [dst, #-32]
ldr tmp1, [src, #-28]
str tmp1, [dst, #-28]
ldr tmp1, [src, #-24] /* 6 words to go. */
str tmp1, [dst, #-24]
ldr tmp1, [src, #-20]
str tmp1, [dst, #-20]
ldr tmp1, [src, #-16] /* 4 words to go. */
str tmp1, [dst, #-16]
ldr tmp1, [src, #-12]
str tmp1, [dst, #-12]
ldr tmp1, [src, #-8] /* 2 words to go. */
str tmp1, [dst, #-8]
ldr tmp1, [src, #-4]
str tmp1, [dst, #-4]
#endif
lsls count, count, #31
ldrhcs tmp1, [src], #2
ldrbne src, [src] /* Src is dead, use as a scratch. */
strhcs tmp1, [dst], #2
strbne src, [dst]
bx lr
.Lcpy_not_short:
/* At least 64 bytes to copy, but don't know the alignment yet. */
str tmp2, [sp, #-FRAME_SIZE]!
and tmp2, src, #7
and tmp1, dst, #7
cmp tmp1, tmp2
bne .Lcpy_notaligned
#ifdef USE_VFP
/* Magic dust alert! Force VFP on Cortex-A9. Experiments show
that the FP pipeline is much better at streaming loads and
stores. This is outside the critical loop. */
vmov.f32 s0, s0
#endif
/* SRC and DST have the same mutual 32-bit alignment, but we may
still need to pre-copy some bytes to get to natural alignment.
We bring DST into full 64-bit alignment. */
lsls tmp2, dst, #29
beq 1f
rsbs tmp2, tmp2, #0
sub count, count, tmp2, lsr #29
ldrmi tmp1, [src], #4
strmi tmp1, [dst], #4
lsls tmp2, tmp2, #2
ldrhcs tmp1, [src], #2
ldrbne tmp2, [src], #1
strhcs tmp1, [dst], #2
strbne tmp2, [dst], #1
1:
subs tmp2, count, #64 /* Use tmp2 for count. */
blt .Ltail63aligned
cmp tmp2, #512
bge .Lcpy_body_long
.Lcpy_body_medium: /* Count in tmp2. */
#ifdef USE_VFP
1:
vldr d0, [src, #0]
subs tmp2, tmp2, #64
vldr d1, [src, #8]
vstr d0, [dst, #0]
vldr d0, [src, #16]
vstr d1, [dst, #8]
vldr d1, [src, #24]
vstr d0, [dst, #16]
vldr d0, [src, #32]
vstr d1, [dst, #24]
vldr d1, [src, #40]
vstr d0, [dst, #32]
vldr d0, [src, #48]
vstr d1, [dst, #40]
vldr d1, [src, #56]
vstr d0, [dst, #48]
add src, src, #64
vstr d1, [dst, #56]
add dst, dst, #64
bge 1b
tst tmp2, #0x3f
beq .Ldone
.Ltail63aligned: /* Count in tmp2. */
and tmp1, tmp2, #0x38
add dst, dst, tmp1
add src, src, tmp1
rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
add pc, pc, tmp1
vldr d0, [src, #-56] /* 14 words to go. */
vstr d0, [dst, #-56]
vldr d0, [src, #-48] /* 12 words to go. */
vstr d0, [dst, #-48]
vldr d0, [src, #-40] /* 10 words to go. */
vstr d0, [dst, #-40]
vldr d0, [src, #-32] /* 8 words to go. */
vstr d0, [dst, #-32]
vldr d0, [src, #-24] /* 6 words to go. */
vstr d0, [dst, #-24]
vldr d0, [src, #-16] /* 4 words to go. */
vstr d0, [dst, #-16]
vldr d0, [src, #-8] /* 2 words to go. */
vstr d0, [dst, #-8]
#else
sub src, src, #8
sub dst, dst, #8
1:
ldrd A_l, A_h, [src, #8]
strd A_l, A_h, [dst, #8]
ldrd A_l, A_h, [src, #16]
strd A_l, A_h, [dst, #16]
ldrd A_l, A_h, [src, #24]
strd A_l, A_h, [dst, #24]
ldrd A_l, A_h, [src, #32]
strd A_l, A_h, [dst, #32]
ldrd A_l, A_h, [src, #40]
strd A_l, A_h, [dst, #40]
ldrd A_l, A_h, [src, #48]
strd A_l, A_h, [dst, #48]
ldrd A_l, A_h, [src, #56]
strd A_l, A_h, [dst, #56]
ldrd A_l, A_h, [src, #64]!
strd A_l, A_h, [dst, #64]!
subs tmp2, tmp2, #64
bge 1b
tst tmp2, #0x3f
bne 1f
ldr tmp2,[sp], #FRAME_SIZE
bx lr
1:
add src, src, #8
add dst, dst, #8
.Ltail63aligned: /* Count in tmp2. */
/* Copy up to 7 d-words of data. Similar to Ltail63unaligned, but
we know that the src and dest are 32-bit aligned so we can use
LDRD/STRD to improve efficiency. */
/* TMP2 is now negative, but we don't care about that. The bottom
six bits still tell us how many bytes are left to copy. */
and tmp1, tmp2, #0x38
add dst, dst, tmp1
add src, src, tmp1
rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
add pc, pc, tmp1
ldrd A_l, A_h, [src, #-56] /* 14 words to go. */
strd A_l, A_h, [dst, #-56]
ldrd A_l, A_h, [src, #-48] /* 12 words to go. */
strd A_l, A_h, [dst, #-48]
ldrd A_l, A_h, [src, #-40] /* 10 words to go. */
strd A_l, A_h, [dst, #-40]
ldrd A_l, A_h, [src, #-32] /* 8 words to go. */
strd A_l, A_h, [dst, #-32]
ldrd A_l, A_h, [src, #-24] /* 6 words to go. */
strd A_l, A_h, [dst, #-24]
ldrd A_l, A_h, [src, #-16] /* 4 words to go. */
strd A_l, A_h, [dst, #-16]
ldrd A_l, A_h, [src, #-8] /* 2 words to go. */
strd A_l, A_h, [dst, #-8]
#endif
tst tmp2, #4
ldrne tmp1, [src], #4
strne tmp1, [dst], #4
lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */
ldrhcs tmp1, [src], #2
ldrbne tmp2, [src]
strhcs tmp1, [dst], #2
strbne tmp2, [dst]
.Ldone:
ldr tmp2, [sp], #FRAME_SIZE
bx lr
.Lcpy_body_long: /* Count in tmp2. */
/* Long copy. We know that there's at least (prefetch_lines * 64)
bytes to go. */
#ifdef USE_VFP
/* Don't use PLD. Instead, read some data in advance of the current
copy position into a register. This should act like a PLD
operation but we won't have to repeat the transfer. */
vldr d3, [src, #0]
vldr d4, [src, #64]
vldr d5, [src, #128]
vldr d6, [src, #192]
vldr d7, [src, #256]
vldr d0, [src, #8]
vldr d1, [src, #16]
vldr d2, [src, #24]
add src, src, #32
subs tmp2, tmp2, #prefetch_lines * 64 * 2
blt 2f
1:
cpy_line_vfp d3, 0
cpy_line_vfp d4, 64
cpy_line_vfp d5, 128
add dst, dst, #3 * 64
add src, src, #3 * 64
cpy_line_vfp d6, 0
cpy_line_vfp d7, 64
add dst, dst, #2 * 64
add src, src, #2 * 64
subs tmp2, tmp2, #prefetch_lines * 64
bge 1b
2:
cpy_tail_vfp d3, 0
cpy_tail_vfp d4, 64
cpy_tail_vfp d5, 128
add src, src, #3 * 64
add dst, dst, #3 * 64
cpy_tail_vfp d6, 0
vstr d7, [dst, #64]
vldr d7, [src, #64]
vstr d0, [dst, #64 + 8]
vldr d0, [src, #64 + 8]
vstr d1, [dst, #64 + 16]
vldr d1, [src, #64 + 16]
vstr d2, [dst, #64 + 24]
vldr d2, [src, #64 + 24]
vstr d7, [dst, #64 + 32]
add src, src, #96
vstr d0, [dst, #64 + 40]
vstr d1, [dst, #64 + 48]
vstr d2, [dst, #64 + 56]
add dst, dst, #128
add tmp2, tmp2, #prefetch_lines * 64
b .Lcpy_body_medium
#else
/* Long copy. Use an SMS style loop to maximize the I/O
bandwidth of the core. We don't have enough spare registers
to synthesise prefetching, so use PLD operations. */
/* Pre-bias src and dst. */
sub src, src, #8
sub dst, dst, #8
pld [src, #8]
pld [src, #72]
subs tmp2, tmp2, #64
pld [src, #136]
ldrd A_l, A_h, [src, #8]
strd B_l, B_h, [sp, #8]
ldrd B_l, B_h, [src, #16]
strd C_l, C_h, [sp, #16]
ldrd C_l, C_h, [src, #24]
strd D_l, D_h, [sp, #24]
pld [src, #200]
ldrd D_l, D_h, [src, #32]!
b 1f
.p2align 6
2:
pld [src, #232]
strd A_l, A_h, [dst, #40]
ldrd A_l, A_h, [src, #40]
strd B_l, B_h, [dst, #48]
ldrd B_l, B_h, [src, #48]
strd C_l, C_h, [dst, #56]
ldrd C_l, C_h, [src, #56]
strd D_l, D_h, [dst, #64]!
ldrd D_l, D_h, [src, #64]!
subs tmp2, tmp2, #64
1:
strd A_l, A_h, [dst, #8]
ldrd A_l, A_h, [src, #8]
strd B_l, B_h, [dst, #16]
ldrd B_l, B_h, [src, #16]
strd C_l, C_h, [dst, #24]
ldrd C_l, C_h, [src, #24]
strd D_l, D_h, [dst, #32]
ldrd D_l, D_h, [src, #32]
bcs 2b
/* Save the remaining bytes and restore the callee-saved regs. */
strd A_l, A_h, [dst, #40]
add src, src, #40
strd B_l, B_h, [dst, #48]
ldrd B_l, B_h, [sp, #8]
strd C_l, C_h, [dst, #56]
ldrd C_l, C_h, [sp, #16]
strd D_l, D_h, [dst, #64]
ldrd D_l, D_h, [sp, #24]
add dst, dst, #72
tst tmp2, #0x3f
bne .Ltail63aligned
ldr tmp2, [sp], #FRAME_SIZE
bx lr
#endif
.Lcpy_notaligned:
pld [src]
pld [src, #64]
/* There's at least 64 bytes to copy, but there is no mutual
alignment. */
/* Bring DST to 64-bit alignment. */
lsls tmp2, dst, #29
pld [src, #(2 * 64)]
beq 1f
rsbs tmp2, tmp2, #0
sub count, count, tmp2, lsr #29
ldrmi tmp1, [src], #4
strmi tmp1, [dst], #4
lsls tmp2, tmp2, #2
ldrbne tmp1, [src], #1
ldrhcs tmp2, [src], #2
strbne tmp1, [dst], #1
strhcs tmp2, [dst], #2
1:
pld [src, #(3 * 64)]
subs count, count, #64
ldrmi tmp2, [sp], #FRAME_SIZE
bmi .Ltail63unaligned
pld [src, #(4 * 64)]
#ifdef USE_NEON
vld1.8 {d0-d3}, [src]!
vld1.8 {d4-d7}, [src]!
subs count, count, #64
bmi 2f
1:
pld [src, #(4 * 64)]
vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
vld1.8 {d0-d3}, [src]!
vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
vld1.8 {d4-d7}, [src]!
subs count, count, #64
bpl 1b
2:
vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
ands count, count, #0x3f
#else
/* Use an SMS style loop to maximize the I/O bandwidth. */
sub src, src, #4
sub dst, dst, #8
subs tmp2, count, #64 /* Use tmp2 for count. */
ldr A_l, [src, #4]
ldr A_h, [src, #8]
strd B_l, B_h, [sp, #8]
ldr B_l, [src, #12]
ldr B_h, [src, #16]
strd C_l, C_h, [sp, #16]
ldr C_l, [src, #20]
ldr C_h, [src, #24]
strd D_l, D_h, [sp, #24]
ldr D_l, [src, #28]
ldr D_h, [src, #32]!
b 1f
.p2align 6
2:
pld [src, #(5 * 64) - (32 - 4)]
strd A_l, A_h, [dst, #40]
ldr A_l, [src, #36]
ldr A_h, [src, #40]
strd B_l, B_h, [dst, #48]
ldr B_l, [src, #44]
ldr B_h, [src, #48]
strd C_l, C_h, [dst, #56]
ldr C_l, [src, #52]
ldr C_h, [src, #56]
strd D_l, D_h, [dst, #64]!
ldr D_l, [src, #60]
ldr D_h, [src, #64]!
subs tmp2, tmp2, #64
1:
strd A_l, A_h, [dst, #8]
ldr A_l, [src, #4]
ldr A_h, [src, #8]
strd B_l, B_h, [dst, #16]
ldr B_l, [src, #12]
ldr B_h, [src, #16]
strd C_l, C_h, [dst, #24]
ldr C_l, [src, #20]
ldr C_h, [src, #24]
strd D_l, D_h, [dst, #32]
ldr D_l, [src, #28]
ldr D_h, [src, #32]
bcs 2b
/* Save the remaining bytes and restore the callee-saved regs. */
strd A_l, A_h, [dst, #40]
add src, src, #36
strd B_l, B_h, [dst, #48]
ldrd B_l, B_h, [sp, #8]
strd C_l, C_h, [dst, #56]
ldrd C_l, C_h, [sp, #16]
strd D_l, D_h, [dst, #64]
ldrd D_l, D_h, [sp, #24]
add dst, dst, #72
ands count, tmp2, #0x3f
#endif
ldr tmp2, [sp], #FRAME_SIZE
bne .Ltail63unaligned
bx lr
END(memcpy)
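
The deleted routine above streams data several cache lines (prefetch_lines * 64 bytes) ahead of the copy position instead of relying purely on PLD. A rough C analogue of that scheme, purely as a sketch under the assumption of 64-byte lines; the real code keeps the streamed data in GP/NEON registers rather than calling memcpy per line:

#include <stddef.h>
#include <string.h>

#define LINE 64            /* assumed cache line size */
#define PREFETCH_LINES 5   /* matches 'prefetch_lines' above */

void* memcpy_prefetch(void* dst, const void* src, size_t n) {
    const char* s = (const char*) src;
    char* d = (char*) dst;
    while (n >= LINE) {
        __builtin_prefetch(s + PREFETCH_LINES * LINE);  /* read ahead of the copy point */
        memcpy(d, s, LINE);                             /* one cache line at a time */
        d += LINE; s += LINE; n -= LINE;
    }
    memcpy(d, s, n);  /* tail of fewer than 64 bytes */
    return dst;
}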

View File

@@ -33,16 +33,16 @@ size_t strlen(const char *s)
{
__builtin_prefetch(s);
__builtin_prefetch(s+32);
union {
const char *b;
const uint32_t *w;
uintptr_t i;
} u;
// these are some scratch variables for the asm code below
uint32_t v, t;
// initialize the string length to zero
size_t l = 0;
@@ -60,60 +60,52 @@ size_t strlen(const char *s)
// We need to process 32 bytes per loop to schedule PLD properly
// and achieve the maximum bus speed.
asm(
"ldr %[v], [%[s]], #4 \n"
"ldr %[v], [ %[s] ], #4 \n"
"sub %[l], %[l], %[s] \n"
"0: \n"
#if __ARM_HAVE_PLD
"pld [%[s], #64] \n"
"pld [ %[s], #64 ] \n"
#endif
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
#if !defined(__OPTIMIZE_SIZE__)
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
#endif
"beq 0b \n"
"1: \n"
@@ -125,14 +117,13 @@ size_t strlen(const char *s)
"beq 2f \n"
"add %[l], %[l], #1 \n"
"tst %[v], #0xFF0000 \n"
"it ne \n"
"addne %[l], %[l], #1 \n"
"2: \n"
: [l]"=&r"(l), [v]"=&r"(v), [t]"=&r"(t), [s]"=&r"(u.b)
: "%[l]"(l), "%[s]"(u.b), [mask]"r"(0x80808080UL)
: "cc"
);
done:
return l;
}
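
The assembly above scans one 32-bit word per iteration using the classic zero-byte test: (v - 0x01010101) & ~v & 0x80808080 is nonzero exactly when some byte of v is zero. A C sketch of the same idea, under the usual assumption that reading a whole aligned word never faults mid-string (the function names are illustrative, not bionic's):

#include <stddef.h>
#include <stdint.h>

static int word_has_zero_byte(uint32_t v) {
    /* Nonzero iff at least one byte of v is 0x00. */
    return ((v - 0x01010101u) & ~v & 0x80808080u) != 0;
}

size_t strlen_word_at_a_time(const char* s) {
    const char* p = s;
    while ((uintptr_t) p & 3) {          /* byte-scan up to a word boundary */
        if (*p == '\0') return (size_t)(p - s);
        ++p;
    }
    const uint32_t* w = (const uint32_t*) p;  /* aliasing is fine for a sketch */
    while (!word_has_zero_byte(*w))
        ++w;
    p = (const char*) w;                 /* locate the exact byte in the word */
    while (*p) ++p;
    return (size_t)(p - s);
}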

View File

@@ -39,7 +39,6 @@
*/
ENTRY(tgkill)
.save {r4-r7, ip, lr}
stmfd sp!, {r4-r7, ip, lr}
ldr r7, =__NR_tgkill
swi #0

View File

@@ -1,227 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#include "libc_events.h"
.syntax unified
.thumb
.thumb_func
// Get the length of src string, then get the source of the dst string.
// Check that the two lengths together don't exceed the threshold, then
// do a memcpy of the data.
ENTRY(__strcat_chk)
.cfi_startproc
pld [r0, #0]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
push {r4, r5}
.save {r4, r5}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
mov lr, r2
// Save the dst register to r5
mov r5, r0
// Zero out r4
eor r4, r4, r4
// r1 contains the address of the string to count.
.L_strlen_start:
mov r0, r1
ands r3, r1, #7
beq .L_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq .L_align_to_32
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
.L_align_to_32:
bcc .L_align_to_64
ands ip, r3, #2
beq .L_align_to_64
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
.L_align_to_64:
tst r3, #4
beq .L_mainloop
ldr r3, [r1], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
.p2align 2
.L_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_update_count_and_finish:
sub r3, r1, r0
sub r3, r3, #1
b .L_finish
.L_zero_in_first_register:
sub r3, r1, r0
lsls r2, ip, #17
bne .L_sub8_and_finish
bcs .L_sub7_and_finish
lsls ip, ip, #1
bne .L_sub6_and_finish
sub r3, r3, #5
b .L_finish
.L_sub8_and_finish:
sub r3, r3, #8
b .L_finish
.L_sub7_and_finish:
sub r3, r3, #7
b .L_finish
.L_sub6_and_finish:
sub r3, r3, #6
b .L_finish
.L_zero_in_second_register:
sub r3, r1, r0
lsls r2, ip, #17
bne .L_sub4_and_finish
bcs .L_sub3_and_finish
lsls ip, ip, #1
bne .L_sub2_and_finish
sub r3, r3, #1
b .L_finish
.L_sub4_and_finish:
sub r3, r3, #4
b .L_finish
.L_sub3_and_finish:
sub r3, r3, #3
b .L_finish
.L_sub2_and_finish:
sub r3, r3, #2
.L_finish:
cmp r4, #0
bne .L_strlen_done
// Time to get the dst string length.
mov r1, r5
// Save the original source address to r5.
mov r5, r0
// Save the current length (adding 1 for the terminator).
add r4, r3, #1
b .L_strlen_start
// r0 holds the pointer to the dst string.
// r3 holds the dst string length.
// r4 holds the src string length + 1.
.L_strlen_done:
add r2, r3, r4
cmp r2, lr
bhi __strcat_chk_failed
// Set up the registers for the memcpy code.
mov r1, r5
pld [r1, #64]
mov r2, r4
add r0, r0, r3
pop {r4, r5}
.cfi_endproc
END(__strcat_chk)
#define MEMCPY_BASE __strcat_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__strcat_chk_failed)
.cfi_startproc
.save {r0, lr}
.save {r4, r5}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__strcat_chk_failed)
.data
error_string:
.string "strcat buffer overflow"

View File

@@ -1,188 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#include "libc_events.h"
.syntax unified
.thumb
.thumb_func
// Get the length of the source string first, then do a memcpy of the data
// instead of a strcpy.
ENTRY(__strcpy_chk)
.cfi_startproc
pld [r0, #0]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
mov lr, r2
mov r0, r1
ands r3, r1, #7
beq .L_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq .L_align_to_32
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
.L_align_to_32:
bcc .L_align_to_64
ands ip, r3, #2
beq .L_align_to_64
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
.L_align_to_64:
tst r3, #4
beq .L_mainloop
ldr r3, [r0], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
.p2align 2
.L_mainloop:
ldrd r2, r3, [r0], #8
pld [r0, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_update_count_and_finish:
sub r3, r0, r1
sub r3, r3, #1
b .L_check_size
.L_zero_in_first_register:
sub r3, r0, r1
lsls r2, ip, #17
bne .L_sub8_and_finish
bcs .L_sub7_and_finish
lsls ip, ip, #1
bne .L_sub6_and_finish
sub r3, r3, #5
b .L_check_size
.L_sub8_and_finish:
sub r3, r3, #8
b .L_check_size
.L_sub7_and_finish:
sub r3, r3, #7
b .L_check_size
.L_sub6_and_finish:
sub r3, r3, #6
b .L_check_size
.L_zero_in_second_register:
sub r3, r0, r1
lsls r2, ip, #17
bne .L_sub4_and_finish
bcs .L_sub3_and_finish
lsls ip, ip, #1
bne .L_sub2_and_finish
sub r3, r3, #1
b .L_check_size
.L_sub4_and_finish:
sub r3, r3, #4
b .L_check_size
.L_sub3_and_finish:
sub r3, r3, #3
b .L_check_size
.L_sub2_and_finish:
sub r3, r3, #2
.L_check_size:
pld [r1, #0]
pld [r1, #64]
ldr r0, [sp]
cmp r3, lr
bhs __strcpy_chk_failed
// Add 1 for copy length to get the string terminator.
add r2, r3, #1
.cfi_endproc
END(__strcpy_chk)
#define MEMCPY_BASE __strcpy_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__strcpy_chk_failed)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__strcpy_chk_failed)
.data
error_string:
.string "strcpy buffer overflow"

View File

@@ -25,88 +25,122 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* Assumes neon instructions and a cache line size of 64 bytes. */
#include <machine/cpu-features.h>
#include <machine/asm.h>
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* This code assumes it is running on a processor that supports all arm v7
* instructions, that supports neon instructions, and that has a 64 byte
* cache line.
*/
// Prototype: void *memcpy (void *dst, const void *src, size_t count).
#include <machine/asm.h>
#include "libc_events.h"
.text
.syntax unified
.fpu neon
ENTRY(__memcpy_chk)
.cfi_startproc
cmp r2, r3
bhi __memcpy_chk_fail
// Fall through to memcpy...
.cfi_endproc
END(__memcpy_chk)
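In C terms, __memcpy_chk is just a guarded memcpy: the compiler passes the destination buffer's known size as a fourth argument, and the copy aborts when the count exceeds it. A minimal sketch under that assumption (illustrative names only, not bionic's C code):

#include <stddef.h>
#include <string.h>

extern void __memcpy_chk_fail(void);   // the assembly fail path; never returns

void* __memcpy_chk_sketch(void* dst, const void* src, size_t count, size_t dst_len) {
    if (count > dst_len) {             // the cmp/bhi above
        __memcpy_chk_fail();
    }
    return memcpy(dst, src, count);    // otherwise fall through to memcpy
}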
#define CACHE_LINE_SIZE 64
ENTRY(memcpy)
.cfi_startproc
pld [r1, #64]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
/* start preloading as early as possible */
pld [r1, #(CACHE_LINE_SIZE*0)]
stmfd sp!, {r0, lr}
pld [r1, #(CACHE_LINE_SIZE*1)]
.cfi_endproc
/* do we have at least 16-bytes to copy (needed for alignment below) */
cmp r2, #16
blo 5f
/* align destination to cache-line for the write-buffer */
rsb r3, r0, #0
ands r3, r3, #0xF
beq 0f
/* copy up to 15-bytes (count in r3) */
sub r2, r2, r3
movs ip, r3, lsl #31
ldrmib lr, [r1], #1
strmib lr, [r0], #1
ldrcsb ip, [r1], #1
ldrcsb lr, [r1], #1
strcsb ip, [r0], #1
strcsb lr, [r0], #1
movs ip, r3, lsl #29
bge 1f
// copies 4 bytes, destination 32-bits aligned
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
1: bcc 2f
// copies 8 bytes, destination 64-bits aligned
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
2:
0: /* preload immediately the next cache line, which we may need */
pld [r1, #(CACHE_LINE_SIZE*0)]
pld [r1, #(CACHE_LINE_SIZE*1)]
/* make sure we have at least 64 bytes to copy */
subs r2, r2, #64
blo 2f
/* Preload all the cache lines we need.
* NOTE: The number of pld below depends on CACHE_LINE_SIZE,
* ideally we would increase the distance in the main loop to
* avoid the goofy code below. In practice this doesn't seem to make
* a big difference.
* NOTE: The value CACHE_LINE_SIZE * 4 was chosen through
* experimentation.
*/
pld [r1, #(CACHE_LINE_SIZE*2)]
pld [r1, #(CACHE_LINE_SIZE*3)]
pld [r1, #(CACHE_LINE_SIZE*4)]
1: /* The main loop copies 64 bytes at a time */
vld1.8 {d0 - d3}, [r1]!
vld1.8 {d4 - d7}, [r1]!
pld [r1, #(CACHE_LINE_SIZE*4)]
subs r2, r2, #64
vst1.8 {d0 - d3}, [r0, :128]!
vst1.8 {d4 - d7}, [r0, :128]!
bhs 1b
2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
add r2, r2, #64
subs r2, r2, #32
blo 4f
3: /* 32 bytes at a time. These cache lines were already preloaded */
vld1.8 {d0 - d3}, [r1]!
subs r2, r2, #32
vst1.8 {d0 - d3}, [r0, :128]!
bhs 3b
4: /* less than 32 left */
add r2, r2, #32
tst r2, #0x10
beq 5f
// copies 16 bytes, 128-bits aligned
vld1.8 {d0, d1}, [r1]!
vst1.8 {d0, d1}, [r0, :128]!
5: /* copy up to 15-bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0]!
1: bge 2f
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
2: movs ip, r2, lsl #31
ldrmib r3, [r1], #1
ldrcsb ip, [r1], #1
ldrcsb lr, [r1], #1
strmib r3, [r0], #1
strcsb ip, [r0], #1
strcsb lr, [r0], #1
ldmfd sp!, {r0, lr}
bx lr
END(memcpy)
#define MEMCPY_BASE __memcpy_base
#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__memcpy_chk_fail)
.cfi_startproc
// Preserve lr for backtrace.
push {lr}
.save {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
ldr r1, error_code
1:
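// pc here reads as the address of the add plus 8 (ARM state), which is
// why the error_message literal below is biased by (1b+8).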
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
.cfi_endproc
END(__memcpy_chk_fail)
.data
error_string:
.string "memcpy buffer overflow"
@@ -1,329 +0,0 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
ENTRY(MEMCPY_BASE)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
// Assumes that n >= 0, and dst, src are valid pointers.
// For any size less than 832, use the neon code that doesn't
// care about src alignment. This avoids any checks for src
// alignment, and offers the best improvement, since smaller
// copies are dominated by the overhead of the code before and
// after the main loop.
// For larger copies, if src and dst cannot both be aligned to
// word boundaries, use the neon code.
// For all other copies, align dst to a double word boundary
// and copy using LDRD/STRD instructions.
cmp r2, #16
blo .L_copy_less_than_16_unknown_align
// TODO: The aligned copy code is extremely slow when copying some large
// buffers, so always go through the unaligned path for now.
//cmp r2, #832
//bge .L_check_alignment
.L_copy_unknown_alignment:
// Unknown alignment of src and dst.
// Assumes that the first few bytes have already been prefetched.
// Align destination to 128 bits. The mainloop store instructions
// require this alignment or they will throw an exception.
rsb r3, r0, #0
ands r3, r3, #0xF
beq 2f
// Copy up to 15 bytes (count in r3).
sub r2, r2, r3
movs ip, r3, lsl #31
itt mi
ldrbmi lr, [r1], #1
strbmi lr, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1], #1
strbcs ip, [r0], #1
strbcs lr, [r0], #1
movs ip, r3, lsl #29
bge 1f
// Copies 4 bytes, dst 32 bits aligned before, at least 64 bits after.
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
1: bcc 2f
// Copies 8 bytes, dst 64 bits aligned before, at least 128 bits after.
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
2: // Make sure we have at least 64 bytes to copy.
subs r2, r2, #64
blo 2f
1: // The main loop copies 64 bytes at a time.
vld1.8 {d0 - d3}, [r1]!
vld1.8 {d4 - d7}, [r1]!
pld [r1, #(64*4)]
subs r2, r2, #64
vst1.8 {d0 - d3}, [r0, :128]!
vst1.8 {d4 - d7}, [r0, :128]!
bhs 1b
2: // Fix-up the remaining count and make sure we have >= 32 bytes left.
adds r2, r2, #32
blo 3f
// 32 bytes. These cache lines were already preloaded.
vld1.8 {d0 - d3}, [r1]!
sub r2, r2, #32
vst1.8 {d0 - d3}, [r0, :128]!
3: // Less than 32 left.
add r2, r2, #32
tst r2, #0x10
beq .L_copy_less_than_16_unknown_align
// Copies 16 bytes, destination 128 bits aligned.
vld1.8 {d0, d1}, [r1]!
vst1.8 {d0, d1}, [r0, :128]!
.L_copy_less_than_16_unknown_align:
// Copy up to 15 bytes (count in r2).
movs ip, r2, lsl #29
bcc 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0]!
1: bge 2f
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
2: // Copy 0 to 4 bytes.
lsls r2, r2, #31
itt ne
ldrbne lr, [r1], #1
strbne lr, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1]
strbcs ip, [r0], #1
strbcs lr, [r0]
pop {r0, pc}
.L_check_alignment:
// If src and dst cannot both be aligned to a word boundary,
// use the unaligned copy version.
eor r3, r0, r1
ands r3, r3, #0x3
bne .L_copy_unknown_alignment
.cfi_endproc
END(MEMCPY_BASE)
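The dispatch policy in the comment block at the top of MEMCPY_BASE comes down to one size test and one relative-alignment test (with the aligned path currently disabled by the TODO above). A rough C sketch, with hypothetical helper names for the two copy paths:

#include <stdint.h>
#include <stddef.h>

extern void* memcpy_neon_unaligned(void* dst, const void* src, size_t n);  // hypothetical
extern void* memcpy_ldrd_strd(void* dst, const void* src, size_t n);       // hypothetical

void* memcpy_base_sketch(void* dst, const void* src, size_t n) {
    // Small copies, and copies whose src and dst cannot both be word
    // aligned, take the alignment-agnostic NEON path.
    if (n < 832 || (((uintptr_t)dst ^ (uintptr_t)src) & 3) != 0) {
        return memcpy_neon_unaligned(dst, src, n);
    }
    return memcpy_ldrd_strd(dst, src, n);  // align dst, then copy with LDRD/STRD
}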
ENTRY(MEMCPY_BASE_ALIGNED)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
// To try to improve performance, the stack layout is changed,
// i.e., it does not keep the usual convention of the highest
// numbered register at the highest address.
strd r4, r5, [sp, #-8]!
.save {r4, r5}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
strd r6, r7, [sp, #-8]!
.save {r6, r7}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r6, 0
.cfi_rel_offset r7, 4
strd r8, r9, [sp, #-8]!
.save {r8, r9}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r8, 0
.cfi_rel_offset r9, 4
// Optimized for already aligned dst code.
ands ip, r0, #3
bne .L_dst_not_word_aligned
.L_word_aligned:
// Align the destination buffer to 8 bytes, to make sure double
// loads and stores don't cross a cache line boundary,
// as they are then more expensive even if the data is in the cache
// (require two load/store issue cycles instead of one).
// If only one of the buffers is not 8 bytes aligned,
// then it's more important to align dst than src,
// because there is more penalty for stores
// than loads that cross a cacheline boundary.
// This check and realignment are only done if there is >= 832
// bytes to copy.
// Dst is word aligned, but check if it is already double word aligned.
ands r3, r0, #4
beq 1f
ldr r3, [r1], #4
str r3, [r0], #4
sub r2, #4
1: // Can only get here if > 64 bytes to copy, so skip the r2 check.
sub r2, #64
2: // Every loop iteration copies 64 bytes.
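// .irp expands its body once per listed offset, so the two lines below
// become five ldrd/strd pairs covering bytes 0-39 of the block.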
.irp offset, #0, #8, #16, #24, #32
ldrd r4, r5, [r1, \offset]
strd r4, r5, [r0, \offset]
.endr
ldrd r4, r5, [r1, #40]
ldrd r6, r7, [r1, #48]
ldrd r8, r9, [r1, #56]
// Keep the pld as far from the next load as possible.
// The amount to prefetch was determined experimentally using
// large sizes, and verifying the prefetch size does not affect
// the smaller copies too much.
// WARNING: If the ldrd and strd instructions get too far away
// from each other, performance suffers. Three loads
// in a row is the best tradeoff.
pld [r1, #(64*16)]
strd r4, r5, [r0, #40]
strd r6, r7, [r0, #48]
strd r8, r9, [r0, #56]
add r0, r0, #64
add r1, r1, #64
subs r2, r2, #64
bge 2b
// Fix-up the remaining count and make sure we have >= 32 bytes left.
adds r2, r2, #32
blo 4f
// Copy 32 bytes. These cache lines were already preloaded.
.irp offset, #0, #8, #16, #24
ldrd r4, r5, [r1, \offset]
strd r4, r5, [r0, \offset]
.endr
add r1, r1, #32
add r0, r0, #32
sub r2, r2, #32
4: // Less than 32 left.
add r2, r2, #32
tst r2, #0x10
beq 5f
// Copy 16 bytes.
.irp offset, #0, #8
ldrd r4, r5, [r1, \offset]
strd r4, r5, [r0, \offset]
.endr
add r1, r1, #16
add r0, r0, #16
5: // Copy up to 15 bytes (count in r2).
movs ip, r2, lsl #29
bcc 1f
// Copy 8 bytes.
ldrd r4, r5, [r1], #8
strd r4, r5, [r0], #8
1: bge 2f
// Copy 4 bytes.
ldr r4, [r1], #4
str r4, [r0], #4
2: // Copy 0 to 4 bytes.
lsls r2, r2, #31
itt ne
ldrbne lr, [r1], #1
strbne lr, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1]
strbcs ip, [r0], #1
strbcs lr, [r0]
// Restore registers: optimized pop {r0, pc}
ldrd r8, r9, [sp], #8
ldrd r6, r7, [sp], #8
ldrd r4, r5, [sp], #8
pop {r0, pc}
.L_dst_not_word_aligned:
// Align dst to word.
rsb ip, ip, #4
cmp ip, #2
itt gt
ldrbgt lr, [r1], #1
strbgt lr, [r0], #1
itt ge
ldrbge lr, [r1], #1
strbge lr, [r0], #1
ldrb lr, [r1], #1
strb lr, [r0], #1
sub r2, r2, ip
// Src is guaranteed to be at least word aligned by this point.
b .L_word_aligned
.cfi_endproc
END(MEMCPY_BASE_ALIGNED)
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,173 +28,79 @@
#include <machine/cpu-features.h>
#include <machine/asm.h>
#include "libc_events.h"
/*
 * Optimized memset() for ARM.
 *
 * memset() returns its first argument.
 */
.fpu neon
.syntax unified
ENTRY(__memset_chk)
.cfi_startproc
cmp r2, r3
bls .L_done
// Preserve lr for backtrace.
.save {lr}
push {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
.cfi_endproc
END(__memset_chk)
ENTRY(bzero)
.cfi_startproc
mov r2, r1
mov r1, #0
.L_done:
// Fall through to memset...
.cfi_endproc
END(bzero)
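Rough C equivalents of the two entry points above: bzero simply reshuffles its arguments and falls through, and __memset_chk aborts when the fill length exceeds the buffer size the compiler computed (a sketch only; the fail-handler name is hypothetical, standing in for the inline fail path):

#include <stddef.h>
#include <string.h>

extern void fortify_fail_sketch(void);   // hypothetical; never returns

void bzero_sketch(void* dst, size_t n) {
    memset(dst, 0, n);
}

void* __memset_chk_sketch(void* dst, int byte, size_t n, size_t dst_len) {
    if (n > dst_len) {                   // cmp r2, r3 / bls .L_done above
        fortify_fail_sketch();
    }
    return memset(dst, byte, n);
}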
ENTRY(memset)
.cfi_startproc
.save {r0}
stmfd sp!, {r0}
.cfi_def_cfa_offset 4
.cfi_rel_offset r0, 0
// The new algorithm is slower for sizes < 16, so use the old
// neon code in that case.
vdup.8 q0, r1
/* do we have at least 16-bytes to write (needed for alignment below) */
cmp r2, #16
blo .L_set_less_than_16_unknown_align
blo 3f
// Use strd, which requires an even/odd register pair, and arrange the
// values so that:
// r0 and r1 contain the memset value
// r2 is the number of bytes to set
// r3 is the destination pointer
mov r3, r0
/* align destination to 16 bytes for the write-buffer */
rsb r3, r0, #0
ands r3, r3, #0xF
beq 2f
// Replicate the byte value into every byte of r1.
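// lsl #24 places the byte in bits 31-24; each orr with a shifted copy
// doubles the filled span until all four bytes hold the value.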
mov r1, r1, lsl #24
orr r1, r1, r1, lsr #8
orr r1, r1, r1, lsr #16
/* write up to 15-bytes (count in r3) */
sub r2, r2, r3
movs ip, r3, lsl #31
strmib r1, [r0], #1
strcsb r1, [r0], #1
strcsb r1, [r0], #1
movs ip, r3, lsl #29
bge 1f
.L_check_alignment:
// Align destination to a double word to avoid the strd crossing
// a cache line boundary.
ands ip, r3, #7
bne .L_do_double_word_align
// writes 4 bytes, 32-bits aligned
vst1.32 {d0[0]}, [r0, :32]!
1: bcc 2f
.L_double_word_aligned:
mov r0, r1
// writes 8 bytes, 64-bits aligned
vst1.8 {d0}, [r0, :64]!
2:
/* make sure we have at least 32 bytes to write */
subs r2, r2, #32
blo 2f
vmov q1, q0
subs r2, #64
blo .L_set_less_than_64
1: /* The main loop writes 32 bytes at a time */
subs r2, r2, #32
vst1.8 {d0 - d3}, [r0, :128]!
bhs 1b
1: // Main loop sets 64 bytes at a time.
.irp offset, #0, #8, #16, #24, #32, #40, #48, #56
strd r0, r1, [r3, \offset]
.endr
2: /* less than 32 left */
add r2, r2, #32
tst r2, #0x10
beq 3f
add r3, #64
subs r2, #64
bge 1b
.L_set_less_than_64:
// Restore r2 to the count of bytes left to set.
add r2, #64
lsls ip, r2, #27
bcc .L_set_less_than_32
// Set 32 bytes.
.irp offset, #0, #8, #16, #24
strd r0, r1, [r3, \offset]
.endr
add r3, #32
.L_set_less_than_32:
bpl .L_set_less_than_16
// Set 16 bytes.
.irp offset, #0, #8
strd r0, r1, [r3, \offset]
.endr
add r3, #16
.L_set_less_than_16:
// Less than 16 bytes to set.
lsls ip, r2, #29
bcc .L_set_less_than_8
// Set 8 bytes.
strd r0, r1, [r3], #8
.L_set_less_than_8:
bpl .L_set_less_than_4
// Set 4 bytes
str r1, [r3], #4
.L_set_less_than_4:
lsls ip, r2, #31
it ne
strbne r1, [r3], #1
itt cs
strbcs r1, [r3], #1
strbcs r1, [r3]
ldmfd sp!, {r0}
bx lr
.L_do_double_word_align:
rsb ip, ip, #8
sub r2, r2, ip
movs r0, ip, lsl #31
it mi
strbmi r1, [r3], #1
itt cs
strbcs r1, [r3], #1
strbcs r1, [r3], #1
// Dst is at least word aligned by this point.
cmp ip, #4
blo .L_double_word_aligned
str r1, [r3], #4
b .L_double_word_aligned
.L_set_less_than_16_unknown_align:
// Set up to 15 bytes.
vdup.8 d0, r1
// writes 16 bytes, 128-bits aligned
vst1.8 {d0, d1}, [r0, :128]!
3: /* write up to 15-bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
vst1.8 {d0}, [r0]!
1: bge 2f
vst1.32 {d0[0]}, [r0]!
2: movs ip, r2, lsl #31
it mi
strbmi r1, [r0], #1
itt cs
strbcs r1, [r0], #1
strbcs r1, [r0], #1
strmib r1, [r0], #1
strcsb r1, [r0], #1
strcsb r1, [r0], #1
ldmfd sp!, {r0}
bx lr
.cfi_endproc
END(memset)
.data
error_string:
.string "memset buffer overflow"
@@ -1,568 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
.syntax unified
.thumb
.thumb_func
.macro m_push
push {r0, r4, r5, lr}
.endm // m_push
.macro m_pop
pop {r0, r4, r5, pc}
.endm // m_pop
.macro m_scan_byte
ldrb r3, [r0]
cbz r3, strcat_r0_scan_done
add r0, #1
.endm // m_scan_byte
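// m_copy_byte copies one byte from src to dst and then dispatches on it:
// cbz exits to the finish label at the terminator, cbnz continues to the
// given label on a live byte.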
.macro m_copy_byte reg, cmd, label
ldrb \reg, [r1], #1
strb \reg, [r0], #1
\cmd \reg, \label
.endm // m_copy_byte
ENTRY(strcat)
// Quick check to see if src is empty.
ldrb r2, [r1]
pld [r1, #0]
cbnz r2, strcat_continue
bx lr
strcat_continue:
// To speed up really small dst strings, unroll checking the first 4 bytes.
m_push
m_scan_byte
m_scan_byte
m_scan_byte
m_scan_byte
ands r3, r0, #7
beq strcat_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcat_align_to_32
ldrb r5, [r0]
cbz r5, strcat_r0_scan_done
add r0, r0, #1
strcat_align_to_32:
bcc strcat_align_to_64
ldrb r2, [r0]
cbz r2, strcat_r0_scan_done
add r0, r0, #1
ldrb r4, [r0]
cbz r4, strcat_r0_scan_done
add r0, r0, #1
strcat_align_to_64:
tst r3, #4
beq strcat_mainloop
ldr r3, [r0], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcat_zero_in_second_register
b strcat_mainloop
strcat_r0_scan_done:
// For short copies, hard-code checking the first 8 bytes since this
// new code doesn't win until after about 8 bytes.
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
strcpy_finish:
m_pop
strcpy_continue:
ands r3, r0, #7
beq strcpy_check_src_align
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcpy_align_to_32
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
strcpy_align_to_32:
bcc strcpy_align_to_64
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
strcpy_align_to_64:
tst r3, #4
beq strcpy_check_src_align
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
str r2, [r0], #4
strcpy_check_src_align:
// At this point dst is aligned to a double word, check if src
// is also aligned to a double word.
ands r3, r1, #7
bne strcpy_unaligned_copy
.p2align 2
strcpy_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_mainloop
strcpy_complete:
m_pop
strcpy_zero_in_first_register:
lsls lr, ip, #17
bne strcpy_copy1byte
bcs strcpy_copy2bytes
lsls ip, ip, #1
bne strcpy_copy3bytes
strcpy_copy4bytes:
// Copy 4 bytes to the destination.
str r2, [r0]
m_pop
strcpy_copy1byte:
strb r2, [r0]
m_pop
strcpy_copy2bytes:
strh r2, [r0]
m_pop
strcpy_copy3bytes:
strh r2, [r0], #2
lsr r2, #16
strb r2, [r0]
m_pop
strcpy_zero_in_second_register:
lsls lr, ip, #17
bne strcpy_copy5bytes
bcs strcpy_copy6bytes
lsls ip, ip, #1
bne strcpy_copy7bytes
// Copy 8 bytes to the destination.
strd r2, r3, [r0]
m_pop
strcpy_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
m_pop
strcpy_copy6bytes:
str r2, [r0], #4
strh r3, [r0]
m_pop
strcpy_copy7bytes:
str r2, [r0], #4
strh r3, [r0], #2
lsr r3, #16
strb r3, [r0]
m_pop
strcpy_unaligned_copy:
// Dst is aligned to a double word, while src is at an unknown alignment.
// There are 7 different versions of the unaligned copy code
// to prevent overreading the src. The mainloop of every single version
// will store 64 bits per loop. The difference is how much of src can
// be read without potentially crossing a page boundary.
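// tbb branches via the byte table below, indexed by src & 7; each entry
// holds half the forward offset from the table base (Thumb halfwords).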
tbb [pc, r3]
strcpy_unaligned_branchtable:
.byte 0
.byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
.p2align 2
// Can read 7 bytes before possibly crossing a page.
strcpy_unalign7:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r3, [r1]
cbz r3, strcpy_unalign7_copy5bytes
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign7_copy6bytes
ldrb r5, [r1, #2]
cbz r5, strcpy_unalign7_copy7bytes
ldr r3, [r1], #4
pld [r1, #64]
lsrs ip, r3, #24
strd r2, r3, [r0], #8
beq strcpy_unalign_return
b strcpy_unalign7
strcpy_unalign7_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
strcpy_unalign_return:
m_pop
strcpy_unalign7_copy6bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
m_pop
strcpy_unalign7_copy7bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
strb r5, [r0], #1
m_pop
.p2align 2
// Can read 6 bytes before possibly crossing a page.
strcpy_unalign6:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldrb r5, [r1, #1]
cbz r5, strcpy_unalign_copy6bytes
ldr r3, [r1], #4
pld [r1, #64]
tst r3, #0xff0000
beq strcpy_copy7bytes
lsrs ip, r3, #24
strd r2, r3, [r0], #8
beq strcpy_unalign_return
b strcpy_unalign6
.p2align 2
// Can read 5 bytes before possibly crossing a page.
strcpy_unalign5:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign5
strcpy_unalign_copy5bytes:
str r2, [r0], #4
strb r4, [r0]
m_pop
strcpy_unalign_copy6bytes:
str r2, [r0], #4
strb r4, [r0], #1
strb r5, [r0]
m_pop
.p2align 2
// Can read 4 bytes before possibly crossing a page.
strcpy_unalign4:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign4
.p2align 2
// Can read 3 bytes before possibly crossing a page.
strcpy_unalign3:
ldrb r2, [r1]
cbz r2, strcpy_unalign3_copy1byte
ldrb r3, [r1, #1]
cbz r3, strcpy_unalign3_copy2bytes
ldrb r4, [r1, #2]
cbz r4, strcpy_unalign3_copy3bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
lsrs lr, r2, #24
beq strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign3
strcpy_unalign3_copy1byte:
strb r2, [r0]
m_pop
strcpy_unalign3_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_pop
strcpy_unalign3_copy3bytes:
strb r2, [r0], #1
strb r3, [r0], #1
strb r4, [r0]
m_pop
.p2align 2
// Can read 2 bytes before possibly crossing a page.
strcpy_unalign2:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign_copy2bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
tst r2, #0xff0000
beq strcpy_copy3bytes
lsrs ip, r2, #24
beq strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign2
.p2align 2
// Can read 1 byte before possibly crossing a page.
strcpy_unalign1:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign1
strcpy_unalign_copy1byte:
strb r2, [r0]
m_pop
strcpy_unalign_copy2bytes:
strb r2, [r0], #1
strb r4, [r0]
m_pop
.p2align 2
strcat_mainloop:
ldrd r2, r3, [r0], #8
pld [r0, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcat_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcat_zero_in_second_register
b strcat_mainloop
strcat_zero_in_first_register:
// Prefetch the src now; it's going to be used soon.
pld [r1, #0]
lsls lr, ip, #17
bne strcat_sub8
bcs strcat_sub7
lsls ip, ip, #1
bne strcat_sub6
sub r0, r0, #5
b strcat_r0_scan_done
strcat_sub8:
sub r0, r0, #8
b strcat_r0_scan_done
strcat_sub7:
sub r0, r0, #7
b strcat_r0_scan_done
strcat_sub6:
sub r0, r0, #6
b strcat_r0_scan_done
strcat_zero_in_second_register:
// Prefetch the src now; it's going to be used soon.
pld [r1, #0]
lsls lr, ip, #17
bne strcat_sub4
bcs strcat_sub3
lsls ip, ip, #1
bne strcat_sub2
sub r0, r0, #1
b strcat_r0_scan_done
strcat_sub4:
sub r0, r0, #4
b strcat_r0_scan_done
strcat_sub3:
sub r0, r0, #3
b strcat_r0_scan_done
strcat_sub2:
sub r0, r0, #2
b strcat_r0_scan_done
END(strcat)
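Structurally, strcat above is a strlen-style scan over dst followed by the shared strcpy body. A plain C sketch of that flow:

char* strcat_sketch(char* dst, const char* src) {
    char* d = dst;
    while (*d) {
        ++d;                            // strcat_mainloop: find dst's terminator
    }
    while ((*d++ = *src++) != '\0') {
        // strcpy body: copy bytes until the terminator has been written
    }
    return dst;
}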
@@ -123,13 +123,8 @@ ENTRY(strcmp)
.macro init
/* Macro to save temporary registers and prepare magic values. */
subs sp, sp, #16
.cfi_def_cfa_offset 16
strd r4, r5, [sp, #8]
.cfi_rel_offset r4, 8
.cfi_rel_offset r5, 12
strd r6, r7, [sp]
.cfi_rel_offset r6, 0
.cfi_rel_offset r7, 4
mvn r6, #0 /* all F */
mov r7, #0 /* all 0 */
.endm /* init */
@@ -170,20 +165,18 @@ ENTRY(strcmp)
#endif /* not __ARMEB__ */
.endm /* setup_return */
.cfi_startproc
pld [r0, #0]
pld [r1, #0]
/* Are both strings double-word aligned? */
orr ip, r0, r1
tst ip, #7
bne .L_do_align
bne do_align
/* Fast path. */
.save {r4-r7}
init
.L_doubleword_aligned:
doubleword_aligned:
/* Get here when the strings to compare are double-word aligned. */
/* Compare two words in every iteration. */
@@ -196,14 +189,14 @@ ENTRY(strcmp)
ldrd r2, r3, [r0], #8
ldrd r4, r5, [r1], #8
magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
magic_compare_and_branch w1=r2, w2=r4, label=return_24
magic_compare_and_branch w1=r3, w2=r5, label=return_35
b 2b
.L_do_align:
do_align:
/* Is the first string word-aligned? */
ands ip, r0, #3
beq .L_word_aligned_r0
beq word_aligned_r0
/* Fast compare byte by byte until the first string is word-aligned. */
/* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
@@ -211,58 +204,58 @@ ENTRY(strcmp)
bic r0, r0, #3
ldr r2, [r0], #4
lsls ip, ip, #31
beq .L_byte2
bcs .L_byte3
beq byte2
bcs byte3
.L_byte1:
byte1:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE1_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbz reg=r3, label=.L_fast_return
bne fast_return
m_cbz reg=r3, label=fast_return
.L_byte2:
byte2:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE2_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbz reg=r3, label=.L_fast_return
bne fast_return
m_cbz reg=r3, label=fast_return
.L_byte3:
byte3:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE3_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbnz reg=r3, label=.L_word_aligned_r0
bne fast_return
m_cbnz reg=r3, label=word_aligned_r0
.L_fast_return:
fast_return:
mov r0, ip
bx lr
.L_word_aligned_r0:
word_aligned_r0:
init
/* The first string is word-aligned. */
/* Is the second string word-aligned? */
ands ip, r1, #3
bne .L_strcmp_unaligned
bne strcmp_unaligned
.L_word_aligned:
word_aligned:
/* The strings are word-aligned. */
/* Is the first string double-word aligned? */
tst r0, #4
beq .L_doubleword_aligned_r0
beq doubleword_aligned_r0
/* If r0 is not double-word aligned yet, align it by loading
and comparing the next word from each string. */
ldr r2, [r0], #4
ldr r4, [r1], #4
magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
magic_compare_and_branch w1=r2 w2=r4 label=return_24
.L_doubleword_aligned_r0:
doubleword_aligned_r0:
/* Get here when r0 is double-word aligned. */
/* Is r1 doubleword_aligned? */
tst r1, #4
beq .L_doubleword_aligned
beq doubleword_aligned
/* Get here when the strings to compare are word-aligned,
r0 is double-word aligned, but r1 is not double-word aligned. */
@@ -278,9 +271,9 @@ ENTRY(strcmp)
/* Load the next double-word from each string and compare. */
ldrd r2, r3, [r0], #8
magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
magic_compare_and_branch w1=r2 w2=r5 label=return_25
ldrd r4, r5, [r1], #8
magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
magic_compare_and_branch w1=r3 w2=r4 label=return_34
b 3b
.macro miscmp_word offsetlo offsethi
@@ -304,47 +297,47 @@ ENTRY(strcmp)
and r2, r3, r6, S2LOMEM #\offsetlo
it eq
cmpeq r2, r5
bne .L_return_25
bne return_25
ldr r5, [r1], #4
cmp ip, #0
eor r3, r2, r3
S2HIMEM r2, r5, #\offsethi
it eq
cmpeq r3, r2
bne .L_return_32
bne return_32
b 7b
.endm /* miscmp_word */
.L_strcmp_unaligned:
strcmp_unaligned:
/* r0 is word-aligned, r1 is at offset ip from a word. */
/* Align r1 to the (previous) word-boundary. */
bic r1, r1, #3
/* Unaligned comparison word by word using LDRs. */
cmp ip, #2
beq .L_miscmp_word_16 /* If ip == 2. */
bge .L_miscmp_word_24 /* If ip == 3. */
beq miscmp_word_16 /* If ip == 2. */
bge miscmp_word_24 /* If ip == 3. */
miscmp_word offsetlo=8 offsethi=24 /* If ip == 1. */
.L_miscmp_word_16: miscmp_word offsetlo=16 offsethi=16
.L_miscmp_word_24: miscmp_word offsetlo=24 offsethi=8
miscmp_word_16: miscmp_word offsetlo=16 offsethi=16
miscmp_word_24: miscmp_word offsetlo=24 offsethi=8
.L_return_32:
return_32:
setup_return w1=r3, w2=r2
b .L_do_return
.L_return_34:
b do_return
return_34:
setup_return w1=r3, w2=r4
b .L_do_return
.L_return_25:
b do_return
return_25:
setup_return w1=r2, w2=r5
b .L_do_return
.L_return_35:
b do_return
return_35:
setup_return w1=r3, w2=r5
b .L_do_return
.L_return_24:
b do_return
return_24:
setup_return w1=r2, w2=r4
.L_do_return:
do_return:
#ifdef __ARMEB__
mov r0, ip
@@ -356,16 +349,11 @@ ENTRY(strcmp)
ldrd r6, r7, [sp]
ldrd r4, r5, [sp, #8]
adds sp, sp, #16
.cfi_def_cfa_offset 0
.cfi_restore r4
.cfi_restore r5
.cfi_restore r6
.cfi_restore r7
/* There is a zero or a different byte between r1 and r2. */
/* r0 contains a mask of all-zero bytes in r1. */
/* Using r0 and not ip here because cbz requires low register. */
m_cbz reg=r0, label=.L_compute_return_value
m_cbz reg=r0, label=compute_return_value
clz r0, r0
/* r0 contains the number of bits on the left of the first all-zero byte in r1. */
rsb r0, r0, #24
@@ -373,7 +361,7 @@ ENTRY(strcmp)
lsr r1, r1, r0
lsr r2, r2, r0
.L_compute_return_value:
compute_return_value:
movs r0, #1
cmp r1, r2
/* The return value is computed as follows.
@@ -386,5 +374,4 @@ ENTRY(strcmp)
it ls
sbcls r0, r0, r0
bx lr
.cfi_endproc
END(strcmp)
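The closing movs/it/sbcls sequence folds the last word comparison into strcmp's return contract. A C sketch of just that tail (operand names are illustrative):

#include <stdint.h>

int strcmp_tail_sketch(uint32_t b1, uint32_t b2) {
    if (b1 > b2) return 1;    // hi: the conditional sbc is skipped, r0 stays 1
    if (b1 < b2) return -1;   // carry clear: sbc r0, r0, r0 yields -1
    return 0;                 // equal: carry set, sbc yields 0
}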
@@ -1,451 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
.syntax unified
.thumb
.thumb_func
.macro m_push
push {r0, r4, r5, lr}
.endm // m_push
.macro m_pop
pop {r0, r4, r5, pc}
.endm // m_pop
.macro m_copy_byte reg, cmd, label
ldrb \reg, [r1], #1
strb \reg, [r0], #1
\cmd \reg, \label
.endm // m_copy_byte
ENTRY(strcpy)
// For short copies, hard-code checking the first 8 bytes since this
// new code doesn't win until after about 8 bytes.
m_push
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
strcpy_finish:
m_pop
strcpy_continue:
pld [r1, #0]
ands r3, r0, #7
beq strcpy_check_src_align
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcpy_align_to_32
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
strcpy_align_to_32:
bcc strcpy_align_to_64
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
strcpy_align_to_64:
tst r3, #4
beq strcpy_check_src_align
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
str r2, [r0], #4
strcpy_check_src_align:
// At this point dst is aligned to a double word, check if src
// is also aligned to a double word.
ands r3, r1, #7
bne strcpy_unaligned_copy
.p2align 2
strcpy_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_mainloop
strcpy_complete:
m_pop
strcpy_zero_in_first_register:
lsls lr, ip, #17
bne strcpy_copy1byte
bcs strcpy_copy2bytes
lsls ip, ip, #1
bne strcpy_copy3bytes
strcpy_copy4bytes:
// Copy 4 bytes to the destination.
str r2, [r0]
m_pop
strcpy_copy1byte:
strb r2, [r0]
m_pop
strcpy_copy2bytes:
strh r2, [r0]
m_pop
strcpy_copy3bytes:
strh r2, [r0], #2
lsr r2, #16
strb r2, [r0]
m_pop
strcpy_zero_in_second_register:
lsls lr, ip, #17
bne strcpy_copy5bytes
bcs strcpy_copy6bytes
lsls ip, ip, #1
bne strcpy_copy7bytes
// Copy 8 bytes to the destination.
strd r2, r3, [r0]
m_pop
strcpy_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
m_pop
strcpy_copy6bytes:
str r2, [r0], #4
strh r3, [r0]
m_pop
strcpy_copy7bytes:
str r2, [r0], #4
strh r3, [r0], #2
lsr r3, #16
strb r3, [r0]
m_pop
strcpy_unaligned_copy:
// Dst is aligned to a double word, while src is at an unknown alignment.
// There are 7 different versions of the unaligned copy code
// to prevent overreading the src. The mainloop of every single version
// will store 64 bits per loop. The difference is how much of src can
// be read without potentially crossing a page boundary.
tbb [pc, r3]
strcpy_unaligned_branchtable:
.byte 0
.byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
.p2align 2
// Can read 7 bytes before possibly crossing a page.
strcpy_unalign7:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r3, [r1]
cbz r3, strcpy_unalign7_copy5bytes
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign7_copy6bytes
ldrb r5, [r1, #2]
cbz r5, strcpy_unalign7_copy7bytes
ldr r3, [r1], #4
pld [r1, #64]
lsrs ip, r3, #24
strd r2, r3, [r0], #8
beq strcpy_unalign_return
b strcpy_unalign7
strcpy_unalign7_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
strcpy_unalign_return:
m_pop
strcpy_unalign7_copy6bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
m_pop
strcpy_unalign7_copy7bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
strb r5, [r0], #1
m_pop
.p2align 2
// Can read 6 bytes before possibly crossing a page.
strcpy_unalign6:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldrb r5, [r1, #1]
cbz r5, strcpy_unalign_copy6bytes
ldr r3, [r1], #4
pld [r1, #64]
tst r3, #0xff0000
beq strcpy_copy7bytes
lsrs ip, r3, #24
strd r2, r3, [r0], #8
beq strcpy_unalign_return
b strcpy_unalign6
.p2align 2
// Can read 5 bytes before possibly crossing a page.
strcpy_unalign5:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign5
strcpy_unalign_copy5bytes:
str r2, [r0], #4
strb r4, [r0]
m_pop
strcpy_unalign_copy6bytes:
str r2, [r0], #4
strb r4, [r0], #1
strb r5, [r0]
m_pop
.p2align 2
// Can read 4 bytes before possibly crossing a page.
strcpy_unalign4:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign4
.p2align 2
// Can read 3 bytes before possibly crossing a page.
strcpy_unalign3:
ldrb r2, [r1]
cbz r2, strcpy_unalign3_copy1byte
ldrb r3, [r1, #1]
cbz r3, strcpy_unalign3_copy2bytes
ldrb r4, [r1, #2]
cbz r4, strcpy_unalign3_copy3bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
lsrs lr, r2, #24
beq strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign3
strcpy_unalign3_copy1byte:
strb r2, [r0]
m_pop
strcpy_unalign3_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_pop
strcpy_unalign3_copy3bytes:
strb r2, [r0], #1
strb r3, [r0], #1
strb r4, [r0]
m_pop
.p2align 2
// Can read 2 bytes before possibly crossing a page.
strcpy_unalign2:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign_copy2bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
tst r2, #0xff0000
beq strcpy_copy3bytes
lsrs ip, r2, #24
beq strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign2
.p2align 2
// Can read 1 byte before possibly crossing a page.
strcpy_unalign1:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign1
strcpy_unalign_copy1byte:
strb r2, [r0]
m_pop
strcpy_unalign_copy2bytes:
strb r2, [r0], #1
strb r4, [r0]
m_pop
END(strcpy)
@@ -1,165 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
.syntax unified
.thumb
.thumb_func
ENTRY(strlen)
pld [r0, #0]
mov r1, r0
ands r3, r0, #7
beq mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq align_to_32
ldrb r2, [r1], #1
cbz r2, update_count_and_return
align_to_32:
bcc align_to_64
ands ip, r3, #2
beq align_to_64
ldrb r2, [r1], #1
cbz r2, update_count_and_return
ldrb r2, [r1], #1
cbz r2, update_count_and_return
align_to_64:
tst r3, #4
beq mainloop
ldr r3, [r1], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne zero_in_second_register
.p2align 2
mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne zero_in_second_register
b mainloop
update_count_and_return:
sub r0, r1, r0
sub r0, r0, #1
bx lr
zero_in_first_register:
sub r0, r1, r0
lsls r3, ip, #17
bne sub8_and_return
bcs sub7_and_return
lsls ip, ip, #1
bne sub6_and_return
sub r0, r0, #5
bx lr
sub8_and_return:
sub r0, r0, #8
bx lr
sub7_and_return:
sub r0, r0, #7
bx lr
sub6_and_return:
sub r0, r0, #6
bx lr
zero_in_second_register:
sub r0, r1, r0
lsls r3, ip, #17
bne sub4_and_return
bcs sub3_and_return
lsls ip, ip, #1
bne sub2_and_return
sub r0, r0, #1
bx lr
sub4_and_return:
sub r0, r0, #4
bx lr
sub3_and_return:
sub r0, r0, #3
bx lr
sub2_and_return:
sub r0, r0, #2
bx lr
END(strlen)
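The routine above is the classic word-at-a-time strlen: align first, then test a word per iteration (a double word in the assembly) for a zero byte. A C sketch of the same idea, scanning 32 bits at a time:

#include <stdint.h>
#include <stddef.h>

// Nonzero iff some byte of w is zero.
static inline uint32_t has_zero_byte(uint32_t w) {
    return (w - 0x01010101u) & ~w & 0x80808080u;
}

size_t strlen_sketch(const char* s) {
    const char* p = s;
    while (((uintptr_t)p & 3) != 0) {   // byte steps until word aligned
        if (*p == '\0') return (size_t)(p - s);
        ++p;
    }
    const uint32_t* w = (const uint32_t*)p;
    while (!has_zero_byte(*w)) ++w;     // the assembly scans 64 bits per step
    p = (const char*)w;
    while (*p) ++p;                     // pin down the exact terminator byte
    return (size_t)(p - s);
}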
@@ -1,10 +1,5 @@
$(call libc-add-cpu-variant-src,MEMCPY,arch-arm/cortex-a15/bionic/memcpy.S)
$(call libc-add-cpu-variant-src,MEMSET,arch-arm/cortex-a15/bionic/memset.S)
$(call libc-add-cpu-variant-src,STRCAT,arch-arm/cortex-a15/bionic/strcat.S)
$(call libc-add-cpu-variant-src,STRCMP,arch-arm/cortex-a15/bionic/strcmp.S)
$(call libc-add-cpu-variant-src,STRCPY,arch-arm/cortex-a15/bionic/strcpy.S)
$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
$(call libc-add-cpu-variant-src,__STRCAT_CHK,arch-arm/cortex-a15/bionic/__strcat_chk.S)
$(call libc-add-cpu-variant-src,__STRCPY_CHK,arch-arm/cortex-a15/bionic/__strcpy_chk.S)
include bionic/libc/arch-arm/generic/generic.mk
@@ -1 +0,0 @@
include bionic/libc/arch-arm/cortex-a15/cortex-a15.mk
@@ -1,230 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#include "libc_events.h"
.syntax unified
.fpu neon
.thumb
.thumb_func
// Get the length of the src string, then get the length of the dst string.
// Check that the two lengths together don't exceed the threshold, then
// do a memcpy of the data.
ENTRY(__strcat_chk)
.cfi_startproc
pld [r0, #0]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
push {r4, r5}
.save {r4, r5}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
mov lr, r2
// Save the dst register to r5
mov r5, r0
// Zero out r4
eor r4, r4, r4
// r1 contains the address of the string to count.
.L_strlen_start:
mov r0, r1
ands r3, r0, #7
bne .L_align_src
.p2align 2
.L_mainloop:
ldmia r1!, {r2, r3}
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_zero_in_first_register:
sub r3, r1, r0
// Check for zero in byte 0.
lsls r2, ip, #17
beq .L_check_byte1_reg1
sub r3, r3, #8
b .L_finish
.L_check_byte1_reg1:
bcc .L_check_byte2_reg1
sub r3, r3, #7
b .L_finish
.L_check_byte2_reg1:
// Check for zero in byte 2.
tst ip, #0x800000
it ne
subne r3, r3, #6
bne .L_finish
sub r3, r3, #5
b .L_finish
.L_zero_in_second_register:
sub r3, r1, r0
// Check for zero in byte 0.
lsls r2, ip, #17
beq .L_check_byte1_reg2
sub r3, r3, #4
b .L_finish
.L_check_byte1_reg2:
bcc .L_check_byte2_reg2
sub r3, r3, #3
b .L_finish
.L_check_byte2_reg2:
// Check for zero in byte 2.
tst ip, #0x800000
it ne
subne r3, r3, #2
bne .L_finish
sub r3, r3, #1
b .L_finish
.L_align_src:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq .L_align_to_32
ldrb r2, [r1], #1
cbz r2, .L_done
.L_align_to_32:
bcc .L_align_to_64
ldrb r2, [r1], #1
cbz r2, .L_done
ldrb r2, [r1], #1
cbz r2, .L_done
.L_align_to_64:
tst r3, #4
beq .L_mainloop
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_done:
sub r3, r1, r0
sub r3, r3, #1
.L_finish:
cmp r4, #0
bne .L_strlen_done
// Time to get the dst string length.
mov r1, r5
// Save the original source address to r5.
mov r5, r0
// Save the current length (adding 1 for the terminator).
add r4, r3, #1
b .L_strlen_start
// r0 holds the pointer to the dst string.
// r3 holds the dst string length.
// r4 holds the src string length + 1.
.L_strlen_done:
add r2, r3, r4
cmp r2, lr
bhi __strcat_chk_fail
// Set up the registers for the memcpy code.
mov r1, r5
pld [r1, #64]
mov r2, r4
add r0, r0, r3
pop {r4, r5}
// Fall through into the memcpy_base function.
.cfi_endproc
END(__strcat_chk)
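In C, the two-pass flow above is a pair of strlen calls, a combined length check, and a memcpy into the end of dst; r4 doubles as the pass flag (zero on the src pass, src_len + 1 on the dst pass). A sketch only, not bionic's C code:

#include <stddef.h>
#include <string.h>

extern void __strcat_chk_fail(void);    // the fail path below; never returns

char* __strcat_chk_sketch(char* dst, const char* src, size_t dst_buf_len) {
    size_t src_len = strlen(src);       // first .L_strlen_start pass
    size_t dst_len = strlen(dst);       // second pass
    if (dst_len + src_len + 1 > dst_buf_len) {   // add r2, r3, r4 / cmp / bhi
        __strcat_chk_fail();
    }
    memcpy(dst + dst_len, src, src_len + 1);     // shared memcpy_base tail
    return dst;
}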
#define MEMCPY_BASE __strcat_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__strcat_chk_fail)
.cfi_startproc
.save {r0, lr}
.save {r4, r5}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__strcat_chk_fail)
.data
error_string:
.string "strcat buffer overflow"
@@ -1,194 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#include "libc_events.h"
.syntax unified
.fpu neon
.thumb
.thumb_func
// Get the length of the source string first, then do a memcpy of the data
// instead of a strcpy.
ENTRY(__strcpy_chk)
.cfi_startproc
pld [r0, #0]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
mov lr, r2
mov r0, r1
ands r3, r0, #7
bne .L_align_src
.p2align 2
.L_mainloop:
ldmia r0!, {r2, r3}
pld [r0, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_zero_in_first_register:
sub r3, r0, r1
// Check for zero in byte 0.
lsls r2, ip, #17
beq .L_check_byte1_reg1
sub r3, r3, #8
b .L_check_size
.L_check_byte1_reg1:
bcc .L_check_byte2_reg1
sub r3, r3, #7
b .L_check_size
.L_check_byte2_reg1:
// Check for zero in byte 2.
tst ip, #0x800000
it ne
subne r3, r3, #6
bne .L_check_size
sub r3, r3, #5
b .L_check_size
.L_zero_in_second_register:
sub r3, r0, r1
// Check for zero in byte 0.
lsls r2, ip, #17
beq .L_check_byte1_reg2
sub r3, r3, #4
b .L_check_size
.L_check_byte1_reg2:
bcc .L_check_byte2_reg2
sub r3, r3, #3
b .L_check_size
.L_check_byte2_reg2:
// Check for zero in byte 2.
tst ip, #0x800000
it ne
subne r3, r3, #2
bne .L_check_size
sub r3, r3, #1
b .L_check_size
.L_align_src:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq .L_align_to_32
ldrb r2, [r0], #1
cbz r2, .L_done
.L_align_to_32:
bcc .L_align_to_64
ldrb r2, [r0], #1
cbz r2, .L_done
ldrb r2, [r0], #1
cbz r2, .L_done
.L_align_to_64:
tst r3, #4
beq .L_mainloop
ldr r2, [r0], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_done:
sub r3, r0, r1
sub r3, r3, #1
.L_check_size:
pld [r1, #0]
pld [r1, #64]
ldr r0, [sp]
cmp r3, lr
bhs __strcpy_chk_fail
// Add 1 to the copy length to include the string terminator.
add r2, r3, #1
.cfi_endproc
// Fall through into the memcpy_base function.
END(__strcpy_chk)
#define MEMCPY_BASE __strcpy_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__strcpy_chk_fail)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__strcpy_chk_fail)
.data
error_string:
.string "strcpy buffer overflow"


@@ -26,8 +26,8 @@
* SUCH DAMAGE.
*/
#include <machine/cpu-features.h>
#include <machine/asm.h>
#include "libc_events.h"
/*
* This code assumes it is running on a processor that supports all arm v7
@@ -35,58 +35,177 @@
* cache line.
*/
.syntax unified
.text
.fpu neon
.thumb
.thumb_func
ENTRY(__memcpy_chk)
.cfi_startproc
cmp r2, r3
bhi __memcpy_chk_fail
// Fall through to memcpy...
.cfi_endproc
END(__memcpy_chk)
#define CACHE_LINE_SIZE 32
ENTRY(memcpy)
.cfi_startproc
.save {r0, lr}
/* start preloading as early as possible */
pld [r1, #(CACHE_LINE_SIZE * 0)]
stmfd sp!, {r0, lr}
pld [r1, #(CACHE_LINE_SIZE * 2)]
pld [r1, #0]
stmfd sp!, {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
pld [r1, #64]
// Check that the count is at least 16 bytes; the alignment code below requires it.
cmp r2, #16
blo 5f
.cfi_endproc
/* check if buffers are aligned. If so, run arm-only version */
eor r3, r0, r1
ands r3, r3, #0x3
beq 11f
/* Check the upper size limit for Neon unaligned memory access in memcpy */
cmp r2, #224
blo 3f
/* align destination to 16 bytes for the write-buffer */
rsb r3, r0, #0
ands r3, r3, #0xF
beq 3f
/* copy up to 15 bytes (count in r3) */
sub r2, r2, r3
movs ip, r3, lsl #31
ldrmib lr, [r1], #1
strmib lr, [r0], #1
ldrcsb ip, [r1], #1
ldrcsb lr, [r1], #1
strcsb ip, [r0], #1
strcsb lr, [r0], #1
movs ip, r3, lsl #29
bge 1f
// copies 4 bytes, destination 32-bits aligned
vld1.32 {d0[0]}, [r1]!
vst1.32 {d0[0]}, [r0, :32]!
1: bcc 2f
// copies 8 bytes, destination 64-bits aligned
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
2:
/* immediately preload the next cache line, which we may need */
pld [r1, #(CACHE_LINE_SIZE * 0)]
pld [r1, #(CACHE_LINE_SIZE * 2)]
3:
/* make sure we have at least 64 bytes to copy */
subs r2, r2, #64
blo 2f
/* preload all the cache lines we need */
pld [r1, #(CACHE_LINE_SIZE * 4)]
pld [r1, #(CACHE_LINE_SIZE * 6)]
1: /* The main loop copies 64 bytes at a time */
vld1.8 {d0 - d3}, [r1]!
vld1.8 {d4 - d7}, [r1]!
pld [r1, #(CACHE_LINE_SIZE * 6)]
subs r2, r2, #64
vst1.8 {d0 - d3}, [r0]!
vst1.8 {d4 - d7}, [r0]!
bhs 1b
2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
add r2, r2, #64
subs r2, r2, #32
blo 4f
3: /* 32 bytes at a time. These cache lines were already preloaded */
vld1.8 {d0 - d3}, [r1]!
subs r2, r2, #32
vst1.8 {d0 - d3}, [r0]!
bhs 3b
4: /* less than 32 left */
add r2, r2, #32
tst r2, #0x10
beq 5f
// copies 16 bytes, 128-bits aligned
vld1.8 {d0, d1}, [r1]!
vst1.8 {d0, d1}, [r0]!
5: /* copy up to 15 bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0]!
1: bge 2f
vld1.32 {d0[0]}, [r1]!
vst1.32 {d0[0]}, [r0]!
2: movs ip, r2, lsl #31
ldrmib r3, [r1], #1
ldrcsb ip, [r1], #1
ldrcsb lr, [r1], #1
strmib r3, [r0], #1
strcsb ip, [r0], #1
strcsb lr, [r0], #1
ldmfd sp!, {r0, lr}
bx lr
11:
/* Simple arm-only copy loop to handle aligned copy operations */
stmfd sp!, {r4, r5, r6, r7, r8}
pld [r1, #(CACHE_LINE_SIZE * 4)]
/* Check alignment */
rsb r3, r1, #0
ands r3, #3
beq 2f
/* align source to 32 bits. We need to insert 2 instructions between
* a ldr[b|h] and str[b|h] because byte and half-word instructions
* stall 2 cycles.
*/
movs r12, r3, lsl #31
sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */
ldrmib r3, [r1], #1
ldrcsb r4, [r1], #1
ldrcsb r5, [r1], #1
strmib r3, [r0], #1
strcsb r4, [r0], #1
strcsb r5, [r0], #1
2:
subs r2, r2, #64
blt 4f
3: /* Main copy loop, copying 64 bytes at a time */
pld [r1, #(CACHE_LINE_SIZE * 8)]
ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
subs r2, r2, #64
bge 3b
4: /* Check if there are > 32 bytes left */
adds r2, r2, #64
subs r2, r2, #32
blt 5f
/* Copy 32 bytes */
ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
subs r2, #32
5: /* Handle any remaining bytes */
adds r2, #32
beq 6f
movs r12, r2, lsl #28
ldmcsia r1!, {r3, r4, r5, r6} /* 16 bytes */
ldmmiia r1!, {r7, r8} /* 8 bytes */
stmcsia r0!, {r3, r4, r5, r6}
stmmiia r0!, {r7, r8}
movs r12, r2, lsl #30
ldrcs r3, [r1], #4 /* 4 bytes */
ldrmih r4, [r1], #2 /* 2 bytes */
strcs r3, [r0], #4
strmih r4, [r0], #2
tst r2, #0x1
ldrneb r3, [r1] /* last byte */
strneb r3, [r0]
6:
ldmfd sp!, {r4, r5, r6, r7, r8}
ldmfd sp!, {r0, pc}
END(memcpy)
#define MEMCPY_BASE __memcpy_base
#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__memcpy_chk_fail)
.cfi_startproc
// Preserve lr for backtrace.
push {lr}
.save {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__memcpy_chk_fail)
.data
error_string:
.string "memcpy buffer overflow"


@@ -1,233 +0,0 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* This code assumes it is running on a processor that supports all arm v7
* instructions, that supports neon instructions, and that has a 32 byte
* cache line.
*/
ENTRY(MEMCPY_BASE)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
// Check that the count is at least 16 bytes; the alignment code below requires it.
cmp r2, #16
blo 5f
/* check if buffers are aligned. If so, run arm-only version */
eor r3, r0, r1
ands r3, r3, #0x3
beq __memcpy_base_aligned
/* Check the upper size limit for Neon unaligned memory access in memcpy */
cmp r2, #224
blo 3f
/* align destination to 16 bytes for the write-buffer */
rsb r3, r0, #0
ands r3, r3, #0xF
beq 3f
/* copy up to 15 bytes (count in r3) */
sub r2, r2, r3
movs ip, r3, lsl #31
itt mi
ldrbmi lr, [r1], #1
strbmi lr, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1], #1
strbcs ip, [r0], #1
strbcs lr, [r0], #1
movs ip, r3, lsl #29
bge 1f
// copies 4 bytes, destination 32-bits aligned
vld1.32 {d0[0]}, [r1]!
vst1.32 {d0[0]}, [r0, :32]!
1: bcc 2f
// copies 8 bytes, destination 64-bits aligned
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
2:
/* immediately preload the next cache line, which we may need */
pld [r1, #0]
pld [r1, #(32 * 2)]
3:
/* make sure we have at least 64 bytes to copy */
subs r2, r2, #64
blo 2f
/* preload all the cache lines we need */
pld [r1, #(32 * 4)]
pld [r1, #(32 * 6)]
1: /* The main loop copies 64 bytes at a time */
vld1.8 {d0 - d3}, [r1]!
vld1.8 {d4 - d7}, [r1]!
pld [r1, #(32 * 6)]
subs r2, r2, #64
vst1.8 {d0 - d3}, [r0]!
vst1.8 {d4 - d7}, [r0]!
bhs 1b
2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
add r2, r2, #64
subs r2, r2, #32
blo 4f
3: /* 32 bytes at a time. These cache lines were already preloaded */
vld1.8 {d0 - d3}, [r1]!
subs r2, r2, #32
vst1.8 {d0 - d3}, [r0]!
bhs 3b
4: /* less than 32 left */
add r2, r2, #32
tst r2, #0x10
beq 5f
// copies 16 bytes, 128-bits aligned
vld1.8 {d0, d1}, [r1]!
vst1.8 {d0, d1}, [r0]!
5: /* copy up to 15 bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0]!
1: bge 2f
vld1.32 {d0[0]}, [r1]!
vst1.32 {d0[0]}, [r0]!
2: movs ip, r2, lsl #31
itt mi
ldrbmi r3, [r1], #1
strbmi r3, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1], #1
strbcs ip, [r0], #1
strbcs lr, [r0], #1
ldmfd sp!, {r0, lr}
bx lr
.cfi_endproc
END(MEMCPY_BASE)
ENTRY(MEMCPY_BASE_ALIGNED)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
/* Simple arm-only copy loop to handle aligned copy operations */
stmfd sp!, {r4-r8}
.save {r4-r8}
.cfi_adjust_cfa_offset 20
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
.cfi_rel_offset r6, 8
.cfi_rel_offset r7, 12
.cfi_rel_offset r8, 16
pld [r1, #(32 * 4)]
/* Check alignment */
rsb r3, r1, #0
ands r3, #3
beq 2f
/* align source to 32 bits. We need to insert 2 instructions between
* a ldr[b|h] and str[b|h] because byte and half-word instructions
* stall 2 cycles.
*/
movs r12, r3, lsl #31
sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */
itt mi
ldrbmi r3, [r1], #1
strbmi r3, [r0], #1
itttt cs
ldrbcs r4, [r1], #1
ldrbcs r5, [r1], #1
strbcs r4, [r0], #1
strbcs r5, [r0], #1
2:
subs r2, r2, #64
blt 4f
3: /* Main copy loop, copying 64 bytes at a time */
pld [r1, #(32 * 8)]
ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
subs r2, r2, #64
bge 3b
4: /* Check if there are > 32 bytes left */
adds r2, r2, #64
subs r2, r2, #32
blt 5f
/* Copy 32 bytes */
ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
subs r2, #32
5: /* Handle any remaining bytes */
adds r2, #32
beq 6f
movs r12, r2, lsl #28
itt cs
ldmiacs r1!, {r3, r4, r5, r6} /* 16 bytes */
stmiacs r0!, {r3, r4, r5, r6}
itt mi
ldmiami r1!, {r7, r8} /* 8 bytes */
stmiami r0!, {r7, r8}
movs r12, r2, lsl #30
itt cs
ldrcs r3, [r1], #4 /* 4 bytes */
strcs r3, [r0], #4
itt mi
ldrhmi r4, [r1], #2 /* 2 bytes */
strhmi r4, [r0], #2
tst r2, #0x1
itt ne
ldrbne r3, [r1] /* last byte */
strbne r3, [r0]
6:
ldmfd sp!, {r4-r8}
ldmfd sp!, {r0, pc}
.cfi_endproc
END(MEMCPY_BASE_ALIGNED)


@@ -28,7 +28,6 @@
#include <machine/cpu-features.h>
#include <machine/asm.h>
#include "libc_events.h"
/*
* This code assumes it is running on a processor that supports all arm v7
@@ -37,52 +36,19 @@
.fpu neon
ENTRY(__memset_chk)
.cfi_startproc
cmp r2, r3
bls .L_done
// Preserve lr for backtrace.
push {lr}
.save {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
.cfi_endproc
END(__memset_chk)
ENTRY(bzero)
.cfi_startproc
mov r2, r1
mov r1, #0
.L_done:
// Fall through to memset...
.cfi_endproc
END(bzero)
/* memset() returns its first argument. */
ENTRY(memset)
.cfi_startproc
// The NEON memset only wins for sizes below 132 bytes.
cmp r2, #132
bhi __memset_large_copy
bhi 11f
stmfd sp!, {r0}
.save {r0}
.cfi_def_cfa_offset 4
.cfi_rel_offset r0, 0
stmfd sp!, {r0}
vdup.8 q0, r1
@@ -115,26 +81,13 @@ ENTRY(memset)
strcsb r1, [r0], #1
ldmfd sp!, {r0}
bx lr
.cfi_endproc
END(memset)
ENTRY(__memset_large_copy)
.cfi_startproc
11:
/* compute the offset to align the destination
* offset = (4-(src&3))&3 = -src & 3
*/
stmfd sp!, {r0, r4-r7, lr}
.save {r0, r4-r7, lr}
.cfi_def_cfa_offset 24
.cfi_rel_offset r0, 0
.cfi_rel_offset r4, 4
.cfi_rel_offset r5, 8
.cfi_rel_offset r6, 12
.cfi_rel_offset r7, 16
.cfi_rel_offset lr, 20
.save {r0, r4-r7, lr}
stmfd sp!, {r0, r4-r7, lr}
rsb r3, r0, #0
ands r3, r3, #3
cmp r3, r2
@@ -196,9 +149,4 @@ ENTRY(__memset_large_copy)
strcsb r1, [r0]
ldmfd sp!, {r0, r4-r7, lr}
bx lr
.cfi_endproc
END(__memset_large_copy)
.data
error_string:
.string "memset buffer overflow"
END(memset)
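
The hunk above splits memset into a short NEON path (used below 132 bytes) and __memset_large_copy, with __memset_chk and bzero falling through into it. In C terms the entry points behave like this sketch (illustrative):

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

void *__memset_chk_sketch(void *dst, int byte, size_t n, size_t dst_len) {
    if (n > dst_len) abort();   // the assembly branches to __fortify_chk_fail
    return memset(dst, byte, n);
}

void bzero_sketch(void *dst, size_t n) {
    memset(dst, 0, n);          // mov r2, r1; mov r1, #0; fall into memset
}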


@@ -1,548 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
.syntax unified
.thumb
.thumb_func
.macro m_push
push {r0, r4, r5, lr}
.endm // m_push
.macro m_ret inst
\inst {r0, r4, r5, pc}
.endm // m_ret
.macro m_scan_byte
ldrb r3, [r0]
cbz r3, strcat_r0_scan_done
add r0, #1
.endm // m_scan_byte
.macro m_copy_byte reg, cmd, label
ldrb \reg, [r1], #1
strb \reg, [r0], #1
\cmd \reg, \label
.endm // m_copy_byte
ENTRY(strcat)
// Quick check to see if src is empty.
ldrb r2, [r1]
pld [r1, #0]
cbnz r2, strcat_continue
bx lr
strcat_continue:
// To speed up really small dst strings, unroll checking the first 4 bytes.
m_push
m_scan_byte
m_scan_byte
m_scan_byte
m_scan_byte
ands r3, r0, #7
bne strcat_align_src
.p2align 2
strcat_mainloop:
ldmia r0!, {r2, r3}
pld [r0, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcat_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcat_zero_in_second_register
b strcat_mainloop
strcat_zero_in_first_register:
sub r0, r0, #4
strcat_zero_in_second_register:
// Check for zero in byte 0.
tst ip, #0x80
it ne
subne r0, r0, #4
bne strcat_r0_scan_done
// Check for zero in byte 1.
tst ip, #0x8000
it ne
subne r0, r0, #3
bne strcat_r0_scan_done
// Check for zero in byte 2.
tst ip, #0x800000
it ne
subne r0, r0, #2
it eq
// Zero is in byte 3.
subeq r0, r0, #1
strcat_r0_scan_done:
// Unroll the first 8 bytes that will be copied.
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
strcpy_finish:
m_ret inst=pop
strcpy_continue:
pld [r1, #0]
ands r3, r0, #7
bne strcpy_align_dst
strcpy_check_src_align:
// At this point dst is aligned to a double word, check if src
// is also aligned to a double word.
ands r3, r1, #7
bne strcpy_unaligned_copy
.p2align 2
strcpy_mainloop:
ldmia r1!, {r2, r3}
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_mainloop
strcpy_zero_in_first_register:
lsls lr, ip, #17
itt ne
strbne r2, [r0]
m_ret inst=popne
itt cs
strhcs r2, [r0]
m_ret inst=popcs
lsls ip, ip, #1
itt eq
streq r2, [r0]
m_ret inst=popeq
strh r2, [r0], #2
lsr r3, r2, #16
strb r3, [r0]
m_ret inst=pop
strcpy_zero_in_second_register:
lsls lr, ip, #17
ittt ne
stmiane r0!, {r2}
strbne r3, [r0]
m_ret inst=popne
ittt cs
strcs r2, [r0], #4
strhcs r3, [r0]
m_ret inst=popcs
lsls ip, ip, #1
itt eq
stmiaeq r0, {r2, r3}
m_ret inst=popeq
stmia r0!, {r2}
strh r3, [r0], #2
lsr r4, r3, #16
strb r4, [r0]
m_ret inst=pop
strcpy_align_dst:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcpy_align_to_32
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
strcpy_align_to_32:
bcc strcpy_align_to_64
ldrb r4, [r1], #1
strb r4, [r0], #1
cmp r4, #0
it eq
m_ret inst=popeq
ldrb r5, [r1], #1
strb r5, [r0], #1
cmp r5, #0
it eq
m_ret inst=popeq
strcpy_align_to_64:
tst r3, #4
beq strcpy_check_src_align
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
stmia r0!, {r2}
b strcpy_check_src_align
strcpy_complete:
m_ret inst=pop
strcpy_unaligned_copy:
// Dst is aligned to a double word, while src is at an unknown alignment.
// There are 7 different versions of the unaligned copy code
// to prevent overreading the src. Every version's main loop stores
// 64 bits per iteration. The difference is how much of src can
// be read without potentially crossing a page boundary.
tbb [pc, r3]
strcpy_unaligned_branchtable:
.byte 0
.byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
.p2align 2
// Can read 7 bytes before possibly crossing a page.
strcpy_unalign7:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r3, [r1]
cbz r3, strcpy_unalign7_copy5bytes
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign7_copy6bytes
ldrb r5, [r1, #2]
cbz r5, strcpy_unalign7_copy7bytes
ldr r3, [r1], #4
pld [r1, #64]
lsrs ip, r3, #24
stmia r0!, {r2, r3}
beq strcpy_unalign_return
b strcpy_unalign7
strcpy_unalign7_copy5bytes:
stmia r0!, {r2}
strb r3, [r0]
strcpy_unalign_return:
m_ret inst=pop
strcpy_unalign7_copy6bytes:
stmia r0!, {r2}
strb r3, [r0], #1
strb r4, [r0], #1
m_ret inst=pop
strcpy_unalign7_copy7bytes:
stmia r0!, {r2}
strb r3, [r0], #1
strb r4, [r0], #1
strb r5, [r0], #1
m_ret inst=pop
.p2align 2
// Can read 6 bytes before possibly crossing a page.
strcpy_unalign6:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldrb r5, [r1, #1]
cbz r5, strcpy_unalign_copy6bytes
ldr r3, [r1], #4
pld [r1, #64]
tst r3, #0xff0000
beq strcpy_unalign6_copy7bytes
lsrs ip, r3, #24
stmia r0!, {r2, r3}
beq strcpy_unalign_return
b strcpy_unalign6
strcpy_unalign6_copy7bytes:
stmia r0!, {r2}
strh r3, [r0], #2
lsr r3, #16
strb r3, [r0]
m_ret inst=pop
.p2align 2
// Can read 5 bytes before possibly crossing a page.
strcpy_unalign5:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign5
strcpy_unalign_copy5bytes:
stmia r0!, {r2}
strb r4, [r0]
m_ret inst=pop
strcpy_unalign_copy6bytes:
stmia r0!, {r2}
strb r4, [r0], #1
strb r5, [r0]
m_ret inst=pop
.p2align 2
// Can read 4 bytes before possibly crossing a page.
strcpy_unalign4:
ldmia r1!, {r2}
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldmia r1!, {r3}
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign4
.p2align 2
// Can read 3 bytes before possibly crossing a page.
strcpy_unalign3:
ldrb r2, [r1]
cbz r2, strcpy_unalign3_copy1byte
ldrb r3, [r1, #1]
cbz r3, strcpy_unalign3_copy2bytes
ldrb r4, [r1, #2]
cbz r4, strcpy_unalign3_copy3bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
lsrs lr, r2, #24
beq strcpy_unalign_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign3
strcpy_unalign3_copy1byte:
strb r2, [r0]
m_ret inst=pop
strcpy_unalign3_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_ret inst=pop
strcpy_unalign3_copy3bytes:
strb r2, [r0], #1
strb r3, [r0], #1
strb r4, [r0]
m_ret inst=pop
.p2align 2
// Can read 2 bytes before possibly crossing a page.
strcpy_unalign2:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldrb r3, [r1, #1]
cbz r3, strcpy_unalign_copy2bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
tst r2, #0xff0000
beq strcpy_unalign_copy3bytes
lsrs ip, r2, #24
beq strcpy_unalign_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign2
.p2align 2
// Can read 1 byte before possibly crossing a page.
strcpy_unalign1:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign1
strcpy_unalign_copy1byte:
strb r2, [r0]
m_ret inst=pop
strcpy_unalign_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_ret inst=pop
strcpy_unalign_copy3bytes:
strh r2, [r0], #2
lsr r2, #16
strb r2, [r0]
m_ret inst=pop
strcpy_unalign_copy4bytes:
stmia r0, {r2}
m_ret inst=pop
strcat_align_src:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcat_align_to_32
ldrb r2, [r0], #1
cbz r2, strcat_r0_update
strcat_align_to_32:
bcc strcat_align_to_64
ldrb r2, [r0], #1
cbz r2, strcat_r0_update
ldrb r2, [r0], #1
cbz r2, strcat_r0_update
strcat_align_to_64:
tst r3, #4
beq strcat_mainloop
ldr r3, [r0], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcat_zero_in_second_register
b strcat_mainloop
strcat_r0_update:
sub r0, r0, #1
b strcat_r0_scan_done
END(strcat)
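
Functionally, the routine is strlen-then-copy; the assembly fuses the two so the copy loops are shared with strcpy. A minimal C equivalent:

#include <string.h>

char *strcat_sketch(char *dst, const char *src) {
    if (*src == '\0') return dst;   // the quick empty-src check at entry
    char *end = dst + strlen(dst);  // the strcat_mainloop word scan
    strcpy(end, src);               // reuses the strcpy copy paths
    return dst;
}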


@@ -123,13 +123,8 @@ ENTRY(strcmp)
.macro init
/* Macro to save temporary registers and prepare magic values. */
subs sp, sp, #16
.cfi_def_cfa_offset 16
strd r4, r5, [sp, #8]
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
strd r6, r7, [sp]
.cfi_rel_offset r6, 8
.cfi_rel_offset r7, 12
mvn r6, #0 /* all F */
mov r7, #0 /* all 0 */
.endm /* init */
@@ -170,20 +165,18 @@ ENTRY(strcmp)
#endif /* not __ARMEB__ */
.endm /* setup_return */
.cfi_startproc
pld [r0, #0]
pld [r1, #0]
/* Are both strings double-word aligned? */
orr ip, r0, r1
tst ip, #7
bne .L_do_align
bne do_align
/* Fast path. */
.save {r4-r7}
init
.L_doubleword_aligned:
doubleword_aligned:
/* Get here when the strings to compare are double-word aligned. */
/* Compare two words in every iteration. */
@@ -196,14 +189,14 @@ ENTRY(strcmp)
ldrd r2, r3, [r0], #8
ldrd r4, r5, [r1], #8
magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
magic_compare_and_branch w1=r2, w2=r4, label=return_24
magic_compare_and_branch w1=r3, w2=r5, label=return_35
b 2b
.L_do_align:
do_align:
/* Is the first string word-aligned? */
ands ip, r0, #3
beq .L_word_aligned_r0
beq word_aligned_r0
/* Fast compare byte by byte until the first string is word-aligned. */
/* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
@@ -211,58 +204,58 @@ ENTRY(strcmp)
bic r0, r0, #3
ldr r2, [r0], #4
lsls ip, ip, #31
beq .L_byte2
bcs .L_byte3
beq byte2
bcs byte3
.L_byte1:
byte1:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE1_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbz reg=r3, label=.L_fast_return
bne fast_return
m_cbz reg=r3, label=fast_return
.L_byte2:
byte2:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE2_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbz reg=r3, label=.L_fast_return
bne fast_return
m_cbz reg=r3, label=fast_return
.L_byte3:
byte3:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE3_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbnz reg=r3, label=.L_word_aligned_r0
bne fast_return
m_cbnz reg=r3, label=word_aligned_r0
.L_fast_return:
fast_return:
mov r0, ip
bx lr
.L_word_aligned_r0:
word_aligned_r0:
init
/* The first string is word-aligned. */
/* Is the second string word-aligned? */
ands ip, r1, #3
bne .L_strcmp_unaligned
bne strcmp_unaligned
.L_word_aligned:
word_aligned:
/* The strings are word-aligned. */
/* Is the first string double-word aligned? */
tst r0, #4
beq .L_doubleword_aligned_r0
beq doubleword_aligned_r0
/* If r0 is not double-word aligned yet, align it by loading
and comparing the next word from each string. */
ldr r2, [r0], #4
ldr r4, [r1], #4
magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
magic_compare_and_branch w1=r2 w2=r4 label=return_24
.L_doubleword_aligned_r0:
doubleword_aligned_r0:
/* Get here when r0 is double-word aligned. */
/* Is r1 doubleword_aligned? */
tst r1, #4
beq .L_doubleword_aligned
beq doubleword_aligned
/* Get here when the strings to compare are word-aligned,
r0 is double-word aligned, but r1 is not double-word aligned. */
@@ -278,9 +271,9 @@ ENTRY(strcmp)
/* Load the next double-word from each string and compare. */
ldrd r2, r3, [r0], #8
magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
magic_compare_and_branch w1=r2 w2=r5 label=return_25
ldrd r4, r5, [r1], #8
magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
magic_compare_and_branch w1=r3 w2=r4 label=return_34
b 3b
.macro miscmp_word offsetlo offsethi
@@ -304,33 +297,33 @@ ENTRY(strcmp)
and r2, r3, r6, S2LOMEM #\offsetlo
it eq
cmpeq r2, r5
bne .L_return_25
bne return_25
ldr r5, [r1], #4
cmp ip, #0
eor r3, r2, r3
S2HIMEM r2, r5, #\offsethi
it eq
cmpeq r3, r2
bne .L_return_32
bne return_32
b 7b
.endm /* miscmp_word */
.L_return_32:
return_32:
setup_return w1=r3, w2=r2
b .L_do_return
.L_return_34:
b do_return
return_34:
setup_return w1=r3, w2=r4
b .L_do_return
.L_return_25:
b do_return
return_25:
setup_return w1=r2, w2=r5
b .L_do_return
.L_return_35:
b do_return
return_35:
setup_return w1=r3, w2=r5
b .L_do_return
.L_return_24:
b do_return
return_24:
setup_return w1=r2, w2=r4
.L_do_return:
do_return:
#ifdef __ARMEB__
mov r0, ip
@@ -342,16 +335,11 @@ ENTRY(strcmp)
ldrd r6, r7, [sp]
ldrd r4, r5, [sp, #8]
adds sp, sp, #16
.cfi_def_cfa_offset 0
.cfi_restore r4
.cfi_restore r5
.cfi_restore r6
.cfi_restore r7
/* There is a zero or a different byte between r1 and r2. */
/* r0 contains a mask of all-zero bytes in r1. */
/* Using r0 and not ip here because cbz requires a low register. */
m_cbz reg=r0, label=.L_compute_return_value
m_cbz reg=r0, label=compute_return_value
clz r0, r0
/* r0 contains the number of bits on the left of the first all-zero byte in r1. */
rsb r0, r0, #24
@@ -359,7 +347,7 @@ ENTRY(strcmp)
lsr r1, r1, r0
lsr r2, r2, r0
.L_compute_return_value:
compute_return_value:
movs r0, #1
cmp r1, r2
/* The return value is computed as follows.
@@ -379,7 +367,7 @@ ENTRY(strcmp)
* bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S for the unedited
* version of the code.
*/
.L_strcmp_unaligned:
strcmp_unaligned:
wp1 .req r0
wp2 .req r1
b1 .req r2
@@ -532,11 +520,6 @@ ENTRY(strcmp)
ldrd r6, r7, [sp]
ldrd r4, r5, [sp, #8]
adds sp, sp, #16
.cfi_def_cfa_offset 0
.cfi_restore r4
.cfi_restore r5
.cfi_restore r6
.cfi_restore r7
bx lr
@@ -558,5 +541,4 @@ ENTRY(strcmp)
adds sp, sp, #16
bx lr
.cfi_endproc
END(strcmp)
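
The fast path compares a word (or double word) per iteration and only drops to byte-level work once a difference or a zero byte is seen. A simplified C sketch of that idea for mutually 4-byte-aligned strings (illustrative; the real code also handles every misalignment case):

#include <stdint.h>
#include <string.h>

static int has_zero(uint32_t w) {
    // Same subtract/bic/and trick as the "magic" constants in the macros.
    return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
}

int strcmp_sketch(const char *a, const char *b) {
    for (;;) {
        uint32_t wa, wb;
        memcpy(&wa, a, 4);   // word-at-a-time; ldrd in the assembly
        memcpy(&wb, b, 4);
        if (wa != wb || has_zero(wa)) break;
        a += 4; b += 4;
    }
    const unsigned char *pa = (const unsigned char *)a;
    const unsigned char *pb = (const unsigned char *)b;
    while (*pa != '\0' && *pa == *pb) { pa++; pb++; }
    return *pa - *pb;
}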


@@ -1,456 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
.syntax unified
.thumb
.thumb_func
.macro m_push
push {r0, r4, r5, lr}
.endm // m_push
.macro m_ret inst
\inst {r0, r4, r5, pc}
.endm // m_ret
.macro m_copy_byte reg, cmd, label
ldrb \reg, [r1], #1
strb \reg, [r0], #1
\cmd \reg, \label
.endm // m_copy_byte
ENTRY(strcpy)
// Unroll the first 8 bytes that will be copied.
m_push
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
strcpy_finish:
m_ret inst=pop
strcpy_continue:
pld [r1, #0]
ands r3, r0, #7
bne strcpy_align_dst
strcpy_check_src_align:
// At this point dst is aligned to a double word, check if src
// is also aligned to a double word.
ands r3, r1, #7
bne strcpy_unaligned_copy
.p2align 2
strcpy_mainloop:
ldmia r1!, {r2, r3}
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_mainloop
strcpy_zero_in_first_register:
lsls lr, ip, #17
itt ne
strbne r2, [r0]
m_ret inst=popne
itt cs
strhcs r2, [r0]
m_ret inst=popcs
lsls ip, ip, #1
itt eq
streq r2, [r0]
m_ret inst=popeq
strh r2, [r0], #2
lsr r3, r2, #16
strb r3, [r0]
m_ret inst=pop
strcpy_zero_in_second_register:
lsls lr, ip, #17
ittt ne
stmiane r0!, {r2}
strbne r3, [r0]
m_ret inst=popne
ittt cs
strcs r2, [r0], #4
strhcs r3, [r0]
m_ret inst=popcs
lsls ip, ip, #1
itt eq
stmiaeq r0, {r2, r3}
m_ret inst=popeq
stmia r0!, {r2}
strh r3, [r0], #2
lsr r4, r3, #16
strb r4, [r0]
m_ret inst=pop
strcpy_align_dst:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcpy_align_to_32
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
strcpy_align_to_32:
bcc strcpy_align_to_64
ldrb r4, [r1], #1
strb r4, [r0], #1
cmp r4, #0
it eq
m_ret inst=popeq
ldrb r5, [r1], #1
strb r5, [r0], #1
cmp r5, #0
it eq
m_ret inst=popeq
strcpy_align_to_64:
tst r3, #4
beq strcpy_check_src_align
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
stmia r0!, {r2}
b strcpy_check_src_align
strcpy_complete:
m_ret inst=pop
strcpy_unaligned_copy:
// Dst is aligned to a double word, while src is at an unknown alignment.
// There are 7 different versions of the unaligned copy code
// to prevent overreading the src. Every version's main loop stores
// 64 bits per iteration. The difference is how much of src can
// be read without potentially crossing a page boundary.
tbb [pc, r3]
strcpy_unaligned_branchtable:
.byte 0
.byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
.p2align 2
// Can read 7 bytes before possibly crossing a page.
strcpy_unalign7:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r3, [r1]
cbz r3, strcpy_unalign7_copy5bytes
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign7_copy6bytes
ldrb r5, [r1, #2]
cbz r5, strcpy_unalign7_copy7bytes
ldr r3, [r1], #4
pld [r1, #64]
lsrs ip, r3, #24
stmia r0!, {r2, r3}
beq strcpy_unalign_return
b strcpy_unalign7
strcpy_unalign7_copy5bytes:
stmia r0!, {r2}
strb r3, [r0]
strcpy_unalign_return:
m_ret inst=pop
strcpy_unalign7_copy6bytes:
stmia r0!, {r2}
strb r3, [r0], #1
strb r4, [r0], #1
m_ret inst=pop
strcpy_unalign7_copy7bytes:
stmia r0!, {r2}
strb r3, [r0], #1
strb r4, [r0], #1
strb r5, [r0], #1
m_ret inst=pop
.p2align 2
// Can read 6 bytes before possibly crossing a page.
strcpy_unalign6:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldrb r5, [r1, #1]
cbz r5, strcpy_unalign_copy6bytes
ldr r3, [r1], #4
pld [r1, #64]
tst r3, #0xff0000
beq strcpy_unalign6_copy7bytes
lsrs ip, r3, #24
stmia r0!, {r2, r3}
beq strcpy_unalign_return
b strcpy_unalign6
strcpy_unalign6_copy7bytes:
stmia r0!, {r2}
strh r3, [r0], #2
lsr r3, #16
strb r3, [r0]
m_ret inst=pop
.p2align 2
// Can read 5 bytes before possibly crossing a page.
strcpy_unalign5:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign5
strcpy_unalign_copy5bytes:
stmia r0!, {r2}
strb r4, [r0]
m_ret inst=pop
strcpy_unalign_copy6bytes:
stmia r0!, {r2}
strb r4, [r0], #1
strb r5, [r0]
m_ret inst=pop
.p2align 2
// Can read 4 bytes before possibly crossing a page.
strcpy_unalign4:
ldmia r1!, {r2}
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldmia r1!, {r3}
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign4
.p2align 2
// Can read 3 bytes before possibly crossing a page.
strcpy_unalign3:
ldrb r2, [r1]
cbz r2, strcpy_unalign3_copy1byte
ldrb r3, [r1, #1]
cbz r3, strcpy_unalign3_copy2bytes
ldrb r4, [r1, #2]
cbz r4, strcpy_unalign3_copy3bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
lsrs lr, r2, #24
beq strcpy_unalign_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign3
strcpy_unalign3_copy1byte:
strb r2, [r0]
m_ret inst=pop
strcpy_unalign3_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_ret inst=pop
strcpy_unalign3_copy3bytes:
strb r2, [r0], #1
strb r3, [r0], #1
strb r4, [r0]
m_ret inst=pop
.p2align 2
// Can read 2 bytes before possibly crossing a page.
strcpy_unalign2:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldrb r3, [r1, #1]
cbz r3, strcpy_unalign_copy2bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
tst r2, #0xff0000
beq strcpy_unalign_copy3bytes
lsrs ip, r2, #24
beq strcpy_unalign_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign2
.p2align 2
// Can read 1 byte before possibly crossing a page.
strcpy_unalign1:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign1
strcpy_unalign_copy1byte:
strb r2, [r0]
m_ret inst=pop
strcpy_unalign_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_ret inst=pop
strcpy_unalign_copy3bytes:
strh r2, [r0], #2
lsr r2, #16
strb r2, [r0]
m_ret inst=pop
strcpy_unalign_copy4bytes:
stmia r0, {r2}
m_ret inst=pop
END(strcpy)
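
The seven unaligned variants exist because pages are far more strictly aligned than 8 bytes: when src % 8 == k, the next 8 - k bytes provably sit on the same page, so that is the most a variant may read past data it has already validated. The sketch below expresses the same invariant in C, checking the page offset directly instead of dispatching through a tbb table (illustrative only; PAGE_SIZE and the helper names are assumptions):

#include <stdint.h>
#include <string.h>

#define PAGE_SIZE 4096u

static uint32_t zero_byte_mask(uint32_t w) {
    return (w - 0x01010101u) & ~w & 0x80808080u;
}

char *strcpy_guarded_sketch(char *dst, const char *src) {
    char *d = dst;
    for (;;) {
        uintptr_t room = PAGE_SIZE - ((uintptr_t)src & (PAGE_SIZE - 1));
        if (room >= 4) {
            uint32_t w;
            memcpy(&w, src, 4);          // stays on the current page
            if (zero_byte_mask(w) == 0) {
                memcpy(d, &w, 4);
                d += 4; src += 4;
                continue;
            }
        }
        if ((*d++ = *src++) == '\0')     // near a page edge, or once a zero
            return dst;                  // byte is in sight: go bytewise
    }
}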

View File

@@ -1,167 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
.syntax unified
.thumb
.thumb_func
ENTRY(strlen)
pld [r0, #0]
mov r1, r0
ands r3, r0, #7
bne align_src
.p2align 2
mainloop:
ldmia r1!, {r2, r3}
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne zero_in_second_register
b mainloop
zero_in_first_register:
sub r0, r1, r0
// Check for zero in byte 0.
lsls r2, ip, #17
beq check_byte1_reg1
sub r0, r0, #8
bx lr
check_byte1_reg1:
bcc check_byte2_reg1
sub r0, r0, #7
bx lr
check_byte2_reg1:
// Check for zero in byte 2.
tst ip, #0x800000
itt ne
subne r0, r0, #6
bxne lr
sub r0, r0, #5
bx lr
zero_in_second_register:
sub r0, r1, r0
// Check for zero in byte 0.
lsls r2, ip, #17
beq check_byte1_reg2
sub r0, r0, #4
bx lr
check_byte1_reg2:
bcc check_byte2_reg2
sub r0, r0, #3
bx lr
check_byte2_reg2:
// Check for zero in byte 2.
tst ip, #0x800000
itt ne
subne r0, r0, #2
bxne lr
sub r0, r0, #1
bx lr
align_src:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq align_to_32
ldrb r2, [r1], #1
cbz r2, done
align_to_32:
bcc align_to_64
ldrb r2, [r1], #1
cbz r2, done
ldrb r2, [r1], #1
cbz r2, done
align_to_64:
tst r3, #4
beq mainloop
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne zero_in_second_register
b mainloop
done:
sub r0, r1, r0
sub r0, r0, #1
bx lr
END(strlen)
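
The sub/bic/ands triple that appears in every main loop here is the classic zero-byte detector: subtracting 0x01010101 borrows through any zero byte, bic clears bytes whose high bit was already set, and the final and leaves 0x80 markers where bytes were zero (the lowest zero byte is always marked, which is all the scan needs). In C:

#include <stdint.h>
#include <stdio.h>

static uint32_t zero_byte_mask(uint32_t w) {
    return (w - 0x01010101u) & ~w & 0x80808080u;
}

int main(void) {
    // 0x41004242 has a zero in byte 2; the marker lands on bit 23.
    printf("%08x\n", zero_byte_mask(0x41004242u));  // prints 00800000
    return 0;
}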


@@ -1,10 +1,5 @@
$(call libc-add-cpu-variant-src,MEMCPY,arch-arm/cortex-a9/bionic/memcpy.S)
$(call libc-add-cpu-variant-src,MEMSET,arch-arm/cortex-a9/bionic/memset.S)
$(call libc-add-cpu-variant-src,STRCAT,arch-arm/cortex-a9/bionic/strcat.S)
$(call libc-add-cpu-variant-src,STRCMP,arch-arm/cortex-a9/bionic/strcmp.S)
$(call libc-add-cpu-variant-src,STRCPY,arch-arm/cortex-a9/bionic/strcpy.S)
$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a9/bionic/strlen.S)
$(call libc-add-cpu-variant-src,__STRCAT_CHK,arch-arm/cortex-a9/bionic/__strcat_chk.S)
$(call libc-add-cpu-variant-src,__STRCPY_CHK,arch-arm/cortex-a9/bionic/__strcpy_chk.S)
include bionic/libc/arch-arm/generic/generic.mk


@@ -28,7 +28,6 @@
#include <machine/cpu-features.h>
#include <machine/asm.h>
#include "libc_events.h"
/*
* Optimized memcpy() for ARM.
@@ -37,13 +36,6 @@
* so we have to preserve R0.
*/
ENTRY(__memcpy_chk)
cmp r2, r3
bgt fortify_check_failed
// Fall through to memcpy...
END(__memcpy_chk)
ENTRY(memcpy)
/* The stack must always be 64-bits aligned to be compliant with the
* ARM ABI. Since we have to save R0, we might as well save R4
@@ -385,20 +377,4 @@ copy_last_3_and_return:
add sp, sp, #28
ldmfd sp!, {r0, r4, lr}
bx lr
// Only reached when the __memcpy_chk check fails.
fortify_check_failed:
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
END(memcpy)
.data
error_string:
.string "memcpy buffer overflow"


@@ -27,7 +27,6 @@
*/
#include <machine/asm.h>
#include "libc_events.h"
/*
* Optimized memset() for ARM.
@@ -35,28 +34,9 @@
* memset() returns its first argument.
*/
ENTRY(__memset_chk)
cmp r2, r3
bls done
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
END(__memset_chk)
ENTRY(bzero)
mov r2, r1
mov r1, #0
done:
// Fall through to memset...
END(bzero)
ENTRY(memset)
@@ -127,7 +107,3 @@ ENTRY(memset)
ldmfd sp!, {r0, r4-r7, lr}
bx lr
END(memset)
.data
error_string:
.string "memset buffer overflow"


@@ -1,8 +1,3 @@
$(call libc-add-cpu-variant-src,MEMCPY,arch-arm/generic/bionic/memcpy.S)
$(call libc-add-cpu-variant-src,MEMSET,arch-arm/generic/bionic/memset.S)
$(call libc-add-cpu-variant-src,STRCAT,string/strcat.c)
$(call libc-add-cpu-variant-src,STRCMP,arch-arm/generic/bionic/strcmp.S)
$(call libc-add-cpu-variant-src,STRCPY,arch-arm/generic/bionic/strcpy.S)
$(call libc-add-cpu-variant-src,STRLEN,arch-arm/generic/bionic/strlen.c)
$(call libc-add-cpu-variant-src,__STRCAT_CHK,bionic/__strcat_chk.cpp)
$(call libc-add-cpu-variant-src,__STRCPY_CHK,bionic/__strcpy_chk.cpp)


@@ -1,225 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#include "libc_events.h"
.syntax unified
.thumb
.thumb_func
// Get the length of the src string, then get the length of the dst string.
// Check that the two lengths together don't exceed the threshold, then
// do a memcpy of the data.
ENTRY(__strcat_chk)
.cfi_startproc
pld [r0, #0]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
push {r4, r5}
.save {r4, r5}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
mov lr, r2
// Save the dst register to r5
mov r5, r0
// Zero out r4
eor r4, r4, r4
// r1 contains the address of the string to count.
.L_strlen_start:
mov r0, r1
ands r3, r1, #7
beq .L_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq .L_align_to_32
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
.L_align_to_32:
bcc .L_align_to_64
ands ip, r3, #2
beq .L_align_to_64
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
.L_align_to_64:
tst r3, #4
beq .L_mainloop
ldr r3, [r1], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
.p2align 2
.L_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_update_count_and_finish:
sub r3, r1, r0
sub r3, r3, #1
b .L_finish
.L_zero_in_first_register:
sub r3, r1, r0
lsls r2, ip, #17
bne .L_sub8_and_finish
bcs .L_sub7_and_finish
lsls ip, ip, #1
bne .L_sub6_and_finish
sub r3, r3, #5
b .L_finish
.L_sub8_and_finish:
sub r3, r3, #8
b .L_finish
.L_sub7_and_finish:
sub r3, r3, #7
b .L_finish
.L_sub6_and_finish:
sub r3, r3, #6
b .L_finish
.L_zero_in_second_register:
sub r3, r1, r0
lsls r2, ip, #17
bne .L_sub4_and_finish
bcs .L_sub3_and_finish
lsls ip, ip, #1
bne .L_sub2_and_finish
sub r3, r3, #1
b .L_finish
.L_sub4_and_finish:
sub r3, r3, #4
b .L_finish
.L_sub3_and_finish:
sub r3, r3, #3
b .L_finish
.L_sub2_and_finish:
sub r3, r3, #2
.L_finish:
cmp r4, #0
bne .L_strlen_done
// Time to get the dst string length.
mov r1, r5
// Save the original source address to r5.
mov r5, r0
// Save the current length (adding 1 for the terminator).
add r4, r3, #1
b .L_strlen_start
// r0 holds the pointer to the dst string.
// r3 holds the dst string length.
// r4 holds the src string length + 1.
.L_strlen_done:
add r2, r3, r4
cmp r2, lr
bhi __strcat_chk_failed
// Set up the registers for the memcpy code.
mov r1, r5
pld [r1, #64]
mov r2, r4
add r0, r0, r3
pop {r4, r5}
.cfi_endproc
END(__strcat_chk)
#define MEMCPY_BASE __strcat_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__strcat_chk_failed)
.cfi_startproc
.save {r0, lr}
.save {r4, r5}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__strcat_chk_failed)
.data
error_string:
.string "strcat buffer overflow"


@@ -1,187 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#include "libc_events.h"
.syntax unified
.thumb
.thumb_func
// Get the length of the source string first, then do a memcpy of the data
// instead of a strcpy.
ENTRY(__strcpy_chk)
.cfi_startproc
pld [r0, #0]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
mov lr, r2
mov r0, r1
ands r3, r1, #7
beq .L_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq .L_align_to_32
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
.L_align_to_32:
bcc .L_align_to_64
ands ip, r3, #2
beq .L_align_to_64
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
.L_align_to_64:
tst r3, #4
beq .L_mainloop
ldr r3, [r0], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
.p2align 2
.L_mainloop:
ldrd r2, r3, [r0], #8
pld [r0, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_update_count_and_finish:
sub r3, r0, r1
sub r3, r3, #1
b .L_check_size
.L_zero_in_first_register:
sub r3, r0, r1
lsls r2, ip, #17
bne .L_sub8_and_finish
bcs .L_sub7_and_finish
lsls ip, ip, #1
bne .L_sub6_and_finish
sub r3, r3, #5
b .L_check_size
.L_sub8_and_finish:
sub r3, r3, #8
b .L_check_size
.L_sub7_and_finish:
sub r3, r3, #7
b .L_check_size
.L_sub6_and_finish:
sub r3, r3, #6
b .L_check_size
.L_zero_in_second_register:
sub r3, r0, r1
lsls r2, ip, #17
bne .L_sub4_and_finish
bcs .L_sub3_and_finish
lsls ip, ip, #1
bne .L_sub2_and_finish
sub r3, r3, #1
b .L_check_size
.L_sub4_and_finish:
sub r3, r3, #4
b .L_check_size
.L_sub3_and_finish:
sub r3, r3, #3
b .L_check_size
.L_sub2_and_finish:
sub r3, r3, #2
.L_check_size:
pld [r1, #0]
pld [r1, #64]
ldr r0, [sp]
cmp r3, lr
bhs __strcpy_chk_failed
// Add 1 to the copy length to include the string terminator.
add r2, r3, #1
.cfi_endproc
END(__strcpy_chk)
#define MEMCPY_BASE __strcpy_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__strcpy_chk_failed)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__strcpy_chk_failed)
.data
error_string:
.string "strcpy buffer overflow"


@@ -1,5 +1,5 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,8 +28,8 @@
/* Assumes neon instructions and a cache line size of 32 bytes. */
#include <machine/cpu-features.h>
#include <machine/asm.h>
#include "libc_events.h"
/*
* This code assumes it is running on a processor that supports all arm v7
@@ -38,55 +38,109 @@
*/
.text
.syntax unified
.fpu neon
.thumb
.thumb_func
ENTRY(__memcpy_chk)
.cfi_startproc
cmp r2, r3
bhi __memcpy_chk_fail
// Fall through to memcpy...
.cfi_endproc
END(__memcpy_chk)
#define CACHE_LINE_SIZE 32
ENTRY(memcpy)
.cfi_startproc
pld [r1, #64]
stmfd sp!, {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
.cfi_endproc
.save {r0, lr}
/* start preloading as early as possible */
pld [r1, #(CACHE_LINE_SIZE*0)]
stmfd sp!, {r0, lr}
pld [r1, #(CACHE_LINE_SIZE*2)]
/* do we have at least 16 bytes to copy (needed for alignment below) */
cmp r2, #16
blo 5f
/* align destination to cache-line for the write-buffer */
rsb r3, r0, #0
ands r3, r3, #0xF
beq 0f
/* copy up to 15 bytes (count in r3) */
sub r2, r2, r3
movs ip, r3, lsl #31
ldrmib lr, [r1], #1
strmib lr, [r0], #1
ldrcsb ip, [r1], #1
ldrcsb lr, [r1], #1
strcsb ip, [r0], #1
strcsb lr, [r0], #1
movs ip, r3, lsl #29
bge 1f
// copies 4 bytes, destination 32-bits aligned
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
1: bcc 2f
// copies 8 bytes, destination 64-bits aligned
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
2:
0: /* immediately preload the next cache line, which we may need */
pld [r1, #(CACHE_LINE_SIZE*0)]
pld [r1, #(CACHE_LINE_SIZE*2)]
/* make sure we have at least 64 bytes to copy */
subs r2, r2, #64
blo 2f
/* Preload all the cache lines we need.
* NOTE: The number of pld below depends on CACHE_LINE_SIZE,
* ideally we would increase the distance in the main loop to
* avoid the goofy code below. In practice this doesn't seem to make
* a big difference.
* NOTE: The value CACHE_LINE_SIZE * 8 was chosen through
* experimentation.
*/
pld [r1, #(CACHE_LINE_SIZE*4)]
pld [r1, #(CACHE_LINE_SIZE*6)]
pld [r1, #(CACHE_LINE_SIZE*8)]
1: /* The main loop copies 64 bytes at a time */
vld1.8 {d0 - d3}, [r1]!
vld1.8 {d4 - d7}, [r1]!
pld [r1, #(CACHE_LINE_SIZE*8)]
subs r2, r2, #64
vst1.8 {d0 - d3}, [r0, :128]!
vst1.8 {d4 - d7}, [r0, :128]!
bhs 1b
2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
add r2, r2, #64
subs r2, r2, #32
blo 4f
3: /* 32 bytes at a time. These cache lines were already preloaded */
vld1.8 {d0 - d3}, [r1]!
subs r2, r2, #32
vst1.8 {d0 - d3}, [r0, :128]!
bhs 3b
4: /* less than 32 left */
add r2, r2, #32
tst r2, #0x10
beq 5f
// copies 16 bytes, 128-bits aligned
vld1.8 {d0, d1}, [r1]!
vst1.8 {d0, d1}, [r0, :128]!
5: /* copy up to 15 bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0]!
1: bge 2f
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
2: movs ip, r2, lsl #31
ldrmib r3, [r1], #1
ldrcsb ip, [r1], #1
ldrcsb lr, [r1], #1
strmib r3, [r0], #1
strcsb ip, [r0], #1
strcsb lr, [r0], #1
ldmfd sp!, {r0, lr}
bx lr
END(memcpy)
#define MEMCPY_BASE __memcpy_base
#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__memcpy_chk_fail)
.cfi_startproc
// Preserve lr for backtrace.
push {lr}
.save {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__memcpy_chk_fail)
.data
error_string:
.string "memcpy buffer overflow"
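__memcpy_chk itself is only the cmp r2, r3 / bhi guard before falling through to memcpy: the compiler passes the destination object's known size as a fourth argument. A hedged C sketch of the contract (my_memcpy_chk is a hypothetical name; the real failure path calls __fortify_chk_fail rather than abort):

#include <stdlib.h>
#include <string.h>

/* dst_len is what the compiler knows about the destination,
   typically from __builtin_object_size(dst, 0). */
void* my_memcpy_chk(void* dst, const void* src, size_t count, size_t dst_len) {
    if (count > dst_len) {
        abort();  /* the real code logs "memcpy buffer overflow" first */
    }
    return memcpy(dst, src, count);
}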

View File

@@ -1,127 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* This code assumes it is running on a processor that supports all arm v7
* instructions, that supports neon instructions, and that has a 32 byte
* cache line.
*/
// Assumes neon instructions and a cache line size of 32 bytes.
ENTRY(MEMCPY_BASE)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
/* do we have at least 16 bytes to copy (needed for alignment below) */
cmp r2, #16
blo 5f
/* align destination to cache-line for the write-buffer */
rsb r3, r0, #0
ands r3, r3, #0xF
beq 2f
/* copy up to 15 bytes (count in r3) */
sub r2, r2, r3
movs ip, r3, lsl #31
itt mi
ldrbmi lr, [r1], #1
strbmi lr, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1], #1
strbcs ip, [r0], #1
strbcs lr, [r0], #1
movs ip, r3, lsl #29
bge 1f
// copies 4 bytes, destination 32-bits aligned
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
1: bcc 2f
// copies 8 bytes, destination 64-bits aligned
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
2: /* make sure we have at least 64 bytes to copy */
subs r2, r2, #64
blo 2f
1: /* The main loop copies 64 bytes at a time */
vld1.8 {d0 - d3}, [r1]!
vld1.8 {d4 - d7}, [r1]!
pld [r1, #(32*8)]
subs r2, r2, #64
vst1.8 {d0 - d3}, [r0, :128]!
vst1.8 {d4 - d7}, [r0, :128]!
bhs 1b
2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
adds r2, r2, #32
blo 4f
/* Copy 32 bytes. These cache lines were already preloaded */
vld1.8 {d0 - d3}, [r1]!
sub r2, r2, #32
vst1.8 {d0 - d3}, [r0, :128]!
4: /* less than 32 left */
add r2, r2, #32
tst r2, #0x10
beq 5f
// copies 16 bytes, 128-bits aligned
vld1.8 {d0, d1}, [r1]!
vst1.8 {d0, d1}, [r0, :128]!
5: /* copy up to 15 bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0]!
1: bge 2f
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
2: movs ip, r2, lsl #31
itt mi
ldrbmi r3, [r1], #1
strbmi r3, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1], #1
strbcs ip, [r0], #1
strbcs lr, [r0], #1
ldmfd sp!, {r0, lr}
bx lr
.cfi_endproc
END(MEMCPY_BASE)
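Both memcpy variants share the same tail strategy: rather than a byte loop, the leftover count (0..15) is shifted so its bits land in the condition flags (lsl #29 exposes the 8- and 4-byte bits, lsl #31 the 2- and 1-byte bits), and one fixed-size chunk is copied per set bit. The same logic in C, as a sketch with a hypothetical helper:

#include <stdint.h>
#include <string.h>

/* Copy a tail of n < 16 bytes without a loop: at most one
   8-, 4-, 2- and 1-byte copy, selected by the bits of n. */
static void copy_tail(uint8_t* dst, const uint8_t* src, size_t n) {
    if (n & 8) { memcpy(dst, src, 8); dst += 8; src += 8; }
    if (n & 4) { memcpy(dst, src, 4); dst += 4; src += 4; }
    if (n & 2) { memcpy(dst, src, 2); dst += 2; src += 2; }
    if (n & 1) { *dst = *src; }
}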

View File

@@ -28,7 +28,6 @@
#include <machine/cpu-features.h>
#include <machine/asm.h>
#include "libc_events.h"
/*
* This code assumes it is running on a processor that supports all arm v7
@@ -38,47 +37,15 @@
.fpu neon
ENTRY(__memset_chk)
.cfi_startproc
cmp r2, r3
bls .L_done
// Preserve lr for backtrace.
.save {lr}
push {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
.cfi_endproc
END(__memset_chk)
ENTRY(bzero)
.cfi_startproc
mov r2, r1
mov r1, #0
.L_done:
// Fall through to memset...
.cfi_endproc
END(bzero)
/* memset() returns its first argument. */
ENTRY(memset)
.cfi_startproc
.save {r0}
stmfd sp!, {r0}
.cfi_def_cfa_offset 4
.cfi_rel_offset r0, 0
vdup.8 q0, r1
@@ -111,9 +78,4 @@ ENTRY(memset)
strcsb r1, [r0], #1
ldmfd sp!, {r0}
bx lr
.cfi_endproc
END(memset)
.data
error_string:
.string "memset buffer overflow"
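bzero above is just a two-instruction argument shuffle (length moves from r1 to r2, the fill byte is forced to zero) that falls through into memset. The C equivalent, as a sketch with a hypothetical name (the assembly avoids even the extra call):

#include <string.h>

void my_bzero(void* s, size_t n) {
    memset(s, 0, n);
}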

View File

@@ -123,13 +123,8 @@ ENTRY(strcmp)
.macro init
/* Macro to save temporary registers and prepare magic values. */
subs sp, sp, #16
.cfi_def_cfa_offset 16
strd r4, r5, [sp, #8]
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
strd r6, r7, [sp]
.cfi_rel_offset r6, 8
.cfi_rel_offset r7, 12
mvn r6, #0 /* all F */
mov r7, #0 /* all 0 */
.endm /* init */
@@ -170,20 +165,18 @@ ENTRY(strcmp)
#endif /* not __ARMEB__ */
.endm /* setup_return */
.cfi_startproc
pld [r0, #0]
pld [r1, #0]
/* Are both strings double-word aligned? */
orr ip, r0, r1
tst ip, #7
bne .L_do_align
bne do_align
/* Fast path. */
.save {r4-r7}
init
.L_doubleword_aligned:
doubleword_aligned:
/* Get here when the strings to compare are double-word aligned. */
/* Compare two words in every iteration. */
@@ -196,14 +189,14 @@ ENTRY(strcmp)
ldrd r2, r3, [r0], #8
ldrd r4, r5, [r1], #8
magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
magic_compare_and_branch w1=r2, w2=r4, label=return_24
magic_compare_and_branch w1=r3, w2=r5, label=return_35
b 2b
.L_do_align:
do_align:
/* Is the first string word-aligned? */
ands ip, r0, #3
beq .L_word_aligned_r0
beq word_aligned_r0
/* Fast compare byte by byte until the first string is word-aligned. */
/* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
@@ -211,58 +204,58 @@ ENTRY(strcmp)
bic r0, r0, #3
ldr r2, [r0], #4
lsls ip, ip, #31
beq .L_byte2
bcs .L_byte3
beq byte2
bcs byte3
.L_byte1:
byte1:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE1_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbz reg=r3, label=.L_fast_return
bne fast_return
m_cbz reg=r3, label=fast_return
.L_byte2:
byte2:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE2_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbz reg=r3, label=.L_fast_return
bne fast_return
m_cbz reg=r3, label=fast_return
.L_byte3:
byte3:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE3_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbnz reg=r3, label=.L_word_aligned_r0
bne fast_return
m_cbnz reg=r3, label=word_aligned_r0
.L_fast_return:
fast_return:
mov r0, ip
bx lr
.L_word_aligned_r0:
word_aligned_r0:
init
/* The first string is word-aligned. */
/* Is the second string word-aligned? */
ands ip, r1, #3
bne .L_strcmp_unaligned
bne strcmp_unaligned
.L_word_aligned:
word_aligned:
/* The strings are word-aligned. */
/* Is the first string double-word aligned? */
tst r0, #4
beq .L_doubleword_aligned_r0
beq doubleword_aligned_r0
/* If r0 is not double-word aligned yet, align it by loading
and comparing the next word from each string. */
ldr r2, [r0], #4
ldr r4, [r1], #4
magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
magic_compare_and_branch w1=r2 w2=r4 label=return_24
.L_doubleword_aligned_r0:
doubleword_aligned_r0:
/* Get here when r0 is double-word aligned. */
/* Is r1 doubleword_aligned? */
tst r1, #4
beq .L_doubleword_aligned
beq doubleword_aligned
/* Get here when the strings to compare are word-aligned,
r0 is double-word aligned, but r1 is not double-word aligned. */
@@ -278,9 +271,9 @@ ENTRY(strcmp)
/* Load the next double-word from each string and compare. */
ldrd r2, r3, [r0], #8
magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
magic_compare_and_branch w1=r2 w2=r5 label=return_25
ldrd r4, r5, [r1], #8
magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
magic_compare_and_branch w1=r3 w2=r4 label=return_34
b 3b
.macro miscmp_word offsetlo offsethi
@@ -304,46 +297,46 @@ ENTRY(strcmp)
and r2, r3, r6, S2LOMEM #\offsetlo
it eq
cmpeq r2, r5
bne .L_return_25
bne return_25
ldr r5, [r1], #4
cmp ip, #0
eor r3, r2, r3
S2HIMEM r2, r5, #\offsethi
it eq
cmpeq r3, r2
bne .L_return_32
bne return_32
b 7b
.endm /* miscmp_word */
.L_strcmp_unaligned:
strcmp_unaligned:
/* r0 is word-aligned, r1 is at offset ip from a word. */
/* Align r1 to the (previous) word-boundary. */
bic r1, r1, #3
/* Unaligned comparison word by word using LDRs. */
cmp ip, #2
beq .L_miscmp_word_16 /* If ip == 2. */
bge .L_miscmp_word_24 /* If ip == 3. */
beq miscmp_word_16 /* If ip == 2. */
bge miscmp_word_24 /* If ip == 3. */
miscmp_word offsetlo=8 offsethi=24 /* If ip == 1. */
.L_miscmp_word_24: miscmp_word offsetlo=24 offsethi=8
miscmp_word_24: miscmp_word offsetlo=24 offsethi=8
.L_return_32:
return_32:
setup_return w1=r3, w2=r2
b .L_do_return
.L_return_34:
b do_return
return_34:
setup_return w1=r3, w2=r4
b .L_do_return
.L_return_25:
b do_return
return_25:
setup_return w1=r2, w2=r5
b .L_do_return
.L_return_35:
b do_return
return_35:
setup_return w1=r3, w2=r5
b .L_do_return
.L_return_24:
b do_return
return_24:
setup_return w1=r2, w2=r4
.L_do_return:
do_return:
#ifdef __ARMEB__
mov r0, ip
@@ -355,16 +348,11 @@ ENTRY(strcmp)
ldrd r6, r7, [sp]
ldrd r4, r5, [sp, #8]
adds sp, sp, #16
.cfi_def_cfa_offset 0
.cfi_restore r4
.cfi_restore r5
.cfi_restore r6
.cfi_restore r7
/* There is a zero or a different byte between r1 and r2. */
/* r0 contains a mask of all-zero bytes in r1. */
/* Using r0 and not ip here because cbz requires low register. */
m_cbz reg=r0, label=.L_compute_return_value
m_cbz reg=r0, label=compute_return_value
clz r0, r0
/* r0 contains the number of bits on the left of the first all-zero byte in r1. */
rsb r0, r0, #24
@@ -372,7 +360,7 @@ ENTRY(strcmp)
lsr r1, r1, r0
lsr r2, r2, r0
.L_compute_return_value:
compute_return_value:
movs r0, #1
cmp r1, r2
/* The return value is computed as follows.
@@ -392,7 +380,7 @@ ENTRY(strcmp)
* previous version. See bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S
* for the unedited version of this code.
*/
.L_miscmp_word_16:
miscmp_word_16:
wp1 .req r0
wp2 .req r1
b1 .req r2
@@ -465,11 +453,6 @@ ENTRY(strcmp)
ldrd r6, r7, [sp]
ldrd r4, r5, [sp, #8]
adds sp, sp, #16
.cfi_def_cfa_offset 0
.cfi_restore r4
.cfi_restore r5
.cfi_restore r6
.cfi_restore r7
bx lr
@@ -489,12 +472,6 @@ ENTRY(strcmp)
ldrd r6, r7, [sp]
ldrd r4, r5, [sp, #8]
adds sp, sp, #16
.cfi_def_cfa_offset 0
.cfi_restore r4
.cfi_restore r5
.cfi_restore r6
.cfi_restore r7
bx lr
.cfi_endproc
END(strcmp)
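Whichever path locates the first differing or NUL byte, the return value reduces to comparing two byte values as unsigned quantities; the movs/cmp/sbc epilogue above produces exactly -1, 0, or 1. In C (a sketch; strcmp is only required to return a value with the right sign):

/* Sign of the comparison between the first mismatching bytes. */
static int compare_bytes(unsigned char c1, unsigned char c2) {
    return (c1 > c2) - (c1 < c2);  /* 1, 0, or -1 */
}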

View File

@@ -1,11 +1,5 @@
$(call libc-add-cpu-variant-src,MEMCPY,arch-arm/krait/bionic/memcpy.S)
$(call libc-add-cpu-variant-src,MEMSET,arch-arm/krait/bionic/memset.S)
$(call libc-add-cpu-variant-src,STRCMP,arch-arm/krait/bionic/strcmp.S)
$(call libc-add-cpu-variant-src,__STRCAT_CHK,arch-arm/krait/bionic/__strcat_chk.S)
$(call libc-add-cpu-variant-src,__STRCPY_CHK,arch-arm/krait/bionic/__strcpy_chk.S)
# Use cortex-a15 versions of strcat/strcpy/strlen.
$(call libc-add-cpu-variant-src,STRCAT,arch-arm/cortex-a15/bionic/strcat.S)
$(call libc-add-cpu-variant-src,STRCPY,arch-arm/cortex-a15/bionic/strcpy.S)
$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
include bionic/libc/arch-arm/generic/generic.mk

View File

@@ -126,8 +126,6 @@ syscall_src += arch-arm/syscalls/removexattr.S
syscall_src += arch-arm/syscalls/lremovexattr.S
syscall_src += arch-arm/syscalls/__statfs64.S
syscall_src += arch-arm/syscalls/unshare.S
syscall_src += arch-arm/syscalls/swapon.S
syscall_src += arch-arm/syscalls/swapoff.S
syscall_src += arch-arm/syscalls/pause.S
syscall_src += arch-arm/syscalls/gettimeofday.S
syscall_src += arch-arm/syscalls/settimeofday.S
@@ -146,9 +144,6 @@ syscall_src += arch-arm/syscalls/__timer_getoverrun.S
syscall_src += arch-arm/syscalls/__timer_delete.S
syscall_src += arch-arm/syscalls/utimes.S
syscall_src += arch-arm/syscalls/utimensat.S
syscall_src += arch-arm/syscalls/timerfd_create.S
syscall_src += arch-arm/syscalls/timerfd_settime.S
syscall_src += arch-arm/syscalls/timerfd_gettime.S
syscall_src += arch-arm/syscalls/sigaction.S
syscall_src += arch-arm/syscalls/sigprocmask.S
syscall_src += arch-arm/syscalls/__sigsuspend.S

View File

@@ -1,15 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
#include <linux/err.h>
#include <machine/asm.h>
ENTRY(swapoff)
mov ip, r7
ldr r7, =__NR_swapoff
swi #0
mov r7, ip
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno
END(swapoff)
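Every autogenerated ARM stub follows this shape: preserve r7, load the syscall number, trap with swi, then translate a kernel return in [-MAX_ERRNO, -1] into errno. The cmn/bxls/neg tail corresponds to this C sketch (finish_syscall is a hypothetical helper; MAX_ERRNO is 4095):

#include <errno.h>

/* The kernel returns -errno on failure; libc stores errno and
   returns -1, otherwise passes the result through unchanged. */
static long finish_syscall(long raw) {
    if ((unsigned long)raw >= (unsigned long)-4095L) {
        errno = (int)-raw;
        return -1;
    }
    return raw;
}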

View File

@@ -1,15 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
#include <linux/err.h>
#include <machine/asm.h>
ENTRY(swapon)
mov ip, r7
ldr r7, =__NR_swapon
swi #0
mov r7, ip
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno
END(swapon)

View File

@@ -1,15 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
#include <linux/err.h>
#include <machine/asm.h>
ENTRY(timerfd_create)
mov ip, r7
ldr r7, =__NR_timerfd_create
swi #0
mov r7, ip
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno
END(timerfd_create)

View File

@@ -1,15 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
#include <linux/err.h>
#include <machine/asm.h>
ENTRY(timerfd_gettime)
mov ip, r7
ldr r7, =__NR_timerfd_gettime
swi #0
mov r7, ip
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno
END(timerfd_gettime)

View File

@@ -1,15 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
#include <linux/err.h>
#include <machine/asm.h>
ENTRY(timerfd_settime)
mov ip, r7
ldr r7, =__NR_timerfd_settime
swi #0
mov r7, ip
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno
END(timerfd_settime)

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* Copyright (C) 2010 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,13 +26,17 @@
* SUCH DAMAGE.
*/
extern void __cxa_finalize(void *);
extern void *__dso_handle;

__attribute__((visibility("hidden"),destructor))
void __on_dlclose() {
  __cxa_finalize(&__dso_handle);
}

#include "__dso_handle_so.h"
#include "atexit.h"

# The __dso_handle global variable is used by static
# C++ constructors and destructors in the binary.
# See http://www.codesourcery.com/public/cxx-abi/abi.html#dso-dtor
#
.section .bss
.align 4
#ifndef CRT_LEGACY_WORKAROUND
.hidden __dso_handle
#endif
.globl __dso_handle
__dso_handle:
.long 0

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* Copyright (C) 2010 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,11 +26,13 @@
* SUCH DAMAGE.
*/
extern void *__dso_handle;
extern int __cxa_atexit(void (*func)(void *), void *arg, void *dso);
__attribute__ ((visibility ("hidden")))
int atexit(void (*func)(void))
{
return (__cxa_atexit((void (*)(void *))func, (void *)0, &__dso_handle));
}
# The __dso_handle global variable is used by static
# C++ constructors and destructors in the binary.
# See http://www.codesourcery.com/public/cxx-abi/abi.html#dso-dtor
#
.data
.align 4
.hidden __dso_handle
.globl __dso_handle
__dso_handle:
.long __dso_handle

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,16 +25,19 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _BIONIC_NAME_MEM_H
#define _BIONIC_NAME_MEM_H
#include <sys/cdefs.h>
#include <stddef.h>
__BEGIN_DECLS
int __bionic_name_mem(void *addr, size_t len, const char *name);
__END_DECLS
#endif
.text
.globl atexit
.hidden atexit
.type atexit, @function
.align 4
.ent atexit
atexit:
.set noreorder
.cpload $t9
.set reorder
la $t9, __cxa_atexit
move $a1, $0
la $a2, __dso_handle
j $t9
.size atexit, .-atexit
.end atexit

View File

@@ -0,0 +1,146 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
.text
.align 4
.type __start,@function
.globl __start
.globl _start
# this is the small startup code that is run first when
# any executable that is statically linked with Bionic
# runs.
#
# its purpose is to call __libc_init with appropriate
# arguments, which are:
#
# - the address of the raw data block setup by the Linux
# kernel ELF loader
#
# - address of an "onexit" function, not used on any
# platform supported by Bionic
#
# - address of the "main" function of the program.
#
# - address of the constructor list
#
.ent __start
__start:
_start:
bal 1f
1:
.set noreorder
.cpload $ra
.set reorder
move $a0, $sp
move $a1, $0
la $a2, main
la $a3, 1f
subu $sp, 32
la $t9, __libc_init
j $t9
.end __start
1: .long __PREINIT_ARRAY__
.long __INIT_ARRAY__
.long __FINI_ARRAY__
.section .preinit_array, "aw"
.type __PREINIT_ARRAY__, @object
.globl __PREINIT_ARRAY__
__PREINIT_ARRAY__:
.long -1
.section .init_array, "aw"
.type __INIT_ARRAY__, @object
.globl __INIT_ARRAY__
__INIT_ARRAY__:
.long -1
.section .fini_array, "aw"
.type __FINI_ARRAY__, @object
.globl __FINI_ARRAY__
__FINI_ARRAY__:
.long -1
.long __do_global_dtors_aux
.abicalls
.text
.align 2
.set nomips16
.ent __do_global_dtors_aux
.type __do_global_dtors_aux, @function
__do_global_dtors_aux:
.frame $sp,32,$31 # vars= 0, regs= 1/0, args= 16, gp= 8
.mask 0x80000000,-4
.fmask 0x00000000,0
.set noreorder
.cpload $25
.set nomacro
addiu $sp,$sp,-32
sw $31,28($sp)
.cprestore 16
lw $2,%got(completed.1269)($28)
lbu $2,%lo(completed.1269)($2)
bne $2,$0,$L8
nop
$L4:
lw $2,%got(__cxa_finalize)($28)
beq $2,$0,$L6
nop
lw $2,%got(__dso_handle)($28)
lw $4,0($2)
lw $25,%call16(__cxa_finalize)($28)
.reloc 1f,R_MIPS_JALR,__cxa_finalize
1: jalr $25
nop
lw $28,16($sp)
$L6:
lw $2,%got(completed.1269)($28)
li $3,1 # 0x1
sb $3,%lo(completed.1269)($2)
$L8:
lw $31,28($sp)
addiu $sp,$sp,32
j $31
nop
.set macro
.set reorder
.end __do_global_dtors_aux
.size __do_global_dtors_aux, .-__do_global_dtors_aux
.local completed.1269
.comm completed.1269,1,1
.weak __cxa_finalize
#include "__dso_handle.S"
#include "atexit.S"
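__do_global_dtors_aux is the standard GCC-generated epilogue: call __cxa_finalize(&__dso_handle) at most once, guarded by a one-byte completed flag, and only if __cxa_finalize was linked in at all (it is declared .weak). Its control flow in C, as a sketch (completed.1269 is a compiler-generated name):

extern void* __dso_handle;
extern void __cxa_finalize(void*) __attribute__((weak));

static unsigned char completed;  /* corresponds to completed.1269 */

static void my_do_global_dtors_aux(void) {
    if (completed) return;
    if (__cxa_finalize) __cxa_finalize(&__dso_handle);
    completed = 1;
}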

View File

@@ -1,94 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "../../bionic/libc_init_common.h"
#include <stddef.h>
#include <stdint.h>
__attribute__ ((section (".preinit_array")))
void (*__PREINIT_ARRAY__)(void) = (void (*)(void)) -1;
__attribute__ ((section (".init_array")))
void (*__INIT_ARRAY__)(void) = (void (*)(void)) -1;
__attribute__ ((section (".fini_array")))
void (*__FINI_ARRAY__)(void) = (void (*)(void)) -1;
__LIBC_HIDDEN__ void do_mips_start(void *raw_args) {
structors_array_t array;
array.preinit_array = &__PREINIT_ARRAY__;
array.init_array = &__INIT_ARRAY__;
array.fini_array = &__FINI_ARRAY__;
__libc_init(raw_args, NULL, &main, &array);
}
/*
* This function prepares the return address with a branch-and-link
* instruction (bal) and then uses a .cpload to compute the Global
* Offset Table (GOT) pointer ($gp). The $gp is then used to load
* the address of do_mips_start() into $t9 just before calling it.
* The stack is terminated with a NULL return address.
*/
__asm__ (
" .set push \n"
" \n"
" .text \n"
" .align 4 \n"
" .type __start,@function \n"
" .globl __start \n"
" .globl _start \n"
" \n"
" .ent __start \n"
"__start: \n"
" _start: \n"
" .frame $sp,32,$ra \n"
" .mask 0x80000000,-4 \n"
" \n"
" .set noreorder \n"
" bal 1f \n"
" nop \n"
"1: \n"
" .cpload $ra \n"
" .set reorder \n"
" \n"
" move $a0, $sp \n"
" addiu $sp, $sp, (-32) \n"
" sw $0, 28($sp) \n"
" la $t9, do_mips_start \n"
" jalr $t9 \n"
" \n"
"2: b 2b \n"
" .end __start \n"
" \n"
" .set pop \n"
);
#include "__dso_handle.h"
#include "atexit.h"

View File

@@ -25,31 +25,70 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <errno.h>
#include <sys/mman.h>
#include <unistd.h>

#include "private/ErrnoRestorer.h"

// mmap2(2) is like mmap(2), but the offset is in 4096-byte blocks, not bytes.
extern "C" void* __mmap2(void*, size_t, int, int, int, size_t);

#define MMAP2_SHIFT 12 // 2**12 == 4096

void* mmap(void* addr, size_t size, int prot, int flags, int fd, off_t offset) {
  if (offset & ((1UL << MMAP2_SHIFT)-1)) {
    errno = EINVAL;
    return MAP_FAILED;
  }

  size_t unsigned_offset = static_cast<size_t>(offset); // To avoid sign extension.
  void* result = __mmap2(addr, size, prot, flags, fd, unsigned_offset >> MMAP2_SHIFT);

  if (result != MAP_FAILED && (flags & (MAP_PRIVATE | MAP_ANONYMOUS)) != 0) {
    ErrnoRestorer errno_restorer;
    madvise(result, size, MADV_MERGEABLE);
  }

  return result;
}

.section .init_array, "aw"
.type __INIT_ARRAY__, @object
.globl __INIT_ARRAY__
__INIT_ARRAY__:
.long -1

.section .fini_array, "aw"
.type __FINI_ARRAY__, @object
.globl __FINI_ARRAY__
__FINI_ARRAY__:
.long -1
.long __do_global_dtors_aux

.abicalls
.text
.align 2
.set nomips16
.ent __do_global_dtors_aux
.type __do_global_dtors_aux, @function
__do_global_dtors_aux:
.frame $sp,32,$31 # vars= 0, regs= 1/0, args= 16, gp= 8
.mask 0x80000000,-4
.fmask 0x00000000,0
.set noreorder
.cpload $25
.set nomacro
addiu $sp,$sp,-32
sw $31,28($sp)
.cprestore 16
lw $2,%got(completed.1269)($28)
lbu $2,%lo(completed.1269)($2)
bne $2,$0,$L8
nop
$L4:
lw $2,%got(__cxa_finalize)($28)
beq $2,$0,$L6
nop
lw $2,%got(__dso_handle)($28)
lw $4,0($2)
lw $25,%call16(__cxa_finalize)($28)
.reloc 1f,R_MIPS_JALR,__cxa_finalize
1: jalr $25
nop
lw $28,16($sp)
$L6:
lw $2,%got(completed.1269)($28)
li $3,1 # 0x1
sb $3,%lo(completed.1269)($2)
$L8:
lw $31,28($sp)
addiu $sp,$sp,32
j $31
nop
.set macro
.set reorder
.end __do_global_dtors_aux
.size __do_global_dtors_aux, .-__do_global_dtors_aux
.local completed.1269
.comm completed.1269,1,1
.weak __cxa_finalize

#include "__dso_handle_so.S"
#include "atexit.S"
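The offset & ((1UL << MMAP2_SHIFT)-1) guard in the mmap wrapper above means a caller passing an unaligned offset fails fast with EINVAL, before any kernel entry. A hypothetical illustration (not from the source):

#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>

int try_unaligned_map(int fd) {
    /* 0x1234 is not a multiple of 4096, so the wrapper rejects it. */
    void* p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0x1234);
    if (p == MAP_FAILED && errno == EINVAL) {
        printf("unaligned offset rejected, as expected\n");
        return 0;
    }
    return 1;
}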

View File

@@ -129,8 +129,6 @@ syscall_src += arch-mips/syscalls/removexattr.S
syscall_src += arch-mips/syscalls/lremovexattr.S
syscall_src += arch-mips/syscalls/__statfs64.S
syscall_src += arch-mips/syscalls/unshare.S
syscall_src += arch-mips/syscalls/swapon.S
syscall_src += arch-mips/syscalls/swapoff.S
syscall_src += arch-mips/syscalls/pause.S
syscall_src += arch-mips/syscalls/gettimeofday.S
syscall_src += arch-mips/syscalls/settimeofday.S
@@ -149,9 +147,6 @@ syscall_src += arch-mips/syscalls/__timer_getoverrun.S
syscall_src += arch-mips/syscalls/__timer_delete.S
syscall_src += arch-mips/syscalls/utimes.S
syscall_src += arch-mips/syscalls/utimensat.S
syscall_src += arch-mips/syscalls/timerfd_create.S
syscall_src += arch-mips/syscalls/timerfd_settime.S
syscall_src += arch-mips/syscalls/timerfd_gettime.S
syscall_src += arch-mips/syscalls/sigaction.S
syscall_src += arch-mips/syscalls/sigprocmask.S
syscall_src += arch-mips/syscalls/__sigsuspend.S

View File

@@ -1,22 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
.text
.globl swapoff
.align 4
.ent swapoff
swapoff:
.set noreorder
.cpload $t9
li $v0, __NR_swapoff
syscall
bnez $a3, 1f
move $a0, $v0
j $ra
nop
1:
la $t9,__set_errno
j $t9
nop
.set reorder
.end swapoff

View File

@@ -1,22 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
.text
.globl swapon
.align 4
.ent swapon
swapon:
.set noreorder
.cpload $t9
li $v0, __NR_swapon
syscall
bnez $a3, 1f
move $a0, $v0
j $ra
nop
1:
la $t9,__set_errno
j $t9
nop
.set reorder
.end swapon

View File

@@ -1,22 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
.text
.globl timerfd_create
.align 4
.ent timerfd_create
timerfd_create:
.set noreorder
.cpload $t9
li $v0, __NR_timerfd_create
syscall
bnez $a3, 1f
move $a0, $v0
j $ra
nop
1:
la $t9,__set_errno
j $t9
nop
.set reorder
.end timerfd_create

View File

@@ -1,22 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
.text
.globl timerfd_gettime
.align 4
.ent timerfd_gettime
timerfd_gettime:
.set noreorder
.cpload $t9
li $v0, __NR_timerfd_gettime
syscall
bnez $a3, 1f
move $a0, $v0
j $ra
nop
1:
la $t9,__set_errno
j $t9
nop
.set reorder
.end timerfd_gettime

View File

@@ -1,22 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
.text
.globl timerfd_settime
.align 4
.ent timerfd_settime
timerfd_settime:
.set noreorder
.cpload $t9
li $v0, __NR_timerfd_settime
syscall
bnez $a3, 1f
move $a0, $v0
j $ra
nop
1:
la $t9,__set_errno
j $t9
nop
.set reorder
.end timerfd_settime

View File

@@ -1,4 +1,4 @@
/* $OpenBSD: endian.h,v 1.17 2011/03/12 04:03:04 guenther Exp $ */
/* $OpenBSD: endian.h,v 1.14 2005/12/13 00:35:23 millert Exp $ */
/*-
* Copyright (c) 1997 Niklas Hallqvist. All rights reserved.
@@ -24,28 +24,38 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _MACHINE_ENDIAN_H_
#define _MACHINE_ENDIAN_H_
#ifndef _I386_ENDIAN_H_
#define _I386_ENDIAN_H_
#ifdef __GNUC__
#define __swap32md(x) __statement({ \
#if defined(_KERNEL) && !defined(I386_CPU)
#define __swap32md(x) ({ \
uint32_t __swap32md_x = (x); \
\
__asm ("bswap %0" : "+r" (__swap32md_x)); \
__asm ("bswap %1" : "+r" (__swap32md_x)); \
__swap32md_x; \
})
#else
#define __swap32md(x) ({ \
uint32_t __swap32md_x = (x); \
\
__asm ("rorw $8, %w1; rorl $16, %1; rorw $8, %w1" : \
"+r" (__swap32md_x)); \
__swap32md_x; \
})
#endif /* _KERNEL && !I386_CPU */
#define __swap64md(x) __statement({ \
#define __swap64md(x) ({ \
uint64_t __swap64md_x = (x); \
\
(uint64_t)__swap32md(__swap64md_x >> 32) | \
(uint64_t)__swap32md(__swap64md_x & 0xffffffff) << 32; \
})
#define __swap16md(x) __statement({ \
#define __swap16md(x) ({ \
uint16_t __swap16md_x = (x); \
\
__asm ("rorw $8, %w0" : "+r" (__swap16md_x)); \
__asm ("rorw $8, %w1" : "+r" (__swap16md_x)); \
__swap16md_x; \
})
@@ -58,4 +68,4 @@
#include <sys/types.h>
#include <sys/endian.h>
#endif /* _MACHINE_ENDIAN_H_ */
#endif /* _I386_ENDIAN_H_ */

View File

@@ -0,0 +1,45 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSSE3)
# include "cache_wrapper.S"
# undef __i686
# define MEMCPY bcopy
# define USE_AS_MEMMOVE
# define USE_AS_BCOPY
# include "ssse3-memcpy5.S"
#else
# include "bcopy.S"
#endif

View File

@@ -0,0 +1,43 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSE2)
# include "cache_wrapper.S"
# undef __i686
# define USE_AS_BZERO
# define sse2_memset5_atom bzero
# include "sse2-memset5-atom.S"
#else
# include "bzero.S"
#endif

View File

@@ -0,0 +1,26 @@
/* $OpenBSD: index.S,v 1.4 2005/08/07 11:30:38 espie Exp $ */
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
*/
#include <machine/asm.h>
#ifdef STRCHR
ENTRY(strchr)
#else
ENTRY(index)
#endif
movl 4(%esp),%eax
movb 8(%esp),%cl
.align 2,0x90
L1:
movb (%eax),%dl
cmpb %dl,%cl /* found char??? */
je L2
incl %eax
testb %dl,%dl /* null terminator??? */
jnz L1
xorl %eax,%eax
L2:
ret
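The same source assembles as strchr or its BSD alias index, depending on the STRCHR define. The loop transcribes the portable C version (a sketch with a hypothetical name; note that searching for '\0' must return a pointer to the terminator, which the je L2 before the terminator test gets right):

char* my_strchr(const char* s, int c) {
    for (;; ++s) {
        if (*s == (char)c) return (char*)s;
        if (*s == '\0') return 0;
    }
}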

View File

@@ -0,0 +1,27 @@
/* $OpenBSD: memchr.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
*/
#include <machine/asm.h>
ENTRY(memchr)
pushl %edi
movl 8(%esp),%edi /* string address */
movl 12(%esp),%eax /* set character to search for */
movl 16(%esp),%ecx /* set length of search */
testl %ecx,%ecx /* test for len == 0 */
jz L1
cld /* set search forward */
repne /* search! */
scasb
jne L1 /* scan failed, return null */
leal -1(%edi),%eax /* adjust result of scan */
popl %edi
ret
.align 2,0x90
L1: xorl %eax,%eax
popl %edi
ret
END(memchr)
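This generic i386 memchr is repne scasb plus a fix-up: after a hit, %edi points one past the match, hence the leal -1(%edi). The semantics in C (a sketch):

#include <stddef.h>

void* my_memchr(const void* s, int c, size_t n) {
    const unsigned char* p = (const unsigned char*)s;
    while (n--) {
        if (*p == (unsigned char)c) return (void*)p;
        ++p;
    }
    return NULL;
}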

View File

@@ -28,8 +28,13 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSSE3)
#define USE_AS_STRNCMP
#define STRCMP strncmp
#include "ssse3-strcmp-atom.S"
# define MEMCMP memcmp
# include "ssse3-memcmp3-new.S"
#else
# include "memcmp.S"
#endif

View File

@@ -0,0 +1,43 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSSE3)
# include "cache_wrapper.S"
# undef __i686
# define MEMCPY memcpy
# define USE_AS_MEMMOVE
# include "ssse3-memcpy5.S"
#else
# include "memcpy.S"
#endif

View File

@@ -0,0 +1,43 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSSE3)
# include "cache_wrapper.S"
# undef __i686
# define MEMCPY memmove
# define USE_AS_MEMMOVE
# include "ssse3-memcpy5.S"
#else
# include "memmove.S"
#endif

View File

@@ -0,0 +1,42 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSE2)
# include "cache_wrapper.S"
# undef __i686
# define sse2_memset5_atom memset
# include "sse2-memset5-atom.S"
#else
# include "memset.S"
#endif

View File

@@ -1,32 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define strchr index
#include "sse2-strchr-atom.S"

View File

@@ -1,556 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define ENTRANCE PUSH (%edi);
#define PARMS 8
#define RETURN POP (%edi); ret; CFI_PUSH (%edi);
#define STR1 PARMS
#define STR2 STR1+4
#define LEN STR2+4
.text
ENTRY (memchr)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
mov LEN(%esp), %edx
test %edx, %edx
jz L(return_null)
punpcklbw %xmm1, %xmm1
mov %ecx, %edi
punpcklbw %xmm1, %xmm1
and $63, %ecx
pshufd $0, %xmm1, %xmm1
cmp $48, %ecx
ja L(crosscache)
movdqu (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(match_case2_prolog)
sub $16, %edx
jbe L(return_null)
lea 16(%edi), %edi
and $15, %ecx
and $-16, %edi
add %ecx, %edx
sub $64, %edx
jbe L(exit_loop)
jmp L(loop_prolog)
.p2align 4
L(crosscache):
and $15, %ecx
and $-16, %edi
movdqa (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
sar %cl, %eax
test %eax, %eax
jnz L(match_case2_prolog1)
lea -16(%edx), %edx
add %ecx, %edx
jle L(return_null)
lea 16(%edi), %edi
sub $64, %edx
jbe L(exit_loop)
.p2align 4
L(loop_prolog):
movdqa (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
xor %ecx, %ecx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 16(%edi), %xmm2
pcmpeqb %xmm1, %xmm2
lea 16(%ecx), %ecx
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 32(%edi), %xmm3
pcmpeqb %xmm1, %xmm3
lea 16(%ecx), %ecx
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 48(%edi), %xmm4
pcmpeqb %xmm1, %xmm4
lea 16(%ecx), %ecx
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(match_case1)
lea 64(%edi), %edi
sub $64, %edx
jbe L(exit_loop)
movdqa (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
xor %ecx, %ecx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 16(%edi), %xmm2
pcmpeqb %xmm1, %xmm2
lea 16(%ecx), %ecx
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 32(%edi), %xmm3
pcmpeqb %xmm1, %xmm3
lea 16(%ecx), %ecx
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 48(%edi), %xmm4
pcmpeqb %xmm1, %xmm4
lea 16(%ecx), %ecx
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(match_case1)
lea 64(%edi), %edi
mov %edi, %ecx
and $-64, %edi
and $63, %ecx
add %ecx, %edx
.p2align 4
L(align64_loop):
sub $64, %edx
jbe L(exit_loop)
movdqa (%edi), %xmm0
movdqa 16(%edi), %xmm2
movdqa 32(%edi), %xmm3
movdqa 48(%edi), %xmm4
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm1, %xmm2
pcmpeqb %xmm1, %xmm3
pcmpeqb %xmm1, %xmm4
pmaxub %xmm0, %xmm3
pmaxub %xmm2, %xmm4
pmaxub %xmm3, %xmm4
add $64, %edi
pmovmskb %xmm4, %eax
test %eax, %eax
jz L(align64_loop)
sub $64, %edi
pmovmskb %xmm0, %eax
xor %ecx, %ecx
test %eax, %eax
jnz L(match_case1)
pmovmskb %xmm2, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(match_case1)
movdqa 32(%edi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(match_case1)
pcmpeqb 48(%edi), %xmm1
pmovmskb %xmm1, %eax
lea 16(%ecx), %ecx
.p2align 4
L(match_case1):
add %ecx, %edi
test %al, %al
jz L(match_case1_high)
mov %al, %cl
and $15, %cl
jz L(match_case1_8)
test $0x01, %al
jnz L(exit_case1_1)
test $0x02, %al
jnz L(exit_case1_2)
test $0x04, %al
jnz L(exit_case1_3)
lea 3(%edi), %eax
RETURN
.p2align 4
L(match_case1_8):
test $0x10, %al
jnz L(exit_case1_5)
test $0x20, %al
jnz L(exit_case1_6)
test $0x40, %al
jnz L(exit_case1_7)
lea 7(%edi), %eax
RETURN
.p2align 4
L(match_case1_high):
mov %ah, %ch
and $15, %ch
jz L(match_case1_high_8)
test $0x01, %ah
jnz L(exit_case1_9)
test $0x02, %ah
jnz L(exit_case1_10)
test $0x04, %ah
jnz L(exit_case1_11)
lea 11(%edi), %eax
RETURN
.p2align 4
L(match_case1_high_8):
test $0x10, %ah
jnz L(exit_case1_13)
test $0x20, %ah
jnz L(exit_case1_14)
test $0x40, %ah
jnz L(exit_case1_15)
lea 15(%edi), %eax
RETURN
.p2align 4
L(exit_loop):
add $64, %edx
movdqa (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
xor %ecx, %ecx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(match_case2)
cmp $16, %edx
jbe L(return_null)
movdqa 16(%edi), %xmm2
pcmpeqb %xmm1, %xmm2
lea 16(%ecx), %ecx
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(match_case2)
cmp $32, %edx
jbe L(return_null)
movdqa 32(%edi), %xmm3
pcmpeqb %xmm1, %xmm3
lea 16(%ecx), %ecx
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(match_case2)
cmp $48, %edx
jbe L(return_null)
pcmpeqb 48(%edi), %xmm1
lea 16(%ecx), %ecx
pmovmskb %xmm1, %eax
test %eax, %eax
jnz L(match_case2)
xor %eax, %eax
RETURN
.p2align 4
L(exit_case1_1):
mov %edi, %eax
RETURN
.p2align 4
L(exit_case1_2):
lea 1(%edi), %eax
RETURN
.p2align 4
L(exit_case1_3):
lea 2(%edi), %eax
RETURN
.p2align 4
L(exit_case1_5):
lea 4(%edi), %eax
RETURN
.p2align 4
L(exit_case1_6):
lea 5(%edi), %eax
RETURN
.p2align 4
L(exit_case1_7):
lea 6(%edi), %eax
RETURN
.p2align 4
L(exit_case1_9):
lea 8(%edi), %eax
RETURN
.p2align 4
L(exit_case1_10):
lea 9(%edi), %eax
RETURN
.p2align 4
L(exit_case1_11):
lea 10(%edi), %eax
RETURN
.p2align 4
L(exit_case1_13):
lea 12(%edi), %eax
RETURN
.p2align 4
L(exit_case1_14):
lea 13(%edi), %eax
RETURN
.p2align 4
L(exit_case1_15):
lea 14(%edi), %eax
RETURN
.p2align 4
L(match_case2):
sub %ecx, %edx
L(match_case2_prolog1):
add %ecx, %edi
L(match_case2_prolog):
test %al, %al
jz L(match_case2_high)
mov %al, %cl
and $15, %cl
jz L(match_case2_8)
test $0x01, %al
jnz L(exit_case2_1)
test $0x02, %al
jnz L(exit_case2_2)
test $0x04, %al
jnz L(exit_case2_3)
sub $4, %edx
jb L(return_null)
lea 3(%edi), %eax
RETURN
.p2align 4
L(match_case2_8):
test $0x10, %al
jnz L(exit_case2_5)
test $0x20, %al
jnz L(exit_case2_6)
test $0x40, %al
jnz L(exit_case2_7)
sub $8, %edx
jb L(return_null)
lea 7(%edi), %eax
RETURN
.p2align 4
L(match_case2_high):
mov %ah, %ch
and $15, %ch
jz L(match_case2_high_8)
test $0x01, %ah
jnz L(exit_case2_9)
test $0x02, %ah
jnz L(exit_case2_10)
test $0x04, %ah
jnz L(exit_case2_11)
sub $12, %edx
jb L(return_null)
lea 11(%edi), %eax
RETURN
.p2align 4
L(match_case2_high_8):
test $0x10, %ah
jnz L(exit_case2_13)
test $0x20, %ah
jnz L(exit_case2_14)
test $0x40, %ah
jnz L(exit_case2_15)
sub $16, %edx
jb L(return_null)
lea 15(%edi), %eax
RETURN
.p2align 4
L(exit_case2_1):
mov %edi, %eax
RETURN
.p2align 4
L(exit_case2_2):
sub $2, %edx
jb L(return_null)
lea 1(%edi), %eax
RETURN
.p2align 4
L(exit_case2_3):
sub $3, %edx
jb L(return_null)
lea 2(%edi), %eax
RETURN
.p2align 4
L(exit_case2_5):
sub $5, %edx
jb L(return_null)
lea 4(%edi), %eax
RETURN
.p2align 4
L(exit_case2_6):
sub $6, %edx
jb L(return_null)
lea 5(%edi), %eax
RETURN
.p2align 4
L(exit_case2_7):
sub $7, %edx
jb L(return_null)
lea 6(%edi), %eax
RETURN
.p2align 4
L(exit_case2_9):
sub $9, %edx
jb L(return_null)
lea 8(%edi), %eax
RETURN
.p2align 4
L(exit_case2_10):
sub $10, %edx
jb L(return_null)
lea 9(%edi), %eax
RETURN
.p2align 4
L(exit_case2_11):
sub $11, %edx
jb L(return_null)
lea 10(%edi), %eax
RETURN
.p2align 4
L(exit_case2_13):
sub $13, %edx
jb L(return_null)
lea 12(%edi), %eax
RETURN
.p2align 4
L(exit_case2_14):
sub $14, %edx
jb L(return_null)
lea 13(%edi), %eax
RETURN
.p2align 4
L(exit_case2_15):
sub $15, %edx
jb L(return_null)
lea 14(%edi), %eax
RETURN
.p2align 4
L(return_null):
xor %eax, %eax
RETURN
END (memchr)

View File

@@ -1,778 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 4
#define STR1 PARMS
#define STR2 STR1+4
#define LEN STR2+4
.text
ENTRY (memrchr)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
mov LEN(%esp), %edx
test %edx, %edx
jz L(return_null)
sub $16, %edx
jbe L(length_less16)
punpcklbw %xmm1, %xmm1
add %edx, %ecx
punpcklbw %xmm1, %xmm1
movdqu (%ecx), %xmm0
pshufd $0, %xmm1, %xmm1
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(exit_dispatch)
sub $64, %ecx
mov %ecx, %eax
and $15, %eax
jz L(loop_prolog)
add $16, %ecx
add $16, %edx
and $-16, %ecx
sub %eax, %edx
.p2align 4
/* Loop start on aligned string. */
L(loop_prolog):
sub $64, %edx
jbe L(exit_loop)
movdqa 48(%ecx), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches48)
movdqa 32(%ecx), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches32)
movdqa 16(%ecx), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches16)
movdqa (%ecx), %xmm4
pcmpeqb %xmm1, %xmm4
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(exit_dispatch)
sub $64, %ecx
sub $64, %edx
jbe L(exit_loop)
movdqa 48(%ecx), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches48)
movdqa 32(%ecx), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches32)
movdqa 16(%ecx), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches16)
movdqa (%ecx), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(exit_dispatch)
mov %ecx, %eax
and $63, %eax
test %eax, %eax
jz L(align64_loop)
add $64, %ecx
add $64, %edx
and $-64, %ecx
sub %eax, %edx
.p2align 4
L(align64_loop):
sub $64, %ecx
sub $64, %edx
jbe L(exit_loop)
movdqa (%ecx), %xmm0
movdqa 16(%ecx), %xmm2
movdqa 32(%ecx), %xmm3
movdqa 48(%ecx), %xmm4
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm1, %xmm2
pcmpeqb %xmm1, %xmm3
pcmpeqb %xmm1, %xmm4
pmaxub %xmm3, %xmm0
pmaxub %xmm4, %xmm2
pmaxub %xmm0, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jz L(align64_loop)
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(matches48)
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
movdqa 16(%ecx), %xmm2
pcmpeqb %xmm1, %xmm2
pcmpeqb (%ecx), %xmm1
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
pmovmskb %xmm1, %eax
test %ah, %ah
jnz L(exit_dispatch_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(exit_dispatch_8)
test $0x08, %al
jnz L(exit_4)
test $0x04, %al
jnz L(exit_3)
test $0x02, %al
jnz L(exit_2)
mov %ecx, %eax
ret
.p2align 4
L(exit_loop):
add $64, %edx
cmp $32, %edx
jbe L(exit_loop_32)
movdqa 48(%ecx), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches48)
movdqa 32(%ecx), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches32)
movdqa 16(%ecx), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches16_1)
cmp $48, %edx
jbe L(return_null)
pcmpeqb (%ecx), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
jnz L(matches0_1)
xor %eax, %eax
ret
.p2align 4
L(exit_loop_32):
movdqa 48(%ecx), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches48_1)
cmp $16, %edx
jbe L(return_null)
pcmpeqb 32(%ecx), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
jnz L(matches32_1)
xor %eax, %eax
ret
.p2align 4
L(matches16):
lea 16(%ecx), %ecx
test %ah, %ah
jnz L(exit_dispatch_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(exit_dispatch_8)
test $0x08, %al
jnz L(exit_4)
test $0x04, %al
jnz L(exit_3)
test $0x02, %al
jnz L(exit_2)
mov %ecx, %eax
ret
.p2align 4
L(matches32):
lea 32(%ecx), %ecx
test %ah, %ah
jnz L(exit_dispatch_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(exit_dispatch_8)
test $0x08, %al
jnz L(exit_4)
test $0x04, %al
jnz L(exit_3)
test $0x02, %al
jnz L(exit_2)
mov %ecx, %eax
ret
.p2align 4
L(matches48):
lea 48(%ecx), %ecx
.p2align 4
L(exit_dispatch):
test %ah, %ah
jnz L(exit_dispatch_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(exit_dispatch_8)
test $0x08, %al
jnz L(exit_4)
test $0x04, %al
jnz L(exit_3)
test $0x02, %al
jnz L(exit_2)
mov %ecx, %eax
ret
.p2align 4
L(exit_dispatch_8):
test $0x80, %al
jnz L(exit_8)
test $0x40, %al
jnz L(exit_7)
test $0x20, %al
jnz L(exit_6)
lea 4(%ecx), %eax
ret
.p2align 4
L(exit_dispatch_high):
mov %ah, %dh
and $15 << 4, %dh
jnz L(exit_dispatch_high_8)
test $0x08, %ah
jnz L(exit_12)
test $0x04, %ah
jnz L(exit_11)
test $0x02, %ah
jnz L(exit_10)
lea 8(%ecx), %eax
ret
.p2align 4
L(exit_dispatch_high_8):
test $0x80, %ah
jnz L(exit_16)
test $0x40, %ah
jnz L(exit_15)
test $0x20, %ah
jnz L(exit_14)
lea 12(%ecx), %eax
ret
.p2align 4
L(exit_2):
lea 1(%ecx), %eax
ret
.p2align 4
L(exit_3):
lea 2(%ecx), %eax
ret
.p2align 4
L(exit_4):
lea 3(%ecx), %eax
ret
.p2align 4
L(exit_6):
lea 5(%ecx), %eax
ret
.p2align 4
L(exit_7):
lea 6(%ecx), %eax
ret
.p2align 4
L(exit_8):
lea 7(%ecx), %eax
ret
.p2align 4
L(exit_10):
lea 9(%ecx), %eax
ret
.p2align 4
L(exit_11):
lea 10(%ecx), %eax
ret
.p2align 4
L(exit_12):
lea 11(%ecx), %eax
ret
.p2align 4
L(exit_14):
lea 13(%ecx), %eax
ret
.p2align 4
L(exit_15):
lea 14(%ecx), %eax
ret
.p2align 4
L(exit_16):
lea 15(%ecx), %eax
ret
.p2align 4
L(matches0_1):
lea -64(%edx), %edx
test %ah, %ah
jnz L(exit_dispatch_1_high)
mov %al, %ah
and $15 << 4, %ah
jnz L(exit_dispatch_1_8)
test $0x08, %al
jnz L(exit_1_4)
test $0x04, %al
jnz L(exit_1_3)
test $0x02, %al
jnz L(exit_1_2)
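/* In these bounded paths %edx carries the remaining-length budget:
   adding the candidate bit offset and testing the sign (jl) rejects
   matches that lie beyond the requested length. */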
add $0, %edx
jl L(return_null)
mov %ecx, %eax
ret
.p2align 4
L(matches16_1):
lea -48(%edx), %edx
lea 16(%ecx), %ecx
test %ah, %ah
jnz L(exit_dispatch_1_high)
mov %al, %ah
and $15 << 4, %ah
jnz L(exit_dispatch_1_8)
test $0x08, %al
jnz L(exit_1_4)
test $0x04, %al
jnz L(exit_1_3)
test $0x02, %al
jnz L(exit_1_2)
add $0, %edx
jl L(return_null)
mov %ecx, %eax
ret
.p2align 4
L(matches32_1):
lea -32(%edx), %edx
lea 32(%ecx), %ecx
test %ah, %ah
jnz L(exit_dispatch_1_high)
mov %al, %ah
and $15 << 4, %ah
jnz L(exit_dispatch_1_8)
test $0x08, %al
jnz L(exit_1_4)
test $0x04, %al
jnz L(exit_1_3)
test $0x02, %al
jnz L(exit_1_2)
add $0, %edx
jl L(return_null)
mov %ecx, %eax
ret
.p2align 4
L(matches48_1):
lea -16(%edx), %edx
lea 48(%ecx), %ecx
.p2align 4
L(exit_dispatch_1):
test %ah, %ah
jnz L(exit_dispatch_1_high)
mov %al, %ah
and $15 << 4, %ah
jnz L(exit_dispatch_1_8)
test $0x08, %al
jnz L(exit_1_4)
test $0x04, %al
jnz L(exit_1_3)
test $0x02, %al
jnz L(exit_1_2)
add $0, %edx
jl L(return_null)
mov %ecx, %eax
ret
.p2align 4
L(exit_dispatch_1_8):
test $0x80, %al
jnz L(exit_1_8)
test $0x40, %al
jnz L(exit_1_7)
test $0x20, %al
jnz L(exit_1_6)
add $4, %edx
jl L(return_null)
lea 4(%ecx), %eax
ret
.p2align 4
L(exit_dispatch_1_high):
mov %ah, %al
and $15 << 4, %al
jnz L(exit_dispatch_1_high_8)
test $0x08, %ah
jnz L(exit_1_12)
test $0x04, %ah
jnz L(exit_1_11)
test $0x02, %ah
jnz L(exit_1_10)
add $8, %edx
jl L(return_null)
lea 8(%ecx), %eax
ret
.p2align 4
L(exit_dispatch_1_high_8):
test $0x80, %ah
jnz L(exit_1_16)
test $0x40, %ah
jnz L(exit_1_15)
test $0x20, %ah
jnz L(exit_1_14)
add $12, %edx
jl L(return_null)
lea 12(%ecx), %eax
ret
.p2align 4
L(exit_1_2):
add $1, %edx
jl L(return_null)
lea 1(%ecx), %eax
ret
.p2align 4
L(exit_1_3):
add $2, %edx
jl L(return_null)
lea 2(%ecx), %eax
ret
.p2align 4
L(exit_1_4):
add $3, %edx
jl L(return_null)
lea 3(%ecx), %eax
ret
.p2align 4
L(exit_1_6):
add $5, %edx
jl L(return_null)
lea 5(%ecx), %eax
ret
.p2align 4
L(exit_1_7):
add $6, %edx
jl L(return_null)
lea 6(%ecx), %eax
ret
.p2align 4
L(exit_1_8):
add $7, %edx
jl L(return_null)
lea 7(%ecx), %eax
ret
.p2align 4
L(exit_1_10):
add $9, %edx
jl L(return_null)
lea 9(%ecx), %eax
ret
.p2align 4
L(exit_1_11):
add $10, %edx
jl L(return_null)
lea 10(%ecx), %eax
ret
.p2align 4
L(exit_1_12):
add $11, %edx
jl L(return_null)
lea 11(%ecx), %eax
ret
.p2align 4
L(exit_1_14):
add $13, %edx
jl L(return_null)
lea 13(%ecx), %eax
ret
.p2align 4
L(exit_1_15):
add $14, %edx
jl L(return_null)
lea 14(%ecx), %eax
ret
.p2align 4
L(exit_1_16):
add $15, %edx
jl L(return_null)
lea 15(%ecx), %eax
ret
.p2align 4
L(return_null):
xor %eax, %eax
ret
.p2align 4
L(length_less16_offset0):
mov %dl, %cl
pcmpeqb (%eax), %xmm1
mov $1, %edx
sal %cl, %edx
sub $1, %edx
mov %eax, %ecx
pmovmskb %xmm1, %eax
and %edx, %eax
test %eax, %eax
jnz L(exit_dispatch)
xor %eax, %eax
ret
.p2align 4
L(length_less16):
punpcklbw %xmm1, %xmm1
add $16, %edx
punpcklbw %xmm1, %xmm1
mov %ecx, %eax
pshufd $0, %xmm1, %xmm1
and $15, %ecx
jz L(length_less16_offset0)
PUSH (%edi)
mov %cl, %dh
add %dl, %dh
and $-16, %eax
sub $16, %dh
ja L(length_less16_part2)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edi
sar %cl, %edi
add %ecx, %eax
mov %dl, %cl
mov $1, %edx
sal %cl, %edx
sub $1, %edx
and %edx, %edi
test %edi, %edi
jz L(ret_null)
bsr %edi, %edi
add %edi, %eax
POP (%edi)
ret
CFI_PUSH (%edi)
.p2align 4
L(length_less16_part2):
movdqa 16(%eax), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %edi
mov %cl, %ch
mov %dh, %cl
mov $1, %edx
sal %cl, %edx
sub $1, %edx
and %edx, %edi
test %edi, %edi
jnz L(length_less16_part2_return)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edi
mov %ch, %cl
sar %cl, %edi
test %edi, %edi
jz L(ret_null)
bsr %edi, %edi
add %edi, %eax
xor %ch, %ch
add %ecx, %eax
POP (%edi)
ret
CFI_PUSH (%edi)
.p2align 4
L(length_less16_part2_return):
bsr %edi, %edi
lea 16(%eax, %edi), %eax
POP (%edi)
ret
CFI_PUSH (%edi)
.p2align 4
L(ret_null):
xor %eax, %eax
POP (%edi)
ret
END (memrchr)
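For readers who don't speak SSE2 fluently: the routine above broadcasts the target byte (punpcklbw twice plus pshufd), compares 16 bytes at a time with pcmpeqb, condenses each vector of results into a 16-bit mask with pmovmskb, and takes the highest set bit for the right-most match. A minimal C sketch of that idiom using compiler intrinsics — illustrative only, ignoring the alignment handling, 64-byte unrolling, and length bookkeeping the assembly performs; __builtin_clz is a GCC/Clang builtin standing in for bsr:

#include <emmintrin.h>  /* SSE2 intrinsics */
#include <stddef.h>

static const void *memrchr_sse2_sketch(const void *s, int c, size_t n) {
    const unsigned char *p = (const unsigned char *)s;
    const __m128i needle = _mm_set1_epi8((char)c);  /* broadcast c */
    while (n >= 16) {
        n -= 16;
        /* Scan backwards one 16-byte block at a time. */
        __m128i chunk = _mm_loadu_si128((const __m128i *)(p + n));
        int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, needle));
        if (mask != 0)  /* highest set bit = right-most match (bsr) */
            return p + n + (31 - __builtin_clz((unsigned)mask));
    }
    while (n-- > 0)     /* short tail, byte by byte */
        if (p[n] == (unsigned char)c)
            return p + n;
    return NULL;
}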


@@ -28,9 +28,6 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cache.h"
#undef __i686
#ifndef L
# define L(label) .L##label
#endif
@@ -139,13 +136,9 @@ __i686.get_pc_thunk.bx:
jmp *TABLE(,%ecx,4)
#endif
#ifndef MEMSET
# define MEMSET memset
#endif
.section .text.sse2,"ax",@progbits
ALIGN (4)
ENTRY (MEMSET)
ENTRY (sse2_memset5_atom)
ENTRANCE
movl LEN(%esp), %ecx
@@ -918,4 +911,4 @@ L(aligned_16_15bytes):
SETRTNVAL
RETURN_END
END (MEMSET)
END (sse2_memset5_atom)
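This hunk drops the #ifndef MEMSET / #define MEMSET memset fallback and renames the entry point from the public memset symbol to sse2_memset5_atom. A hedged reading: the Atom-tuned routine now builds under a private name so a dispatch layer (not shown in this diff) can select among implementations. A purely illustrative C sketch of such function-pointer dispatch — every name here, including the CPU probe, is invented for the example:

#include <stddef.h>

typedef void *(*memset_fn)(void *dst, int c, size_t n);

/* Portable fallback; stands in for the generic implementation. */
static void *memset_generic(void *dst, int c, size_t n) {
    unsigned char *p = (unsigned char *)dst;
    while (n--)
        *p++ = (unsigned char)c;
    return dst;
}

/* Placeholder for the hand-written sse2_memset5_atom assembly. */
static void *memset_atom(void *dst, int c, size_t n) {
    return memset_generic(dst, c, n);
}

static int running_on_atom(void) { return 0; }  /* assumed CPU probe */

/* Choose once, e.g. at library init, then call through the pointer. */
static memset_fn resolve_memset(void) {
    return running_on_atom() ? memset_atom : memset_generic;
}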


@@ -1,391 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 8
#define ENTRANCE PUSH(%edi)
#define RETURN POP (%edi); ret; CFI_PUSH (%edi);
#define STR1 PARMS
#define STR2 STR1+4
.text
ENTRY (strchr)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
pxor %xmm2, %xmm2
mov %ecx, %edi
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
/* ECX has OFFSET. */
and $15, %ecx
pshufd $0, %xmm1, %xmm1
je L(loop)
/* Handle unaligned string. */
and $-16, %edi
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm2, %edx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
/* Remove the leading bytes. */
sarl %cl, %edx
sarl %cl, %eax
test %eax, %eax
jz L(unaligned_no_match)
add %ecx, %edi
test %edx, %edx
jz L(match_case1)
jmp L(match_case2)
.p2align 4
L(unaligned_no_match):
test %edx, %edx
jne L(return_null)
pxor %xmm2, %xmm2
add $16, %edi
.p2align 4
/* Loop start on aligned string. */
L(loop):
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
test %edx, %edx
jnz L(return_null)
add $16, %edi
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
test %edx, %edx
jnz L(return_null)
add $16, %edi
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
test %edx, %edx
jnz L(return_null)
add $16, %edi
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
test %edx, %edx
jnz L(return_null)
add $16, %edi
jmp L(loop)
L(matches):
/* There is a match. First find where NULL is. */
test %edx, %edx
jz L(match_case1)
.p2align 4
L(match_case2):
test %al, %al
jz L(match_higth_case2)
mov %al, %cl
and $15, %cl
jnz L(match_case2_4)
mov %dl, %ch
and $15, %ch
jnz L(return_null)
test $0x10, %al
jnz L(Exit5)
test $0x10, %dl
jnz L(return_null)
test $0x20, %al
jnz L(Exit6)
test $0x20, %dl
jnz L(return_null)
test $0x40, %al
jnz L(Exit7)
test $0x40, %dl
jnz L(return_null)
lea 7(%edi), %eax
RETURN
.p2align 4
L(match_case2_4):
test $0x01, %al
jnz L(Exit1)
test $0x01, %dl
jnz L(return_null)
test $0x02, %al
jnz L(Exit2)
test $0x02, %dl
jnz L(return_null)
test $0x04, %al
jnz L(Exit3)
test $0x04, %dl
jnz L(return_null)
lea 3(%edi), %eax
RETURN
.p2align 4
L(match_higth_case2):
test %dl, %dl
jnz L(return_null)
mov %ah, %cl
and $15, %cl
jnz L(match_case2_12)
mov %dh, %ch
and $15, %ch
jnz L(return_null)
test $0x10, %ah
jnz L(Exit13)
test $0x10, %dh
jnz L(return_null)
test $0x20, %ah
jnz L(Exit14)
test $0x20, %dh
jnz L(return_null)
test $0x40, %ah
jnz L(Exit15)
test $0x40, %dh
jnz L(return_null)
lea 15(%edi), %eax
RETURN
.p2align 4
L(match_case2_12):
test $0x01, %ah
jnz L(Exit9)
test $0x01, %dh
jnz L(return_null)
test $0x02, %ah
jnz L(Exit10)
test $0x02, %dh
jnz L(return_null)
test $0x04, %ah
jnz L(Exit11)
test $0x04, %dh
jnz L(return_null)
lea 11(%edi), %eax
RETURN
.p2align 4
L(match_case1):
test %al, %al
jz L(match_higth_case1)
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
test $0x08, %al
jnz L(Exit4)
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
jnz L(Exit6)
test $0x40, %al
jnz L(Exit7)
lea 7(%edi), %eax
RETURN
.p2align 4
L(match_higth_case1):
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
test $0x08, %ah
jnz L(Exit12)
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
jnz L(Exit14)
test $0x40, %ah
jnz L(Exit15)
lea 15(%edi), %eax
RETURN
.p2align 4
L(Exit1):
lea (%edi), %eax
RETURN
.p2align 4
L(Exit2):
lea 1(%edi), %eax
RETURN
.p2align 4
L(Exit3):
lea 2(%edi), %eax
RETURN
.p2align 4
L(Exit4):
lea 3(%edi), %eax
RETURN
.p2align 4
L(Exit5):
lea 4(%edi), %eax
RETURN
.p2align 4
L(Exit6):
lea 5(%edi), %eax
RETURN
.p2align 4
L(Exit7):
lea 6(%edi), %eax
RETURN
.p2align 4
L(Exit9):
lea 8(%edi), %eax
RETURN
.p2align 4
L(Exit10):
lea 9(%edi), %eax
RETURN
.p2align 4
L(Exit11):
lea 10(%edi), %eax
RETURN
.p2align 4
L(Exit12):
lea 11(%edi), %eax
RETURN
.p2align 4
L(Exit13):
lea 12(%edi), %eax
RETURN
.p2align 4
L(Exit14):
lea 13(%edi), %eax
RETURN
.p2align 4
L(Exit15):
lea 14(%edi), %eax
RETURN
.p2align 4
L(return_null):
xor %eax, %eax
RETURN
END (strchr)
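The strchr routine above compares each 16-byte block against the target byte and against NUL in parallel, and whichever mask bit comes first decides the outcome. A hedged C sketch of that double comparison — it uses unaligned loads for brevity, whereas the assembly aligns first so movdqa never crosses a page boundary, and __builtin_ctz is a GCC/Clang builtin:

#include <emmintrin.h>
#include <stddef.h>

static const char *strchr_sse2_sketch(const char *s, int c) {
    const __m128i needle = _mm_set1_epi8((char)c);
    const __m128i zero = _mm_setzero_si128();
    for (;; s += 16) {
        __m128i chunk = _mm_loadu_si128((const __m128i *)s);
        unsigned match = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(chunk, needle));
        unsigned nul   = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(chunk, zero));
        if ((match | nul) != 0) {
            /* Lowest set bit wins: a match at or before the
               terminator succeeds (<= also handles c == '\0'). */
            if (match != 0 && (nul == 0 || __builtin_ctz(match) <= __builtin_ctz(nul)))
                return s + __builtin_ctz(match);
            return NULL;
        }
    }
}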


@@ -1,112 +1,71 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
#define STRLEN sse2_strlen_atom
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
#ifndef L
# define L(label) .L##label
#endif
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef USE_AS_STRCAT
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
# ifndef STRLEN
# define STRLEN strlen
# endif
#ifndef cfi_remember_state
# define cfi_remember_state .cfi_remember_state
#endif
# ifndef L
# define L(label) .L##label
# endif
#ifndef cfi_restore_state
# define cfi_restore_state .cfi_restore_state
#endif
# ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
# endif
# ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
# endif
/* callee-saved registers are only required for strnlen */
# ifdef USE_AS_STRNLEN
# ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
# endif
# ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
# endif
# ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
# endif
# endif
# ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
# endif
#endif
# ifndef END
# define END(name) \
cfi_endproc; \
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
# endif
#endif
# define PARMS 4
# define STR PARMS
# define RETURN ret
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
# ifdef USE_AS_STRNLEN
# define LEN PARMS + 8
# define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
# define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
# undef RETURN
# define RETURN POP (%edi); ret; CFI_PUSH(%edi);
# endif
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 4
#define STR PARMS
#define ENTRANCE
#define RETURN ret
.text
ENTRY (STRLEN)
ENTRANCE
mov STR(%esp), %edx
# ifdef USE_AS_STRNLEN
PUSH (%edi)
movl LEN(%esp), %edi
sub $4, %edi
jbe L(len_less4_prolog)
# endif
#endif
xor %eax, %eax
cmpb $0, (%edx)
jz L(exit_tail0)
@@ -116,12 +75,6 @@ ENTRY (STRLEN)
jz L(exit_tail2)
cmpb $0, 3(%edx)
jz L(exit_tail3)
#ifdef USE_AS_STRNLEN
sub $4, %edi
jbe L(len_less8_prolog)
#endif
cmpb $0, 4(%edx)
jz L(exit_tail4)
cmpb $0, 5(%edx)
@@ -130,12 +83,6 @@ ENTRY (STRLEN)
jz L(exit_tail6)
cmpb $0, 7(%edx)
jz L(exit_tail7)
#ifdef USE_AS_STRNLEN
sub $4, %edi
jbe L(len_less12_prolog)
#endif
cmpb $0, 8(%edx)
jz L(exit_tail8)
cmpb $0, 9(%edx)
@@ -144,12 +91,6 @@ ENTRY (STRLEN)
jz L(exit_tail10)
cmpb $0, 11(%edx)
jz L(exit_tail11)
#ifdef USE_AS_STRNLEN
sub $4, %edi
jbe L(len_less16_prolog)
#endif
cmpb $0, 12(%edx)
jz L(exit_tail12)
cmpb $0, 13(%edx)
@@ -158,532 +99,212 @@ ENTRY (STRLEN)
jz L(exit_tail14)
cmpb $0, 15(%edx)
jz L(exit_tail15)
pxor %xmm0, %xmm0
lea 16(%edx), %eax
mov %eax, %ecx
mov %edx, %eax
mov %edx, %ecx
and $-16, %eax
#ifdef USE_AS_STRNLEN
and $15, %edx
add %edx, %edi
sub $64, %edi
jbe L(len_less64)
#endif
add $16, %ecx
add $16, %eax
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
#ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
#ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
#ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
#ifdef USE_AS_STRNLEN
mov %eax, %edx
and $63, %edx
add %edx, %edi
#endif
and $-0x40, %eax
.p2align 4
L(aligned_64_loop):
#ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
#endif
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
movaps 48(%eax), %xmm6
pminub %xmm1, %xmm0
pminub %xmm6, %xmm2
pminub %xmm0, %xmm2
pcmpeqb %xmm3, %xmm2
pmovmskb %xmm2, %edx
PUSH (%esi)
PUSH (%edi)
PUSH (%ebx)
PUSH (%ebp)
xor %ebp, %ebp
L(aligned_64):
pcmpeqb (%eax), %xmm0
pcmpeqb 16(%eax), %xmm1
pcmpeqb 32(%eax), %xmm2
pcmpeqb 48(%eax), %xmm3
pmovmskb %xmm0, %edx
pmovmskb %xmm1, %esi
pmovmskb %xmm2, %edi
pmovmskb %xmm3, %ebx
or %edx, %ebp
or %esi, %ebp
or %edi, %ebp
or %ebx, %ebp
lea 64(%eax), %eax
jz L(aligned_64)
L(48leave):
test %edx, %edx
jz L(aligned_64_loop)
pcmpeqb -64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 48(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqb -32(%eax), %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqb %xmm6, %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
jnz L(aligned_64_exit_16)
test %esi, %esi
jnz L(aligned_64_exit_32)
test %edi, %edi
jnz L(aligned_64_exit_48)
mov %ebx, %edx
lea (%eax), %eax
jmp L(aligned_64_exit)
L(aligned_64_exit_48):
lea -16(%eax), %eax
mov %edi, %edx
jmp L(aligned_64_exit)
L(aligned_64_exit_32):
lea -32(%eax), %eax
mov %esi, %edx
jmp L(aligned_64_exit)
L(aligned_64_exit_16):
lea -48(%eax), %eax
L(aligned_64_exit):
POP (%ebp)
POP (%ebx)
POP (%edi)
POP (%esi)
L(exit):
sub %ecx, %eax
test %dl, %dl
jz L(exit_high)
mov %dl, %cl
and $15, %cl
jz L(exit_8)
test $0x01, %dl
jnz L(exit_tail0)
test $0x02, %dl
jnz L(exit_tail1)
test $0x04, %dl
jnz L(exit_tail2)
add $3, %eax
RETURN
.p2align 4
L(exit_8):
test $0x08, %dl
jnz L(exit_tail3)
test $0x10, %dl
jnz L(exit_tail4)
test $0x20, %dl
jnz L(exit_tail5)
test $0x40, %dl
jnz L(exit_tail6)
add $7, %eax
RETURN
.p2align 4
L(exit_high):
mov %dh, %ch
and $15, %ch
jz L(exit_high_8)
test $0x01, %dh
jnz L(exit_tail8)
test $0x02, %dh
jnz L(exit_tail9)
test $0x04, %dh
jnz L(exit_tail10)
add $11, %eax
RETURN
.p2align 4
L(exit_high_8):
test $0x10, %dh
jnz L(exit_tail12)
test $0x20, %dh
jnz L(exit_tail13)
test $0x40, %dh
jnz L(exit_tail14)
add $15, %eax
L(exit_tail0):
RETURN
#ifdef USE_AS_STRNLEN
.p2align 4
L(len_less64):
pxor %xmm0, %xmm0
add $64, %edi
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
sub $16, %edi
jbe L(return_start_len)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
sub $16, %edi
jbe L(return_start_len)
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
sub $16, %edi
jbe L(return_start_len)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
#ifndef USE_AS_STRLCAT
movl LEN(%esp), %eax
RETURN
#else
jmp L(return_start_len)
#endif
.p2align 4
L(strnlen_exit):
sub %ecx, %eax
test %dl, %dl
jz L(strnlen_exit_high)
mov %dl, %cl
and $15, %cl
jz L(strnlen_exit_8)
test $0x01, %dl
jnz L(exit_tail0)
test $0x02, %dl
jnz L(strnlen_exit_tail1)
test $0x04, %dl
jnz L(strnlen_exit_tail2)
sub $4, %edi
jb L(return_start_len)
lea 3(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_8):
test $0x10, %dl
jnz L(strnlen_exit_tail4)
test $0x20, %dl
jnz L(strnlen_exit_tail5)
test $0x40, %dl
jnz L(strnlen_exit_tail6)
sub $8, %edi
jb L(return_start_len)
lea 7(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_high):
mov %dh, %ch
and $15, %ch
jz L(strnlen_exit_high_8)
L(exit_high):
add $8, %eax
test $0x01, %dh
jnz L(strnlen_exit_tail8)
jnz L(exit_tail0)
test $0x02, %dh
jnz L(strnlen_exit_tail9)
jnz L(exit_tail1)
test $0x04, %dh
jnz L(strnlen_exit_tail10)
sub $12, %edi
jb L(return_start_len)
lea 11(%eax), %eax
RETURN
jnz L(exit_tail2)
test $0x08, %dh
jnz L(exit_tail3)
.p2align 4
L(strnlen_exit_high_8):
test $0x10, %dh
jnz L(strnlen_exit_tail12)
jnz L(exit_tail4)
test $0x20, %dh
jnz L(strnlen_exit_tail13)
jnz L(exit_tail5)
test $0x40, %dh
jnz L(strnlen_exit_tail14)
sub $16, %edi
jb L(return_start_len)
lea 15(%eax), %eax
jnz L(exit_tail6)
add $7, %eax
RETURN
.p2align 4
L(strnlen_exit_tail1):
sub $2, %edi
jb L(return_start_len)
lea 1(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail2):
sub $3, %edi
jb L(return_start_len)
lea 2(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail4):
sub $5, %edi
jb L(return_start_len)
lea 4(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail5):
sub $6, %edi
jb L(return_start_len)
lea 5(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail6):
sub $7, %edi
jb L(return_start_len)
lea 6(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail8):
sub $9, %edi
jb L(return_start_len)
lea 8(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail9):
sub $10, %edi
jb L(return_start_len)
lea 9(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail10):
sub $11, %edi
jb L(return_start_len)
lea 10(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail12):
sub $13, %edi
jb L(return_start_len)
lea 12(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail13):
sub $14, %edi
jb L(return_start_len)
lea 13(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail14):
sub $15, %edi
jb L(return_start_len)
lea 14(%eax), %eax
RETURN
#ifndef USE_AS_STRLCAT
.p2align 4
L(return_start_len):
movl LEN(%esp), %eax
RETURN
#endif
/* The following short-length paths are used by the prolog only. */
.p2align 4
L(len_less4_prolog):
xor %eax, %eax
add $4, %edi
jz L(exit_tail0)
cmpb $0, (%edx)
jz L(exit_tail0)
cmp $1, %edi
je L(exit_tail1)
cmpb $0, 1(%edx)
jz L(exit_tail1)
cmp $2, %edi
je L(exit_tail2)
cmpb $0, 2(%edx)
jz L(exit_tail2)
cmp $3, %edi
je L(exit_tail3)
cmpb $0, 3(%edx)
jz L(exit_tail3)
mov %edi, %eax
RETURN
.p2align 4
L(len_less8_prolog):
add $4, %edi
cmpb $0, 4(%edx)
jz L(exit_tail4)
cmp $1, %edi
je L(exit_tail5)
cmpb $0, 5(%edx)
jz L(exit_tail5)
cmp $2, %edi
je L(exit_tail6)
cmpb $0, 6(%edx)
jz L(exit_tail6)
cmp $3, %edi
je L(exit_tail7)
cmpb $0, 7(%edx)
jz L(exit_tail7)
mov $8, %eax
RETURN
.p2align 4
L(len_less12_prolog):
add $4, %edi
cmpb $0, 8(%edx)
jz L(exit_tail8)
cmp $1, %edi
je L(exit_tail9)
cmpb $0, 9(%edx)
jz L(exit_tail9)
cmp $2, %edi
je L(exit_tail10)
cmpb $0, 10(%edx)
jz L(exit_tail10)
cmp $3, %edi
je L(exit_tail11)
cmpb $0, 11(%edx)
jz L(exit_tail11)
mov $12, %eax
RETURN
.p2align 4
L(len_less16_prolog):
add $4, %edi
cmpb $0, 12(%edx)
jz L(exit_tail12)
cmp $1, %edi
je L(exit_tail13)
cmpb $0, 13(%edx)
jz L(exit_tail13)
cmp $2, %edi
je L(exit_tail14)
cmpb $0, 14(%edx)
jz L(exit_tail14)
cmp $3, %edi
je L(exit_tail15)
cmpb $0, 15(%edx)
jz L(exit_tail15)
mov $16, %eax
RETURN
#endif
.p2align 4
L(exit_tail1):
add $1, %eax
@@ -743,7 +364,6 @@ L(exit_tail14):
L(exit_tail15):
add $15, %eax
#ifndef USE_AS_STRCAT
RETURN
ret
END (STRLEN)
#endif
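The 64-byte main loop above relies on a pminub trick: the unsigned byte-wise minimum of four vectors contains a zero byte iff any of the four inputs did, so one compare-and-movemask covers 64 bytes. A hedged C sketch of the same structure, with the alignment prolog reduced to a byte-wise loop for clarity (not the shipped implementation):

#include <emmintrin.h>
#include <stddef.h>
#include <stdint.h>

static size_t strlen_sse2_sketch(const char *s) {
    const char *p = s;
    while (((uintptr_t)p & 63) != 0) {  /* reach a 64-byte boundary */
        if (*p == '\0')
            return (size_t)(p - s);
        ++p;
    }
    const __m128i zero = _mm_setzero_si128();
    for (;;) {
        __m128i a = _mm_load_si128((const __m128i *)p);
        __m128i b = _mm_load_si128((const __m128i *)(p + 16));
        __m128i c = _mm_load_si128((const __m128i *)(p + 32));
        __m128i d = _mm_load_si128((const __m128i *)(p + 48));
        /* min of all 64 bytes is zero iff some byte is zero */
        __m128i folded = _mm_min_epu8(_mm_min_epu8(a, b), _mm_min_epu8(c, d));
        if (_mm_movemask_epi8(_mm_cmpeq_epi8(folded, zero)) != 0)
            break;          /* one of the four vectors holds the NUL */
        p += 64;
    }
    while (*p != '\0')      /* pinpoint it byte by byte */
        ++p;
    return (size_t)(p - s);
}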


@@ -1,33 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define USE_AS_STRNLEN 1
#define STRLEN strnlen
#include "sse2-strlen-atom.S"


@@ -1,753 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 8
#define ENTRANCE PUSH(%edi);
#define RETURN POP (%edi); ret; CFI_PUSH (%edi);
#define STR1 PARMS
#define STR2 STR1+4
.text
ENTRY (strrchr)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
pxor %xmm2, %xmm2
mov %ecx, %edi
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
/* ECX has OFFSET. */
and $63, %ecx
pshufd $0, %xmm1, %xmm1
cmp $48, %ecx
ja L(crosscache)
/* Unaligned string. */
movdqu (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm2, %ecx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
add $16, %edi
test %eax, %eax
jnz L(unaligned_match1)
test %ecx, %ecx
jnz L(return_null)
and $-16, %edi
PUSH (%esi)
PUSH (%ebx)
xor %ebx, %ebx
jmp L(loop)
CFI_POP (%esi)
CFI_POP (%ebx)
.p2align 4
L(unaligned_match1):
test %ecx, %ecx
jnz L(prolog_find_zero_1)
PUSH (%esi)
PUSH (%ebx)
mov %eax, %ebx
mov %edi, %esi
and $-16, %edi
jmp L(loop)
CFI_POP (%esi)
CFI_POP (%ebx)
.p2align 4
L(crosscache):
/* Handle unaligned string. */
and $15, %ecx
and $-16, %edi
pxor %xmm3, %xmm3
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm3
pcmpeqb %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm3, %edx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
/* Remove the leading bytes. */
shr %cl, %edx
shr %cl, %eax
add $16, %edi
test %eax, %eax
jnz L(unaligned_match)
test %edx, %edx
jnz L(return_null)
PUSH (%esi)
PUSH (%ebx)
xor %ebx, %ebx
jmp L(loop)
CFI_POP (%esi)
CFI_POP (%ebx)
.p2align 4
L(unaligned_match):
test %edx, %edx
jnz L(prolog_find_zero)
PUSH (%esi)
PUSH (%ebx)
mov %eax, %ebx
lea (%edi, %ecx), %esi
/* Loop start on aligned string. */
.p2align 4
L(loop):
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jz L(loop)
L(matches):
test %eax, %eax
jnz L(match)
L(return_value):
test %ebx, %ebx
jz L(return_null_1)
mov %ebx, %eax
mov %esi, %edi
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(return_null_1):
POP (%ebx)
POP (%esi)
xor %eax, %eax
RETURN
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(match):
pmovmskb %xmm2, %ecx
test %ecx, %ecx
jnz L(find_zero)
mov %eax, %ebx
mov %edi, %esi
jmp L(loop)
.p2align 4
L(find_zero):
test %cl, %cl
jz L(find_zero_high)
mov %cl, %dl
and $15, %dl
jz L(find_zero_8)
test $0x01, %cl
jnz L(FindZeroExit1)
test $0x02, %cl
jnz L(FindZeroExit2)
test $0x04, %cl
jnz L(FindZeroExit3)
and $1 << 4 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(find_zero_8):
test $0x10, %cl
jnz L(FindZeroExit5)
test $0x20, %cl
jnz L(FindZeroExit6)
test $0x40, %cl
jnz L(FindZeroExit7)
and $1 << 8 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(find_zero_high):
mov %ch, %dh
and $15, %dh
jz L(find_zero_high_8)
test $0x01, %ch
jnz L(FindZeroExit9)
test $0x02, %ch
jnz L(FindZeroExit10)
test $0x04, %ch
jnz L(FindZeroExit11)
and $1 << 12 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(find_zero_high_8):
test $0x10, %ch
jnz L(FindZeroExit13)
test $0x20, %ch
jnz L(FindZeroExit14)
test $0x40, %ch
jnz L(FindZeroExit15)
and $1 << 16 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit1):
and $1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit2):
and $1 << 2 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit3):
and $1 << 3 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit5):
and $1 << 5 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit6):
and $1 << 6 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit7):
and $1 << 7 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit9):
and $1 << 9 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit10):
and $1 << 10 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit11):
and $1 << 11 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit13):
and $1 << 13 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit14):
and $1 << 14 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit15):
and $1 << 15 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
.p2align 4
L(match_case1):
test %ah, %ah
jnz L(match_case1_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(match_case1_8)
test $0x08, %al
jnz L(Exit4)
test $0x04, %al
jnz L(Exit3)
test $0x02, %al
jnz L(Exit2)
lea -16(%edi), %eax
RETURN
.p2align 4
L(match_case1_8):
test $0x80, %al
jnz L(Exit8)
test $0x40, %al
jnz L(Exit7)
test $0x20, %al
jnz L(Exit6)
lea -12(%edi), %eax
RETURN
.p2align 4
L(match_case1_high):
mov %ah, %dh
and $15 << 4, %dh
jnz L(match_case1_high_8)
test $0x08, %ah
jnz L(Exit12)
test $0x04, %ah
jnz L(Exit11)
test $0x02, %ah
jnz L(Exit10)
lea -8(%edi), %eax
RETURN
.p2align 4
L(match_case1_high_8):
test $0x80, %ah
jnz L(Exit16)
test $0x40, %ah
jnz L(Exit15)
test $0x20, %ah
jnz L(Exit14)
lea -4(%edi), %eax
RETURN
.p2align 4
L(Exit2):
lea -15(%edi), %eax
RETURN
.p2align 4
L(Exit3):
lea -14(%edi), %eax
RETURN
.p2align 4
L(Exit4):
lea -13(%edi), %eax
RETURN
.p2align 4
L(Exit6):
lea -11(%edi), %eax
RETURN
.p2align 4
L(Exit7):
lea -10(%edi), %eax
RETURN
.p2align 4
L(Exit8):
lea -9(%edi), %eax
RETURN
.p2align 4
L(Exit10):
lea -7(%edi), %eax
RETURN
.p2align 4
L(Exit11):
lea -6(%edi), %eax
RETURN
.p2align 4
L(Exit12):
lea -5(%edi), %eax
RETURN
.p2align 4
L(Exit14):
lea -3(%edi), %eax
RETURN
.p2align 4
L(Exit15):
lea -2(%edi), %eax
RETURN
.p2align 4
L(Exit16):
lea -1(%edi), %eax
RETURN
/* Return NULL. */
.p2align 4
L(return_null):
xor %eax, %eax
RETURN
.p2align 4
L(prolog_find_zero):
add %ecx, %edi
mov %edx, %ecx
L(prolog_find_zero_1):
test %cl, %cl
jz L(prolog_find_zero_high)
mov %cl, %dl
and $15, %dl
jz L(prolog_find_zero_8)
test $0x01, %cl
jnz L(PrologFindZeroExit1)
test $0x02, %cl
jnz L(PrologFindZeroExit2)
test $0x04, %cl
jnz L(PrologFindZeroExit3)
and $1 << 4 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(prolog_find_zero_8):
test $0x10, %cl
jnz L(PrologFindZeroExit5)
test $0x20, %cl
jnz L(PrologFindZeroExit6)
test $0x40, %cl
jnz L(PrologFindZeroExit7)
and $1 << 8 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(prolog_find_zero_high):
mov %ch, %dh
and $15, %dh
jz L(prolog_find_zero_high_8)
test $0x01, %ch
jnz L(PrologFindZeroExit9)
test $0x02, %ch
jnz L(PrologFindZeroExit10)
test $0x04, %ch
jnz L(PrologFindZeroExit11)
and $1 << 12 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(prolog_find_zero_high_8):
test $0x10, %ch
jnz L(PrologFindZeroExit13)
test $0x20, %ch
jnz L(PrologFindZeroExit14)
test $0x40, %ch
jnz L(PrologFindZeroExit15)
and $1 << 16 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit1):
and $1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit2):
and $1 << 2 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit3):
and $1 << 3 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit5):
and $1 << 5 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit6):
and $1 << 6 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit7):
and $1 << 7 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit9):
and $1 << 9 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit10):
and $1 << 10 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit11):
and $1 << 11 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit13):
and $1 << 13 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit14):
and $1 << 14 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit15):
and $1 << 15 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
END (strrchr)
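strrchr scans forward while remembering the most recent block that contained a match; once the terminator appears, only match bits at or below the NUL bit count, and the highest survivor wins. A hedged C sketch of that bookkeeping — unaligned loads stand in for the assembly's cross-cache handling, and __builtin_ctz/__builtin_clz are GCC/Clang builtins:

#include <emmintrin.h>
#include <stddef.h>

static const char *strrchr_sse2_sketch(const char *s, int c) {
    const __m128i needle = _mm_set1_epi8((char)c);
    const __m128i zero = _mm_setzero_si128();
    const char *last_base = NULL;   /* block holding the latest match */
    unsigned last_mask = 0;
    for (;; s += 16) {
        __m128i chunk = _mm_loadu_si128((const __m128i *)s);
        unsigned match = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(chunk, needle));
        unsigned nul   = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(chunk, zero));
        if (nul != 0) {
            /* Keep matches at or before the terminator (the "at"
               case only fires when c == '\0'). */
            match &= (2u << __builtin_ctz(nul)) - 1;
            if (match != 0)
                return s + (31 - __builtin_clz(match));
            if (last_mask != 0)
                return last_base + (31 - __builtin_clz(last_mask));
            return NULL;
        }
        if (match != 0) {           /* remember, keep scanning */
            last_base = s;
            last_mask = match;
        }
    }
}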


@@ -1,267 +0,0 @@
/*
Copyright (c) 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 4
#define STR1 PARMS
#define STR2 STR1+4
.text
ENTRY (wcschr)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
mov %ecx, %eax
punpckldq %xmm1, %xmm1
pxor %xmm2, %xmm2
punpckldq %xmm1, %xmm1
and $63, %eax
cmp $48, %eax
ja L(cross_cache)
movdqu (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jnz L(matches)
and $-16, %ecx
jmp L(loop)
.p2align 4
L(cross_cache):
PUSH (%edi)
mov %ecx, %edi
mov %eax, %ecx
and $-16, %edi
and $15, %ecx
movdqa (%edi), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
sarl %cl, %edx
sarl %cl, %eax
test %eax, %eax
jz L(unaligned_no_match)
add %edi, %ecx
POP (%edi)
test %edx, %edx
jz L(match_case1)
test %al, %al
jz L(match_higth_case2)
test $15, %al
jnz L(match_case2_4)
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
ret
CFI_PUSH (%edi)
.p2align 4
L(unaligned_no_match):
mov %edi, %ecx
POP (%edi)
test %edx, %edx
jnz L(return_null)
pxor %xmm2, %xmm2
/* Loop start on aligned string. */
.p2align 4
L(loop):
add $16, %ecx
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jnz L(matches)
add $16, %ecx
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jnz L(matches)
add $16, %ecx
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jnz L(matches)
add $16, %ecx
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jz L(loop)
.p2align 4
L(matches):
pmovmskb %xmm2, %edx
test %eax, %eax
jz L(return_null)
test %edx, %edx
jz L(match_case1)
.p2align 4
L(match_case2):
test %al, %al
jz L(match_higth_case2)
test $15, %al
jnz L(match_case2_4)
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
ret
.p2align 4
L(match_case2_4):
mov %ecx, %eax
ret
.p2align 4
L(match_higth_case2):
test %dl, %dl
jnz L(return_null)
test $15, %ah
jnz L(match_case2_12)
test $15, %dh
jnz L(return_null)
lea 12(%ecx), %eax
ret
.p2align 4
L(match_case2_12):
lea 8(%ecx), %eax
ret
.p2align 4
L(match_case1):
test %al, %al
jz L(match_higth_case1)
test $0x01, %al
jnz L(exit0)
lea 4(%ecx), %eax
ret
.p2align 4
L(match_higth_case1):
test $0x01, %ah
jnz L(exit3)
lea 12(%ecx), %eax
ret
.p2align 4
L(exit0):
mov %ecx, %eax
ret
.p2align 4
L(exit3):
lea 8(%ecx), %eax
ret
.p2align 4
L(return_null):
xor %eax, %eax
ret
END (wcschr)
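wcschr has the same structure as the byte version, but pcmpeqd compares four 32-bit wchar_t values at once, so every hit sets a nibble (four bits) in the pmovmskb result — hence the element index is the bit index divided by four. A hedged C sketch, assuming the 32-bit wchar_t that bionic uses and substituting unaligned loads for the cross-cache handling:

#include <emmintrin.h>
#include <stddef.h>
#include <wchar.h>

static const wchar_t *wcschr_sse2_sketch(const wchar_t *s, wchar_t c) {
    const __m128i needle = _mm_set1_epi32((int)c);
    const __m128i zero = _mm_setzero_si128();
    for (;; s += 4) {
        __m128i chunk = _mm_loadu_si128((const __m128i *)s);
        unsigned match = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi32(chunk, needle));
        unsigned nul   = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi32(chunk, zero));
        if ((match | nul) != 0) {
            if (match != 0 && (nul == 0 || __builtin_ctz(match) <= __builtin_ctz(nul)))
                return s + __builtin_ctz(match) / 4;  /* nibble -> element */
            return NULL;
        }
    }
}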

File diff suppressed because it is too large.


@@ -1,306 +0,0 @@
/*
Copyright (c) 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef USE_AS_WCSCAT
# ifndef L
# define L(label) .L##label
# endif
# ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
# endif
# ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
# endif
# ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
# endif
# ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
# endif
# define PARMS 4
# define STR PARMS
# define RETURN ret
.text
ENTRY (wcslen)
mov STR(%esp), %edx
#endif
cmp $0, (%edx)
jz L(exit_tail0)
cmp $0, 4(%edx)
jz L(exit_tail1)
cmp $0, 8(%edx)
jz L(exit_tail2)
cmp $0, 12(%edx)
jz L(exit_tail3)
cmp $0, 16(%edx)
jz L(exit_tail4)
cmp $0, 20(%edx)
jz L(exit_tail5)
cmp $0, 24(%edx)
jz L(exit_tail6)
cmp $0, 28(%edx)
jz L(exit_tail7)
pxor %xmm0, %xmm0
lea 32(%edx), %eax
lea -16(%eax), %ecx
and $-16, %eax
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
and $-0x40, %eax
.p2align 4
L(aligned_64_loop):
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
movaps 48(%eax), %xmm6
pminub %xmm1, %xmm0
pminub %xmm6, %xmm2
pminub %xmm0, %xmm2
pcmpeqd %xmm3, %xmm2
pmovmskb %xmm2, %edx
lea 64(%eax), %eax
test %edx, %edx
jz L(aligned_64_loop)
pcmpeqd -64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 48(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqd %xmm1, %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqd -32(%eax), %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqd %xmm6, %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
test %edx, %edx
jnz L(exit)
jmp L(aligned_64_loop)
.p2align 4
L(exit):
sub %ecx, %eax
shr $2, %eax
test %dl, %dl
jz L(exit_high)
mov %dl, %cl
and $15, %cl
jz L(exit_1)
RETURN
.p2align 4
L(exit_high):
mov %dh, %ch
and $15, %ch
jz L(exit_3)
add $2, %eax
RETURN
.p2align 4
L(exit_1):
add $1, %eax
RETURN
.p2align 4
L(exit_3):
add $3, %eax
RETURN
.p2align 4
L(exit_tail0):
xor %eax, %eax
RETURN
.p2align 4
L(exit_tail1):
mov $1, %eax
RETURN
.p2align 4
L(exit_tail2):
mov $2, %eax
RETURN
.p2align 4
L(exit_tail3):
mov $3, %eax
RETURN
.p2align 4
L(exit_tail4):
mov $4, %eax
RETURN
.p2align 4
L(exit_tail5):
mov $5, %eax
RETURN
.p2align 4
L(exit_tail6):
mov $6, %eax
RETURN
.p2align 4
L(exit_tail7):
mov $7, %eax
#ifndef USE_AS_WCSCAT
RETURN
END (wcslen)
#endif
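wcslen is the strlen scan at dword granularity: the final "shr $2, %eax" above converts the byte offset into a wchar_t count. A hedged C sketch of just that core (unaligned loads, none of the prolog unrolling):

#include <emmintrin.h>
#include <stddef.h>
#include <wchar.h>

static size_t wcslen_sse2_sketch(const wchar_t *s) {
    const __m128i zero = _mm_setzero_si128();
    const wchar_t *p = s;
    for (;; p += 4) {
        __m128i chunk = _mm_loadu_si128((const __m128i *)p);
        unsigned nul = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi32(chunk, zero));
        if (nul != 0)   /* bit index / 4 = element index in this block */
            return (size_t)(p - s) + __builtin_ctz(nul) / 4;
    }
}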


@@ -1,402 +0,0 @@
/*
Copyright (c) 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 8
#define ENTRANCE PUSH(%edi);
#define RETURN POP(%edi); ret; CFI_PUSH(%edi);
#define STR1 PARMS
#define STR2 STR1+4
.text
ENTRY (wcsrchr)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
mov %ecx, %edi
punpckldq %xmm1, %xmm1
pxor %xmm2, %xmm2
punpckldq %xmm1, %xmm1
/* ECX has OFFSET. */
and $63, %ecx
cmp $48, %ecx
ja L(crosscache)
/* Unaligned string. */
movdqu (%edi), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm2, %ecx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
add $16, %edi
test %eax, %eax
jnz L(unaligned_match1)
test %ecx, %ecx
jnz L(return_null)
and $-16, %edi
PUSH (%esi)
xor %edx, %edx
jmp L(loop)
CFI_POP (%esi)
.p2align 4
L(unaligned_match1):
test %ecx, %ecx
jnz L(prolog_find_zero_1)
PUSH (%esi)
/* Save the current match. */
mov %eax, %edx
mov %edi, %esi
and $-16, %edi
jmp L(loop)
CFI_POP (%esi)
.p2align 4
L(crosscache):
/* Handle unaligned string. */
and $15, %ecx
and $-16, %edi
pxor %xmm3, %xmm3
movdqa (%edi), %xmm0
pcmpeqd %xmm0, %xmm3
pcmpeqd %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm3, %edx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
/* Remove the leading bytes. */
shr %cl, %edx
shr %cl, %eax
add $16, %edi
test %eax, %eax
jnz L(unaligned_match)
test %edx, %edx
jnz L(return_null)
PUSH (%esi)
xor %edx, %edx
jmp L(loop)
CFI_POP (%esi)
.p2align 4
L(unaligned_match):
test %edx, %edx
jnz L(prolog_find_zero)
PUSH (%esi)
mov %eax, %edx
lea (%edi, %ecx), %esi
/* Loop start on aligned string. */
.p2align 4
L(loop):
movdqa (%edi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %edi
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm3
pcmpeqd %xmm3, %xmm2
add $16, %edi
pcmpeqd %xmm1, %xmm3
pmovmskb %xmm2, %ecx
pmovmskb %xmm3, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm4
pcmpeqd %xmm4, %xmm2
add $16, %edi
pcmpeqd %xmm1, %xmm4
pmovmskb %xmm2, %ecx
pmovmskb %xmm4, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm5
pcmpeqd %xmm5, %xmm2
add $16, %edi
pcmpeqd %xmm1, %xmm5
pmovmskb %xmm2, %ecx
pmovmskb %xmm5, %eax
or %eax, %ecx
jz L(loop)
.p2align 4
L(matches):
test %eax, %eax
jnz L(match)
L(return_value):
test %edx, %edx
jz L(return_null_1)
mov %edx, %eax
mov %esi, %edi
POP (%esi)
test %ah, %ah
jnz L(match_third_or_fourth_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(return_null_1):
POP (%esi)
xor %eax, %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(match):
pmovmskb %xmm2, %ecx
test %ecx, %ecx
jnz L(find_zero)
/* Save match info. */
mov %eax, %edx
mov %edi, %esi
jmp L(loop)
.p2align 4
L(find_zero):
test %cl, %cl
jz L(find_zero_in_third_or_fourth_wchar)
test $15, %cl
jz L(find_zero_in_second_wchar)
and $1, %eax
jz L(return_value)
POP (%esi)
lea -16(%edi), %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(find_zero_in_second_wchar):
and $1 << 5 - 1, %eax
jz L(return_value)
POP (%esi)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(find_zero_in_third_or_fourth_wchar):
test $15, %ch
jz L(find_zero_in_fourth_wchar)
and $1 << 9 - 1, %eax
jz L(return_value)
POP (%esi)
test %ah, %ah
jnz L(match_third_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(find_zero_in_fourth_wchar):
POP (%esi)
test %ah, %ah
jnz L(match_third_or_fourth_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(match_second_wchar):
lea -12(%edi), %eax
RETURN
.p2align 4
L(match_third_or_fourth_wchar):
test $15 << 4, %ah
jnz L(match_fourth_wchar)
lea -8(%edi), %eax
RETURN
.p2align 4
L(match_third_wchar):
lea -8(%edi), %eax
RETURN
.p2align 4
L(match_fourth_wchar):
lea -4(%edi), %eax
RETURN
.p2align 4
L(return_null):
xor %eax, %eax
RETURN
.p2align 4
L(prolog_find_zero):
add %ecx, %edi
mov %edx, %ecx
L(prolog_find_zero_1):
test %cl, %cl
jz L(prolog_find_zero_in_third_or_fourth_wchar)
test $15, %cl
jz L(prolog_find_zero_in_second_wchar)
and $1, %eax
jz L(return_null)
lea -16(%edi), %eax
RETURN
.p2align 4
L(prolog_find_zero_in_second_wchar):
and $1 << 5 - 1, %eax
jz L(return_null)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
.p2align 4
L(prolog_find_zero_in_third_or_fourth_wchar):
test $15, %ch
jz L(prolog_find_zero_in_fourth_wchar)
and $1 << 9 - 1, %eax
jz L(return_null)
test %ah, %ah
jnz L(match_third_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
.p2align 4
L(prolog_find_zero_in_fourth_wchar):
test %ah, %ah
jnz L(match_third_or_fourth_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
END (wcsrchr)
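For orientation, the SSE2 routine above vectorizes the classic one-pass wcsrchr: scan forward, remember the most recent match, and stop at the terminator. A minimal C sketch of those semantics (the name wcsrchr_ref is hypothetical, not bionic's):

#include <stddef.h>
#include <wchar.h>

/* Reference semantics only: the assembly does this 4 wchar_t at a
   time, using pcmpeqd to build per-block "match" and "NUL" masks. */
wchar_t *wcsrchr_ref(const wchar_t *s, wchar_t c) {
    const wchar_t *last = NULL;
    for (;; ++s) {
        if (*s == c)
            last = s;            /* remember the latest occurrence */
        if (*s == L'\0')
            return (wchar_t *)last;
    }
}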

View File

@@ -1,37 +0,0 @@
/*
Copyright (c) 2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define MEMCMP __memcmp16
/* int __memcmp16(const unsigned short *ptr1, const unsigned short *ptr2, size_t n); */
#define USE_UTF16
#define USE_AS_MEMCMP16 1
#include "ssse3-memcmp-atom.S"

View File

@@ -28,11 +28,8 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cache.h"
#undef __i686
#ifndef MEMCPY
# define MEMCPY memcpy
# define MEMCPY ssse3_memcpy5
#endif
#ifndef L

View File

@@ -1,620 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef cfi_remember_state
# define cfi_remember_state .cfi_remember_state
#endif
#ifndef cfi_restore_state
# define cfi_restore_state .cfi_restore_state
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#ifndef STRCAT
# define STRCAT strcat
#endif
#define PARMS 4
#define STR1 PARMS+4
#define STR2 STR1+4
#ifdef USE_AS_STRNCAT
# define LEN STR2+8
#endif
#define USE_AS_STRCAT
.section .text.ssse3,"ax",@progbits
ENTRY (STRCAT)
PUSH (%edi)
mov STR1(%esp), %edi
mov %edi, %edx
#define RETURN jmp L(StrcpyAtom)
#include "sse2-strlen-atom.S"
L(StrcpyAtom):
mov STR2(%esp), %ecx
lea (%edi, %eax), %edx
#ifdef USE_AS_STRNCAT
PUSH (%ebx)
mov LEN(%esp), %ebx
test %ebx, %ebx
jz L(StrncatExit0)
cmp $8, %ebx
jbe L(StrncpyExit8Bytes)
#endif
cmpb $0, (%ecx)
jz L(Exit1)
cmpb $0, 1(%ecx)
jz L(Exit2)
cmpb $0, 2(%ecx)
jz L(Exit3)
cmpb $0, 3(%ecx)
jz L(Exit4)
cmpb $0, 4(%ecx)
jz L(Exit5)
cmpb $0, 5(%ecx)
jz L(Exit6)
cmpb $0, 6(%ecx)
jz L(Exit7)
cmpb $0, 7(%ecx)
jz L(Exit8)
cmpb $0, 8(%ecx)
jz L(Exit9)
#ifdef USE_AS_STRNCAT
cmp $16, %ebx
jb L(StrncpyExit15Bytes)
#endif
cmpb $0, 9(%ecx)
jz L(Exit10)
cmpb $0, 10(%ecx)
jz L(Exit11)
cmpb $0, 11(%ecx)
jz L(Exit12)
cmpb $0, 12(%ecx)
jz L(Exit13)
cmpb $0, 13(%ecx)
jz L(Exit14)
cmpb $0, 14(%ecx)
jz L(Exit15)
cmpb $0, 15(%ecx)
jz L(Exit16)
#ifdef USE_AS_STRNCAT
cmp $16, %ebx
je L(StrncatExit16)
# define RETURN1 POP (%ebx); POP (%edi); ret; \
CFI_PUSH (%ebx); CFI_PUSH (%edi)
# define USE_AS_STRNCPY
#else
# define RETURN1 POP(%edi); ret; CFI_PUSH(%edi)
#endif
#include "ssse3-strcpy-atom.S"
.p2align 4
L(CopyFrom1To16Bytes):
add %esi, %edx
add %esi, %ecx
POP (%esi)
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
test $0x08, %al
jnz L(Exit4)
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
jnz L(Exit6)
test $0x40, %al
jnz L(Exit7)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(ExitHigh):
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
test $0x08, %ah
jnz L(Exit12)
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
jnz L(Exit14)
test $0x40, %ah
jnz L(Exit15)
movlpd (%ecx), %xmm0
movlpd 8(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit1):
movb %bh, 1(%edx)
L(Exit1):
movb (%ecx), %al
movb %al, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit2):
movb %bh, 2(%edx)
L(Exit2):
movw (%ecx), %ax
movw %ax, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit3):
movb %bh, 3(%edx)
L(Exit3):
movw (%ecx), %ax
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit4):
movb %bh, 4(%edx)
L(Exit4):
movl (%ecx), %eax
movl %eax, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit5):
movb %bh, 5(%edx)
L(Exit5):
movl (%ecx), %eax
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit6):
movb %bh, 6(%edx)
L(Exit6):
movl (%ecx), %eax
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit7):
movb %bh, 7(%edx)
L(Exit7):
movl (%ecx), %eax
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit8):
movb %bh, 8(%edx)
L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit9):
movb %bh, 9(%edx)
L(Exit9):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movb 8(%ecx), %al
movb %al, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit10):
movb %bh, 10(%edx)
L(Exit10):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movw 8(%ecx), %ax
movw %ax, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit11):
movb %bh, 11(%edx)
L(Exit11):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 7(%ecx), %eax
movl %eax, 7(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit12):
movb %bh, 12(%edx)
L(Exit12):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit13):
movb %bh, 13(%edx)
L(Exit13):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
movlpd %xmm0, 5(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit14):
movb %bh, 14(%edx)
L(Exit14):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
movlpd %xmm0, 6(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit15):
movb %bh, 15(%edx)
L(Exit15):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit16):
movb %bh, 16(%edx)
L(Exit16):
movlpd (%ecx), %xmm0
movlpd 8(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 8(%edx)
movl %edi, %eax
RETURN1
#ifdef USE_AS_STRNCPY
CFI_PUSH(%esi)
.p2align 4
L(CopyFrom1To16BytesCase2):
add $16, %ebx
add %esi, %ecx
lea (%esi, %edx), %esi
lea -9(%ebx), %edx
and $1<<7, %dh
or %al, %dh
lea (%esi), %edx
POP (%esi)
jz L(ExitHighCase2)
test $0x01, %al
jnz L(Exit1)
cmp $1, %ebx
je L(StrncatExit1)
test $0x02, %al
jnz L(Exit2)
cmp $2, %ebx
je L(StrncatExit2)
test $0x04, %al
jnz L(Exit3)
cmp $3, %ebx
je L(StrncatExit3)
test $0x08, %al
jnz L(Exit4)
cmp $4, %ebx
je L(StrncatExit4)
test $0x10, %al
jnz L(Exit5)
cmp $5, %ebx
je L(StrncatExit5)
test $0x20, %al
jnz L(Exit6)
cmp $6, %ebx
je L(StrncatExit6)
test $0x40, %al
jnz L(Exit7)
cmp $7, %ebx
je L(StrncatExit7)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
lea 7(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
xor %cl, %cl
movb %cl, (%eax)
movl %edi, %eax
RETURN1
.p2align 4
L(ExitHighCase2):
test $0x01, %ah
jnz L(Exit9)
cmp $9, %ebx
je L(StrncatExit9)
test $0x02, %ah
jnz L(Exit10)
cmp $10, %ebx
je L(StrncatExit10)
test $0x04, %ah
jnz L(Exit11)
cmp $11, %ebx
je L(StrncatExit11)
test $0x8, %ah
jnz L(Exit12)
cmp $12, %ebx
je L(StrncatExit12)
test $0x10, %ah
jnz L(Exit13)
cmp $13, %ebx
je L(StrncatExit13)
test $0x20, %ah
jnz L(Exit14)
cmp $14, %ebx
je L(StrncatExit14)
test $0x40, %ah
jnz L(Exit15)
cmp $15, %ebx
je L(StrncatExit15)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm1
movlpd %xmm1, 8(%edx)
movl %edi, %eax
RETURN1
CFI_PUSH(%esi)
L(CopyFrom1To16BytesCase2OrCase3):
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
.p2align 4
L(CopyFrom1To16BytesCase3):
add $16, %ebx
add %esi, %edx
add %esi, %ecx
POP (%esi)
cmp $8, %ebx
ja L(ExitHighCase3)
cmp $1, %ebx
je L(StrncatExit1)
cmp $2, %ebx
je L(StrncatExit2)
cmp $3, %ebx
je L(StrncatExit3)
cmp $4, %ebx
je L(StrncatExit4)
cmp $5, %ebx
je L(StrncatExit5)
cmp $6, %ebx
je L(StrncatExit6)
cmp $7, %ebx
je L(StrncatExit7)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movb %bh, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(ExitHighCase3):
cmp $9, %ebx
je L(StrncatExit9)
cmp $10, %ebx
je L(StrncatExit10)
cmp $11, %ebx
je L(StrncatExit11)
cmp $12, %ebx
je L(StrncatExit12)
cmp $13, %ebx
je L(StrncatExit13)
cmp $14, %ebx
je L(StrncatExit14)
cmp $15, %ebx
je L(StrncatExit15)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm1
movlpd %xmm1, 8(%edx)
movb %bh, 16(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit0):
movl %edi, %eax
RETURN1
.p2align 4
L(StrncpyExit15Bytes):
cmp $9, %ebx
je L(StrncatExit9)
cmpb $0, 9(%ecx)
jz L(Exit10)
cmp $10, %ebx
je L(StrncatExit10)
cmpb $0, 10(%ecx)
jz L(Exit11)
cmp $11, %ebx
je L(StrncatExit11)
cmpb $0, 11(%ecx)
jz L(Exit12)
cmp $12, %ebx
je L(StrncatExit12)
cmpb $0, 12(%ecx)
jz L(Exit13)
cmp $13, %ebx
je L(StrncatExit13)
cmpb $0, 13(%ecx)
jz L(Exit14)
cmp $14, %ebx
je L(StrncatExit14)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
lea 14(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
movb %bh, (%eax)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncpyExit8Bytes):
cmpb $0, (%ecx)
jz L(Exit1)
cmp $1, %ebx
je L(StrncatExit1)
cmpb $0, 1(%ecx)
jz L(Exit2)
cmp $2, %ebx
je L(StrncatExit2)
cmpb $0, 2(%ecx)
jz L(Exit3)
cmp $3, %ebx
je L(StrncatExit3)
cmpb $0, 3(%ecx)
jz L(Exit4)
cmp $4, %ebx
je L(StrncatExit4)
cmpb $0, 4(%ecx)
jz L(Exit5)
cmp $5, %ebx
je L(StrncatExit5)
cmpb $0, 5(%ecx)
jz L(Exit6)
cmp $6, %ebx
je L(StrncatExit6)
cmpb $0, 6(%ecx)
jz L(Exit7)
cmp $7, %ebx
je L(StrncatExit7)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
lea 7(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
movb %bh, (%eax)
movl %edi, %eax
RETURN1
#endif
END (STRCAT)
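When built with USE_AS_STRNCAT, the removed template above implements the standard bounded append: at most LEN source bytes, always NUL-terminated. The Exit1..Exit16 ladder settles short strings before the 16-byte SSE loop is ever entered. A reference sketch of the semantics (strncat_ref is a hypothetical name):

#include <stddef.h>
#include <string.h>

char *strncat_ref(char *dst, const char *src, size_t n) {
    char *d = dst + strlen(dst);   /* the inlined sse2-strlen-atom.S step */
    size_t i;
    for (i = 0; i < n && src[i] != '\0'; ++i)
        d[i] = src[i];
    d[i] = '\0';                   /* strncat always terminates */
    return dst;
}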

View File

@@ -107,12 +107,8 @@ name: \
sub %esi, %ebp
#endif
#ifndef STRCMP
# define STRCMP strcmp
#endif
.section .text.ssse3,"ax",@progbits
ENTRY (STRCMP)
ENTRY (ssse3_strcmp_latest)
#ifdef USE_AS_STRNCMP
PUSH (%ebp)
#endif
@@ -2275,4 +2271,4 @@ L(less16bytes_sncmp):
ret
#endif
END (STRCMP)
END (ssse3_strcmp_latest)

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,34 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define STRCAT strncat
#define USE_AS_STRNCAT
#include "ssse3-strcat-atom.S"

View File

@@ -1,33 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define USE_AS_STRNCPY
#define STRCPY strncpy
#include "ssse3-strcpy-atom.S"

View File

@@ -1,114 +0,0 @@
/*
Copyright (c) 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 4
#define STR1 PARMS+4
#define STR2 STR1+4
#define USE_AS_WCSCAT
.text
ENTRY (wcscat)
PUSH (%edi)
mov STR1(%esp), %edi
mov %edi, %edx
#define RETURN jmp L(WcscpyAtom)
#include "sse2-wcslen-atom.S"
L(WcscpyAtom):
shl $2, %eax
mov STR2(%esp), %ecx
lea (%edi, %eax), %edx
cmp $0, (%ecx)
jz L(Exit4)
cmp $0, 4(%ecx)
jz L(Exit8)
cmp $0, 8(%ecx)
jz L(Exit12)
cmp $0, 12(%ecx)
jz L(Exit16)
#undef RETURN
#define RETURN POP(%edi); ret; CFI_PUSH(%edi)
#include "ssse3-wcscpy-atom.S"
END (wcscat)
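The removed wcscat above runs the inlined wcslen on the destination (the shl $2, %eax scales the wchar_t count to a byte offset) and then falls through into the wcscpy body. Equivalent semantics in C (wcscat_ref is a made-up name):

#include <wchar.h>

wchar_t *wcscat_ref(wchar_t *dst, const wchar_t *src) {
    wchar_t *d = dst + wcslen(dst);   /* the sse2-wcslen-atom.S step */
    while ((*d++ = *src++) != L'\0')
        ;                             /* copy through the terminator */
    return dst;
}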

View File

@@ -1,652 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef USE_AS_WCSCAT
# ifndef L
# define L(label) .L##label
# endif
# ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
# endif
# ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
# endif
# ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
# endif
# ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
# endif
# ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
# endif
# ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
# endif
# ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
# endif
# define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
# define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
# define PARMS 4
# define RETURN POP (%edi); ret; CFI_PUSH (%edi)
# define STR1 PARMS
# define STR2 STR1+4
# define LEN STR2+4
.text
ENTRY (wcscpy)
mov STR1(%esp), %edx
mov STR2(%esp), %ecx
cmp $0, (%ecx)
jz L(ExitTail4)
cmp $0, 4(%ecx)
jz L(ExitTail8)
cmp $0, 8(%ecx)
jz L(ExitTail12)
cmp $0, 12(%ecx)
jz L(ExitTail16)
PUSH (%edi)
mov %edx, %edi
#endif
PUSH (%esi)
lea 16(%ecx), %esi
and $-16, %esi
pxor %xmm0, %xmm0
pcmpeqd (%esi), %xmm0
movdqu (%ecx), %xmm1
movdqu %xmm1, (%edx)
pmovmskb %xmm0, %eax
sub %ecx, %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
mov %edx, %eax
lea 16(%edx), %edx
and $-16, %edx
sub %edx, %eax
sub %eax, %ecx
mov %ecx, %eax
and $0xf, %eax
mov $0, %esi
jz L(Align16Both)
cmp $4, %eax
je L(Shl4)
cmp $8, %eax
je L(Shl8)
jmp L(Shl12)
L(Align16Both):
movaps (%ecx), %xmm1
movaps 16(%ecx), %xmm2
movaps %xmm1, (%edx)
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm4
movaps %xmm3, (%edx, %esi)
pcmpeqd %xmm4, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm1
movaps %xmm4, (%edx, %esi)
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm2
movaps %xmm1, (%edx, %esi)
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps %xmm3, (%edx, %esi)
mov %ecx, %eax
lea 16(%ecx, %esi), %ecx
and $-0x40, %ecx
sub %ecx, %eax
sub %eax, %edx
mov $-0x40, %esi
L(Aligned64Loop):
movaps (%ecx), %xmm2
movaps 32(%ecx), %xmm3
movaps %xmm2, %xmm4
movaps 16(%ecx), %xmm5
movaps %xmm3, %xmm6
movaps 48(%ecx), %xmm7
pminub %xmm5, %xmm2
pminub %xmm7, %xmm3
pminub %xmm2, %xmm3
lea 64(%edx), %edx
pcmpeqd %xmm0, %xmm3
lea 64(%ecx), %ecx
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(Aligned64Leave)
movaps %xmm4, -64(%edx)
movaps %xmm5, -48(%edx)
movaps %xmm6, -32(%edx)
movaps %xmm7, -16(%edx)
jmp L(Aligned64Loop)
L(Aligned64Leave):
pcmpeqd %xmm4, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
pcmpeqd %xmm5, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm4, -64(%edx)
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
pcmpeqd %xmm6, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm5, -48(%edx)
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps %xmm6, -32(%edx)
pcmpeqd %xmm7, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
mov $-0x40, %esi
movaps %xmm7, -16(%edx)
jmp L(Aligned64Loop)
.p2align 4
L(Shl4):
movaps -4(%ecx), %xmm1
movaps 12(%ecx), %xmm2
L(Shl4Start):
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 28(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -12(%ecx), %ecx
sub %eax, %edx
movaps -4(%ecx), %xmm1
L(Shl4LoopStart):
movaps 12(%ecx), %xmm2
movaps 28(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 44(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 60(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqd %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $4, %xmm4, %xmm5
palignr $4, %xmm3, %xmm4
test %eax, %eax
jnz L(Shl4Start)
palignr $4, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
movlpd (%ecx), %xmm0
movl 8(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 8(%edx)
POP (%esi)
add $12, %edx
add $12, %ecx
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit4)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(Shl8):
movaps -8(%ecx), %xmm1
movaps 8(%ecx), %xmm2
L(Shl8Start):
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 24(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -8(%ecx), %ecx
sub %eax, %edx
movaps -8(%ecx), %xmm1
L(Shl8LoopStart):
movaps 8(%ecx), %xmm2
movaps 24(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 40(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 56(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqd %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $8, %xmm4, %xmm5
palignr $8, %xmm3, %xmm4
test %eax, %eax
jnz L(Shl8Start)
palignr $8, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
POP (%esi)
add $8, %edx
add $8, %ecx
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit4)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(Shl12):
movaps -12(%ecx), %xmm1
movaps 4(%ecx), %xmm2
L(Shl12Start):
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 20(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -4(%ecx), %ecx
sub %eax, %edx
movaps -12(%ecx), %xmm1
L(Shl12LoopStart):
movaps 4(%ecx), %xmm2
movaps 20(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 36(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 52(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqd %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $12, %xmm4, %xmm5
palignr $12, %xmm3, %xmm4
test %eax, %eax
jnz L(Shl12Start)
palignr $12, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
movl (%ecx), %esi
movl %esi, (%edx)
mov $4, %esi
.p2align 4
L(CopyFrom1To16Bytes):
add %esi, %edx
add %esi, %ecx
POP (%esi)
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit4)
L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
.p2align 4
L(ExitHigh):
test $0x01, %ah
jnz L(Exit12)
L(Exit16):
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
movl %edi, %eax
RETURN
.p2align 4
L(Exit4):
movl (%ecx), %eax
movl %eax, (%edx)
movl %edi, %eax
RETURN
.p2align 4
L(Exit12):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
movl %edi, %eax
RETURN
CFI_POP (%edi)
.p2align 4
L(ExitTail4):
movl (%ecx), %eax
movl %eax, (%edx)
movl %edx, %eax
ret
.p2align 4
L(ExitTail8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edx, %eax
ret
.p2align 4
L(ExitTail12):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
movl %edx, %eax
ret
.p2align 4
L(ExitTail16):
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
movl %edx, %eax
ret
#ifndef USE_AS_WCSCAT
END (wcscpy)
#endif
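The Shl4/Shl8/Shl12 paths in the removed wcscpy above exist because, once the destination is 16-byte aligned, the source may still be offset by 4, 8, or 12 bytes; each path reads aligned blocks and stitches them back together with palignr. The observable behavior is just the usual copy loop (wcscpy_ref is a hypothetical name):

#include <wchar.h>

wchar_t *wcscpy_ref(wchar_t *dst, const wchar_t *src) {
    wchar_t *d = dst;
    while ((*d++ = *src++) != L'\0')
        ;                         /* copy through the terminator */
    return dst;
}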

View File

@@ -1,35 +0,0 @@
/*
Copyright (c) 2011, 2012, 2013 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define MEMCMP wmemcmp
#define USE_WCHAR
#define USE_AS_WMEMCMP 1
#include "ssse3-memcmp-atom.S"

View File

@@ -0,0 +1,3 @@
/* $OpenBSD: strchr.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
#define STRCHR
#include "index.S"

View File

@@ -28,8 +28,13 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSSE3)
#define MEMCPY bcopy
#define USE_AS_MEMMOVE
#define USE_AS_BCOPY
#include "ssse3-memcpy-atom.S"
# define ssse3_strcmp_latest strcmp
# include "ssse3-strcmp-latest.S"
#else
# include "strcmp.S"
#endif
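The hunk above shows the aliasing convention these commits move to: when USE_SSSE3 is set, the optimized source is assembled directly under the public symbol, so there is no runtime dispatch. A hypothetical C analogue of that pattern (all names below are illustrative, not bionic's):

#if defined(USE_SSSE3)
# define strcmp_impl strcmp_ssse3      /* optimized build */
#else
# define strcmp_impl strcmp_portable   /* generic build */
#endif

int strcmp_ssse3(const char *a, const char *b);
int strcmp_portable(const char *a, const char *b);

int call_strcmp(const char *a, const char *b) {
    return strcmp_impl(a, b);          /* resolved by the preprocessor */
}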

View File

@@ -0,0 +1,64 @@
/* $OpenBSD: strcpy.S,v 1.8 2005/08/07 11:30:38 espie Exp $ */
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
*/
#include <machine/asm.h>
#if defined(APIWARN)
#APP
.section .gnu.warning.strcpy
.ascii "warning: strcpy() is almost always misused, please use strlcpy()"
#NO_APP
#endif
/*
* NOTE: I've unrolled the loop eight times: large enough to make a
* significant difference, and small enough not to totally trash the
* cache.
*/
ENTRY(strcpy)
movl 4(%esp),%ecx /* dst address */
movl 8(%esp),%edx /* src address */
pushl %ecx /* push dst address */
.align 2,0x90
L1: movb (%edx),%al /* unroll loop, but not too much */
movb %al,(%ecx)
testb %al,%al
jz L2
movb 1(%edx),%al
movb %al,1(%ecx)
testb %al,%al
jz L2
movb 2(%edx),%al
movb %al,2(%ecx)
testb %al,%al
jz L2
movb 3(%edx),%al
movb %al,3(%ecx)
testb %al,%al
jz L2
movb 4(%edx),%al
movb %al,4(%ecx)
testb %al,%al
jz L2
movb 5(%edx),%al
movb %al,5(%ecx)
testb %al,%al
jz L2
movb 6(%edx),%al
movb %al,6(%ecx)
testb %al,%al
jz L2
movb 7(%edx),%al
movb %al,7(%ecx)
addl $8,%edx
addl $8,%ecx
testb %al,%al
jnz L1
L2: popl %eax /* pop dst address */
ret
END(strcpy)
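The NOTE above explains the design choice: unroll enough to amortize loop overhead, not so much that the copy trashes the instruction cache. A C rendering of the same 8x-unrolled loop (strcpy_unrolled is a hypothetical name):

char *strcpy_unrolled(char *dst, const char *src) {
    char *d = dst;
    for (;;) {
        for (int i = 0; i < 8; ++i)
            if ((d[i] = src[i]) == '\0')   /* mirrors the jz L2 exits */
                return dst;
        d += 8;
        src += 8;
    }
}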

View File

@@ -28,7 +28,13 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSE2)
#define MEMCPY memmove
#define USE_AS_MEMMOVE
#include "ssse3-memcpy-atom.S"
# define sse2_strlen_atom strlen
# include "sse2-strlen-atom.S"
#else
# include "strlen.S"
#endif

Some files were not shown because too many files have changed in this diff