Compare commits


2 Commits

Author SHA1 Message Date
Baligh Uddin e4dda1f1e9 Merge "resolved conflicts for merge of 5d0ad38c to jb-mr2-dev" into jb-mr2-dev 2013-09-25 02:31:37 +00:00
Elliott Hughes 5a6f55a3c4 resolved conflicts for merge of 5d0ad38c to jb-mr2-dev (Change-Id: I606f6d86aad46c5eda0dc0d907fe691ca0eea826) 2013-09-24 15:53:34 -07:00
321 changed files with 4978 additions and 28036 deletions

View File

@@ -6,33 +6,67 @@ include $(LOCAL_PATH)/arch-$(TARGET_ARCH)/syscalls.mk
# =========================================================
libc_common_src_files := \
$(syscall_src) \
unistd/abort.c \
unistd/alarm.c \
unistd/exec.c \
unistd/fnmatch.c \
unistd/getopt_long.c \
unistd/syslog.c \
unistd/system.c \
unistd/time.c \
stdio/asprintf.c \
stdio/clrerr.c \
stdio/fclose.c \
stdio/fdopen.c \
stdio/feof.c \
stdio/ferror.c \
stdio/fflush.c \
stdio/fgetc.c \
stdio/fgetln.c \
stdio/fgetpos.c \
stdio/fgets.c \
stdio/fileno.c \
stdio/findfp.c \
stdio/flags.c \
stdio/fopen.c \
stdio/fprintf.c \
stdio/fpurge.c \
stdio/fputc.c \
stdio/fputs.c \
stdio/fread.c \
stdio/freopen.c \
stdio/fscanf.c \
stdio/fseek.c \
stdio/fsetpos.c \
stdio/ftell.c \
stdio/funopen.c \
stdio/fvwrite.c \
stdio/fwalk.c \
stdio/fwrite.c \
stdio/getc.c \
stdio/getchar.c \
stdio/gets.c \
stdio/makebuf.c \
stdio/mktemp.c \
stdio/printf.c \
stdio/putc.c \
stdio/putchar.c \
stdio/puts.c \
stdio/putw.c \
stdio/refill.c \
stdio/remove.c \
stdio/rewind.c \
stdio/rget.c \
stdio/scanf.c \
stdio/setbuf.c \
stdio/setbuffer.c \
stdio/setvbuf.c \
stdio/snprintf.c\
stdio/sprintf.c \
stdio/sscanf.c \
stdio/stdio.c \
stdio/tempnam.c \
stdio/tmpnam.c \
stdio/ungetc.c \
stdio/vasprintf.c \
stdio/vfprintf.c \
@@ -43,11 +77,13 @@ libc_common_src_files := \
stdio/vscanf.c \
stdio/vsscanf.c \
stdio/wbuf.c \
stdio/wsetup.c \
stdlib/atexit.c \
stdlib/ctype_.c \
stdlib/exit.c \
stdlib/getenv.c \
stdlib/putenv.c \
stdlib/qsort.c \
stdlib/setenv.c \
stdlib/strtod.c \
stdlib/strtoimax.c \
@@ -58,16 +94,29 @@ libc_common_src_files := \
stdlib/strtoumax.c \
stdlib/tolower_.c \
stdlib/toupper_.c \
string/index.c \
string/strcasecmp.c \
string/strcat.c \
string/strchr.c \
string/strcspn.c \
string/strdup.c \
string/strlcat.c \
string/strlcpy.c \
string/strncat.c \
string/strncpy.c \
string/strpbrk.c \
string/strrchr.c \
string/strsep.c \
string/strspn.c \
string/strstr.c \
string/strtok.c \
wchar/wcswidth.c \
wchar/wcsxfrm.c \
tzcode/asctime.c \
tzcode/difftime.c \
tzcode/localtime.c \
tzcode/strftime.c \
tzcode/strptime.c \
bionic/arc4random.c \
bionic/atoi.c \
bionic/atol.c \
@@ -102,9 +151,11 @@ libc_common_src_files := \
bionic/ldexp.c \
bionic/lseek64.c \
bionic/md5.c \
bionic/memchr.c \
bionic/memmem.c \
bionic/memrchr.c \
bionic/memswap.c \
bionic/name_mem.c \
bionic/mmap.c \
bionic/openat.c \
bionic/open.c \
bionic/pathconf.c \
@@ -119,6 +170,7 @@ libc_common_src_files := \
bionic/recv.c \
bionic/sched_cpualloc.c \
bionic/sched_cpucount.c \
bionic/sched_getaffinity.c \
bionic/sched_getcpu.c \
bionic/semaphore.c \
bionic/send.c \
@@ -138,11 +190,11 @@ libc_common_src_files := \
bionic/sleep.c \
bionic/statfs.c \
bionic/strndup.c \
bionic/strnlen.c \
bionic/strntoimax.c \
bionic/strntoumax.c \
bionic/strtotimeval.c \
bionic/system_properties.c \
bionic/system_properties_compat.c \
bionic/tcgetpgrp.c \
bionic/tcsetpgrp.c \
bionic/thread_atexit.c \
@@ -181,40 +233,22 @@ libc_common_src_files := \
netbsd/nameser/ns_print.c \
netbsd/nameser/ns_samedomain.c \
# Fortify implementations of libc functions.
libc_common_src_files += \
bionic/__fgets_chk.cpp \
bionic/__memcpy_chk.cpp \
bionic/__memmove_chk.cpp \
bionic/__memset_chk.cpp \
bionic/__strcat_chk.cpp \
bionic/__strchr_chk.cpp \
bionic/__strcpy_chk.cpp \
bionic/__strlcat_chk.cpp \
bionic/__strlcpy_chk.cpp \
bionic/__strlen_chk.cpp \
bionic/__strncat_chk.cpp \
bionic/__strncpy_chk.cpp \
bionic/__strrchr_chk.cpp \
bionic/__umask_chk.cpp \
bionic/__vsnprintf_chk.cpp \
bionic/__vsprintf_chk.cpp \
libc_bionic_src_files := \
bionic/abort.cpp \
bionic/assert.cpp \
bionic/brk.cpp \
bionic/dirent.cpp \
bionic/__errno.c \
bionic/eventfd_read.cpp \
bionic/eventfd_write.cpp \
bionic/futimens.cpp \
bionic/__fgets_chk.cpp \
bionic/getauxval.cpp \
bionic/getcwd.cpp \
bionic/libc_init_common.cpp \
bionic/libc_logging.cpp \
bionic/libgen.cpp \
bionic/mmap.cpp \
bionic/__memcpy_chk.cpp \
bionic/__memmove_chk.cpp \
bionic/__memset_chk.cpp \
bionic/pthread_attr.cpp \
bionic/pthread_detach.cpp \
bionic/pthread_equal.cpp \
@@ -230,88 +264,56 @@ libc_bionic_src_files := \
bionic/raise.cpp \
bionic/sbrk.cpp \
bionic/scandir.cpp \
bionic/sched_getaffinity.cpp \
bionic/__set_errno.cpp \
bionic/setlocale.cpp \
bionic/signalfd.cpp \
bionic/sigwait.cpp \
bionic/statvfs.cpp \
bionic/__strcat_chk.cpp \
bionic/__strcpy_chk.cpp \
bionic/strerror.cpp \
bionic/strerror_r.cpp \
bionic/__strlcat_chk.cpp \
bionic/__strlcpy_chk.cpp \
bionic/__strlen_chk.cpp \
bionic/__strncat_chk.cpp \
bionic/__strncpy_chk.cpp \
bionic/strsignal.cpp \
bionic/stubs.cpp \
bionic/sysconf.cpp \
bionic/tdestroy.cpp \
bionic/tmpfile.cpp \
bionic/__umask_chk.cpp \
bionic/__vsnprintf_chk.cpp \
bionic/__vsprintf_chk.cpp \
bionic/wait.cpp \
bionic/wchar.cpp \
libc_tzcode_src_files := \
tzcode/asctime.c \
tzcode/difftime.c \
tzcode/localtime.c \
tzcode/strftime.c \
tzcode/strptime.c \
libc_upstream_freebsd_src_files := \
upstream-freebsd/lib/libc/stdio/clrerr.c \
upstream-freebsd/lib/libc/stdio/fclose.c \
upstream-freebsd/lib/libc/stdio/fdopen.c \
upstream-freebsd/lib/libc/stdio/feof.c \
upstream-freebsd/lib/libc/stdio/ferror.c \
upstream-freebsd/lib/libc/stdio/fgetln.c \
upstream-freebsd/lib/libc/stdio/fgetpos.c \
upstream-freebsd/lib/libc/stdio/fgets.c \
upstream-freebsd/lib/libc/stdio/fileno.c \
upstream-freebsd/lib/libc/stdio/flags.c \
upstream-freebsd/lib/libc/stdio/fopen.c \
upstream-freebsd/lib/libc/stdio/fpurge.c \
upstream-freebsd/lib/libc/stdio/fputs.c \
upstream-freebsd/lib/libc/stdio/fsetpos.c \
upstream-freebsd/lib/libc/stdio/funopen.c \
upstream-freebsd/lib/libc/stdio/fwalk.c \
upstream-freebsd/lib/libc/stdio/fwrite.c \
upstream-freebsd/lib/libc/stdio/getc.c \
upstream-freebsd/lib/libc/stdio/getchar.c \
upstream-freebsd/lib/libc/stdio/makebuf.c \
upstream-freebsd/lib/libc/stdio/mktemp.c \
upstream-freebsd/lib/libc/stdio/putc.c \
upstream-freebsd/lib/libc/stdio/putchar.c \
upstream-freebsd/lib/libc/stdio/puts.c \
upstream-freebsd/lib/libc/stdio/putw.c \
upstream-freebsd/lib/libc/stdio/remove.c \
upstream-freebsd/lib/libc/stdio/rget.c \
upstream-freebsd/lib/libc/stdio/setbuf.c \
upstream-freebsd/lib/libc/stdio/setbuffer.c \
upstream-freebsd/lib/libc/stdio/setvbuf.c \
upstream-freebsd/lib/libc/stdio/tempnam.c \
upstream-freebsd/lib/libc/stdio/tmpnam.c \
upstream-freebsd/lib/libc/stdio/wsetup.c \
upstream-freebsd/lib/libc/stdlib/abs.c \
upstream-freebsd/lib/libc/stdlib/getopt_long.c \
upstream-freebsd/lib/libc/stdlib/imaxabs.c \
upstream-freebsd/lib/libc/stdlib/imaxdiv.c \
upstream-freebsd/lib/libc/stdlib/labs.c \
upstream-freebsd/lib/libc/stdlib/llabs.c \
upstream-freebsd/lib/libc/stdlib/qsort.c \
upstream-freebsd/lib/libc/stdlib/realpath.c \
upstream-freebsd/lib/libc/string/wcpcpy.c \
upstream-freebsd/lib/libc/string/wcpncpy.c \
upstream-freebsd/lib/libc/string/wcscasecmp.c \
upstream-freebsd/lib/libc/string/wcscat.c \
upstream-freebsd/lib/libc/string/wcschr.c \
upstream-freebsd/lib/libc/string/wcscmp.c \
upstream-freebsd/lib/libc/string/wcscpy.c \
upstream-freebsd/lib/libc/string/wcscspn.c \
upstream-freebsd/lib/libc/string/wcsdup.c \
upstream-freebsd/lib/libc/string/wcslcat.c \
upstream-freebsd/lib/libc/string/wcslcpy.c \
upstream-freebsd/lib/libc/string/wcslen.c \
upstream-freebsd/lib/libc/string/wcsncasecmp.c \
upstream-freebsd/lib/libc/string/wcsncat.c \
upstream-freebsd/lib/libc/string/wcsncmp.c \
upstream-freebsd/lib/libc/string/wcsncpy.c \
upstream-freebsd/lib/libc/string/wcsnlen.c \
upstream-freebsd/lib/libc/string/wcspbrk.c \
upstream-freebsd/lib/libc/string/wcsrchr.c \
upstream-freebsd/lib/libc/string/wcsspn.c \
upstream-freebsd/lib/libc/string/wcsstr.c \
upstream-freebsd/lib/libc/string/wcstok.c \
upstream-freebsd/lib/libc/string/wmemchr.c \
upstream-freebsd/lib/libc/string/wmemcmp.c \
upstream-freebsd/lib/libc/string/wmemcpy.c \
upstream-freebsd/lib/libc/string/wmemmove.c \
upstream-freebsd/lib/libc/string/wmemset.c \
@@ -367,23 +369,6 @@ libc_common_src_files += \
bionic/memmove.c.arm \
string/bcopy.c \
string/strncmp.c \
string/strncat.c \
string/strncpy.c \
bionic/strchr.cpp \
string/strrchr.c \
bionic/memchr.c \
bionic/memrchr.c \
string/index.c \
bionic/strnlen.c \
string/strlcat.c \
string/strlcpy.c \
upstream-freebsd/lib/libc/string/wcschr.c \
upstream-freebsd/lib/libc/string/wcsrchr.c \
upstream-freebsd/lib/libc/string/wcscmp.c \
upstream-freebsd/lib/libc/string/wcscpy.c \
upstream-freebsd/lib/libc/string/wmemcmp.c \
upstream-freebsd/lib/libc/string/wcslen.c \
upstream-freebsd/lib/libc/string/wcscat.c
# These files need to be arm so that gdbserver
# can set breakpoints in them without messing
@@ -407,6 +392,7 @@ libc_common_src_files += \
bionic/pthread-rwlocks.c \
bionic/pthread-timers.c \
bionic/ptrace.c \
string/strcpy.c \
libc_static_common_src_files += \
bionic/pthread.c \
@@ -421,25 +407,7 @@ libc_common_src_files += \
string/bcopy.c \
string/strcmp.c \
string/strcpy.c \
string/strncmp.c \
string/strcat.c \
string/strncat.c \
string/strncpy.c \
bionic/strchr.cpp \
string/strrchr.c \
bionic/memchr.c \
bionic/memrchr.c \
string/index.c \
bionic/strnlen.c \
string/strlcat.c \
string/strlcpy.c \
upstream-freebsd/lib/libc/string/wcschr.c \
upstream-freebsd/lib/libc/string/wcsrchr.c \
upstream-freebsd/lib/libc/string/wcscmp.c \
upstream-freebsd/lib/libc/string/wcscpy.c \
upstream-freebsd/lib/libc/string/wmemcmp.c \
upstream-freebsd/lib/libc/string/wcslen.c \
upstream-freebsd/lib/libc/string/wcscat.c
string/strncmp.c
libc_common_src_files += \
bionic/pthread-atfork.c \
@@ -501,6 +469,14 @@ libc_common_cflags := \
-DLOG_ON_HEAP_ERROR \
-Wall -Wextra
# these macro definitions are required to implement the
# 'timezone' and 'daylight' global variables, as well as
# properly update the 'tm_gmtoff' field in 'struct tm'.
#
libc_common_cflags += \
-DTM_GMTOFF=tm_gmtoff \
-DUSG_COMPAT=1
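
As a hedged aside, here is a minimal C sketch of what those two definitions enable, assuming a libc compiled with them: tm_gmtoff is the BSD extension field in struct tm, and timezone/daylight are the SysV globals. Nothing below is bionic-specific code; on glibc you may need _DEFAULT_SOURCE for tm_gmtoff.

#include <stdio.h>
#include <time.h>

int main(void) {
    time_t now = time(NULL);
    struct tm tm;
    tzset();                /* populates the 'timezone' and 'daylight' globals */
    localtime_r(&now, &tm); /* fills tm.tm_gmtoff when built with -DTM_GMTOFF */
    printf("tm_gmtoff=%ld timezone=%ld daylight=%d\n",
           (long) tm.tm_gmtoff, (long) timezone, daylight);
    return 0;
}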
ifeq ($(strip $(DEBUG_BIONIC_LIBC)),true)
libc_common_cflags += -DDEBUG
endif
@@ -578,17 +554,23 @@ libc_crt_target_cflags += \
# static C++ destructors are properly called on dlclose().
#
ifeq ($(TARGET_ARCH),arm)
libc_crtbegin_extension := c
libc_crt_target_so_cflags :=
endif
ifeq ($(TARGET_ARCH),mips)
libc_crtbegin_extension := S
libc_crt_target_so_cflags := -fPIC
endif
ifeq ($(TARGET_ARCH),x86)
libc_crtbegin_extension := c
libc_crt_target_so_cflags := -fPIC
endif
ifeq ($(libc_crtbegin_extension),)
$(error $(TARGET_ARCH) not supported)
endif
libc_crt_target_so_cflags += $(libc_crt_target_cflags)
libc_crt_target_crtbegin_file := $(LOCAL_PATH)/arch-$(TARGET_ARCH)/bionic/crtbegin.c
libc_crt_target_crtbegin_so_file := $(LOCAL_PATH)/arch-$(TARGET_ARCH)/bionic/crtbegin_so.c
libc_crt_target_crtbegin_file := $(LOCAL_PATH)/arch-$(TARGET_ARCH)/bionic/crtbegin.$(libc_crtbegin_extension)
libc_crt_target_crtbegin_so_file := $(LOCAL_PATH)/arch-$(TARGET_ARCH)/bionic/crtbegin_so.$(libc_crtbegin_extension)
# See the comment in crtbrand.c for the reason why we need to generate
# crtbrand.s before generating crtbrand.o.
@@ -704,28 +686,6 @@ LOCAL_SYSTEM_SHARED_LIBRARIES :=
include $(BUILD_STATIC_LIBRARY)
# ========================================================
# libc_tzcode.a - upstream 'tzcode' code
# ========================================================
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(libc_tzcode_src_files)
LOCAL_CFLAGS := \
$(libc_common_cflags) \
-std=gnu99 \
-DSTD_INSPIRED=1 \
-DTZDIR=\"/system/usr/share/zoneinfo\" \
-DTM_GMTOFF=tm_gmtoff \
-DUSG_COMPAT=1
LOCAL_C_INCLUDES := $(libc_common_c_includes)
LOCAL_MODULE := libc_tzcode
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_SYSTEM_SHARED_LIBRARIES :=
include $(BUILD_STATIC_LIBRARY)
# ========================================================
# libc_freebsd.a - upstream FreeBSD C library code
# ========================================================
@@ -801,12 +761,7 @@ LOCAL_CFLAGS := $(libc_common_cflags) \
LOCAL_C_INCLUDES := $(libc_common_c_includes)
LOCAL_MODULE := libc_common
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_WHOLE_STATIC_LIBRARIES := \
libbionic_ssp \
libc_bionic \
libc_freebsd \
libc_netbsd \
libc_tzcode
LOCAL_WHOLE_STATIC_LIBRARIES := libbionic_ssp libc_bionic libc_freebsd libc_netbsd
LOCAL_SYSTEM_SHARED_LIBRARIES :=
# TODO: split out the asflags.

View File

@@ -1673,6 +1673,38 @@ SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 1990, 1993
The Regents of the University of California. All rights reserved.
This code is derived from software contributed to Berkeley by
Chris Torek.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
4. Neither the name of the University nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 1990, 1993
The Regents of the University of California. All rights reserved.
@@ -2820,7 +2852,7 @@ are met:
2. Redistributions in binary form must reproduce the above copyright
notices, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors
4. Neither the name of the University nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
@@ -3060,6 +3092,13 @@ are met:
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. All advertising materials mentioning features or use of this software
must display the following acknowledgement:
This product includes software developed by the NetBSD
Foundation, Inc. and its contributors.
4. Neither the name of The NetBSD Foundation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
@@ -3877,36 +3916,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2010, 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2010, Intel Corporation
All rights reserved.
@@ -3937,36 +3946,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2011 The Android Open Source Project
Copyright (c) 2008 ARM Ltd
All rights reserved.
@@ -3996,36 +3975,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2011, VMware, Inc.
All rights reserved.
@@ -4109,38 +4058,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 2013, Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
Neither the name of Linaro Limited nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c)1999 Citrus Project,
All rights reserved.

View File

@@ -201,8 +201,6 @@ int removexattr(const char *, const char *) 1
int lremovexattr(const char *, const char *) 1
int __statfs64:statfs64(const char *, size_t, struct statfs *) 1
long unshare(unsigned long) 1
int swapon(const char *, int) 1
int swapoff(const char *) 1
# time
int pause () 1
@@ -223,9 +221,6 @@ int __timer_getoverrun:timer_getoverrun(timer_t)
int __timer_delete:timer_delete(timer_t) 1
int utimes(const char*, const struct timeval tvp[2]) 1
int utimensat(int, const char *, const struct timespec times[2], int) 1
int timerfd_create(clockid_t, int) 1
int timerfd_settime(int, int, const struct itimerspec *, struct itimerspec *) 1
int timerfd_gettime(int, struct itimerspec *) 1
# signals
int sigaction(int, const struct sigaction *, struct sigaction *) 1

View File

@@ -14,6 +14,8 @@ _LIBC_ARCH_COMMON_SRC_FILES := \
arch-arm/bionic/_setjmp.S \
arch-arm/bionic/setjmp.S \
arch-arm/bionic/sigsetjmp.S \
arch-arm/bionic/strcpy.S \
arch-arm/bionic/strlen.c.arm \
arch-arm/bionic/syscall.S \
arch-arm/bionic/tgkill.S \
arch-arm/bionic/tkill.S \
@@ -26,19 +28,8 @@ _LIBC_ARCH_STATIC_SRC_FILES := \
_LIBC_ARCH_DYNAMIC_SRC_FILES := \
arch-arm/bionic/exidx_dynamic.c
# Remove the C++ fortify function implementations for which there is an
# arm assembler version.
_LIBC_FORTIFY_FILES_TO_REMOVE := \
bionic/__memcpy_chk.cpp \
bionic/__memset_chk.cpp \
bionic/__strcpy_chk.cpp \
bionic/__strcat_chk.cpp \
libc_common_src_files := \
$(filter-out $(_LIBC_FORTIFY_FILES_TO_REMOVE),$(libc_common_src_files))
ifeq ($(strip $(wildcard bionic/libc/arch-arm/$(TARGET_CPU_VARIANT)/$(TARGET_CPU_VARIANT).mk)),)
$(error "TARGET_CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.")
$(error "TARGET_CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a9, cortex-a15, krait. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.")
endif
include bionic/libc/arch-arm/$(TARGET_CPU_VARIANT)/$(TARGET_CPU_VARIANT).mk

View File

@@ -38,5 +38,5 @@
ENTRY(abort)
.save {r3, r14}
stmfd sp!, {r3, r14}
bl PIC_SYM(_C_LABEL(__libc_android_abort), PLT)
blx PIC_SYM(_C_LABEL(__libc_android_abort), PLT)
END(abort)

View File

@@ -32,33 +32,33 @@
// int __pthread_clone(void* (*fn)(void*), void* child_stack, int flags, void* arg);
ENTRY(__pthread_clone)
# Push 'fn' and 'arg' onto 'child_stack'.
# Copy the args onto the new stack.
stmdb r1!, {r0, r3}
# The sys_clone system call only takes two arguments: 'flags' and 'child_stack'.
# 'child_stack' is already in r1, but we need to move 'flags' into position.
mov r0, r2
stmfd sp!, {r4, r7}
# System call.
mov ip, r7
ldr r7, =__NR_clone
swi #0
# Child?
movs r0, r0
beq 1f
# Parent.
mov r7, ip
# In parent, reload saved registers then either return or set errno.
ldmfd sp!, {r4, r7}
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno
1: # Child.
# Pop 'fn' and 'arg' back off the stack and call __thread_entry.
1: # The child.
# pick the function arg and call address off the stack and jump
# to the C __thread_entry function which does some setup and then
# calls the thread's start function
pop {r0, r1}
# __thread_entry also needs our stack pointer.
# __thread_entry needs the TLS pointer
mov r2, sp
b __thread_entry
END(__pthread_clone)

View File

@@ -34,10 +34,11 @@
// __futex_syscall3(*ftx, op, val)
ENTRY(__futex_syscall3)
mov ip, r7
stmdb sp!, {r4, r7}
.save {r4, r7}
ldr r7, =__NR_futex
swi #0
mov r7, ip
ldmia sp!, {r4, r7}
bx lr
END(__futex_syscall3)
@@ -48,23 +49,25 @@ END(__futex_syscall4)
// __futex_wait(*ftx, val, *timespec)
ENTRY(__futex_wait)
mov ip, r7
stmdb sp!, {r4, r7}
.save {r4, r7}
mov r3, r2
mov r2, r1
mov r1, #FUTEX_WAIT
ldr r7, =__NR_futex
swi #0
mov r7, ip
ldmia sp!, {r4, r7}
bx lr
END(__futex_wait)
// __futex_wake(*ftx, counter)
ENTRY(__futex_wake)
mov ip, r7
.save {r4, r7}
stmdb sp!, {r4, r7}
mov r2, r1
mov r1, #FUTEX_WAKE
ldr r7, =__NR_futex
swi #0
mov r7, ip
ldmia sp!, {r4, r7}
bx lr
END(__futex_wake)

View File

@@ -30,8 +30,8 @@
* dynamic linker to copy their definition into the final libc.so binary.
*
* They are required to ensure backwards binary compatibility with
* libc.so provided by the platform and binaries built with the NDK or
* different versions/configurations of toolchains.
* Android 1.5, 1.6 and even 3.0 system images. Some applications built
* using the NDK require them to be here.
*
* Now, for a more elaborate description of the issue:
*
@@ -48,9 +48,7 @@
* gcc <options> -o libfoo.so foo.a libgcc.a -lc -lm
*
* This ensures that any helper function needed by the code in foo.a is copied
* into the final libfoo.so. However, doing so will link a bunch of other __cxa
* functions from libgcc.a into each .so and executable, causing 4k+ increase
* in every binary. Therefore the Android platform build system has been
* into the final libfoo.so. Unfortunately, the Android build system has been
* using this instead:
*
* gcc <options> -o libfoo.so foo.a -lc -lm libgcc.a
@@ -60,10 +58,9 @@
* into libfoo.so. Instead, a symbol import definition will be added to it
* so libfoo.so can directly call the one in libc.so at runtime.
*
* When refreshing toolchains for new versions or using different architecture
* flags, the set of helper functions copied to libc.so may change, which
* resulted in some native shared libraries generated with the NDK or prebuilts
* from vendors to fail to load properly.
* When changing toolchains for 2.0, the set of helper functions copied to
* libc.so changed, which resulted in some native shared libraries generated
* with the NDK to fail to load properly.
*
* The NDK has been fixed after 1.6_r1 to use the correct link command, so
* any native shared library generated with it should now be safe from that
@@ -76,11 +73,6 @@
* but it is easier to add a single function here than asking several app
* developers to fix their build.
*
* The __aeabi_idiv function is added to the list since cortex-a15 supports
* HW idiv instructions so the system libc.so doesn't pull in the reference to
* __aeabi_idiv but legacy libraries built against cortex-a9 targets still need
* it.
*
* Final note: some of the functions below should really be in libm.so to
* completely reflect the state of 1.5/1.6 system images. However,
* since libm.so depends on libc.so, it's easier to put all of
@@ -121,7 +113,6 @@
XX(__aeabi_idiv) \
XX(__aeabi_l2d) \
XX(__aeabi_l2f) \
XX(__aeabi_lasr) \
XX(__aeabi_lmul) \
XX(__aeabi_llsl) \
XX(__aeabi_llsr) \
@@ -132,7 +123,7 @@
XX(__cmpdf2) \
XX(__divdf3) \
XX(__divsf3) \
XX(__eqdf2) \
XX(__eqdf2) \
XX(__extendsfdf2) \
XX(__fixdfsi) \
XX(__fixsfsi) \

View File

@@ -1,614 +0,0 @@
/* Copyright (c) 2013, Linaro Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Linaro Limited nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
This memcpy routine is optimised for Cortex-A15 cores and takes advantage
of VFP or NEON when built with the appropriate flags.
Assumptions:
ARMv6 (ARMv7-a if using Neon)
ARM state
Unaligned accesses
LDRD/STRD support unaligned word accesses
*/
#include <machine/cpu-features.h>
#include <machine/asm.h>
.syntax unified
/* This implementation requires ARM state. */
.arm
#ifdef __ARM_NEON__
.fpu neon
.arch armv7-a
# define FRAME_SIZE 4
# define USE_VFP
# define USE_NEON
#elif !defined (__SOFTFP__)
.arch armv6
.fpu vfpv2
# define FRAME_SIZE 32
# define USE_VFP
#else
.arch armv6
# define FRAME_SIZE 32
#endif
/* Old versions of GAS incorrectly implement the NEON align semantics. */
#ifdef BROKEN_ASM_NEON_ALIGN
#define ALIGN(addr, align) addr,:align
#else
#define ALIGN(addr, align) addr:align
#endif
#define PC_OFFSET 8 /* PC pipeline compensation. */
#define INSN_SIZE 4
/* Call parameters. */
#define dstin r0
#define src r1
#define count r2
/* Locals. */
#define tmp1 r3
#define dst ip
#define tmp2 r10
#ifndef USE_NEON
/* For bulk copies using GP registers. */
#define A_l r2 /* Call-clobbered. */
#define A_h r3 /* Call-clobbered. */
#define B_l r4
#define B_h r5
#define C_l r6
#define C_h r7
#define D_l r8
#define D_h r9
#endif
/* Number of lines ahead to pre-fetch data. If you change this the code
below will need adjustment to compensate. */
#define prefetch_lines 5
#ifdef USE_VFP
.macro cpy_line_vfp vreg, base
vstr \vreg, [dst, #\base]
vldr \vreg, [src, #\base]
vstr d0, [dst, #\base + 8]
vldr d0, [src, #\base + 8]
vstr d1, [dst, #\base + 16]
vldr d1, [src, #\base + 16]
vstr d2, [dst, #\base + 24]
vldr d2, [src, #\base + 24]
vstr \vreg, [dst, #\base + 32]
vldr \vreg, [src, #\base + prefetch_lines * 64 - 32]
vstr d0, [dst, #\base + 40]
vldr d0, [src, #\base + 40]
vstr d1, [dst, #\base + 48]
vldr d1, [src, #\base + 48]
vstr d2, [dst, #\base + 56]
vldr d2, [src, #\base + 56]
.endm
.macro cpy_tail_vfp vreg, base
vstr \vreg, [dst, #\base]
vldr \vreg, [src, #\base]
vstr d0, [dst, #\base + 8]
vldr d0, [src, #\base + 8]
vstr d1, [dst, #\base + 16]
vldr d1, [src, #\base + 16]
vstr d2, [dst, #\base + 24]
vldr d2, [src, #\base + 24]
vstr \vreg, [dst, #\base + 32]
vstr d0, [dst, #\base + 40]
vldr d0, [src, #\base + 40]
vstr d1, [dst, #\base + 48]
vldr d1, [src, #\base + 48]
vstr d2, [dst, #\base + 56]
vldr d2, [src, #\base + 56]
.endm
#endif
.p2align 6
ENTRY(memcpy)
mov dst, dstin /* Preserve dstin, we need to return it. */
cmp count, #64
bge .Lcpy_not_short
/* Deal with small copies quickly by dropping straight into the
exit block. */
.Ltail63unaligned:
#ifdef USE_NEON
and tmp1, count, #0x38
rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
add pc, pc, tmp1
vld1.8 {d0}, [src]! /* 14 words to go. */
vst1.8 {d0}, [dst]!
vld1.8 {d0}, [src]! /* 12 words to go. */
vst1.8 {d0}, [dst]!
vld1.8 {d0}, [src]! /* 10 words to go. */
vst1.8 {d0}, [dst]!
vld1.8 {d0}, [src]! /* 8 words to go. */
vst1.8 {d0}, [dst]!
vld1.8 {d0}, [src]! /* 6 words to go. */
vst1.8 {d0}, [dst]!
vld1.8 {d0}, [src]! /* 4 words to go. */
vst1.8 {d0}, [dst]!
vld1.8 {d0}, [src]! /* 2 words to go. */
vst1.8 {d0}, [dst]!
tst count, #4
ldrne tmp1, [src], #4
strne tmp1, [dst], #4
#else
/* Copy up to 15 full words of data. May not be aligned. */
/* Cannot use VFP for unaligned data. */
and tmp1, count, #0x3c
add dst, dst, tmp1
add src, src, tmp1
rsb tmp1, tmp1, #(60 - PC_OFFSET/2 + INSN_SIZE/2)
/* Jump directly into the sequence below at the correct offset. */
add pc, pc, tmp1, lsl #1
ldr tmp1, [src, #-60] /* 15 words to go. */
str tmp1, [dst, #-60]
ldr tmp1, [src, #-56] /* 14 words to go. */
str tmp1, [dst, #-56]
ldr tmp1, [src, #-52]
str tmp1, [dst, #-52]
ldr tmp1, [src, #-48] /* 12 words to go. */
str tmp1, [dst, #-48]
ldr tmp1, [src, #-44]
str tmp1, [dst, #-44]
ldr tmp1, [src, #-40] /* 10 words to go. */
str tmp1, [dst, #-40]
ldr tmp1, [src, #-36]
str tmp1, [dst, #-36]
ldr tmp1, [src, #-32] /* 8 words to go. */
str tmp1, [dst, #-32]
ldr tmp1, [src, #-28]
str tmp1, [dst, #-28]
ldr tmp1, [src, #-24] /* 6 words to go. */
str tmp1, [dst, #-24]
ldr tmp1, [src, #-20]
str tmp1, [dst, #-20]
ldr tmp1, [src, #-16] /* 4 words to go. */
str tmp1, [dst, #-16]
ldr tmp1, [src, #-12]
str tmp1, [dst, #-12]
ldr tmp1, [src, #-8] /* 2 words to go. */
str tmp1, [dst, #-8]
ldr tmp1, [src, #-4]
str tmp1, [dst, #-4]
#endif
lsls count, count, #31
ldrhcs tmp1, [src], #2
ldrbne src, [src] /* Src is dead, use as a scratch. */
strhcs tmp1, [dst], #2
strbne src, [dst]
bx lr
.Lcpy_not_short:
/* At least 64 bytes to copy, but don't know the alignment yet. */
str tmp2, [sp, #-FRAME_SIZE]!
and tmp2, src, #7
and tmp1, dst, #7
cmp tmp1, tmp2
bne .Lcpy_notaligned
#ifdef USE_VFP
/* Magic dust alert! Force VFP on Cortex-A9. Experiments show
that the FP pipeline is much better at streaming loads and
stores. This is outside the critical loop. */
vmov.f32 s0, s0
#endif
/* SRC and DST have the same mutual 32-bit alignment, but we may
still need to pre-copy some bytes to get to natural alignment.
We bring DST into full 64-bit alignment. */
lsls tmp2, dst, #29
beq 1f
rsbs tmp2, tmp2, #0
sub count, count, tmp2, lsr #29
ldrmi tmp1, [src], #4
strmi tmp1, [dst], #4
lsls tmp2, tmp2, #2
ldrhcs tmp1, [src], #2
ldrbne tmp2, [src], #1
strhcs tmp1, [dst], #2
strbne tmp2, [dst], #1
1:
subs tmp2, count, #64 /* Use tmp2 for count. */
blt .Ltail63aligned
cmp tmp2, #512
bge .Lcpy_body_long
.Lcpy_body_medium: /* Count in tmp2. */
#ifdef USE_VFP
1:
vldr d0, [src, #0]
subs tmp2, tmp2, #64
vldr d1, [src, #8]
vstr d0, [dst, #0]
vldr d0, [src, #16]
vstr d1, [dst, #8]
vldr d1, [src, #24]
vstr d0, [dst, #16]
vldr d0, [src, #32]
vstr d1, [dst, #24]
vldr d1, [src, #40]
vstr d0, [dst, #32]
vldr d0, [src, #48]
vstr d1, [dst, #40]
vldr d1, [src, #56]
vstr d0, [dst, #48]
add src, src, #64
vstr d1, [dst, #56]
add dst, dst, #64
bge 1b
tst tmp2, #0x3f
beq .Ldone
.Ltail63aligned: /* Count in tmp2. */
and tmp1, tmp2, #0x38
add dst, dst, tmp1
add src, src, tmp1
rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
add pc, pc, tmp1
vldr d0, [src, #-56] /* 14 words to go. */
vstr d0, [dst, #-56]
vldr d0, [src, #-48] /* 12 words to go. */
vstr d0, [dst, #-48]
vldr d0, [src, #-40] /* 10 words to go. */
vstr d0, [dst, #-40]
vldr d0, [src, #-32] /* 8 words to go. */
vstr d0, [dst, #-32]
vldr d0, [src, #-24] /* 6 words to go. */
vstr d0, [dst, #-24]
vldr d0, [src, #-16] /* 4 words to go. */
vstr d0, [dst, #-16]
vldr d0, [src, #-8] /* 2 words to go. */
vstr d0, [dst, #-8]
#else
sub src, src, #8
sub dst, dst, #8
1:
ldrd A_l, A_h, [src, #8]
strd A_l, A_h, [dst, #8]
ldrd A_l, A_h, [src, #16]
strd A_l, A_h, [dst, #16]
ldrd A_l, A_h, [src, #24]
strd A_l, A_h, [dst, #24]
ldrd A_l, A_h, [src, #32]
strd A_l, A_h, [dst, #32]
ldrd A_l, A_h, [src, #40]
strd A_l, A_h, [dst, #40]
ldrd A_l, A_h, [src, #48]
strd A_l, A_h, [dst, #48]
ldrd A_l, A_h, [src, #56]
strd A_l, A_h, [dst, #56]
ldrd A_l, A_h, [src, #64]!
strd A_l, A_h, [dst, #64]!
subs tmp2, tmp2, #64
bge 1b
tst tmp2, #0x3f
bne 1f
ldr tmp2,[sp], #FRAME_SIZE
bx lr
1:
add src, src, #8
add dst, dst, #8
.Ltail63aligned: /* Count in tmp2. */
/* Copy up to 7 d-words of data. Similar to Ltail63unaligned, but
we know that the src and dest are 32-bit aligned so we can use
LDRD/STRD to improve efficiency. */
/* TMP2 is now negative, but we don't care about that. The bottom
six bits still tell us how many bytes are left to copy. */
and tmp1, tmp2, #0x38
add dst, dst, tmp1
add src, src, tmp1
rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
add pc, pc, tmp1
ldrd A_l, A_h, [src, #-56] /* 14 words to go. */
strd A_l, A_h, [dst, #-56]
ldrd A_l, A_h, [src, #-48] /* 12 words to go. */
strd A_l, A_h, [dst, #-48]
ldrd A_l, A_h, [src, #-40] /* 10 words to go. */
strd A_l, A_h, [dst, #-40]
ldrd A_l, A_h, [src, #-32] /* 8 words to go. */
strd A_l, A_h, [dst, #-32]
ldrd A_l, A_h, [src, #-24] /* 6 words to go. */
strd A_l, A_h, [dst, #-24]
ldrd A_l, A_h, [src, #-16] /* 4 words to go. */
strd A_l, A_h, [dst, #-16]
ldrd A_l, A_h, [src, #-8] /* 2 words to go. */
strd A_l, A_h, [dst, #-8]
#endif
tst tmp2, #4
ldrne tmp1, [src], #4
strne tmp1, [dst], #4
lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */
ldrhcs tmp1, [src], #2
ldrbne tmp2, [src]
strhcs tmp1, [dst], #2
strbne tmp2, [dst]
.Ldone:
ldr tmp2, [sp], #FRAME_SIZE
bx lr
.Lcpy_body_long: /* Count in tmp2. */
/* Long copy. We know that there's at least (prefetch_lines * 64)
bytes to go. */
#ifdef USE_VFP
/* Don't use PLD. Instead, read some data in advance of the current
copy position into a register. This should act like a PLD
operation but we won't have to repeat the transfer. */
vldr d3, [src, #0]
vldr d4, [src, #64]
vldr d5, [src, #128]
vldr d6, [src, #192]
vldr d7, [src, #256]
vldr d0, [src, #8]
vldr d1, [src, #16]
vldr d2, [src, #24]
add src, src, #32
subs tmp2, tmp2, #prefetch_lines * 64 * 2
blt 2f
1:
cpy_line_vfp d3, 0
cpy_line_vfp d4, 64
cpy_line_vfp d5, 128
add dst, dst, #3 * 64
add src, src, #3 * 64
cpy_line_vfp d6, 0
cpy_line_vfp d7, 64
add dst, dst, #2 * 64
add src, src, #2 * 64
subs tmp2, tmp2, #prefetch_lines * 64
bge 1b
2:
cpy_tail_vfp d3, 0
cpy_tail_vfp d4, 64
cpy_tail_vfp d5, 128
add src, src, #3 * 64
add dst, dst, #3 * 64
cpy_tail_vfp d6, 0
vstr d7, [dst, #64]
vldr d7, [src, #64]
vstr d0, [dst, #64 + 8]
vldr d0, [src, #64 + 8]
vstr d1, [dst, #64 + 16]
vldr d1, [src, #64 + 16]
vstr d2, [dst, #64 + 24]
vldr d2, [src, #64 + 24]
vstr d7, [dst, #64 + 32]
add src, src, #96
vstr d0, [dst, #64 + 40]
vstr d1, [dst, #64 + 48]
vstr d2, [dst, #64 + 56]
add dst, dst, #128
add tmp2, tmp2, #prefetch_lines * 64
b .Lcpy_body_medium
#else
/* Long copy. Use an SMS style loop to maximize the I/O
bandwidth of the core. We don't have enough spare registers
to synthesise prefetching, so use PLD operations. */
/* Pre-bias src and dst. */
sub src, src, #8
sub dst, dst, #8
pld [src, #8]
pld [src, #72]
subs tmp2, tmp2, #64
pld [src, #136]
ldrd A_l, A_h, [src, #8]
strd B_l, B_h, [sp, #8]
ldrd B_l, B_h, [src, #16]
strd C_l, C_h, [sp, #16]
ldrd C_l, C_h, [src, #24]
strd D_l, D_h, [sp, #24]
pld [src, #200]
ldrd D_l, D_h, [src, #32]!
b 1f
.p2align 6
2:
pld [src, #232]
strd A_l, A_h, [dst, #40]
ldrd A_l, A_h, [src, #40]
strd B_l, B_h, [dst, #48]
ldrd B_l, B_h, [src, #48]
strd C_l, C_h, [dst, #56]
ldrd C_l, C_h, [src, #56]
strd D_l, D_h, [dst, #64]!
ldrd D_l, D_h, [src, #64]!
subs tmp2, tmp2, #64
1:
strd A_l, A_h, [dst, #8]
ldrd A_l, A_h, [src, #8]
strd B_l, B_h, [dst, #16]
ldrd B_l, B_h, [src, #16]
strd C_l, C_h, [dst, #24]
ldrd C_l, C_h, [src, #24]
strd D_l, D_h, [dst, #32]
ldrd D_l, D_h, [src, #32]
bcs 2b
/* Save the remaining bytes and restore the callee-saved regs. */
strd A_l, A_h, [dst, #40]
add src, src, #40
strd B_l, B_h, [dst, #48]
ldrd B_l, B_h, [sp, #8]
strd C_l, C_h, [dst, #56]
ldrd C_l, C_h, [sp, #16]
strd D_l, D_h, [dst, #64]
ldrd D_l, D_h, [sp, #24]
add dst, dst, #72
tst tmp2, #0x3f
bne .Ltail63aligned
ldr tmp2, [sp], #FRAME_SIZE
bx lr
#endif
.Lcpy_notaligned:
pld [src]
pld [src, #64]
/* There's at least 64 bytes to copy, but there is no mutual
alignment. */
/* Bring DST to 64-bit alignment. */
lsls tmp2, dst, #29
pld [src, #(2 * 64)]
beq 1f
rsbs tmp2, tmp2, #0
sub count, count, tmp2, lsr #29
ldrmi tmp1, [src], #4
strmi tmp1, [dst], #4
lsls tmp2, tmp2, #2
ldrbne tmp1, [src], #1
ldrhcs tmp2, [src], #2
strbne tmp1, [dst], #1
strhcs tmp2, [dst], #2
1:
pld [src, #(3 * 64)]
subs count, count, #64
ldrmi tmp2, [sp], #FRAME_SIZE
bmi .Ltail63unaligned
pld [src, #(4 * 64)]
#ifdef USE_NEON
vld1.8 {d0-d3}, [src]!
vld1.8 {d4-d7}, [src]!
subs count, count, #64
bmi 2f
1:
pld [src, #(4 * 64)]
vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
vld1.8 {d0-d3}, [src]!
vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
vld1.8 {d4-d7}, [src]!
subs count, count, #64
bpl 1b
2:
vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
ands count, count, #0x3f
#else
/* Use an SMS style loop to maximize the I/O bandwidth. */
sub src, src, #4
sub dst, dst, #8
subs tmp2, count, #64 /* Use tmp2 for count. */
ldr A_l, [src, #4]
ldr A_h, [src, #8]
strd B_l, B_h, [sp, #8]
ldr B_l, [src, #12]
ldr B_h, [src, #16]
strd C_l, C_h, [sp, #16]
ldr C_l, [src, #20]
ldr C_h, [src, #24]
strd D_l, D_h, [sp, #24]
ldr D_l, [src, #28]
ldr D_h, [src, #32]!
b 1f
.p2align 6
2:
pld [src, #(5 * 64) - (32 - 4)]
strd A_l, A_h, [dst, #40]
ldr A_l, [src, #36]
ldr A_h, [src, #40]
strd B_l, B_h, [dst, #48]
ldr B_l, [src, #44]
ldr B_h, [src, #48]
strd C_l, C_h, [dst, #56]
ldr C_l, [src, #52]
ldr C_h, [src, #56]
strd D_l, D_h, [dst, #64]!
ldr D_l, [src, #60]
ldr D_h, [src, #64]!
subs tmp2, tmp2, #64
1:
strd A_l, A_h, [dst, #8]
ldr A_l, [src, #4]
ldr A_h, [src, #8]
strd B_l, B_h, [dst, #16]
ldr B_l, [src, #12]
ldr B_h, [src, #16]
strd C_l, C_h, [dst, #24]
ldr C_l, [src, #20]
ldr C_h, [src, #24]
strd D_l, D_h, [dst, #32]
ldr D_l, [src, #28]
ldr D_h, [src, #32]
bcs 2b
/* Save the remaining bytes and restore the callee-saved regs. */
strd A_l, A_h, [dst, #40]
add src, src, #36
strd B_l, B_h, [dst, #48]
ldrd B_l, B_h, [sp, #8]
strd C_l, C_h, [dst, #56]
ldrd C_l, C_h, [sp, #16]
strd D_l, D_h, [dst, #64]
ldrd D_l, D_h, [sp, #24]
add dst, dst, #72
ands count, tmp2, #0x3f
#endif
ldr tmp2, [sp], #FRAME_SIZE
bne .Ltail63unaligned
bx lr
END(memcpy)
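
The deleted routine above streams data several cache lines (prefetch_lines * 64 bytes) ahead of the copy position instead of relying purely on PLD. A rough C analogue of that scheme, purely as a sketch under the assumption of 64-byte lines; the real code keeps the streamed data in GP/NEON registers rather than calling memcpy per line:

#include <stddef.h>
#include <string.h>

#define LINE 64            /* assumed cache line size */
#define PREFETCH_LINES 5   /* matches 'prefetch_lines' above */

void* memcpy_prefetch(void* dst, const void* src, size_t n) {
    const char* s = (const char*) src;
    char* d = (char*) dst;
    while (n >= LINE) {
        __builtin_prefetch(s + PREFETCH_LINES * LINE);  /* read ahead of the copy point */
        memcpy(d, s, LINE);                             /* one cache line at a time */
        d += LINE; s += LINE; n -= LINE;
    }
    memcpy(d, s, n);  /* tail of fewer than 64 bytes */
    return dst;
}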

View File

@@ -33,16 +33,16 @@ size_t strlen(const char *s)
{
__builtin_prefetch(s);
__builtin_prefetch(s+32);
union {
const char *b;
const uint32_t *w;
uintptr_t i;
} u;
// these are some scratch variables for the asm code below
uint32_t v, t;
// initialize the string length to zero
size_t l = 0;
@@ -60,60 +60,52 @@ size_t strlen(const char *s)
// We need to process 32 bytes per loop to schedule PLD properly
// and achieve the maximum bus speed.
asm(
"ldr %[v], [%[s]], #4 \n"
"ldr %[v], [ %[s] ], #4 \n"
"sub %[l], %[l], %[s] \n"
"0: \n"
#if __ARM_HAVE_PLD
"pld [%[s], #64] \n"
"pld [ %[s], #64 ] \n"
#endif
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
#if !defined(__OPTIMIZE_SIZE__)
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
"bne 1f \n"
"sub %[t], %[v], %[mask], lsr #7\n"
"and %[t], %[t], %[mask] \n"
"bics %[t], %[t], %[v] \n"
"it eq \n"
"ldreq %[v], [%[s]], #4 \n"
"ldreq %[v], [ %[s] ], #4 \n"
#endif
"beq 0b \n"
"1: \n"
@@ -125,14 +117,13 @@ size_t strlen(const char *s)
"beq 2f \n"
"add %[l], %[l], #1 \n"
"tst %[v], #0xFF0000 \n"
"it ne \n"
"addne %[l], %[l], #1 \n"
"2: \n"
: [l]"=&r"(l), [v]"=&r"(v), [t]"=&r"(t), [s]"=&r"(u.b)
: "%[l]"(l), "%[s]"(u.b), [mask]"r"(0x80808080UL)
: "cc"
);
done:
return l;
}
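
The assembly above scans one 32-bit word per iteration using the classic zero-byte test: (v - 0x01010101) & ~v & 0x80808080 is nonzero exactly when some byte of v is zero. A C sketch of the same idea, under the usual assumption that reading a whole aligned word never faults mid-string (the function names are illustrative, not bionic's):

#include <stddef.h>
#include <stdint.h>

static int word_has_zero_byte(uint32_t v) {
    /* Nonzero iff at least one byte of v is 0x00. */
    return ((v - 0x01010101u) & ~v & 0x80808080u) != 0;
}

size_t strlen_word_at_a_time(const char* s) {
    const char* p = s;
    while ((uintptr_t) p & 3) {          /* byte-scan up to a word boundary */
        if (*p == '\0') return (size_t)(p - s);
        ++p;
    }
    const uint32_t* w = (const uint32_t*) p;  /* aliasing is fine for a sketch */
    while (!word_has_zero_byte(*w))
        ++w;
    p = (const char*) w;                 /* locate the exact byte in the word */
    while (*p) ++p;
    return (size_t)(p - s);
}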

View File

@@ -39,7 +39,6 @@
*/
ENTRY(tgkill)
.save {r4-r7, ip, lr}
stmfd sp!, {r4-r7, ip, lr}
ldr r7, =__NR_tgkill
swi #0

View File

@@ -1,227 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#include "libc_events.h"
.syntax unified
.thumb
.thumb_func
// Get the length of src string, then get the source of the dst string.
// Check that the two lengths together don't exceed the threshold, then
// do a memcpy of the data.
ENTRY(__strcat_chk)
.cfi_startproc
pld [r0, #0]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
push {r4, r5}
.save {r4, r5}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
mov lr, r2
// Save the dst register to r5
mov r5, r0
// Zero out r4
eor r4, r4, r4
// r1 contains the address of the string to count.
.L_strlen_start:
mov r0, r1
ands r3, r1, #7
beq .L_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq .L_align_to_32
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
.L_align_to_32:
bcc .L_align_to_64
ands ip, r3, #2
beq .L_align_to_64
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
.L_align_to_64:
tst r3, #4
beq .L_mainloop
ldr r3, [r1], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
.p2align 2
.L_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_update_count_and_finish:
sub r3, r1, r0
sub r3, r3, #1
b .L_finish
.L_zero_in_first_register:
sub r3, r1, r0
lsls r2, ip, #17
bne .L_sub8_and_finish
bcs .L_sub7_and_finish
lsls ip, ip, #1
bne .L_sub6_and_finish
sub r3, r3, #5
b .L_finish
.L_sub8_and_finish:
sub r3, r3, #8
b .L_finish
.L_sub7_and_finish:
sub r3, r3, #7
b .L_finish
.L_sub6_and_finish:
sub r3, r3, #6
b .L_finish
.L_zero_in_second_register:
sub r3, r1, r0
lsls r2, ip, #17
bne .L_sub4_and_finish
bcs .L_sub3_and_finish
lsls ip, ip, #1
bne .L_sub2_and_finish
sub r3, r3, #1
b .L_finish
.L_sub4_and_finish:
sub r3, r3, #4
b .L_finish
.L_sub3_and_finish:
sub r3, r3, #3
b .L_finish
.L_sub2_and_finish:
sub r3, r3, #2
.L_finish:
cmp r4, #0
bne .L_strlen_done
// Time to get the dst string length.
mov r1, r5
// Save the original source address to r5.
mov r5, r0
// Save the current length (adding 1 for the terminator).
add r4, r3, #1
b .L_strlen_start
// r0 holds the pointer to the dst string.
// r3 holds the dst string length.
// r4 holds the src string length + 1.
.L_strlen_done:
add r2, r3, r4
cmp r2, lr
bhi __strcat_chk_failed
// Set up the registers for the memcpy code.
mov r1, r5
pld [r1, #64]
mov r2, r4
add r0, r0, r3
pop {r4, r5}
.cfi_endproc
END(__strcat_chk)
#define MEMCPY_BASE __strcat_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__strcat_chk_failed)
.cfi_startproc
.save {r0, lr}
.save {r4, r5}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__strcat_chk_failed)
.data
error_string:
.string "strcat buffer overflow"

View File

@@ -1,188 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#include "libc_events.h"
.syntax unified
.thumb
.thumb_func
// Get the length of the source string first, then do a memcpy of the data
// instead of a strcpy.
ENTRY(__strcpy_chk)
.cfi_startproc
pld [r0, #0]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
mov lr, r2
mov r0, r1
ands r3, r1, #7
beq .L_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq .L_align_to_32
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
.L_align_to_32:
bcc .L_align_to_64
ands ip, r3, #2
beq .L_align_to_64
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
.L_align_to_64:
tst r3, #4
beq .L_mainloop
ldr r3, [r0], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
.p2align 2
.L_mainloop:
ldrd r2, r3, [r0], #8
pld [r0, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_update_count_and_finish:
sub r3, r0, r1
sub r3, r3, #1
b .L_check_size
.L_zero_in_first_register:
sub r3, r0, r1
lsls r2, ip, #17
bne .L_sub8_and_finish
bcs .L_sub7_and_finish
lsls ip, ip, #1
bne .L_sub6_and_finish
sub r3, r3, #5
b .L_check_size
.L_sub8_and_finish:
sub r3, r3, #8
b .L_check_size
.L_sub7_and_finish:
sub r3, r3, #7
b .L_check_size
.L_sub6_and_finish:
sub r3, r3, #6
b .L_check_size
.L_zero_in_second_register:
sub r3, r0, r1
lsls r2, ip, #17
bne .L_sub4_and_finish
bcs .L_sub3_and_finish
lsls ip, ip, #1
bne .L_sub2_and_finish
sub r3, r3, #1
b .L_check_size
.L_sub4_and_finish:
sub r3, r3, #4
b .L_check_size
.L_sub3_and_finish:
sub r3, r3, #3
b .L_check_size
.L_sub2_and_finish:
sub r3, r3, #2
.L_check_size:
pld [r1, #0]
pld [r1, #64]
ldr r0, [sp]
cmp r3, lr
bhs __strcpy_chk_failed
// Add 1 for copy length to get the string terminator.
add r2, r3, #1
.cfi_endproc
END(__strcpy_chk)
#define MEMCPY_BASE __strcpy_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__strcpy_chk_failed)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__strcpy_chk_failed)
.data
error_string:
.string "strcpy buffer overflow"

View File

@@ -25,88 +25,122 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* Assumes neon instructions and a cache line size of 64 bytes. */
#include <machine/cpu-features.h>
#include <machine/asm.h>
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* This code assumes it is running on a processor that supports all arm v7
* instructions, that supports neon instructions, and that has a 64 byte
* cache line.
*/
// Prototype: void *memcpy (void *dst, const void *src, size_t count).
#include <machine/asm.h>
#include "libc_events.h"
.text
.syntax unified
.fpu neon
ENTRY(__memcpy_chk)
.cfi_startproc
cmp r2, r3
bhi __memcpy_chk_fail
// Fall through to memcpy...
.cfi_endproc
END(__memcpy_chk)
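In C terms, __memcpy_chk is just a guarded memcpy: the compiler passes the destination buffer's known size as a fourth argument, and the copy aborts when the count exceeds it. A minimal sketch under that assumption (illustrative names only, not bionic's C code):

#include <stddef.h>
#include <string.h>

extern void __memcpy_chk_fail(void);   // the assembly fail path; never returns

void* __memcpy_chk_sketch(void* dst, const void* src, size_t count, size_t dst_len) {
    if (count > dst_len) {             // the cmp/bhi above
        __memcpy_chk_fail();
    }
    return memcpy(dst, src, count);    // otherwise fall through to memcpy
}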
#define CACHE_LINE_SIZE 64
ENTRY(memcpy)
.cfi_startproc
pld [r1, #64]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
/* start preloading as early as possible */
pld [r1, #(CACHE_LINE_SIZE*0)]
stmfd sp!, {r0, lr}
pld [r1, #(CACHE_LINE_SIZE*1)]
.cfi_endproc
/* do we have at least 16-bytes to copy (needed for alignment below) */
cmp r2, #16
blo 5f
/* align destination to cache-line for the write-buffer */
rsb r3, r0, #0
ands r3, r3, #0xF
beq 0f
/* copy up to 15-bytes (count in r3) */
sub r2, r2, r3
movs ip, r3, lsl #31
ldrmib lr, [r1], #1
strmib lr, [r0], #1
ldrcsb ip, [r1], #1
ldrcsb lr, [r1], #1
strcsb ip, [r0], #1
strcsb lr, [r0], #1
movs ip, r3, lsl #29
bge 1f
// copies 4 bytes, destination 32-bits aligned
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
1: bcc 2f
// copies 8 bytes, destination 64-bits aligned
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
2:
0: /* preload immediately the next cache line, which we may need */
pld [r1, #(CACHE_LINE_SIZE*0)]
pld [r1, #(CACHE_LINE_SIZE*1)]
/* make sure we have at least 64 bytes to copy */
subs r2, r2, #64
blo 2f
/* Preload all the cache lines we need.
* NOTE: The number of pld below depends on CACHE_LINE_SIZE,
* ideally we would increase the distance in the main loop to
* avoid the goofy code below. In practice this doesn't seem to make
* a big difference.
* NOTE: The value CACHE_LINE_SIZE * 4 was chosen through
* experimentation.
*/
pld [r1, #(CACHE_LINE_SIZE*2)]
pld [r1, #(CACHE_LINE_SIZE*3)]
pld [r1, #(CACHE_LINE_SIZE*4)]
1: /* The main loop copies 64 bytes at a time */
vld1.8 {d0 - d3}, [r1]!
vld1.8 {d4 - d7}, [r1]!
pld [r1, #(CACHE_LINE_SIZE*4)]
subs r2, r2, #64
vst1.8 {d0 - d3}, [r0, :128]!
vst1.8 {d4 - d7}, [r0, :128]!
bhs 1b
2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
add r2, r2, #64
subs r2, r2, #32
blo 4f
3: /* 32 bytes at a time. These cache lines were already preloaded */
vld1.8 {d0 - d3}, [r1]!
subs r2, r2, #32
vst1.8 {d0 - d3}, [r0, :128]!
bhs 3b
4: /* less than 32 left */
add r2, r2, #32
tst r2, #0x10
beq 5f
// copies 16 bytes, 128-bits aligned
vld1.8 {d0, d1}, [r1]!
vst1.8 {d0, d1}, [r0, :128]!
5: /* copy up to 15-bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0]!
1: bge 2f
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
2: movs ip, r2, lsl #31
ldrmib r3, [r1], #1
ldrcsb ip, [r1], #1
ldrcsb lr, [r1], #1
strmib r3, [r0], #1
strcsb ip, [r0], #1
strcsb lr, [r0], #1
ldmfd sp!, {r0, lr}
bx lr
END(memcpy)
#define MEMCPY_BASE __memcpy_base
#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__memcpy_chk_fail)
.cfi_startproc
// Preserve lr for backtrace.
push {lr}
.save {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
ldr r1, error_code
1:
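// pc here reads as the address of the add plus 8 (ARM state), which is
// why the error_message literal below is biased by (1b+8).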
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
.cfi_endproc
END(__memcpy_chk_fail)
.data
error_string:
.string "memcpy buffer overflow"
@@ -1,329 +0,0 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
ENTRY(MEMCPY_BASE)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
// Assumes that n >= 0, and dst, src are valid pointers.
// For any size less than 832, use the neon code that doesn't
// care about src alignment. This avoids any checks for src
// alignment, and offers the best improvement, since smaller
// copies are dominated by the overhead of the code before and
// after the main loop.
// For larger copies, if src and dst cannot both be aligned to
// word boundaries, use the neon code.
// For all other copies, align dst to a double word boundary
// and copy using LDRD/STRD instructions.
cmp r2, #16
blo .L_copy_less_than_16_unknown_align
// TODO: The aligned copy code is extremely slow when copying some large
// buffers, so always go through the unaligned path for now.
//cmp r2, #832
//bge .L_check_alignment
.L_copy_unknown_alignment:
// Unknown alignment of src and dst.
// Assumes that the first few bytes have already been prefetched.
// Align destination to 128 bits. The mainloop store instructions
// require this alignment or they will throw an exception.
rsb r3, r0, #0
ands r3, r3, #0xF
beq 2f
// Copy up to 15 bytes (count in r3).
sub r2, r2, r3
movs ip, r3, lsl #31
itt mi
ldrbmi lr, [r1], #1
strbmi lr, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1], #1
strbcs ip, [r0], #1
strbcs lr, [r0], #1
movs ip, r3, lsl #29
bge 1f
// Copies 4 bytes, dst 32 bits aligned before, at least 64 bits after.
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
1: bcc 2f
// Copies 8 bytes, dst 64 bits aligned before, at least 128 bits after.
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
2: // Make sure we have at least 64 bytes to copy.
subs r2, r2, #64
blo 2f
1: // The main loop copies 64 bytes at a time.
vld1.8 {d0 - d3}, [r1]!
vld1.8 {d4 - d7}, [r1]!
pld [r1, #(64*4)]
subs r2, r2, #64
vst1.8 {d0 - d3}, [r0, :128]!
vst1.8 {d4 - d7}, [r0, :128]!
bhs 1b
2: // Fix-up the remaining count and make sure we have >= 32 bytes left.
adds r2, r2, #32
blo 3f
// 32 bytes. These cache lines were already preloaded.
vld1.8 {d0 - d3}, [r1]!
sub r2, r2, #32
vst1.8 {d0 - d3}, [r0, :128]!
3: // Less than 32 left.
add r2, r2, #32
tst r2, #0x10
beq .L_copy_less_than_16_unknown_align
// Copies 16 bytes, destination 128 bits aligned.
vld1.8 {d0, d1}, [r1]!
vst1.8 {d0, d1}, [r0, :128]!
.L_copy_less_than_16_unknown_align:
// Copy up to 15 bytes (count in r2).
movs ip, r2, lsl #29
bcc 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0]!
1: bge 2f
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
2: // Copy 0 to 4 bytes.
lsls r2, r2, #31
itt ne
ldrbne lr, [r1], #1
strbne lr, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1]
strbcs ip, [r0], #1
strbcs lr, [r0]
pop {r0, pc}
.L_check_alignment:
// If src and dst cannot both be aligned to a word boundary,
// use the unaligned copy version.
eor r3, r0, r1
ands r3, r3, #0x3
bne .L_copy_unknown_alignment
.cfi_endproc
END(MEMCPY_BASE)
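The dispatch policy in the comment block at the top of MEMCPY_BASE comes down to one size test and one relative-alignment test (with the aligned path currently disabled by the TODO above). A rough C sketch, with hypothetical helper names for the two copy paths:

#include <stdint.h>
#include <stddef.h>

extern void* memcpy_neon_unaligned(void* dst, const void* src, size_t n);  // hypothetical
extern void* memcpy_ldrd_strd(void* dst, const void* src, size_t n);       // hypothetical

void* memcpy_base_sketch(void* dst, const void* src, size_t n) {
    // Small copies, and copies whose src and dst cannot both be word
    // aligned, take the alignment-agnostic NEON path.
    if (n < 832 || (((uintptr_t)dst ^ (uintptr_t)src) & 3) != 0) {
        return memcpy_neon_unaligned(dst, src, n);
    }
    return memcpy_ldrd_strd(dst, src, n);  // align dst, then copy with LDRD/STRD
}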
ENTRY(MEMCPY_BASE_ALIGNED)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
// To try to improve performance, the stack layout is changed,
// i.e., it does not keep the usual convention of the highest
// numbered register at the highest address.
strd r4, r5, [sp, #-8]!
.save {r4, r5}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
strd r6, r7, [sp, #-8]!
.save {r6, r7}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r6, 0
.cfi_rel_offset r7, 4
strd r8, r9, [sp, #-8]!
.save {r8, r9}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r8, 0
.cfi_rel_offset r9, 4
// Optimized for already aligned dst code.
ands ip, r0, #3
bne .L_dst_not_word_aligned
.L_word_aligned:
// Align the destination buffer to 8 bytes, to make sure double
// loads and stores don't cross a cache line boundary,
// as they are then more expensive even if the data is in the cache
// (require two load/store issue cycles instead of one).
// If only one of the buffers is not 8 bytes aligned,
// then it's more important to align dst than src,
// because there is more penalty for stores
// than loads that cross a cacheline boundary.
// This check and realignment are only done if there is >= 832
// bytes to copy.
// Dst is word aligned, but check if it is already double word aligned.
ands r3, r0, #4
beq 1f
ldr r3, [r1], #4
str r3, [r0], #4
sub r2, #4
1: // Can only get here if > 64 bytes to copy, so skip the r2 check.
sub r2, #64
2: // Every loop iteration copies 64 bytes.
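// .irp expands its body once per listed offset, so the two lines below
// become five ldrd/strd pairs covering bytes 0-39 of the block.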
.irp offset, #0, #8, #16, #24, #32
ldrd r4, r5, [r1, \offset]
strd r4, r5, [r0, \offset]
.endr
ldrd r4, r5, [r1, #40]
ldrd r6, r7, [r1, #48]
ldrd r8, r9, [r1, #56]
// Keep the pld as far from the next load as possible.
// The amount to prefetch was determined experimentally using
// large sizes, and verifying the prefetch size does not affect
// the smaller copies too much.
// WARNING: If the ldrd and strd instructions get too far away
// from each other, performance suffers. Three loads
// in a row is the best tradeoff.
pld [r1, #(64*16)]
strd r4, r5, [r0, #40]
strd r6, r7, [r0, #48]
strd r8, r9, [r0, #56]
add r0, r0, #64
add r1, r1, #64
subs r2, r2, #64
bge 2b
// Fix-up the remaining count and make sure we have >= 32 bytes left.
adds r2, r2, #32
blo 4f
// Copy 32 bytes. These cache lines were already preloaded.
.irp offset, #0, #8, #16, #24
ldrd r4, r5, [r1, \offset]
strd r4, r5, [r0, \offset]
.endr
add r1, r1, #32
add r0, r0, #32
sub r2, r2, #32
4: // Less than 32 left.
add r2, r2, #32
tst r2, #0x10
beq 5f
// Copy 16 bytes.
.irp offset, #0, #8
ldrd r4, r5, [r1, \offset]
strd r4, r5, [r0, \offset]
.endr
add r1, r1, #16
add r0, r0, #16
5: // Copy up to 15 bytes (count in r2).
movs ip, r2, lsl #29
bcc 1f
// Copy 8 bytes.
ldrd r4, r5, [r1], #8
strd r4, r5, [r0], #8
1: bge 2f
// Copy 4 bytes.
ldr r4, [r1], #4
str r4, [r0], #4
2: // Copy 0 to 4 bytes.
lsls r2, r2, #31
itt ne
ldrbne lr, [r1], #1
strbne lr, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1]
strbcs ip, [r0], #1
strbcs lr, [r0]
// Restore registers: optimized pop {r0, pc}
ldrd r8, r9, [sp], #8
ldrd r6, r7, [sp], #8
ldrd r4, r5, [sp], #8
pop {r0, pc}
.L_dst_not_word_aligned:
// Align dst to word.
rsb ip, ip, #4
cmp ip, #2
itt gt
ldrbgt lr, [r1], #1
strbgt lr, [r0], #1
itt ge
ldrbge lr, [r1], #1
strbge lr, [r0], #1
ldrb lr, [r1], #1
strb lr, [r0], #1
sub r2, r2, ip
// Src is guaranteed to be at least word aligned by this point.
b .L_word_aligned
.cfi_endproc
END(MEMCPY_BASE_ALIGNED)
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,173 +28,79 @@
#include <machine/cpu-features.h>
#include <machine/asm.h>
#include "libc_events.h"
/*
 * Optimized memset() for ARM.
 *
 * memset() returns its first argument.
 */
.fpu neon
.syntax unified
ENTRY(__memset_chk)
.cfi_startproc
cmp r2, r3
bls .L_done
// Preserve lr for backtrace.
.save {lr}
push {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
.cfi_endproc
END(__memset_chk)
ENTRY(bzero)
.cfi_startproc
mov r2, r1
mov r1, #0
.L_done:
// Fall through to memset...
.cfi_endproc
END(bzero)
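Rough C equivalents of the two entry points above: bzero simply reshuffles its arguments and falls through, and __memset_chk aborts when the fill length exceeds the buffer size the compiler computed (a sketch only; the fail-handler name is hypothetical, standing in for the inline fail path):

#include <stddef.h>
#include <string.h>

extern void fortify_fail_sketch(void);   // hypothetical; never returns

void bzero_sketch(void* dst, size_t n) {
    memset(dst, 0, n);
}

void* __memset_chk_sketch(void* dst, int byte, size_t n, size_t dst_len) {
    if (n > dst_len) {                   // cmp r2, r3 / bls .L_done above
        fortify_fail_sketch();
    }
    return memset(dst, byte, n);
}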
ENTRY(memset)
.cfi_startproc
.save {r0}
stmfd sp!, {r0}
.cfi_def_cfa_offset 4
.cfi_rel_offset r0, 0
// The new algorithm is slower for sizes < 16, so use the old
// neon code in that case.
vdup.8 q0, r1
/* do we have at least 16-bytes to write (needed for alignment below) */
cmp r2, #16
blo .L_set_less_than_16_unknown_align
blo 3f
// Use strd, which requires an even/odd register pair, and arrange the
// values so that:
// r0 and r1 contain the memset value
// r2 is the number of bytes to set
// r3 is the destination pointer
mov r3, r0
/* align destination to 16 bytes for the write-buffer */
rsb r3, r0, #0
ands r3, r3, #0xF
beq 2f
// Replicate the byte value into every byte of r1.
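// lsl #24 places the byte in bits 31-24; each orr with a shifted copy
// doubles the filled span until all four bytes hold the value.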
mov r1, r1, lsl #24
orr r1, r1, r1, lsr #8
orr r1, r1, r1, lsr #16
/* write up to 15-bytes (count in r3) */
sub r2, r2, r3
movs ip, r3, lsl #31
strmib r1, [r0], #1
strcsb r1, [r0], #1
strcsb r1, [r0], #1
movs ip, r3, lsl #29
bge 1f
.L_check_alignment:
// Align destination to a double word to avoid the strd crossing
// a cache line boundary.
ands ip, r3, #7
bne .L_do_double_word_align
// writes 4 bytes, 32-bits aligned
vst1.32 {d0[0]}, [r0, :32]!
1: bcc 2f
.L_double_word_aligned:
mov r0, r1
// writes 8 bytes, 64-bits aligned
vst1.8 {d0}, [r0, :64]!
2:
/* make sure we have at least 32 bytes to write */
subs r2, r2, #32
blo 2f
vmov q1, q0
subs r2, #64
blo .L_set_less_than_64
1: /* The main loop writes 32 bytes at a time */
subs r2, r2, #32
vst1.8 {d0 - d3}, [r0, :128]!
bhs 1b
1: // Main loop sets 64 bytes at a time.
.irp offset, #0, #8, #16, #24, #32, #40, #48, #56
strd r0, r1, [r3, \offset]
.endr
2: /* less than 32 left */
add r2, r2, #32
tst r2, #0x10
beq 3f
add r3, #64
subs r2, #64
bge 1b
.L_set_less_than_64:
// Restore r2 to the count of bytes left to set.
add r2, #64
lsls ip, r2, #27
bcc .L_set_less_than_32
// Set 32 bytes.
.irp offset, #0, #8, #16, #24
strd r0, r1, [r3, \offset]
.endr
add r3, #32
.L_set_less_than_32:
bpl .L_set_less_than_16
// Set 16 bytes.
.irp offset, #0, #8
strd r0, r1, [r3, \offset]
.endr
add r3, #16
.L_set_less_than_16:
// Less than 16 bytes to set.
lsls ip, r2, #29
bcc .L_set_less_than_8
// Set 8 bytes.
strd r0, r1, [r3], #8
.L_set_less_than_8:
bpl .L_set_less_than_4
// Set 4 bytes
str r1, [r3], #4
.L_set_less_than_4:
lsls ip, r2, #31
it ne
strbne r1, [r3], #1
itt cs
strbcs r1, [r3], #1
strbcs r1, [r3]
ldmfd sp!, {r0}
bx lr
.L_do_double_word_align:
rsb ip, ip, #8
sub r2, r2, ip
movs r0, ip, lsl #31
it mi
strbmi r1, [r3], #1
itt cs
strbcs r1, [r3], #1
strbcs r1, [r3], #1
// Dst is at least word aligned by this point.
cmp ip, #4
blo .L_double_word_aligned
str r1, [r3], #4
b .L_double_word_aligned
.L_set_less_than_16_unknown_align:
// Set up to 15 bytes.
vdup.8 d0, r1
// writes 16 bytes, 128-bits aligned
vst1.8 {d0, d1}, [r0, :128]!
3: /* write up to 15-bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
vst1.8 {d0}, [r0]!
1: bge 2f
vst1.32 {d0[0]}, [r0]!
2: movs ip, r2, lsl #31
it mi
strbmi r1, [r0], #1
itt cs
strbcs r1, [r0], #1
strbcs r1, [r0], #1
strmib r1, [r0], #1
strcsb r1, [r0], #1
strcsb r1, [r0], #1
ldmfd sp!, {r0}
bx lr
.cfi_endproc
END(memset)
.data
error_string:
.string "memset buffer overflow"
@@ -1,568 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
.syntax unified
.thumb
.thumb_func
.macro m_push
push {r0, r4, r5, lr}
.endm // m_push
.macro m_pop
pop {r0, r4, r5, pc}
.endm // m_pop
.macro m_scan_byte
ldrb r3, [r0]
cbz r3, strcat_r0_scan_done
add r0, #1
.endm // m_scan_byte
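// m_copy_byte copies one byte from src to dst and then dispatches on it:
// cbz exits to the finish label at the terminator, cbnz continues to the
// given label on a live byte.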
.macro m_copy_byte reg, cmd, label
ldrb \reg, [r1], #1
strb \reg, [r0], #1
\cmd \reg, \label
.endm // m_copy_byte
ENTRY(strcat)
// Quick check to see if src is empty.
ldrb r2, [r1]
pld [r1, #0]
cbnz r2, strcat_continue
bx lr
strcat_continue:
// To speed up really small dst strings, unroll checking the first 4 bytes.
m_push
m_scan_byte
m_scan_byte
m_scan_byte
m_scan_byte
ands r3, r0, #7
beq strcat_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcat_align_to_32
ldrb r5, [r0]
cbz r5, strcat_r0_scan_done
add r0, r0, #1
strcat_align_to_32:
bcc strcat_align_to_64
ldrb r2, [r0]
cbz r2, strcat_r0_scan_done
add r0, r0, #1
ldrb r4, [r0]
cbz r4, strcat_r0_scan_done
add r0, r0, #1
strcat_align_to_64:
tst r3, #4
beq strcat_mainloop
ldr r3, [r0], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcat_zero_in_second_register
b strcat_mainloop
strcat_r0_scan_done:
// For short copies, hard-code checking the first 8 bytes since this
// new code doesn't win until after about 8 bytes.
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
strcpy_finish:
m_pop
strcpy_continue:
ands r3, r0, #7
beq strcpy_check_src_align
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcpy_align_to_32
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
strcpy_align_to_32:
bcc strcpy_align_to_64
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
strcpy_align_to_64:
tst r3, #4
beq strcpy_check_src_align
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
str r2, [r0], #4
strcpy_check_src_align:
// At this point dst is aligned to a double word, check if src
// is also aligned to a double word.
ands r3, r1, #7
bne strcpy_unaligned_copy
.p2align 2
strcpy_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_mainloop
strcpy_complete:
m_pop
strcpy_zero_in_first_register:
lsls lr, ip, #17
bne strcpy_copy1byte
bcs strcpy_copy2bytes
lsls ip, ip, #1
bne strcpy_copy3bytes
strcpy_copy4bytes:
// Copy 4 bytes to the destination.
str r2, [r0]
m_pop
strcpy_copy1byte:
strb r2, [r0]
m_pop
strcpy_copy2bytes:
strh r2, [r0]
m_pop
strcpy_copy3bytes:
strh r2, [r0], #2
lsr r2, #16
strb r2, [r0]
m_pop
strcpy_zero_in_second_register:
lsls lr, ip, #17
bne strcpy_copy5bytes
bcs strcpy_copy6bytes
lsls ip, ip, #1
bne strcpy_copy7bytes
// Copy 8 bytes to the destination.
strd r2, r3, [r0]
m_pop
strcpy_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
m_pop
strcpy_copy6bytes:
str r2, [r0], #4
strh r3, [r0]
m_pop
strcpy_copy7bytes:
str r2, [r0], #4
strh r3, [r0], #2
lsr r3, #16
strb r3, [r0]
m_pop
strcpy_unaligned_copy:
// Dst is aligned to a double word, while src is at an unknown alignment.
// There are 7 different versions of the unaligned copy code
// to prevent overreading the src. The mainloop of every single version
// will store 64 bits per loop. The difference is how much of src can
// be read without potentially crossing a page boundary.
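// tbb branches via the byte table below, indexed by src & 7; each entry
// holds half the forward offset from the table base (Thumb halfwords).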
tbb [pc, r3]
strcpy_unaligned_branchtable:
.byte 0
.byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
.p2align 2
// Can read 7 bytes before possibly crossing a page.
strcpy_unalign7:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r3, [r1]
cbz r3, strcpy_unalign7_copy5bytes
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign7_copy6bytes
ldrb r5, [r1, #2]
cbz r5, strcpy_unalign7_copy7bytes
ldr r3, [r1], #4
pld [r1, #64]
lsrs ip, r3, #24
strd r2, r3, [r0], #8
beq strcpy_unalign_return
b strcpy_unalign7
strcpy_unalign7_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
strcpy_unalign_return:
m_pop
strcpy_unalign7_copy6bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
m_pop
strcpy_unalign7_copy7bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
strb r5, [r0], #1
m_pop
.p2align 2
// Can read 6 bytes before possibly crossing a page.
strcpy_unalign6:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldrb r5, [r1, #1]
cbz r5, strcpy_unalign_copy6bytes
ldr r3, [r1], #4
pld [r1, #64]
tst r3, #0xff0000
beq strcpy_copy7bytes
lsrs ip, r3, #24
strd r2, r3, [r0], #8
beq strcpy_unalign_return
b strcpy_unalign6
.p2align 2
// Can read 5 bytes before possibly crossing a page.
strcpy_unalign5:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign5
strcpy_unalign_copy5bytes:
str r2, [r0], #4
strb r4, [r0]
m_pop
strcpy_unalign_copy6bytes:
str r2, [r0], #4
strb r4, [r0], #1
strb r5, [r0]
m_pop
.p2align 2
// Can read 4 bytes before possibly crossing a page.
strcpy_unalign4:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign4
.p2align 2
// Can read 3 bytes before possibly crossing a page.
strcpy_unalign3:
ldrb r2, [r1]
cbz r2, strcpy_unalign3_copy1byte
ldrb r3, [r1, #1]
cbz r3, strcpy_unalign3_copy2bytes
ldrb r4, [r1, #2]
cbz r4, strcpy_unalign3_copy3bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
lsrs lr, r2, #24
beq strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign3
strcpy_unalign3_copy1byte:
strb r2, [r0]
m_pop
strcpy_unalign3_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_pop
strcpy_unalign3_copy3bytes:
strb r2, [r0], #1
strb r3, [r0], #1
strb r4, [r0]
m_pop
.p2align 2
// Can read 2 bytes before possibly crossing a page.
strcpy_unalign2:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign_copy2bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
tst r2, #0xff0000
beq strcpy_copy3bytes
lsrs ip, r2, #24
beq strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign2
.p2align 2
// Can read 1 byte before possibly crossing a page.
strcpy_unalign1:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign1
strcpy_unalign_copy1byte:
strb r2, [r0]
m_pop
strcpy_unalign_copy2bytes:
strb r2, [r0], #1
strb r4, [r0]
m_pop
.p2align 2
strcat_mainloop:
ldrd r2, r3, [r0], #8
pld [r0, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcat_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcat_zero_in_second_register
b strcat_mainloop
strcat_zero_in_first_register:
// Prefetch the src now; it's going to be used soon.
pld [r1, #0]
lsls lr, ip, #17
bne strcat_sub8
bcs strcat_sub7
lsls ip, ip, #1
bne strcat_sub6
sub r0, r0, #5
b strcat_r0_scan_done
strcat_sub8:
sub r0, r0, #8
b strcat_r0_scan_done
strcat_sub7:
sub r0, r0, #7
b strcat_r0_scan_done
strcat_sub6:
sub r0, r0, #6
b strcat_r0_scan_done
strcat_zero_in_second_register:
// Prefetch the src now; it's going to be used soon.
pld [r1, #0]
lsls lr, ip, #17
bne strcat_sub4
bcs strcat_sub3
lsls ip, ip, #1
bne strcat_sub2
sub r0, r0, #1
b strcat_r0_scan_done
strcat_sub4:
sub r0, r0, #4
b strcat_r0_scan_done
strcat_sub3:
sub r0, r0, #3
b strcat_r0_scan_done
strcat_sub2:
sub r0, r0, #2
b strcat_r0_scan_done
END(strcat)
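Structurally, strcat above is a strlen-style scan over dst followed by the shared strcpy body. A plain C sketch of that flow:

char* strcat_sketch(char* dst, const char* src) {
    char* d = dst;
    while (*d) {
        ++d;                            // strcat_mainloop: find dst's terminator
    }
    while ((*d++ = *src++) != '\0') {
        // strcpy body: copy bytes until the terminator has been written
    }
    return dst;
}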
@@ -123,13 +123,8 @@ ENTRY(strcmp)
.macro init
/* Macro to save temporary registers and prepare magic values. */
subs sp, sp, #16
.cfi_def_cfa_offset 16
strd r4, r5, [sp, #8]
.cfi_rel_offset r4, 8
.cfi_rel_offset r5, 12
strd r6, r7, [sp]
.cfi_rel_offset r6, 0
.cfi_rel_offset r7, 4
mvn r6, #0 /* all F */
mov r7, #0 /* all 0 */
.endm /* init */
@@ -170,20 +165,18 @@ ENTRY(strcmp)
#endif /* not __ARMEB__ */
.endm /* setup_return */
.cfi_startproc
pld [r0, #0]
pld [r1, #0]
/* Are both strings double-word aligned? */
orr ip, r0, r1
tst ip, #7
bne .L_do_align
bne do_align
/* Fast path. */
.save {r4-r7}
init
.L_doubleword_aligned:
doubleword_aligned:
/* Get here when the strings to compare are double-word aligned. */
/* Compare two words in every iteration. */
@@ -196,14 +189,14 @@ ENTRY(strcmp)
ldrd r2, r3, [r0], #8
ldrd r4, r5, [r1], #8
magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
magic_compare_and_branch w1=r2, w2=r4, label=return_24
magic_compare_and_branch w1=r3, w2=r5, label=return_35
b 2b
.L_do_align:
do_align:
/* Is the first string word-aligned? */
ands ip, r0, #3
beq .L_word_aligned_r0
beq word_aligned_r0
/* Fast compare byte by byte until the first string is word-aligned. */
/* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
@@ -211,58 +204,58 @@ ENTRY(strcmp)
bic r0, r0, #3
ldr r2, [r0], #4
lsls ip, ip, #31
beq .L_byte2
bcs .L_byte3
beq byte2
bcs byte3
.L_byte1:
byte1:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE1_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbz reg=r3, label=.L_fast_return
bne fast_return
m_cbz reg=r3, label=fast_return
.L_byte2:
byte2:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE2_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbz reg=r3, label=.L_fast_return
bne fast_return
m_cbz reg=r3, label=fast_return
.L_byte3:
byte3:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE3_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbnz reg=r3, label=.L_word_aligned_r0
bne fast_return
m_cbnz reg=r3, label=word_aligned_r0
.L_fast_return:
fast_return:
mov r0, ip
bx lr
.L_word_aligned_r0:
word_aligned_r0:
init
/* The first string is word-aligned. */
/* Is the second string word-aligned? */
ands ip, r1, #3
bne .L_strcmp_unaligned
bne strcmp_unaligned
.L_word_aligned:
word_aligned:
/* The strings are word-aligned. */
/* Is the first string double-word aligned? */
tst r0, #4
beq .L_doubleword_aligned_r0
beq doubleword_aligned_r0
/* If r0 is not double-word aligned yet, align it by loading
and comparing the next word from each string. */
ldr r2, [r0], #4
ldr r4, [r1], #4
magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
magic_compare_and_branch w1=r2 w2=r4 label=return_24
.L_doubleword_aligned_r0:
doubleword_aligned_r0:
/* Get here when r0 is double-word aligned. */
/* Is r1 doubleword_aligned? */
tst r1, #4
beq .L_doubleword_aligned
beq doubleword_aligned
/* Get here when the strings to compare are word-aligned,
r0 is double-word aligned, but r1 is not double-word aligned. */
@@ -278,9 +271,9 @@ ENTRY(strcmp)
/* Load the next double-word from each string and compare. */
ldrd r2, r3, [r0], #8
magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
magic_compare_and_branch w1=r2 w2=r5 label=return_25
ldrd r4, r5, [r1], #8
magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
magic_compare_and_branch w1=r3 w2=r4 label=return_34
b 3b
.macro miscmp_word offsetlo offsethi
@@ -304,47 +297,47 @@ ENTRY(strcmp)
and r2, r3, r6, S2LOMEM #\offsetlo
it eq
cmpeq r2, r5
bne .L_return_25
bne return_25
ldr r5, [r1], #4
cmp ip, #0
eor r3, r2, r3
S2HIMEM r2, r5, #\offsethi
it eq
cmpeq r3, r2
bne .L_return_32
bne return_32
b 7b
.endm /* miscmp_word */
.L_strcmp_unaligned:
strcmp_unaligned:
/* r0 is word-aligned, r1 is at offset ip from a word. */
/* Align r1 to the (previous) word-boundary. */
bic r1, r1, #3
/* Unaligned comparison word by word using LDRs. */
cmp ip, #2
beq .L_miscmp_word_16 /* If ip == 2. */
bge .L_miscmp_word_24 /* If ip == 3. */
beq miscmp_word_16 /* If ip == 2. */
bge miscmp_word_24 /* If ip == 3. */
miscmp_word offsetlo=8 offsethi=24 /* If ip == 1. */
.L_miscmp_word_16: miscmp_word offsetlo=16 offsethi=16
.L_miscmp_word_24: miscmp_word offsetlo=24 offsethi=8
miscmp_word_16: miscmp_word offsetlo=16 offsethi=16
miscmp_word_24: miscmp_word offsetlo=24 offsethi=8
.L_return_32:
return_32:
setup_return w1=r3, w2=r2
b .L_do_return
.L_return_34:
b do_return
return_34:
setup_return w1=r3, w2=r4
b .L_do_return
.L_return_25:
b do_return
return_25:
setup_return w1=r2, w2=r5
b .L_do_return
.L_return_35:
b do_return
return_35:
setup_return w1=r3, w2=r5
b .L_do_return
.L_return_24:
b do_return
return_24:
setup_return w1=r2, w2=r4
.L_do_return:
do_return:
#ifdef __ARMEB__
mov r0, ip
@@ -356,16 +349,11 @@ ENTRY(strcmp)
ldrd r6, r7, [sp]
ldrd r4, r5, [sp, #8]
adds sp, sp, #16
.cfi_def_cfa_offset 0
.cfi_restore r4
.cfi_restore r5
.cfi_restore r6
.cfi_restore r7
/* There is a zero or a different byte between r1 and r2. */
/* r0 contains a mask of all-zero bytes in r1. */
/* Using r0 and not ip here because cbz requires low register. */
m_cbz reg=r0, label=.L_compute_return_value
m_cbz reg=r0, label=compute_return_value
clz r0, r0
/* r0 contains the number of bits on the left of the first all-zero byte in r1. */
rsb r0, r0, #24
@@ -373,7 +361,7 @@ ENTRY(strcmp)
lsr r1, r1, r0
lsr r2, r2, r0
.L_compute_return_value:
compute_return_value:
movs r0, #1
cmp r1, r2
/* The return value is computed as follows.
@@ -386,5 +374,4 @@ ENTRY(strcmp)
it ls
sbcls r0, r0, r0
bx lr
.cfi_endproc
END(strcmp)
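The closing movs/it/sbcls sequence folds the last word comparison into strcmp's return contract. A C sketch of just that tail (operand names are illustrative):

#include <stdint.h>

int strcmp_tail_sketch(uint32_t b1, uint32_t b2) {
    if (b1 > b2) return 1;    // hi: the conditional sbc is skipped, r0 stays 1
    if (b1 < b2) return -1;   // carry clear: sbc r0, r0, r0 yields -1
    return 0;                 // equal: carry set, sbc yields 0
}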
@@ -1,451 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
.syntax unified
.thumb
.thumb_func
.macro m_push
push {r0, r4, r5, lr}
.endm // m_push
.macro m_pop
pop {r0, r4, r5, pc}
.endm // m_pop
.macro m_copy_byte reg, cmd, label
ldrb \reg, [r1], #1
strb \reg, [r0], #1
\cmd \reg, \label
.endm // m_copy_byte
ENTRY(strcpy)
// For short copies, hard-code checking the first 8 bytes since this
// new code doesn't win until after about 8 bytes.
m_push
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
strcpy_finish:
m_pop
strcpy_continue:
pld [r1, #0]
ands r3, r0, #7
beq strcpy_check_src_align
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcpy_align_to_32
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
strcpy_align_to_32:
bcc strcpy_align_to_64
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
strcpy_align_to_64:
tst r3, #4
beq strcpy_check_src_align
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
str r2, [r0], #4
strcpy_check_src_align:
// At this point dst is aligned to a double word, check if src
// is also aligned to a double word.
ands r3, r1, #7
bne strcpy_unaligned_copy
.p2align 2
strcpy_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_mainloop
strcpy_complete:
m_pop
strcpy_zero_in_first_register:
lsls lr, ip, #17
bne strcpy_copy1byte
bcs strcpy_copy2bytes
lsls ip, ip, #1
bne strcpy_copy3bytes
strcpy_copy4bytes:
// Copy 4 bytes to the destination.
str r2, [r0]
m_pop
strcpy_copy1byte:
strb r2, [r0]
m_pop
strcpy_copy2bytes:
strh r2, [r0]
m_pop
strcpy_copy3bytes:
strh r2, [r0], #2
lsr r2, #16
strb r2, [r0]
m_pop
strcpy_zero_in_second_register:
lsls lr, ip, #17
bne strcpy_copy5bytes
bcs strcpy_copy6bytes
lsls ip, ip, #1
bne strcpy_copy7bytes
// Copy 8 bytes to the destination.
strd r2, r3, [r0]
m_pop
strcpy_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
m_pop
strcpy_copy6bytes:
str r2, [r0], #4
strh r3, [r0]
m_pop
strcpy_copy7bytes:
str r2, [r0], #4
strh r3, [r0], #2
lsr r3, #16
strb r3, [r0]
m_pop
strcpy_unaligned_copy:
// Dst is aligned to a double word, while src is at an unknown alignment.
// There are 7 different versions of the unaligned copy code
// to prevent overreading the src. The mainloop of every single version
// will store 64 bits per loop. The difference is how much of src can
// be read without potentially crossing a page boundary.
tbb [pc, r3]
strcpy_unaligned_branchtable:
.byte 0
.byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
.p2align 2
// Can read 7 bytes before possibly crossing a page.
strcpy_unalign7:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r3, [r1]
cbz r3, strcpy_unalign7_copy5bytes
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign7_copy6bytes
ldrb r5, [r1, #2]
cbz r5, strcpy_unalign7_copy7bytes
ldr r3, [r1], #4
pld [r1, #64]
lsrs ip, r3, #24
strd r2, r3, [r0], #8
beq strcpy_unalign_return
b strcpy_unalign7
strcpy_unalign7_copy5bytes:
str r2, [r0], #4
strb r3, [r0]
strcpy_unalign_return:
m_pop
strcpy_unalign7_copy6bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
m_pop
strcpy_unalign7_copy7bytes:
str r2, [r0], #4
strb r3, [r0], #1
strb r4, [r0], #1
strb r5, [r0], #1
m_pop
.p2align 2
// Can read 6 bytes before possibly crossing a page.
strcpy_unalign6:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldrb r5, [r1, #1]
cbz r5, strcpy_unalign_copy6bytes
ldr r3, [r1], #4
pld [r1, #64]
tst r3, #0xff0000
beq strcpy_copy7bytes
lsrs ip, r3, #24
strd r2, r3, [r0], #8
beq strcpy_unalign_return
b strcpy_unalign6
.p2align 2
// Can read 5 bytes before possibly crossing a page.
strcpy_unalign5:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign5
strcpy_unalign_copy5bytes:
str r2, [r0], #4
strb r4, [r0]
m_pop
strcpy_unalign_copy6bytes:
str r2, [r0], #4
strb r4, [r0], #1
strb r5, [r0]
m_pop
.p2align 2
// Can read 4 bytes before possibly crossing a page.
strcpy_unalign4:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign4
.p2align 2
// Can read 3 bytes before possibly crossing a page.
strcpy_unalign3:
ldrb r2, [r1]
cbz r2, strcpy_unalign3_copy1byte
ldrb r3, [r1, #1]
cbz r3, strcpy_unalign3_copy2bytes
ldrb r4, [r1, #2]
cbz r4, strcpy_unalign3_copy3bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
lsrs lr, r2, #24
beq strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign3
strcpy_unalign3_copy1byte:
strb r2, [r0]
m_pop
strcpy_unalign3_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_pop
strcpy_unalign3_copy3bytes:
strb r2, [r0], #1
strb r3, [r0], #1
strb r4, [r0]
m_pop
.p2align 2
// Can read 2 bytes before possibly crossing a page.
strcpy_unalign2:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign_copy2bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
tst r2, #0xff0000
beq strcpy_copy3bytes
lsrs ip, r2, #24
beq strcpy_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign2
.p2align 2
// Can read 1 byte before possibly crossing a page.
strcpy_unalign1:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
strd r2, r3, [r0], #8
b strcpy_unalign1
strcpy_unalign_copy1byte:
strb r2, [r0]
m_pop
strcpy_unalign_copy2bytes:
strb r2, [r0], #1
strb r4, [r0]
m_pop
END(strcpy)
@@ -1,165 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
.syntax unified
.thumb
.thumb_func
ENTRY(strlen)
pld [r0, #0]
mov r1, r0
ands r3, r0, #7
beq mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq align_to_32
ldrb r2, [r1], #1
cbz r2, update_count_and_return
align_to_32:
bcc align_to_64
ands ip, r3, #2
beq align_to_64
ldrb r2, [r1], #1
cbz r2, update_count_and_return
ldrb r2, [r1], #1
cbz r2, update_count_and_return
align_to_64:
tst r3, #4
beq mainloop
ldr r3, [r1], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne zero_in_second_register
.p2align 2
mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne zero_in_second_register
b mainloop
update_count_and_return:
sub r0, r1, r0
sub r0, r0, #1
bx lr
zero_in_first_register:
sub r0, r1, r0
lsls r3, ip, #17
bne sub8_and_return
bcs sub7_and_return
lsls ip, ip, #1
bne sub6_and_return
sub r0, r0, #5
bx lr
sub8_and_return:
sub r0, r0, #8
bx lr
sub7_and_return:
sub r0, r0, #7
bx lr
sub6_and_return:
sub r0, r0, #6
bx lr
zero_in_second_register:
sub r0, r1, r0
lsls r3, ip, #17
bne sub4_and_return
bcs sub3_and_return
lsls ip, ip, #1
bne sub2_and_return
sub r0, r0, #1
bx lr
sub4_and_return:
sub r0, r0, #4
bx lr
sub3_and_return:
sub r0, r0, #3
bx lr
sub2_and_return:
sub r0, r0, #2
bx lr
END(strlen)
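The routine above is the classic word-at-a-time strlen: align first, then test a word per iteration (a double word in the assembly) for a zero byte. A C sketch of the same idea, scanning 32 bits at a time:

#include <stdint.h>
#include <stddef.h>

// Nonzero iff some byte of w is zero.
static inline uint32_t has_zero_byte(uint32_t w) {
    return (w - 0x01010101u) & ~w & 0x80808080u;
}

size_t strlen_sketch(const char* s) {
    const char* p = s;
    while (((uintptr_t)p & 3) != 0) {   // byte steps until word aligned
        if (*p == '\0') return (size_t)(p - s);
        ++p;
    }
    const uint32_t* w = (const uint32_t*)p;
    while (!has_zero_byte(*w)) ++w;     // the assembly scans 64 bits per step
    p = (const char*)w;
    while (*p) ++p;                     // pin down the exact terminator byte
    return (size_t)(p - s);
}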
@@ -1,10 +1,5 @@
$(call libc-add-cpu-variant-src,MEMCPY,arch-arm/cortex-a15/bionic/memcpy.S)
$(call libc-add-cpu-variant-src,MEMSET,arch-arm/cortex-a15/bionic/memset.S)
$(call libc-add-cpu-variant-src,STRCAT,arch-arm/cortex-a15/bionic/strcat.S)
$(call libc-add-cpu-variant-src,STRCMP,arch-arm/cortex-a15/bionic/strcmp.S)
$(call libc-add-cpu-variant-src,STRCPY,arch-arm/cortex-a15/bionic/strcpy.S)
$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
$(call libc-add-cpu-variant-src,__STRCAT_CHK,arch-arm/cortex-a15/bionic/__strcat_chk.S)
$(call libc-add-cpu-variant-src,__STRCPY_CHK,arch-arm/cortex-a15/bionic/__strcpy_chk.S)
include bionic/libc/arch-arm/generic/generic.mk
@@ -1 +0,0 @@
include bionic/libc/arch-arm/cortex-a15/cortex-a15.mk
@@ -1,230 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#include "libc_events.h"
.syntax unified
.fpu neon
.thumb
.thumb_func
// Get the length of the src string, then get the length of the dst string.
// Check that the two lengths together don't exceed the threshold, then
// do a memcpy of the data.
ENTRY(__strcat_chk)
.cfi_startproc
pld [r0, #0]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
push {r4, r5}
.save {r4, r5}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
mov lr, r2
// Save the dst register to r5
mov r5, r0
// Zero out r4
eor r4, r4, r4
// r1 contains the address of the string to count.
.L_strlen_start:
mov r0, r1
ands r3, r0, #7
bne .L_align_src
.p2align 2
.L_mainloop:
ldmia r1!, {r2, r3}
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_zero_in_first_register:
sub r3, r1, r0
// Check for zero in byte 0.
lsls r2, ip, #17
beq .L_check_byte1_reg1
sub r3, r3, #8
b .L_finish
.L_check_byte1_reg1:
bcc .L_check_byte2_reg1
sub r3, r3, #7
b .L_finish
.L_check_byte2_reg1:
// Check for zero in byte 2.
tst ip, #0x800000
it ne
subne r3, r3, #6
bne .L_finish
sub r3, r3, #5
b .L_finish
.L_zero_in_second_register:
sub r3, r1, r0
// Check for zero in byte 0.
lsls r2, ip, #17
beq .L_check_byte1_reg2
sub r3, r3, #4
b .L_finish
.L_check_byte1_reg2:
bcc .L_check_byte2_reg2
sub r3, r3, #3
b .L_finish
.L_check_byte2_reg2:
// Check for zero in byte 2.
tst ip, #0x800000
it ne
subne r3, r3, #2
bne .L_finish
sub r3, r3, #1
b .L_finish
.L_align_src:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq .L_align_to_32
ldrb r2, [r1], #1
cbz r2, .L_done
.L_align_to_32:
bcc .L_align_to_64
ldrb r2, [r1], #1
cbz r2, .L_done
ldrb r2, [r1], #1
cbz r2, .L_done
.L_align_to_64:
tst r3, #4
beq .L_mainloop
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_done:
sub r3, r1, r0
sub r3, r3, #1
.L_finish:
cmp r4, #0
bne .L_strlen_done
// Time to get the dst string length.
mov r1, r5
// Save the original source address to r5.
mov r5, r0
// Save the current length (adding 1 for the terminator).
add r4, r3, #1
b .L_strlen_start
// r0 holds the pointer to the dst string.
// r3 holds the dst string length.
// r4 holds the src string length + 1.
.L_strlen_done:
add r2, r3, r4
cmp r2, lr
bhi __strcat_chk_fail
// Set up the registers for the memcpy code.
mov r1, r5
pld [r1, #64]
mov r2, r4
add r0, r0, r3
pop {r4, r5}
// Fall through into the memcpy_base function.
.cfi_endproc
END(__strcat_chk)
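In C, the two-pass flow above is a pair of strlen calls, a combined length check, and a memcpy into the end of dst; r4 doubles as the pass flag (zero on the src pass, src_len + 1 on the dst pass). A sketch only, not bionic's C code:

#include <stddef.h>
#include <string.h>

extern void __strcat_chk_fail(void);    // the fail path below; never returns

char* __strcat_chk_sketch(char* dst, const char* src, size_t dst_buf_len) {
    size_t src_len = strlen(src);       // first .L_strlen_start pass
    size_t dst_len = strlen(dst);       // second pass
    if (dst_len + src_len + 1 > dst_buf_len) {   // add r2, r3, r4 / cmp / bhi
        __strcat_chk_fail();
    }
    memcpy(dst + dst_len, src, src_len + 1);     // shared memcpy_base tail
    return dst;
}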
#define MEMCPY_BASE __strcat_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__strcat_chk_fail)
.cfi_startproc
.save {r0, lr}
.save {r4, r5}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__strcat_chk_fail)
.data
error_string:
.string "strcat buffer overflow"
@@ -1,194 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#include "libc_events.h"
.syntax unified
.fpu neon
.thumb
.thumb_func
// Get the length of the source string first, then do a memcpy of the data
// instead of a strcpy.
ENTRY(__strcpy_chk)
.cfi_startproc
pld [r0, #0]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
mov lr, r2
mov r0, r1
ands r3, r0, #7
bne .L_align_src
.p2align 2
.L_mainloop:
ldmia r0!, {r2, r3}
pld [r0, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_zero_in_first_register:
sub r3, r0, r1
// Check for zero in byte 0.
lsls r2, ip, #17
beq .L_check_byte1_reg1
sub r3, r3, #8
b .L_check_size
.L_check_byte1_reg1:
bcc .L_check_byte2_reg1
sub r3, r3, #7
b .L_check_size
.L_check_byte2_reg1:
// Check for zero in byte 2.
tst ip, #0x800000
it ne
subne r3, r3, #6
bne .L_check_size
sub r3, r3, #5
b .L_check_size
.L_zero_in_second_register:
sub r3, r0, r1
// Check for zero in byte 0.
lsls r2, ip, #17
beq .L_check_byte1_reg2
sub r3, r3, #4
b .L_check_size
.L_check_byte1_reg2:
bcc .L_check_byte2_reg2
sub r3, r3, #3
b .L_check_size
.L_check_byte2_reg2:
// Check for zero in byte 2.
tst ip, #0x800000
it ne
subne r3, r3, #2
bne .L_check_size
sub r3, r3, #1
b .L_check_size
.L_align_src:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq .L_align_to_32
ldrb r2, [r0], #1
cbz r2, .L_done
.L_align_to_32:
bcc .L_align_to_64
ldrb r2, [r0], #1
cbz r2, .L_done
ldrb r2, [r0], #1
cbz r2, .L_done
.L_align_to_64:
tst r3, #4
beq .L_mainloop
ldr r2, [r0], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_done:
sub r3, r0, r1
sub r3, r3, #1
.L_check_size:
pld [r1, #0]
pld [r1, #64]
ldr r0, [sp]
cmp r3, lr
bhs __strcpy_chk_fail
// Add 1 to the copy length to include the string terminator.
add r2, r3, #1
.cfi_endproc
// Fall through into the memcpy_base function.
END(__strcpy_chk)
#define MEMCPY_BASE __strcpy_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__strcpy_chk_fail)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__strcpy_chk_fail)
.data
error_string:
.string "strcpy buffer overflow"


@@ -26,8 +26,8 @@
* SUCH DAMAGE.
*/
#include <machine/cpu-features.h>
#include <machine/asm.h>
#include "libc_events.h"
/*
* This code assumes it is running on a processor that supports all arm v7
@@ -35,58 +35,177 @@
* cache line.
*/
.syntax unified
.text
.fpu neon
.thumb
.thumb_func
ENTRY(__memcpy_chk)
.cfi_startproc
cmp r2, r3
bhi __memcpy_chk_fail
// Fall through to memcpy...
.cfi_endproc
END(__memcpy_chk)
#define CACHE_LINE_SIZE 32
ENTRY(memcpy)
.cfi_startproc
.save {r0, lr}
/* start preloading as early as possible */
pld [r1, #(CACHE_LINE_SIZE * 0)]
stmfd sp!, {r0, lr}
pld [r1, #(CACHE_LINE_SIZE * 2)]
pld [r1, #0]
stmfd sp!, {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
pld [r1, #64]
// Check that the count is at least 16 bytes; the alignment code below requires it.
cmp r2, #16
blo 5f
.cfi_endproc
/* check if buffers are aligned. If so, run arm-only version */
eor r3, r0, r1
ands r3, r3, #0x3
beq 11f
/* Check the upper size limit for Neon unaligned memory access in memcpy */
cmp r2, #224
blo 3f
/* align destination to 16 bytes for the write-buffer */
rsb r3, r0, #0
ands r3, r3, #0xF
beq 3f
/* copy up to 15 bytes (count in r3) */
sub r2, r2, r3
movs ip, r3, lsl #31
ldrmib lr, [r1], #1
strmib lr, [r0], #1
ldrcsb ip, [r1], #1
ldrcsb lr, [r1], #1
strcsb ip, [r0], #1
strcsb lr, [r0], #1
movs ip, r3, lsl #29
bge 1f
// copies 4 bytes, destination 32-bits aligned
vld1.32 {d0[0]}, [r1]!
vst1.32 {d0[0]}, [r0, :32]!
1: bcc 2f
// copies 8 bytes, destination 64-bits aligned
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
2:
/* immediately preload the next cache line, which we may need */
pld [r1, #(CACHE_LINE_SIZE * 0)]
pld [r1, #(CACHE_LINE_SIZE * 2)]
3:
/* make sure we have at least 64 bytes to copy */
subs r2, r2, #64
blo 2f
/* preload all the cache lines we need */
pld [r1, #(CACHE_LINE_SIZE * 4)]
pld [r1, #(CACHE_LINE_SIZE * 6)]
1: /* The main loop copies 64 bytes at a time */
vld1.8 {d0 - d3}, [r1]!
vld1.8 {d4 - d7}, [r1]!
pld [r1, #(CACHE_LINE_SIZE * 6)]
subs r2, r2, #64
vst1.8 {d0 - d3}, [r0]!
vst1.8 {d4 - d7}, [r0]!
bhs 1b
2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
add r2, r2, #64
subs r2, r2, #32
blo 4f
3: /* 32 bytes at a time. These cache lines were already preloaded */
vld1.8 {d0 - d3}, [r1]!
subs r2, r2, #32
vst1.8 {d0 - d3}, [r0]!
bhs 3b
4: /* less than 32 left */
add r2, r2, #32
tst r2, #0x10
beq 5f
// copies 16 bytes, 128-bits aligned
vld1.8 {d0, d1}, [r1]!
vst1.8 {d0, d1}, [r0]!
5: /* copy up to 15 bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0]!
1: bge 2f
vld1.32 {d0[0]}, [r1]!
vst1.32 {d0[0]}, [r0]!
2: movs ip, r2, lsl #31
ldrmib r3, [r1], #1
ldrcsb ip, [r1], #1
ldrcsb lr, [r1], #1
strmib r3, [r0], #1
strcsb ip, [r0], #1
strcsb lr, [r0], #1
ldmfd sp!, {r0, lr}
bx lr
11:
/* Simple arm-only copy loop to handle aligned copy operations */
stmfd sp!, {r4, r5, r6, r7, r8}
pld [r1, #(CACHE_LINE_SIZE * 4)]
/* Check alignment */
rsb r3, r1, #0
ands r3, #3
beq 2f
/* align source to 32 bits. We need to insert 2 instructions between
* a ldr[b|h] and str[b|h] because byte and half-word instructions
* stall 2 cycles.
*/
movs r12, r3, lsl #31
sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */
ldrmib r3, [r1], #1
ldrcsb r4, [r1], #1
ldrcsb r5, [r1], #1
strmib r3, [r0], #1
strcsb r4, [r0], #1
strcsb r5, [r0], #1
2:
subs r2, r2, #64
blt 4f
3: /* Main copy loop, copying 64 bytes at a time */
pld [r1, #(CACHE_LINE_SIZE * 8)]
ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
subs r2, r2, #64
bge 3b
4: /* Check if there are > 32 bytes left */
adds r2, r2, #64
subs r2, r2, #32
blt 5f
/* Copy 32 bytes */
ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
subs r2, #32
5: /* Handle any remaining bytes */
adds r2, #32
beq 6f
movs r12, r2, lsl #28
ldmcsia r1!, {r3, r4, r5, r6} /* 16 bytes */
ldmmiia r1!, {r7, r8} /* 8 bytes */
stmcsia r0!, {r3, r4, r5, r6}
stmmiia r0!, {r7, r8}
movs r12, r2, lsl #30
ldrcs r3, [r1], #4 /* 4 bytes */
ldrmih r4, [r1], #2 /* 2 bytes */
strcs r3, [r0], #4
strmih r4, [r0], #2
tst r2, #0x1
ldrneb r3, [r1] /* last byte */
strneb r3, [r0]
6:
ldmfd sp!, {r4, r5, r6, r7, r8}
ldmfd sp!, {r0, pc}
END(memcpy)
#define MEMCPY_BASE __memcpy_base
#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__memcpy_chk_fail)
.cfi_startproc
// Preserve lr for backtrace.
push {lr}
.save {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__memcpy_chk_fail)
.data
error_string:
.string "memcpy buffer overflow"


@@ -1,233 +0,0 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* This code assumes it is running on a processor that supports all arm v7
* instructions, that supports neon instructions, and that has a 32 byte
* cache line.
*/
ENTRY(MEMCPY_BASE)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
// Check that the count is at least 16 bytes; the alignment code below requires it.
cmp r2, #16
blo 5f
/* check if buffers are aligned. If so, run arm-only version */
eor r3, r0, r1
ands r3, r3, #0x3
beq __memcpy_base_aligned
/* Check the upper size limit for Neon unaligned memory access in memcpy */
cmp r2, #224
blo 3f
/* align destination to 16 bytes for the write-buffer */
rsb r3, r0, #0
ands r3, r3, #0xF
beq 3f
/* copy up to 15 bytes (count in r3) */
sub r2, r2, r3
movs ip, r3, lsl #31
itt mi
ldrbmi lr, [r1], #1
strbmi lr, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1], #1
strbcs ip, [r0], #1
strbcs lr, [r0], #1
movs ip, r3, lsl #29
bge 1f
// copies 4 bytes, destination 32-bits aligned
vld1.32 {d0[0]}, [r1]!
vst1.32 {d0[0]}, [r0, :32]!
1: bcc 2f
// copies 8 bytes, destination 64-bits aligned
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
2:
/* immediately preload the next cache line, which we may need */
pld [r1, #0]
pld [r1, #(32 * 2)]
3:
/* make sure we have at least 64 bytes to copy */
subs r2, r2, #64
blo 2f
/* preload all the cache lines we need */
pld [r1, #(32 * 4)]
pld [r1, #(32 * 6)]
1: /* The main loop copies 64 bytes at a time */
vld1.8 {d0 - d3}, [r1]!
vld1.8 {d4 - d7}, [r1]!
pld [r1, #(32 * 6)]
subs r2, r2, #64
vst1.8 {d0 - d3}, [r0]!
vst1.8 {d4 - d7}, [r0]!
bhs 1b
2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
add r2, r2, #64
subs r2, r2, #32
blo 4f
3: /* 32 bytes at a time. These cache lines were already preloaded */
vld1.8 {d0 - d3}, [r1]!
subs r2, r2, #32
vst1.8 {d0 - d3}, [r0]!
bhs 3b
4: /* less than 32 left */
add r2, r2, #32
tst r2, #0x10
beq 5f
// copies 16 bytes, 128-bits aligned
vld1.8 {d0, d1}, [r1]!
vst1.8 {d0, d1}, [r0]!
5: /* copy up to 15 bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0]!
1: bge 2f
vld1.32 {d0[0]}, [r1]!
vst1.32 {d0[0]}, [r0]!
2: movs ip, r2, lsl #31
itt mi
ldrbmi r3, [r1], #1
strbmi r3, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1], #1
strbcs ip, [r0], #1
strbcs lr, [r0], #1
ldmfd sp!, {r0, lr}
bx lr
.cfi_endproc
END(MEMCPY_BASE)
ENTRY(MEMCPY_BASE_ALIGNED)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
/* Simple arm-only copy loop to handle aligned copy operations */
stmfd sp!, {r4-r8}
.save {r4-r8}
.cfi_adjust_cfa_offset 20
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
.cfi_rel_offset r6, 8
.cfi_rel_offset r7, 12
.cfi_rel_offset r8, 16
pld [r1, #(32 * 4)]
/* Check alignment */
rsb r3, r1, #0
ands r3, #3
beq 2f
/* align source to 32 bits. We need to insert 2 instructions between
* a ldr[b|h] and str[b|h] because byte and half-word instructions
* stall 2 cycles.
*/
movs r12, r3, lsl #31
sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */
itt mi
ldrbmi r3, [r1], #1
strbmi r3, [r0], #1
itttt cs
ldrbcs r4, [r1], #1
ldrbcs r5, [r1], #1
strbcs r4, [r0], #1
strbcs r5, [r0], #1
2:
subs r2, r2, #64
blt 4f
3: /* Main copy loop, copying 64 bytes at a time */
pld [r1, #(32 * 8)]
ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
subs r2, r2, #64
bge 3b
4: /* Check if there are > 32 bytes left */
adds r2, r2, #64
subs r2, r2, #32
blt 5f
/* Copy 32 bytes */
ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
subs r2, #32
5: /* Handle any remaining bytes */
adds r2, #32
beq 6f
movs r12, r2, lsl #28
itt cs
ldmiacs r1!, {r3, r4, r5, r6} /* 16 bytes */
stmiacs r0!, {r3, r4, r5, r6}
itt mi
ldmiami r1!, {r7, r8} /* 8 bytes */
stmiami r0!, {r7, r8}
movs r12, r2, lsl #30
itt cs
ldrcs r3, [r1], #4 /* 4 bytes */
strcs r3, [r0], #4
itt mi
ldrhmi r4, [r1], #2 /* 2 bytes */
strhmi r4, [r0], #2
tst r2, #0x1
itt ne
ldrbne r3, [r1] /* last byte */
strbne r3, [r0]
6:
ldmfd sp!, {r4-r8}
ldmfd sp!, {r0, pc}
.cfi_endproc
END(MEMCPY_BASE_ALIGNED)


@@ -28,7 +28,6 @@
#include <machine/cpu-features.h>
#include <machine/asm.h>
#include "libc_events.h"
/*
* This code assumes it is running on a processor that supports all arm v7
@@ -37,52 +36,19 @@
.fpu neon
ENTRY(__memset_chk)
.cfi_startproc
cmp r2, r3
bls .L_done
// Preserve lr for backtrace.
push {lr}
.save {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
.cfi_endproc
END(__memset_chk)
ENTRY(bzero)
.cfi_startproc
mov r2, r1
mov r1, #0
.L_done:
// Fall through to memset...
.cfi_endproc
END(bzero)
/* memset() returns its first argument. */
ENTRY(memset)
.cfi_startproc
// The NEON memset only wins for sizes below 132 bytes.
cmp r2, #132
bhi __memset_large_copy
bhi 11f
stmfd sp!, {r0}
.save {r0}
.cfi_def_cfa_offset 4
.cfi_rel_offset r0, 0
stmfd sp!, {r0}
vdup.8 q0, r1
@@ -115,26 +81,13 @@ ENTRY(memset)
strcsb r1, [r0], #1
ldmfd sp!, {r0}
bx lr
.cfi_endproc
END(memset)
ENTRY(__memset_large_copy)
.cfi_startproc
11:
/* compute the offset to align the destination
* offset = (4-(src&3))&3 = -src & 3
*/
stmfd sp!, {r0, r4-r7, lr}
.save {r0, r4-r7, lr}
.cfi_def_cfa_offset 24
.cfi_rel_offset r0, 0
.cfi_rel_offset r4, 4
.cfi_rel_offset r5, 8
.cfi_rel_offset r6, 12
.cfi_rel_offset r7, 16
.cfi_rel_offset lr, 20
.save {r0, r4-r7, lr}
stmfd sp!, {r0, r4-r7, lr}
rsb r3, r0, #0
ands r3, r3, #3
cmp r3, r2
@@ -196,9 +149,4 @@ ENTRY(__memset_large_copy)
strcsb r1, [r0]
ldmfd sp!, {r0, r4-r7, lr}
bx lr
.cfi_endproc
END(__memset_large_copy)
.data
error_string:
.string "memset buffer overflow"
END(memset)
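
The hunk above splits memset into a short NEON path (used below 132 bytes) and __memset_large_copy, with __memset_chk and bzero falling through into it. In C terms the entry points behave like this sketch (illustrative):

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

void *__memset_chk_sketch(void *dst, int byte, size_t n, size_t dst_len) {
    if (n > dst_len) abort();   // the assembly branches to __fortify_chk_fail
    return memset(dst, byte, n);
}

void bzero_sketch(void *dst, size_t n) {
    memset(dst, 0, n);          // mov r2, r1; mov r1, #0; fall into memset
}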


@@ -1,548 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
.syntax unified
.thumb
.thumb_func
.macro m_push
push {r0, r4, r5, lr}
.endm // m_push
.macro m_ret inst
\inst {r0, r4, r5, pc}
.endm // m_ret
.macro m_scan_byte
ldrb r3, [r0]
cbz r3, strcat_r0_scan_done
add r0, #1
.endm // m_scan_byte
.macro m_copy_byte reg, cmd, label
ldrb \reg, [r1], #1
strb \reg, [r0], #1
\cmd \reg, \label
.endm // m_copy_byte
ENTRY(strcat)
// Quick check to see if src is empty.
ldrb r2, [r1]
pld [r1, #0]
cbnz r2, strcat_continue
bx lr
strcat_continue:
// To speed up really small dst strings, unroll checking the first 4 bytes.
m_push
m_scan_byte
m_scan_byte
m_scan_byte
m_scan_byte
ands r3, r0, #7
bne strcat_align_src
.p2align 2
strcat_mainloop:
ldmia r0!, {r2, r3}
pld [r0, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcat_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcat_zero_in_second_register
b strcat_mainloop
strcat_zero_in_first_register:
sub r0, r0, #4
strcat_zero_in_second_register:
// Check for zero in byte 0.
tst ip, #0x80
it ne
subne r0, r0, #4
bne strcat_r0_scan_done
// Check for zero in byte 1.
tst ip, #0x8000
it ne
subne r0, r0, #3
bne strcat_r0_scan_done
// Check for zero in byte 2.
tst ip, #0x800000
it ne
subne r0, r0, #2
it eq
// Zero is in byte 3.
subeq r0, r0, #1
strcat_r0_scan_done:
// Unroll the first 8 bytes that will be copied.
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
strcpy_finish:
m_ret inst=pop
strcpy_continue:
pld [r1, #0]
ands r3, r0, #7
bne strcpy_align_dst
strcpy_check_src_align:
// At this point dst is aligned to a double word, check if src
// is also aligned to a double word.
ands r3, r1, #7
bne strcpy_unaligned_copy
.p2align 2
strcpy_mainloop:
ldmia r1!, {r2, r3}
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_mainloop
strcpy_zero_in_first_register:
lsls lr, ip, #17
itt ne
strbne r2, [r0]
m_ret inst=popne
itt cs
strhcs r2, [r0]
m_ret inst=popcs
lsls ip, ip, #1
itt eq
streq r2, [r0]
m_ret inst=popeq
strh r2, [r0], #2
lsr r3, r2, #16
strb r3, [r0]
m_ret inst=pop
strcpy_zero_in_second_register:
lsls lr, ip, #17
ittt ne
stmiane r0!, {r2}
strbne r3, [r0]
m_ret inst=popne
ittt cs
strcs r2, [r0], #4
strhcs r3, [r0]
m_ret inst=popcs
lsls ip, ip, #1
itt eq
stmiaeq r0, {r2, r3}
m_ret inst=popeq
stmia r0!, {r2}
strh r3, [r0], #2
lsr r4, r3, #16
strb r4, [r0]
m_ret inst=pop
strcpy_align_dst:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcpy_align_to_32
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
strcpy_align_to_32:
bcc strcpy_align_to_64
ldrb r4, [r1], #1
strb r4, [r0], #1
cmp r4, #0
it eq
m_ret inst=popeq
ldrb r5, [r1], #1
strb r5, [r0], #1
cmp r5, #0
it eq
m_ret inst=popeq
strcpy_align_to_64:
tst r3, #4
beq strcpy_check_src_align
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
stmia r0!, {r2}
b strcpy_check_src_align
strcpy_complete:
m_ret inst=pop
strcpy_unaligned_copy:
// Dst is aligned to a double word, while src is at an unknown alignment.
// There are 7 different versions of the unaligned copy code
// to prevent overreading the src. Every version's main loop stores
// 64 bits per iteration. The difference is how much of src can
// be read without potentially crossing a page boundary.
tbb [pc, r3]
strcpy_unaligned_branchtable:
.byte 0
.byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
.p2align 2
// Can read 7 bytes before possibly crossing a page.
strcpy_unalign7:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r3, [r1]
cbz r3, strcpy_unalign7_copy5bytes
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign7_copy6bytes
ldrb r5, [r1, #2]
cbz r5, strcpy_unalign7_copy7bytes
ldr r3, [r1], #4
pld [r1, #64]
lsrs ip, r3, #24
stmia r0!, {r2, r3}
beq strcpy_unalign_return
b strcpy_unalign7
strcpy_unalign7_copy5bytes:
stmia r0!, {r2}
strb r3, [r0]
strcpy_unalign_return:
m_ret inst=pop
strcpy_unalign7_copy6bytes:
stmia r0!, {r2}
strb r3, [r0], #1
strb r4, [r0], #1
m_ret inst=pop
strcpy_unalign7_copy7bytes:
stmia r0!, {r2}
strb r3, [r0], #1
strb r4, [r0], #1
strb r5, [r0], #1
m_ret inst=pop
.p2align 2
// Can read 6 bytes before possibly crossing a page.
strcpy_unalign6:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldrb r5, [r1, #1]
cbz r5, strcpy_unalign_copy6bytes
ldr r3, [r1], #4
pld [r1, #64]
tst r3, #0xff0000
beq strcpy_unalign6_copy7bytes
lsrs ip, r3, #24
stmia r0!, {r2, r3}
beq strcpy_unalign_return
b strcpy_unalign6
strcpy_unalign6_copy7bytes:
stmia r0!, {r2}
strh r3, [r0], #2
lsr r3, #16
strb r3, [r0]
m_ret inst=pop
.p2align 2
// Can read 5 bytes before possibly crossing a page.
strcpy_unalign5:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign5
strcpy_unalign_copy5bytes:
stmia r0!, {r2}
strb r4, [r0]
m_ret inst=pop
strcpy_unalign_copy6bytes:
stmia r0!, {r2}
strb r4, [r0], #1
strb r5, [r0]
m_ret inst=pop
.p2align 2
// Can read 4 bytes before possibly crossing a page.
strcpy_unalign4:
ldmia r1!, {r2}
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldmia r1!, {r3}
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign4
.p2align 2
// Can read 3 bytes before possibly crossing a page.
strcpy_unalign3:
ldrb r2, [r1]
cbz r2, strcpy_unalign3_copy1byte
ldrb r3, [r1, #1]
cbz r3, strcpy_unalign3_copy2bytes
ldrb r4, [r1, #2]
cbz r4, strcpy_unalign3_copy3bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
lsrs lr, r2, #24
beq strcpy_unalign_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign3
strcpy_unalign3_copy1byte:
strb r2, [r0]
m_ret inst=pop
strcpy_unalign3_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_ret inst=pop
strcpy_unalign3_copy3bytes:
strb r2, [r0], #1
strb r3, [r0], #1
strb r4, [r0]
m_ret inst=pop
.p2align 2
// Can read 2 bytes before possibly crossing a page.
strcpy_unalign2:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldrb r3, [r1, #1]
cbz r3, strcpy_unalign_copy2bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
tst r2, #0xff0000
beq strcpy_unalign_copy3bytes
lsrs ip, r2, #24
beq strcpy_unalign_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign2
.p2align 2
// Can read 1 byte before possibly crossing a page.
strcpy_unalign1:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign1
strcpy_unalign_copy1byte:
strb r2, [r0]
m_ret inst=pop
strcpy_unalign_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_ret inst=pop
strcpy_unalign_copy3bytes:
strh r2, [r0], #2
lsr r2, #16
strb r2, [r0]
m_ret inst=pop
strcpy_unalign_copy4bytes:
stmia r0, {r2}
m_ret inst=pop
strcat_align_src:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcat_align_to_32
ldrb r2, [r0], #1
cbz r2, strcat_r0_update
strcat_align_to_32:
bcc strcat_align_to_64
ldrb r2, [r0], #1
cbz r2, strcat_r0_update
ldrb r2, [r0], #1
cbz r2, strcat_r0_update
strcat_align_to_64:
tst r3, #4
beq strcat_mainloop
ldr r3, [r0], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcat_zero_in_second_register
b strcat_mainloop
strcat_r0_update:
sub r0, r0, #1
b strcat_r0_scan_done
END(strcat)
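
Functionally, the routine is strlen-then-copy; the assembly fuses the two so the copy loops are shared with strcpy. A minimal C equivalent:

#include <string.h>

char *strcat_sketch(char *dst, const char *src) {
    if (*src == '\0') return dst;   // the quick empty-src check at entry
    char *end = dst + strlen(dst);  // the strcat_mainloop word scan
    strcpy(end, src);               // reuses the strcpy copy paths
    return dst;
}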


@@ -123,13 +123,8 @@ ENTRY(strcmp)
.macro init
/* Macro to save temporary registers and prepare magic values. */
subs sp, sp, #16
.cfi_def_cfa_offset 16
strd r4, r5, [sp, #8]
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
strd r6, r7, [sp]
.cfi_rel_offset r6, 8
.cfi_rel_offset r7, 12
mvn r6, #0 /* all F */
mov r7, #0 /* all 0 */
.endm /* init */
@@ -170,20 +165,18 @@ ENTRY(strcmp)
#endif /* not __ARMEB__ */
.endm /* setup_return */
.cfi_startproc
pld [r0, #0]
pld [r1, #0]
/* Are both strings double-word aligned? */
orr ip, r0, r1
tst ip, #7
bne .L_do_align
bne do_align
/* Fast path. */
.save {r4-r7}
init
.L_doubleword_aligned:
doubleword_aligned:
/* Get here when the strings to compare are double-word aligned. */
/* Compare two words in every iteration. */
@@ -196,14 +189,14 @@ ENTRY(strcmp)
ldrd r2, r3, [r0], #8
ldrd r4, r5, [r1], #8
magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
magic_compare_and_branch w1=r2, w2=r4, label=return_24
magic_compare_and_branch w1=r3, w2=r5, label=return_35
b 2b
.L_do_align:
do_align:
/* Is the first string word-aligned? */
ands ip, r0, #3
beq .L_word_aligned_r0
beq word_aligned_r0
/* Fast compare byte by byte until the first string is word-aligned. */
/* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
@@ -211,58 +204,58 @@ ENTRY(strcmp)
bic r0, r0, #3
ldr r2, [r0], #4
lsls ip, ip, #31
beq .L_byte2
bcs .L_byte3
beq byte2
bcs byte3
.L_byte1:
byte1:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE1_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbz reg=r3, label=.L_fast_return
bne fast_return
m_cbz reg=r3, label=fast_return
.L_byte2:
byte2:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE2_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbz reg=r3, label=.L_fast_return
bne fast_return
m_cbz reg=r3, label=fast_return
.L_byte3:
byte3:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE3_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbnz reg=r3, label=.L_word_aligned_r0
bne fast_return
m_cbnz reg=r3, label=word_aligned_r0
.L_fast_return:
fast_return:
mov r0, ip
bx lr
.L_word_aligned_r0:
word_aligned_r0:
init
/* The first string is word-aligned. */
/* Is the second string word-aligned? */
ands ip, r1, #3
bne .L_strcmp_unaligned
bne strcmp_unaligned
.L_word_aligned:
word_aligned:
/* The strings are word-aligned. */
/* Is the first string double-word aligned? */
tst r0, #4
beq .L_doubleword_aligned_r0
beq doubleword_aligned_r0
/* If r0 is not double-word aligned yet, align it by loading
and comparing the next word from each string. */
ldr r2, [r0], #4
ldr r4, [r1], #4
magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
magic_compare_and_branch w1=r2 w2=r4 label=return_24
.L_doubleword_aligned_r0:
doubleword_aligned_r0:
/* Get here when r0 is double-word aligned. */
/* Is r1 doubleword_aligned? */
tst r1, #4
beq .L_doubleword_aligned
beq doubleword_aligned
/* Get here when the strings to compare are word-aligned,
r0 is double-word aligned, but r1 is not double-word aligned. */
@@ -278,9 +271,9 @@ ENTRY(strcmp)
/* Load the next double-word from each string and compare. */
ldrd r2, r3, [r0], #8
magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
magic_compare_and_branch w1=r2 w2=r5 label=return_25
ldrd r4, r5, [r1], #8
magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
magic_compare_and_branch w1=r3 w2=r4 label=return_34
b 3b
.macro miscmp_word offsetlo offsethi
@@ -304,33 +297,33 @@ ENTRY(strcmp)
and r2, r3, r6, S2LOMEM #\offsetlo
it eq
cmpeq r2, r5
bne .L_return_25
bne return_25
ldr r5, [r1], #4
cmp ip, #0
eor r3, r2, r3
S2HIMEM r2, r5, #\offsethi
it eq
cmpeq r3, r2
bne .L_return_32
bne return_32
b 7b
.endm /* miscmp_word */
.L_return_32:
return_32:
setup_return w1=r3, w2=r2
b .L_do_return
.L_return_34:
b do_return
return_34:
setup_return w1=r3, w2=r4
b .L_do_return
.L_return_25:
b do_return
return_25:
setup_return w1=r2, w2=r5
b .L_do_return
.L_return_35:
b do_return
return_35:
setup_return w1=r3, w2=r5
b .L_do_return
.L_return_24:
b do_return
return_24:
setup_return w1=r2, w2=r4
.L_do_return:
do_return:
#ifdef __ARMEB__
mov r0, ip
@@ -342,16 +335,11 @@ ENTRY(strcmp)
ldrd r6, r7, [sp]
ldrd r4, r5, [sp, #8]
adds sp, sp, #16
.cfi_def_cfa_offset 0
.cfi_restore r4
.cfi_restore r5
.cfi_restore r6
.cfi_restore r7
/* There is a zero or a different byte between r1 and r2. */
/* r0 contains a mask of all-zero bytes in r1. */
/* Using r0 and not ip here because cbz requires a low register. */
m_cbz reg=r0, label=.L_compute_return_value
m_cbz reg=r0, label=compute_return_value
clz r0, r0
/* r0 contains the number of bits on the left of the first all-zero byte in r1. */
rsb r0, r0, #24
@@ -359,7 +347,7 @@ ENTRY(strcmp)
lsr r1, r1, r0
lsr r2, r2, r0
.L_compute_return_value:
compute_return_value:
movs r0, #1
cmp r1, r2
/* The return value is computed as follows.
@@ -379,7 +367,7 @@ ENTRY(strcmp)
* bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S for the unedited
* version of the code.
*/
.L_strcmp_unaligned:
strcmp_unaligned:
wp1 .req r0
wp2 .req r1
b1 .req r2
@@ -532,11 +520,6 @@ ENTRY(strcmp)
ldrd r6, r7, [sp]
ldrd r4, r5, [sp, #8]
adds sp, sp, #16
.cfi_def_cfa_offset 0
.cfi_restore r4
.cfi_restore r5
.cfi_restore r6
.cfi_restore r7
bx lr
@@ -558,5 +541,4 @@ ENTRY(strcmp)
adds sp, sp, #16
bx lr
.cfi_endproc
END(strcmp)
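
The fast path compares a word (or double word) per iteration and only drops to byte-level work once a difference or a zero byte is seen. A simplified C sketch of that idea for mutually 4-byte-aligned strings (illustrative; the real code also handles every misalignment case):

#include <stdint.h>
#include <string.h>

static int has_zero(uint32_t w) {
    // Same subtract/bic/and trick as the "magic" constants in the macros.
    return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
}

int strcmp_sketch(const char *a, const char *b) {
    for (;;) {
        uint32_t wa, wb;
        memcpy(&wa, a, 4);   // word-at-a-time; ldrd in the assembly
        memcpy(&wb, b, 4);
        if (wa != wb || has_zero(wa)) break;
        a += 4; b += 4;
    }
    const unsigned char *pa = (const unsigned char *)a;
    const unsigned char *pb = (const unsigned char *)b;
    while (*pa != '\0' && *pa == *pb) { pa++; pb++; }
    return *pa - *pb;
}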


@@ -1,456 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
.syntax unified
.thumb
.thumb_func
.macro m_push
push {r0, r4, r5, lr}
.endm // m_push
.macro m_ret inst
\inst {r0, r4, r5, pc}
.endm // m_ret
.macro m_copy_byte reg, cmd, label
ldrb \reg, [r1], #1
strb \reg, [r0], #1
\cmd \reg, \label
.endm // m_copy_byte
ENTRY(strcpy)
// Unroll the first 8 bytes that will be copied.
m_push
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
strcpy_finish:
m_ret inst=pop
strcpy_continue:
pld [r1, #0]
ands r3, r0, #7
bne strcpy_align_dst
strcpy_check_src_align:
// At this point dst is aligned to a double word, check if src
// is also aligned to a double word.
ands r3, r1, #7
bne strcpy_unaligned_copy
.p2align 2
strcpy_mainloop:
ldmia r1!, {r2, r3}
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_mainloop
strcpy_zero_in_first_register:
lsls lr, ip, #17
itt ne
strbne r2, [r0]
m_ret inst=popne
itt cs
strhcs r2, [r0]
m_ret inst=popcs
lsls ip, ip, #1
itt eq
streq r2, [r0]
m_ret inst=popeq
strh r2, [r0], #2
lsr r3, r2, #16
strb r3, [r0]
m_ret inst=pop
strcpy_zero_in_second_register:
lsls lr, ip, #17
ittt ne
stmiane r0!, {r2}
strbne r3, [r0]
m_ret inst=popne
ittt cs
strcs r2, [r0], #4
strhcs r3, [r0]
m_ret inst=popcs
lsls ip, ip, #1
itt eq
stmiaeq r0, {r2, r3}
m_ret inst=popeq
stmia r0!, {r2}
strh r3, [r0], #2
lsr r4, r3, #16
strb r4, [r0]
m_ret inst=pop
strcpy_align_dst:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq strcpy_align_to_32
ldrb r2, [r1], #1
strb r2, [r0], #1
cbz r2, strcpy_complete
strcpy_align_to_32:
bcc strcpy_align_to_64
ldrb r4, [r1], #1
strb r4, [r0], #1
cmp r4, #0
it eq
m_ret inst=popeq
ldrb r5, [r1], #1
strb r5, [r0], #1
cmp r5, #0
it eq
m_ret inst=popeq
strcpy_align_to_64:
tst r3, #4
beq strcpy_check_src_align
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
stmia r0!, {r2}
b strcpy_check_src_align
strcpy_complete:
m_ret inst=pop
strcpy_unaligned_copy:
// Dst is aligned to a double word, while src is at an unknown alignment.
// There are 7 different versions of the unaligned copy code
// to prevent overreading the src. Every version's main loop stores
// 64 bits per iteration. The difference is how much of src can
// be read without potentially crossing a page boundary.
tbb [pc, r3]
strcpy_unaligned_branchtable:
.byte 0
.byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
.byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
.p2align 2
// Can read 7 bytes before possibly crossing a page.
strcpy_unalign7:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r3, [r1]
cbz r3, strcpy_unalign7_copy5bytes
ldrb r4, [r1, #1]
cbz r4, strcpy_unalign7_copy6bytes
ldrb r5, [r1, #2]
cbz r5, strcpy_unalign7_copy7bytes
ldr r3, [r1], #4
pld [r1, #64]
lsrs ip, r3, #24
stmia r0!, {r2, r3}
beq strcpy_unalign_return
b strcpy_unalign7
strcpy_unalign7_copy5bytes:
stmia r0!, {r2}
strb r3, [r0]
strcpy_unalign_return:
m_ret inst=pop
strcpy_unalign7_copy6bytes:
stmia r0!, {r2}
strb r3, [r0], #1
strb r4, [r0], #1
m_ret inst=pop
strcpy_unalign7_copy7bytes:
stmia r0!, {r2}
strb r3, [r0], #1
strb r4, [r0], #1
strb r5, [r0], #1
m_ret inst=pop
.p2align 2
// Can read 6 bytes before possibly crossing a page.
strcpy_unalign6:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldrb r5, [r1, #1]
cbz r5, strcpy_unalign_copy6bytes
ldr r3, [r1], #4
pld [r1, #64]
tst r3, #0xff0000
beq strcpy_unalign6_copy7bytes
lsrs ip, r3, #24
stmia r0!, {r2, r3}
beq strcpy_unalign_return
b strcpy_unalign6
strcpy_unalign6_copy7bytes:
stmia r0!, {r2}
strh r3, [r0], #2
lsr r3, #16
strb r3, [r0]
m_ret inst=pop
.p2align 2
// Can read 5 bytes before possibly crossing a page.
strcpy_unalign5:
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldrb r4, [r1]
cbz r4, strcpy_unalign_copy5bytes
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign5
strcpy_unalign_copy5bytes:
stmia r0!, {r2}
strb r4, [r0]
m_ret inst=pop
strcpy_unalign_copy6bytes:
stmia r0!, {r2}
strb r4, [r0], #1
strb r5, [r0]
m_ret inst=pop
.p2align 2
// Can read 4 bytes before possibly crossing a page.
strcpy_unalign4:
ldmia r1!, {r2}
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
ldmia r1!, {r3}
pld [r1, #64]
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign4
.p2align 2
// Can read 3 bytes before possibly crossing a page.
strcpy_unalign3:
ldrb r2, [r1]
cbz r2, strcpy_unalign3_copy1byte
ldrb r3, [r1, #1]
cbz r3, strcpy_unalign3_copy2bytes
ldrb r4, [r1, #2]
cbz r4, strcpy_unalign3_copy3bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
lsrs lr, r2, #24
beq strcpy_unalign_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign3
strcpy_unalign3_copy1byte:
strb r2, [r0]
m_ret inst=pop
strcpy_unalign3_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_ret inst=pop
strcpy_unalign3_copy3bytes:
strb r2, [r0], #1
strb r3, [r0], #1
strb r4, [r0]
m_ret inst=pop
.p2align 2
// Can read 2 bytes before possibly crossing a page.
strcpy_unalign2:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldrb r3, [r1, #1]
cbz r3, strcpy_unalign_copy2bytes
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
tst r2, #0xff0000
beq strcpy_unalign_copy3bytes
lsrs ip, r2, #24
beq strcpy_unalign_copy4bytes
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign2
.p2align 2
// Can read 1 byte before possibly crossing a page.
strcpy_unalign1:
ldrb r2, [r1]
cbz r2, strcpy_unalign_copy1byte
ldr r2, [r1], #4
ldr r3, [r1], #4
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne strcpy_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne strcpy_zero_in_second_register
stmia r0!, {r2, r3}
b strcpy_unalign1
strcpy_unalign_copy1byte:
strb r2, [r0]
m_ret inst=pop
strcpy_unalign_copy2bytes:
strb r2, [r0], #1
strb r3, [r0]
m_ret inst=pop
strcpy_unalign_copy3bytes:
strh r2, [r0], #2
lsr r2, #16
strb r2, [r0]
m_ret inst=pop
strcpy_unalign_copy4bytes:
stmia r0, {r2}
m_ret inst=pop
END(strcpy)
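
The seven unaligned variants exist because pages are far more strictly aligned than 8 bytes: when src % 8 == k, the next 8 - k bytes provably sit on the same page, so that is the most a variant may read past data it has already validated. The sketch below expresses the same invariant in C, checking the page offset directly instead of dispatching through a tbb table (illustrative only; PAGE_SIZE and the helper names are assumptions):

#include <stdint.h>
#include <string.h>

#define PAGE_SIZE 4096u

static uint32_t zero_byte_mask(uint32_t w) {
    return (w - 0x01010101u) & ~w & 0x80808080u;
}

char *strcpy_guarded_sketch(char *dst, const char *src) {
    char *d = dst;
    for (;;) {
        uintptr_t room = PAGE_SIZE - ((uintptr_t)src & (PAGE_SIZE - 1));
        if (room >= 4) {
            uint32_t w;
            memcpy(&w, src, 4);          // stays on the current page
            if (zero_byte_mask(w) == 0) {
                memcpy(d, &w, 4);
                d += 4; src += 4;
                continue;
            }
        }
        if ((*d++ = *src++) == '\0')     // near a page edge, or once a zero
            return dst;                  // byte is in sight: go bytewise
    }
}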

View File

@@ -1,167 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Copyright (c) 2013 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
.syntax unified
.thumb
.thumb_func
ENTRY(strlen)
pld [r0, #0]
mov r1, r0
ands r3, r0, #7
bne align_src
.p2align 2
mainloop:
ldmia r1!, {r2, r3}
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne zero_in_second_register
b mainloop
zero_in_first_register:
sub r0, r1, r0
// Check for zero in byte 0.
lsls r2, ip, #17
beq check_byte1_reg1
sub r0, r0, #8
bx lr
check_byte1_reg1:
bcc check_byte2_reg1
sub r0, r0, #7
bx lr
check_byte2_reg1:
// Check for zero in byte 2.
tst ip, #0x800000
itt ne
subne r0, r0, #6
bxne lr
sub r0, r0, #5
bx lr
zero_in_second_register:
sub r0, r1, r0
// Check for zero in byte 0.
lsls r2, ip, #17
beq check_byte1_reg2
sub r0, r0, #4
bx lr
check_byte1_reg2:
bcc check_byte2_reg2
sub r0, r0, #3
bx lr
check_byte2_reg2:
// Check for zero in byte 2.
tst ip, #0x800000
itt ne
subne r0, r0, #2
bxne lr
sub r0, r0, #1
bx lr
align_src:
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq align_to_32
ldrb r2, [r1], #1
cbz r2, done
align_to_32:
bcc align_to_64
ldrb r2, [r1], #1
cbz r2, done
ldrb r2, [r1], #1
cbz r2, done
align_to_64:
tst r3, #4
beq mainloop
ldr r2, [r1], #4
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne zero_in_second_register
b mainloop
done:
sub r0, r1, r0
sub r0, r0, #1
bx lr
END(strlen)
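
The sub/bic/ands triple that appears in every main loop here is the classic zero-byte detector: subtracting 0x01010101 borrows through any zero byte, bic clears bytes whose high bit was already set, and the final and leaves 0x80 markers where bytes were zero (the lowest zero byte is always marked, which is all the scan needs). In C:

#include <stdint.h>
#include <stdio.h>

static uint32_t zero_byte_mask(uint32_t w) {
    return (w - 0x01010101u) & ~w & 0x80808080u;
}

int main(void) {
    // 0x41004242 has a zero in byte 2; the marker lands on bit 23.
    printf("%08x\n", zero_byte_mask(0x41004242u));  // prints 00800000
    return 0;
}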


@@ -1,10 +1,5 @@
$(call libc-add-cpu-variant-src,MEMCPY,arch-arm/cortex-a9/bionic/memcpy.S)
$(call libc-add-cpu-variant-src,MEMSET,arch-arm/cortex-a9/bionic/memset.S)
$(call libc-add-cpu-variant-src,STRCAT,arch-arm/cortex-a9/bionic/strcat.S)
$(call libc-add-cpu-variant-src,STRCMP,arch-arm/cortex-a9/bionic/strcmp.S)
$(call libc-add-cpu-variant-src,STRCPY,arch-arm/cortex-a9/bionic/strcpy.S)
$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a9/bionic/strlen.S)
$(call libc-add-cpu-variant-src,__STRCAT_CHK,arch-arm/cortex-a9/bionic/__strcat_chk.S)
$(call libc-add-cpu-variant-src,__STRCPY_CHK,arch-arm/cortex-a9/bionic/__strcpy_chk.S)
include bionic/libc/arch-arm/generic/generic.mk


@@ -28,7 +28,6 @@
#include <machine/cpu-features.h>
#include <machine/asm.h>
#include "libc_events.h"
/*
* Optimized memcpy() for ARM.
@@ -37,13 +36,6 @@
* so we have to preserve R0.
*/
ENTRY(__memcpy_chk)
cmp r2, r3
bgt fortify_check_failed
// Fall through to memcpy...
END(__memcpy_chk)
ENTRY(memcpy)
/* The stack must always be 64-bits aligned to be compliant with the
* ARM ABI. Since we have to save R0, we might as well save R4
@@ -385,20 +377,4 @@ copy_last_3_and_return:
add sp, sp, #28
ldmfd sp!, {r0, r4, lr}
bx lr
// Only reached when the __memcpy_chk check fails.
fortify_check_failed:
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
END(memcpy)
.data
error_string:
.string "memcpy buffer overflow"


@@ -27,7 +27,6 @@
*/
#include <machine/asm.h>
#include "libc_events.h"
/*
* Optimized memset() for ARM.
@@ -35,28 +34,9 @@
* memset() returns its first argument.
*/
ENTRY(__memset_chk)
cmp r2, r3
bls done
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
END(__memset_chk)
ENTRY(bzero)
mov r2, r1
mov r1, #0
done:
// Fall through to memset...
END(bzero)
ENTRY(memset)
@@ -127,7 +107,3 @@ ENTRY(memset)
ldmfd sp!, {r0, r4-r7, lr}
bx lr
END(memset)
.data
error_string:
.string "memset buffer overflow"


@@ -1,8 +1,3 @@
$(call libc-add-cpu-variant-src,MEMCPY,arch-arm/generic/bionic/memcpy.S)
$(call libc-add-cpu-variant-src,MEMSET,arch-arm/generic/bionic/memset.S)
$(call libc-add-cpu-variant-src,STRCAT,string/strcat.c)
$(call libc-add-cpu-variant-src,STRCMP,arch-arm/generic/bionic/strcmp.S)
$(call libc-add-cpu-variant-src,STRCPY,arch-arm/generic/bionic/strcpy.S)
$(call libc-add-cpu-variant-src,STRLEN,arch-arm/generic/bionic/strlen.c)
$(call libc-add-cpu-variant-src,__STRCAT_CHK,bionic/__strcat_chk.cpp)
$(call libc-add-cpu-variant-src,__STRCPY_CHK,bionic/__strcpy_chk.cpp)


@@ -1,225 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#include "libc_events.h"
.syntax unified
.thumb
.thumb_func
// Get the length of the src string, then get the length of the dst string.
// Check that the two lengths together don't exceed the threshold, then
// do a memcpy of the data.
ENTRY(__strcat_chk)
.cfi_startproc
pld [r0, #0]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
push {r4, r5}
.save {r4, r5}
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
mov lr, r2
// Save the dst register to r5
mov r5, r0
// Zero out r4
eor r4, r4, r4
// r1 contains the address of the string to count.
.L_strlen_start:
mov r0, r1
ands r3, r1, #7
beq .L_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq .L_align_to_32
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
.L_align_to_32:
bcc .L_align_to_64
ands ip, r3, #2
beq .L_align_to_64
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
ldrb r2, [r1], #1
cbz r2, .L_update_count_and_finish
.L_align_to_64:
tst r3, #4
beq .L_mainloop
ldr r3, [r1], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
.p2align 2
.L_mainloop:
ldrd r2, r3, [r1], #8
pld [r1, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_update_count_and_finish:
sub r3, r1, r0
sub r3, r3, #1
b .L_finish
.L_zero_in_first_register:
sub r3, r1, r0
lsls r2, ip, #17
bne .L_sub8_and_finish
bcs .L_sub7_and_finish
lsls ip, ip, #1
bne .L_sub6_and_finish
sub r3, r3, #5
b .L_finish
.L_sub8_and_finish:
sub r3, r3, #8
b .L_finish
.L_sub7_and_finish:
sub r3, r3, #7
b .L_finish
.L_sub6_and_finish:
sub r3, r3, #6
b .L_finish
.L_zero_in_second_register:
sub r3, r1, r0
lsls r2, ip, #17
bne .L_sub4_and_finish
bcs .L_sub3_and_finish
lsls ip, ip, #1
bne .L_sub2_and_finish
sub r3, r3, #1
b .L_finish
.L_sub4_and_finish:
sub r3, r3, #4
b .L_finish
.L_sub3_and_finish:
sub r3, r3, #3
b .L_finish
.L_sub2_and_finish:
sub r3, r3, #2
.L_finish:
cmp r4, #0
bne .L_strlen_done
// Time to get the dst string length.
mov r1, r5
// Save the original source address to r5.
mov r5, r0
// Save the current length (adding 1 for the terminator).
add r4, r3, #1
b .L_strlen_start
// r0 holds the pointer to the dst string.
// r3 holds the dst string length.
// r4 holds the src string length + 1.
.L_strlen_done:
add r2, r3, r4
cmp r2, lr
bhi __strcat_chk_failed
// Set up the registers for the memcpy code.
mov r1, r5
pld [r1, #64]
mov r2, r4
add r0, r0, r3
pop {r4, r5}
.cfi_endproc
END(__strcat_chk)
#define MEMCPY_BASE __strcat_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__strcat_chk_failed)
.cfi_startproc
.save {r0, lr}
.save {r4, r5}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
.cfi_adjust_cfa_offset 8
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__strcat_chk_failed)
.data
error_string:
.string "strcat buffer overflow"


@@ -1,187 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#include "libc_events.h"
.syntax unified
.thumb
.thumb_func
// Get the length of the source string first, then do a memcpy of the data
// instead of a strcpy.
ENTRY(__strcpy_chk)
.cfi_startproc
pld [r0, #0]
push {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
mov lr, r2
mov r0, r1
ands r3, r1, #7
beq .L_mainloop
// Align to a double word (64 bits).
rsb r3, r3, #8
lsls ip, r3, #31
beq .L_align_to_32
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
.L_align_to_32:
bcc .L_align_to_64
ands ip, r3, #2
beq .L_align_to_64
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
ldrb r2, [r0], #1
cbz r2, .L_update_count_and_finish
.L_align_to_64:
tst r3, #4
beq .L_mainloop
ldr r3, [r0], #4
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
.p2align 2
.L_mainloop:
ldrd r2, r3, [r0], #8
pld [r0, #64]
sub ip, r2, #0x01010101
bic ip, ip, r2
ands ip, ip, #0x80808080
bne .L_zero_in_first_register
sub ip, r3, #0x01010101
bic ip, ip, r3
ands ip, ip, #0x80808080
bne .L_zero_in_second_register
b .L_mainloop
.L_update_count_and_finish:
sub r3, r0, r1
sub r3, r3, #1
b .L_check_size
.L_zero_in_first_register:
sub r3, r0, r1
lsls r2, ip, #17
bne .L_sub8_and_finish
bcs .L_sub7_and_finish
lsls ip, ip, #1
bne .L_sub6_and_finish
sub r3, r3, #5
b .L_check_size
.L_sub8_and_finish:
sub r3, r3, #8
b .L_check_size
.L_sub7_and_finish:
sub r3, r3, #7
b .L_check_size
.L_sub6_and_finish:
sub r3, r3, #6
b .L_check_size
.L_zero_in_second_register:
sub r3, r0, r1
lsls r2, ip, #17
bne .L_sub4_and_finish
bcs .L_sub3_and_finish
lsls ip, ip, #1
bne .L_sub2_and_finish
sub r3, r3, #1
b .L_check_size
.L_sub4_and_finish:
sub r3, r3, #4
b .L_check_size
.L_sub3_and_finish:
sub r3, r3, #3
b .L_check_size
.L_sub2_and_finish:
sub r3, r3, #2
.L_check_size:
pld [r1, #0]
pld [r1, #64]
ldr r0, [sp]
cmp r3, lr
bhs __strcpy_chk_failed
// Add 1 to the copy length to include the string terminator.
add r2, r3, #1
.cfi_endproc
END(__strcpy_chk)
#define MEMCPY_BASE __strcpy_chk_memcpy_base
#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__strcpy_chk_failed)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__strcpy_chk_failed)
.data
error_string:
.string "strcpy buffer overflow"


@@ -1,5 +1,5 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,8 +28,8 @@
/* Assumes neon instructions and a cache line size of 32 bytes. */
#include <machine/cpu-features.h>
#include <machine/asm.h>
#include "libc_events.h"
/*
* This code assumes it is running on a processor that supports all arm v7
@@ -38,55 +38,109 @@
*/
.text
.syntax unified
.fpu neon
.thumb
.thumb_func
ENTRY(__memcpy_chk)
.cfi_startproc
cmp r2, r3
bhi __memcpy_chk_fail
// Fall through to memcpy...
.cfi_endproc
END(__memcpy_chk)
#define CACHE_LINE_SIZE 32
ENTRY(memcpy)
.cfi_startproc
pld [r1, #64]
stmfd sp!, {r0, lr}
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
.cfi_endproc
.save {r0, lr}
/* start preloading as early as possible */
pld [r1, #(CACHE_LINE_SIZE*0)]
stmfd sp!, {r0, lr}
pld [r1, #(CACHE_LINE_SIZE*2)]
/* do we have at least 16 bytes to copy (needed for alignment below) */
cmp r2, #16
blo 5f
/* align destination to cache-line for the write-buffer */
rsb r3, r0, #0
ands r3, r3, #0xF
beq 0f
/* copy up to 15 bytes (count in r3) */
sub r2, r2, r3
movs ip, r3, lsl #31
ldrmib lr, [r1], #1
strmib lr, [r0], #1
ldrcsb ip, [r1], #1
ldrcsb lr, [r1], #1
strcsb ip, [r0], #1
strcsb lr, [r0], #1
movs ip, r3, lsl #29
bge 1f
// copies 4 bytes, destination 32-bits aligned
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
1: bcc 2f
// copies 8 bytes, destination 64-bits aligned
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
2:
0: /* immediately preload the next cache line, which we may need */
pld [r1, #(CACHE_LINE_SIZE*0)]
pld [r1, #(CACHE_LINE_SIZE*2)]
/* make sure we have at least 64 bytes to copy */
subs r2, r2, #64
blo 2f
/* Preload all the cache lines we need.
* NOTE: The number of pld below depends on CACHE_LINE_SIZE,
* ideally we would increase the distance in the main loop to
* avoid the goofy code below. In practice this doesn't seem to make
* a big difference.
* NOTE: The value CACHE_LINE_SIZE * 8 was chosen through
* experimentation.
*/
pld [r1, #(CACHE_LINE_SIZE*4)]
pld [r1, #(CACHE_LINE_SIZE*6)]
pld [r1, #(CACHE_LINE_SIZE*8)]
1: /* The main loop copies 64 bytes at a time */
vld1.8 {d0 - d3}, [r1]!
vld1.8 {d4 - d7}, [r1]!
pld [r1, #(CACHE_LINE_SIZE*8)]
subs r2, r2, #64
vst1.8 {d0 - d3}, [r0, :128]!
vst1.8 {d4 - d7}, [r0, :128]!
bhs 1b
2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
add r2, r2, #64
subs r2, r2, #32
blo 4f
3: /* 32 bytes at a time. These cache lines were already preloaded */
vld1.8 {d0 - d3}, [r1]!
subs r2, r2, #32
vst1.8 {d0 - d3}, [r0, :128]!
bhs 3b
4: /* less than 32 left */
add r2, r2, #32
tst r2, #0x10
beq 5f
// copies 16 bytes, 128-bits aligned
vld1.8 {d0, d1}, [r1]!
vst1.8 {d0, d1}, [r0, :128]!
5: /* copy up to 15 bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0]!
1: bge 2f
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
2: movs ip, r2, lsl #31
ldrmib r3, [r1], #1
ldrcsb ip, [r1], #1
ldrcsb lr, [r1], #1
strmib r3, [r0], #1
strcsb ip, [r0], #1
strcsb lr, [r0], #1
ldmfd sp!, {r0, lr}
bx lr
END(memcpy)
#define MEMCPY_BASE __memcpy_base
#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
#include "memcpy_base.S"
ENTRY(__memcpy_chk_fail)
.cfi_startproc
// Preserve lr for backtrace.
push {lr}
.save {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+4)
.cfi_endproc
END(__memcpy_chk_fail)
.data
error_string:
.string "memcpy buffer overflow"
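__memcpy_chk itself is only the cmp r2, r3 / bhi guard before falling through to memcpy: the compiler passes the destination object's known size as a fourth argument. A hedged C sketch of the contract (my_memcpy_chk is a hypothetical name; the real failure path calls __fortify_chk_fail rather than abort):

#include <stdlib.h>
#include <string.h>

/* dst_len is what the compiler knows about the destination,
   typically from __builtin_object_size(dst, 0). */
void* my_memcpy_chk(void* dst, const void* src, size_t count, size_t dst_len) {
    if (count > dst_len) {
        abort();  /* the real code logs "memcpy buffer overflow" first */
    }
    return memcpy(dst, src, count);
}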

View File

@@ -1,127 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* This code assumes it is running on a processor that supports all arm v7
* instructions, that supports neon instructions, and that has a 32 byte
* cache line.
*/
// Assumes neon instructions and a cache line size of 32 bytes.
ENTRY(MEMCPY_BASE)
.cfi_startproc
.save {r0, lr}
.cfi_def_cfa_offset 8
.cfi_rel_offset r0, 0
.cfi_rel_offset lr, 4
/* do we have at least 16 bytes to copy (needed for alignment below) */
cmp r2, #16
blo 5f
/* align destination to cache-line for the write-buffer */
rsb r3, r0, #0
ands r3, r3, #0xF
beq 2f
/* copy up to 15 bytes (count in r3) */
sub r2, r2, r3
movs ip, r3, lsl #31
itt mi
ldrbmi lr, [r1], #1
strbmi lr, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1], #1
strbcs ip, [r0], #1
strbcs lr, [r0], #1
movs ip, r3, lsl #29
bge 1f
// copies 4 bytes, destination 32-bits aligned
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
1: bcc 2f
// copies 8 bytes, destination 64-bits aligned
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0, :64]!
2: /* make sure we have at least 64 bytes to copy */
subs r2, r2, #64
blo 2f
1: /* The main loop copies 64 bytes at a time */
vld1.8 {d0 - d3}, [r1]!
vld1.8 {d4 - d7}, [r1]!
pld [r1, #(32*8)]
subs r2, r2, #64
vst1.8 {d0 - d3}, [r0, :128]!
vst1.8 {d4 - d7}, [r0, :128]!
bhs 1b
2: /* fix-up the remaining count and make sure we have >= 32 bytes left */
adds r2, r2, #32
blo 4f
/* Copy 32 bytes. These cache lines were already preloaded */
vld1.8 {d0 - d3}, [r1]!
sub r2, r2, #32
vst1.8 {d0 - d3}, [r0, :128]!
4: /* less than 32 left */
add r2, r2, #32
tst r2, #0x10
beq 5f
// copies 16 bytes, 128-bits aligned
vld1.8 {d0, d1}, [r1]!
vst1.8 {d0, d1}, [r0, :128]!
5: /* copy up to 15 bytes (count in r2) */
movs ip, r2, lsl #29
bcc 1f
vld1.8 {d0}, [r1]!
vst1.8 {d0}, [r0]!
1: bge 2f
vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]!
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]!
2: movs ip, r2, lsl #31
itt mi
ldrbmi r3, [r1], #1
strbmi r3, [r0], #1
itttt cs
ldrbcs ip, [r1], #1
ldrbcs lr, [r1], #1
strbcs ip, [r0], #1
strbcs lr, [r0], #1
ldmfd sp!, {r0, lr}
bx lr
.cfi_endproc
END(MEMCPY_BASE)
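Both memcpy variants share the same tail strategy: rather than a byte loop, the leftover count (0..15) is shifted so its bits land in the condition flags (lsl #29 exposes the 8- and 4-byte bits, lsl #31 the 2- and 1-byte bits), and one fixed-size chunk is copied per set bit. The same logic in C, as a sketch with a hypothetical helper:

#include <stdint.h>
#include <string.h>

/* Copy a tail of n < 16 bytes without a loop: at most one
   8-, 4-, 2- and 1-byte copy, selected by the bits of n. */
static void copy_tail(uint8_t* dst, const uint8_t* src, size_t n) {
    if (n & 8) { memcpy(dst, src, 8); dst += 8; src += 8; }
    if (n & 4) { memcpy(dst, src, 4); dst += 4; src += 4; }
    if (n & 2) { memcpy(dst, src, 2); dst += 2; src += 2; }
    if (n & 1) { *dst = *src; }
}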

View File

@@ -28,7 +28,6 @@
#include <machine/cpu-features.h>
#include <machine/asm.h>
#include "libc_events.h"
/*
* This code assumes it is running on a processor that supports all arm v7
@@ -38,47 +37,15 @@
.fpu neon
ENTRY(__memset_chk)
.cfi_startproc
cmp r2, r3
bls .L_done
// Preserve lr for backtrace.
.save {lr}
push {lr}
.cfi_def_cfa_offset 4
.cfi_rel_offset lr, 0
ldr r0, error_message
ldr r1, error_code
1:
add r0, pc
bl __fortify_chk_fail
error_code:
.word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
.word error_string-(1b+8)
.cfi_endproc
END(__memset_chk)
ENTRY(bzero)
.cfi_startproc
mov r2, r1
mov r1, #0
.L_done:
// Fall through to memset...
.cfi_endproc
END(bzero)
/* memset() returns its first argument. */
ENTRY(memset)
.cfi_startproc
.save {r0}
stmfd sp!, {r0}
.cfi_def_cfa_offset 4
.cfi_rel_offset r0, 0
vdup.8 q0, r1
@@ -111,9 +78,4 @@ ENTRY(memset)
strcsb r1, [r0], #1
ldmfd sp!, {r0}
bx lr
.cfi_endproc
END(memset)
.data
error_string:
.string "memset buffer overflow"
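bzero above is just a two-instruction argument shuffle (length moves from r1 to r2, the fill byte is forced to zero) that falls through into memset. The C equivalent, as a sketch with a hypothetical name (the assembly avoids even the extra call):

#include <string.h>

void my_bzero(void* s, size_t n) {
    memset(s, 0, n);
}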

View File

@@ -123,13 +123,8 @@ ENTRY(strcmp)
.macro init
/* Macro to save temporary registers and prepare magic values. */
subs sp, sp, #16
.cfi_def_cfa_offset 16
strd r4, r5, [sp, #8]
.cfi_rel_offset r4, 0
.cfi_rel_offset r5, 4
strd r6, r7, [sp]
.cfi_rel_offset r6, 8
.cfi_rel_offset r7, 12
mvn r6, #0 /* all F */
mov r7, #0 /* all 0 */
.endm /* init */
@@ -170,20 +165,18 @@ ENTRY(strcmp)
#endif /* not __ARMEB__ */
.endm /* setup_return */
.cfi_startproc
pld [r0, #0]
pld [r1, #0]
/* Are both strings double-word aligned? */
orr ip, r0, r1
tst ip, #7
bne .L_do_align
bne do_align
/* Fast path. */
.save {r4-r7}
init
.L_doubleword_aligned:
doubleword_aligned:
/* Get here when the strings to compare are double-word aligned. */
/* Compare two words in every iteration. */
@@ -196,14 +189,14 @@ ENTRY(strcmp)
ldrd r2, r3, [r0], #8
ldrd r4, r5, [r1], #8
magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
magic_compare_and_branch w1=r2, w2=r4, label=return_24
magic_compare_and_branch w1=r3, w2=r5, label=return_35
b 2b
.L_do_align:
do_align:
/* Is the first string word-aligned? */
ands ip, r0, #3
beq .L_word_aligned_r0
beq word_aligned_r0
/* Fast compare byte by byte until the first string is word-aligned. */
/* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
@@ -211,58 +204,58 @@ ENTRY(strcmp)
bic r0, r0, #3
ldr r2, [r0], #4
lsls ip, ip, #31
beq .L_byte2
bcs .L_byte3
beq byte2
bcs byte3
.L_byte1:
byte1:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE1_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbz reg=r3, label=.L_fast_return
bne fast_return
m_cbz reg=r3, label=fast_return
.L_byte2:
byte2:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE2_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbz reg=r3, label=.L_fast_return
bne fast_return
m_cbz reg=r3, label=fast_return
.L_byte3:
byte3:
ldrb ip, [r1], #1
uxtb r3, r2, ror #BYTE3_OFFSET
subs ip, r3, ip
bne .L_fast_return
m_cbnz reg=r3, label=.L_word_aligned_r0
bne fast_return
m_cbnz reg=r3, label=word_aligned_r0
.L_fast_return:
fast_return:
mov r0, ip
bx lr
.L_word_aligned_r0:
word_aligned_r0:
init
/* The first string is word-aligned. */
/* Is the second string word-aligned? */
ands ip, r1, #3
bne .L_strcmp_unaligned
bne strcmp_unaligned
.L_word_aligned:
word_aligned:
/* The strings are word-aligned. */
/* Is the first string double-word aligned? */
tst r0, #4
beq .L_doubleword_aligned_r0
beq doubleword_aligned_r0
/* If r0 is not double-word aligned yet, align it by loading
and comparing the next word from each string. */
ldr r2, [r0], #4
ldr r4, [r1], #4
magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
magic_compare_and_branch w1=r2 w2=r4 label=return_24
.L_doubleword_aligned_r0:
doubleword_aligned_r0:
/* Get here when r0 is double-word aligned. */
/* Is r1 doubleword_aligned? */
tst r1, #4
beq .L_doubleword_aligned
beq doubleword_aligned
/* Get here when the strings to compare are word-aligned,
r0 is double-word aligned, but r1 is not double-word aligned. */
@@ -278,9 +271,9 @@ ENTRY(strcmp)
/* Load the next double-word from each string and compare. */
ldrd r2, r3, [r0], #8
magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
magic_compare_and_branch w1=r2 w2=r5 label=return_25
ldrd r4, r5, [r1], #8
magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
magic_compare_and_branch w1=r3 w2=r4 label=return_34
b 3b
.macro miscmp_word offsetlo offsethi
@@ -304,46 +297,46 @@ ENTRY(strcmp)
and r2, r3, r6, S2LOMEM #\offsetlo
it eq
cmpeq r2, r5
bne .L_return_25
bne return_25
ldr r5, [r1], #4
cmp ip, #0
eor r3, r2, r3
S2HIMEM r2, r5, #\offsethi
it eq
cmpeq r3, r2
bne .L_return_32
bne return_32
b 7b
.endm /* miscmp_word */
.L_strcmp_unaligned:
strcmp_unaligned:
/* r0 is word-aligned, r1 is at offset ip from a word. */
/* Align r1 to the (previous) word-boundary. */
bic r1, r1, #3
/* Unaligned comparison word by word using LDRs. */
cmp ip, #2
beq .L_miscmp_word_16 /* If ip == 2. */
bge .L_miscmp_word_24 /* If ip == 3. */
beq miscmp_word_16 /* If ip == 2. */
bge miscmp_word_24 /* If ip == 3. */
miscmp_word offsetlo=8 offsethi=24 /* If ip == 1. */
.L_miscmp_word_24: miscmp_word offsetlo=24 offsethi=8
miscmp_word_24: miscmp_word offsetlo=24 offsethi=8
.L_return_32:
return_32:
setup_return w1=r3, w2=r2
b .L_do_return
.L_return_34:
b do_return
return_34:
setup_return w1=r3, w2=r4
b .L_do_return
.L_return_25:
b do_return
return_25:
setup_return w1=r2, w2=r5
b .L_do_return
.L_return_35:
b do_return
return_35:
setup_return w1=r3, w2=r5
b .L_do_return
.L_return_24:
b do_return
return_24:
setup_return w1=r2, w2=r4
.L_do_return:
do_return:
#ifdef __ARMEB__
mov r0, ip
@@ -355,16 +348,11 @@ ENTRY(strcmp)
ldrd r6, r7, [sp]
ldrd r4, r5, [sp, #8]
adds sp, sp, #16
.cfi_def_cfa_offset 0
.cfi_restore r4
.cfi_restore r5
.cfi_restore r6
.cfi_restore r7
/* There is a zero or a different byte between r1 and r2. */
/* r0 contains a mask of all-zero bytes in r1. */
/* Using r0 and not ip here because cbz requires low register. */
m_cbz reg=r0, label=.L_compute_return_value
m_cbz reg=r0, label=compute_return_value
clz r0, r0
/* r0 contains the number of bits on the left of the first all-zero byte in r1. */
rsb r0, r0, #24
@@ -372,7 +360,7 @@ ENTRY(strcmp)
lsr r1, r1, r0
lsr r2, r2, r0
.L_compute_return_value:
compute_return_value:
movs r0, #1
cmp r1, r2
/* The return value is computed as follows.
@@ -392,7 +380,7 @@ ENTRY(strcmp)
* previous version. See bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S
* for the unedited version of this code.
*/
.L_miscmp_word_16:
miscmp_word_16:
wp1 .req r0
wp2 .req r1
b1 .req r2
@@ -465,11 +453,6 @@ ENTRY(strcmp)
ldrd r6, r7, [sp]
ldrd r4, r5, [sp, #8]
adds sp, sp, #16
.cfi_def_cfa_offset 0
.cfi_restore r4
.cfi_restore r5
.cfi_restore r6
.cfi_restore r7
bx lr
@@ -489,12 +472,6 @@ ENTRY(strcmp)
ldrd r6, r7, [sp]
ldrd r4, r5, [sp, #8]
adds sp, sp, #16
.cfi_def_cfa_offset 0
.cfi_restore r4
.cfi_restore r5
.cfi_restore r6
.cfi_restore r7
bx lr
.cfi_endproc
END(strcmp)
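Whichever path locates the first differing or NUL byte, the return value reduces to comparing two byte values as unsigned quantities; the movs/cmp/sbc epilogue above produces exactly -1, 0, or 1. In C (a sketch; strcmp is only required to return a value with the right sign):

/* Sign of the comparison between the first mismatching bytes. */
static int compare_bytes(unsigned char c1, unsigned char c2) {
    return (c1 > c2) - (c1 < c2);  /* 1, 0, or -1 */
}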

View File

@@ -1,11 +1,5 @@
$(call libc-add-cpu-variant-src,MEMCPY,arch-arm/krait/bionic/memcpy.S)
$(call libc-add-cpu-variant-src,MEMSET,arch-arm/krait/bionic/memset.S)
$(call libc-add-cpu-variant-src,STRCMP,arch-arm/krait/bionic/strcmp.S)
$(call libc-add-cpu-variant-src,__STRCAT_CHK,arch-arm/krait/bionic/__strcat_chk.S)
$(call libc-add-cpu-variant-src,__STRCPY_CHK,arch-arm/krait/bionic/__strcpy_chk.S)
# Use cortex-a15 versions of strcat/strcpy/strlen.
$(call libc-add-cpu-variant-src,STRCAT,arch-arm/cortex-a15/bionic/strcat.S)
$(call libc-add-cpu-variant-src,STRCPY,arch-arm/cortex-a15/bionic/strcpy.S)
$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
include bionic/libc/arch-arm/generic/generic.mk

View File

@@ -126,8 +126,6 @@ syscall_src += arch-arm/syscalls/removexattr.S
syscall_src += arch-arm/syscalls/lremovexattr.S
syscall_src += arch-arm/syscalls/__statfs64.S
syscall_src += arch-arm/syscalls/unshare.S
syscall_src += arch-arm/syscalls/swapon.S
syscall_src += arch-arm/syscalls/swapoff.S
syscall_src += arch-arm/syscalls/pause.S
syscall_src += arch-arm/syscalls/gettimeofday.S
syscall_src += arch-arm/syscalls/settimeofday.S
@@ -146,9 +144,6 @@ syscall_src += arch-arm/syscalls/__timer_getoverrun.S
syscall_src += arch-arm/syscalls/__timer_delete.S
syscall_src += arch-arm/syscalls/utimes.S
syscall_src += arch-arm/syscalls/utimensat.S
syscall_src += arch-arm/syscalls/timerfd_create.S
syscall_src += arch-arm/syscalls/timerfd_settime.S
syscall_src += arch-arm/syscalls/timerfd_gettime.S
syscall_src += arch-arm/syscalls/sigaction.S
syscall_src += arch-arm/syscalls/sigprocmask.S
syscall_src += arch-arm/syscalls/__sigsuspend.S

View File

@@ -1,15 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
#include <linux/err.h>
#include <machine/asm.h>
ENTRY(swapoff)
mov ip, r7
ldr r7, =__NR_swapoff
swi #0
mov r7, ip
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno
END(swapoff)
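Every autogenerated ARM stub follows this shape: preserve r7, load the syscall number, trap with swi, then translate a kernel return in [-MAX_ERRNO, -1] into errno. The cmn/bxls/neg tail corresponds to this C sketch (finish_syscall is a hypothetical helper; MAX_ERRNO is 4095):

#include <errno.h>

/* The kernel returns -errno on failure; libc stores errno and
   returns -1, otherwise passes the result through unchanged. */
static long finish_syscall(long raw) {
    if ((unsigned long)raw >= (unsigned long)-4095L) {
        errno = (int)-raw;
        return -1;
    }
    return raw;
}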

View File

@@ -1,15 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
#include <linux/err.h>
#include <machine/asm.h>
ENTRY(swapon)
mov ip, r7
ldr r7, =__NR_swapon
swi #0
mov r7, ip
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno
END(swapon)

View File

@@ -1,15 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
#include <linux/err.h>
#include <machine/asm.h>
ENTRY(timerfd_create)
mov ip, r7
ldr r7, =__NR_timerfd_create
swi #0
mov r7, ip
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno
END(timerfd_create)

View File

@@ -1,15 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
#include <linux/err.h>
#include <machine/asm.h>
ENTRY(timerfd_gettime)
mov ip, r7
ldr r7, =__NR_timerfd_gettime
swi #0
mov r7, ip
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno
END(timerfd_gettime)

View File

@@ -1,15 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
#include <linux/err.h>
#include <machine/asm.h>
ENTRY(timerfd_settime)
mov ip, r7
ldr r7, =__NR_timerfd_settime
swi #0
mov r7, ip
cmn r0, #(MAX_ERRNO + 1)
bxls lr
neg r0, r0
b __set_errno
END(timerfd_settime)

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* Copyright (C) 2010 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,13 +26,17 @@
* SUCH DAMAGE.
*/
extern void __cxa_finalize(void *);
extern void *__dso_handle;

__attribute__((visibility("hidden"),destructor))
void __on_dlclose() {
  __cxa_finalize(&__dso_handle);
}

#include "__dso_handle_so.h"
#include "atexit.h"

# The __dso_handle global variable is used by static
# C++ constructors and destructors in the binary.
# See http://www.codesourcery.com/public/cxx-abi/abi.html#dso-dtor
#
.section .bss
.align 4
#ifndef CRT_LEGACY_WORKAROUND
.hidden __dso_handle
#endif
.globl __dso_handle
__dso_handle:
.long 0

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* Copyright (C) 2010 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,11 +26,13 @@
* SUCH DAMAGE.
*/
extern void *__dso_handle;
extern int __cxa_atexit(void (*func)(void *), void *arg, void *dso);
__attribute__ ((visibility ("hidden")))
int atexit(void (*func)(void))
{
return (__cxa_atexit((void (*)(void *))func, (void *)0, &__dso_handle));
}
# The __dso_handle global variable is used by static
# C++ constructors and destructors in the binary.
# See http://www.codesourcery.com/public/cxx-abi/abi.html#dso-dtor
#
.data
.align 4
.hidden __dso_handle
.globl __dso_handle
__dso_handle:
.long __dso_handle

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,16 +25,19 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _BIONIC_NAME_MEM_H
#define _BIONIC_NAME_MEM_H
#include <sys/cdefs.h>
#include <stddef.h>
__BEGIN_DECLS
int __bionic_name_mem(void *addr, size_t len, const char *name);
__END_DECLS
#endif
.text
.globl atexit
.hidden atexit
.type atexit, @function
.align 4
.ent atexit
atexit:
.set noreorder
.cpload $t9
.set reorder
la $t9, __cxa_atexit
move $a1, $0
la $a2, __dso_handle
j $t9
.size atexit, .-atexit
.end atexit

View File

@@ -0,0 +1,146 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
.text
.align 4
.type __start,@function
.globl __start
.globl _start
# this is the small startup code that is run first when
# any executable that is statically linked with Bionic
# runs.
#
# its purpose is to call __libc_init with appropriate
# arguments, which are:
#
# - the address of the raw data block setup by the Linux
# kernel ELF loader
#
# - address of an "onexit" function, not used on any
# platform supported by Bionic
#
# - address of the "main" function of the program.
#
# - address of the constructor list
#
.ent __start
__start:
_start:
bal 1f
1:
.set noreorder
.cpload $ra
.set reorder
move $a0, $sp
move $a1, $0
la $a2, main
la $a3, 1f
subu $sp, 32
la $t9, __libc_init
j $t9
.end __start
1: .long __PREINIT_ARRAY__
.long __INIT_ARRAY__
.long __FINI_ARRAY__
.section .preinit_array, "aw"
.type __PREINIT_ARRAY__, @object
.globl __PREINIT_ARRAY__
__PREINIT_ARRAY__:
.long -1
.section .init_array, "aw"
.type __INIT_ARRAY__, @object
.globl __INIT_ARRAY__
__INIT_ARRAY__:
.long -1
.section .fini_array, "aw"
.type __FINI_ARRAY__, @object
.globl __FINI_ARRAY__
__FINI_ARRAY__:
.long -1
.long __do_global_dtors_aux
.abicalls
.text
.align 2
.set nomips16
.ent __do_global_dtors_aux
.type __do_global_dtors_aux, @function
__do_global_dtors_aux:
.frame $sp,32,$31 # vars= 0, regs= 1/0, args= 16, gp= 8
.mask 0x80000000,-4
.fmask 0x00000000,0
.set noreorder
.cpload $25
.set nomacro
addiu $sp,$sp,-32
sw $31,28($sp)
.cprestore 16
lw $2,%got(completed.1269)($28)
lbu $2,%lo(completed.1269)($2)
bne $2,$0,$L8
nop
$L4:
lw $2,%got(__cxa_finalize)($28)
beq $2,$0,$L6
nop
lw $2,%got(__dso_handle)($28)
lw $4,0($2)
lw $25,%call16(__cxa_finalize)($28)
.reloc 1f,R_MIPS_JALR,__cxa_finalize
1: jalr $25
nop
lw $28,16($sp)
$L6:
lw $2,%got(completed.1269)($28)
li $3,1 # 0x1
sb $3,%lo(completed.1269)($2)
$L8:
lw $31,28($sp)
addiu $sp,$sp,32
j $31
nop
.set macro
.set reorder
.end __do_global_dtors_aux
.size __do_global_dtors_aux, .-__do_global_dtors_aux
.local completed.1269
.comm completed.1269,1,1
.weak __cxa_finalize
#include "__dso_handle.S"
#include "atexit.S"
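__do_global_dtors_aux is the standard GCC-generated epilogue: call __cxa_finalize(&__dso_handle) at most once, guarded by a one-byte completed flag, and only if __cxa_finalize was linked in at all (it is declared .weak). Its control flow in C, as a sketch (completed.1269 is a compiler-generated name):

extern void* __dso_handle;
extern void __cxa_finalize(void*) __attribute__((weak));

static unsigned char completed;  /* corresponds to completed.1269 */

static void my_do_global_dtors_aux(void) {
    if (completed) return;
    if (__cxa_finalize) __cxa_finalize(&__dso_handle);
    completed = 1;
}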

View File

@@ -1,94 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "../../bionic/libc_init_common.h"
#include <stddef.h>
#include <stdint.h>
__attribute__ ((section (".preinit_array")))
void (*__PREINIT_ARRAY__)(void) = (void (*)(void)) -1;
__attribute__ ((section (".init_array")))
void (*__INIT_ARRAY__)(void) = (void (*)(void)) -1;
__attribute__ ((section (".fini_array")))
void (*__FINI_ARRAY__)(void) = (void (*)(void)) -1;
__LIBC_HIDDEN__ void do_mips_start(void *raw_args) {
structors_array_t array;
array.preinit_array = &__PREINIT_ARRAY__;
array.init_array = &__INIT_ARRAY__;
array.fini_array = &__FINI_ARRAY__;
__libc_init(raw_args, NULL, &main, &array);
}
/*
* This function prepares the return address with a branch-and-link
* instruction (bal) and then uses a .cpload to compute the Global
* Offset Table (GOT) pointer ($gp). The $gp is then used to load
* the address of do_mips_start() into $t9 just before calling it.
* The stack is terminated with a NULL return address.
*/
__asm__ (
" .set push \n"
" \n"
" .text \n"
" .align 4 \n"
" .type __start,@function \n"
" .globl __start \n"
" .globl _start \n"
" \n"
" .ent __start \n"
"__start: \n"
" _start: \n"
" .frame $sp,32,$ra \n"
" .mask 0x80000000,-4 \n"
" \n"
" .set noreorder \n"
" bal 1f \n"
" nop \n"
"1: \n"
" .cpload $ra \n"
" .set reorder \n"
" \n"
" move $a0, $sp \n"
" addiu $sp, $sp, (-32) \n"
" sw $0, 28($sp) \n"
" la $t9, do_mips_start \n"
" jalr $t9 \n"
" \n"
"2: b 2b \n"
" .end __start \n"
" \n"
" .set pop \n"
);
#include "__dso_handle.h"
#include "atexit.h"

View File

@@ -25,31 +25,70 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <errno.h>
#include <sys/mman.h>
#include <unistd.h>

#include "private/ErrnoRestorer.h"

// mmap2(2) is like mmap(2), but the offset is in 4096-byte blocks, not bytes.
extern "C" void* __mmap2(void*, size_t, int, int, int, size_t);

#define MMAP2_SHIFT 12 // 2**12 == 4096

void* mmap(void* addr, size_t size, int prot, int flags, int fd, off_t offset) {
  if (offset & ((1UL << MMAP2_SHIFT)-1)) {
    errno = EINVAL;
    return MAP_FAILED;
  }

  size_t unsigned_offset = static_cast<size_t>(offset); // To avoid sign extension.
  void* result = __mmap2(addr, size, prot, flags, fd, unsigned_offset >> MMAP2_SHIFT);

  if (result != MAP_FAILED && (flags & (MAP_PRIVATE | MAP_ANONYMOUS)) != 0) {
    ErrnoRestorer errno_restorer;
    madvise(result, size, MADV_MERGEABLE);
  }

  return result;
}

.section .init_array, "aw"
.type __INIT_ARRAY__, @object
.globl __INIT_ARRAY__
__INIT_ARRAY__:
.long -1

.section .fini_array, "aw"
.type __FINI_ARRAY__, @object
.globl __FINI_ARRAY__
__FINI_ARRAY__:
.long -1
.long __do_global_dtors_aux

.abicalls
.text
.align 2
.set nomips16
.ent __do_global_dtors_aux
.type __do_global_dtors_aux, @function
__do_global_dtors_aux:
.frame $sp,32,$31 # vars= 0, regs= 1/0, args= 16, gp= 8
.mask 0x80000000,-4
.fmask 0x00000000,0
.set noreorder
.cpload $25
.set nomacro
addiu $sp,$sp,-32
sw $31,28($sp)
.cprestore 16
lw $2,%got(completed.1269)($28)
lbu $2,%lo(completed.1269)($2)
bne $2,$0,$L8
nop
$L4:
lw $2,%got(__cxa_finalize)($28)
beq $2,$0,$L6
nop
lw $2,%got(__dso_handle)($28)
lw $4,0($2)
lw $25,%call16(__cxa_finalize)($28)
.reloc 1f,R_MIPS_JALR,__cxa_finalize
1: jalr $25
nop
lw $28,16($sp)
$L6:
lw $2,%got(completed.1269)($28)
li $3,1 # 0x1
sb $3,%lo(completed.1269)($2)
$L8:
lw $31,28($sp)
addiu $sp,$sp,32
j $31
nop
.set macro
.set reorder
.end __do_global_dtors_aux
.size __do_global_dtors_aux, .-__do_global_dtors_aux
.local completed.1269
.comm completed.1269,1,1
.weak __cxa_finalize

#include "__dso_handle_so.S"
#include "atexit.S"
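The offset & ((1UL << MMAP2_SHIFT)-1) guard in the mmap wrapper above means a caller passing an unaligned offset fails fast with EINVAL, before any kernel entry. A hypothetical illustration (not from the source):

#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>

int try_unaligned_map(int fd) {
    /* 0x1234 is not a multiple of 4096, so the wrapper rejects it. */
    void* p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0x1234);
    if (p == MAP_FAILED && errno == EINVAL) {
        printf("unaligned offset rejected, as expected\n");
        return 0;
    }
    return 1;
}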

View File

@@ -129,8 +129,6 @@ syscall_src += arch-mips/syscalls/removexattr.S
syscall_src += arch-mips/syscalls/lremovexattr.S
syscall_src += arch-mips/syscalls/__statfs64.S
syscall_src += arch-mips/syscalls/unshare.S
syscall_src += arch-mips/syscalls/swapon.S
syscall_src += arch-mips/syscalls/swapoff.S
syscall_src += arch-mips/syscalls/pause.S
syscall_src += arch-mips/syscalls/gettimeofday.S
syscall_src += arch-mips/syscalls/settimeofday.S
@@ -149,9 +147,6 @@ syscall_src += arch-mips/syscalls/__timer_getoverrun.S
syscall_src += arch-mips/syscalls/__timer_delete.S
syscall_src += arch-mips/syscalls/utimes.S
syscall_src += arch-mips/syscalls/utimensat.S
syscall_src += arch-mips/syscalls/timerfd_create.S
syscall_src += arch-mips/syscalls/timerfd_settime.S
syscall_src += arch-mips/syscalls/timerfd_gettime.S
syscall_src += arch-mips/syscalls/sigaction.S
syscall_src += arch-mips/syscalls/sigprocmask.S
syscall_src += arch-mips/syscalls/__sigsuspend.S

View File

@@ -1,22 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
.text
.globl swapoff
.align 4
.ent swapoff
swapoff:
.set noreorder
.cpload $t9
li $v0, __NR_swapoff
syscall
bnez $a3, 1f
move $a0, $v0
j $ra
nop
1:
la $t9,__set_errno
j $t9
nop
.set reorder
.end swapoff

View File

@@ -1,22 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
.text
.globl swapon
.align 4
.ent swapon
swapon:
.set noreorder
.cpload $t9
li $v0, __NR_swapon
syscall
bnez $a3, 1f
move $a0, $v0
j $ra
nop
1:
la $t9,__set_errno
j $t9
nop
.set reorder
.end swapon

View File

@@ -1,22 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
.text
.globl timerfd_create
.align 4
.ent timerfd_create
timerfd_create:
.set noreorder
.cpload $t9
li $v0, __NR_timerfd_create
syscall
bnez $a3, 1f
move $a0, $v0
j $ra
nop
1:
la $t9,__set_errno
j $t9
nop
.set reorder
.end timerfd_create

View File

@@ -1,22 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
.text
.globl timerfd_gettime
.align 4
.ent timerfd_gettime
timerfd_gettime:
.set noreorder
.cpload $t9
li $v0, __NR_timerfd_gettime
syscall
bnez $a3, 1f
move $a0, $v0
j $ra
nop
1:
la $t9,__set_errno
j $t9
nop
.set reorder
.end timerfd_gettime

View File

@@ -1,22 +0,0 @@
/* autogenerated by gensyscalls.py */
#include <asm/unistd.h>
.text
.globl timerfd_settime
.align 4
.ent timerfd_settime
timerfd_settime:
.set noreorder
.cpload $t9
li $v0, __NR_timerfd_settime
syscall
bnez $a3, 1f
move $a0, $v0
j $ra
nop
1:
la $t9,__set_errno
j $t9
nop
.set reorder
.end timerfd_settime

View File

@@ -1,4 +1,4 @@
/* $OpenBSD: endian.h,v 1.17 2011/03/12 04:03:04 guenther Exp $ */
/* $OpenBSD: endian.h,v 1.14 2005/12/13 00:35:23 millert Exp $ */
/*-
* Copyright (c) 1997 Niklas Hallqvist. All rights reserved.
@@ -24,28 +24,38 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _MACHINE_ENDIAN_H_
#define _MACHINE_ENDIAN_H_
#ifndef _I386_ENDIAN_H_
#define _I386_ENDIAN_H_
#ifdef __GNUC__
#define __swap32md(x) __statement({ \
#if defined(_KERNEL) && !defined(I386_CPU)
#define __swap32md(x) ({ \
uint32_t __swap32md_x = (x); \
\
__asm ("bswap %0" : "+r" (__swap32md_x)); \
__asm ("bswap %1" : "+r" (__swap32md_x)); \
__swap32md_x; \
})
#else
#define __swap32md(x) ({ \
uint32_t __swap32md_x = (x); \
\
__asm ("rorw $8, %w1; rorl $16, %1; rorw $8, %w1" : \
"+r" (__swap32md_x)); \
__swap32md_x; \
})
#endif /* _KERNEL && !I386_CPU */
#define __swap64md(x) __statement({ \
#define __swap64md(x) ({ \
uint64_t __swap64md_x = (x); \
\
(uint64_t)__swap32md(__swap64md_x >> 32) | \
(uint64_t)__swap32md(__swap64md_x & 0xffffffff) << 32; \
})
#define __swap16md(x) __statement({ \
#define __swap16md(x) ({ \
uint16_t __swap16md_x = (x); \
\
__asm ("rorw $8, %w0" : "+r" (__swap16md_x)); \
__asm ("rorw $8, %w1" : "+r" (__swap16md_x)); \
__swap16md_x; \
})
@@ -58,4 +68,4 @@
#include <sys/types.h>
#include <sys/endian.h>
#endif /* _MACHINE_ENDIAN_H_ */
#endif /* _I386_ENDIAN_H_ */

View File

@@ -0,0 +1,45 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSSE3)
# include "cache_wrapper.S"
# undef __i686
# define MEMCPY bcopy
# define USE_AS_MEMMOVE
# define USE_AS_BCOPY
# include "ssse3-memcpy5.S"
#else
# include "bcopy.S"
#endif

View File

@@ -0,0 +1,43 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSE2)
# include "cache_wrapper.S"
# undef __i686
# define USE_AS_BZERO
# define sse2_memset5_atom bzero
# include "sse2-memset5-atom.S"
#else
# include "bzero.S"
#endif

View File

@@ -0,0 +1,26 @@
/* $OpenBSD: index.S,v 1.4 2005/08/07 11:30:38 espie Exp $ */
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
*/
#include <machine/asm.h>
#ifdef STRCHR
ENTRY(strchr)
#else
ENTRY(index)
#endif
movl 4(%esp),%eax
movb 8(%esp),%cl
.align 2,0x90
L1:
movb (%eax),%dl
cmpb %dl,%cl /* found char??? */
je L2
incl %eax
testb %dl,%dl /* null terminator??? */
jnz L1
xorl %eax,%eax
L2:
ret
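The same source assembles as strchr or its BSD alias index, depending on the STRCHR define. The loop transcribes the portable C version (a sketch with a hypothetical name; note that searching for '\0' must return a pointer to the terminator, which the je L2 before the terminator test gets right):

char* my_strchr(const char* s, int c) {
    for (;; ++s) {
        if (*s == (char)c) return (char*)s;
        if (*s == '\0') return 0;
    }
}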

View File

@@ -0,0 +1,27 @@
/* $OpenBSD: memchr.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
*/
#include <machine/asm.h>
ENTRY(memchr)
pushl %edi
movl 8(%esp),%edi /* string address */
movl 12(%esp),%eax /* set character to search for */
movl 16(%esp),%ecx /* set length of search */
testl %ecx,%ecx /* test for len == 0 */
jz L1
cld /* set search forward */
repne /* search! */
scasb
jne L1 /* scan failed, return null */
leal -1(%edi),%eax /* adjust result of scan */
popl %edi
ret
.align 2,0x90
L1: xorl %eax,%eax
popl %edi
ret
END(memchr)
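This generic i386 memchr is repne scasb plus a fix-up: after a hit, %edi points one past the match, hence the leal -1(%edi). The semantics in C (a sketch):

#include <stddef.h>

void* my_memchr(const void* s, int c, size_t n) {
    const unsigned char* p = (const unsigned char*)s;
    while (n--) {
        if (*p == (unsigned char)c) return (void*)p;
        ++p;
    }
    return NULL;
}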

View File

@@ -28,8 +28,13 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSSE3)
#define USE_AS_STRNCMP
#define STRCMP strncmp
#include "ssse3-strcmp-atom.S"
# define MEMCMP memcmp
# include "ssse3-memcmp3-new.S"
#else
# include "memcmp.S"
#endif

View File

@@ -0,0 +1,43 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSSE3)
# include "cache_wrapper.S"
# undef __i686
# define MEMCPY memcpy
# define USE_AS_MEMMOVE
# include "ssse3-memcpy5.S"
#else
# include "memcpy.S"
#endif

View File

@@ -0,0 +1,43 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSSE3)
# include "cache_wrapper.S"
# undef __i686
# define MEMCPY memmove
# define USE_AS_MEMMOVE
# include "ssse3-memcpy5.S"
#else
# include "memmove.S"
#endif

View File

@@ -0,0 +1,42 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSE2)
# include "cache_wrapper.S"
# undef __i686
# define sse2_memset5_atom memset
# include "sse2-memset5-atom.S"
#else
# include "memset.S"
#endif

View File

@@ -1,32 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define strchr index
#include "sse2-strchr-atom.S"

View File

@@ -1,556 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define ENTRANCE PUSH (%edi);
#define PARMS 8
#define RETURN POP (%edi); ret; CFI_PUSH (%edi);
#define STR1 PARMS
#define STR2 STR1+4
#define LEN STR2+4
.text
ENTRY (memchr)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
mov LEN(%esp), %edx
test %edx, %edx
jz L(return_null)
punpcklbw %xmm1, %xmm1
mov %ecx, %edi
punpcklbw %xmm1, %xmm1
and $63, %ecx
pshufd $0, %xmm1, %xmm1
cmp $48, %ecx
ja L(crosscache)
movdqu (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(match_case2_prolog)
sub $16, %edx
jbe L(return_null)
lea 16(%edi), %edi
and $15, %ecx
and $-16, %edi
add %ecx, %edx
sub $64, %edx
jbe L(exit_loop)
jmp L(loop_prolog)
.p2align 4
L(crosscache):
and $15, %ecx
and $-16, %edi
movdqa (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
sar %cl, %eax
test %eax, %eax
jnz L(match_case2_prolog1)
lea -16(%edx), %edx
add %ecx, %edx
jle L(return_null)
lea 16(%edi), %edi
sub $64, %edx
jbe L(exit_loop)
.p2align 4
L(loop_prolog):
movdqa (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
xor %ecx, %ecx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 16(%edi), %xmm2
pcmpeqb %xmm1, %xmm2
lea 16(%ecx), %ecx
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 32(%edi), %xmm3
pcmpeqb %xmm1, %xmm3
lea 16(%ecx), %ecx
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 48(%edi), %xmm4
pcmpeqb %xmm1, %xmm4
lea 16(%ecx), %ecx
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(match_case1)
lea 64(%edi), %edi
sub $64, %edx
jbe L(exit_loop)
movdqa (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
xor %ecx, %ecx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 16(%edi), %xmm2
pcmpeqb %xmm1, %xmm2
lea 16(%ecx), %ecx
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 32(%edi), %xmm3
pcmpeqb %xmm1, %xmm3
lea 16(%ecx), %ecx
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 48(%edi), %xmm4
pcmpeqb %xmm1, %xmm4
lea 16(%ecx), %ecx
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(match_case1)
lea 64(%edi), %edi
mov %edi, %ecx
and $-64, %edi
and $63, %ecx
add %ecx, %edx
.p2align 4
L(align64_loop):
sub $64, %edx
jbe L(exit_loop)
movdqa (%edi), %xmm0
movdqa 16(%edi), %xmm2
movdqa 32(%edi), %xmm3
movdqa 48(%edi), %xmm4
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm1, %xmm2
pcmpeqb %xmm1, %xmm3
pcmpeqb %xmm1, %xmm4
pmaxub %xmm0, %xmm3
pmaxub %xmm2, %xmm4
pmaxub %xmm3, %xmm4
add $64, %edi
pmovmskb %xmm4, %eax
test %eax, %eax
jz L(align64_loop)
sub $64, %edi
pmovmskb %xmm0, %eax
xor %ecx, %ecx
test %eax, %eax
jnz L(match_case1)
pmovmskb %xmm2, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(match_case1)
movdqa 32(%edi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(match_case1)
pcmpeqb 48(%edi), %xmm1
pmovmskb %xmm1, %eax
lea 16(%ecx), %ecx
.p2align 4
L(match_case1):
add %ecx, %edi
test %al, %al
jz L(match_case1_high)
mov %al, %cl
and $15, %cl
jz L(match_case1_8)
test $0x01, %al
jnz L(exit_case1_1)
test $0x02, %al
jnz L(exit_case1_2)
test $0x04, %al
jnz L(exit_case1_3)
lea 3(%edi), %eax
RETURN
.p2align 4
L(match_case1_8):
test $0x10, %al
jnz L(exit_case1_5)
test $0x20, %al
jnz L(exit_case1_6)
test $0x40, %al
jnz L(exit_case1_7)
lea 7(%edi), %eax
RETURN
.p2align 4
L(match_case1_high):
mov %ah, %ch
and $15, %ch
jz L(match_case1_high_8)
test $0x01, %ah
jnz L(exit_case1_9)
test $0x02, %ah
jnz L(exit_case1_10)
test $0x04, %ah
jnz L(exit_case1_11)
lea 11(%edi), %eax
RETURN
.p2align 4
L(match_case1_high_8):
test $0x10, %ah
jnz L(exit_case1_13)
test $0x20, %ah
jnz L(exit_case1_14)
test $0x40, %ah
jnz L(exit_case1_15)
lea 15(%edi), %eax
RETURN
.p2align 4
L(exit_loop):
add $64, %edx
movdqa (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
xor %ecx, %ecx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(match_case2)
cmp $16, %edx
jbe L(return_null)
movdqa 16(%edi), %xmm2
pcmpeqb %xmm1, %xmm2
lea 16(%ecx), %ecx
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(match_case2)
cmp $32, %edx
jbe L(return_null)
movdqa 32(%edi), %xmm3
pcmpeqb %xmm1, %xmm3
lea 16(%ecx), %ecx
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(match_case2)
cmp $48, %edx
jbe L(return_null)
pcmpeqb 48(%edi), %xmm1
lea 16(%ecx), %ecx
pmovmskb %xmm1, %eax
test %eax, %eax
jnz L(match_case2)
xor %eax, %eax
RETURN
.p2align 4
L(exit_case1_1):
mov %edi, %eax
RETURN
.p2align 4
L(exit_case1_2):
lea 1(%edi), %eax
RETURN
.p2align 4
L(exit_case1_3):
lea 2(%edi), %eax
RETURN
.p2align 4
L(exit_case1_5):
lea 4(%edi), %eax
RETURN
.p2align 4
L(exit_case1_6):
lea 5(%edi), %eax
RETURN
.p2align 4
L(exit_case1_7):
lea 6(%edi), %eax
RETURN
.p2align 4
L(exit_case1_9):
lea 8(%edi), %eax
RETURN
.p2align 4
L(exit_case1_10):
lea 9(%edi), %eax
RETURN
.p2align 4
L(exit_case1_11):
lea 10(%edi), %eax
RETURN
.p2align 4
L(exit_case1_13):
lea 12(%edi), %eax
RETURN
.p2align 4
L(exit_case1_14):
lea 13(%edi), %eax
RETURN
.p2align 4
L(exit_case1_15):
lea 14(%edi), %eax
RETURN
.p2align 4
L(match_case2):
sub %ecx, %edx
L(match_case2_prolog1):
add %ecx, %edi
L(match_case2_prolog):
test %al, %al
jz L(match_case2_high)
mov %al, %cl
and $15, %cl
jz L(match_case2_8)
test $0x01, %al
jnz L(exit_case2_1)
test $0x02, %al
jnz L(exit_case2_2)
test $0x04, %al
jnz L(exit_case2_3)
sub $4, %edx
jb L(return_null)
lea 3(%edi), %eax
RETURN
.p2align 4
L(match_case2_8):
test $0x10, %al
jnz L(exit_case2_5)
test $0x20, %al
jnz L(exit_case2_6)
test $0x40, %al
jnz L(exit_case2_7)
sub $8, %edx
jb L(return_null)
lea 7(%edi), %eax
RETURN
.p2align 4
L(match_case2_high):
mov %ah, %ch
and $15, %ch
jz L(match_case2_high_8)
test $0x01, %ah
jnz L(exit_case2_9)
test $0x02, %ah
jnz L(exit_case2_10)
test $0x04, %ah
jnz L(exit_case2_11)
sub $12, %edx
jb L(return_null)
lea 11(%edi), %eax
RETURN
.p2align 4
L(match_case2_high_8):
test $0x10, %ah
jnz L(exit_case2_13)
test $0x20, %ah
jnz L(exit_case2_14)
test $0x40, %ah
jnz L(exit_case2_15)
sub $16, %edx
jb L(return_null)
lea 15(%edi), %eax
RETURN
.p2align 4
L(exit_case2_1):
mov %edi, %eax
RETURN
.p2align 4
L(exit_case2_2):
sub $2, %edx
jb L(return_null)
lea 1(%edi), %eax
RETURN
.p2align 4
L(exit_case2_3):
sub $3, %edx
jb L(return_null)
lea 2(%edi), %eax
RETURN
.p2align 4
L(exit_case2_5):
sub $5, %edx
jb L(return_null)
lea 4(%edi), %eax
RETURN
.p2align 4
L(exit_case2_6):
sub $6, %edx
jb L(return_null)
lea 5(%edi), %eax
RETURN
.p2align 4
L(exit_case2_7):
sub $7, %edx
jb L(return_null)
lea 6(%edi), %eax
RETURN
.p2align 4
L(exit_case2_9):
sub $9, %edx
jb L(return_null)
lea 8(%edi), %eax
RETURN
.p2align 4
L(exit_case2_10):
sub $10, %edx
jb L(return_null)
lea 9(%edi), %eax
RETURN
.p2align 4
L(exit_case2_11):
sub $11, %edx
jb L(return_null)
lea 10(%edi), %eax
RETURN
.p2align 4
L(exit_case2_13):
sub $13, %edx
jb L(return_null)
lea 12(%edi), %eax
RETURN
.p2align 4
L(exit_case2_14):
sub $14, %edx
jb L(return_null)
lea 13(%edi), %eax
RETURN
.p2align 4
L(exit_case2_15):
sub $15, %edx
jb L(return_null)
lea 14(%edi), %eax
RETURN
.p2align 4
L(return_null):
xor %eax, %eax
RETURN
END (memchr)

View File

@@ -1,778 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 4
#define STR1 PARMS
#define STR2 STR1+4
#define LEN STR2+4
.text
ENTRY (memrchr)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
mov LEN(%esp), %edx
test %edx, %edx
jz L(return_null)
sub $16, %edx
jbe L(length_less16)
punpcklbw %xmm1, %xmm1
add %edx, %ecx
punpcklbw %xmm1, %xmm1
movdqu (%ecx), %xmm0
pshufd $0, %xmm1, %xmm1
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(exit_dispatch)
sub $64, %ecx
mov %ecx, %eax
and $15, %eax
jz L(loop_prolog)
add $16, %ecx
add $16, %edx
and $-16, %ecx
sub %eax, %edx
.p2align 4
/* Loop start on aligned string. */
L(loop_prolog):
sub $64, %edx
jbe L(exit_loop)
movdqa 48(%ecx), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches48)
movdqa 32(%ecx), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches32)
movdqa 16(%ecx), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches16)
movdqa (%ecx), %xmm4
pcmpeqb %xmm1, %xmm4
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(exit_dispatch)
sub $64, %ecx
sub $64, %edx
jbe L(exit_loop)
movdqa 48(%ecx), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches48)
movdqa 32(%ecx), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches32)
movdqa 16(%ecx), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches16)
movdqa (%ecx), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(exit_dispatch)
mov %ecx, %eax
and $63, %eax
test %eax, %eax
jz L(align64_loop)
add $64, %ecx
add $64, %edx
and $-64, %ecx
sub %eax, %edx
.p2align 4
L(align64_loop):
sub $64, %ecx
sub $64, %edx
jbe L(exit_loop)
movdqa (%ecx), %xmm0
movdqa 16(%ecx), %xmm2
movdqa 32(%ecx), %xmm3
movdqa 48(%ecx), %xmm4
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm1, %xmm2
pcmpeqb %xmm1, %xmm3
pcmpeqb %xmm1, %xmm4
pmaxub %xmm3, %xmm0
pmaxub %xmm4, %xmm2
pmaxub %xmm0, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jz L(align64_loop)
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(matches48)
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
movdqa 16(%ecx), %xmm2
pcmpeqb %xmm1, %xmm2
pcmpeqb (%ecx), %xmm1
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
pmovmskb %xmm1, %eax
test %ah, %ah
jnz L(exit_dispatch_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(exit_dispatch_8)
test $0x08, %al
jnz L(exit_4)
test $0x04, %al
jnz L(exit_3)
test $0x02, %al
jnz L(exit_2)
mov %ecx, %eax
ret
.p2align 4
L(exit_loop):
add $64, %edx
cmp $32, %edx
jbe L(exit_loop_32)
movdqa 48(%ecx), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches48)
movdqa 32(%ecx), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches32)
movdqa 16(%ecx), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches16_1)
cmp $48, %edx
jbe L(return_null)
pcmpeqb (%ecx), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
jnz L(matches0_1)
xor %eax, %eax
ret
.p2align 4
L(exit_loop_32):
movdqa 48(%ecx), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches48_1)
cmp $16, %edx
jbe L(return_null)
pcmpeqb 32(%ecx), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
jnz L(matches32_1)
xor %eax, %eax
ret
.p2align 4
L(matches16):
lea 16(%ecx), %ecx
test %ah, %ah
jnz L(exit_dispatch_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(exit_dispatch_8)
test $0x08, %al
jnz L(exit_4)
test $0x04, %al
jnz L(exit_3)
test $0x02, %al
jnz L(exit_2)
mov %ecx, %eax
ret
.p2align 4
L(matches32):
lea 32(%ecx), %ecx
test %ah, %ah
jnz L(exit_dispatch_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(exit_dispatch_8)
test $0x08, %al
jnz L(exit_4)
test $0x04, %al
jnz L(exit_3)
test $0x02, %al
jnz L(exit_2)
mov %ecx, %eax
ret
.p2align 4
L(matches48):
lea 48(%ecx), %ecx
.p2align 4
L(exit_dispatch):
test %ah, %ah
jnz L(exit_dispatch_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(exit_dispatch_8)
test $0x08, %al
jnz L(exit_4)
test $0x04, %al
jnz L(exit_3)
test $0x02, %al
jnz L(exit_2)
mov %ecx, %eax
ret
.p2align 4
L(exit_dispatch_8):
test $0x80, %al
jnz L(exit_8)
test $0x40, %al
jnz L(exit_7)
test $0x20, %al
jnz L(exit_6)
lea 4(%ecx), %eax
ret
.p2align 4
L(exit_dispatch_high):
mov %ah, %dh
and $15 << 4, %dh
jnz L(exit_dispatch_high_8)
test $0x08, %ah
jnz L(exit_12)
test $0x04, %ah
jnz L(exit_11)
test $0x02, %ah
jnz L(exit_10)
lea 8(%ecx), %eax
ret
.p2align 4
L(exit_dispatch_high_8):
test $0x80, %ah
jnz L(exit_16)
test $0x40, %ah
jnz L(exit_15)
test $0x20, %ah
jnz L(exit_14)
lea 12(%ecx), %eax
ret
.p2align 4
L(exit_2):
lea 1(%ecx), %eax
ret
.p2align 4
L(exit_3):
lea 2(%ecx), %eax
ret
.p2align 4
L(exit_4):
lea 3(%ecx), %eax
ret
.p2align 4
L(exit_6):
lea 5(%ecx), %eax
ret
.p2align 4
L(exit_7):
lea 6(%ecx), %eax
ret
.p2align 4
L(exit_8):
lea 7(%ecx), %eax
ret
.p2align 4
L(exit_10):
lea 9(%ecx), %eax
ret
.p2align 4
L(exit_11):
lea 10(%ecx), %eax
ret
.p2align 4
L(exit_12):
lea 11(%ecx), %eax
ret
.p2align 4
L(exit_14):
lea 13(%ecx), %eax
ret
.p2align 4
L(exit_15):
lea 14(%ecx), %eax
ret
.p2align 4
L(exit_16):
lea 15(%ecx), %eax
ret
.p2align 4
L(matches0_1):
lea -64(%edx), %edx
test %ah, %ah
jnz L(exit_dispatch_1_high)
mov %al, %ah
and $15 << 4, %ah
jnz L(exit_dispatch_1_8)
test $0x08, %al
jnz L(exit_1_4)
test $0x04, %al
jnz L(exit_1_3)
test $0x02, %al
jnz L(exit_1_2)
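/* In these bounded paths %edx carries the remaining-length budget:
   adding the candidate bit offset and testing the sign (jl) rejects
   matches that lie beyond the requested length. */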
add $0, %edx
jl L(return_null)
mov %ecx, %eax
ret
.p2align 4
L(matches16_1):
lea -48(%edx), %edx
lea 16(%ecx), %ecx
test %ah, %ah
jnz L(exit_dispatch_1_high)
mov %al, %ah
and $15 << 4, %ah
jnz L(exit_dispatch_1_8)
test $0x08, %al
jnz L(exit_1_4)
test $0x04, %al
jnz L(exit_1_3)
test $0x02, %al
jnz L(exit_1_2)
add $0, %edx
jl L(return_null)
mov %ecx, %eax
ret
.p2align 4
L(matches32_1):
lea -32(%edx), %edx
lea 32(%ecx), %ecx
test %ah, %ah
jnz L(exit_dispatch_1_high)
mov %al, %ah
and $15 << 4, %ah
jnz L(exit_dispatch_1_8)
test $0x08, %al
jnz L(exit_1_4)
test $0x04, %al
jnz L(exit_1_3)
test $0x02, %al
jnz L(exit_1_2)
add $0, %edx
jl L(return_null)
mov %ecx, %eax
ret
.p2align 4
L(matches48_1):
lea -16(%edx), %edx
lea 48(%ecx), %ecx
.p2align 4
L(exit_dispatch_1):
test %ah, %ah
jnz L(exit_dispatch_1_high)
mov %al, %ah
and $15 << 4, %ah
jnz L(exit_dispatch_1_8)
test $0x08, %al
jnz L(exit_1_4)
test $0x04, %al
jnz L(exit_1_3)
test $0x02, %al
jnz L(exit_1_2)
add $0, %edx
jl L(return_null)
mov %ecx, %eax
ret
.p2align 4
L(exit_dispatch_1_8):
test $0x80, %al
jnz L(exit_1_8)
test $0x40, %al
jnz L(exit_1_7)
test $0x20, %al
jnz L(exit_1_6)
add $4, %edx
jl L(return_null)
lea 4(%ecx), %eax
ret
.p2align 4
L(exit_dispatch_1_high):
mov %ah, %al
and $15 << 4, %al
jnz L(exit_dispatch_1_high_8)
test $0x08, %ah
jnz L(exit_1_12)
test $0x04, %ah
jnz L(exit_1_11)
test $0x02, %ah
jnz L(exit_1_10)
add $8, %edx
jl L(return_null)
lea 8(%ecx), %eax
ret
.p2align 4
L(exit_dispatch_1_high_8):
test $0x80, %ah
jnz L(exit_1_16)
test $0x40, %ah
jnz L(exit_1_15)
test $0x20, %ah
jnz L(exit_1_14)
add $12, %edx
jl L(return_null)
lea 12(%ecx), %eax
ret
.p2align 4
L(exit_1_2):
add $1, %edx
jl L(return_null)
lea 1(%ecx), %eax
ret
.p2align 4
L(exit_1_3):
add $2, %edx
jl L(return_null)
lea 2(%ecx), %eax
ret
.p2align 4
L(exit_1_4):
add $3, %edx
jl L(return_null)
lea 3(%ecx), %eax
ret
.p2align 4
L(exit_1_6):
add $5, %edx
jl L(return_null)
lea 5(%ecx), %eax
ret
.p2align 4
L(exit_1_7):
add $6, %edx
jl L(return_null)
lea 6(%ecx), %eax
ret
.p2align 4
L(exit_1_8):
add $7, %edx
jl L(return_null)
lea 7(%ecx), %eax
ret
.p2align 4
L(exit_1_10):
add $9, %edx
jl L(return_null)
lea 9(%ecx), %eax
ret
.p2align 4
L(exit_1_11):
add $10, %edx
jl L(return_null)
lea 10(%ecx), %eax
ret
.p2align 4
L(exit_1_12):
add $11, %edx
jl L(return_null)
lea 11(%ecx), %eax
ret
.p2align 4
L(exit_1_14):
add $13, %edx
jl L(return_null)
lea 13(%ecx), %eax
ret
.p2align 4
L(exit_1_15):
add $14, %edx
jl L(return_null)
lea 14(%ecx), %eax
ret
.p2align 4
L(exit_1_16):
add $15, %edx
jl L(return_null)
lea 15(%ecx), %eax
ret
.p2align 4
L(return_null):
xor %eax, %eax
ret
.p2align 4
L(length_less16_offset0):
mov %dl, %cl
pcmpeqb (%eax), %xmm1
mov $1, %edx
sal %cl, %edx
sub $1, %edx
mov %eax, %ecx
pmovmskb %xmm1, %eax
and %edx, %eax
test %eax, %eax
jnz L(exit_dispatch)
xor %eax, %eax
ret
.p2align 4
L(length_less16):
punpcklbw %xmm1, %xmm1
add $16, %edx
punpcklbw %xmm1, %xmm1
mov %ecx, %eax
pshufd $0, %xmm1, %xmm1
and $15, %ecx
jz L(length_less16_offset0)
PUSH (%edi)
mov %cl, %dh
add %dl, %dh
and $-16, %eax
sub $16, %dh
ja L(length_less16_part2)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edi
sar %cl, %edi
add %ecx, %eax
mov %dl, %cl
mov $1, %edx
sal %cl, %edx
sub $1, %edx
and %edx, %edi
test %edi, %edi
jz L(ret_null)
bsr %edi, %edi
add %edi, %eax
POP (%edi)
ret
CFI_PUSH (%edi)
.p2align 4
L(length_less16_part2):
movdqa 16(%eax), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %edi
mov %cl, %ch
mov %dh, %cl
mov $1, %edx
sal %cl, %edx
sub $1, %edx
and %edx, %edi
test %edi, %edi
jnz L(length_less16_part2_return)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edi
mov %ch, %cl
sar %cl, %edi
test %edi, %edi
jz L(ret_null)
bsr %edi, %edi
add %edi, %eax
xor %ch, %ch
add %ecx, %eax
POP (%edi)
ret
CFI_PUSH (%edi)
.p2align 4
L(length_less16_part2_return):
bsr %edi, %edi
lea 16(%eax, %edi), %eax
POP (%edi)
ret
CFI_PUSH (%edi)
.p2align 4
L(ret_null):
xor %eax, %eax
POP (%edi)
ret
END (memrchr)
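For readers who don't speak SSE2 fluently: the routine above broadcasts the target byte (punpcklbw twice plus pshufd), compares 16 bytes at a time with pcmpeqb, condenses each vector of results into a 16-bit mask with pmovmskb, and takes the highest set bit for the right-most match. A minimal C sketch of that idiom using compiler intrinsics — illustrative only, ignoring the alignment handling, 64-byte unrolling, and length bookkeeping the assembly performs; __builtin_clz is a GCC/Clang builtin standing in for bsr:

#include <emmintrin.h>  /* SSE2 intrinsics */
#include <stddef.h>

static const void *memrchr_sse2_sketch(const void *s, int c, size_t n) {
    const unsigned char *p = (const unsigned char *)s;
    const __m128i needle = _mm_set1_epi8((char)c);  /* broadcast c */
    while (n >= 16) {
        n -= 16;
        /* Scan backwards one 16-byte block at a time. */
        __m128i chunk = _mm_loadu_si128((const __m128i *)(p + n));
        int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, needle));
        if (mask != 0)  /* highest set bit = right-most match (bsr) */
            return p + n + (31 - __builtin_clz((unsigned)mask));
    }
    while (n-- > 0)     /* short tail, byte by byte */
        if (p[n] == (unsigned char)c)
            return p + n;
    return NULL;
}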


@@ -28,9 +28,6 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cache.h"
#undef __i686
#ifndef L
# define L(label) .L##label
#endif
@@ -139,13 +136,9 @@ __i686.get_pc_thunk.bx:
jmp *TABLE(,%ecx,4)
#endif
#ifndef MEMSET
# define MEMSET memset
#endif
.section .text.sse2,"ax",@progbits
ALIGN (4)
ENTRY (MEMSET)
ENTRY (sse2_memset5_atom)
ENTRANCE
movl LEN(%esp), %ecx
@@ -918,4 +911,4 @@ L(aligned_16_15bytes):
SETRTNVAL
RETURN_END
END (MEMSET)
END (sse2_memset5_atom)
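This hunk drops the #ifndef MEMSET / #define MEMSET memset fallback and renames the entry point from the public memset symbol to sse2_memset5_atom. A hedged reading: the Atom-tuned routine now builds under a private name so a dispatch layer (not shown in this diff) can select among implementations. A purely illustrative C sketch of such function-pointer dispatch — every name here, including the CPU probe, is invented for the example:

#include <stddef.h>

typedef void *(*memset_fn)(void *dst, int c, size_t n);

/* Portable fallback; stands in for the generic implementation. */
static void *memset_generic(void *dst, int c, size_t n) {
    unsigned char *p = (unsigned char *)dst;
    while (n--)
        *p++ = (unsigned char)c;
    return dst;
}

/* Placeholder for the hand-written sse2_memset5_atom assembly. */
static void *memset_atom(void *dst, int c, size_t n) {
    return memset_generic(dst, c, n);
}

static int running_on_atom(void) { return 0; }  /* assumed CPU probe */

/* Choose once, e.g. at library init, then call through the pointer. */
static memset_fn resolve_memset(void) {
    return running_on_atom() ? memset_atom : memset_generic;
}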


@@ -1,391 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 8
#define ENTRANCE PUSH(%edi)
#define RETURN POP (%edi); ret; CFI_PUSH (%edi);
#define STR1 PARMS
#define STR2 STR1+4
.text
ENTRY (strchr)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
pxor %xmm2, %xmm2
mov %ecx, %edi
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
/* ECX has OFFSET. */
and $15, %ecx
pshufd $0, %xmm1, %xmm1
je L(loop)
/* Handle unaligned string. */
and $-16, %edi
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm2, %edx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
/* Remove the leading bytes. */
sarl %cl, %edx
sarl %cl, %eax
test %eax, %eax
jz L(unaligned_no_match)
add %ecx, %edi
test %edx, %edx
jz L(match_case1)
jmp L(match_case2)
.p2align 4
L(unaligned_no_match):
test %edx, %edx
jne L(return_null)
pxor %xmm2, %xmm2
add $16, %edi
.p2align 4
/* Loop start on aligned string. */
L(loop):
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
test %edx, %edx
jnz L(return_null)
add $16, %edi
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
test %edx, %edx
jnz L(return_null)
add $16, %edi
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
test %edx, %edx
jnz L(return_null)
add $16, %edi
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
test %edx, %edx
jnz L(return_null)
add $16, %edi
jmp L(loop)
L(matches):
/* There is a match. First find where NULL is. */
test %edx, %edx
jz L(match_case1)
.p2align 4
L(match_case2):
test %al, %al
jz L(match_higth_case2)
mov %al, %cl
and $15, %cl
jnz L(match_case2_4)
mov %dl, %ch
and $15, %ch
jnz L(return_null)
test $0x10, %al
jnz L(Exit5)
test $0x10, %dl
jnz L(return_null)
test $0x20, %al
jnz L(Exit6)
test $0x20, %dl
jnz L(return_null)
test $0x40, %al
jnz L(Exit7)
test $0x40, %dl
jnz L(return_null)
lea 7(%edi), %eax
RETURN
.p2align 4
L(match_case2_4):
test $0x01, %al
jnz L(Exit1)
test $0x01, %dl
jnz L(return_null)
test $0x02, %al
jnz L(Exit2)
test $0x02, %dl
jnz L(return_null)
test $0x04, %al
jnz L(Exit3)
test $0x04, %dl
jnz L(return_null)
lea 3(%edi), %eax
RETURN
.p2align 4
L(match_higth_case2):
test %dl, %dl
jnz L(return_null)
mov %ah, %cl
and $15, %cl
jnz L(match_case2_12)
mov %dh, %ch
and $15, %ch
jnz L(return_null)
test $0x10, %ah
jnz L(Exit13)
test $0x10, %dh
jnz L(return_null)
test $0x20, %ah
jnz L(Exit14)
test $0x20, %dh
jnz L(return_null)
test $0x40, %ah
jnz L(Exit15)
test $0x40, %dh
jnz L(return_null)
lea 15(%edi), %eax
RETURN
.p2align 4
L(match_case2_12):
test $0x01, %ah
jnz L(Exit9)
test $0x01, %dh
jnz L(return_null)
test $0x02, %ah
jnz L(Exit10)
test $0x02, %dh
jnz L(return_null)
test $0x04, %ah
jnz L(Exit11)
test $0x04, %dh
jnz L(return_null)
lea 11(%edi), %eax
RETURN
.p2align 4
L(match_case1):
test %al, %al
jz L(match_higth_case1)
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
test $0x08, %al
jnz L(Exit4)
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
jnz L(Exit6)
test $0x40, %al
jnz L(Exit7)
lea 7(%edi), %eax
RETURN
.p2align 4
L(match_higth_case1):
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
test $0x08, %ah
jnz L(Exit12)
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
jnz L(Exit14)
test $0x40, %ah
jnz L(Exit15)
lea 15(%edi), %eax
RETURN
.p2align 4
L(Exit1):
lea (%edi), %eax
RETURN
.p2align 4
L(Exit2):
lea 1(%edi), %eax
RETURN
.p2align 4
L(Exit3):
lea 2(%edi), %eax
RETURN
.p2align 4
L(Exit4):
lea 3(%edi), %eax
RETURN
.p2align 4
L(Exit5):
lea 4(%edi), %eax
RETURN
.p2align 4
L(Exit6):
lea 5(%edi), %eax
RETURN
.p2align 4
L(Exit7):
lea 6(%edi), %eax
RETURN
.p2align 4
L(Exit9):
lea 8(%edi), %eax
RETURN
.p2align 4
L(Exit10):
lea 9(%edi), %eax
RETURN
.p2align 4
L(Exit11):
lea 10(%edi), %eax
RETURN
.p2align 4
L(Exit12):
lea 11(%edi), %eax
RETURN
.p2align 4
L(Exit13):
lea 12(%edi), %eax
RETURN
.p2align 4
L(Exit14):
lea 13(%edi), %eax
RETURN
.p2align 4
L(Exit15):
lea 14(%edi), %eax
RETURN
.p2align 4
L(return_null):
xor %eax, %eax
RETURN
END (strchr)
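The strchr routine above compares each 16-byte block against the target byte and against NUL in parallel, and whichever mask bit comes first decides the outcome. A hedged C sketch of that double comparison — it uses unaligned loads for brevity, whereas the assembly aligns first so movdqa never crosses a page boundary, and __builtin_ctz is a GCC/Clang builtin:

#include <emmintrin.h>
#include <stddef.h>

static const char *strchr_sse2_sketch(const char *s, int c) {
    const __m128i needle = _mm_set1_epi8((char)c);
    const __m128i zero = _mm_setzero_si128();
    for (;; s += 16) {
        __m128i chunk = _mm_loadu_si128((const __m128i *)s);
        unsigned match = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(chunk, needle));
        unsigned nul   = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(chunk, zero));
        if ((match | nul) != 0) {
            /* Lowest set bit wins: a match at or before the
               terminator succeeds (<= also handles c == '\0'). */
            if (match != 0 && (nul == 0 || __builtin_ctz(match) <= __builtin_ctz(nul)))
                return s + __builtin_ctz(match);
            return NULL;
        }
    }
}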


@@ -1,112 +1,71 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
#define STRLEN sse2_strlen_atom
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
#ifndef L
# define L(label) .L##label
#endif
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef USE_AS_STRCAT
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
# ifndef STRLEN
# define STRLEN strlen
# endif
#ifndef cfi_remember_state
# define cfi_remember_state .cfi_remember_state
#endif
# ifndef L
# define L(label) .L##label
# endif
#ifndef cfi_restore_state
# define cfi_restore_state .cfi_restore_state
#endif
# ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
# endif
# ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
# endif
/* callee-saved registers are only required for strnlen */
# ifdef USE_AS_STRNLEN
# ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
# endif
# ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
# endif
# ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
# endif
# endif
# ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
# endif
#endif
# ifndef END
# define END(name) \
cfi_endproc; \
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
# endif
#endif
# define PARMS 4
# define STR PARMS
# define RETURN ret
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
# ifdef USE_AS_STRNLEN
# define LEN PARMS + 8
# define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
# define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
# undef RETURN
# define RETURN POP (%edi); ret; CFI_PUSH(%edi);
# endif
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 4
#define STR PARMS
#define ENTRANCE
#define RETURN ret
.text
ENTRY (STRLEN)
ENTRANCE
mov STR(%esp), %edx
# ifdef USE_AS_STRNLEN
PUSH (%edi)
movl LEN(%esp), %edi
sub $4, %edi
jbe L(len_less4_prolog)
# endif
#endif
xor %eax, %eax
cmpb $0, (%edx)
jz L(exit_tail0)
@@ -116,12 +75,6 @@ ENTRY (STRLEN)
jz L(exit_tail2)
cmpb $0, 3(%edx)
jz L(exit_tail3)
#ifdef USE_AS_STRNLEN
sub $4, %edi
jbe L(len_less8_prolog)
#endif
cmpb $0, 4(%edx)
jz L(exit_tail4)
cmpb $0, 5(%edx)
@@ -130,12 +83,6 @@ ENTRY (STRLEN)
jz L(exit_tail6)
cmpb $0, 7(%edx)
jz L(exit_tail7)
#ifdef USE_AS_STRNLEN
sub $4, %edi
jbe L(len_less12_prolog)
#endif
cmpb $0, 8(%edx)
jz L(exit_tail8)
cmpb $0, 9(%edx)
@@ -144,12 +91,6 @@ ENTRY (STRLEN)
jz L(exit_tail10)
cmpb $0, 11(%edx)
jz L(exit_tail11)
#ifdef USE_AS_STRNLEN
sub $4, %edi
jbe L(len_less16_prolog)
#endif
cmpb $0, 12(%edx)
jz L(exit_tail12)
cmpb $0, 13(%edx)
@@ -158,532 +99,212 @@ ENTRY (STRLEN)
jz L(exit_tail14)
cmpb $0, 15(%edx)
jz L(exit_tail15)
pxor %xmm0, %xmm0
lea 16(%edx), %eax
mov %eax, %ecx
mov %edx, %eax
mov %edx, %ecx
and $-16, %eax
#ifdef USE_AS_STRNLEN
and $15, %edx
add %edx, %edi
sub $64, %edi
jbe L(len_less64)
#endif
add $16, %ecx
add $16, %eax
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
#ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
#ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
#ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
#ifdef USE_AS_STRNLEN
mov %eax, %edx
and $63, %edx
add %edx, %edi
#endif
and $-0x40, %eax
.p2align 4
L(aligned_64_loop):
#ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
#endif
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
movaps 48(%eax), %xmm6
pminub %xmm1, %xmm0
pminub %xmm6, %xmm2
pminub %xmm0, %xmm2
pcmpeqb %xmm3, %xmm2
pmovmskb %xmm2, %edx
PUSH (%esi)
PUSH (%edi)
PUSH (%ebx)
PUSH (%ebp)
xor %ebp, %ebp
L(aligned_64):
pcmpeqb (%eax), %xmm0
pcmpeqb 16(%eax), %xmm1
pcmpeqb 32(%eax), %xmm2
pcmpeqb 48(%eax), %xmm3
pmovmskb %xmm0, %edx
pmovmskb %xmm1, %esi
pmovmskb %xmm2, %edi
pmovmskb %xmm3, %ebx
or %edx, %ebp
or %esi, %ebp
or %edi, %ebp
or %ebx, %ebp
lea 64(%eax), %eax
jz L(aligned_64)
L(48leave):
test %edx, %edx
jz L(aligned_64_loop)
pcmpeqb -64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 48(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqb -32(%eax), %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqb %xmm6, %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
jnz L(aligned_64_exit_16)
test %esi, %esi
jnz L(aligned_64_exit_32)
test %edi, %edi
jnz L(aligned_64_exit_48)
mov %ebx, %edx
lea (%eax), %eax
jmp L(aligned_64_exit)
L(aligned_64_exit_48):
lea -16(%eax), %eax
mov %edi, %edx
jmp L(aligned_64_exit)
L(aligned_64_exit_32):
lea -32(%eax), %eax
mov %esi, %edx
jmp L(aligned_64_exit)
L(aligned_64_exit_16):
lea -48(%eax), %eax
L(aligned_64_exit):
POP (%ebp)
POP (%ebx)
POP (%edi)
POP (%esi)
L(exit):
sub %ecx, %eax
test %dl, %dl
jz L(exit_high)
mov %dl, %cl
and $15, %cl
jz L(exit_8)
test $0x01, %dl
jnz L(exit_tail0)
test $0x02, %dl
jnz L(exit_tail1)
test $0x04, %dl
jnz L(exit_tail2)
add $3, %eax
RETURN
.p2align 4
L(exit_8):
test $0x08, %dl
jnz L(exit_tail3)
test $0x10, %dl
jnz L(exit_tail4)
test $0x20, %dl
jnz L(exit_tail5)
test $0x40, %dl
jnz L(exit_tail6)
add $7, %eax
RETURN
.p2align 4
L(exit_high):
mov %dh, %ch
and $15, %ch
jz L(exit_high_8)
test $0x01, %dh
jnz L(exit_tail8)
test $0x02, %dh
jnz L(exit_tail9)
test $0x04, %dh
jnz L(exit_tail10)
add $11, %eax
RETURN
.p2align 4
L(exit_high_8):
test $0x10, %dh
jnz L(exit_tail12)
test $0x20, %dh
jnz L(exit_tail13)
test $0x40, %dh
jnz L(exit_tail14)
add $15, %eax
L(exit_tail0):
RETURN
#ifdef USE_AS_STRNLEN
.p2align 4
L(len_less64):
pxor %xmm0, %xmm0
add $64, %edi
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
sub $16, %edi
jbe L(return_start_len)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
sub $16, %edi
jbe L(return_start_len)
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
sub $16, %edi
jbe L(return_start_len)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
#ifndef USE_AS_STRLCAT
movl LEN(%esp), %eax
RETURN
#else
jmp L(return_start_len)
#endif
.p2align 4
L(strnlen_exit):
sub %ecx, %eax
test %dl, %dl
jz L(strnlen_exit_high)
mov %dl, %cl
and $15, %cl
jz L(strnlen_exit_8)
test $0x01, %dl
jnz L(exit_tail0)
test $0x02, %dl
jnz L(strnlen_exit_tail1)
test $0x04, %dl
jnz L(strnlen_exit_tail2)
sub $4, %edi
jb L(return_start_len)
lea 3(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_8):
test $0x10, %dl
jnz L(strnlen_exit_tail4)
test $0x20, %dl
jnz L(strnlen_exit_tail5)
test $0x40, %dl
jnz L(strnlen_exit_tail6)
sub $8, %edi
jb L(return_start_len)
lea 7(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_high):
mov %dh, %ch
and $15, %ch
jz L(strnlen_exit_high_8)
L(exit_high):
add $8, %eax
test $0x01, %dh
jnz L(strnlen_exit_tail8)
jnz L(exit_tail0)
test $0x02, %dh
jnz L(strnlen_exit_tail9)
jnz L(exit_tail1)
test $0x04, %dh
jnz L(strnlen_exit_tail10)
sub $12, %edi
jb L(return_start_len)
lea 11(%eax), %eax
RETURN
jnz L(exit_tail2)
test $0x08, %dh
jnz L(exit_tail3)
.p2align 4
L(strnlen_exit_high_8):
test $0x10, %dh
jnz L(strnlen_exit_tail12)
jnz L(exit_tail4)
test $0x20, %dh
jnz L(strnlen_exit_tail13)
jnz L(exit_tail5)
test $0x40, %dh
jnz L(strnlen_exit_tail14)
sub $16, %edi
jb L(return_start_len)
lea 15(%eax), %eax
jnz L(exit_tail6)
add $7, %eax
RETURN
.p2align 4
L(strnlen_exit_tail1):
sub $2, %edi
jb L(return_start_len)
lea 1(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail2):
sub $3, %edi
jb L(return_start_len)
lea 2(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail4):
sub $5, %edi
jb L(return_start_len)
lea 4(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail5):
sub $6, %edi
jb L(return_start_len)
lea 5(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail6):
sub $7, %edi
jb L(return_start_len)
lea 6(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail8):
sub $9, %edi
jb L(return_start_len)
lea 8(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail9):
sub $10, %edi
jb L(return_start_len)
lea 9(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail10):
sub $11, %edi
jb L(return_start_len)
lea 10(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail12):
sub $13, %edi
jb L(return_start_len)
lea 12(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail13):
sub $14, %edi
jb L(return_start_len)
lea 13(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail14):
sub $15, %edi
jb L(return_start_len)
lea 14(%eax), %eax
RETURN
#ifndef USE_AS_STRLCAT
.p2align 4
L(return_start_len):
movl LEN(%esp), %eax
RETURN
#endif
/* The following short-length paths are used by the prolog only. */
.p2align 4
L(len_less4_prolog):
xor %eax, %eax
add $4, %edi
jz L(exit_tail0)
cmpb $0, (%edx)
jz L(exit_tail0)
cmp $1, %edi
je L(exit_tail1)
cmpb $0, 1(%edx)
jz L(exit_tail1)
cmp $2, %edi
je L(exit_tail2)
cmpb $0, 2(%edx)
jz L(exit_tail2)
cmp $3, %edi
je L(exit_tail3)
cmpb $0, 3(%edx)
jz L(exit_tail3)
mov %edi, %eax
RETURN
.p2align 4
L(len_less8_prolog):
add $4, %edi
cmpb $0, 4(%edx)
jz L(exit_tail4)
cmp $1, %edi
je L(exit_tail5)
cmpb $0, 5(%edx)
jz L(exit_tail5)
cmp $2, %edi
je L(exit_tail6)
cmpb $0, 6(%edx)
jz L(exit_tail6)
cmp $3, %edi
je L(exit_tail7)
cmpb $0, 7(%edx)
jz L(exit_tail7)
mov $8, %eax
RETURN
.p2align 4
L(len_less12_prolog):
add $4, %edi
cmpb $0, 8(%edx)
jz L(exit_tail8)
cmp $1, %edi
je L(exit_tail9)
cmpb $0, 9(%edx)
jz L(exit_tail9)
cmp $2, %edi
je L(exit_tail10)
cmpb $0, 10(%edx)
jz L(exit_tail10)
cmp $3, %edi
je L(exit_tail11)
cmpb $0, 11(%edx)
jz L(exit_tail11)
mov $12, %eax
RETURN
.p2align 4
L(len_less16_prolog):
add $4, %edi
cmpb $0, 12(%edx)
jz L(exit_tail12)
cmp $1, %edi
je L(exit_tail13)
cmpb $0, 13(%edx)
jz L(exit_tail13)
cmp $2, %edi
je L(exit_tail14)
cmpb $0, 14(%edx)
jz L(exit_tail14)
cmp $3, %edi
je L(exit_tail15)
cmpb $0, 15(%edx)
jz L(exit_tail15)
mov $16, %eax
RETURN
#endif
.p2align 4
L(exit_tail1):
add $1, %eax
@@ -743,7 +364,6 @@ L(exit_tail14):
L(exit_tail15):
add $15, %eax
#ifndef USE_AS_STRCAT
RETURN
ret
END (STRLEN)
#endif
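The 64-byte main loop above relies on a pminub trick: the unsigned byte-wise minimum of four vectors contains a zero byte iff any of the four inputs did, so one compare-and-movemask covers 64 bytes. A hedged C sketch of the same structure, with the alignment prolog reduced to a byte-wise loop for clarity (not the shipped implementation):

#include <emmintrin.h>
#include <stddef.h>
#include <stdint.h>

static size_t strlen_sse2_sketch(const char *s) {
    const char *p = s;
    while (((uintptr_t)p & 63) != 0) {  /* reach a 64-byte boundary */
        if (*p == '\0')
            return (size_t)(p - s);
        ++p;
    }
    const __m128i zero = _mm_setzero_si128();
    for (;;) {
        __m128i a = _mm_load_si128((const __m128i *)p);
        __m128i b = _mm_load_si128((const __m128i *)(p + 16));
        __m128i c = _mm_load_si128((const __m128i *)(p + 32));
        __m128i d = _mm_load_si128((const __m128i *)(p + 48));
        /* min of all 64 bytes is zero iff some byte is zero */
        __m128i folded = _mm_min_epu8(_mm_min_epu8(a, b), _mm_min_epu8(c, d));
        if (_mm_movemask_epi8(_mm_cmpeq_epi8(folded, zero)) != 0)
            break;          /* one of the four vectors holds the NUL */
        p += 64;
    }
    while (*p != '\0')      /* pinpoint it byte by byte */
        ++p;
    return (size_t)(p - s);
}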


@@ -1,33 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define USE_AS_STRNLEN 1
#define STRLEN strnlen
#include "sse2-strlen-atom.S"


@@ -1,753 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 8
#define ENTRANCE PUSH(%edi);
#define RETURN POP (%edi); ret; CFI_PUSH (%edi);
#define STR1 PARMS
#define STR2 STR1+4
.text
ENTRY (strrchr)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
pxor %xmm2, %xmm2
mov %ecx, %edi
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
/* ECX has OFFSET. */
and $63, %ecx
pshufd $0, %xmm1, %xmm1
cmp $48, %ecx
ja L(crosscache)
/* Unaligned string. */
movdqu (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm2, %ecx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
add $16, %edi
test %eax, %eax
jnz L(unaligned_match1)
test %ecx, %ecx
jnz L(return_null)
and $-16, %edi
PUSH (%esi)
PUSH (%ebx)
xor %ebx, %ebx
jmp L(loop)
CFI_POP (%esi)
CFI_POP (%ebx)
.p2align 4
L(unaligned_match1):
test %ecx, %ecx
jnz L(prolog_find_zero_1)
PUSH (%esi)
PUSH (%ebx)
mov %eax, %ebx
mov %edi, %esi
and $-16, %edi
jmp L(loop)
CFI_POP (%esi)
CFI_POP (%ebx)
.p2align 4
L(crosscache):
/* Handle unaligned string. */
and $15, %ecx
and $-16, %edi
pxor %xmm3, %xmm3
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm3
pcmpeqb %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm3, %edx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
/* Remove the leading bytes. */
shr %cl, %edx
shr %cl, %eax
add $16, %edi
test %eax, %eax
jnz L(unaligned_match)
test %edx, %edx
jnz L(return_null)
PUSH (%esi)
PUSH (%ebx)
xor %ebx, %ebx
jmp L(loop)
CFI_POP (%esi)
CFI_POP (%ebx)
.p2align 4
L(unaligned_match):
test %edx, %edx
jnz L(prolog_find_zero)
PUSH (%esi)
PUSH (%ebx)
mov %eax, %ebx
lea (%edi, %ecx), %esi
/* Loop start on aligned string. */
.p2align 4
L(loop):
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jz L(loop)
L(matches):
test %eax, %eax
jnz L(match)
L(return_value):
test %ebx, %ebx
jz L(return_null_1)
mov %ebx, %eax
mov %esi, %edi
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(return_null_1):
POP (%ebx)
POP (%esi)
xor %eax, %eax
RETURN
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(match):
pmovmskb %xmm2, %ecx
test %ecx, %ecx
jnz L(find_zero)
mov %eax, %ebx
mov %edi, %esi
jmp L(loop)
.p2align 4
L(find_zero):
test %cl, %cl
jz L(find_zero_high)
mov %cl, %dl
and $15, %dl
jz L(find_zero_8)
test $0x01, %cl
jnz L(FindZeroExit1)
test $0x02, %cl
jnz L(FindZeroExit2)
test $0x04, %cl
jnz L(FindZeroExit3)
and $1 << 4 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(find_zero_8):
test $0x10, %cl
jnz L(FindZeroExit5)
test $0x20, %cl
jnz L(FindZeroExit6)
test $0x40, %cl
jnz L(FindZeroExit7)
and $1 << 8 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(find_zero_high):
mov %ch, %dh
and $15, %dh
jz L(find_zero_high_8)
test $0x01, %ch
jnz L(FindZeroExit9)
test $0x02, %ch
jnz L(FindZeroExit10)
test $0x04, %ch
jnz L(FindZeroExit11)
and $1 << 12 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(find_zero_high_8):
test $0x10, %ch
jnz L(FindZeroExit13)
test $0x20, %ch
jnz L(FindZeroExit14)
test $0x40, %ch
jnz L(FindZeroExit15)
and $1 << 16 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit1):
and $1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit2):
and $1 << 2 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit3):
and $1 << 3 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit5):
and $1 << 5 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit6):
and $1 << 6 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit7):
and $1 << 7 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit9):
and $1 << 9 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit10):
and $1 << 10 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit11):
and $1 << 11 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit13):
and $1 << 13 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit14):
and $1 << 14 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit15):
and $1 << 15 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
.p2align 4
L(match_case1):
test %ah, %ah
jnz L(match_case1_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(match_case1_8)
test $0x08, %al
jnz L(Exit4)
test $0x04, %al
jnz L(Exit3)
test $0x02, %al
jnz L(Exit2)
lea -16(%edi), %eax
RETURN
.p2align 4
L(match_case1_8):
test $0x80, %al
jnz L(Exit8)
test $0x40, %al
jnz L(Exit7)
test $0x20, %al
jnz L(Exit6)
lea -12(%edi), %eax
RETURN
.p2align 4
L(match_case1_high):
mov %ah, %dh
and $15 << 4, %dh
jnz L(match_case1_high_8)
test $0x08, %ah
jnz L(Exit12)
test $0x04, %ah
jnz L(Exit11)
test $0x02, %ah
jnz L(Exit10)
lea -8(%edi), %eax
RETURN
.p2align 4
L(match_case1_high_8):
test $0x80, %ah
jnz L(Exit16)
test $0x40, %ah
jnz L(Exit15)
test $0x20, %ah
jnz L(Exit14)
lea -4(%edi), %eax
RETURN
.p2align 4
L(Exit2):
lea -15(%edi), %eax
RETURN
.p2align 4
L(Exit3):
lea -14(%edi), %eax
RETURN
.p2align 4
L(Exit4):
lea -13(%edi), %eax
RETURN
.p2align 4
L(Exit6):
lea -11(%edi), %eax
RETURN
.p2align 4
L(Exit7):
lea -10(%edi), %eax
RETURN
.p2align 4
L(Exit8):
lea -9(%edi), %eax
RETURN
.p2align 4
L(Exit10):
lea -7(%edi), %eax
RETURN
.p2align 4
L(Exit11):
lea -6(%edi), %eax
RETURN
.p2align 4
L(Exit12):
lea -5(%edi), %eax
RETURN
.p2align 4
L(Exit14):
lea -3(%edi), %eax
RETURN
.p2align 4
L(Exit15):
lea -2(%edi), %eax
RETURN
.p2align 4
L(Exit16):
lea -1(%edi), %eax
RETURN
/* Return NULL. */
.p2align 4
L(return_null):
xor %eax, %eax
RETURN
.p2align 4
L(prolog_find_zero):
add %ecx, %edi
mov %edx, %ecx
L(prolog_find_zero_1):
test %cl, %cl
jz L(prolog_find_zero_high)
mov %cl, %dl
and $15, %dl
jz L(prolog_find_zero_8)
test $0x01, %cl
jnz L(PrologFindZeroExit1)
test $0x02, %cl
jnz L(PrologFindZeroExit2)
test $0x04, %cl
jnz L(PrologFindZeroExit3)
and $1 << 4 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(prolog_find_zero_8):
test $0x10, %cl
jnz L(PrologFindZeroExit5)
test $0x20, %cl
jnz L(PrologFindZeroExit6)
test $0x40, %cl
jnz L(PrologFindZeroExit7)
and $1 << 8 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(prolog_find_zero_high):
mov %ch, %dh
and $15, %dh
jz L(prolog_find_zero_high_8)
test $0x01, %ch
jnz L(PrologFindZeroExit9)
test $0x02, %ch
jnz L(PrologFindZeroExit10)
test $0x04, %ch
jnz L(PrologFindZeroExit11)
and $1 << 12 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(prolog_find_zero_high_8):
test $0x10, %ch
jnz L(PrologFindZeroExit13)
test $0x20, %ch
jnz L(PrologFindZeroExit14)
test $0x40, %ch
jnz L(PrologFindZeroExit15)
and $1 << 16 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit1):
and $1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit2):
and $1 << 2 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit3):
and $1 << 3 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit5):
and $1 << 5 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit6):
and $1 << 6 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit7):
and $1 << 7 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit9):
and $1 << 9 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit10):
and $1 << 10 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit11):
and $1 << 11 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit13):
and $1 << 13 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit14):
and $1 << 14 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit15):
and $1 << 15 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
END (strrchr)
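strrchr scans forward while remembering the most recent block that contained a match; once the terminator appears, only match bits at or below the NUL bit count, and the highest survivor wins. A hedged C sketch of that bookkeeping — unaligned loads stand in for the assembly's cross-cache handling, and __builtin_ctz/__builtin_clz are GCC/Clang builtins:

#include <emmintrin.h>
#include <stddef.h>

static const char *strrchr_sse2_sketch(const char *s, int c) {
    const __m128i needle = _mm_set1_epi8((char)c);
    const __m128i zero = _mm_setzero_si128();
    const char *last_base = NULL;   /* block holding the latest match */
    unsigned last_mask = 0;
    for (;; s += 16) {
        __m128i chunk = _mm_loadu_si128((const __m128i *)s);
        unsigned match = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(chunk, needle));
        unsigned nul   = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(chunk, zero));
        if (nul != 0) {
            /* Keep matches at or before the terminator (the "at"
               case only fires when c == '\0'). */
            match &= (2u << __builtin_ctz(nul)) - 1;
            if (match != 0)
                return s + (31 - __builtin_clz(match));
            if (last_mask != 0)
                return last_base + (31 - __builtin_clz(last_mask));
            return NULL;
        }
        if (match != 0) {           /* remember, keep scanning */
            last_base = s;
            last_mask = match;
        }
    }
}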


@@ -1,267 +0,0 @@
/*
Copyright (c) 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 4
#define STR1 PARMS
#define STR2 STR1+4
.text
ENTRY (wcschr)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
mov %ecx, %eax
punpckldq %xmm1, %xmm1
pxor %xmm2, %xmm2
punpckldq %xmm1, %xmm1
and $63, %eax
cmp $48, %eax
ja L(cross_cache)
movdqu (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jnz L(matches)
and $-16, %ecx
jmp L(loop)
.p2align 4
L(cross_cache):
PUSH (%edi)
mov %ecx, %edi
mov %eax, %ecx
and $-16, %edi
and $15, %ecx
movdqa (%edi), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
sarl %cl, %edx
sarl %cl, %eax
test %eax, %eax
jz L(unaligned_no_match)
add %edi, %ecx
POP (%edi)
test %edx, %edx
jz L(match_case1)
test %al, %al
jz L(match_higth_case2)
test $15, %al
jnz L(match_case2_4)
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
ret
CFI_PUSH (%edi)
.p2align 4
L(unaligned_no_match):
mov %edi, %ecx
POP (%edi)
test %edx, %edx
jnz L(return_null)
pxor %xmm2, %xmm2
/* Loop start on aligned string. */
.p2align 4
L(loop):
add $16, %ecx
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jnz L(matches)
add $16, %ecx
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jnz L(matches)
add $16, %ecx
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jnz L(matches)
add $16, %ecx
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jz L(loop)
.p2align 4
L(matches):
pmovmskb %xmm2, %edx
test %eax, %eax
jz L(return_null)
test %edx, %edx
jz L(match_case1)
.p2align 4
L(match_case2):
test %al, %al
jz L(match_higth_case2)
test $15, %al
jnz L(match_case2_4)
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
ret
.p2align 4
L(match_case2_4):
mov %ecx, %eax
ret
.p2align 4
L(match_higth_case2):
test %dl, %dl
jnz L(return_null)
test $15, %ah
jnz L(match_case2_12)
test $15, %dh
jnz L(return_null)
lea 12(%ecx), %eax
ret
.p2align 4
L(match_case2_12):
lea 8(%ecx), %eax
ret
.p2align 4
L(match_case1):
test %al, %al
jz L(match_higth_case1)
test $0x01, %al
jnz L(exit0)
lea 4(%ecx), %eax
ret
.p2align 4
L(match_higth_case1):
test $0x01, %ah
jnz L(exit3)
lea 12(%ecx), %eax
ret
.p2align 4
L(exit0):
mov %ecx, %eax
ret
.p2align 4
L(exit3):
lea 8(%ecx), %eax
ret
.p2align 4
L(return_null):
xor %eax, %eax
ret
END (wcschr)
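wcschr has the same structure as the byte version, but pcmpeqd compares four 32-bit wchar_t values at once, so every hit sets a nibble (four bits) in the pmovmskb result — hence the element index is the bit index divided by four. A hedged C sketch, assuming the 32-bit wchar_t that bionic uses and substituting unaligned loads for the cross-cache handling:

#include <emmintrin.h>
#include <stddef.h>
#include <wchar.h>

static const wchar_t *wcschr_sse2_sketch(const wchar_t *s, wchar_t c) {
    const __m128i needle = _mm_set1_epi32((int)c);
    const __m128i zero = _mm_setzero_si128();
    for (;; s += 4) {
        __m128i chunk = _mm_loadu_si128((const __m128i *)s);
        unsigned match = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi32(chunk, needle));
        unsigned nul   = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi32(chunk, zero));
        if ((match | nul) != 0) {
            if (match != 0 && (nul == 0 || __builtin_ctz(match) <= __builtin_ctz(nul)))
                return s + __builtin_ctz(match) / 4;  /* nibble -> element */
            return NULL;
        }
    }
}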

File diff suppressed because it is too large.


@@ -1,306 +0,0 @@
/*
Copyright (c) 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef USE_AS_WCSCAT
# ifndef L
# define L(label) .L##label
# endif
# ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
# endif
# ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
# endif
# ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
# endif
# ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
# endif
# define PARMS 4
# define STR PARMS
# define RETURN ret
.text
ENTRY (wcslen)
mov STR(%esp), %edx
#endif
cmp $0, (%edx)
jz L(exit_tail0)
cmp $0, 4(%edx)
jz L(exit_tail1)
cmp $0, 8(%edx)
jz L(exit_tail2)
cmp $0, 12(%edx)
jz L(exit_tail3)
cmp $0, 16(%edx)
jz L(exit_tail4)
cmp $0, 20(%edx)
jz L(exit_tail5)
cmp $0, 24(%edx)
jz L(exit_tail6)
cmp $0, 28(%edx)
jz L(exit_tail7)
pxor %xmm0, %xmm0
lea 32(%edx), %eax
lea -16(%eax), %ecx
and $-16, %eax
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
and $-0x40, %eax
.p2align 4
L(aligned_64_loop):
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
movaps 48(%eax), %xmm6
pminub %xmm1, %xmm0
pminub %xmm6, %xmm2
pminub %xmm0, %xmm2
pcmpeqd %xmm3, %xmm2
pmovmskb %xmm2, %edx
lea 64(%eax), %eax
test %edx, %edx
jz L(aligned_64_loop)
pcmpeqd -64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 48(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqd %xmm1, %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqd -32(%eax), %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqd %xmm6, %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
test %edx, %edx
jnz L(exit)
jmp L(aligned_64_loop)
.p2align 4
L(exit):
sub %ecx, %eax
shr $2, %eax
test %dl, %dl
jz L(exit_high)
mov %dl, %cl
and $15, %cl
jz L(exit_1)
RETURN
.p2align 4
L(exit_high):
mov %dh, %ch
and $15, %ch
jz L(exit_3)
add $2, %eax
RETURN
.p2align 4
L(exit_1):
add $1, %eax
RETURN
.p2align 4
L(exit_3):
add $3, %eax
RETURN
.p2align 4
L(exit_tail0):
xor %eax, %eax
RETURN
.p2align 4
L(exit_tail1):
mov $1, %eax
RETURN
.p2align 4
L(exit_tail2):
mov $2, %eax
RETURN
.p2align 4
L(exit_tail3):
mov $3, %eax
RETURN
.p2align 4
L(exit_tail4):
mov $4, %eax
RETURN
.p2align 4
L(exit_tail5):
mov $5, %eax
RETURN
.p2align 4
L(exit_tail6):
mov $6, %eax
RETURN
.p2align 4
L(exit_tail7):
mov $7, %eax
#ifndef USE_AS_WCSCAT
RETURN
END (wcslen)
#endif
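wcslen is the strlen scan at dword granularity: the final "shr $2, %eax" above converts the byte offset into a wchar_t count. A hedged C sketch of just that core (unaligned loads, none of the prolog unrolling):

#include <emmintrin.h>
#include <stddef.h>
#include <wchar.h>

static size_t wcslen_sse2_sketch(const wchar_t *s) {
    const __m128i zero = _mm_setzero_si128();
    const wchar_t *p = s;
    for (;; p += 4) {
        __m128i chunk = _mm_loadu_si128((const __m128i *)p);
        unsigned nul = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi32(chunk, zero));
        if (nul != 0)   /* bit index / 4 = element index in this block */
            return (size_t)(p - s) + __builtin_ctz(nul) / 4;
    }
}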


@@ -1,402 +0,0 @@
/*
Copyright (c) 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 8
#define ENTRANCE PUSH(%edi);
#define RETURN POP(%edi); ret; CFI_PUSH(%edi);
#define STR1 PARMS
#define STR2 STR1+4
.text
ENTRY (wcsrchr)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
mov %ecx, %edi
punpckldq %xmm1, %xmm1
pxor %xmm2, %xmm2
punpckldq %xmm1, %xmm1
/* ECX has OFFSET. */
and $63, %ecx
cmp $48, %ecx
ja L(crosscache)
/* Unaligned string. */
movdqu (%edi), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm2, %ecx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
add $16, %edi
test %eax, %eax
jnz L(unaligned_match1)
test %ecx, %ecx
jnz L(return_null)
and $-16, %edi
PUSH (%esi)
xor %edx, %edx
jmp L(loop)
CFI_POP (%esi)
.p2align 4
L(unaligned_match1):
test %ecx, %ecx
jnz L(prolog_find_zero_1)
PUSH (%esi)
/* Save the current match. */
mov %eax, %edx
mov %edi, %esi
and $-16, %edi
jmp L(loop)
CFI_POP (%esi)
.p2align 4
L(crosscache):
/* Handle unaligned string. */
and $15, %ecx
and $-16, %edi
pxor %xmm3, %xmm3
movdqa (%edi), %xmm0
pcmpeqd %xmm0, %xmm3
pcmpeqd %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm3, %edx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
/* Remove the leading bytes. */
shr %cl, %edx
shr %cl, %eax
add $16, %edi
test %eax, %eax
jnz L(unaligned_match)
test %edx, %edx
jnz L(return_null)
PUSH (%esi)
xor %edx, %edx
jmp L(loop)
CFI_POP (%esi)
.p2align 4
L(unaligned_match):
test %edx, %edx
jnz L(prolog_find_zero)
PUSH (%esi)
mov %eax, %edx
lea (%edi, %ecx), %esi
/* Loop start on aligned string. */
.p2align 4
L(loop):
movdqa (%edi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %edi
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm3
pcmpeqd %xmm3, %xmm2
add $16, %edi
pcmpeqd %xmm1, %xmm3
pmovmskb %xmm2, %ecx
pmovmskb %xmm3, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm4
pcmpeqd %xmm4, %xmm2
add $16, %edi
pcmpeqd %xmm1, %xmm4
pmovmskb %xmm2, %ecx
pmovmskb %xmm4, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm5
pcmpeqd %xmm5, %xmm2
add $16, %edi
pcmpeqd %xmm1, %xmm5
pmovmskb %xmm2, %ecx
pmovmskb %xmm5, %eax
or %eax, %ecx
jz L(loop)
.p2align 4
L(matches):
test %eax, %eax
jnz L(match)
L(return_value):
test %edx, %edx
jz L(return_null_1)
mov %edx, %eax
mov %esi, %edi
POP (%esi)
test %ah, %ah
jnz L(match_third_or_fourth_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(return_null_1):
POP (%esi)
xor %eax, %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(match):
pmovmskb %xmm2, %ecx
test %ecx, %ecx
jnz L(find_zero)
/* Save match info. */
mov %eax, %edx
mov %edi, %esi
jmp L(loop)
.p2align 4
L(find_zero):
test %cl, %cl
jz L(find_zero_in_third_or_fourth_wchar)
test $15, %cl
jz L(find_zero_in_second_wchar)
and $1, %eax
jz L(return_value)
POP (%esi)
lea -16(%edi), %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(find_zero_in_second_wchar):
and $1 << 5 - 1, %eax
jz L(return_value)
POP (%esi)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(find_zero_in_third_or_fourth_wchar):
test $15, %ch
jz L(find_zero_in_fourth_wchar)
and $1 << 9 - 1, %eax
jz L(return_value)
POP (%esi)
test %ah, %ah
jnz L(match_third_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(find_zero_in_fourth_wchar):
POP (%esi)
test %ah, %ah
jnz L(match_third_or_fourth_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(match_second_wchar):
lea -12(%edi), %eax
RETURN
.p2align 4
L(match_third_or_fourth_wchar):
test $15 << 4, %ah
jnz L(match_fourth_wchar)
lea -8(%edi), %eax
RETURN
.p2align 4
L(match_third_wchar):
lea -8(%edi), %eax
RETURN
.p2align 4
L(match_fourth_wchar):
lea -4(%edi), %eax
RETURN
.p2align 4
L(return_null):
xor %eax, %eax
RETURN
.p2align 4
L(prolog_find_zero):
add %ecx, %edi
mov %edx, %ecx
L(prolog_find_zero_1):
test %cl, %cl
jz L(prolog_find_zero_in_third_or_fourth_wchar)
test $15, %cl
jz L(prolog_find_zero_in_second_wchar)
and $1, %eax
jz L(return_null)
lea -16(%edi), %eax
RETURN
.p2align 4
L(prolog_find_zero_in_second_wchar):
and $1 << 5 - 1, %eax
jz L(return_null)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
.p2align 4
L(prolog_find_zero_in_third_or_fourth_wchar):
test $15, %ch
jz L(prolog_find_zero_in_fourth_wchar)
and $1 << 9 - 1, %eax
jz L(return_null)
test %ah, %ah
jnz L(match_third_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
.p2align 4
L(prolog_find_zero_in_fourth_wchar):
test %ah, %ah
jnz L(match_third_or_fourth_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
END (wcsrchr)
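For orientation, the SSE2 routine above vectorizes the classic one-pass wcsrchr: scan forward, remember the most recent match, and stop at the terminator. A minimal C sketch of those semantics (the name wcsrchr_ref is hypothetical, not bionic's):

#include <stddef.h>
#include <wchar.h>

/* Reference semantics only: the assembly does this 4 wchar_t at a
   time, using pcmpeqd to build per-block "match" and "NUL" masks. */
wchar_t *wcsrchr_ref(const wchar_t *s, wchar_t c) {
    const wchar_t *last = NULL;
    for (;; ++s) {
        if (*s == c)
            last = s;            /* remember the latest occurrence */
        if (*s == L'\0')
            return (wchar_t *)last;
    }
}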

View File

@@ -1,37 +0,0 @@
/*
Copyright (c) 2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define MEMCMP __memcmp16
/* int __memcmp16(const unsigned short *ptr1, const unsigned short *ptr2, size_t n); */
#define USE_UTF16
#define USE_AS_MEMCMP16 1
#include "ssse3-memcmp-atom.S"

View File

@@ -28,11 +28,8 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cache.h"
#undef __i686
#ifndef MEMCPY
# define MEMCPY memcpy
# define MEMCPY ssse3_memcpy5
#endif
#ifndef L

View File

@@ -1,620 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef cfi_remember_state
# define cfi_remember_state .cfi_remember_state
#endif
#ifndef cfi_restore_state
# define cfi_restore_state .cfi_restore_state
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#ifndef STRCAT
# define STRCAT strcat
#endif
#define PARMS 4
#define STR1 PARMS+4
#define STR2 STR1+4
#ifdef USE_AS_STRNCAT
# define LEN STR2+8
#endif
#define USE_AS_STRCAT
.section .text.ssse3,"ax",@progbits
ENTRY (STRCAT)
PUSH (%edi)
mov STR1(%esp), %edi
mov %edi, %edx
#define RETURN jmp L(StrcpyAtom)
#include "sse2-strlen-atom.S"
L(StrcpyAtom):
mov STR2(%esp), %ecx
lea (%edi, %eax), %edx
#ifdef USE_AS_STRNCAT
PUSH (%ebx)
mov LEN(%esp), %ebx
test %ebx, %ebx
jz L(StrncatExit0)
cmp $8, %ebx
jbe L(StrncpyExit8Bytes)
#endif
cmpb $0, (%ecx)
jz L(Exit1)
cmpb $0, 1(%ecx)
jz L(Exit2)
cmpb $0, 2(%ecx)
jz L(Exit3)
cmpb $0, 3(%ecx)
jz L(Exit4)
cmpb $0, 4(%ecx)
jz L(Exit5)
cmpb $0, 5(%ecx)
jz L(Exit6)
cmpb $0, 6(%ecx)
jz L(Exit7)
cmpb $0, 7(%ecx)
jz L(Exit8)
cmpb $0, 8(%ecx)
jz L(Exit9)
#ifdef USE_AS_STRNCAT
cmp $16, %ebx
jb L(StrncpyExit15Bytes)
#endif
cmpb $0, 9(%ecx)
jz L(Exit10)
cmpb $0, 10(%ecx)
jz L(Exit11)
cmpb $0, 11(%ecx)
jz L(Exit12)
cmpb $0, 12(%ecx)
jz L(Exit13)
cmpb $0, 13(%ecx)
jz L(Exit14)
cmpb $0, 14(%ecx)
jz L(Exit15)
cmpb $0, 15(%ecx)
jz L(Exit16)
#ifdef USE_AS_STRNCAT
cmp $16, %ebx
je L(StrncatExit16)
# define RETURN1 POP (%ebx); POP (%edi); ret; \
CFI_PUSH (%ebx); CFI_PUSH (%edi)
# define USE_AS_STRNCPY
#else
# define RETURN1 POP(%edi); ret; CFI_PUSH(%edi)
#endif
#include "ssse3-strcpy-atom.S"
.p2align 4
L(CopyFrom1To16Bytes):
add %esi, %edx
add %esi, %ecx
POP (%esi)
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
test $0x08, %al
jnz L(Exit4)
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
jnz L(Exit6)
test $0x40, %al
jnz L(Exit7)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(ExitHigh):
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
test $0x08, %ah
jnz L(Exit12)
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
jnz L(Exit14)
test $0x40, %ah
jnz L(Exit15)
movlpd (%ecx), %xmm0
movlpd 8(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit1):
movb %bh, 1(%edx)
L(Exit1):
movb (%ecx), %al
movb %al, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit2):
movb %bh, 2(%edx)
L(Exit2):
movw (%ecx), %ax
movw %ax, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit3):
movb %bh, 3(%edx)
L(Exit3):
movw (%ecx), %ax
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit4):
movb %bh, 4(%edx)
L(Exit4):
movl (%ecx), %eax
movl %eax, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit5):
movb %bh, 5(%edx)
L(Exit5):
movl (%ecx), %eax
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit6):
movb %bh, 6(%edx)
L(Exit6):
movl (%ecx), %eax
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit7):
movb %bh, 7(%edx)
L(Exit7):
movl (%ecx), %eax
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit8):
movb %bh, 8(%edx)
L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit9):
movb %bh, 9(%edx)
L(Exit9):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movb 8(%ecx), %al
movb %al, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit10):
movb %bh, 10(%edx)
L(Exit10):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movw 8(%ecx), %ax
movw %ax, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit11):
movb %bh, 11(%edx)
L(Exit11):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 7(%ecx), %eax
movl %eax, 7(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit12):
movb %bh, 12(%edx)
L(Exit12):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit13):
movb %bh, 13(%edx)
L(Exit13):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
movlpd %xmm0, 5(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit14):
movb %bh, 14(%edx)
L(Exit14):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
movlpd %xmm0, 6(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit15):
movb %bh, 15(%edx)
L(Exit15):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit16):
movb %bh, 16(%edx)
L(Exit16):
movlpd (%ecx), %xmm0
movlpd 8(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 8(%edx)
movl %edi, %eax
RETURN1
#ifdef USE_AS_STRNCPY
CFI_PUSH(%esi)
.p2align 4
L(CopyFrom1To16BytesCase2):
add $16, %ebx
add %esi, %ecx
lea (%esi, %edx), %esi
lea -9(%ebx), %edx
and $1<<7, %dh
or %al, %dh
lea (%esi), %edx
POP (%esi)
jz L(ExitHighCase2)
test $0x01, %al
jnz L(Exit1)
cmp $1, %ebx
je L(StrncatExit1)
test $0x02, %al
jnz L(Exit2)
cmp $2, %ebx
je L(StrncatExit2)
test $0x04, %al
jnz L(Exit3)
cmp $3, %ebx
je L(StrncatExit3)
test $0x08, %al
jnz L(Exit4)
cmp $4, %ebx
je L(StrncatExit4)
test $0x10, %al
jnz L(Exit5)
cmp $5, %ebx
je L(StrncatExit5)
test $0x20, %al
jnz L(Exit6)
cmp $6, %ebx
je L(StrncatExit6)
test $0x40, %al
jnz L(Exit7)
cmp $7, %ebx
je L(StrncatExit7)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
lea 7(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
xor %cl, %cl
movb %cl, (%eax)
movl %edi, %eax
RETURN1
.p2align 4
L(ExitHighCase2):
test $0x01, %ah
jnz L(Exit9)
cmp $9, %ebx
je L(StrncatExit9)
test $0x02, %ah
jnz L(Exit10)
cmp $10, %ebx
je L(StrncatExit10)
test $0x04, %ah
jnz L(Exit11)
cmp $11, %ebx
je L(StrncatExit11)
test $0x8, %ah
jnz L(Exit12)
cmp $12, %ebx
je L(StrncatExit12)
test $0x10, %ah
jnz L(Exit13)
cmp $13, %ebx
je L(StrncatExit13)
test $0x20, %ah
jnz L(Exit14)
cmp $14, %ebx
je L(StrncatExit14)
test $0x40, %ah
jnz L(Exit15)
cmp $15, %ebx
je L(StrncatExit15)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm1
movlpd %xmm1, 8(%edx)
movl %edi, %eax
RETURN1
CFI_PUSH(%esi)
L(CopyFrom1To16BytesCase2OrCase3):
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
.p2align 4
L(CopyFrom1To16BytesCase3):
add $16, %ebx
add %esi, %edx
add %esi, %ecx
POP (%esi)
cmp $8, %ebx
ja L(ExitHighCase3)
cmp $1, %ebx
je L(StrncatExit1)
cmp $2, %ebx
je L(StrncatExit2)
cmp $3, %ebx
je L(StrncatExit3)
cmp $4, %ebx
je L(StrncatExit4)
cmp $5, %ebx
je L(StrncatExit5)
cmp $6, %ebx
je L(StrncatExit6)
cmp $7, %ebx
je L(StrncatExit7)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movb %bh, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(ExitHighCase3):
cmp $9, %ebx
je L(StrncatExit9)
cmp $10, %ebx
je L(StrncatExit10)
cmp $11, %ebx
je L(StrncatExit11)
cmp $12, %ebx
je L(StrncatExit12)
cmp $13, %ebx
je L(StrncatExit13)
cmp $14, %ebx
je L(StrncatExit14)
cmp $15, %ebx
je L(StrncatExit15)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm1
movlpd %xmm1, 8(%edx)
movb %bh, 16(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit0):
movl %edi, %eax
RETURN1
.p2align 4
L(StrncpyExit15Bytes):
cmp $9, %ebx
je L(StrncatExit9)
cmpb $0, 9(%ecx)
jz L(Exit10)
cmp $10, %ebx
je L(StrncatExit10)
cmpb $0, 10(%ecx)
jz L(Exit11)
cmp $11, %ebx
je L(StrncatExit11)
cmpb $0, 11(%ecx)
jz L(Exit12)
cmp $12, %ebx
je L(StrncatExit12)
cmpb $0, 12(%ecx)
jz L(Exit13)
cmp $13, %ebx
je L(StrncatExit13)
cmpb $0, 13(%ecx)
jz L(Exit14)
cmp $14, %ebx
je L(StrncatExit14)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
lea 14(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
movb %bh, (%eax)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncpyExit8Bytes):
cmpb $0, (%ecx)
jz L(Exit1)
cmp $1, %ebx
je L(StrncatExit1)
cmpb $0, 1(%ecx)
jz L(Exit2)
cmp $2, %ebx
je L(StrncatExit2)
cmpb $0, 2(%ecx)
jz L(Exit3)
cmp $3, %ebx
je L(StrncatExit3)
cmpb $0, 3(%ecx)
jz L(Exit4)
cmp $4, %ebx
je L(StrncatExit4)
cmpb $0, 4(%ecx)
jz L(Exit5)
cmp $5, %ebx
je L(StrncatExit5)
cmpb $0, 5(%ecx)
jz L(Exit6)
cmp $6, %ebx
je L(StrncatExit6)
cmpb $0, 6(%ecx)
jz L(Exit7)
cmp $7, %ebx
je L(StrncatExit7)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
lea 7(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
movb %bh, (%eax)
movl %edi, %eax
RETURN1
#endif
END (STRCAT)
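When built with USE_AS_STRNCAT, the removed template above implements the standard bounded append: at most LEN source bytes, always NUL-terminated. The Exit1..Exit16 ladder settles short strings before the 16-byte SSE loop is ever entered. A reference sketch of the semantics (strncat_ref is a hypothetical name):

#include <stddef.h>
#include <string.h>

char *strncat_ref(char *dst, const char *src, size_t n) {
    char *d = dst + strlen(dst);   /* the inlined sse2-strlen-atom.S step */
    size_t i;
    for (i = 0; i < n && src[i] != '\0'; ++i)
        d[i] = src[i];
    d[i] = '\0';                   /* strncat always terminates */
    return dst;
}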

View File

@@ -107,12 +107,8 @@ name: \
sub %esi, %ebp
#endif
#ifndef STRCMP
# define STRCMP strcmp
#endif
.section .text.ssse3,"ax",@progbits
ENTRY (STRCMP)
ENTRY (ssse3_strcmp_latest)
#ifdef USE_AS_STRNCMP
PUSH (%ebp)
#endif
@@ -2275,4 +2271,4 @@ L(less16bytes_sncmp):
ret
#endif
END (STRCMP)
END (ssse3_strcmp_latest)

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,34 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define STRCAT strncat
#define USE_AS_STRNCAT
#include "ssse3-strcat-atom.S"

View File

@@ -1,33 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define USE_AS_STRNCPY
#define STRCPY strncpy
#include "ssse3-strcpy-atom.S"

View File

@@ -1,114 +0,0 @@
/*
Copyright (c) 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 4
#define STR1 PARMS+4
#define STR2 STR1+4
#define USE_AS_WCSCAT
.text
ENTRY (wcscat)
PUSH (%edi)
mov STR1(%esp), %edi
mov %edi, %edx
#define RETURN jmp L(WcscpyAtom)
#include "sse2-wcslen-atom.S"
L(WcscpyAtom):
shl $2, %eax
mov STR2(%esp), %ecx
lea (%edi, %eax), %edx
cmp $0, (%ecx)
jz L(Exit4)
cmp $0, 4(%ecx)
jz L(Exit8)
cmp $0, 8(%ecx)
jz L(Exit12)
cmp $0, 12(%ecx)
jz L(Exit16)
#undef RETURN
#define RETURN POP(%edi); ret; CFI_PUSH(%edi)
#include "ssse3-wcscpy-atom.S"
END (wcscat)
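The removed wcscat above runs the inlined wcslen on the destination (the shl $2, %eax scales the wchar_t count to a byte offset) and then falls through into the wcscpy body. Equivalent semantics in C (wcscat_ref is a made-up name):

#include <wchar.h>

wchar_t *wcscat_ref(wchar_t *dst, const wchar_t *src) {
    wchar_t *d = dst + wcslen(dst);   /* the sse2-wcslen-atom.S step */
    while ((*d++ = *src++) != L'\0')
        ;                             /* copy through the terminator */
    return dst;
}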

View File

@@ -1,652 +0,0 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef USE_AS_WCSCAT
# ifndef L
# define L(label) .L##label
# endif
# ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
# endif
# ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
# endif
# ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
# endif
# ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
# endif
# ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
# endif
# ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
# endif
# ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
# endif
# define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
# define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
# define PARMS 4
# define RETURN POP (%edi); ret; CFI_PUSH (%edi)
# define STR1 PARMS
# define STR2 STR1+4
# define LEN STR2+4
.text
ENTRY (wcscpy)
mov STR1(%esp), %edx
mov STR2(%esp), %ecx
cmp $0, (%ecx)
jz L(ExitTail4)
cmp $0, 4(%ecx)
jz L(ExitTail8)
cmp $0, 8(%ecx)
jz L(ExitTail12)
cmp $0, 12(%ecx)
jz L(ExitTail16)
PUSH (%edi)
mov %edx, %edi
#endif
PUSH (%esi)
lea 16(%ecx), %esi
and $-16, %esi
pxor %xmm0, %xmm0
pcmpeqd (%esi), %xmm0
movdqu (%ecx), %xmm1
movdqu %xmm1, (%edx)
pmovmskb %xmm0, %eax
sub %ecx, %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
mov %edx, %eax
lea 16(%edx), %edx
and $-16, %edx
sub %edx, %eax
sub %eax, %ecx
mov %ecx, %eax
and $0xf, %eax
mov $0, %esi
jz L(Align16Both)
cmp $4, %eax
je L(Shl4)
cmp $8, %eax
je L(Shl8)
jmp L(Shl12)
L(Align16Both):
movaps (%ecx), %xmm1
movaps 16(%ecx), %xmm2
movaps %xmm1, (%edx)
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm4
movaps %xmm3, (%edx, %esi)
pcmpeqd %xmm4, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm1
movaps %xmm4, (%edx, %esi)
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm2
movaps %xmm1, (%edx, %esi)
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps %xmm3, (%edx, %esi)
mov %ecx, %eax
lea 16(%ecx, %esi), %ecx
and $-0x40, %ecx
sub %ecx, %eax
sub %eax, %edx
mov $-0x40, %esi
L(Aligned64Loop):
movaps (%ecx), %xmm2
movaps 32(%ecx), %xmm3
movaps %xmm2, %xmm4
movaps 16(%ecx), %xmm5
movaps %xmm3, %xmm6
movaps 48(%ecx), %xmm7
pminub %xmm5, %xmm2
pminub %xmm7, %xmm3
pminub %xmm2, %xmm3
lea 64(%edx), %edx
pcmpeqd %xmm0, %xmm3
lea 64(%ecx), %ecx
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(Aligned64Leave)
movaps %xmm4, -64(%edx)
movaps %xmm5, -48(%edx)
movaps %xmm6, -32(%edx)
movaps %xmm7, -16(%edx)
jmp L(Aligned64Loop)
L(Aligned64Leave):
pcmpeqd %xmm4, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
pcmpeqd %xmm5, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm4, -64(%edx)
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
pcmpeqd %xmm6, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm5, -48(%edx)
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps %xmm6, -32(%edx)
pcmpeqd %xmm7, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
mov $-0x40, %esi
movaps %xmm7, -16(%edx)
jmp L(Aligned64Loop)
.p2align 4
L(Shl4):
movaps -4(%ecx), %xmm1
movaps 12(%ecx), %xmm2
L(Shl4Start):
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 28(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -12(%ecx), %ecx
sub %eax, %edx
movaps -4(%ecx), %xmm1
L(Shl4LoopStart):
movaps 12(%ecx), %xmm2
movaps 28(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 44(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 60(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqd %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $4, %xmm4, %xmm5
palignr $4, %xmm3, %xmm4
test %eax, %eax
jnz L(Shl4Start)
palignr $4, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
movlpd (%ecx), %xmm0
movl 8(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 8(%edx)
POP (%esi)
add $12, %edx
add $12, %ecx
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit4)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(Shl8):
movaps -8(%ecx), %xmm1
movaps 8(%ecx), %xmm2
L(Shl8Start):
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 24(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -8(%ecx), %ecx
sub %eax, %edx
movaps -8(%ecx), %xmm1
L(Shl8LoopStart):
movaps 8(%ecx), %xmm2
movaps 24(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 40(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 56(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqd %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $8, %xmm4, %xmm5
palignr $8, %xmm3, %xmm4
test %eax, %eax
jnz L(Shl8Start)
palignr $8, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
POP (%esi)
add $8, %edx
add $8, %ecx
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit4)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(Shl12):
movaps -12(%ecx), %xmm1
movaps 4(%ecx), %xmm2
L(Shl12Start):
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 20(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -4(%ecx), %ecx
sub %eax, %edx
movaps -12(%ecx), %xmm1
L(Shl12LoopStart):
movaps 4(%ecx), %xmm2
movaps 20(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 36(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 52(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqd %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $12, %xmm4, %xmm5
palignr $12, %xmm3, %xmm4
test %eax, %eax
jnz L(Shl12Start)
palignr $12, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
movl (%ecx), %esi
movl %esi, (%edx)
mov $4, %esi
.p2align 4
L(CopyFrom1To16Bytes):
add %esi, %edx
add %esi, %ecx
POP (%esi)
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit4)
L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
.p2align 4
L(ExitHigh):
test $0x01, %ah
jnz L(Exit12)
L(Exit16):
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
movl %edi, %eax
RETURN
.p2align 4
L(Exit4):
movl (%ecx), %eax
movl %eax, (%edx)
movl %edi, %eax
RETURN
.p2align 4
L(Exit12):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
movl %edi, %eax
RETURN
CFI_POP (%edi)
.p2align 4
L(ExitTail4):
movl (%ecx), %eax
movl %eax, (%edx)
movl %edx, %eax
ret
.p2align 4
L(ExitTail8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edx, %eax
ret
.p2align 4
L(ExitTail12):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
movl %edx, %eax
ret
.p2align 4
L(ExitTail16):
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
movl %edx, %eax
ret
#ifndef USE_AS_WCSCAT
END (wcscpy)
#endif
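The Shl4/Shl8/Shl12 paths in the removed wcscpy above exist because, once the destination is 16-byte aligned, the source may still be offset by 4, 8, or 12 bytes; each path reads aligned blocks and stitches them back together with palignr. The observable behavior is just the usual copy loop (wcscpy_ref is a hypothetical name):

#include <wchar.h>

wchar_t *wcscpy_ref(wchar_t *dst, const wchar_t *src) {
    wchar_t *d = dst;
    while ((*d++ = *src++) != L'\0')
        ;                         /* copy through the terminator */
    return dst;
}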

View File

@@ -1,35 +0,0 @@
/*
Copyright (c) 2011, 2012, 2013 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define MEMCMP wmemcmp
#define USE_WCHAR
#define USE_AS_WMEMCMP 1
#include "ssse3-memcmp-atom.S"

View File

@@ -0,0 +1,3 @@
/* $OpenBSD: strchr.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
#define STRCHR
#include "index.S"

View File

@@ -28,8 +28,13 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSSE3)
#define MEMCPY bcopy
#define USE_AS_MEMMOVE
#define USE_AS_BCOPY
#include "ssse3-memcpy-atom.S"
# define ssse3_strcmp_latest strcmp
# include "ssse3-strcmp-latest.S"
#else
# include "strcmp.S"
#endif
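The hunk above shows the aliasing convention these commits move to: when USE_SSSE3 is set, the optimized source is assembled directly under the public symbol, so there is no runtime dispatch. A hypothetical C analogue of that pattern (all names below are illustrative, not bionic's):

#if defined(USE_SSSE3)
# define strcmp_impl strcmp_ssse3      /* optimized build */
#else
# define strcmp_impl strcmp_portable   /* generic build */
#endif

int strcmp_ssse3(const char *a, const char *b);
int strcmp_portable(const char *a, const char *b);

int call_strcmp(const char *a, const char *b) {
    return strcmp_impl(a, b);          /* resolved by the preprocessor */
}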

View File

@@ -0,0 +1,64 @@
/* $OpenBSD: strcpy.S,v 1.8 2005/08/07 11:30:38 espie Exp $ */
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
*/
#include <machine/asm.h>
#if defined(APIWARN)
#APP
.section .gnu.warning.strcpy
.ascii "warning: strcpy() is almost always misused, please use strlcpy()"
#NO_APP
#endif
/*
* NOTE: I've unrolled the loop eight times: large enough to make a
* significant difference, and small enough not to totally trash the
* cache.
*/
ENTRY(strcpy)
movl 4(%esp),%ecx /* dst address */
movl 8(%esp),%edx /* src address */
pushl %ecx /* push dst address */
.align 2,0x90
L1: movb (%edx),%al /* unroll loop, but not too much */
movb %al,(%ecx)
testb %al,%al
jz L2
movb 1(%edx),%al
movb %al,1(%ecx)
testb %al,%al
jz L2
movb 2(%edx),%al
movb %al,2(%ecx)
testb %al,%al
jz L2
movb 3(%edx),%al
movb %al,3(%ecx)
testb %al,%al
jz L2
movb 4(%edx),%al
movb %al,4(%ecx)
testb %al,%al
jz L2
movb 5(%edx),%al
movb %al,5(%ecx)
testb %al,%al
jz L2
movb 6(%edx),%al
movb %al,6(%ecx)
testb %al,%al
jz L2
movb 7(%edx),%al
movb %al,7(%ecx)
addl $8,%edx
addl $8,%ecx
testb %al,%al
jnz L1
L2: popl %eax /* pop dst address */
ret
END(strcpy)
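The NOTE above explains the design choice: unroll enough to amortize loop overhead, not so much that the copy trashes the instruction cache. A C rendering of the same 8x-unrolled loop (strcpy_unrolled is a hypothetical name):

char *strcpy_unrolled(char *dst, const char *src) {
    char *d = dst;
    for (;;) {
        for (int i = 0; i < 8; ++i)
            if ((d[i] = src[i]) == '\0')   /* mirrors the jz L2 exits */
                return dst;
        d += 8;
        src += 8;
    }
}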

View File

@@ -28,7 +28,13 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(USE_SSE2)
#define MEMCPY memmove
#define USE_AS_MEMMOVE
#include "ssse3-memcpy-atom.S"
# define sse2_strlen_atom strlen
# include "sse2-strlen-atom.S"
#else
# include "strlen.S"
#endif

Some files were not shown because too many files have changed in this diff