From 5b5d6e7045dece4e112553e9a2516240ea32f812 Mon Sep 17 00:00:00 2001 From: Shu Zhang Date: Wed, 12 Mar 2014 11:18:41 +0800 Subject: [PATCH] add 32-bit bionic implementation for denver arch Add 32-bit bionic implementation for denver. Use denver version of memcpy/ memset. Use Cortex-A15 version of strlen/strcat/strcpy/strcmp. Change-Id: I4c6b675f20cf41a29cadf70a11d1635d7df5b30a --- libc/arch-arm/arm.mk | 2 +- libc/arch-arm/denver/bionic/__strcat_chk.S | 221 +++++++++++++++++++ libc/arch-arm/denver/bionic/__strcpy_chk.S | 182 ++++++++++++++++ libc/arch-arm/denver/bionic/memcpy.S | 105 +++++++++ libc/arch-arm/denver/bionic/memcpy_base.S | 234 +++++++++++++++++++++ libc/arch-arm/denver/bionic/memset.S | 207 ++++++++++++++++++ libc/arch-arm/denver/denver.mk | 12 ++ 7 files changed, 962 insertions(+), 1 deletion(-) create mode 100644 libc/arch-arm/denver/bionic/__strcat_chk.S create mode 100644 libc/arch-arm/denver/bionic/__strcpy_chk.S create mode 100644 libc/arch-arm/denver/bionic/memcpy.S create mode 100644 libc/arch-arm/denver/bionic/memcpy_base.S create mode 100644 libc/arch-arm/denver/bionic/memset.S create mode 100644 libc/arch-arm/denver/denver.mk diff --git a/libc/arch-arm/arm.mk b/libc/arch-arm/arm.mk index 89a1ce050..cbc5fa7b2 100644 --- a/libc/arch-arm/arm.mk +++ b/libc/arch-arm/arm.mk @@ -70,7 +70,7 @@ ifeq ($(strip $(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)),) endif cpu_variant_mk := $(LOCAL_PATH)/arch-arm/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT)/$(TARGET_$(my_2nd_arch_prefix)CPU_VARIANT).mk ifeq ($(wildcard $(cpu_variant_mk)),) -$(error "TARGET_$(my_2nd_arch_prefix)CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait. Use generic for devices that do not have a CPU similar to any of the supported cpu variants.") +$(error "TARGET_$(my_2nd_arch_prefix)CPU_VARIANT not set or set to an unknown value. Possible values are cortex-a7, cortex-a8, cortex-a9, cortex-a15, krait, denver. 
Use generic for devices that do not have a CPU similar to any of the supported cpu variants.") endif include $(cpu_variant_mk) libc_common_additional_dependencies += $(cpu_variant_mk) diff --git a/libc/arch-arm/denver/bionic/__strcat_chk.S b/libc/arch-arm/denver/bionic/__strcat_chk.S new file mode 100644 index 000000000..36da2d9d8 --- /dev/null +++ b/libc/arch-arm/denver/bionic/__strcat_chk.S @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include + + .syntax unified + + .thumb + .thumb_func + +// Get the length of src string, then get the source of the dst string. 
+// Check that the two lengths together don't exceed the threshold, then +// do a memcpy of the data. +ENTRY(__strcat_chk) + pld [r0, #0] + push {r0, lr} + .save {r0, lr} + .cfi_def_cfa_offset 8 + .cfi_rel_offset r0, 0 + .cfi_rel_offset lr, 4 + push {r4, r5} + .save {r4, r5} + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset r4, 0 + .cfi_rel_offset r5, 4 + + mov lr, r2 + + // Save the dst register to r5 + mov r5, r0 + + // Zero out r4 + eor r4, r4, r4 + + // r1 contains the address of the string to count. +.L_strlen_start: + mov r0, r1 + ands r3, r1, #7 + beq .L_mainloop + + // Align to a double word (64 bits). + rsb r3, r3, #8 + lsls ip, r3, #31 + beq .L_align_to_32 + + ldrb r2, [r1], #1 + cbz r2, .L_update_count_and_finish + +.L_align_to_32: + bcc .L_align_to_64 + ands ip, r3, #2 + beq .L_align_to_64 + + ldrb r2, [r1], #1 + cbz r2, .L_update_count_and_finish + ldrb r2, [r1], #1 + cbz r2, .L_update_count_and_finish + +.L_align_to_64: + tst r3, #4 + beq .L_mainloop + ldr r3, [r1], #4 + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .L_zero_in_second_register + + .p2align 2 +.L_mainloop: + ldrd r2, r3, [r1], #8 + + pld [r1, #64] + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .L_zero_in_first_register + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .L_zero_in_second_register + b .L_mainloop + +.L_update_count_and_finish: + sub r3, r1, r0 + sub r3, r3, #1 + b .L_finish + +.L_zero_in_first_register: + sub r3, r1, r0 + lsls r2, ip, #17 + bne .L_sub8_and_finish + bcs .L_sub7_and_finish + lsls ip, ip, #1 + bne .L_sub6_and_finish + + sub r3, r3, #5 + b .L_finish + +.L_sub8_and_finish: + sub r3, r3, #8 + b .L_finish + +.L_sub7_and_finish: + sub r3, r3, #7 + b .L_finish + +.L_sub6_and_finish: + sub r3, r3, #6 + b .L_finish + +.L_zero_in_second_register: + sub r3, r1, r0 + lsls r2, ip, #17 + bne .L_sub4_and_finish + bcs .L_sub3_and_finish + lsls ip, ip, #1 + bne .L_sub2_and_finish + + sub r3, 
r3, #1 + b .L_finish + +.L_sub4_and_finish: + sub r3, r3, #4 + b .L_finish + +.L_sub3_and_finish: + sub r3, r3, #3 + b .L_finish + +.L_sub2_and_finish: + sub r3, r3, #2 + +.L_finish: + cmp r4, #0 + bne .L_strlen_done + + // Time to get the dst string length. + mov r1, r5 + + // Save the original source address to r5. + mov r5, r0 + + // Save the current length (adding 1 for the terminator). + add r4, r3, #1 + b .L_strlen_start + + // r0 holds the pointer to the dst string. + // r3 holds the dst string length. + // r4 holds the src string length + 1. +.L_strlen_done: + add r2, r3, r4 + cmp r2, lr + bhi __strcat_chk_failed + + // Set up the registers for the memcpy code. + mov r1, r5 + pld [r1, #64] + mov r2, r4 + add r0, r0, r3 + pop {r4, r5} +END(__strcat_chk) + +#define MEMCPY_BASE __strcat_chk_memcpy_base +#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned + +#include "memcpy_base.S" + +ENTRY_PRIVATE(__strcat_chk_failed) + .save {r0, lr} + .save {r4, r5} + + .cfi_def_cfa_offset 8 + .cfi_rel_offset r0, 0 + .cfi_rel_offset lr, 4 + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset r4, 0 + .cfi_rel_offset r5, 4 + + ldr r0, error_message + ldr r1, error_code +1: + add r0, pc + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW +error_message: + .word error_string-(1b+4) +END(__strcat_chk_failed) + + .data +error_string: + .string "strcat: prevented write past end of buffer" diff --git a/libc/arch-arm/denver/bionic/__strcpy_chk.S b/libc/arch-arm/denver/bionic/__strcpy_chk.S new file mode 100644 index 000000000..c3e3e14fa --- /dev/null +++ b/libc/arch-arm/denver/bionic/__strcpy_chk.S @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include + + .syntax unified + + .thumb + .thumb_func + +// Get the length of the source string first, then do a memcpy of the data +// instead of a strcpy. +ENTRY(__strcpy_chk) + pld [r0, #0] + push {r0, lr} + .save {r0, lr} + .cfi_def_cfa_offset 8 + .cfi_rel_offset r0, 0 + .cfi_rel_offset lr, 4 + + mov lr, r2 + mov r0, r1 + + ands r3, r1, #7 + beq .L_mainloop + + // Align to a double word (64 bits). 
+ rsb r3, r3, #8 + lsls ip, r3, #31 + beq .L_align_to_32 + + ldrb r2, [r0], #1 + cbz r2, .L_update_count_and_finish + +.L_align_to_32: + bcc .L_align_to_64 + ands ip, r3, #2 + beq .L_align_to_64 + + ldrb r2, [r0], #1 + cbz r2, .L_update_count_and_finish + ldrb r2, [r0], #1 + cbz r2, .L_update_count_and_finish + +.L_align_to_64: + tst r3, #4 + beq .L_mainloop + ldr r3, [r0], #4 + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .L_zero_in_second_register + + .p2align 2 +.L_mainloop: + ldrd r2, r3, [r0], #8 + + pld [r0, #64] + + sub ip, r2, #0x01010101 + bic ip, ip, r2 + ands ip, ip, #0x80808080 + bne .L_zero_in_first_register + + sub ip, r3, #0x01010101 + bic ip, ip, r3 + ands ip, ip, #0x80808080 + bne .L_zero_in_second_register + b .L_mainloop + +.L_update_count_and_finish: + sub r3, r0, r1 + sub r3, r3, #1 + b .L_check_size + +.L_zero_in_first_register: + sub r3, r0, r1 + lsls r2, ip, #17 + bne .L_sub8_and_finish + bcs .L_sub7_and_finish + lsls ip, ip, #1 + bne .L_sub6_and_finish + + sub r3, r3, #5 + b .L_check_size + +.L_sub8_and_finish: + sub r3, r3, #8 + b .L_check_size + +.L_sub7_and_finish: + sub r3, r3, #7 + b .L_check_size + +.L_sub6_and_finish: + sub r3, r3, #6 + b .L_check_size + +.L_zero_in_second_register: + sub r3, r0, r1 + lsls r2, ip, #17 + bne .L_sub4_and_finish + bcs .L_sub3_and_finish + lsls ip, ip, #1 + bne .L_sub2_and_finish + + sub r3, r3, #1 + b .L_check_size + +.L_sub4_and_finish: + sub r3, r3, #4 + b .L_check_size + +.L_sub3_and_finish: + sub r3, r3, #3 + b .L_check_size + +.L_sub2_and_finish: + sub r3, r3, #2 + +.L_check_size: + pld [r1, #0] + pld [r1, #64] + ldr r0, [sp] + cmp r3, lr + bhs __strcpy_chk_failed + + // Add 1 for copy length to get the string terminator. 
+ add r2, r3, #1 +END(__strcpy_chk) + +#define MEMCPY_BASE __strcpy_chk_memcpy_base +#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned +#include "memcpy_base.S" + +ENTRY_PRIVATE(__strcpy_chk_failed) + .save {r0, lr} + .cfi_def_cfa_offset 8 + .cfi_rel_offset r0, 0 + .cfi_rel_offset lr, 4 + + ldr r0, error_message + ldr r1, error_code +1: + add r0, pc + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW +error_message: + .word error_string-(1b+4) +END(__strcpy_chk_failed) + + .data +error_string: + .string "strcpy: prevented write past end of buffer" diff --git a/libc/arch-arm/denver/bionic/memcpy.S b/libc/arch-arm/denver/bionic/memcpy.S new file mode 100644 index 000000000..da4f3dd79 --- /dev/null +++ b/libc/arch-arm/denver/bionic/memcpy.S @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright (c) 2013 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Prototype: void *memcpy (void *dst, const void *src, size_t count). + +#include +#include + + .text + .syntax unified + .fpu neon + +ENTRY(__memcpy_chk) + cmp r2, r3 + bhi __memcpy_chk_fail + + // Fall through to memcpy... +END(__memcpy_chk) + +ENTRY(memcpy) + pld [r1, #64] + push {r0, lr} + .save {r0, lr} + .cfi_def_cfa_offset 8 + .cfi_rel_offset r0, 0 + .cfi_rel_offset lr, 4 +END(memcpy) + +#define MEMCPY_BASE __memcpy_base +#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned +#include "memcpy_base.S" + +ENTRY_PRIVATE(__memcpy_chk_fail) + // Preserve lr for backtrace. + push {lr} + .save {lr} + .cfi_def_cfa_offset 4 + .cfi_rel_offset lr, 0 + + ldr r0, error_message + ldr r1, error_code +1: + add r0, pc + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW +error_message: + .word error_string-(1b+8) +END(__memcpy_chk_fail) + + .data +error_string: + .string "memcpy: prevented write past end of buffer" diff --git a/libc/arch-arm/denver/bionic/memcpy_base.S b/libc/arch-arm/denver/bionic/memcpy_base.S new file mode 100644 index 000000000..2abb48671 --- /dev/null +++ b/libc/arch-arm/denver/bionic/memcpy_base.S @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * All rights reserved. + * Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 + */ + +#define CACHE_LINE_SIZE (64) +#define PREFETCH_DISTANCE (CACHE_LINE_SIZE*6) + +ENTRY_PRIVATE(MEMCPY_BASE) + .cfi_def_cfa_offset 8 + .cfi_rel_offset r0, 0 + .cfi_rel_offset lr, 4 + + cmp r2, #0 + beq .L_memcpy_done + cmp r0, r1 + beq .L_memcpy_done + + /* preload next cache line */ + pld [r1, #CACHE_LINE_SIZE*1] + + /* Deal with very small blocks (< 32bytes) asap */ + cmp r2, #32 + blo .L_memcpy_lt_32bytes + /* no need to align if len < 128 bytes */ + cmp r2, #128 + blo .L_memcpy_lt_128bytes + + /* large copy, align dest to 64 byte boundary */ + pld [r1, #CACHE_LINE_SIZE*2] + rsb r3, r0, #0 + ands r3, r3, #0x3F + pld [r1, #CACHE_LINE_SIZE*3] + beq .L_memcpy_dispatch + sub r2, r2, r3 + /* copy 1 byte */ + movs ip, r3, lsl #31 + itt mi + ldrbmi ip, [r1], #1 + strbmi ip, [r0], #1 + /* copy 2 bytes */ + itt cs + ldrhcs ip, [r1], #2 + strhcs ip, [r0], #2 + /* copy 4 bytes */ + movs ip, r3, lsl #29 + itt mi + ldrmi ip, [r1], #4 + strmi ip, [r0], #4 + /* copy 8 bytes */ + bcc 1f + vld1.8 {d0}, [r1]! + vst1.8 {d0}, [r0, :64]! +1: /* copy 16 bytes */ + movs ip, r3, lsl #27 + bpl 1f + vld1.8 {q0}, [r1]! + vst1.8 {q0}, [r0, :128]! +1: /* copy 32 bytes */ + bcc .L_memcpy_dispatch + vld1.8 {q0, q1}, [r1]! + vst1.8 {q0, q1}, [r0, :256]!
 + +.L_memcpy_dispatch: + // pre-decrement by 128 to detect nearly-done condition easily, but + // also need to check if we have less than 128 bytes left at this + // point due to alignment code above + subs r2, r2, #128 + blo .L_memcpy_lt_128presub + + // Denver does better if both source and dest are aligned so + // we'll special-case that even though the code is virtually identical + tst r1, #0xF + bne .L_memcpy_neon_unalign_src_pld + + // DRAM memcpy should be throttled slightly to get full bandwidth + // + cmp r2, #32768 + bhi .L_memcpy_neon_unalign_src_pld + .align 4 +1: + /* copy 128 bytes in each loop */ + subs r2, r2, #128 + + /* preload a cache line */ + pld [r1, #PREFETCH_DISTANCE] + /* copy a cache line */ + vld1.8 {q0, q1}, [r1, :128]! + vst1.8 {q0, q1}, [r0, :256]! + vld1.8 {q0, q1}, [r1, :128]! + vst1.8 {q0, q1}, [r0, :256]! + /* preload a cache line */ + pld [r1, #PREFETCH_DISTANCE] + /* copy a cache line */ + vld1.8 {q0, q1}, [r1, :128]! + vst1.8 {q0, q1}, [r0, :256]! + vld1.8 {q0, q1}, [r1, :128]! + vst1.8 {q0, q1}, [r0, :256]! + + bhs 1b + adds r2, r2, #128 + bne .L_memcpy_lt_128bytes_align + pop {r0, pc} + + .align 4 +.L_memcpy_neon_unalign_src_pld: +1: + /* copy 128 bytes in each loop */ + subs r2, r2, #128 + + /* preload a cache line */ + pld [r1, #PREFETCH_DISTANCE] + /* copy a cache line */ + vld1.8 {q0, q1}, [r1]! + vst1.8 {q0, q1}, [r0, :256]! + vld1.8 {q0, q1}, [r1]! + vst1.8 {q0, q1}, [r0, :256]! + /* preload a cache line */ + pld [r1, #PREFETCH_DISTANCE] + /* copy a cache line */ + vld1.8 {q0, q1}, [r1]! + vst1.8 {q0, q1}, [r0, :256]! + vld1.8 {q0, q1}, [r1]! + vst1.8 {q0, q1}, [r0, :256]! + + bhs 1b + adds r2, r2, #128 + bne .L_memcpy_lt_128bytes_align + pop {r0, pc} + +.L_memcpy_lt_128presub: + add r2, r2, #128 +.L_memcpy_lt_128bytes_align: + /* copy 64 bytes */ + movs ip, r2, lsl #26 + bcc 1f + vld1.8 {q0, q1}, [r1]! + vst1.8 {q0, q1}, [r0, :256]! + vld1.8 {q0, q1}, [r1]! + vst1.8 {q0, q1}, [r0, :256]!
+1: /* copy 32 bytes */ + bpl 1f + vld1.8 {q0, q1}, [r1]! + vst1.8 {q0, q1}, [r0, :256]! +1: /* copy 16 bytes */ + movs ip, r2, lsl #28 + bcc 1f + vld1.8 {q0}, [r1]! + vst1.8 {q0}, [r0, :128]! +1: /* copy 8 bytes */ + bpl 1f + vld1.8 {d0}, [r1]! + vst1.8 {d0}, [r0, :64]! +1: /* copy 4 bytes */ + tst r2, #4 + itt ne + ldrne ip, [r1], #4 + strne ip, [r0], #4 + /* copy 2 bytes */ + movs ip, r2, lsl #31 + itt cs + ldrhcs ip, [r1], #2 + strhcs ip, [r0], #2 + /* copy 1 byte */ + itt mi + ldrbmi ip, [r1] + strbmi ip, [r0] + + pop {r0, pc} + +.L_memcpy_lt_128bytes: + /* copy 64 bytes */ + movs ip, r2, lsl #26 + bcc 1f + vld1.8 {q0, q1}, [r1]! + vst1.8 {q0, q1}, [r0]! + vld1.8 {q0, q1}, [r1]! + vst1.8 {q0, q1}, [r0]! +1: /* copy 32 bytes */ + bpl .L_memcpy_lt_32bytes + vld1.8 {q0, q1}, [r1]! + vst1.8 {q0, q1}, [r0]! +.L_memcpy_lt_32bytes: + /* copy 16 bytes */ + movs ip, r2, lsl #28 + bcc 1f + vld1.8 {q0}, [r1]! + vst1.8 {q0}, [r0]! +1: /* copy 8 bytes */ + bpl 1f + vld1.8 {d0}, [r1]! + vst1.8 {d0}, [r0]! +1: /* copy 4 bytes */ + tst r2, #4 + itt ne + ldrne ip, [r1], #4 + strne ip, [r0], #4 + /* copy 2 bytes */ + movs ip, r2, lsl #31 + itt cs + ldrhcs ip, [r1], #2 + strhcs ip, [r0], #2 + /* copy 1 byte */ + itt mi + ldrbmi ip, [r1] + strbmi ip, [r0] + +.L_memcpy_done: + pop {r0, pc} +END(MEMCPY_BASE) diff --git a/libc/arch-arm/denver/bionic/memset.S b/libc/arch-arm/denver/bionic/memset.S new file mode 100644 index 000000000..bf3d9aded --- /dev/null +++ b/libc/arch-arm/denver/bionic/memset.S @@ -0,0 +1,207 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + + /* + * Optimized memset() for ARM. + * + * memset() returns its first argument. + */ + + .fpu neon + .syntax unified + +ENTRY(__memset_chk) + cmp r2, r3 + bls .L_done + + // Preserve lr for backtrace. + push {lr} + .cfi_def_cfa_offset 4 + .cfi_rel_offset lr, 0 + + + ldr r0, error_message + ldr r1, error_code +1: + add r0, pc + bl __fortify_chk_fail +error_code: + .word BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW +error_message: + .word error_string-(1b+8) +END(__memset_chk) + +ENTRY(bzero) + mov r2, r1 + mov r1, #0 +.L_done: + // Fall through to memset... +END(bzero) + +ENTRY(memset) + pldw [r0] + mov r3, r0 + + // Duplicate the low byte of r1 + mov r1, r1, lsl #24 + orr r1, r1, r1, lsr #8 + orr r1, r1, r1, lsr #16 + + cmp r2, #16 + blo .L_less_than_16 + + // This section handles regions 16 bytes or larger + // + // Use aligned vst1.8 and vstm when possible. 
Register values will be: + // ip is scratch + // q0, q1, and r1 contain the memset value + // r2 is the number of bytes to set + // r3 is the advancing destination pointer + vdup.32 q0, r1 + + ands ip, r3, 0xF + beq .L_memset_aligned + + // Align dest pointer to 16-byte boundary. + pldw [r0, #64] + rsb ip, ip, #16 + + // Pre-adjust the byte count to reflect post-alignment value. Expecting + // 8-byte alignment to be rather common so we special case that one. + sub r2, r2, ip + + /* set 1 byte */ + tst ip, #1 + it ne + strbne r1, [r3], #1 + /* set 2 bytes */ + tst ip, #2 + it ne + strhne r1, [r3], #2 + /* set 4 bytes */ + movs ip, ip, lsl #29 + it mi + strmi r1, [r3], #4 + /* set 8 bytes */ + itt cs + strcs r1, [r3], #4 + strcs r1, [r3], #4 + +.L_memset_aligned: + // Destination is now 16-byte aligned. Determine how to handle + // remaining bytes. + vmov q1, q0 + cmp r2, #128 + blo .L_less_than_128 + + // We need to set a larger block of memory. Use four Q regs to + // set a full cache line in one instruction. Pre-decrement + // r2 to simplify end-of-loop detection + vmov q2, q0 + vmov q3, q0 + pldw [r0, #128] + sub r2, r2, #128 + .align 4 +.L_memset_loop_128: + pldw [r3, #192] + vstm r3!, {q0, q1, q2, q3} + vstm r3!, {q0, q1, q2, q3} + subs r2, r2, #128 + bhs .L_memset_loop_128 + + // Un-bias r2 so it contains the number of bytes left. Early + // exit if we are done. + adds r2, r2, #128 + beq 2f + + .align 4 +.L_less_than_128: + // set 64 bytes + movs ip, r2, lsl #26 + bcc 1f + vst1.8 {q0, q1}, [r3, :128]! + vst1.8 {q0, q1}, [r3, :128]! + beq 2f +1: + // set 32 bytes + bpl 1f + vst1.8 {q0, q1}, [r3, :128]! +1: + // set 16 bytes + movs ip, r2, lsl #28 + bcc 1f + vst1.8 {q0}, [r3, :128]! + beq 2f +1: + // set 8 bytes + bpl 1f + vst1.8 {d0}, [r3, :64]!
+1: + // set 4 bytes + tst r2, #4 + it ne + strne r1, [r3], #4 +1: + // set 2 bytes + movs ip, r2, lsl #31 + it cs + strhcs r1, [r3], #2 + // set 1 byte + it mi + strbmi r1, [r3] +2: + bx lr + +.L_less_than_16: + // Store up to 15 bytes without worrying about byte alignment + movs ip, r2, lsl #29 + bcc 1f + str r1, [r3], #4 + str r1, [r3], #4 + beq 2f +1: + it mi + strmi r1, [r3], #4 + movs ip, r2, lsl #31 + it mi + strbmi r1, [r3], #1 + itt cs + strbcs r1, [r3], #1 + strbcs r1, [r3] +2: + bx lr +END(memset) + + .data +error_string: + .string "memset: prevented write past end of buffer" diff --git a/libc/arch-arm/denver/denver.mk b/libc/arch-arm/denver/denver.mk new file mode 100644 index 000000000..3fcc4572c --- /dev/null +++ b/libc/arch-arm/denver/denver.mk @@ -0,0 +1,12 @@ +libc_bionic_src_files_arm += \ + arch-arm/denver/bionic/memcpy.S \ + arch-arm/denver/bionic/memset.S \ + arch-arm/denver/bionic/__strcat_chk.S \ + arch-arm/denver/bionic/__strcpy_chk.S + +# Use cortex-a15 versions of strcat/strcpy/strlen. +libc_bionic_src_files_arm += \ + arch-arm/cortex-a15/bionic/strcat.S \ + arch-arm/cortex-a15/bionic/strcpy.S \ + arch-arm/cortex-a15/bionic/strlen.S \ + arch-arm/cortex-a15/bionic/strcmp.S