libc: krait: Use performance version of memcpy
Change-Id: Iaa52635240da8b8746693186b66b69778e833c32
This commit is contained in:
		
				
					committed by
					
						
						Jake Weinstein
					
				
			
			
				
	
			
			
			
						parent
						
							a0b9cad076
						
					
				
				
					commit
					1d0268c6b8
				
			@@ -40,7 +40,7 @@
 | 
			
		||||
ENTRY(__strcat_chk)
 | 
			
		||||
    pld     [r0, #0]
 | 
			
		||||
    push    {r0, lr}
 | 
			
		||||
    .cfi_def_cfa_offset 8
 | 
			
		||||
    .cfi_adjust_cfa_offset 8
 | 
			
		||||
    .cfi_rel_offset r0, 0
 | 
			
		||||
    .cfi_rel_offset lr, 4
 | 
			
		||||
    push    {r4, r5}
 | 
			
		||||
@@ -177,7 +177,7 @@ ENTRY(__strcat_chk)
 | 
			
		||||
.L_strlen_done:
 | 
			
		||||
    add     r2, r3, r4
 | 
			
		||||
    cmp     r2, lr
 | 
			
		||||
    bhi     __strcat_chk_failed
 | 
			
		||||
    bhi     .L_strcat_chk_failed
 | 
			
		||||
 | 
			
		||||
    // Set up the registers for the memcpy code.
 | 
			
		||||
    mov     r1, r5
 | 
			
		||||
@@ -185,20 +185,17 @@ ENTRY(__strcat_chk)
 | 
			
		||||
    mov     r2, r4
 | 
			
		||||
    add     r0, r0, r3
 | 
			
		||||
    pop     {r4, r5}
 | 
			
		||||
END(__strcat_chk)
 | 
			
		||||
    .cfi_adjust_cfa_offset -8
 | 
			
		||||
    .cfi_restore r4
 | 
			
		||||
    .cfi_restore r5
 | 
			
		||||
 | 
			
		||||
#define MEMCPY_BASE         __strcat_chk_memcpy_base
 | 
			
		||||
#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
 | 
			
		||||
#include "memcpy_base.S"
 | 
			
		||||
 | 
			
		||||
ENTRY_PRIVATE(__strcat_chk_failed)
 | 
			
		||||
    .cfi_def_cfa_offset 8
 | 
			
		||||
    .cfi_rel_offset r0, 0
 | 
			
		||||
    .cfi_rel_offset lr, 4
 | 
			
		||||
    // Undo the above cfi directives.
 | 
			
		||||
    .cfi_adjust_cfa_offset 8
 | 
			
		||||
    .cfi_rel_offset r4, 0
 | 
			
		||||
    .cfi_rel_offset r5, 4
 | 
			
		||||
 | 
			
		||||
.L_strcat_chk_failed:
 | 
			
		||||
    ldr     r0, error_message
 | 
			
		||||
    ldr     r1, error_code
 | 
			
		||||
1:
 | 
			
		||||
@@ -208,7 +205,7 @@ error_code:
 | 
			
		||||
    .word   BIONIC_EVENT_STRCAT_BUFFER_OVERFLOW
 | 
			
		||||
error_message:
 | 
			
		||||
    .word   error_string-(1b+4)
 | 
			
		||||
END(__strcat_chk_failed)
 | 
			
		||||
END(__strcat_chk)
 | 
			
		||||
 | 
			
		||||
    .data
 | 
			
		||||
error_string:
 | 
			
		||||
 
 | 
			
		||||
@@ -39,7 +39,7 @@
 | 
			
		||||
ENTRY(__strcpy_chk)
 | 
			
		||||
    pld     [r0, #0]
 | 
			
		||||
    push    {r0, lr}
 | 
			
		||||
    .cfi_def_cfa_offset 8
 | 
			
		||||
    .cfi_adjust_cfa_offset 8
 | 
			
		||||
    .cfi_rel_offset r0, 0
 | 
			
		||||
    .cfi_rel_offset lr, 4
 | 
			
		||||
 | 
			
		||||
@@ -149,21 +149,14 @@ ENTRY(__strcpy_chk)
 | 
			
		||||
    pld     [r1, #64]
 | 
			
		||||
    ldr     r0, [sp]
 | 
			
		||||
    cmp     r3, lr
 | 
			
		||||
    bhs     __strcpy_chk_failed
 | 
			
		||||
    bhs     .L_strcpy_chk_failed
 | 
			
		||||
 | 
			
		||||
    // Add 1 for copy length to get the string terminator.
 | 
			
		||||
    add     r2, r3, #1
 | 
			
		||||
END(__strcpy_chk)
 | 
			
		||||
 | 
			
		||||
#define MEMCPY_BASE         __strcpy_chk_memcpy_base
 | 
			
		||||
#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
 | 
			
		||||
#include "memcpy_base.S"
 | 
			
		||||
 | 
			
		||||
ENTRY_PRIVATE(__strcpy_chk_failed)
 | 
			
		||||
    .cfi_def_cfa_offset 8
 | 
			
		||||
    .cfi_rel_offset r0, 0
 | 
			
		||||
    .cfi_rel_offset lr, 4
 | 
			
		||||
 | 
			
		||||
.L_strcpy_chk_failed:
 | 
			
		||||
    ldr     r0, error_message
 | 
			
		||||
    ldr     r1, error_code
 | 
			
		||||
1:
 | 
			
		||||
@@ -173,7 +166,7 @@ error_code:
 | 
			
		||||
    .word   BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
 | 
			
		||||
error_message:
 | 
			
		||||
    .word   error_string-(1b+4)
 | 
			
		||||
END(__strcpy_chk_failed)
 | 
			
		||||
END(__strcpy_chk)
 | 
			
		||||
 | 
			
		||||
    .data
 | 
			
		||||
error_string:
 | 
			
		||||
 
 | 
			
		||||
@@ -45,7 +45,7 @@
 | 
			
		||||
 | 
			
		||||
ENTRY(__memcpy_chk)
 | 
			
		||||
        cmp         r2, r3
 | 
			
		||||
        bhi         __memcpy_chk_fail
 | 
			
		||||
        bhi         .L_memcpy_chk_fail
 | 
			
		||||
 | 
			
		||||
        // Fall through to memcpy...
 | 
			
		||||
END(__memcpy_chk)
 | 
			
		||||
@@ -53,19 +53,20 @@ END(__memcpy_chk)
 | 
			
		||||
ENTRY(memcpy)
 | 
			
		||||
        pld     [r1, #64]
 | 
			
		||||
        stmfd   sp!, {r0, lr}
 | 
			
		||||
        .cfi_def_cfa_offset 8
 | 
			
		||||
        .cfi_adjust_cfa_offset 8
 | 
			
		||||
        .cfi_rel_offset r0, 0
 | 
			
		||||
        .cfi_rel_offset lr, 4
 | 
			
		||||
END(memcpy)
 | 
			
		||||
 | 
			
		||||
#define MEMCPY_BASE         __memcpy_base
 | 
			
		||||
#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
 | 
			
		||||
#include "memcpy_base.S"
 | 
			
		||||
 | 
			
		||||
ENTRY_PRIVATE(__memcpy_chk_fail)
 | 
			
		||||
        // Undo the cfi directives from above.
 | 
			
		||||
        .cfi_adjust_cfa_offset -8
 | 
			
		||||
        .cfi_restore r0
 | 
			
		||||
        .cfi_restore lr
 | 
			
		||||
.L_memcpy_chk_fail:
 | 
			
		||||
        // Preserve lr for backtrace.
 | 
			
		||||
        push    {lr}
 | 
			
		||||
        .cfi_def_cfa_offset 4
 | 
			
		||||
        .cfi_adjust_cfa_offset 4
 | 
			
		||||
        .cfi_rel_offset lr, 0
 | 
			
		||||
 | 
			
		||||
        ldr     r0, error_message
 | 
			
		||||
@@ -77,7 +78,7 @@ error_code:
 | 
			
		||||
        .word   BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
 | 
			
		||||
error_message:
 | 
			
		||||
        .word   error_string-(1b+4)
 | 
			
		||||
END(__memcpy_chk_fail)
 | 
			
		||||
END(memcpy)
 | 
			
		||||
 | 
			
		||||
        .data
 | 
			
		||||
error_string:
 | 
			
		||||
 
 | 
			
		||||
@@ -1,122 +1,191 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (C) 2013 The Android Open Source Project
 | 
			
		||||
 * All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in
 | 
			
		||||
 *    the documentation and/or other materials provided with the
 | 
			
		||||
 *    distribution.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 | 
			
		||||
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 | 
			
		||||
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 | 
			
		||||
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 | 
			
		||||
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 | 
			
		||||
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 | 
			
		||||
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 | 
			
		||||
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 | 
			
		||||
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
			
		||||
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 | 
			
		||||
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 | 
			
		||||
 * SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
/***************************************************************************
 | 
			
		||||
 Copyright (c) 2009-2013 The Linux Foundation. All rights reserved.
 | 
			
		||||
 | 
			
		||||
 Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 modification, are permitted provided that the following conditions are met:
 | 
			
		||||
     * Redistributions of source code must retain the above copyright
 | 
			
		||||
       notice, this list of conditions and the following disclaimer.
 | 
			
		||||
     * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
       notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
       documentation and/or other materials provided with the distribution.
 | 
			
		||||
     * Neither the name of The Linux Foundation nor the names of its contributors may
 | 
			
		||||
       be used to endorse or promote products derived from this software
 | 
			
		||||
       without specific prior written permission.
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * This code assumes it is running on a processor that supports all arm v7
 | 
			
		||||
 * instructions, that supports neon instructions, and that has a 32 byte
 | 
			
		||||
 * cache line.
 | 
			
		||||
 */
 | 
			
		||||
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
			
		||||
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
			
		||||
 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 | 
			
		||||
 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 | 
			
		||||
 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 | 
			
		||||
 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 | 
			
		||||
 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 | 
			
		||||
 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 | 
			
		||||
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 | 
			
		||||
 POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
  ***************************************************************************/
 | 
			
		||||
 | 
			
		||||
// Assumes neon instructions and a cache line size of 32 bytes.
 | 
			
		||||
/* Assumes neon instructions and a cache line size of 64 bytes. */
 | 
			
		||||
 | 
			
		||||
ENTRY_PRIVATE(MEMCPY_BASE)
 | 
			
		||||
        .cfi_def_cfa_offset 8
 | 
			
		||||
        .cfi_rel_offset r0, 0
 | 
			
		||||
        .cfi_rel_offset lr, 4
 | 
			
		||||
#include <machine/cpu-features.h>
 | 
			
		||||
#include <machine/asm.h>
 | 
			
		||||
 | 
			
		||||
        /* do we have at least 16-bytes to copy (needed for alignment below) */
 | 
			
		||||
#define PLDOFFS	(10)
 | 
			
		||||
#define PLDTHRESH (PLDOFFS)
 | 
			
		||||
#define BBTHRESH (4096/64)
 | 
			
		||||
#define PLDSIZE (64)
 | 
			
		||||
 | 
			
		||||
#if (PLDOFFS < 1)
 | 
			
		||||
#error Routine does not support offsets less than 1
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if (PLDTHRESH < PLDOFFS)
 | 
			
		||||
#error PLD threshold must be greater than or equal to the PLD offset
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
	.text
 | 
			
		||||
	.fpu    neon
 | 
			
		||||
 | 
			
		||||
.L_memcpy_base:
 | 
			
		||||
	cmp	r2, #4
 | 
			
		||||
	blt	.L_neon_lt4
 | 
			
		||||
	cmp	r2, #16
 | 
			
		||||
        blo         5f
 | 
			
		||||
	blt	.L_neon_lt16
 | 
			
		||||
	cmp	r2, #32
 | 
			
		||||
	blt	.L_neon_16
 | 
			
		||||
	cmp	r2, #64
 | 
			
		||||
	blt	.L_neon_copy_32_a
 | 
			
		||||
 | 
			
		||||
        /* align destination to cache-line for the write-buffer */
 | 
			
		||||
        rsb         r3, r0, #0
 | 
			
		||||
        ands        r3, r3, #0xF
 | 
			
		||||
        beq         2f
 | 
			
		||||
	mov	r12, r2, lsr #6
 | 
			
		||||
	cmp	r12, #PLDTHRESH
 | 
			
		||||
	ble	.L_neon_copy_64_loop_nopld
 | 
			
		||||
 | 
			
		||||
        /* copy up to 15-bytes (count in r3) */
 | 
			
		||||
        sub         r2, r2, r3
 | 
			
		||||
        movs        ip, r3, lsl #31
 | 
			
		||||
        itt         mi
 | 
			
		||||
        ldrbmi      lr, [r1], #1
 | 
			
		||||
        strbmi      lr, [r0], #1
 | 
			
		||||
        itttt       cs
 | 
			
		||||
        ldrbcs      ip, [r1], #1
 | 
			
		||||
        ldrbcs      lr, [r1], #1
 | 
			
		||||
        strbcs      ip, [r0], #1
 | 
			
		||||
        strbcs      lr, [r0], #1
 | 
			
		||||
        movs        ip, r3, lsl #29
 | 
			
		||||
        bge         1f
 | 
			
		||||
        // copies 4 bytes, destination 32-bits aligned
 | 
			
		||||
        vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
 | 
			
		||||
        vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
 | 
			
		||||
1:      bcc         2f
 | 
			
		||||
        // copies 8 bytes, destination 64-bits aligned
 | 
			
		||||
        vld1.8      {d0}, [r1]!
 | 
			
		||||
        vst1.8      {d0}, [r0, :64]!
 | 
			
		||||
	push	{r9, r10}
 | 
			
		||||
	.cfi_adjust_cfa_offset 8
 | 
			
		||||
	.cfi_rel_offset r9, 0
 | 
			
		||||
	.cfi_rel_offset r10, 4
 | 
			
		||||
 | 
			
		||||
2:      /* make sure we have at least 64 bytes to copy */
 | 
			
		||||
        subs        r2, r2, #64
 | 
			
		||||
        blo         2f
 | 
			
		||||
	cmp	r12, #BBTHRESH
 | 
			
		||||
	ble	.L_neon_prime_pump
 | 
			
		||||
 | 
			
		||||
1:      /* The main loop copies 64 bytes at a time */
 | 
			
		||||
        vld1.8      {d0  - d3},   [r1]!
 | 
			
		||||
        vld1.8      {d4  - d7},   [r1]!
 | 
			
		||||
        pld         [r1, #(32*8)]
 | 
			
		||||
        subs        r2, r2, #64
 | 
			
		||||
        vst1.8      {d0  - d3},   [r0, :128]!
 | 
			
		||||
        vst1.8      {d4  - d7},   [r0, :128]!
 | 
			
		||||
        bhs         1b
 | 
			
		||||
	add	lr, r0, #0x400
 | 
			
		||||
	add	r9, r1, #(PLDOFFS*PLDSIZE)
 | 
			
		||||
	sub	lr, lr, r9
 | 
			
		||||
	lsl	lr, lr, #21
 | 
			
		||||
	lsr	lr, lr, #21
 | 
			
		||||
	add	lr, lr, #(PLDOFFS*PLDSIZE)
 | 
			
		||||
	cmp	r12, lr, lsr #6
 | 
			
		||||
	ble	.L_neon_prime_pump
 | 
			
		||||
 | 
			
		||||
2:      /* fix-up the remaining count and make sure we have >= 32 bytes left */
 | 
			
		||||
        adds        r2, r2, #32
 | 
			
		||||
        blo         4f
 | 
			
		||||
	itt	gt
 | 
			
		||||
	movgt	r9, #(PLDOFFS)
 | 
			
		||||
	rsbsgt	r9, r9, lr, lsr #6
 | 
			
		||||
	ble	.L_neon_prime_pump
 | 
			
		||||
 | 
			
		||||
        /* Copy 32 bytes. These cache lines were already preloaded */
 | 
			
		||||
        vld1.8      {d0 - d3},  [r1]!
 | 
			
		||||
        sub         r2, r2, #32
 | 
			
		||||
        vst1.8      {d0 - d3},  [r0, :128]!
 | 
			
		||||
	add	r10, r1, lr
 | 
			
		||||
	bic	r10, #0x3F
 | 
			
		||||
 | 
			
		||||
4:      /* less than 32 left */
 | 
			
		||||
        add         r2, r2, #32
 | 
			
		||||
        tst         r2, #0x10
 | 
			
		||||
        beq         5f
 | 
			
		||||
        // copies 16 bytes, 128-bits aligned
 | 
			
		||||
        vld1.8      {d0, d1}, [r1]!
 | 
			
		||||
        vst1.8      {d0, d1}, [r0, :128]!
 | 
			
		||||
	sub	r12, r12, lr, lsr #6
 | 
			
		||||
 | 
			
		||||
5:      /* copy up to 15-bytes (count in r2) */
 | 
			
		||||
        movs        ip, r2, lsl #29
 | 
			
		||||
	cmp	r9, r12
 | 
			
		||||
	itee	le
 | 
			
		||||
	suble	r12, r12, r9
 | 
			
		||||
	movgt	r9, r12
 | 
			
		||||
	movgt	r12, #0
 | 
			
		||||
 | 
			
		||||
	pld	[r1, #((PLDOFFS-1)*PLDSIZE)]
 | 
			
		||||
.L_neon_copy_64_loop_outer_doublepld:
 | 
			
		||||
	pld	[r1, #((PLDOFFS)*PLDSIZE)]
 | 
			
		||||
	vld1.32	{q0, q1}, [r1]!
 | 
			
		||||
	vld1.32	{q2, q3}, [r1]!
 | 
			
		||||
	ldr	r3, [r10]
 | 
			
		||||
	subs	r9, r9, #1
 | 
			
		||||
	vst1.32	{q0, q1}, [r0]!
 | 
			
		||||
	vst1.32	{q2, q3}, [r0]!
 | 
			
		||||
	add	r10, #64
 | 
			
		||||
	bne	.L_neon_copy_64_loop_outer_doublepld
 | 
			
		||||
	cmp	r12, #0
 | 
			
		||||
	beq	.L_neon_pop_before_nopld
 | 
			
		||||
 | 
			
		||||
	cmp	r12, #(512*1024/64)
 | 
			
		||||
	blt	.L_neon_copy_64_loop_outer
 | 
			
		||||
 | 
			
		||||
.L_neon_copy_64_loop_ddr:
 | 
			
		||||
	vld1.32	{q0, q1}, [r1]!
 | 
			
		||||
	vld1.32	{q2, q3}, [r1]!
 | 
			
		||||
	pld	[r10]
 | 
			
		||||
	subs	r12, r12, #1
 | 
			
		||||
	vst1.32	{q0, q1}, [r0]!
 | 
			
		||||
	vst1.32	{q2, q3}, [r0]!
 | 
			
		||||
	add	r10, #64
 | 
			
		||||
	bne	.L_neon_copy_64_loop_ddr
 | 
			
		||||
	b	.L_neon_pop_before_nopld
 | 
			
		||||
 | 
			
		||||
.L_neon_prime_pump:
 | 
			
		||||
	mov	lr, #(PLDOFFS*PLDSIZE)
 | 
			
		||||
	add	r10, r1, #(PLDOFFS*PLDSIZE)
 | 
			
		||||
	bic	r10, #0x3F
 | 
			
		||||
	sub	r12, r12, #PLDOFFS
 | 
			
		||||
	ldr	r3, [r10, #(-1*PLDSIZE)]
 | 
			
		||||
 | 
			
		||||
.L_neon_copy_64_loop_outer:
 | 
			
		||||
	vld1.32	{q0, q1}, [r1]!
 | 
			
		||||
	vld1.32	{q2, q3}, [r1]!
 | 
			
		||||
	ldr	r3, [r10]
 | 
			
		||||
	subs	r12, r12, #1
 | 
			
		||||
	vst1.32	{q0, q1}, [r0]!
 | 
			
		||||
	vst1.32	{q2, q3}, [r0]!
 | 
			
		||||
	add	r10, #64
 | 
			
		||||
	bne	.L_neon_copy_64_loop_outer
 | 
			
		||||
 | 
			
		||||
.L_neon_pop_before_nopld:
 | 
			
		||||
	mov	r12, lr, lsr #6
 | 
			
		||||
	pop	{r9, r10}
 | 
			
		||||
	.cfi_adjust_cfa_offset -8
 | 
			
		||||
	.cfi_restore r9
 | 
			
		||||
	.cfi_restore r10
 | 
			
		||||
 | 
			
		||||
.L_neon_copy_64_loop_nopld:
 | 
			
		||||
	vld1.32	{q8, q9}, [r1]!
 | 
			
		||||
	vld1.32	{q10, q11}, [r1]!
 | 
			
		||||
	subs	r12, r12, #1
 | 
			
		||||
	vst1.32	{q8, q9}, [r0]!
 | 
			
		||||
	vst1.32	{q10, q11}, [r0]!
 | 
			
		||||
	bne	.L_neon_copy_64_loop_nopld
 | 
			
		||||
	ands	r2, r2, #0x3f
 | 
			
		||||
	beq	.L_neon_exit
 | 
			
		||||
 | 
			
		||||
.L_neon_copy_32_a:
 | 
			
		||||
	movs	r3, r2, lsl #27
 | 
			
		||||
	bcc	.L_neon_16
 | 
			
		||||
	vld1.32	{q0,q1}, [r1]!
 | 
			
		||||
	vst1.32	{q0,q1}, [r0]!
 | 
			
		||||
 | 
			
		||||
.L_neon_16:
 | 
			
		||||
	bpl	.L_neon_lt16
 | 
			
		||||
	vld1.32	{q8}, [r1]!
 | 
			
		||||
	vst1.32	{q8}, [r0]!
 | 
			
		||||
	ands	r2, r2, #0x0f
 | 
			
		||||
	beq	.L_neon_exit
 | 
			
		||||
 | 
			
		||||
.L_neon_lt16:
 | 
			
		||||
	movs	r3, r2, lsl #29
 | 
			
		||||
	bcc	1f
 | 
			
		||||
	vld1.8	{d0}, [r1]!
 | 
			
		||||
	vst1.8	{d0}, [r0]!
 | 
			
		||||
1:      bge         2f
 | 
			
		||||
1:
 | 
			
		||||
	bge	.L_neon_lt4
 | 
			
		||||
	vld4.8	{d0[0], d1[0], d2[0], d3[0]}, [r1]!
 | 
			
		||||
	vst4.8	{d0[0], d1[0], d2[0], d3[0]}, [r0]!
 | 
			
		||||
2:      movs        ip, r2, lsl #31
 | 
			
		||||
        itt         mi
 | 
			
		||||
        ldrbmi      r3, [r1], #1
 | 
			
		||||
        strbmi      r3, [r0], #1
 | 
			
		||||
        itttt       cs
 | 
			
		||||
        ldrbcs      ip, [r1], #1
 | 
			
		||||
        ldrbcs      lr, [r1], #1
 | 
			
		||||
        strbcs      ip, [r0], #1
 | 
			
		||||
        strbcs      lr, [r0], #1
 | 
			
		||||
 | 
			
		||||
        ldmfd       sp!, {r0, pc}
 | 
			
		||||
END(MEMCPY_BASE)
 | 
			
		||||
.L_neon_lt4:
 | 
			
		||||
	movs	r2, r2, lsl #31
 | 
			
		||||
	itt	cs
 | 
			
		||||
	ldrhcs	r3, [r1], #2
 | 
			
		||||
	strhcs	r3, [r0], #2
 | 
			
		||||
	itt	mi
 | 
			
		||||
	ldrbmi	r3, [r1]
 | 
			
		||||
	strbmi	r3, [r0]
 | 
			
		||||
 | 
			
		||||
.L_neon_exit:
 | 
			
		||||
	pop	{r0, pc}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user