diff --git a/libc/Android.mk b/libc/Android.mk index 6c2c4ae1b..54047c89b 100644 --- a/libc/Android.mk +++ b/libc/Android.mk @@ -941,10 +941,6 @@ LOCAL_SRC_FILES := $(libc_bionic_src_files) LOCAL_CFLAGS := $(libc_common_cflags) \ -Wframe-larger-than=2048 \ -# memcpy.S, memchr.S, etc. do not compile with Clang. -LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as -LOCAL_CLANG_ASFLAGS_arm64 += -no-integrated-as - LOCAL_CONLYFLAGS := $(libc_common_conlyflags) LOCAL_CPPFLAGS := $(libc_common_cppflags) -Wold-style-cast LOCAL_C_INCLUDES := $(libc_common_c_includes) bionic/libstdc++/include @@ -972,10 +968,6 @@ LOCAL_SRC_FILES := $(libc_bionic_ndk_src_files) LOCAL_CFLAGS := $(libc_common_cflags) \ -Wframe-larger-than=2048 \ -# memcpy.S, memchr.S, etc. do not compile with Clang. -LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as -LOCAL_CLANG_ASFLAGS_arm64 += -no-integrated-as - LOCAL_CONLYFLAGS := $(libc_common_conlyflags) LOCAL_CPPFLAGS := $(libc_common_cppflags) -Wold-style-cast LOCAL_C_INCLUDES := $(libc_common_c_includes) bionic/libstdc++/include @@ -1024,10 +1016,6 @@ LOCAL_SRC_FILES := $(libc_pthread_src_files) LOCAL_CFLAGS := $(libc_common_cflags) \ -Wframe-larger-than=2048 \ -# memcpy.S, memchr.S, etc. do not compile with Clang. -LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as -LOCAL_CLANG_ASFLAGS_arm64 += -no-integrated-as - LOCAL_CONLYFLAGS := $(libc_common_conlyflags) LOCAL_CPPFLAGS := $(libc_common_cppflags) -Wold-style-cast LOCAL_C_INCLUDES := $(libc_common_c_includes) diff --git a/libc/arch-arm/cortex-a9/bionic/memset.S b/libc/arch-arm/cortex-a9/bionic/memset.S index 299f5a2d2..8ee6ac26c 100644 --- a/libc/arch-arm/cortex-a9/bionic/memset.S +++ b/libc/arch-arm/cortex-a9/bionic/memset.S @@ -35,6 +35,7 @@ */ .fpu neon + .syntax unified ENTRY(__memset_chk) cmp r2, r3 @@ -100,9 +101,9 @@ ENTRY(memset) 1: bge 2f vst1.32 {d0[0]}, [r0]! 2: movs ip, r2, lsl #31 - strmib r1, [r0], #1 - strcsb r1, [r0], #1 - strcsb r1, [r0], #1 + strbmi r1, [r0], #1 + strbcs r1, [r0], #1 + strbcs r1, [r0], #1 ldmfd sp!, {r0} bx lr END(memset) @@ -131,11 +132,11 @@ ENTRY_PRIVATE(__memset_large_copy) orr r1, r1, r1, lsr #16 movs r12, r3, lsl #31 - strcsb r1, [r0], #1 /* can't use strh (alignment unknown) */ - strcsb r1, [r0], #1 - strmib r1, [r0], #1 + strbcs r1, [r0], #1 /* can't use strh (alignment unknown) */ + strbcs r1, [r0], #1 + strbmi r1, [r0], #1 subs r2, r2, r3 - ldmlsfd sp!, {r0, r4-r7, lr} /* return */ + popls {r0, r4-r7, lr} /* return */ bxls lr /* align the destination to a cache-line */ @@ -155,9 +156,9 @@ ENTRY_PRIVATE(__memset_large_copy) /* conditionally writes 0 to 7 words (length in r3) */ movs r3, r3, lsl #28 - stmcsia r0!, {r1, lr} - stmcsia r0!, {r1, lr} - stmmiia r0!, {r1, lr} + stmcs r0!, {r1, lr} + stmcs r0!, {r1, lr} + stmmi r0!, {r1, lr} movs r3, r3, lsl #2 strcs r1, [r0], #4 @@ -172,13 +173,13 @@ ENTRY_PRIVATE(__memset_large_copy) /* conditionally stores 0 to 31 bytes */ movs r2, r2, lsl #28 - stmcsia r0!, {r1,r3,r12,lr} - stmmiia r0!, {r1, lr} + stmcs r0!, {r1,r3,r12,lr} + stmmi r0!, {r1, lr} movs r2, r2, lsl #2 strcs r1, [r0], #4 - strmih r1, [r0], #2 + strhmi r1, [r0], #2 movs r2, r2, lsl #2 - strcsb r1, [r0] + strbcs r1, [r0] ldmfd sp!, {r0, r4-r7, lr} bx lr END(__memset_large_copy) diff --git a/libc/arch-arm/generic/bionic/memcmp.S b/libc/arch-arm/generic/bionic/memcmp.S index 70a2a58ce..c78dbd4bf 100644 --- a/libc/arch-arm/generic/bionic/memcmp.S +++ b/libc/arch-arm/generic/bionic/memcmp.S @@ -40,6 +40,8 @@ * Optimized memcmp() for Cortex-A9. */ +.syntax unified + ENTRY(memcmp) pld [r0, #(CACHE_LINE_SIZE * 0)] pld [r0, #(CACHE_LINE_SIZE * 1)] @@ -161,25 +163,25 @@ ENTRY(memcmp) eors r0, r0, ip ldreq r0, [r4], #4 ldreq ip, [r1, #4]! - eoreqs r0, r0, lr + eorseq r0, r0, lr ldreq r0, [r4], #4 ldreq lr, [r1, #4]! - eoreqs r0, r0, ip + eorseq r0, r0, ip ldreq r0, [r4], #4 ldreq ip, [r1, #4]! - eoreqs r0, r0, lr + eorseq r0, r0, lr ldreq r0, [r4], #4 ldreq lr, [r1, #4]! - eoreqs r0, r0, ip + eorseq r0, r0, ip ldreq r0, [r4], #4 ldreq ip, [r1, #4]! - eoreqs r0, r0, lr + eorseq r0, r0, lr ldreq r0, [r4], #4 ldreq lr, [r1, #4]! - eoreqs r0, r0, ip + eorseq r0, r0, ip ldreq r0, [r4], #4 ldreq ip, [r1, #4]! - eoreqs r0, r0, lr + eorseq r0, r0, lr bne 2f subs r2, r2, #32 bhs 0b @@ -263,17 +265,17 @@ ENTRY(memcmp) ldreq lr, [r1], #4 ldreq r0, [r4], #4 orreq ip, ip, lr, lsl #16 - eoreqs r0, r0, ip + eorseq r0, r0, ip moveq ip, lr, lsr #16 ldreq lr, [r1], #4 ldreq r0, [r4], #4 orreq ip, ip, lr, lsl #16 - eoreqs r0, r0, ip + eorseq r0, r0, ip moveq ip, lr, lsr #16 ldreq lr, [r1], #4 ldreq r0, [r4], #4 orreq ip, ip, lr, lsl #16 - eoreqs r0, r0, ip + eorseq r0, r0, ip bne 7f subs r2, r2, #16 bhs 6b @@ -317,7 +319,7 @@ ENTRY(memcmp) ldreq r7, [r1], #4 ldreq r0, [r4], #4 orreq ip, ip, r7, lsl r6 - eoreqs r0, r0, ip + eorseq r0, r0, ip bne 7f subs r2, r2, #8 bhs 6b diff --git a/libc/arch-arm/generic/bionic/memcpy.S b/libc/arch-arm/generic/bionic/memcpy.S index b0c79abf7..ea5a3998b 100644 --- a/libc/arch-arm/generic/bionic/memcpy.S +++ b/libc/arch-arm/generic/bionic/memcpy.S @@ -37,6 +37,8 @@ * so we have to preserve R0. */ + .syntax unified + ENTRY(__memcpy_chk) cmp r2, r3 bhi __memcpy_chk_fail @@ -81,12 +83,12 @@ ENTRY(memcpy) */ movs r12, r3, lsl #31 sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */ - ldrmib r3, [r1], #1 - ldrcsb r4, [r1], #1 - ldrcsb r12,[r1], #1 - strmib r3, [r0], #1 - strcsb r4, [r0], #1 - strcsb r12,[r0], #1 + ldrbmi r3, [r1], #1 + ldrbcs r4, [r1], #1 + ldrbcs r12,[r1], #1 + strbmi r3, [r0], #1 + strbcs r4, [r0], #1 + strbcs r12,[r0], #1 .Lsrc_aligned: @@ -109,10 +111,10 @@ ENTRY(memcpy) /* conditionally copies 0 to 7 words (length in r3) */ movs r12, r3, lsl #28 - ldmcsia r1!, {r4, r5, r6, r7} /* 16 bytes */ - ldmmiia r1!, {r8, r9} /* 8 bytes */ - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r8, r9} + ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */ + ldmmi r1!, {r8, r9} /* 8 bytes */ + stmcs r0!, {r4, r5, r6, r7} + stmmi r0!, {r8, r9} tst r3, #0x4 ldrne r10,[r1], #4 /* 4 bytes */ strne r10,[r0], #4 @@ -177,18 +179,18 @@ ENTRY(memcpy) /* conditionnaly copies 0 to 31 bytes */ movs r12, r2, lsl #28 - ldmcsia r1!, {r4, r5, r6, r7} /* 16 bytes */ - ldmmiia r1!, {r8, r9} /* 8 bytes */ - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r8, r9} + ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */ + ldmmi r1!, {r8, r9} /* 8 bytes */ + stmcs r0!, {r4, r5, r6, r7} + stmmi r0!, {r8, r9} movs r12, r2, lsl #30 ldrcs r3, [r1], #4 /* 4 bytes */ - ldrmih r4, [r1], #2 /* 2 bytes */ + ldrhmi r4, [r1], #2 /* 2 bytes */ strcs r3, [r0], #4 - strmih r4, [r0], #2 + strhmi r4, [r0], #2 tst r2, #0x1 - ldrneb r3, [r1] /* last byte */ - strneb r3, [r0] + ldrbne r3, [r1] /* last byte */ + strbne r3, [r0] /* we're done! restore everything and return */ 1: ldmfd sp!, {r5-r11} @@ -228,11 +230,11 @@ ENTRY(memcpy) * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) */ movs r5, r5, lsl #31 - strmib r3, [r0], #1 + strbmi r3, [r0], #1 movmi r3, r3, lsr #8 - strcsb r3, [r0], #1 + strbcs r3, [r0], #1 movcs r3, r3, lsr #8 - strcsb r3, [r0], #1 + strbcs r3, [r0], #1 movcs r3, r3, lsr #8 cmp r2, #4 @@ -363,23 +365,23 @@ ENTRY(memcpy) .Lpartial_word_tail: /* we have a partial word in the input buffer */ movs r5, lr, lsl #(31-3) - strmib r3, [r0], #1 + strbmi r3, [r0], #1 movmi r3, r3, lsr #8 - strcsb r3, [r0], #1 + strbcs r3, [r0], #1 movcs r3, r3, lsr #8 - strcsb r3, [r0], #1 + strbcs r3, [r0], #1 /* Refill spilled registers from the stack. Don't update sp. */ ldmfd sp, {r5-r11} .Lcopy_last_3_and_return: movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */ - ldrmib r2, [r1], #1 - ldrcsb r3, [r1], #1 - ldrcsb r12,[r1] - strmib r2, [r0], #1 - strcsb r3, [r0], #1 - strcsb r12,[r0] + ldrbmi r2, [r1], #1 + ldrbcs r3, [r1], #1 + ldrbcs r12,[r1] + strbmi r2, [r0], #1 + strbcs r3, [r0], #1 + strbcs r12,[r0] /* we're done! restore sp and spilled registers and return */ add sp, sp, #28 diff --git a/libc/arch-arm/generic/bionic/memset.S b/libc/arch-arm/generic/bionic/memset.S index be35de9ff..d17a9c4e0 100644 --- a/libc/arch-arm/generic/bionic/memset.S +++ b/libc/arch-arm/generic/bionic/memset.S @@ -35,6 +35,8 @@ * memset() returns its first argument. */ + .syntax unified + ENTRY(__memset_chk) cmp r2, r3 bls done @@ -76,11 +78,11 @@ ENTRY(memset) orr r1, r1, r1, lsr #16 movs r12, r3, lsl #31 - strcsb r1, [r0], #1 /* can't use strh (alignment unknown) */ - strcsb r1, [r0], #1 - strmib r1, [r0], #1 + strbcs r1, [r0], #1 /* can't use strh (alignment unknown) */ + strbcs r1, [r0], #1 + strbmi r1, [r0], #1 subs r2, r2, r3 - ldmlsfd sp!, {r0, r4-r7, lr} /* return */ + popls {r0, r4-r7, lr} /* return */ bxls lr /* align the destination to a cache-line */ @@ -100,9 +102,9 @@ ENTRY(memset) /* conditionally writes 0 to 7 words (length in r3) */ movs r3, r3, lsl #28 - stmcsia r0!, {r1, lr} - stmcsia r0!, {r1, lr} - stmmiia r0!, {r1, lr} + stmcs r0!, {r1, lr} + stmcs r0!, {r1, lr} + stmmi r0!, {r1, lr} movs r3, r3, lsl #2 strcs r1, [r0], #4 @@ -117,13 +119,13 @@ ENTRY(memset) /* conditionally stores 0 to 31 bytes */ movs r2, r2, lsl #28 - stmcsia r0!, {r1,r3,r12,lr} - stmmiia r0!, {r1, lr} + stmcs r0!, {r1,r3,r12,lr} + stmmi r0!, {r1, lr} movs r2, r2, lsl #2 strcs r1, [r0], #4 - strmih r1, [r0], #2 + strhmi r1, [r0], #2 movs r2, r2, lsl #2 - strcsb r1, [r0] + strbcs r1, [r0] ldmfd sp!, {r0, r4-r7, lr} bx lr END(memset) diff --git a/libc/arch-arm/generic/bionic/strcpy.S b/libc/arch-arm/generic/bionic/strcpy.S index 802a62da8..89ea098ad 100644 --- a/libc/arch-arm/generic/bionic/strcpy.S +++ b/libc/arch-arm/generic/bionic/strcpy.S @@ -32,6 +32,8 @@ #include #include +.syntax unified + ENTRY(strcpy) pld [r1, #0] eor r2, r0, r1 @@ -108,15 +110,15 @@ ENTRY(strcpy) #ifdef __ARMEB__ tst r2, #0xff00 iteet ne - strneh r2, [ip], #2 + strhne r2, [ip], #2 lsreq r2, r2, #8 - streqb r2, [ip] + strbeq r2, [ip] tstne r2, #0xff #else tst r2, #0xff itet ne - strneh r2, [ip], #2 - streqb r2, [ip] + strhne r2, [ip], #2 + strbeq r2, [ip] tstne r2, #0xff00 #endif bne 5b diff --git a/libc/arch-arm/krait/bionic/memset.S b/libc/arch-arm/krait/bionic/memset.S index e9f643133..a4fbe178f 100644 --- a/libc/arch-arm/krait/bionic/memset.S +++ b/libc/arch-arm/krait/bionic/memset.S @@ -37,6 +37,7 @@ */ .fpu neon + .syntax unified ENTRY(__memset_chk) cmp r2, r3 @@ -98,9 +99,9 @@ ENTRY(memset) 1: bge 2f vst1.32 {d0[0]}, [r0]! 2: movs ip, r2, lsl #31 - strmib r1, [r0], #1 - strcsb r1, [r0], #1 - strcsb r1, [r0], #1 + strbmi r1, [r0], #1 + strbcs r1, [r0], #1 + strbcs r1, [r0], #1 ldmfd sp!, {r0} bx lr END(memset) diff --git a/libc/arch-arm64/generic/bionic/memchr.S b/libc/arch-arm64/generic/bionic/memchr.S index e5ea57d8c..a00dd8dee 100644 --- a/libc/arch-arm64/generic/bionic/memchr.S +++ b/libc/arch-arm64/generic/bionic/memchr.S @@ -101,7 +101,7 @@ ENTRY(memchr) and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */ addp vend.16b, vend.16b, vend.16b /* 128->64 */ - mov synd, vend.2d[0] + mov synd, vend.d[0] /* Clear the soff*2 lower bits */ lsl tmp, soff, #1 lsr synd, synd, tmp @@ -121,7 +121,7 @@ ENTRY(memchr) /* Use a fast check for the termination condition */ orr vend.16b, vhas_chr1.16b, vhas_chr2.16b addp vend.2d, vend.2d, vend.2d - mov synd, vend.2d[0] + mov synd, vend.d[0] /* We're not out of data, loop if we haven't found the character */ cbz synd, .Lloop @@ -131,7 +131,7 @@ ENTRY(memchr) and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */ addp vend.16b, vend.16b, vend.16b /* 128->64 */ - mov synd, vend.2d[0] + mov synd, vend.d[0] /* Only do the clear for the last possible block */ b.hi .Ltail diff --git a/libc/arch-arm64/generic/bionic/strchr.S b/libc/arch-arm64/generic/bionic/strchr.S index 469b83c63..b54106d93 100644 --- a/libc/arch-arm64/generic/bionic/strchr.S +++ b/libc/arch-arm64/generic/bionic/strchr.S @@ -109,7 +109,7 @@ ENTRY(strchr) addp vend1.16b, vend1.16b, vend2.16b // 128->64 lsr tmp1, tmp3, tmp1 - mov tmp3, vend1.2d[0] + mov tmp3, vend1.d[0] bic tmp1, tmp3, tmp1 // Mask padding bits. cbnz tmp1, .Ltail @@ -124,7 +124,7 @@ ENTRY(strchr) orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b orr vend1.16b, vend1.16b, vend2.16b addp vend1.2d, vend1.2d, vend1.2d - mov tmp1, vend1.2d[0] + mov tmp1, vend1.d[0] cbz tmp1, .Lloop /* Termination condition found. Now need to establish exactly why @@ -138,7 +138,7 @@ ENTRY(strchr) addp vend1.16b, vend1.16b, vend2.16b // 256->128 addp vend1.16b, vend1.16b, vend2.16b // 128->64 - mov tmp1, vend1.2d[0] + mov tmp1, vend1.d[0] .Ltail: /* Count the trailing zeros, by bit reversing... */ rbit tmp1, tmp1