diff --git a/libc/arch-arm64/arm64.mk b/libc/arch-arm64/arm64.mk
index e44ee3150..8b7970c9b 100644
--- a/libc/arch-arm64/arm64.mk
+++ b/libc/arch-arm64/arm64.mk
@@ -3,7 +3,6 @@ libc_common_src_files_arm64 := \
     bionic/memchr.c \
     bionic/memrchr.c \
-    bionic/strchr.cpp \
     bionic/strrchr.cpp \
     upstream-freebsd/lib/libc/string/wcscat.c \
     upstream-freebsd/lib/libc/string/wcschr.c \
diff --git a/libc/arch-arm64/denver64/denver64.mk b/libc/arch-arm64/denver64/denver64.mk
index 36146bfa8..9119d2aa7 100644
--- a/libc/arch-arm64/denver64/denver64.mk
+++ b/libc/arch-arm64/denver64/denver64.mk
@@ -3,6 +3,7 @@ libc_bionic_src_files_arm64 += \
     arch-arm64/denver64/bionic/memcpy.S \
     arch-arm64/generic/bionic/memmove.S \
     arch-arm64/denver64/bionic/memset.S \
+    arch-arm64/generic/bionic/strchr.S \
    arch-arm64/generic/bionic/strcmp.S \
    arch-arm64/generic/bionic/strlen.S \
    arch-arm64/generic/bionic/strncmp.S \
diff --git a/libc/arch-arm64/generic-neon/generic-neon.mk b/libc/arch-arm64/generic-neon/generic-neon.mk
index 2cbe3cf2f..6cbe335ee 100644
--- a/libc/arch-arm64/generic-neon/generic-neon.mk
+++ b/libc/arch-arm64/generic-neon/generic-neon.mk
@@ -2,6 +2,7 @@ libc_bionic_src_files_arm64 += \
     arch-arm64/generic/bionic/memcmp.S \
     arch-arm64/generic/bionic/memmove.S \
     arch-arm64/generic/bionic/memset.S \
+    arch-arm64/generic/bionic/strchr.S \
    arch-arm64/generic/bionic/strcmp.S \
    arch-arm64/generic/bionic/strlen.S \
    arch-arm64/generic/bionic/strncmp.S \
diff --git a/libc/arch-arm64/generic/bionic/strchr.S b/libc/arch-arm64/generic/bionic/strchr.S
new file mode 100644
index 000000000..158eaf72e
--- /dev/null
+++ b/libc/arch-arm64/generic/bionic/strchr.S
@@ -0,0 +1,153 @@
+/*
+   strchr - find a character in a string
+
+   Copyright (c) 2014, ARM Limited
+   All rights Reserved.
+   Copyright (c) 2014, Linaro Ltd.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in the
+         documentation and/or other materials provided with the distribution.
+       * Neither the name of the company nor the names of its contributors
+         may be used to endorse or promote products derived from this
+         software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Neon Available.
+ */
+
+#include <private/bionic_asm.h>
+
+/* Arguments and results.  */
+#define srcin      x0
+#define chrin      w1
+
+#define result     x0
+
+#define src        x2
+#define tmp1       x3
+#define wtmp2      w4
+#define tmp3       x5
+
+#define vrepchr    v0
+#define vdata1     v1
+#define vdata2     v2
+#define vhas_nul1  v3
+#define vhas_nul2  v4
+#define vhas_chr1  v5
+#define vhas_chr2  v6
+#define vrepmask_0 v7
+#define vrepmask_c v16
+#define vend1      v17
+#define vend2      v18
+
+/* Core algorithm.
+
+   For each 32-byte hunk we calculate a 64-bit syndrome value, with
+   two bits per byte (LSB is always in bits 0 and 1, for both big-
+   and little-endian systems).  For each tuple, bit 0 is set iff
+   the relevant byte matched the requested character; bit 1 is set
+   iff the relevant byte matched the NUL end of string (we trigger
+   off bit0 for the special case of looking for NUL).  Since the bits
+   in the syndrome reflect exactly the order in which things occur
+   in the original string, a count_trailing_zeros() operation will
+   identify exactly which byte is causing the termination, and why.  */
+
+/* Locals and temporaries.  */
+
+ENTRY(strchr)
+    /* Magic constant 0x40100401 to allow us to identify which lane
+       matches the requested byte.  Magic constant 0x80200802 used
+       similarly for NUL termination.  */
+    mov     wtmp2, #0x0401
+    movk    wtmp2, #0x4010, lsl #16
+    dup     vrepchr.16b, chrin
+    bic     src, srcin, #31        /* Work with aligned 32-byte hunks.  */
+    dup     vrepmask_c.4s, wtmp2
+    ands    tmp1, srcin, #31
+    add     vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s  /* equiv: lsl #1 */
+    b.eq    .Lloop
+
+    /* Input string is not 32-byte aligned.  Rather than forcing
+       the padding bytes to a safe value, we calculate the syndrome
+       for all the bytes, but then mask off those bits of the
+       syndrome that are related to the padding.  */
+    ld1     {vdata1.16b, vdata2.16b}, [src], #32
+    neg     tmp1, tmp1
+    cmeq    vhas_nul1.16b, vdata1.16b, #0
+    cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
+    cmeq    vhas_nul2.16b, vdata2.16b, #0
+    cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
+    and     vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
+    and     vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
+    and     vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
+    and     vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
+    orr     vend1.16b, vhas_nul1.16b, vhas_chr1.16b
+    orr     vend2.16b, vhas_nul2.16b, vhas_chr2.16b
+    lsl     tmp1, tmp1, #1
+    addp    vend1.16b, vend1.16b, vend2.16b     // 256->128
+    mov     tmp3, #~0
+    addp    vend1.16b, vend1.16b, vend2.16b     // 128->64
+    lsr     tmp1, tmp3, tmp1
+
+    mov     tmp3, vend1.2d[0]
+    bic     tmp1, tmp3, tmp1    // Mask padding bits.
+    cbnz    tmp1, .Ltail
+
+.Lloop:
+    ld1     {vdata1.16b, vdata2.16b}, [src], #32
+    cmeq    vhas_nul1.16b, vdata1.16b, #0
+    cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
+    cmeq    vhas_nul2.16b, vdata2.16b, #0
+    cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
+    /* Use a fast check for the termination condition.  */
+    orr     vend1.16b, vhas_nul1.16b, vhas_chr1.16b
+    orr     vend2.16b, vhas_nul2.16b, vhas_chr2.16b
+    orr     vend1.16b, vend1.16b, vend2.16b
+    addp    vend1.2d, vend1.2d, vend1.2d
+    mov     tmp1, vend1.2d[0]
+    cbz     tmp1, .Lloop
+
+    /* Termination condition found.  Now need to establish exactly why
+       we terminated.  */
+    and     vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
+    and     vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
+    and     vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
+    and     vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
+    orr     vend1.16b, vhas_nul1.16b, vhas_chr1.16b
+    orr     vend2.16b, vhas_nul2.16b, vhas_chr2.16b
+    addp    vend1.16b, vend1.16b, vend2.16b     // 256->128
+    addp    vend1.16b, vend1.16b, vend2.16b     // 128->64
+
+    mov     tmp1, vend1.2d[0]
+.Ltail:
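+    /* Worked example of the decode that follows: if byte 5 of the hunk
+       is the first match, bit 10 (= 2*5) is the lowest set bit of the
+       syndrome.  rbit+clz then give tmp1 = 10, which is even, so the
+       result is (src - 32) + (10 >> 1), i.e. hunk start plus 5.  */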
+    /* Count the trailing zeros, by bit reversing...  */
+    rbit    tmp1, tmp1
+    /* Re-bias source.  */
+    sub     src, src, #32
+    clz     tmp1, tmp1      /* ... and counting the leading zeros.  */
+    /* tmp1 is even if the target character was found first.  Otherwise
+       we've found the end of string and we weren't looking for NUL.  */
+    tst     tmp1, #1
+    add     result, src, tmp1, lsr #1
+    csel    result, result, xzr, eq
+    ret
+END(strchr)
diff --git a/libc/arch-arm64/generic/generic.mk b/libc/arch-arm64/generic/generic.mk
index e10cf6602..49973188f 100644
--- a/libc/arch-arm64/generic/generic.mk
+++ b/libc/arch-arm64/generic/generic.mk
@@ -3,6 +3,7 @@ libc_bionic_src_files_arm64 += \
     arch-arm64/generic/bionic/memcpy.S \
     arch-arm64/generic/bionic/memmove.S \
     arch-arm64/generic/bionic/memset.S \
+    arch-arm64/generic/bionic/strchr.S \
    arch-arm64/generic/bionic/strcmp.S \
    arch-arm64/generic/bionic/strlen.S \
    arch-arm64/generic/bionic/strncmp.S \
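
Note for reviewers unfamiliar with the syndrome trick used by the new assembly: the following is a minimal scalar sketch in C of the same idea. It is not part of this change; strchr_model is a hypothetical name, the 32-byte alignment prologue and padding mask are omitted, and the cmeq/addp NEON reductions are replaced by a plain loop.

#include <stddef.h>
#include <stdint.h>

/* Build a 64-bit syndrome for (up to) one 32-byte hunk: bit 2*i is set
   when byte i matches the requested character, bit 2*i+1 when byte i is
   the NUL terminator.  The lowest set bit then identifies the first
   interesting byte and why it is interesting, exactly as the rbit/clz
   pair does above.  The NEON code builds the same value with cmeq
   against vrepchr/zero, the 0x40100401 and 0x80200802 lane masks, and
   two addp reductions. */
static const char *strchr_model(const char *s, int c) {
    const unsigned char ch = (unsigned char)c;
    for (;;) {
        uint64_t syndrome = 0;
        for (int i = 0; i < 32; i++) {
            const unsigned char b = (unsigned char)s[i];
            if (b == ch) syndrome |= 1ULL << (2 * i);  /* match bit */
            if (b == 0) {                              /* NUL bit */
                syndrome |= 2ULL << (2 * i);
                break;  /* the model must not read past the terminator */
            }
        }
        if (syndrome != 0) {
            int bit = __builtin_ctzll(syndrome);  /* rbit+clz in the asm */
            /* An even first bit means the character was seen no later
               than any NUL (so searching for '\0' still returns the
               terminator); an odd one means we hit end of string. */
            return (bit & 1) ? NULL : s + bit / 2;
        }
        s += 32;
    }
}

The even/odd test corresponds to the tst/csel pair in the assembly, and bit / 2 to the `add result, src, tmp1, lsr #1`.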