am e6a33cef: Merge "libm: arm: Add arm specific floor() optimization"
* commit 'e6a33cefe4b3759fb77d9e69356cb50a97ea7e54': libm: arm: Add arm specific floor() optimization
This commit is contained in:
commit
d957bd08b0
@ -130,7 +130,6 @@ LOCAL_SRC_FILES := \
|
|||||||
upstream-freebsd/lib/msun/src/s_fdim.c \
|
upstream-freebsd/lib/msun/src/s_fdim.c \
|
||||||
upstream-freebsd/lib/msun/src/s_finite.c \
|
upstream-freebsd/lib/msun/src/s_finite.c \
|
||||||
upstream-freebsd/lib/msun/src/s_finitef.c \
|
upstream-freebsd/lib/msun/src/s_finitef.c \
|
||||||
upstream-freebsd/lib/msun/src/s_floor.c \
|
|
||||||
upstream-freebsd/lib/msun/src/s_floorf.c \
|
upstream-freebsd/lib/msun/src/s_floorf.c \
|
||||||
upstream-freebsd/lib/msun/src/s_fma.c \
|
upstream-freebsd/lib/msun/src/s_fma.c \
|
||||||
upstream-freebsd/lib/msun/src/s_fmaf.c \
|
upstream-freebsd/lib/msun/src/s_fmaf.c \
|
||||||
@ -264,20 +263,39 @@ LOCAL_SRC_FILES += \
|
|||||||
LOCAL_SRC_FILES_arm += \
|
LOCAL_SRC_FILES_arm += \
|
||||||
arm/fenv.c \
|
arm/fenv.c \
|
||||||
|
|
||||||
|
# s_floor.S requires neon instructions.
|
||||||
|
ifdef TARGET_2ND_ARCH
|
||||||
|
arch_variant := $(TARGET_2ND_ARCH_VARIANT)
|
||||||
|
else
|
||||||
|
arch_variant := $(TARGET_ARCH_VARIANT)
|
||||||
|
endif
|
||||||
|
|
||||||
|
# Use the C version on armv7-a since it doesn't support neon instructions.
|
||||||
|
ifeq ($(arch_variant),armv7-a)
|
||||||
|
LOCAL_SRC_FILES_arm += upstream-freebsd/lib/msun/src/s_floor.c
|
||||||
|
else
|
||||||
|
LOCAL_SRC_FILES_arm += arm/s_floor.S
|
||||||
|
endif
|
||||||
|
|
||||||
LOCAL_SRC_FILES_arm64 += \
|
LOCAL_SRC_FILES_arm64 += \
|
||||||
arm64/fenv.c \
|
arm64/fenv.c \
|
||||||
|
upstream-freebsd/lib/msun/src/s_floor.c \
|
||||||
|
|
||||||
LOCAL_SRC_FILES_mips += \
|
LOCAL_SRC_FILES_mips += \
|
||||||
mips/fenv.c \
|
mips/fenv.c \
|
||||||
|
upstream-freebsd/lib/msun/src/s_floor.c \
|
||||||
|
|
||||||
LOCAL_SRC_FILES_mips64 += \
|
LOCAL_SRC_FILES_mips64 += \
|
||||||
mips/fenv.c \
|
mips/fenv.c \
|
||||||
|
upstream-freebsd/lib/msun/src/s_floor.c \
|
||||||
|
|
||||||
LOCAL_SRC_FILES_x86 += \
|
LOCAL_SRC_FILES_x86 += \
|
||||||
i387/fenv.c \
|
i387/fenv.c \
|
||||||
|
upstream-freebsd/lib/msun/src/s_floor.c \
|
||||||
|
|
||||||
LOCAL_SRC_FILES_x86_64 += \
|
LOCAL_SRC_FILES_x86_64 += \
|
||||||
amd64/fenv.c \
|
amd64/fenv.c \
|
||||||
|
upstream-freebsd/lib/msun/src/s_floor.c \
|
||||||
|
|
||||||
LOCAL_C_INCLUDES_x86 += $(LOCAL_PATH)/i387
|
LOCAL_C_INCLUDES_x86 += $(LOCAL_PATH)/i387
|
||||||
|
|
||||||
@ -297,6 +315,9 @@ LOCAL_CFLAGS := \
|
|||||||
-Wno-unknown-pragmas \
|
-Wno-unknown-pragmas \
|
||||||
-fvisibility=hidden \
|
-fvisibility=hidden \
|
||||||
|
|
||||||
|
LOCAL_ASFLAGS := \
|
||||||
|
-Ibionic/libc \
|
||||||
|
|
||||||
# Workaround the GCC "(long)fn -> lfn" optimization bug which will result in
|
# Workaround the GCC "(long)fn -> lfn" optimization bug which will result in
|
||||||
# self recursions for lrint, lrintf, and lrintl.
|
# self recursions for lrint, lrintf, and lrintl.
|
||||||
# BUG: 14225968
|
# BUG: 14225968
|
||||||
|
142
libm/arm/s_floor.S
Normal file
142
libm/arm/s_floor.S
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved.
|
||||||
|
* Johnny Qiu <joqiu@nvidia.com>
|
||||||
|
* Shu Zhang <chazhang@nvidia.com>
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met:
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above
|
||||||
|
* copyright notice, this list of conditions and the following
|
||||||
|
* disclaimer in the documentation and/or other materials provided
|
||||||
|
* with the distribution.
|
||||||
|
* * Neither the name of The Linux Foundation nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||||
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||||
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
||||||
|
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <float.h>
|
||||||
|
#include <private/bionic_asm.h>
|
||||||
|
|
||||||
|
/*
 * double floor(double x)
 *
 * Rounds x toward negative infinity by operating directly on the IEEE-754
 * binary64 bit pattern held in core registers.
 *
 * In:    r0 = low word of x (mantissa bits 31..0)
 *        r1 = high word of x (sign, 11-bit exponent, mantissa bits 51..32)
 * Out:   r0/r1 = floor(x), same layout
 * Clobb: r2, r3, d0, d1, flags
 *
 * Register roles after the preamble:
 *        r1:r0 = |x|, r2 = unbiased exponent, r3 = sign bit of x (0 or 1<<31)
 *
 * The only VFP scratch registers used are d0/d1. They are caller-saved, and
 * the argument arrives in r0/r1, so nothing live is lost. Staying out of
 * d16-d31 keeps the routine usable on cores that implement only the
 * 16-register VFPv3-D16 bank.
 */
ENTRY(floor)                            /* x in r0, r1 */

    and     r3, r1, #0x80000000         /* r3 = sign(x) */
    bic     r1, r1, #0x80000000         /* r1:r0 = abs(x) */

    /*
     * r2 = unbiased exponent. The bias (0x3ff) is removed in two steps
     * because 0x3ff is not encodable as an ARM data-processing immediate.
     */
    lsr     r2, r1, #20
    sub     r2, r2, #0x3fc
    subs    r2, r2, #0x3                /* r2 <- exp, flags <- sign of exp */

    /* exp < 0, i.e. |x| < 1.0 (covers zero and subnormals) */
    blt     .Lx_lt_one

    /* x < 0? */
    cmp     r3, #0
    bne     .Lclr_frac_neg

    /* x >= 1.0 from here on: floor(x) is plain truncation. */

    /* exp <= 20 (|x| < 2^21): every integer bit lives in the high word */
    cmp     r2, #20
    ble     .Lclr_frac_r1

    /* 20 < exp < 52 (|x| < 2^52): fraction bits are confined to the low word */
    cmp     r2, #52
    blt     .Lclr_frac_r0

    /* exp >= 52: already integral, or Inf/NaN; return x unchanged */
    bx      lr

.Lclr_frac_r1:
    /* Drop the low (20 - exp) bits of the high word and all of the low word. */
    rsb     r2, r2, #20                 /* r2 = fraction bits held in r1 */
    lsr     r1, r1, r2
    lsl     r1, r1, r2
    mov     r0, #0
    bx      lr

.Lclr_frac_r0:
    /* Drop the low (52 - exp) bits of the low word; r2 ends up in 1..31. */
    rsb     r2, r2, #52                 /* r2 = fraction bits held in r0 */
    lsr     r0, r0, r2
    lsl     r0, r0, r2
    bx      lr

.Lclr_frac_neg:
    /* x <= -1.0: truncate |x|, remembering in r3 whether a fraction existed. */

    /* exp <= 20 (|x| < 2^21)? */
    cmp     r2, #20
    ble     .Lclr_frac_r1_neg

    /* 20 < exp < 52 (|x| < 2^52)? */
    cmp     r2, #52
    blt     .Lclr_frac_r0_neg

    /* exp >= 52: already integral, or Inf/NaN; restore the sign and return */
    orr     r1, r1, #0x80000000
    bx      lr

.Lclr_frac_r1_neg:
    rsb     r2, r2, #20                 /* r2 = fraction bits held in r1 */
    mov     r3, #1
    lsl     r3, r3, r2
    sub     r3, r3, #1                  /* r3 = mask of fraction bits in r1 */
    and     r3, r1, r3
    orr     r3, r3, r0                  /* r3 != 0 iff x had a fractional part */
    lsr     r1, r1, r2
    lsl     r1, r1, r2
    mov     r0, #0
    b       .Lreturn_x_neg

.Lclr_frac_r0_neg:
    rsb     r2, r2, #52                 /* r2 = fraction bits held in r0 (1..31) */
    mov     r3, #1
    lsl     r3, r3, r2
    sub     r3, r3, #1                  /* r3 = mask of fraction bits in r0 */
    and     r3, r0, r3                  /* r3 != 0 iff x had a fractional part */
    lsr     r0, r0, r2
    lsl     r0, r0, r2
    b       .Lreturn_x_neg

.Lx_lt_one:
    /* x == +-0? Return it with its original sign. */
    cmp     r0, #0
    cmpeq   r1, #0
    orreq   r1, r1, r3
    bxeq    lr

    /*
     * 0 < |x| < 1: result is (x > 0) ? +0.0 : -1.0.
     * High word: 0x00100000 - 0x00100000 = 0x00000000 (+0.0)
     *            0xc0000000 - 0x00100000 = 0xbff00000 (-1.0)
     */
    mov     r1, #0x00100000
    mov     r0, #0
    cmp     r3, #0
    movne   r1, #0xc0000000
    sub     r1, r1, #0x00100000
    bx      lr

.Lreturn_x_neg:
    /*
     * r1:r0 = trunc(|x|), r3 = discarded fraction bits.
     * orr leaves the flags from cmp untouched, so bxeq still tests r3.
     */
    cmp     r3, #0
    orr     r1, r1, #0x80000000         /* re-apply the sign: r1:r0 = trunc(x) */
    bxeq    lr                          /* no fraction: x was already integral */

    /* x had a fraction: floor(x) = trunc(x) - 1.0 (exact, since |x| < 2^52) */
    vmov    d0, r0, r1
    vmov.f64 d1, #1.0
    vsub.f64 d0, d0, d1
    vmov    r0, r1, d0
    bx      lr

END(floor)
|
||||||
|
|
||||||
|
/*
 * When long double has a 53-bit mantissa (LDBL_MANT_DIG from <float.h>),
 * publish floorl as a weak symbol with the same value as floor.
 */
#if LDBL_MANT_DIG == 53
    .weak   floorl
    .set    floorl, floor
#endif
|
Loading…
x
Reference in New Issue
Block a user