am e6a33cef: Merge "libm: arm: Add arm specific floor() optimization"

* commit 'e6a33cefe4b3759fb77d9e69356cb50a97ea7e54':
  libm: arm: Add arm specific floor() optimization
This commit is contained in:
Christopher Ferris 2015-02-27 20:28:36 +00:00 committed by Android Git Automerger
commit d957bd08b0
2 changed files with 164 additions and 1 deletions

View File

@ -130,7 +130,6 @@ LOCAL_SRC_FILES := \
upstream-freebsd/lib/msun/src/s_fdim.c \
upstream-freebsd/lib/msun/src/s_finite.c \
upstream-freebsd/lib/msun/src/s_finitef.c \
upstream-freebsd/lib/msun/src/s_floor.c \
upstream-freebsd/lib/msun/src/s_floorf.c \
upstream-freebsd/lib/msun/src/s_fma.c \
upstream-freebsd/lib/msun/src/s_fmaf.c \
@ -264,20 +263,39 @@ LOCAL_SRC_FILES += \
LOCAL_SRC_FILES_arm += \
arm/fenv.c \
# s_floor.S requires neon instructions.
# Select the architecture variant that decides which floor() source is built:
# the secondary architecture's variant when TARGET_2ND_ARCH is defined,
# otherwise the primary architecture's variant.
# NOTE(review): presumably the secondary arch is the 32-bit arm side of a
# multilib build -- confirm against the build system.
ifdef TARGET_2ND_ARCH
arch_variant := $(TARGET_2ND_ARCH_VARIANT)
else
arch_variant := $(TARGET_ARCH_VARIANT)
endif
# Use the C version on armv7-a since it doesn't support neon instructions.
# Every other variant value builds the assembly version, arm/s_floor.S.
ifeq ($(arch_variant),armv7-a)
LOCAL_SRC_FILES_arm += upstream-freebsd/lib/msun/src/s_floor.c
else
LOCAL_SRC_FILES_arm += arm/s_floor.S
endif
LOCAL_SRC_FILES_arm64 += \
arm64/fenv.c \
upstream-freebsd/lib/msun/src/s_floor.c \
LOCAL_SRC_FILES_mips += \
mips/fenv.c \
upstream-freebsd/lib/msun/src/s_floor.c \
LOCAL_SRC_FILES_mips64 += \
mips/fenv.c \
upstream-freebsd/lib/msun/src/s_floor.c \
LOCAL_SRC_FILES_x86 += \
i387/fenv.c \
upstream-freebsd/lib/msun/src/s_floor.c \
LOCAL_SRC_FILES_x86_64 += \
amd64/fenv.c \
upstream-freebsd/lib/msun/src/s_floor.c \
LOCAL_C_INCLUDES_x86 += $(LOCAL_PATH)/i387
@ -297,6 +315,9 @@ LOCAL_CFLAGS := \
-Wno-unknown-pragmas \
-fvisibility=hidden \
LOCAL_ASFLAGS := \
-Ibionic/libc \
# Work around the GCC "(long)fn -> lfn" optimization bug, which results in
# self-recursion in lrint, lrintf, and lrintl.
# BUG: 14225968

142
libm/arm/s_floor.S Normal file
View File

@ -0,0 +1,142 @@
/*
* Copyright (c) 2013-2014, NVIDIA Corporation. All rights reserved.
* Johnny Qiu <joqiu@nvidia.com>
* Shu Zhang <chazhang@nvidia.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* * Neither the name of The Linux Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <float.h>
#include <private/bionic_asm.h>
/*
 * double floor(double x)
 *
 * In:   r0, r1 = x.  The sign and exponent are read from r1, so r1 is
 *       treated as the high word and r0 as the low word.
 * Out:  r0, r1 = x rounded toward minus infinity to an integral value.
 * Uses: r2, r3, d16, d18 and the condition flags as scratch.
 *
 * The fraction bits are masked off in integer registers.  Only a negative
 * x that actually had fraction bits needs floating point: 1.0 is then
 * subtracted from the truncated value.
 */
ENTRY(floor)
        and     r3, r1, #0x80000000     /* r3 = sign bit of x */
        bic     r1, r1, #0x80000000     /* r1 = high word of |x| */

        /* r2 = unbiased exponent: (r1 >> 20) - 0x3ff, subtracted in two steps */
        lsr     r2, r1, #20
        sub     r2, r2, #0x3fc
        subs    r2, r2, #0x3
        blt     .Lfloor_below_one       /* exp < 0, i.e. |x| < 1.0 */

        cmp     r3, #0
        bne     .Lfloor_negative

        /* ---- x >= 1.0 ---- */
        cmp     r2, #20
        ble     .Lfloor_pos_trunc_hi
        cmp     r2, #52
        bxge    lr                      /* exp >= 52: return x unchanged */

        /* 20 < exp < 52: clear the low (52 - exp) bits of r0 */
        rsb     r2, r2, #52
        mvn     r3, #0
        lsl     r3, r3, r2              /* r3 = mask of the bits to keep */
        and     r0, r0, r3
        bx      lr

.Lfloor_pos_trunc_hi:
        /* exp <= 20: clear the low (20 - exp) bits of r1 and all of r0 */
        rsb     r2, r2, #20
        mvn     r3, #0
        lsl     r3, r3, r2              /* r3 = mask of the bits to keep */
        and     r1, r1, r3
        mov     r0, #0
        bx      lr

        /* ---- x <= -1.0 ---- */
.Lfloor_negative:
        cmp     r2, #20
        ble     .Lfloor_neg_trunc_hi
        cmp     r2, #52
        orrge   r1, r1, #0x80000000     /* exp >= 52: put the sign back ... */
        bxge    lr                      /* ... and return x unchanged */

        /* 20 < exp < 52: split r0 into kept bits and dropped fraction bits */
        rsb     r2, r2, #52
        mvn     r3, #0
        lsl     r3, r3, r2              /* r3 = mask of the bits to keep */
        and     r2, r0, r3              /* r2 = low word with fraction cleared */
        bic     r3, r0, r3              /* r3 = dropped bits (0 if x was integral) */
        mov     r0, r2
        /* fall through */

.Lfloor_neg_finish:
        /* Here r0, r1 = truncated |x| and r3 != 0 iff fraction bits were dropped. */
        orr     r1, r1, #0x80000000     /* restore the sign (flags untouched) */
        cmp     r3, #0
        bxeq    lr                      /* x was already integral */
        /* Truncation moved a negative x toward zero; step down by 1.0. */
        vmov    d16, r0, r1
        vmov.f64 d18, #1.0
        vsub.f64 d16, d16, d18
        vmov    r0, r1, d16
        bx      lr

.Lfloor_neg_trunc_hi:
        /* exp <= 20: fraction is the low (20 - exp) bits of r1 plus all of r0 */
        rsb     r2, r2, #20
        mvn     r3, #0
        lsl     r3, r3, r2              /* r3 = mask of the bits to keep in r1 */
        bic     r2, r1, r3              /* r2 = dropped bits of the high word */
        and     r1, r1, r3
        orr     r3, r2, r0              /* r3 = every dropped bit */
        mov     r0, #0
        b       .Lfloor_neg_finish

        /* ---- |x| < 1.0 ---- */
.Lfloor_below_one:
        orrs    r2, r0, r1              /* Z set iff |x| is zero */
        orreq   r1, r1, r3              /* +-0 is returned with its sign */
        bxeq    lr
        /* Non-zero: the result is +0.0 for x > 0 and -1.0 for x < 0. */
        mov     r0, #0
        cmp     r3, #0
        moveq   r1, #0
        movne   r1, #0xbf000000         /* high word of -1.0 is 0xbff00000, */
        orrne   r1, r1, #0x00f00000     /* assembled from two immediates */
        bx      lr
END(floor)
/*
 * When long double has a 53-bit mantissa (LDBL_MANT_DIG == 53), floorl is
 * defined as a weak symbol with the same value as floor.
 */
#if LDBL_MANT_DIG == 53
        .weak   floorl
        .set    floorl, floor
#endif