am e6a33cef: Merge "libm: arm: Add arm specific floor() optimization"
				
					
				
			* commit 'e6a33cefe4b3759fb77d9e69356cb50a97ea7e54': libm: arm: Add arm specific floor() optimization
This commit is contained in:
		@@ -130,7 +130,6 @@ LOCAL_SRC_FILES := \
 | 
				
			|||||||
    upstream-freebsd/lib/msun/src/s_fdim.c \
 | 
					    upstream-freebsd/lib/msun/src/s_fdim.c \
 | 
				
			||||||
    upstream-freebsd/lib/msun/src/s_finite.c \
 | 
					    upstream-freebsd/lib/msun/src/s_finite.c \
 | 
				
			||||||
    upstream-freebsd/lib/msun/src/s_finitef.c \
 | 
					    upstream-freebsd/lib/msun/src/s_finitef.c \
 | 
				
			||||||
    upstream-freebsd/lib/msun/src/s_floor.c \
 | 
					 | 
				
			||||||
    upstream-freebsd/lib/msun/src/s_floorf.c \
 | 
					    upstream-freebsd/lib/msun/src/s_floorf.c \
 | 
				
			||||||
    upstream-freebsd/lib/msun/src/s_fma.c \
 | 
					    upstream-freebsd/lib/msun/src/s_fma.c \
 | 
				
			||||||
    upstream-freebsd/lib/msun/src/s_fmaf.c \
 | 
					    upstream-freebsd/lib/msun/src/s_fmaf.c \
 | 
				
			||||||
@@ -264,20 +263,39 @@ LOCAL_SRC_FILES += \
 | 
				
			|||||||
LOCAL_SRC_FILES_arm += \
 | 
					LOCAL_SRC_FILES_arm += \
 | 
				
			||||||
    arm/fenv.c \
 | 
					    arm/fenv.c \
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# s_floor.S requires neon instructions.
 | 
				
			||||||
 | 
					ifdef TARGET_2ND_ARCH
 | 
				
			||||||
 | 
					arch_variant := $(TARGET_2ND_ARCH_VARIANT)
 | 
				
			||||||
 | 
					else
 | 
				
			||||||
 | 
					arch_variant := $(TARGET_ARCH_VARIANT)
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Use the C version on armv7-a since it doesn't support neon instructions.
 | 
				
			||||||
 | 
					ifeq ($(arch_variant),armv7-a)
 | 
				
			||||||
 | 
					LOCAL_SRC_FILES_arm += upstream-freebsd/lib/msun/src/s_floor.c
 | 
				
			||||||
 | 
					else
 | 
				
			||||||
 | 
					LOCAL_SRC_FILES_arm += arm/s_floor.S
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
LOCAL_SRC_FILES_arm64 += \
 | 
					LOCAL_SRC_FILES_arm64 += \
 | 
				
			||||||
    arm64/fenv.c \
 | 
					    arm64/fenv.c \
 | 
				
			||||||
 | 
					    upstream-freebsd/lib/msun/src/s_floor.c \
 | 
				
			||||||
 | 
					
 | 
				
			||||||
LOCAL_SRC_FILES_mips += \
 | 
					LOCAL_SRC_FILES_mips += \
 | 
				
			||||||
    mips/fenv.c \
 | 
					    mips/fenv.c \
 | 
				
			||||||
 | 
					    upstream-freebsd/lib/msun/src/s_floor.c \
 | 
				
			||||||
 | 
					
 | 
				
			||||||
LOCAL_SRC_FILES_mips64 += \
 | 
					LOCAL_SRC_FILES_mips64 += \
 | 
				
			||||||
    mips/fenv.c \
 | 
					    mips/fenv.c \
 | 
				
			||||||
 | 
					    upstream-freebsd/lib/msun/src/s_floor.c \
 | 
				
			||||||
 | 
					
 | 
				
			||||||
LOCAL_SRC_FILES_x86 += \
 | 
					LOCAL_SRC_FILES_x86 += \
 | 
				
			||||||
    i387/fenv.c \
 | 
					    i387/fenv.c \
 | 
				
			||||||
 | 
					    upstream-freebsd/lib/msun/src/s_floor.c \
 | 
				
			||||||
 | 
					
 | 
				
			||||||
LOCAL_SRC_FILES_x86_64 += \
 | 
					LOCAL_SRC_FILES_x86_64 += \
 | 
				
			||||||
    amd64/fenv.c \
 | 
					    amd64/fenv.c \
 | 
				
			||||||
 | 
					    upstream-freebsd/lib/msun/src/s_floor.c \
 | 
				
			||||||
 | 
					
 | 
				
			||||||
LOCAL_C_INCLUDES_x86 += $(LOCAL_PATH)/i387
 | 
					LOCAL_C_INCLUDES_x86 += $(LOCAL_PATH)/i387
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -297,6 +315,9 @@ LOCAL_CFLAGS := \
 | 
				
			|||||||
    -Wno-unknown-pragmas \
 | 
					    -Wno-unknown-pragmas \
 | 
				
			||||||
    -fvisibility=hidden \
 | 
					    -fvisibility=hidden \
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					LOCAL_ASFLAGS := \
 | 
				
			||||||
 | 
					    -Ibionic/libc \
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Workaround the GCC "(long)fn -> lfn" optimization bug which will result in
 | 
					# Workaround the GCC "(long)fn -> lfn" optimization bug which will result in
 | 
				
			||||||
# self recursions for lrint, lrintf, and lrintl.
 | 
					# self recursions for lrint, lrintf, and lrintl.
 | 
				
			||||||
# BUG: 14225968
 | 
					# BUG: 14225968
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										142
									
								
								libm/arm/s_floor.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										142
									
								
								libm/arm/s_floor.S
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,142 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright (c) 2013-2014, NVIDIA Corporation.  All rights reserved.
 | 
				
			||||||
 | 
					 * Johnny Qiu <joqiu@nvidia.com>
 | 
				
			||||||
 | 
					 * Shu Zhang <chazhang@nvidia.com>
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					 * modification, are permitted provided that the following conditions are
 | 
				
			||||||
 | 
					 * met:
 | 
				
			||||||
 | 
					 *     * Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					 *       notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					 *     * Redistributions in binary form must reproduce the above
 | 
				
			||||||
 | 
					 *       copyright notice, this list of conditions and the following
 | 
				
			||||||
 | 
					 *       disclaimer in the documentation and/or other materials provided
 | 
				
			||||||
 | 
					 *       with the distribution.
 | 
				
			||||||
 | 
					 *     * Neither the name of The Linux Foundation nor the names of its
 | 
				
			||||||
 | 
					 *       contributors may be used to endorse or promote products derived
 | 
				
			||||||
 | 
					 *       from this software without specific prior written permission.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
 | 
				
			||||||
 | 
					 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 | 
				
			||||||
 | 
					 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
 | 
				
			||||||
 | 
					 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
 | 
				
			||||||
 | 
					 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 | 
				
			||||||
 | 
					 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 | 
				
			||||||
 | 
					 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 | 
				
			||||||
 | 
					 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 | 
				
			||||||
 | 
					 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 | 
				
			||||||
 | 
					 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 | 
				
			||||||
 | 
					 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <float.h>
 | 
				
			||||||
 | 
					#include <private/bionic_asm.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * double floor(double x)
					 *
					 * ABI:   soft-float argument passing -- x arrives in core registers.
					 * In:    r0 = low word of x, r1 = high word of x (sign bit 31, biased
					 *        exponent in bits 30..20, top 20 mantissa bits below that)
					 * Out:   r0/r1 = x rounded toward minus infinity
					 * Clobb: r2, r3, d0, d1, flags
					 *
					 * Everything except the "negative x with a non-zero fraction" case is
					 * done with integer bit manipulation.  That one case subtracts 1.0 on
					 * the VFP using d0/d1: they are caller-saved scratch registers under
					 * the AAPCS and, unlike d16-d31, are present on VFPv3-D16 parts too.
					 * vmov.f64 with an immediate still requires VFPv3.
					 */
					ENTRY(floor)    /* x in r0, r1 */

					        and             r3, r1, #0x80000000     /* r3 = sign(x) */
					        bic             r1, r1, #0x80000000     /* x = abs(x) */

					        /*
					         * r2 = unbiased exponent = (r1 >> 20) - 0x3ff.  The bias is
					         * removed in two steps (0x3fc + 0x3) since 0x3ff does not fit a
					         * single ARM data-processing immediate; the final subs sets the
					         * flags used by the blt below.
					         */
					        lsr             r2, r1, #20
					        sub             r2, r2, #0x3fc
					        subs            r2, r2, #0x3            /* r2 <- exp */

					        /* exp < 0, i.e. |x| < 1.0? */
					        blt             .Lx_lt_one

					        /* x < 0? */
					        cmp             r3, #0
					        bne             .Lclr_frac_neg

					        /* exp <= 20, i.e. |x| < 2^21?  Fraction = low bits of r1 + all of r0. */
					        cmp             r2, #20
					        ble             .Lclr_frac_r1

					        /* exp < 52, i.e. |x| < 2^52?  Fraction lives only in r0. */
					        cmp             r2, #52
					        blt             .Lclr_frac_r0

					        /* exp >= 52: x is already integral (or +Inf / NaN); return x */
					        bx              lr

					.Lclr_frac_r1:
					        rsb             r2, r2, #20             /* r2 = fraction bits held in r1 */
					        lsr             r1, r1, r2
					        lsl             r1, r1, r2              /* shift them out and back as zeros */
					        mov             r0, #0                  /* low word is entirely fraction */
					        bx              lr

					.Lclr_frac_r0:
					        rsb             r2, r2, #52             /* r2 = fraction bits held in r0 (1..31) */
					        lsr             r0, r0, r2
					        lsl             r0, r0, r2
					        bx              lr

					.Lclr_frac_neg:
					        /* exp <= 20, i.e. |x| < 2^21? */
					        cmp             r2, #20
					        ble             .Lclr_frac_r1_neg

					        /* exp < 52, i.e. |x| < 2^52? */
					        cmp             r2, #52
					        blt             .Lclr_frac_r0_neg

					        /* exp >= 52: already integral (or -Inf / NaN); restore sign, return x */
					        orr             r1, r1, #0x80000000
					        bx              lr

					.Lclr_frac_r1_neg:
					        rsb             r2, r2, #20             /* r2 = fraction bits held in r1 */
					        mov             r3, #1
					        lsl             r3, r3, r2
					        sub             r3, r3, #1              /* r3 = mask of those bits */
					        and             r3, r1, r3
					        orr             r3, r3, r0              /* r3 != 0 <=> x had a fraction */
					        lsr             r1, r1, r2
					        lsl             r1, r1, r2
					        mov             r0, #0
					        b               .Lreturn_x_neg

					.Lclr_frac_r0_neg:
					        rsb             r2, r2, #52             /* r2 = fraction bits held in r0 (1..31) */
					        mov             r3, #1
					        lsl             r3, r3, r2
					        sub             r3, r3, #1              /* r3 = mask of those bits */
					        and             r3, r0, r3              /* r3 != 0 <=> x had a fraction */
					        lsr             r0, r0, r2
					        lsl             r0, r0, r2
					        b               .Lreturn_x_neg

					.Lx_lt_one:
					        /* x == +-0?  Return it with its original sign. */
					        cmp             r0, #0
					        cmpeq           r1, #0
					        orreq           r1, r1, r3
					        bxeq            lr

					        /*
					         * 0 < |x| < 1: return (x > 0) ? +0.0 : -1.0.
					         * The high word is formed as {0x00100000 | 0xc0000000} - 0x00100000,
					         * giving 0 or 0xbff00000 (-1.0) from encodable immediates only.
					         */
					        mov             r1, #0x00100000
					        mov             r0, #0
					        cmp             r3, #0
					        movne           r1, #0xc0000000
					        sub             r1, r1, #0x00100000
					        bx              lr

					.Lreturn_x_neg:
					        /* Here r0/r1 = trunc(|x|) and r3 = the fraction bits that were dropped. */
					        cmp             r3, #0
					        orr             r1, r1, #0x80000000     /* re-apply sign; cmp flags stay live */
					        bxeq            lr                      /* no fraction: -trunc(|x|) is exact */

					        /* Fraction was non-zero: floor(x) = -trunc(|x|) - 1.0 */
					        vmov            d0, r0, r1
					        vmov.f64        d1, #1.0
					        vsub.f64        d0, d0, d1
					        vmov            r0, r1, d0
					        bx              lr

					END(floor)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * When long double has a 53-bit significand (LDBL_MANT_DIG == 53, the
					 * same as double), export floorl as a weak symbol bound to floor.
					 */
					#if LDBL_MANT_DIG == 53
					        .weak           floorl
					        .set            floorl, floor
					#endif
 | 
				
			||||||
		Reference in New Issue
	
	Block a user