am 87472963: Merge "AArch64: libm: Fix ARM64 fenv_t and refactor ARM64 libm implementation."

* commit '87472963c7c50240002fd83efec69c267c0d6ec7':
  AArch64: libm: Fix ARM64 fenv_t and refactor ARM64 libm implementation.
This commit is contained in:
Elliott Hughes 2014-06-09 21:00:35 +00:00 committed by Android Git Automerger
commit b98dd8cb75
3 changed files with 210 additions and 135 deletions

View File

@ -16,6 +16,7 @@
#include "benchmark.h" #include "benchmark.h"
#include <fenv.h>
#include <math.h> #include <math.h>
// Avoid optimization. // Avoid optimization.
@ -113,10 +114,49 @@ static void BM_math_isinf_ZERO(int iters) {
} }
BENCHMARK(BM_math_isinf_ZERO); BENCHMARK(BM_math_isinf_ZERO);
// Baseline sin() benchmark: performs `iters` chained sin() calls with no
// floating-point environment save/restore around them, so the fenv variants
// of this benchmark can be compared against it.
// `d` is the accumulator declared outside this function (not visible here).
static void BM_math_sin_fast(int iters) {
  StartBenchmarkTiming();

  d = 1.0;
  // Count down instead of up; the body runs exactly `iters` times
  // (zero times when `iters` is not positive).
  for (int remaining = iters; remaining > 0; --remaining) {
    d += sin(d);
  }

  StopBenchmarkTiming();
}
BENCHMARK(BM_math_sin_fast);
// sin() benchmark in which every call is bracketed by
// feholdexcept() / fesetround(FE_TONEAREST) before it and feupdateenv()
// after it, to measure the cost of that save/restore sequence.
// `d` is the accumulator declared outside this function (not visible here).
static void BM_math_sin_feupdateenv(int iters) {
  StartBenchmarkTiming();

  d = 1.0;
  for (int remaining = iters; remaining > 0; --remaining) {
    fenv_t saved_env;

    // Save the current environment, then force round-to-nearest.
    feholdexcept(&saved_env);
    fesetround(FE_TONEAREST);

    d += sin(d);

    // Restore the saved environment via feupdateenv().
    feupdateenv(&saved_env);
  }

  StopBenchmarkTiming();
}
BENCHMARK(BM_math_sin_feupdateenv);
// sin() benchmark in which every call is bracketed by
// feholdexcept() / fesetround(FE_TONEAREST) before it and fesetenv()
// after it, to measure the cost of that save/restore sequence.
// `d` is the accumulator declared outside this function (not visible here).
static void BM_math_sin_fesetenv(int iters) {
  StartBenchmarkTiming();

  d = 1.0;
  for (int remaining = iters; remaining > 0; --remaining) {
    fenv_t saved_env;

    // Save the current environment, then force round-to-nearest.
    feholdexcept(&saved_env);
    fesetround(FE_TONEAREST);

    d += sin(d);

    // Restore the saved environment via fesetenv().
    fesetenv(&saved_env);
  }

  StopBenchmarkTiming();
}
BENCHMARK(BM_math_sin_fesetenv);
static void BM_math_fpclassify_NORMAL(int iters) { static void BM_math_fpclassify_NORMAL(int iters) {
StartBenchmarkTiming(); StartBenchmarkTiming();

View File

@ -28,114 +28,168 @@
#include <fenv.h> #include <fenv.h>
/* #define FPCR_EXCEPT_SHIFT 8
* Hopefully the system ID byte is immutable, so it's valid to use #define FPCR_EXCEPT_MASK (FE_ALL_EXCEPT << FPCR_EXCEPT_SHIFT)
* this as a default environment.
*/
const fenv_t __fe_dfl_env = 0;
int fegetenv(fenv_t* __envp) { #define FPCR_RMODE_SHIFT 22
fenv_t _fpcr, _fpsr;
__asm__ __volatile__("mrs %0,fpcr" : "=r" (_fpcr)); const fenv_t __fe_dfl_env = { 0 /* control */, 0 /* status */};
__asm__ __volatile__("mrs %0,fpsr" : "=r" (_fpsr));
*__envp = (_fpcr | _fpsr); typedef __uint32_t fpu_control_t; // FPCR, Floating-point Control Register.
typedef __uint32_t fpu_status_t; // FPSR, Floating-point Status Register.
#define __get_fpcr(__fpcr) __asm__ __volatile__("mrs %0,fpcr" : "=r" (__fpcr))
#define __get_fpsr(__fpsr) __asm__ __volatile__("mrs %0,fpsr" : "=r" (__fpsr))
#define __set_fpcr(__fpcr) __asm__ __volatile__("msr fpcr,%0" : :"ri" (__fpcr))
#define __set_fpsr(__fpsr) __asm__ __volatile__("msr fpsr,%0" : :"ri" (__fpsr))
int fegetenv(fenv_t* envp) {
__get_fpcr(envp->__control);
__get_fpsr(envp->__status);
return 0; return 0;
} }
int fesetenv(const fenv_t* __envp) { int fesetenv(const fenv_t* envp) {
fenv_t _fpcr = (*__envp & FPCR_MASK); fpu_control_t fpcr;
fenv_t _fpsr = (*__envp & FPSR_MASK);
__asm__ __volatile__("msr fpcr,%0" : :"ri" (_fpcr)); __get_fpcr(fpcr);
__asm__ __volatile__("msr fpsr,%0" : :"ri" (_fpsr)); if (envp->__control != fpcr) {
__set_fpcr(envp->__control);
}
__set_fpsr(envp->__status);
return 0; return 0;
} }
int feclearexcept(int __excepts) { int feclearexcept(int excepts) {
fexcept_t __fpscr; fpu_status_t fpsr;
fegetenv(&__fpscr);
__fpscr &= ~__excepts; excepts &= FE_ALL_EXCEPT;
fesetenv(&__fpscr); __get_fpsr(fpsr);
fpsr &= ~excepts;
__set_fpsr(fpsr);
return 0; return 0;
} }
int fegetexceptflag(fexcept_t* __flagp, int __excepts) { int fegetexceptflag(fexcept_t* flagp, int excepts) {
fexcept_t __fpscr; fpu_status_t fpsr;
fegetenv(&__fpscr);
*__flagp = __fpscr & __excepts; excepts &= FE_ALL_EXCEPT;
__get_fpsr(fpsr);
*flagp = fpsr & excepts;
return 0; return 0;
} }
int fesetexceptflag(const fexcept_t* __flagp, int __excepts) { int fesetexceptflag(const fexcept_t* flagp, int excepts) {
fexcept_t __fpscr; fpu_status_t fpsr;
fegetenv(&__fpscr);
__fpscr &= ~__excepts; excepts &= FE_ALL_EXCEPT;
__fpscr |= *__flagp & __excepts; __get_fpsr(fpsr);
fesetenv(&__fpscr); fpsr &= ~excepts;
fpsr |= *flagp & excepts;
__set_fpsr(fpsr);
return 0; return 0;
} }
int feraiseexcept(int __excepts) { int feraiseexcept(int excepts) {
fexcept_t __ex = __excepts; fexcept_t ex = excepts;
fesetexceptflag(&__ex, __excepts);
fesetexceptflag(&ex, excepts);
return 0; return 0;
} }
int fetestexcept(int __excepts) { int fetestexcept(int excepts) {
fexcept_t __fpscr; fpu_status_t fpsr;
fegetenv(&__fpscr);
return (__fpscr & __excepts); excepts &= FE_ALL_EXCEPT;
__get_fpsr(fpsr);
return (fpsr & excepts);
} }
int fegetround(void) { int fegetround(void) {
fenv_t _fpscr; fpu_control_t fpcr;
fegetenv(&_fpscr);
return ((_fpscr >> _FPSCR_RMODE_SHIFT) & 0x3); __get_fpcr(fpcr);
return ((fpcr >> FPCR_RMODE_SHIFT) & FE_TOWARDZERO);
} }
int fesetround(int __round) { int fesetround(int round) {
fenv_t _fpscr; fpu_control_t fpcr, new_fpcr;
fegetenv(&_fpscr);
_fpscr &= ~(0x3 << _FPSCR_RMODE_SHIFT); round &= FE_TOWARDZERO;
_fpscr |= (__round << _FPSCR_RMODE_SHIFT); __get_fpcr(fpcr);
fesetenv(&_fpscr); new_fpcr = fpcr & ~(FE_TOWARDZERO << FPCR_RMODE_SHIFT);
new_fpcr |= (round << FPCR_RMODE_SHIFT);
if (new_fpcr != fpcr) {
__set_fpcr(new_fpcr);
}
return 0; return 0;
} }
int feholdexcept(fenv_t* __envp) { int feholdexcept(fenv_t* envp) {
fenv_t __env; fenv_t env;
fegetenv(&__env); fpu_status_t fpsr;
*__envp = __env; fpu_control_t fpcr, new_fpcr;
__env &= ~(FE_ALL_EXCEPT | _FPSCR_ENABLE_MASK);
fesetenv(&__env); __get_fpsr(fpsr);
__get_fpcr(fpcr);
env.__status = fpsr;
env.__control = fpcr;
*envp = env;
// Set exceptions to untrapped.
new_fpcr = fpcr & ~(FE_ALL_EXCEPT << FPCR_EXCEPT_SHIFT);
if (new_fpcr != fpcr) {
__set_fpcr(new_fpcr);
}
// Clear all exceptions.
fpsr &= ~FE_ALL_EXCEPT;
__set_fpsr(fpsr);
return 0; return 0;
} }
int feupdateenv(const fenv_t* __envp) { int feupdateenv(const fenv_t* envp) {
fexcept_t __fpscr; fpu_status_t fpsr;
fegetenv(&__fpscr); fpu_control_t fpcr;
fesetenv(__envp);
feraiseexcept(__fpscr & FE_ALL_EXCEPT); // Set FPU Control register.
__get_fpcr(fpcr);
if (envp->__control != fpcr) {
__set_fpcr(envp->__control);
}
// Set FPU Status register to status | currently raised exceptions.
__get_fpsr(fpsr);
fpsr = envp->__status | (fpsr & FE_ALL_EXCEPT);
__set_fpsr(fpsr);
return 0; return 0;
} }
int feenableexcept(int __mask) { int feenableexcept(int mask) {
fenv_t __old_fpscr, __new_fpscr; fpu_control_t old_fpcr, new_fpcr;
fegetenv(&__old_fpscr);
__new_fpscr = __old_fpscr | (__mask & FE_ALL_EXCEPT) << _FPSCR_ENABLE_SHIFT; __get_fpcr(old_fpcr);
fesetenv(&__new_fpscr); new_fpcr = old_fpcr | ((mask & FE_ALL_EXCEPT) << FPCR_EXCEPT_SHIFT);
return ((__old_fpscr >> _FPSCR_ENABLE_SHIFT) & FE_ALL_EXCEPT); if (new_fpcr != old_fpcr) {
__set_fpcr(new_fpcr);
}
return ((old_fpcr >> FPCR_EXCEPT_SHIFT) & FE_ALL_EXCEPT);
} }
int fedisableexcept(int __mask) { int fedisableexcept(int mask) {
fenv_t __old_fpscr, __new_fpscr; fpu_control_t old_fpcr, new_fpcr;
fegetenv(&__old_fpscr);
__new_fpscr = __old_fpscr & ~((__mask & FE_ALL_EXCEPT) << _FPSCR_ENABLE_SHIFT); __get_fpcr(old_fpcr);
fesetenv(&__new_fpscr); new_fpcr = old_fpcr & ~((mask & FE_ALL_EXCEPT) << FPCR_EXCEPT_SHIFT);
return ((__old_fpscr >> _FPSCR_ENABLE_SHIFT) & FE_ALL_EXCEPT); if (new_fpcr != old_fpcr) {
__set_fpcr(new_fpcr);
}
return ((old_fpcr >> FPCR_EXCEPT_SHIFT) & FE_ALL_EXCEPT);
} }
int fegetexcept(void) { int fegetexcept(void) {
fenv_t __fpscr; fpu_control_t fpcr;
fegetenv(&__fpscr);
return ((__fpscr & _FPSCR_ENABLE_MASK) >> _FPSCR_ENABLE_SHIFT); __get_fpcr(fpcr);
return ((fpcr & FPCR_EXCEPT_MASK) >> FPCR_EXCEPT_SHIFT);
} }

View File

@ -27,15 +27,44 @@
*/ */
/* /*
* Rewritten for Android. * In ARMv8, AArch64 state, floating-point operation is controlled by:
* *
* The ARM FPSCR (Floating-point Status and Control Register) described here: * * FPCR - 32Bit Floating-Point Control Register:
* http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0344b/Chdfafia.html * * [31:27] - Reserved, Res0;
* has been split into the FPCR (Floating-point Control Register) and FPSR * * [26] - AHP, Alternative half-precision control bit;
* (Floating-point Status Register) on the ARMv8. These are described briefly in * * [25] - DN, Default NaN mode control bit;
* "Procedure Call Standard for the ARM 64-bit Architecture" * * [24] - FZ, Flush-to-zero mode control bit;
* http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055a/IHI0055A_aapcs64.pdf * * [23:22] - RMode, Rounding Mode control field:
* section 5.1.2 SIMD and Floating-Point Registers * * 00 - Round to Nearest (RN) mode;
* * 01 - Round towards Plus Infinity (RP) mode;
* * 10 - Round towards Minus Infinity (RM) mode;
* * 11 - Round towards Zero (RZ) mode.
* * [21:20] - Stride, ignored during AArch64 execution;
* * [19] - Reserved, Res0;
* * [18:16] - Len, ignored during AArch64 execution;
* * [15] - IDE, Input Denormal exception trap;
* * [14:13] - Reserved, Res0;
* * [12] - IXE, Inexact exception trap;
* * [11] - UFE, Underflow exception trap;
* * [10] - OFE, Overflow exception trap;
* * [9] - DZE, Division by Zero exception trap;
* * [8] - IOE, Invalid Operation exception trap;
* * [7:0] - Reserved, Res0.
*
* * FPSR - 32Bit Floating-Point Status Register:
* * [31] - N, Negative condition flag for AArch32 (AArch64 sets PSTATE.N);
* * [30] - Z, Zero condition flag for AArch32 (AArch64 sets PSTATE.Z);
* * [29] - C, Carry condition flag for AArch32 (AArch64 sets PSTATE.C);
* * [28] - V, Overflow condition flag for AArch32 (AArch64 sets PSTATE.V);
* * [27] - QC, Cumulative saturation bit, Advanced SIMD only;
* * [26:8] - Reserved, Res0;
* * [7] - IDC, Input Denormal cumulative exception;
* * [6:5] - Reserved, Res0;
* * [4] - IXC, Inexact cumulative exception;
* * [3] - UFC, Underflow cumulative exception;
* * [2] - OFC, Overflow cumulative exception;
* * [1] - DZC, Division by Zero cumulative exception;
* * [0] - IOC, Invalid Operation cumulative exception.
*/ */
#ifndef _ARM64_FENV_H_ #ifndef _ARM64_FENV_H_
@ -45,7 +74,11 @@
__BEGIN_DECLS __BEGIN_DECLS
typedef __uint32_t fenv_t; typedef struct {
__uint32_t __control; /* FPCR, Floating-point Control Register */
__uint32_t __status; /* FPSR, Floating-point Status Register */
} fenv_t;
typedef __uint32_t fexcept_t; typedef __uint32_t fexcept_t;
/* Exception flags. */ /* Exception flags. */
@ -54,11 +87,9 @@ typedef __uint32_t fexcept_t;
#define FE_OVERFLOW 0x04 #define FE_OVERFLOW 0x04
#define FE_UNDERFLOW 0x08 #define FE_UNDERFLOW 0x08
#define FE_INEXACT 0x10 #define FE_INEXACT 0x10
#define FE_DENORMAL 0x80
#define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_INEXACT | FE_INVALID | \ #define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_INEXACT | FE_INVALID | \
FE_OVERFLOW | FE_UNDERFLOW) FE_OVERFLOW | FE_UNDERFLOW | FE_DENORMAL)
#define _FPSCR_ENABLE_SHIFT 8
#define _FPSCR_ENABLE_MASK (FE_ALL_EXCEPT << _FPSCR_ENABLE_SHIFT)
/* Rounding modes. */ /* Rounding modes. */
#define FE_TONEAREST 0x0 #define FE_TONEAREST 0x0
@ -66,56 +97,6 @@ typedef __uint32_t fexcept_t;
#define FE_DOWNWARD 0x2 #define FE_DOWNWARD 0x2
#define FE_TOWARDZERO 0x3 #define FE_TOWARDZERO 0x3
#define _FPSCR_RMODE_SHIFT 22
#define FPCR_IOE (1 << 8)
#define FPCR_DZE (1 << 9)
#define FPCR_OFE (1 << 10)
#define FPCR_UFE (1 << 11)
#define FPCR_IXE (1 << 12)
#define FPCR_IDE (1 << 15)
#define FPCR_LEN (7 << 16)
#define FPCR_STRIDE (3 << 20)
#define FPCR_RMODE (3 << 22)
#define FPCR_FZ (1 << 24)
#define FPCR_DN (1 << 25)
#define FPCR_AHP (1 << 26)
#define FPCR_MASK (FPCR_IOE | \
FPCR_DZE | \
FPCR_OFE | \
FPCR_UFE | \
FPCR_IXE | \
FPCR_IDE | \
FPCR_LEN | \
FPCR_STRIDE | \
FPCR_RMODE | \
FPCR_FZ | \
FPCR_DN | \
FPCR_AHP )
#define FPSR_IOC (1 << 0)
#define FPSR_DZC (1 << 1)
#define FPSR_OFC (1 << 2)
#define FPSR_UFC (1 << 3)
#define FPSR_IXC (1 << 4)
#define FPSR_IDC (1 << 7)
#define FPSR_QC (1 << 27)
#define FPSR_V (1 << 28)
#define FPSR_C (1 << 29)
#define FPSR_Z (1 << 30)
#define FPSR_N (1 << 31)
#define FPSR_MASK (FPSR_IOC | \
FPSR_DZC | \
FPSR_OFC | \
FPSR_UFC | \
FPSR_IXC | \
FPSR_IDC | \
FPSR_QC | \
FPSR_V | \
FPSR_C | \
FPSR_Z | \
FPSR_N )
__END_DECLS __END_DECLS
#endif /* !_ARM64_FENV_H_ */ #endif /* !_ARM64_FENV_H_ */