am 87472963
: Merge "AArch64: libm: Fix ARM64 fenv_t and refactor ARM64 libm implementation."
* commit '87472963c7c50240002fd83efec69c267c0d6ec7': AArch64: libm: Fix ARM64 fenv_t and refactor ARM64 libm implementation.
This commit is contained in:
commit
b98dd8cb75
@ -16,6 +16,7 @@
|
||||
|
||||
#include "benchmark.h"
|
||||
|
||||
#include <fenv.h>
|
||||
#include <math.h>
|
||||
|
||||
// Avoid optimization.
|
||||
@ -113,10 +114,49 @@ static void BM_math_isinf_ZERO(int iters) {
|
||||
}
|
||||
BENCHMARK(BM_math_isinf_ZERO);
|
||||
|
||||
// Times repeated sin() calls with no floating-point environment handling
// around them. The running value lives in `d`, which is declared outside
// this function.
static void BM_math_sin_fast(int iters) {
  StartBenchmarkTiming();

  d = 1.0;
  for (int remaining = iters; remaining > 0; --remaining) {
    d += sin(d);
  }

  StopBenchmarkTiming();
}
|
||||
BENCHMARK(BM_math_sin_fast);
|
||||
|
||||
// Times sin() when every call is bracketed by feholdexcept() +
// fesetround(FE_TONEAREST) before it and feupdateenv() after it.
static void BM_math_sin_feupdateenv(int iters) {
  StartBenchmarkTiming();

  d = 1.0;
  for (int remaining = iters; remaining > 0; --remaining) {
    fenv_t saved_env;  // environment captured by feholdexcept()
    feholdexcept(&saved_env);
    fesetround(FE_TONEAREST);
    d += sin(d);
    feupdateenv(&saved_env);
  }

  StopBenchmarkTiming();
}
|
||||
BENCHMARK(BM_math_sin_feupdateenv);
|
||||
|
||||
// Times sin() when every call is bracketed by feholdexcept() +
// fesetround(FE_TONEAREST) before it and fesetenv() after it.
static void BM_math_sin_fesetenv(int iters) {
  StartBenchmarkTiming();

  d = 1.0;
  for (int remaining = iters; remaining > 0; --remaining) {
    fenv_t saved_env;  // environment captured by feholdexcept()
    feholdexcept(&saved_env);
    fesetround(FE_TONEAREST);
    d += sin(d);
    fesetenv(&saved_env);
  }

  StopBenchmarkTiming();
}
|
||||
BENCHMARK(BM_math_sin_fesetenv);
|
||||
|
||||
static void BM_math_fpclassify_NORMAL(int iters) {
|
||||
StartBenchmarkTiming();
|
||||
|
@ -28,114 +28,168 @@
|
||||
|
||||
#include <fenv.h>
|
||||
|
||||
/*
|
||||
* Hopefully the system ID byte is immutable, so it's valid to use
|
||||
* this as a default environment.
|
||||
*/
|
||||
const fenv_t __fe_dfl_env = 0;
|
||||
#define FPCR_EXCEPT_SHIFT 8
|
||||
#define FPCR_EXCEPT_MASK (FE_ALL_EXCEPT << FPCR_EXCEPT_SHIFT)
|
||||
|
||||
int fegetenv(fenv_t* __envp) {
|
||||
fenv_t _fpcr, _fpsr;
|
||||
__asm__ __volatile__("mrs %0,fpcr" : "=r" (_fpcr));
|
||||
__asm__ __volatile__("mrs %0,fpsr" : "=r" (_fpsr));
|
||||
*__envp = (_fpcr | _fpsr);
|
||||
#define FPCR_RMODE_SHIFT 22
|
||||
|
||||
const fenv_t __fe_dfl_env = { 0 /* control */, 0 /* status */};
|
||||
|
||||
// Local 32-bit holders for the two floating-point system registers.
typedef __uint32_t fpu_control_t;  // Image of FPCR (Floating-point Control Register).
typedef __uint32_t fpu_status_t;   // Image of FPSR (Floating-point Status Register).

// Register accessors. The __get_* forms take an lvalue and store the register
// contents into it; the __set_* forms write their argument to the register.
#define __get_fpcr(__fpcr) \
  __asm__ __volatile__("mrs %0,fpcr" : "=r" (__fpcr))
#define __get_fpsr(__fpsr) \
  __asm__ __volatile__("mrs %0,fpsr" : "=r" (__fpsr))
#define __set_fpcr(__fpcr) \
  __asm__ __volatile__("msr fpcr,%0" : :"ri" (__fpcr))
#define __set_fpsr(__fpsr) \
  __asm__ __volatile__("msr fpsr,%0" : :"ri" (__fpsr))
|
||||
|
||||
int fegetenv(fenv_t* envp) {
|
||||
__get_fpcr(envp->__control);
|
||||
__get_fpsr(envp->__status);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int fesetenv(const fenv_t* __envp) {
|
||||
fenv_t _fpcr = (*__envp & FPCR_MASK);
|
||||
fenv_t _fpsr = (*__envp & FPSR_MASK);
|
||||
__asm__ __volatile__("msr fpcr,%0" : :"ri" (_fpcr));
|
||||
__asm__ __volatile__("msr fpsr,%0" : :"ri" (_fpsr));
|
||||
int fesetenv(const fenv_t* envp) {
|
||||
fpu_control_t fpcr;
|
||||
|
||||
__get_fpcr(fpcr);
|
||||
if (envp->__control != fpcr) {
|
||||
__set_fpcr(envp->__control);
|
||||
}
|
||||
__set_fpsr(envp->__status);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int feclearexcept(int __excepts) {
|
||||
fexcept_t __fpscr;
|
||||
fegetenv(&__fpscr);
|
||||
__fpscr &= ~__excepts;
|
||||
fesetenv(&__fpscr);
|
||||
int feclearexcept(int excepts) {
|
||||
fpu_status_t fpsr;
|
||||
|
||||
excepts &= FE_ALL_EXCEPT;
|
||||
__get_fpsr(fpsr);
|
||||
fpsr &= ~excepts;
|
||||
__set_fpsr(fpsr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int fegetexceptflag(fexcept_t* __flagp, int __excepts) {
|
||||
fexcept_t __fpscr;
|
||||
fegetenv(&__fpscr);
|
||||
*__flagp = __fpscr & __excepts;
|
||||
int fegetexceptflag(fexcept_t* flagp, int excepts) {
|
||||
fpu_status_t fpsr;
|
||||
|
||||
excepts &= FE_ALL_EXCEPT;
|
||||
__get_fpsr(fpsr);
|
||||
*flagp = fpsr & excepts;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int fesetexceptflag(const fexcept_t* __flagp, int __excepts) {
|
||||
fexcept_t __fpscr;
|
||||
fegetenv(&__fpscr);
|
||||
__fpscr &= ~__excepts;
|
||||
__fpscr |= *__flagp & __excepts;
|
||||
fesetenv(&__fpscr);
|
||||
int fesetexceptflag(const fexcept_t* flagp, int excepts) {
|
||||
fpu_status_t fpsr;
|
||||
|
||||
excepts &= FE_ALL_EXCEPT;
|
||||
__get_fpsr(fpsr);
|
||||
fpsr &= ~excepts;
|
||||
fpsr |= *flagp & excepts;
|
||||
__set_fpsr(fpsr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int feraiseexcept(int __excepts) {
|
||||
fexcept_t __ex = __excepts;
|
||||
fesetexceptflag(&__ex, __excepts);
|
||||
// Raises the given exceptions by passing them to fesetexceptflag() as both
// the flag values and the selection mask. Always returns 0.
int feraiseexcept(int excepts) {
  const fexcept_t requested = excepts;

  fesetexceptflag(&requested, excepts);
  return 0;
}
|
||||
|
||||
int fetestexcept(int __excepts) {
|
||||
fexcept_t __fpscr;
|
||||
fegetenv(&__fpscr);
|
||||
return (__fpscr & __excepts);
|
||||
int fetestexcept(int excepts) {
|
||||
fpu_status_t fpsr;
|
||||
|
||||
excepts &= FE_ALL_EXCEPT;
|
||||
__get_fpsr(fpsr);
|
||||
return (fpsr & excepts);
|
||||
}
|
||||
|
||||
int fegetround(void) {
|
||||
fenv_t _fpscr;
|
||||
fegetenv(&_fpscr);
|
||||
return ((_fpscr >> _FPSCR_RMODE_SHIFT) & 0x3);
|
||||
fpu_control_t fpcr;
|
||||
|
||||
__get_fpcr(fpcr);
|
||||
return ((fpcr >> FPCR_RMODE_SHIFT) & FE_TOWARDZERO);
|
||||
}
|
||||
|
||||
int fesetround(int __round) {
|
||||
fenv_t _fpscr;
|
||||
fegetenv(&_fpscr);
|
||||
_fpscr &= ~(0x3 << _FPSCR_RMODE_SHIFT);
|
||||
_fpscr |= (__round << _FPSCR_RMODE_SHIFT);
|
||||
fesetenv(&_fpscr);
|
||||
int fesetround(int round) {
|
||||
fpu_control_t fpcr, new_fpcr;
|
||||
|
||||
round &= FE_TOWARDZERO;
|
||||
__get_fpcr(fpcr);
|
||||
new_fpcr = fpcr & ~(FE_TOWARDZERO << FPCR_RMODE_SHIFT);
|
||||
new_fpcr |= (round << FPCR_RMODE_SHIFT);
|
||||
if (new_fpcr != fpcr) {
|
||||
__set_fpcr(new_fpcr);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int feholdexcept(fenv_t* __envp) {
|
||||
fenv_t __env;
|
||||
fegetenv(&__env);
|
||||
*__envp = __env;
|
||||
__env &= ~(FE_ALL_EXCEPT | _FPSCR_ENABLE_MASK);
|
||||
fesetenv(&__env);
|
||||
int feholdexcept(fenv_t* envp) {
|
||||
fenv_t env;
|
||||
fpu_status_t fpsr;
|
||||
fpu_control_t fpcr, new_fpcr;
|
||||
|
||||
__get_fpsr(fpsr);
|
||||
__get_fpcr(fpcr);
|
||||
env.__status = fpsr;
|
||||
env.__control = fpcr;
|
||||
*envp = env;
|
||||
|
||||
// Set exceptions to untrapped.
|
||||
new_fpcr = fpcr & ~(FE_ALL_EXCEPT << FPCR_EXCEPT_SHIFT);
|
||||
if (new_fpcr != fpcr) {
|
||||
__set_fpcr(new_fpcr);
|
||||
}
|
||||
|
||||
// Clear all exceptions.
|
||||
fpsr &= ~FE_ALL_EXCEPT;
|
||||
__set_fpsr(fpsr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int feupdateenv(const fenv_t* __envp) {
|
||||
fexcept_t __fpscr;
|
||||
fegetenv(&__fpscr);
|
||||
fesetenv(__envp);
|
||||
feraiseexcept(__fpscr & FE_ALL_EXCEPT);
|
||||
int feupdateenv(const fenv_t* envp) {
|
||||
fpu_status_t fpsr;
|
||||
fpu_control_t fpcr;
|
||||
|
||||
// Set FPU Control register.
|
||||
__get_fpcr(fpcr);
|
||||
if (envp->__control != fpcr) {
|
||||
__set_fpcr(envp->__control);
|
||||
}
|
||||
|
||||
// Set FPU Status register to status | currently raised exceptions.
|
||||
__get_fpsr(fpsr);
|
||||
fpsr = envp->__status | (fpsr & FE_ALL_EXCEPT);
|
||||
__set_fpsr(fpsr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int feenableexcept(int __mask) {
|
||||
fenv_t __old_fpscr, __new_fpscr;
|
||||
fegetenv(&__old_fpscr);
|
||||
__new_fpscr = __old_fpscr | (__mask & FE_ALL_EXCEPT) << _FPSCR_ENABLE_SHIFT;
|
||||
fesetenv(&__new_fpscr);
|
||||
return ((__old_fpscr >> _FPSCR_ENABLE_SHIFT) & FE_ALL_EXCEPT);
|
||||
int feenableexcept(int mask) {
|
||||
fpu_control_t old_fpcr, new_fpcr;
|
||||
|
||||
__get_fpcr(old_fpcr);
|
||||
new_fpcr = old_fpcr | ((mask & FE_ALL_EXCEPT) << FPCR_EXCEPT_SHIFT);
|
||||
if (new_fpcr != old_fpcr) {
|
||||
__set_fpcr(new_fpcr);
|
||||
}
|
||||
return ((old_fpcr >> FPCR_EXCEPT_SHIFT) & FE_ALL_EXCEPT);
|
||||
}
|
||||
|
||||
int fedisableexcept(int __mask) {
|
||||
fenv_t __old_fpscr, __new_fpscr;
|
||||
fegetenv(&__old_fpscr);
|
||||
__new_fpscr = __old_fpscr & ~((__mask & FE_ALL_EXCEPT) << _FPSCR_ENABLE_SHIFT);
|
||||
fesetenv(&__new_fpscr);
|
||||
return ((__old_fpscr >> _FPSCR_ENABLE_SHIFT) & FE_ALL_EXCEPT);
|
||||
int fedisableexcept(int mask) {
|
||||
fpu_control_t old_fpcr, new_fpcr;
|
||||
|
||||
__get_fpcr(old_fpcr);
|
||||
new_fpcr = old_fpcr & ~((mask & FE_ALL_EXCEPT) << FPCR_EXCEPT_SHIFT);
|
||||
if (new_fpcr != old_fpcr) {
|
||||
__set_fpcr(new_fpcr);
|
||||
}
|
||||
return ((old_fpcr >> FPCR_EXCEPT_SHIFT) & FE_ALL_EXCEPT);
|
||||
}
|
||||
|
||||
int fegetexcept(void) {
|
||||
fenv_t __fpscr;
|
||||
fegetenv(&__fpscr);
|
||||
return ((__fpscr & _FPSCR_ENABLE_MASK) >> _FPSCR_ENABLE_SHIFT);
|
||||
fpu_control_t fpcr;
|
||||
|
||||
__get_fpcr(fpcr);
|
||||
return ((fpcr & FPCR_EXCEPT_MASK) >> FPCR_EXCEPT_SHIFT);
|
||||
}
|
||||
|
@ -27,15 +27,44 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Rewritten for Android.
|
||||
* In ARMv8, AArch64 state, floating-point operation is controlled by:
|
||||
*
|
||||
* The ARM FPSCR (Floating-point Status and Control Register) described here:
|
||||
* http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0344b/Chdfafia.html
|
||||
* has been split into the FPCR (Floating-point Control Register) and FPSR
|
||||
* (Floating-point Status Register) on the ARMv8. These are described briefly in
|
||||
* "Procedure Call Standard for the ARM 64-bit Architecture"
|
||||
* http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055a/IHI0055A_aapcs64.pdf
|
||||
* section 5.1.2 SIMD and Floating-Point Registers
|
||||
* * FPCR - 32Bit Floating-Point Control Register:
|
||||
* * [31:27] - Reserved, Res0;
|
||||
* * [26] - AHP, Alternative half-precision control bit;
|
||||
* * [25] - DN, Default NaN mode control bit;
|
||||
* * [24] - FZ, Flush-to-zero mode control bit;
|
||||
* * [23:22] - RMode, Rounding Mode control field:
|
||||
* * 00 - Round to Nearest (RN) mode;
|
||||
* * 01 - Round towards Plus Infinity (RP) mode;
|
||||
* * 10 - Round towards Minus Infinity (RM) mode;
|
||||
* * 11 - Round towards Zero (RZ) mode.
|
||||
* * [21:20] - Stride, ignored during AArch64 execution;
|
||||
* * [19] - Reserved, Res0;
|
||||
* * [18:16] - Len, ignored during AArch64 execution;
|
||||
* * [15] - IDE, Input Denormal exception trap;
|
||||
* * [14:13] - Reserved, Res0;
|
||||
* * [12] - IXE, Inexact exception trap;
|
||||
* * [11] - UFE, Underflow exception trap;
|
||||
* * [10] - OFE, Overflow exception trap;
|
||||
* * [9] - DZE, Division by Zero exception trap;
|
||||
* * [8] - IOE, Invalid Operation exception trap;
|
||||
* * [7:0] - Reserved, Res0.
|
||||
*
|
||||
* * FPSR - 32Bit Floating-Point Status Register:
|
||||
* * [31] - N, Negative condition flag for AArch32 (AArch64 sets PSTATE.N);
|
||||
* * [30] - Z, Zero condition flag for AArch32 (AArch64 sets PSTATE.Z);
|
||||
* * [29] - C, Carry condition flag for AArch32 (AArch64 sets PSTATE.C);
|
||||
* * [28] - V, Overflow condition flag for AArch32 (AArch64 sets PSTATE.V);
|
||||
* * [27] - QC, Cumulative saturation bit, Advanced SIMD only;
|
||||
* * [26:8] - Reserved, Res0;
|
||||
* * [7] - IDC, Input Denormal cumulative exception;
|
||||
* * [6:5] - Reserved, Res0;
|
||||
* * [4] - IXC, Inexact cumulative exception;
|
||||
* * [3] - UFC, Underflow cumulative exception;
|
||||
* * [2] - OFC, Overflow cumulative exception;
|
||||
* * [1] - DZC, Division by Zero cumulative exception;
|
||||
* * [0] - IOC, Invalid Operation cumulative exception.
|
||||
*/
|
||||
|
||||
#ifndef _ARM64_FENV_H_
|
||||
@ -45,7 +74,11 @@
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
typedef __uint32_t fenv_t;
|
||||
/*
 * Floating-point environment: one 32-bit image of each of the two
 * floating-point system registers, control first.
 */
typedef struct {
  __uint32_t __control;  /* Image of FPCR, the Floating-point Control Register. */
  __uint32_t __status;   /* Image of FPSR, the Floating-point Status Register. */
} fenv_t;
|
||||
|
||||
typedef __uint32_t fexcept_t;
|
||||
|
||||
/* Exception flags. */
|
||||
@ -54,11 +87,9 @@ typedef __uint32_t fexcept_t;
|
||||
#define FE_OVERFLOW 0x04
|
||||
#define FE_UNDERFLOW 0x08
|
||||
#define FE_INEXACT 0x10
|
||||
#define FE_DENORMAL 0x80
|
||||
#define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_INEXACT | FE_INVALID | \
|
||||
FE_OVERFLOW | FE_UNDERFLOW)
|
||||
|
||||
#define _FPSCR_ENABLE_SHIFT 8
|
||||
#define _FPSCR_ENABLE_MASK (FE_ALL_EXCEPT << _FPSCR_ENABLE_SHIFT)
|
||||
FE_OVERFLOW | FE_UNDERFLOW | FE_DENORMAL)
|
||||
|
||||
/* Rounding modes. */
|
||||
#define FE_TONEAREST 0x0
|
||||
@ -66,56 +97,6 @@ typedef __uint32_t fexcept_t;
|
||||
#define FE_DOWNWARD 0x2
|
||||
#define FE_TOWARDZERO 0x3
|
||||
|
||||
#define _FPSCR_RMODE_SHIFT 22
|
||||
|
||||
#define FPCR_IOE (1 << 8)
|
||||
#define FPCR_DZE (1 << 9)
|
||||
#define FPCR_OFE (1 << 10)
|
||||
#define FPCR_UFE (1 << 11)
|
||||
#define FPCR_IXE (1 << 12)
|
||||
#define FPCR_IDE (1 << 15)
|
||||
#define FPCR_LEN (7 << 16)
|
||||
#define FPCR_STRIDE (3 << 20)
|
||||
#define FPCR_RMODE (3 << 22)
|
||||
#define FPCR_FZ (1 << 24)
|
||||
#define FPCR_DN (1 << 25)
|
||||
#define FPCR_AHP (1 << 26)
|
||||
#define FPCR_MASK (FPCR_IOE | \
|
||||
FPCR_DZE | \
|
||||
FPCR_OFE | \
|
||||
FPCR_UFE | \
|
||||
FPCR_IXE | \
|
||||
FPCR_IDE | \
|
||||
FPCR_LEN | \
|
||||
FPCR_STRIDE | \
|
||||
FPCR_RMODE | \
|
||||
FPCR_FZ | \
|
||||
FPCR_DN | \
|
||||
FPCR_AHP )
|
||||
|
||||
#define FPSR_IOC (1 << 0)
|
||||
#define FPSR_DZC (1 << 1)
|
||||
#define FPSR_OFC (1 << 2)
|
||||
#define FPSR_UFC (1 << 3)
|
||||
#define FPSR_IXC (1 << 4)
|
||||
#define FPSR_IDC (1 << 7)
|
||||
#define FPSR_QC (1 << 27)
|
||||
#define FPSR_V (1 << 28)
|
||||
#define FPSR_C (1 << 29)
|
||||
#define FPSR_Z (1 << 30)
|
||||
#define FPSR_N (1 << 31)
|
||||
#define FPSR_MASK (FPSR_IOC | \
|
||||
FPSR_DZC | \
|
||||
FPSR_OFC | \
|
||||
FPSR_UFC | \
|
||||
FPSR_IXC | \
|
||||
FPSR_IDC | \
|
||||
FPSR_QC | \
|
||||
FPSR_V | \
|
||||
FPSR_C | \
|
||||
FPSR_Z | \
|
||||
FPSR_N )
|
||||
|
||||
__END_DECLS
|
||||
|
||||
#endif /* !_ARM64_FENV_H_ */
|
||||
|
Loading…
Reference in New Issue
Block a user