Use __builtin_clz instead of default implementation of fixnormz on GCC
The library contains architecture specific versions of the clz functions for arm and mips, but the fallback C version for other architectures is pretty slow. By using __builtin_clz on GCC (available since GCC 3.4), we get a significant (20-40 %) speedup of the total running time on x86. Speed difference: 5.1 s instead of 8.6 s for AAC-LC 128 kbit/s and 3.8 s instead of 4.9 s for HE-AACv2 32 kbit/s, on an i7.
This commit is contained in:
parent
6999980d67
commit
e036dbc8eb
@ -115,6 +115,11 @@ inline INT fixnormz_S (SHORT a)
|
||||
{
|
||||
return fixnormz_D((INT)(a));
|
||||
}
|
||||
#elif defined(__GNUC__)
|
||||
/*
 * Count the leading zero bits of a 16-bit value using GCC's __builtin_clz.
 *
 * a: value whose low 16 bits are examined.
 * Returns a count in the range 0..16; 16 when a is zero, 0 when a is negative
 * (bit 15 set).
 *
 * __builtin_clz operates on an unsigned int and its result is undefined for
 * zero. Passing the SHORT directly lets integer promotion widen it first, so
 * a positive input would be counted over the full unsigned int width (e.g.
 * 1 -> 31) while zero still yields 16. The value is therefore masked to its
 * low 16 bits and the extra width is subtracted from the builtin's result.
 *
 * NOTE(review): assumes SHORT is a 16-bit type and unsigned int is wider than
 * 16 bits - confirm against the project's type definitions.
 */
inline INT fixnormz_S (SHORT a)
{
  /* Masking keeps a negative value from sign-extending into the upper bits. */
  unsigned int bits = (unsigned int)a & 0xFFFFu;

  if (bits == 0u) {
    return 16;
  }

  /* Remove the leading zeros contributed by the bits above the low 16. */
  return __builtin_clz(bits) - ((int)(sizeof(unsigned int) * 8) - 16);
}
|
||||
#else
|
||||
inline INT fixnormz_S (SHORT a)
|
||||
{
|
||||
@ -131,6 +136,12 @@ inline INT fixnormz_S (SHORT a)
|
||||
#endif
|
||||
|
||||
#if !defined(FUNCTION_fixnormz_D)
|
||||
#if defined(__GNUC__)
|
||||
/*
 * Count the leading zero bits of a using GCC's __builtin_clz.
 *
 * a: value to examine.
 * Returns 32 when a is zero; otherwise the result of __builtin_clz(a).
 *
 * The zero case is handled explicitly because the result of __builtin_clz is
 * undefined for a zero argument.
 *
 * NOTE(review): __builtin_clz takes an unsigned int; this presumably relies
 * on LONG being a 32-bit type no wider than int - confirm against the
 * project's type definitions.
 */
inline INT fixnormz_D (LONG a)
{
  if (!a) {
    return 32;
  }

  return __builtin_clz(a);
}
|
||||
#else
|
||||
inline INT fixnormz_D (LONG a)
|
||||
{
|
||||
INT leadingBits = 0;
|
||||
@ -143,6 +154,7 @@ inline INT fixnormz_D (LONG a)
|
||||
return (leadingBits);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/*****************************************************************************
|
||||
|
Loading…
Reference in New Issue
Block a user