From e036dbc8eb7d5fdd02dc70faff20e9ac3e2989ca Mon Sep 17 00:00:00 2001
From: Jakub Stachowski
Date: Sat, 11 Aug 2012 16:31:02 +0200
Subject: [PATCH] Use __builtin_clz instead of default implementation of fixnormz on GCC

The library contains architecture specific versions of the clz
functions for arm and mips, but the fallback C version for other
architectures is pretty slow.

By using __builtin_clz on GCC (available since GCC 3.4), we get a
significant (20-40 %) speedup of the total running time on x86.

Speed difference: 5.1 s instead of 8.6 s for AAC-LC 128 kbit/s and
3.8 s instead of 4.9 s for HE-AACv2 32 kbit/s, on an i7.

Note that __builtin_clz operates on unsigned int, so the 16-bit
variant masks its argument to 16 bits and subtracts 16 from the
result; this keeps it consistent with the value 16 returned for zero.
---
 libFDK/include/clz.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/libFDK/include/clz.h b/libFDK/include/clz.h
index 90cdb2b..4f7d240 100644
--- a/libFDK/include/clz.h
+++ b/libFDK/include/clz.h
@@ -115,6 +115,11 @@ inline INT fixnormz_S (SHORT a)
 {
   return fixnormz_D((INT)(a));
 }
+#elif defined(__GNUC__)
+inline INT fixnormz_S (SHORT a) /* leading zero bits of a taken as a 16-bit value; 16 for a == 0 */
+{
+  return a ? __builtin_clz((unsigned int)a & 0xFFFFu) - 16 : 16; /* builtin takes unsigned int (assumed 32-bit) and is undefined for 0 */
+}
 #else
 inline INT fixnormz_S (SHORT a)
 {
@@ -131,6 +136,12 @@ inline INT fixnormz_S (SHORT a)
 #endif
 
 #if !defined(FUNCTION_fixnormz_D)
+#if defined(__GNUC__)
+inline INT fixnormz_D (LONG a) /* leading zero bits of a; 32 for a == 0 */
+{
+  return a ? __builtin_clz(a) : 32; /* __builtin_clz(0) is undefined, hence the explicit zero case; assumes LONG is 32 bits wide - TODO confirm */
+}
+#else
 inline INT fixnormz_D (LONG a)
 {
     INT leadingBits = 0;
@@ -143,6 +154,7 @@ inline INT fixnormz_D (LONG a)
     return (leadingBits);
 }
 #endif
+#endif
 
 
 /*****************************************************************************