Add SSE2 support for Windows.

The previous defines controlling SSE2 use assumed GCC. To fix this:
- Import a chunk of defines into typedefs.h from Chrome's build_config.h, primarily to get WEBRTC_ARCH_X86_FAMILY.
- Add a check derived from WebP to define WEBRTC_USE_SSE2.
- Modify cpu_features.cc to work with MSVC. This code is borrowed from chrome/src/base/cpu.cc.
- Change AEC defines to use WEBRTC_USE_SSE2.
- Remove disable_sse2 check from aec.gyp. This is handled by WEBRTC_USE_SSE2.

(Also remove a bit of unused code from aec_core.h)
Review URL: http://webrtc-codereview.appspot.com/95008

git-svn-id: http://webrtc.googlecode.com/svn/trunk@299 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
ajm@google.com 2011-08-04 01:50:00 +00:00
parent ce9bfbb33d
commit ce7c2a231e
9 changed files with 98 additions and 64 deletions

View File

@ -29,20 +29,14 @@
'sources': [ 'sources': [
'../interface/echo_cancellation.h', '../interface/echo_cancellation.h',
'echo_cancellation.c', 'echo_cancellation.c',
'aec_core.h',
'aec_core.c', 'aec_core.c',
'aec_core_sse2.c',
'aec_rdft.h', 'aec_rdft.h',
'aec_rdft.c', 'aec_rdft.c',
'aec_core.h', 'aec_rdft_sse2.c',
'resampler.c',
'resampler.h', 'resampler.h',
], 'resampler.c',
'conditions': [
['disable_sse2 == 0 and (target_arch == "ia32" or target_arch == "x64")', {
'sources': [
'aec_core_sse2.c',
'aec_rdft_sse2.c',
],
}],
], ],
}, },
], ],

View File

@ -468,7 +468,7 @@ int WebRtcAec_InitAec(aec_t *aec, int sampFreq)
WebRtcAec_FilterAdaptation = FilterAdaptation; WebRtcAec_FilterAdaptation = FilterAdaptation;
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
if (WebRtc_GetCPUInfo(kSSE2)) { if (WebRtc_GetCPUInfo(kSSE2)) {
#if defined(__SSE2__) #if defined(WEBRTC_USE_SSE2)
WebRtcAec_InitAec_SSE2(); WebRtcAec_InitAec_SSE2();
#endif #endif
} }
@ -561,8 +561,8 @@ void WebRtcAec_ProcessFrame(aec_t *aec, const short *farend,
} }
static void ProcessBlock(aec_t *aec, const short *farend, static void ProcessBlock(aec_t *aec, const short *farend,
const short *nearend, const short *nearendH, const short *nearend, const short *nearendH,
short *output, short *outputH) short *output, short *outputH)
{ {
int i; int i;
float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN]; float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN];
@ -601,7 +601,6 @@ static void ProcessBlock(aec_t *aec, const short *farend,
} }
} }
memcpy(fft, aec->xBuf, sizeof(float) * PART_LEN2); memcpy(fft, aec->xBuf, sizeof(float) * PART_LEN2);
memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN); memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN);
// For H band // For H band

View File

@ -16,8 +16,9 @@
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_
#include <stdio.h> #include <stdio.h>
#include "typedefs.h"
#include "signal_processing_library.h" #include "signal_processing_library.h"
#include "typedefs.h"
//#define G167 // for running G167 tests //#define G167 // for running G167 tests
//#define UNCONSTR // time-unconstrained filter //#define UNCONSTR // time-unconstrained filter
@ -92,21 +93,13 @@ typedef struct {
float dMinPow[PART_LEN1]; float dMinPow[PART_LEN1];
float dInitMinPow[PART_LEN1]; float dInitMinPow[PART_LEN1];
float *noisePow; float *noisePow;
#ifdef FFTW
float fftR[PART_LEN2];
fftw_complex fftC[PART_LEN2];
fftw_plan fftPlan, ifftPlan;
fftw_complex xfBuf[NR_PART * PART_LEN1];
fftw_complex wfBuf[NR_PART * PART_LEN1];
fftw_complex sde[PART_LEN1];
#else
float xfBuf[2][NR_PART * PART_LEN1]; // farend fft buffer float xfBuf[2][NR_PART * PART_LEN1]; // farend fft buffer
float wfBuf[2][NR_PART * PART_LEN1]; // filter fft float wfBuf[2][NR_PART * PART_LEN1]; // filter fft
complex_t sde[PART_LEN1]; // cross-psd of nearend and error complex_t sde[PART_LEN1]; // cross-psd of nearend and error
complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend
complex_t xfwBuf[NR_PART * PART_LEN1]; // farend windowed fft buffer complex_t xfwBuf[NR_PART * PART_LEN1]; // farend windowed fft buffer
#endif
float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near and error psd float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near and error psd
float hNs[PART_LEN1]; float hNs[PART_LEN1];
float hNlFbMin, hNlFbLocalMin; float hNlFbMin, hNlFbLocalMin;
@ -169,8 +162,6 @@ typedef void (*WebRtcAec_FilterFar_t)(aec_t *aec, float yf[2][PART_LEN1]);
extern WebRtcAec_FilterFar_t WebRtcAec_FilterFar; extern WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
typedef void (*WebRtcAec_ScaleErrorSignal_t)(aec_t *aec, float ef[2][PART_LEN1]); typedef void (*WebRtcAec_ScaleErrorSignal_t)(aec_t *aec, float ef[2][PART_LEN1]);
extern WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal; extern WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
#define IP_LEN PART_LEN // this must be at least ceil(2 + sqrt(PART_LEN))
#define W_LEN PART_LEN
typedef void (*WebRtcAec_FilterAdaptation_t) typedef void (*WebRtcAec_FilterAdaptation_t)
(aec_t *aec, float *fft, float ef[2][PART_LEN1]); (aec_t *aec, float *fft, float ef[2][PART_LEN1]);
extern WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation; extern WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;

View File

@ -12,7 +12,9 @@
* The core AEC algorithm, SSE2 version of speed-critical functions. * The core AEC algorithm, SSE2 version of speed-critical functions.
*/ */
#if defined(__SSE2__) #include "typedefs.h"
#if defined(WEBRTC_USE_SSE2)
#include <emmintrin.h> #include <emmintrin.h>
#include <math.h> #include <math.h>
@ -210,14 +212,6 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1])
} }
} }
#ifdef _MSC_VER /* visual c++ */
# define ALIGN16_BEG __declspec(align(16))
# define ALIGN16_END
#else /* gcc or icc */
# define ALIGN16_BEG
# define ALIGN16_END __attribute__((aligned(16)))
#endif
static __m128 mm_pow_ps(__m128 a, __m128 b) static __m128 mm_pow_ps(__m128 a, __m128 b)
{ {
// a^b = exp2(b * log2(a)) // a^b = exp2(b * log2(a))
@ -432,4 +426,4 @@ void WebRtcAec_InitAec_SSE2(void) {
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
} }
#endif //__SSE2__ #endif // WEBRTC_USE_SSE2

View File

@ -19,10 +19,12 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#include "aec_rdft.h"
#include <math.h> #include <math.h>
#include "aec_rdft.h"
#include "system_wrappers/interface/cpu_features_wrapper.h" #include "system_wrappers/interface/cpu_features_wrapper.h"
#include "typedefs.h"
// constants shared by all paths (C, SSE2). // constants shared by all paths (C, SSE2).
float rdft_w[64]; float rdft_w[64];
@ -571,7 +573,7 @@ void aec_rdft_init(void) {
rftfsub_128 = rftfsub_128_C; rftfsub_128 = rftfsub_128_C;
rftbsub_128 = rftbsub_128_C; rftbsub_128 = rftbsub_128_C;
if (WebRtc_GetCPUInfo(kSSE2)) { if (WebRtc_GetCPUInfo(kSSE2)) {
#if defined(__SSE2__) #if defined(WEBRTC_USE_SSE2)
aec_rdft_init_sse2(); aec_rdft_init_sse2();
#endif #endif
} }

View File

@ -8,6 +8,9 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
#ifdef _MSC_VER /* visual c++ */ #ifdef _MSC_VER /* visual c++ */
# define ALIGN16_BEG __declspec(align(16)) # define ALIGN16_BEG __declspec(align(16))
# define ALIGN16_END # define ALIGN16_END
@ -40,3 +43,5 @@ void aec_rdft_init(void);
void aec_rdft_init_sse2(void); void aec_rdft_init_sse2(void);
void aec_rdft_forward_128(float *a); void aec_rdft_forward_128(float *a);
void aec_rdft_inverse_128(float *a); void aec_rdft_inverse_128(float *a);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_

View File

@ -8,7 +8,9 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#if defined(__SSE2__) #include "typedefs.h"
#if defined(WEBRTC_USE_SSE2)
#include <emmintrin.h> #include <emmintrin.h>
#include "aec_rdft.h" #include "aec_rdft.h"
@ -261,4 +263,4 @@ void aec_rdft_init_sse2(void) {
rftbsub_128 = rftbsub_128_SSE2; rftbsub_128 = rftbsub_128_SSE2;
} }
#endif // __SSE2__ #endif // WEBRTC_USE_SSE2

View File

@ -8,17 +8,29 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
// Parts of this file derived from Chromium's base/cpu.cc.
#include "cpu_features_wrapper.h" #include "cpu_features_wrapper.h"
#include "typedefs.h"
#if defined(WEBRTC_ARCH_X86_FAMILY)
#if defined(_MSC_VER)
#include <intrin.h>
#endif
#endif
// No CPU feature is available => straight C path. // No CPU feature is available => straight C path.
int GetCPUInfoNoASM(CPUFeature feature) { int GetCPUInfoNoASM(CPUFeature feature) {
(void)feature; (void)feature;
return 0; return 0;
} }
#if defined(WEBRTC_ARCH_X86_FAMILY)
#ifndef _MSC_VER
// Intrinsic for "cpuid". // Intrinsic for "cpuid".
#if defined(__pic__) && defined(__i386__) #if defined(__pic__) && defined(__i386__)
static inline void cpuid(int cpu_info[4], int info_type) { static inline void __cpuid(int cpu_info[4], int info_type) {
__asm__ volatile ( __asm__ volatile (
"mov %%ebx, %%edi\n" "mov %%ebx, %%edi\n"
"cpuid\n" "cpuid\n"
@ -26,20 +38,22 @@ static inline void cpuid(int cpu_info[4], int info_type) {
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type)); : "a"(info_type));
} }
#elif defined(__i386__) || defined(__x86_64__) #else
static inline void cpuid(int cpu_info[4], int info_type) { static inline void __cpuid(int cpu_info[4], int info_type) {
__asm__ volatile ( __asm__ volatile (
"cpuid\n" "cpuid\n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type)); : "a"(info_type));
} }
#endif #endif
#endif // _MSC_VER
#endif // WEBRTC_ARCH_X86_FAMILY
#if defined(__i386__) || defined(__x86_64__) #if defined(WEBRTC_ARCH_X86_FAMILY)
// Actual feature detection for x86. // Actual feature detection for x86.
static int GetCPUInfo(CPUFeature feature) { static int GetCPUInfo(CPUFeature feature) {
int cpu_info[4]; int cpu_info[4];
cpuid(cpu_info, 1); __cpuid(cpu_info, 1);
if (feature == kSSE2) { if (feature == kSSE2) {
return 0 != (cpu_info[3] & 0x04000000); return 0 != (cpu_info[3] & 0x04000000);
} }

View File

@ -8,21 +8,23 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
/* // This file contains platform-specific typedefs and defines.
*
* This file contains type definitions used in all WebRtc APIs.
*
*/
/* Reserved words definitions */ #ifndef WEBRTC_TYPEDEFS_H_
#define WEBRTC_TYPEDEFS_H_
// Reserved words definitions
#define WEBRTC_EXTERN extern #define WEBRTC_EXTERN extern
#define G_CONST const #define G_CONST const
#define WEBRTC_INLINE extern __inline #define WEBRTC_INLINE extern __inline
#ifndef WEBRTC_TYPEDEFS_H // Define WebRTC preprocessor identifiers based on the current build platform.
#define WEBRTC_TYPEDEFS_H // TODO(ajm): Clean these up. We can probably remove everything in this block.
// - TARGET_MAC_INTEL and TARGET_MAC aren't used anywhere.
/* Define WebRtc preprocessor identifiers based on the current build platform */ // - In the few places where TARGET_PC is used, it should be replaced by
// something more specific.
// - Do we really support PowerPC? Probably not. Remove WEBRTC_MAC_INTEL
// from build/common.gypi as well.
#if defined(WIN32) #if defined(WIN32)
// Windows & Windows Mobile // Windows & Windows Mobile
#if !defined(WEBRTC_TARGET_PC) #if !defined(WEBRTC_TARGET_PC)
@ -33,11 +35,11 @@
#if defined(__LITTLE_ENDIAN__ ) //TODO: is this used? #if defined(__LITTLE_ENDIAN__ ) //TODO: is this used?
#if !defined(WEBRTC_TARGET_MAC_INTEL) #if !defined(WEBRTC_TARGET_MAC_INTEL)
#define WEBRTC_TARGET_MAC_INTEL #define WEBRTC_TARGET_MAC_INTEL
#endif #endif
#else #else
#if !defined(WEBRTC_TARGET_MAC) #if !defined(WEBRTC_TARGET_MAC)
#define WEBRTC_TARGET_MAC #define WEBRTC_TARGET_MAC
#endif #endif
#endif #endif
#else #else
// Linux etc. // Linux etc.
@ -46,6 +48,40 @@
#endif #endif
#endif #endif
// Derived from Chromium's build/build_config.h
// Processor architecture detection. For more info on what's defined, see:
// http://msdn.microsoft.com/en-us/library/b0084kay.aspx
// http://www.agner.org/optimize/calling_conventions.pdf
// or with gcc, run: "echo | gcc -E -dM -"
// TODO(ajm): replace WEBRTC_LITTLE_ENDIAN with WEBRTC_ARCH_LITTLE_ENDIAN?
#if defined(_M_X64) || defined(__x86_64__)
#define WEBRTC_ARCH_X86_FAMILY
#define WEBRTC_ARCH_X86_64
#define WEBRTC_ARCH_64_BITS
#define WEBRTC_ARCH_LITTLE_ENDIAN
#elif defined(_M_IX86) || defined(__i386__)
#define WEBRTC_ARCH_X86_FAMILY
#define WEBRTC_ARCH_X86
#define WEBRTC_ARCH_32_BITS
#define WEBRTC_ARCH_LITTLE_ENDIAN
#elif defined(__ARMEL__)
// TODO(ajm): Chromium uses the two commented defines. Should we switch?
#define WEBRTC_ARCH_ARM
//#define WEBRTC_ARCH_ARM_FAMILY
//#define WEBRTC_ARCH_ARMEL
#define WEBRTC_ARCH_32_BITS
#define WEBRTC_ARCH_LITTLE_ENDIAN
#else
#error Please add support for your architecture in typedefs.h
#endif
// TODO(ajm): SSE2 is disabled on Windows for the moment, because AEC
// optimization is broken. Enable it as soon as AEC is fixed.
//#if defined(__SSE2__) || defined(_MSC_VER)
#if defined(__SSE2__)
#define WEBRTC_USE_SSE2
#endif
#if defined(WEBRTC_TARGET_PC) #if defined(WEBRTC_TARGET_PC)
#if !defined(_MSC_VER) #if !defined(_MSC_VER)
@ -79,7 +115,7 @@
typedef char WebRtc_Word8; typedef char WebRtc_Word8;
typedef uint8_t WebRtc_UWord8; typedef uint8_t WebRtc_UWord8;
/* Define endian for the platform */ // Define endian for the platform
#define WEBRTC_LITTLE_ENDIAN #define WEBRTC_LITTLE_ENDIAN
#elif defined(WEBRTC_TARGET_MAC_INTEL) #elif defined(WEBRTC_TARGET_MAC_INTEL)
@ -94,14 +130,11 @@
typedef uint16_t WebRtc_UWord16; typedef uint16_t WebRtc_UWord16;
typedef uint8_t WebRtc_UWord8; typedef uint8_t WebRtc_UWord8;
/* Define endian for the platform */ // Define endian for the platform
#define WEBRTC_LITTLE_ENDIAN #define WEBRTC_LITTLE_ENDIAN
#else #else
#error "No platform defined for WebRTC type definitions (typedefs.h)"
#error "No platform defined for WebRtc type definitions (webrtc_typedefs.h)"
#endif #endif
#endif // WEBRTC_TYPEDEFS_H_
#endif // WEBRTC_TYPEDEFS_H