Added run time ARM-Neon detection feature in SPL functions.

Review URL: https://webrtc-codereview.appspot.com/728010 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2721 4adac7df-926f-26a2-2b94-8c16560cd09d
2012-09-08 00:09:26 +00:00 · 2012-09-08 00:09:26 +00:00 · 0221b78e2e
commit 0221b78e2e
parent 706a546f69
25 changed files with 428 additions and 164 deletions
--- a/Android.mk
+++ b/Android.mk
@ -45,7 +45,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \
 ifeq ($(WEBRTC_BUILD_NEON_LIBS),true)
 LOCAL_WHOLE_STATIC_LIBRARIES += \
    libwebrtc_aecm_neon \
-    libwebrtc_ns_neon
+    libwebrtc_ns_neon \
+    libwebrtc_spl_neon
 endif

 LOCAL_STATIC_LIBRARIES := \
--- a/android-webrtc.mk
+++ b/android-webrtc.mk
@ -29,13 +29,16 @@ MY_WEBRTC_COMMON_DEFS += \
 ifeq ($(ARCH_ARM_HAVE_NEON),true)
 MY_WEBRTC_COMMON_DEFS += \
    '-DWEBRTC_ARCH_ARM_NEON'
-MY_ARM_CFLAGS_NEON := \
-    -flax-vector-conversions
 endif

 ifneq (,$(filter '-DWEBRTC_DETECT_ARM_NEON' '-DWEBRTC_ARCH_ARM_NEON', \
    $(MY_WEBRTC_COMMON_DEFS)))
 WEBRTC_BUILD_NEON_LIBS := true
+# TODO(kma): Use MY_WEBRTC_COMMON_DEFS for Neon libraries in AECM, NS, and iSAC.
+MY_WEBRTC_COMMON_DEFS += \
+    -mfpu=neon \
+    -mfloat-abi=softfp \
+    -flax-vector-conversions
 endif

 ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
--- a/src/common_audio/signal_processing/Android.mk
+++ b/src/common_audio/signal_processing/Android.mk
@ -21,8 +21,10 @@ LOCAL_SRC_FILES := \
    auto_correlation.c \
    complex_fft.c \
    copy_set_operations.c \
+    cross_correlation.c \
    division_operations.c \
    dot_product_with_scale.c \
+    downsample_fast.c \
    energy.c \
    filter_ar.c \
    filter_ma_fast_q12.c \
@ -39,6 +41,7 @@ LOCAL_SRC_FILES := \
    resample_by_2.c \
    resample_by_2_internal.c \
    resample_fractional.c \
+    spl_init.c \
    spl_sqrt.c \
    spl_version.c \
    splitting_filter.c \
@ -53,20 +56,6 @@ LOCAL_C_INCLUDES := \
    $(LOCAL_PATH)/include \
    $(LOCAL_PATH)/../.. 

-ifeq ($(ARCH_ARM_HAVE_NEON),true)
-LOCAL_SRC_FILES += \
-    cross_correlation_neon.s \
-    downsample_fast_neon.s \
-    min_max_operations_neon.s \
-    vector_scaling_operations_neon.s
-LOCAL_CFLAGS += \
-    $(MY_ARM_CFLAGS_NEON)
-else
-LOCAL_SRC_FILES += \
-    cross_correlation.c \
-    downsample_fast.c
-endif
-
 ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
 LOCAL_SRC_FILES += \
    filter_ar_fast_q12_armv7.s
@ -99,3 +88,34 @@ ifndef NDK_ROOT
 include external/stlport/libstlport.mk
 endif
 include $(BUILD_STATIC_LIBRARY)
+
+#########################
+# Build the neon library.
+ifeq ($(WEBRTC_BUILD_NEON_LIBS),true)
+
+include $(CLEAR_VARS)
+
+LOCAL_ARM_MODE := arm
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+LOCAL_MODULE := libwebrtc_spl_neon
+LOCAL_MODULE_TAGS := optional
+LOCAL_SRC_FILES := \
+    cross_correlation_neon.s \
+    downsample_fast_neon.s \
+    min_max_operations_neon.s \
+    vector_scaling_operations_neon.s
+
+# Flags passed to both C and C++ files.
+LOCAL_CFLAGS := $(MY_WEBRTC_COMMON_DEFS)
+
+LOCAL_C_INCLUDES := \
+    $(LOCAL_PATH)/include \
+    $(LOCAL_PATH)/../.. 
+
+ifndef NDK_ROOT
+include external/stlport/libstlport.mk
+endif
+include $(BUILD_STATIC_LIBRARY)
+
+endif # ifeq ($(WEBRTC_BUILD_NEON_LIBS),true)
+
--- a/src/common_audio/signal_processing/cross_correlation.c
+++ b/src/common_audio/signal_processing/cross_correlation.c
@ -10,13 +10,14 @@

 #include "signal_processing_library.h"

-void WebRtcSpl_CrossCorrelation(int32_t* cross_correlation,
-                                const int16_t* seq1,
-                                const int16_t* seq2,
-                                int16_t dim_seq,
-                                int16_t dim_cross_correlation,
-                                int16_t right_shifts,
-                                int16_t step_seq2) {
+/* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */
+void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
+                                 const int16_t* seq1,
+                                 const int16_t* seq2,
+                                 int16_t dim_seq,
+                                 int16_t dim_cross_correlation,
+                                 int16_t right_shifts,
+                                 int16_t step_seq2) {
  int i = 0, j = 0;

  for (i = 0; i < dim_cross_correlation; i++) {
--- a/src/common_audio/signal_processing/cross_correlation_neon.s
+++ b/src/common_audio/signal_processing/cross_correlation_neon.s
@ -9,7 +9,7 @@
@

@ cross_correlation_neon.s
-@ This file contains the function WebRtcSpl_CrossCorrelation(),
+@ This file contains the function WebRtcSpl_CrossCorrelationNeon(),
@ optimized for ARM Neon platform.
@
@ Reference Ccode at end of this file.
@ -33,9 +33,9 @@
 .fpu neon

 .align  2
-.global WebRtcSpl_CrossCorrelation
+.global WebRtcSpl_CrossCorrelationNeon

-WebRtcSpl_CrossCorrelation:
+WebRtcSpl_CrossCorrelationNeon:

 .fnstart

@ -109,13 +109,13 @@ POST_LOOP_DIM_SEQ_RESIDUAL:   @ Sum the results up and do the shift.


@ TODO(kma): Place this piece of reference code into a C code file.
-@ void WebRtcSpl_CrossCorrelation(WebRtc_Word32* cross_correlation,
-@                                 WebRtc_Word16* seq1,
-@                                 WebRtc_Word16* seq2,
-@                                 WebRtc_Word16 dim_seq,
-@                                 WebRtc_Word16 dim_cross_correlation,
-@                                 WebRtc_Word16 right_shifts,
-@                                 WebRtc_Word16 step_seq2) {
+@ void WebRtcSpl_CrossCorrelationNeon(WebRtc_Word32* cross_correlation,
+@                                     WebRtc_Word16* seq1,
+@                                     WebRtc_Word16* seq2,
+@                                     WebRtc_Word16 dim_seq,
+@                                     WebRtc_Word16 dim_cross_correlation,
+@                                     WebRtc_Word16 right_shifts,
+@                                     WebRtc_Word16 step_seq2) {
@   int i = 0;
@   int j = 0;
@   int inner_loop_len1 = dim_seq >> 3;
--- a/src/common_audio/signal_processing/downsample_fast.c
+++ b/src/common_audio/signal_processing/downsample_fast.c
@ -11,14 +11,15 @@
 #include "signal_processing_library.h"

 // TODO(Bjornv): Change the function parameter order to WebRTC code style.
-int WebRtcSpl_DownsampleFast(const int16_t* data_in,
-                             int data_in_length,
-                             int16_t* data_out,
-                             int data_out_length,
-                             const int16_t* __restrict coefficients,
-                             int coefficients_length,
-                             int factor,
-                             int delay) {
+// C version of WebRtcSpl_DownsampleFast() for generic platforms.
+int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
+                              int data_in_length,
+                              int16_t* data_out,
+                              int data_out_length,
+                              const int16_t* __restrict coefficients,
+                              int coefficients_length,
+                              int factor,
+                              int delay) {
  int i = 0;
  int j = 0;
  int32_t out_s32 = 0;
--- a/src/common_audio/signal_processing/downsample_fast_neon.s
+++ b/src/common_audio/signal_processing/downsample_fast_neon.s
@ -8,7 +8,7 @@
@ be found in the AUTHORS file in the root of the source tree.
@

-@ This file contains the function WebRtcSpl_DownsampleFast(), optimized for
+@ This file contains the function WebRtcSpl_DownsampleFastNeon(), optimized for
@ ARM Neon platform. The description header can be found in
@ signal_processing_library.h
@
@ -18,9 +18,9 @@
 .fpu neon

 .align  2
-.global WebRtcSpl_DownsampleFast
+.global WebRtcSpl_DownsampleFastNeon

-WebRtcSpl_DownsampleFast:
+WebRtcSpl_DownsampleFastNeon:

 .fnstart

--- a/src/common_audio/signal_processing/include/signal_processing_library.h
+++ b/src/common_audio/signal_processing/include/signal_processing_library.h
@ -162,6 +162,16 @@ extern "C"
 // inline functions:
 #include "spl_inl.h"

+// Initialize SPL. Currently it contains only function pointer initialization.
+// If the underlying platform is known to be ARM-Neon (WEBRTC_ARCH_ARM_NEON
+// defined), the pointers will be assigned to code optimized for Neon; otherwise
+// if run-time Neon detection (WEBRTC_DETECT_ARM_NEON) is enabled, the pointers
+// will be assigned to either Neon code or generic C code; otherwise, generic C
+// code will be assigned.
+// Note that this function MUST be called in any application that uses SPL
+// functions.
+void WebRtcSpl_Init();
+
 // Get SPL Version
 WebRtc_Word16 WebRtcSpl_get_version(char* version,
                                    WebRtc_Word16 length_in_bytes);
@ -196,7 +206,8 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
 // End: Copy and set operations.


-// Minimum and maximum operations. Implementation in min_max_operations.c.
+// Minimum and maximum operation functions and their pointers.
+// Implementation in min_max_operations.c.

 // Returns the largest absolute value in a signed 16-bit vector.
 //
@ -206,7 +217,12 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
 //
 // Return value  : Maximum absolute value in vector;
 //                 or -1, if (vector == NULL || length <= 0).
-int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
+typedef int16_t (*MaxAbsValueW16)(const int16_t* vector, int length);
+extern MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
+int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
+#endif

 // Returns the largest absolute value in a signed 32-bit vector.
 //
@ -216,7 +232,12 @@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
 //
 // Return value  : Maximum absolute value in vector;
 //                 or -1, if (vector == NULL || length <= 0).
-int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
+typedef int32_t (*MaxAbsValueW32)(const int32_t* vector, int length);
+extern MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
+int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
+#endif

 // Returns the maximum value of a 16-bit vector.
 //
@ -228,7 +249,12 @@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
 //                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MIN
 //                 is returned. Note that WEBRTC_SPL_WORD16_MIN is a feasible
 //                 value and we can't catch errors purely based on it.
-int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
+typedef int16_t (*MaxValueW16)(const int16_t* vector, int length);
+extern MaxValueW16 WebRtcSpl_MaxValueW16;
+int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length);
+#endif

 // Returns the maximum value of a 32-bit vector.
 //
@ -240,7 +266,12 @@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
 //                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MIN
 //                 is returned. Note that WEBRTC_SPL_WORD32_MIN is a feasible
 //                 value and we can't catch errors purely based on it.
-int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
+typedef int32_t (*MaxValueW32)(const int32_t* vector, int length);
+extern MaxValueW32 WebRtcSpl_MaxValueW32;
+int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length);
+#endif

 // Returns the minimum value of a 16-bit vector.
 //
@ -252,7 +283,12 @@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
 //                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MAX
 //                 is returned. Note that WEBRTC_SPL_WORD16_MAX is a feasible
 //                 value and we can't catch errors purely based on it.
-int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
+typedef int16_t (*MinValueW16)(const int16_t* vector, int length);
+extern MinValueW16 WebRtcSpl_MinValueW16;
+int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length);
+#endif

 // Returns the minimum value of a 32-bit vector.
 //
@ -264,7 +300,12 @@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
 //                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MAX
 //                 is returned. Note that WEBRTC_SPL_WORD32_MAX is a feasible
 //                 value and we can't catch errors purely based on it.
-int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
+typedef int32_t (*MinValueW32)(const int32_t* vector, int length);
+extern MinValueW32 WebRtcSpl_MinValueW32;
+int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length);
+#endif

 // Returns the vector index to the largest absolute value of a 16-bit vector.
 //
@ -358,7 +399,7 @@ void WebRtcSpl_ScaleAndAddVectors(G_CONST WebRtc_Word16* in_vector1,
                                  WebRtc_Word16* out_vector,
                                  int vector_length);

-// Performs the vector operation:
+// The functions (with related pointer) perform the vector operation:
 //   out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k])
 //        + round_value) >> right_shifts,
 //   where  round_value = (1 << right_shifts) >> 1.
@ -376,14 +417,30 @@ void WebRtcSpl_ScaleAndAddVectors(G_CONST WebRtc_Word16* in_vector1,
 // Return value            : 0 if OK, -1 if (in_vector1 == NULL
 //                           || in_vector2 == NULL || out_vector == NULL
 //                           || length <= 0 || right_shift < 0).
-int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
-                                          int16_t in_vector1_scale,
-                                          const int16_t* in_vector2,
-                                          int16_t in_vector2_scale,
-                                          int right_shifts,
-                                          int16_t* out_vector,
-                                          int length);
-
+typedef int (*ScaleAndAddVectorsWithRound)(const int16_t* in_vector1,
+                                           int16_t in_vector1_scale,
+                                           const int16_t* in_vector2,
+                                           int16_t in_vector2_scale,
+                                           int right_shifts,
+                                           int16_t* out_vector,
+                                           int length);
+extern ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
+int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
+                                           int16_t in_vector1_scale,
+                                           const int16_t* in_vector2,
+                                           int16_t in_vector2_scale,
+                                           int right_shifts,
+                                           int16_t* out_vector,
+                                           int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int WebRtcSpl_ScaleAndAddVectorsWithRoundNeon(const int16_t* in_vector1,
+                                              int16_t in_vector1_scale,
+                                              const int16_t* in_vector2,
+                                              int16_t in_vector2_scale,
+                                              int right_shifts,
+                                              int16_t* out_vector,
+                                              int length);
+#endif
 // End: Vector scaling operations.

 // iLBC specific functions. Implementations in ilbc_specific_functions.c.
@ -508,7 +565,8 @@ void WebRtcSpl_AutoCorrToReflCoef(G_CONST WebRtc_Word32* auto_corr,
                                  int use_order,
                                  WebRtc_Word16* refl_coef);

-// Calculates the cross-correlation between two sequences |seq1| and |seq2|.
+// The functions (with related pointer) calculate the cross-correlation between
+// two sequences |seq1| and |seq2|.
 // |seq1| is fixed and |seq2| slides as the pointer is increased with the
 // amount |step_seq2|. Note the arguments should obey the relationship:
 // |dim_seq| - 1 + |step_seq2| * (|dim_cross_correlation| - 1) <
@ -530,13 +588,30 @@ void WebRtcSpl_AutoCorrToReflCoef(G_CONST WebRtc_Word32* auto_corr,
 //
 // Output:
 //      - cross_correlation : The cross-correlation in Q(-right_shifts)
-void WebRtcSpl_CrossCorrelation(int32_t* cross_correlation,
-                                const int16_t* seq1,
-                                const int16_t* seq2,
-                                int16_t dim_seq,
-                                int16_t dim_cross_correlation,
-                                int16_t right_shifts,
-                                int16_t step_seq2);
+typedef void (*CrossCorrelation)(int32_t* cross_correlation,
+                                 const int16_t* seq1,
+                                 const int16_t* seq2,
+                                 int16_t dim_seq,
+                                 int16_t dim_cross_correlation,
+                                 int16_t right_shifts,
+                                 int16_t step_seq2);
+extern CrossCorrelation WebRtcSpl_CrossCorrelation;
+void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
+                                 const int16_t* seq1,
+                                 const int16_t* seq2,
+                                 int16_t dim_seq,
+                                 int16_t dim_cross_correlation,
+                                 int16_t right_shifts,
+                                 int16_t step_seq2);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
+                                    const int16_t* seq1,
+                                    const int16_t* seq2,
+                                    int16_t dim_seq,
+                                    int16_t dim_cross_correlation,
+                                    int16_t right_shifts,
+                                    int16_t step_seq2);
+#endif

 // Creates (the first half of) a Hanning window. Size must be at least 1 and
 // at most 512.
@ -636,7 +711,8 @@ void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
                               int coefficients_length,
                               int data_length);

-// Performs a MA down sampling filter on a vector
+// The functions (with related pointer) perform a MA down sampling filter
+// on a vector.
 // Input:
 //      - data_in            : Input samples (state in positions
 //                               data_in[-order] .. data_in[-1])
@ -651,14 +727,33 @@ void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
 // Output:
 //      - data_out           : Filtered samples
 // Return value              : 0 if OK, -1 if |in_vector| is too short
-int WebRtcSpl_DownsampleFast(const int16_t* data_in,
-                             int data_in_length,
-                             int16_t* data_out,
-                             int data_out_length,
-                             const int16_t* __restrict coefficients,
-                             int coefficients_length,
-                             int factor,
-                             int delay);
+typedef int (*DownsampleFast)(const int16_t* data_in,
+                              int data_in_length,
+                              int16_t* data_out,
+                              int data_out_length,
+                              const int16_t* __restrict coefficients,
+                              int coefficients_length,
+                              int factor,
+                              int delay);
+extern DownsampleFast WebRtcSpl_DownsampleFast;
+int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
+                              int data_in_length,
+                              int16_t* data_out,
+                              int data_out_length,
+                              const int16_t* __restrict coefficients,
+                              int coefficients_length,
+                              int factor,
+                              int delay);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
+                                 int data_in_length,
+                                 int16_t* data_out,
+                                 int data_out_length,
+                                 const int16_t* __restrict coefficients,
+                                 int coefficients_length,
+                                 int factor,
+                                 int delay);
+#endif

 // End: Filter operations.

--- a/src/common_audio/signal_processing/min_max_operations.c
+++ b/src/common_audio/signal_processing/min_max_operations.c
@ -10,12 +10,12 @@

 /*
 * This file contains the implementation of functions
- * WebRtcSpl_MaxAbsValueW16()
- * WebRtcSpl_MaxAbsValueW32()
- * WebRtcSpl_MaxValueW16()
- * WebRtcSpl_MaxValueW32()
- * WebRtcSpl_MinValueW16()
- * WebRtcSpl_MinValueW32()
+ * WebRtcSpl_MaxAbsValueW16C()
+ * WebRtcSpl_MaxAbsValueW32C()
+ * WebRtcSpl_MaxValueW16C()
+ * WebRtcSpl_MaxValueW32C()
+ * WebRtcSpl_MinValueW16C()
+ * WebRtcSpl_MinValueW32C()
 * WebRtcSpl_MaxAbsIndexW16()
 * WebRtcSpl_MaxIndexW16()
 * WebRtcSpl_MaxIndexW32()
@ -29,13 +29,11 @@
 #include <stdlib.h>

 // TODO(bjorn/kma): Consolidate function pairs (e.g. combine
-// WebRtcSpl_MaxAbsValueW16 and WebRtcSpl_MaxAbsIndexW16 into a single one.)
+//   WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
+// TODO(kma): Move the next six functions into min_max_operations_c.c.

-// TODO(kma): Move the code in the #ifndef block into min_max_operations_c.c.
-#ifndef WEBRTC_ARCH_ARM_NEON
-
-// Maximum absolute value of word16 vector.
-int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
+// Maximum absolute value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, int length) {
  int i = 0, absolute = 0, maximum = 0;

  if (vector == NULL || length <= 0) {
@ -58,8 +56,8 @@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
  return (int16_t)maximum;
 }

-// Maximum absolute value of word32 vector.
-int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length) {
+// Maximum absolute value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, int length) {
  // Use uint32_t for the local variables, to accommodate the return value
  // of abs(0x80000000), which is 0x80000000.

@ -82,8 +80,8 @@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length) {
  return (int32_t)maximum;
 }

-// Maximum value of word16 vector.
-int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length) {
+// Maximum value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, int length) {
  int16_t maximum = WEBRTC_SPL_WORD16_MIN;
  int i = 0;

@ -98,8 +96,8 @@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length) {
  return maximum;
 }

-// Maximum value of word32 vector.
-int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length) {
+// Maximum value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, int length) {
  int32_t maximum = WEBRTC_SPL_WORD32_MIN;
  int i = 0;

@ -114,8 +112,8 @@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length) {
  return maximum;
 }

-// Minimum value of word16 vector.
-int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length) {
+// Minimum value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, int length) {
  int16_t minimum = WEBRTC_SPL_WORD16_MAX;
  int i = 0;

@ -130,8 +128,8 @@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length) {
  return minimum;
 }

-// Minimum value of word32 vector.
-int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length) {
+// Minimum value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, int length) {
  int32_t minimum = WEBRTC_SPL_WORD32_MAX;
  int i = 0;

@ -145,8 +143,6 @@ int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length) {
  }
  return minimum;
 }
-#endif  // WEBRTC_ARCH_ARM_NEON
-

 // Index of maximum absolute value in a word16 vector.
 int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length) {
--- a/src/common_audio/signal_processing/min_max_operations_neon.s
+++ b/src/common_audio/signal_processing/min_max_operations_neon.s
@ -8,7 +8,7 @@
@ be found in the AUTHORS file in the root of the source tree.
@

-@ This file contains the function WebRtcSpl_MaxAbsValueW16(), optimized for
+@ This file contains some minimum and maximum functions, optimized for
@ ARM Neon platform. The description header can be found in
@ signal_processing_library.h
@
@ -17,16 +17,16 @@

 .arch armv7-a
 .fpu neon
-.global WebRtcSpl_MaxAbsValueW16
-.global WebRtcSpl_MaxAbsValueW32
-.global WebRtcSpl_MaxValueW16
-.global WebRtcSpl_MaxValueW32
-.global WebRtcSpl_MinValueW16
-.global WebRtcSpl_MinValueW32
+.global WebRtcSpl_MaxAbsValueW16Neon
+.global WebRtcSpl_MaxAbsValueW32Neon
+.global WebRtcSpl_MaxValueW16Neon
+.global WebRtcSpl_MaxValueW32Neon
+.global WebRtcSpl_MinValueW16Neon
+.global WebRtcSpl_MinValueW32Neon
 .align  2

-@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
-WebRtcSpl_MaxAbsValueW16:
+@ int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
+WebRtcSpl_MaxAbsValueW16Neon:
 .fnstart

  mov r2, #-1                 @ Initialize the return value.
@ -73,8 +73,8 @@ END_MAX_ABS_VALUE_W16:

 .fnend

-@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
-WebRtcSpl_MaxAbsValueW32:
+@ int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
+WebRtcSpl_MaxAbsValueW32Neon:
 .fnstart

  cmp r0, #0
@ -127,8 +127,8 @@ EXIT:

 .fnend

-@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
-WebRtcSpl_MaxValueW16:
+@ int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length);
+WebRtcSpl_MaxValueW16Neon:
 .fnstart

  mov r2, #0x8000             @ Initialize the return value.
@ -170,8 +170,8 @@ END_MAX_VALUE_W16:

 .fnend

-@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
-WebRtcSpl_MaxValueW32:
+@ int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length);
+WebRtcSpl_MaxValueW32Neon:
 .fnstart

  mov r2, #0x80000000         @ Initialize the return value.
@ -215,8 +215,8 @@ END_MAX_VALUE_W32:

 .fnend

-@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
-WebRtcSpl_MinValueW16:
+@ int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length);
+WebRtcSpl_MinValueW16Neon:
 .fnstart

  movw r2, #0x7FFF            @ Initialize the return value.
@ -259,8 +259,8 @@ END_MIN_VALUE_W16:

 .fnend

-@ int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
-WebRtcSpl_MinValueW32:
+@ int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length);
+WebRtcSpl_MinValueW32Neon:
 .fnstart

  mov r2, #0x7FFFFFFF         @ Initialize the return value.
--- a/src/common_audio/signal_processing/real_fft_unittest.cc
+++ b/src/common_audio/signal_processing/real_fft_unittest.cc
@ -25,6 +25,10 @@ const int16_t kRefData[kLength] = {
 };

 class RealFFTTest : public ::testing::Test {
+ protected:
+   RealFFTTest() {
+     WebRtcSpl_Init();
+   }
 };

 TEST_F(RealFFTTest, CreateFailsOnBadInput) {
--- a/src/common_audio/signal_processing/signal_processing.gypi
+++ b/src/common_audio/signal_processing/signal_processing.gypi
@ -14,6 +14,9 @@
      'include_dirs': [
        'include',
      ],
+      'dependencies': [
+        '<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
+      ],
      'direct_dependent_settings': {
        'include_dirs': [
          'include',
@ -51,6 +54,7 @@
        'resample_by_2_internal.c',
        'resample_by_2_internal.h',
        'resample_fractional.c',
+        'spl_init.c',
        'spl_sqrt.c',
        'spl_sqrt_floor.c',
        'spl_version.c',
@ -70,6 +74,7 @@
          ],
          'conditions': [
            ['armv7==1', {
+              'dependencies': ['signal_processing_neon',],
              'sources': [
                'filter_ar_fast_q12_armv7.s',
              ],
@ -77,18 +82,6 @@
                'filter_ar_fast_q12.c',
              ],
            }],
-            ['arm_neon==1', {
-              'sources': [
-                'cross_correlation_neon.s',
-                'downsample_fast_neon.s',
-                'min_max_operations_neon.s',
-                'vector_scaling_operations_neon.s',
-              ],
-              'sources!': [
-                'cross_correlation.c',
-                'downsample_fast.c',
-              ],
-            }],
          ],
        }],
      ],
@ -112,5 +105,20 @@
        }, # spl_unittests
      ], # targets
    }], # include_tests
+    ['target_arch=="arm" and armv7==1', {
+      'targets': [
+        {
+          'target_name': 'signal_processing_neon',
+          'type': '<(library)',
+          'includes': ['../../build/arm_neon.gypi',],
+          'sources': [
+            'cross_correlation_neon.s',
+            'downsample_fast_neon.s',
+            'min_max_operations_neon.s',
+            'vector_scaling_operations_neon.s',
+          ],
+        },
+      ],
+    }], # 'target_arch=="arm" and armv7==1'
  ], # conditions
 }
--- a/src/common_audio/signal_processing/signal_processing_unittest.cc
+++ b/src/common_audio/signal_processing/signal_processing_unittest.cc
@ -17,12 +17,11 @@ static const int16_t vector16[kVector16Size] = {1, -15511, 4323, 1963,

 class SplTest : public testing::Test {
 protected:
-  virtual ~SplTest() {
-  }
-  void SetUp() {
-  }
-  void TearDown() {
-  }
+   SplTest() {
+     WebRtcSpl_Init();
+   }
+   virtual ~SplTest() {
+   }
 };

 TEST_F(SplTest, MacroTest) {
--- a/src/common_audio/signal_processing/spl_init.c
+++ b/src/common_audio/signal_processing/spl_init.c
@ -0,0 +1,118 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* The global function contained in this file initializes SPL function
+ * pointers, currently only for ARM platforms.
+ *
+ * Some code came from common/rtcd.c in the WebM project.
+ */
+
+/* TODO(kma): Add calls to WebRtcSpl_Init() in all related modules
+ * (AEC, NS, codecs etc.).
+ */
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "system_wrappers/interface/cpu_features_wrapper.h"
+
+/* Define the function pointers; WebRtcSpl_Init() must run to assign them. */
+MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
+MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
+MaxValueW16 WebRtcSpl_MaxValueW16;
+MaxValueW32 WebRtcSpl_MaxValueW32;
+MinValueW16 WebRtcSpl_MinValueW16;
+MinValueW32 WebRtcSpl_MinValueW32;
+CrossCorrelation WebRtcSpl_CrossCorrelation;
+DownsampleFast WebRtcSpl_DownsampleFast;
+ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
+
+/* Point every dispatched SPL entry point at its generic C implementation.
+ * InitFunctionPointers() calls this when Neon is not selected.
+ */
+static void InitPointersToC(void) {
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+}
+
+#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
+/* Point every dispatched SPL entry point at its ARM Neon implementation.
+ * Only compiled when WEBRTC_DETECT_ARM_NEON or WEBRTC_ARCH_ARM_NEON is
+ * defined.
+ */
+static void InitPointersToNeon(void) {
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon;
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundNeon;
+}
+#endif
+
+/* Choose the implementation set for the dispatched SPL functions:
+ *  - WEBRTC_DETECT_ARM_NEON: query the CPU at run time; use Neon if reported,
+ *    the generic C versions otherwise.
+ *  - WEBRTC_ARCH_ARM_NEON: Neon is selected at build time.
+ *  - neither: the generic C versions are used.
+ */
+static void InitFunctionPointers(void) {
+#if defined(WEBRTC_DETECT_ARM_NEON)
+  if (WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) {
+    InitPointersToNeon();
+    return;
+  }
+  InitPointersToC();
+#elif defined(WEBRTC_ARCH_ARM_NEON)
+  InitPointersToNeon();
+#else
+  InitPointersToC();
+#endif  /* WEBRTC_DETECT_ARM_NEON */
+}
+
+
+#if defined(WEBRTC_POSIX)
+#include <pthread.h>
+
+/* Run |func| through pthread_once() so that it executes only one time, no
+ * matter how often once() itself is called.
+ */
+static void once(void (*func)(void)) {
+  static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+  pthread_once(&once_control, func);
+}
+
+#elif defined(_WIN32)
+#include <windows.h>
+
+/* Run |func| exactly once, even when several threads call once()
+ * concurrently; callers that lose the race wait until |func| has finished.
+ *
+ * A CRITICAL_SECTION is not used: it has to be set up with
+ * InitializeCriticalSection() before first use, and there is no race-free
+ * context in which to do that here. Hand-writing a static initializer for it
+ * depends on the structure's undocumented layout (see
+ * http://code.google.com/p/webm/issues/detail?id=467), so an Interlocked
+ * state variable is used instead.
+ */
+static void once(void (*func)(void)) {
+  /* 0: not started, 1: |func| is running, 2: |func| has completed. */
+  static volatile LONG state = 0;
+
+  /* The thread that moves the state from 0 to 1 performs the call. */
+  if (InterlockedCompareExchange(&state, 1, 0) == 0) {
+    func();
+    InterlockedExchange(&state, 2);
+    return;
+  }
+
+  /* Everyone else waits until the state reads 2. The compare-exchange with
+   * identical exchange and comparand values is an atomic read with a full
+   * barrier. Sleep(1) gives up the time slice so the initializing thread can
+   * make progress.
+   */
+  while (InterlockedCompareExchange(&state, 2, 2) != 2) {
+    Sleep(1);
+  }
+}
+
+/* There is deliberately no fallback #else implementation of once(), since a
+ * naive version would not be thread safe. If neither WEBRTC_POSIX (pthread)
+ * nor _WIN32 is defined, once() is missing and the build is expected to fail.
+ */
+#endif  /* WEBRTC_POSIX */
+
+/* Public entry point that sets up the SPL function pointers by running
+ * InitFunctionPointers(). The call is routed through once(), so calling this
+ * function again after the first time does not redo the setup.
+ */
+void WebRtcSpl_Init() {
+  once(InitFunctionPointers);
+}
--- a/src/common_audio/signal_processing/vector_scaling_operations.c
+++ b/src/common_audio/signal_processing/vector_scaling_operations.c
@ -17,7 +17,7 @@
 * WebRtcSpl_ScaleVector()
 * WebRtcSpl_ScaleVectorWithSat()
 * WebRtcSpl_ScaleAndAddVectors()
- * WebRtcSpl_ScaleAndAddVectorsWithRound()
+ * WebRtcSpl_ScaleAndAddVectorsWithRoundC()
 */

 #include "signal_processing_library.h"
@ -148,14 +148,14 @@ void WebRtcSpl_ScaleAndAddVectors(G_CONST WebRtc_Word16 *in1, WebRtc_Word16 gain
    }
 }

-#ifndef WEBRTC_ARCH_ARM_NEON
-int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
-                                          int16_t in_vector1_scale,
-                                          const int16_t* in_vector2,
-                                          int16_t in_vector2_scale,
-                                          int right_shifts,
-                                          int16_t* out_vector,
-                                          int length) {
+// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms.
+int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
+                                           int16_t in_vector1_scale,
+                                           const int16_t* in_vector2,
+                                           int16_t in_vector2_scale,
+                                           int right_shifts,
+                                           int16_t* out_vector,
+                                           int length) {
  int i = 0;
  int round_value = (1 << right_shifts) >> 1;

@ -173,4 +173,3 @@ int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,

  return 0;
 }
-#endif  // WEBRTC_ARCH_ARM_NEON
--- a/src/common_audio/signal_processing/vector_scaling_operations_neon.s
+++ b/src/common_audio/signal_processing/vector_scaling_operations_neon.s
@ -9,7 +9,7 @@
@

@ vector_scaling_operations_neon.s
-@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRound(),
+@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRoundNeon(),
@ optimized for ARM Neon platform. Output is bit-exact with the reference
@ C code in vector_scaling_operations.c.

@ -17,9 +17,9 @@
 .fpu neon

 .align  2
-.global WebRtcSpl_ScaleAndAddVectorsWithRound
+.global WebRtcSpl_ScaleAndAddVectorsWithRoundNeon

-WebRtcSpl_ScaleAndAddVectorsWithRound:
+WebRtcSpl_ScaleAndAddVectorsWithRoundNeon:
 .fnstart

  push {r4-r9}
--- a/src/common_audio/vad/webrtc_vad.c
+++ b/src/common_audio/vad/webrtc_vad.c
@ -13,6 +13,7 @@
 #include <stdlib.h>
 #include <string.h>

+#include "common_audio/signal_processing/include/signal_processing_library.h"
 #include "common_audio/vad/vad_core.h"
 #include "typedefs.h"

@ -53,6 +54,7 @@ int WebRtcVad_Free(VadInst* handle) {

 // TODO(bjornv): Move WebRtcVad_InitCore() code here.
 int WebRtcVad_Init(VadInst* handle) {
+  WebRtcSpl_Init();
  // Initialize the core VAD component.
  return WebRtcVad_InitCore((VadInstT*) handle);
 }
--- a/src/modules/audio_coding/codecs/ilbc/init_decode.c
+++ b/src/modules/audio_coding/codecs/ilbc/init_decode.c
@ -23,14 +23,14 @@
 *  Initiation of decoder instance.
 *---------------------------------------------------------------*/

-WebRtc_Word16 WebRtcIlbcfix_InitDecode(		/* (o) Number of decoded samples */
-    iLBC_Dec_Inst_t *iLBCdec_inst,	/* (i/o) Decoder instance */
-    WebRtc_Word16 mode,					/* (i) frame size mode */
-    int use_enhancer           /* (i) 1 to use enhancer
-                                  0 to run without enhancer */
-                                                ) {
+WebRtc_Word16 WebRtcIlbcfix_InitDecode(  /* (o) Number of decoded samples */
+    iLBC_Dec_Inst_t *iLBCdec_inst,  /* (i/o) Decoder instance */
+    WebRtc_Word16 mode,  /* (i) frame size mode */
+    int use_enhancer) {  /* (i) 1: use enhancer, 0: no enhancer */
  int i;

+  WebRtcSpl_Init();
+
  iLBCdec_inst->mode = mode;

  /* Set all the variables that are dependent on the frame size mode */
--- a/src/modules/audio_coding/codecs/ilbc/init_encode.c
+++ b/src/modules/audio_coding/codecs/ilbc/init_encode.c
@ -23,10 +23,11 @@
 *  Initiation of encoder instance.
 *---------------------------------------------------------------*/

-WebRtc_Word16 WebRtcIlbcfix_InitEncode( /* (o) Number of bytes encoded */
-    iLBC_Enc_Inst_t *iLBCenc_inst,     /* (i/o) Encoder instance */
-    WebRtc_Word16 mode     /* (i) frame size mode */
-                                        ){
+WebRtc_Word16 WebRtcIlbcfix_InitEncode(  /* (o) Number of bytes encoded */
+    iLBC_Enc_Inst_t *iLBCenc_inst,  /* (i/o) Encoder instance */
+    WebRtc_Word16 mode) {  /* (i) frame size mode */
+  WebRtcSpl_Init();
+
  iLBCenc_inst->mode = mode;

  /* Set all the variables that are dependent on the frame size mode */
--- a/src/modules/audio_coding/codecs/isac/fix/source/isacfix.c
+++ b/src/modules/audio_coding/codecs/isac/fix/source/isacfix.c
@ -217,6 +217,8 @@ WebRtc_Word16 WebRtcIsacfix_EncoderInit(ISACFIX_MainStruct *ISAC_main_inst,
  /* flag encoder init */
  ISAC_inst->initflag |= 2;

+  WebRtcSpl_Init();
+
  if (CodingMode == 0)
    /* Adaptive mode */
    ISAC_inst->ISACenc_obj.new_framelength  = INITIAL_FRAMESAMPLES;
@ -527,6 +529,7 @@ WebRtc_Word16 WebRtcIsacfix_DecoderInit(ISACFIX_MainStruct *ISAC_main_inst)
  /* flag decoder init */
  ISAC_inst->initflag |= 1;

+  WebRtcSpl_Init();

  WebRtcIsacfix_InitMaskingDec(&ISAC_inst->ISACdec_obj.maskfiltstr_obj);
  WebRtcIsacfix_InitPostFilterbank(&ISAC_inst->ISACdec_obj.postfiltbankstr_obj);
--- a/src/modules/audio_coding/neteq/webrtc_neteq.c
+++ b/src/modules/audio_coding/neteq/webrtc_neteq.c
@ -414,6 +414,8 @@ int WebRtcNetEQ_Init(void *inst, WebRtc_UWord16 fs)
        return (-1);
    }

+    WebRtcSpl_Init();
+
 #ifdef NETEQ_VAD
    /* Start out with no PostDecode VAD instance */
    NetEqMainInst->DSPinst.VADInst.VADState = NULL;
--- a/src/modules/audio_device/main/source/audio_device_impl.cc
+++ b/src/modules/audio_device/main/source/audio_device_impl.cc
@ -10,6 +10,7 @@

 #include "audio_device_impl.h"
 #include "audio_device_config.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
 #include "system_wrappers/interface/ref_count.h"

 #include <assert.h>
@ -604,6 +605,8 @@ WebRtc_Word32 AudioDeviceModuleImpl::Init()
    if (!_ptrAudioDevice)
        return -1;

+    WebRtcSpl_Init();
+
    _ptrAudioDeviceUtility->Init();

    if (_ptrAudioDevice->Init() == -1)
--- a/src/modules/audio_processing/aecm/echo_control_mobile.c
+++ b/src/modules/audio_processing/aecm/echo_control_mobile.c
@ -13,6 +13,7 @@

 #include "echo_control_mobile.h"
 #include "aecm_core.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
 #include "ring_buffer.h"
 #ifdef AEC_DEBUG
 #include <stdio.h>
@ -170,6 +171,8 @@ WebRtc_Word32 WebRtcAecm_Init(void *aecmInst, WebRtc_Word32 sampFreq)
    }
    aecm->sampFreq = sampFreq;

+    WebRtcSpl_Init();
+
    // Initialize AECM core
    if (WebRtcAecm_InitCore(aecm->aecmCore, aecm->sampFreq) == -1)
    {
--- a/src/modules/audio_processing/ns/noise_suppression.c
+++ b/src/modules/audio_processing/ns/noise_suppression.c
@ -8,12 +8,14 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include "noise_suppression.h"
+
 #include <stdlib.h>
 #include <string.h>

-#include "noise_suppression.h"
-#include "ns_core.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
 #include "defines.h"
+#include "ns_core.h"

 int WebRtcNs_Create(NsHandle** NS_inst) {
  *NS_inst = (NsHandle*) malloc(sizeof(NSinst_t));
@ -33,6 +35,7 @@ int WebRtcNs_Free(NsHandle* NS_inst) {


 int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs) {
+  WebRtcSpl_Init();
  return WebRtcNs_InitCore((NSinst_t*) NS_inst, fs);
 }

--- a/src/voice_engine/voe_base_impl.cc
+++ b/src/voice_engine/voe_base_impl.cc
@ -336,6 +336,8 @@ int VoEBaseImpl::Init(AudioDeviceModule* external_adm)
        "Init(external_adm=0x%p)", external_adm);
    CriticalSectionScoped cs(_shared->crit_sec());

+    WebRtcSpl_Init();
+
    if (_shared->statistics().Initialized())
    {
        return 0;