Added run time ARM-Neon detection feature in SPL functions.

Review URL: https://webrtc-codereview.appspot.com/728010

git-svn-id: http://webrtc.googlecode.com/svn/trunk@2721 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kma@webrtc.org 2012-09-08 00:09:26 +00:00
parent 706a546f69
commit 0221b78e2e
25 changed files with 428 additions and 164 deletions

View File

@ -45,7 +45,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \
ifeq ($(WEBRTC_BUILD_NEON_LIBS),true)
LOCAL_WHOLE_STATIC_LIBRARIES += \
libwebrtc_aecm_neon \
libwebrtc_ns_neon
libwebrtc_ns_neon \
libwebrtc_spl_neon
endif
LOCAL_STATIC_LIBRARIES := \

View File

@ -29,13 +29,16 @@ MY_WEBRTC_COMMON_DEFS += \
ifeq ($(ARCH_ARM_HAVE_NEON),true)
MY_WEBRTC_COMMON_DEFS += \
'-DWEBRTC_ARCH_ARM_NEON'
MY_ARM_CFLAGS_NEON := \
-flax-vector-conversions
endif
ifneq (,$(filter '-DWEBRTC_DETECT_ARM_NEON' '-DWEBRTC_ARCH_ARM_NEON', \
$(MY_WEBRTC_COMMON_DEFS)))
WEBRTC_BUILD_NEON_LIBS := true
# TODO(kma): Use MY_WEBRTC_COMMON_DEFS for Neon libraries in AECM, NS, and iSAC.
MY_WEBRTC_COMMON_DEFS += \
-mfpu=neon \
-mfloat-abi=softfp \
-flax-vector-conversions
endif
ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)

View File

@ -21,8 +21,10 @@ LOCAL_SRC_FILES := \
auto_correlation.c \
complex_fft.c \
copy_set_operations.c \
cross_correlation.c \
division_operations.c \
dot_product_with_scale.c \
downsample_fast.c \
energy.c \
filter_ar.c \
filter_ma_fast_q12.c \
@ -39,6 +41,7 @@ LOCAL_SRC_FILES := \
resample_by_2.c \
resample_by_2_internal.c \
resample_fractional.c \
spl_init.c \
spl_sqrt.c \
spl_version.c \
splitting_filter.c \
@ -53,20 +56,6 @@ LOCAL_C_INCLUDES := \
$(LOCAL_PATH)/include \
$(LOCAL_PATH)/../..
ifeq ($(ARCH_ARM_HAVE_NEON),true)
LOCAL_SRC_FILES += \
cross_correlation_neon.s \
downsample_fast_neon.s \
min_max_operations_neon.s \
vector_scaling_operations_neon.s
LOCAL_CFLAGS += \
$(MY_ARM_CFLAGS_NEON)
else
LOCAL_SRC_FILES += \
cross_correlation.c \
downsample_fast.c
endif
ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
LOCAL_SRC_FILES += \
filter_ar_fast_q12_armv7.s
@ -99,3 +88,34 @@ ifndef NDK_ROOT
include external/stlport/libstlport.mk
endif
include $(BUILD_STATIC_LIBRARY)
#########################
# Build the neon library.
ifeq ($(WEBRTC_BUILD_NEON_LIBS),true)
include $(CLEAR_VARS)
LOCAL_ARM_MODE := arm
LOCAL_MODULE_CLASS := STATIC_LIBRARIES
LOCAL_MODULE := libwebrtc_spl_neon
LOCAL_MODULE_TAGS := optional
LOCAL_SRC_FILES := \
cross_correlation_neon.s \
downsample_fast_neon.s \
min_max_operations_neon.s \
vector_scaling_operations_neon.s
# Flags passed to both C and C++ files.
LOCAL_CFLAGS := $(MY_WEBRTC_COMMON_DEFS)
LOCAL_C_INCLUDES := \
$(LOCAL_PATH)/include \
$(LOCAL_PATH)/../..
ifndef NDK_ROOT
include external/stlport/libstlport.mk
endif
include $(BUILD_STATIC_LIBRARY)
endif # ifeq ($(WEBRTC_BUILD_NEON_LIBS),true)

View File

@ -10,13 +10,14 @@
#include "signal_processing_library.h"
void WebRtcSpl_CrossCorrelation(int32_t* cross_correlation,
const int16_t* seq1,
const int16_t* seq2,
int16_t dim_seq,
int16_t dim_cross_correlation,
int16_t right_shifts,
int16_t step_seq2) {
/* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */
void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
const int16_t* seq1,
const int16_t* seq2,
int16_t dim_seq,
int16_t dim_cross_correlation,
int16_t right_shifts,
int16_t step_seq2) {
int i = 0, j = 0;
for (i = 0; i < dim_cross_correlation; i++) {

View File

@ -9,7 +9,7 @@
@
@ cross_correlation_neon.s
@ This file contains the function WebRtcSpl_CrossCorrelation(),
@ This file contains the function WebRtcSpl_CrossCorrelationNeon(),
@ optimized for ARM Neon platform.
@
@ Reference C code at end of this file.
@ -33,9 +33,9 @@
.fpu neon
.align 2
.global WebRtcSpl_CrossCorrelation
.global WebRtcSpl_CrossCorrelationNeon
WebRtcSpl_CrossCorrelation:
WebRtcSpl_CrossCorrelationNeon:
.fnstart
@ -109,13 +109,13 @@ POST_LOOP_DIM_SEQ_RESIDUAL: @ Sum the results up and do the shift.
@ TODO(kma): Place this piece of reference code into a C code file.
@ void WebRtcSpl_CrossCorrelation(WebRtc_Word32* cross_correlation,
@ WebRtc_Word16* seq1,
@ WebRtc_Word16* seq2,
@ WebRtc_Word16 dim_seq,
@ WebRtc_Word16 dim_cross_correlation,
@ WebRtc_Word16 right_shifts,
@ WebRtc_Word16 step_seq2) {
@ void WebRtcSpl_CrossCorrelationNeon(WebRtc_Word32* cross_correlation,
@ WebRtc_Word16* seq1,
@ WebRtc_Word16* seq2,
@ WebRtc_Word16 dim_seq,
@ WebRtc_Word16 dim_cross_correlation,
@ WebRtc_Word16 right_shifts,
@ WebRtc_Word16 step_seq2) {
@ int i = 0;
@ int j = 0;
@ int inner_loop_len1 = dim_seq >> 3;

View File

@ -11,14 +11,15 @@
#include "signal_processing_library.h"
// TODO(Bjornv): Change the function parameter order to WebRTC code style.
int WebRtcSpl_DownsampleFast(const int16_t* data_in,
int data_in_length,
int16_t* data_out,
int data_out_length,
const int16_t* __restrict coefficients,
int coefficients_length,
int factor,
int delay) {
// C version of WebRtcSpl_DownsampleFast() for generic platforms.
int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
int data_in_length,
int16_t* data_out,
int data_out_length,
const int16_t* __restrict coefficients,
int coefficients_length,
int factor,
int delay) {
int i = 0;
int j = 0;
int32_t out_s32 = 0;

View File

@ -8,7 +8,7 @@
@ be found in the AUTHORS file in the root of the source tree.
@
@ This file contains the function WebRtcSpl_DownsampleFast(), optimized for
@ This file contains the function WebRtcSpl_DownsampleFastNeon(), optimized for
@ ARM Neon platform. The description header can be found in
@ signal_processing_library.h
@
@ -18,9 +18,9 @@
.fpu neon
.align 2
.global WebRtcSpl_DownsampleFast
.global WebRtcSpl_DownsampleFastNeon
WebRtcSpl_DownsampleFast:
WebRtcSpl_DownsampleFastNeon:
.fnstart

View File

@ -162,6 +162,16 @@ extern "C"
// inline functions:
#include "spl_inl.h"
// Initialize SPL. Currently it contains only function pointer initialization.
// If the underlying platform is known to be ARM-Neon (WEBRTC_ARCH_ARM_NEON
// defined), the pointers will be assigned to code optimized for Neon; otherwise
// if run-time Neon detection (WEBRTC_DETECT_ARM_NEON) is enabled, the pointers
// will be assigned to either Neon code or generic C code; otherwise, generic C
// code will be assigned.
// Note that this function MUST be called in any application that uses SPL
// functions.
void WebRtcSpl_Init();
// Get SPL Version
WebRtc_Word16 WebRtcSpl_get_version(char* version,
WebRtc_Word16 length_in_bytes);
@ -196,7 +206,8 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
// End: Copy and set operations.
// Minimum and maximum operations. Implementation in min_max_operations.c.
// Minimum and maximum operation functions and their pointers.
// Implementation in min_max_operations.c.
// Returns the largest absolute value in a signed 16-bit vector.
//
@ -206,7 +217,12 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
//
// Return value : Maximum absolute value in vector;
// or -1, if (vector == NULL || length <= 0).
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
typedef int16_t (*MaxAbsValueW16)(const int16_t* vector, int length);
extern MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, int length);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
#endif
// Returns the largest absolute value in a signed 32-bit vector.
//
@ -216,7 +232,12 @@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
//
// Return value : Maximum absolute value in vector;
// or -1, if (vector == NULL || length <= 0).
int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
typedef int32_t (*MaxAbsValueW32)(const int32_t* vector, int length);
extern MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, int length);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
#endif
// Returns the maximum value of a 16-bit vector.
//
@ -228,7 +249,12 @@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MIN
// is returned. Note that WEBRTC_SPL_WORD16_MIN is a feasible
// value and we can't catch errors purely based on it.
int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
typedef int16_t (*MaxValueW16)(const int16_t* vector, int length);
extern MaxValueW16 WebRtcSpl_MaxValueW16;
int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, int length);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length);
#endif
// Returns the maximum value of a 32-bit vector.
//
@ -240,7 +266,12 @@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MIN
// is returned. Note that WEBRTC_SPL_WORD32_MIN is a feasible
// value and we can't catch errors purely based on it.
int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
typedef int32_t (*MaxValueW32)(const int32_t* vector, int length);
extern MaxValueW32 WebRtcSpl_MaxValueW32;
int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, int length);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length);
#endif
// Returns the minimum value of a 16-bit vector.
//
@ -252,7 +283,12 @@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MAX
// is returned. Note that WEBRTC_SPL_WORD16_MAX is a feasible
// value and we can't catch errors purely based on it.
int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
typedef int16_t (*MinValueW16)(const int16_t* vector, int length);
extern MinValueW16 WebRtcSpl_MinValueW16;
int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, int length);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length);
#endif
// Returns the minimum value of a 32-bit vector.
//
@ -264,7 +300,12 @@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MAX
// is returned. Note that WEBRTC_SPL_WORD32_MAX is a feasible
// value and we can't catch errors purely based on it.
int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
typedef int32_t (*MinValueW32)(const int32_t* vector, int length);
extern MinValueW32 WebRtcSpl_MinValueW32;
int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, int length);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length);
#endif
// Returns the vector index to the largest absolute value of a 16-bit vector.
//
@ -358,7 +399,7 @@ void WebRtcSpl_ScaleAndAddVectors(G_CONST WebRtc_Word16* in_vector1,
WebRtc_Word16* out_vector,
int vector_length);
// Performs the vector operation:
// The functions (with related pointer) perform the vector operation:
// out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k])
// + round_value) >> right_shifts,
// where round_value = (1 << right_shifts) >> 1.
@ -376,14 +417,30 @@ void WebRtcSpl_ScaleAndAddVectors(G_CONST WebRtc_Word16* in_vector1,
// Return value : 0 if OK, -1 if (in_vector1 == NULL
// || in_vector2 == NULL || out_vector == NULL
// || length <= 0 || right_shift < 0).
int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
int16_t in_vector1_scale,
const int16_t* in_vector2,
int16_t in_vector2_scale,
int right_shifts,
int16_t* out_vector,
int length);
typedef int (*ScaleAndAddVectorsWithRound)(const int16_t* in_vector1,
int16_t in_vector1_scale,
const int16_t* in_vector2,
int16_t in_vector2_scale,
int right_shifts,
int16_t* out_vector,
int length);
extern ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
int16_t in_vector1_scale,
const int16_t* in_vector2,
int16_t in_vector2_scale,
int right_shifts,
int16_t* out_vector,
int length);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int WebRtcSpl_ScaleAndAddVectorsWithRoundNeon(const int16_t* in_vector1,
int16_t in_vector1_scale,
const int16_t* in_vector2,
int16_t in_vector2_scale,
int right_shifts,
int16_t* out_vector,
int length);
#endif
// End: Vector scaling operations.
// iLBC specific functions. Implementations in ilbc_specific_functions.c.
@ -508,7 +565,8 @@ void WebRtcSpl_AutoCorrToReflCoef(G_CONST WebRtc_Word32* auto_corr,
int use_order,
WebRtc_Word16* refl_coef);
// Calculates the cross-correlation between two sequences |seq1| and |seq2|.
// The functions (with related pointer) calculate the cross-correlation between
// two sequences |seq1| and |seq2|.
// |seq1| is fixed and |seq2| slides as the pointer is increased with the
// amount |step_seq2|. Note the arguments should obey the relationship:
// |dim_seq| - 1 + |step_seq2| * (|dim_cross_correlation| - 1) <
@ -530,13 +588,30 @@ void WebRtcSpl_AutoCorrToReflCoef(G_CONST WebRtc_Word32* auto_corr,
//
// Output:
// - cross_correlation : The cross-correlation in Q(-right_shifts)
void WebRtcSpl_CrossCorrelation(int32_t* cross_correlation,
const int16_t* seq1,
const int16_t* seq2,
int16_t dim_seq,
int16_t dim_cross_correlation,
int16_t right_shifts,
int16_t step_seq2);
typedef void (*CrossCorrelation)(int32_t* cross_correlation,
const int16_t* seq1,
const int16_t* seq2,
int16_t dim_seq,
int16_t dim_cross_correlation,
int16_t right_shifts,
int16_t step_seq2);
extern CrossCorrelation WebRtcSpl_CrossCorrelation;
void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
const int16_t* seq1,
const int16_t* seq2,
int16_t dim_seq,
int16_t dim_cross_correlation,
int16_t right_shifts,
int16_t step_seq2);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
const int16_t* seq1,
const int16_t* seq2,
int16_t dim_seq,
int16_t dim_cross_correlation,
int16_t right_shifts,
int16_t step_seq2);
#endif
// Creates (the first half of) a Hanning window. Size must be at least 1 and
// at most 512.
@ -636,7 +711,8 @@ void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
int coefficients_length,
int data_length);
// Performs a MA down sampling filter on a vector
// The functions (with related pointer) perform a MA down sampling filter
// on a vector.
// Input:
// - data_in : Input samples (state in positions
// data_in[-order] .. data_in[-1])
@ -651,14 +727,33 @@ void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
// Output:
// - data_out : Filtered samples
// Return value : 0 if OK, -1 if |in_vector| is too short
int WebRtcSpl_DownsampleFast(const int16_t* data_in,
int data_in_length,
int16_t* data_out,
int data_out_length,
const int16_t* __restrict coefficients,
int coefficients_length,
int factor,
int delay);
typedef int (*DownsampleFast)(const int16_t* data_in,
int data_in_length,
int16_t* data_out,
int data_out_length,
const int16_t* __restrict coefficients,
int coefficients_length,
int factor,
int delay);
extern DownsampleFast WebRtcSpl_DownsampleFast;
int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
int data_in_length,
int16_t* data_out,
int data_out_length,
const int16_t* __restrict coefficients,
int coefficients_length,
int factor,
int delay);
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
int data_in_length,
int16_t* data_out,
int data_out_length,
const int16_t* __restrict coefficients,
int coefficients_length,
int factor,
int delay);
#endif
// End: Filter operations.

View File

@ -10,12 +10,12 @@
/*
* This file contains the implementation of functions
* WebRtcSpl_MaxAbsValueW16()
* WebRtcSpl_MaxAbsValueW32()
* WebRtcSpl_MaxValueW16()
* WebRtcSpl_MaxValueW32()
* WebRtcSpl_MinValueW16()
* WebRtcSpl_MinValueW32()
* WebRtcSpl_MaxAbsValueW16C()
* WebRtcSpl_MaxAbsValueW32C()
* WebRtcSpl_MaxValueW16C()
* WebRtcSpl_MaxValueW32C()
* WebRtcSpl_MinValueW16C()
* WebRtcSpl_MinValueW32C()
* WebRtcSpl_MaxAbsIndexW16()
* WebRtcSpl_MaxIndexW16()
* WebRtcSpl_MaxIndexW32()
@ -29,13 +29,11 @@
#include <stdlib.h>
// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
// WebRtcSpl_MaxAbsValueW16 and WebRtcSpl_MaxAbsIndexW16 into a single one.)
// WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
// TODO(kma): Move the next six functions into min_max_operations_c.c.
// TODO(kma): Move the code in the #ifndef block into min_max_operations_c.c.
#ifndef WEBRTC_ARCH_ARM_NEON
// Maximum absolute value of word16 vector.
int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
// Maximum absolute value of word16 vector. C version for generic platforms.
int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, int length) {
int i = 0, absolute = 0, maximum = 0;
if (vector == NULL || length <= 0) {
@ -58,8 +56,8 @@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
return (int16_t)maximum;
}
// Maximum absolute value of word32 vector.
int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length) {
// Maximum absolute value of word32 vector. C version for generic platforms.
int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, int length) {
// Use uint32_t for the local variables, to accommodate the return value
// of abs(0x80000000), which is 0x80000000.
@ -82,8 +80,8 @@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length) {
return (int32_t)maximum;
}
// Maximum value of word16 vector.
int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length) {
// Maximum value of word16 vector. C version for generic platforms.
int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, int length) {
int16_t maximum = WEBRTC_SPL_WORD16_MIN;
int i = 0;
@ -98,8 +96,8 @@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length) {
return maximum;
}
// Maximum value of word32 vector.
int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length) {
// Maximum value of word32 vector. C version for generic platforms.
int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, int length) {
int32_t maximum = WEBRTC_SPL_WORD32_MIN;
int i = 0;
@ -114,8 +112,8 @@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length) {
return maximum;
}
// Minimum value of word16 vector.
int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length) {
// Minimum value of word16 vector. C version for generic platforms.
int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, int length) {
int16_t minimum = WEBRTC_SPL_WORD16_MAX;
int i = 0;
@ -130,8 +128,8 @@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length) {
return minimum;
}
// Minimum value of word32 vector.
int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length) {
// Minimum value of word32 vector. C version for generic platforms.
int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, int length) {
int32_t minimum = WEBRTC_SPL_WORD32_MAX;
int i = 0;
@ -145,8 +143,6 @@ int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length) {
}
return minimum;
}
#endif // WEBRTC_ARCH_ARM_NEON
// Index of maximum absolute value in a word16 vector.
int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length) {

View File

@ -8,7 +8,7 @@
@ be found in the AUTHORS file in the root of the source tree.
@
@ This file contains the function WebRtcSpl_MaxAbsValueW16(), optimized for
@ This file contains some minimum and maximum functions, optimized for
@ ARM Neon platform. The description header can be found in
@ signal_processing_library.h
@
@ -17,16 +17,16 @@
.arch armv7-a
.fpu neon
.global WebRtcSpl_MaxAbsValueW16
.global WebRtcSpl_MaxAbsValueW32
.global WebRtcSpl_MaxValueW16
.global WebRtcSpl_MaxValueW32
.global WebRtcSpl_MinValueW16
.global WebRtcSpl_MinValueW32
.global WebRtcSpl_MaxAbsValueW16Neon
.global WebRtcSpl_MaxAbsValueW32Neon
.global WebRtcSpl_MaxValueW16Neon
.global WebRtcSpl_MaxValueW32Neon
.global WebRtcSpl_MinValueW16Neon
.global WebRtcSpl_MinValueW32Neon
.align 2
@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
WebRtcSpl_MaxAbsValueW16:
@ int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
WebRtcSpl_MaxAbsValueW16Neon:
.fnstart
mov r2, #-1 @ Initialize the return value.
@ -73,8 +73,8 @@ END_MAX_ABS_VALUE_W16:
.fnend
@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
WebRtcSpl_MaxAbsValueW32:
@ int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
WebRtcSpl_MaxAbsValueW32Neon:
.fnstart
cmp r0, #0
@ -127,8 +127,8 @@ EXIT:
.fnend
@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
WebRtcSpl_MaxValueW16:
@ int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length);
WebRtcSpl_MaxValueW16Neon:
.fnstart
mov r2, #0x8000 @ Initialize the return value.
@ -170,8 +170,8 @@ END_MAX_VALUE_W16:
.fnend
@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
WebRtcSpl_MaxValueW32:
@ int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length);
WebRtcSpl_MaxValueW32Neon:
.fnstart
mov r2, #0x80000000 @ Initialize the return value.
@ -215,8 +215,8 @@ END_MAX_VALUE_W32:
.fnend
@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
WebRtcSpl_MinValueW16:
@ int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length);
WebRtcSpl_MinValueW16Neon:
.fnstart
movw r2, #0x7FFF @ Initialize the return value.
@ -259,8 +259,8 @@ END_MIN_VALUE_W16:
.fnend
@ int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
WebRtcSpl_MinValueW32:
@ int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length);
WebRtcSpl_MinValueW32Neon:
.fnstart
mov r2, #0x7FFFFFFF @ Initialize the return value.

View File

@ -25,6 +25,10 @@ const int16_t kRefData[kLength] = {
};
// Test fixture for the real-FFT tests.
class RealFFTTest : public ::testing::Test {
protected:
// The fixture constructor calls WebRtcSpl_Init(), which
// signal_processing_library.h says must be called by any application that
// uses SPL functions, so the SPL function pointers are assigned before each
// test body runs.
RealFFTTest() {
WebRtcSpl_Init();
}
};
TEST_F(RealFFTTest, CreateFailsOnBadInput) {

View File

@ -14,6 +14,9 @@
'include_dirs': [
'include',
],
'dependencies': [
'<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
],
'direct_dependent_settings': {
'include_dirs': [
'include',
@ -51,6 +54,7 @@
'resample_by_2_internal.c',
'resample_by_2_internal.h',
'resample_fractional.c',
'spl_init.c',
'spl_sqrt.c',
'spl_sqrt_floor.c',
'spl_version.c',
@ -70,6 +74,7 @@
],
'conditions': [
['armv7==1', {
'dependencies': ['signal_processing_neon',],
'sources': [
'filter_ar_fast_q12_armv7.s',
],
@ -77,18 +82,6 @@
'filter_ar_fast_q12.c',
],
}],
['arm_neon==1', {
'sources': [
'cross_correlation_neon.s',
'downsample_fast_neon.s',
'min_max_operations_neon.s',
'vector_scaling_operations_neon.s',
],
'sources!': [
'cross_correlation.c',
'downsample_fast.c',
],
}],
],
}],
],
@ -112,5 +105,20 @@
}, # spl_unittests
], # targets
}], # include_tests
['target_arch=="arm" and armv7==1', {
'targets': [
{
'target_name': 'signal_processing_neon',
'type': '<(library)',
'includes': ['../../build/arm_neon.gypi',],
'sources': [
'cross_correlation_neon.s',
'downsample_fast_neon.s',
'min_max_operations_neon.s',
'vector_scaling_operations_neon.s',
],
},
],
}], # 'target_arch=="arm" and armv7==1'
], # conditions
}

View File

@ -17,12 +17,11 @@ static const int16_t vector16[kVector16Size] = {1, -15511, 4323, 1963,
class SplTest : public testing::Test {
protected:
virtual ~SplTest() {
}
void SetUp() {
}
void TearDown() {
}
SplTest() {
WebRtcSpl_Init();
}
virtual ~SplTest() {
}
};
TEST_F(SplTest, MacroTest) {

View File

@ -0,0 +1,118 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* The global function contained in this file initializes SPL function
* pointers, currently only for ARM platforms.
*
* Some code came from common/rtcd.c in the WebM project.
*/
/* TODO(kma): Add calls to WebRtcSpl_Init() in all related modules
* (AEC, NS, codecs etc.).
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "system_wrappers/interface/cpu_features_wrapper.h"
/* Define the SPL function pointers (their types are the typedefs named on
 * each line). Nothing in this file gives them an initial value: they are
 * only assigned by the InitPointersTo*() helpers below, which are reached
 * through WebRtcSpl_Init(). WebRtcSpl_Init() must therefore run before any
 * of these pointers is called.
 */
MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
MaxValueW16 WebRtcSpl_MaxValueW16;
MaxValueW32 WebRtcSpl_MaxValueW32;
MinValueW16 WebRtcSpl_MinValueW16;
MinValueW32 WebRtcSpl_MinValueW32;
CrossCorrelation WebRtcSpl_CrossCorrelation;
DownsampleFast WebRtcSpl_DownsampleFast;
ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
/* Point every dispatched SPL function pointer at its generic C
 * implementation (the functions with the "C" suffix).
 *
 * Takes no arguments and returns nothing; its only effect is to overwrite
 * the nine global function pointers defined in this file.
 */
static void InitPointersToC(void) {
  /* Minimum / maximum operations. */
  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C;
  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C;
  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C;
  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C;

  /* Correlation, down-sampling and vector scaling. */
  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
  WebRtcSpl_ScaleAndAddVectorsWithRound =
      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
}
#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
/* Point every dispatched SPL function pointer at its ARM Neon
 * implementation (the functions with the "Neon" suffix).
 *
 * Only compiled when Neon code is part of the build, i.e. when either
 * run-time detection (WEBRTC_DETECT_ARM_NEON) or a known-Neon target
 * (WEBRTC_ARCH_ARM_NEON) is configured. Takes no arguments and returns
 * nothing; its only effect is to overwrite the nine global function
 * pointers defined in this file.
 */
static void InitPointersToNeon(void) {
  /* Minimum / maximum operations. */
  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon;
  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon;
  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon;
  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon;
  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon;
  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon;

  /* Correlation, down-sampling and vector scaling. */
  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon;
  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
  WebRtcSpl_ScaleAndAddVectorsWithRound =
      WebRtcSpl_ScaleAndAddVectorsWithRoundNeon;
}
#endif  /* WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON */
/* Select the implementation set for all SPL function pointers.
 *
 *  - WEBRTC_DETECT_ARM_NEON: query the CPU at run time and choose Neon only
 *    if the kCPUFeatureNEON bit is reported, otherwise the generic C code.
 *  - else WEBRTC_ARCH_ARM_NEON: the target is known to have Neon, so choose
 *    the Neon code unconditionally.
 *  - else: choose the generic C code.
 */
static void InitFunctionPointers(void) {
#if defined(WEBRTC_DETECT_ARM_NEON)
  if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) == 0) {
    /* No Neon reported by the CPU: fall back to portable C. */
    InitPointersToC();
  } else {
    InitPointersToNeon();
  }
#elif defined(WEBRTC_ARCH_ARM_NEON)
  InitPointersToNeon();
#else
  InitPointersToC();
#endif  /* WEBRTC_DETECT_ARM_NEON */
}
#if defined(WEBRTC_POSIX)
#include <pthread.h>
/* Run |func| at most once per process, using pthread_once().
 *
 * The once-control object is a single function-local static, so it is
 * shared by every call regardless of which |func| is passed: whichever
 * function is handed in on the first call is the only one that ever runs.
 * The return value of pthread_once() is deliberately ignored.
 */
static void once(void (*func)(void)) {
  static pthread_once_t once_control = PTHREAD_ONCE_INIT;

  (void)pthread_once(&once_control, func);
}
#elif defined(_WIN32)
#include <windows.h>
/* Run |func| at most once per process (Win32 version).
 *
 * The previous implementation statically initialised a CRITICAL_SECTION by
 * writing values into its private fields instead of calling
 * InitializeCriticalSection(). That layout is not a documented interface
 * (see http://code.google.com/p/webm/issues/detail?id=467 for the kind of
 * problem this invites). This version needs no lock object at all: a
 * three-state flag driven by Interlocked operations decides which thread
 * performs the initialisation, and every other thread waits for it to
 * finish.
 *
 * The state is a single function-local static shared by all calls, so
 * whichever function is passed on the first call is the only one that ever
 * runs. |func| must not call once() itself, or the caller would wait on its
 * own unfinished initialisation forever.
 */
static void once(void (*func)(void)) {
  enum { kNotStarted = 0, kInProgress = 1, kDone = 2 };
  static volatile LONG state = kNotStarted;

  /* Exactly one thread sees kNotStarted here and claims the work. */
  if (InterlockedCompareExchange(&state, kInProgress, kNotStarted) ==
      kNotStarted) {
    func();
    /* Publish completion; the interlocked store orders it after func(). */
    InterlockedExchange(&state, kDone);
    return;
  }

  /* Initialisation was already claimed. Comparing and exchanging with the
   * same value is an atomic read; loop until the winner reports kDone.
   * Sleep(1) rather than Sleep(0) so that a lower-priority initialising
   * thread is still given CPU time.
   */
  while (InterlockedCompareExchange(&state, kDone, kDone) != kDone) {
    Sleep(1);
  }
}
/* There is deliberately no fallback version in an #else block here, to
 * ensure thread safety. If neither pthread (WEBRTC_POSIX) nor _WIN32 is
 * available, once() is left undefined and the build should flag the problem.
 */
#endif /* WEBRTC_POSIX */
/* Public entry point: assign the SPL function pointers.
 *
 * The work is routed through once(), so InitFunctionPointers() runs a single
 * time no matter how many modules or threads call this function; repeated
 * calls are harmless.
 */
void WebRtcSpl_Init(void) {
  once(InitFunctionPointers);
}

View File

@ -17,7 +17,7 @@
* WebRtcSpl_ScaleVector()
* WebRtcSpl_ScaleVectorWithSat()
* WebRtcSpl_ScaleAndAddVectors()
* WebRtcSpl_ScaleAndAddVectorsWithRound()
* WebRtcSpl_ScaleAndAddVectorsWithRoundC()
*/
#include "signal_processing_library.h"
@ -148,14 +148,14 @@ void WebRtcSpl_ScaleAndAddVectors(G_CONST WebRtc_Word16 *in1, WebRtc_Word16 gain
}
}
#ifndef WEBRTC_ARCH_ARM_NEON
int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
int16_t in_vector1_scale,
const int16_t* in_vector2,
int16_t in_vector2_scale,
int right_shifts,
int16_t* out_vector,
int length) {
// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms.
int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
int16_t in_vector1_scale,
const int16_t* in_vector2,
int16_t in_vector2_scale,
int right_shifts,
int16_t* out_vector,
int length) {
int i = 0;
int round_value = (1 << right_shifts) >> 1;
@ -173,4 +173,3 @@ int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
return 0;
}
#endif // WEBRTC_ARCH_ARM_NEON

View File

@ -9,7 +9,7 @@
@
@ vector_scaling_operations_neon.s
@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRound(),
@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRoundNeon(),
@ optimized for ARM Neon platform. Output is bit-exact with the reference
@ C code in vector_scaling_operations.c.
@ -17,9 +17,9 @@
.fpu neon
.align 2
.global WebRtcSpl_ScaleAndAddVectorsWithRound
.global WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
WebRtcSpl_ScaleAndAddVectorsWithRound:
WebRtcSpl_ScaleAndAddVectorsWithRoundNeon:
.fnstart
push {r4-r9}

View File

@ -13,6 +13,7 @@
#include <stdlib.h>
#include <string.h>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "common_audio/vad/vad_core.h"
#include "typedefs.h"
@ -53,6 +54,7 @@ int WebRtcVad_Free(VadInst* handle) {
// TODO(bjornv): Move WebRtcVad_InitCore() code here.
int WebRtcVad_Init(VadInst* handle) {
WebRtcSpl_Init();
// Initialize the core VAD component.
return WebRtcVad_InitCore((VadInstT*) handle);
}

View File

@ -23,14 +23,14 @@
* Initiation of decoder instance.
*---------------------------------------------------------------*/
WebRtc_Word16 WebRtcIlbcfix_InitDecode( /* (o) Number of decoded samples */
iLBC_Dec_Inst_t *iLBCdec_inst, /* (i/o) Decoder instance */
WebRtc_Word16 mode, /* (i) frame size mode */
int use_enhancer /* (i) 1 to use enhancer
0 to run without enhancer */
) {
WebRtc_Word16 WebRtcIlbcfix_InitDecode( /* (o) Number of decoded samples */
iLBC_Dec_Inst_t *iLBCdec_inst, /* (i/o) Decoder instance */
WebRtc_Word16 mode, /* (i) frame size mode */
int use_enhancer) { /* (i) 1: use enhancer, 0: no enhancer */
int i;
WebRtcSpl_Init();
iLBCdec_inst->mode = mode;
/* Set all the variables that are dependent on the frame size mode */

View File

@ -23,10 +23,11 @@
* Initiation of encoder instance.
*---------------------------------------------------------------*/
WebRtc_Word16 WebRtcIlbcfix_InitEncode( /* (o) Number of bytes encoded */
iLBC_Enc_Inst_t *iLBCenc_inst, /* (i/o) Encoder instance */
WebRtc_Word16 mode /* (i) frame size mode */
){
WebRtc_Word16 WebRtcIlbcfix_InitEncode( /* (o) Number of bytes encoded */
iLBC_Enc_Inst_t *iLBCenc_inst, /* (i/o) Encoder instance */
WebRtc_Word16 mode) { /* (i) frame size mode */
WebRtcSpl_Init();
iLBCenc_inst->mode = mode;
/* Set all the variables that are dependent on the frame size mode */

View File

@ -217,6 +217,8 @@ WebRtc_Word16 WebRtcIsacfix_EncoderInit(ISACFIX_MainStruct *ISAC_main_inst,
/* flag encoder init */
ISAC_inst->initflag |= 2;
WebRtcSpl_Init();
if (CodingMode == 0)
/* Adaptive mode */
ISAC_inst->ISACenc_obj.new_framelength = INITIAL_FRAMESAMPLES;
@ -527,6 +529,7 @@ WebRtc_Word16 WebRtcIsacfix_DecoderInit(ISACFIX_MainStruct *ISAC_main_inst)
/* flag decoder init */
ISAC_inst->initflag |= 1;
WebRtcSpl_Init();
WebRtcIsacfix_InitMaskingDec(&ISAC_inst->ISACdec_obj.maskfiltstr_obj);
WebRtcIsacfix_InitPostFilterbank(&ISAC_inst->ISACdec_obj.postfiltbankstr_obj);

View File

@ -414,6 +414,8 @@ int WebRtcNetEQ_Init(void *inst, WebRtc_UWord16 fs)
return (-1);
}
WebRtcSpl_Init();
#ifdef NETEQ_VAD
/* Start out with no PostDecode VAD instance */
NetEqMainInst->DSPinst.VADInst.VADState = NULL;

View File

@ -10,6 +10,7 @@
#include "audio_device_impl.h"
#include "audio_device_config.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "system_wrappers/interface/ref_count.h"
#include <assert.h>
@ -604,6 +605,8 @@ WebRtc_Word32 AudioDeviceModuleImpl::Init()
if (!_ptrAudioDevice)
return -1;
WebRtcSpl_Init();
_ptrAudioDeviceUtility->Init();
if (_ptrAudioDevice->Init() == -1)

View File

@ -13,6 +13,7 @@
#include "echo_control_mobile.h"
#include "aecm_core.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "ring_buffer.h"
#ifdef AEC_DEBUG
#include <stdio.h>
@ -170,6 +171,8 @@ WebRtc_Word32 WebRtcAecm_Init(void *aecmInst, WebRtc_Word32 sampFreq)
}
aecm->sampFreq = sampFreq;
WebRtcSpl_Init();
// Initialize AECM core
if (WebRtcAecm_InitCore(aecm->aecmCore, aecm->sampFreq) == -1)
{

View File

@ -8,12 +8,14 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "noise_suppression.h"
#include <stdlib.h>
#include <string.h>
#include "noise_suppression.h"
#include "ns_core.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "defines.h"
#include "ns_core.h"
int WebRtcNs_Create(NsHandle** NS_inst) {
*NS_inst = (NsHandle*) malloc(sizeof(NSinst_t));
@ -33,6 +35,7 @@ int WebRtcNs_Free(NsHandle* NS_inst) {
int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs) {
WebRtcSpl_Init();
return WebRtcNs_InitCore((NSinst_t*) NS_inst, fs);
}

View File

@ -336,6 +336,8 @@ int VoEBaseImpl::Init(AudioDeviceModule* external_adm)
"Init(external_adm=0x%p)", external_adm);
CriticalSectionScoped cs(_shared->crit_sec());
WebRtcSpl_Init();
if (_shared->statistics().Initialized())
{
return 0;