Formalized Real 16-bit FFT for APM.
It also prepares for introducing Real 16-bit FFT Neon code from Openmax to SPL. CL https://webrtc-codereview.appspot.com/1819004/ takes care of that, but this CL is a prerequisite of that one. Tested audioproc with an offline file. Bit exact. R=andrew@webrtc.org, rtoy@google.com Review URL: https://webrtc-codereview.appspot.com/1830004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@4390 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
b63c29f48c
commit
fc8aaf02e1
@ -13,70 +13,112 @@
|
|||||||
|
|
||||||
#include "webrtc/typedefs.h"
|
#include "webrtc/typedefs.h"
|
||||||
|
|
||||||
|
// For ComplexFFT(), the maximum fft order is 10;
|
||||||
|
// for OpenMax FFT in ARM, it is 12;
|
||||||
|
// WebRTC APM uses orders of only 7 and 8.
|
||||||
|
enum {kMaxFFTOrder = 10};
|
||||||
|
|
||||||
struct RealFFT;
|
struct RealFFT;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
typedef struct RealFFT* (*CreateRealFFT)(int order);
|
||||||
|
typedef void (*FreeRealFFT)(struct RealFFT* self);
|
||||||
typedef int (*RealForwardFFT)(struct RealFFT* self,
|
typedef int (*RealForwardFFT)(struct RealFFT* self,
|
||||||
const int16_t* data_in,
|
const int16_t* real_data_in,
|
||||||
int16_t* data_out);
|
int16_t* complex_data_out);
|
||||||
typedef int (*RealInverseFFT)(struct RealFFT* self,
|
typedef int (*RealInverseFFT)(struct RealFFT* self,
|
||||||
const int16_t* data_in,
|
const int16_t* complex_data_in,
|
||||||
int16_t* data_out);
|
int16_t* real_data_out);
|
||||||
|
|
||||||
|
extern CreateRealFFT WebRtcSpl_CreateRealFFT;
|
||||||
|
extern FreeRealFFT WebRtcSpl_FreeRealFFT;
|
||||||
extern RealForwardFFT WebRtcSpl_RealForwardFFT;
|
extern RealForwardFFT WebRtcSpl_RealForwardFFT;
|
||||||
extern RealInverseFFT WebRtcSpl_RealInverseFFT;
|
extern RealInverseFFT WebRtcSpl_RealInverseFFT;
|
||||||
|
|
||||||
struct RealFFT* WebRtcSpl_CreateRealFFT(int order);
|
struct RealFFT* WebRtcSpl_CreateRealFFTC(int order);
|
||||||
void WebRtcSpl_FreeRealFFT(struct RealFFT* self);
|
void WebRtcSpl_FreeRealFFTC(struct RealFFT* self);
|
||||||
|
|
||||||
// TODO(kma): Implement FFT functions for real signals.
|
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
|
||||||
|
struct RealFFT* WebRtcSpl_CreateRealFFTNeon(int order);
|
||||||
|
void WebRtcSpl_FreeRealFFTNeon(struct RealFFT* self);
|
||||||
|
#endif
|
||||||
|
|
||||||
// Compute the forward FFT for a complex signal of length 2^order.
|
// Compute an FFT for a real-valued signal of length 2^order,
|
||||||
|
// where 1 < order <= kMaxFFTOrder. Transform length is determined by the
|
||||||
|
// specification structure, which must be initialized prior to calling the FFT
|
||||||
|
// function with WebRtcSpl_CreateRealFFT().
|
||||||
|
// The relationship between the input and output sequences can
|
||||||
|
// be expressed in terms of the DFT, i.e.:
|
||||||
|
// x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
|
||||||
|
// n=0,1,2,...N-1
|
||||||
|
// N=2^order.
|
||||||
|
// The conjugate-symmetric output sequence is represented using a CCS vector,
|
||||||
|
// which is of length N+2, and is organized as follows:
|
||||||
|
// Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1
|
||||||
|
// Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0
|
||||||
|
// where R[n] and I[n], respectively, denote the real and imaginary components
|
||||||
|
// for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length.
|
||||||
|
// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
|
||||||
|
// the foldover frequency.
|
||||||
|
//
|
||||||
// Input Arguments:
|
// Input Arguments:
|
||||||
// self - pointer to preallocated and initialized FFT specification structure.
|
// self - pointer to preallocated and initialized FFT specification structure.
|
||||||
// data_in - the input signal.
|
// real_data_in - the input signal. For an ARM Neon platform, it must be
|
||||||
|
// aligned on a 32-byte boundary.
|
||||||
//
|
//
|
||||||
// Output Arguments:
|
// Output Arguments:
|
||||||
// data_out - the output signal; must be different to data_in.
|
// complex_data_out - the output complex signal with (2^order + 2) 16-bit
|
||||||
|
// elements. For an ARM Neon platform, it must be different
|
||||||
|
// from real_data_in, and aligned on a 32-byte boundary.
|
||||||
//
|
//
|
||||||
// Return Value:
|
// Return Value:
|
||||||
// 0 - FFT calculation is successful.
|
// 0 - FFT calculation is successful.
|
||||||
// -1 - Error
|
// -1 - Error with bad arguments (NULL pointers).
|
||||||
//
|
|
||||||
int WebRtcSpl_RealForwardFFTC(struct RealFFT* self,
|
int WebRtcSpl_RealForwardFFTC(struct RealFFT* self,
|
||||||
const int16_t* data_in,
|
const int16_t* real_data_in,
|
||||||
int16_t* data_out);
|
int16_t* complex_data_out);
|
||||||
|
|
||||||
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
|
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
|
||||||
int WebRtcSpl_RealForwardFFTNeon(struct RealFFT* self,
|
int WebRtcSpl_RealForwardFFTNeon(struct RealFFT* self,
|
||||||
const int16_t* data_in,
|
const int16_t* real_data_in,
|
||||||
int16_t* data_out);
|
int16_t* complex_data_out);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Compute the inverse FFT for a complex signal of length 2^order.
|
// Compute the inverse FFT for a conjugate-symmetric input sequence of length
|
||||||
|
// 2^order, where 1 < order <= kMaxFFTOrder. Transform length is determined by
|
||||||
|
// the specification structure, which must be initialized prior to calling the
|
||||||
|
// FFT function with WebRtcSpl_CreateRealFFT().
|
||||||
|
// For a transform of length M, the input sequence is represented using a packed
|
||||||
|
// CCS vector of length M+2, which is explained in the comments for
|
||||||
|
// WebRtcSpl_RealForwardFFTC above.
|
||||||
|
//
|
||||||
// Input Arguments:
|
// Input Arguments:
|
||||||
// self - pointer to preallocated and initialized FFT specification structure.
|
// self - pointer to preallocated and initialized FFT specification structure.
|
||||||
// data_in - the input signal.
|
// complex_data_in - the input complex signal with (2^order + 2) 16-bit
|
||||||
|
// elements. For an ARM Neon platform, it must be aligned on
|
||||||
|
// a 32-byte boundary.
|
||||||
//
|
//
|
||||||
// Output Arguments:
|
// Output Arguments:
|
||||||
// data_out - the output signal; must be different to data_in.
|
// real_data_out - the output real signal. For an ARM Neon platform, it must
|
||||||
|
// be different from complex_data_in, and aligned on a 32-byte
|
||||||
|
// boundary.
|
||||||
//
|
//
|
||||||
// Return Value:
|
// Return Value:
|
||||||
// 0 or a positive number - a value that the elements in the |data_out| should
|
// 0 or a positive number - a value that the elements in the |real_data_out|
|
||||||
// be shifted left with in order to get correct
|
// should be shifted left with in order to get
|
||||||
// physical values.
|
// correct physical values.
|
||||||
// -1 - Error
|
// -1 - Error with bad arguments (NULL pointers).
|
||||||
int WebRtcSpl_RealInverseFFTC(struct RealFFT* self,
|
int WebRtcSpl_RealInverseFFTC(struct RealFFT* self,
|
||||||
const int16_t* data_in,
|
const int16_t* complex_data_in,
|
||||||
int16_t* data_out);
|
int16_t* real_data_out);
|
||||||
|
|
||||||
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
|
#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
|
||||||
int WebRtcSpl_RealInverseFFTNeon(struct RealFFT* self,
|
int WebRtcSpl_RealInverseFFTNeon(struct RealFFT* self,
|
||||||
const int16_t* data_in,
|
const int16_t* complex_data_in,
|
||||||
int16_t* data_out);
|
int16_t* real_data_out);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -18,55 +18,109 @@ struct RealFFT {
|
|||||||
int order;
|
int order;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct RealFFT* WebRtcSpl_CreateRealFFT(int order) {
|
struct RealFFT* WebRtcSpl_CreateRealFFTC(int order) {
|
||||||
struct RealFFT* self = NULL;
|
struct RealFFT* self = NULL;
|
||||||
|
|
||||||
// This constraint comes from ComplexFFT().
|
if (order > kMaxFFTOrder || order < 0) {
|
||||||
if (order > 10 || order < 0) {
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
self = malloc(sizeof(struct RealFFT));
|
self = malloc(sizeof(struct RealFFT));
|
||||||
|
if (self == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
self->order = order;
|
self->order = order;
|
||||||
|
|
||||||
return self;
|
return self;
|
||||||
}
|
}
|
||||||
|
|
||||||
void WebRtcSpl_FreeRealFFT(struct RealFFT* self) {
|
void WebRtcSpl_FreeRealFFTC(struct RealFFT* self) {
|
||||||
free(self);
|
if (self != NULL) {
|
||||||
|
free(self);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// WebRtcSpl_ComplexFFT and WebRtcSpl_ComplexIFFT use in-place algorithm,
|
// The C version FFT functions (i.e. WebRtcSpl_RealForwardFFTC and
|
||||||
// so copy data from data_in to data_out in the next two functions.
|
// WebRtcSpl_RealInverseFFTC) are real-valued FFT wrappers for complex-valued
|
||||||
|
// FFT implementation in SPL.
|
||||||
|
|
||||||
int WebRtcSpl_RealForwardFFTC(struct RealFFT* self,
|
int WebRtcSpl_RealForwardFFTC(struct RealFFT* self,
|
||||||
const int16_t* data_in,
|
const int16_t* real_data_in,
|
||||||
int16_t* data_out) {
|
int16_t* complex_data_out) {
|
||||||
memcpy(data_out, data_in, sizeof(int16_t) * (1 << (self->order + 1)));
|
int i = 0;
|
||||||
WebRtcSpl_ComplexBitReverse(data_out, self->order);
|
int j = 0;
|
||||||
return WebRtcSpl_ComplexFFT(data_out, self->order, 1);
|
int result = 0;
|
||||||
|
int n = 1 << self->order;
|
||||||
|
// The complex-value FFT implementation needs a buffer to hold 2^order
|
||||||
|
// 16-bit COMPLEX numbers, for both time and frequency data.
|
||||||
|
int16_t complex_buffer[2 << kMaxFFTOrder];
|
||||||
|
|
||||||
|
// Insert zeros to the imaginary parts for complex forward FFT input.
|
||||||
|
for (i = 0, j = 0; i < n; i += 1, j += 2) {
|
||||||
|
complex_buffer[j] = real_data_in[i];
|
||||||
|
complex_buffer[j + 1] = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
WebRtcSpl_ComplexBitReverse(complex_buffer, self->order);
|
||||||
|
result = WebRtcSpl_ComplexFFT(complex_buffer, self->order, 1);
|
||||||
|
|
||||||
|
// For real FFT output, use only the first N + 2 elements from
|
||||||
|
// complex forward FFT.
|
||||||
|
memcpy(complex_data_out, complex_buffer, sizeof(int16_t) * (n + 2));
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
int WebRtcSpl_RealInverseFFTC(struct RealFFT* self,
|
int WebRtcSpl_RealInverseFFTC(struct RealFFT* self,
|
||||||
const int16_t* data_in,
|
const int16_t* complex_data_in,
|
||||||
int16_t* data_out) {
|
int16_t* real_data_out) {
|
||||||
memcpy(data_out, data_in, sizeof(int16_t) * (1 << (self->order + 1)));
|
int i = 0;
|
||||||
WebRtcSpl_ComplexBitReverse(data_out, self->order);
|
int j = 0;
|
||||||
return WebRtcSpl_ComplexIFFT(data_out, self->order, 1);
|
int result = 0;
|
||||||
|
int n = 1 << self->order;
|
||||||
|
// Create the buffer specific to complex-valued FFT implementation.
|
||||||
|
int16_t complex_buffer[2 << kMaxFFTOrder];
|
||||||
|
|
||||||
|
// For n-point FFT, first copy the first n + 2 elements into complex
|
||||||
|
// FFT, then construct the remaining n - 2 elements by real FFT's
|
||||||
|
// conjugate-symmetric properties.
|
||||||
|
memcpy(complex_buffer, complex_data_in, sizeof(int16_t) * (n + 2));
|
||||||
|
for (i = n + 2; i < 2 * n; i += 2) {
|
||||||
|
complex_buffer[i] = complex_data_in[2 * n - i];
|
||||||
|
complex_buffer[i + 1] = -complex_data_in[2 * n - i + 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
WebRtcSpl_ComplexBitReverse(complex_buffer, self->order);
|
||||||
|
result = WebRtcSpl_ComplexIFFT(complex_buffer, self->order, 1);
|
||||||
|
|
||||||
|
// Strip out the imaginary parts of the complex inverse FFT output.
|
||||||
|
for (i = 0, j = 0; i < n; i += 1, j += 2) {
|
||||||
|
real_data_out[i] = complex_buffer[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
|
#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
|
||||||
// TODO(kma): Replace the following function bodies with optimized functions
|
// TODO(kma): Replace the following function bodies with optimized functions
|
||||||
// for ARM Neon.
|
// for ARM Neon.
|
||||||
|
struct RealFFT* WebRtcSpl_CreateRealFFTNeon(int order) {
|
||||||
|
return WebRtcSpl_CreateRealFFTC(order);
|
||||||
|
}
|
||||||
|
|
||||||
|
void WebRtcSpl_FreeRealFFTNeon(struct RealFFT* self) {
|
||||||
|
WebRtcSpl_FreeRealFFTC(self);
|
||||||
|
}
|
||||||
|
|
||||||
int WebRtcSpl_RealForwardFFTNeon(struct RealFFT* self,
|
int WebRtcSpl_RealForwardFFTNeon(struct RealFFT* self,
|
||||||
const int16_t* data_in,
|
const int16_t* real_data_in,
|
||||||
int16_t* data_out) {
|
int16_t* complex_data_out) {
|
||||||
return WebRtcSpl_RealForwardFFTC(self, data_in, data_out);
|
return WebRtcSpl_RealForwardFFTC(self, real_data_in, complex_data_out);
|
||||||
}
|
}
|
||||||
|
|
||||||
int WebRtcSpl_RealInverseFFTNeon(struct RealFFT* self,
|
int WebRtcSpl_RealInverseFFTNeon(struct RealFFT* self,
|
||||||
const int16_t* data_in,
|
const int16_t* complex_data_in,
|
||||||
int16_t* data_out) {
|
int16_t* real_data_out) {
|
||||||
return WebRtcSpl_RealInverseFFTC(self, data_in, data_out);
|
return WebRtcSpl_RealInverseFFTC(self, complex_data_in, real_data_out);
|
||||||
}
|
}
|
||||||
#endif
|
#endif // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON
|
||||||
|
@ -17,9 +17,17 @@
|
|||||||
namespace webrtc {
|
namespace webrtc {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
const int kOrder = 4;
|
// FFT order.
|
||||||
const int kLength = 1 << (kOrder + 1); // +1 to hold complex data.
|
const int kOrder = 5;
|
||||||
const int16_t kRefData[kLength] = {
|
// Lengths for real FFT's time and frequency buffers.
|
||||||
|
// For N-point FFT, the length requirements from API are N and N+2 respectively.
|
||||||
|
const int kTimeDataLength = 1 << kOrder;
|
||||||
|
const int kFreqDataLength = (1 << kOrder) + 2;
|
||||||
|
// For complex FFT's time and freq buffer. The implementation requires
|
||||||
|
// 2*N 16-bit words.
|
||||||
|
const int kComplexFftDataLength = 2 << kOrder;
|
||||||
|
// Reference data for time signal.
|
||||||
|
const int16_t kRefData[kTimeDataLength] = {
|
||||||
11739, 6848, -8688, 31980, -30295, 25242, 27085, 19410,
|
11739, 6848, -8688, 31980, -30295, 25242, 27085, 19410,
|
||||||
-26299, 15607, -10791, 11778, -23819, 14498, -25772, 10076,
|
-26299, 15607, -10791, 11778, -23819, 14498, -25772, 10076,
|
||||||
1173, 6848, -8688, 31980, -30295, 2522, 27085, 19410,
|
1173, 6848, -8688, 31980, -30295, 2522, 27085, 19410,
|
||||||
@ -40,36 +48,58 @@ TEST_F(RealFFTTest, CreateFailsOnBadInput) {
|
|||||||
EXPECT_TRUE(fft == NULL);
|
EXPECT_TRUE(fft == NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(andrew): This won't always be the case, but verifies the current code
|
TEST_F(RealFFTTest, RealAndComplexMatch) {
|
||||||
// at least.
|
int i = 0;
|
||||||
TEST_F(RealFFTTest, RealAndComplexAreIdentical) {
|
int j = 0;
|
||||||
int16_t real_data[kLength] = {0};
|
int16_t real_fft_time[kTimeDataLength] = {0};
|
||||||
int16_t real_data_out[kLength] = {0};
|
int16_t real_fft_freq[kFreqDataLength] = {0};
|
||||||
int16_t complex_data[kLength] = {0};
|
// One common buffer for complex FFT's time and frequency data.
|
||||||
memcpy(real_data, kRefData, sizeof(kRefData));
|
int16_t complex_fft_buff[kComplexFftDataLength] = {0};
|
||||||
memcpy(complex_data, kRefData, sizeof(kRefData));
|
|
||||||
|
|
||||||
|
// Prepare the inputs to forward FFT's.
|
||||||
|
memcpy(real_fft_time, kRefData, sizeof(kRefData));
|
||||||
|
for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
|
||||||
|
complex_fft_buff[j] = kRefData[i];
|
||||||
|
complex_fft_buff[j + 1] = 0; // Insert zeros into the imaginary parts.
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create and run real forward FFT.
|
||||||
RealFFT* fft = WebRtcSpl_CreateRealFFT(kOrder);
|
RealFFT* fft = WebRtcSpl_CreateRealFFT(kOrder);
|
||||||
EXPECT_TRUE(fft != NULL);
|
EXPECT_TRUE(fft != NULL);
|
||||||
|
EXPECT_EQ(0, WebRtcSpl_RealForwardFFT(fft, real_fft_time, real_fft_freq));
|
||||||
|
|
||||||
EXPECT_EQ(0, WebRtcSpl_RealForwardFFT(fft, real_data, real_data_out));
|
// Run complex forward FFT.
|
||||||
WebRtcSpl_ComplexBitReverse(complex_data, kOrder);
|
WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
|
||||||
EXPECT_EQ(0, WebRtcSpl_ComplexFFT(complex_data, kOrder, 1));
|
EXPECT_EQ(0, WebRtcSpl_ComplexFFT(complex_fft_buff, kOrder, 1));
|
||||||
|
|
||||||
for (int i = 0; i < kLength; i++) {
|
// Verify the results between complex and real forward FFT.
|
||||||
EXPECT_EQ(real_data_out[i], complex_data[i]);
|
for (i = 0; i < kFreqDataLength; i++) {
|
||||||
|
EXPECT_EQ(real_fft_freq[i], complex_fft_buff[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(complex_data, kRefData, sizeof(kRefData));
|
// Prepare the inputs to inverse real FFT.
|
||||||
|
// We use whatever data is in complex_fft_buff[] since we don't care
|
||||||
|
// about data contents. Only kFreqDataLength 16-bit words are copied
|
||||||
|
// from complex_fft_buff to real_fft_freq since remaining words (2nd half)
|
||||||
|
// are conjugate-symmetric to the first half in theory.
|
||||||
|
memcpy(real_fft_freq, complex_fft_buff, sizeof(real_fft_freq));
|
||||||
|
|
||||||
int real_scale = WebRtcSpl_RealInverseFFT(fft, real_data, real_data_out);
|
// Run real inverse FFT.
|
||||||
|
int real_scale = WebRtcSpl_RealInverseFFT(fft, real_fft_freq, real_fft_time);
|
||||||
EXPECT_GE(real_scale, 0);
|
EXPECT_GE(real_scale, 0);
|
||||||
WebRtcSpl_ComplexBitReverse(complex_data, kOrder);
|
|
||||||
int complex_scale = WebRtcSpl_ComplexIFFT(complex_data, kOrder, 1);
|
// Run complex inverse FFT.
|
||||||
|
WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
|
||||||
|
int complex_scale = WebRtcSpl_ComplexIFFT(complex_fft_buff, kOrder, 1);
|
||||||
|
|
||||||
|
// Verify the results between complex and real inverse FFT.
|
||||||
|
// They are not bit-exact, since complex IFFT doesn't produce
|
||||||
|
// exactly conjugate-symmetric data (between first and second half).
|
||||||
EXPECT_EQ(real_scale, complex_scale);
|
EXPECT_EQ(real_scale, complex_scale);
|
||||||
for (int i = 0; i < kLength; i++) {
|
for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
|
||||||
EXPECT_EQ(real_data_out[i], complex_data[i]);
|
EXPECT_LE(abs(real_fft_time[i] - complex_fft_buff[j]), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
WebRtcSpl_FreeRealFFT(fft);
|
WebRtcSpl_FreeRealFFT(fft);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,6 +28,8 @@ MinValueW32 WebRtcSpl_MinValueW32;
|
|||||||
CrossCorrelation WebRtcSpl_CrossCorrelation;
|
CrossCorrelation WebRtcSpl_CrossCorrelation;
|
||||||
DownsampleFast WebRtcSpl_DownsampleFast;
|
DownsampleFast WebRtcSpl_DownsampleFast;
|
||||||
ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
|
ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
|
||||||
|
CreateRealFFT WebRtcSpl_CreateRealFFT;
|
||||||
|
FreeRealFFT WebRtcSpl_FreeRealFFT;
|
||||||
RealForwardFFT WebRtcSpl_RealForwardFFT;
|
RealForwardFFT WebRtcSpl_RealForwardFFT;
|
||||||
RealInverseFFT WebRtcSpl_RealInverseFFT;
|
RealInverseFFT WebRtcSpl_RealInverseFFT;
|
||||||
|
|
||||||
@ -45,6 +47,8 @@ static void InitPointersToC() {
|
|||||||
WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
|
WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
|
||||||
WebRtcSpl_ScaleAndAddVectorsWithRound =
|
WebRtcSpl_ScaleAndAddVectorsWithRound =
|
||||||
WebRtcSpl_ScaleAndAddVectorsWithRoundC;
|
WebRtcSpl_ScaleAndAddVectorsWithRoundC;
|
||||||
|
WebRtcSpl_CreateRealFFT = WebRtcSpl_CreateRealFFTC;
|
||||||
|
WebRtcSpl_FreeRealFFT = WebRtcSpl_FreeRealFFTC;
|
||||||
WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTC;
|
WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTC;
|
||||||
WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTC;
|
WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTC;
|
||||||
}
|
}
|
||||||
@ -63,6 +67,8 @@ static void InitPointersToNeon() {
|
|||||||
WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
|
WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
|
||||||
WebRtcSpl_ScaleAndAddVectorsWithRound =
|
WebRtcSpl_ScaleAndAddVectorsWithRound =
|
||||||
WebRtcSpl_ScaleAndAddVectorsWithRoundNeon;
|
WebRtcSpl_ScaleAndAddVectorsWithRoundNeon;
|
||||||
|
WebRtcSpl_CreateRealFFT = WebRtcSpl_CreateRealFFTNeon;
|
||||||
|
WebRtcSpl_FreeRealFFT = WebRtcSpl_FreeRealFFTNeon;
|
||||||
WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTNeon;
|
WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTNeon;
|
||||||
WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTNeon;
|
WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTNeon;
|
||||||
}
|
}
|
||||||
@ -80,6 +86,8 @@ static void InitPointersToMIPS() {
|
|||||||
WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
|
WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
|
||||||
WebRtcSpl_ScaleAndAddVectorsWithRound =
|
WebRtcSpl_ScaleAndAddVectorsWithRound =
|
||||||
WebRtcSpl_ScaleAndAddVectorsWithRoundC;
|
WebRtcSpl_ScaleAndAddVectorsWithRoundC;
|
||||||
|
WebRtcSpl_CreateRealFFT = WebRtcSpl_CreateRealFFTC;
|
||||||
|
WebRtcSpl_FreeRealFFT = WebRtcSpl_FreeRealFFTC;
|
||||||
WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTC;
|
WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTC;
|
||||||
WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTC;
|
WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTC;
|
||||||
#if defined(MIPS_DSP_R1_LE)
|
#if defined(MIPS_DSP_R1_LE)
|
||||||
|
@ -244,8 +244,6 @@ static const uint16_t* AlignedFarend(AecmCore_t* self, int* far_q, int delay) {
|
|||||||
CalcLinearEnergies WebRtcAecm_CalcLinearEnergies;
|
CalcLinearEnergies WebRtcAecm_CalcLinearEnergies;
|
||||||
StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
|
StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
|
||||||
ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
|
ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
|
||||||
WindowAndFFT WebRtcAecm_WindowAndFFT;
|
|
||||||
InverseFFTAndWindow WebRtcAecm_InverseFFTAndWindow;
|
|
||||||
|
|
||||||
int WebRtcAecm_CreateCore(AecmCore_t **aecmInst)
|
int WebRtcAecm_CreateCore(AecmCore_t **aecmInst)
|
||||||
{
|
{
|
||||||
@ -351,41 +349,36 @@ void WebRtcAecm_InitEchoPathCore(AecmCore_t* aecm, const int16_t* echo_path)
|
|||||||
aecm->mseChannelCount = 0;
|
aecm->mseChannelCount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void WindowAndFFTC(AecmCore_t* aecm,
|
static void WindowAndFFT(AecmCore_t* aecm,
|
||||||
int16_t* fft,
|
int16_t* fft,
|
||||||
const int16_t* time_signal,
|
const int16_t* time_signal,
|
||||||
complex16_t* freq_signal,
|
complex16_t* freq_signal,
|
||||||
int time_signal_scaling)
|
int time_signal_scaling) {
|
||||||
{
|
int i = 0;
|
||||||
int i, j;
|
|
||||||
|
|
||||||
memset(fft, 0, sizeof(int16_t) * PART_LEN4);
|
// FFT of signal
|
||||||
// FFT of signal
|
for (i = 0; i < PART_LEN; i++) {
|
||||||
for (i = 0, j = 0; i < PART_LEN; i++, j += 2)
|
// Window time domain signal and insert into real part of
|
||||||
{
|
// transformation array |fft|
|
||||||
// Window time domain signal and insert into real part of
|
fft[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
|
||||||
// transformation array |fft|
|
(time_signal[i] << time_signal_scaling),
|
||||||
fft[j] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
|
WebRtcAecm_kSqrtHanning[i],
|
||||||
(time_signal[i] << time_signal_scaling),
|
14);
|
||||||
WebRtcAecm_kSqrtHanning[i],
|
fft[PART_LEN + i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
|
||||||
14);
|
(time_signal[i + PART_LEN] << time_signal_scaling),
|
||||||
fft[PART_LEN2 + j] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
|
WebRtcAecm_kSqrtHanning[PART_LEN - i],
|
||||||
(time_signal[i + PART_LEN] << time_signal_scaling),
|
14);
|
||||||
WebRtcAecm_kSqrtHanning[PART_LEN - i],
|
}
|
||||||
14);
|
|
||||||
// Inserting zeros in imaginary parts not necessary since we
|
|
||||||
// initialized the array with all zeros
|
|
||||||
}
|
|
||||||
|
|
||||||
// Do forward FFT, then take only the first PART_LEN complex samples,
|
// Do forward FFT, then take only the first PART_LEN complex samples,
|
||||||
// and change signs of the imaginary parts.
|
// and change signs of the imaginary parts.
|
||||||
WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
|
WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
|
||||||
for (i = 0; i < PART_LEN; i++) {
|
for (i = 0; i < PART_LEN; i++) {
|
||||||
freq_signal[i].imag = -freq_signal[i].imag;
|
freq_signal[i].imag = -freq_signal[i].imag;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void InverseFFTAndWindowC(AecmCore_t* aecm,
|
static void InverseFFTAndWindow(AecmCore_t* aecm,
|
||||||
int16_t* fft,
|
int16_t* fft,
|
||||||
complex16_t* efw,
|
complex16_t* efw,
|
||||||
int16_t* output,
|
int16_t* output,
|
||||||
@ -395,17 +388,9 @@ static void InverseFFTAndWindowC(AecmCore_t* aecm,
|
|||||||
int32_t tmp32no1;
|
int32_t tmp32no1;
|
||||||
|
|
||||||
// Synthesis
|
// Synthesis
|
||||||
for (i = 1; i < PART_LEN; i++)
|
for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) {
|
||||||
{
|
fft[j] = efw[i].real;
|
||||||
j = WEBRTC_SPL_LSHIFT_W32(i, 1);
|
fft[j + 1] = -efw[i].imag;
|
||||||
fft[j] = efw[i].real;
|
|
||||||
|
|
||||||
// mirrored data, even
|
|
||||||
fft[PART_LEN4 - j] = efw[i].real;
|
|
||||||
fft[j + 1] = -efw[i].imag;
|
|
||||||
|
|
||||||
//mirrored data, odd
|
|
||||||
fft[PART_LEN4 - (j - 1)] = efw[i].imag;
|
|
||||||
}
|
}
|
||||||
fft[0] = efw[0].real;
|
fft[0] = efw[0].real;
|
||||||
fft[1] = -efw[0].imag;
|
fft[1] = -efw[0].imag;
|
||||||
@ -413,31 +398,23 @@ static void InverseFFTAndWindowC(AecmCore_t* aecm,
|
|||||||
fft[PART_LEN2] = efw[PART_LEN].real;
|
fft[PART_LEN2] = efw[PART_LEN].real;
|
||||||
fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
|
fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
|
||||||
|
|
||||||
// Inverse FFT. Then take only the real values, and keep outCFFT
|
// Inverse FFT. Keep outCFFT to scale the samples in the next block.
|
||||||
// to scale the samples in the next block.
|
outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, output);
|
||||||
outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, (int16_t*)efw);
|
|
||||||
for (i = 0; i < PART_LEN; i++) {
|
|
||||||
efw[i].real = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
|
||||||
efw[i].real,
|
|
||||||
WebRtcAecm_kSqrtHanning[i],
|
|
||||||
14);
|
|
||||||
tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)efw[i].real,
|
|
||||||
outCFFT - aecm->dfaCleanQDomain);
|
|
||||||
efw[i].real = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
|
|
||||||
tmp32no1 + aecm->outBuf[i],
|
|
||||||
WEBRTC_SPL_WORD16_MIN);
|
|
||||||
output[i] = efw[i].real;
|
|
||||||
|
|
||||||
tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT(
|
for (i = 0; i < PART_LEN; i++) {
|
||||||
efw[PART_LEN + i].real,
|
output[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||||
WebRtcAecm_kSqrtHanning[PART_LEN - i],
|
output[i], WebRtcAecm_kSqrtHanning[i], 14);
|
||||||
14);
|
tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)output[i],
|
||||||
|
outCFFT - aecm->dfaCleanQDomain);
|
||||||
|
output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
|
||||||
|
tmp32no1 + aecm->outBuf[i], WEBRTC_SPL_WORD16_MIN);
|
||||||
|
|
||||||
|
tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT(output[PART_LEN + i],
|
||||||
|
WebRtcAecm_kSqrtHanning[PART_LEN - i], 14);
|
||||||
tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1,
|
tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1,
|
||||||
outCFFT - aecm->dfaCleanQDomain);
|
outCFFT - aecm->dfaCleanQDomain);
|
||||||
aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(
|
aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(
|
||||||
WEBRTC_SPL_WORD16_MAX,
|
WEBRTC_SPL_WORD16_MAX, tmp32no1, WEBRTC_SPL_WORD16_MIN);
|
||||||
tmp32no1,
|
|
||||||
WEBRTC_SPL_WORD16_MIN);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy the current block to the old position (aecm->outBuf is shifted elsewhere)
|
// Copy the current block to the old position (aecm->outBuf is shifted elsewhere)
|
||||||
@ -522,9 +499,6 @@ static void ResetAdaptiveChannelC(AecmCore_t* aecm)
|
|||||||
#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
|
#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
|
||||||
static void WebRtcAecm_InitNeon(void)
|
static void WebRtcAecm_InitNeon(void)
|
||||||
{
|
{
|
||||||
// TODO(kma): Check why WebRtcAecm_InverseFFTAndWindowNeon() doesn't work.
|
|
||||||
WebRtcAecm_WindowAndFFT = WebRtcAecm_WindowAndFFTNeon;
|
|
||||||
WebRtcAecm_InverseFFTAndWindow = InverseFFTAndWindowC;
|
|
||||||
WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannelNeon;
|
WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannelNeon;
|
||||||
WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannelNeon;
|
WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannelNeon;
|
||||||
WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon;
|
WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon;
|
||||||
@ -654,8 +628,6 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq)
|
|||||||
COMPILE_ASSERT(PART_LEN % 16 == 0);
|
COMPILE_ASSERT(PART_LEN % 16 == 0);
|
||||||
|
|
||||||
// Initialize function pointers.
|
// Initialize function pointers.
|
||||||
WebRtcAecm_WindowAndFFT = WindowAndFFTC;
|
|
||||||
WebRtcAecm_InverseFFTAndWindow = InverseFFTAndWindowC;
|
|
||||||
WebRtcAecm_CalcLinearEnergies = CalcLinearEnergiesC;
|
WebRtcAecm_CalcLinearEnergies = CalcLinearEnergiesC;
|
||||||
WebRtcAecm_StoreAdaptiveChannel = StoreAdaptiveChannelC;
|
WebRtcAecm_StoreAdaptiveChannel = StoreAdaptiveChannelC;
|
||||||
WebRtcAecm_ResetAdaptiveChannel = ResetAdaptiveChannelC;
|
WebRtcAecm_ResetAdaptiveChannel = ResetAdaptiveChannelC;
|
||||||
@ -1403,7 +1375,7 @@ static int TimeToFrequencyDomain(AecmCore_t* aecm,
|
|||||||
time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
|
time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
WebRtcAecm_WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
|
WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
|
||||||
|
|
||||||
// Extract imaginary and real part, calculate the magnitude for all frequency bins
|
// Extract imaginary and real part, calculate the magnitude for all frequency bins
|
||||||
freq_signal[0].imag = 0;
|
freq_signal[0].imag = 0;
|
||||||
@ -1843,7 +1815,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
|
|||||||
ComfortNoise(aecm, ptrDfaClean, efw, hnl);
|
ComfortNoise(aecm, ptrDfaClean, efw, hnl);
|
||||||
}
|
}
|
||||||
|
|
||||||
WebRtcAecm_InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
|
InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -294,37 +294,10 @@ extern StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
|
|||||||
typedef void (*ResetAdaptiveChannel)(AecmCore_t* aecm);
|
typedef void (*ResetAdaptiveChannel)(AecmCore_t* aecm);
|
||||||
extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
|
extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
|
||||||
|
|
||||||
typedef void (*WindowAndFFT)(
|
|
||||||
AecmCore_t* aecm,
|
|
||||||
int16_t* fft,
|
|
||||||
const int16_t* time_signal,
|
|
||||||
complex16_t* freq_signal,
|
|
||||||
int time_signal_scaling);
|
|
||||||
extern WindowAndFFT WebRtcAecm_WindowAndFFT;
|
|
||||||
|
|
||||||
typedef void (*InverseFFTAndWindow)(
|
|
||||||
AecmCore_t* aecm,
|
|
||||||
int16_t* fft, complex16_t* efw,
|
|
||||||
int16_t* output,
|
|
||||||
const int16_t* nearendClean);
|
|
||||||
extern InverseFFTAndWindow WebRtcAecm_InverseFFTAndWindow;
|
|
||||||
|
|
||||||
// For the above function pointers, functions for generic platforms are declared
|
// For the above function pointers, functions for generic platforms are declared
|
||||||
// and defined as static in file aecm_core.c, while those for ARM Neon platforms
|
// and defined as static in file aecm_core.c, while those for ARM Neon platforms
|
||||||
// are declared below and defined in file aecm_core_neon.s.
|
// are declared below and defined in file aecm_core_neon.s.
|
||||||
#if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON)
|
#if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON)
|
||||||
void WebRtcAecm_WindowAndFFTNeon(AecmCore_t* aecm,
|
|
||||||
int16_t* fft,
|
|
||||||
const int16_t* time_signal,
|
|
||||||
complex16_t* freq_signal,
|
|
||||||
int time_signal_scaling);
|
|
||||||
|
|
||||||
void WebRtcAecm_InverseFFTAndWindowNeon(AecmCore_t* aecm,
|
|
||||||
int16_t* fft,
|
|
||||||
complex16_t* efw,
|
|
||||||
int16_t* output,
|
|
||||||
const int16_t* nearendClean);
|
|
||||||
|
|
||||||
void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore_t* aecm,
|
void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore_t* aecm,
|
||||||
const uint16_t* far_spectrum,
|
const uint16_t* far_spectrum,
|
||||||
int32_t* echo_est,
|
int32_t* echo_est,
|
||||||
|
@ -17,185 +17,10 @@
|
|||||||
#include "webrtc/system_wrappers/interface/asm_defines.h"
|
#include "webrtc/system_wrappers/interface/asm_defines.h"
|
||||||
|
|
||||||
GLOBAL_LABEL WebRtcAecm_kSqrtHanning
|
GLOBAL_LABEL WebRtcAecm_kSqrtHanning
|
||||||
GLOBAL_FUNCTION WebRtcAecm_WindowAndFFTNeon
|
|
||||||
GLOBAL_FUNCTION WebRtcAecm_InverseFFTAndWindowNeon
|
|
||||||
GLOBAL_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon
|
GLOBAL_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon
|
||||||
GLOBAL_FUNCTION WebRtcAecm_StoreAdaptiveChannelNeon
|
GLOBAL_FUNCTION WebRtcAecm_StoreAdaptiveChannelNeon
|
||||||
GLOBAL_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon
|
GLOBAL_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon
|
||||||
|
|
||||||
@ void WebRtcAecm_WindowAndFFTNeon(AecmCore_t* aecm,
|
|
||||||
@ int16_t* fft,
|
|
||||||
@ const int16_t* time_signal,
|
|
||||||
@ complex16_t* freq_signal,
|
|
||||||
@ int time_signal_scaling);
|
|
||||||
.align 2
|
|
||||||
DEFINE_FUNCTION WebRtcAecm_WindowAndFFTNeon
|
|
||||||
push {r4, r5, r6, lr}
|
|
||||||
|
|
||||||
ldr r12, [sp, #16] @ time_signal_scaling
|
|
||||||
vdup.16 d16, r12
|
|
||||||
|
|
||||||
vmov.i16 d21, #0 @ For imaginary parts of |fft|.
|
|
||||||
vmov.i16 d27, #0 @ For imaginary parts of |fft|.
|
|
||||||
adr r5, WebRtcAecm_kSqrtHanning
|
|
||||||
adr lr, kSqrtHanningReversed
|
|
||||||
add r4, r1, #(PART_LEN2 * 2) @ &fft[PART_LEN2]
|
|
||||||
add r12, r2, #(PART_LEN * 2) @ time_signal[PART_LEN]
|
|
||||||
mov r6, #(PART_LEN / 4) @ Loop counter, unrolled by 4
|
|
||||||
|
|
||||||
LOOP_PART_LEN:
|
|
||||||
vld1.16 d0, [r2, :64]! @ time_signal[i]
|
|
||||||
vld1.16 d22, [r12, :64]! @ time_signal[i + PART_LEN]
|
|
||||||
vld1.16 d17, [r5, :64]! @ WebRtcAecm_kSqrtHanning[i]
|
|
||||||
vld1.16 d23, [lr, :64]! @ kSqrtHanningReversed[i]
|
|
||||||
vshl.s16 d18, d0, d16
|
|
||||||
vshl.s16 d22, d22, d16
|
|
||||||
vmull.s16 q9, d18, d17
|
|
||||||
vmull.s16 q12, d22, d23
|
|
||||||
subs r6, #1
|
|
||||||
vshrn.i32 d20, q9, #14
|
|
||||||
vshrn.i32 d26, q12, #14
|
|
||||||
vst2.16 {d20, d21}, [r1, :128]! @ fft[j]
|
|
||||||
vst2.16 {d26, d27}, [r4, :128]! @ fft[PART_LEN2 + j]
|
|
||||||
bgt LOOP_PART_LEN
|
|
||||||
|
|
||||||
@ WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
|
|
||||||
movw r12, #offset_aecm_real_fft
|
|
||||||
sub r1, #(PART_LEN * 4) @ Get r1 back to &fft[0].
|
|
||||||
mov r2, r3 @ freq_signal
|
|
||||||
mov r4, r3
|
|
||||||
ldr r0, [r0, r12] @ aecm->real_fft
|
|
||||||
CALL_FUNCTION WebRtcSpl_RealForwardFFTNeon
|
|
||||||
|
|
||||||
mov r12, #(PART_LEN * 2 / 16) @ Loop counter, unrolled by 16.
|
|
||||||
|
|
||||||
LOOP_PART_LEN2:
|
|
||||||
@ freq_signal[i].imag = - freq_signal[i].imag;
|
|
||||||
vld2.16 {d20, d21, d22, d23}, [r4, :256]
|
|
||||||
subs r12, #1
|
|
||||||
vneg.s16 d22, d22
|
|
||||||
vneg.s16 d23, d23
|
|
||||||
vst2.16 {d20, d21, d22, d23}, [r4, :256]!
|
|
||||||
bgt LOOP_PART_LEN2
|
|
||||||
|
|
||||||
pop {r4, r5, r6, pc}
|
|
||||||
|
|
||||||
@ void WebRtcAecm_InverseFFTAndWindowNeon(AecmCore_t* aecm,
|
|
||||||
@ int16_t* fft,
|
|
||||||
@ complex16_t* efw,
|
|
||||||
@ int16_t* output,
|
|
||||||
@ const int16_t* nearendClean);
|
|
||||||
.align 2
|
|
||||||
DEFINE_FUNCTION WebRtcAecm_InverseFFTAndWindowNeon
|
|
||||||
push {r4-r8, lr}
|
|
||||||
|
|
||||||
@ Values of r0, r1, and r3 will change in WebRtcSpl_ComplexIFFT
|
|
||||||
@ and WebRtcSpl_ComplexBitReverse.
|
|
||||||
mov r4, r1
|
|
||||||
mov r5, r0
|
|
||||||
mov r7, r3
|
|
||||||
|
|
||||||
add r3, r1, #((PART_LEN4 - 6) * 2) @ &fft[PART_LEN4 - 6]
|
|
||||||
mov r6, #(PART_LEN / 4) @ Loop counter, unrolled by 4
|
|
||||||
add r12, r2, #(PART_LEN * 4) @ &efw[PART_LEN]
|
|
||||||
mov r8, #-16
|
|
||||||
|
|
||||||
LOOP_PRE_IFFT:
|
|
||||||
vld2.16 {q10}, [r2, :128]!
|
|
||||||
vmov q11, q10
|
|
||||||
vneg.s16 d23, d23
|
|
||||||
vst2.16 {d22, d23}, [r1, :128]!
|
|
||||||
vrev64.16 q10, q10
|
|
||||||
subs r6, #1
|
|
||||||
vst2.16 {q10}, [r3], r8
|
|
||||||
bgt LOOP_PRE_IFFT
|
|
||||||
|
|
||||||
@ fft[PART_LEN2] = efw[PART_LEN].real;
|
|
||||||
@ fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
|
|
||||||
ldr r8, [r12]
|
|
||||||
ssub16 r12, r6, r8
|
|
||||||
mov r3, #(PART_LEN2 * 2)
|
|
||||||
pkhbt r8, r8, r12
|
|
||||||
str r8, [r4, r3]
|
|
||||||
|
|
||||||
@ outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, (int16_t*)efw);
|
|
||||||
movw r12, #offset_aecm_real_fft
|
|
||||||
sub r1, #(PART_LEN * 4) @ Get r1 back to &fft[0].
|
|
||||||
sub r2, #(PART_LEN * 4) @ Get r2 back to &efw[0].
|
|
||||||
mov r4, r2 @ Keep efw in r4.
|
|
||||||
ldr r0, [r0, r12] @ aecm->real_fft
|
|
||||||
CALL_FUNCTION WebRtcSpl_RealInverseFFTNeon
|
|
||||||
|
|
||||||
movw r6, #offset_aecm_outBuf
|
|
||||||
movw r12, #offset_aecm_dfaCleanQDomain
|
|
||||||
ldr r8, [r5, r6] @ &aecm->outBuf[0]
|
|
||||||
ldrsh r2, [r5, r12] @ aecm->dfaCleanQDomain
|
|
||||||
|
|
||||||
adr r12, kSqrtHanningReversed
|
|
||||||
adr r6, WebRtcAecm_kSqrtHanning
|
|
||||||
rsb r0, r2, r0 @ outCFFT - aecm->dfaCleanQDomain
|
|
||||||
vdup.32 q9, r0
|
|
||||||
add r0, r4, #(PART_LEN * 4) @ &efw[PART_LEN]
|
|
||||||
mov r3, #(PART_LEN / 4) @ Loop counter, unrolled by 4
|
|
||||||
|
|
||||||
LOOP_POST_IFFT:
|
|
||||||
vld2.16 {d4, d5}, [r4, :128] @ &efw[i];
|
|
||||||
vld1.16 d17, [r6, :64]! @ WebRtcAecm_kSqrtHanning[i]
|
|
||||||
vld1.16 d20, [r8, :64] @ aecm->outBuf[i]
|
|
||||||
vmull.s16 q8, d4, d17
|
|
||||||
vmovl.s16 q10, d20
|
|
||||||
vrshr.s32 q8, q8, #14
|
|
||||||
vld1.16 d0, [r0, :64]! @ &efw[PART_LEN + i]
|
|
||||||
vshl.s32 q8, q8, q9
|
|
||||||
vld1.16 d1, [r12, :64]! @ kSqrtHanningReversed[i]
|
|
||||||
vadd.i32 q8, q10
|
|
||||||
vmull.s16 q0, d0, d1
|
|
||||||
vqmovn.s32 d16, q8
|
|
||||||
vshr.s32 q0, q0, #14
|
|
||||||
vst2.16 {d4, d5}, [r4, :128]! @ &efw[i];
|
|
||||||
vshl.s32 q0, q0, q9
|
|
||||||
vst1.16 d16, [r7, :64]! @ output[i]
|
|
||||||
vqmovn.s32 d0, q0
|
|
||||||
subs r3, #1
|
|
||||||
vst1.16 d0, [r8, :64]! @ aecm->outBuf[i]
|
|
||||||
bgt LOOP_POST_IFFT
|
|
||||||
|
|
||||||
movw r3, #offset_aecm_xBuf
|
|
||||||
movw r12, #offset_aecm_dBufNoisy
|
|
||||||
ldr r3, [r5, r3] @ &aecm->xBuf[0]
|
|
||||||
ldr r1, [r5, r12] @ &aecm->dBufNoisy[0]
|
|
||||||
add r2, r3, #(PART_LEN * 2) @ &aecm->xBuf[PART_LEN]
|
|
||||||
add r0, r1, #(PART_LEN * 2) @ &aecm->dBufNoisy[PART_LEN]
|
|
||||||
mov r4, #(PART_LEN / 16) @ Loop counter, unrolled by 16.
|
|
||||||
|
|
||||||
LOOP_COPY:
|
|
||||||
vld1.16 {q10, q11}, [r2, :256]!
|
|
||||||
vld1.16 {q12, q13}, [r0, :256]!
|
|
||||||
subs r4, #1
|
|
||||||
vst1.16 {q10, q11}, [r3, :256]!
|
|
||||||
vst1.16 {q12, q13}, [r1, :256]!
|
|
||||||
bgt LOOP_COPY
|
|
||||||
|
|
||||||
ldr r2, [sp, #16]
|
|
||||||
cmp r2, #0 @ Check if (nearendClean != NULL).
|
|
||||||
beq END
|
|
||||||
|
|
||||||
movw r4, #offset_aecm_dBufClean
|
|
||||||
ldr r1, [r5, r4] @ &aecm->dBufClean[0]
|
|
||||||
add r0, r1, #(PART_LEN * 2) @ &aecm->dBufClean[PART_LEN]
|
|
||||||
|
|
||||||
vld1.16 {q10, q11}, [r0, :256]!
|
|
||||||
vld1.16 {q12, q13}, [r0, :256]!
|
|
||||||
vst1.16 {q10, q11}, [r1, :256]!
|
|
||||||
vst1.16 {q12, q13}, [r1, :256]!
|
|
||||||
vld1.16 {q10, q11}, [r0, :256]!
|
|
||||||
vld1.16 {q12, q13}, [r0, :256]!
|
|
||||||
vst1.16 {q10, q11}, [r1, :256]!
|
|
||||||
vst1.16 {q12, q13}, [r1, :256]!
|
|
||||||
|
|
||||||
END:
|
|
||||||
pop {r4-r8, pc}
|
|
||||||
|
|
||||||
@ void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore_t* aecm,
|
@ void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore_t* aecm,
|
||||||
@ const uint16_t* far_spectrum,
|
@ const uint16_t* far_spectrum,
|
||||||
@ int32_t* echo_est,
|
@ int32_t* echo_est,
|
||||||
|
@ -12,7 +12,6 @@
|
|||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
@ -436,26 +435,6 @@ static const int16_t kDeterminantEstMatrix[66] = {
|
|||||||
355, 330
|
355, 330
|
||||||
};
|
};
|
||||||
|
|
||||||
// Declare function pointers.
|
|
||||||
NoiseEstimation WebRtcNsx_NoiseEstimation;
|
|
||||||
PrepareSpectrum WebRtcNsx_PrepareSpectrum;
|
|
||||||
SynthesisUpdate WebRtcNsx_SynthesisUpdate;
|
|
||||||
AnalysisUpdate WebRtcNsx_AnalysisUpdate;
|
|
||||||
Denormalize WebRtcNsx_Denormalize;
|
|
||||||
CreateComplexBuffer WebRtcNsx_CreateComplexBuffer;
|
|
||||||
|
|
||||||
#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
|
|
||||||
// Initialize function pointers for ARM Neon platform.
|
|
||||||
static void WebRtcNsx_InitNeon(void) {
|
|
||||||
WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon;
|
|
||||||
WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrumNeon;
|
|
||||||
WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdateNeon;
|
|
||||||
WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdateNeon;
|
|
||||||
WebRtcNsx_Denormalize = WebRtcNsx_DenormalizeNeon;
|
|
||||||
WebRtcNsx_CreateComplexBuffer = WebRtcNsx_CreateComplexBufferNeon;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Update the noise estimation information.
|
// Update the noise estimation information.
|
||||||
static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) {
|
static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) {
|
||||||
int32_t tmp32no1 = 0;
|
int32_t tmp32no1 = 0;
|
||||||
@ -614,7 +593,6 @@ static void NoiseEstimationC(NsxInst_t* inst,
|
|||||||
// Filter the data in the frequency domain, and create spectrum.
|
// Filter the data in the frequency domain, and create spectrum.
|
||||||
static void PrepareSpectrumC(NsxInst_t* inst, int16_t* freq_buf) {
|
static void PrepareSpectrumC(NsxInst_t* inst, int16_t* freq_buf) {
|
||||||
int i = 0, j = 0;
|
int i = 0, j = 0;
|
||||||
int16_t tmp16 = 0;
|
|
||||||
|
|
||||||
for (i = 0; i < inst->magnLen; i++) {
|
for (i = 0; i < inst->magnLen; i++) {
|
||||||
inst->real[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i],
|
inst->real[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i],
|
||||||
@ -626,22 +604,19 @@ static void PrepareSpectrumC(NsxInst_t* inst, int16_t* freq_buf) {
|
|||||||
freq_buf[0] = inst->real[0];
|
freq_buf[0] = inst->real[0];
|
||||||
freq_buf[1] = -inst->imag[0];
|
freq_buf[1] = -inst->imag[0];
|
||||||
for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
|
for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
|
||||||
tmp16 = (inst->anaLen << 1) - j;
|
|
||||||
freq_buf[j] = inst->real[i];
|
freq_buf[j] = inst->real[i];
|
||||||
freq_buf[j + 1] = -inst->imag[i];
|
freq_buf[j + 1] = -inst->imag[i];
|
||||||
freq_buf[tmp16] = inst->real[i];
|
|
||||||
freq_buf[tmp16 + 1] = inst->imag[i];
|
|
||||||
}
|
}
|
||||||
freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
|
freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
|
||||||
freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
|
freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Denormalize the input buffer.
|
// Denormalize the real-valued signal |in|, the output from inverse FFT.
|
||||||
static __inline void DenormalizeC(NsxInst_t* inst, int16_t* in, int factor) {
|
static __inline void Denormalize(NsxInst_t* inst, int16_t* in, int factor) {
|
||||||
int i = 0, j = 0;
|
int i = 0;
|
||||||
int32_t tmp32 = 0;
|
int32_t tmp32 = 0;
|
||||||
for (i = 0, j = 0; i < inst->anaLen; i += 1, j += 2) {
|
for (i = 0; i < inst->anaLen; i += 1) {
|
||||||
tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[j],
|
tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[i],
|
||||||
factor - inst->normData);
|
factor - inst->normData);
|
||||||
inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
|
inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
|
||||||
}
|
}
|
||||||
@ -701,18 +676,32 @@ static void AnalysisUpdateC(NsxInst_t* inst,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a complex number buffer (out[]) as the input (in[]) interleaved with
|
// Normalize the real-valued signal |in|, the input to forward FFT.
|
||||||
// zeros, and normalize it.
|
static __inline void NormalizeRealBuffer(NsxInst_t* inst,
|
||||||
static __inline void CreateComplexBufferC(NsxInst_t* inst,
|
const int16_t* in,
|
||||||
int16_t* in,
|
int16_t* out) {
|
||||||
int16_t* out) {
|
int i = 0;
|
||||||
int i = 0, j = 0;
|
for (i = 0; i < inst->anaLen; ++i) {
|
||||||
for (i = 0, j = 0; i < inst->anaLen; i += 1, j += 2) {
|
out[i] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
|
||||||
out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
|
|
||||||
out[j + 1] = 0; // Insert zeros in imaginary part
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Declare function pointers.
|
||||||
|
NoiseEstimation WebRtcNsx_NoiseEstimation;
|
||||||
|
PrepareSpectrum WebRtcNsx_PrepareSpectrum;
|
||||||
|
SynthesisUpdate WebRtcNsx_SynthesisUpdate;
|
||||||
|
AnalysisUpdate WebRtcNsx_AnalysisUpdate;
|
||||||
|
|
||||||
|
#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
|
||||||
|
// Initialize function pointers for ARM Neon platform.
|
||||||
|
static void WebRtcNsx_InitNeon(void) {
|
||||||
|
WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon;
|
||||||
|
WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrumNeon;
|
||||||
|
WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdateNeon;
|
||||||
|
WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdateNeon;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void WebRtcNsx_CalcParametricNoiseEstimate(NsxInst_t* inst,
|
void WebRtcNsx_CalcParametricNoiseEstimate(NsxInst_t* inst,
|
||||||
int16_t pink_noise_exp_avg,
|
int16_t pink_noise_exp_avg,
|
||||||
int32_t pink_noise_num_avg,
|
int32_t pink_noise_num_avg,
|
||||||
@ -900,17 +889,14 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
|
|||||||
WebRtcNsx_PrepareSpectrum = PrepareSpectrumC;
|
WebRtcNsx_PrepareSpectrum = PrepareSpectrumC;
|
||||||
WebRtcNsx_SynthesisUpdate = SynthesisUpdateC;
|
WebRtcNsx_SynthesisUpdate = SynthesisUpdateC;
|
||||||
WebRtcNsx_AnalysisUpdate = AnalysisUpdateC;
|
WebRtcNsx_AnalysisUpdate = AnalysisUpdateC;
|
||||||
WebRtcNsx_Denormalize = DenormalizeC;
|
|
||||||
WebRtcNsx_CreateComplexBuffer = CreateComplexBufferC;
|
|
||||||
|
|
||||||
#ifdef WEBRTC_DETECT_ARM_NEON
|
#ifdef WEBRTC_DETECT_ARM_NEON
|
||||||
uint64_t features = WebRtc_GetCPUFeaturesARM();
|
uint64_t features = WebRtc_GetCPUFeaturesARM();
|
||||||
if ((features & kCPUFeatureNEON) != 0)
|
if ((features & kCPUFeatureNEON) != 0) {
|
||||||
{
|
WebRtcNsx_InitNeon();
|
||||||
WebRtcNsx_InitNeon();
|
}
|
||||||
}
|
|
||||||
#elif defined(WEBRTC_ARCH_ARM_NEON)
|
#elif defined(WEBRTC_ARCH_ARM_NEON)
|
||||||
WebRtcNsx_InitNeon();
|
WebRtcNsx_InitNeon();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
inst->initFlag = 1;
|
inst->initFlag = 1;
|
||||||
@ -1606,7 +1592,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, uint16_t* magnU
|
|||||||
right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0);
|
right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0);
|
||||||
|
|
||||||
// create realImag as winData interleaved with zeros (= imag. part), normalize it
|
// Normalize the real-valued winData into realImag, the input to the forward FFT.
|
||||||
WebRtcNsx_CreateComplexBuffer(inst, winData, realImag);
|
NormalizeRealBuffer(inst, winData, realImag);
|
||||||
|
|
||||||
// FFT output will be in winData[].
|
// FFT output will be in winData[].
|
||||||
WebRtcSpl_RealForwardFFT(inst->real_fft, realImag, winData);
|
WebRtcSpl_RealForwardFFT(inst->real_fft, realImag, winData);
|
||||||
@ -1838,8 +1824,7 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) {
|
|||||||
// Inverse FFT output will be in rfft_out[].
|
// Inverse FFT output will be in rfft_out[].
|
||||||
outCIFFT = WebRtcSpl_RealInverseFFT(inst->real_fft, realImag, rfft_out);
|
outCIFFT = WebRtcSpl_RealInverseFFT(inst->real_fft, realImag, rfft_out);
|
||||||
|
|
||||||
// Denormalize.
|
Denormalize(inst, rfft_out, outCIFFT);
|
||||||
WebRtcNsx_Denormalize(inst, rfft_out, outCIFFT);
|
|
||||||
|
|
||||||
//scale factor: only do it after END_STARTUP_LONG time
|
//scale factor: only do it after END_STARTUP_LONG time
|
||||||
gainFactor = 8192; // 8192 = Q13(1.0)
|
gainFactor = 8192; // 8192 = Q13(1.0)
|
||||||
|
@ -201,19 +201,6 @@ typedef void (*AnalysisUpdate)(NsxInst_t* inst,
|
|||||||
int16_t* new_speech);
|
int16_t* new_speech);
|
||||||
extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;
|
extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;
|
||||||
|
|
||||||
// Denormalize the input buffer.
|
|
||||||
typedef void (*Denormalize)(NsxInst_t* inst,
|
|
||||||
int16_t* in,
|
|
||||||
int factor);
|
|
||||||
extern Denormalize WebRtcNsx_Denormalize;
|
|
||||||
|
|
||||||
// Create a complex number buffer, as the input interleaved with zeros,
|
|
||||||
// and normalize it.
|
|
||||||
typedef void (*CreateComplexBuffer)(NsxInst_t* inst,
|
|
||||||
int16_t* in,
|
|
||||||
int16_t* out);
|
|
||||||
extern CreateComplexBuffer WebRtcNsx_CreateComplexBuffer;
|
|
||||||
|
|
||||||
#if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON)
|
#if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON)
|
||||||
// For the above function pointers, functions for generic platforms are declared
|
// For the above function pointers, functions for generic platforms are declared
|
||||||
// and defined as static in file nsx_core.c, while those for ARM Neon platforms
|
// and defined as static in file nsx_core.c, while those for ARM Neon platforms
|
||||||
@ -222,16 +209,12 @@ void WebRtcNsx_NoiseEstimationNeon(NsxInst_t* inst,
|
|||||||
uint16_t* magn,
|
uint16_t* magn,
|
||||||
uint32_t* noise,
|
uint32_t* noise,
|
||||||
int16_t* q_noise);
|
int16_t* q_noise);
|
||||||
void WebRtcNsx_CreateComplexBufferNeon(NsxInst_t* inst,
|
|
||||||
int16_t* in,
|
|
||||||
int16_t* out);
|
|
||||||
void WebRtcNsx_SynthesisUpdateNeon(NsxInst_t* inst,
|
void WebRtcNsx_SynthesisUpdateNeon(NsxInst_t* inst,
|
||||||
int16_t* out_frame,
|
int16_t* out_frame,
|
||||||
int16_t gain_factor);
|
int16_t gain_factor);
|
||||||
void WebRtcNsx_AnalysisUpdateNeon(NsxInst_t* inst,
|
void WebRtcNsx_AnalysisUpdateNeon(NsxInst_t* inst,
|
||||||
int16_t* out,
|
int16_t* out,
|
||||||
int16_t* new_speech);
|
int16_t* new_speech);
|
||||||
void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
|
|
||||||
void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buff);
|
void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buff);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -20,8 +20,6 @@ GLOBAL_FUNCTION WebRtcNsx_NoiseEstimationNeon
|
|||||||
GLOBAL_FUNCTION WebRtcNsx_PrepareSpectrumNeon
|
GLOBAL_FUNCTION WebRtcNsx_PrepareSpectrumNeon
|
||||||
GLOBAL_FUNCTION WebRtcNsx_SynthesisUpdateNeon
|
GLOBAL_FUNCTION WebRtcNsx_SynthesisUpdateNeon
|
||||||
GLOBAL_FUNCTION WebRtcNsx_AnalysisUpdateNeon
|
GLOBAL_FUNCTION WebRtcNsx_AnalysisUpdateNeon
|
||||||
GLOBAL_FUNCTION WebRtcNsx_DenormalizeNeon
|
|
||||||
GLOBAL_FUNCTION WebRtcNsx_CreateComplexBufferNeon
|
|
||||||
GLOBAL_LABEL WebRtcNsx_kLogTable
|
GLOBAL_LABEL WebRtcNsx_kLogTable
|
||||||
GLOBAL_LABEL WebRtcNsx_kCounterDiv
|
GLOBAL_LABEL WebRtcNsx_kCounterDiv
|
||||||
GLOBAL_LABEL WebRtcNsx_kLogTableFrac
|
GLOBAL_LABEL WebRtcNsx_kLogTableFrac
|
||||||
@ -426,6 +424,7 @@ POST_LOOP_MAGNLEN:
|
|||||||
|
|
||||||
pop {r4, r5, r6, pc}
|
pop {r4, r5, r6, pc}
|
||||||
|
|
||||||
|
@ TODO(kma): Remove copying to 2nd half of freq_buf, for real FFT interface.
|
||||||
@ void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf);
|
@ void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf);
|
||||||
.align 2
|
.align 2
|
||||||
DEFINE_FUNCTION WebRtcNsx_PrepareSpectrumNeon
|
DEFINE_FUNCTION WebRtcNsx_PrepareSpectrumNeon
|
||||||
@ -542,35 +541,6 @@ LOOP_ANALEN2:
|
|||||||
pop {r4-r9}
|
pop {r4-r9}
|
||||||
bx r14
|
bx r14
|
||||||
|
|
||||||
@ void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
|
|
||||||
.align 2
|
|
||||||
DEFINE_FUNCTION WebRtcNsx_DenormalizeNeon
|
|
||||||
movw r12, #offset_nsx_normData
|
|
||||||
movw r3, #offset_nsx_real
|
|
||||||
ldr r12, [r0, r12] @ inst->normData
|
|
||||||
add r3, r0 @ &inst->real[0]
|
|
||||||
sub r2, r12
|
|
||||||
vdup.32 q10, r2
|
|
||||||
|
|
||||||
movw r2, #offset_nsx_anaLen
|
|
||||||
ldrsh r2, [r0, r2] @ inst->anaLen
|
|
||||||
add r0, r3, r2, lsl #1 @ &inst->real[inst->anaLen]
|
|
||||||
|
|
||||||
LOOP_ANALEN:
|
|
||||||
vld2.16 {d0, d1}, [r1]! @ &in[]
|
|
||||||
vld2.16 {d2, d3}, [r1]! @ &in[]
|
|
||||||
vmovl.s16 q2, d0
|
|
||||||
vmovl.s16 q3, d2
|
|
||||||
vshl.s32 q2, q10
|
|
||||||
vshl.s32 q3, q10
|
|
||||||
vqmovn.s32 d0, q2
|
|
||||||
vqmovn.s32 d1, q3
|
|
||||||
vst1.16 {d0, d1}, [r3]! @ inst->real[]
|
|
||||||
cmp r3, r0
|
|
||||||
blt LOOP_ANALEN
|
|
||||||
|
|
||||||
bx r14
|
|
||||||
|
|
||||||
@ void SynthesisUpdateNeon(NsxInst_t* inst,
|
@ void SynthesisUpdateNeon(NsxInst_t* inst,
|
||||||
@ int16_t* out_frame,
|
@ int16_t* out_frame,
|
||||||
@ int16_t gain_factor);
|
@ int16_t gain_factor);
|
||||||
@ -704,33 +674,3 @@ LOOP_WINDOW_DATA:
|
|||||||
POST_LOOP_WINDOW_DATA:
|
POST_LOOP_WINDOW_DATA:
|
||||||
pop {r4-r6}
|
pop {r4-r6}
|
||||||
bx r14
|
bx r14
|
||||||
|
|
||||||
@ void CreateComplexBufferNeon(NsxInst_t* inst, int16_t* in, int16_t* out);
|
|
||||||
.align 2
|
|
||||||
DEFINE_FUNCTION WebRtcNsx_CreateComplexBufferNeon
|
|
||||||
movw r3, #offset_nsx_anaLen
|
|
||||||
movw r12, #offset_nsx_normData
|
|
||||||
ldrsh r3, [r0, r3] @ inst->anaLen
|
|
||||||
ldr r12, [r0, r12] @ inst->normData
|
|
||||||
add r3, r1, r3, lsl #1 @ &in[inst->anaLen]
|
|
||||||
|
|
||||||
vmov.i16 d7, #0 @ For writing to imaginary parts.
|
|
||||||
vmov.i16 d5, #0 @ For writing to imaginary parts.
|
|
||||||
vdup.i16 q10, r12
|
|
||||||
|
|
||||||
LOOP_CREATE_COMPLEX_BUFFER: @ Unrolled by 16.
|
|
||||||
vld1.16 {d0, d1, d2, d3}, [r1]! @ in[]
|
|
||||||
cmp r1, r3
|
|
||||||
vshl.s16 q0, q10
|
|
||||||
vshl.s16 q1, q10
|
|
||||||
vmov d4, d1
|
|
||||||
vmov d1, d5
|
|
||||||
vmov d6, d3
|
|
||||||
vmov d3, d7
|
|
||||||
vst2.16 {d0, d1}, [r2]!
|
|
||||||
vst2.16 {d4, d5}, [r2]!
|
|
||||||
vst2.16 {d2, d3}, [r2]!
|
|
||||||
vst2.16 {d6, d7}, [r2]!
|
|
||||||
blt LOOP_CREATE_COMPLEX_BUFFER
|
|
||||||
|
|
||||||
bx r14
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user