diff --git a/LICENSE_THIRD_PARTY b/LICENSE_THIRD_PARTY index 2086d2844..31d27e8a0 100644 --- a/LICENSE_THIRD_PARTY +++ b/LICENSE_THIRD_PARTY @@ -15,6 +15,7 @@ modules/audio_device/main/source/Mac/portaudio/pa_memorybarrier.h modules/audio_device/main/source/Mac/portaudio/pa_ringbuffer.h modules/audio_device/main/source/Mac/portaudio/pa_ringbuffer.c modules/audio_processing/utility/fft4g.c +modules/audio_processing/aec/main/source/aec_core_rdft.c system_wrappers/interface/fix_interlocked_exchange_pointer_windows.h system_wrappers/source/condition_variable_windows.cc system_wrappers/source/spreadsortlib/constants.hpp diff --git a/modules/audio_processing/aec/main/source/aec.gyp b/modules/audio_processing/aec/main/source/aec.gyp index 96f9bd75b..ed33d7f71 100644 --- a/modules/audio_processing/aec/main/source/aec.gyp +++ b/modules/audio_processing/aec/main/source/aec.gyp @@ -31,6 +31,7 @@ 'echo_cancellation.c', 'aec_core.c', 'aec_core_sse2.c', + 'aec_core_rdft.c', 'aec_core.h', 'resampler.c', 'resampler.h', diff --git a/modules/audio_processing/aec/main/source/aec_core.c b/modules/audio_processing/aec/main/source/aec_core.c index 3303a8a88..eb8a90fc1 100644 --- a/modules/audio_processing/aec/main/source/aec_core.c +++ b/modules/audio_processing/aec/main/source/aec_core.c @@ -291,7 +291,7 @@ static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1], -aec->xfBuf[1][xPos + PART_LEN], ef[0][PART_LEN], ef[1][PART_LEN]); - rdft(PART_LEN2, -1, fft, ip, wfft); + aec_rdft_128(-1, fft, ip, wfft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); // fft scaling @@ -301,7 +301,7 @@ static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1], fft[j] *= scale; } } - rdft(PART_LEN2, 1, fft, ip, wfft); + aec_rdft_128(1, fft, ip, wfft); aec->wfBuf[0][pos] += fft[0]; aec->wfBuf[0][pos + PART_LEN] += fft[1]; @@ -613,7 +613,7 @@ static void ProcessBlock(aec_t *aec, const short *farend, // Setting this on the first call initializes work arrays. ip[0] = 0; - rdft(PART_LEN2, 1, fft, ip, wfft); + aec_rdft_128(1, fft, ip, wfft); // Far fft xf[1][0] = 0; @@ -628,7 +628,7 @@ static void ProcessBlock(aec_t *aec, const short *farend, // Near fft memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); - rdft(PART_LEN2, 1, fft, ip, wfft); + aec_rdft_128(1, fft, ip, wfft); df[0][1] = 0; df[PART_LEN][1] = 0; df[0][0] = fft[0]; @@ -704,7 +704,7 @@ static void ProcessBlock(aec_t *aec, const short *farend, fft[2 * i] = yf[0][i]; fft[2 * i + 1] = yf[1][i]; } - rdft(PART_LEN2, -1, fft, ip, wfft); + aec_rdft_128(-1, fft, ip, wfft); scale = 2.0f / PART_LEN2; for (i = 0; i < PART_LEN; i++) { @@ -719,7 +719,7 @@ static void ProcessBlock(aec_t *aec, const short *farend, memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN); memset(fft, 0, sizeof(float) * PART_LEN); memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN); - rdft(PART_LEN2, 1, fft, ip, wfft); + aec_rdft_128(1, fft, ip, wfft); ef[1][0] = 0; ef[1][PART_LEN] = 0; @@ -842,7 +842,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output, fft[i] = aec->xBuf[i] * sqrtHanning[i]; fft[PART_LEN + i] = aec->xBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; } - rdft(PART_LEN2, 1, fft, ip, wfft); + aec_rdft_128(1, fft, ip, wfft); xfw[0][1] = 0; xfw[PART_LEN][1] = 0; @@ -864,7 +864,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output, fft[i] = aec->dBuf[i] * sqrtHanning[i]; fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; } - rdft(PART_LEN2, 1, fft, ip, wfft); + aec_rdft_128(1, fft, ip, wfft); dfw[1][0] = 0; dfw[1][PART_LEN] = 0; @@ -880,7 +880,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output, fft[i] = aec->eBuf[i] * sqrtHanning[i]; fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; } - rdft(PART_LEN2, 1, fft, ip, wfft); + aec_rdft_128(1, fft, ip, wfft); efw[1][0] = 0; efw[1][PART_LEN] = 0; efw[0][0] = fft[0]; @@ -1057,7 +1057,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output, // Sign change required by Ooura fft. fft[2*i + 1] = -efw[1][i]; } - rdft(PART_LEN2, -1, fft, ip, wfft); + aec_rdft_128(-1, fft, ip, wfft); // Overlap and add to obtain output. scale = 2.0f / PART_LEN2; @@ -1089,7 +1089,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output, fft[2*i] = comfortNoiseHband[i][0]; fft[2*i + 1] = comfortNoiseHband[i][1]; } - rdft(PART_LEN2, -1, fft, ip, wfft); + aec_rdft_128(-1, fft, ip, wfft); scale = 2.0f / PART_LEN2; } diff --git a/modules/audio_processing/aec/main/source/aec_core.h b/modules/audio_processing/aec/main/source/aec_core.h index ae043038a..2a619085d 100644 --- a/modules/audio_processing/aec/main/source/aec_core.h +++ b/modules/audio_processing/aec/main/source/aec_core.h @@ -16,7 +16,6 @@ #define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_ #include <stdio.h> -#include "fft4g.h" #include "typedefs.h" #include "signal_processing_library.h" @@ -191,5 +190,9 @@ void WebRtcAec_ProcessFrame(aec_t *aec, const short *farend, short *out, short *outH, int knownDelay); +// aec_core_rdft.c +void aec_rdft_128(int, float *, int *, float *); + + #endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_ diff --git a/modules/audio_processing/aec/main/source/aec_core_rdft.c b/modules/audio_processing/aec/main/source/aec_core_rdft.c new file mode 100644 index 000000000..3f4d30049 --- /dev/null +++ b/modules/audio_processing/aec/main/source/aec_core_rdft.c @@ -0,0 +1,511 @@ +/* + * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html + * Copyright Takuya OOURA, 1996-2001 + * + * You may use, copy, modify and distribute this code for any purpose (include + * commercial use) and without fee. Please refer to this package when you modify + * this code. + * + * Changes by the WebRTC authors: + * - Trivial type modifications. + * - Minimal code subset to do rdft of length 128. + * - Optimizations because of known length. + * + * All changes are covered by the WebRTC license and IP grant: + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <math.h> + +#include "aec_core.h" + +static void bitrv2_32or128(int n, int *ip, float *a) { + // n is 32 or 128 + int j, j1, k, k1, m, m2; + float xr, xi, yr, yi; + + ip[0] = 0; + { + int l = n; + m = 1; + while ((m << 3) < l) { + l >>= 1; + for (j = 0; j < m; j++) { + ip[m + j] = ip[j] + l; + } + m <<= 1; + } + } + m2 = 2 * m; + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 -= m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + j1 = 2 * k + m2 + ip[k]; + k1 = j1 + m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } +} + +static void makewt(int *ip, float *w) { + const int nw = 32; + int j, nwh; + float delta, x, y; + + ip[0] = nw; + ip[1] = 1; + nwh = nw >> 1; + delta = atanf(1.0f) / nwh; + w[0] = 1; + w[1] = 0; + w[nwh] = cosf(delta * nwh); + w[nwh + 1] = w[nwh]; + for (j = 2; j < nwh; j += 2) { + x = cosf(delta * j); + y = sinf(delta * j); + w[j] = x; + w[j + 1] = y; + w[nw - j] = y; + w[nw - j + 1] = x; + } + bitrv2_32or128(nw, ip + 2, w); +} + +static void makect_32(int *ip, float *c) { + const int nc = 32; + int j, nch; + float delta; + + ip[1] = nc; + nch = nc >> 1; + delta = atanf(1.0f) / nch; + c[0] = cosf(delta * nch); + c[nch] = 0.5f * c[0]; + for (j = 1; j < nch; j++) { + c[j] = 0.5f * cosf(delta * j); + c[nc - j] = 0.5f * sinf(delta * j); + } +} + +static void cft1st_128(float *a, float *w) { + const int n = 128; + int j, k1, k2; + float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + x0r = a[0] + a[2]; + x0i = a[1] + a[3]; + x1r = a[0] - a[2]; + x1i = a[1] - a[3]; + x2r = a[4] + a[6]; + x2i = a[5] + a[7]; + x3r = a[4] - a[6]; + x3i = a[5] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[2] = x1r - x3i; + a[3] = x1i + x3r; + a[6] = x1r + x3i; + a[7] = x1i - x3r; + wk1r = w[2]; + x0r = a[8] + a[10]; + x0i = a[9] + a[11]; + x1r = a[8] - a[10]; + x1i = a[9] - a[11]; + x2r = a[12] + a[14]; + x2i = a[13] + a[15]; + x3r = a[12] - a[14]; + x3i = a[13] - a[15]; + a[8] = x0r + x2r; + a[9] = x0i + x2i; + a[12] = x2i - x0i; + a[13] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[10] = wk1r * (x0r - x0i); + a[11] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[14] = wk1r * (x0i - x0r); + a[15] = wk1r * (x0i + x0r); + k1 = 0; + for (j = 16; j < n; j += 16) { + k1 += 2; + k2 = 2 * k1; + wk2r = w[k1]; + wk2i = w[k1 + 1]; + wk1r = w[k2]; + wk1i = w[k2 + 1]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + x0r = a[j] + a[j + 2]; + x0i = a[j + 1] + a[j + 3]; + x1r = a[j] - a[j + 2]; + x1i = a[j + 1] - a[j + 3]; + x2r = a[j + 4] + a[j + 6]; + x2i = a[j + 5] + a[j + 7]; + x3r = a[j + 4] - a[j + 6]; + x3i = a[j + 5] - a[j + 7]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 4] = wk2r * x0r - wk2i * x0i; + a[j + 5] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 2] = wk1r * x0r - wk1i * x0i; + a[j + 3] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 6] = wk3r * x0r - wk3i * x0i; + a[j + 7] = wk3r * x0i + wk3i * x0r; + wk1r = w[k2 + 2]; + wk1i = w[k2 + 3]; + wk3r = wk1r - 2 * wk2r * wk1i; + wk3i = 2 * wk2r * wk1r - wk1i; + x0r = a[j + 8] + a[j + 10]; + x0i = a[j + 9] + a[j + 11]; + x1r = a[j + 8] - a[j + 10]; + x1i = a[j + 9] - a[j + 11]; + x2r = a[j + 12] + a[j + 14]; + x2i = a[j + 13] + a[j + 15]; + x3r = a[j + 12] - a[j + 14]; + x3i = a[j + 13] - a[j + 15]; + a[j + 8] = x0r + x2r; + a[j + 9] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 12] = -wk2i * x0r - wk2r * x0i; + a[j + 13] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 10] = wk1r * x0r - wk1i * x0i; + a[j + 11] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 14] = wk3r * x0r - wk3i * x0i; + a[j + 15] = wk3r * x0i + wk3i * x0r; + } +} + +static void cftmdl_128(int l, float *a, float *w) { + const int n = 128; + int j, j1, j2, j3, k, k1, k2, m, m2; + float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + m = l << 2; + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } + wk1r = w[2]; + for (j = m; j < l + m; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x2i - x0i; + a[j2 + 1] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * (x0r - x0i); + a[j1 + 1] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[j3] = wk1r * (x0i - x0r); + a[j3 + 1] = wk1r * (x0i + x0r); + } + k1 = 0; + m2 = 2 * m; + for (k = m2; k < n; k += m2) { + k1 += 2; + k2 = 2 * k1; + wk2r = w[k1]; + wk2i = w[k1 + 1]; + wk1r = w[k2]; + wk1i = w[k2 + 1]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + for (j = k; j < l + k; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2] = wk2r * x0r - wk2i * x0i; + a[j2 + 1] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + wk1r = w[k2 + 2]; + wk1i = w[k2 + 3]; + wk3r = wk1r - 2 * wk2r * wk1i; + wk3i = 2 * wk2r * wk1r - wk1i; + for (j = k + m; j < l + (k + m); j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2] = -wk2i * x0r - wk2r * x0i; + a[j2 + 1] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + } +} + +static void cftfsub_128(float *a, float *w) { + int j, j1, j2, j3, l; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + cft1st_128(a, w); + cftmdl_128(8, a, w); + l = 32; + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } +} + +static void cftbsub_128(float *a, float *w) { + int j, j1, j2, j3, l; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + cft1st_128(a, w); + cftmdl_128(8, a, w); + l = 32; + + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = -a[j + 1] - a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = -a[j + 1] + a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i - x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i + x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i - x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i + x3r; + } +} + +static void rftfsub_128(float *a, int nc, float *c) { + const int n = 128; + int j, k, kk, ks, m; + float wkr, wki, xr, xi, yr, yi; + + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5f - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr - wki * xi; + yi = wkr * xi + wki * xr; + a[j] -= yr; + a[j + 1] -= yi; + a[k] += yr; + a[k + 1] -= yi; + } +} + +static void rftbsub_128(float *a, int nc, float *c) { + const int n = 128; + int j, k, kk, ks, m; + float wkr, wki, xr, xi, yr, yi; + + a[1] = -a[1]; + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5f - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr + wki * xi; + yi = wkr * xi - wki * xr; + a[j] -= yr; + a[j + 1] = yi - a[j + 1]; + a[k] += yr; + a[k + 1] = yi - a[k + 1]; + } + a[m + 1] = -a[m + 1]; +} + +void aec_rdft_128(int isgn, float *a, int *ip, float *w) +{ + const int n = 128; + int nw, nc; + float xi; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(ip, w); + } + nc = ip[1]; + if (n > (nc << 2)) { + nc = n >> 2; + makect_32(ip, w + nw); + } + if (isgn >= 0) { + bitrv2_32or128(n, ip + 2, a); + cftfsub_128(a, w); + rftfsub_128(a, nc, w + nw); + xi = a[0] - a[1]; + a[0] += a[1]; + a[1] = xi; + } else { + a[1] = 0.5f * (a[0] - a[1]); + a[0] -= a[1]; + rftbsub_128(a, nc, w + nw); + bitrv2_32or128(n, ip + 2, a); + cftbsub_128(a, w); + } +} diff --git a/modules/audio_processing/aec/main/source/aec_core_sse2.c b/modules/audio_processing/aec/main/source/aec_core_sse2.c index d7f30b58c..c020e7bb4 100644 --- a/modules/audio_processing/aec/main/source/aec_core_sse2.c +++ b/modules/audio_processing/aec/main/source/aec_core_sse2.c @@ -174,7 +174,7 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1], -aec->xfBuf[1][xPos + PART_LEN], ef[0][PART_LEN], ef[1][PART_LEN]); - rdft(PART_LEN2, -1, fft, ip, wfft); + aec_rdft_128(-1, fft, ip, wfft); memset(fft + PART_LEN, 0, sizeof(float)*PART_LEN); // fft scaling @@ -187,7 +187,7 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1], _mm_storeu_ps(&fft[j], fft_scale); } } - rdft(PART_LEN2, 1, fft, ip, wfft); + aec_rdft_128(1, fft, ip, wfft); { float wt1 = aec->wfBuf[1][pos];