diff --git a/modules/audio_processing/aec/main/source/aec_core.c b/modules/audio_processing/aec/main/source/aec_core.c index 41f561f90..3f88d6d96 100644 --- a/modules/audio_processing/aec/main/source/aec_core.c +++ b/modules/audio_processing/aec/main/source/aec_core.c @@ -123,8 +123,7 @@ static void ProcessBlock(aec_t *aec, const short *farend, static void BufferFar(aec_t *aec, const short *farend, int farLen); static void FetchFar(aec_t *aec, short *farend, int farLen, int knownDelay); -static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output, - short *outputH); +static void NonLinearProcessing(aec_t *aec, short *output, short *outputH); static void GetHighbandGain(const float *lambda, float *nlpGainHband); @@ -256,8 +255,7 @@ static void ScaleErrorSignal(aec_t *aec, float ef[2][PART_LEN1]) } } -static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1], - int ip[IP_LEN], float wfft[W_LEN]) { +static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) { int i, j; for (i = 0; i < NR_PART; i++) { int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); @@ -292,7 +290,7 @@ static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1], -aec->xfBuf[1][xPos + PART_LEN], ef[0][PART_LEN], ef[1][PART_LEN]); - aec_rdft_128(-1, fft, ip, wfft); + aec_rdft_128(-1, fft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); // fft scaling @@ -302,7 +300,7 @@ static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1], fft[j] *= scale; } } - aec_rdft_128(1, fft, ip, wfft); + aec_rdft_128(1, fft); aec->wfBuf[0][pos] += fft[0]; aec->wfBuf[0][pos + PART_LEN] += fft[1]; @@ -574,8 +572,6 @@ static void ProcessBlock(aec_t *aec, const short *farend, float fft[PART_LEN2]; float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1]; complex_t df[PART_LEN1]; - int ip[IP_LEN]; - float wfft[W_LEN]; const float gPow[2] = {0.9f, 0.1f}; @@ -613,9 +609,7 @@ static void ProcessBlock(aec_t *aec, const short *farend, memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN); } - // Setting this on the first call initializes work arrays. - ip[0] = 0; - aec_rdft_128(1, fft, ip, wfft); + aec_rdft_128(1, fft); // Far fft xf[1][0] = 0; @@ -630,7 +624,7 @@ static void ProcessBlock(aec_t *aec, const short *farend, // Near fft memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); - aec_rdft_128(1, fft, ip, wfft); + aec_rdft_128(1, fft); df[0][1] = 0; df[PART_LEN][1] = 0; df[0][0] = fft[0]; @@ -706,7 +700,7 @@ static void ProcessBlock(aec_t *aec, const short *farend, fft[2 * i] = yf[0][i]; fft[2 * i + 1] = yf[1][i]; } - aec_rdft_128(-1, fft, ip, wfft); + aec_rdft_128(-1, fft); scale = 2.0f / PART_LEN2; for (i = 0; i < PART_LEN; i++) { @@ -721,7 +715,7 @@ static void ProcessBlock(aec_t *aec, const short *farend, memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN); memset(fft, 0, sizeof(float) * PART_LEN); memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN); - aec_rdft_128(1, fft, ip, wfft); + aec_rdft_128(1, fft); ef[1][0] = 0; ef[1][PART_LEN] = 0; @@ -738,12 +732,12 @@ static void ProcessBlock(aec_t *aec, const short *farend, if (aec->adaptToggle) { #endif // Filter adaptation - WebRtcAec_FilterAdaptation(aec, fft, ef, ip, wfft); + WebRtcAec_FilterAdaptation(aec, fft, ef); #ifdef G167 } #endif - NonLinearProcessing(aec, ip, wfft, output, outputH); + NonLinearProcessing(aec, output, outputH); #if defined(AEC_DEBUG) || defined(G167) for (i = 0; i < PART_LEN; i++) { @@ -777,7 +771,7 @@ static void ProcessBlock(aec_t *aec, const short *farend, #endif } -static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output, short *outputH) +static void NonLinearProcessing(aec_t *aec, short *output, short *outputH) { float efw[2][PART_LEN1], dfw[2][PART_LEN1]; complex_t xfw[PART_LEN1]; @@ -844,7 +838,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output, fft[i] = aec->xBuf[i] * sqrtHanning[i]; fft[PART_LEN + i] = aec->xBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; } - aec_rdft_128(1, fft, ip, wfft); + aec_rdft_128(1, fft); xfw[0][1] = 0; xfw[PART_LEN][1] = 0; @@ -866,7 +860,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output, fft[i] = aec->dBuf[i] * sqrtHanning[i]; fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; } - aec_rdft_128(1, fft, ip, wfft); + aec_rdft_128(1, fft); dfw[1][0] = 0; dfw[1][PART_LEN] = 0; @@ -882,7 +876,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output, fft[i] = aec->eBuf[i] * sqrtHanning[i]; fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; } - aec_rdft_128(1, fft, ip, wfft); + aec_rdft_128(1, fft); efw[1][0] = 0; efw[1][PART_LEN] = 0; efw[0][0] = fft[0]; @@ -1059,7 +1053,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output, // Sign change required by Ooura fft. fft[2*i + 1] = -efw[1][i]; } - aec_rdft_128(-1, fft, ip, wfft); + aec_rdft_128(-1, fft); // Overlap and add to obtain output. scale = 2.0f / PART_LEN2; @@ -1091,7 +1085,7 @@ static void NonLinearProcessing(aec_t *aec, int *ip, float *wfft, short *output, fft[2*i] = comfortNoiseHband[i][0]; fft[2*i + 1] = comfortNoiseHband[i][1]; } - aec_rdft_128(-1, fft, ip, wfft); + aec_rdft_128(-1, fft); scale = 2.0f / PART_LEN2; } diff --git a/modules/audio_processing/aec/main/source/aec_core.h b/modules/audio_processing/aec/main/source/aec_core.h index 7abefc1e2..3386b92fc 100644 --- a/modules/audio_processing/aec/main/source/aec_core.h +++ b/modules/audio_processing/aec/main/source/aec_core.h @@ -172,8 +172,7 @@ extern WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal; #define IP_LEN PART_LEN // this must be at least ceil(2 + sqrt(PART_LEN)) #define W_LEN PART_LEN typedef void (*WebRtcAec_FilterAdaptation_t) - (aec_t *aec, float *fft, float ef[2][PART_LEN1], int ip[IP_LEN], - float wfft[W_LEN]); + (aec_t *aec, float *fft, float ef[2][PART_LEN1]); extern WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation; typedef void (*WebRtcAec_OverdriveAndSuppress_t) (aec_t *aec, float hNl[PART_LEN1], const float hNlFb, float efw[2][PART_LEN1]); diff --git a/modules/audio_processing/aec/main/source/aec_core_sse2.c b/modules/audio_processing/aec/main/source/aec_core_sse2.c index 0a00a2e3c..8dbad75be 100644 --- a/modules/audio_processing/aec/main/source/aec_core_sse2.c +++ b/modules/audio_processing/aec/main/source/aec_core_sse2.c @@ -126,8 +126,7 @@ static void ScaleErrorSignalSSE2(aec_t *aec, float ef[2][PART_LEN1]) } } -static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1], - int ip[IP_LEN], float wfft[W_LEN]) { +static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1]) { int i, j; for (i = 0; i < NR_PART; i++) { int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); @@ -175,7 +174,7 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1], -aec->xfBuf[1][xPos + PART_LEN], ef[0][PART_LEN], ef[1][PART_LEN]); - aec_rdft_128(-1, fft, ip, wfft); + aec_rdft_128(-1, fft); memset(fft + PART_LEN, 0, sizeof(float)*PART_LEN); // fft scaling @@ -188,7 +187,7 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1], _mm_storeu_ps(&fft[j], fft_scale); } } - aec_rdft_128(1, fft, ip, wfft); + aec_rdft_128(1, fft); { float wt1 = aec->wfBuf[1][pos]; diff --git a/modules/audio_processing/aec/main/source/aec_rdft.c b/modules/audio_processing/aec/main/source/aec_rdft.c index 15f5fcf9d..bba372055 100644 --- a/modules/audio_processing/aec/main/source/aec_rdft.c +++ b/modules/audio_processing/aec/main/source/aec_rdft.c @@ -24,6 +24,9 @@ #include "aec_rdft.h" #include "system_wrappers/interface/cpu_features_wrapper.h" +float rdft_w[64]; +static int ip[16]; + static void bitrv2_32or128(int n, int *ip, float *a) { // n is 32 or 128 int j, j1, k, k1, m, m2; @@ -98,7 +101,7 @@ static void bitrv2_32or128(int n, int *ip, float *a) { } } -static void makewt(int *ip, float *w) { +static void makewt_32() { const int nw = 32; int j, nwh; float delta, x, y; @@ -107,22 +110,23 @@ static void makewt(int *ip, float *w) { ip[1] = 1; nwh = nw >> 1; delta = atanf(1.0f) / nwh; - w[0] = 1; - w[1] = 0; - w[nwh] = cosf(delta * nwh); - w[nwh + 1] = w[nwh]; + rdft_w[0] = 1; + rdft_w[1] = 0; + rdft_w[nwh] = cosf(delta * nwh); + rdft_w[nwh + 1] = rdft_w[nwh]; for (j = 2; j < nwh; j += 2) { x = cosf(delta * j); y = sinf(delta * j); - w[j] = x; - w[j + 1] = y; - w[nw - j] = y; - w[nw - j + 1] = x; + rdft_w[j] = x; + rdft_w[j + 1] = y; + rdft_w[nw - j] = y; + rdft_w[nw - j + 1] = x; } - bitrv2_32or128(nw, ip + 2, w); + bitrv2_32or128(nw, ip + 2, rdft_w); } -static void makect_32(int *ip, float *c) { +static void makect_32() { + float *c = rdft_w + 32; const int nc = 32; int j, nch; float delta; @@ -138,7 +142,7 @@ static void makect_32(int *ip, float *c) { } } -static void cft1st_128(float *a, float *w) { +static void cft1st_128(float *a) { const int n = 128; int j, k1, k2; float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; @@ -160,7 +164,7 @@ static void cft1st_128(float *a, float *w) { a[3] = x1i + x3r; a[6] = x1r + x3i; a[7] = x1i - x3r; - wk1r = w[2]; + wk1r = rdft_w[2]; x0r = a[8] + a[10]; x0i = a[9] + a[11]; x1r = a[8] - a[10]; @@ -185,10 +189,10 @@ static void cft1st_128(float *a, float *w) { for (j = 16; j < n; j += 16) { k1 += 2; k2 = 2 * k1; - wk2r = w[k1]; - wk2i = w[k1 + 1]; - wk1r = w[k2]; - wk1i = w[k2 + 1]; + wk2r = rdft_w[k1]; + wk2i = rdft_w[k1 + 1]; + wk1r = rdft_w[k2]; + wk1i = rdft_w[k2 + 1]; wk3r = wk1r - 2 * wk2i * wk1i; wk3i = 2 * wk2i * wk1r - wk1i; x0r = a[j] + a[j + 2]; @@ -213,8 +217,8 @@ static void cft1st_128(float *a, float *w) { x0i = x1i - x3r; a[j + 6] = wk3r * x0r - wk3i * x0i; a[j + 7] = wk3r * x0i + wk3i * x0r; - wk1r = w[k2 + 2]; - wk1i = w[k2 + 3]; + wk1r = rdft_w[k2 + 2]; + wk1i = rdft_w[k2 + 3]; wk3r = wk1r - 2 * wk2r * wk1i; wk3i = 2 * wk2r * wk1r - wk1i; x0r = a[j + 8] + a[j + 10]; @@ -242,7 +246,7 @@ static void cft1st_128(float *a, float *w) { } } -static void cftmdl_128(int l, float *a, float *w) { +static void cftmdl_128(int l, float *a) { const int n = 128; int j, j1, j2, j3, k, k1, k2, m, m2; float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; @@ -270,7 +274,7 @@ static void cftmdl_128(int l, float *a, float *w) { a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; } - wk1r = w[2]; + wk1r = rdft_w[2]; for (j = m; j < l + m; j += 2) { j1 = j + l; j2 = j1 + l; @@ -301,10 +305,10 @@ static void cftmdl_128(int l, float *a, float *w) { for (k = m2; k < n; k += m2) { k1 += 2; k2 = 2 * k1; - wk2r = w[k1]; - wk2i = w[k1 + 1]; - wk1r = w[k2]; - wk1i = w[k2 + 1]; + wk2r = rdft_w[k1]; + wk2i = rdft_w[k1 + 1]; + wk1r = rdft_w[k2]; + wk1i = rdft_w[k2 + 1]; wk3r = wk1r - 2 * wk2i * wk1i; wk3i = 2 * wk2i * wk1r - wk1i; for (j = k; j < l + k; j += 2) { @@ -334,8 +338,8 @@ static void cftmdl_128(int l, float *a, float *w) { a[j3] = wk3r * x0r - wk3i * x0i; a[j3 + 1] = wk3r * x0i + wk3i * x0r; } - wk1r = w[k2 + 2]; - wk1i = w[k2 + 3]; + wk1r = rdft_w[k2 + 2]; + wk1i = rdft_w[k2 + 3]; wk3r = wk1r - 2 * wk2r * wk1i; wk3i = 2 * wk2r * wk1r - wk1i; for (j = k + m; j < l + (k + m); j += 2) { @@ -368,12 +372,12 @@ static void cftmdl_128(int l, float *a, float *w) { } } -static void cftfsub_128(float *a, float *w) { +static void cftfsub_128(float *a) { int j, j1, j2, j3, l; float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - cft1st_128(a, w); - cftmdl_128(8, a, w); + cft1st_128(a); + cftmdl_128(8, a); l = 32; for (j = 0; j < l; j += 2) { j1 = j + l; @@ -398,12 +402,12 @@ static void cftfsub_128(float *a, float *w) { } } -static void cftbsub_128(float *a, float *w) { +static void cftbsub_128(float *a) { int j, j1, j2, j3, l; float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - cft1st_128(a, w); - cftmdl_128(8, a, w); + cft1st_128(a); + cftmdl_128(8, a); l = 32; for (j = 0; j < l; j += 2) { @@ -429,7 +433,8 @@ static void cftbsub_128(float *a, float *w) { } } -static void rftfsub_128_C(float *a, float *c) { +static void rftfsub_128_C(float *a) { + const float *c = rdft_w + 32; int j1, j2, k1, k2; float wkr, wki, xr, xi, yr, yi; @@ -449,7 +454,8 @@ static void rftfsub_128_C(float *a, float *c) { } } -static void rftbsub_128_C(float *a, float *c) { +static void rftbsub_128_C(float *a) { + const float *c = rdft_w + 32; int j1, j2, k1, k2; float wkr, wki, xr, xi, yr, yi; @@ -471,33 +477,25 @@ static void rftbsub_128_C(float *a, float *c) { a[65] = -a[65]; } -void aec_rdft_128(int isgn, float *a, int *ip, float *w) -{ +void aec_rdft_128(int isgn, float *a) { const int n = 128; int nw; float xi; nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(ip, w); - } - if (n > (ip[1] << 2)) { - makect_32(ip, w + nw); - } if (isgn >= 0) { bitrv2_32or128(n, ip + 2, a); - cftfsub_128(a, w); - rftfsub_128(a, w + nw); + cftfsub_128(a); + rftfsub_128(a); xi = a[0] - a[1]; a[0] += a[1]; a[1] = xi; } else { a[1] = 0.5f * (a[0] - a[1]); a[0] -= a[1]; - rftbsub_128(a, w + nw); + rftbsub_128(a); bitrv2_32or128(n, ip + 2, a); - cftbsub_128(a, w); + cftbsub_128(a); } } @@ -513,4 +511,7 @@ void aec_rdft_init(void) { aec_rdft_init_sse2(); #endif } + // init library constants. + makewt_32(); + makect_32(); } diff --git a/modules/audio_processing/aec/main/source/aec_rdft.h b/modules/audio_processing/aec/main/source/aec_rdft.h index add162c34..335256674 100644 --- a/modules/audio_processing/aec/main/source/aec_rdft.h +++ b/modules/audio_processing/aec/main/source/aec_rdft.h @@ -8,12 +8,15 @@ * be found in the AUTHORS file in the root of the source tree. */ +// constants shared by all paths (C, SSE2). +extern float rdft_w[64]; + // code path selection function pointers -typedef void (*rft_sub_128_t)(float *a, float *c); +typedef void (*rft_sub_128_t)(float *a); extern rft_sub_128_t rftfsub_128; extern rft_sub_128_t rftbsub_128; // entry points void aec_rdft_init(void); void aec_rdft_init_sse2(void); -void aec_rdft_128(int, float *, int *, float *); +void aec_rdft_128(int isgn, float *a); diff --git a/modules/audio_processing/aec/main/source/aec_rdft_sse2.c b/modules/audio_processing/aec/main/source/aec_rdft_sse2.c index df76aa3f2..901a1b146 100644 --- a/modules/audio_processing/aec/main/source/aec_rdft_sse2.c +++ b/modules/audio_processing/aec/main/source/aec_rdft_sse2.c @@ -20,7 +20,8 @@ # define ALIGN16_END __attribute__((aligned(16))) #endif -static void rftfsub_128_SSE2(float *a, float *c) { +static void rftfsub_128_SSE2(float *a) { + const float *c = rdft_w + 32; int j1, j2, k1, k2; float wkr, wki, xr, xi, yr, yi; @@ -109,7 +110,8 @@ static void rftfsub_128_SSE2(float *a, float *c) { } } -static void rftbsub_128_SSE2(float *a, float *c) { +static void rftbsub_128_SSE2(float *a) { + const float *c = rdft_w + 32; int j1, j2, k1, k2; float wkr, wki, xr, xi, yr, yi;