diff --git a/src/modules/audio_processing/aecm/main/source/aecm.gyp b/src/modules/audio_processing/aecm/main/source/aecm.gyp index a535d2b29..654359917 100644 --- a/src/modules/audio_processing/aecm/main/source/aecm.gyp +++ b/src/modules/audio_processing/aecm/main/source/aecm.gyp @@ -31,6 +31,8 @@ 'echo_control_mobile.c', 'aecm_core.c', 'aecm_core.h', + 'aecm_delay_estimator.c', + 'aecm_delay_estimator.h', ], }, ], diff --git a/src/modules/audio_processing/aecm/main/source/aecm_core.c b/src/modules/audio_processing/aecm/main/source/aecm_core.c index 694bb8a5f..d229f7453 100644 --- a/src/modules/audio_processing/aecm/main/source/aecm_core.c +++ b/src/modules/audio_processing/aecm/main/source/aecm_core.c @@ -8,12 +8,14 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include -#include - #include "aecm_core.h" -#include "ring_buffer.h" + +#include +#include + +#include "aecm_delay_estimator.h" #include "echo_control_mobile.h" +#include "ring_buffer.h" #include "typedefs.h" #ifdef ARM_WINM_LOG @@ -21,16 +23,6 @@ #include #endif -// BANDLAST - BANDFIRST must be < 32 -#define BANDFIRST 12 // Only bit BANDFIRST through bit BANDLAST are processed -#define BANDLAST 43 - -#ifdef ARM_WINM -#define WebRtcSpl_AddSatW32(a,b) _AddSatInt(a,b) -#define WebRtcSpl_SubSatW32(a,b) _SubSatInt(a,b) -#endif -// 16 instructions on most risc machines for 32-bit bitcount ! - #ifdef AEC_DEBUG FILE *dfile; FILE *testfile; @@ -111,109 +103,6 @@ static void WebRtcAecm_ComfortNoise(AecmCore_t* const aecm, const WebRtc_UWord16 WebRtc_Word16 * const outImag, const WebRtc_Word16 * const lambda); -static __inline WebRtc_UWord32 WebRtcAecm_SetBit(WebRtc_UWord32 in, WebRtc_Word32 pos) -{ - WebRtc_UWord32 mask, out; - - mask = WEBRTC_SPL_SHIFT_W32(1, pos); - out = (in | mask); - - return out; -} - -// WebRtcAecm_Hisser(...) -// -// This function compares the binary vector specvec with all rows of the binary matrix specmat -// and counts per row the number of times they have the same value. -// Input: -// - specvec : binary "vector" that is stored in a long -// - specmat : binary "matrix" that is stored as a vector of long -// Output: -// - bcount : "Vector" stored as a long, containing for each row the number of times -// the matrix row and the input vector have the same value -// -// -void WebRtcAecm_Hisser(const WebRtc_UWord32 specvec, const WebRtc_UWord32 * const specmat, - WebRtc_UWord32 * const bcount) -{ - int n; - WebRtc_UWord32 a, b; - register WebRtc_UWord32 tmp; - - a = specvec; - // compare binary vector specvec with all rows of the binary matrix specmat - for (n = 0; n < MAX_DELAY; n++) - { - b = specmat[n]; - a = (specvec ^ b); - // Returns bit counts in tmp - tmp = a - ((a >> 1) & 033333333333) - ((a >> 2) & 011111111111); - tmp = ((tmp + (tmp >> 3)) & 030707070707); - tmp = (tmp + (tmp >> 6)); - tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077; - - bcount[n] = tmp; - } -} - -// WebRtcAecm_BSpectrum(...) -// -// Computes the binary spectrum by comparing the input spectrum with a threshold spectrum. -// -// Input: -// - spectrum : Spectrum of which the binary spectrum should be calculated. -// - thresvec : Threshold spectrum with which the input spectrum is compared. -// Return: -// - out : Binary spectrum -// -WebRtc_UWord32 WebRtcAecm_BSpectrum(const WebRtc_UWord16 * const spectrum, - const WebRtc_UWord16 * const thresvec) -{ - int k; - WebRtc_UWord32 out; - - out = 0; - for (k = BANDFIRST; k <= BANDLAST; k++) - { - if (spectrum[k] > thresvec[k]) - { - out = WebRtcAecm_SetBit(out, k - BANDFIRST); - } - } - - return out; -} - -// WebRtcAecm_MedianEstimator(...) -// -// Calculates the median recursively. -// -// Input: -// - newVal : new additional value -// - medianVec : vector with current medians -// - factor : factor for smoothing -// -// Output: -// - medianVec : vector with updated median -// -int WebRtcAecm_MedianEstimator(const WebRtc_UWord16 newVal, WebRtc_UWord16 * const medianVec, - const int factor) -{ - WebRtc_Word32 median; - WebRtc_Word32 diff; - - median = (WebRtc_Word32)medianVec[0]; - - //median = median + ((newVal-median)>>factor); - diff = (WebRtc_Word32)newVal - median; - diff = WEBRTC_SPL_SHIFT_W32(diff, -factor); - median = median + diff; - - medianVec[0] = (WebRtc_UWord16)median; - - return 0; -} - int WebRtcAecm_CreateCore(AecmCore_t **aecmInst) { AecmCore_t *aecm = malloc(sizeof(AecmCore_t)); @@ -251,6 +140,13 @@ int WebRtcAecm_CreateCore(AecmCore_t **aecmInst) return -1; } + if (WebRtcAecm_CreateDelayEstimator(&aecm->delay_estimator, PART_LEN1, MAX_DELAY) == -1) + { + WebRtcAecm_FreeCore(aecm); + aecm = NULL; + return -1; + } + return 0; } @@ -321,31 +217,24 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq) aecm->seed = 666; aecm->totCount = 0; - memset(aecm->xfaHistory, 0, sizeof(WebRtc_UWord16) * (PART_LEN1) * MAX_DELAY); - - aecm->delHistoryPos = MAX_DELAY; - - memset(aecm->medianYlogspec, 0, sizeof(WebRtc_UWord16) * PART_LEN1); - memset(aecm->medianXlogspec, 0, sizeof(WebRtc_UWord16) * PART_LEN1); - memset(aecm->medianBCount, 0, sizeof(WebRtc_UWord16) * MAX_DELAY); - memset(aecm->bxHistory, 0, sizeof(aecm->bxHistory)); + if (WebRtcAecm_InitDelayEstimator(aecm->delay_estimator) != 0) + { + retVal = -1; + } // Initialize to reasonable values aecm->currentDelay = 8; - aecm->previousDelay = 8; - aecm->delayAdjust = 0; aecm->nlpFlag = 1; aecm->fixedDelay = -1; - memset(aecm->xfaQDomainBuf, 0, sizeof(WebRtc_Word16) * MAX_DELAY); aecm->dfaCleanQDomain = 0; aecm->dfaCleanQDomainOld = 0; aecm->dfaNoisyQDomain = 0; aecm->dfaNoisyQDomainOld = 0; memset(aecm->nearLogEnergy, 0, sizeof(WebRtc_Word16) * MAX_BUF_LEN); - memset(aecm->farLogEnergy, 0, sizeof(WebRtc_Word16) * MAX_BUF_LEN); + aecm->farLogEnergy = 0; memset(aecm->echoAdaptLogEnergy, 0, sizeof(WebRtc_Word16) * MAX_BUF_LEN); memset(aecm->echoStoredLogEnergy, 0, sizeof(WebRtc_Word16) * MAX_BUF_LEN); @@ -389,20 +278,9 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq) aecm->vadUpdateCount = 0; aecm->firstVAD = 1; - aecm->delayCount = 0; - aecm->newDelayCorrData = 0; - aecm->lastDelayUpdateCount = 0; - memset(aecm->delayCorrelation, 0, sizeof(WebRtc_Word16) * ((CORR_MAX << 1) + 1)); - aecm->startupState = 0; aecm->supGain = SUPGAIN_DEFAULT; aecm->supGainOld = SUPGAIN_DEFAULT; - aecm->delayOffsetFlag = 0; - - memset(aecm->delayHistogram, 0, sizeof(aecm->delayHistogram)); - aecm->delayVadCount = 0; - aecm->maxDelayHistIdx = 0; - aecm->lastMinPos = 0; aecm->supGainErrParamA = SUPGAIN_ERROR_PARAM_A; aecm->supGainErrParamD = SUPGAIN_ERROR_PARAM_D; @@ -412,211 +290,16 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq) return 0; } -int WebRtcAecm_Control(AecmCore_t *aecm, int delay, int nlpFlag, int delayOffsetFlag) +// TODO(bjornv): This function is currently not used. Add support for these +// parameters from a higher level +int WebRtcAecm_Control(AecmCore_t *aecm, int delay, int nlpFlag) { aecm->nlpFlag = nlpFlag; aecm->fixedDelay = delay; - aecm->delayOffsetFlag = delayOffsetFlag; return 0; } -// WebRtcAecm_GetNewDelPos(...) -// -// Moves the pointer to the next entry. Returns to zero if max position reached. -// -// Input: -// - aecm : Pointer to the AECM instance -// Return: -// - pos : New position in the history. -// -// -WebRtc_Word16 WebRtcAecm_GetNewDelPos(AecmCore_t * const aecm) -{ - WebRtc_Word16 pos; - - pos = aecm->delHistoryPos; - pos++; - if (pos >= MAX_DELAY) - { - pos = 0; - } - aecm->delHistoryPos = pos; - - return pos; -} - -// WebRtcAecm_EstimateDelay(...) -// -// Estimate the delay of the echo signal. -// -// Inputs: -// - aecm : Pointer to the AECM instance -// - farSpec : Delayed farend magnitude spectrum -// - nearSpec : Nearend magnitude spectrum -// - stages : Q-domain of xxFIX and yyFIX (without dynamic Q-domain) -// - xfaQ : normalization factor, i.e., Q-domain before FFT -// Return: -// - delay : Estimated delay -// -WebRtc_Word16 WebRtcAecm_EstimateDelay(AecmCore_t * const aecm, - const WebRtc_UWord16 * const farSpec, - const WebRtc_UWord16 * const nearSpec, - const WebRtc_Word16 xfaQ) -{ - WebRtc_UWord32 bxspectrum, byspectrum; - WebRtc_UWord32 bcount[MAX_DELAY]; - - int i, res; - - WebRtc_UWord16 xmean[PART_LEN1], ymean[PART_LEN1]; - WebRtc_UWord16 dtmp1; - WebRtc_Word16 fcount[MAX_DELAY]; - - //WebRtc_Word16 res; - WebRtc_Word16 histpos; - WebRtc_Word16 maxHistLvl; - WebRtc_UWord16 *state; - WebRtc_Word16 minpos = -1; - - enum - { - kVadCountThreshold = 25 - }; - enum - { - kMaxHistogram = 600 - }; - - histpos = WebRtcAecm_GetNewDelPos(aecm); - - for (i = 0; i < PART_LEN1; i++) - { - aecm->xfaHistory[i][histpos] = farSpec[i]; - - state = &(aecm->medianXlogspec[i]); - res = WebRtcAecm_MedianEstimator(farSpec[i], state, 6); - - state = &(aecm->medianYlogspec[i]); - res = WebRtcAecm_MedianEstimator(nearSpec[i], state, 6); - - // Mean: - // FLOAT: - // ymean = dtmp2/MAX_DELAY - // - // FIX: - // input: dtmp2FIX in Q0 - // output: ymeanFIX in Q8 - // 20 = 1/MAX_DELAY in Q13 = 1/MAX_DELAY * 2^13 - xmean[i] = (aecm->medianXlogspec[i]); - ymean[i] = (aecm->medianYlogspec[i]); - - } - // Update Q-domain buffer - aecm->xfaQDomainBuf[histpos] = xfaQ; - - // Get binary spectra - // FLOAT: - // bxspectrum = bspectrum(xlogspec, xmean); - // - // FIX: - // input: xlogspecFIX,ylogspecFIX in Q8 - // xmeanFIX, ymeanFIX in Q8 - // output: unsigned long bxspectrum, byspectrum in Q0 - bxspectrum = WebRtcAecm_BSpectrum(farSpec, xmean); - byspectrum = WebRtcAecm_BSpectrum(nearSpec, ymean); - - // Shift binary spectrum history - memmove(&(aecm->bxHistory[1]), &(aecm->bxHistory[0]), - (MAX_DELAY - 1) * sizeof(WebRtc_UWord32)); - - aecm->bxHistory[0] = bxspectrum; - - // Compare with delayed spectra - WebRtcAecm_Hisser(byspectrum, aecm->bxHistory, bcount); - - for (i = 0; i < MAX_DELAY; i++) - { - // Update sum - // bcount is constrained to [0, 32], meaning we can smooth with a factor up to 2^11. - dtmp1 = (WebRtc_UWord16)bcount[i]; - dtmp1 = WEBRTC_SPL_LSHIFT_W16(dtmp1, 9); - state = &(aecm->medianBCount[i]); - res = WebRtcAecm_MedianEstimator(dtmp1, state, 9); - fcount[i] = (aecm->medianBCount[i]); - } - - // Find minimum - minpos = WebRtcSpl_MinIndexW16(fcount, MAX_DELAY); - - // If the farend has been active sufficiently long, begin accumulating a histogram - // of the minimum positions. Search for the maximum bin to determine the delay. - if (aecm->currentVADValue == 1) - { - if (aecm->delayVadCount >= kVadCountThreshold) - { - // Increment the histogram at the current minimum position. - if (aecm->delayHistogram[minpos] < kMaxHistogram) - { - aecm->delayHistogram[minpos] += 3; - } - -#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) - // Decrement the entire histogram. - for (i = 0; i < MAX_DELAY; i++) - { - if (aecm->delayHistogram[i] > 0) - { - aecm->delayHistogram[i]--; - } - } - - // Select the histogram index corresponding to the maximum bin as the delay. - maxHistLvl = 0; - aecm->maxDelayHistIdx = 0; - for (i = 0; i < MAX_DELAY; i++) - { - if (aecm->delayHistogram[i] > maxHistLvl) - { - maxHistLvl = aecm->delayHistogram[i]; - aecm->maxDelayHistIdx = i; - } - } -#else - maxHistLvl = 0; - aecm->maxDelayHistIdx = 0; - - for (i = 0; i < MAX_DELAY; i++) - { - WebRtc_Word16 tempVar = aecm->delayHistogram[i]; - - // Decrement the entire histogram. - if (tempVar > 0) - { - tempVar--; - aecm->delayHistogram[i] = tempVar; - - // Select the histogram index corresponding to the maximum bin as the delay. - if (tempVar > maxHistLvl) - { - maxHistLvl = tempVar; - aecm->maxDelayHistIdx = i; - } - } - } -#endif - } else - { - aecm->delayVadCount++; - } - } else - { - aecm->delayVadCount = 0; - } - - return aecm->maxDelayHistIdx; -} - int WebRtcAecm_FreeCore(AecmCore_t *aecm) { if (aecm == NULL) @@ -629,6 +312,7 @@ int WebRtcAecm_FreeCore(AecmCore_t *aecm) WebRtcApm_FreeBuffer(aecm->nearCleanFrameBuf); WebRtcApm_FreeBuffer(aecm->outFrameBuf); + WebRtcAecm_FreeDelayEstimator(aecm->delay_estimator); free(aecm); return 0; @@ -728,20 +412,26 @@ WebRtc_Word16 WebRtcAecm_AsymFilt(const WebRtc_Word16 filtOld, const WebRtc_Word // WebRtcAecm_CalcEnergies(...) // // This function calculates the log of energies for nearend, farend and estimated -// echoes. There is also an update of energy decision levels, i.e. internl VAD. +// echoes. There is also an update of energy decision levels, i.e. internal VAD. // // // @param aecm [i/o] Handle of the AECM instance. -// @param delayDiff [in] Delay position in farend buffer. -// @param nearEner [in] Near end energy for current block (Q[aecm->dfaQDomain]). -// @param echoEst [i/o] Estimated echo -// (Q[aecm->xfaQDomain[delayDiff]+RESOLUTION_CHANNEL16]). +// @param far_spectrum [in] Pointer to farend spectrum. +// @param far_q [in] Q-domain of farend spectrum. +// @param nearEner [in] Near end energy for current block in +// Q(aecm->dfaQDomain). +// @param echoEst [out] Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16). // -void WebRtcAecm_CalcEnergies(AecmCore_t * const aecm, const WebRtc_Word16 delayDiff, - const WebRtc_UWord32 nearEner, WebRtc_Word32 * const echoEst) +void WebRtcAecm_CalcEnergies(AecmCore_t * aecm, + const WebRtc_UWord16* far_spectrum, + const WebRtc_Word16 far_q, + const WebRtc_UWord32 nearEner, + WebRtc_Word32 * echoEst) { // Local variables - WebRtc_UWord32 tmpAdapt, tmpStored, tmpFar; + WebRtc_UWord32 tmpAdapt = 0; + WebRtc_UWord32 tmpStored = 0; + WebRtc_UWord32 tmpFar = 0; int i; @@ -751,6 +441,7 @@ void WebRtcAecm_CalcEnergies(AecmCore_t * const aecm, const WebRtc_Word16 delayD WebRtc_Word16 decrease_max_shifts = 11; WebRtc_Word16 increase_min_shifts = 11; WebRtc_Word16 decrease_min_shifts = 3; + WebRtc_Word16 kLogLowValue = WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7); // Get log of near end energy and store in buffer @@ -759,6 +450,7 @@ void WebRtcAecm_CalcEnergies(AecmCore_t * const aecm, const WebRtc_Word16 delayD sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1)); // Logarithm of integrated magnitude spectrum (nearEner) + tmp16 = kLogLowValue; if (nearEner) { zeros = WebRtcSpl_NormU32(nearEner); @@ -766,88 +458,71 @@ void WebRtcAecm_CalcEnergies(AecmCore_t * const aecm, const WebRtc_Word16 delayD (WEBRTC_SPL_LSHIFT_U32(nearEner, zeros) & 0x7FFFFFFF), 23); // log2 in Q8 - aecm->nearLogEnergy[0] = WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; - aecm->nearLogEnergy[0] -= WEBRTC_SPL_LSHIFT_W16(aecm->dfaNoisyQDomain, 8); - } else - { - aecm->nearLogEnergy[0] = 0; + tmp16 += WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; + tmp16 -= WEBRTC_SPL_LSHIFT_W16(aecm->dfaNoisyQDomain, 8); } - aecm->nearLogEnergy[0] += WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7); + aecm->nearLogEnergy[0] = tmp16; // END: Get log of near end energy // Get energy for the delayed far end signal and estimated // echo using both stored and adapted channels. - tmpAdapt = 0; - tmpStored = 0; - tmpFar = 0; - for (i = 0; i < PART_LEN1; i++) { // Get estimated echo energies for adaptive channel and stored channel echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], - aecm->xfaHistory[i][delayDiff]); - tmpFar += (WebRtc_UWord32)(aecm->xfaHistory[i][delayDiff]); + far_spectrum[i]); + tmpFar += (WebRtc_UWord32)(far_spectrum[i]); tmpAdapt += WEBRTC_SPL_UMUL_16_16(aecm->channelAdapt16[i], - aecm->xfaHistory[i][delayDiff]); + far_spectrum[i]); tmpStored += (WebRtc_UWord32)echoEst[i]; } // Shift buffers - memmove(aecm->farLogEnergy + 1, aecm->farLogEnergy, - sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1)); memmove(aecm->echoAdaptLogEnergy + 1, aecm->echoAdaptLogEnergy, sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1)); memmove(aecm->echoStoredLogEnergy + 1, aecm->echoStoredLogEnergy, sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1)); // Logarithm of delayed far end energy + tmp16 = kLogLowValue; if (tmpFar) { zeros = WebRtcSpl_NormU32(tmpFar); frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpFar, zeros) & 0x7FFFFFFF), 23); // log2 in Q8 - aecm->farLogEnergy[0] = WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; - aecm->farLogEnergy[0] -= WEBRTC_SPL_LSHIFT_W16(aecm->xfaQDomainBuf[delayDiff], 8); - } else - { - aecm->farLogEnergy[0] = 0; + tmp16 += WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; + tmp16 -= WEBRTC_SPL_LSHIFT_W16(far_q, 8); } - aecm->farLogEnergy[0] += WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7); + aecm->farLogEnergy = tmp16; // Logarithm of estimated echo energy through adapted channel + tmp16 = kLogLowValue; if (tmpAdapt) { zeros = WebRtcSpl_NormU32(tmpAdapt); frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpAdapt, zeros) & 0x7FFFFFFF), 23); //log2 in Q8 - aecm->echoAdaptLogEnergy[0] = WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; - aecm->echoAdaptLogEnergy[0] - -= WEBRTC_SPL_LSHIFT_W16(RESOLUTION_CHANNEL16 + aecm->xfaQDomainBuf[delayDiff], 8); - } else - { - aecm->echoAdaptLogEnergy[0] = 0; + tmp16 += WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; + tmp16 -= WEBRTC_SPL_LSHIFT_W16(RESOLUTION_CHANNEL16 + far_q, 8); } - aecm->echoAdaptLogEnergy[0] += WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7); + aecm->echoAdaptLogEnergy[0] = tmp16; // Logarithm of estimated echo energy through stored channel + tmp16 = kLogLowValue; if (tmpStored) { zeros = WebRtcSpl_NormU32(tmpStored); frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpStored, zeros) & 0x7FFFFFFF), 23); //log2 in Q8 - aecm->echoStoredLogEnergy[0] = WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; - aecm->echoStoredLogEnergy[0] - -= WEBRTC_SPL_LSHIFT_W16(RESOLUTION_CHANNEL16 + aecm->xfaQDomainBuf[delayDiff], 8); - } else - { - aecm->echoStoredLogEnergy[0] = 0; + tmp16 += WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; + tmp16 -= WEBRTC_SPL_LSHIFT_W16(RESOLUTION_CHANNEL16 + far_q, 8); } - aecm->echoStoredLogEnergy[0] += WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7); + aecm->echoStoredLogEnergy[0] = tmp16; // Update farend energy levels (min, max, vad, mse) - if (aecm->farLogEnergy[0] > FAR_ENERGY_MIN) + if (aecm->farLogEnergy > FAR_ENERGY_MIN) { if (aecm->startupState == 0) { @@ -856,9 +531,9 @@ void WebRtcAecm_CalcEnergies(AecmCore_t * const aecm, const WebRtc_Word16 delayD increase_min_shifts = 8; } - aecm->farEnergyMin = WebRtcAecm_AsymFilt(aecm->farEnergyMin, aecm->farLogEnergy[0], + aecm->farEnergyMin = WebRtcAecm_AsymFilt(aecm->farEnergyMin, aecm->farLogEnergy, increase_min_shifts, decrease_min_shifts); - aecm->farEnergyMax = WebRtcAecm_AsymFilt(aecm->farEnergyMax, aecm->farLogEnergy[0], + aecm->farEnergyMax = WebRtcAecm_AsymFilt(aecm->farEnergyMax, aecm->farLogEnergy, increase_max_shifts, decrease_max_shifts); aecm->farEnergyMaxMin = (aecm->farEnergyMax - aecm->farEnergyMin); @@ -879,10 +554,12 @@ void WebRtcAecm_CalcEnergies(AecmCore_t * const aecm, const WebRtc_Word16 delayD aecm->farEnergyVAD = aecm->farEnergyMin + tmp16; } else { - if (aecm->farEnergyVAD > aecm->farLogEnergy[0]) + if (aecm->farEnergyVAD > aecm->farLogEnergy) { - aecm->farEnergyVAD += WEBRTC_SPL_RSHIFT_W16(aecm->farLogEnergy[0] + tmp16 - - aecm->farEnergyVAD, 6); + aecm->farEnergyVAD += WEBRTC_SPL_RSHIFT_W16(aecm->farLogEnergy + + tmp16 - + aecm->farEnergyVAD, + 6); aecm->vadUpdateCount = 0; } else { @@ -894,7 +571,7 @@ void WebRtcAecm_CalcEnergies(AecmCore_t * const aecm, const WebRtc_Word16 delayD } // Update VAD variables - if (aecm->farLogEnergy[0] > aecm->farEnergyVAD) + if (aecm->farLogEnergy > aecm->farEnergyVAD) { if ((aecm->startupState == 0) | (aecm->farEnergyMaxMin > FAR_ENERGY_DIFF)) { @@ -910,8 +587,9 @@ void WebRtcAecm_CalcEnergies(AecmCore_t * const aecm, const WebRtc_Word16 delayD aecm->firstVAD = 0; if (aecm->echoAdaptLogEnergy[0] > aecm->nearLogEnergy[0]) { - // The estimated echo has higher energy than the near end signal. This means that - // the initialization was too aggressive. Scale down by a factor 8 + // The estimated echo has higher energy than the near end signal. + // This means that the initialization was too aggressive. Scale + // down by a factor 8 for (i = 0; i < PART_LEN1; i++) { aecm->channelAdapt16[i] >>= 3; @@ -921,16 +599,6 @@ void WebRtcAecm_CalcEnergies(AecmCore_t * const aecm, const WebRtc_Word16 delayD aecm->firstVAD = 1; } } - // END: Energies of delayed far, echo estimates - // TODO(bjornv): Will be removed in final version. -#ifdef VAD_DATA - fwrite(&(aecm->currentVADValue), sizeof(WebRtc_Word16), 1, aecm->vad_file); - fwrite(&(aecm->currentDelay), sizeof(WebRtc_Word16), 1, aecm->delay_file); - fwrite(&(aecm->farLogEnergy[0]), sizeof(WebRtc_Word16), 1, aecm->far_cur_file); - fwrite(&(aecm->farEnergyMin), sizeof(WebRtc_Word16), 1, aecm->far_min_file); - fwrite(&(aecm->farEnergyMax), sizeof(WebRtc_Word16), 1, aecm->far_max_file); - fwrite(&(aecm->farEnergyVAD), sizeof(WebRtc_Word16), 1, aecm->far_vad_file); -#endif } // WebRtcAecm_CalcStepSize(...) @@ -939,7 +607,7 @@ void WebRtcAecm_CalcEnergies(AecmCore_t * const aecm, const WebRtc_Word16 delayD // // // @param aecm [in] Handle of the AECM instance. -// @param mu [out] (Return value) Stepsize in log2(), i.e. number of shifts. +// @param mu [out] (Return value) Stepsize in log2(), i.e. number of shifts. // // WebRtc_Word16 WebRtcAecm_CalcStepSize(AecmCore_t * const aecm) @@ -947,11 +615,10 @@ WebRtc_Word16 WebRtcAecm_CalcStepSize(AecmCore_t * const aecm) WebRtc_Word32 tmp32; WebRtc_Word16 tmp16; - WebRtc_Word16 mu; + WebRtc_Word16 mu = MU_MAX; // Here we calculate the step size mu used in the // following NLMS based Channel estimation algorithm - mu = MU_MAX; if (!aecm->currentVADValue) { // Far end energy level too low, no channel update @@ -963,7 +630,7 @@ WebRtc_Word16 WebRtcAecm_CalcStepSize(AecmCore_t * const aecm) mu = MU_MIN; } else { - tmp16 = (aecm->farLogEnergy[0] - aecm->farEnergyMin); + tmp16 = (aecm->farLogEnergy - aecm->farEnergyMin); tmp32 = WEBRTC_SPL_MUL_16_16(tmp16, MU_DIFF); tmp32 = WebRtcSpl_DivW32W16(tmp32, aecm->farEnergyMaxMin); mu = MU_MIN - 1 - (WebRtc_Word16)(tmp32); @@ -975,7 +642,6 @@ WebRtc_Word16 WebRtcAecm_CalcStepSize(AecmCore_t * const aecm) mu = MU_MAX; // Equivalent with maximum step size of 2^-MU_MAX } } - // END: Update step size return mu; } @@ -986,15 +652,18 @@ WebRtc_Word16 WebRtcAecm_CalcStepSize(AecmCore_t * const aecm) // // // @param aecm [i/o] Handle of the AECM instance. +// @param far_spectrum [in] Absolute value of the farend signal in Q(far_q) +// @param far_q [in] Q-domain of the farend signal // @param dfa [in] Absolute value of the nearend signal (Q[aecm->dfaQDomain]) -// @param delayDiff [in] Delay position in farend buffer. // @param mu [in] NLMS step size. -// @param echoEst [i/o] Estimated echo -// (Q[aecm->xfaQDomain[delayDiff]+RESOLUTION_CHANNEL16]). +// @param echoEst [i/o] Estimated echo in Q(far_q+RESOLUTION_CHANNEL16). // -void WebRtcAecm_UpdateChannel(AecmCore_t * const aecm, const WebRtc_UWord16 * const dfa, - const WebRtc_Word16 delayDiff, const WebRtc_Word16 mu, - WebRtc_Word32 * const echoEst) +void WebRtcAecm_UpdateChannel(AecmCore_t * aecm, + const WebRtc_UWord16* far_spectrum, + const WebRtc_Word16 far_q, + const WebRtc_UWord16 * const dfa, + const WebRtc_Word16 mu, + WebRtc_Word32 * echoEst) { WebRtc_UWord32 tmpU32no1, tmpU32no2; @@ -1018,21 +687,20 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * const aecm, const WebRtc_UWord16 * co // Determine norm of channel and farend to make sure we don't get overflow in // multiplication zerosCh = WebRtcSpl_NormU32(aecm->channelAdapt32[i]); - zerosFar = WebRtcSpl_NormU32((WebRtc_UWord32)aecm->xfaHistory[i][delayDiff]); + zerosFar = WebRtcSpl_NormU32((WebRtc_UWord32)far_spectrum[i]); if (zerosCh + zerosFar > 31) { // Multiplication is safe tmpU32no1 = WEBRTC_SPL_UMUL_32_16(aecm->channelAdapt32[i], - aecm->xfaHistory[i][delayDiff]); + far_spectrum[i]); shiftChFar = 0; } else { // We need to shift down before multiplication shiftChFar = 32 - zerosCh - zerosFar; - tmpU32no1 - = WEBRTC_SPL_UMUL_32_16(WEBRTC_SPL_RSHIFT_W32(aecm->channelAdapt32[i], - shiftChFar), - aecm->xfaHistory[i][delayDiff]); + tmpU32no1 = WEBRTC_SPL_UMUL_32_16( + WEBRTC_SPL_RSHIFT_W32(aecm->channelAdapt32[i], shiftChFar), + far_spectrum[i]); } // Determine Q-domain of numerator zerosNum = WebRtcSpl_NormU32(tmpU32no1); @@ -1043,8 +711,8 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * const aecm, const WebRtc_UWord16 * co { zerosDfa = 32; } - tmp16no1 = zerosDfa - 2 + aecm->dfaNoisyQDomain - RESOLUTION_CHANNEL32 - - aecm->xfaQDomainBuf[delayDiff] + shiftChFar; + tmp16no1 = zerosDfa - 2 + aecm->dfaNoisyQDomain - + RESOLUTION_CHANNEL32 - far_q + shiftChFar; if (zerosNum > tmp16no1 + 1) { xfaQ = tmp16no1; @@ -1052,26 +720,25 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * const aecm, const WebRtc_UWord16 * co } else { xfaQ = zerosNum - 2; - dfaQ = RESOLUTION_CHANNEL32 + aecm->xfaQDomainBuf[delayDiff] - - aecm->dfaNoisyQDomain - shiftChFar + xfaQ; + dfaQ = RESOLUTION_CHANNEL32 + far_q - aecm->dfaNoisyQDomain - + shiftChFar + xfaQ; } // Add in the same Q-domain tmpU32no1 = WEBRTC_SPL_SHIFT_W32(tmpU32no1, xfaQ); tmpU32no2 = WEBRTC_SPL_SHIFT_W32((WebRtc_UWord32)dfa[i], dfaQ); tmp32no1 = (WebRtc_Word32)tmpU32no2 - (WebRtc_Word32)tmpU32no1; zerosNum = WebRtcSpl_NormW32(tmp32no1); - if ((tmp32no1) && (aecm->xfaHistory[i][delayDiff] > (CHANNEL_VAD - << aecm->xfaQDomainBuf[delayDiff]))) + if ((tmp32no1) && (far_spectrum[i] > (CHANNEL_VAD << far_q))) { // // Update is needed // // This is what we would like to compute // - // tmp32no1 = dfa[i] - (aecm->channelAdapt[i] * aecm->xfaHistory[i][delayDiff]) + // tmp32no1 = dfa[i] - (aecm->channelAdapt[i] * far_spectrum[i]) // tmp32norm = (i + 1) // aecm->channelAdapt[i] += (2^mu) * tmp32no1 - // / (tmp32norm * aecm->xfaHistory[i][delayDiff]) + // / (tmp32norm * far_spectrum[i]) // // Make sure we don't get overflow in multiplication. @@ -1080,11 +747,11 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * const aecm, const WebRtc_UWord16 * co if (tmp32no1 > 0) { tmp32no2 = (WebRtc_Word32)WEBRTC_SPL_UMUL_32_16(tmp32no1, - aecm->xfaHistory[i][delayDiff]); + far_spectrum[i]); } else { tmp32no2 = -(WebRtc_Word32)WEBRTC_SPL_UMUL_32_16(-tmp32no1, - aecm->xfaHistory[i][delayDiff]); + far_spectrum[i]); } shiftNum = 0; } else @@ -1094,12 +761,12 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * const aecm, const WebRtc_UWord16 * co { tmp32no2 = (WebRtc_Word32)WEBRTC_SPL_UMUL_32_16( WEBRTC_SPL_RSHIFT_W32(tmp32no1, shiftNum), - aecm->xfaHistory[i][delayDiff]); + far_spectrum[i]); } else { tmp32no2 = -(WebRtc_Word32)WEBRTC_SPL_UMUL_32_16( WEBRTC_SPL_RSHIFT_W32(-tmp32no1, shiftNum), - aecm->xfaHistory[i][delayDiff]); + far_spectrum[i]); } } // Normalize with respect to frequency bin @@ -1132,47 +799,40 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * const aecm, const WebRtc_UWord16 * co { // During startup we store the channel every block. memcpy(aecm->channelStored, aecm->channelAdapt16, sizeof(WebRtc_Word16) * PART_LEN1); - // TODO(bjornv): Will be removed in final version. -#ifdef STORE_CHANNEL_DATA - fwrite(aecm->channelStored, sizeof(WebRtc_Word16), PART_LEN1, aecm->channel_file_init); -#endif // Recalculate echo estimate #if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) for (i = 0; i < PART_LEN1; i++) { - echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], - aecm->xfaHistory[i][delayDiff]); + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]); } #else for (i = 0; i < PART_LEN; ) //assume PART_LEN is 4's multiples { echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], - aecm->xfaHistory[i][delayDiff]); + far_spectrum[i]); i++; echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], - aecm->xfaHistory[i][delayDiff]); + far_spectrum[i]); i++; echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], - aecm->xfaHistory[i][delayDiff]); + far_spectrum[i]); i++; echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], - aecm->xfaHistory[i][delayDiff]); + far_spectrum[i]); i++; } echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], - aecm->xfaHistory[i][delayDiff]); + far_spectrum[i]); #endif } else { - if (aecm->farLogEnergy[0] < aecm->farEnergyMSE) + if (aecm->farLogEnergy < aecm->farEnergyMSE) { aecm->mseChannelCount = 0; - aecm->delayCount = 0; } else { aecm->mseChannelCount++; - aecm->delayCount++; } // Enough data for validation. Store channel if we can. if (aecm->mseChannelCount >= (MIN_MSE_COUNT + 10)) @@ -1233,32 +893,31 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * const aecm, const WebRtc_UWord16 * co // calculations. Store the adaptive channel. memcpy(aecm->channelStored, aecm->channelAdapt16, sizeof(WebRtc_Word16) * PART_LEN1); - // TODO(bjornv): Will be removed in final version. -#ifdef STORE_CHANNEL_DATA - fwrite(aecm->channelStored, sizeof(WebRtc_Word16), PART_LEN1, - aecm->channel_file); -#endif -// Recalculate echo estimate + // Recalculate echo estimate #if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) for (i = 0; i < PART_LEN1; i++) { - echoEst[i] - = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]); + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); } #else for (i = 0; i < PART_LEN; ) //assume PART_LEN is 4's multiples { - echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]); + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); i++; - echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]); + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); i++; - echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]); + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); i++; - echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]); + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); i++; } - echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]); + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]); #endif // Update threshold if (aecm->mseThreshold == WEBRTC_SPL_WORD32_MAX) @@ -1297,13 +956,12 @@ WebRtc_Word16 WebRtcAecm_CalcSuppressionGain(AecmCore_t * const aecm) { WebRtc_Word32 tmp32no1; - WebRtc_Word16 supGain; + WebRtc_Word16 supGain = SUPGAIN_DEFAULT; WebRtc_Word16 tmp16no1; WebRtc_Word16 dE = 0; // Determine suppression gain used in the Wiener filter. The gain is based on a mix of far // end energy and echo estimation error. - supGain = SUPGAIN_DEFAULT; // Adjust for the far end signal level. A low signal level indicates no far end signal, // hence we set the suppression gain to 0 if (!aecm->currentVADValue) @@ -1363,134 +1021,171 @@ WebRtc_Word16 WebRtcAecm_CalcSuppressionGain(AecmCore_t * const aecm) return aecm->supGain; } -// WebRtcAecm_DelayCompensation(...) +// Transforms a time domain signal into the frequency domain, outputting the +// complex valued signal, absolute value and sum of absolute values. // -// Secondary delay estimation that can be used as a backup or for validation. This function is -// still under construction and not activated in current version. +// time_signal [in] Pointer to time domain signal +// freq_signal_real [out] Pointer to real part of frequency domain array +// freq_signal_imag [out] Pointer to imaginary part of frequency domain +// array +// freq_signal_abs [out] Pointer to absolute value of frequency domain +// array +// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in +// the frequency domain array +// return value The Q-domain of current frequency values // -// -// @param aecm [i/o] Handle of the AECM instance. -// -// -void WebRtcAecm_DelayCompensation(AecmCore_t * const aecm) +static int TimeToFrequencyDomain(const WebRtc_Word16* time_signal, + WebRtc_Word16* freq_signal_real, + WebRtc_Word16* freq_signal_imag, + WebRtc_UWord16* freq_signal_abs, + WebRtc_UWord32* freq_signal_sum_abs) { - int i, j; - WebRtc_Word32 delayMeanEcho[CORR_BUF_LEN]; - WebRtc_Word32 delayMeanNear[CORR_BUF_LEN]; - WebRtc_Word16 sumBitPattern, bitPatternEcho, bitPatternNear, maxPos, maxValue, - maxValueLeft, maxValueRight; + int i = 0; + int j = 0; + int time_signal_scaling = 0; + int ret = 0; - // Check delay (calculate the delay offset (if we can)). - if ((aecm->startupState > 0) & (aecm->delayCount >= CORR_MAX_BUF) & aecm->delayOffsetFlag) + WebRtc_Word32 tmp32no1; + WebRtc_Word32 tmp32no2; + + WebRtc_Word16 fft[PART_LEN4]; + WebRtc_Word16 post_fft[PART_LEN2]; + WebRtc_Word16 tmp16no1; + WebRtc_Word16 tmp16no2; +#ifdef AECM_WITH_ABS_APPROX + WebRtc_Word16 max_value = 0; + WebRtc_Word16 min_value = 0; + WebRtc_UWord16 alpha = 0; + WebRtc_UWord16 beta = 0; +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); + time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); +#endif + + memset(fft, 0, sizeof(WebRtc_Word16) * PART_LEN4); + // FFT of signal + for (i = 0, j = 0; i < PART_LEN; i++, j += 2) { - // Calculate mean values - for (i = 0; i < CORR_BUF_LEN; i++) + // Window time domain signal and insert into real part of + // transformation array |fft| + fft[j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( + (time_signal[i] << time_signal_scaling), + kSqrtHanning[i], + 14); + fft[PART_LEN2 + j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( + (time_signal[PART_LEN + i] << time_signal_scaling), + kSqrtHanning[PART_LEN - i], + 14); + // Inserting zeros in imaginary parts not necessary since we + // initialized the array with all zeros + } + + // Fourier transformation of time domain signal. + // The result is scaled with 1/PART_LEN2, that is, the result is in Q(-6) + // for PART_LEN = 32 + + WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT); + ret = WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); + + // Take only the first PART_LEN2 samples + for (i = 0; i < PART_LEN2; i++) + { + post_fft[i] = fft[i]; + } + // The imaginary part has to switch sign + for (i = 1; i < PART_LEN2;) + { + post_fft[i] = -post_fft[i]; + i += 2; + } + + // Extract imaginary and real part, calculate the magnitude for all frequency bins + freq_signal_imag[0] = 0; + freq_signal_imag[PART_LEN] = 0; + freq_signal_real[0] = post_fft[0]; + freq_signal_real[PART_LEN] = fft[PART_LEN2]; + freq_signal_abs[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16( + freq_signal_real[0]); + freq_signal_abs[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16( + freq_signal_real[PART_LEN]); + (*freq_signal_sum_abs) = (WebRtc_UWord32)(freq_signal_abs[0]) + + (WebRtc_UWord32)(freq_signal_abs[PART_LEN]); + + for (i = 1; i < PART_LEN; i++) + { + j = WEBRTC_SPL_LSHIFT_W32(i, 1); + freq_signal_real[i] = post_fft[j]; + freq_signal_imag[i] = post_fft[j + 1]; + + if (freq_signal_real[i] == 0) { - delayMeanEcho[i] = 0; - delayMeanNear[i] = 0; -#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) - for (j = 0; j < CORR_WIDTH; j++) + freq_signal_abs[i] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16( + freq_signal_imag[i]); + } + else if (freq_signal_imag[i] == 0) + { + freq_signal_abs[i] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16( + freq_signal_real[i]); + } + else + { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) + // + // The parameters alpha and beta are stored in Q15 + + tmp16no1 = WEBRTC_SPL_ABS_W16(post_fft[j]); + tmp16no2 = WEBRTC_SPL_ABS_W16(post_fft[j + 1]); + +#ifdef AECM_WITH_ABS_APPROX + if(tmp16no1 > tmp16no2) { - delayMeanEcho[i] += (WebRtc_Word32)aecm->echoStoredLogEnergy[i + j]; - delayMeanNear[i] += (WebRtc_Word32)aecm->nearLogEnergy[i + j]; + max_value = tmp16no1; + min_value = tmp16no2; + } else + { + max_value = tmp16no2; + min_value = tmp16no1; } + + // Magnitude in Q(-6) + if ((max_value >> 2) > min_value) + { + alpha = kAlpha1; + beta = kBeta1; + } else if ((max_value >> 1) > min_value) + { + alpha = kAlpha2; + beta = kBeta2; + } else + { + alpha = kAlpha3; + beta = kBeta3; + } + tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(max_value, + alpha, + 15); + tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(min_value, + beta, + 15); + freq_signal_abs[i] = (WebRtc_UWord16)tmp16no1 + + (WebRtc_UWord16)tmp16no2; #else - for (j = 0; j < CORR_WIDTH -1; ) - { - delayMeanEcho[i] += (WebRtc_Word32)aecm->echoStoredLogEnergy[i + j]; - delayMeanNear[i] += (WebRtc_Word32)aecm->nearLogEnergy[i + j]; - j++; - delayMeanEcho[i] += (WebRtc_Word32)aecm->echoStoredLogEnergy[i + j]; - delayMeanNear[i] += (WebRtc_Word32)aecm->nearLogEnergy[i + j]; - j++; - } - delayMeanEcho[i] += (WebRtc_Word32)aecm->echoStoredLogEnergy[i + j]; - delayMeanNear[i] += (WebRtc_Word32)aecm->nearLogEnergy[i + j]; + tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1); + tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2); + tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2); + tmp32no1 = WebRtcSpl_Sqrt(tmp32no2); + + freq_signal_abs[i] = (WebRtc_UWord16)tmp32no1; #endif } - // Calculate correlation values - for (i = 0; i < CORR_BUF_LEN; i++) - { - sumBitPattern = 0; -#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) - for (j = 0; j < CORR_WIDTH; j++) - { - bitPatternEcho = (WebRtc_Word16)((WebRtc_Word32)aecm->echoStoredLogEnergy[i - + j] * CORR_WIDTH > delayMeanEcho[i]); - bitPatternNear = (WebRtc_Word16)((WebRtc_Word32)aecm->nearLogEnergy[CORR_MAX - + j] * CORR_WIDTH > delayMeanNear[CORR_MAX]); - sumBitPattern += !(bitPatternEcho ^ bitPatternNear); - } -#else - for (j = 0; j < CORR_WIDTH -1; ) - { - bitPatternEcho = (WebRtc_Word16)((WebRtc_Word32)aecm->echoStoredLogEnergy[i - + j] * CORR_WIDTH > delayMeanEcho[i]); - bitPatternNear = (WebRtc_Word16)((WebRtc_Word32)aecm->nearLogEnergy[CORR_MAX - + j] * CORR_WIDTH > delayMeanNear[CORR_MAX]); - sumBitPattern += !(bitPatternEcho ^ bitPatternNear); - j++; - bitPatternEcho = (WebRtc_Word16)((WebRtc_Word32)aecm->echoStoredLogEnergy[i - + j] * CORR_WIDTH > delayMeanEcho[i]); - bitPatternNear = (WebRtc_Word16)((WebRtc_Word32)aecm->nearLogEnergy[CORR_MAX - + j] * CORR_WIDTH > delayMeanNear[CORR_MAX]); - sumBitPattern += !(bitPatternEcho ^ bitPatternNear); - j++; - } - bitPatternEcho = (WebRtc_Word16)((WebRtc_Word32)aecm->echoStoredLogEnergy[i + j] - * CORR_WIDTH > delayMeanEcho[i]); - bitPatternNear = (WebRtc_Word16)((WebRtc_Word32)aecm->nearLogEnergy[CORR_MAX + j] - * CORR_WIDTH > delayMeanNear[CORR_MAX]); - sumBitPattern += !(bitPatternEcho ^ bitPatternNear); -#endif - aecm->delayCorrelation[i] = sumBitPattern; - } - aecm->newDelayCorrData = 1; // Indicate we have new correlation data to evaluate + (*freq_signal_sum_abs) += (WebRtc_UWord32)freq_signal_abs[i]; } - if ((aecm->startupState == 2) & (aecm->lastDelayUpdateCount > (CORR_WIDTH << 1)) - & aecm->newDelayCorrData) - { - // Find maximum value and maximum position as well as values on the sides. - maxPos = 0; - maxValue = aecm->delayCorrelation[0]; - maxValueLeft = maxValue; - maxValueRight = aecm->delayCorrelation[CORR_DEV]; - for (i = 1; i < CORR_BUF_LEN; i++) - { - if (aecm->delayCorrelation[i] > maxValue) - { - maxValue = aecm->delayCorrelation[i]; - maxPos = i; - if (maxPos < CORR_DEV) - { - maxValueLeft = aecm->delayCorrelation[0]; - maxValueRight = aecm->delayCorrelation[i + CORR_DEV]; - } else if (maxPos > (CORR_MAX << 1) - CORR_DEV) - { - maxValueLeft = aecm->delayCorrelation[i - CORR_DEV]; - maxValueRight = aecm->delayCorrelation[(CORR_MAX << 1)]; - } else - { - maxValueLeft = aecm->delayCorrelation[i - CORR_DEV]; - maxValueRight = aecm->delayCorrelation[i + CORR_DEV]; - } - } - } - if ((maxPos > 0) & (maxPos < (CORR_MAX << 1))) - { - // Avoid maximum at boundaries. The maximum peak has to be higher than - // CORR_MAX_LEVEL. It also has to be sharp, i.e. the value CORR_DEV bins off should - // be CORR_MAX_LOW lower than the maximum. - if ((maxValue > CORR_MAX_LEVEL) & (maxValueLeft < maxValue - CORR_MAX_LOW) - & (maxValueRight < maxValue - CORR_MAX_LOW)) - { - aecm->delayAdjust += CORR_MAX - maxPos; - aecm->newDelayCorrData = 0; - aecm->lastDelayUpdateCount = 0; - } - } - } - // END: "Check delay" + + return time_signal_scaling; } void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * const farend, @@ -1502,38 +1197,31 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons WebRtc_UWord32 xfaSum; WebRtc_UWord32 dfaNoisySum; + WebRtc_UWord32 dfaCleanSum; WebRtc_UWord32 echoEst32Gained; WebRtc_UWord32 tmpU32; WebRtc_Word32 tmp32no1; - WebRtc_Word32 tmp32no2; WebRtc_Word32 echoEst32[PART_LEN1]; WebRtc_UWord16 xfa[PART_LEN1]; WebRtc_UWord16 dfaNoisy[PART_LEN1]; WebRtc_UWord16 dfaClean[PART_LEN1]; WebRtc_UWord16* ptrDfaClean = dfaClean; - + const WebRtc_UWord16* far_spectrum_ptr = NULL; int outCFFT; WebRtc_Word16 fft[PART_LEN4]; - WebRtc_Word16 postFft[PART_LEN2]; WebRtc_Word16 dfwReal[PART_LEN1]; WebRtc_Word16 dfwImag[PART_LEN1]; - WebRtc_Word16 xfwReal[PART_LEN1]; - WebRtc_Word16 xfwImag[PART_LEN1]; WebRtc_Word16 efwReal[PART_LEN1]; WebRtc_Word16 efwImag[PART_LEN1]; WebRtc_Word16 hnl[PART_LEN1]; - WebRtc_Word16 numPosCoef; - WebRtc_Word16 nlpGain; - WebRtc_Word16 delay, diff, diffMinusOne; + WebRtc_Word16 numPosCoef = 0; + WebRtc_Word16 nlpGain = ONE_Q14; + WebRtc_Word16 delay; WebRtc_Word16 tmp16no1; WebRtc_Word16 tmp16no2; -#ifdef AECM_WITH_ABS_APPROX - WebRtc_Word16 maxValue; - WebRtc_Word16 minValue; -#endif WebRtc_Word16 mu; WebRtc_Word16 supGain; WebRtc_Word16 zeros32, zeros16; @@ -1551,10 +1239,6 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons unsigned int milliseconds; #endif -#ifdef AECM_WITH_ABS_APPROX - WebRtc_UWord16 alpha, beta; -#endif - // Determine startup state. There are three states: // (0) the first CONV_LEN blocks // (1) another CONV_LEN blocks @@ -1573,39 +1257,6 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons { memcpy(aecm->dBufClean + PART_LEN, nearendClean, sizeof(WebRtc_Word16) * PART_LEN); } - // TODO(bjornv): Will be removed in final version. -#ifdef VAD_DATA - fwrite(aecm->xBuf, sizeof(WebRtc_Word16), PART_LEN, aecm->far_file); -#endif - -#ifdef AECM_DYNAMIC_Q - tmp16no1 = WebRtcSpl_MaxAbsValueW16(aecm->dBufNoisy, PART_LEN2); - tmp16no2 = WebRtcSpl_MaxAbsValueW16(aecm->xBuf, PART_LEN2); - zerosDBufNoisy = WebRtcSpl_NormW16(tmp16no1); - zerosXBuf = WebRtcSpl_NormW16(tmp16no2); -#else - zerosDBufNoisy = 0; - zerosXBuf = 0; -#endif - aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; - aecm->dfaNoisyQDomain = zerosDBufNoisy; - - if (nearendClean != NULL) - { -#ifdef AECM_DYNAMIC_Q - tmp16no1 = WebRtcSpl_MaxAbsValueW16(aecm->dBufClean, PART_LEN2); - zerosDBufClean = WebRtcSpl_NormW16(tmp16no1); -#else - zerosDBufClean = 0; -#endif - aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; - aecm->dfaCleanQDomain = zerosDBufClean; - } else - { - zerosDBufClean = zerosDBufNoisy; - aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; - aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; - } #ifdef ARM_WINM_LOG_ // measure tick start @@ -1613,308 +1264,39 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons QueryPerformanceCounter((LARGE_INTEGER*)&start); #endif - // FFT of noisy near end signal - for (i = 0; i < PART_LEN; i++) - { - j = WEBRTC_SPL_LSHIFT_W32(i, 1); - // Window near end - fft[j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->dBufNoisy[i] - << zerosDBufNoisy), kSqrtHanning[i], 14); - fft[PART_LEN2 + j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( - (aecm->dBufNoisy[PART_LEN + i] << zerosDBufNoisy), - kSqrtHanning[PART_LEN - i], 14); - // Inserting zeros in imaginary parts - fft[j + 1] = 0; - fft[PART_LEN2 + j + 1] = 0; - } + // Transform far end signal from time domain to frequency domain. + zerosXBuf = TimeToFrequencyDomain(aecm->xBuf, + dfwReal, + dfwImag, + xfa, + &xfaSum); - // Fourier transformation of near end signal. - // The result is scaled with 1/PART_LEN2, that is, the result is in Q(-6) for PART_LEN = 32 + // Transform noisy near end signal from time domain to frequency domain. + zerosDBufNoisy = TimeToFrequencyDomain(aecm->dBufNoisy, + dfwReal, + dfwImag, + dfaNoisy, + &dfaNoisySum); + aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; + aecm->dfaNoisyQDomain = zerosDBufNoisy; - WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT); - outCFFT = WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); - - // Take only the first PART_LEN2 samples - for (i = 0; i < PART_LEN2; i++) - { - postFft[i] = fft[i]; - } - // The imaginary part has to switch sign - for (i = 1; i < PART_LEN2;) - { - postFft[i] = -postFft[i]; - i += 2; - } - - // Extract imaginary and real part, calculate the magnitude for all frequency bins - dfwImag[0] = 0; - dfwImag[PART_LEN] = 0; - dfwReal[0] = postFft[0]; - dfwReal[PART_LEN] = fft[PART_LEN2]; - dfaNoisy[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[0]); - dfaNoisy[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[PART_LEN]); - dfaNoisySum = (WebRtc_UWord32)(dfaNoisy[0]); - dfaNoisySum += (WebRtc_UWord32)(dfaNoisy[PART_LEN]); - - for (i = 1; i < PART_LEN; i++) - { - j = WEBRTC_SPL_LSHIFT_W32(i, 1); - dfwReal[i] = postFft[j]; - dfwImag[i] = postFft[j + 1]; - - if (dfwReal[i] == 0 || dfwImag[i] == 0) - { - dfaNoisy[i] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[i] + dfwImag[i]); - } else - { - // Approximation for magnitude of complex fft output - // magn = sqrt(real^2 + imag^2) - // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) - // - // The parameters alpha and beta are stored in Q15 - - tmp16no1 = WEBRTC_SPL_ABS_W16(postFft[j]); - tmp16no2 = WEBRTC_SPL_ABS_W16(postFft[j + 1]); - -#ifdef AECM_WITH_ABS_APPROX - if(tmp16no1 > tmp16no2) - { - maxValue = tmp16no1; - minValue = tmp16no2; - } else - { - maxValue = tmp16no2; - minValue = tmp16no1; - } - - // Magnitude in Q-6 - if ((maxValue >> 2) > minValue) - { - alpha = kAlpha1; - beta = kBeta1; - } else if ((maxValue >> 1) > minValue) - { - alpha = kAlpha2; - beta = kBeta2; - } else - { - alpha = kAlpha3; - beta = kBeta3; - } - tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(maxValue, alpha, 15); - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(minValue, beta, 15); - dfaNoisy[i] = (WebRtc_UWord16)tmp16no1 + (WebRtc_UWord16)tmp16no2; -#else - tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1); - tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2); - tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2); - tmp32no1 = WebRtcSpl_Sqrt(tmp32no2); - dfaNoisy[i] = (WebRtc_UWord16)tmp32no1; -#endif - } - dfaNoisySum += (WebRtc_UWord32)dfaNoisy[i]; - } - // END: FFT of noisy near end signal if (nearendClean == NULL) { ptrDfaClean = dfaNoisy; + aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; + aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; + dfaCleanSum = dfaNoisySum; } else { - // FFT of clean near end signal - for (i = 0; i < PART_LEN; i++) - { - j = WEBRTC_SPL_LSHIFT_W32(i, 1); - // Window near end - fft[j] - = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->dBufClean[i] << zerosDBufClean), kSqrtHanning[i], 14); - fft[PART_LEN2 + j] - = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->dBufClean[PART_LEN + i] << zerosDBufClean), kSqrtHanning[PART_LEN - i], 14); - // Inserting zeros in imaginary parts - fft[j + 1] = 0; - fft[PART_LEN2 + j + 1] = 0; - } - - // Fourier transformation of near end signal. - // The result is scaled with 1/PART_LEN2, that is, in Q(-6) for PART_LEN = 32 - WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT); - outCFFT = WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); - - // Take only the first PART_LEN2 samples - for (i = 0; i < PART_LEN2; i++) - { - postFft[i] = fft[i]; - } - // The imaginary part has to switch sign - for (i = 1; i < PART_LEN2;) - { - postFft[i] = -postFft[i]; - i += 2; - } - - // Extract imaginary and real part, calculate the magnitude for all frequency bins - dfwImag[0] = 0; - dfwImag[PART_LEN] = 0; - dfwReal[0] = postFft[0]; - dfwReal[PART_LEN] = fft[PART_LEN2]; - dfaClean[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[0]); - dfaClean[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[PART_LEN]); - - for (i = 1; i < PART_LEN; i++) - { - j = WEBRTC_SPL_LSHIFT_W32(i, 1); - dfwReal[i] = postFft[j]; - dfwImag[i] = postFft[j + 1]; - - if (dfwReal[i] == 0 || dfwImag[i] == 0) - { - dfaClean[i] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[i] + dfwImag[i]); - } else - { - // Approximation for magnitude of complex fft output - // magn = sqrt(real^2 + imag^2) - // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) - // - // The parameters alpha and beta are stored in Q15 - - tmp16no1 = WEBRTC_SPL_ABS_W16(postFft[j]); - tmp16no2 = WEBRTC_SPL_ABS_W16(postFft[j + 1]); - -#ifdef AECM_WITH_ABS_APPROX - if(tmp16no1 > tmp16no2) - { - maxValue = tmp16no1; - minValue = tmp16no2; - } else - { - maxValue = tmp16no2; - minValue = tmp16no1; - } - - // Magnitude in Q-6 - if ((maxValue >> 2) > minValue) - { - alpha = kAlpha1; - beta = kBeta1; - } else if ((maxValue >> 1) > minValue) - { - alpha = kAlpha2; - beta = kBeta2; - } else - { - alpha = kAlpha3; - beta = kBeta3; - } - tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(maxValue, alpha, 15); - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(minValue, beta, 15); - dfaClean[i] = (WebRtc_UWord16)tmp16no1 + (WebRtc_UWord16)tmp16no2; -#else - tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1); - tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2); - tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2); - tmp32no1 = WebRtcSpl_Sqrt(tmp32no2); - dfaClean[i] = (WebRtc_UWord16)tmp32no1; -#endif - } - } - } - // END: FFT of clean near end signal - - // FFT of far end signal - for (i = 0; i < PART_LEN; i++) - { - j = WEBRTC_SPL_LSHIFT_W32(i, 1); - // Window farend - fft[j] - = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->xBuf[i] << zerosXBuf), kSqrtHanning[i], 14); - fft[PART_LEN2 + j] - = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->xBuf[PART_LEN + i] << zerosXBuf), kSqrtHanning[PART_LEN - i], 14); - // Inserting zeros in imaginary parts - fft[j + 1] = 0; - fft[PART_LEN2 + j + 1] = 0; - } - // Fourier transformation of far end signal. - // The result is scaled with 1/PART_LEN2, that is the result is in Q(-6) for PART_LEN = 32 - WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT); - outCFFT = WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); - - // Take only the first PART_LEN2 samples - for (i = 0; i < PART_LEN2; i++) - { - postFft[i] = fft[i]; - } - // The imaginary part has to switch sign - for (i = 1; i < PART_LEN2;) - { - postFft[i] = -postFft[i]; - i += 2; - } - - // Extract imaginary and real part, calculate the magnitude for all frequency bins - xfwImag[0] = 0; - xfwImag[PART_LEN] = 0; - xfwReal[0] = postFft[0]; - xfwReal[PART_LEN] = fft[PART_LEN2]; - xfa[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(xfwReal[0]); - xfa[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(xfwReal[PART_LEN]); - xfaSum = (WebRtc_UWord32)(xfa[0]) + (WebRtc_UWord32)(xfa[PART_LEN]); - - for (i = 1; i < PART_LEN; i++) - { - j = WEBRTC_SPL_LSHIFT_W32(i,1); - xfwReal[i] = postFft[j]; - xfwImag[i] = postFft[j + 1]; - - if (xfwReal[i] == 0 || xfwImag[i] == 0) - { - xfa[i] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(xfwReal[i] + xfwImag[i]); - } else - { - // Approximation for magnitude of complex fft output - // magn = sqrt(real^2 + imag^2) - // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) - // - // The parameters alpha and beta are stored in Q15 - - tmp16no1 = WEBRTC_SPL_ABS_W16(postFft[j]); - tmp16no2 = WEBRTC_SPL_ABS_W16(postFft[j + 1]); - -#ifdef AECM_WITH_ABS_APPROX - if(tmp16no1 > xfwImag[i]) - { - maxValue = tmp16no1; - minValue = tmp16no2; - } else - { - maxValue = tmp16no2; - minValue = tmp16no1; - } - // Magnitude in Q-6 - if ((maxValue >> 2) > minValue) - { - alpha = kAlpha1; - beta = kBeta1; - } else if ((maxValue >> 1) > minValue) - { - alpha = kAlpha2; - beta = kBeta2; - } else - { - alpha = kAlpha3; - beta = kBeta3; - } - tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(maxValue, alpha, 15); - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(minValue, beta, 15); - xfa[i] = (WebRtc_UWord16)tmp16no1 + (WebRtc_UWord16)tmp16no2; -#else - tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1); - tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2); - tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2); - tmp32no1 = WebRtcSpl_Sqrt(tmp32no2); - xfa[i] = (WebRtc_UWord16)tmp32no1; -#endif - } - xfaSum += (WebRtc_UWord32)xfa[i]; + // Transform clean near end signal from time domain to frequency domain. + zerosDBufClean = TimeToFrequencyDomain(aecm->dBufClean, + dfwReal, + dfwImag, + dfaClean, + &dfaCleanSum); + aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; + aecm->dfaCleanQDomain = zerosDBufClean; } #ifdef ARM_WINM_LOG_ @@ -1923,27 +1305,23 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons diff__ = ((end - start) * 1000) / (freq/1000); milliseconds = (unsigned int)(diff__ & 0xffffffff); WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); -#endif - // END: FFT of far end signal - - // Get the delay - - // Fixed delay estimation - // input: dfaFIX, xfaFIX in Q-stages - // output: delay in Q0 - // - // comment on the fixed point accuracy of estimate_delayFIX - // -> due to rounding the fixed point variables xfa and dfa contain a lot more zeros - // than the corresponding floating point variables this results in big differences - // between the floating point and the fixed point logarithmic spectra for small values -#ifdef ARM_WINM_LOG_ // measure tick start QueryPerformanceCounter((LARGE_INTEGER*)&start); #endif + // Get the delay // Save far-end history and estimate delay - delay = WebRtcAecm_EstimateDelay(aecm, xfa, dfaNoisy, zerosXBuf); - + delay = WebRtcAecm_DelayEstimatorProcess(aecm->delay_estimator, + xfa, + dfaNoisy, + PART_LEN1, + zerosXBuf, + aecm->currentVADValue); + if (delay < 0) + { + // We have an error. Continue with last delay value. + delay = aecm->currentDelay; + } if (aecm->fixedDelay >= 0) { // Use fixed delay @@ -1952,53 +1330,37 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons aecm->currentDelay = delay; - if ((aecm->delayOffsetFlag) & (aecm->startupState > 0)) // If delay compensation is on - { - // If the delay estimate changed from previous block, update the offset - if ((aecm->currentDelay != aecm->previousDelay) & !aecm->currentDelay - & !aecm->previousDelay) - { - aecm->delayAdjust += (aecm->currentDelay - aecm->previousDelay); - } - // Compensate with the offset estimate - aecm->currentDelay -= aecm->delayAdjust; - aecm->previousDelay = delay; - } - - diff = aecm->delHistoryPos - aecm->currentDelay; - if (diff < 0) - { - diff = diff + MAX_DELAY; - } - #ifdef ARM_WINM_LOG_ // measure tick end QueryPerformanceCounter((LARGE_INTEGER*)&end); diff__ = ((end - start) * 1000) / (freq/1000); milliseconds = (unsigned int)(diff__ & 0xffffffff); WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); -#endif - - // END: Get the delay - -#ifdef ARM_WINM_LOG_ // measure tick start QueryPerformanceCounter((LARGE_INTEGER*)&start); #endif + // Get aligned far end spectrum + far_spectrum_ptr = WebRtcAecm_GetAlignedFarend(aecm->delay_estimator, + &zerosXBuf); + if (far_spectrum_ptr == NULL) + { + // We have an error. Continue without suppression, which can be done by + // using a zero far end signal. + memset(xfa, 0, sizeof(WebRtc_UWord16) * PART_LEN1); + far_spectrum_ptr = xfa; + } // Calculate log(energy) and update energy threshold levels - WebRtcAecm_CalcEnergies(aecm, diff, dfaNoisySum, echoEst32); + WebRtcAecm_CalcEnergies(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisySum, echoEst32); // Calculate stepsize mu = WebRtcAecm_CalcStepSize(aecm); // Update counters aecm->totCount++; - aecm->lastDelayUpdateCount++; // This is the channel estimation algorithm. // It is base on NLMS but has a variable step length, which was calculated above. - WebRtcAecm_UpdateChannel(aecm, dfaNoisy, diff, mu, echoEst32); - WebRtcAecm_DelayCompensation(aecm); + WebRtcAecm_UpdateChannel(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisy, mu, echoEst32); supGain = WebRtcAecm_CalcSuppressionGain(aecm); #ifdef ARM_WINM_LOG_ @@ -2007,20 +1369,11 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons diff__ = ((end - start) * 1000) / (freq/1000); milliseconds = (unsigned int)(diff__ & 0xffffffff); WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); -#endif - -#ifdef ARM_WINM_LOG_ // measure tick start QueryPerformanceCounter((LARGE_INTEGER*)&start); #endif // Calculate Wiener filter hnl[] - numPosCoef = 0; - diffMinusOne = diff - 1; - if (diff == 0) - { - diffMinusOne = MAX_DELAY; - } for (i = 0; i < PART_LEN1; i++) { // Far end signal through channel estimate in Q8 @@ -2037,12 +1390,12 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons echoEst32Gained = WEBRTC_SPL_UMUL_32_16((WebRtc_UWord32)aecm->echoFilt[i], (WebRtc_UWord16)supGain); resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; - resolutionDiff += (aecm->dfaCleanQDomain - aecm->xfaQDomainBuf[diff]); + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); } else { tmp16no1 = 17 - zeros32 - zeros16; resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; - resolutionDiff += (aecm->dfaCleanQDomain - aecm->xfaQDomainBuf[diff]); + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); if (zeros32 > tmp16no1) { echoEst32Gained = WEBRTC_SPL_UMUL_32_16((WebRtc_UWord32)aecm->echoFilt[i], @@ -2065,12 +1418,13 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons qDomainDiff = zeros16 - aecm->dfaCleanQDomain + aecm->dfaCleanQDomainOld; } else { - tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], aecm->dfaCleanQDomain - - aecm->dfaCleanQDomainOld); + tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], + aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld); qDomainDiff = 0; } tmp16no2 = WEBRTC_SPL_SHIFT_W16(ptrDfaClean[i], qDomainDiff); - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no2 - tmp16no1, 1, 4); + tmp32no1 = (WebRtc_Word32)(tmp16no2 - tmp16no1); + tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 4); tmp16no2 += tmp16no1; zeros16 = WebRtcSpl_NormW16(tmp16no2); if ((tmp16no2) & (-qDomainDiff > zeros16)) @@ -2157,9 +1511,6 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons diff__ = ((end - start) * 1000) / (freq/1000); milliseconds = (unsigned int)(diff__ & 0xffffffff); WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); -#endif - -#ifdef ARM_WINM_LOG_ // measure tick start QueryPerformanceCounter((LARGE_INTEGER*)&start); #endif @@ -2214,9 +1565,6 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons diff__ = ((end - start) * 1000) / (freq/1000); milliseconds = (unsigned int)(diff__ & 0xffffffff); WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); -#endif - -#ifdef ARM_WINM_LOG_ // measure tick start QueryPerformanceCounter((LARGE_INTEGER*)&start); #endif @@ -2300,7 +1648,8 @@ void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * cons // \param[in,out] outImag Imaginary part of the output signal (Q[aecm->dfaQDomain]). // \param[in] lambda Suppression gain with which to scale the noise level (Q14). // -static void WebRtcAecm_ComfortNoise(AecmCore_t * const aecm, const WebRtc_UWord16 * const dfa, +static void WebRtcAecm_ComfortNoise(AecmCore_t * const aecm, + const WebRtc_UWord16 * const dfa, WebRtc_Word16 * const outReal, WebRtc_Word16 * const outImag, const WebRtc_Word16 * const lambda) diff --git a/src/modules/audio_processing/aecm/main/source/aecm_core.h b/src/modules/audio_processing/aecm/main/source/aecm_core.h index a5a8296ba..b062bb34f 100644 --- a/src/modules/audio_processing/aecm/main/source/aecm_core.h +++ b/src/modules/audio_processing/aecm/main/source/aecm_core.h @@ -17,14 +17,8 @@ //#define AECM_WITH_ABS_APPROX //#define AECM_SHORT // for 32 sample partition length (otherwise 64) -// TODO(bjornv): These defines will be removed in final version. -//#define STORE_CHANNEL_DATA -//#define VAD_DATA - #include "typedefs.h" #include "signal_processing_library.h" -// TODO(bjornv): Will be removed in final version. -#include // Algorithm parameters @@ -127,29 +121,21 @@ typedef struct WebRtc_UWord32 seed; // Delay estimation variables - WebRtc_UWord16 medianYlogspec[PART_LEN1]; - WebRtc_UWord16 medianXlogspec[PART_LEN1]; - WebRtc_UWord16 medianBCount[MAX_DELAY]; - WebRtc_UWord16 xfaHistory[PART_LEN1][MAX_DELAY]; - WebRtc_Word16 delHistoryPos; - WebRtc_UWord32 bxHistory[MAX_DELAY]; + void* delay_estimator; WebRtc_UWord16 currentDelay; - WebRtc_UWord16 previousDelay; - WebRtc_Word16 delayAdjust; WebRtc_Word16 nlpFlag; WebRtc_Word16 fixedDelay; WebRtc_UWord32 totCount; - WebRtc_Word16 xfaQDomainBuf[MAX_DELAY]; WebRtc_Word16 dfaCleanQDomain; WebRtc_Word16 dfaCleanQDomainOld; WebRtc_Word16 dfaNoisyQDomain; WebRtc_Word16 dfaNoisyQDomainOld; WebRtc_Word16 nearLogEnergy[MAX_BUF_LEN]; - WebRtc_Word16 farLogEnergy[MAX_BUF_LEN]; + WebRtc_Word16 farLogEnergy; WebRtc_Word16 echoAdaptLogEnergy[MAX_BUF_LEN]; WebRtc_Word16 echoStoredLogEnergy[MAX_BUF_LEN]; @@ -176,43 +162,16 @@ typedef struct WebRtc_Word16 currentVADValue; WebRtc_Word16 vadUpdateCount; - WebRtc_Word16 delayHistogram[MAX_DELAY]; - WebRtc_Word16 delayVadCount; - WebRtc_Word16 maxDelayHistIdx; - WebRtc_Word16 lastMinPos; - WebRtc_Word16 startupState; WebRtc_Word16 mseChannelCount; - WebRtc_Word16 delayCount; - WebRtc_Word16 newDelayCorrData; - WebRtc_Word16 lastDelayUpdateCount; - WebRtc_Word16 delayCorrelation[CORR_BUF_LEN]; WebRtc_Word16 supGain; WebRtc_Word16 supGainOld; - WebRtc_Word16 delayOffsetFlag; WebRtc_Word16 supGainErrParamA; WebRtc_Word16 supGainErrParamD; WebRtc_Word16 supGainErrParamDiffAB; WebRtc_Word16 supGainErrParamDiffBD; - // TODO(bjornv): Will be removed after final version has been committed. -#ifdef VAD_DATA - FILE *vad_file; - FILE *delay_file; - FILE *far_file; - FILE *far_cur_file; - FILE *far_min_file; - FILE *far_max_file; - FILE *far_vad_file; -#endif - - // TODO(bjornv): Will be removed after final version has been committed. -#ifdef STORE_CHANNEL_DATA - FILE *channel_file; - FILE *channel_file_init; -#endif - #ifdef AEC_DEBUG FILE *farFile; FILE *nearFile; @@ -266,7 +225,7 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq); // int WebRtcAecm_FreeCore(AecmCore_t *aecm); -int WebRtcAecm_Control(AecmCore_t *aecm, int delay, int nlpFlag, int delayOffsetFlag); +int WebRtcAecm_Control(AecmCore_t *aecm, int delay, int nlpFlag); /////////////////////////////////////////////////////////////////////////////////////////////// // WebRtcAecm_InitEchoPathCore(...) diff --git a/src/modules/audio_processing/aecm/main/source/aecm_delay_estimator.c b/src/modules/audio_processing/aecm/main/source/aecm_delay_estimator.c new file mode 100644 index 000000000..b7eed1849 --- /dev/null +++ b/src/modules/audio_processing/aecm/main/source/aecm_delay_estimator.c @@ -0,0 +1,550 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "aecm_delay_estimator.h" + +#include +#include + +#include "signal_processing_library.h" +#include "typedefs.h" + +typedef struct +{ + // Pointers to mean values of spectrum and bit counts + WebRtc_Word32* mean_far_spectrum; + WebRtc_Word32* mean_near_spectrum; + WebRtc_Word32* mean_bit_counts; + + // Binary history variables + WebRtc_UWord32* binary_far_history; + + // Far end history variables + WebRtc_UWord16* far_history; + int far_history_position; + WebRtc_Word16* far_q_domains; + + // Delay histogram variables + WebRtc_Word16* delay_histogram; + WebRtc_Word16 vad_counter; + + // Delay memory + int last_delay; + + // Buffer size parameters + int history_size; + int spectrum_size; + +} DelayEstimator_t; + +// Only bit |kBandFirst| through bit |kBandLast| are processed +// |kBandFirst| - |kBandLast| must be < 32 +static const int kBandFirst = 12; +static const int kBandLast = 43; + +static __inline WebRtc_UWord32 SetBit(WebRtc_UWord32 in, + WebRtc_Word32 pos) +{ + WebRtc_UWord32 mask = WEBRTC_SPL_LSHIFT_W32(1, pos); + WebRtc_UWord32 out = (in | mask); + + return out; +} + +// Compares the binary vector |binary_vector| with all rows of the binary +// matrix |binary_matrix| and counts per row the number of times they have the +// same value. +// Input: +// - binary_vector : binary "vector" stored in a long +// - binary_matrix : binary "matrix" stored as a vector of long +// - matrix_size : size of binary "matrix" +// Output: +// - bit_counts : "Vector" stored as a long, containing for each +// row the number of times the matrix row and the +// input vector have the same value +// +static void BitCountComparison(const WebRtc_UWord32 binary_vector, + const WebRtc_UWord32* binary_matrix, + int matrix_size, + WebRtc_Word32* bit_counts) +{ + int n = 0; + WebRtc_UWord32 a = binary_vector; + register WebRtc_UWord32 tmp; + + // compare binary vector |binary_vector| with all rows of the binary matrix + // |binary_matrix| + for (; n < matrix_size; n++) + { + a = (binary_vector ^ binary_matrix[n]); + // Returns bit counts in tmp + tmp = a - ((a >> 1) & 033333333333) - ((a >> 2) & 011111111111); + tmp = ((tmp + (tmp >> 3)) & 030707070707); + tmp = (tmp + (tmp >> 6)); + tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077; + + bit_counts[n] = (WebRtc_Word32)tmp; + } +} + +// Computes the binary spectrum by comparing the input |spectrum| with a +// |threshold_spectrum|. +// +// Input: +// - spectrum : Spectrum of which the binary spectrum should +// be calculated. +// - threshold_spectrum : Threshold spectrum with which the input +// spectrum is compared. +// Return: +// - out : Binary spectrum +// +static WebRtc_UWord32 GetBinarySpectrum(WebRtc_Word32* spectrum, + WebRtc_Word32* threshold_spectrum) +{ + int k = kBandFirst; + WebRtc_UWord32 out = 0; + + for (; k <= kBandLast; k++) + { + if (spectrum[k] > threshold_spectrum[k]) + { + out = SetBit(out, k - kBandFirst); + } + } + + return out; +} + +// Calculates the mean recursively. +// +// Input: +// - new_value : new additional value +// - factor : factor for smoothing +// +// Input/Output: +// - mean_value : pointer to the mean value that should be updated +// +static void MeanEstimator(const WebRtc_Word32 new_value, + int factor, + WebRtc_Word32* mean_value) +{ + WebRtc_Word32 mean_new = *mean_value; + WebRtc_Word32 diff = new_value - mean_new; + + // mean_new = mean_value + ((new_value - mean_value) >> factor); + if (diff < 0) + { + diff = -WEBRTC_SPL_RSHIFT_W32(-diff, factor); + } + else + { + diff = WEBRTC_SPL_RSHIFT_W32(diff, factor); + } + mean_new += diff; + + *mean_value = mean_new; +} + +// Moves the pointer to the next entry and inserts new far end spectrum and +// corresponding Q-domain in its buffer. +// +// Input: +// - handle : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// - far_q : Q-domain of far end spectrum +// +static void UpdateFarHistory(DelayEstimator_t* self, + WebRtc_UWord16* far_spectrum, + WebRtc_Word16 far_q) +{ + // Get new buffer position + self->far_history_position++; + if (self->far_history_position >= self->history_size) + { + self->far_history_position = 0; + } + // Update Q-domain buffer + self->far_q_domains[self->far_history_position] = far_q; + // Update far end spectrum buffer + memcpy(&(self->far_history[self->far_history_position * self->spectrum_size]), + far_spectrum, + sizeof(WebRtc_UWord16) * self->spectrum_size); +} + +int WebRtcAecm_FreeDelayEstimator(void* handle) +{ + DelayEstimator_t* self = (DelayEstimator_t*)handle; + + if (self == NULL) + { + return -1; + } + + if (self->mean_far_spectrum != NULL) + { + free(self->mean_far_spectrum); + self->mean_far_spectrum = NULL; + } + if (self->mean_near_spectrum != NULL) + { + free(self->mean_near_spectrum); + self->mean_near_spectrum = NULL; + } + if (self->far_history != NULL) + { + free(self->far_history); + self->far_history = NULL; + } + if (self->mean_bit_counts != NULL) + { + free(self->mean_bit_counts); + self->mean_bit_counts = NULL; + } + if (self->binary_far_history != NULL) + { + free(self->binary_far_history); + self->binary_far_history = NULL; + } + if (self->far_q_domains != NULL) + { + free(self->far_q_domains); + self->far_q_domains = NULL; + } + if (self->delay_histogram != NULL) + { + free(self->delay_histogram); + self->delay_histogram = NULL; + } + + free(self); + + return 0; +} + +int WebRtcAecm_CreateDelayEstimator(void** handle, + int spectrum_size, + int history_size) +{ + // Check if the sub band used in the delay estimation is small enough to + // fit in a Word32. + assert(kBandLast - kBandFirst < 32); + + DelayEstimator_t *self = NULL; + if (spectrum_size < kBandLast) + { + return -1; + } + if (history_size < 0) + { + return -1; + } + + self = malloc(sizeof(DelayEstimator_t)); + *handle = self; + if (self == NULL) + { + return -1; + } + + self->mean_far_spectrum = NULL; + self->mean_near_spectrum = NULL; + self->far_history = NULL; + self->mean_bit_counts = NULL; + self->binary_far_history = NULL; + self->far_q_domains = NULL; + self->delay_histogram = NULL; + + // Allocate memory for spectrum buffers + self->mean_far_spectrum = malloc(spectrum_size * sizeof(WebRtc_Word32)); + if (self->mean_far_spectrum == NULL) + { + WebRtcAecm_FreeDelayEstimator(self); + self = NULL; + return -1; + } + self->mean_near_spectrum = malloc(spectrum_size * sizeof(WebRtc_Word32)); + if (self->mean_near_spectrum == NULL) + { + WebRtcAecm_FreeDelayEstimator(self); + self = NULL; + return -1; + } + // Allocate memory for history buffers + self->far_history = malloc(spectrum_size * history_size * + sizeof(WebRtc_UWord16)); + if (self->far_history == NULL) + { + WebRtcAecm_FreeDelayEstimator(self); + self = NULL; + return -1; + } + self->mean_bit_counts = malloc(history_size * sizeof(WebRtc_Word32)); + if (self->mean_bit_counts == NULL) + { + WebRtcAecm_FreeDelayEstimator(self); + self = NULL; + return -1; + } + self->binary_far_history = malloc(history_size * sizeof(WebRtc_UWord32)); + if (self->binary_far_history == NULL) + { + WebRtcAecm_FreeDelayEstimator(self); + self = NULL; + return -1; + } + self->far_q_domains = malloc(history_size * sizeof(WebRtc_Word16)); + if (self->far_q_domains == NULL) + { + WebRtcAecm_FreeDelayEstimator(self); + self = NULL; + return -1; + } + self->delay_histogram = malloc(history_size * sizeof(WebRtc_Word16)); + if (self->delay_histogram == NULL) + { + WebRtcAecm_FreeDelayEstimator(self); + self = NULL; + return -1; + } + + self->spectrum_size = spectrum_size; + self->history_size = history_size; + + return 0; +} + +int WebRtcAecm_InitDelayEstimator(void* handle) +{ + DelayEstimator_t* self = (DelayEstimator_t*)handle; + + if (self == NULL) + { + return -1; + } + // Set averaged far and near end spectra to zero + memset(self->mean_far_spectrum, + 0, + sizeof(WebRtc_Word32) * self->spectrum_size); + memset(self->mean_near_spectrum, + 0, + sizeof(WebRtc_Word32) * self->spectrum_size); + // Set averaged bit counts to zero + memset(self->mean_bit_counts, + 0, + sizeof(WebRtc_Word32) * self->history_size); + // Set far end histories to zero + memset(self->binary_far_history, + 0, + sizeof(WebRtc_UWord32) * self->history_size); + memset(self->far_history, + 0, + sizeof(WebRtc_UWord16) * self->spectrum_size * + self->history_size); + memset(self->far_q_domains, + 0, + sizeof(WebRtc_Word16) * self->history_size); + + self->far_history_position = self->history_size; + // Set delay histogram to zero + memset(self->delay_histogram, + 0, + sizeof(WebRtc_Word16) * self->history_size); + // Set VAD counter to zero + self->vad_counter = 0; + // Set delay memory to zero + self->last_delay = 0; + + return 0; +} + +int WebRtcAecm_DelayEstimatorProcess(void* handle, + WebRtc_UWord16* far_spectrum, + WebRtc_UWord16* near_spectrum, + int spectrum_size, + WebRtc_Word16 far_q, + WebRtc_Word16 vad_value) +{ + DelayEstimator_t* self = (DelayEstimator_t*)handle; + + WebRtc_UWord32 bxspectrum, byspectrum; + + int i; + + WebRtc_Word32 dtmp1; + + WebRtc_Word16 maxHistLvl = 0; + WebRtc_Word16 minpos = -1; + + const int kVadCountThreshold = 25; + const int kMaxHistogram = 600; + + if (self == NULL) + { + return -1; + } + + WebRtc_Word32 bit_counts[self->history_size]; + WebRtc_Word32 far_spectrum_32[self->spectrum_size]; + WebRtc_Word32 near_spectrum_32[self->spectrum_size]; + + if (spectrum_size != self->spectrum_size) + { + // Data sizes don't match + return -1; + } + if (far_q > 15) + { + // If far_Q is larger than 15 we can not guarantee no wrap around + return -1; + } + + // Update far end history + UpdateFarHistory(self, far_spectrum, far_q); + // Update the far and near end means + for (i = 0; i < self->spectrum_size; i++) + { + far_spectrum_32[i] = (WebRtc_Word32)far_spectrum[i]; + MeanEstimator(far_spectrum_32[i], 6, &(self->mean_far_spectrum[i])); + + near_spectrum_32[i] = (WebRtc_Word32)near_spectrum[i]; + MeanEstimator(near_spectrum_32[i], 6, &(self->mean_near_spectrum[i])); + } + + // Shift binary spectrum history + memmove(&(self->binary_far_history[1]), + &(self->binary_far_history[0]), + (self->history_size - 1) * sizeof(WebRtc_UWord32)); + + // Get binary spectra + bxspectrum = GetBinarySpectrum(far_spectrum_32, self->mean_far_spectrum); + byspectrum = GetBinarySpectrum(near_spectrum_32, self->mean_near_spectrum); + // Insert new binary spectrum + self->binary_far_history[0] = bxspectrum; + + // Compare with delayed spectra + BitCountComparison(byspectrum, + self->binary_far_history, + self->history_size, + bit_counts); + + // Smooth bit count curve + for (i = 0; i < self->history_size; i++) + { + // Update sum + // |bit_counts| is constrained to [0, 32], meaning we can smooth with a + // factor up to 2^26. We use Q9. + dtmp1 = WEBRTC_SPL_LSHIFT_W32(bit_counts[i], 9); // Q9 + MeanEstimator(dtmp1, 9, &(self->mean_bit_counts[i])); + } + + // Find minimum position of bit count curve + minpos = WebRtcSpl_MinIndexW32(self->mean_bit_counts, self->history_size); + + // If the farend has been active sufficiently long, begin accumulating a + // histogram of the minimum positions. Search for the maximum bin to + // determine the delay. + if (vad_value == 1) + { + if (self->vad_counter >= kVadCountThreshold) + { + // Increment the histogram at the current minimum position. + if (self->delay_histogram[minpos] < kMaxHistogram) + { + self->delay_histogram[minpos] += 3; + } + +#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) + // Decrement the entire histogram. + // Select the histogram index corresponding to the maximum bin as + // the delay. + self->last_delay = 0; + for (i = 0; i < self->history_size; i++) + { + if (self->delay_histogram[i] > 0) + { + self->delay_histogram[i]--; + } + if (self->delay_histogram[i] > maxHistLvl) + { + maxHistLvl = self->delay_histogram[i]; + self->last_delay = i; + } + } +#else + self->last_delay = 0; + + for (i = 0; i < self->history_size; i++) + { + WebRtc_Word16 tempVar = self->delay_histogram[i]; + + // Decrement the entire histogram. + if (tempVar > 0) + { + tempVar--; + self->delay_histogram[i] = tempVar; + + // Select the histogram index corresponding to the maximum + // bin as the delay. + if (tempVar > maxHistLvl) + { + maxHistLvl = tempVar; + self->last_delay = i; + } + } + } +#endif + } else + { + self->vad_counter++; + } + } else + { + self->vad_counter = 0; + } + + return self->last_delay; +} + +const WebRtc_UWord16* WebRtcAecm_GetAlignedFarend(void* handle, + WebRtc_Word16* far_q) +{ + DelayEstimator_t* self = (DelayEstimator_t*)handle; + int buffer_position = 0; + + if (self == NULL) + { + return NULL; + } + + // Get buffer position + buffer_position = self->far_history_position - self->last_delay; + if (buffer_position < 0) + { + buffer_position += self->history_size; + } + // Get Q-domain + *far_q = self->far_q_domains[buffer_position]; + // Return far end spectrum + return (self->far_history + (buffer_position * self->spectrum_size)); + +} + +int WebRtcAecm_GetLastDelay(void* handle) +{ + DelayEstimator_t* self = (DelayEstimator_t*)handle; + + if (self == NULL) + { + return -1; + } + + // Return last calculated delay + return self->last_delay; +} diff --git a/src/modules/audio_processing/aecm/main/source/aecm_delay_estimator.h b/src/modules/audio_processing/aecm/main/source/aecm_delay_estimator.h new file mode 100644 index 000000000..dcd5cc2b6 --- /dev/null +++ b/src/modules/audio_processing/aecm/main/source/aecm_delay_estimator.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs delay estimation on a block by block basis +// The return value is 0 - OK and -1 - Error, unless otherwise stated. + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_ + +#include "typedefs.h" + +// Releases the memory allocated by WebRtcAecm_CreateDelayEstimator(...) +// Input: +// - handle : Pointer to the delay estimation instance +// +int WebRtcAecm_FreeDelayEstimator(void* handle); + +// Allocates the memory needed by the delay estimation. The memory needs to be +// initialized separately using the WebRtcAecm_InitDelayEstimator(...) function. +// +// Input: +// - handle : Instance that should be created +// - spectrum_size : Size of the spectrum used both in far end and near +// end. Used to allocate memory for spectrum specific +// buffers. +// - history_size : Size of the far end history used to estimate the +// delay from. Used to allocate memory for history +// specific buffers. +// +// Output: +// - handle : Created instance +// +int WebRtcAecm_CreateDelayEstimator(void** handle, + int spectrum_size, + int history_size); + +// Initializes the delay estimation instance created with +// WebRtcAecm_CreateDelayEstimator(...) +// Input: +// - handle : Pointer to the delay estimation instance +// +// Output: +// - handle : Initialized instance +// +int WebRtcAecm_InitDelayEstimator(void* handle); + +// Estimates and returns the delay between the far end and near end blocks. +// Input: +// - handle : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum data +// - near_spectrum : Pointer to the near end spectrum data of the current +// block +// - spectrum_size : The size of the data arrays (same for both far and +// near end) +// - far_q : The Q-domain of the far end data +// - vad_value : The VAD decision of the current block +// +// Output: +// - handle : Updated instance +// +// Return value: +// - delay : >= 0 - Calculated delay value +// -1 - Error +// +int WebRtcAecm_DelayEstimatorProcess(void* handle, + WebRtc_UWord16* far_spectrum, + WebRtc_UWord16* near_spectrum, + int spectrum_size, + WebRtc_Word16 far_q, + WebRtc_Word16 vad_value); + +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtcAecm_DelayEstimatorProcess(...) should +// have been called before WebRtcAecm_GetAlignedFarend(...). Otherwise, you get +// the pointer to the previous frame. The memory is only valid until the next +// call of WebRtcAecm_DelayEstimatorProcess(...). +// +// Inputs: +// - handle : Pointer to the delay estimation instance +// +// Output: +// - far_q : The Q-domain of the aligned far end spectrum +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const WebRtc_UWord16* WebRtcAecm_GetAlignedFarend(void* handle, + WebRtc_Word16* far_q); + +// Returns the last calculated delay updated by the function +// WebRtcAecm_DelayEstimatorProcess(...) +// +// Inputs: +// - handle : Pointer to the delay estimation instance +// +// Return value: +// - delay : >= 0 - Last calculated delay value +// -1 - Error +// +int WebRtcAecm_GetLastDelay(void* handle); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_